Index: projects/clang390-import/cddl/usr.sbin/zfsd/tests/Makefile =================================================================== --- projects/clang390-import/cddl/usr.sbin/zfsd/tests/Makefile (revision 305016) +++ projects/clang390-import/cddl/usr.sbin/zfsd/tests/Makefile (revision 305017) @@ -1,45 +1,45 @@ # $FreeBSD$ SRCDIR=${.CURDIR}/../../../.. .include "${.CURDIR}/../Makefile.common" .PATH: ${.CURDIR}/.. -TESTSDIR?= ${TESTSBASE}/cddl/sbin/zfsd +TESTSDIR?= ${TESTSBASE}/cddl/usr.sbin/zfsd PLAIN_TESTS_CXX= zfsd_unittest SRCS.zfsd_unittest:= ${SRCS:Nzfsd_main.cc} SRCS.zfsd_unittest+= libmocks.c zfsd_unittest.cc SRCS= # Use #include <zfsd/xxx.h> in test programs. INCFLAGS+= -I${.CURDIR}/../.. .if defined(DESTDIR) INCFLAGS+= -I${DESTDIR}/usr/include LIBRARY_PATH= ${DESTDIR}/lib:${DESTDIR}/usr/lib LDFLAGS.zfsd_unittest+= -L${DESTDIR}/lib -L${DESTDIR}/usr/lib .elif defined(WORLDTMP) INCFLAGS+= -I${WORLDTMP}/usr/include LIBRARY_PATH= ${WORLDTMP}/lib:${WORLDTMP}/usr/lib LDFLAGS.zfsd_unittest+= -L${WORLDTMP}/lib -L${WORLDTMP}/usr/lib .else LIBRARY_PATH= .endif # Googletest options LOCALBASE?= /usr/local INCFLAGS+= -I${LOCALBASE}/include -D_THREAD_SAFE -pthread LDFLAGS.zfsd_unittest+= -L${LOCALBASE}/lib -D_THREAD_SAFE -pthread LDADD.zfsd_unittest+= ${LOCALBASE}/lib/libgtest.a # GoogleMock options LDADD.zfsd_unittest+= ${LOCALBASE}/lib/libgmock.a ${LOCALBASE}/lib/libgmock_main.a # Googlemock fails if we don't have this line # https://groups.google.com/forum/#!msg/googletestframework/h8ixEPCFm0o/amwfu4xGJb0J CFLAGS.zfsd_unittest+= -DGTEST_HAS_PTHREAD # Install the tests TESTSBASE?= /usr/tests .include <bsd.test.mk>
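PLAIN_TESTS_CXX registers zfsd_unittest with the FreeBSD test suite under kyua's plain interface, which judges the program purely by its exit status, and because the Makefile links ${LOCALBASE}/lib/libgmock_main.a the test sources never define main(). A minimal sketch of a test written for that arrangement (the test names are illustrative, not part of zfsd):

// main() is supplied by libgmock_main.a, which initializes Google Mock
// and Google Test and exits non-zero if any expectation fails.
#include <gtest/gtest.h>

TEST(Smoke, ArithmeticHolds)
{
	// Under the plain interface, a failing expectation becomes a
	// non-zero exit status, which kyua reports as a failed test.
	EXPECT_EQ(4, 2 + 2);
}
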
Index: projects/clang390-import/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc =================================================================== --- projects/clang390-import/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc (revision 305016) +++ projects/clang390-import/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc (revision 305017) @@ -1,771 +1,770 @@ /*- * Copyright (c) 2012, 2013, 2014 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Alan Somers (Spectra Logic Corporation) */ #include <sys/cdefs.h> #include <sys/byteorder.h> #include <stdarg.h> #include <syslog.h> #include <libnvpair.h> #include <libzfs.h> #include <list> #include <map> #include <sstream> #include <string> #include <gmock/gmock.h> #include <gtest/gtest.h> #include <devdctl/guid.h> #include <devdctl/event.h> #include <devdctl/event_factory.h> #include <devdctl/exception.h> #include <devdctl/consumer.h> #include <zfsd/callout.h> #include <zfsd/vdev_iterator.h> #include <zfsd/zfsd_event.h> #include <zfsd/case_file.h> #include <zfsd/vdev.h> #include <zfsd/zfsd.h> #include <zfsd/zfsd_exception.h> #include <zfsd/zpool_list.h> #include "libmocks.h" __FBSDID("$FreeBSD$"); /*================================== Macros ==================================*/ #define NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x)) /*============================ Namespace Control =============================*/ using std::string; using std::stringstream; using DevdCtl::Event; -using DevdCtl::EventBuffer; using DevdCtl::EventFactory; using DevdCtl::EventList; using DevdCtl::Guid; using DevdCtl::NVPairMap; /* redefine zpool_handle here because libzfs_impl.h is not includable */ struct zpool_handle { libzfs_handle_t *zpool_hdl; zpool_handle_t *zpool_next; char zpool_name[ZPOOL_MAXNAMELEN]; int zpool_state; size_t zpool_config_size; nvlist_t *zpool_config; nvlist_t *zpool_old_config; nvlist_t *zpool_props; diskaddr_t zpool_start_block; }; class MockZfsEvent : public ZfsEvent { public: MockZfsEvent(Event::Type, NVPairMap&, const string&); virtual ~MockZfsEvent() {} static BuildMethod MockZfsEventBuilder; MOCK_CONST_METHOD0(ProcessPoolEvent, void()); static EventFactory::Record s_buildRecords[]; }; EventFactory::Record MockZfsEvent::s_buildRecords[] = { { Event::NOTIFY, "ZFS", &MockZfsEvent::MockZfsEventBuilder } }; MockZfsEvent::MockZfsEvent(Event::Type type, NVPairMap& map, const string& str) : ZfsEvent(type, map, str) { } Event * MockZfsEvent::MockZfsEventBuilder(Event::Type type, NVPairMap &nvpairs, const string &eventString) { return (new MockZfsEvent(type, nvpairs, eventString)); } /* * A dummy Vdev class used for testing other classes */ class MockVdev : public Vdev { public: MockVdev(nvlist_t *vdevConfig); virtual ~MockVdev() {} MOCK_CONST_METHOD0(GUID, Guid()); MOCK_CONST_METHOD0(PoolGUID, Guid()); MOCK_CONST_METHOD0(State, vdev_state()); MOCK_CONST_METHOD0(PhysicalPath, string()); }; MockVdev::MockVdev(nvlist_t *vdevConfig) : Vdev(vdevConfig) { } /* * A CaseFile class with side effects removed, for testing */ class TestableCaseFile : public CaseFile { public: static TestableCaseFile &Create(Vdev &vdev); TestableCaseFile(Vdev &vdev); virtual ~TestableCaseFile() {} MOCK_METHOD0(Close, void()); MOCK_METHOD1(RegisterCallout, void(const Event &event)); MOCK_METHOD0(RefreshVdevState, bool()); MOCK_METHOD1(ReEvaluate, bool(const ZfsEvent &event)); bool RealReEvaluate(const ZfsEvent &event) { return (CaseFile::ReEvaluate(event)); } /* * This splices the event lists, a procedure that would normally be done * by OnGracePeriodEnded, but we don't necessarily call that in the * unit tests */ void SpliceEvents(); /* * Used by some of our expectations.
CaseFile does not publicize this */ static int getActiveCases() { return (s_activeCases.size()); } }; TestableCaseFile::TestableCaseFile(Vdev &vdev) : CaseFile(vdev) { } TestableCaseFile & TestableCaseFile::Create(Vdev &vdev) { TestableCaseFile *newCase; newCase = new TestableCaseFile(vdev); return (*newCase); } void TestableCaseFile::SpliceEvents() { m_events.splice(m_events.begin(), m_tentativeEvents); } /* * Test class ZfsdException */ class ZfsdExceptionTest : public ::testing::Test { protected: virtual void SetUp() { ASSERT_EQ(0, nvlist_alloc(&poolConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_string(poolConfig, ZPOOL_CONFIG_POOL_NAME, "unit_test_pool")); ASSERT_EQ(0, nvlist_add_uint64(poolConfig, ZPOOL_CONFIG_POOL_GUID, 0x1234)); ASSERT_EQ(0, nvlist_alloc(&vdevConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_uint64(vdevConfig, ZPOOL_CONFIG_GUID, 0x5678)); bzero(&poolHandle, sizeof(poolHandle)); poolHandle.zpool_config = poolConfig; } virtual void TearDown() { nvlist_free(poolConfig); nvlist_free(vdevConfig); } nvlist_t *poolConfig; nvlist_t *vdevConfig; zpool_handle_t poolHandle; }; TEST_F(ZfsdExceptionTest, StringConstructorNull) { ZfsdException ze(""); EXPECT_STREQ("", ze.GetString().c_str()); } TEST_F(ZfsdExceptionTest, StringConstructorFormatted) { ZfsdException ze(" %d %s", 55, "hello world"); EXPECT_STREQ(" 55 hello world", ze.GetString().c_str()); } TEST_F(ZfsdExceptionTest, LogSimple) { ZfsdException ze("unit test w/o vdev or pool"); ze.Log(); EXPECT_EQ(LOG_ERR, syslog_last_priority); EXPECT_STREQ("unit test w/o vdev or pool\n", syslog_last_message); } TEST_F(ZfsdExceptionTest, Pool) { const char msg[] = "Exception with pool name"; char expected[4096]; sprintf(expected, "Pool unit_test_pool: %s\n", msg); ZfsdException ze(poolConfig, msg); ze.Log(); EXPECT_STREQ(expected, syslog_last_message); } TEST_F(ZfsdExceptionTest, PoolHandle) { const char msg[] = "Exception with pool handle"; char expected[4096]; sprintf(expected, "Pool unit_test_pool: %s\n", msg); ZfsdException ze(&poolHandle, msg); ze.Log(); EXPECT_STREQ(expected, syslog_last_message); } /* * Test class Vdev */ class VdevTest : public ::testing::Test { protected: virtual void SetUp() { ASSERT_EQ(0, nvlist_alloc(&m_poolConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_uint64(m_poolConfig, ZPOOL_CONFIG_POOL_GUID, 0x1234)); ASSERT_EQ(0, nvlist_alloc(&m_vdevConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_uint64(m_vdevConfig, ZPOOL_CONFIG_GUID, 0x5678)); } virtual void TearDown() { nvlist_free(m_poolConfig); nvlist_free(m_vdevConfig); } nvlist_t *m_poolConfig; nvlist_t *m_vdevConfig; }; TEST_F(VdevTest, StateFromConfig) { vdev_stat_t vs; vs.vs_state = VDEV_STATE_OFFLINE; ASSERT_EQ(0, nvlist_add_uint64_array(m_vdevConfig, ZPOOL_CONFIG_VDEV_STATS, (uint64_t*)&vs, sizeof(vs) / sizeof(uint64_t))); Vdev vdev(m_poolConfig, m_vdevConfig); EXPECT_EQ(VDEV_STATE_OFFLINE, vdev.State()); } TEST_F(VdevTest, StateFaulted) { ASSERT_EQ(0, nvlist_add_uint64(m_vdevConfig, ZPOOL_CONFIG_FAULTED, 1)); Vdev vdev(m_poolConfig, m_vdevConfig); EXPECT_EQ(VDEV_STATE_FAULTED, vdev.State()); } /* * Test that we can construct a Vdev from the label information that is stored * on an available spare drive */ TEST_F(VdevTest, ConstructAvailSpare) { nvlist_t *labelConfig; ASSERT_EQ(0, nvlist_alloc(&labelConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_uint64(labelConfig, ZPOOL_CONFIG_GUID, 1948339428197961030)); ASSERT_EQ(0, nvlist_add_uint64(labelConfig, ZPOOL_CONFIG_POOL_STATE, POOL_STATE_SPARE)); EXPECT_NO_THROW(Vdev 
vdev(labelConfig)); nvlist_free(labelConfig); } /* Available spares will always show the HEALTHY state */ TEST_F(VdevTest, AvailSpareState) { nvlist_t *labelConfig; ASSERT_EQ(0, nvlist_alloc(&labelConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_uint64(labelConfig, ZPOOL_CONFIG_GUID, 1948339428197961030)); ASSERT_EQ(0, nvlist_add_uint64(labelConfig, ZPOOL_CONFIG_POOL_STATE, POOL_STATE_SPARE)); Vdev vdev(labelConfig); EXPECT_EQ(VDEV_STATE_HEALTHY, vdev.State()); nvlist_free(labelConfig); } /* Test the Vdev::IsSpare method */ TEST_F(VdevTest, IsSpare) { Vdev notSpare(m_poolConfig, m_vdevConfig); EXPECT_EQ(false, notSpare.IsSpare()); ASSERT_EQ(0, nvlist_add_uint64(m_vdevConfig, ZPOOL_CONFIG_IS_SPARE, 1)); Vdev isSpare(m_poolConfig, m_vdevConfig); EXPECT_EQ(true, isSpare.IsSpare()); } /* * Test class ZFSEvent */ class ZfsEventTest : public ::testing::Test { protected: virtual void SetUp() { m_eventFactory = new EventFactory(); m_eventFactory->UpdateRegistry(MockZfsEvent::s_buildRecords, NUM_ELEMENTS(MockZfsEvent::s_buildRecords)); m_event = NULL; } virtual void TearDown() { delete m_eventFactory; delete m_event; } EventFactory *m_eventFactory; Event *m_event; }; TEST_F(ZfsEventTest, ProcessPoolEventGetsCalled) { string evString("!system=ZFS " "subsystem=ZFS " "type=misc.fs.zfs.vdev_remove " "pool_name=foo " "pool_guid=9756779504028057996 " "vdev_guid=1631193447431603339 " "vdev_path=/dev/da1 " "timestamp=1348871594"); m_event = Event::CreateEvent(*m_eventFactory, evString); MockZfsEvent *mock_event = static_cast<MockZfsEvent *>(m_event); EXPECT_CALL(*mock_event, ProcessPoolEvent()).Times(1); mock_event->Process(); } /* * Test class CaseFile */ class CaseFileTest : public ::testing::Test { protected: virtual void SetUp() { m_eventFactory = new EventFactory(); m_eventFactory->UpdateRegistry(MockZfsEvent::s_buildRecords, NUM_ELEMENTS(MockZfsEvent::s_buildRecords)); m_event = NULL; nvlist_alloc(&m_vdevConfig, NV_UNIQUE_NAME, 0); ASSERT_EQ(0, nvlist_add_uint64(m_vdevConfig, ZPOOL_CONFIG_GUID, 0xbeef)); m_vdev = new MockVdev(m_vdevConfig); ON_CALL(*m_vdev, GUID()) .WillByDefault(::testing::Return(Guid(123))); ON_CALL(*m_vdev, PoolGUID()) .WillByDefault(::testing::Return(Guid(456))); ON_CALL(*m_vdev, State()) .WillByDefault(::testing::Return(VDEV_STATE_HEALTHY)); m_caseFile = &TestableCaseFile::Create(*m_vdev); ON_CALL(*m_caseFile, ReEvaluate(::testing::_)) .WillByDefault(::testing::Invoke(m_caseFile, &TestableCaseFile::RealReEvaluate)); return; } virtual void TearDown() { delete m_caseFile; nvlist_free(m_vdevConfig); delete m_vdev; delete m_event; delete m_eventFactory; } nvlist_t *m_vdevConfig; MockVdev *m_vdev; TestableCaseFile *m_caseFile; Event *m_event; EventFactory *m_eventFactory; }; /* * A Vdev with no events should not be degraded or faulted */ TEST_F(CaseFileTest, HealthyVdev) { EXPECT_FALSE(m_caseFile->ShouldDegrade()); EXPECT_FALSE(m_caseFile->ShouldFault()); }
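The ON_CALL(...).WillByDefault(::testing::Invoke(...)) wiring in CaseFileTest::SetUp above is the standard gmock delegate-to-real idiom: ReEvaluate remains observable as a mock call, yet by default runs the genuine CaseFile logic through RealReEvaluate. A self-contained sketch of the idiom with illustrative types (Base and MockBase are not zfsd classes; link against gmock_main as the Makefile does):

#include <gmock/gmock.h>

class Base {
public:
	virtual ~Base() {}
	virtual bool Handle(int x) { return (x > 0); }
};

class MockBase : public Base {
public:
	MOCK_METHOD1(Handle, bool(int));
	// Trampoline back to the real implementation, mirroring
	// TestableCaseFile::RealReEvaluate.
	bool RealHandle(int x) { return (Base::Handle(x)); }
};

TEST(Delegate, CallsRealImplementation)
{
	MockBase mock;
	ON_CALL(mock, Handle(::testing::_))
	    .WillByDefault(::testing::Invoke(&mock, &MockBase::RealHandle));
	EXPECT_CALL(mock, Handle(5));	// call count is still verified
	EXPECT_TRUE(mock.Handle(5));	// but Base::Handle actually runs
}
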
/* * A Vdev with only one event should not be degraded or faulted * For performance reasons, RefreshVdevState should not be called. */ TEST_F(CaseFileTest, HealthyishVdev) { string evString("!system=ZFS " "class=ereport.fs.zfs.io " "ena=12091638756982918145 " "parent_guid=13237004955564865395 " "parent_type=raidz " "pool=testpool.4415 " "pool_context=0 " "pool_failmode=wait " "pool_guid=456 " "subsystem=ZFS " "timestamp=1348867914 " "type=ereport.fs.zfs.io " "vdev_guid=123 " "vdev_path=/dev/da400 " "vdev_type=disk " "zio_blkid=622 " "zio_err=1 " "zio_level=-2 " "zio_object=0 " "zio_objset=37 " "zio_offset=25598976 " "zio_size=1024"); m_event = Event::CreateEvent(*m_eventFactory, evString); ZfsEvent *zfs_event = static_cast<ZfsEvent *>(m_event); EXPECT_CALL(*m_caseFile, RefreshVdevState()) .Times(::testing::Exactly(0)); EXPECT_TRUE(m_caseFile->ReEvaluate(*zfs_event)); EXPECT_FALSE(m_caseFile->ShouldDegrade()); EXPECT_FALSE(m_caseFile->ShouldFault()); } /* The case file should be closed when its pool is destroyed */ TEST_F(CaseFileTest, PoolDestroy) { string evString("!system=ZFS " "pool_name=testpool.4415 " "pool_guid=456 " "subsystem=ZFS " "timestamp=1348867914 " "type=misc.fs.zfs.pool_destroy "); m_event = Event::CreateEvent(*m_eventFactory, evString); ZfsEvent *zfs_event = static_cast<ZfsEvent *>(m_event); EXPECT_CALL(*m_caseFile, Close()); EXPECT_TRUE(m_caseFile->ReEvaluate(*zfs_event)); } /* * A Vdev with a very large number of IO errors should fault * For performance reasons, RefreshVdevState should be called at most once */ TEST_F(CaseFileTest, VeryManyIOErrors) { EXPECT_CALL(*m_caseFile, RefreshVdevState()) .Times(::testing::AtMost(1)) .WillRepeatedly(::testing::Return(true)); for(int i=0; i<100; i++) { stringstream evStringStream; evStringStream << "!system=ZFS " "class=ereport.fs.zfs.io " "ena=12091638756982918145 " "parent_guid=13237004955564865395 " "parent_type=raidz " "pool=testpool.4415 " "pool_context=0 " "pool_failmode=wait " "pool_guid=456 " "subsystem=ZFS " "timestamp="; evStringStream << i << " "; evStringStream << "type=ereport.fs.zfs.io " "vdev_guid=123 " "vdev_path=/dev/da400 " "vdev_type=disk " "zio_blkid=622 " "zio_err=1 " "zio_level=-2 " "zio_object=0 " "zio_objset=37 " "zio_offset=25598976 " "zio_size=1024"; Event *event(Event::CreateEvent(*m_eventFactory, evStringStream.str())); ZfsEvent *zfs_event = static_cast<ZfsEvent *>(event); EXPECT_TRUE(m_caseFile->ReEvaluate(*zfs_event)); delete event; } m_caseFile->SpliceEvents(); EXPECT_FALSE(m_caseFile->ShouldDegrade()); EXPECT_TRUE(m_caseFile->ShouldFault()); }
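Taken together, VeryManyIOErrors and the VeryManyChecksumErrors test that follows pin down the fault-management policy: a flood of I/O errors faults the vdev outright, while a flood of checksum errors only degrades it, since the device still responds but returns bad data. A sketch of that kind of count-based policy, with hypothetical names and thresholds (the real logic lives in CaseFile::ShouldFault and CaseFile::ShouldDegrade):

#include <cstddef>

// Illustrative event-count policy of the sort these tests exercise; the
// counts, window, and names here are assumptions, not zfsd's actual code.
struct ErrorCounts {
	size_t io;
	size_t checksum;
};

static bool
ShouldFault(const ErrorCounts &c, size_t threshold)
{
	return (c.io >= threshold);	// repeated I/O errors: drive is failing
}

static bool
ShouldDegrade(const ErrorCounts &c, size_t threshold)
{
	// Checksum errors mean bad data from a drive that still responds,
	// so mark it degraded rather than faulted.
	return (c.checksum >= threshold && c.io < threshold);
}
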
/* * A Vdev with a very large number of checksum errors should degrade * For performance reasons, RefreshVdevState should be called at most once */ TEST_F(CaseFileTest, VeryManyChecksumErrors) { EXPECT_CALL(*m_caseFile, RefreshVdevState()) .Times(::testing::AtMost(1)) .WillRepeatedly(::testing::Return(true)); for(int i=0; i<100; i++) { stringstream evStringStream; evStringStream << "!system=ZFS " "bad_cleared_bits=03000000000000803f50b00000000000 " "bad_range_clears=0000000e " "bad_range_sets=00000000 " "bad_ranges=0000000000000010 " "bad_ranges_min_gap=8 " "bad_set_bits=00000000000000000000000000000000 " "class=ereport.fs.zfs.checksum " "ena=12272856582652437505 " "parent_guid=5838204195352909894 " "parent_type=raidz pool=testpool.7640 " "pool_context=0 " "pool_failmode=wait " "pool_guid=456 " "subsystem=ZFS timestamp="; evStringStream << i << " "; evStringStream << "type=ereport.fs.zfs.checksum " "vdev_guid=123 " "vdev_path=/mnt/tmp/file1.7702 " "vdev_type=file " "zio_blkid=0 " "zio_err=0 " "zio_level=0 " "zio_object=3 " "zio_objset=0 " "zio_offset=16896 " "zio_size=512"; Event *event(Event::CreateEvent(*m_eventFactory, evStringStream.str())); ZfsEvent *zfs_event = static_cast<ZfsEvent *>(event); EXPECT_TRUE(m_caseFile->ReEvaluate(*zfs_event)); delete event; } m_caseFile->SpliceEvents(); EXPECT_TRUE(m_caseFile->ShouldDegrade()); EXPECT_FALSE(m_caseFile->ShouldFault()); } /* * Test CaseFile::ReEvaluateByGuid */ class ReEvaluateByGuidTest : public ::testing::Test { protected: virtual void SetUp() { m_eventFactory = new EventFactory(); m_eventFactory->UpdateRegistry(MockZfsEvent::s_buildRecords, NUM_ELEMENTS(MockZfsEvent::s_buildRecords)); m_event = Event::CreateEvent(*m_eventFactory, s_evString); nvlist_alloc(&m_vdevConfig, NV_UNIQUE_NAME, 0); ASSERT_EQ(0, nvlist_add_uint64(m_vdevConfig, ZPOOL_CONFIG_GUID, 0xbeef)); m_vdev456 = new ::testing::NiceMock<MockVdev>(m_vdevConfig); m_vdev789 = new ::testing::NiceMock<MockVdev>(m_vdevConfig); ON_CALL(*m_vdev456, GUID()) .WillByDefault(::testing::Return(Guid(123))); ON_CALL(*m_vdev456, PoolGUID()) .WillByDefault(::testing::Return(Guid(456))); ON_CALL(*m_vdev456, State()) .WillByDefault(::testing::Return(VDEV_STATE_HEALTHY)); ON_CALL(*m_vdev789, GUID()) .WillByDefault(::testing::Return(Guid(123))); ON_CALL(*m_vdev789, PoolGUID()) .WillByDefault(::testing::Return(Guid(789))); ON_CALL(*m_vdev789, State()) .WillByDefault(::testing::Return(VDEV_STATE_HEALTHY)); m_caseFile456 = NULL; m_caseFile789 = NULL; return; } virtual void TearDown() { delete m_caseFile456; delete m_caseFile789; nvlist_free(m_vdevConfig); delete m_vdev456; delete m_vdev789; delete m_event; delete m_eventFactory; } static string s_evString; nvlist_t *m_vdevConfig; ::testing::NiceMock<MockVdev> *m_vdev456; ::testing::NiceMock<MockVdev> *m_vdev789; TestableCaseFile *m_caseFile456; TestableCaseFile *m_caseFile789; Event *m_event; EventFactory *m_eventFactory; }; string ReEvaluateByGuidTest::s_evString( "!system=ZFS " "pool_guid=16271873792808333580 " "pool_name=foo " "subsystem=ZFS " "timestamp=1360620391 " "type=misc.fs.zfs.config_sync");
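Note the fixture's use of ::testing::NiceMock<MockVdev>: the ReEvaluateByGuid tests call GUID(), PoolGUID(), and State() incidentally, and NiceMock keeps those uninteresting calls from emitting warnings while still honoring the ON_CALL defaults. A minimal illustration with a hypothetical mock class:

#include <gmock/gmock.h>

class Widget {
public:
	virtual ~Widget() {}
	virtual int Id() const { return (0); }
};

class MockWidget : public Widget {
public:
	MOCK_CONST_METHOD0(Id, int());
};

TEST(NiceMockDemo, UninterestingCallsAreSilent)
{
	// NiceMock suppresses the "uninteresting mock function call" warning
	// a plain MockWidget would print; StrictMock would fail the test.
	::testing::NiceMock<MockWidget> w;
	ON_CALL(w, Id()).WillByDefault(::testing::Return(7));
	EXPECT_EQ(7, w.Id());	// no EXPECT_CALL needed, no warning emitted
}
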
/* * Test the ReEvaluateByGuid method on an empty list of casefiles. * We must create one event, even though it never gets used, because it will * be passed by reference to ReEvaluateByGuid */ TEST_F(ReEvaluateByGuidTest, ReEvaluateByGuid_empty) { ZfsEvent *zfs_event = static_cast<ZfsEvent *>(m_event); EXPECT_EQ(0, TestableCaseFile::getActiveCases()); CaseFile::ReEvaluateByGuid(Guid(456), *zfs_event); EXPECT_EQ(0, TestableCaseFile::getActiveCases()); } /* * Test the ReEvaluateByGuid method on a list of CaseFiles that contains only * one CaseFile, which doesn't match the criteria */ TEST_F(ReEvaluateByGuidTest, ReEvaluateByGuid_oneFalse) { m_caseFile456 = &TestableCaseFile::Create(*m_vdev456); ZfsEvent *zfs_event = static_cast<ZfsEvent *>(m_event); EXPECT_EQ(1, TestableCaseFile::getActiveCases()); EXPECT_CALL(*m_caseFile456, ReEvaluate(::testing::_)) .Times(::testing::Exactly(0)); CaseFile::ReEvaluateByGuid(Guid(789), *zfs_event); EXPECT_EQ(1, TestableCaseFile::getActiveCases()); } /* * Test the ReEvaluateByGuid method on a list of CaseFiles that contains only * one CaseFile, which does match the criteria */ TEST_F(ReEvaluateByGuidTest, ReEvaluateByGuid_oneTrue) { m_caseFile456 = &TestableCaseFile::Create(*m_vdev456); ZfsEvent *zfs_event = static_cast<ZfsEvent *>(m_event); EXPECT_EQ(1, TestableCaseFile::getActiveCases()); EXPECT_CALL(*m_caseFile456, ReEvaluate(::testing::_)) .Times(::testing::Exactly(1)) .WillRepeatedly(::testing::Return(false)); CaseFile::ReEvaluateByGuid(Guid(456), *zfs_event); EXPECT_EQ(1, TestableCaseFile::getActiveCases()); } /* * Test the ReEvaluateByGuid method on a long list of CaseFiles that contains a * few cases which meet the criteria */ TEST_F(ReEvaluateByGuidTest, ReEvaluateByGuid_five) { TestableCaseFile *CaseFile1 = &TestableCaseFile::Create(*m_vdev456); TestableCaseFile *CaseFile2 = &TestableCaseFile::Create(*m_vdev789); TestableCaseFile *CaseFile3 = &TestableCaseFile::Create(*m_vdev456); TestableCaseFile *CaseFile4 = &TestableCaseFile::Create(*m_vdev789); TestableCaseFile *CaseFile5 = &TestableCaseFile::Create(*m_vdev789); ZfsEvent *zfs_event = static_cast<ZfsEvent *>(m_event); EXPECT_EQ(5, TestableCaseFile::getActiveCases()); EXPECT_CALL(*CaseFile1, ReEvaluate(::testing::_)) .Times(::testing::Exactly(1)) .WillRepeatedly(::testing::Return(false)); EXPECT_CALL(*CaseFile3, ReEvaluate(::testing::_)) .Times(::testing::Exactly(1)) .WillRepeatedly(::testing::Return(false)); EXPECT_CALL(*CaseFile2, ReEvaluate(::testing::_)) .Times(::testing::Exactly(0)); EXPECT_CALL(*CaseFile4, ReEvaluate(::testing::_)) .Times(::testing::Exactly(0)); EXPECT_CALL(*CaseFile5, ReEvaluate(::testing::_)) .Times(::testing::Exactly(0)); CaseFile::ReEvaluateByGuid(Guid(456), *zfs_event); EXPECT_EQ(5, TestableCaseFile::getActiveCases()); delete CaseFile1; delete CaseFile2; delete CaseFile3; delete CaseFile4; delete CaseFile5; }
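The expectations above lean on gmock cardinalities: Times(::testing::Exactly(n)) fails the test if ReEvaluate runs any other number of times, and WillRepeatedly supplies the return value for every permitted call. A compact, self-contained example of the same pattern (Probe and MockProbe are illustrative names):

#include <gmock/gmock.h>

class Probe {
public:
	virtual ~Probe() {}
	virtual bool Poke() { return (true); }
};

class MockProbe : public Probe {
public:
	MOCK_METHOD0(Poke, bool());
};

TEST(Cardinality, ExactlyTwice)
{
	MockProbe p;
	EXPECT_CALL(p, Poke())
	    .Times(::testing::Exactly(2))
	    .WillRepeatedly(::testing::Return(false));
	EXPECT_FALSE(p.Poke());
	EXPECT_FALSE(p.Poke());	// a third call would fail the test
}
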
Index: projects/clang390-import/cddl/usr.sbin/zfsd/vdev.h =================================================================== --- projects/clang390-import/cddl/usr.sbin/zfsd/vdev.h (revision 305016) +++ projects/clang390-import/cddl/usr.sbin/zfsd/vdev.h (revision 305017) @@ -1,178 +1,188 @@ /*- * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) * * $FreeBSD$ */ /** * \file vdev.h * * Definition of the Vdev class. * * Header requirements: * * #include <list> * #include <string> * * #include <devdctl/guid.h> */ #ifndef _VDEV_H_ #define _VDEV_H_ /*=========================== Forward Declarations ===========================*/ struct zpool_handle; typedef struct zpool_handle zpool_handle_t; struct nvlist; typedef struct nvlist nvlist_t; /*============================= Class Definitions ============================*/ /*----------------------------------- Vdev -----------------------------------*/ /** * \brief Wrapper class for a vdev's name/value configuration list * simplifying access to commonly used vdev attributes. */ class Vdev { public: /** * \brief Instantiate a vdev object for a vdev that is a member * of an imported pool. * * \param pool The pool object containing the vdev with * configuration data provided in vdevConfig. * \param vdevConfig Vdev configuration data. * * This method should be used whenever dealing with vdev's * enumerated via the ZpoolList class. The in-core configuration * data for a vdev does not contain all of the items found in * the on-disk label. This requires the vdev class to augment * the data in vdevConfig with data found in the pool object. */ Vdev(zpool_handle_t *pool, nvlist_t *vdevConfig); /** * \brief Instantiate a vdev object for a vdev that is a member * of a pool configuration. * * \param poolConfig The pool configuration containing the vdev * configuration data provided in vdevConfig. * \param vdevConfig Vdev configuration data. * * This method should be used whenever dealing with vdev's * enumerated via the ZpoolList class. The in-core configuration * data for a vdev does not contain all of the items found in * the on-disk label. This requires the vdev class to augment * the data in vdevConfig with data found in the pool object. */ Vdev(nvlist_t *poolConfig, nvlist_t *vdevConfig); /** * \brief Instantiate a vdev object from a ZFS label stored on * the device. * * \param vdevConfig The name/value list retrieved by reading * the label information on a leaf vdev. */ Vdev(nvlist_t *vdevConfig); /** * \brief No-op copy constructor for nonexistent vdevs. */ Vdev(); + + /** + * \brief No-op virtual destructor, since this class has virtual + * functions. + */ + virtual ~Vdev(); bool DoesNotExist() const; /** * \brief Return a list of the vdev's children.
*/ std::list<Vdev> Children(); virtual DevdCtl::Guid GUID() const; bool IsSpare() const; virtual DevdCtl::Guid PoolGUID() const; virtual vdev_state State() const; std::string Path() const; virtual std::string PhysicalPath() const; std::string GUIDString() const; nvlist_t *PoolConfig() const; nvlist_t *Config() const; Vdev Parent(); Vdev RootVdev(); std::string Name(zpool_handle_t *, bool verbose) const; bool IsSpare(); bool IsAvailableSpare() const; bool IsActiveSpare() const; bool IsResilvering() const; private: void VdevLookupGuid(); bool VdevLookupPoolGuid(); DevdCtl::Guid m_poolGUID; DevdCtl::Guid m_vdevGUID; nvlist_t *m_poolConfig; nvlist_t *m_config; }; //- Special objects ----------------------------------------------------------- extern Vdev NonexistentVdev; //- Vdev Inline Public Methods ------------------------------------------------ +inline Vdev::~Vdev() +{ +} + inline DevdCtl::Guid Vdev::PoolGUID() const { return (m_poolGUID); } inline DevdCtl::Guid Vdev::GUID() const { return (m_vdevGUID); } inline nvlist_t * Vdev::PoolConfig() const { return (m_poolConfig); } inline nvlist_t * Vdev::Config() const { return (m_config); } inline bool Vdev::DoesNotExist() const { return (m_config == NULL); } #endif /* _VDEV_H_ */ Index: projects/clang390-import/cddl =================================================================== --- projects/clang390-import/cddl (revision 305016) +++ projects/clang390-import/cddl (revision 305017) Property changes on: projects/clang390-import/cddl ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl:r304885-305016
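The substantive change to vdev.h above is the new virtual destructor. Vdev already has virtual methods and the unit tests derive MockVdev from it, and deleting a derived object through a base pointer is undefined behavior unless the base destructor is virtual; the empty inline definition keeps the fix header-only. A minimal illustration (BaseV and DerivedV are made-up names, not zfsd types):

// Why the vdev.h hunk adds "virtual ~Vdev()": without a virtual base
// destructor, "delete base_ptr" never runs the derived destructor.
class BaseV {
public:
	virtual ~BaseV() {}	// drop "virtual" and ~DerivedV() is skipped
	virtual int State() const { return (0); }
};

class DerivedV : public BaseV {
public:
	~DerivedV() { /* release per-derived resources here */ }
	int State() const { return (1); }
};

int
main()
{
	BaseV *v = new DerivedV();
	delete v;		// safe: runs ~DerivedV(), then ~BaseV()
	return (0);
}
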
Index: projects/clang390-import/contrib/libarchive/libarchive/archive_acl.c =================================================================== --- projects/clang390-import/contrib/libarchive/libarchive/archive_acl.c (revision 305016) +++ projects/clang390-import/contrib/libarchive/libarchive/archive_acl.c (revision 305017) @@ -1,1798 +1,1278 @@ /*- * Copyright (c) 2003-2010 Tim Kientzle * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); #ifdef HAVE_ERRNO_H #include <errno.h> #endif #ifdef HAVE_LIMITS_H #include <limits.h> #endif #ifdef HAVE_WCHAR_H #include <wchar.h> #endif #include "archive_acl_private.h" #include "archive_entry.h" #include "archive_private.h" #undef max #define max(a, b) ((a)>(b)?(a):(b)) #ifndef HAVE_WMEMCMP /* Good enough for simple equality testing, but not for sorting. */ #define wmemcmp(a,b,i) memcmp((a), (b), (i) * sizeof(wchar_t)) #endif static int acl_special(struct archive_acl *acl, int type, int permset, int tag); static struct archive_acl_entry *acl_new_entry(struct archive_acl *acl, int type, int permset, int tag, int id); static int archive_acl_add_entry_len_l(struct archive_acl *acl, int type, int permset, int tag, int id, const char *name, size_t len, struct archive_string_conv *sc); static int isint_w(const wchar_t *start, const wchar_t *end, int *result); static int ismode_w(const wchar_t *start, const wchar_t *end, int *result); -static int parse_nfs4_flags_w(const wchar_t *start, const wchar_t *end, - int *result); -static int parse_nfs4_perms_w(const wchar_t *start, const wchar_t *end, - int *result); static void next_field_w(const wchar_t **wp, const wchar_t **start, const wchar_t **end, wchar_t *sep); static int prefix_w(const wchar_t *start, const wchar_t *end, const wchar_t *test); -static void append_entry_w(wchar_t **wp, const wchar_t *prefix, int type, - int tag, const wchar_t *wname, int perm, int id); +static void append_entry_w(wchar_t **wp, const wchar_t *prefix, int tag, + const wchar_t *wname, int perm, int id); static void append_id_w(wchar_t **wp, int id); static int isint(const char *start, const char *end, int *result); static int ismode(const char *start, const char *end, int *result); -static int parse_nfs4_flags(const char *start, const char *end, int *result); -static int parse_nfs4_perms(const char *start, const char *end, int *result); static void next_field(const char **p, const char **start, const char **end, char *sep); static int prefix_c(const char *start, const char *end, const char *test); -static void append_entry(char **p, const char *prefix, int type, - int tag, const char *name, int perm, int id); +static void append_entry(char **p, const char *prefix, int tag, + const char *name, int perm, int id); static void append_id(char **p, int id); void archive_acl_clear(struct archive_acl *acl) { struct archive_acl_entry *ap; while (acl->acl_head != NULL) { ap = acl->acl_head->next; archive_mstring_clean(&acl->acl_head->name); free(acl->acl_head); acl->acl_head = ap; } if (acl->acl_text_w != NULL) { free(acl->acl_text_w); acl->acl_text_w = NULL; } if (acl->acl_text != NULL) { free(acl->acl_text); acl->acl_text = NULL; } acl->acl_p = NULL; acl->acl_state = 0; /* Not counting.
*/ } void archive_acl_copy(struct archive_acl *dest, struct archive_acl *src) { struct archive_acl_entry *ap, *ap2; archive_acl_clear(dest); dest->mode = src->mode; ap = src->acl_head; while (ap != NULL) { ap2 = acl_new_entry(dest, ap->type, ap->permset, ap->tag, ap->id); if (ap2 != NULL) archive_mstring_copy(&ap2->name, &ap->name); ap = ap->next; } } int archive_acl_add_entry(struct archive_acl *acl, int type, int permset, int tag, int id, const char *name) { struct archive_acl_entry *ap; if (acl_special(acl, type, permset, tag) == 0) return ARCHIVE_OK; ap = acl_new_entry(acl, type, permset, tag, id); if (ap == NULL) { /* XXX Error XXX */ return ARCHIVE_FAILED; } if (name != NULL && *name != '\0') archive_mstring_copy_mbs(&ap->name, name); else archive_mstring_clean(&ap->name); return ARCHIVE_OK; } int archive_acl_add_entry_w_len(struct archive_acl *acl, int type, int permset, int tag, int id, const wchar_t *name, size_t len) { struct archive_acl_entry *ap; if (acl_special(acl, type, permset, tag) == 0) return ARCHIVE_OK; ap = acl_new_entry(acl, type, permset, tag, id); if (ap == NULL) { /* XXX Error XXX */ return ARCHIVE_FAILED; } if (name != NULL && *name != L'\0' && len > 0) archive_mstring_copy_wcs_len(&ap->name, name, len); else archive_mstring_clean(&ap->name); return ARCHIVE_OK; } static int archive_acl_add_entry_len_l(struct archive_acl *acl, int type, int permset, int tag, int id, const char *name, size_t len, struct archive_string_conv *sc) { struct archive_acl_entry *ap; int r; if (acl_special(acl, type, permset, tag) == 0) return ARCHIVE_OK; ap = acl_new_entry(acl, type, permset, tag, id); if (ap == NULL) { /* XXX Error XXX */ return ARCHIVE_FAILED; } if (name != NULL && *name != '\0' && len > 0) { r = archive_mstring_copy_mbs_len_l(&ap->name, name, len, sc); } else { r = 0; archive_mstring_clean(&ap->name); } if (r == 0) return (ARCHIVE_OK); else if (errno == ENOMEM) return (ARCHIVE_FATAL); else return (ARCHIVE_WARN); } /* * If this ACL entry is part of the standard POSIX permissions set, * store the permissions in the stat structure and return zero. */ static int acl_special(struct archive_acl *acl, int type, int permset, int tag) { if (type == ARCHIVE_ENTRY_ACL_TYPE_ACCESS && ((permset & ~007) == 0)) { switch (tag) { case ARCHIVE_ENTRY_ACL_USER_OBJ: acl->mode &= ~0700; acl->mode |= (permset & 7) << 6; return (0); case ARCHIVE_ENTRY_ACL_GROUP_OBJ: acl->mode &= ~0070; acl->mode |= (permset & 7) << 3; return (0); case ARCHIVE_ENTRY_ACL_OTHER: acl->mode &= ~0007; acl->mode |= permset & 7; return (0); } } return (1); } /* * Allocate and populate a new ACL entry with everything but the * name. */ static struct archive_acl_entry * acl_new_entry(struct archive_acl *acl, int type, int permset, int tag, int id) { struct archive_acl_entry *ap, *aq; /* Type argument must be a valid NFS4 or POSIX.1e type. * The type must agree with anything already set and * the permset must be compatible. */ if (type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) { if (acl->acl_types & ~ARCHIVE_ENTRY_ACL_TYPE_NFS4) { return (NULL); } if (permset & ~(ARCHIVE_ENTRY_ACL_PERMS_NFS4 | ARCHIVE_ENTRY_ACL_INHERITANCE_NFS4)) { return (NULL); } } else if (type & ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) { if (acl->acl_types & ~ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) { return (NULL); } if (permset & ~ARCHIVE_ENTRY_ACL_PERMS_POSIX1E) { return (NULL); } } else { return (NULL); } /* Verify the tag is valid and compatible with NFS4 or POSIX.1e. 
*/ switch (tag) { case ARCHIVE_ENTRY_ACL_USER: case ARCHIVE_ENTRY_ACL_USER_OBJ: case ARCHIVE_ENTRY_ACL_GROUP: case ARCHIVE_ENTRY_ACL_GROUP_OBJ: /* Tags valid in both NFS4 and POSIX.1e */ break; case ARCHIVE_ENTRY_ACL_MASK: case ARCHIVE_ENTRY_ACL_OTHER: /* Tags valid only in POSIX.1e. */ if (type & ~ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) { return (NULL); } break; case ARCHIVE_ENTRY_ACL_EVERYONE: /* Tags valid only in NFS4. */ if (type & ~ARCHIVE_ENTRY_ACL_TYPE_NFS4) { return (NULL); } break; default: /* No other values are valid. */ return (NULL); } if (acl->acl_text_w != NULL) { free(acl->acl_text_w); acl->acl_text_w = NULL; } if (acl->acl_text != NULL) { free(acl->acl_text); acl->acl_text = NULL; } /* If there's a matching entry already in the list, overwrite it. */ ap = acl->acl_head; aq = NULL; while (ap != NULL) { if (ap->type == type && ap->tag == tag && ap->id == id) { ap->permset = permset; return (ap); } aq = ap; ap = ap->next; } /* Add a new entry to the end of the list. */ ap = (struct archive_acl_entry *)malloc(sizeof(*ap)); if (ap == NULL) return (NULL); memset(ap, 0, sizeof(*ap)); if (aq == NULL) acl->acl_head = ap; else aq->next = ap; ap->type = type; ap->tag = tag; ap->id = id; ap->permset = permset; acl->acl_types |= type; return (ap); } /* * Return a count of entries matching "want_type". */ int archive_acl_count(struct archive_acl *acl, int want_type) { int count; struct archive_acl_entry *ap; count = 0; ap = acl->acl_head; while (ap != NULL) { if ((ap->type & want_type) != 0) count++; ap = ap->next; } if (count > 0 && ((want_type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0)) count += 3; return (count); } /* * Prepare for reading entries from the ACL data. Returns a count * of entries matching "want_type", or zero if there are no * non-extended ACL entries of that type. */ int archive_acl_reset(struct archive_acl *acl, int want_type) { int count, cutoff; count = archive_acl_count(acl, want_type); /* * If the only entries are the three standard ones, * then don't return any ACL data. (In this case, * client can just use chmod(2) to set permissions.) */ if ((want_type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) cutoff = 3; else cutoff = 0; if (count > cutoff) acl->acl_state = ARCHIVE_ENTRY_ACL_USER_OBJ; else acl->acl_state = 0; acl->acl_p = acl->acl_head; return (count); } /* * Return the next ACL entry in the list. Fake entries for the * standard permissions and include them in the returned list. */ int archive_acl_next(struct archive *a, struct archive_acl *acl, int want_type, int *type, int *permset, int *tag, int *id, const char **name) { *name = NULL; *id = -1; /* * The acl_state is either zero (no entries available), -1 * (reading from list), or an entry type (retrieve that type * from ae_stat.aest_mode). */ if (acl->acl_state == 0) return (ARCHIVE_WARN); /* The first three access entries are special. 
*/ if ((want_type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { switch (acl->acl_state) { case ARCHIVE_ENTRY_ACL_USER_OBJ: *permset = (acl->mode >> 6) & 7; *type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS; *tag = ARCHIVE_ENTRY_ACL_USER_OBJ; acl->acl_state = ARCHIVE_ENTRY_ACL_GROUP_OBJ; return (ARCHIVE_OK); case ARCHIVE_ENTRY_ACL_GROUP_OBJ: *permset = (acl->mode >> 3) & 7; *type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS; *tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; acl->acl_state = ARCHIVE_ENTRY_ACL_OTHER; return (ARCHIVE_OK); case ARCHIVE_ENTRY_ACL_OTHER: *permset = acl->mode & 7; *type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS; *tag = ARCHIVE_ENTRY_ACL_OTHER; acl->acl_state = -1; acl->acl_p = acl->acl_head; return (ARCHIVE_OK); default: break; } } while (acl->acl_p != NULL && (acl->acl_p->type & want_type) == 0) acl->acl_p = acl->acl_p->next; if (acl->acl_p == NULL) { acl->acl_state = 0; *type = 0; *permset = 0; *tag = 0; *id = -1; *name = NULL; return (ARCHIVE_EOF); /* End of ACL entries. */ } *type = acl->acl_p->type; *permset = acl->acl_p->permset; *tag = acl->acl_p->tag; *id = acl->acl_p->id; if (archive_mstring_get_mbs(a, &acl->acl_p->name, name) != 0) { if (errno == ENOMEM) return (ARCHIVE_FATAL); *name = NULL; } acl->acl_p = acl->acl_p->next; return (ARCHIVE_OK); } /* * Generate a text version of the ACL. The flags parameter controls * the style of the generated ACL. */ const wchar_t * archive_acl_text_w(struct archive *a, struct archive_acl *acl, int flags) { int count; size_t length; const wchar_t *wname; const wchar_t *prefix; wchar_t separator; struct archive_acl_entry *ap; int id, r; wchar_t *wp; - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) && - (flags & (ARCHIVE_ENTRY_ACL_TYPE_ACCESS | ARCHIVE_ENTRY_ACL_TYPE_DEFAULT))) { - /* cannot convert NFSv4 ACLs and POSIX1e ACLs at the same time */ - return (NULL); - } - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) && (flags & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT)) { - /* cannot have access and default at the same time */ - return (NULL); - } - if (acl->acl_text_w != NULL) { free (acl->acl_text_w); acl->acl_text_w = NULL; } separator = L','; count = 0; length = 0; ap = acl->acl_head; while (ap != NULL) { if ((ap->type & flags) != 0) { count++; if ((flags & ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT) && (ap->type & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT)) length += 8; /* "default:" */ - switch (ap->tag) { - case ARCHIVE_ENTRY_ACL_USER_OBJ: - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) { - length += 6; /* "owner@" */ - break; - } - /* FALLTHROUGH */ - case ARCHIVE_ENTRY_ACL_USER: - length += 4; /* "user" */ - break; - case ARCHIVE_ENTRY_ACL_GROUP_OBJ: - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) { - length += 6; /* "group@" */ - break; - } - /* FALLTHROUGH */ - case ARCHIVE_ENTRY_ACL_GROUP: - case ARCHIVE_ENTRY_ACL_OTHER: - length += 5; /* "group", "other" */ - break; - case ARCHIVE_ENTRY_ACL_EVERYONE: - length += 9; /* "everyone@" */ - break; - } + length += 5; /* tag name */ length += 1; /* colon */ - if (((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) == 0) || - ap->tag == ARCHIVE_ENTRY_ACL_USER || - ap->tag == ARCHIVE_ENTRY_ACL_GROUP) { - r = archive_mstring_get_wcs(a, &ap->name, &wname); - if (r == 0 && wname != NULL) - length += wcslen(wname); - else if (r < 0 && errno == ENOMEM) - return (NULL); - else - length += sizeof(uid_t) * 3 + 1; - length += 1; /* colon */ - } - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) - length += 14; /* rwxpdDaARWcCos */ + r = archive_mstring_get_wcs(a, &ap->name, &wname); + if (r == 0 && wname != NULL) + length += wcslen(wname); + else if (r < 0 && errno == ENOMEM) 
+ return (NULL); else - length += 3; /* rwx */ + length += sizeof(uid_t) * 3 + 1; + length ++; /* colon */ + length += 3; /* rwx */ length += 1; /* colon */ - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) { - length += 7; /* fdinSFI */ - length += 1; /* colon */ - if ((ap->type & ARCHIVE_ENTRY_ACL_TYPE_DENY) != 0) - length += 4; /* deny */ - else - length += 5; /* allow, alarm, audit */ - length += 1; /* colon */ - } - length += 1; /* colon */ length += max(sizeof(uid_t), sizeof(gid_t)) * 3 + 1; length ++; /* newline */ } ap = ap->next; } - if (count == 0) - return (NULL); - - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { + if (count > 0 && ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0)) { length += 10; /* "user::rwx\n" */ length += 11; /* "group::rwx\n" */ length += 11; /* "other::rwx\n" */ } + if (count == 0) + return (NULL); + /* Now, allocate the string and actually populate it. */ wp = acl->acl_text_w = (wchar_t *)malloc(length * sizeof(wchar_t)); if (wp == NULL) return (NULL); count = 0; - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { - append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, - ARCHIVE_ENTRY_ACL_USER_OBJ, NULL, acl->mode & 0700, -1); + append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_USER_OBJ, NULL, + acl->mode & 0700, -1); *wp++ = ','; - append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, - ARCHIVE_ENTRY_ACL_GROUP_OBJ, NULL, acl->mode & 0070, -1); + append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_GROUP_OBJ, NULL, + acl->mode & 0070, -1); *wp++ = ','; - append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, - ARCHIVE_ENTRY_ACL_OTHER, NULL, acl->mode & 0007, -1); + append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_OTHER, NULL, + acl->mode & 0007, -1); count += 3; - } - if ((flags & (ARCHIVE_ENTRY_ACL_TYPE_ACCESS | - ARCHIVE_ENTRY_ACL_TYPE_NFS4)) != 0) { ap = acl->acl_head; while (ap != NULL) { - if ((ap->type & (ARCHIVE_ENTRY_ACL_TYPE_ACCESS | - ARCHIVE_ENTRY_ACL_TYPE_NFS4)) != 0) { + if ((ap->type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { r = archive_mstring_get_wcs(a, &ap->name, &wname); if (r == 0) { *wp++ = separator; if (flags & ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID) id = ap->id; else id = -1; - append_entry_w(&wp, NULL, ap->type, ap->tag, - wname, ap->permset, id); + append_entry_w(&wp, NULL, ap->tag, wname, + ap->permset, id); count++; } else if (r < 0 && errno == ENOMEM) return (NULL); } ap = ap->next; } } + if ((flags & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) != 0) { if (flags & ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT) prefix = L"default:"; else prefix = NULL; ap = acl->acl_head; count = 0; while (ap != NULL) { if ((ap->type & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) != 0) { r = archive_mstring_get_wcs(a, &ap->name, &wname); if (r == 0) { if (count > 0) *wp++ = separator; if (flags & ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID) id = ap->id; else id = -1; - append_entry_w(&wp, prefix, ap->type, - ap->tag, wname, ap->permset, id); + append_entry_w(&wp, prefix, ap->tag, + wname, ap->permset, id); count ++; } else if (r < 0 && errno == ENOMEM) return (NULL); } ap = ap->next; } } return (acl->acl_text_w); } static void append_id_w(wchar_t **wp, int id) { if (id < 0) id = 0; if (id > 9) append_id_w(wp, id / 10); *(*wp)++ = L"0123456789"[id % 10]; } static void -append_entry_w(wchar_t **wp, const wchar_t *prefix, int type, - int tag, const wchar_t *wname, int perm, int id) +append_entry_w(wchar_t **wp, const wchar_t *prefix, int tag, + const wchar_t *wname, int perm, int id) { if (prefix != NULL) { wcscpy(*wp, prefix); *wp += wcslen(*wp); } switch (tag) { case ARCHIVE_ENTRY_ACL_USER_OBJ: wname = 
NULL; id = -1; - if (type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - wcscpy(*wp, L"owner@"); - break; - } /* FALLTHROUGH */ - /* FALLTHROUGH */ case ARCHIVE_ENTRY_ACL_USER: wcscpy(*wp, L"user"); break; case ARCHIVE_ENTRY_ACL_GROUP_OBJ: wname = NULL; id = -1; /* FALLTHROUGH */ case ARCHIVE_ENTRY_ACL_GROUP: wcscpy(*wp, L"group"); break; case ARCHIVE_ENTRY_ACL_MASK: wcscpy(*wp, L"mask"); wname = NULL; id = -1; break; case ARCHIVE_ENTRY_ACL_OTHER: wcscpy(*wp, L"other"); wname = NULL; id = -1; break; } *wp += wcslen(*wp); *(*wp)++ = L':'; - if ((type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) == 0 || - tag == ARCHIVE_ENTRY_ACL_USER || - tag == ARCHIVE_ENTRY_ACL_GROUP) { - if (wname != NULL) { - wcscpy(*wp, wname); - *wp += wcslen(*wp); - } else if (tag == ARCHIVE_ENTRY_ACL_USER - || tag == ARCHIVE_ENTRY_ACL_GROUP) { - append_id_w(wp, id); - id = -1; - } - *(*wp)++ = L':'; - } - *(*wp)++ = (perm & (ARCHIVE_ENTRY_ACL_READ | - ARCHIVE_ENTRY_ACL_READ_DATA | - ARCHIVE_ENTRY_ACL_LIST_DIRECTORY)) ? L'r' : L'-'; - *(*wp)++ = (perm & (ARCHIVE_ENTRY_ACL_WRITE | - ARCHIVE_ENTRY_ACL_WRITE_DATA | - ARCHIVE_ENTRY_ACL_ADD_FILE)) ? L'w' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_EXECUTE) ? L'x' : L'-'; - if (type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - *(*wp)++ = (perm & (ARCHIVE_ENTRY_ACL_APPEND_DATA | ARCHIVE_ENTRY_ACL_ADD_SUBDIRECTORY)) ? L'p' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_DELETE) ? L'd' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_DELETE_CHILD) ? L'D' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES) ? L'a' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES) ? L'A' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS) ? L'R' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS) ? L'W' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_READ_ACL) ? L'c' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_WRITE_ACL) ? L'C' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_WRITE_OWNER) ? L'o' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_SYNCHRONIZE) ? L's' : L'-'; - *(*wp)++ = L':'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT) ? L'f' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT) ? L'd' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY) ? L'i' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT) ? L'n' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_SUCCESSFUL_ACCESS) ? L'S' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_FAILED_ACCESS) ? L'F' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_INHERITED) ? L'I' : L'-'; - *(*wp)++ = L':'; - if (type & ARCHIVE_ENTRY_ACL_TYPE_ALLOW) - wcscpy(*wp, L"allow"); - else if (type & ARCHIVE_ENTRY_ACL_TYPE_DENY) - wcscpy(*wp, L"deny"); - else if (type & ARCHIVE_ENTRY_ACL_TYPE_AUDIT) - wcscpy(*wp, L"audit"); - else if (type & ARCHIVE_ENTRY_ACL_TYPE_ALARM) - wcscpy(*wp, L"alarm"); + if (wname != NULL) { + wcscpy(*wp, wname); *wp += wcslen(*wp); + } else if (tag == ARCHIVE_ENTRY_ACL_USER + || tag == ARCHIVE_ENTRY_ACL_GROUP) { + append_id_w(wp, id); + id = -1; } + *(*wp)++ = L':'; + *(*wp)++ = (perm & 0444) ? L'r' : L'-'; + *(*wp)++ = (perm & 0222) ? L'w' : L'-'; + *(*wp)++ = (perm & 0111) ? 
L'x' : L'-'; if (id != -1) { *(*wp)++ = L':'; append_id_w(wp, id); } **wp = L'\0'; } int archive_acl_text_l(struct archive_acl *acl, int flags, const char **acl_text, size_t *acl_text_len, struct archive_string_conv *sc) { int count; size_t length; const char *name; const char *prefix; char separator; struct archive_acl_entry *ap; size_t len; int id, r; char *p; - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) && - (flags & (ARCHIVE_ENTRY_ACL_TYPE_ACCESS | ARCHIVE_ENTRY_ACL_TYPE_DEFAULT))) { - /* cannot convert NFSv4 ACLs and POSIX1e ACLs at the same time */ - return (-1); - } - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) && (flags & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT)) { - /* cannot have access and default at the same time */ - return (-1); - } - if (acl->acl_text != NULL) { free (acl->acl_text); acl->acl_text = NULL; } *acl_text = NULL; if (acl_text_len != NULL) *acl_text_len = 0; separator = ','; count = 0; length = 0; ap = acl->acl_head; while (ap != NULL) { if ((ap->type & flags) != 0) { count++; if ((flags & ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT) && (ap->type & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT)) length += 8; /* "default:" */ - switch (ap->tag) { - case ARCHIVE_ENTRY_ACL_USER_OBJ: - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) { - length += 6; /* "owner@" */ - break; - } - /* FALLTHROUGH */ - case ARCHIVE_ENTRY_ACL_USER: - length += 4; /* "user" */ - break; - case ARCHIVE_ENTRY_ACL_GROUP_OBJ: - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) { - length += 6; /* "group@" */ - break; - } - /* FALLTHROUGH */ - case ARCHIVE_ENTRY_ACL_GROUP: - case ARCHIVE_ENTRY_ACL_OTHER: - length += 5; /* "group", "other" */ - break; - case ARCHIVE_ENTRY_ACL_EVERYONE: - length += 9; /* "everyone@" */ - break; - } - + length += 5; /* tag name */ length += 1; /* colon */ - if (((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) == 0) || - ap->tag == ARCHIVE_ENTRY_ACL_USER || - ap->tag == ARCHIVE_ENTRY_ACL_GROUP) { - r = archive_mstring_get_mbs_l( - &ap->name, &name, &len, sc); - if (r != 0) - return (-1); - if (len > 0 && name != NULL) - length += len; - else - length += sizeof(uid_t) * 3 + 1; - length += 1; /* colon */ - } - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) - length += 14; /* rwxpdDaARWcCos */ + r = archive_mstring_get_mbs_l( + &ap->name, &name, &len, sc); + if (r != 0) + return (-1); + if (len > 0 && name != NULL) + length += len; else - length += 3; /* rwx */ + length += sizeof(uid_t) * 3 + 1; + length ++; /* colon */ + length += 3; /* rwx */ length += 1; /* colon */ - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) { - length += 7; /* fdinSFI */ - length += 1; /* colon */ - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_DENY) != 0) - length += 4; /* deny */ - else - length += 5; /* allow, alarm, audit */ - length += 1; /* colon */ - } - length += max(sizeof(uid_t), sizeof(gid_t)) * 3 + 1; length ++; /* newline */ } ap = ap->next; } - if (count == 0) - return (0); - - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { + if (count > 0 && ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0)) { length += 10; /* "user::rwx\n" */ length += 11; /* "group::rwx\n" */ length += 11; /* "other::rwx\n" */ } + if (count == 0) + return (0); + /* Now, allocate the string and actually populate it. 
*/ p = acl->acl_text = (char *)malloc(length); if (p == NULL) return (-1); count = 0; if ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { - append_entry(&p, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, - ARCHIVE_ENTRY_ACL_USER_OBJ, NULL, acl->mode & 0700, -1); + append_entry(&p, NULL, ARCHIVE_ENTRY_ACL_USER_OBJ, NULL, + acl->mode & 0700, -1); *p++ = ','; - append_entry(&p, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, - ARCHIVE_ENTRY_ACL_GROUP_OBJ, NULL, acl->mode & 0070, -1); + append_entry(&p, NULL, ARCHIVE_ENTRY_ACL_GROUP_OBJ, NULL, + acl->mode & 0070, -1); *p++ = ','; - append_entry(&p, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, - ARCHIVE_ENTRY_ACL_OTHER, NULL, acl->mode & 0007, -1); + append_entry(&p, NULL, ARCHIVE_ENTRY_ACL_OTHER, NULL, + acl->mode & 0007, -1); count += 3; - } - if ((flags & (ARCHIVE_ENTRY_ACL_TYPE_ACCESS | - ARCHIVE_ENTRY_ACL_TYPE_NFS4)) != 0) { for (ap = acl->acl_head; ap != NULL; ap = ap->next) { - if ((ap->type & (ARCHIVE_ENTRY_ACL_TYPE_ACCESS | - ARCHIVE_ENTRY_ACL_TYPE_NFS4)) == 0) + if ((ap->type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) == 0) continue; r = archive_mstring_get_mbs_l( &ap->name, &name, &len, sc); if (r != 0) return (-1); - if (count > 0) - *p++ = separator; + *p++ = separator; if (flags & ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID) id = ap->id; else id = -1; - append_entry(&p, NULL, ap->type, ap->tag, name, + append_entry(&p, NULL, ap->tag, name, ap->permset, id); count++; } } if ((flags & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) != 0) { if (flags & ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT) prefix = "default:"; else prefix = NULL; count = 0; for (ap = acl->acl_head; ap != NULL; ap = ap->next) { if ((ap->type & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) == 0) continue; r = archive_mstring_get_mbs_l( &ap->name, &name, &len, sc); if (r != 0) return (-1); if (count > 0) *p++ = separator; if (flags & ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID) id = ap->id; else id = -1; - append_entry(&p, prefix, ap->type, ap->tag, + append_entry(&p, prefix, ap->tag, name, ap->permset, id); count ++; } } *acl_text = acl->acl_text; if (acl_text_len != NULL) *acl_text_len = strlen(acl->acl_text); return (0); } static void append_id(char **p, int id) { if (id < 0) id = 0; if (id > 9) append_id(p, id / 10); *(*p)++ = "0123456789"[id % 10]; } static void -append_entry(char **p, const char *prefix, int type, - int tag, const char *name, int perm, int id) +append_entry(char **p, const char *prefix, int tag, + const char *name, int perm, int id) { if (prefix != NULL) { strcpy(*p, prefix); *p += strlen(*p); } switch (tag) { case ARCHIVE_ENTRY_ACL_USER_OBJ: name = NULL; id = -1; - if (type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - strcpy(*p, "owner@"); - break; - } /* FALLTHROUGH */ case ARCHIVE_ENTRY_ACL_USER: strcpy(*p, "user"); break; case ARCHIVE_ENTRY_ACL_GROUP_OBJ: name = NULL; id = -1; - if (type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - strcpy(*p, "group@"); - break; - } /* FALLTHROUGH */ case ARCHIVE_ENTRY_ACL_GROUP: strcpy(*p, "group"); break; case ARCHIVE_ENTRY_ACL_MASK: strcpy(*p, "mask"); name = NULL; id = -1; break; case ARCHIVE_ENTRY_ACL_OTHER: strcpy(*p, "other"); name = NULL; id = -1; break; - case ARCHIVE_ENTRY_ACL_EVERYONE: - strcpy(*p, "everyone@"); - name = NULL; - id = -1; - break; } *p += strlen(*p); *(*p)++ = ':'; - if ((type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) == 0 || - tag == ARCHIVE_ENTRY_ACL_USER || - tag == ARCHIVE_ENTRY_ACL_GROUP) { - if (name != NULL) { - strcpy(*p, name); - *p += strlen(*p); - } else if (tag == ARCHIVE_ENTRY_ACL_USER - || tag == ARCHIVE_ENTRY_ACL_GROUP) { - append_id(p, id); - id = -1; - } - *(*p)++ = ':'; - } - *(*p)++ = 
(perm & (ARCHIVE_ENTRY_ACL_READ | - ARCHIVE_ENTRY_ACL_READ_DATA | - ARCHIVE_ENTRY_ACL_LIST_DIRECTORY)) ? 'r' : '-'; - *(*p)++ = (perm & (ARCHIVE_ENTRY_ACL_WRITE | - ARCHIVE_ENTRY_ACL_WRITE_DATA | - ARCHIVE_ENTRY_ACL_ADD_FILE)) ? 'w' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_EXECUTE) ? 'x' : '-'; - if (type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - *(*p)++ = (perm & (ARCHIVE_ENTRY_ACL_APPEND_DATA | ARCHIVE_ENTRY_ACL_ADD_SUBDIRECTORY)) ? 'p' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_DELETE) ? 'd' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_DELETE_CHILD) ? 'D' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES) ? 'a' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES) ? 'A' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS) ? 'R' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS) ? 'W' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_READ_ACL) ? 'c' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_WRITE_ACL) ? 'C' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_WRITE_OWNER) ? 'o' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_SYNCHRONIZE) ? 's' : '-'; - *(*p)++ = ':'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT) ? 'f' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT) ? 'd' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY) ? 'i' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT) ? 'n' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_SUCCESSFUL_ACCESS) ? 'S' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_FAILED_ACCESS) ? 'F' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_INHERITED) ? 'I' : '-'; - *(*p)++ = ':'; - if (type & ARCHIVE_ENTRY_ACL_TYPE_ALLOW) - strcpy(*p, "allow"); - else if (type & ARCHIVE_ENTRY_ACL_TYPE_DENY) - strcpy(*p, "deny"); - else if (type & ARCHIVE_ENTRY_ACL_TYPE_AUDIT) - strcpy(*p, "audit"); - else if (type & ARCHIVE_ENTRY_ACL_TYPE_ALARM) - strcpy(*p, "alarm"); + if (name != NULL) { + strcpy(*p, name); *p += strlen(*p); + } else if (tag == ARCHIVE_ENTRY_ACL_USER + || tag == ARCHIVE_ENTRY_ACL_GROUP) { + append_id(p, id); + id = -1; } + *(*p)++ = ':'; + *(*p)++ = (perm & 0444) ? 'r' : '-'; + *(*p)++ = (perm & 0222) ? 'w' : '-'; + *(*p)++ = (perm & 0111) ? 'x' : '-'; if (id != -1) { *(*p)++ = ':'; append_id(p, id); } **p = '\0'; } /* * Parse a textual ACL. This automatically recognizes and supports * extensions described above. The 'type' argument is used to * indicate the type that should be used for any entries not * explicitly marked as "default:". */ int archive_acl_parse_w(struct archive_acl *acl, const wchar_t *text, int default_type) { struct { const wchar_t *start; const wchar_t *end; - } field[6], name; + } field[4], name; - int numfields, fields, n; + int fields, n; int type, tag, permset, id; - int offset; wchar_t sep; - if (default_type == ARCHIVE_ENTRY_ACL_TYPE_NFS4) - numfields = 6; - else - numfields = 4; - - while (text != NULL && *text != L'\0') { /* * Parse the fields out of the next entry, * advance 'text' to start of next entry. */ fields = 0; do { const wchar_t *start, *end; next_field_w(&text, &start, &end, &sep); - if (fields < numfields) { + if (fields < 4) { field[fields].start = start; field[fields].end = end; } ++fields; } while (sep == L':'); /* Set remaining fields to blank. */ - for (n = fields; n < numfields; ++n) + for (n = fields; n < 4; ++n) field[n].start = field[n].end = NULL; - if (default_type != ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - /* POSIX.1e ACLs */ - /* Check for a numeric ID in field 1 or 3. 
*/ - id = -1; - isint_w(field[1].start, field[1].end, &id); - /* Field 3 is optional. */ - if (id == -1 && fields > 3) - isint_w(field[3].start, field[3].end, &id); + /* Check for a numeric ID in field 1 or 3. */ + id = -1; + isint_w(field[1].start, field[1].end, &id); + /* Field 3 is optional. */ + if (id == -1 && fields > 3) + isint_w(field[3].start, field[3].end, &id); - /* - * Solaris extension: "defaultuser::rwx" is the - * default ACL corresponding to "user::rwx", etc. - */ - if (field[0].end - field[0].start > 7 - && wmemcmp(field[0].start, L"default", 7) == 0) { - type = ARCHIVE_ENTRY_ACL_TYPE_DEFAULT; - field[0].start += 7; - } else - type = default_type; + /* + * Solaris extension: "defaultuser::rwx" is the + * default ACL corresponding to "user::rwx", etc. + */ + if (field[0].end - field[0].start > 7 + && wmemcmp(field[0].start, L"default", 7) == 0) { + type = ARCHIVE_ENTRY_ACL_TYPE_DEFAULT; + field[0].start += 7; + } else + type = default_type; - name.start = name.end = NULL; - if (prefix_w(field[0].start, field[0].end, L"user")) { - if (!ismode_w(field[2].start, field[2].end, - &permset)) + name.start = name.end = NULL; + if (prefix_w(field[0].start, field[0].end, L"user")) { + if (!ismode_w(field[2].start, field[2].end, &permset)) return (ARCHIVE_WARN); - if (id != -1 || field[1].start < field[1].end) { - tag = ARCHIVE_ENTRY_ACL_USER; - name = field[1]; - } else - tag = ARCHIVE_ENTRY_ACL_USER_OBJ; - } else if (prefix_w(field[0].start, field[0].end, - L"group")) { - if (!ismode_w(field[2].start, field[2].end, - &permset)) - return (ARCHIVE_WARN); - if (id != -1 || field[1].start < field[1].end) { - tag = ARCHIVE_ENTRY_ACL_GROUP; - name = field[1]; - } else - tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; - } else if (prefix_w(field[0].start, field[0].end, - L"other")) { - if (fields == 2 - && field[1].start < field[1].end - && ismode_w(field[1].start, field[1].end, - &permset)) { - /* This is Solaris-style "other:rwx" */ - } else if (fields == 3 - && field[1].start == field[1].end - && field[2].start < field[2].end - && ismode_w(field[2].start, field[2].end, - &permset)) { - /* This is FreeBSD-style "other::rwx" */ - } else - return (ARCHIVE_WARN); - tag = ARCHIVE_ENTRY_ACL_OTHER; - } else if (prefix_w(field[0].start, field[0].end, - L"mask")) { - if (fields == 2 - && field[1].start < field[1].end - && ismode_w(field[1].start, field[1].end, - &permset)) { - /* This is Solaris-style "mask:rwx" */ - } else if (fields == 3 - && field[1].start == field[1].end - && field[2].start < field[2].end - && ismode_w(field[2].start, field[2].end, - &permset)) { - /* This is FreeBSD-style "mask::rwx" */ - } else - return (ARCHIVE_WARN); - tag = ARCHIVE_ENTRY_ACL_MASK; + if (id != -1 || field[1].start < field[1].end) { + tag = ARCHIVE_ENTRY_ACL_USER; + name = field[1]; } else + tag = ARCHIVE_ENTRY_ACL_USER_OBJ; + } else if (prefix_w(field[0].start, field[0].end, L"group")) { + if (!ismode_w(field[2].start, field[2].end, &permset)) return (ARCHIVE_WARN); - } else { - /* NFSv4 ACLs */ - if (wcsncmp(field[0].start, L"user", - field[0].end - field[0].start) == 0) - tag = ARCHIVE_ENTRY_ACL_USER; - else if (wcsncmp(field[0].start, L"group", - field[0].end - field[0].start) == 0) + if (id != -1 || field[1].start < field[1].end) { tag = ARCHIVE_ENTRY_ACL_GROUP; - else if (wcsncmp(field[0].start, L"owner@", - field[0].end - field[0].start) == 0) - tag = ARCHIVE_ENTRY_ACL_USER_OBJ; - else if (wcsncmp(field[0].start, L"group@", - field[0].end - field[0].start) == 0) + name = field[1]; + } else tag = 
ARCHIVE_ENTRY_ACL_GROUP_OBJ; - else if (wcsncmp(field[0].start, L"everyone@", - field[0].end - field[0].start) == 0) - tag = ARCHIVE_ENTRY_ACL_EVERYONE; - else { - /* Unknown entry */ + } else if (prefix_w(field[0].start, field[0].end, L"other")) { + if (fields == 2 + && field[1].start < field[1].end + && ismode_w(field[1].start, field[1].end, &permset)) { + /* This is Solaris-style "other:rwx" */ + } else if (fields == 3 + && field[1].start == field[1].end + && field[2].start < field[2].end + && ismode_w(field[2].start, field[2].end, &permset)) { + /* This is FreeBSD-style "other::rwx" */ + } else return (ARCHIVE_WARN); - } - - permset = 0; - name.start = name.end = NULL; - - if (tag == ARCHIVE_ENTRY_ACL_USER || - tag == ARCHIVE_ENTRY_ACL_GROUP) { - offset = 1; - name = field[1]; + tag = ARCHIVE_ENTRY_ACL_OTHER; + } else if (prefix_w(field[0].start, field[0].end, L"mask")) { + if (fields == 2 + && field[1].start < field[1].end + && ismode_w(field[1].start, field[1].end, &permset)) { + /* This is Solaris-style "mask:rwx" */ + } else if (fields == 3 + && field[1].start == field[1].end + && field[2].start < field[2].end + && ismode_w(field[2].start, field[2].end, &permset)) { + /* This is FreeBSD-style "mask::rwx" */ } else - offset = 0; - - if (parse_nfs4_perms_w(field[1 + offset].start, - field[1 + offset].end, &permset) != 0) { - /* NFS4 perms are invalid */ return (ARCHIVE_WARN); - } - if (parse_nfs4_flags_w(field[2 + offset].start, - field[2 + offset].end, &permset) != 0) { - /* NFS4 flags are invalid */ - return (ARCHIVE_WARN); - } - if (wcsncmp(field[3 + offset].start, L"allow", - field[3 + offset].end - field[3 + offset].start) - == 0) - type = ARCHIVE_ENTRY_ACL_TYPE_ALLOW; - else if (wcsncmp(field[3 + offset].start, L"deny", - field[3 + offset].end - field[3 + offset].start) - == 0) - type = ARCHIVE_ENTRY_ACL_TYPE_DENY; - else if (wcsncmp(field[3 + offset].start, L"audit", - field[3 + offset].end - field[3 + offset].start) - == 0) - type = ARCHIVE_ENTRY_ACL_TYPE_AUDIT; - else if (wcsncmp(field[3 + offset].start, L"alarm", - field[3 + offset].end - field[3 + offset].start) - == 0) - type = ARCHIVE_ENTRY_ACL_TYPE_ALARM; - else { - /* Unknown type */ - return (ARCHIVE_WARN); - } - isint_w(field[4 + offset].start, field[4 + offset].end, - &id); - } + tag = ARCHIVE_ENTRY_ACL_MASK; + } else + return (ARCHIVE_WARN); /* Add entry to the internal list. */ archive_acl_add_entry_w_len(acl, type, permset, tag, id, name.start, name.end - name.start); } return (ARCHIVE_OK); } /* * Parse a string to a positive decimal integer. Returns true if * the string is non-empty and consists only of decimal digits, * false otherwise. */ static int isint_w(const wchar_t *start, const wchar_t *end, int *result) { int n = 0; if (start >= end) return (0); while (start < end) { if (*start < '0' || *start > '9') return (0); if (n > (INT_MAX / 10) || (n == INT_MAX / 10 && (*start - '0') > INT_MAX % 10)) { n = INT_MAX; } else { n *= 10; n += *start - '0'; } start++; } *result = n; return (1); } /* * Parse a string as a mode field. Returns true if * the string is non-empty and consists only of mode characters, * false otherwise. 
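/*
 * Illustrative aside (not part of this change): isint_w()/isint() saturate
 * at INT_MAX instead of overflowing.  A standalone check of that behavior:
 */
#include <limits.h>
#include <stdio.h>
#include <string.h>

static int
parse_clamped(const char *start, const char *end, int *result)
{
	int n = 0;

	if (start >= end)
		return (0);
	while (start < end) {
		if (*start < '0' || *start > '9')
			return (0);
		if (n > (INT_MAX / 10) ||
		    (n == INT_MAX / 10 && (*start - '0') > INT_MAX % 10))
			n = INT_MAX;	/* Saturate rather than wrap. */
		else
			n = n * 10 + (*start - '0');
		start++;
	}
	*result = n;
	return (1);
}

int
main(void)
{
	const char *big = "99999999999999999999";
	int id;

	if (parse_clamped(big, big + strlen(big), &id))
		printf("%d\n", id);	/* Prints INT_MAX (2147483647). */
	return (0);
}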
*/ static int ismode_w(const wchar_t *start, const wchar_t *end, int *permset) { const wchar_t *p; if (start >= end) return (0); p = start; *permset = 0; while (p < end) { switch (*p++) { case 'r': case 'R': *permset |= ARCHIVE_ENTRY_ACL_READ; break; case 'w': case 'W': *permset |= ARCHIVE_ENTRY_ACL_WRITE; break; case 'x': case 'X': *permset |= ARCHIVE_ENTRY_ACL_EXECUTE; break; case '-': break; default: return (0); } } return (1); } -/* Parse a wstring as a strict NFSv4 ACL permission field. */ -static int -parse_nfs4_perms_w(const wchar_t *start, const wchar_t *end, int *permset) -{ - const wchar_t *p; - int pos; - const wchar_t *letter = L"rwxpdDaARWcCos"; - const int perms[14] = { - ARCHIVE_ENTRY_ACL_READ_DATA, - ARCHIVE_ENTRY_ACL_WRITE_DATA, - ARCHIVE_ENTRY_ACL_EXECUTE, - ARCHIVE_ENTRY_ACL_APPEND_DATA, - ARCHIVE_ENTRY_ACL_DELETE, - ARCHIVE_ENTRY_ACL_DELETE_CHILD, - ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES, - ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES, - ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS, - ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS, - ARCHIVE_ENTRY_ACL_READ_ACL, - ARCHIVE_ENTRY_ACL_WRITE_ACL, - ARCHIVE_ENTRY_ACL_WRITE_OWNER, - ARCHIVE_ENTRY_ACL_SYNCHRONIZE - }; - - if (start >= end) - return (0); - p = start; - pos = 0; - while (p < end && pos < 14) { - if (*p == letter[pos]) - *permset |= perms[pos]; - else if (*p != '-') - return (-1); - p = p + sizeof(wchar_t); - pos++; - } - return (0); -} - -/* Parse a string as a strict NFSv4 ACL flags field. */ -static int -parse_nfs4_flags_w(const wchar_t *start, const wchar_t *end, int *permset) -{ - const wchar_t *p; - int pos; - const wchar_t *letter = L"fdinSFI"; - const int perms[7] = { - ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT, - ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT, - ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY, - ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT, - ARCHIVE_ENTRY_ACL_ENTRY_SUCCESSFUL_ACCESS, - ARCHIVE_ENTRY_ACL_ENTRY_FAILED_ACCESS, - ARCHIVE_ENTRY_ACL_ENTRY_INHERITED - }; - - if (start >= end) - return (0); - p = start; - pos = 0; - while (p < end && pos < 7) { - if (*p == letter[pos]) - *permset |= perms[pos]; - else if (*p != '-') - return (-1); - p = p + sizeof(wchar_t); - pos++; - } - return (0); -} - - /* * Match "[:whitespace:]*(.*)[:whitespace:]*[:,\n]". *wp is updated * to point to just after the separator. *start points to the first * character of the matched text and *end just after the last * character of the matched identifier. In particular *end - *start * is the length of the field body, not including leading or trailing * whitespace. */ static void next_field_w(const wchar_t **wp, const wchar_t **start, const wchar_t **end, wchar_t *sep) { /* Skip leading whitespace to find start of field. */ while (**wp == L' ' || **wp == L'\t' || **wp == L'\n') { (*wp)++; } *start = *wp; /* Scan for the separator. */ while (**wp != L'\0' && **wp != L',' && **wp != L':' && **wp != L'\n') { (*wp)++; } *sep = **wp; /* Trim trailing whitespace to locate end of field. */ *end = *wp - 1; while (**end == L' ' || **end == L'\t' || **end == L'\n') { (*end)--; } (*end)++; /* Adjust scanner location. */ if (**wp != L'\0') (*wp)++; } /* * Return true if the characters [start...end) are a prefix of 'test'. * This makes it easy to handle the obvious abbreviations: 'u' for 'user', etc. */ static int prefix_w(const wchar_t *start, const wchar_t *end, const wchar_t *test) { if (start == end) return (0); if (*start++ != *test++) return (0); while (start < end && *start++ == *test++) ; if (start < end) return (0); return (1); } /* * Parse a textual ACL. 
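/*
 * Illustrative aside (not part of this change): prefix_w() accepts any
 * leading substring, so "u", "us", and "user" all select the user tag.
 * A narrow-character sketch of the same test:
 */
#include <stdio.h>
#include <string.h>

static int
is_prefix(const char *start, const char *end, const char *test)
{
	if (start == end)
		return (0);		/* An empty field never matches. */
	while (start < end && *test != '\0' && *start == *test) {
		start++;
		test++;
	}
	return (start == end);		/* Whole field consumed? */
}

int
main(void)
{
	const char *f = "use";

	printf("%d\n", is_prefix(f, f + strlen(f), "user"));	/* 1 */
	printf("%d\n", is_prefix(f, f + strlen(f), "group"));	/* 0 */
	return (0);
}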
This automatically recognizes and supports * extensions described above. The 'type' argument is used to * indicate the type that should be used for any entries not * explicitly marked as "default:". */ int archive_acl_parse_l(struct archive_acl *acl, const char *text, int default_type, struct archive_string_conv *sc) { struct { const char *start; const char *end; - } field[6], name; + } field[4], name; - int numfields, fields, n, r, ret = ARCHIVE_OK; + int fields, n, r, ret = ARCHIVE_OK; int type, tag, permset, id; - int offset; char sep; - if (default_type == ARCHIVE_ENTRY_ACL_TYPE_NFS4) - numfields = 6; - else - numfields = 4; - while (text != NULL && *text != '\0') { /* * Parse the fields out of the next entry, * advance 'text' to start of next entry. */ fields = 0; do { const char *start, *end; next_field(&text, &start, &end, &sep); - if (fields < numfields) { + if (fields < 4) { field[fields].start = start; field[fields].end = end; } ++fields; } while (sep == ':'); /* Set remaining fields to blank. */ - for (n = fields; n < numfields; ++n) + for (n = fields; n < 4; ++n) field[n].start = field[n].end = NULL; - if (default_type != ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - /* POSIX.1e ACLs */ - /* Check for a numeric ID in field 1 or 3. */ - id = -1; - isint(field[1].start, field[1].end, &id); - /* Field 3 is optional. */ - if (id == -1 && fields > 3) - isint(field[3].start, field[3].end, &id); + /* Check for a numeric ID in field 1 or 3. */ + id = -1; + isint(field[1].start, field[1].end, &id); + /* Field 3 is optional. */ + if (id == -1 && fields > 3) + isint(field[3].start, field[3].end, &id); - /* - * Solaris extension: "defaultuser::rwx" is the - * default ACL corresponding to "user::rwx", etc. - */ - if (field[0].end - field[0].start > 7 - && memcmp(field[0].start, "default", 7) == 0) { - type = ARCHIVE_ENTRY_ACL_TYPE_DEFAULT; - field[0].start += 7; - } else - type = default_type; + /* + * Solaris extension: "defaultuser::rwx" is the + * default ACL corresponding to "user::rwx", etc. 
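/*
 * Illustrative aside (not part of this change): the Solaris "default"
 * prefix test above, reduced to a standalone check:
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
	const char *field = "defaultuser";
	const char *end = field + strlen(field);
	int is_default = 0;

	/* "defaultuser::rwx" is the default-ACL form of "user::rwx". */
	if (end - field > 7 && memcmp(field, "default", 7) == 0) {
		is_default = 1;
		field += 7;	/* Tag parsing continues at "user". */
	}
	printf("default=%d tag=%s\n", is_default, field);	/* default=1 tag=user */
	return (0);
}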
+ */ + if (field[0].end - field[0].start > 7 + && memcmp(field[0].start, "default", 7) == 0) { + type = ARCHIVE_ENTRY_ACL_TYPE_DEFAULT; + field[0].start += 7; + } else + type = default_type; - name.start = name.end = NULL; - if (prefix_c(field[0].start, field[0].end, "user")) { - if (!ismode(field[2].start, field[2].end, - &permset)) - return (ARCHIVE_WARN); - if (id != -1 || field[1].start < field[1].end) { - tag = ARCHIVE_ENTRY_ACL_USER; - name = field[1]; - } else - tag = ARCHIVE_ENTRY_ACL_USER_OBJ; - } else if (prefix_c(field[0].start, field[0].end, - "group")) { - if (!ismode(field[2].start, field[2].end, - &permset)) - return (ARCHIVE_WARN); - if (id != -1 || field[1].start < field[1].end) { - tag = ARCHIVE_ENTRY_ACL_GROUP; - name = field[1]; - } else - tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; - } else if (prefix_c(field[0].start, field[0].end, - "other")) { - if (fields == 2 - && field[1].start < field[1].end - && ismode(field[1].start, field[1].end, - &permset)) { - /* This is Solaris-style "other:rwx" */ - } else if (fields == 3 - && field[1].start == field[1].end - && field[2].start < field[2].end - && ismode(field[2].start, field[2].end, - &permset)) { - /* This is FreeBSD-style "other::rwx" */ - } else - return (ARCHIVE_WARN); - tag = ARCHIVE_ENTRY_ACL_OTHER; - } else if (prefix_c(field[0].start, field[0].end, - "mask")) { - if (fields == 2 - && field[1].start < field[1].end - && ismode(field[1].start, field[1].end, - &permset)) { - /* This is Solaris-style "mask:rwx" */ - } else if (fields == 3 - && field[1].start == field[1].end - && field[2].start < field[2].end - && ismode(field[2].start, field[2].end, - &permset)) { - /* This is FreeBSD-style "mask::rwx" */ - } else - return (ARCHIVE_WARN); - tag = ARCHIVE_ENTRY_ACL_MASK; - } else + name.start = name.end = NULL; + if (prefix_c(field[0].start, field[0].end, "user")) { + if (!ismode(field[2].start, field[2].end, &permset)) return (ARCHIVE_WARN); - } else { - /* NFSv4 ACLs */ - if (strncmp(field[0].start, "user", - field[0].end - field[0].start) == 0) + if (id != -1 || field[1].start < field[1].end) { tag = ARCHIVE_ENTRY_ACL_USER; - else if (strncmp(field[0].start, "group", - field[0].end - field[0].start) == 0) - tag = ARCHIVE_ENTRY_ACL_GROUP; - else if (strncmp(field[0].start, "owner@", - field[0].end - field[0].start) == 0) + name = field[1]; + } else tag = ARCHIVE_ENTRY_ACL_USER_OBJ; - else if (strncmp(field[0].start, "group@", - field[0].end - field[0].start) == 0) - tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; - else if (strncmp(field[0].start, "everyone@", - field[0].end - field[0].start) == 0) - tag = ARCHIVE_ENTRY_ACL_EVERYONE; - else { - /* Unknown entry */ + } else if (prefix_c(field[0].start, field[0].end, "group")) { + if (!ismode(field[2].start, field[2].end, &permset)) return (ARCHIVE_WARN); - } - - permset = 0; - name.start = name.end = NULL; - - if (tag == ARCHIVE_ENTRY_ACL_USER || - tag == ARCHIVE_ENTRY_ACL_GROUP) { - offset = 1; + if (id != -1 || field[1].start < field[1].end) { + tag = ARCHIVE_ENTRY_ACL_GROUP; name = field[1]; } else - offset = 0; - - if (parse_nfs4_perms(field[1 + offset].start, - field[1 + offset].end, &permset) != 0) { - /* NFS4 perms are invalid */ + tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; + } else if (prefix_c(field[0].start, field[0].end, "other")) { + if (fields == 2 + && field[1].start < field[1].end + && ismode(field[1].start, field[1].end, &permset)) { + /* This is Solaris-style "other:rwx" */ + } else if (fields == 3 + && field[1].start == field[1].end + && field[2].start < field[2].end + && 
ismode(field[2].start, field[2].end, &permset)) { + /* This is FreeBSD-style "other::rwx" */ + } else return (ARCHIVE_WARN); - } - if (parse_nfs4_flags(field[2 + offset].start, - field[2 + offset].end, &permset) != 0) { - /* NFS4 flags are invalid */ + tag = ARCHIVE_ENTRY_ACL_OTHER; + } else if (prefix_c(field[0].start, field[0].end, "mask")) { + if (fields == 2 + && field[1].start < field[1].end + && ismode(field[1].start, field[1].end, &permset)) { + /* This is Solaris-style "mask:rwx" */ + } else if (fields == 3 + && field[1].start == field[1].end + && field[2].start < field[2].end + && ismode(field[2].start, field[2].end, &permset)) { + /* This is FreeBSD-style "mask::rwx" */ + } else return (ARCHIVE_WARN); - } - if (strncmp(field[3 + offset].start, "allow", - field[3 + offset].end - field[3 + offset].start) - == 0) - type = ARCHIVE_ENTRY_ACL_TYPE_ALLOW; - else if (strncmp(field[3 + offset].start, "deny", - field[3 + offset].end - field[3 + offset].start) - == 0) - type = ARCHIVE_ENTRY_ACL_TYPE_DENY; - else if (strncmp(field[3 + offset].start, "audit", - field[3 + offset].end - field[3 + offset].start) - == 0) - type = ARCHIVE_ENTRY_ACL_TYPE_AUDIT; - else if (strncmp(field[3 + offset].start, "alarm", - field[3 + offset].end - field[3 + offset].start) - == 0) - type = ARCHIVE_ENTRY_ACL_TYPE_ALARM; - else { - /* Unknown type */ - return (ARCHIVE_WARN); - } - isint(field[4 + offset].start, field[4 + offset].end, - &id); - } + tag = ARCHIVE_ENTRY_ACL_MASK; + } else + return (ARCHIVE_WARN); /* Add entry to the internal list. */ r = archive_acl_add_entry_len_l(acl, type, permset, tag, id, name.start, name.end - name.start, sc); if (r < ARCHIVE_WARN) return (r); if (r != ARCHIVE_OK) ret = ARCHIVE_WARN; } return (ret); } /* * Parse a string to a positive decimal integer. Returns true if * the string is non-empty and consists only of decimal digits, * false otherwise. */ static int isint(const char *start, const char *end, int *result) { int n = 0; if (start >= end) return (0); while (start < end) { if (*start < '0' || *start > '9') return (0); if (n > (INT_MAX / 10) || (n == INT_MAX / 10 && (*start - '0') > INT_MAX % 10)) { n = INT_MAX; } else { n *= 10; n += *start - '0'; } start++; } *result = n; return (1); } /* * Parse a string as a mode field. Returns true if * the string is non-empty and consists only of mode characters, * false otherwise. */ static int ismode(const char *start, const char *end, int *permset) { const char *p; if (start >= end) return (0); p = start; *permset = 0; while (p < end) { switch (*p++) { case 'r': case 'R': *permset |= ARCHIVE_ENTRY_ACL_READ; break; case 'w': case 'W': *permset |= ARCHIVE_ENTRY_ACL_WRITE; break; case 'x': case 'X': *permset |= ARCHIVE_ENTRY_ACL_EXECUTE; break; case '-': break; default: return (0); } } return (1); -} - -/* Parse a string as a strict NFSv4 ACL permission field. 
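/*
 * Illustrative aside (not part of this change): ismode() folds the
 * "rwx-" characters into the POSIX.1e permset bits; assumes libarchive's
 * public archive_entry.h is installed for the bit definitions.
 */
#include <archive_entry.h>
#include <stdio.h>

int
main(void)
{
	const char *text = "rw-";
	const char *p;
	int permset = 0;

	for (p = text; *p != '\0'; p++) {
		switch (*p) {
		case 'r': case 'R':
			permset |= ARCHIVE_ENTRY_ACL_READ;
			break;
		case 'w': case 'W':
			permset |= ARCHIVE_ENTRY_ACL_WRITE;
			break;
		case 'x': case 'X':
			permset |= ARCHIVE_ENTRY_ACL_EXECUTE;
			break;
		case '-':
			break;
		}
	}
	printf("permset=%#x\n", permset);	/* READ|WRITE == 0x6 */
	return (0);
}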
*/ -static int -parse_nfs4_perms(const char *start, const char *end, int *permset) -{ - const char *p; - int pos; - const char *letter = "rwxpdDaARWcCos"; - const int perms[14] = { - ARCHIVE_ENTRY_ACL_READ_DATA, - ARCHIVE_ENTRY_ACL_WRITE_DATA, - ARCHIVE_ENTRY_ACL_EXECUTE, - ARCHIVE_ENTRY_ACL_APPEND_DATA, - ARCHIVE_ENTRY_ACL_DELETE, - ARCHIVE_ENTRY_ACL_DELETE_CHILD, - ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES, - ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES, - ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS, - ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS, - ARCHIVE_ENTRY_ACL_READ_ACL, - ARCHIVE_ENTRY_ACL_WRITE_ACL, - ARCHIVE_ENTRY_ACL_WRITE_OWNER, - ARCHIVE_ENTRY_ACL_SYNCHRONIZE - }; - - if (start >= end) - return (0); - p = start; - pos = 0; - while (p < end && pos < 14) { - if (*p == letter[pos]) - *permset |= perms[pos]; - else if (*p != '-') - return (-1); - p = p + sizeof(char); - pos++; - } - return (0); -} - -/* Parse a string as a strict NFSv4 ACL flags field. */ -static int -parse_nfs4_flags(const char *start, const char *end, int *permset) -{ - const char *p; - int pos; - const char *letter = "fdinSFI"; - const int perms[7] = { - ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT, - ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT, - ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY, - ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT, - ARCHIVE_ENTRY_ACL_ENTRY_SUCCESSFUL_ACCESS, - ARCHIVE_ENTRY_ACL_ENTRY_FAILED_ACCESS, - ARCHIVE_ENTRY_ACL_ENTRY_INHERITED - }; - - if (start >= end) - return (0); - p = start; - pos = 0; - while (p < end && pos < 7) { - if (*p == letter[pos]) - *permset |= perms[pos]; - else if (*p != '-') - return (-1); - p = p + sizeof(char); - pos++; - } - return (0); } /* * Match "[:whitespace:]*(.*)[:whitespace:]*[:,\n]". *wp is updated * to point to just after the separator. *start points to the first * character of the matched text and *end just after the last * character of the matched identifier. In particular *end - *start * is the length of the field body, not including leading or trailing * whitespace. */ static void next_field(const char **p, const char **start, const char **end, char *sep) { /* Skip leading whitespace to find start of field. */ while (**p == ' ' || **p == '\t' || **p == '\n') { (*p)++; } *start = *p; /* Scan for the separator. */ while (**p != '\0' && **p != ',' && **p != ':' && **p != '\n') { (*p)++; } *sep = **p; /* Trim trailing whitespace to locate end of field. */ *end = *p - 1; while (**end == ' ' || **end == '\t' || **end == '\n') { (*end)--; } (*end)++; /* Adjust scanner location. */ if (**p != '\0') (*p)++; } /* * Return true if the characters [start...end) are a prefix of 'test'. * This makes it easy to handle the obvious abbreviations: 'u' for 'user', etc. */ static int prefix_c(const char *start, const char *end, const char *test) { if (start == end) return (0); if (*start++ != *test++) return (0); while (start < end && *start++ == *test++) ; if (start < end) return (0); return (1); } Index: projects/clang390-import/contrib/libarchive/libarchive/archive_entry.h =================================================================== --- projects/clang390-import/contrib/libarchive/libarchive/archive_entry.h (revision 305016) +++ projects/clang390-import/contrib/libarchive/libarchive/archive_entry.h (revision 305017) @@ -1,643 +1,642 @@ /*- * Copyright (c) 2003-2008 Tim Kientzle * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef ARCHIVE_ENTRY_H_INCLUDED #define ARCHIVE_ENTRY_H_INCLUDED /* Note: Compiler will complain if this does not match archive.h! */ #define ARCHIVE_VERSION_NUMBER 3002001 /* * Note: archive_entry.h is for use outside of libarchive; the * configuration headers (config.h, archive_platform.h, etc.) are * purely internal. Do NOT use HAVE_XXX configuration macros to * control the behavior of this header! If you must conditionalize, * use predefined compiler and/or platform macros. */ #include <sys/types.h> #include <stddef.h> /* for wchar_t */ #include <time.h> #if defined(_WIN32) && !defined(__CYGWIN__) #include <windows.h> #endif /* Get a suitable 64-bit integer type. */ #if !defined(__LA_INT64_T_DEFINED) # if ARCHIVE_VERSION_NUMBER < 4000000 #define __LA_INT64_T la_int64_t # endif #define __LA_INT64_T_DEFINED # if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__WATCOMC__) typedef __int64 la_int64_t; # else #include <stdint.h> # if defined(_SCO_DS) || defined(__osf__) typedef long long la_int64_t; # else typedef int64_t la_int64_t; # endif # endif #endif /* Get a suitable definition for mode_t */ #if ARCHIVE_VERSION_NUMBER >= 3999000 /* Switch to plain 'int' for libarchive 4.0. It's less broken than 'mode_t' */ # define __LA_MODE_T int #elif defined(_WIN32) && !defined(__CYGWIN__) && !defined(__BORLANDC__) && !defined(__WATCOMC__) # define __LA_MODE_T unsigned short #else # define __LA_MODE_T mode_t #endif /* Large file support for Android */ #ifdef __ANDROID__ #include "android_lf.h" #endif /* * On Windows, define LIBARCHIVE_STATIC if you're building or using a * .lib. The default here assumes you're building a DLL. Only * libarchive source should ever define __LIBARCHIVE_BUILD. */ #if ((defined __WIN32__) || (defined _WIN32) || defined(__CYGWIN__)) && (!defined LIBARCHIVE_STATIC) # ifdef __LIBARCHIVE_BUILD # ifdef __GNUC__ # define __LA_DECL __attribute__((dllexport)) extern # else # define __LA_DECL __declspec(dllexport) # endif # else # ifdef __GNUC__ # define __LA_DECL # else # define __LA_DECL __declspec(dllimport) # endif # endif #else /* Static libraries on all platforms and shared libraries on non-Windows. */ # define __LA_DECL #endif #ifdef __cplusplus extern "C" { #endif /* * Description of an archive entry. * * You can think of this as "struct stat" with some text fields added in. * * TODO: Add "comment", "charset", and possibly other entries that are * supported by "pax interchange" format. 
However, GNU, ustar, cpio, * and other variants don't support these features, so they're not an * excruciatingly high priority right now. * * TODO: "pax interchange" format allows essentially arbitrary * key/value attributes to be attached to any entry. Supporting * such extensions may make this library useful for special * applications (e.g., a package manager could attach special * package-management attributes to each entry). */ struct archive; struct archive_entry; /* * File-type constants. These are returned from archive_entry_filetype() * and passed to archive_entry_set_filetype(). * * These values match S_XXX defines on every platform I've checked, * including Windows, AIX, Linux, Solaris, and BSD. They're * (re)defined here because platforms generally don't define the ones * they don't support. For example, Windows doesn't define S_IFLNK or * S_IFBLK. Instead of having a mass of conditional logic and system * checks to define any S_XXX values that aren't supported locally, * I've just defined a new set of such constants so that * libarchive-based applications can manipulate and identify archive * entries properly even if the hosting platform can't store them on * disk. * * These values are also used directly within some portable formats, * such as cpio. If you find a platform that varies from these, the * correct solution is to leave these alone and translate from these * portable values to platform-native values when entries are read from * or written to disk. */ /* * In libarchive 4.0, we can drop the casts here. * They're needed to work around Borland C's broken mode_t. */ #define AE_IFMT ((__LA_MODE_T)0170000) #define AE_IFREG ((__LA_MODE_T)0100000) #define AE_IFLNK ((__LA_MODE_T)0120000) #define AE_IFSOCK ((__LA_MODE_T)0140000) #define AE_IFCHR ((__LA_MODE_T)0020000) #define AE_IFBLK ((__LA_MODE_T)0060000) #define AE_IFDIR ((__LA_MODE_T)0040000) #define AE_IFIFO ((__LA_MODE_T)0010000) /* * Basic object manipulation */ __LA_DECL struct archive_entry *archive_entry_clear(struct archive_entry *); /* The 'clone' function does a deep copy; all of the strings are copied too. */ __LA_DECL struct archive_entry *archive_entry_clone(struct archive_entry *); __LA_DECL void archive_entry_free(struct archive_entry *); __LA_DECL struct archive_entry *archive_entry_new(void); /* * This form of archive_entry_new2() will pull character-set * conversion information from the specified archive handle. The * older archive_entry_new(void) form is equivalent to calling * archive_entry_new2(NULL) and will result in the use of an internal * default character-set conversion. */ __LA_DECL struct archive_entry *archive_entry_new2(struct archive *); /* * Retrieve fields from an archive_entry. * * There are a number of implicit conversions among these fields. For * example, if a regular string field is set and you read the _w wide * character field, the entry will implicitly convert narrow-to-wide * using the current locale. Similarly, dev values are automatically * updated when you write devmajor or devminor and vice versa. * * In addition, fields can be "set" or "unset." Unset string fields * return NULL, non-string fields have _is_set() functions to test * whether they've been set. You can "unset" a string field by * assigning NULL; non-string fields have _unset() functions to * unset them. * * Note: There is one ambiguity in the above; string fields will * also return NULL when implicit character set conversions fail. * This is usually what you want. 
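/*
 * Illustrative aside (not part of this change): the AE_IF* constants are
 * masked with AE_IFMT exactly like the S_IF* family.  Minimal sketch:
 */
#include <archive_entry.h>
#include <stdio.h>

static const char *
entry_kind(struct archive_entry *e)
{
	switch (archive_entry_filetype(e) & AE_IFMT) {
	case AE_IFREG:	return ("regular file");
	case AE_IFDIR:	return ("directory");
	case AE_IFLNK:	return ("symlink");
	default:	return ("other");
	}
}

int
main(void)
{
	struct archive_entry *e = archive_entry_new();

	archive_entry_set_filetype(e, AE_IFREG);
	printf("%s\n", entry_kind(e));	/* "regular file" */
	archive_entry_free(e);
	return (0);
}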
*/ __LA_DECL time_t archive_entry_atime(struct archive_entry *); __LA_DECL long archive_entry_atime_nsec(struct archive_entry *); __LA_DECL int archive_entry_atime_is_set(struct archive_entry *); __LA_DECL time_t archive_entry_birthtime(struct archive_entry *); __LA_DECL long archive_entry_birthtime_nsec(struct archive_entry *); __LA_DECL int archive_entry_birthtime_is_set(struct archive_entry *); __LA_DECL time_t archive_entry_ctime(struct archive_entry *); __LA_DECL long archive_entry_ctime_nsec(struct archive_entry *); __LA_DECL int archive_entry_ctime_is_set(struct archive_entry *); __LA_DECL dev_t archive_entry_dev(struct archive_entry *); __LA_DECL int archive_entry_dev_is_set(struct archive_entry *); __LA_DECL dev_t archive_entry_devmajor(struct archive_entry *); __LA_DECL dev_t archive_entry_devminor(struct archive_entry *); __LA_DECL __LA_MODE_T archive_entry_filetype(struct archive_entry *); __LA_DECL void archive_entry_fflags(struct archive_entry *, unsigned long * /* set */, unsigned long * /* clear */); __LA_DECL const char *archive_entry_fflags_text(struct archive_entry *); __LA_DECL la_int64_t archive_entry_gid(struct archive_entry *); __LA_DECL const char *archive_entry_gname(struct archive_entry *); __LA_DECL const char *archive_entry_gname_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_gname_w(struct archive_entry *); __LA_DECL const char *archive_entry_hardlink(struct archive_entry *); __LA_DECL const char *archive_entry_hardlink_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_hardlink_w(struct archive_entry *); __LA_DECL la_int64_t archive_entry_ino(struct archive_entry *); __LA_DECL la_int64_t archive_entry_ino64(struct archive_entry *); __LA_DECL int archive_entry_ino_is_set(struct archive_entry *); __LA_DECL __LA_MODE_T archive_entry_mode(struct archive_entry *); __LA_DECL time_t archive_entry_mtime(struct archive_entry *); __LA_DECL long archive_entry_mtime_nsec(struct archive_entry *); __LA_DECL int archive_entry_mtime_is_set(struct archive_entry *); __LA_DECL unsigned int archive_entry_nlink(struct archive_entry *); __LA_DECL const char *archive_entry_pathname(struct archive_entry *); __LA_DECL const char *archive_entry_pathname_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_pathname_w(struct archive_entry *); __LA_DECL __LA_MODE_T archive_entry_perm(struct archive_entry *); __LA_DECL dev_t archive_entry_rdev(struct archive_entry *); __LA_DECL dev_t archive_entry_rdevmajor(struct archive_entry *); __LA_DECL dev_t archive_entry_rdevminor(struct archive_entry *); __LA_DECL const char *archive_entry_sourcepath(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_sourcepath_w(struct archive_entry *); __LA_DECL la_int64_t archive_entry_size(struct archive_entry *); __LA_DECL int archive_entry_size_is_set(struct archive_entry *); __LA_DECL const char *archive_entry_strmode(struct archive_entry *); __LA_DECL const char *archive_entry_symlink(struct archive_entry *); __LA_DECL const char *archive_entry_symlink_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_symlink_w(struct archive_entry *); __LA_DECL la_int64_t archive_entry_uid(struct archive_entry *); __LA_DECL const char *archive_entry_uname(struct archive_entry *); __LA_DECL const char *archive_entry_uname_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_uname_w(struct archive_entry *); __LA_DECL int archive_entry_is_data_encrypted(struct archive_entry *); __LA_DECL int 
archive_entry_is_metadata_encrypted(struct archive_entry *); __LA_DECL int archive_entry_is_encrypted(struct archive_entry *); /* * Set fields in an archive_entry. * * Note: Before libarchive 2.4, there were 'set' and 'copy' versions * of the string setters. 'copy' copied the actual string, 'set' just * stored the pointer. In libarchive 2.4 and later, strings are * always copied. */ __LA_DECL void archive_entry_set_atime(struct archive_entry *, time_t, long); __LA_DECL void archive_entry_unset_atime(struct archive_entry *); #if defined(_WIN32) && !defined(__CYGWIN__) __LA_DECL void archive_entry_copy_bhfi(struct archive_entry *, BY_HANDLE_FILE_INFORMATION *); #endif __LA_DECL void archive_entry_set_birthtime(struct archive_entry *, time_t, long); __LA_DECL void archive_entry_unset_birthtime(struct archive_entry *); __LA_DECL void archive_entry_set_ctime(struct archive_entry *, time_t, long); __LA_DECL void archive_entry_unset_ctime(struct archive_entry *); __LA_DECL void archive_entry_set_dev(struct archive_entry *, dev_t); __LA_DECL void archive_entry_set_devmajor(struct archive_entry *, dev_t); __LA_DECL void archive_entry_set_devminor(struct archive_entry *, dev_t); __LA_DECL void archive_entry_set_filetype(struct archive_entry *, unsigned int); __LA_DECL void archive_entry_set_fflags(struct archive_entry *, unsigned long /* set */, unsigned long /* clear */); /* Returns pointer to start of first invalid token, or NULL if none. */ /* Note that all recognized tokens are processed, regardless. */ __LA_DECL const char *archive_entry_copy_fflags_text(struct archive_entry *, const char *); __LA_DECL const wchar_t *archive_entry_copy_fflags_text_w(struct archive_entry *, const wchar_t *); __LA_DECL void archive_entry_set_gid(struct archive_entry *, la_int64_t); __LA_DECL void archive_entry_set_gname(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_gname_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_gname(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_gname_w(struct archive_entry *, const wchar_t *); __LA_DECL int archive_entry_update_gname_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_hardlink(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_hardlink_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_hardlink(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_hardlink_w(struct archive_entry *, const wchar_t *); __LA_DECL int archive_entry_update_hardlink_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_ino(struct archive_entry *, la_int64_t); __LA_DECL void archive_entry_set_ino64(struct archive_entry *, la_int64_t); __LA_DECL void archive_entry_set_link(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_link_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_link(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_link_w(struct archive_entry *, const wchar_t *); __LA_DECL int archive_entry_update_link_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_mode(struct archive_entry *, __LA_MODE_T); __LA_DECL void archive_entry_set_mtime(struct archive_entry *, time_t, long); __LA_DECL void archive_entry_unset_mtime(struct archive_entry *); __LA_DECL void archive_entry_set_nlink(struct archive_entry *, unsigned int); __LA_DECL void archive_entry_set_pathname(struct archive_entry *, const char *); 
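/*
 * Illustrative aside (not part of this change): typical use of the
 * setters above when synthesizing an entry from scratch; the pathname
 * and values are arbitrary.
 */
#include <archive_entry.h>
#include <time.h>

int
main(void)
{
	struct archive_entry *e = archive_entry_new();

	archive_entry_set_pathname(e, "hello.txt");
	archive_entry_set_filetype(e, AE_IFREG);
	archive_entry_set_perm(e, 0644);
	archive_entry_set_size(e, 5);
	archive_entry_set_mtime(e, time(NULL), 0);	/* seconds, nanoseconds */
	archive_entry_free(e);
	return (0);
}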
__LA_DECL void archive_entry_set_pathname_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_pathname(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_pathname_w(struct archive_entry *, const wchar_t *); __LA_DECL int archive_entry_update_pathname_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_perm(struct archive_entry *, __LA_MODE_T); __LA_DECL void archive_entry_set_rdev(struct archive_entry *, dev_t); __LA_DECL void archive_entry_set_rdevmajor(struct archive_entry *, dev_t); __LA_DECL void archive_entry_set_rdevminor(struct archive_entry *, dev_t); __LA_DECL void archive_entry_set_size(struct archive_entry *, la_int64_t); __LA_DECL void archive_entry_unset_size(struct archive_entry *); __LA_DECL void archive_entry_copy_sourcepath(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_sourcepath_w(struct archive_entry *, const wchar_t *); __LA_DECL void archive_entry_set_symlink(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_symlink_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_symlink(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_symlink_w(struct archive_entry *, const wchar_t *); __LA_DECL int archive_entry_update_symlink_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_uid(struct archive_entry *, la_int64_t); __LA_DECL void archive_entry_set_uname(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_uname_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_uname(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_uname_w(struct archive_entry *, const wchar_t *); __LA_DECL int archive_entry_update_uname_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_is_data_encrypted(struct archive_entry *, char is_encrypted); __LA_DECL void archive_entry_set_is_metadata_encrypted(struct archive_entry *, char is_encrypted); /* * Routines to bulk copy fields to/from a platform-native "struct * stat." Libarchive used to just store a struct stat inside of each * archive_entry object, but this created issues when trying to * manipulate archives on systems different than the ones they were * created on. * * TODO: On Linux and other LFS systems, provide both stat32 and * stat64 versions of these functions and all of the macro glue so * that archive_entry_stat is magically defined to * archive_entry_stat32 or archive_entry_stat64 as appropriate. */ __LA_DECL const struct stat *archive_entry_stat(struct archive_entry *); __LA_DECL void archive_entry_copy_stat(struct archive_entry *, const struct stat *); /* * Storage for Mac OS-specific AppleDouble metadata information. * Apple-format tar files store a separate binary blob containing * encoded metadata with ACL, extended attributes, etc. * This provides a place to store that blob. */ __LA_DECL const void * archive_entry_mac_metadata(struct archive_entry *, size_t *); __LA_DECL void archive_entry_copy_mac_metadata(struct archive_entry *, const void *, size_t); /* * ACL routines. 
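/*
 * Illustrative aside (not part of this change): archive_entry_copy_stat()
 * bulk-loads an entry from a native struct stat; a minimal sketch, the
 * path is arbitrary.
 */
#include <archive_entry.h>
#include <sys/stat.h>

int
main(void)
{
	struct stat st;
	struct archive_entry *e = archive_entry_new();

	if (stat("/etc/hosts", &st) == 0) {
		archive_entry_set_pathname(e, "hosts");
		archive_entry_copy_stat(e, &st);	/* mode, times, ids, size */
	}
	archive_entry_free(e);
	return (0);
}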
This used to simply store and return text-format ACL * strings, but that proved insufficient for a number of reasons: * = clients need control over uname/uid and gname/gid mappings * = there are many different ACL text formats * = would like to be able to read/convert archives containing ACLs * on platforms that lack ACL libraries * * This last point, in particular, forces me to implement a reasonably * complete set of ACL support routines. */ /* * Permission bits. */ #define ARCHIVE_ENTRY_ACL_EXECUTE 0x00000001 #define ARCHIVE_ENTRY_ACL_WRITE 0x00000002 #define ARCHIVE_ENTRY_ACL_READ 0x00000004 #define ARCHIVE_ENTRY_ACL_READ_DATA 0x00000008 #define ARCHIVE_ENTRY_ACL_LIST_DIRECTORY 0x00000008 #define ARCHIVE_ENTRY_ACL_WRITE_DATA 0x00000010 #define ARCHIVE_ENTRY_ACL_ADD_FILE 0x00000010 #define ARCHIVE_ENTRY_ACL_APPEND_DATA 0x00000020 #define ARCHIVE_ENTRY_ACL_ADD_SUBDIRECTORY 0x00000020 #define ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS 0x00000040 #define ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS 0x00000080 #define ARCHIVE_ENTRY_ACL_DELETE_CHILD 0x00000100 #define ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES 0x00000200 #define ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES 0x00000400 #define ARCHIVE_ENTRY_ACL_DELETE 0x00000800 #define ARCHIVE_ENTRY_ACL_READ_ACL 0x00001000 #define ARCHIVE_ENTRY_ACL_WRITE_ACL 0x00002000 #define ARCHIVE_ENTRY_ACL_WRITE_OWNER 0x00004000 #define ARCHIVE_ENTRY_ACL_SYNCHRONIZE 0x00008000 #define ARCHIVE_ENTRY_ACL_PERMS_POSIX1E \ (ARCHIVE_ENTRY_ACL_EXECUTE \ | ARCHIVE_ENTRY_ACL_WRITE \ | ARCHIVE_ENTRY_ACL_READ) #define ARCHIVE_ENTRY_ACL_PERMS_NFS4 \ (ARCHIVE_ENTRY_ACL_EXECUTE \ | ARCHIVE_ENTRY_ACL_READ_DATA \ | ARCHIVE_ENTRY_ACL_LIST_DIRECTORY \ | ARCHIVE_ENTRY_ACL_WRITE_DATA \ | ARCHIVE_ENTRY_ACL_ADD_FILE \ | ARCHIVE_ENTRY_ACL_APPEND_DATA \ | ARCHIVE_ENTRY_ACL_ADD_SUBDIRECTORY \ | ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS \ | ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS \ | ARCHIVE_ENTRY_ACL_DELETE_CHILD \ | ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES \ | ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES \ | ARCHIVE_ENTRY_ACL_DELETE \ | ARCHIVE_ENTRY_ACL_READ_ACL \ | ARCHIVE_ENTRY_ACL_WRITE_ACL \ | ARCHIVE_ENTRY_ACL_WRITE_OWNER \ | ARCHIVE_ENTRY_ACL_SYNCHRONIZE) /* * Inheritance values (NFS4 ACLs only); included in permset. */ #define ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT 0x02000000 #define ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT 0x04000000 #define ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT 0x08000000 #define ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY 0x10000000 #define ARCHIVE_ENTRY_ACL_ENTRY_SUCCESSFUL_ACCESS 0x20000000 #define ARCHIVE_ENTRY_ACL_ENTRY_FAILED_ACCESS 0x40000000 -#define ARCHIVE_ENTRY_ACL_ENTRY_INHERITED 0x80000000 #define ARCHIVE_ENTRY_ACL_INHERITANCE_NFS4 \ (ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT \ | ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT \ | ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT \ | ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY \ | ARCHIVE_ENTRY_ACL_ENTRY_SUCCESSFUL_ACCESS \ | ARCHIVE_ENTRY_ACL_ENTRY_FAILED_ACCESS) /* We need to be able to specify combinations of these. 
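/*
 * Illustrative aside (not part of this change): the POSIX.1e permset is
 * just the low three bits; mapping the owner class of a mode onto it:
 */
#include <archive_entry.h>

int
mode_owner_permset(unsigned int mode)
{
	int permset = 0;

	if (mode & 0400)
		permset |= ARCHIVE_ENTRY_ACL_READ;	/* 0x00000004 */
	if (mode & 0200)
		permset |= ARCHIVE_ENTRY_ACL_WRITE;	/* 0x00000002 */
	if (mode & 0100)
		permset |= ARCHIVE_ENTRY_ACL_EXECUTE;	/* 0x00000001 */
	return (permset & ARCHIVE_ENTRY_ACL_PERMS_POSIX1E);
}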
*/ #define ARCHIVE_ENTRY_ACL_TYPE_ACCESS 256 /* POSIX.1e only */ #define ARCHIVE_ENTRY_ACL_TYPE_DEFAULT 512 /* POSIX.1e only */ #define ARCHIVE_ENTRY_ACL_TYPE_ALLOW 1024 /* NFS4 only */ #define ARCHIVE_ENTRY_ACL_TYPE_DENY 2048 /* NFS4 only */ #define ARCHIVE_ENTRY_ACL_TYPE_AUDIT 4096 /* NFS4 only */ #define ARCHIVE_ENTRY_ACL_TYPE_ALARM 8192 /* NFS4 only */ #define ARCHIVE_ENTRY_ACL_TYPE_POSIX1E (ARCHIVE_ENTRY_ACL_TYPE_ACCESS \ | ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) #define ARCHIVE_ENTRY_ACL_TYPE_NFS4 (ARCHIVE_ENTRY_ACL_TYPE_ALLOW \ | ARCHIVE_ENTRY_ACL_TYPE_DENY \ | ARCHIVE_ENTRY_ACL_TYPE_AUDIT \ | ARCHIVE_ENTRY_ACL_TYPE_ALARM) /* Tag values mimic POSIX.1e */ #define ARCHIVE_ENTRY_ACL_USER 10001 /* Specified user. */ #define ARCHIVE_ENTRY_ACL_USER_OBJ 10002 /* User who owns the file. */ #define ARCHIVE_ENTRY_ACL_GROUP 10003 /* Specified group. */ #define ARCHIVE_ENTRY_ACL_GROUP_OBJ 10004 /* Group who owns the file. */ #define ARCHIVE_ENTRY_ACL_MASK 10005 /* Modify group access (POSIX.1e only) */ #define ARCHIVE_ENTRY_ACL_OTHER 10006 /* Public (POSIX.1e only) */ #define ARCHIVE_ENTRY_ACL_EVERYONE 10107 /* Everyone (NFS4 only) */ /* * Set the ACL by clearing it and adding entries one at a time. * Unlike the POSIX.1e ACL routines, you must specify the type * (access/default) for each entry. Internally, the ACL data is just * a soup of entries. API calls here allow you to retrieve just the * entries of interest. This design (which goes against the spirit of * POSIX.1e) is useful for handling archive formats that combine * default and access information in a single ACL list. */ __LA_DECL void archive_entry_acl_clear(struct archive_entry *); __LA_DECL int archive_entry_acl_add_entry(struct archive_entry *, int /* type */, int /* permset */, int /* tag */, int /* qual */, const char * /* name */); __LA_DECL int archive_entry_acl_add_entry_w(struct archive_entry *, int /* type */, int /* permset */, int /* tag */, int /* qual */, const wchar_t * /* name */); /* * To retrieve the ACL, first "reset", then repeatedly ask for the * "next" entry. The want_type parameter allows you to request only * certain types of entries. */ __LA_DECL int archive_entry_acl_reset(struct archive_entry *, int /* want_type */); __LA_DECL int archive_entry_acl_next(struct archive_entry *, int /* want_type */, int * /* type */, int * /* permset */, int * /* tag */, int * /* qual */, const char ** /* name */); __LA_DECL int archive_entry_acl_next_w(struct archive_entry *, int /* want_type */, int * /* type */, int * /* permset */, int * /* tag */, int * /* qual */, const wchar_t ** /* name */); /* * Construct a text-format ACL. The flags argument is a bitmask that * can include any of the following: * * ARCHIVE_ENTRY_ACL_TYPE_ACCESS - Include POSIX.1e "access" entries. * ARCHIVE_ENTRY_ACL_TYPE_DEFAULT - Include POSIX.1e "default" entries. * ARCHIVE_ENTRY_ACL_TYPE_NFS4 - Include NFS4 entries. * ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID - Include extra numeric ID field in * each ACL entry. ('star' introduced this for POSIX.1e, this flag * also applies to NFS4.) * ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT - Include "default:" before each * default ACL entry, as used in old Solaris ACLs. 
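/*
 * Illustrative aside (not part of this change): building an access ACL
 * entry by entry with the API above and rendering it with the style
 * flags; assumes libarchive's public headers are installed.
 */
#include <archive_entry.h>
#include <stdio.h>

int
main(void)
{
	struct archive_entry *e = archive_entry_new();
	const char *txt;

	archive_entry_acl_add_entry(e, ARCHIVE_ENTRY_ACL_TYPE_ACCESS,
	    ARCHIVE_ENTRY_ACL_READ | ARCHIVE_ENTRY_ACL_WRITE,
	    ARCHIVE_ENTRY_ACL_USER, 1000, "operator");
	txt = archive_entry_acl_text(e,
	    ARCHIVE_ENTRY_ACL_TYPE_ACCESS | ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID);
	if (txt != NULL)
		printf("%s\n", txt);	/* e.g. "...,user:operator:rw-:1000,..." */
	archive_entry_free(e);
	return (0);
}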
*/ -#define ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID 16384 -#define ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT 32768 +#define ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID 1024 +#define ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT 2048 __LA_DECL const wchar_t *archive_entry_acl_text_w(struct archive_entry *, int /* flags */); __LA_DECL const char *archive_entry_acl_text(struct archive_entry *, int /* flags */); /* Return a count of entries matching 'want_type' */ __LA_DECL int archive_entry_acl_count(struct archive_entry *, int /* want_type */); /* Return an opaque ACL object. */ /* There's not yet anything clients can actually do with this... */ struct archive_acl; __LA_DECL struct archive_acl *archive_entry_acl(struct archive_entry *); /* * extended attributes */ __LA_DECL void archive_entry_xattr_clear(struct archive_entry *); __LA_DECL void archive_entry_xattr_add_entry(struct archive_entry *, const char * /* name */, const void * /* value */, size_t /* size */); /* * To retrieve the xattr list, first "reset", then repeatedly ask for the * "next" entry. */ __LA_DECL int archive_entry_xattr_count(struct archive_entry *); __LA_DECL int archive_entry_xattr_reset(struct archive_entry *); __LA_DECL int archive_entry_xattr_next(struct archive_entry *, const char ** /* name */, const void ** /* value */, size_t *); /* * sparse */ __LA_DECL void archive_entry_sparse_clear(struct archive_entry *); __LA_DECL void archive_entry_sparse_add_entry(struct archive_entry *, la_int64_t /* offset */, la_int64_t /* length */); /* * To retrieve the xattr list, first "reset", then repeatedly ask for the * "next" entry. */ __LA_DECL int archive_entry_sparse_count(struct archive_entry *); __LA_DECL int archive_entry_sparse_reset(struct archive_entry *); __LA_DECL int archive_entry_sparse_next(struct archive_entry *, la_int64_t * /* offset */, la_int64_t * /* length */); /* * Utility to match up hardlinks. * * The 'struct archive_entry_linkresolver' is a cache of archive entries * for files with multiple links. Here's how to use it: * 1. Create a lookup object with archive_entry_linkresolver_new() * 2. Tell it the archive format you're using. * 3. Hand each archive_entry to archive_entry_linkify(). * That function will return 0, 1, or 2 entries that should * be written. * 4. Call archive_entry_linkify(resolver, NULL) until * no more entries are returned. * 5. Call archive_entry_linkresolver_free(resolver) to free resources. * * The entries returned have their hardlink and size fields updated * appropriately. If an entry is passed in that does not refer to * a file with multiple links, it is returned unchanged. The intention * is that you should be able to simply filter all entries through * this machine. * * To make things more efficient, be sure that each entry has a valid * nlinks value. The hardlink cache uses this to track when all links * have been found. If the nlinks value is zero, it will keep every * name in the cache indefinitely, which can use a lot of memory. * * Note that archive_entry_size() is reset to zero if the file * body should not be written to the archive. Pay attention! */ struct archive_entry_linkresolver; /* * There are three different strategies for marking hardlinks. * The descriptions below name them after the best-known * formats that rely on each strategy: * * "Old cpio" is the simplest, it always returns any entry unmodified. * As far as I know, only cpio formats use this. 
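/*
 * Illustrative aside (not part of this change): the five-step resolver
 * workflow described above, as a skeleton.  write_entry() is a
 * placeholder and the pax-restricted strategy is just one plausible
 * format code.
 */
#include <archive.h>
#include <archive_entry.h>

static void
write_entry(struct archive_entry *e)
{
	(void)e;	/* ...archive_write_header() etc. would go here... */
}

static void
linkify_all(struct archive_entry **entries, int n)
{
	struct archive_entry_linkresolver *res;
	struct archive_entry *e, *sparse;
	int i;

	res = archive_entry_linkresolver_new();		/* step 1 */
	archive_entry_linkresolver_set_strategy(res,
	    ARCHIVE_FORMAT_TAR_PAX_RESTRICTED);		/* step 2 */
	for (i = 0; i < n; i++) {
		e = entries[i];
		archive_entry_linkify(res, &e, &sparse);	/* step 3 */
		if (e != NULL)
			write_entry(e);
		if (sparse != NULL)
			write_entry(sparse);
	}
	for (;;) {					/* step 4: drain deferred links */
		e = NULL;
		archive_entry_linkify(res, &e, &sparse);
		if (e == NULL)
			break;
		write_entry(e);
	}
	archive_entry_linkresolver_free(res);		/* step 5 */
}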
Old cpio archives * store every link with the full body; the onus is on the dearchiver * to detect and properly link the files as they are restored. * "tar" is also pretty simple; it caches a copy the first time it sees * any link. Subsequent appearances are modified to be hardlink * references to the first one without any body. Used by all tar * formats, although the newest tar formats permit the "old cpio" strategy * as well. This strategy is very simple for the dearchiver, * and reasonably straightforward for the archiver. * "new cpio" is trickier. It stores the body only with the last * occurrence. The complication is that we might not * see every link to a particular file in a single session, so * there's no easy way to know when we've seen the last occurrence. * The solution here is to queue one link until we see the next. * At the end of the session, you can enumerate any remaining * entries by calling archive_entry_linkify(NULL) and store those * bodies. If you have a file with three links l1, l2, and l3, * you'll get the following behavior if you see all three links: * linkify(l1) => NULL (the resolver stores l1 internally) * linkify(l2) => l1 (resolver stores l2, you write l1) * linkify(l3) => l2, l3 (all links seen, you can write both). * If you only see l1 and l2, you'll get this behavior: * linkify(l1) => NULL * linkify(l2) => l1 * linkify(NULL) => l2 (at end, you retrieve remaining links) * As the name suggests, this strategy is used by newer cpio variants. * It's noticeably more complex for the archiver, slightly more complex * for the dearchiver than the tar strategy, but makes it straightforward * to restore a file using any link by simply continuing to scan until * you see a link that is stored with a body. In contrast, the tar * strategy requires you to rescan the archive from the beginning to * correctly extract an arbitrary link. */ __LA_DECL struct archive_entry_linkresolver *archive_entry_linkresolver_new(void); __LA_DECL void archive_entry_linkresolver_set_strategy( struct archive_entry_linkresolver *, int /* format_code */); __LA_DECL void archive_entry_linkresolver_free(struct archive_entry_linkresolver *); __LA_DECL void archive_entry_linkify(struct archive_entry_linkresolver *, struct archive_entry **, struct archive_entry **); __LA_DECL struct archive_entry *archive_entry_partial_links( struct archive_entry_linkresolver *res, unsigned int *links); #ifdef __cplusplus } #endif /* This is meaningless outside of this header. */ #undef __LA_DECL #endif /* !ARCHIVE_ENTRY_H_INCLUDED */ Index: projects/clang390-import/contrib/libarchive/libarchive/archive_read_disk_entry_from_file.c =================================================================== --- projects/clang390-import/contrib/libarchive/libarchive/archive_read_disk_entry_from_file.c (revision 305016) +++ projects/clang390-import/contrib/libarchive/libarchive/archive_read_disk_entry_from_file.c (revision 305017) @@ -1,1349 +1,1264 @@ /*- * Copyright (c) 2003-2009 Tim Kientzle * Copyright (c) 2010-2012 Michihiro NAKAJIMA * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); /* This is the tree-walking code for POSIX systems. */ #if !defined(_WIN32) || defined(__CYGWIN__) #ifdef HAVE_SYS_TYPES_H /* Mac OSX requires sys/types.h before sys/acl.h. */ #include <sys/types.h> #endif #ifdef HAVE_SYS_ACL_H #include <sys/acl.h> #endif #ifdef HAVE_SYS_EXTATTR_H #include <sys/extattr.h> #endif #ifdef HAVE_SYS_IOCTL_H #include <sys/ioctl.h> #endif #ifdef HAVE_SYS_PARAM_H #include <sys/param.h> #endif #ifdef HAVE_SYS_STAT_H #include <sys/stat.h> #endif #if defined(HAVE_SYS_XATTR_H) #include <sys/xattr.h> #elif defined(HAVE_ATTR_XATTR_H) #include <attr/xattr.h> #endif #ifdef HAVE_SYS_EA_H #include <sys/ea.h> #endif #ifdef HAVE_ACL_LIBACL_H #include <acl/libacl.h> #endif #ifdef HAVE_COPYFILE_H #include <copyfile.h> #endif #ifdef HAVE_ERRNO_H #include <errno.h> #endif #ifdef HAVE_FCNTL_H #include <fcntl.h> #endif #ifdef HAVE_LIMITS_H #include <limits.h> #endif #ifdef HAVE_LINUX_TYPES_H #include <linux/types.h> #endif #ifdef HAVE_LINUX_FIEMAP_H #include <linux/fiemap.h> #endif #ifdef HAVE_LINUX_FS_H #include <linux/fs.h> #endif /* * Some Linux distributions have both linux/ext2_fs.h and ext2fs/ext2_fs.h. * As the include guards don't agree, the order of include is important. */ #ifdef HAVE_LINUX_EXT2_FS_H #include <linux/ext2_fs.h> /* for Linux file flags */ #endif #if defined(HAVE_EXT2FS_EXT2_FS_H) && !defined(__CYGWIN__) #include <ext2fs/ext2_fs.h> /* Linux file flags, broken on Cygwin */ #endif #ifdef HAVE_PATHS_H #include <paths.h> #endif #ifdef HAVE_UNISTD_H #include <unistd.h> #endif #include "archive.h" #include "archive_entry.h" #include "archive_private.h" #include "archive_read_disk_private.h" #ifndef O_CLOEXEC #define O_CLOEXEC 0 #endif /* * Linux and FreeBSD plug this obvious hole in POSIX.1e in * different ways. 
*/ #if HAVE_ACL_GET_PERM #define ACL_GET_PERM acl_get_perm #elif HAVE_ACL_GET_PERM_NP #define ACL_GET_PERM acl_get_perm_np #endif static int setup_acls(struct archive_read_disk *, struct archive_entry *, int *fd); static int setup_mac_metadata(struct archive_read_disk *, struct archive_entry *, int *fd); static int setup_xattrs(struct archive_read_disk *, struct archive_entry *, int *fd); static int setup_sparse(struct archive_read_disk *, struct archive_entry *, int *fd); int archive_read_disk_entry_from_file(struct archive *_a, struct archive_entry *entry, int fd, const struct stat *st) { struct archive_read_disk *a = (struct archive_read_disk *)_a; const char *path, *name; struct stat s; int initial_fd = fd; int r, r1; archive_clear_error(_a); path = archive_entry_sourcepath(entry); if (path == NULL) path = archive_entry_pathname(entry); if (a->tree == NULL) { if (st == NULL) { #if HAVE_FSTAT if (fd >= 0) { if (fstat(fd, &s) != 0) { archive_set_error(&a->archive, errno, "Can't fstat"); return (ARCHIVE_FAILED); } } else #endif #if HAVE_LSTAT if (!a->follow_symlinks) { if (lstat(path, &s) != 0) { archive_set_error(&a->archive, errno, "Can't lstat %s", path); return (ARCHIVE_FAILED); } } else #endif if (stat(path, &s) != 0) { archive_set_error(&a->archive, errno, "Can't stat %s", path); return (ARCHIVE_FAILED); } st = &s; } archive_entry_copy_stat(entry, st); } /* Lookup uname/gname */ name = archive_read_disk_uname(_a, archive_entry_uid(entry)); if (name != NULL) archive_entry_copy_uname(entry, name); name = archive_read_disk_gname(_a, archive_entry_gid(entry)); if (name != NULL) archive_entry_copy_gname(entry, name); #ifdef HAVE_STRUCT_STAT_ST_FLAGS /* On FreeBSD, we get flags for free with the stat. */ /* TODO: Does this belong in copy_stat()? */ if (st->st_flags != 0) archive_entry_set_fflags(entry, st->st_flags, 0); #endif #if defined(EXT2_IOC_GETFLAGS) && defined(HAVE_WORKING_EXT2_IOC_GETFLAGS) /* Linux requires an extra ioctl to pull the flags. Although * this is an extra step, it has a nice side-effect: We get an * open file descriptor which we can use in the subsequent lookups. 
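/*
 * Illustrative aside (not part of this change): the caller-side pattern
 * for archive_read_disk_entry_from_file(); with fd == -1 and st == NULL
 * the function stats the path itself.  The path is arbitrary.
 */
#include <archive.h>
#include <archive_entry.h>
#include <stdio.h>

int
main(void)
{
	struct archive *a = archive_read_disk_new();
	struct archive_entry *e = archive_entry_new();

	archive_read_disk_set_standard_lookup(a);	/* uname/gname lookups */
	archive_entry_set_pathname(e, "/etc/hosts");
	if (archive_read_disk_entry_from_file(a, e, -1, NULL) == ARCHIVE_OK)
		printf("%s: %lld bytes\n", archive_entry_pathname(e),
		    (long long)archive_entry_size(e));
	archive_entry_free(e);
	archive_read_disk_free(a);
	return (0);
}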
*/ if ((S_ISREG(st->st_mode) || S_ISDIR(st->st_mode))) { if (fd < 0) { if (a->tree != NULL) fd = a->open_on_current_dir(a->tree, path, O_RDONLY | O_NONBLOCK | O_CLOEXEC); else fd = open(path, O_RDONLY | O_NONBLOCK | O_CLOEXEC); __archive_ensure_cloexec_flag(fd); } if (fd >= 0) { int stflags; r = ioctl(fd, EXT2_IOC_GETFLAGS, &stflags); if (r == 0 && stflags != 0) archive_entry_set_fflags(entry, stflags, 0); } } #endif #if defined(HAVE_READLINK) || defined(HAVE_READLINKAT) if (S_ISLNK(st->st_mode)) { size_t linkbuffer_len = st->st_size + 1; char *linkbuffer; int lnklen; linkbuffer = malloc(linkbuffer_len); if (linkbuffer == NULL) { archive_set_error(&a->archive, ENOMEM, "Couldn't read link data"); return (ARCHIVE_FAILED); } if (a->tree != NULL) { #ifdef HAVE_READLINKAT lnklen = readlinkat(a->tree_current_dir_fd(a->tree), path, linkbuffer, linkbuffer_len); #else if (a->tree_enter_working_dir(a->tree) != 0) { archive_set_error(&a->archive, errno, "Couldn't read link data"); free(linkbuffer); return (ARCHIVE_FAILED); } lnklen = readlink(path, linkbuffer, linkbuffer_len); #endif /* HAVE_READLINKAT */ } else lnklen = readlink(path, linkbuffer, linkbuffer_len); if (lnklen < 0) { archive_set_error(&a->archive, errno, "Couldn't read link data"); free(linkbuffer); return (ARCHIVE_FAILED); } linkbuffer[lnklen] = 0; archive_entry_set_symlink(entry, linkbuffer); free(linkbuffer); } #endif /* HAVE_READLINK || HAVE_READLINKAT */ r = setup_acls(a, entry, &fd); if (!a->suppress_xattr) { r1 = setup_xattrs(a, entry, &fd); if (r1 < r) r = r1; } if (a->enable_copyfile) { r1 = setup_mac_metadata(a, entry, &fd); if (r1 < r) r = r1; } r1 = setup_sparse(a, entry, &fd); if (r1 < r) r = r1; /* If we opened the file earlier in this function, close it. */ if (initial_fd != fd) close(fd); return (r); } #if defined(__APPLE__) && defined(HAVE_COPYFILE_H) /* * The Mac OS "copyfile()" API copies the extended metadata for a * file into a separate file in AppleDouble format (see RFC 1740). * * Mac OS tar and cpio implementations store this extended * metadata as a separate entry just before the regular entry * with a "._" prefix added to the filename. * * Note that this is currently done unconditionally; the tar program has * an option to discard this information before the archive is written. * * TODO: If there's a failure, report it and return ARCHIVE_WARN. */ static int setup_mac_metadata(struct archive_read_disk *a, struct archive_entry *entry, int *fd) { int tempfd = -1; int copyfile_flags = COPYFILE_NOFOLLOW | COPYFILE_ACL | COPYFILE_XATTR; struct stat copyfile_stat; int ret = ARCHIVE_OK; void *buff = NULL; int have_attrs; const char *name, *tempdir; struct archive_string tempfile; (void)fd; /* UNUSED */ name = archive_entry_sourcepath(entry); if (name == NULL) name = archive_entry_pathname(entry); if (name == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Can't open file to read extended attributes: No name"); return (ARCHIVE_WARN); } if (a->tree != NULL) { if (a->tree_enter_working_dir(a->tree) != 0) { archive_set_error(&a->archive, errno, "Couldn't change dir"); return (ARCHIVE_FAILED); } } /* Short-circuit if there's nothing to do. 
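 *
 * (With COPYFILE_CHECK, copyfile(3) performs a dry run: it returns a
 * bitmask of the requested flag bits that would copy any data, 0 if
 * there is nothing to copy, and -1 on error.  That is why have_attrs
 * below is compared against -1 and 0 rather than used as a count.)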
*/
	have_attrs = copyfile(name, NULL, 0,
	    copyfile_flags | COPYFILE_CHECK);
	if (have_attrs == -1) {
		archive_set_error(&a->archive, errno,
			"Could not check extended attributes");
		return (ARCHIVE_WARN);
	}
	if (have_attrs == 0)
		return (ARCHIVE_OK);

	tempdir = NULL;
	if (issetugid() == 0)
		tempdir = getenv("TMPDIR");
	if (tempdir == NULL)
		tempdir = _PATH_TMP;
	archive_string_init(&tempfile);
	archive_strcpy(&tempfile, tempdir);
	archive_strcat(&tempfile, "tar.md.XXXXXX");
	tempfd = mkstemp(tempfile.s);
	if (tempfd < 0) {
		archive_set_error(&a->archive, errno,
		    "Could not open extended attribute file");
		ret = ARCHIVE_WARN;
		goto cleanup;
	}
	__archive_ensure_cloexec_flag(tempfd);

	/* XXX I wish copyfile() could pack directly to a memory
	 * buffer; that would avoid the temp file here.  For that
	 * matter, it would be nice if fcopyfile() actually worked,
	 * that would reduce the many open/close races here. */
	if (copyfile(name, tempfile.s, 0, copyfile_flags | COPYFILE_PACK)) {
		archive_set_error(&a->archive, errno,
		    "Could not pack extended attributes");
		ret = ARCHIVE_WARN;
		goto cleanup;
	}
	if (fstat(tempfd, &copyfile_stat)) {
		archive_set_error(&a->archive, errno,
		    "Could not check size of extended attributes");
		ret = ARCHIVE_WARN;
		goto cleanup;
	}
	buff = malloc(copyfile_stat.st_size);
	if (buff == NULL) {
		archive_set_error(&a->archive, errno,
		    "Could not allocate memory for extended attributes");
		ret = ARCHIVE_WARN;
		goto cleanup;
	}
	if (copyfile_stat.st_size != read(tempfd, buff, copyfile_stat.st_size)) {
		archive_set_error(&a->archive, errno,
		    "Could not read extended attributes into memory");
		ret = ARCHIVE_WARN;
		goto cleanup;
	}
	archive_entry_copy_mac_metadata(entry, buff, copyfile_stat.st_size);

cleanup:
	if (tempfd >= 0) {
		close(tempfd);
		unlink(tempfile.s);
	}
	archive_string_free(&tempfile);
	free(buff);
	return (ret);
}

#else

/*
 * Stub implementation for non-Mac systems.
 */
static int
setup_mac_metadata(struct archive_read_disk *a,
    struct archive_entry *entry, int *fd)
{
	(void)a;	/* UNUSED */
	(void)entry;	/* UNUSED */
	(void)fd;	/* UNUSED */
	return (ARCHIVE_OK);
}
#endif

#ifdef HAVE_POSIX_ACL
static int translate_acl(struct archive_read_disk *a,
    struct archive_entry *entry, acl_t acl, int archive_entry_acl_type);

static int
setup_acls(struct archive_read_disk *a,
    struct archive_entry *entry, int *fd)
{
	const char	*accpath;
	acl_t		 acl;
+#if HAVE_ACL_IS_TRIVIAL_NP
	int		r;
+#endif

	accpath = archive_entry_sourcepath(entry);
	if (accpath == NULL)
		accpath = archive_entry_pathname(entry);

-	if (*fd < 0 && a->tree != NULL) {
-		if (a->follow_symlinks ||
-		    archive_entry_filetype(entry) != AE_IFLNK)
-			*fd = a->open_on_current_dir(a->tree,
-			    accpath, O_RDONLY | O_NONBLOCK);
-		if (*fd < 0) {
-			if (a->tree_enter_working_dir(a->tree) != 0) {
-				archive_set_error(&a->archive, errno,
-				    "Couldn't access %s", accpath);
-				return (ARCHIVE_FAILED);
-			}
-		}
-	}
-	archive_entry_acl_clear(entry);
-	acl = NULL;
-
#ifdef ACL_TYPE_NFS4
	/* Try NFS4 ACL first. */
-#if HAVE_ACL_GET_FD_NP
	if (*fd >= 0)
-		acl = acl_get_fd_np(*fd, ACL_TYPE_NFS4);
-#endif
-	if (acl == NULL) {
+		acl = acl_get_fd(*fd);
#if HAVE_ACL_GET_LINK_NP
-	if (!a->follow_symlinks)
-		acl = acl_get_link_np(accpath, ACL_TYPE_NFS4);
+	else if (!a->follow_symlinks)
+		acl = acl_get_link_np(accpath, ACL_TYPE_NFS4);
#else
-	if ((!a->follow_symlinks)
-	    && (archive_entry_filetype(entry) == AE_IFLNK)) {
-		/* We can't get the ACL of a symlink, so we assume
-		   it can't have one.
*/ - acl = NULL; - } + else if ((!a->follow_symlinks) + && (archive_entry_filetype(entry) == AE_IFLNK)) + /* We can't get the ACL of a symlink, so we assume it can't + have one. */ + acl = NULL; #endif - } - if (acl == NULL) + else acl = acl_get_file(accpath, ACL_TYPE_NFS4); - - if (acl != NULL) { #if HAVE_ACL_IS_TRIVIAL_NP - /* Ignore "trivial" ACLs that just mirror the file mode. */ - if (acl_is_trivial_np(acl, &r) == 0) { - if (r) { - acl_free(acl); - acl = NULL; - return (ARCHIVE_OK); - } - } + /* Ignore "trivial" ACLs that just mirror the file mode. */ + acl_is_trivial_np(acl, &r); + if (r) { + acl_free(acl); + acl = NULL; + } #endif - r = translate_acl(a, entry, acl, ARCHIVE_ENTRY_ACL_TYPE_NFS4); + if (acl != NULL) { + translate_acl(a, entry, acl, ARCHIVE_ENTRY_ACL_TYPE_NFS4); acl_free(acl); - return (r); + return (ARCHIVE_OK); } #endif /* Retrieve access ACL from file. */ if (*fd >= 0) acl = acl_get_fd(*fd); - if (acl == NULL) { #if HAVE_ACL_GET_LINK_NP - if (!a->follow_symlinks) - acl = acl_get_link_np(accpath, ACL_TYPE_ACCESS); + else if (!a->follow_symlinks) + acl = acl_get_link_np(accpath, ACL_TYPE_ACCESS); #else - if ((!a->follow_symlinks) - && (archive_entry_filetype(entry) == AE_IFLNK)) { - /* We can't get the ACL of a symlink, so we assume it - can't have one. */ - acl = NULL; - } + else if ((!a->follow_symlinks) + && (archive_entry_filetype(entry) == AE_IFLNK)) + /* We can't get the ACL of a symlink, so we assume it can't + have one. */ + acl = NULL; #endif - } - if (acl == NULL) + else acl = acl_get_file(accpath, ACL_TYPE_ACCESS); - if (acl != NULL) { -#if HAVE_ACL_IS_TRIVIAL_NP - /* Ignore "trivial" ACLs that just mirror the file mode. */ - if (acl_is_trivial_np(acl, &r) == 0) { - if (r) { - acl_free(acl); - acl = NULL; - return (ARCHIVE_OK); - } - } -#endif - r = translate_acl(a, entry, acl, + translate_acl(a, entry, acl, ARCHIVE_ENTRY_ACL_TYPE_ACCESS); acl_free(acl); - acl = NULL; - if (r != 0) - return (r); } /* Only directories can have default ACLs. */ if (S_ISDIR(archive_entry_mode(entry))) { acl = acl_get_file(accpath, ACL_TYPE_DEFAULT); if (acl != NULL) { - r = translate_acl(a, entry, acl, + translate_acl(a, entry, acl, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT); acl_free(acl); - if (r != 0) - return (r); } } return (ARCHIVE_OK); } /* * Translate system ACL into libarchive internal structure. 
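 *
 * The translation is table-driven: each platform permission or
 * inheritance bit is paired with its libarchive counterpart in the
 * maps below, and translate_acl() simply walks those maps.  For
 * example, a permset holding ACL_READ | ACL_WRITE comes out as
 * ARCHIVE_ENTRY_ACL_READ | ARCHIVE_ENTRY_ACL_WRITE in ae_perm.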
*/ static struct { int archive_perm; int platform_perm; } acl_perm_map[] = { {ARCHIVE_ENTRY_ACL_EXECUTE, ACL_EXECUTE}, {ARCHIVE_ENTRY_ACL_WRITE, ACL_WRITE}, {ARCHIVE_ENTRY_ACL_READ, ACL_READ}, #ifdef ACL_TYPE_NFS4 {ARCHIVE_ENTRY_ACL_READ_DATA, ACL_READ_DATA}, {ARCHIVE_ENTRY_ACL_LIST_DIRECTORY, ACL_LIST_DIRECTORY}, {ARCHIVE_ENTRY_ACL_WRITE_DATA, ACL_WRITE_DATA}, {ARCHIVE_ENTRY_ACL_ADD_FILE, ACL_ADD_FILE}, {ARCHIVE_ENTRY_ACL_APPEND_DATA, ACL_APPEND_DATA}, {ARCHIVE_ENTRY_ACL_ADD_SUBDIRECTORY, ACL_ADD_SUBDIRECTORY}, {ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS, ACL_READ_NAMED_ATTRS}, {ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS, ACL_WRITE_NAMED_ATTRS}, {ARCHIVE_ENTRY_ACL_DELETE_CHILD, ACL_DELETE_CHILD}, {ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES, ACL_READ_ATTRIBUTES}, {ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES, ACL_WRITE_ATTRIBUTES}, {ARCHIVE_ENTRY_ACL_DELETE, ACL_DELETE}, {ARCHIVE_ENTRY_ACL_READ_ACL, ACL_READ_ACL}, {ARCHIVE_ENTRY_ACL_WRITE_ACL, ACL_WRITE_ACL}, {ARCHIVE_ENTRY_ACL_WRITE_OWNER, ACL_WRITE_OWNER}, {ARCHIVE_ENTRY_ACL_SYNCHRONIZE, ACL_SYNCHRONIZE} #endif }; #ifdef ACL_TYPE_NFS4 static struct { int archive_inherit; int platform_inherit; } acl_inherit_map[] = { {ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT, ACL_ENTRY_FILE_INHERIT}, {ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT, ACL_ENTRY_DIRECTORY_INHERIT}, {ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT, ACL_ENTRY_NO_PROPAGATE_INHERIT}, {ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY, ACL_ENTRY_INHERIT_ONLY} }; #endif static int translate_acl(struct archive_read_disk *a, struct archive_entry *entry, acl_t acl, int default_entry_acl_type) { acl_tag_t acl_tag; #ifdef ACL_TYPE_NFS4 acl_entry_type_t acl_type; acl_flagset_t acl_flagset; - int brand; + int brand, r; #endif acl_entry_t acl_entry; acl_permset_t acl_permset; int i, entry_acl_type; - int r, s, ae_id, ae_tag, ae_perm; + int s, ae_id, ae_tag, ae_perm; const char *ae_name; #ifdef ACL_TYPE_NFS4 // FreeBSD "brands" ACLs as POSIX.1e or NFSv4 // Make sure the "brand" on this ACL is consistent // with the default_entry_acl_type bits provided. - if (acl_get_brand_np(acl, &brand) != 0) { - archive_set_error(&a->archive, errno, - "Failed to read ACL brand"); - return (ARCHIVE_FAILED); - } + acl_get_brand_np(acl, &brand); switch (brand) { case ACL_BRAND_POSIX: switch (default_entry_acl_type) { case ARCHIVE_ENTRY_ACL_TYPE_ACCESS: case ARCHIVE_ENTRY_ACL_TYPE_DEFAULT: break; default: - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Invalid ACL entry type for POSIX.1e ACL"); - return (ARCHIVE_FAILED); + // XXX set warning message? + return ARCHIVE_FAILED; } break; case ACL_BRAND_NFS4: if (default_entry_acl_type & ~ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "ACL brand mismatch"); - return (ARCHIVE_FAILED); + // XXX set warning message? + return ARCHIVE_FAILED; } break; default: - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Invalid ACL brand"); - return (ARCHIVE_FAILED); + // XXX set warning message? 
+ return ARCHIVE_FAILED; break; } #endif s = acl_get_entry(acl, ACL_FIRST_ENTRY, &acl_entry); - if (s == -1) { - archive_set_error(&a->archive, errno, - "Failed to get ACL entry"); - return (ARCHIVE_FAILED); - } while (s == 1) { ae_id = -1; ae_name = NULL; ae_perm = 0; - if (acl_get_tag_type(acl_entry, &acl_tag) != 0) { - archive_set_error(&a->archive, errno, - "Failed to get ACL tag type"); - return (ARCHIVE_FAILED); - } + acl_get_tag_type(acl_entry, &acl_tag); switch (acl_tag) { case ACL_USER: ae_id = (int)*(uid_t *)acl_get_qualifier(acl_entry); ae_name = archive_read_disk_uname(&a->archive, ae_id); ae_tag = ARCHIVE_ENTRY_ACL_USER; break; case ACL_GROUP: ae_id = (int)*(gid_t *)acl_get_qualifier(acl_entry); ae_name = archive_read_disk_gname(&a->archive, ae_id); ae_tag = ARCHIVE_ENTRY_ACL_GROUP; break; case ACL_MASK: ae_tag = ARCHIVE_ENTRY_ACL_MASK; break; case ACL_USER_OBJ: ae_tag = ARCHIVE_ENTRY_ACL_USER_OBJ; break; case ACL_GROUP_OBJ: ae_tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; break; case ACL_OTHER: ae_tag = ARCHIVE_ENTRY_ACL_OTHER; break; #ifdef ACL_TYPE_NFS4 case ACL_EVERYONE: ae_tag = ARCHIVE_ENTRY_ACL_EVERYONE; break; #endif default: /* Skip types that libarchive can't support. */ s = acl_get_entry(acl, ACL_NEXT_ENTRY, &acl_entry); continue; } // XXX acl type maps to allow/deny/audit/YYYY bits // XXX acl_get_entry_type_np on FreeBSD returns EINVAL for // non-NFSv4 ACLs entry_acl_type = default_entry_acl_type; #ifdef ACL_TYPE_NFS4 - if (default_entry_acl_type == ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - if (acl_get_entry_type_np(acl_entry, &acl_type) != 0) { - archive_set_error(&a->archive, errno, - "Failed to get ACL type from an NFSv4 ACL entry"); - return (ARCHIVE_FAILED); - } + r = acl_get_entry_type_np(acl_entry, &acl_type); + if (r == 0) { switch (acl_type) { case ACL_ENTRY_TYPE_ALLOW: entry_acl_type = ARCHIVE_ENTRY_ACL_TYPE_ALLOW; break; case ACL_ENTRY_TYPE_DENY: entry_acl_type = ARCHIVE_ENTRY_ACL_TYPE_DENY; break; case ACL_ENTRY_TYPE_AUDIT: entry_acl_type = ARCHIVE_ENTRY_ACL_TYPE_AUDIT; break; case ACL_ENTRY_TYPE_ALARM: entry_acl_type = ARCHIVE_ENTRY_ACL_TYPE_ALARM; break; - default: - archive_set_error(&a->archive, errno, - "Unknown NFSv4 ACL entry type: %d", acl_type); - return (ARCHIVE_FAILED); } + } - /* - * Libarchive stores "flag" (NFSv4 inheritance bits) - * in the ae_perm bitmap. - */ - if (acl_get_flagset_np(acl_entry, &acl_flagset) != 0) { - archive_set_error(&a->archive, errno, - "Failed to get flagset from an NFSv4 ACL entry"); - return (ARCHIVE_FAILED); - } - for (i = 0; i < (int)(sizeof(acl_inherit_map) / sizeof(acl_inherit_map[0])); ++i) { - r = acl_get_flag_np(acl_flagset, acl_inherit_map[i].platform_inherit); - if (r == -1) { - archive_set_error(&a->archive, errno, - "Failed to check flag in a NFSv4 ACL flagset"); - return (ARCHIVE_FAILED); - } else if (r) + /* + * Libarchive stores "flag" (NFSv4 inheritance bits) + * in the ae_perm bitmap. 
+ */ + // XXX acl_get_flagset_np on FreeBSD returns EINVAL for + // non-NFSv4 ACLs + r = acl_get_flagset_np(acl_entry, &acl_flagset); + if (r == 0) { + for (i = 0; i < (int)(sizeof(acl_inherit_map) / sizeof(acl_inherit_map[0])); ++i) { + if (acl_get_flag_np(acl_flagset, + acl_inherit_map[i].platform_inherit)) ae_perm |= acl_inherit_map[i].archive_inherit; } } #endif - if (acl_get_permset(acl_entry, &acl_permset) != 0) { - archive_set_error(&a->archive, errno, - "Failed to get ACL permission set"); - return (ARCHIVE_FAILED); - } + acl_get_permset(acl_entry, &acl_permset); for (i = 0; i < (int)(sizeof(acl_perm_map) / sizeof(acl_perm_map[0])); ++i) { /* * acl_get_perm() is spelled differently on different * platforms; see above. */ - r = ACL_GET_PERM(acl_permset, acl_perm_map[i].platform_perm); - if (r == -1) { - archive_set_error(&a->archive, errno, - "Failed to check permission in an ACL permission set"); - return (ARCHIVE_FAILED); - } else if (r) + if (ACL_GET_PERM(acl_permset, acl_perm_map[i].platform_perm)) ae_perm |= acl_perm_map[i].archive_perm; } - r = archive_entry_acl_add_entry(entry, entry_acl_type, + archive_entry_acl_add_entry(entry, entry_acl_type, ae_perm, ae_tag, ae_id, ae_name); - if (r != 0) - return (r); s = acl_get_entry(acl, ACL_NEXT_ENTRY, &acl_entry); - if (s == -1) { - archive_set_error(&a->archive, errno, - "Failed to get ACL entry"); - return (ARCHIVE_FAILED); - } } return (ARCHIVE_OK); } #else static int setup_acls(struct archive_read_disk *a, struct archive_entry *entry, int *fd) { (void)a; /* UNUSED */ (void)entry; /* UNUSED */ (void)fd; /* UNUSED */ return (ARCHIVE_OK); } #endif #if (HAVE_FGETXATTR && HAVE_FLISTXATTR && HAVE_LISTXATTR && \ HAVE_LLISTXATTR && HAVE_GETXATTR && HAVE_LGETXATTR) || \ (HAVE_FGETEA && HAVE_FLISTEA && HAVE_LISTEA) /* * Linux and AIX extended attribute support. * * TODO: By using a stack-allocated buffer for the first * call to getxattr(), we might be able to avoid the second * call entirely. We only need the second call if the * stack-allocated buffer is too small. But a modest buffer * of 1024 bytes or so will often be big enough. Same applies * to listxattr(). 
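 *
 * A minimal sketch of that TODO (an untested illustration against the
 * Linux getxattr(2) family; the fall-back into the existing two-call
 * malloc() path is elided):
 *
 *	char stackbuff[1024];
 *	ssize_t size = fgetxattr(fd, name, stackbuff, sizeof(stackbuff));
 *	if (size >= 0)
 *		archive_entry_xattr_add_entry(entry, name,
 *		    stackbuff, size);
 *	else if (errno == ERANGE)
 *		... retry through the malloc() path below ...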
*/ static int setup_xattr(struct archive_read_disk *a, struct archive_entry *entry, const char *name, int fd) { ssize_t size; void *value = NULL; const char *accpath; accpath = archive_entry_sourcepath(entry); if (accpath == NULL) accpath = archive_entry_pathname(entry); #if HAVE_FGETXATTR if (fd >= 0) size = fgetxattr(fd, name, NULL, 0); else if (!a->follow_symlinks) size = lgetxattr(accpath, name, NULL, 0); else size = getxattr(accpath, name, NULL, 0); #elif HAVE_FGETEA if (fd >= 0) size = fgetea(fd, name, NULL, 0); else if (!a->follow_symlinks) size = lgetea(accpath, name, NULL, 0); else size = getea(accpath, name, NULL, 0); #endif if (size == -1) { archive_set_error(&a->archive, errno, "Couldn't query extended attribute"); return (ARCHIVE_WARN); } if (size > 0 && (value = malloc(size)) == NULL) { archive_set_error(&a->archive, errno, "Out of memory"); return (ARCHIVE_FATAL); } #if HAVE_FGETXATTR if (fd >= 0) size = fgetxattr(fd, name, value, size); else if (!a->follow_symlinks) size = lgetxattr(accpath, name, value, size); else size = getxattr(accpath, name, value, size); #elif HAVE_FGETEA if (fd >= 0) size = fgetea(fd, name, value, size); else if (!a->follow_symlinks) size = lgetea(accpath, name, value, size); else size = getea(accpath, name, value, size); #endif if (size == -1) { archive_set_error(&a->archive, errno, "Couldn't read extended attribute"); return (ARCHIVE_WARN); } archive_entry_xattr_add_entry(entry, name, value, size); free(value); return (ARCHIVE_OK); } static int setup_xattrs(struct archive_read_disk *a, struct archive_entry *entry, int *fd) { char *list, *p; const char *path; ssize_t list_size; path = archive_entry_sourcepath(entry); if (path == NULL) path = archive_entry_pathname(entry); if (*fd < 0 && a->tree != NULL) { if (a->follow_symlinks || archive_entry_filetype(entry) != AE_IFLNK) *fd = a->open_on_current_dir(a->tree, path, O_RDONLY | O_NONBLOCK); if (*fd < 0) { if (a->tree_enter_working_dir(a->tree) != 0) { archive_set_error(&a->archive, errno, "Couldn't access %s", path); return (ARCHIVE_FAILED); } } } #if HAVE_FLISTXATTR if (*fd >= 0) list_size = flistxattr(*fd, NULL, 0); else if (!a->follow_symlinks) list_size = llistxattr(path, NULL, 0); else list_size = listxattr(path, NULL, 0); #elif HAVE_FLISTEA if (*fd >= 0) list_size = flistea(*fd, NULL, 0); else if (!a->follow_symlinks) list_size = llistea(path, NULL, 0); else list_size = listea(path, NULL, 0); #endif if (list_size == -1) { if (errno == ENOTSUP || errno == ENOSYS) return (ARCHIVE_OK); archive_set_error(&a->archive, errno, "Couldn't list extended attributes"); return (ARCHIVE_WARN); } if (list_size == 0) return (ARCHIVE_OK); if ((list = malloc(list_size)) == NULL) { archive_set_error(&a->archive, errno, "Out of memory"); return (ARCHIVE_FATAL); } #if HAVE_FLISTXATTR if (*fd >= 0) list_size = flistxattr(*fd, list, list_size); else if (!a->follow_symlinks) list_size = llistxattr(path, list, list_size); else list_size = listxattr(path, list, list_size); #elif HAVE_FLISTEA if (*fd >= 0) list_size = flistea(*fd, list, list_size); else if (!a->follow_symlinks) list_size = llistea(path, list, list_size); else list_size = listea(path, list, list_size); #endif if (list_size == -1) { archive_set_error(&a->archive, errno, "Couldn't retrieve extended attributes"); free(list); return (ARCHIVE_WARN); } for (p = list; (p - list) < list_size; p += strlen(p) + 1) { if (strncmp(p, "system.", 7) == 0 || strncmp(p, "xfsroot.", 8) == 0) continue; setup_xattr(a, entry, p, *fd); } free(list); return (ARCHIVE_OK); } 
#elif HAVE_EXTATTR_GET_FILE && HAVE_EXTATTR_LIST_FILE && \ HAVE_DECL_EXTATTR_NAMESPACE_USER /* * FreeBSD extattr interface. */ /* TODO: Implement this. Follow the Linux model above, but * with FreeBSD-specific system calls, of course. Be careful * to not include the system extattrs that hold ACLs; we handle * those separately. */ static int setup_xattr(struct archive_read_disk *a, struct archive_entry *entry, int namespace, const char *name, const char *fullname, int fd); static int setup_xattr(struct archive_read_disk *a, struct archive_entry *entry, int namespace, const char *name, const char *fullname, int fd) { ssize_t size; void *value = NULL; const char *accpath; accpath = archive_entry_sourcepath(entry); if (accpath == NULL) accpath = archive_entry_pathname(entry); if (fd >= 0) size = extattr_get_fd(fd, namespace, name, NULL, 0); else if (!a->follow_symlinks) size = extattr_get_link(accpath, namespace, name, NULL, 0); else size = extattr_get_file(accpath, namespace, name, NULL, 0); if (size == -1) { archive_set_error(&a->archive, errno, "Couldn't query extended attribute"); return (ARCHIVE_WARN); } if (size > 0 && (value = malloc(size)) == NULL) { archive_set_error(&a->archive, errno, "Out of memory"); return (ARCHIVE_FATAL); } if (fd >= 0) size = extattr_get_fd(fd, namespace, name, value, size); else if (!a->follow_symlinks) size = extattr_get_link(accpath, namespace, name, value, size); else size = extattr_get_file(accpath, namespace, name, value, size); if (size == -1) { free(value); archive_set_error(&a->archive, errno, "Couldn't read extended attribute"); return (ARCHIVE_WARN); } archive_entry_xattr_add_entry(entry, fullname, value, size); free(value); return (ARCHIVE_OK); } static int setup_xattrs(struct archive_read_disk *a, struct archive_entry *entry, int *fd) { char buff[512]; char *list, *p; ssize_t list_size; const char *path; int namespace = EXTATTR_NAMESPACE_USER; path = archive_entry_sourcepath(entry); if (path == NULL) path = archive_entry_pathname(entry); if (*fd < 0 && a->tree != NULL) { if (a->follow_symlinks || archive_entry_filetype(entry) != AE_IFLNK) *fd = a->open_on_current_dir(a->tree, path, O_RDONLY | O_NONBLOCK); if (*fd < 0) { if (a->tree_enter_working_dir(a->tree) != 0) { archive_set_error(&a->archive, errno, "Couldn't access %s", path); return (ARCHIVE_FAILED); } } } if (*fd >= 0) list_size = extattr_list_fd(*fd, namespace, NULL, 0); else if (!a->follow_symlinks) list_size = extattr_list_link(path, namespace, NULL, 0); else list_size = extattr_list_file(path, namespace, NULL, 0); if (list_size == -1 && errno == EOPNOTSUPP) return (ARCHIVE_OK); if (list_size == -1) { archive_set_error(&a->archive, errno, "Couldn't list extended attributes"); return (ARCHIVE_WARN); } if (list_size == 0) return (ARCHIVE_OK); if ((list = malloc(list_size)) == NULL) { archive_set_error(&a->archive, errno, "Out of memory"); return (ARCHIVE_FATAL); } if (*fd >= 0) list_size = extattr_list_fd(*fd, namespace, list, list_size); else if (!a->follow_symlinks) list_size = extattr_list_link(path, namespace, list, list_size); else list_size = extattr_list_file(path, namespace, list, list_size); if (list_size == -1) { archive_set_error(&a->archive, errno, "Couldn't retrieve extended attributes"); free(list); return (ARCHIVE_WARN); } p = list; while ((p - list) < list_size) { size_t len = 255 & (int)*p; char *name; strcpy(buff, "user."); name = buff + strlen(buff); memcpy(name, p + 1, len); name[len] = '\0'; setup_xattr(a, entry, namespace, name, buff, *fd); p += 1 + len; } free(list); 
	return (ARCHIVE_OK);
}

#else

/*
 * Generic (stub) extended attribute support.
 */
static int
setup_xattrs(struct archive_read_disk *a,
    struct archive_entry *entry, int *fd)
{
	(void)a;     /* UNUSED */
	(void)entry; /* UNUSED */
	(void)fd;    /* UNUSED */
	return (ARCHIVE_OK);
}

#endif

#if defined(HAVE_LINUX_FIEMAP_H)

/*
 * Linux sparse interface.
 *
 * The FIEMAP ioctl returns an "extent" for each physical allocation
 * on disk.  We need to process those to generate a more compact list
 * of logical file blocks.  We also need to be very careful to use
 * FIEMAP_FLAG_SYNC here, since there are reports that Linux sometimes
 * does not report allocations for newly-written data that hasn't
 * been synced to disk.
 *
 * It's important to return a minimal sparse file list because we want
 * to not trigger sparse file extensions if we don't have to, since
 * not all readers support them.
 */

static int
setup_sparse(struct archive_read_disk *a,
    struct archive_entry *entry, int *fd)
{
	char buff[4096];
	struct fiemap *fm;
	struct fiemap_extent *fe;
	int64_t size;
	int count, do_fiemap, iters;
	int exit_sts = ARCHIVE_OK;

	if (archive_entry_filetype(entry) != AE_IFREG
	    || archive_entry_size(entry) <= 0
	    || archive_entry_hardlink(entry) != NULL)
		return (ARCHIVE_OK);

	if (*fd < 0) {
		const char *path;

		path = archive_entry_sourcepath(entry);
		if (path == NULL)
			path = archive_entry_pathname(entry);
		if (a->tree != NULL)
			*fd = a->open_on_current_dir(a->tree, path,
				O_RDONLY | O_NONBLOCK | O_CLOEXEC);
		else
			*fd = open(path, O_RDONLY | O_NONBLOCK | O_CLOEXEC);
		if (*fd < 0) {
			archive_set_error(&a->archive, errno,
			    "Can't open `%s'", path);
			return (ARCHIVE_FAILED);
		}
		__archive_ensure_cloexec_flag(*fd);
	}

	/* Initialize buffer to avoid the error valgrind complains about. */
	memset(buff, 0, sizeof(buff));
	count = (sizeof(buff) - sizeof(*fm))/sizeof(*fe);
	fm = (struct fiemap *)buff;
	fm->fm_start = 0;
	fm->fm_length = ~0ULL;
	fm->fm_flags = FIEMAP_FLAG_SYNC;
	fm->fm_extent_count = count;
	do_fiemap = 1;
	size = archive_entry_size(entry);
	for (iters = 0; ; ++iters) {
		int i, r;

		r = ioctl(*fd, FS_IOC_FIEMAP, fm);
		if (r < 0) {
			/* If an error occurs, it is better to return
			 * ARCHIVE_OK, because an earlier kernel
			 * version (< 2.6.28) cannot perform
			 * FS_IOC_FIEMAP. */
			goto exit_setup_sparse;
		}
		if (fm->fm_mapped_extents == 0) {
			if (iters == 0) {
				/* Fully sparse file; insert a zero-length "data" entry */
				archive_entry_sparse_add_entry(entry, 0, 0);
			}
			break;
		}
		fe = fm->fm_extents;
		for (i = 0; i < (int)fm->fm_mapped_extents; i++, fe++) {
			if (!(fe->fe_flags & FIEMAP_EXTENT_UNWRITTEN)) {
				/* The fe_length of the last block may
				 * extend past the end of the file;
				 * clamp it to the file size. */
				int64_t length = fe->fe_length;
				if (fe->fe_logical + length > (uint64_t)size)
					length -= fe->fe_logical + length - size;
				if (fe->fe_logical == 0 && length == size) {
					/* This is not sparse. */
					do_fiemap = 0;
					break;
				}
				if (length > 0)
					archive_entry_sparse_add_entry(entry,
					    fe->fe_logical, length);
			}
			if (fe->fe_flags & FIEMAP_EXTENT_LAST)
				do_fiemap = 0;
		}
		if (do_fiemap) {
			fe = fm->fm_extents + fm->fm_mapped_extents -1;
			fm->fm_start = fe->fe_logical + fe->fe_length;
		} else
			break;
	}
exit_setup_sparse:
	return (exit_sts);
}

#elif defined(SEEK_HOLE) && defined(SEEK_DATA) && defined(_PC_MIN_HOLE_SIZE)

/*
 * FreeBSD and Solaris sparse interface.
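 *
 * The loop below alternates lseek(SEEK_DATA) and lseek(SEEK_HOLE) to
 * enumerate the data regions.  For example, a 1 MiB file whose first
 * 4 KiB is data and whose remainder is a hole yields: SEEK_DATA from
 * 0 returns 0, SEEK_HOLE from 0 returns 4096, so one sparse entry
 * (offset 0, length 4096) is recorded; the next SEEK_DATA then fails
 * with ENXIO and the loop ends.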
*/
static int
setup_sparse(struct archive_read_disk *a,
    struct archive_entry *entry, int *fd)
{
	int64_t size;
	off_t initial_off; /* FreeBSD/Solaris only, so off_t okay here */
	off_t off_s, off_e; /* FreeBSD/Solaris only, so off_t okay here */
	int exit_sts = ARCHIVE_OK;
	int check_fully_sparse = 0;

	if (archive_entry_filetype(entry) != AE_IFREG
	    || archive_entry_size(entry) <= 0
	    || archive_entry_hardlink(entry) != NULL)
		return (ARCHIVE_OK);

	/* Does the filesystem support the reporting of holes? */
	if (*fd < 0 && a->tree != NULL) {
		const char *path;

		path = archive_entry_sourcepath(entry);
		if (path == NULL)
			path = archive_entry_pathname(entry);
		*fd = a->open_on_current_dir(a->tree, path,
		    O_RDONLY | O_NONBLOCK);
		if (*fd < 0) {
			archive_set_error(&a->archive, errno,
			    "Can't open `%s'", path);
			return (ARCHIVE_FAILED);
		}
	}

	if (*fd >= 0) {
		if (fpathconf(*fd, _PC_MIN_HOLE_SIZE) <= 0)
			return (ARCHIVE_OK);
		initial_off = lseek(*fd, 0, SEEK_CUR);
		if (initial_off != 0)
			lseek(*fd, 0, SEEK_SET);
	} else {
		const char *path;

		path = archive_entry_sourcepath(entry);
		if (path == NULL)
			path = archive_entry_pathname(entry);
		if (pathconf(path, _PC_MIN_HOLE_SIZE) <= 0)
			return (ARCHIVE_OK);
		*fd = open(path, O_RDONLY | O_NONBLOCK | O_CLOEXEC);
		if (*fd < 0) {
			archive_set_error(&a->archive, errno,
			    "Can't open `%s'", path);
			return (ARCHIVE_FAILED);
		}
		__archive_ensure_cloexec_flag(*fd);
		initial_off = 0;
	}

	off_s = 0;
	size = archive_entry_size(entry);
	while (off_s < size) {
		off_s = lseek(*fd, off_s, SEEK_DATA);
		if (off_s == (off_t)-1) {
			if (errno == ENXIO) {
				/* no more holes */
				if (archive_entry_sparse_count(entry) == 0) {
					/* Potentially a fully-sparse file. */
					check_fully_sparse = 1;
				}
				break;
			}
			archive_set_error(&a->archive, errno,
			    "lseek(SEEK_HOLE) failed");
			exit_sts = ARCHIVE_FAILED;
			goto exit_setup_sparse;
		}
		off_e = lseek(*fd, off_s, SEEK_HOLE);
		if (off_e == (off_t)-1) {
			if (errno == ENXIO) {
				off_e = lseek(*fd, 0, SEEK_END);
				if (off_e != (off_t)-1)
					break;/* no more data */
			}
			archive_set_error(&a->archive, errno,
			    "lseek(SEEK_DATA) failed");
			exit_sts = ARCHIVE_FAILED;
			goto exit_setup_sparse;
		}
		if (off_s == 0 && off_e == size)
			break;/* This is not sparse. */
		archive_entry_sparse_add_entry(entry, off_s,
			off_e - off_s);
		off_s = off_e;
	}

	if (check_fully_sparse) {
		if (lseek(*fd, 0, SEEK_HOLE) == 0 &&
			lseek(*fd, 0, SEEK_END) == size) {
			/* Fully sparse file; insert a zero-length "data" entry */
			archive_entry_sparse_add_entry(entry, 0, 0);
		}
	}
exit_setup_sparse:
	lseek(*fd, initial_off, SEEK_SET);
	return (exit_sts);
}

#else

/*
 * Generic (stub) sparse support.
 */
static int
setup_sparse(struct archive_read_disk *a,
    struct archive_entry *entry, int *fd)
{
	(void)a;     /* UNUSED */
	(void)entry; /* UNUSED */
	(void)fd;    /* UNUSED */
	return (ARCHIVE_OK);
}

#endif

#endif /* !defined(_WIN32) || defined(__CYGWIN__) */

Index: projects/clang390-import/contrib/libarchive/libarchive/archive_read_support_format_tar.c
===================================================================
--- projects/clang390-import/contrib/libarchive/libarchive/archive_read_support_format_tar.c (revision 305016)
+++ projects/clang390-import/contrib/libarchive/libarchive/archive_read_support_format_tar.c (revision 305017)
@@ -1,2792 +1,2775 @@
/*-
 * Copyright (c) 2003-2007 Tim Kientzle
 * Copyright (c) 2011-2012 Michihiro NAKAJIMA
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); #ifdef HAVE_ERRNO_H #include #endif #include #ifdef HAVE_STDLIB_H #include #endif #ifdef HAVE_STRING_H #include #endif #include "archive.h" #include "archive_acl_private.h" /* For ACL parsing routines. */ #include "archive_entry.h" #include "archive_entry_locale.h" #include "archive_private.h" #include "archive_read_private.h" #define tar_min(a,b) ((a) < (b) ? (a) : (b)) /* * Layout of POSIX 'ustar' tar header. */ struct archive_entry_header_ustar { char name[100]; char mode[8]; char uid[8]; char gid[8]; char size[12]; char mtime[12]; char checksum[8]; char typeflag[1]; char linkname[100]; /* "old format" header ends here */ char magic[6]; /* For POSIX: "ustar\0" */ char version[2]; /* For POSIX: "00" */ char uname[32]; char gname[32]; char rdevmajor[8]; char rdevminor[8]; char prefix[155]; }; /* * Structure of GNU tar header */ struct gnu_sparse { char offset[12]; char numbytes[12]; }; struct archive_entry_header_gnutar { char name[100]; char mode[8]; char uid[8]; char gid[8]; char size[12]; char mtime[12]; char checksum[8]; char typeflag[1]; char linkname[100]; char magic[8]; /* "ustar \0" (note blank/blank/null at end) */ char uname[32]; char gname[32]; char rdevmajor[8]; char rdevminor[8]; char atime[12]; char ctime[12]; char offset[12]; char longnames[4]; char unused[1]; struct gnu_sparse sparse[4]; char isextended[1]; char realsize[12]; /* * Old GNU format doesn't use POSIX 'prefix' field; they use * the 'L' (longname) entry instead. */ }; /* * Data specific to this format. */ struct sparse_block { struct sparse_block *next; int64_t offset; int64_t remaining; int hole; }; struct tar { struct archive_string acl_text; struct archive_string entry_pathname; /* For "GNU.sparse.name" and other similar path extensions. 
*/ struct archive_string entry_pathname_override; struct archive_string entry_linkpath; struct archive_string entry_uname; struct archive_string entry_gname; struct archive_string longlink; struct archive_string longname; struct archive_string pax_header; struct archive_string pax_global; struct archive_string line; int pax_hdrcharset_binary; int header_recursion_depth; int64_t entry_bytes_remaining; int64_t entry_offset; int64_t entry_padding; int64_t entry_bytes_unconsumed; int64_t realsize; struct sparse_block *sparse_list; struct sparse_block *sparse_last; int64_t sparse_offset; int64_t sparse_numbytes; int sparse_gnu_major; int sparse_gnu_minor; char sparse_gnu_pending; struct archive_string localname; struct archive_string_conv *opt_sconv; struct archive_string_conv *sconv; struct archive_string_conv *sconv_acl; struct archive_string_conv *sconv_default; int init_default_conversion; int compat_2x; int process_mac_extensions; int read_concatenated_archives; }; static int archive_block_is_null(const char *p); static char *base64_decode(const char *, size_t, size_t *); static int gnu_add_sparse_entry(struct archive_read *, struct tar *, int64_t offset, int64_t remaining); static void gnu_clear_sparse_list(struct tar *); static int gnu_sparse_old_read(struct archive_read *, struct tar *, const struct archive_entry_header_gnutar *header, size_t *); static int gnu_sparse_old_parse(struct archive_read *, struct tar *, const struct gnu_sparse *sparse, int length); static int gnu_sparse_01_parse(struct archive_read *, struct tar *, const char *); static ssize_t gnu_sparse_10_read(struct archive_read *, struct tar *, size_t *); static int header_Solaris_ACL(struct archive_read *, struct tar *, struct archive_entry *, const void *, size_t *); static int header_common(struct archive_read *, struct tar *, struct archive_entry *, const void *); static int header_old_tar(struct archive_read *, struct tar *, struct archive_entry *, const void *); static int header_pax_extensions(struct archive_read *, struct tar *, struct archive_entry *, const void *, size_t *); static int header_pax_global(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int header_longlink(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int header_longname(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int read_mac_metadata_blob(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int header_volume(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int header_ustar(struct archive_read *, struct tar *, struct archive_entry *, const void *h); static int header_gnutar(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int archive_read_format_tar_bid(struct archive_read *, int); static int archive_read_format_tar_options(struct archive_read *, const char *, const char *); static int archive_read_format_tar_cleanup(struct archive_read *); static int archive_read_format_tar_read_data(struct archive_read *a, const void **buff, size_t *size, int64_t *offset); static int archive_read_format_tar_skip(struct archive_read *a); static int archive_read_format_tar_read_header(struct archive_read *, struct archive_entry *); static int checksum(struct archive_read *, const void *); static int pax_attribute(struct archive_read *, struct tar *, struct archive_entry *, const char *key, 
const char *value); -static int pax_attribute_acl(struct archive_read *, struct tar *, - struct archive_entry *, const char *, int); -static int pax_attribute_xattr(struct archive_entry *, const char *, - const char *); static int pax_header(struct archive_read *, struct tar *, struct archive_entry *, char *attr); static void pax_time(const char *, int64_t *sec, long *nanos); static ssize_t readline(struct archive_read *, struct tar *, const char **, ssize_t limit, size_t *); static int read_body_to_string(struct archive_read *, struct tar *, struct archive_string *, const void *h, size_t *); static int solaris_sparse_parse(struct archive_read *, struct tar *, struct archive_entry *, const char *); static int64_t tar_atol(const char *, size_t); static int64_t tar_atol10(const char *, size_t); static int64_t tar_atol256(const char *, size_t); static int64_t tar_atol8(const char *, size_t); static int tar_read_header(struct archive_read *, struct tar *, struct archive_entry *, size_t *); static int tohex(int c); static char *url_decode(const char *); static void tar_flush_unconsumed(struct archive_read *, size_t *); int archive_read_support_format_gnutar(struct archive *a) { archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_support_format_gnutar"); return (archive_read_support_format_tar(a)); } int archive_read_support_format_tar(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; struct tar *tar; int r; archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_support_format_tar"); tar = (struct tar *)calloc(1, sizeof(*tar)); #ifdef HAVE_COPYFILE_H /* Set this by default on Mac OS. */ tar->process_mac_extensions = 1; #endif if (tar == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate tar data"); return (ARCHIVE_FATAL); } r = __archive_read_register_format(a, tar, "tar", archive_read_format_tar_bid, archive_read_format_tar_options, archive_read_format_tar_read_header, archive_read_format_tar_read_data, archive_read_format_tar_skip, NULL, archive_read_format_tar_cleanup, NULL, NULL); if (r != ARCHIVE_OK) free(tar); return (ARCHIVE_OK); } static int archive_read_format_tar_cleanup(struct archive_read *a) { struct tar *tar; tar = (struct tar *)(a->format->data); gnu_clear_sparse_list(tar); archive_string_free(&tar->acl_text); archive_string_free(&tar->entry_pathname); archive_string_free(&tar->entry_pathname_override); archive_string_free(&tar->entry_linkpath); archive_string_free(&tar->entry_uname); archive_string_free(&tar->entry_gname); archive_string_free(&tar->line); archive_string_free(&tar->pax_global); archive_string_free(&tar->pax_header); archive_string_free(&tar->longname); archive_string_free(&tar->longlink); archive_string_free(&tar->localname); free(tar); (a->format->data) = NULL; return (ARCHIVE_OK); } static int archive_read_format_tar_bid(struct archive_read *a, int best_bid) { int bid; const char *h; const struct archive_entry_header_ustar *header; (void)best_bid; /* UNUSED */ bid = 0; /* Now let's look at the actual header and see if it matches. */ h = __archive_read_ahead(a, 512, NULL); if (h == NULL) return (-1); /* If it's an end-of-archive mark, we can handle it. */ if (h[0] == 0 && archive_block_is_null(h)) { /* * Usually, I bid the number of bits verified, but * in this case, 4096 seems excessive so I picked 10 as * an arbitrary but reasonable-seeming value. 
*/
		return (10);
	}

	/* If it's not an end-of-archive mark, it must have a valid checksum.*/
	if (!checksum(a, h))
		return (0);
	bid += 48;  /* Checksum is usually 6 octal digits. */

	header = (const struct archive_entry_header_ustar *)h;

	/* Recognize POSIX formats. */
	if ((memcmp(header->magic, "ustar\0", 6) == 0)
	    && (memcmp(header->version, "00", 2) == 0))
		bid += 56;

	/* Recognize GNU tar format. */
	if ((memcmp(header->magic, "ustar ", 6) == 0)
	    && (memcmp(header->version, " \0", 2) == 0))
		bid += 56;

	/* Type flag must be null, digit or A-Z, a-z. */
	if (header->typeflag[0] != 0
	    && !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9')
	    && !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z')
	    && !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') )
		return (0);
	bid += 2;  /* 6 bits of variation in an 8-bit field leaves 2 bits. */

	/* Sanity check: Look at first byte of mode field. */
	switch (255 & (unsigned)header->mode[0]) {
	case 0: case 255:
		/* Base-256 value: No further verification possible! */
		break;
	case ' ': /* Not recommended, but not illegal, either. */
		break;
	case '0': case '1': case '2': case '3':
	case '4': case '5': case '6': case '7':
		/* Octal Value. */
		/* TODO: Check format of remainder of this field. */
		break;
	default:
		/* Not a valid mode; bail out here. */
		return (0);
	}
	/* TODO: Sanity test uid/gid/size/mtime/rdevmajor/rdevminor fields. */

	return (bid);
}

static int
archive_read_format_tar_options(struct archive_read *a,
    const char *key, const char *val)
{
	struct tar *tar;
	int ret = ARCHIVE_FAILED;

	tar = (struct tar *)(a->format->data);
	if (strcmp(key, "compat-2x") == 0) {
		/* Handle UTF-8 filenames as libarchive 2.x */
		tar->compat_2x = (val != NULL && val[0] != 0);
		tar->init_default_conversion = tar->compat_2x;
		return (ARCHIVE_OK);
	} else if (strcmp(key, "hdrcharset") == 0) {
		if (val == NULL || val[0] == 0)
			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
			    "tar: hdrcharset option needs a character-set name");
		else {
			tar->opt_sconv =
			    archive_string_conversion_from_charset(
				&a->archive, val, 0);
			if (tar->opt_sconv != NULL)
				ret = ARCHIVE_OK;
			else
				ret = ARCHIVE_FATAL;
		}
		return (ret);
	} else if (strcmp(key, "mac-ext") == 0) {
		tar->process_mac_extensions = (val != NULL && val[0] != 0);
		return (ARCHIVE_OK);
	} else if (strcmp(key, "read_concatenated_archives") == 0) {
		tar->read_concatenated_archives = (val != NULL && val[0] != 0);
		return (ARCHIVE_OK);
	}

	/* Note: The "warn" return is just to inform the options
	 * supervisor that we didn't handle it.  It will generate
	 * a suitable error if no one used this option. */
	return (ARCHIVE_WARN);
}

/* utility function- this exists to centralize the logic of tracking
 * how much unconsumed data we have floating around, and to consume
 * anything outstanding since we're going to do read_aheads
 */
static void
tar_flush_unconsumed(struct archive_read *a, size_t *unconsumed)
{
	if (*unconsumed) {
/*
		void *data = (void *)__archive_read_ahead(a, *unconsumed, NULL);
		 * this block of code is to poison claimed unconsumed
		 * space, ensuring things break if it is in use still.
		 * currently it WILL break things, so enable it only
		 * for debugging this issue
		if (data) {
			memset(data, 0xff, *unconsumed);
		}
*/
		__archive_read_consume(a, *unconsumed);
		*unconsumed = 0;
	}
}

/*
 * The function invoked by archive_read_next_header().  This
 * just sets up a few things and then calls the internal
 * tar_read_header() function below.
*/ static int archive_read_format_tar_read_header(struct archive_read *a, struct archive_entry *entry) { /* * When converting tar archives to cpio archives, it is * essential that each distinct file have a distinct inode * number. To simplify this, we keep a static count here to * assign fake dev/inode numbers to each tar entry. Note that * pax format archives may overwrite this with something more * useful. * * Ideally, we would track every file read from the archive so * that we could assign the same dev/ino pair to hardlinks, * but the memory required to store a complete lookup table is * probably not worthwhile just to support the relatively * obscure tar->cpio conversion case. */ static int default_inode; static int default_dev; struct tar *tar; const char *p; const wchar_t *wp; int r; size_t l, unconsumed = 0; /* Assign default device/inode values. */ archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */ archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */ /* Limit generated st_ino number to 16 bits. */ if (default_inode >= 0xffff) { ++default_dev; default_inode = 0; } tar = (struct tar *)(a->format->data); tar->entry_offset = 0; gnu_clear_sparse_list(tar); tar->realsize = -1; /* Mark this as "unset" */ /* Setup default string conversion. */ tar->sconv = tar->opt_sconv; if (tar->sconv == NULL) { if (!tar->init_default_conversion) { tar->sconv_default = archive_string_default_conversion_for_read(&(a->archive)); tar->init_default_conversion = 1; } tar->sconv = tar->sconv_default; } r = tar_read_header(a, tar, entry, &unconsumed); tar_flush_unconsumed(a, &unconsumed); /* * "non-sparse" files are really just sparse files with * a single block. */ if (tar->sparse_list == NULL) { if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining) != ARCHIVE_OK) return (ARCHIVE_FATAL); } else { struct sparse_block *sb; for (sb = tar->sparse_list; sb != NULL; sb = sb->next) { if (!sb->hole) archive_entry_sparse_add_entry(entry, sb->offset, sb->remaining); } } if (r == ARCHIVE_OK && archive_entry_filetype(entry) == AE_IFREG) { /* * "Regular" entry with trailing '/' is really * directory: This is needed for certain old tar * variants and even for some broken newer ones. */ if ((wp = archive_entry_pathname_w(entry)) != NULL) { l = wcslen(wp); if (l > 0 && wp[l - 1] == L'/') { archive_entry_set_filetype(entry, AE_IFDIR); } } else if ((p = archive_entry_pathname(entry)) != NULL) { l = strlen(p); if (l > 0 && p[l - 1] == '/') { archive_entry_set_filetype(entry, AE_IFDIR); } } } return (r); } static int archive_read_format_tar_read_data(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { ssize_t bytes_read; struct tar *tar; struct sparse_block *p; tar = (struct tar *)(a->format->data); for (;;) { /* Remove exhausted entries from sparse list. */ while (tar->sparse_list != NULL && tar->sparse_list->remaining == 0) { p = tar->sparse_list; tar->sparse_list = p->next; free(p); } if (tar->entry_bytes_unconsumed) { __archive_read_consume(a, tar->entry_bytes_unconsumed); tar->entry_bytes_unconsumed = 0; } /* If we're at end of file, return EOF. 
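 *
 * (On EOF the code below reports *buff = NULL, *size = 0 and
 * *offset = tar->realsize, so for a sparse entry the caller still
 * learns the file's full logical size even though fewer bytes were
 * actually delivered.)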
*/ if (tar->sparse_list == NULL || tar->entry_bytes_remaining == 0) { if (__archive_read_consume(a, tar->entry_padding) < 0) return (ARCHIVE_FATAL); tar->entry_padding = 0; *buff = NULL; *size = 0; *offset = tar->realsize; return (ARCHIVE_EOF); } *buff = __archive_read_ahead(a, 1, &bytes_read); if (bytes_read < 0) return (ARCHIVE_FATAL); if (*buff == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Truncated tar archive"); return (ARCHIVE_FATAL); } if (bytes_read > tar->entry_bytes_remaining) bytes_read = (ssize_t)tar->entry_bytes_remaining; /* Don't read more than is available in the * current sparse block. */ if (tar->sparse_list->remaining < bytes_read) bytes_read = (ssize_t)tar->sparse_list->remaining; *size = bytes_read; *offset = tar->sparse_list->offset; tar->sparse_list->remaining -= bytes_read; tar->sparse_list->offset += bytes_read; tar->entry_bytes_remaining -= bytes_read; tar->entry_bytes_unconsumed = bytes_read; if (!tar->sparse_list->hole) return (ARCHIVE_OK); /* Current is hole data and skip this. */ } } static int archive_read_format_tar_skip(struct archive_read *a) { int64_t bytes_skipped; int64_t request; struct sparse_block *p; struct tar* tar; tar = (struct tar *)(a->format->data); /* Do not consume the hole of a sparse file. */ request = 0; for (p = tar->sparse_list; p != NULL; p = p->next) { if (!p->hole) { if (p->remaining >= INT64_MAX - request) { return ARCHIVE_FATAL; } request += p->remaining; } } if (request > tar->entry_bytes_remaining) request = tar->entry_bytes_remaining; request += tar->entry_padding + tar->entry_bytes_unconsumed; bytes_skipped = __archive_read_consume(a, request); if (bytes_skipped < 0) return (ARCHIVE_FATAL); tar->entry_bytes_remaining = 0; tar->entry_bytes_unconsumed = 0; tar->entry_padding = 0; /* Free the sparse list. */ gnu_clear_sparse_list(tar); return (ARCHIVE_OK); } /* * This function recursively interprets all of the headers associated * with a single entry. */ static int tar_read_header(struct archive_read *a, struct tar *tar, struct archive_entry *entry, size_t *unconsumed) { ssize_t bytes; int err; const char *h; const struct archive_entry_header_ustar *header; const struct archive_entry_header_gnutar *gnuheader; /* Loop until we find a workable header record. */ for (;;) { tar_flush_unconsumed(a, unconsumed); /* Read 512-byte header record */ h = __archive_read_ahead(a, 512, &bytes); if (bytes < 0) return ((int)bytes); if (bytes == 0) { /* EOF at a block boundary. */ /* Some writers do omit the block of nulls. */ return (ARCHIVE_EOF); } if (bytes < 512) { /* Short block at EOF; this is bad. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated tar archive"); return (ARCHIVE_FATAL); } *unconsumed = 512; /* Header is workable if it's not an end-of-archive mark. */ if (h[0] != 0 || !archive_block_is_null(h)) break; /* Ensure format is set for archives with only null blocks. */ if (a->archive.archive_format_name == NULL) { a->archive.archive_format = ARCHIVE_FORMAT_TAR; a->archive.archive_format_name = "tar"; } if (!tar->read_concatenated_archives) { /* Try to consume a second all-null record, as well. */ tar_flush_unconsumed(a, unconsumed); h = __archive_read_ahead(a, 512, NULL); if (h != NULL && h[0] == 0 && archive_block_is_null(h)) __archive_read_consume(a, 512); archive_clear_error(&a->archive); return (ARCHIVE_EOF); } /* * We're reading concatenated archives, ignore this block and * loop to get the next. 
*/ } /* * Note: If the checksum fails and we return ARCHIVE_RETRY, * then the client is likely to just retry. This is a very * crude way to search for the next valid header! * * TODO: Improve this by implementing a real header scan. */ if (!checksum(a, h)) { tar_flush_unconsumed(a, unconsumed); archive_set_error(&a->archive, EINVAL, "Damaged tar archive"); return (ARCHIVE_RETRY); /* Retryable: Invalid header */ } if (++tar->header_recursion_depth > 32) { tar_flush_unconsumed(a, unconsumed); archive_set_error(&a->archive, EINVAL, "Too many special headers"); return (ARCHIVE_WARN); } /* Determine the format variant. */ header = (const struct archive_entry_header_ustar *)h; switch(header->typeflag[0]) { case 'A': /* Solaris tar ACL */ a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive.archive_format_name = "Solaris tar"; err = header_Solaris_ACL(a, tar, entry, h, unconsumed); break; case 'g': /* POSIX-standard 'g' header. */ a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive.archive_format_name = "POSIX pax interchange format"; err = header_pax_global(a, tar, entry, h, unconsumed); if (err == ARCHIVE_EOF) return (err); break; case 'K': /* Long link name (GNU tar, others) */ err = header_longlink(a, tar, entry, h, unconsumed); break; case 'L': /* Long filename (GNU tar, others) */ err = header_longname(a, tar, entry, h, unconsumed); break; case 'V': /* GNU volume header */ err = header_volume(a, tar, entry, h, unconsumed); break; case 'X': /* Used by SUN tar; same as 'x'. */ a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive.archive_format_name = "POSIX pax interchange format (Sun variant)"; err = header_pax_extensions(a, tar, entry, h, unconsumed); break; case 'x': /* POSIX-standard 'x' header. */ a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive.archive_format_name = "POSIX pax interchange format"; err = header_pax_extensions(a, tar, entry, h, unconsumed); break; default: gnuheader = (const struct archive_entry_header_gnutar *)h; if (memcmp(gnuheader->magic, "ustar \0", 8) == 0) { a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR; a->archive.archive_format_name = "GNU tar format"; err = header_gnutar(a, tar, entry, h, unconsumed); } else if (memcmp(header->magic, "ustar", 5) == 0) { if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR; a->archive.archive_format_name = "POSIX ustar format"; } err = header_ustar(a, tar, entry, h); } else { a->archive.archive_format = ARCHIVE_FORMAT_TAR; a->archive.archive_format_name = "tar (non-POSIX)"; err = header_old_tar(a, tar, entry, h); } } if (err == ARCHIVE_FATAL) return (err); tar_flush_unconsumed(a, unconsumed); h = NULL; header = NULL; --tar->header_recursion_depth; /* Yuck. Apple's design here ends up storing long pathname * extensions for both the AppleDouble extension entry and the * regular entry. */ if ((err == ARCHIVE_WARN || err == ARCHIVE_OK) && tar->header_recursion_depth == 0 && tar->process_mac_extensions) { int err2 = read_mac_metadata_blob(a, tar, entry, h, unconsumed); if (err2 < err) err = err2; } /* We return warnings or success as-is. Anything else is fatal. */ if (err == ARCHIVE_WARN || err == ARCHIVE_OK) { if (tar->sparse_gnu_pending) { if (tar->sparse_gnu_major == 1 && tar->sparse_gnu_minor == 0) { ssize_t bytes_read; tar->sparse_gnu_pending = 0; /* Read initial sparse map. 
*/ bytes_read = gnu_sparse_10_read(a, tar, unconsumed); tar->entry_bytes_remaining -= bytes_read; if (bytes_read < 0) return ((int)bytes_read); } else { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Unrecognized GNU sparse file format"); return (ARCHIVE_WARN); } tar->sparse_gnu_pending = 0; } return (err); } if (err == ARCHIVE_EOF) /* EOF when recursively reading a header is bad. */ archive_set_error(&a->archive, EINVAL, "Damaged tar archive"); return (ARCHIVE_FATAL); } /* * Return true if block checksum is correct. */ static int checksum(struct archive_read *a, const void *h) { const unsigned char *bytes; const struct archive_entry_header_ustar *header; int check, sum; size_t i; (void)a; /* UNUSED */ bytes = (const unsigned char *)h; header = (const struct archive_entry_header_ustar *)h; /* Checksum field must hold an octal number */ for (i = 0; i < sizeof(header->checksum); ++i) { char c = header->checksum[i]; if (c != ' ' && c != '\0' && (c < '0' || c > '7')) return 0; } /* * Test the checksum. Note that POSIX specifies _unsigned_ * bytes for this calculation. */ sum = (int)tar_atol(header->checksum, sizeof(header->checksum)); check = 0; for (i = 0; i < 148; i++) check += (unsigned char)bytes[i]; for (; i < 156; i++) check += 32; for (; i < 512; i++) check += (unsigned char)bytes[i]; if (sum == check) return (1); /* * Repeat test with _signed_ bytes, just in case this archive * was created by an old BSD, Solaris, or HP-UX tar with a * broken checksum calculation. */ check = 0; for (i = 0; i < 148; i++) check += (signed char)bytes[i]; for (; i < 156; i++) check += 32; for (; i < 512; i++) check += (signed char)bytes[i]; if (sum == check) return (1); return (0); } /* * Return true if this block contains only nulls. */ static int archive_block_is_null(const char *p) { unsigned i; for (i = 0; i < 512; i++) if (*p++) return (0); return (1); } /* * Interpret 'A' Solaris ACL header */ static int header_Solaris_ACL(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { const struct archive_entry_header_ustar *header; size_t size; int err; int64_t type; char *acl, *p; /* * read_body_to_string adds a NUL terminator, but we need a little * more to make sure that we don't overrun acl_text later. */ header = (const struct archive_entry_header_ustar *)h; size = (size_t)tar_atol(header->size, sizeof(header->size)); err = read_body_to_string(a, tar, &(tar->acl_text), h, unconsumed); if (err != ARCHIVE_OK) return (err); /* Recursively read next header */ err = tar_read_header(a, tar, entry, unconsumed); if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) return (err); /* TODO: Examine the first characters to see if this * is an AIX ACL descriptor. We'll likely never support * them, but it would be polite to recognize and warn when * we do see them. */ /* Leading octal number indicates ACL type and number of entries. 
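 *
 * For example, given the masks tested below, an attribute starting
 * "01000005" would decode as type 01000000 (a POSIX.1e ACL) with
 * 5 entries, since the low 0777777 bits accumulate the entry count
 * and the high bits select the type.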
*/ p = acl = tar->acl_text.s; type = 0; while (*p != '\0' && p < acl + size) { if (*p < '0' || *p > '7') { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (invalid digit)"); return(ARCHIVE_WARN); } type <<= 3; type += *p - '0'; if (type > 077777777) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (count too large)"); return (ARCHIVE_WARN); } p++; } switch ((int)type & ~0777777) { case 01000000: /* POSIX.1e ACL */ break; case 03000000: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Solaris NFSv4 ACLs not supported"); return (ARCHIVE_WARN); default: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (unsupported type %o)", (int)type); return (ARCHIVE_WARN); } p++; if (p >= acl + size) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (body overflow)"); return(ARCHIVE_WARN); } /* ACL text is null-terminated; find the end. */ size -= (p - acl); acl = p; while (*p != '\0' && p < acl + size) p++; if (tar->sconv_acl == NULL) { tar->sconv_acl = archive_string_conversion_from_charset( &(a->archive), "UTF-8", 1); if (tar->sconv_acl == NULL) return (ARCHIVE_FATAL); } archive_strncpy(&(tar->localname), acl, p - acl); err = archive_acl_parse_l(archive_entry_acl(entry), tar->localname.s, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, tar->sconv_acl); if (err != ARCHIVE_OK) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for ACL"); } else archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (unparsable)"); } return (err); } /* * Interpret 'K' long linkname header. */ static int header_longlink(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { int err; err = read_body_to_string(a, tar, &(tar->longlink), h, unconsumed); if (err != ARCHIVE_OK) return (err); err = tar_read_header(a, tar, entry, unconsumed); if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) return (err); /* Set symlink if symlink already set, else hardlink. */ archive_entry_copy_link(entry, tar->longlink.s); return (ARCHIVE_OK); } static int set_conversion_failed_error(struct archive_read *a, struct archive_string_conv *sconv, const char *name) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for %s", name); return (ARCHIVE_FATAL); } archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "%s can't be converted from %s to current locale.", name, archive_string_conversion_charset_name(sconv)); return (ARCHIVE_WARN); } /* * Interpret 'L' long filename header. */ static int header_longname(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { int err; err = read_body_to_string(a, tar, &(tar->longname), h, unconsumed); if (err != ARCHIVE_OK) return (err); /* Read and parse "real" header, then override name. */ err = tar_read_header(a, tar, entry, unconsumed); if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) return (err); if (archive_entry_copy_pathname_l(entry, tar->longname.s, archive_strlen(&(tar->longname)), tar->sconv) != 0) err = set_conversion_failed_error(a, tar->sconv, "Pathname"); return (err); } /* * Interpret 'V' GNU tar volume header. */ static int header_volume(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { (void)h; /* Just skip this and read the next header. 
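 * (A GNU volume header carries only a volume label in its
 * name field; there is nothing useful for libarchive to
 * record from it.)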
*/ return (tar_read_header(a, tar, entry, unconsumed)); } /* * Read body of an archive entry into an archive_string object. */ static int read_body_to_string(struct archive_read *a, struct tar *tar, struct archive_string *as, const void *h, size_t *unconsumed) { int64_t size; const struct archive_entry_header_ustar *header; const void *src; (void)tar; /* UNUSED */ header = (const struct archive_entry_header_ustar *)h; size = tar_atol(header->size, sizeof(header->size)); if ((size > 1048576) || (size < 0)) { archive_set_error(&a->archive, EINVAL, "Special header too large"); return (ARCHIVE_FATAL); } /* Fail if we can't make our buffer big enough. */ if (archive_string_ensure(as, (size_t)size+1) == NULL) { archive_set_error(&a->archive, ENOMEM, "No memory"); return (ARCHIVE_FATAL); } tar_flush_unconsumed(a, unconsumed); /* Read the body into the string. */ *unconsumed = (size_t)((size + 511) & ~ 511); src = __archive_read_ahead(a, *unconsumed, NULL); if (src == NULL) { *unconsumed = 0; return (ARCHIVE_FATAL); } memcpy(as->s, src, (size_t)size); as->s[size] = '\0'; as->length = (size_t)size; return (ARCHIVE_OK); } /* * Parse out common header elements. * * This would be the same as header_old_tar, except that the * filename is handled slightly differently for old and POSIX * entries (POSIX entries support a 'prefix'). This factoring * allows header_old_tar and header_ustar * to handle filenames differently, while still putting most of the * common parsing into one place. */ static int header_common(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h) { const struct archive_entry_header_ustar *header; char tartype; int err = ARCHIVE_OK; header = (const struct archive_entry_header_ustar *)h; if (header->linkname[0]) archive_strncpy(&(tar->entry_linkpath), header->linkname, sizeof(header->linkname)); else archive_string_empty(&(tar->entry_linkpath)); /* Parse out the numeric fields (all are octal) */ archive_entry_set_mode(entry, (mode_t)tar_atol(header->mode, sizeof(header->mode))); archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid))); archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid))); tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size)); if (tar->entry_bytes_remaining < 0) { tar->entry_bytes_remaining = 0; archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Tar entry has negative size"); return (ARCHIVE_FATAL); } if (tar->entry_bytes_remaining == INT64_MAX) { /* Note: tar_atol returns INT64_MAX on overflow */ tar->entry_bytes_remaining = 0; archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Tar entry size overflow"); return (ARCHIVE_FATAL); } tar->realsize = tar->entry_bytes_remaining; archive_entry_set_size(entry, tar->entry_bytes_remaining); archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0); /* Handle the tar type flag appropriately. */ tartype = header->typeflag[0]; switch (tartype) { case '1': /* Hard link */ if (archive_entry_copy_hardlink_l(entry, tar->entry_linkpath.s, archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Linkname"); if (err == ARCHIVE_FATAL) return (err); } /* * The following may seem odd, but: Technically, tar * does not store the file type for a "hard link" * entry, only the fact that it is a hard link. So, I * leave the type zero normally. But, pax interchange * format allows hard links to have data, which * implies that the underlying entry is a regular * file. 
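 * (So a hardlink entry with a nonzero size field is either a
 * pax hardlink that really carries a body, or an old-style
 * archive whose size field must be ignored; the heuristic
 * below decides between the two.)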
*/ if (archive_entry_size(entry) > 0) archive_entry_set_filetype(entry, AE_IFREG); /* * A tricky point: Traditionally, tar readers have * ignored the size field when reading hardlink * entries, and some writers put non-zero sizes even * though the body is empty. POSIX blessed this * convention in the 1988 standard, but broke with * this tradition in 2001 by permitting hardlink * entries to store valid bodies in pax interchange * format, but not in ustar format. Since there is no * hard and fast way to distinguish pax interchange * from earlier archives (the 'x' and 'g' entries are * optional, after all), we need a heuristic. */ if (archive_entry_size(entry) == 0) { /* If the size is already zero, we're done. */ } else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { /* Definitely pax extended; must obey hardlink size. */ } else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR) { /* Old-style or GNU tar: we must ignore the size. */ archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; } else if (archive_read_format_tar_bid(a, 50) > 50) { /* * We don't know if it's pax: If the bid * function sees a valid ustar header * immediately following, then let's ignore * the hardlink size. */ archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; } /* * TODO: There are still two cases I'd like to handle: * = a ustar non-pax archive with a hardlink entry at * end-of-archive. (Look for block of nulls following?) * = a pax archive that has not seen any pax headers * and has an entry which is a hardlink entry storing * a body containing an uncompressed tar archive. * The first is worth addressing; I don't see any reliable * way to deal with the second possibility. */ break; case '2': /* Symlink */ archive_entry_set_filetype(entry, AE_IFLNK); archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; if (archive_entry_copy_symlink_l(entry, tar->entry_linkpath.s, archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Linkname"); if (err == ARCHIVE_FATAL) return (err); } break; case '3': /* Character device */ archive_entry_set_filetype(entry, AE_IFCHR); archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; break; case '4': /* Block device */ archive_entry_set_filetype(entry, AE_IFBLK); archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; break; case '5': /* Dir */ archive_entry_set_filetype(entry, AE_IFDIR); archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; break; case '6': /* FIFO device */ archive_entry_set_filetype(entry, AE_IFIFO); archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; break; case 'D': /* GNU incremental directory type */ /* * No special handling is actually required here. * It might be nice someday to preprocess the file list and * provide it to the client, though. */ archive_entry_set_filetype(entry, AE_IFDIR); break; case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/ /* * As far as I can tell, this is just like a regular file * entry, except that the contents should be _appended_ to * the indicated file at the indicated offset. This may * require some API work to fully support. */ break; case 'N': /* Old GNU "long filename" entry. */ /* The body of this entry is a script for renaming * previously-extracted entries. Ugh. It will never * be supported by libarchive. 
*/ archive_entry_set_filetype(entry, AE_IFREG); break; case 'S': /* GNU sparse files */ /* * Sparse files are really just regular files with * sparse information in the extended area. */ /* FALLTHROUGH */ default: /* Regular file and non-standard types */ /* * Per POSIX: non-recognized types should always be * treated as regular files. */ archive_entry_set_filetype(entry, AE_IFREG); break; } return (err); } /* * Parse out header elements for "old-style" tar archives. */ static int header_old_tar(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h) { const struct archive_entry_header_ustar *header; int err = ARCHIVE_OK, err2; /* Copy filename over (to ensure null termination). */ header = (const struct archive_entry_header_ustar *)h; if (archive_entry_copy_pathname_l(entry, header->name, sizeof(header->name), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Pathname"); if (err == ARCHIVE_FATAL) return (err); } /* Grab rest of common fields */ err2 = header_common(a, tar, entry, h); if (err > err2) err = err2; tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); return (err); } /* * Read a Mac AppleDouble-encoded blob of file metadata, * if there is one. */ static int read_mac_metadata_blob(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { int64_t size; const void *data; const char *p, *name; const wchar_t *wp, *wname; (void)h; /* UNUSED */ wname = wp = archive_entry_pathname_w(entry); if (wp != NULL) { /* Find the last path element. */ for (; *wp != L'\0'; ++wp) { if (wp[0] == '/' && wp[1] != L'\0') wname = wp + 1; } /* * If last path element starts with "._", then * this is a Mac extension. */ if (wname[0] != L'.' || wname[1] != L'_' || wname[2] == L'\0') return ARCHIVE_OK; } else { /* Find the last path element. */ name = p = archive_entry_pathname(entry); if (p == NULL) return (ARCHIVE_FAILED); for (; *p != '\0'; ++p) { if (p[0] == '/' && p[1] != '\0') name = p + 1; } /* * If last path element starts with "._", then * this is a Mac extension. */ if (name[0] != '.' || name[1] != '_' || name[2] == '\0') return ARCHIVE_OK; } /* Read the body as a Mac OS metadata blob. */ size = archive_entry_size(entry); /* * TODO: Look beyond the body here to peek at the next header. * If it's a regular header (not an extension header) * that has the wrong name, just return the current * entry as-is, without consuming the body here. * That would reduce the risk of us mis-identifying * an ordinary file that just happened to have * a name starting with "._". * * Q: Is the above idea really possible? Even * when there are GNU or pax extension entries? */ data = __archive_read_ahead(a, (size_t)size, NULL); if (data == NULL) { *unconsumed = 0; return (ARCHIVE_FATAL); } archive_entry_copy_mac_metadata(entry, data, (size_t)size); *unconsumed = (size_t)((size + 511) & ~ 511); tar_flush_unconsumed(a, unconsumed); return (tar_read_header(a, tar, entry, unconsumed)); } /* * Parse a file header for a pax extended archive entry. 
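 * ('g' global headers are read into tar->pax_global below,
 * but, per the TODO in header_pax_extensions(), their
 * defaults are not yet merged into subsequent entries.)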
*/ static int header_pax_global(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { int err; err = read_body_to_string(a, tar, &(tar->pax_global), h, unconsumed); if (err != ARCHIVE_OK) return (err); err = tar_read_header(a, tar, entry, unconsumed); return (err); } static int header_pax_extensions(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { int err, err2; err = read_body_to_string(a, tar, &(tar->pax_header), h, unconsumed); if (err != ARCHIVE_OK) return (err); /* Parse the next header. */ err = tar_read_header(a, tar, entry, unconsumed); if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) return (err); /* * TODO: Parse global/default options into 'entry' struct here * before handling file-specific options. * * This design (parse standard header, then overwrite with pax * extended attribute data) usually works well, but isn't ideal; * it would be better to parse the pax extended attributes first * and then skip any fields in the standard header that were * defined in the pax header. */ err2 = pax_header(a, tar, entry, tar->pax_header.s); err = err_combine(err, err2); tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); return (err); } /* * Parse a file header for a Posix "ustar" archive entry. This also * handles "pax" or "extended ustar" entries. */ static int header_ustar(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h) { const struct archive_entry_header_ustar *header; struct archive_string *as; int err = ARCHIVE_OK, r; header = (const struct archive_entry_header_ustar *)h; /* Copy name into an internal buffer to ensure null-termination. */ as = &(tar->entry_pathname); if (header->prefix[0]) { archive_strncpy(as, header->prefix, sizeof(header->prefix)); if (as->s[archive_strlen(as) - 1] != '/') archive_strappend_char(as, '/'); archive_strncat(as, header->name, sizeof(header->name)); } else { archive_strncpy(as, header->name, sizeof(header->name)); } if (archive_entry_copy_pathname_l(entry, as->s, archive_strlen(as), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Pathname"); if (err == ARCHIVE_FATAL) return (err); } /* Handle rest of common fields. */ r = header_common(a, tar, entry, h); if (r == ARCHIVE_FATAL) return (r); if (r < err) err = r; /* Handle POSIX ustar fields. */ if (archive_entry_copy_uname_l(entry, header->uname, sizeof(header->uname), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Uname"); if (err == ARCHIVE_FATAL) return (err); } if (archive_entry_copy_gname_l(entry, header->gname, sizeof(header->gname), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Gname"); if (err == ARCHIVE_FATAL) return (err); } /* Parse out device numbers only for char and block specials. */ if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { archive_entry_set_rdevmajor(entry, (dev_t) tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); archive_entry_set_rdevminor(entry, (dev_t) tar_atol(header->rdevminor, sizeof(header->rdevminor))); } tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); return (err); } /* * Parse the pax extended attributes record. * * Returns non-zero if there's an error in the data. 
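 * Each record in the extended header has the form
 *     "%d %s=%s\n", <length>, <keyword>, <value>
 * where <length> counts the entire record, including the
 * length digits themselves and the trailing newline; e.g.
 * "20 atime=1434085653\n" is exactly 20 bytes long.  The
 * loop below parses the decimal length, splits the line at
 * the first '=', and hands each key/value pair to
 * pax_attribute().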
*/ static int pax_header(struct archive_read *a, struct tar *tar, struct archive_entry *entry, char *attr) { size_t attr_length, l, line_length; char *p; char *key, *value; struct archive_string *as; struct archive_string_conv *sconv; int err, err2; attr_length = strlen(attr); tar->pax_hdrcharset_binary = 0; archive_string_empty(&(tar->entry_gname)); archive_string_empty(&(tar->entry_linkpath)); archive_string_empty(&(tar->entry_pathname)); archive_string_empty(&(tar->entry_pathname_override)); archive_string_empty(&(tar->entry_uname)); err = ARCHIVE_OK; while (attr_length > 0) { /* Parse decimal length field at start of line. */ line_length = 0; l = attr_length; p = attr; /* Record start of line. */ while (l>0) { if (*p == ' ') { p++; l--; break; } if (*p < '0' || *p > '9') { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Ignoring malformed pax extended attributes"); return (ARCHIVE_WARN); } line_length *= 10; line_length += *p - '0'; if (line_length > 999999) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Rejecting pax extended attribute > 1MB"); return (ARCHIVE_WARN); } p++; l--; } /* * Parsed length must be no bigger than available data, * at least 1, and the last character of the line must * be '\n'. */ if (line_length > attr_length || line_length < 1 || attr[line_length - 1] != '\n') { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Ignoring malformed pax extended attribute"); return (ARCHIVE_WARN); } /* Null-terminate the line. */ attr[line_length - 1] = '\0'; /* Find end of key and null terminate it. */ key = p; if (key[0] == '=') return (-1); while (*p && *p != '=') ++p; if (*p == '\0') { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Invalid pax extended attributes"); return (ARCHIVE_WARN); } *p = '\0'; /* Identify null-terminated 'value' portion. */ value = p + 1; /* Identify this attribute and set it in the entry. */ err2 = pax_attribute(a, tar, entry, key, value); if (err2 == ARCHIVE_FATAL) return (err2); err = err_combine(err, err2); /* Skip to next line */ attr += line_length; attr_length -= line_length; } /* * PAX format uses UTF-8 as default charset for its metadata * unless hdrcharset=BINARY is present in its header. * We apply the charset specified by the hdrcharset option only * when the hdrcharset attribute(in PAX header) is BINARY because * we respect the charset described in PAX header and BINARY also * means that metadata(filename,uname and gname) character-set * is unknown. */ if (tar->pax_hdrcharset_binary) sconv = tar->opt_sconv; else { sconv = archive_string_conversion_from_charset( &(a->archive), "UTF-8", 1); if (sconv == NULL) return (ARCHIVE_FATAL); if (tar->compat_2x) archive_string_conversion_set_opt(sconv, SCONV_SET_OPT_UTF8_LIBARCHIVE2X); } if (archive_strlen(&(tar->entry_gname)) > 0) { if (archive_entry_copy_gname_l(entry, tar->entry_gname.s, archive_strlen(&(tar->entry_gname)), sconv) != 0) { err = set_conversion_failed_error(a, sconv, "Gname"); if (err == ARCHIVE_FATAL) return (err); /* Use a converted an original name. */ archive_entry_copy_gname(entry, tar->entry_gname.s); } } if (archive_strlen(&(tar->entry_linkpath)) > 0) { if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s, archive_strlen(&(tar->entry_linkpath)), sconv) != 0) { err = set_conversion_failed_error(a, sconv, "Linkname"); if (err == ARCHIVE_FATAL) return (err); /* Use a converted an original name. 
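 * (That is, fall back to copying the raw linkname bytes
 * without charset conversion rather than dropping the name.)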
*/ archive_entry_copy_link(entry, tar->entry_linkpath.s); } } /* * Some extensions (such as the GNU sparse file extensions) * deliberately store a synthetic name under the regular 'path' * attribute and the real file name under a different attribute. * Since we're supposed to not care about the order, we * have no choice but to store all of the various filenames * we find and figure it all out afterwards. This is the * figuring out part. */ as = NULL; if (archive_strlen(&(tar->entry_pathname_override)) > 0) as = &(tar->entry_pathname_override); else if (archive_strlen(&(tar->entry_pathname)) > 0) as = &(tar->entry_pathname); if (as != NULL) { if (archive_entry_copy_pathname_l(entry, as->s, archive_strlen(as), sconv) != 0) { err = set_conversion_failed_error(a, sconv, "Pathname"); if (err == ARCHIVE_FATAL) return (err); /* Use a converted an original name. */ archive_entry_copy_pathname(entry, as->s); } } if (archive_strlen(&(tar->entry_uname)) > 0) { if (archive_entry_copy_uname_l(entry, tar->entry_uname.s, archive_strlen(&(tar->entry_uname)), sconv) != 0) { err = set_conversion_failed_error(a, sconv, "Uname"); if (err == ARCHIVE_FATAL) return (err); /* Use a converted an original name. */ archive_entry_copy_uname(entry, tar->entry_uname.s); } } return (err); } static int pax_attribute_xattr(struct archive_entry *entry, const char *name, const char *value) { char *name_decoded; void *value_decoded; size_t value_len; if (strlen(name) < 18 || (memcmp(name, "LIBARCHIVE.xattr.", 17)) != 0) return 3; name += 17; /* URL-decode name */ name_decoded = url_decode(name); if (name_decoded == NULL) return 2; /* Base-64 decode value */ value_decoded = base64_decode(value, strlen(value), &value_len); if (value_decoded == NULL) { free(name_decoded); return 1; } archive_entry_xattr_add_entry(entry, name_decoded, value_decoded, value_len); free(name_decoded); free(value_decoded); return 0; } -static int -pax_attribute_acl(struct archive_read *a, struct tar *tar, - struct archive_entry *entry, const char *value, int type) -{ - int r; - const char* errstr; - - switch (type) { - case ARCHIVE_ENTRY_ACL_TYPE_ACCESS: - errstr = "SCHILY.acl.access"; - break; - case ARCHIVE_ENTRY_ACL_TYPE_DEFAULT: - errstr = "SCHILY.acl.default"; - break; - case ARCHIVE_ENTRY_ACL_TYPE_NFS4: - errstr = "SCHILY.acl.ace"; - break; - default: - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Unknown ACL type: %d", type); - return(ARCHIVE_FATAL); - } - - if (tar->sconv_acl == NULL) { - tar->sconv_acl = - archive_string_conversion_from_charset( - &(a->archive), "UTF-8", 1); - if (tar->sconv_acl == NULL) - return (ARCHIVE_FATAL); - } - - r = archive_acl_parse_l(archive_entry_acl(entry), value, type, - tar->sconv_acl); - if (r != ARCHIVE_OK) { - if (r == ARCHIVE_FATAL) { - archive_set_error(&a->archive, ENOMEM, - "%s %s", "Can't allocate memory for ", - errstr); - return (r); - } - archive_set_error(&a->archive, - ARCHIVE_ERRNO_MISC, "%s %s", "Parse error: ", errstr); - } - return (r); -} - /* * Parse a single key=value attribute. key/value pointers are * assumed to point into reasonably long-lived storage. * * Note that POSIX reserves all-lowercase keywords. Vendor-specific * extensions should always have keywords of the form "VENDOR.attribute" * In particular, it's quite feasible to support many different * vendor extensions here. I'm using "LIBARCHIVE" for extensions * unique to this library. * * Investigate other vendor-specific extensions and see if * any of them look useful. 
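 * Keys currently recognized below include, for example:
 *     GNU.sparse.*        - GNU tar sparse-file maps
 *     SCHILY.*            - "star" ACLs, device numbers, etc.
 *     LIBARCHIVE.*        - creationtime and xattr storage
 *     atime, ctime, mtime, gid, uid, gname, uname,
 *     path, linkpath, size, hdrcharset - POSIX standard keys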
*/ static int pax_attribute(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const char *key, const char *value) { int64_t s; long n; int err = ARCHIVE_OK, r; #ifndef __FreeBSD__ if (value == NULL) value = ""; /* Disable compiler warning; do not pass * NULL pointer to strlen(). */ #endif switch (key[0]) { case 'G': /* GNU "0.0" sparse pax format. */ if (strcmp(key, "GNU.sparse.numblocks") == 0) { tar->sparse_offset = -1; tar->sparse_numbytes = -1; tar->sparse_gnu_major = 0; tar->sparse_gnu_minor = 0; } if (strcmp(key, "GNU.sparse.offset") == 0) { tar->sparse_offset = tar_atol10(value, strlen(value)); if (tar->sparse_numbytes != -1) { if (gnu_add_sparse_entry(a, tar, tar->sparse_offset, tar->sparse_numbytes) != ARCHIVE_OK) return (ARCHIVE_FATAL); tar->sparse_offset = -1; tar->sparse_numbytes = -1; } } if (strcmp(key, "GNU.sparse.numbytes") == 0) { tar->sparse_numbytes = tar_atol10(value, strlen(value)); if (tar->sparse_numbytes != -1) { if (gnu_add_sparse_entry(a, tar, tar->sparse_offset, tar->sparse_numbytes) != ARCHIVE_OK) return (ARCHIVE_FATAL); tar->sparse_offset = -1; tar->sparse_numbytes = -1; } } if (strcmp(key, "GNU.sparse.size") == 0) { tar->realsize = tar_atol10(value, strlen(value)); archive_entry_set_size(entry, tar->realsize); } /* GNU "0.1" sparse pax format. */ if (strcmp(key, "GNU.sparse.map") == 0) { tar->sparse_gnu_major = 0; tar->sparse_gnu_minor = 1; if (gnu_sparse_01_parse(a, tar, value) != ARCHIVE_OK) return (ARCHIVE_WARN); } /* GNU "1.0" sparse pax format */ if (strcmp(key, "GNU.sparse.major") == 0) { tar->sparse_gnu_major = (int)tar_atol10(value, strlen(value)); tar->sparse_gnu_pending = 1; } if (strcmp(key, "GNU.sparse.minor") == 0) { tar->sparse_gnu_minor = (int)tar_atol10(value, strlen(value)); tar->sparse_gnu_pending = 1; } if (strcmp(key, "GNU.sparse.name") == 0) { /* * The real filename; when storing sparse * files, GNU tar puts a synthesized name into * the regular 'path' attribute in an attempt * to limit confusion. ;-) */ archive_strcpy(&(tar->entry_pathname_override), value); } if (strcmp(key, "GNU.sparse.realsize") == 0) { tar->realsize = tar_atol10(value, strlen(value)); archive_entry_set_size(entry, tar->realsize); } break; case 'L': /* Our extensions */ /* TODO: Handle arbitrary extended attributes... 
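 * The LIBARCHIVE.xattr.* keys handled just below store the
 * attribute name with URL-style %XX escapes and the value in
 * base64; e.g.
 *     LIBARCHIVE.xattr.user.mime%5Ftype=dGV4dC9wbGFpbg==
 * carries the xattr "user.mime_type" with value "text/plain".
 * pax_attribute_xattr() above reverses both encodings.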
*/ /* if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0) archive_entry_set_xxxxxx(entry, value); */ if (strcmp(key, "LIBARCHIVE.creationtime") == 0) { pax_time(value, &s, &n); archive_entry_set_birthtime(entry, s, n); } if (memcmp(key, "LIBARCHIVE.xattr.", 17) == 0) pax_attribute_xattr(entry, key, value); break; case 'S': /* We support some keys used by the "star" archiver */ if (strcmp(key, "SCHILY.acl.access") == 0) { - r = pax_attribute_acl(a, tar, entry, value, - ARCHIVE_ENTRY_ACL_TYPE_ACCESS); - if (r == ARCHIVE_FATAL) - return (r); + if (tar->sconv_acl == NULL) { + tar->sconv_acl = + archive_string_conversion_from_charset( + &(a->archive), "UTF-8", 1); + if (tar->sconv_acl == NULL) + return (ARCHIVE_FATAL); + } + + r = archive_acl_parse_l(archive_entry_acl(entry), + value, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, + tar->sconv_acl); + if (r != ARCHIVE_OK) { + err = r; + if (err == ARCHIVE_FATAL) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for " + "SCHILY.acl.access"); + return (err); + } + archive_set_error(&a->archive, + ARCHIVE_ERRNO_MISC, + "Parse error: SCHILY.acl.access"); + } } else if (strcmp(key, "SCHILY.acl.default") == 0) { - r = pax_attribute_acl(a, tar, entry, value, - ARCHIVE_ENTRY_ACL_TYPE_DEFAULT); - if (r == ARCHIVE_FATAL) - return (r); - } else if (strcmp(key, "SCHILY.acl.ace") == 0) { - r = pax_attribute_acl(a, tar, entry, value, - ARCHIVE_ENTRY_ACL_TYPE_NFS4); - if (r == ARCHIVE_FATAL) - return (r); + if (tar->sconv_acl == NULL) { + tar->sconv_acl = + archive_string_conversion_from_charset( + &(a->archive), "UTF-8", 1); + if (tar->sconv_acl == NULL) + return (ARCHIVE_FATAL); + } + + r = archive_acl_parse_l(archive_entry_acl(entry), + value, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT, + tar->sconv_acl); + if (r != ARCHIVE_OK) { + err = r; + if (err == ARCHIVE_FATAL) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for " + "SCHILY.acl.default"); + return (err); + } + archive_set_error(&a->archive, + ARCHIVE_ERRNO_MISC, + "Parse error: SCHILY.acl.default"); + } } else if (strcmp(key, "SCHILY.devmajor") == 0) { archive_entry_set_rdevmajor(entry, (dev_t)tar_atol10(value, strlen(value))); } else if (strcmp(key, "SCHILY.devminor") == 0) { archive_entry_set_rdevminor(entry, (dev_t)tar_atol10(value, strlen(value))); } else if (strcmp(key, "SCHILY.fflags") == 0) { archive_entry_copy_fflags_text(entry, value); } else if (strcmp(key, "SCHILY.dev") == 0) { archive_entry_set_dev(entry, (dev_t)tar_atol10(value, strlen(value))); } else if (strcmp(key, "SCHILY.ino") == 0) { archive_entry_set_ino(entry, tar_atol10(value, strlen(value))); } else if (strcmp(key, "SCHILY.nlink") == 0) { archive_entry_set_nlink(entry, (unsigned) tar_atol10(value, strlen(value))); } else if (strcmp(key, "SCHILY.realsize") == 0) { tar->realsize = tar_atol10(value, strlen(value)); archive_entry_set_size(entry, tar->realsize); } else if (strcmp(key, "SUN.holesdata") == 0) { /* A Solaris extension for sparse. */ r = solaris_sparse_parse(a, tar, entry, value); if (r < err) { if (r == ARCHIVE_FATAL) return (r); err = r; archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Parse error: SUN.holesdata"); } } break; case 'a': if (strcmp(key, "atime") == 0) { pax_time(value, &s, &n); archive_entry_set_atime(entry, s, n); } break; case 'c': if (strcmp(key, "ctime") == 0) { pax_time(value, &s, &n); archive_entry_set_ctime(entry, s, n); } else if (strcmp(key, "charset") == 0) { /* TODO: Publish charset information in entry. */ } else if (strcmp(key, "comment") == 0) { /* TODO: Publish comment in entry. 
*/ } break; case 'g': if (strcmp(key, "gid") == 0) { archive_entry_set_gid(entry, tar_atol10(value, strlen(value))); } else if (strcmp(key, "gname") == 0) { archive_strcpy(&(tar->entry_gname), value); } break; case 'h': if (strcmp(key, "hdrcharset") == 0) { if (strcmp(value, "BINARY") == 0) /* Binary mode. */ tar->pax_hdrcharset_binary = 1; else if (strcmp(value, "ISO-IR 10646 2000 UTF-8") == 0) tar->pax_hdrcharset_binary = 0; } break; case 'l': /* pax interchange doesn't distinguish hardlink vs. symlink. */ if (strcmp(key, "linkpath") == 0) { archive_strcpy(&(tar->entry_linkpath), value); } break; case 'm': if (strcmp(key, "mtime") == 0) { pax_time(value, &s, &n); archive_entry_set_mtime(entry, s, n); } break; case 'p': if (strcmp(key, "path") == 0) { archive_strcpy(&(tar->entry_pathname), value); } break; case 'r': /* POSIX has reserved 'realtime.*' */ break; case 's': /* POSIX has reserved 'security.*' */ /* Someday: if (strcmp(key, "security.acl") == 0) { ... } */ if (strcmp(key, "size") == 0) { /* "size" is the size of the data in the entry. */ tar->entry_bytes_remaining = tar_atol10(value, strlen(value)); /* * But, "size" is not necessarily the size of * the file on disk; if this is a sparse file, * the disk size may have already been set from * GNU.sparse.realsize or GNU.sparse.size or * an old GNU header field or SCHILY.realsize * or .... */ if (tar->realsize < 0) { archive_entry_set_size(entry, tar->entry_bytes_remaining); tar->realsize = tar->entry_bytes_remaining; } } break; case 'u': if (strcmp(key, "uid") == 0) { archive_entry_set_uid(entry, tar_atol10(value, strlen(value))); } else if (strcmp(key, "uname") == 0) { archive_strcpy(&(tar->entry_uname), value); } break; } return (err); } /* * parse a decimal time value, which may include a fractional portion */ static void pax_time(const char *p, int64_t *ps, long *pn) { char digit; int64_t s; unsigned long l; int sign; int64_t limit, last_digit_limit; limit = INT64_MAX / 10; last_digit_limit = INT64_MAX % 10; s = 0; sign = 1; if (*p == '-') { sign = -1; p++; } while (*p >= '0' && *p <= '9') { digit = *p - '0'; if (s > limit || (s == limit && digit > last_digit_limit)) { s = INT64_MAX; break; } s = (s * 10) + digit; ++p; } *ps = s * sign; /* Calculate nanoseconds. */ *pn = 0; if (*p != '.') return; l = 100000000UL; do { ++p; if (*p >= '0' && *p <= '9') *pn += (*p - '0') * l; else break; } while (l /= 10); } /* * Parse GNU tar header */ static int header_gnutar(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { const struct archive_entry_header_gnutar *header; int64_t t; int err = ARCHIVE_OK; /* * GNU header is like POSIX ustar, except 'prefix' is * replaced with some other fields. This also means the * filename is stored as in old-style archives. */ /* Grab fields common to all tar variants. */ err = header_common(a, tar, entry, h); if (err == ARCHIVE_FATAL) return (err); /* Copy filename over (to ensure null termination). */ header = (const struct archive_entry_header_gnutar *)h; if (archive_entry_copy_pathname_l(entry, header->name, sizeof(header->name), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Pathname"); if (err == ARCHIVE_FATAL) return (err); } /* Fields common to ustar and GNU */ /* XXX Can the following be factored out since it's common * to ustar and gnu tar? Is it okay to move it down into * header_common, perhaps? 
*/ if (archive_entry_copy_uname_l(entry, header->uname, sizeof(header->uname), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Uname"); if (err == ARCHIVE_FATAL) return (err); } if (archive_entry_copy_gname_l(entry, header->gname, sizeof(header->gname), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Gname"); if (err == ARCHIVE_FATAL) return (err); } /* Parse out device numbers only for char and block specials */ if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { archive_entry_set_rdevmajor(entry, (dev_t) tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); archive_entry_set_rdevminor(entry, (dev_t) tar_atol(header->rdevminor, sizeof(header->rdevminor))); } else archive_entry_set_rdev(entry, 0); tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); /* Grab GNU-specific fields. */ t = tar_atol(header->atime, sizeof(header->atime)); if (t > 0) archive_entry_set_atime(entry, t, 0); t = tar_atol(header->ctime, sizeof(header->ctime)); if (t > 0) archive_entry_set_ctime(entry, t, 0); if (header->realsize[0] != 0) { tar->realsize = tar_atol(header->realsize, sizeof(header->realsize)); archive_entry_set_size(entry, tar->realsize); } if (header->sparse[0].offset[0] != 0) { if (gnu_sparse_old_read(a, tar, header, unconsumed) != ARCHIVE_OK) return (ARCHIVE_FATAL); } else { if (header->isextended[0] != 0) { /* XXX WTF? XXX */ } } return (err); } static int gnu_add_sparse_entry(struct archive_read *a, struct tar *tar, int64_t offset, int64_t remaining) { struct sparse_block *p; p = (struct sparse_block *)malloc(sizeof(*p)); if (p == NULL) { archive_set_error(&a->archive, ENOMEM, "Out of memory"); return (ARCHIVE_FATAL); } memset(p, 0, sizeof(*p)); if (tar->sparse_last != NULL) tar->sparse_last->next = p; else tar->sparse_list = p; tar->sparse_last = p; if (remaining < 0 || offset < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed sparse map data"); return (ARCHIVE_FATAL); } p->offset = offset; p->remaining = remaining; return (ARCHIVE_OK); } static void gnu_clear_sparse_list(struct tar *tar) { struct sparse_block *p; while (tar->sparse_list != NULL) { p = tar->sparse_list; tar->sparse_list = p->next; free(p); } tar->sparse_last = NULL; } /* * GNU tar old-format sparse data. * * GNU old-format sparse data is stored in a fixed-field * format. Offset/size values are 11-byte octal fields (same * format as 'size' field in ustar header). These are * stored in the header, allocating subsequent header blocks * as needed. Extending the header in this way is a pretty * severe POSIX violation; this design has earned GNU tar a * lot of criticism. 
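 * Layout: the base header holds four offset/numbytes pairs
 * plus an 'isextended' flag; when that flag is set, extra
 * 512-byte blocks follow, each carrying up to 21 more pairs
 * and its own 'isextended' flag (see the local struct
 * extended in gnu_sparse_old_read() below).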
*/ static int gnu_sparse_old_read(struct archive_read *a, struct tar *tar, const struct archive_entry_header_gnutar *header, size_t *unconsumed) { ssize_t bytes_read; const void *data; struct extended { struct gnu_sparse sparse[21]; char isextended[1]; char padding[7]; }; const struct extended *ext; if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK) return (ARCHIVE_FATAL); if (header->isextended[0] == 0) return (ARCHIVE_OK); do { tar_flush_unconsumed(a, unconsumed); data = __archive_read_ahead(a, 512, &bytes_read); if (bytes_read < 0) return (ARCHIVE_FATAL); if (bytes_read < 512) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated tar archive " "detected while reading sparse file data"); return (ARCHIVE_FATAL); } *unconsumed = 512; ext = (const struct extended *)data; if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK) return (ARCHIVE_FATAL); } while (ext->isextended[0] != 0); if (tar->sparse_list != NULL) tar->entry_offset = tar->sparse_list->offset; return (ARCHIVE_OK); } static int gnu_sparse_old_parse(struct archive_read *a, struct tar *tar, const struct gnu_sparse *sparse, int length) { while (length > 0 && sparse->offset[0] != 0) { if (gnu_add_sparse_entry(a, tar, tar_atol(sparse->offset, sizeof(sparse->offset)), tar_atol(sparse->numbytes, sizeof(sparse->numbytes))) != ARCHIVE_OK) return (ARCHIVE_FATAL); sparse++; length--; } return (ARCHIVE_OK); } /* * GNU tar sparse format 0.0 * * Beginning with GNU tar 1.15, sparse files are stored using * information in the pax extended header. The GNU tar maintainers * have gone through a number of variations in the process of working * out this scheme; fortunately, they're all numbered. * * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to * store offset/size for each block. The repeated instances of these * latter fields violate the pax specification (which frowns on * duplicate keys), so this format was quickly replaced. */ /* * GNU tar sparse format 0.1 * * This version replaced the offset/numbytes attributes with * a single "map" attribute that stored a list of integers. This * format had two problems: First, the "map" attribute could be very * long, which caused problems for some implementations. More * importantly, the sparse data was lost when extracted by archivers * that didn't recognize this extension. */ static int gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p) { const char *e; int64_t offset = -1, size = -1; for (;;) { e = p; while (*e != '\0' && *e != ',') { if (*e < '0' || *e > '9') return (ARCHIVE_WARN); e++; } if (offset < 0) { offset = tar_atol10(p, e - p); if (offset < 0) return (ARCHIVE_WARN); } else { size = tar_atol10(p, e - p); if (size < 0) return (ARCHIVE_WARN); if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK) return (ARCHIVE_FATAL); offset = -1; } if (*e == '\0') return (ARCHIVE_OK); p = e + 1; } } /* * GNU tar sparse format 1.0 * * The idea: The offset/size data is stored as a series of base-10 * ASCII numbers prepended to the file data, so that dearchivers that * don't support this format will extract the block map along with the * data and a separate post-process can restore the sparseness. * * Unfortunately, GNU tar 1.16 had a bug that added unnecessary * padding to the body of the file when using this format. GNU tar * 1.17 corrected this bug without bumping the version number, so * it's not possible to support both variants. 
This code supports * the later variant at the expense of not supporting the former. * * This variant also replaced GNU.sparse.size with GNU.sparse.realsize * and introduced the GNU.sparse.major/GNU.sparse.minor attributes. */ /* * Read the next line from the input, and parse it as a decimal * integer followed by '\n'. Returns positive integer value or * negative on error. */ static int64_t gnu_sparse_10_atol(struct archive_read *a, struct tar *tar, int64_t *remaining, size_t *unconsumed) { int64_t l, limit, last_digit_limit; const char *p; ssize_t bytes_read; int base, digit; base = 10; limit = INT64_MAX / base; last_digit_limit = INT64_MAX % base; /* * Skip any lines starting with '#'; GNU tar specs * don't require this, but they should. */ do { bytes_read = readline(a, tar, &p, (ssize_t)tar_min(*remaining, 100), unconsumed); if (bytes_read <= 0) return (ARCHIVE_FATAL); *remaining -= bytes_read; } while (p[0] == '#'); l = 0; while (bytes_read > 0) { if (*p == '\n') return (l); if (*p < '0' || *p >= '0' + base) return (ARCHIVE_WARN); digit = *p - '0'; if (l > limit || (l == limit && digit > last_digit_limit)) l = INT64_MAX; /* Truncate on overflow. */ else l = (l * base) + digit; p++; bytes_read--; } /* TODO: Error message. */ return (ARCHIVE_WARN); } /* * Returns length (in bytes) of the sparse data description * that was read. */ static ssize_t gnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed) { ssize_t bytes_read; int entries; int64_t offset, size, to_skip, remaining; /* Clear out the existing sparse list. */ gnu_clear_sparse_list(tar); remaining = tar->entry_bytes_remaining; /* Parse entries. */ entries = (int)gnu_sparse_10_atol(a, tar, &remaining, unconsumed); if (entries < 0) return (ARCHIVE_FATAL); /* Parse the individual entries. */ while (entries-- > 0) { /* Parse offset/size */ offset = gnu_sparse_10_atol(a, tar, &remaining, unconsumed); if (offset < 0) return (ARCHIVE_FATAL); size = gnu_sparse_10_atol(a, tar, &remaining, unconsumed); if (size < 0) return (ARCHIVE_FATAL); /* Add a new sparse entry. */ if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK) return (ARCHIVE_FATAL); } /* Skip rest of block... */ tar_flush_unconsumed(a, unconsumed); bytes_read = (ssize_t)(tar->entry_bytes_remaining - remaining); to_skip = 0x1ff & -bytes_read; if (to_skip != __archive_read_consume(a, to_skip)) return (ARCHIVE_FATAL); return ((ssize_t)(bytes_read + to_skip)); } /* * Solaris pax extension for a sparse file. This is recorded with the * data and hole pairs. The way recording sparse information by Solaris' * pax simply indicates where data and sparse are, so the stored contents * consist of both data and hole. */ static int solaris_sparse_parse(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const char *p) { const char *e; int64_t start, end; int hole = 1; (void)entry; /* UNUSED */ end = 0; if (*p == ' ') p++; else return (ARCHIVE_WARN); for (;;) { e = p; while (*e != '\0' && *e != ' ') { if (*e < '0' || *e > '9') return (ARCHIVE_WARN); e++; } start = end; end = tar_atol10(p, e - p); if (end < 0) return (ARCHIVE_WARN); if (start < end) { if (gnu_add_sparse_entry(a, tar, start, end - start) != ARCHIVE_OK) return (ARCHIVE_FATAL); tar->sparse_last->hole = hole; } if (*e == '\0') return (ARCHIVE_OK); p = e + 1; hole = hole == 0; } } /*- * Convert text->integer. * * Traditional tar formats (including POSIX) specify base-8 for * all of the standard numeric fields. 
This is a significant limitation * in practice: * = file size is limited to 8GB * = rdevmajor and rdevminor are limited to 21 bits * = uid/gid are limited to 21 bits * * There are two workarounds for this: * = pax extended headers, which use variable-length string fields * = GNU tar and STAR both allow either base-8 or base-256 in * most fields. The high bit is set to indicate base-256. * * On read, this implementation supports both extensions. */ static int64_t tar_atol(const char *p, size_t char_cnt) { /* * Technically, GNU tar considers a field to be in base-256 * only if the first byte is 0xff or 0x80. */ if (*p & 0x80) return (tar_atol256(p, char_cnt)); return (tar_atol8(p, char_cnt)); } /* * Note that this implementation does not (and should not!) obey * locale settings; you cannot simply substitute strtol here, since * it does obey locale. */ static int64_t tar_atol_base_n(const char *p, size_t char_cnt, int base) { int64_t l, maxval, limit, last_digit_limit; int digit, sign; maxval = INT64_MAX; limit = INT64_MAX / base; last_digit_limit = INT64_MAX % base; /* the pointer will not be dereferenced if char_cnt is zero * due to the way the && operator is evaulated. */ while (char_cnt != 0 && (*p == ' ' || *p == '\t')) { p++; char_cnt--; } sign = 1; if (char_cnt != 0 && *p == '-') { sign = -1; p++; char_cnt--; maxval = INT64_MIN; limit = -(INT64_MIN / base); last_digit_limit = INT64_MIN % base; } l = 0; if (char_cnt != 0) { digit = *p - '0'; while (digit >= 0 && digit < base && char_cnt != 0) { if (l>limit || (l == limit && digit > last_digit_limit)) { return maxval; /* Truncate on overflow. */ } l = (l * base) + digit; digit = *++p - '0'; char_cnt--; } } return (sign < 0) ? -l : l; } static int64_t tar_atol8(const char *p, size_t char_cnt) { return tar_atol_base_n(p, char_cnt, 8); } static int64_t tar_atol10(const char *p, size_t char_cnt) { return tar_atol_base_n(p, char_cnt, 10); } /* * Parse a base-256 integer. This is just a variable-length * twos-complement signed binary value in big-endian order, except * that the high-order bit is ignored. The values here can be up to * 12 bytes, so we need to be careful about overflowing 64-bit * (8-byte) integers. * * This code unashamedly assumes that the local machine uses 8-bit * bytes and twos-complement arithmetic. */ static int64_t tar_atol256(const char *_p, size_t char_cnt) { uint64_t l; const unsigned char *p = (const unsigned char *)_p; unsigned char c, neg; /* Extend 7-bit 2s-comp to 8-bit 2s-comp, decide sign. */ c = *p; if (c & 0x40) { neg = 0xff; c |= 0x80; l = ~ARCHIVE_LITERAL_ULL(0); } else { neg = 0; c &= 0x7f; l = 0; } /* If more than 8 bytes, check that we can ignore * high-order bits without overflow. */ while (char_cnt > sizeof(int64_t)) { --char_cnt; if (c != neg) return neg ? INT64_MIN : INT64_MAX; c = *++p; } /* c is first byte that fits; if sign mismatch, return overflow */ if ((c ^ neg) & 0x80) { return neg ? INT64_MIN : INT64_MAX; } /* Accumulate remaining bytes. */ while (--char_cnt > 0) { l = (l << 8) | c; c = *++p; } l = (l << 8) | c; /* Return signed twos-complement value. */ return (int64_t)(l); } /* * Returns length of line (including trailing newline) * or negative on error. 'start' argument is updated to * point to first character of line. This avoids copying * when possible. 
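 * (If the newline falls inside the first read-ahead chunk,
 * 'start' is pointed directly into the I/O buffer and nothing
 * is copied; otherwise the partial chunks are accumulated
 * into tar->line and 'start' points there instead.)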
*/ static ssize_t readline(struct archive_read *a, struct tar *tar, const char **start, ssize_t limit, size_t *unconsumed) { ssize_t bytes_read; ssize_t total_size = 0; const void *t; const char *s; void *p; tar_flush_unconsumed(a, unconsumed); t = __archive_read_ahead(a, 1, &bytes_read); if (bytes_read <= 0) return (ARCHIVE_FATAL); s = t; /* Start of line? */ p = memchr(t, '\n', bytes_read); /* If we found '\n' in the read buffer, return pointer to that. */ if (p != NULL) { bytes_read = 1 + ((const char *)p) - s; if (bytes_read > limit) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Line too long"); return (ARCHIVE_FATAL); } *unconsumed = bytes_read; *start = s; return (bytes_read); } *unconsumed = bytes_read; /* Otherwise, we need to accumulate in a line buffer. */ for (;;) { if (total_size + bytes_read > limit) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Line too long"); return (ARCHIVE_FATAL); } if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate working buffer"); return (ARCHIVE_FATAL); } memcpy(tar->line.s + total_size, t, bytes_read); tar_flush_unconsumed(a, unconsumed); total_size += bytes_read; /* If we found '\n', clean up and return. */ if (p != NULL) { *start = tar->line.s; return (total_size); } /* Read some more. */ t = __archive_read_ahead(a, 1, &bytes_read); if (bytes_read <= 0) return (ARCHIVE_FATAL); s = t; /* Start of line? */ p = memchr(t, '\n', bytes_read); /* If we found '\n', trim the read. */ if (p != NULL) { bytes_read = 1 + ((const char *)p) - s; } *unconsumed = bytes_read; } } /* * base64_decode - Base64 decode * * This accepts most variations of base-64 encoding, including: * * with or without line breaks * * with or without the final group padded with '=' or '_' characters * (The most economical Base-64 variant does not pad the last group and * omits line breaks; RFC1341 used for MIME requires both.) */ static char * base64_decode(const char *s, size_t len, size_t *out_len) { static const unsigned char digits[64] = { 'A','B','C','D','E','F','G','H','I','J','K','L','M','N', 'O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b', 'c','d','e','f','g','h','i','j','k','l','m','n','o','p', 'q','r','s','t','u','v','w','x','y','z','0','1','2','3', '4','5','6','7','8','9','+','/' }; static unsigned char decode_table[128]; char *out, *d; const unsigned char *src = (const unsigned char *)s; /* If the decode table is not yet initialized, prepare it. */ if (decode_table[digits[1]] != 1) { unsigned i; memset(decode_table, 0xff, sizeof(decode_table)); for (i = 0; i < sizeof(digits); i++) decode_table[digits[i]] = i; } /* Allocate enough space to hold the entire output. */ /* Note that we may not use all of this... */ out = (char *)malloc(len - len / 4 + 1); if (out == NULL) { *out_len = 0; return (NULL); } d = out; while (len > 0) { /* Collect the next group of (up to) four characters. */ int v = 0; int group_size = 0; while (group_size < 4 && len > 0) { /* '=' or '_' padding indicates final group. */ if (*src == '=' || *src == '_') { len = 0; break; } /* Skip illegal characters (including line breaks) */ if (*src > 127 || *src < 32 || decode_table[*src] == 0xff) { len--; src++; continue; } v <<= 6; v |= decode_table[*src++]; len --; group_size++; } /* Align a short group properly. */ v <<= 6 * (4 - group_size); /* Unpack the group we just collected. 
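 * Each full group of four 6-bit digits yields three bytes;
 * e.g. "TWFu" gives v = 0x4D616E, which unpacks to 'M', 'a',
 * 'n'.  A short final group yields proportionally fewer
 * bytes, which is why d is advanced by group_size * 3 / 4
 * below.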
*/ switch (group_size) { case 4: d[2] = v & 0xff; /* FALLTHROUGH */ case 3: d[1] = (v >> 8) & 0xff; /* FALLTHROUGH */ case 2: d[0] = (v >> 16) & 0xff; break; case 1: /* this is invalid! */ break; } d += group_size * 3 / 4; } *out_len = d - out; return (out); } static char * url_decode(const char *in) { char *out, *d; const char *s; out = (char *)malloc(strlen(in) + 1); if (out == NULL) return (NULL); for (s = in, d = out; *s != '\0'; ) { if (s[0] == '%' && s[1] != '\0' && s[2] != '\0') { /* Try to convert % escape */ int digit1 = tohex(s[1]); int digit2 = tohex(s[2]); if (digit1 >= 0 && digit2 >= 0) { /* Looks good, consume three chars */ s += 3; /* Convert output */ *d++ = ((digit1 << 4) | digit2); continue; } /* Else fall through and treat '%' as normal char */ } *d++ = *s++; } *d = '\0'; return (out); } static int tohex(int c) { if (c >= '0' && c <= '9') return (c - '0'); else if (c >= 'A' && c <= 'F') return (c - 'A' + 10); else if (c >= 'a' && c <= 'f') return (c - 'a' + 10); else return (-1); } Index: projects/clang390-import/contrib/libarchive/libarchive/archive_write_disk_acl.c =================================================================== --- projects/clang390-import/contrib/libarchive/libarchive/archive_write_disk_acl.c (revision 305016) +++ projects/clang390-import/contrib/libarchive/libarchive/archive_write_disk_acl.c (revision 305017) @@ -1,308 +1,268 @@ /*- * Copyright (c) 2003-2010 Tim Kientzle * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD: head/lib/libarchive/archive_write_disk.c 201159 2009-12-29 05:35:40Z kientzle $"); #ifdef HAVE_SYS_TYPES_H #include #endif #ifdef HAVE_SYS_ACL_H #define _ACL_PRIVATE /* For debugging */ #include #endif #ifdef HAVE_ERRNO_H #include #endif #include "archive.h" #include "archive_entry.h" #include "archive_acl_private.h" #include "archive_write_disk_private.h" #ifndef HAVE_POSIX_ACL /* Default empty function body to satisfy mainline code. 
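 * (On platforms built without POSIX ACL support this stub is
 * compiled instead, so restoring ACLs quietly becomes a no-op
 * that reports ARCHIVE_OK.)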
*/ int archive_write_disk_set_acls(struct archive *a, int fd, const char *name, struct archive_acl *abstract_acl) { (void)a; /* UNUSED */ (void)fd; /* UNUSED */ (void)name; /* UNUSED */ (void)abstract_acl; /* UNUSED */ return (ARCHIVE_OK); } #else static int set_acl(struct archive *, int fd, const char *, struct archive_acl *, acl_type_t, int archive_entry_acl_type, const char *tn); /* * XXX TODO: What about ACL types other than ACCESS and DEFAULT? */ int archive_write_disk_set_acls(struct archive *a, int fd, const char *name, struct archive_acl *abstract_acl) { int ret; if (archive_acl_count(abstract_acl, ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) > 0) { ret = set_acl(a, fd, name, abstract_acl, ACL_TYPE_ACCESS, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, "access"); if (ret != ARCHIVE_OK) return (ret); ret = set_acl(a, fd, name, abstract_acl, ACL_TYPE_DEFAULT, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT, "default"); return (ret); #ifdef ACL_TYPE_NFS4 } else if (archive_acl_count(abstract_acl, ARCHIVE_ENTRY_ACL_TYPE_NFS4) > 0) { ret = set_acl(a, fd, name, abstract_acl, ACL_TYPE_NFS4, ARCHIVE_ENTRY_ACL_TYPE_NFS4, "nfs4"); return (ret); #endif } else return ARCHIVE_OK; } static struct { int archive_perm; int platform_perm; } acl_perm_map[] = { {ARCHIVE_ENTRY_ACL_EXECUTE, ACL_EXECUTE}, {ARCHIVE_ENTRY_ACL_WRITE, ACL_WRITE}, {ARCHIVE_ENTRY_ACL_READ, ACL_READ}, #ifdef ACL_TYPE_NFS4 {ARCHIVE_ENTRY_ACL_READ_DATA, ACL_READ_DATA}, {ARCHIVE_ENTRY_ACL_LIST_DIRECTORY, ACL_LIST_DIRECTORY}, {ARCHIVE_ENTRY_ACL_WRITE_DATA, ACL_WRITE_DATA}, {ARCHIVE_ENTRY_ACL_ADD_FILE, ACL_ADD_FILE}, {ARCHIVE_ENTRY_ACL_APPEND_DATA, ACL_APPEND_DATA}, {ARCHIVE_ENTRY_ACL_ADD_SUBDIRECTORY, ACL_ADD_SUBDIRECTORY}, {ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS, ACL_READ_NAMED_ATTRS}, {ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS, ACL_WRITE_NAMED_ATTRS}, {ARCHIVE_ENTRY_ACL_DELETE_CHILD, ACL_DELETE_CHILD}, {ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES, ACL_READ_ATTRIBUTES}, {ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES, ACL_WRITE_ATTRIBUTES}, {ARCHIVE_ENTRY_ACL_DELETE, ACL_DELETE}, {ARCHIVE_ENTRY_ACL_READ_ACL, ACL_READ_ACL}, {ARCHIVE_ENTRY_ACL_WRITE_ACL, ACL_WRITE_ACL}, {ARCHIVE_ENTRY_ACL_WRITE_OWNER, ACL_WRITE_OWNER}, {ARCHIVE_ENTRY_ACL_SYNCHRONIZE, ACL_SYNCHRONIZE} #endif }; #ifdef ACL_TYPE_NFS4 static struct { int archive_inherit; int platform_inherit; } acl_inherit_map[] = { {ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT, ACL_ENTRY_FILE_INHERIT}, {ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT, ACL_ENTRY_DIRECTORY_INHERIT}, {ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT, ACL_ENTRY_NO_PROPAGATE_INHERIT}, {ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY, ACL_ENTRY_INHERIT_ONLY} }; #endif static int set_acl(struct archive *a, int fd, const char *name, struct archive_acl *abstract_acl, acl_type_t acl_type, int ae_requested_type, const char *tname) { acl_t acl; acl_entry_t acl_entry; acl_permset_t acl_permset; #ifdef ACL_TYPE_NFS4 acl_flagset_t acl_flagset; int r; #endif int ret; int ae_type, ae_permset, ae_tag, ae_id; uid_t ae_uid; gid_t ae_gid; const char *ae_name; int entries; int i; ret = ARCHIVE_OK; entries = archive_acl_reset(abstract_acl, ae_requested_type); if (entries == 0) return (ARCHIVE_OK); acl = acl_init(entries); - if (acl == (acl_t)NULL) { - archive_set_error(a, errno, - "Failed to initialize ACL working storage"); - return (ARCHIVE_FAILED); - } while (archive_acl_next(a, abstract_acl, ae_requested_type, &ae_type, &ae_permset, &ae_tag, &ae_id, &ae_name) == ARCHIVE_OK) { - if (acl_create_entry(&acl, &acl_entry) != 0) { - archive_set_error(a, errno, - "Failed to create a new ACL entry"); - return (ARCHIVE_FAILED); - } + 
acl_create_entry(&acl, &acl_entry); switch (ae_tag) { case ARCHIVE_ENTRY_ACL_USER: acl_set_tag_type(acl_entry, ACL_USER); ae_uid = archive_write_disk_uid(a, ae_name, ae_id); acl_set_qualifier(acl_entry, &ae_uid); break; case ARCHIVE_ENTRY_ACL_GROUP: acl_set_tag_type(acl_entry, ACL_GROUP); ae_gid = archive_write_disk_gid(a, ae_name, ae_id); acl_set_qualifier(acl_entry, &ae_gid); break; case ARCHIVE_ENTRY_ACL_USER_OBJ: acl_set_tag_type(acl_entry, ACL_USER_OBJ); break; case ARCHIVE_ENTRY_ACL_GROUP_OBJ: acl_set_tag_type(acl_entry, ACL_GROUP_OBJ); break; case ARCHIVE_ENTRY_ACL_MASK: acl_set_tag_type(acl_entry, ACL_MASK); break; case ARCHIVE_ENTRY_ACL_OTHER: acl_set_tag_type(acl_entry, ACL_OTHER); break; #ifdef ACL_TYPE_NFS4 case ARCHIVE_ENTRY_ACL_EVERYONE: acl_set_tag_type(acl_entry, ACL_EVERYONE); break; #endif default: - archive_set_error(a, ARCHIVE_ERRNO_MISC, - "Unknown ACL tag: %d", ae_tag); - return (ARCHIVE_FAILED); + /* XXX */ + break; } #ifdef ACL_TYPE_NFS4 - r = 0; switch (ae_type) { case ARCHIVE_ENTRY_ACL_TYPE_ALLOW: - r = acl_set_entry_type_np(acl_entry, ACL_ENTRY_TYPE_ALLOW); + acl_set_entry_type_np(acl_entry, ACL_ENTRY_TYPE_ALLOW); break; case ARCHIVE_ENTRY_ACL_TYPE_DENY: - r = acl_set_entry_type_np(acl_entry, ACL_ENTRY_TYPE_DENY); + acl_set_entry_type_np(acl_entry, ACL_ENTRY_TYPE_DENY); break; case ARCHIVE_ENTRY_ACL_TYPE_AUDIT: - r = acl_set_entry_type_np(acl_entry, ACL_ENTRY_TYPE_AUDIT); + acl_set_entry_type_np(acl_entry, ACL_ENTRY_TYPE_AUDIT); break; case ARCHIVE_ENTRY_ACL_TYPE_ALARM: - r = acl_set_entry_type_np(acl_entry, ACL_ENTRY_TYPE_ALARM); + acl_set_entry_type_np(acl_entry, ACL_ENTRY_TYPE_ALARM); break; case ARCHIVE_ENTRY_ACL_TYPE_ACCESS: case ARCHIVE_ENTRY_ACL_TYPE_DEFAULT: // These don't translate directly into the system ACL. break; default: - archive_set_error(a, ARCHIVE_ERRNO_MISC, - "Unknown ACL entry type: %d", ae_type); - return (ARCHIVE_FAILED); + // XXX error handling here. 
+ break; } - if (r != 0) { - archive_set_error(a, errno, - "Failed to set ACL entry type"); - return (ARCHIVE_FAILED); - } #endif - if (acl_get_permset(acl_entry, &acl_permset) != 0) { - archive_set_error(a, errno, - "Failed to get ACL permission set"); - return (ARCHIVE_FAILED); - } - if (acl_clear_perms(acl_permset) != 0) { - archive_set_error(a, errno, - "Failed to clear ACL permissions"); - return (ARCHIVE_FAILED); - } + acl_get_permset(acl_entry, &acl_permset); + acl_clear_perms(acl_permset); for (i = 0; i < (int)(sizeof(acl_perm_map) / sizeof(acl_perm_map[0])); ++i) { if (ae_permset & acl_perm_map[i].archive_perm) - if (acl_add_perm(acl_permset, - acl_perm_map[i].platform_perm) != 0) { - archive_set_error(a, errno, - "Failed to add ACL permission"); - return (ARCHIVE_FAILED); - } + acl_add_perm(acl_permset, + acl_perm_map[i].platform_perm); } #ifdef ACL_TYPE_NFS4 - if (acl_type == ACL_TYPE_NFS4) { - if (acl_get_flagset_np(acl_entry, &acl_flagset) != 0) { - archive_set_error(a, errno, - "Failed to get flagset from an NFSv4 ACL entry"); - return (ARCHIVE_FAILED); - } - if (acl_clear_flags_np(acl_flagset) != 0) { - archive_set_error(a, errno, - "Failed to clear flags from an NFSv4 ACL flagset"); - return (ARCHIVE_FAILED); - } + // XXX acl_get_flagset_np on FreeBSD returns EINVAL for + // non-NFSv4 ACLs + r = acl_get_flagset_np(acl_entry, &acl_flagset); + if (r == 0) { + acl_clear_flags_np(acl_flagset); for (i = 0; i < (int)(sizeof(acl_inherit_map) / sizeof(acl_inherit_map[0])); ++i) { - if (ae_permset & acl_inherit_map[i].archive_inherit) { - if (acl_add_flag_np(acl_flagset, - acl_inherit_map[i].platform_inherit) != 0) { - archive_set_error(a, errno, - "Failed to add flag to NFSv4 ACL flagset"); - return (ARCHIVE_FAILED); - } - } + if (ae_permset & acl_inherit_map[i].archive_inherit) + acl_add_flag_np(acl_flagset, + acl_inherit_map[i].platform_inherit); } } #endif } /* Try restoring the ACL through 'fd' if we can. */ #if HAVE_ACL_SET_FD if (fd >= 0 && acl_type == ACL_TYPE_ACCESS && acl_set_fd(fd, acl) == 0) ret = ARCHIVE_OK; else #else #if HAVE_ACL_SET_FD_NP if (fd >= 0 && acl_set_fd_np(fd, acl, acl_type) == 0) ret = ARCHIVE_OK; else #endif #endif #if HAVE_ACL_SET_LINK_NP if (acl_set_link_np(name, acl_type, acl) != 0) { archive_set_error(a, errno, "Failed to set %s acl", tname); ret = ARCHIVE_WARN; } #else /* TODO: Skip this if 'name' is a symlink. */ if (acl_set_file(name, acl_type, acl) != 0) { archive_set_error(a, errno, "Failed to set %s acl", tname); ret = ARCHIVE_WARN; } #endif acl_free(acl); return (ret); } #endif Index: projects/clang390-import/contrib/libarchive/libarchive/archive_write_set_format_pax.c =================================================================== --- projects/clang390-import/contrib/libarchive/libarchive/archive_write_set_format_pax.c (revision 305016) +++ projects/clang390-import/contrib/libarchive/libarchive/archive_write_set_format_pax.c (revision 305017) @@ -1,1931 +1,1907 @@ /*- * Copyright (c) 2003-2007 Tim Kientzle * Copyright (c) 2010-2012 Michihiro NAKAJIMA * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); #ifdef HAVE_ERRNO_H #include <errno.h> #endif #ifdef HAVE_STDLIB_H #include <stdlib.h> #endif #ifdef HAVE_STRING_H #include <string.h> #endif #include "archive.h" #include "archive_entry.h" #include "archive_entry_locale.h" #include "archive_private.h" #include "archive_write_private.h" struct sparse_block { struct sparse_block *next; int is_hole; uint64_t offset; uint64_t remaining; }; struct pax { uint64_t entry_bytes_remaining; uint64_t entry_padding; struct archive_string l_url_encoded_name; struct archive_string pax_header; struct archive_string sparse_map; size_t sparse_map_padding; struct sparse_block *sparse_list; struct sparse_block *sparse_tail; struct archive_string_conv *sconv_utf8; int opt_binary; }; static void add_pax_attr(struct archive_string *, const char *key, const char *value); static void add_pax_attr_int(struct archive_string *, const char *key, int64_t value); static void add_pax_attr_time(struct archive_string *, const char *key, int64_t sec, unsigned long nanos); -static int add_pax_acl(struct archive_write *, - struct archive_entry *, struct pax *, int); static ssize_t archive_write_pax_data(struct archive_write *, const void *, size_t); static int archive_write_pax_close(struct archive_write *); static int archive_write_pax_free(struct archive_write *); static int archive_write_pax_finish_entry(struct archive_write *); static int archive_write_pax_header(struct archive_write *, struct archive_entry *); static int archive_write_pax_options(struct archive_write *, const char *, const char *); static char *base64_encode(const char *src, size_t len); static char *build_gnu_sparse_name(char *dest, const char *src); static char *build_pax_attribute_name(char *dest, const char *src); static char *build_ustar_entry_name(char *dest, const char *src, size_t src_length, const char *insert); static char *format_int(char *dest, int64_t); static int has_non_ASCII(const char *); static void sparse_list_clear(struct pax *); static int sparse_list_add(struct pax *, int64_t, int64_t); static char *url_encode(const char *in); /* * Set output format to 'restricted pax' format. * * This is the same as normal 'pax', but tries to suppress * the pax header whenever possible. This is the default for * bsdtar, for instance.
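 *
 * A minimal usage sketch (an editor's illustration, not part of this
 * change; these are the standard libarchive entry points, and
 * "out.tar" is a placeholder name):
 *
 *	struct archive *a = archive_write_new();
 *	archive_write_set_format_pax_restricted(a);
 *	archive_write_open_filename(a, "out.tar");
 *	... write entries, then archive_write_free(a) ...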
*/ int archive_write_set_format_pax_restricted(struct archive *_a) { struct archive_write *a = (struct archive_write *)_a; int r; archive_check_magic(_a, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_NEW, "archive_write_set_format_pax_restricted"); r = archive_write_set_format_pax(&a->archive); a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_RESTRICTED; a->archive.archive_format_name = "restricted POSIX pax interchange"; return (r); } /* * Set output format to 'pax' format. */ int archive_write_set_format_pax(struct archive *_a) { struct archive_write *a = (struct archive_write *)_a; struct pax *pax; archive_check_magic(_a, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_NEW, "archive_write_set_format_pax"); if (a->format_free != NULL) (a->format_free)(a); pax = (struct pax *)malloc(sizeof(*pax)); if (pax == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate pax data"); return (ARCHIVE_FATAL); } memset(pax, 0, sizeof(*pax)); a->format_data = pax; a->format_name = "pax"; a->format_options = archive_write_pax_options; a->format_write_header = archive_write_pax_header; a->format_write_data = archive_write_pax_data; a->format_close = archive_write_pax_close; a->format_free = archive_write_pax_free; a->format_finish_entry = archive_write_pax_finish_entry; a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive.archive_format_name = "POSIX pax interchange"; return (ARCHIVE_OK); } static int archive_write_pax_options(struct archive_write *a, const char *key, const char *val) { struct pax *pax = (struct pax *)a->format_data; int ret = ARCHIVE_FAILED; if (strcmp(key, "hdrcharset") == 0) { /* * The character sets we can use are defined in * IEEE Std 1003.1-2001. */ if (val == NULL || val[0] == 0) archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "pax: hdrcharset option needs a character-set name"); else if (strcmp(val, "BINARY") == 0 || strcmp(val, "binary") == 0) { /* * Specify binary mode. We will not convert * filenames, uname and gname to any charsets. */ pax->opt_binary = 1; ret = ARCHIVE_OK; } else if (strcmp(val, "UTF-8") == 0) { /* * Specify the UTF-8 character set to be used for * filenames. This also serves as a check that the * running platform supports the string conversion; * libarchive_test in particular relies on this. */ pax->sconv_utf8 = archive_string_conversion_to_charset( &(a->archive), "UTF-8", 0); if (pax->sconv_utf8 == NULL) ret = ARCHIVE_FATAL; else ret = ARCHIVE_OK; } else archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "pax: invalid charset name"); return (ret); } /* Note: The "warn" return is just to inform the options * supervisor that we didn't handle it. It will generate * a suitable error if no one used this option. */ return (ARCHIVE_WARN); } /* * Note: This code assumes that 'nanos' has the same sign as 'sec', * which implies that sec=-1, nanos=200000000 represents -1.2 seconds * and not -0.8 seconds. This is a pretty pedantic point, as we're * unlikely to encounter many real files created before Jan 1, 1970, * much less ones with timestamps recorded to sub-second resolution. */ static void add_pax_attr_time(struct archive_string *as, const char *key, int64_t sec, unsigned long nanos) { int digit, i; char *t; /* * Note that each byte contributes fewer than 3 base-10 * digits, so this will always be big enough. */ char tmp[1 + 3*sizeof(sec) + 1 + 3*sizeof(nanos)]; tmp[sizeof(tmp) - 1] = 0; t = tmp + sizeof(tmp) - 1; /* Skip trailing zeros in the fractional part.
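 * For example (an editor's illustration): sec=1, nanos=123000000 is
 * emitted as "1.123", while sec=1, nanos=0 is emitted as just "1".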
*/ for (digit = 0, i = 10; i > 0 && digit == 0; i--) { digit = nanos % 10; nanos /= 10; } /* Only format the fraction if it's non-zero. */ if (i > 0) { while (i > 0) { *--t = "0123456789"[digit]; digit = nanos % 10; nanos /= 10; i--; } *--t = '.'; } t = format_int(t, sec); add_pax_attr(as, key, t); } static char * format_int(char *t, int64_t i) { uint64_t ui; if (i < 0) ui = (i == INT64_MIN) ? (uint64_t)(INT64_MAX) + 1 : (uint64_t)(-i); else ui = i; do { *--t = "0123456789"[ui % 10]; } while (ui /= 10); if (i < 0) *--t = '-'; return (t); } static void add_pax_attr_int(struct archive_string *as, const char *key, int64_t value) { char tmp[1 + 3 * sizeof(value)]; tmp[sizeof(tmp) - 1] = 0; add_pax_attr(as, key, format_int(tmp + sizeof(tmp) - 1, value)); } /* * Add a key/value attribute to the pax header. This function handles * the length field and various other syntactic requirements. */ static void add_pax_attr(struct archive_string *as, const char *key, const char *value) { int digits, i, len, next_ten; char tmp[1 + 3 * sizeof(int)]; /* < 3 base-10 digits per byte */ /*- * PAX attributes have the following layout: * <len> <space> <key> <=> <value> <nl> */ len = 1 + (int)strlen(key) + 1 + (int)strlen(value) + 1; /* * The <len> field includes the length of the <len> field, so * computing the correct length is tricky. I start by * counting the number of base-10 digits in 'len' and * computing the next higher power of 10. */ next_ten = 1; digits = 0; i = len; while (i > 0) { i = i / 10; digits++; next_ten = next_ten * 10; } /* * For example, if the string without the length field is 99 * chars, then adding the 2 digit length "99" will force the * total length past 100, requiring an extra digit. The next * statement adjusts for this effect. */ if (len + digits >= next_ten) digits++; /* Now, we have the right length so we can build the line. */ tmp[sizeof(tmp) - 1] = 0; /* Null-terminate the work area. */ archive_strcat(as, format_int(tmp + sizeof(tmp) - 1, len + digits)); archive_strappend_char(as, ' '); archive_strcat(as, key); archive_strappend_char(as, '='); archive_strcat(as, value); archive_strappend_char(as, '\n'); } static int archive_write_pax_header_xattrs(struct archive_write *a, struct pax *pax, struct archive_entry *entry) { struct archive_string s; int i = archive_entry_xattr_reset(entry); while (i--) { const char *name; const void *value; char *encoded_value; char *url_encoded_name = NULL, *encoded_name = NULL; size_t size; int r; archive_entry_xattr_next(entry, &name, &value, &size); url_encoded_name = url_encode(name); if (url_encoded_name != NULL) { /* Convert narrow-character to UTF-8. */ r = archive_strcpy_l(&(pax->l_url_encoded_name), url_encoded_name, pax->sconv_utf8); free(url_encoded_name); /* Done with this.
*/ if (r == 0) encoded_name = pax->l_url_encoded_name.s; else if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Linkname"); return (ARCHIVE_FATAL); } } encoded_value = base64_encode((const char *)value, size); if (encoded_name != NULL && encoded_value != NULL) { archive_string_init(&s); archive_strcpy(&s, "LIBARCHIVE.xattr."); archive_strcat(&s, encoded_name); add_pax_attr(&(pax->pax_header), s.s, encoded_value); archive_string_free(&s); } free(encoded_value); } return (ARCHIVE_OK); } static int get_entry_hardlink(struct archive_write *a, struct archive_entry *entry, const char **name, size_t *length, struct archive_string_conv *sc) { int r; r = archive_entry_hardlink_l(entry, name, length, sc); if (r != 0) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Linkname"); return (ARCHIVE_FATAL); } return (ARCHIVE_WARN); } return (ARCHIVE_OK); } static int get_entry_pathname(struct archive_write *a, struct archive_entry *entry, const char **name, size_t *length, struct archive_string_conv *sc) { int r; r = archive_entry_pathname_l(entry, name, length, sc); if (r != 0) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Pathname"); return (ARCHIVE_FATAL); } return (ARCHIVE_WARN); } return (ARCHIVE_OK); } static int get_entry_uname(struct archive_write *a, struct archive_entry *entry, const char **name, size_t *length, struct archive_string_conv *sc) { int r; r = archive_entry_uname_l(entry, name, length, sc); if (r != 0) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Uname"); return (ARCHIVE_FATAL); } return (ARCHIVE_WARN); } return (ARCHIVE_OK); } static int get_entry_gname(struct archive_write *a, struct archive_entry *entry, const char **name, size_t *length, struct archive_string_conv *sc) { int r; r = archive_entry_gname_l(entry, name, length, sc); if (r != 0) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Gname"); return (ARCHIVE_FATAL); } return (ARCHIVE_WARN); } return (ARCHIVE_OK); } static int get_entry_symlink(struct archive_write *a, struct archive_entry *entry, const char **name, size_t *length, struct archive_string_conv *sc) { int r; r = archive_entry_symlink_l(entry, name, length, sc); if (r != 0) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Linkname"); return (ARCHIVE_FATAL); } return (ARCHIVE_WARN); } return (ARCHIVE_OK); } -/* Add ACL to pax header */ -static int -add_pax_acl(struct archive_write *a, - struct archive_entry *entry, struct pax *pax, int flags) -{ - const char *p; - const char *attr; - int r; - - if (flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) - attr = "SCHILY.acl.access"; - else if (flags & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) - attr = "SCHILY.acl.default"; - else if (flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) - attr = "SCHILY.acl.ace"; - else - return(ARCHIVE_FATAL); - - r = archive_entry_acl_text_l(entry, flags, &p, NULL, - pax->sconv_utf8); - if (r != 0) { - if (errno == ENOMEM) { - archive_set_error(&a->archive, ENOMEM, "%s %s", - "Can't allocate memory for ", attr); - return (ARCHIVE_FATAL); - } - archive_set_error(&a->archive, - ARCHIVE_ERRNO_FILE_FORMAT, "%s %s %s", - "Can't translate ", attr, " to UTF-8"); - return(ARCHIVE_WARN); - } else if (p != NULL && *p != '\0') { - add_pax_attr(&(pax->pax_header), - attr, p); - } - return(ARCHIVE_OK); -} - /* * TODO: Consider adding 'comment' and 'charset' fields to * archive_entry 
so that clients can specify them. Also, consider * adding generic key/value tags so clients can add arbitrary * key/value data. * * TODO: Break up this 700-line function!!!! Yowza! */ static int archive_write_pax_header(struct archive_write *a, struct archive_entry *entry_original) { struct archive_entry *entry_main; const char *p; const char *suffix; int need_extension, r, ret; - int acl_access, acl_default, acl_nfs4; int sparse_count; uint64_t sparse_total, real_size; struct pax *pax; const char *hardlink; const char *path = NULL, *linkpath = NULL; const char *uname = NULL, *gname = NULL; const void *mac_metadata; size_t mac_metadata_size; struct archive_string_conv *sconv; size_t hardlink_length, path_length, linkpath_length; size_t uname_length, gname_length; char paxbuff[512]; char ustarbuff[512]; char ustar_entry_name[256]; char pax_entry_name[256]; char gnu_sparse_name[256]; struct archive_string entry_name; ret = ARCHIVE_OK; need_extension = 0; pax = (struct pax *)a->format_data; /* Sanity check. */ if (archive_entry_pathname(entry_original) == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Can't record entry in tar file without pathname"); return (ARCHIVE_FAILED); } /* * Choose a header encoding. */ if (pax->opt_binary) sconv = NULL;/* Binary mode. */ else { /* Header encoding is UTF-8. */ if (pax->sconv_utf8 == NULL) { /* Initialize the string conversion object * we will need */ pax->sconv_utf8 = archive_string_conversion_to_charset( &(a->archive), "UTF-8", 1); if (pax->sconv_utf8 == NULL) /* Couldn't allocate memory */ return (ARCHIVE_FAILED); } sconv = pax->sconv_utf8; } r = get_entry_hardlink(a, entry_original, &hardlink, &hardlink_length, sconv); if (r == ARCHIVE_FATAL) return (r); else if (r != ARCHIVE_OK) { r = get_entry_hardlink(a, entry_original, &hardlink, &hardlink_length, NULL); if (r == ARCHIVE_FATAL) return (r); archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Can't translate linkname '%s' to %s", hardlink, archive_string_conversion_charset_name(sconv)); ret = ARCHIVE_WARN; sconv = NULL;/* The header charset switches to binary mode. */ } /* Make sure this is a type of entry that we can handle here */ if (hardlink == NULL) { switch (archive_entry_filetype(entry_original)) { case AE_IFBLK: case AE_IFCHR: case AE_IFIFO: case AE_IFLNK: case AE_IFREG: break; case AE_IFDIR: { /* * Ensure a trailing '/'. Modify the original * entry so the client sees the change. */ #if defined(_WIN32) && !defined(__CYGWIN__) const wchar_t *wp; wp = archive_entry_pathname_w(entry_original); if (wp != NULL && wp[wcslen(wp) -1] != L'/') { struct archive_wstring ws; archive_string_init(&ws); path_length = wcslen(wp); if (archive_wstring_ensure(&ws, path_length + 2) == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate pax data"); archive_wstring_free(&ws); return(ARCHIVE_FATAL); } /* Should we keep '\' ? */ if (wp[path_length -1] == L'\\') path_length--; archive_wstrncpy(&ws, wp, path_length); archive_wstrappend_wchar(&ws, L'/'); archive_entry_copy_pathname_w( entry_original, ws.s); archive_wstring_free(&ws); p = NULL; } else #endif p = archive_entry_pathname(entry_original); /* * On Windows, this is a backup operation just in * case getting WCS failed. On POSIX, this is a * normal operation.
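 * Either way the effect is the same: a directory entry named
 * "dir" is stored as "dir/" so that readers recognize it as a
 * directory ("dir" here is an editor's placeholder example).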
*/ if (p != NULL && p[strlen(p) - 1] != '/') { struct archive_string as; archive_string_init(&as); path_length = strlen(p); if (archive_string_ensure(&as, path_length + 2) == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate pax data"); archive_string_free(&as); return(ARCHIVE_FATAL); } #if defined(_WIN32) && !defined(__CYGWIN__) /* NOTE: This might break the pathname * if the current code page is CP932 and * the pathname includes a character '\' * as a part of its multibyte pathname. */ if (p[strlen(p) -1] == '\\') path_length--; else #endif archive_strncpy(&as, p, path_length); archive_strappend_char(&as, '/'); archive_entry_copy_pathname( entry_original, as.s); archive_string_free(&as); } break; } case AE_IFSOCK: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "tar format cannot archive socket"); return (ARCHIVE_FAILED); default: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "tar format cannot archive this (type=0%lo)", (unsigned long) archive_entry_filetype(entry_original)); return (ARCHIVE_FAILED); } } /* * If Mac OS metadata blob is here, recurse to write that * as a separate entry. This is really a pretty poor design: * In particular, it doubles the overhead for long filenames. * TODO: Help Apple folks design something better and figure * out how to transition from this legacy format. * * Note that this code is present on every platform; clients * on non-Mac are unlikely to ever provide this data, but * applications that copy entries from one archive to another * should not lose data just because the local filesystem * can't store it. */ mac_metadata = archive_entry_mac_metadata(entry_original, &mac_metadata_size); if (mac_metadata != NULL) { const char *oname; char *name, *bname; size_t name_length; struct archive_entry *extra = archive_entry_new2(&a->archive); oname = archive_entry_pathname(entry_original); name_length = strlen(oname); name = malloc(name_length + 3); if (name == NULL || extra == NULL) { /* XXX error message */ archive_entry_free(extra); free(name); return (ARCHIVE_FAILED); } strcpy(name, oname); /* Find last '/'; strip trailing '/' characters */ bname = strrchr(name, '/'); while (bname != NULL && bname[1] == '\0') { *bname = '\0'; bname = strrchr(name, '/'); } if (bname == NULL) { memmove(name + 2, name, name_length + 1); memmove(name, "._", 2); } else { bname += 1; memmove(bname + 2, bname, strlen(bname) + 1); memmove(bname, "._", 2); } archive_entry_copy_pathname(extra, name); free(name); archive_entry_set_size(extra, mac_metadata_size); archive_entry_set_filetype(extra, AE_IFREG); archive_entry_set_perm(extra, archive_entry_perm(entry_original)); archive_entry_set_mtime(extra, archive_entry_mtime(entry_original), archive_entry_mtime_nsec(entry_original)); archive_entry_set_gid(extra, archive_entry_gid(entry_original)); archive_entry_set_gname(extra, archive_entry_gname(entry_original)); archive_entry_set_uid(extra, archive_entry_uid(entry_original)); archive_entry_set_uname(extra, archive_entry_uname(entry_original)); /* Recurse to write the special copyfile entry. */ r = archive_write_pax_header(a, extra); archive_entry_free(extra); if (r < ARCHIVE_WARN) return (r); if (r < ret) ret = r; r = (int)archive_write_pax_data(a, mac_metadata, mac_metadata_size); if (r < ARCHIVE_WARN) return (r); if (r < ret) ret = r; r = archive_write_pax_finish_entry(a); if (r < ARCHIVE_WARN) return (r); if (r < ret) ret = r; } /* Copy entry so we can modify it as needed. 
*/ #if defined(_WIN32) && !defined(__CYGWIN__) /* Make sure the path separators in pathname, hardlink and symlink * are all slash '/', not the Windows path separator '\'. */ entry_main = __la_win_entry_in_posix_pathseparator(entry_original); if (entry_main == entry_original) entry_main = archive_entry_clone(entry_original); #else entry_main = archive_entry_clone(entry_original); #endif if (entry_main == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate pax data"); return(ARCHIVE_FATAL); } archive_string_empty(&(pax->pax_header)); /* Blank our work area. */ archive_string_empty(&(pax->sparse_map)); sparse_total = 0; sparse_list_clear(pax); if (hardlink == NULL && archive_entry_filetype(entry_main) == AE_IFREG) sparse_count = archive_entry_sparse_reset(entry_main); else sparse_count = 0; if (sparse_count) { int64_t offset, length, last_offset = 0; /* Get the last sparse block entry. */ while (archive_entry_sparse_next( entry_main, &offset, &length) == ARCHIVE_OK) last_offset = offset + length; /* If the last sparse block does not reach the end of the file, * we have to add an empty sparse block as the last entry so * that the map covers the full file size. */ if (last_offset < archive_entry_size(entry_main)) archive_entry_sparse_add_entry(entry_main, archive_entry_size(entry_main), 0); sparse_count = archive_entry_sparse_reset(entry_main); } /* * First, check the name fields and see if any of them * require binary coding. If any of them does, then all of * them do. */ r = get_entry_pathname(a, entry_main, &path, &path_length, sconv); if (r == ARCHIVE_FATAL) return (r); else if (r != ARCHIVE_OK) { r = get_entry_pathname(a, entry_main, &path, &path_length, NULL); if (r == ARCHIVE_FATAL) return (r); archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Can't translate pathname '%s' to %s", path, archive_string_conversion_charset_name(sconv)); ret = ARCHIVE_WARN; sconv = NULL;/* The header charset switches to binary mode. */ } r = get_entry_uname(a, entry_main, &uname, &uname_length, sconv); if (r == ARCHIVE_FATAL) return (r); else if (r != ARCHIVE_OK) { r = get_entry_uname(a, entry_main, &uname, &uname_length, NULL); if (r == ARCHIVE_FATAL) return (r); archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Can't translate uname '%s' to %s", uname, archive_string_conversion_charset_name(sconv)); ret = ARCHIVE_WARN; sconv = NULL;/* The header charset switches to binary mode. */ } r = get_entry_gname(a, entry_main, &gname, &gname_length, sconv); if (r == ARCHIVE_FATAL) return (r); else if (r != ARCHIVE_OK) { r = get_entry_gname(a, entry_main, &gname, &gname_length, NULL); if (r == ARCHIVE_FATAL) return (r); archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Can't translate gname '%s' to %s", gname, archive_string_conversion_charset_name(sconv)); ret = ARCHIVE_WARN; sconv = NULL;/* The header charset switches to binary mode. */ } linkpath = hardlink; linkpath_length = hardlink_length; if (linkpath == NULL) { r = get_entry_symlink(a, entry_main, &linkpath, &linkpath_length, sconv); if (r == ARCHIVE_FATAL) return (r); else if (r != ARCHIVE_OK) { r = get_entry_symlink(a, entry_main, &linkpath, &linkpath_length, NULL); if (r == ARCHIVE_FATAL) return (r); archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Can't translate linkname '%s' to %s", linkpath, archive_string_conversion_charset_name(sconv)); ret = ARCHIVE_WARN; sconv = NULL; } } /* If any string conversions failed, get all attributes * in binary-mode.
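 * (A "hdrcharset=BINARY" attribute is recorded just below so that
 * readers know the names were stored as raw bytes.)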
*/ if (sconv == NULL && !pax->opt_binary) { if (hardlink != NULL) { r = get_entry_hardlink(a, entry_main, &hardlink, &hardlink_length, NULL); if (r == ARCHIVE_FATAL) return (r); linkpath = hardlink; linkpath_length = hardlink_length; } r = get_entry_pathname(a, entry_main, &path, &path_length, NULL); if (r == ARCHIVE_FATAL) return (r); r = get_entry_uname(a, entry_main, &uname, &uname_length, NULL); if (r == ARCHIVE_FATAL) return (r); r = get_entry_gname(a, entry_main, &gname, &gname_length, NULL); if (r == ARCHIVE_FATAL) return (r); } /* Store the header encoding first, to be nice to readers. */ if (sconv == NULL) add_pax_attr(&(pax->pax_header), "hdrcharset", "BINARY"); /* * If name is too long, or has non-ASCII characters, add * 'path' to pax extended attrs. (Note that an unconvertible * name must have non-ASCII characters.) */ if (has_non_ASCII(path)) { /* We have non-ASCII characters. */ add_pax_attr(&(pax->pax_header), "path", path); archive_entry_set_pathname(entry_main, build_ustar_entry_name(ustar_entry_name, path, path_length, NULL)); need_extension = 1; } else { /* We have an all-ASCII path; we'd like to just store * it in the ustar header if it will fit. Yes, this * duplicates some of the logic in * archive_write_set_format_ustar.c */ if (path_length <= 100) { /* Fits in the old 100-char tar name field. */ } else { /* Find largest suffix that will fit. */ /* Note: strlen() > 100, so strlen() - 100 - 1 >= 0 */ suffix = strchr(path + path_length - 100 - 1, '/'); /* Don't attempt an empty prefix. */ if (suffix == path) suffix = strchr(suffix + 1, '/'); /* We can put it in the ustar header if it's * all ASCII and it's either <= 100 characters * or can be split at a '/' into a prefix <= * 155 chars and a suffix <= 100 chars. (Note * the strchr() above will return NULL exactly * when the path can't be split.) */ if (suffix == NULL /* Suffix > 100 chars. */ || suffix[1] == '\0' /* empty suffix */ || suffix - path > 155) /* Prefix > 155 chars */ { add_pax_attr(&(pax->pax_header), "path", path); archive_entry_set_pathname(entry_main, build_ustar_entry_name(ustar_entry_name, path, path_length, NULL)); need_extension = 1; } } } if (linkpath != NULL) { /* If link name is too long or has non-ASCII characters, add * 'linkpath' to pax extended attrs. */ if (linkpath_length > 100 || has_non_ASCII(linkpath)) { add_pax_attr(&(pax->pax_header), "linkpath", linkpath); if (linkpath_length > 100) { if (hardlink != NULL) archive_entry_set_hardlink(entry_main, "././@LongHardLink"); else archive_entry_set_symlink(entry_main, "././@LongSymLink"); } need_extension = 1; } } /* Save a pathname since it will be renamed if `entry_main` has * sparse blocks. */ archive_string_init(&entry_name); archive_strcpy(&entry_name, archive_entry_pathname(entry_main)); /* If file size is too large, add 'size' to pax extended attrs. */ if (archive_entry_size(entry_main) >= (((int64_t)1) << 33)) { add_pax_attr_int(&(pax->pax_header), "size", archive_entry_size(entry_main)); need_extension = 1; } /* If numeric GID is too large, add 'gid' to pax extended attrs. */ if ((unsigned int)archive_entry_gid(entry_main) >= (1 << 18)) { add_pax_attr_int(&(pax->pax_header), "gid", archive_entry_gid(entry_main)); need_extension = 1; } /* If group name is too large or has non-ASCII characters, add * 'gname' to pax extended attrs. */ if (gname != NULL) { if (gname_length > 31 || has_non_ASCII(gname)) { add_pax_attr(&(pax->pax_header), "gname", gname); need_extension = 1; } } /* If numeric UID is too large, add 'uid' to pax extended attrs. 
*/ if ((unsigned int)archive_entry_uid(entry_main) >= (1 << 18)) { add_pax_attr_int(&(pax->pax_header), "uid", archive_entry_uid(entry_main)); need_extension = 1; } /* Add 'uname' to pax extended attrs if necessary. */ if (uname != NULL) { if (uname_length > 31 || has_non_ASCII(uname)) { add_pax_attr(&(pax->pax_header), "uname", uname); need_extension = 1; } } /* * POSIX/SUSv3 doesn't provide a standard key for large device * numbers. I use the same keys here that Joerg Schilling * used for 'star.' (Which, somewhat confusingly, are called * "devXXX" even though they code "rdev" values.) No doubt, * other implementations use other keys. Note that there's no * reason we can't write the same information into a number of * different keys. * * Of course, this is only needed for block or char device entries. */ if (archive_entry_filetype(entry_main) == AE_IFBLK || archive_entry_filetype(entry_main) == AE_IFCHR) { /* * If rdevmajor is too large, add 'SCHILY.devmajor' to * extended attributes. */ int rdevmajor, rdevminor; rdevmajor = archive_entry_rdevmajor(entry_main); rdevminor = archive_entry_rdevminor(entry_main); if (rdevmajor >= (1 << 18)) { add_pax_attr_int(&(pax->pax_header), "SCHILY.devmajor", rdevmajor); /* * Non-strict formatting below means we don't * have to truncate here. Not truncating improves * the chance that some more modern tar archivers * (such as GNU tar 1.13) can restore the full * value even if they don't understand the pax * extended attributes. See my rant below about * file size fields for additional details. */ /* archive_entry_set_rdevmajor(entry_main, rdevmajor & ((1 << 18) - 1)); */ need_extension = 1; } /* * If devminor is too large, add 'SCHILY.devminor' to * extended attributes. */ if (rdevminor >= (1 << 18)) { add_pax_attr_int(&(pax->pax_header), "SCHILY.devminor", rdevminor); /* Truncation is not necessary here, either. */ /* archive_entry_set_rdevminor(entry_main, rdevminor & ((1 << 18) - 1)); */ need_extension = 1; } } /* * Technically, the mtime field in the ustar header can * support 33 bits, but many platforms use signed 32-bit time * values. The cutoff of 0x7fffffff here is a compromise. * Yes, this check is duplicated just below; this helps to * avoid writing an mtime attribute just to handle a * high-resolution timestamp in "restricted pax" mode. */ if (!need_extension && ((archive_entry_mtime(entry_main) < 0) || (archive_entry_mtime(entry_main) >= 0x7fffffff))) need_extension = 1; /* I use a star-compatible file flag attribute. */ p = archive_entry_fflags_text(entry_main); if (!need_extension && p != NULL && *p != '\0') need_extension = 1; + /* If there are non-trivial ACL entries, we need an extension. */ + if (!need_extension && archive_entry_acl_count(entry_original, + ARCHIVE_ENTRY_ACL_TYPE_ACCESS) > 0) + need_extension = 1; + + /* If there are non-trivial ACL entries, we need an extension. 
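 * (This second check covers the default ACL; the access ACL was
 * checked just above.)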
*/ + if (!need_extension && archive_entry_acl_count(entry_original, + ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) > 0) + need_extension = 1; + /* If there are extended attributes, we need an extension */ if (!need_extension && archive_entry_xattr_count(entry_original) > 0) need_extension = 1; /* If there is sparse info, we need an extension */ if (!need_extension && sparse_count > 0) need_extension = 1; - acl_access = archive_entry_acl_count(entry_original, - ARCHIVE_ENTRY_ACL_TYPE_ACCESS); - acl_default = archive_entry_acl_count(entry_original, - ARCHIVE_ENTRY_ACL_TYPE_DEFAULT); - acl_nfs4 = archive_entry_acl_count(entry_original, - ARCHIVE_ENTRY_ACL_TYPE_NFS4); - - /* If there are any ACL entries, we need an extension */ - if (!need_extension && (acl_access + acl_default + acl_nfs4) > 0) - need_extension = 1; - /* * Libarchive used to include these in extended headers for * restricted pax format, but that confused people who * expected ustar-like time semantics. So now we only include * them in full pax format. */ if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_RESTRICTED) { if (archive_entry_ctime(entry_main) != 0 || archive_entry_ctime_nsec(entry_main) != 0) add_pax_attr_time(&(pax->pax_header), "ctime", archive_entry_ctime(entry_main), archive_entry_ctime_nsec(entry_main)); if (archive_entry_atime(entry_main) != 0 || archive_entry_atime_nsec(entry_main) != 0) add_pax_attr_time(&(pax->pax_header), "atime", archive_entry_atime(entry_main), archive_entry_atime_nsec(entry_main)); /* Store birth/creationtime only if it's earlier than mtime */ if (archive_entry_birthtime_is_set(entry_main) && archive_entry_birthtime(entry_main) < archive_entry_mtime(entry_main)) add_pax_attr_time(&(pax->pax_header), "LIBARCHIVE.creationtime", archive_entry_birthtime(entry_main), archive_entry_birthtime_nsec(entry_main)); } /* * The following items are handled differently in "pax * restricted" format. In particular, in "pax restricted" * format they won't be added unless need_extension is * already set (we're already generating an extended header, so * may as well include these). */ if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_RESTRICTED || need_extension) { if (archive_entry_mtime(entry_main) < 0 || archive_entry_mtime(entry_main) >= 0x7fffffff || archive_entry_mtime_nsec(entry_main) != 0) add_pax_attr_time(&(pax->pax_header), "mtime", archive_entry_mtime(entry_main), archive_entry_mtime_nsec(entry_main)); /* I use a star-compatible file flag attribute. */ p = archive_entry_fflags_text(entry_main); if (p != NULL && *p != '\0') add_pax_attr(&(pax->pax_header), "SCHILY.fflags", p); /* I use star-compatible ACL attributes.
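 * For example (an editor's illustration), a simple POSIX.1e access
 * ACL might be stored as the attribute
 * SCHILY.acl.access = "user::rwx,group::r-x,other::r-x".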
*/ - if (acl_access > 0) { - ret = add_pax_acl(a, entry_original, pax, - ARCHIVE_ENTRY_ACL_TYPE_ACCESS | - ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID); - if (ret == ARCHIVE_FATAL) + r = archive_entry_acl_text_l(entry_original, + ARCHIVE_ENTRY_ACL_TYPE_ACCESS | + ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID, + &p, NULL, pax->sconv_utf8); + if (r != 0) { + if (errno == ENOMEM) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for " + "ACL.access"); return (ARCHIVE_FATAL); + } + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Can't translate ACL.access to UTF-8"); + ret = ARCHIVE_WARN; + } else if (p != NULL && *p != '\0') { + add_pax_attr(&(pax->pax_header), + "SCHILY.acl.access", p); } - if (acl_default > 0) { - ret = add_pax_acl(a, entry_original, pax, - ARCHIVE_ENTRY_ACL_TYPE_DEFAULT | - ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID); - if (ret == ARCHIVE_FATAL) + r = archive_entry_acl_text_l(entry_original, + ARCHIVE_ENTRY_ACL_TYPE_DEFAULT | + ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID, + &p, NULL, pax->sconv_utf8); + if (r != 0) { + if (errno == ENOMEM) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for " + "ACL.default"); return (ARCHIVE_FATAL); - } - if (acl_nfs4 > 0) { - ret = add_pax_acl(a, entry_original, pax, - ARCHIVE_ENTRY_ACL_TYPE_NFS4 | - ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID); - if (ret == ARCHIVE_FATAL) - return (ARCHIVE_FATAL); + } + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Can't translate ACL.default to UTF-8"); + ret = ARCHIVE_WARN; + } else if (p != NULL && *p != '\0') { + add_pax_attr(&(pax->pax_header), + "SCHILY.acl.default", p); } /* We use GNU-tar-compatible sparse attributes. */ if (sparse_count > 0) { int64_t soffset, slength; add_pax_attr_int(&(pax->pax_header), "GNU.sparse.major", 1); add_pax_attr_int(&(pax->pax_header), "GNU.sparse.minor", 0); add_pax_attr(&(pax->pax_header), "GNU.sparse.name", entry_name.s); add_pax_attr_int(&(pax->pax_header), "GNU.sparse.realsize", archive_entry_size(entry_main)); /* Rename the file name which will be used for * ustar header to a special name, which GNU * PAX Format 1.0 requires */ archive_entry_set_pathname(entry_main, build_gnu_sparse_name(gnu_sparse_name, entry_name.s)); /* * - Make a sparse map, which will precede a file data. * - Get the total size of available data of sparse. */ archive_string_sprintf(&(pax->sparse_map), "%d\n", sparse_count); while (archive_entry_sparse_next(entry_main, &soffset, &slength) == ARCHIVE_OK) { archive_string_sprintf(&(pax->sparse_map), "%jd\n%jd\n", (intmax_t)soffset, (intmax_t)slength); sparse_total += slength; if (sparse_list_add(pax, soffset, slength) != ARCHIVE_OK) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory"); archive_entry_free(entry_main); archive_string_free(&entry_name); return (ARCHIVE_FATAL); } } } /* Store extended attributes */ if (archive_write_pax_header_xattrs(a, pax, entry_original) == ARCHIVE_FATAL) { archive_entry_free(entry_main); archive_string_free(&entry_name); return (ARCHIVE_FATAL); } } /* Only regular files have data. */ if (archive_entry_filetype(entry_main) != AE_IFREG) archive_entry_set_size(entry_main, 0); /* * Pax-restricted does not store data for hardlinks, in order * to improve compatibility with ustar. */ if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE && hardlink != NULL) archive_entry_set_size(entry_main, 0); /* * XXX Full pax interchange format does permit a hardlink * entry to have data associated with it. 
I'm not supporting * that here because the client expects me to tell them whether * or not this format expects data for hardlinks. If I * don't check here, then every pax archive will end up with * duplicated data for hardlinks. Someday, there may be * need to select this behavior, in which case the following * will need to be revisited. XXX */ if (hardlink != NULL) archive_entry_set_size(entry_main, 0); /* Save a real file size. */ real_size = archive_entry_size(entry_main); /* * Overwrite a file size by the total size of sparse blocks and * the size of sparse map info. That file size is the length of * the data, which we will exactly store into an archive file. */ if (archive_strlen(&(pax->sparse_map))) { size_t mapsize = archive_strlen(&(pax->sparse_map)); pax->sparse_map_padding = 0x1ff & (-(ssize_t)mapsize); archive_entry_set_size(entry_main, mapsize + pax->sparse_map_padding + sparse_total); } /* Format 'ustar' header for main entry. * * The trouble with file size: If the reader can't understand * the file size, they may not be able to locate the next * entry and the rest of the archive is toast. Pax-compliant * readers are supposed to ignore the file size in the main * header, so the question becomes how to maximize portability * for readers that don't support pax attribute extensions. * For maximum compatibility, I permit numeric extensions in * the main header so that the file size stored will always be * correct, even if it's in a format that only some * implementations understand. The technique used here is: * * a) If possible, follow the standard exactly. This handles * files up to 8 gigabytes minus 1. * * b) If that fails, try octal but omit the field terminator. * That handles files up to 64 gigabytes minus 1. * * c) Otherwise, use base-256 extensions. That handles files * up to 2^63 in this implementation, with the potential to * go up to 2^94. That should hold us for a while. ;-) * * The non-strict formatter uses similar logic for other * numeric fields, though they're less critical. */ if (__archive_write_format_header_ustar(a, ustarbuff, entry_main, -1, 0, NULL) == ARCHIVE_FATAL) return (ARCHIVE_FATAL); /* If we built any extended attributes, write that entry first. */ if (archive_strlen(&(pax->pax_header)) > 0) { struct archive_entry *pax_attr_entry; time_t s; int64_t uid, gid; int mode; pax_attr_entry = archive_entry_new2(&a->archive); p = entry_name.s; archive_entry_set_pathname(pax_attr_entry, build_pax_attribute_name(pax_entry_name, p)); archive_entry_set_size(pax_attr_entry, archive_strlen(&(pax->pax_header))); /* Copy uid/gid (but clip to ustar limits). */ uid = archive_entry_uid(entry_main); if (uid >= 1 << 18) uid = (1 << 18) - 1; archive_entry_set_uid(pax_attr_entry, uid); gid = archive_entry_gid(entry_main); if (gid >= 1 << 18) gid = (1 << 18) - 1; archive_entry_set_gid(pax_attr_entry, gid); /* Copy mode over (but not setuid/setgid bits) */ mode = archive_entry_mode(entry_main); #ifdef S_ISUID mode &= ~S_ISUID; #endif #ifdef S_ISGID mode &= ~S_ISGID; #endif #ifdef S_ISVTX mode &= ~S_ISVTX; #endif archive_entry_set_mode(pax_attr_entry, mode); /* Copy uname/gname. */ archive_entry_set_uname(pax_attr_entry, archive_entry_uname(entry_main)); archive_entry_set_gname(pax_attr_entry, archive_entry_gname(entry_main)); /* Copy mtime, but clip to ustar limits. */ s = archive_entry_mtime(entry_main); if (s < 0) { s = 0; } if (s >= 0x7fffffff) { s = 0x7fffffff; } archive_entry_set_mtime(pax_attr_entry, s, 0); /* Standard ustar doesn't support atime. 
*/ archive_entry_set_atime(pax_attr_entry, 0, 0); /* Standard ustar doesn't support ctime. */ archive_entry_set_ctime(pax_attr_entry, 0, 0); r = __archive_write_format_header_ustar(a, paxbuff, pax_attr_entry, 'x', 1, NULL); archive_entry_free(pax_attr_entry); /* Note that the 'x' header shouldn't ever fail to format */ if (r < ARCHIVE_WARN) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "archive_write_pax_header: " "'x' header failed?! This can't happen.\n"); return (ARCHIVE_FATAL); } else if (r < ret) ret = r; r = __archive_write_output(a, paxbuff, 512); if (r != ARCHIVE_OK) { sparse_list_clear(pax); pax->entry_bytes_remaining = 0; pax->entry_padding = 0; return (ARCHIVE_FATAL); } pax->entry_bytes_remaining = archive_strlen(&(pax->pax_header)); pax->entry_padding = 0x1ff & (-(int64_t)pax->entry_bytes_remaining); r = __archive_write_output(a, pax->pax_header.s, archive_strlen(&(pax->pax_header))); if (r != ARCHIVE_OK) { /* If a write fails, we're pretty much toast. */ return (ARCHIVE_FATAL); } /* Pad out the end of the entry. */ r = __archive_write_nulls(a, (size_t)pax->entry_padding); if (r != ARCHIVE_OK) { /* If a write fails, we're pretty much toast. */ return (ARCHIVE_FATAL); } pax->entry_bytes_remaining = pax->entry_padding = 0; } /* Write the header for main entry. */ r = __archive_write_output(a, ustarbuff, 512); if (r != ARCHIVE_OK) return (r); /* * Inform the client of the on-disk size we're using, so * they can avoid unnecessarily writing a body for something * that we're just going to ignore. */ archive_entry_set_size(entry_original, real_size); if (pax->sparse_list == NULL && real_size > 0) { /* This is not a sparse file but we handle its data as * a sparse block. */ sparse_list_add(pax, 0, real_size); sparse_total = real_size; } pax->entry_padding = 0x1ff & (-(int64_t)sparse_total); archive_entry_free(entry_main); archive_string_free(&entry_name); return (ret); } /* * We need a valid name for the regular 'ustar' entry. This routine * tries to hack something more-or-less reasonable. * * The approach here tries to preserve leading dir names. We do so by * working with four sections: * 1) "prefix" directory names, * 2) "suffix" directory names, * 3) inserted dir name (optional), * 4) filename. * * These sections must satisfy the following requirements: * * Parts 1 & 2 together form an initial portion of the dir name. * * Part 3 is specified by the caller. (It should not contain a leading * or trailing '/'.) * * Part 4 forms an initial portion of the base filename. * * The filename must be <= 99 chars to fit the ustar 'name' field. * * Parts 2, 3, 4 together must be <= 99 chars to fit the ustar 'name' fld. * * Part 1 must be <= 155 chars to fit the ustar 'prefix' field. * * If the original name ends in a '/', the new name must also end in a '/' * * Trailing '/.' sequences may be stripped. * * Note: Recall that the ustar format does not store the '/' separating * parts 1 & 2, but does store the '/' separating parts 2 & 3. */ static char * build_ustar_entry_name(char *dest, const char *src, size_t src_length, const char *insert) { const char *prefix, *prefix_end; const char *suffix, *suffix_end; const char *filename, *filename_end; char *p; int need_slash = 0; /* Was there a trailing slash? */ size_t suffix_length = 99; size_t insert_length; /* Length of additional dir element to be added. */ if (insert == NULL) insert_length = 0; else /* +2 here allows for '/' before and after the insert. */ insert_length = strlen(insert) + 2; /* Step 0: Quick bailout in a common case. 
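 * (A name that already fits the 100-char ustar name field, with
 * nothing to insert, can simply be copied through unchanged.)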
*/ if (src_length < 100 && insert == NULL) { strncpy(dest, src, src_length); dest[src_length] = '\0'; return (dest); } /* Step 1: Locate filename and enforce the length restriction. */ filename_end = src + src_length; /* Remove trailing '/' chars and '/.' pairs. */ for (;;) { if (filename_end > src && filename_end[-1] == '/') { filename_end --; need_slash = 1; /* Remember to restore trailing '/'. */ continue; } if (filename_end > src + 1 && filename_end[-1] == '.' && filename_end[-2] == '/') { filename_end -= 2; need_slash = 1; /* "foo/." will become "foo/" */ continue; } break; } if (need_slash) suffix_length--; /* Find start of filename. */ filename = filename_end - 1; while ((filename > src) && (*filename != '/')) filename --; if ((*filename == '/') && (filename < filename_end - 1)) filename ++; /* Adjust filename_end so that filename + insert fits in 99 chars. */ suffix_length -= insert_length; if (filename_end > filename + suffix_length) filename_end = filename + suffix_length; /* Calculate max size for "suffix" section (#3 above). */ suffix_length -= filename_end - filename; /* Step 2: Locate the "prefix" section of the dirname, including * trailing '/'. */ prefix = src; prefix_end = prefix + 155; if (prefix_end > filename) prefix_end = filename; while (prefix_end > prefix && *prefix_end != '/') prefix_end--; if ((prefix_end < filename) && (*prefix_end == '/')) prefix_end++; /* Step 3: Locate the "suffix" section of the dirname, * including trailing '/'. */ suffix = prefix_end; suffix_end = suffix + suffix_length; /* Enforce limit. */ if (suffix_end > filename) suffix_end = filename; if (suffix_end < suffix) suffix_end = suffix; while (suffix_end > suffix && *suffix_end != '/') suffix_end--; if ((suffix_end < filename) && (*suffix_end == '/')) suffix_end++; /* Step 4: Build the new name. */ /* The OpenBSD strlcpy function is safer, but less portable. */ /* Rather than maintain two versions, just use the strncpy version. */ p = dest; if (prefix_end > prefix) { strncpy(p, prefix, prefix_end - prefix); p += prefix_end - prefix; } if (suffix_end > suffix) { strncpy(p, suffix, suffix_end - suffix); p += suffix_end - suffix; } if (insert != NULL) { /* Note: assume insert does not have leading or trailing '/' */ strcpy(p, insert); p += strlen(insert); *p++ = '/'; } strncpy(p, filename, filename_end - filename); p += filename_end - filename; if (need_slash) *p++ = '/'; *p = '\0'; return (dest); } /* * The ustar header for the pax extended attributes must have a * reasonable name: SUSv3 requires 'dirname'/PaxHeader.'pid'/'filename' * where 'pid' is the PID of the archiving process. Unfortunately, * that makes testing a pain since the output varies for each run, * so I'm sticking with the simpler 'dirname'/PaxHeader/'filename' * for now. (Someday, I'll make this settable. Then I can use the * SUS recommendation as default and test harnesses can override it * to get predictable results.) * * Joerg Schilling has argued that this is unnecessary because, in * practice, if the pax extended attributes get extracted as regular * files, no one is going to bother reading those attributes to * manually restore them. Based on this, 'star' uses * /tmp/PaxHeader/'basename' as the ustar header name. This is a * tempting argument, in part because it's simpler than the SUSv3 * recommendation, but I'm not entirely convinced. I'm also * uncomfortable with the fact that "/tmp" is a Unix-ism. * * The following routine leverages build_ustar_entry_name() above and * so is simpler than you might think. 
It just needs to provide the * additional path element and handle a few pathological cases. */ static char * build_pax_attribute_name(char *dest, const char *src) { char buff[64]; const char *p; /* Handle the null filename case. */ if (src == NULL || *src == '\0') { strcpy(dest, "PaxHeader/blank"); return (dest); } /* Prune final '/' and other unwanted final elements. */ p = src + strlen(src); for (;;) { /* Ends in "/", remove the '/' */ if (p > src && p[-1] == '/') { --p; continue; } /* Ends in "/.", remove the '.' */ if (p > src + 1 && p[-1] == '.' && p[-2] == '/') { --p; continue; } break; } /* Pathological case: After above, there was nothing left. * This includes "/." "/./." "/.//./." etc. */ if (p == src) { strcpy(dest, "/PaxHeader/rootdir"); return (dest); } /* Convert unadorned "." into a suitable filename. */ if (*src == '.' && p == src + 1) { strcpy(dest, "PaxHeader/currentdir"); return (dest); } /* * TODO: Push this string into the 'pax' structure to avoid * recomputing it every time. That will also open the door * to having clients override it. */ #if HAVE_GETPID && 0 /* Disable this for now; see above comment. */ sprintf(buff, "PaxHeader.%d", getpid()); #else /* If the platform can't fetch the pid, don't include it. */ strcpy(buff, "PaxHeader"); #endif /* General case: build a ustar-compatible name adding * "/PaxHeader/". */ build_ustar_entry_name(dest, src, p - src, buff); return (dest); } /* * GNU PAX Format 1.0 requires a special name, whose pattern is: * <dirname>/GNUSparseFile.<pid>/<filename> * * This function is used only for sparse files, which are * regular files. */ static char * build_gnu_sparse_name(char *dest, const char *src) { char buff[64]; const char *p; /* Handle the null filename case. */ if (src == NULL || *src == '\0') { strcpy(dest, "GNUSparseFile/blank"); return (dest); } /* Prune final '/' and other unwanted final elements. */ p = src + strlen(src); for (;;) { /* Ends in "/", remove the '/' */ if (p > src && p[-1] == '/') { --p; continue; } /* Ends in "/.", remove the '.' */ if (p > src + 1 && p[-1] == '.' && p[-2] == '/') { --p; continue; } break; } #if HAVE_GETPID && 0 /* Disable this as pax attribute name. */ sprintf(buff, "GNUSparseFile.%d", getpid()); #else /* If the platform can't fetch the pid, don't include it. */ strcpy(buff, "GNUSparseFile"); #endif /* General case: build a ustar-compatible name adding * "/GNUSparseFile/".
*/ build_ustar_entry_name(dest, src, p - src, buff); return (dest); } /* Write two null blocks for the end of archive */ static int archive_write_pax_close(struct archive_write *a) { return (__archive_write_nulls(a, 512 * 2)); } static int archive_write_pax_free(struct archive_write *a) { struct pax *pax; pax = (struct pax *)a->format_data; if (pax == NULL) return (ARCHIVE_OK); archive_string_free(&pax->pax_header); archive_string_free(&pax->sparse_map); archive_string_free(&pax->l_url_encoded_name); sparse_list_clear(pax); free(pax); a->format_data = NULL; return (ARCHIVE_OK); } static int archive_write_pax_finish_entry(struct archive_write *a) { struct pax *pax; uint64_t remaining; int ret; pax = (struct pax *)a->format_data; remaining = pax->entry_bytes_remaining; if (remaining == 0) { while (pax->sparse_list) { struct sparse_block *sb; if (!pax->sparse_list->is_hole) remaining += pax->sparse_list->remaining; sb = pax->sparse_list->next; free(pax->sparse_list); pax->sparse_list = sb; } } ret = __archive_write_nulls(a, (size_t)(remaining + pax->entry_padding)); pax->entry_bytes_remaining = pax->entry_padding = 0; return (ret); } static ssize_t archive_write_pax_data(struct archive_write *a, const void *buff, size_t s) { struct pax *pax; size_t ws; size_t total; int ret; pax = (struct pax *)a->format_data; /* * According to GNU PAX format 1.0, write a sparse map * before the body. */ if (archive_strlen(&(pax->sparse_map))) { ret = __archive_write_output(a, pax->sparse_map.s, archive_strlen(&(pax->sparse_map))); if (ret != ARCHIVE_OK) return (ret); ret = __archive_write_nulls(a, pax->sparse_map_padding); if (ret != ARCHIVE_OK) return (ret); archive_string_empty(&(pax->sparse_map)); } total = 0; while (total < s) { const unsigned char *p; while (pax->sparse_list != NULL && pax->sparse_list->remaining == 0) { struct sparse_block *sb = pax->sparse_list->next; free(pax->sparse_list); pax->sparse_list = sb; } if (pax->sparse_list == NULL) return (total); p = ((const unsigned char *)buff) + total; ws = s - total; if (ws > pax->sparse_list->remaining) ws = (size_t)pax->sparse_list->remaining; if (pax->sparse_list->is_hole) { /* Current block is a hole, so we do not write * the body. */ pax->sparse_list->remaining -= ws; total += ws; continue; } ret = __archive_write_output(a, p, ws); pax->sparse_list->remaining -= ws; total += ws; if (ret != ARCHIVE_OK) return (ret); } return (total); } static int has_non_ASCII(const char *_p) { const unsigned char *p = (const unsigned char *)_p; if (p == NULL) return (1); while (*p != '\0' && *p < 128) p++; return (*p != '\0'); } /* * Used by extended attribute support; encodes the name * so that there will be no '=' characters in the result. */ static char * url_encode(const char *in) { const char *s; char *d; int out_len = 0; char *out; for (s = in; *s != '\0'; s++) { if (*s < 33 || *s > 126 || *s == '%' || *s == '=') out_len += 3; else out_len++; } out = (char *)malloc(out_len + 1); if (out == NULL) return (NULL); for (s = in, d = out; *s != '\0'; s++) { /* encode any non-printable ASCII character or '%' or '=' */ if (*s < 33 || *s > 126 || *s == '%' || *s == '=') { /* URL encoding is '%' followed by two hex digits */ *d++ = '%'; *d++ = "0123456789ABCDEF"[0x0f & (*s >> 4)]; *d++ = "0123456789ABCDEF"[0x0f & *s]; } else { *d++ = *s; } } *d = '\0'; return (out); } /* * Encode a sequence of bytes into a C string using base-64 encoding. * * Returns a null-terminated C string allocated with malloc(); caller * is responsible for freeing the result.
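 *
 * A minimal usage sketch (an editor's illustration; "value" and
 * "size" stand for an extended-attribute value as seen in
 * archive_write_pax_header_xattrs() above, and "NAME" is a
 * placeholder for the url-encoded attribute name):
 *
 *	char *b64 = base64_encode((const char *)value, size);
 *	if (b64 != NULL) {
 *		add_pax_attr(&(pax->pax_header),
 *		    "LIBARCHIVE.xattr.NAME", b64);
 *		free(b64);
 *	}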
*/ static char * base64_encode(const char *s, size_t len) { static const char digits[64] = { 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O', 'P','Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d', 'e','f','g','h','i','j','k','l','m','n','o','p','q','r','s', 't','u','v','w','x','y','z','0','1','2','3','4','5','6','7', '8','9','+','/' }; int v; char *d, *out; /* 3 bytes becomes 4 chars, but round up and allow for trailing NUL */ out = (char *)malloc((len * 4 + 2) / 3 + 1); if (out == NULL) return (NULL); d = out; /* Convert each group of 3 bytes into 4 characters. */ while (len >= 3) { v = (((int)s[0] << 16) & 0xff0000) | (((int)s[1] << 8) & 0xff00) | (((int)s[2]) & 0x00ff); s += 3; len -= 3; *d++ = digits[(v >> 18) & 0x3f]; *d++ = digits[(v >> 12) & 0x3f]; *d++ = digits[(v >> 6) & 0x3f]; *d++ = digits[(v) & 0x3f]; } /* Handle final group of 1 byte (2 chars) or 2 bytes (3 chars). */ switch (len) { case 0: break; case 1: v = (((int)s[0] << 16) & 0xff0000); *d++ = digits[(v >> 18) & 0x3f]; *d++ = digits[(v >> 12) & 0x3f]; break; case 2: v = (((int)s[0] << 16) & 0xff0000) | (((int)s[1] << 8) & 0xff00); *d++ = digits[(v >> 18) & 0x3f]; *d++ = digits[(v >> 12) & 0x3f]; *d++ = digits[(v >> 6) & 0x3f]; break; } /* Add trailing NUL character so output is a valid C string. */ *d = '\0'; return (out); } static void sparse_list_clear(struct pax *pax) { while (pax->sparse_list != NULL) { struct sparse_block *sb = pax->sparse_list; pax->sparse_list = sb->next; free(sb); } pax->sparse_tail = NULL; } static int _sparse_list_add_block(struct pax *pax, int64_t offset, int64_t length, int is_hole) { struct sparse_block *sb; sb = (struct sparse_block *)malloc(sizeof(*sb)); if (sb == NULL) return (ARCHIVE_FATAL); sb->next = NULL; sb->is_hole = is_hole; sb->offset = offset; sb->remaining = length; if (pax->sparse_list == NULL || pax->sparse_tail == NULL) pax->sparse_list = pax->sparse_tail = sb; else { pax->sparse_tail->next = sb; pax->sparse_tail = sb; } return (ARCHIVE_OK); } static int sparse_list_add(struct pax *pax, int64_t offset, int64_t length) { int64_t last_offset; int r; if (pax->sparse_tail == NULL) last_offset = 0; else { last_offset = pax->sparse_tail->offset + pax->sparse_tail->remaining; } if (last_offset < offset) { /* Add a hole block. */ r = _sparse_list_add_block(pax, last_offset, offset - last_offset, 1); if (r != ARCHIVE_OK) return (r); } /* Add data block. 
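 * For example (an editor's illustration): with an empty list,
 * sparse_list_add(pax, 4096, 512) first appends a hole block
 * covering bytes [0, 4096) and then a 512-byte data block at
 * offset 4096.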
*/ return (_sparse_list_add_block(pax, offset, length, 0)); } Index: projects/clang390-import/contrib/libarchive/libarchive =================================================================== --- projects/clang390-import/contrib/libarchive/libarchive (revision 305016) +++ projects/clang390-import/contrib/libarchive/libarchive (revision 305017) Property changes on: projects/clang390-import/contrib/libarchive/libarchive ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/libarchive/libarchive:r304885-305016 Index: projects/clang390-import/contrib/libarchive =================================================================== --- projects/clang390-import/contrib/libarchive (revision 305016) +++ projects/clang390-import/contrib/libarchive (revision 305017) Property changes on: projects/clang390-import/contrib/libarchive ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/libarchive:r304885-305016 Index: projects/clang390-import/contrib/netbsd-tests/usr.bin/dirname/t_dirname.sh =================================================================== --- projects/clang390-import/contrib/netbsd-tests/usr.bin/dirname/t_dirname.sh (revision 305016) +++ projects/clang390-import/contrib/netbsd-tests/usr.bin/dirname/t_dirname.sh (revision 305017) @@ -1,49 +1,52 @@ # $NetBSD: t_dirname.sh,v 1.1 2012/03/17 16:33:13 jruoho Exp $ # # Copyright (c) 2008 The NetBSD Foundation, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# atf_test_case basic basic_head() { atf_set "descr" "Checks basic functionality" } basic_body() { + # Begin FreeBSD + atf_tc_expect_fail "dirname //usr//bin doesn't return //usr like it used to; bug # 212193" + # End FreeBSD atf_check -o inline:"/\n" dirname / atf_check -o inline:"/\n" dirname // atf_check -o inline:"/usr\n" dirname /usr/bin/ atf_check -o inline:"//usr\n" dirname //usr//bin// atf_check -o inline:".\n" dirname usr atf_check -o inline:".\n" dirname "" atf_check -o inline:"/\n" dirname /usr atf_check -o inline:"/usr\n" dirname /usr/bin atf_check -o inline:"usr\n" dirname usr/bin } atf_init_test_cases() { atf_add_test_case basic } Index: projects/clang390-import/lib/libc/aarch64/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/aarch64/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/aarch64/sys/Makefile.inc (revision 305017) @@ -1,24 +1,23 @@ # $FreeBSD$ MIASM:= ${MIASM:Nfreebsd[467]_*} SRCS+= __vdso_gettc.c -#MDASM= ptrace.S MDASM= cerror.S \ shmat.S \ sigreturn.S \ syscall.S \ vfork.S # Don't generate default code for these syscalls: NOASM= break.o \ exit.o \ getlogin.o \ sbrk.o \ sstk.o \ vfork.o \ yield.o PSEUDO= _exit.o \ _getlogin.o Index: projects/clang390-import/lib/libc/amd64/sys/ptrace.S =================================================================== --- projects/clang390-import/lib/libc/amd64/sys/ptrace.S (revision 305016) +++ projects/clang390-import/lib/libc/amd64/sys/ptrace.S (nonexistent) @@ -1,67 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#if defined(SYSLIBC_SCCS) && !defined(lint) - .asciz "@(#)ptrace.s 5.1 (Berkeley) 4/23/90" -#endif /* SYSLIBC_SCCS and not lint */ -#include -__FBSDID("$FreeBSD$"); - -#include "SYS.h" - - .globl CNAME(__error) - .type CNAME(__error),@function - -ENTRY(ptrace) - pushq %rdi /* align stack */ - pushq %rdi - pushq %rsi - pushq %rdx - pushq %rcx -#ifdef PIC - callq PIC_PLT(CNAME(__error)) -#else - callq CNAME(__error) -#endif - popq %rcx - popq %rdx - popq %rsi - popq %rdi - popq %rdi - movl $0,(%rax) - mov $SYS_ptrace,%eax - KERNCALL - jb HIDENAME(cerror) - ret -END(ptrace) - - .section .note.GNU-stack,"",%progbits Property changes on: projects/clang390-import/lib/libc/amd64/sys/ptrace.S ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang390-import/lib/libc/amd64/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/amd64/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/amd64/sys/Makefile.inc (revision 305017) @@ -1,13 +1,13 @@ # from: Makefile.inc,v 1.1 1993/09/03 19:04:23 jtc Exp # $FreeBSD$ SRCS+= amd64_get_fsbase.c amd64_get_gsbase.c amd64_set_fsbase.c \ amd64_set_gsbase.c -MDASM= vfork.S brk.S cerror.S exect.S getcontext.S ptrace.S \ +MDASM= vfork.S brk.S cerror.S exect.S getcontext.S \ sbrk.S setlogin.S sigreturn.S # Don't generate default code for these syscalls: NOASM= break.o exit.o getlogin.o sstk.o vfork.o yield.o PSEUDO= _getlogin.o _exit.o Index: projects/clang390-import/lib/libc/arm/sys/ptrace.S =================================================================== --- projects/clang390-import/lib/libc/arm/sys/ptrace.S (revision 305016) +++ projects/clang390-import/lib/libc/arm/sys/ptrace.S (nonexistent) @@ -1,51 +0,0 @@ -/* $NetBSD: ptrace.S,v 1.7 2003/08/07 16:42:04 agc Exp $ */ - -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * from: @(#)ptrace.s 5.1 (Berkeley) 4/23/90 - */ - -#include -__FBSDID("$FreeBSD$"); -#include "SYS.h" - -ENTRY(ptrace) - stmfd sp!, {r0-r3, lr} - sub sp, sp, #4 /* align stack */ - bl PIC_SYM(_C_LABEL(__error), PLT) - add sp, sp, #4 /* unalign stack */ - mov r1, #0x00000000 - str r1, [r0] - ldmfd sp!, {r0-r3, lr} - SYSTRAP(ptrace) - bcs PIC_SYM(CERROR, PLT) - RET -END(ptrace) - - .section .note.GNU-stack,"",%progbits Property changes on: projects/clang390-import/lib/libc/arm/sys/ptrace.S ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang390-import/lib/libc/arm/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/arm/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/arm/sys/Makefile.inc (revision 305017) @@ -1,10 +1,10 @@ # $FreeBSD$ SRCS+= __vdso_gettc.c -MDASM= Ovfork.S brk.S cerror.S ptrace.S sbrk.S shmat.S sigreturn.S syscall.S +MDASM= Ovfork.S brk.S cerror.S sbrk.S shmat.S sigreturn.S syscall.S # Don't generate default code for these syscalls: NOASM= break.o exit.o getlogin.o sstk.o vfork.o yield.o PSEUDO= _exit.o _getlogin.o Index: projects/clang390-import/lib/libc/i386/sys/ptrace.S =================================================================== --- projects/clang390-import/lib/libc/i386/sys/ptrace.S (revision 305016) +++ projects/clang390-import/lib/libc/i386/sys/ptrace.S (nonexistent) @@ -1,59 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#if defined(SYSLIBC_SCCS) && !defined(lint) - .asciz "@(#)ptrace.s 5.1 (Berkeley) 4/23/90" -#endif /* SYSLIBC_SCCS and not lint */ -#include -__FBSDID("$FreeBSD$"); - -#include "SYS.h" - - .globl CNAME(__error) - .type CNAME(__error),@function - -ENTRY(ptrace) -#ifdef PIC - PIC_PROLOGUE - call PIC_PLT(CNAME(__error)) - PIC_EPILOGUE -#else - call CNAME(__error) -#endif - movl $0,(%eax) - mov $SYS_ptrace,%eax - KERNCALL - jb HIDENAME(cerror) - ret -END(ptrace) - - .section .note.GNU-stack,"",%progbits Property changes on: projects/clang390-import/lib/libc/i386/sys/ptrace.S ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang390-import/lib/libc/i386/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/i386/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/i386/sys/Makefile.inc (revision 305017) @@ -1,23 +1,23 @@ # from: Makefile.inc,v 1.1 1993/09/03 19:04:23 jtc Exp # $FreeBSD$ .if !defined(COMPAT_32BIT) SRCS+= i386_clr_watch.c i386_set_watch.c i386_vm86.c .endif SRCS+= i386_get_fsbase.c i386_get_gsbase.c i386_get_ioperm.c i386_get_ldt.c \ i386_set_fsbase.c i386_set_gsbase.c i386_set_ioperm.c i386_set_ldt.c -MDASM= Ovfork.S brk.S cerror.S exect.S getcontext.S ptrace.S \ +MDASM= Ovfork.S brk.S cerror.S exect.S getcontext.S \ sbrk.S setlogin.S sigreturn.S syscall.S # Don't generate default code for these syscalls: NOASM= break.o exit.o getlogin.o sstk.o vfork.o yield.o PSEUDO= _getlogin.o _exit.o MAN+= i386_get_ioperm.2 i386_get_ldt.2 i386_vm86.2 MAN+= i386_set_watch.3 MLINKS+=i386_get_ioperm.2 i386_set_ioperm.2 MLINKS+=i386_get_ldt.2 i386_set_ldt.2 MLINKS+=i386_set_watch.3 i386_clr_watch.3 Index: projects/clang390-import/lib/libc/include/libc_private.h =================================================================== --- projects/clang390-import/lib/libc/include/libc_private.h (revision 305016) +++ projects/clang390-import/lib/libc/include/libc_private.h (revision 305017) @@ -1,401 +1,402 @@ /* * Copyright (c) 1998 John Birrell . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * * Private definitions for libc, libc_r and libpthread. * */ #ifndef _LIBC_PRIVATE_H_ #define _LIBC_PRIVATE_H_ #include #include /* * This global flag is non-zero when a process has created one * or more threads. It is used to avoid calling locking functions * when they are not required. */ extern int __isthreaded; /* * Elf_Auxinfo *__elf_aux_vector, the pointer to the ELF aux vector * provided by kernel. Either set for us by rtld, or found at runtime * on stack for static binaries. * * Type is void to avoid polluting whole libc with ELF types. */ extern void *__elf_aux_vector; /* * libc should use libc_dlopen internally, which respects a global * flag where loading of new shared objects can be restricted. */ void *libc_dlopen(const char *, int); /* * For dynamic linker. */ void _rtld_error(const char *fmt, ...); /* * File lock contention is difficult to diagnose without knowing * where locks were set. Allow a debug library to be built which * records the source file and line number of each lock call. */ #ifdef _FLOCK_DEBUG #define _FLOCKFILE(x) _flockfile_debug(x, __FILE__, __LINE__) #else #define _FLOCKFILE(x) _flockfile(x) #endif /* * Macros for locking and unlocking FILEs. These test if the * process is threaded to avoid locking when not required. */ #define FLOCKFILE(fp) if (__isthreaded) _FLOCKFILE(fp) #define FUNLOCKFILE(fp) if (__isthreaded) _funlockfile(fp) struct _spinlock; extern struct _spinlock __stdio_thread_lock __hidden; #define STDIO_THREAD_LOCK() \ do { \ if (__isthreaded) \ _SPINLOCK(&__stdio_thread_lock); \ } while (0) #define STDIO_THREAD_UNLOCK() \ do { \ if (__isthreaded) \ _SPINUNLOCK(&__stdio_thread_lock); \ } while (0) void __libc_spinlock_stub(struct _spinlock *); void __libc_spinunlock_stub(struct _spinlock *); /* * Indexes into the pthread jump table. * * Warning! If you change this type, you must also change the threads * libraries that reference it (libc_r, libpthread). 
*/ typedef enum { PJT_ATFORK, PJT_ATTR_DESTROY, PJT_ATTR_GETDETACHSTATE, PJT_ATTR_GETGUARDSIZE, PJT_ATTR_GETINHERITSCHED, PJT_ATTR_GETSCHEDPARAM, PJT_ATTR_GETSCHEDPOLICY, PJT_ATTR_GETSCOPE, PJT_ATTR_GETSTACKADDR, PJT_ATTR_GETSTACKSIZE, PJT_ATTR_INIT, PJT_ATTR_SETDETACHSTATE, PJT_ATTR_SETGUARDSIZE, PJT_ATTR_SETINHERITSCHED, PJT_ATTR_SETSCHEDPARAM, PJT_ATTR_SETSCHEDPOLICY, PJT_ATTR_SETSCOPE, PJT_ATTR_SETSTACKADDR, PJT_ATTR_SETSTACKSIZE, PJT_CANCEL, PJT_CLEANUP_POP, PJT_CLEANUP_PUSH, PJT_COND_BROADCAST, PJT_COND_DESTROY, PJT_COND_INIT, PJT_COND_SIGNAL, PJT_COND_TIMEDWAIT, PJT_COND_WAIT, PJT_DETACH, PJT_EQUAL, PJT_EXIT, PJT_GETSPECIFIC, PJT_JOIN, PJT_KEY_CREATE, PJT_KEY_DELETE, PJT_KILL, PJT_MAIN_NP, PJT_MUTEXATTR_DESTROY, PJT_MUTEXATTR_INIT, PJT_MUTEXATTR_SETTYPE, PJT_MUTEX_DESTROY, PJT_MUTEX_INIT, PJT_MUTEX_LOCK, PJT_MUTEX_TRYLOCK, PJT_MUTEX_UNLOCK, PJT_ONCE, PJT_RWLOCK_DESTROY, PJT_RWLOCK_INIT, PJT_RWLOCK_RDLOCK, PJT_RWLOCK_TRYRDLOCK, PJT_RWLOCK_TRYWRLOCK, PJT_RWLOCK_UNLOCK, PJT_RWLOCK_WRLOCK, PJT_SELF, PJT_SETCANCELSTATE, PJT_SETCANCELTYPE, PJT_SETSPECIFIC, PJT_SIGMASK, PJT_TESTCANCEL, PJT_CLEANUP_POP_IMP, PJT_CLEANUP_PUSH_IMP, PJT_CANCEL_ENTER, PJT_CANCEL_LEAVE, PJT_MUTEX_CONSISTENT, PJT_MUTEXATTR_GETROBUST, PJT_MUTEXATTR_SETROBUST, PJT_MAX } pjt_index_t; typedef int (*pthread_func_t)(void); typedef pthread_func_t pthread_func_entry_t[2]; extern pthread_func_entry_t __thr_jtable[]; void __set_error_selector(int *(*arg)(void)); int _pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex, void *(calloc_cb)(__size_t, __size_t)); typedef int (*interpos_func_t)(void); interpos_func_t *__libc_interposing_slot(int interposno); extern interpos_func_t __libc_interposing[] __hidden; enum { INTERPOS_accept, INTERPOS_accept4, INTERPOS_aio_suspend, INTERPOS_close, INTERPOS_connect, INTERPOS_fcntl, INTERPOS_fsync, INTERPOS_fork, INTERPOS_msync, INTERPOS_nanosleep, INTERPOS_openat, INTERPOS_poll, INTERPOS_pselect, INTERPOS_recvfrom, INTERPOS_recvmsg, INTERPOS_select, INTERPOS_sendmsg, INTERPOS_sendto, INTERPOS_setcontext, INTERPOS_sigaction, INTERPOS_sigprocmask, INTERPOS_sigsuspend, INTERPOS_sigwait, INTERPOS_sigtimedwait, INTERPOS_sigwaitinfo, INTERPOS_swapcontext, INTERPOS_system, INTERPOS_tcdrain, INTERPOS_read, INTERPOS_readv, INTERPOS_wait4, INTERPOS_write, INTERPOS_writev, INTERPOS__pthread_mutex_init_calloc_cb, INTERPOS_spinlock, INTERPOS_spinunlock, INTERPOS_kevent, INTERPOS_wait6, INTERPOS_ppoll, INTERPOS_map_stacks_exec, INTERPOS_fdatasync, INTERPOS_MAX }; /* * yplib internal interfaces */ #ifdef YP int _yp_check(char **); #endif /* * Initialise TLS for static programs */ void _init_tls(void); /* * Provides pthread_once()-like functionality for both single-threaded * and multi-threaded applications. */ int _once(pthread_once_t *, void (*)(void)); /* * Set the TLS thread pointer */ void _set_tp(void *tp); /* * This is a pointer in the C run-time startup code. It is used * by getprogname() and setprogname(). */ extern const char *__progname; /* * This function is used by the threading libraries to notify malloc that a * thread is exiting. */ void _malloc_thread_cleanup(void); /* * This function is used by the threading libraries to notify libc that a * thread is exiting, so its thread-local dtors should be called. */ void __cxa_thread_call_dtors(void); /* * These functions are used by the threading libraries in order to protect * malloc across fork(). 
*/ void _malloc_prefork(void); void _malloc_postfork(void); void _malloc_first_thread(void); /* * Function to clean up streams, called from abort() and exit(). */ extern void (*__cleanup)(void) __hidden; /* * Get kern.osreldate to detect ABI revisions. Explicitly * ignores value of $OSVERSION and caches result. */ int __getosreldate(void); #include #include struct aiocb; struct fd_set; struct iovec; struct kevent; struct msghdr; struct pollfd; struct rusage; struct sigaction; struct sockaddr; struct timespec; struct timeval; struct timezone; struct __siginfo; struct __ucontext; struct __wrusage; enum idtype; int __sys_aio_suspend(const struct aiocb * const[], int, const struct timespec *); int __sys_accept(int, struct sockaddr *, __socklen_t *); int __sys_accept4(int, struct sockaddr *, __socklen_t *, int); int __sys_clock_gettime(__clockid_t, struct timespec *ts); int __sys_close(int); int __sys_connect(int, const struct sockaddr *, __socklen_t); int __sys_fcntl(int, int, ...); int __sys_fdatasync(int); int __sys_fsync(int); __pid_t __sys_fork(void); int __sys_ftruncate(int, __off_t); int __sys_gettimeofday(struct timeval *, struct timezone *); int __sys_kevent(int, const struct kevent *, int, struct kevent *, int, const struct timespec *); __off_t __sys_lseek(int, __off_t, int); void *__sys_mmap(void *, __size_t, int, int, int, __off_t); int __sys_msync(void *, __size_t, int); int __sys_nanosleep(const struct timespec *, struct timespec *); int __sys_open(const char *, int, ...); int __sys_openat(int, const char *, int, ...); int __sys_pselect(int, struct fd_set *, struct fd_set *, struct fd_set *, const struct timespec *, const __sigset_t *); +int __sys_ptrace(int, __pid_t, char *, int); int __sys_poll(struct pollfd *, unsigned, int); int __sys_ppoll(struct pollfd *, unsigned, const struct timespec *, const __sigset_t *); __ssize_t __sys_pread(int, void *, __size_t, __off_t); __ssize_t __sys_pwrite(int, const void *, __size_t, __off_t); __ssize_t __sys_read(int, void *, __size_t); __ssize_t __sys_readv(int, const struct iovec *, int); __ssize_t __sys_recv(int, void *, __size_t, int); __ssize_t __sys_recvfrom(int, void *, __size_t, int, struct sockaddr *, __socklen_t *); __ssize_t __sys_recvmsg(int, struct msghdr *, int); int __sys_select(int, struct fd_set *, struct fd_set *, struct fd_set *, struct timeval *); __ssize_t __sys_sendmsg(int, const struct msghdr *, int); __ssize_t __sys_sendto(int, const void *, __size_t, int, const struct sockaddr *, __socklen_t); int __sys_setcontext(const struct __ucontext *); int __sys_sigaction(int, const struct sigaction *, struct sigaction *); int __sys_sigprocmask(int, const __sigset_t *, __sigset_t *); int __sys_sigsuspend(const __sigset_t *); int __sys_sigtimedwait(const __sigset_t *, struct __siginfo *, const struct timespec *); int __sys_sigwait(const __sigset_t *, int *); int __sys_sigwaitinfo(const __sigset_t *, struct __siginfo *); int __sys_swapcontext(struct __ucontext *, const struct __ucontext *); int __sys_thr_kill(long, int); int __sys_thr_self(long *); int __sys_truncate(const char *, __off_t); __pid_t __sys_wait4(__pid_t, int *, int, struct rusage *); __pid_t __sys_wait6(enum idtype, __id_t, int *, int, struct __wrusage *, struct __siginfo *); __ssize_t __sys_write(int, const void *, __size_t); __ssize_t __sys_writev(int, const struct iovec *, int); int __libc_sigaction(int, const struct sigaction *, struct sigaction *) __hidden; int __libc_sigprocmask(int, const __sigset_t *, __sigset_t *) __hidden; int __libc_sigsuspend(const 
__sigset_t *) __hidden; int __libc_sigwait(const __sigset_t * __restrict, int * restrict sig); int __libc_system(const char *); int __libc_tcdrain(int); int __fcntl_compat(int fd, int cmd, ...); int __sys_futimens(int fd, const struct timespec *times) __hidden; int __sys_utimensat(int fd, const char *path, const struct timespec *times, int flag) __hidden; /* execve() with PATH processing to implement posix_spawnp() */ int _execvpe(const char *, char * const *, char * const *); int _elf_aux_info(int aux, void *buf, int buflen); struct dl_phdr_info; int __elf_phdr_match_addr(struct dl_phdr_info *, void *); void __init_elf_aux_vector(void); void __libc_map_stacks_exec(void); void _pthread_cancel_enter(int); void _pthread_cancel_leave(int); #endif /* _LIBC_PRIVATE_H_ */ Index: projects/clang390-import/lib/libc/mips/sys/ptrace.S =================================================================== --- projects/clang390-import/lib/libc/mips/sys/ptrace.S (revision 305016) +++ projects/clang390-import/lib/libc/mips/sys/ptrace.S (nonexistent) @@ -1,71 +0,0 @@ -/* $NetBSD: ptrace.S,v 1.9 2003/08/07 16:42:17 agc Exp $ */ - -/*- - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Ralph Campbell. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); -#include "SYS.h" - -#if defined(LIBC_SCCS) && !defined(lint) - ASMSTR("from: @(#)ptrace.s 8.1 (Berkeley) 6/4/93") - ASMSTR("$NetBSD: ptrace.S,v 1.9 2003/08/07 16:42:17 agc Exp $") -#endif /* LIBC_SCCS and not lint */ - -NESTED_NOPROFILE(ptrace, CALLFRAME_SIZ, ra) - .mask 0x80000000, (CALLFRAME_RA - CALLFRAME_SIZ) - SETUP_GP - PTR_SUBU sp, sp, CALLFRAME_SIZ - SETUP_GP64(CALLFRAME_GP, ptrace) - SAVE_GP(CALLFRAME_GP) - - PTR_S ra, CALLFRAME_RA(sp) - - PTR_LA t9, _C_LABEL(__error) # locate address of errno - jalr t9 - - PTR_L ra, CALLFRAME_RA(sp) - INT_S zero, 0(v0) # update errno value - - li v0, SYS_ptrace - syscall - - # Load __cerror's address using our gp, then restore it. 
- PTR_LA t9, __cerror - RESTORE_GP64 - PTR_ADDU sp, sp, CALLFRAME_SIZ - - bne a3, zero, 1f - - j ra -1: j t9 -END(ptrace) Property changes on: projects/clang390-import/lib/libc/mips/sys/ptrace.S ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang390-import/lib/libc/mips/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/mips/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/mips/sys/Makefile.inc (revision 305017) @@ -1,11 +1,11 @@ # $FreeBSD$ SRCS+= trivial-vdso_tc.c MDASM= Ovfork.S brk.S cerror.S exect.S \ - ptrace.S sbrk.S syscall.S + sbrk.S syscall.S # Don't generate default code for these syscalls: NOASM= break.o exit.o getlogin.o sstk.o vfork.o yield.o PSEUDO= _exit.o _getlogin.o Index: projects/clang390-import/lib/libc/powerpc/sys/ptrace.S =================================================================== --- projects/clang390-import/lib/libc/powerpc/sys/ptrace.S (revision 305016) +++ projects/clang390-import/lib/libc/powerpc/sys/ptrace.S (nonexistent) @@ -1,61 +0,0 @@ -/*- - * Copyright (c) 2002 Peter Grehan. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ -/* $NetBSD: ptrace.S,v 1.3 2000/02/23 20:16:57 kleink Exp $ */ - -#include -__FBSDID("$FreeBSD$"); - -#include "SYS.h" - -ENTRY(ptrace) - mflr %r0 - stwu %r1,-32(%r1) - stw %r0,36(%r1) - stw %r3,8(%r1) - stw %r4,12(%r1) - stw %r5,16(%r1) - stw %r6,20(%r1) - - bl PIC_PLT(CNAME(__error)) - li %r7,0 - stw %r7,0(%r3) - - lwz %r3,8(%r1) - lwz %r4,12(%r1) - lwz %r5,16(%r1) - lwz %r0,36(%r1) - lwz %r6,20(%r1) - mtlr %r0 - la %r1,32(%r1) - li %r0,SYS_ptrace - sc - bso 1f - blr -1: - b PIC_PLT(HIDENAME(cerror)) -END(ptrace) - - .section .note.GNU-stack,"",%progbits Property changes on: projects/clang390-import/lib/libc/powerpc/sys/ptrace.S ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang390-import/lib/libc/powerpc/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/powerpc/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/powerpc/sys/Makefile.inc (revision 305017) @@ -1,8 +1,8 @@ # $FreeBSD$ -MDASM+= brk.S cerror.S exect.S ptrace.S sbrk.S setlogin.S +MDASM+= brk.S cerror.S exect.S sbrk.S setlogin.S # Don't generate default code for these syscalls: NOASM= break.o exit.o getlogin.o sstk.o yield.o PSEUDO= _getlogin.o _exit.o Index: projects/clang390-import/lib/libc/powerpc64/sys/ptrace.S =================================================================== --- projects/clang390-import/lib/libc/powerpc64/sys/ptrace.S (revision 305016) +++ projects/clang390-import/lib/libc/powerpc64/sys/ptrace.S (nonexistent) @@ -1,68 +0,0 @@ -/*- - * Copyright (c) 2002 Peter Grehan. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ -/* $NetBSD: ptrace.S,v 1.3 2000/02/23 20:16:57 kleink Exp $ */ - -#include -__FBSDID("$FreeBSD$"); - -#include "SYS.h" - -ENTRY(ptrace) - mflr %r0 - std %r0,16(%r1) - stdu %r1,-80(%r1) - stw %r3,48(%r1) - stw %r4,52(%r1) - std %r5,56(%r1) - stw %r6,64(%r1) - - bl CNAME(__error) - nop - li %r7,0 - stw %r7,0(%r3) - - lwz %r3,48(%r1) - lwz %r4,52(%r1) - ld %r5,56(%r1) - lwz %r6,64(%r1) - ld %r1,0(%r1) - ld %r0,16(%r1) - mtlr %r0 - li %r0,SYS_ptrace - sc - bso 1f - blr -1: - stdu %r1,-48(%r1) /* lr already saved */ - bl HIDENAME(cerror) - nop - ld %r1,0(%r1) - ld %r0,16(%r1) - mtlr %r0 - blr -END(ptrace) - - .section .note.GNU-stack,"",%progbits Property changes on: projects/clang390-import/lib/libc/powerpc64/sys/ptrace.S ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang390-import/lib/libc/powerpc64/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/powerpc64/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/powerpc64/sys/Makefile.inc (revision 305017) @@ -1,8 +1,8 @@ # $FreeBSD$ -MDASM+= brk.S cerror.S exect.S ptrace.S sbrk.S setlogin.S +MDASM+= brk.S cerror.S exect.S sbrk.S setlogin.S # Don't generate default code for these syscalls: NOASM= break.o exit.o getlogin.o sstk.o yield.o PSEUDO= _getlogin.o _exit.o Index: projects/clang390-import/lib/libc/sparc64/sys/ptrace.S =================================================================== --- projects/clang390-import/lib/libc/sparc64/sys/ptrace.S (revision 305016) +++ projects/clang390-import/lib/libc/sparc64/sys/ptrace.S (nonexistent) @@ -1,57 +0,0 @@ -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * from: Header: ptrace.s,v 1.2 91/12/20 01:59:00 leres Exp - * from: NetBSD: ptrace.S,v 1.4 2000/07/24 00:11:10 mycroft Exp - */ - -#if defined(SYSLIBC_SCCS) && !defined(lint) - .asciz "@(#)ptrace.s 8.1 (Berkeley) 6/4/93" -#if 0 - RCSID("$NetBSD: ptrace.S,v 1.4 2000/07/24 00:11:10 mycroft Exp $") -#endif -#endif /* SYSLIBC_SCCS and not lint */ -#include -__FBSDID("$FreeBSD$"); - -#include "SYS.h" - -_SYSENTRY(ptrace) - save %sp, -CCFSZ, %sp - call CNAME(__error) - nop - stw %g0, [%o0] - restore - _SYSCALL(ptrace) - retl - nop -_SYSEND(ptrace) Property changes on: projects/clang390-import/lib/libc/sparc64/sys/ptrace.S ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang390-import/lib/libc/sparc64/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/sparc64/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/sparc64/sys/Makefile.inc (revision 305017) @@ -1,20 +1,20 @@ # $FreeBSD$ SRCS+= __sparc_sigtramp_setup.c \ __sparc_utrap.c \ __sparc_utrap_align.c \ __sparc_utrap_emul.c \ __sparc_utrap_fp_disabled.S \ __sparc_utrap_gen.S \ __sparc_utrap_install.c \ __sparc_utrap_setup.c \ sigcode.S CFLAGS+= -I${LIBC_SRCTOP}/sparc64/fpu -MDASM+= brk.S cerror.S exect.S ptrace.S sbrk.S setlogin.S sigaction1.S +MDASM+= brk.S cerror.S exect.S sbrk.S setlogin.S sigaction1.S # Don't generate default code for these syscalls: NOASM= break.o exit.o getlogin.o sstk.o yield.o PSEUDO= _getlogin.o _exit.o Index: projects/clang390-import/lib/libc/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/sys/Makefile.inc (revision 305017) @@ -1,467 +1,468 @@ # @(#)Makefile.inc 8.3 (Berkeley) 10/24/94 # $FreeBSD$ # sys sources .PATH: ${LIBC_SRCTOP}/${LIBC_ARCH}/sys ${LIBC_SRCTOP}/sys # Include the generated makefile containing the *complete* list # of syscall names in MIASM. .include "${LIBC_SRCTOP}/../../sys/sys/syscall.mk" # Include machine dependent definitions. # # MDASM names override the default syscall names in MIASM. # NOASM will prevent the default syscall code from being generated. 
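# PSEUDO names generate only the __sys_name() and _name() entry points,
# leaving the public name() symbol to be supplied elsewhere; for the
# INTERPOSED list below, it comes from a C wrapper such as the new ptrace.c.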
# .sinclude "${LIBC_SRCTOP}/${LIBC_ARCH}/sys/Makefile.inc" SRCS+= clock_gettime.c gettimeofday.c __vdso_gettimeofday.c NOASM+= clock_gettime.o gettimeofday.o PSEUDO+= _clock_gettime.o _gettimeofday.o # Sources common to both syscall interfaces: SRCS+= \ __error.c \ interposing_table.c SRCS+= futimens.c utimensat.c NOASM+= futimens.o utimensat.o PSEUDO+= _futimens.o _utimensat.o SRCS+= pipe.c INTERPOSED = \ accept \ accept4 \ aio_suspend \ close \ connect \ fcntl \ fdatasync \ fsync \ fork \ kevent \ msync \ nanosleep \ open \ openat \ poll \ ppoll \ pselect \ + ptrace \ read \ readv \ recvfrom \ recvmsg \ select \ sendmsg \ sendto \ setcontext \ sigprocmask \ sigsuspend \ sigtimedwait \ sigwait \ sigwaitinfo \ swapcontext \ wait4 \ wait6 \ write \ writev .if ${MACHINE_CPUARCH} == "sparc64" SRCS+= sigaction.c NOASM+= sigaction.o .else INTERPOSED+= sigaction .endif SRCS+= ${INTERPOSED:S/$/.c/} NOASM+= ${INTERPOSED:S/$/.o/} PSEUDO+= ${INTERPOSED:C/^.*$/_&.o/} # Add machine dependent asm sources: SRCS+=${MDASM} # Look though the complete list of syscalls (MIASM) for names that are # not defined with machine dependent implementations (MDASM) and are # not declared for no generation of default code (NOASM). Add each # syscall that satisfies these conditions to the ASM list. .for _asm in ${MIASM} .if (${MDASM:R:M${_asm:R}} == "") .if (${NOASM:R:M${_asm:R}} == "") ASM+=$(_asm) .endif .endif .endfor SASM= ${ASM:S/.o/.S/} SPSEUDO= ${PSEUDO:S/.o/.S/} SRCS+= ${SASM} ${SPSEUDO} SYM_MAPS+= ${LIBC_SRCTOP}/sys/Symbol.map # Generated files CLEANFILES+= ${SASM} ${SPSEUDO} .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" || \ ${MACHINE_CPUARCH} == "powerpc" || ${MACHINE_ARCH:Marmv6*} NOTE_GNU_STACK='\t.section .note.GNU-stack,"",%%progbits\n' .else NOTE_GNU_STACK='' .endif ${SASM}: printf '#include "compat.h"\n' > ${.TARGET} printf '#include "SYS.h"\nRSYSCALL(${.PREFIX})\n' >> ${.TARGET} printf ${NOTE_GNU_STACK} >>${.TARGET} ${SPSEUDO}: printf '#include "compat.h"\n' > ${.TARGET} printf '#include "SYS.h"\nPSEUDO(${.PREFIX:S/_//})\n' \ >> ${.TARGET} printf ${NOTE_GNU_STACK} >>${.TARGET} MAN+= abort2.2 \ accept.2 \ access.2 \ acct.2 \ adjtime.2 \ aio_cancel.2 \ aio_error.2 \ aio_fsync.2 \ aio_mlock.2 \ aio_read.2 \ aio_return.2 \ aio_suspend.2 \ aio_waitcomplete.2 \ aio_write.2 \ bind.2 \ bindat.2 \ brk.2 \ cap_enter.2 \ cap_fcntls_limit.2 \ cap_ioctls_limit.2 \ cap_rights_limit.2 \ chdir.2 \ chflags.2 \ chmod.2 \ chown.2 \ chroot.2 \ clock_gettime.2 \ close.2 \ closefrom.2 \ connect.2 \ connectat.2 \ cpuset.2 \ cpuset_getaffinity.2 \ dup.2 \ execve.2 \ _exit.2 \ extattr_get_file.2 \ fcntl.2 \ ffclock.2 \ fhopen.2 \ flock.2 \ fork.2 \ fsync.2 \ getdirentries.2 \ getdtablesize.2 \ getfh.2 \ getfsstat.2 \ getgid.2 \ getgroups.2 \ getitimer.2 \ getlogin.2 \ getloginclass.2 \ getpeername.2 \ getpgrp.2 \ getpid.2 \ getpriority.2 \ getrlimit.2 \ getrusage.2 \ getsid.2 \ getsockname.2 \ getsockopt.2 \ gettimeofday.2 \ getuid.2 \ intro.2 \ ioctl.2 \ issetugid.2 \ jail.2 \ kenv.2 \ kill.2 \ kldfind.2 \ kldfirstmod.2 \ kldload.2 \ kldnext.2 \ kldstat.2 \ kldsym.2 \ kldunload.2 \ kqueue.2 \ ktrace.2 \ link.2 \ lio_listio.2 \ listen.2 \ lseek.2 \ madvise.2 \ mincore.2 \ minherit.2 \ mkdir.2 \ mkfifo.2 \ mknod.2 \ mlock.2 \ mlockall.2 \ mmap.2 \ modfind.2 \ modnext.2 \ modstat.2 \ mount.2 \ mprotect.2 \ mq_close.2 \ mq_getattr.2 \ mq_notify.2 \ mq_open.2 \ mq_receive.2 \ mq_send.2 \ mq_setattr.2 \ msgctl.2 \ msgget.2 \ msgrcv.2 \ msgsnd.2 \ msync.2 \ munmap.2 \ nanosleep.2 \ nfssvc.2 \ ntp_adjtime.2 \ 
numa_getaffinity.2 \ open.2 \ pathconf.2 \ pdfork.2 \ pipe.2 \ poll.2 \ posix_fadvise.2 \ posix_fallocate.2 \ posix_openpt.2 \ procctl.2 \ profil.2 \ pselect.2 \ ptrace.2 \ quotactl.2 \ read.2 \ readlink.2 \ reboot.2 \ recv.2 \ rename.2 \ revoke.2 \ rfork.2 \ rmdir.2 \ rtprio.2 .if !defined(NO_P1003_1B) MAN+= sched_get_priority_max.2 \ sched_setparam.2 \ sched_setscheduler.2 \ sched_yield.2 .endif MAN+= sctp_generic_recvmsg.2 \ sctp_generic_sendmsg.2 \ sctp_peeloff.2 \ select.2 \ semctl.2 \ semget.2 \ semop.2 \ send.2 \ setfib.2 \ sendfile.2 \ setgroups.2 \ setpgid.2 \ setregid.2 \ setresuid.2 \ setreuid.2 \ setsid.2 \ setuid.2 \ shmat.2 \ shmctl.2 \ shmget.2 \ shm_open.2 \ shutdown.2 \ sigaction.2 \ sigaltstack.2 \ sigpending.2 \ sigprocmask.2 \ sigqueue.2 \ sigreturn.2 \ sigstack.2 \ sigsuspend.2 \ sigwait.2 \ sigwaitinfo.2 \ socket.2 \ socketpair.2 \ stat.2 \ statfs.2 \ swapon.2 \ symlink.2 \ sync.2 \ sysarch.2 \ syscall.2 \ thr_exit.2 \ thr_kill.2 \ thr_new.2 \ thr_self.2 \ thr_set_name.2 \ timer_create.2 \ timer_delete.2 \ timer_settime.2 \ truncate.2 \ umask.2 \ undelete.2 \ unlink.2 \ utimensat.2 \ utimes.2 \ utrace.2 \ uuidgen.2 \ vfork.2 \ wait.2 \ write.2 \ _umtx_op.2 MLINKS+=accept.2 accept4.2 MLINKS+=access.2 eaccess.2 \ access.2 faccessat.2 MLINKS+=brk.2 sbrk.2 MLINKS+=cap_enter.2 cap_getmode.2 MLINKS+=cap_fcntls_limit.2 cap_fcntls_get.2 MLINKS+=cap_ioctls_limit.2 cap_ioctls_get.2 MLINKS+=cap_rights_limit.2 cap_rights_get.2 MLINKS+=chdir.2 fchdir.2 MLINKS+=chflags.2 chflagsat.2 \ chflags.2 fchflags.2 \ chflags.2 lchflags.2 MLINKS+=chmod.2 fchmod.2 \ chmod.2 fchmodat.2 \ chmod.2 lchmod.2 MLINKS+=chown.2 fchown.2 \ chown.2 fchownat.2 \ chown.2 lchown.2 MLINKS+=clock_gettime.2 clock_getres.2 \ clock_gettime.2 clock_settime.2 MLINKS+=cpuset.2 cpuset_getid.2 \ cpuset.2 cpuset_setid.2 MLINKS+=cpuset_getaffinity.2 cpuset_setaffinity.2 MLINKS+=dup.2 dup2.2 MLINKS+=execve.2 fexecve.2 MLINKS+=extattr_get_file.2 extattr.2 \ extattr_get_file.2 extattr_delete_fd.2 \ extattr_get_file.2 extattr_delete_file.2 \ extattr_get_file.2 extattr_delete_link.2 \ extattr_get_file.2 extattr_get_fd.2 \ extattr_get_file.2 extattr_get_link.2 \ extattr_get_file.2 extattr_list_fd.2 \ extattr_get_file.2 extattr_list_file.2 \ extattr_get_file.2 extattr_list_link.2 \ extattr_get_file.2 extattr_set_fd.2 \ extattr_get_file.2 extattr_set_file.2 \ extattr_get_file.2 extattr_set_link.2 MLINKS+=ffclock.2 ffclock_getcounter.2 \ ffclock.2 ffclock_getestimate.2 \ ffclock.2 ffclock_setestimate.2 MLINKS+=fhopen.2 fhstat.2 fhopen.2 fhstatfs.2 MLINKS+=fsync.2 fdatasync.2 MLINKS+=getdirentries.2 getdents.2 MLINKS+=getfh.2 lgetfh.2 MLINKS+=getgid.2 getegid.2 MLINKS+=getitimer.2 setitimer.2 MLINKS+=getlogin.2 getlogin_r.3 MLINKS+=getlogin.2 setlogin.2 MLINKS+=getloginclass.2 setloginclass.2 MLINKS+=getpgrp.2 getpgid.2 MLINKS+=getpid.2 getppid.2 MLINKS+=getpriority.2 setpriority.2 MLINKS+=getrlimit.2 setrlimit.2 MLINKS+=getsockopt.2 setsockopt.2 MLINKS+=gettimeofday.2 settimeofday.2 MLINKS+=getuid.2 geteuid.2 MLINKS+=intro.2 errno.2 MLINKS+=jail.2 jail_attach.2 \ jail.2 jail_get.2 \ jail.2 jail_remove.2 \ jail.2 jail_set.2 MLINKS+=kldunload.2 kldunloadf.2 MLINKS+=kqueue.2 kevent.2 \ kqueue.2 EV_SET.3 MLINKS+=link.2 linkat.2 MLINKS+=madvise.2 posix_madvise.2 MLINKS+=mkdir.2 mkdirat.2 MLINKS+=mkfifo.2 mkfifoat.2 MLINKS+=mknod.2 mknodat.2 MLINKS+=mlock.2 munlock.2 MLINKS+=mlockall.2 munlockall.2 MLINKS+=modnext.2 modfnext.2 MLINKS+=mount.2 nmount.2 \ mount.2 unmount.2 MLINKS+=mq_receive.2 mq_timedreceive.2 MLINKS+=mq_send.2 
mq_timedsend.2 MLINKS+=ntp_adjtime.2 ntp_gettime.2 MLINKS+=numa_getaffinity.2 numa_setaffinity.2 MLINKS+=open.2 openat.2 MLINKS+=pathconf.2 fpathconf.2 MLINKS+=pathconf.2 lpathconf.2 MLINKS+=pdfork.2 pdgetpid.2\ pdfork.2 pdkill.2 \ pdfork.2 pdwait4.2 MLINKS+=pipe.2 pipe2.2 MLINKS+=poll.2 ppoll.2 MLINKS+=read.2 pread.2 \ read.2 preadv.2 \ read.2 readv.2 MLINKS+=readlink.2 readlinkat.2 MLINKS+=recv.2 recvfrom.2 \ recv.2 recvmsg.2 MLINKS+=rename.2 renameat.2 MLINKS+=rtprio.2 rtprio_thread.2 .if !defined(NO_P1003_1B) MLINKS+=sched_get_priority_max.2 sched_get_priority_min.2 \ sched_get_priority_max.2 sched_rr_get_interval.2 MLINKS+=sched_setparam.2 sched_getparam.2 MLINKS+=sched_setscheduler.2 sched_getscheduler.2 .endif MLINKS+=select.2 FD_CLR.3 \ select.2 FD_ISSET.3 \ select.2 FD_SET.3 \ select.2 FD_ZERO.3 MLINKS+=send.2 sendmsg.2 \ send.2 sendto.2 MLINKS+=setpgid.2 setpgrp.2 MLINKS+=setresuid.2 getresgid.2 \ setresuid.2 getresuid.2 \ setresuid.2 setresgid.2 MLINKS+=setuid.2 setegid.2 \ setuid.2 seteuid.2 \ setuid.2 setgid.2 MLINKS+=shmat.2 shmdt.2 MLINKS+=shm_open.2 shm_unlink.2 MLINKS+=sigwaitinfo.2 sigtimedwait.2 MLINKS+=stat.2 fstat.2 \ stat.2 fstatat.2 \ stat.2 lstat.2 MLINKS+=statfs.2 fstatfs.2 MLINKS+=swapon.2 swapoff.2 MLINKS+=symlink.2 symlinkat.2 MLINKS+=syscall.2 __syscall.2 MLINKS+=timer_settime.2 timer_getoverrun.2 \ timer_settime.2 timer_gettime.2 MLINKS+=thr_kill.2 thr_kill2.2 MLINKS+=truncate.2 ftruncate.2 MLINKS+=unlink.2 unlinkat.2 MLINKS+=utimensat.2 futimens.2 MLINKS+=utimes.2 futimes.2 \ utimes.2 futimesat.2 \ utimes.2 lutimes.2 MLINKS+=wait.2 wait3.2 \ wait.2 wait4.2 \ wait.2 waitpid.2 \ wait.2 waitid.2 \ wait.2 wait6.2 MLINKS+=write.2 pwrite.2 \ write.2 pwritev.2 \ write.2 writev.2 Index: projects/clang390-import/lib/libc/sys/ptrace.c =================================================================== --- projects/clang390-import/lib/libc/sys/ptrace.c (nonexistent) +++ projects/clang390-import/lib/libc/sys/ptrace.c (revision 305017) @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016 The FreeBSD Foundation. + * All rights reserved. + * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice(s), this list of conditions and the following disclaimer as + * the first lines of this file unmodified other than the possible + * addition of one or more copyright notices. + * 2. Redistributions in binary form must reproduce the above copyright + * notice(s), this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include "libc_private.h" + +__weak_reference(_ptrace, ptrace); + +int +_ptrace(int request, pid_t pid, caddr_t addr, int data) +{ + + errno = 0; + return (__sys_ptrace(request, pid, addr, data)); +} Property changes on: projects/clang390-import/lib/libc/sys/ptrace.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/clang390-import/release/doc/en_US.ISO8859-1/relnotes/article.xml =================================================================== --- projects/clang390-import/release/doc/en_US.ISO8859-1/relnotes/article.xml (revision 305016) +++ projects/clang390-import/release/doc/en_US.ISO8859-1/relnotes/article.xml (revision 305017) @@ -1,1931 +1,402 @@ %release; %sponsor; %vendor; ]>
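The new lib/libc/sys/ptrace.c above zeroes errno because requests such as
PT_READ_I and PT_READ_D return the fetched word in-band, where -1 is a
legitimate value; the only reliable failure test is errno. A minimal sketch
of a caller relying on that contract (read_word() is a hypothetical helper,
not part of this change):

#include <sys/types.h>
#include <sys/ptrace.h>
#include <errno.h>

int
read_word(pid_t pid, caddr_t addr, int *out)
{
	int w;

	/* libc's ptrace() wrapper clears errno before the syscall. */
	w = ptrace(PT_READ_D, pid, addr, 0);
	if (w == -1 && errno != 0)
		return (-1);	/* real failure; errno was set by the kernel */
	*out = w;
	return (0);
}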
&os; &release.current; Release Notes The &os; Project $FreeBSD$ - 2015 2016 The &os; Documentation Project &tm-attrib.freebsd; &tm-attrib.ibm; &tm-attrib.ieee; &tm-attrib.intel; &tm-attrib.sparc; &tm-attrib.general; The release notes for &os; &release.current; contain a summary of the changes made to the &os; base system on the &release.branch; development line. This document lists applicable security advisories that were issued since the last release, as well as significant changes to the &os; kernel and userland. Some brief remarks on upgrading are also presented. Introduction This document contains the release notes for &os; &release.current;. It describes recently added, changed, or deleted features of &os;. It also provides some notes on upgrading from previous versions of &os;. The &release.type; distribution to which these release notes apply represents the latest point along the &release.branch; development branch since &release.branch; was created. Information regarding pre-built, binary &release.type; distributions along this branch can be found at &release.url;. The &release.type; distribution to which these release notes apply represents a point along the &release.branch; development branch between &release.prev; and the future &release.next;. Information regarding pre-built, binary &release.type; distributions along this branch can be found at &release.url;. This distribution of &os; &release.current; is a &release.type; distribution. It can be found at &release.url; or any of its mirrors. More information on obtaining this (or other) &release.type; distributions of &os; can be found in the Obtaining &os; appendix to the &os; Handbook. All users are encouraged to consult the release errata before installing &os;. The errata document is updated with late-breaking information discovered late in the release cycle or after the release. Typically, it contains information on known bugs, security advisories, and corrections to documentation. An up-to-date copy of the errata for &os; &release.current; can be found on the &os; Web site. This document describes the most user-visible new or changed features in &os; since &release.prev;. In general, changes described here are unique to the &release.branch; branch unless specifically marked as &merged; features. Typical release note items document recent security advisories issued after &release.prev;, new drivers or hardware support, new commands or options, major bug fixes, or contributed software upgrades. They may also list changes to major ports/packages or release engineering practices. Clearly the release notes cannot list every single change made to &os; between releases; this document focuses primarily on security advisories, user-visible changes, and major architectural improvements. Upgrading from Previous Releases of &os; Binary upgrades between RELEASE versions (and snapshots of the various security branches) are supported using the &man.freebsd-update.8; utility. The binary upgrade procedure will update unmodified userland utilities, as well as unmodified GENERIC kernels distributed as a part of an official &os; release. The &man.freebsd-update.8; utility requires that the host being upgraded have Internet connectivity. Source-based upgrades (those based on recompiling the &os; base system from source code) from previous versions are supported, according to the instructions in /usr/src/UPDATING. Upgrading &os; should only be attempted after backing up all data and configuration files. 
Security and Errata This section lists the various Security Advisories and Errata Notices since &release.prev;. Security Advisories &security; Errata Notices &errata; Userland This section covers changes and additions to userland applications, contributed software, and system utilities. Userland Configuration Changes - The default &man.newsyslog.conf.5; now - includes files in the - /etc/newsyslog.conf.d/ and - /usr/local/etc/newsyslog.conf.d/ - directories by default for &man.newsyslog.8;. - - The &man.mailwrapper.8; utility has been - updated to use &man.mailer.conf.5; from the - LOCALBASE environment variable, which - defaults to /usr/local - if unset. - - The MK_ARM_EABI - &man.src.conf.5; option has been removed. - - The ntp suite - has been updated to version 4.2.8p8. +   Userland Application Changes - When unable to load a kernel module with - &man.kldload.8;, a message informing to view output of - &man.dmesg.8; is now printed, opposed to the previous output - Exec format error.. - - Allow &man.pciconf.8; to identify PCI - devices that are attached to a driver to be identified by - their device name instead of just the selector. Additionally, - an optional device argument to the -l flag - to restrict the output to only listing details about a single - device. - - A new flag, onifconsole - has been added to /etc/ttys. This allows - the system to provide a login prompt via serial console if the - device is an active kernel console, otherwise it is equivalent - to off. - - Support for displaying VPD for PCI - devices via &man.pciconf.8; has been added. - - &man.ping.8; protects against malicious - network packets using the Capsicum framework to drop - privileges. - - The &man.ps.1; utility has been - updated to include the -J flag, used to - filter output by matching &man.jail.8; IDs and names. - Additionally, argument 0 can be used to - -J to only list processes running on the - host system. - - The &man.top.1; utility has been updated - to filter by &man.jail.8; ID or name, in followup to the - &man.ps.1; change in r265229. - - The &man.pmcstat.8; utility has been - updated to include a new flag, -l, which - ends event collection after the specified number of - seconds. - - The &man.ps.1; utility has been updated - to include a new keyword, tracer, which - displays the PID of the tracing - process. - - Support for adding empty partitions has - been added to the &man.mkimg.1; utility. - - The &man.primes.6; utility has been - updated to correctly enumerate prime numbers between - 4295098369 and - 3825123056546413050, which prior to this - change, it would be possible for returned values to be - incorrectly identified as prime numbers. - - The &man.mkimg.1; utility has been - updated to include three options used to print information - about &man.mkimg.1; itself: - - - - - - - - Option - Output - - - - - - --version - The current version of the &man.mkimg.1; - utility - - - - --formats - The disk image file formats supported by - &man.mkimg.1; - - - - --schemes - The partition schemes supported by - &man.mkimg.1; - - - - - - Userland &man.ctf.5; support in - &man.dtrace.1; has been added. With this change, - &man.dtrace.1; is able to resolve type info for function and - USDT probe arguments, and function return - values. - - The &man.elfdump.1; utility has been - updated to support capability mode provided by - &man.capsicum.4;. - - The - &man.fstyp.8; utility has been added, which is used to - determine the filesystem on a specified device. 
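The &man.ping.8; and &man.elfdump.1; entries above both refer to the
&man.capsicum.4; capability-mode pattern: acquire every needed resource
first, then drop ambient authority. A minimal sketch of that pattern
(illustration only; the file name is arbitrary):

#include <sys/capsicum.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	FILE *fp;

	/* Open everything the program will need before sandboxing. */
	if ((fp = fopen("/etc/motd", "r")) == NULL)
		err(1, "fopen");
	if (cap_enter() == -1)
		err(1, "cap_enter");
	/*
	 * Capability mode is now in effect: a fresh fopen() would fail
	 * with ECAPMODE, but the existing descriptor keeps working.
	 */
	fclose(fp);
	return (0);
}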
- - The libedit library - has been updated to support UTF-8, which - additionally provides Unicode support to &man.sh.1;. - - The - &man.mkimg.1; utility has been updated to support the - MBR EFI partition - type. - - The &man.ptrace.2; system - call has been updated to include support for Altivec registers on - &os;/&arch.powerpc;. - - A new device control utility, - &man.devctl.8;, has been added, which allows making - administrative changes to individual devices, such as - attaching and detaching drivers, and enabling and disabling - devices. The &man.devctl.8; utility uses the new - &man.devctl.3; library. - - The &man.netstat.1; utility has been - updated to link against the &man.libxo.3; shared - library. - - A new flag, -c, has - been added to the &man.mkimg.1; utility, which allows - specifying the capacity of the target disk image. - - The - &man.uefisign.8; utility has been added. - - The &man.freebsd-update.8; utility has - been updated to prevent fetching updated binary patches when - a previous upgrade has not been fully completed. - - A regression in the &man.libarchive.3; - library that would prevent a directory from being included in - the archive when --one-file-system is used - has been fixed. - - The - &man.ar.1; utility has been updated to set - ARCHIVE_EXTRACT_SECURE_SYMLINKS and - ARCHIVE_EXTRACT_SECURE_NODOTDOT to disallow - directory traversal when extracting an archive, similar to - &man.tar.1;. - - A race condition in &man.wc.1; that - would cause final results to be sent to &man.stderr.4; when - receiving the SIGINFO signal has been - fixed. - - The &man.chflags.1;, &man.chgrp.1;, - &man.chmod.1;, and &man.chown.8; utilities now affect symbolic - links when the -R flag is specified, as - documented in &man.symlink.7;. - - The &man.date.1; utility has been - updated to print the modification time of the file passed as - an argument to the -r flag, improving - compatibility with the GNU &man.date.1; - utility behavior. - - The &man.pw.8; utility has been updated - with a new flag, -R, that sets the root - directory within which the utility will operate. - - The &man.lockstat.1; utility has been - updated with several improvements: - - - - Spin locks are now reported as the amount of time - spinning, instead of loop iterations. - - - - Reader locks are now recognized as adaptive locks that can - spin on &os;. - - - - Lock acquisition events for successful reader try-lock - events are now reported. - - - - Spin and block events are now reported before lock - acquisition events. - - - - The &man.fstyp.8; utility has been - updated to be able to detect &man.zfs.8; and &man.geli.8; - filesystems. - - The &man.mkimg.1; utility has been - updated to include support for NTFS - filesystems in both MBR and - GPT partitioning schemes. - - The &man.quota.1; utility has been - updated to include support for IPv6. - - The &man.jexec.8; utility has been - updated to include a new flag, -l, which - ensures a clean environment in the target jail when used. - Additionally, &man.jexec.8; will run a shell within the target - jail when no command is specified. - - The &man.w.1; utility has been updated - to display the full IPv6 remote address of the host from which - a user is connected. - - The &man.jail.8; framework has been - updated to allow mounting &man.linprocfs.5; and - &man.linsysfs.5; within a jail. - - The &man.patch.1; utility has been - updated to include a new option to the -V - flag, none, which disables backup file - creation when applying a patch.
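The new &man.jexec.8; -l behavior noted above can be exercised as follows; the jail name is hypothetical:
# Run a command with a clean environment inside the jail:
jexec -l www /usr/bin/env
# With no command specified, a shell is started within the jail:
jexec -l www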
- - The - &man.ar.1; utility now enables deterministic mode - (-D) by default. This behavior can be - disabled by specifying the -U flag. - - The &man.xargs.1; utility has been - updated to allow specifying 0 as an - argument to the -P (parallel mode) flag, - which allows creating as many concurrent processes as - possible. - - The &man.patch.1; utility has been - updated to remove the automatic checkout feature. - - A - new utility, &man.sesutil.8;, has been added, which is used - to manage &man.ses.4; devices. - - The &man.pciconf.8; utility has been - updated to use the PCI ID database from the misc/pciids package, if present, - falling back to the PCI ID database in the &os; base - system. - - The &man.ifconfig.8; utility has been - updated to always exit with an error code if an important - &man.ioctl.2; fails. +   Contributed Software - &man.byacc.1; has been updated to - version 20140101. - - OpenSSH has - been updated to 7.2p2. - - mdocml has - been updated to version 1.12.3. - - The binutils - suite of utilities has been updated to include upstream - patches that add new relocations for &arch.powerpc; - support. - - The - ELF Tool Chain has been updated to - upstream revision r3136. - - The texinfo - utility and info pages were removed from - the base system. The print/texinfo port should be - installed on systems where info pages are - needed. - - The ELF - object manipulation tools - addr2line, - elfcopy (strip), - nm, - readelf, - size, and - strings were switched to the - versions from the ELF Tool Chain project. - - The libedit library - has been updated to include UTF-8 support, - adding UTF-8 support to the &man.sh.1; - shell. - - The &man.xz.1; utility has been updated - to support multi-threaded compression. - - The - elftoolchain utilities have been - updated to version 3179. - - The &man.xz.1; utility has been updated - to version 5.2.2. - - The &man.nvi.1; utility has been updated - to version 2.1.3. - - The &man.wpa.supplicant.8; and - &man.hostapd.8; utilities have been updated to version - 2.5. - - The - &man.resolvconf.8; utility has been updated to version - 3.7.3. - - less has - been updated to version v481. - - bmake has - been updated to version 20150606. - - sendmail has - been updated to 8.15.2. Starting with &os; 11.0 and - sendmail 8.15, sendmail uses uncompressed IPv6 addresses by - default, i.e., they will not contain ::. For - example, instead of ::1, it will be - 0:0:0:0:0:0:0:1. This permits a zero subnet to - have a more specific match, such as different map entries for - IPv6:0:0 versus IPv6:0. This change requires that - configuration data (including maps, files, classes, custom - rulesets, etc.) use the same format, so make certain such - configuration data is upgraded accordingly. As a very simple check, - search for patterns like 'IPv6:[0-9a-fA-F:]*::' and 'IPv6::'. - To return to the old behavior, set the m4 option - confUSE_COMPRESSED_IPV6_ADDRESSES or the cf - option UseCompressedIPv6Addresses. - - The &man.tcpdump.1; utility has been - updated to version 4.7.4. - - OpenSSL has - been updated to version 1.0.2h. - - The - &man.ssh.1; utility has been updated to re-implement hostname - canonicalization before locating the host in - known_hosts. - - The &man.libarchive.3; library has been - updated to properly skip a sparse file entry in a &man.tar.1; - file, which would previously produce errors. - - The apr - library used by &man.svnlite.1; has been updated to version - 1.5.2.
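The sendmail configuration check suggested above can be scripted directly from the patterns given in the text; the paths are illustrative:
grep -E 'IPv6:[0-9a-fA-F:]*::' /etc/mail/*.cf /etc/mail/*.mc
grep 'IPv6::' /etc/mail/*.cf /etc/mail/*.mc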
- - The serf - library used by &man.svnlite.1; has been updated to version - 1.3.8. - - The &man.svnlite.1; utility has been - updated to version 1.8.14. - - The sqlite3 - library used by &man.svnlite.1; and &man.kerberos.8; has been - updated to version 3.12.1. - - Timezone data files have been updated to - version 2015f. - - The &man.acpi.4; subsystem has been - updated to version 20150818. - - The &man.unbound.8; utility has been - updated to version 1.5.4. - - &man.jemalloc.3; has been updated to - version 4.0.2. - - The &man.file.1; utility has been - updated to version 5.28. - - The &man.nc.1; utility has been updated - to the OpenBSD 5.8 version. - - Clang has - been updated to version 3.8.0. - - LLVM has - been updated to version 3.8.0. - - LLDB has - been updated to version 3.8.0. - - libc++ has - been updated to version 3.8.0. - - The - compiler_rt library has been - updated to version 3.8.0. - - ACPICA has been - updated to version 20160527. - - OpenBSM has been - updated to version 1.2 alpha 4. - - The NetBSD - Project's &man.libblacklist.3; library and applications - have been ported and integrated into the system. Packet - filtering support for the &man.pf.4; packet filtering system - has been implemented. The blacklist - system provides the blacklistd - daemon, the helper script - blacklistd-helper to make changes - to the running packet filter system, and the - blacklistctl control program. - A selection of system daemons, including - fingerd, - ftpd, - rlogind, and - rshd, have been modified to support - sending notifications to the blacklistd - daemon. - - Support for - the &man.ipfw.4; packet filter has been added to the - blacklistd-helper script. - - Support for - the &man.ipfilter.4; packet filter has been added to the - blacklistd-helper script. +   Installation and Configuration Tools - The &man.bsdinstall.8; partition editor - and &man.sade.8; utility have been updated to include native - ZFS support. - - - &man.bsdinstall.8;/zfsboot GPT+BIOS+GELI installs now make use - of GELIBOOT, which allows ZFS Boot Environments to be used with - GELI encrypted ZFS pools. - - A module to configure wifi devices has been added - to &man.bsdinstall.8;. - - The &os; installation utility, - &man.bsdinstall.8;, has been updated to set the - canmount &man.zfs.8; property to - off for the /var dataset, preventing the - contents of directories within /var from conflicting when - using multiple boot environments, such as those provided by - sysutils/beadm. - - The &man.bsdconfig.8; utility has been - updated to skip the initial &man.tzsetup.8; - UTC versus wall-clock time prompt when run - in a virtual machine, determined when the - kern.vm_guest &man.sysctl.8; is set to - 1. - - The &man.bsdinstall.8; utility has been - updated to use the new &man.dpv.3; library to display progress - when extracting the &os; distributions. - - Support for aligning partitions on 1MB boundaries - has been added to - &man.bsdinstall.8;. - - Support for detecting and implementing - a workaround for various laptops and motherboards that do not - boot properly from GPT-partitioned disks - has been added to &man.bsdinstall.8;. Additionally, the - active flag will be set on the partition - when needed. - - Support for selecting the partitioning - scheme when installing on the UFS - filesystem has been added to &man.bsdinstall.8;.
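The installer behavior for the /var dataset described above can be verified on an installed system; a sketch assuming the conventional zroot pool name:
# Expect "off" if the installer set the property as described:
zfs get -H -o value canmount zroot/var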
+   <filename class="directory">/etc/rc.d</filename> Scripts - The &man.rc.8; subsystem has been - updated to allow configuring services in ${LOCALBASE}/etc/rc.conf.d/. - If LOCALBASE is unset, it defaults to - /usr/local. - - A new &man.rc.8; script, - growfs, has been added, which will resize - the root filesystem on boot if /firstboot - exists. - - The mrouted - &man.rc.8; script has been removed from the base system. An - equivalent script is available from the net/mrouted port. - - A new &man.rc.8; script, - iovctl, has been added, which allows - automatically starting the &man.iovctl.8; utility at - boot. - - The &man.service.8; utility has been - updated to honor entries within /etc/rc.conf.d/. - +   <filename class="directory">/etc/periodic</filename> Scripts - The daily &man.periodic.8; script - 110.clean-tmps has been updated to avoid - crossing filesystem mount boundaries when cleaning files in - /tmp. - - A new - &man.periodic.8; script, - 510.status-world-kernel, has been added, - which evaluates the running userland and kernel versions from - the &man.uname.1; -U and - -K arguments, and prints an error if the - system userland and kernel are not in sync. +   Runtime Libraries and API - The Blowfish &man.crypt.3; default - format has been changed to - $2b$. - - The &man.readline.3; library is now - statically linked in software within the base system, and the - shared library is no longer installed, allowing the Ports - Collection to use a modern version of the library. - - The &man.strptime.3; function has been - updated to add support for the POSIX-2001 - features %U and - %W. - - The &man.dl.iterate.phdr.3; function has been - changed to always return the path name of the - ELF object in the - dlpi_name structure member. - - The &man.libxo.3; library has been - imported to the base system. - - A - userland library for Chelsio Terminator 5 based iWARP cards - has been added, allowing userland RDMA - applications to work over compatible - NICs. - - The &man.gpio.3; library has been added, - providing a wrapper around the &man.gpio.4; kernel - interface. - - The - &man.procctl.2; system call has been updated to include - a facility for non-&man.init.8; processes to be declared as - the reaper of child processes and their descendants. - - The futimens() and - utimensat() system calls have been - added. See &man.utimensat.2; for more information. - - The &man.elf.3; compile-time dependency - has been removed from drti.o, which - allows adding DTrace probes to - userland applications and libraries without also linking - against &man.elf.3;. - - The &man.setmode.3; function has been - updated to consistently set errno on - failure. - - The &man.qsort.3; functions have been - updated to be able to handle 32-bit aligned data on 64-bit - platforms, also providing a significant improvement in 32-bit - workloads. - - Several standard include headers have - been updated to make use of gcc - attributes, such as __result_use_check(), - __alloc_size(), and - __nonnull(). - - Support for file verification in - MAC has been added. - - The - libgomp library is now only built when - building GCC from the base system. An - up-to-date version is available in the Ports Collection as - devel/libiomp5-devel. - - The stdlib.h and - malloc.h headers have been updated to - make use of the gcc - alloc_align() attribute. - - The Blowfish &man.crypt.3; library - has been updated to support $2y$ hashes.
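The check performed by the new 510.status-world-kernel &man.periodic.8; script can be approximated by hand with the &man.uname.1; arguments it uses; a minimal sketch:
[ "$(uname -U)" = "$(uname -K)" ] || echo "userland and kernel are out of sync"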
- - The &man.execl.3; and &man.execlp.3; - library functions have been updated to use the - __sentinel gcc - attribute. +   ABI Compatibility - The &linux; compatibility version has - been updated to 2.6.18. The - compat.linux.osrelease &man.sysctl.8; is - evaluated when building the emulators/linux-c6 and related - ports. - - The stack protector has been upgraded to - the "strong" level, elevating the protection against buffer - overflows. While this significantly improves the security of - the system, extensive testing was done to ensure there are no - measurable side effects in performance or - functionality. +   Kernel This section covers changes to kernel configurations, system tuning, and system control parameters that are not otherwise categorized. Kernel Bug Fixes - A kernel bug that inhibited proper - functionality of the dev.cpu.0.freq - &man.sysctl.8; on &intel; processors with Turbo - Boost ™ enabled has been fixed. - - Support for - &man.dtrace.1; stack tracing has been fixed for - &os;/&arch.powerpc;, using the trapexit() - and asttrapexit() functions instead of - checking within addressed kernel space. - - A kernel panic triggered when destroying - a &man.vnet.9; &man.jail.8; configured with &man.gif.4; has - been fixed. - - A kernel panic triggered when destroying - a &man.vnet.9; &man.jail.8; configured with &man.gre.4; has - been fixed. - - A bug in &man.ipfw.4; that could - potentially lead to a kernel panic when using &man.dummynet.4; - at layer 2 has been fixed. - - The - kernel RPC has been updated to include - several enhancements: - - - - The 45 MiB limit on requests queued for - &man.nfsd.8; threads has been removed. - - - - Unnecessary throttling is now avoided by not deferring - accounting for completed requests. - - - - An integer overflow and signedness bugs have been fixed. - - - - Support for - &man.dtrace.1; has been added for the - Book-E ™ platform. - - The &man.kqueue.2; system call has been - updated to handle write events to files larger than 2 - gigabytes. +   Kernel Configuration - The IMAGACT_BINMISC - kernel configuration option has been enabled by default, - which enables application execution through emulators, such - as QEMU. - - The VT kernel - configuration file has been removed, and the &man.vt.4; - driver is included in the GENERIC kernel. - To enable &man.vt.4;, enter set kern.vty=vt - at the &man.loader.8; prompt during boot, or add - kern.vty=vt to &man.loader.conf.5; and - reboot the system. - - The &man.config.8; utility has been - updated to allow using a non-standard src/ tree, specified as an - argument to the -s flag. - - The - &os;/&arch.powerpc64; kernel now builds as - a position-independent executable, allowing the kernel to be - loaded into and run from any physical or virtual - address. - - - This change requires an update to &man.loader.8;. - The userland and kernel must be updated before rebooting the - system. - - - A new module for creating - rpi.dtb has been added for the Raspberry - Pi. - - The - rpi.dtb module is now installed to - /boot/dtb/ by - default for the Raspberry Pi system. - - Kernel support for the Vector-Scalar eXtension - (VSX) found on POWER7 and POWER8 hardware - has been added. - - The &man.pmap.9; implementation for 64-bit - &powerpc; processors has been overhauled to improve - concurrency. - - A new module for creating - the dtb module for AM335x systems has - been added.
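Per the &man.vt.4; item above, the console driver can be chosen without rebuilding the kernel; for example, in &man.loader.conf.5;:
kern.vty=vt
or once, at the &man.loader.8; prompt:
set kern.vty=vt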
- - The - PAE_TABLES kernel configuration option has - been added for &os;/&arch.i386;, which instructs &man.pmap.9; - to use PAE format for page tables while - maintaining a 32-bit physical address size elsewhere in the - kernel. The use of this option can enhance application-level - security by enabling the creation of no-execute - mappings on modern &arch.i386; processors. Unlike the - PAE option, PAE_TABLES - preserves kernel binary interface (KBI) - compatibility with non-PAE kernels, - allowing non-PAE kernel modules and drivers - to work with a PAE_TABLES-enabled kernel. - Additionally, system limits are tuned for 4GB maximum - RAM, avoiding kernel virtual address space - (KVA) exhaustion. - - The SIFTR kernel - configuration option has been added, allowing building &man.siftr.4; - statically into the kernel. - - The &arch.arm; boot loader, - ubldr, is now relocatable. In addition, - ubldr.bin, a stripped binary with an entry point of - 0, is now created at build - time, providing the ability to specify the - load address by running go - ${loadaddr} in - u-boot. - - The &man.nvd.4; and &man.nvme.4; drivers are - now included in the GENERIC kernel - configuration by default. - - A new kernel configuration option, - EM_MULTIQUEUE, has been added, which enables - multi-queue support in the &man.em.4; driver. - - - Multi-queue support in the &man.em.4; driver is not - officially supported by &intel;. - - - The GENERIC kernel - configuration has been updated to include the - IPSEC option by default. - - Initial NUMA - affinity and policy configuration has been added. See - &man.numactl.1; and &man.numa.getaffinity.2; for usage - details. - - The &man.pms.4; driver has been added - to the GENERIC kernel configuration for - supported architectures. - - The - CUBIEBOARD2 kernel configuration has been - renamed to A20. - - Kernel - debugging symbols are now installed to /usr/lib/debug/boot/kernel/. - To retain the previous behavior, add - KERN_DEBUGDIR="" to - &man.src.conf.5;. - - - NEW_PCIB is enabled by default. - +   System Tuning and Controls - The - &man.hwpmc.4; default and maximum callchain depths have been - increased. The default has been increased from 16 to 32, and - the maximum increased from 32 to 128. - - The kern.osrelease - and kern.osreldate variables are now configurable - &man.jail.8; parameters. - - The &man.devfs.5; device filesystem has - been changed to update timestamps for read/write operations - using seconds precision. A new &man.sysctl.8;, - vfs.devfs.dotimes, has been added, which, - when set to a non-zero value, enables default precision - timestamps for these operations. - - A new - &man.sysctl.8;, kern.racct.enable, has been - added, which, when set to a non-zero value, allows using - &man.rctl.8; with the GENERIC kernel. - A new kernel configuration option, - RACCT_DISABLED, has also been added. - - The - GENERIC kernel configuration now includes - RACCT and RCTL by - default. - - - To enable RACCT and - RCTL on a system using the - GENERIC kernel configuration, add - kern.racct.enable=1 to - &man.loader.conf.5;, and reboot the system. - - - A new &man.sysctl.8;, - net.inet.tcp.hostcache.purgenow, has - been added, which when set to 1 during - runtime will flush all - net.inet.tcp.hostcache entries. - - A new &man.sysctl.8;, - hw.model, has been added, which displays - CPU model information.
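The tunables introduced above can be exercised as follows; the names and values are taken directly from the text:
# loader.conf(5): allow rctl(8) with the GENERIC kernel
kern.racct.enable=1
# At runtime, flush all hostcache entries:
sysctl net.inet.tcp.hostcache.purgenow=1
# Display CPU model information:
sysctl hw.model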
- - The &man.uart.4; driver has been - updated to allow tuning pulses per second captured in the - CTS line during runtime, whereas previously only the DCD line - could be used without rebuilding the kernel. +   Devices and Drivers This section covers changes and additions to devices and device drivers since &release.prev;. Device Drivers - Support for GPS ports has been added to - &man.uhso.4;. - - The &man.full.4; device has been added, - and the lindev(4) device has been removed. - Prior to this change, lindev(4) provided - only the /dev/full character device, - returning ENOSPC on write attempts. As - this device is not specific to &linux;, a native &os; version - has been added. - - Hardware context support has been - added to the drm/i915 driver, adding - support for Mesa 9.2 and - later. - - The &man.vt.4; driver has been updated, - replacing the bitmapped kern.vt.spclkeys - &man.sysctl.8; with individual - kern.vt.kbd_* variants. - - The &man.hpet.4; driver has been updated - to create a - /dev/hpetN - device, providing access to HPET from - userspace. - - The drm code has - been updated to match &linux; version 3.8.13. - - The &man.psm.4; driver has been updated - to include improved support for newer Synaptics ® - touchpads and the ClickPad ® mouse on newer - Lenovo ™ laptops. - - Support for the Freescale - PCI Root Complex device has been - added. - - The &man.cyapa.4; driver has been added, - supporting the Cypress APA I2C trackpad. - - The &man.isl.4; driver has been added, - supporting the Intersil I2C ISL29018 digital ambient light - sensor. +   Storage Drivers - The &man.mpr.4; - device has been added, providing support for LSI Fusion-MPT - 3 12Gb SCSI/SATA controllers. - - The &man.mrsas.4; driver has been added, - providing support for LSI MegaRAID SAS controllers. The - &man.mfi.4; driver will attach to the controller by default. - To enable &man.mrsas.4;, add - hw.mfi.mrsas_enable=1 to - /boot/loader.conf, which turns off - &man.mfi.4; device probing. - - - At this time, the &man.mfiutil.8; utility and the &os; - version of MegaCLI and - StorCli do not work with - &man.mrsas.4;. - - - The - &man.ctl.4; subsystem has been updated, increasing the ports - limit from 128 to 256, - and the LUN limit from 256 - to 1024. - - The asr(4) driver has - been removed, and is no longer supported. - - The &man.hptnr.4; driver has been - updated to version 1.1.1. - - The &man.pms.4; driver has been added, - providing support for the PMC Sierra line of - SAS/SATA host bus - adapters. - - The &man.ioat.4; driver has been added, - providing support for the PSE (Platform - Storage Extension). - - The - CTL High Availability implementation has - been rewritten. - - The &man.ctl.4; driver has been updated - to support CD-ROM and removable devices. - - The &man.isp.4; driver has - been updated and improved, adding support for 16Gbps FC cards, - improving target mode support, and completing Multi-ID (NPIV) - functionality. +   Network Drivers - Support for Broadcom chipsets BCM57764, - BCM57767, BCM57782, BCM57786, and BCM57787 has been added to - &man.bge.4;. - - Support for the &intel; Centrino™ - Wireless-N 135 chipset has been added. - - Firmware for &intel; Centrino™ - Wireless-N 105 devices has been added to the base - system. - - The deprecated nve(4) driver has been - removed. Users of NVIDIA nForce MCP network adapters are - advised to use the &man.nfe.4; driver instead, which has been - the default driver for this hardware since - &os; 7.0.
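As described in the &man.mrsas.4; item above, preferring &man.mrsas.4; over &man.mfi.4; is a single &man.loader.conf.5; line:
hw.mfi.mrsas_enable=1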
- - The if_nf10bmac(4) - device has been added, providing support for the NetFPGA-10G - Embedded CPU Ethernet Core. - - - The if_nf10bmac(4) driver operates on - the FPGA, and is not suited for the PCI host - interface. - - - The &man.ath.hal.4; driver has been - updated to support the Atheros AR1111 chipset. - - Support for the &intel; Centrino™ - Wireless-N 105 chipset has been added. - - Support for the &man.cxgbe.4; Terminator - 5 (T5) 10G/40G cards has been added to &man.netmap.4;. - - The &man.alc.4; driver has been updated - to support AR816x and AR817x ethernet controllers. - - The &man.pf.4; packet filter default - hash has been changed from Jenkins to - Murmur3, providing a 3-percent performance - increase in packets per second. - - The &man.vxlan.4; driver has been added, - which creates a virtual Layer 2 (Ethernet) network overlaid on - a Layer 3 (IP/UDP) network. The &man.vxlan.4; driver is - analogous to &man.vlan.4;, but is designed to be better suited - for large, multiple-tenant datacenter environments. - - The - &man.gre.4; driver has been significantly overhauled, and has - been split into two separate modules, &man.gre.4; and - &man.me.4;. - - The &man.ral.4; driver has been updated - to support the RT5390 and RT5392 chipsets. - - The &man.sfxge.4; driver has been - updated to support Solarflare Flareon Ultra 7000-series - chipsets. - - The &man.em.4; driver has been updated - with improved transmission queue hang detection. - - The &man.cdce.4; driver has been updated - to include support for the RTL8153 chipset. - - The &man.iwm.4; driver has been imported - from OpenBSD, providing support for &intel; 3160/7260/7265 - wireless chipsets. - - The &man.em.4; driver has been updated - to allow disabling CRC stripping. - - The &man.pf.4; implementation has been - updated to remove support for the scrub fragment - crop|drop-ovl filtering rule. Systems with this - rule in &man.pf.conf.5; will implicitly be converted to the - scrub fragment reassemble filtering rule, with - no manual intervention necessary. - - The &man.lagg.4; driver has been updated - to remove support for the fec - protocol. - - netmap - support from the ncxgbe/ncxl interfaces has been merged into the - vcxgbe/vcxl interfaces for the &man.cxgbe.4; driver. - +   Hardware Support This section covers general hardware support for physical machines, hypervisors, and virtualization environments, as well as hardware changes and updates that do not otherwise fit in other sections of this document. Hardware Support - The &man.asmc.4; driver has been - updated to support the &apple; MacMini 3,1. - - Support for &os;/ia64 has been dropped - as of &os; 11. - - An issue that could cause a system to - hang when entering ACPI - S3 state (suspend to - RAM) has been corrected in the &man.acpi.4; - and &man.pci.4; drivers. - - The power management unit - subsystem has been updated to support power button events on - certain &arch.powerpc; hardware, such as the aluminum - PowerBook ®. - - The &man.hwpmc.4; - driver has been updated to correct performance counter - sampling on G4 (MPC74xxx) and G5 class processors. - - The - OpenCrypto framework has been - updated to include AES-ICM and - AES-GCM modes, both of which have also been - added to the &man.aesni.4; driver. - - The &man.hwpmc.4; - driver has been updated to support the Freescale e500 - core. - - The &man.ig4.4; driver has been added, - providing support for the fourth generation &intel; - I2C SMBus.
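For the &man.pf.4; scrub change described above, the surviving rule form in &man.pf.conf.5; looks like the following (the direction and scope shown are illustrative):
scrub in all fragment reassemble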
- - The &man.uart.4; driver has been updated to support - AMT devices on newer systems. - - Initial SMP support has been - added to the &os;/&arch.arm64; port. +   Virtualization Support - Support for the Virtual Interrupt - Delivery feature of &intel; VT-x is enabled if - supported by the CPU. This feature can be disabled by running - sysctl hw.vmm.vmx.use_apic_vid=0. - Additionally, to persist this setting across reboots, add - hw.vmm.vmx.use_apic_vid=0 to - /etc/sysctl.conf. - - Support for Posted Interrupt - Processing is enabled if supported by the CPU. This - feature can be disabled by running sysctl - hw.vmm.vmx.use_apic_pir=0. Additionally, to - persist this setting across reboots, add - hw.vmm.vmx.use_apic_pir=0 to - /etc/sysctl.conf. - - Unmapped I/O support has been added to - &man.virtio_blk.4;. - - Unmapped I/O support has been added to - &man.virtio_scsi.4;. - - The &man.virtio_random.4; driver has - been added to harvest entropy from the host system. - - &os;/&arch.i386; guests can be run under - bhyve. - - Support for running a &os;/&arch.amd64; - Xen guest instance as - a PVH guest has been added. - PVH mode, short for Para-Virtualized - Hardware, uses para-virtualized drivers for boot and - I/O, and uses hardware virtualization extensions for all other - tasks, without the need for emulation. - - The &man.bhyve.8; hypervisor has been - updated to support &amd; processors with - SVM and AMD-V hardware - extensions. - - The &man.virtio.console.4; driver has - been added, which provides an interface to VirtIO console - devices through a &man.tty.4; device. - - The &man.bhyve.8; hypervisor has been - updated to support DSM TRIM commands for - virtual AHCI disks. - - Native graphics support has been added to - the &man.bhyve.8; hypervisor. - - Support for the - QEMU virt system - has been added. - - The - Hyper-V™ drivers have been updated with several - enhancements: - - - - The &man.hv.vmbus.4; driver now has multi-channel - support. - - - - The &man.hv.storvsc.4; driver now has scatter/gather - support, in addition to performance improvements. - - - - The &man.hv.kvp.4; driver has received several bug - fixes. - - - - Support for &man.xen.4; para-virtualized - domU kernels has been removed. - - The - &man.hv.netvsc.4; driver has been updated to support checksum - offloading and TSO. - - The &man.xen.4; driver has been updated - to include support for blkif indirect - segment I/O. +   ARM Support - The &man.nand.4; device is enabled for - ARM devices by default. - - Support for the Exynos 5420 - Octa system has been added. - - The SMP - option has been enabled for all Exynos 5 systems supported by - &os;. - - Support for the Toradex - Apalis i.MX6 development board has been added. - - An issue that could cause - instability when detecting SD cards on the - Raspberry Pi SOC has been fixed. - - The bcm2835_cpufreq - driver has been added, which supports CPU - frequency and voltage control on the Raspberry Pi - SOC. - - Support to turn off the - BeagleBone Black system with the &man.shutdown.8; - -p flag or by invoking &man.poweroff.8; has - been added. - - Audio transmission drivers - have been added for the Digital Audio Multiplexer - (AUDMUX), Smart Direct Memory Access - Controller (SDMA), and Synchronous Serial - Interface (SSI). - - Initial - support for the ARM AArch64 architecture has been - added. - - Kernel support for Thumb-2 - userland has been added. - - Support for the hardware power button - on the BeagleBone Black system has been added.
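The two bhyve-related features above can be disabled persistently exactly as the text describes, via /etc/sysctl.conf:
hw.vmm.vmx.use_apic_vid=0
hw.vmm.vmx.use_apic_pir=0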
- - Initial - ACPI support has been added for - &os;/&arch.arm64;. - - Support for 1-Wire devices has been - added, providing support for 1-Wire hardware through - &man.gpio.4;. See &man.ow.4;, &man.owc.4;, and - &man.ow.temp.4; for more information. - - Support for the HiSilicon HI6220 SoC has been - added. - - The second CPU core on the - Allwinner A20 SoC has been enabled. - - Support for the Allwinner H3 SoC - has been added. - - Support for the X-Powers AXP813 and - AXP818 power management integrated circuits has been added. - - Support for GPIO, sensors, and - interrupts on the AXP209 power management integrated circuit has been - added. - +   Storage This section covers changes and additions to file systems and other storage subsystems, both local and networked. General Storage - The - &man.ctl.4; LUN mapping has been rewritten, - replacing iSCSI-specific mapping mechanisms - with a new mechanism that works for any port. - - The - &man.ctld.8; utility has been updated to allow controlling - non-iSCSI &man.ctl.4; ports. - - The - &man.autofs.5; subsystem has been updated to include a new - &man.auto.master.5; map, -media, which - allows automatically mounting removable media, such as - CD drives or USB flash - drives. - - The - &man.autofs.5; subsystem has been updated to include a new - &man.auto.master.5; map, -noauto, which - handles &man.fstab.5; entries set to - noauto. - - The GELI class has - been updated to support the BIO_DELETE - &man.g.bio.9; bio_cmd field, providing - TRIM/UNMAP support on - GELI-backed SSD storage - providers. - - - Leading spaces are now stripped off SCSI disk serial - numbers when populating the CAM serial number. This affects the output of - &man.diskinfo.8; and the names of /dev/diskid/DISK-* - device nodes, among other things. - - - Support for managing Shingled Magnetic Recording (SMR) drives - has been added. - +   Networked Storage - The new - filesystem automount facility, &man.autofs.5;, has been added. - The new &man.autofs.5; facility is similar to that found in - other &unix;-like operating systems, such as OS X™ - and Solaris™. The &man.autofs.5; facility uses - a &sun;-compatible &man.auto.master.5; configuration file, and - is administered with the &man.automount.8; userland utility, - and the &man.automountd.8; and &man.autounmountd.8; - daemons. - - Support - for the timeo, actimeo, - noac, and proto options - has been added to &man.mount.nfs.8;. +   ZFS - The arc_meta_limit - statistics are now visible through the - kstat &man.sysctl.8;. As a result of this - change, the vfs.zfs.arc_meta_used - &man.sysctl.8; has been removed, and replaced with the - kstat.zfs.misc.arcstats.arc_meta_used - &man.sysctl.8;. - - The &man.zfs.8; l2arc - code has been updated to take ashift into - account when gathering buffers to be written to the - l2arc device. - - The zfsd daemon has been added, - which manages hot spares and replacements in drive slots that publish - physical paths. +   &man.geom.4; - Support for the - disklabel64 partitioning scheme has been - added to &man.gpart.8;. - - Support for the - apple-boot, apple-hfs, - and apple-ufs MBR - partitioning schemes has been added to &man.gpart.8;. - - The &man.gpart.8; utility has been - updated to include a new attribute for GPT - partitions, lenovofix, which, when set, - works around BIOS compatibility - issues reported on several Lenovo ™ laptops. +   Boot Loader Changes This section covers the boot loader, boot menu, and other boot-related changes.
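The lenovofix attribute mentioned above is applied with &man.gpart.8;; a sketch, with the disk name hypothetical:
gpart set -a lenovofix ada0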
Boot Loader Changes - The - memory test run at boot time on &os;/&arch.amd64; platforms - has been disabled by default. - - A new &man.ttys.5; class, - 3wire, has been added. This is similar to - the existing terminal classes, but does not have a defined - baud rate. - - The &man.vt.4; driver has been made the - default system console driver. The &man.syscons.4; driver is - still available, and can be enabled by adding - kern.vty=sc in &man.loader.conf.5;. - Alternatively, &man.syscons.4; can be enabled at boot time by - entering set kern.vty=sc at the - &man.loader.8; prompt. - - Support for bzipfs - has been added to the EFI loader. - - The boot loader has been updated to - support entering the GELI passphrase before - loading the kernel. To enable this behavior, add - geom_eli_passphrase_prompt="YES" to - &man.loader.conf.5;. - - The &man.ttys.5; file for &os;/&arch.arm; has been - updated to enable ttyu1, - ttyu2, and ttyu3 by - default, if the callin port is an active console port. +   Boot Menu Changes   Networking This section describes changes that affect networking in &os;. Network Protocols - Support for the IPX network transport - protocol has been removed; IPX will not be supported in - &os; 11 and later releases. - - Support for PLPMTUD - blackhole detection (RFC 4821) has been - added to the &man.tcp.4; stack, disabled by default. New - control tunables have been added: - - - - - - - - Tunable - Description - - - - - - net.inet.tcp.pmtud_blackhole_detection - Enables or disables PLPMTUD - blackhole detection - - - - net.inet.tcp.pmtud_blackhole_mss - MSS to try for IPv4 - - - - net.inet.tcp.v6pmtud_blackhole_mss - MSS to try for IPv6 - - - - - - New monitoring &man.sysctl.8;s have been added: - - - - - - - - Tunable - Description - - - - - - net.inet.tcp.pmtud_blackhole_activated - Number of times the code was activated to attempt - downshifting the MSS - - - - net.inet.tcp.pmtud_blackhole_min_activated - Number of times the blackhole - MSS was used in an attempt to - downshift - - - - net.inet.tcp.pmtud_blackhole_failed - Number of times that the blackhole failed to - connect after downshifting the - MSS - - - - - - Support for IP - identification for atomic datagrams (RFC - 6864) has been added. This feature can be toggled - with the net.inet.ip.rfc6864 - &man.sysctl.8;, which is enabled by default. - - The IPSEC implementation has been - updated to include support for AES modes on - both software-only and hardware-backed (&man.aesni.4;) - systems. - - The - network stack has been updated to fix handling of - IPv6 On-Link redirects. - - The net.inet.tcp.ecn.enable sysctl mib has been - changed from a binary off/on control to a three-way setting. - - - - - - - - Value - Description - - - - - - 0 - Totally disable ECN. - - - - 1 - Enable ECN if incoming connections request it. Outgoing - connections will request ECN. - - - - 2 - Enable ECN if incoming connections request it. Outgoing - connections will not request ECN. - - - - - - - Dummynet AQM, an independent implementation of - CoDel and FQ-CoDel for ipfw/dummynet, has been imported to the base - system. - +   Ports Collection and Package Infrastructure This section covers changes to the &os; Ports Collection, package infrastructure, and package maintenance and installation tools. Infrastructure Changes   Packaging Changes   Documentation This section covers changes to the &os; Documentation Project sources and toolchain.
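The TCP tunables listed above are runtime &man.sysctl.8;s; for example, to enable blackhole detection and select the middle ECN setting (values as defined in the tables above):
sysctl net.inet.tcp.pmtud_blackhole_detection=1
sysctl net.inet.tcp.ecn.enable=1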
Documentation Source Changes   Documentation Toolchain Changes   Release Engineering and Integration This section covers changes that are specific to the &os; Release Engineering processes. Integration Changes - The - Release Engineering build tools have been updated to include - support for producing virtual machine disk images for various - cloud hosting providers. - - The Release Engineering build tools have - been updated to use multi-threaded &man.xz.1;. By default, - the number of &man.xz.1; threads is set to the number of cores - available. - - The - Release Engineering build tools have been updated to include - support for building &os;/&arch.arm64; virtual machine and - memory stick installation images. - - The - Release Engineering build tools have been updated to support - building &os;/&arch.arm; images without external utilities for - supported boards where a corresponding - u-boot port exists in the Ports - Collection. - - The - &os;/&arch.i386; memory stick installation images are now - created using the &man.mkimg.1; utility, matching the way - the &os;/&arch.amd64; images are created. +  
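The multi-threaded &man.xz.1; behavior noted above can also be requested explicitly; -T 0 selects one thread per core (the file name is hypothetical):
xz -T 0 image.raw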
Index: projects/clang390-import/sbin/hastd/lzf.h =================================================================== --- projects/clang390-import/sbin/hastd/lzf.h (revision 305016) +++ projects/clang390-import/sbin/hastd/lzf.h (revision 305017) @@ -1,211 +1,215 @@ /* * Copyright (c) 2000-2008 Marc Alexander Lehmann * * Redistribution and use in source and binary forms, with or without modifica- * tion, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. * * Alternatively, the contents of this file may be used under the terms of * the GNU General Public License ("GPL") version 2 or any later version, * in which case the provisions of the GPL are applicable instead of * the above. If you wish to allow the use of your version of this file * only under the terms of the GPL and not to allow others to use your * version of this file under the BSD license, indicate your decision * by deleting the provisions above and replace them with the notice * and other provisions required by the GPL. If you do not delete the * provisions above, a recipient may use your version of this file under * either the BSD or the GPL. */ #ifndef LZF_H #define LZF_H /*********************************************************************** ** ** lzf -- an extremely fast/free compression/decompression-method ** http://liblzf.plan9.de/ ** ** This algorithm is believed to be patent-free. ** ***********************************************************************/ #define LZF_VERSION 0x0105 /* 1.5, API version */ /* * Compress in_len bytes stored at the memory block starting at * in_data and write the result to out_data, up to a maximum length * of out_len bytes. * * If the output buffer is not large enough or any error occurs return 0, * otherwise return the number of bytes used, which might be considerably * more than in_len (but less than 104% of the original size), so it * makes sense to always use out_len == in_len - 1, to ensure _some_ * compression, and store the data uncompressed otherwise (with a flag, of * course). * * lzf_compress might use different algorithms on different systems and * even different runs, thus might result in different compressed strings * depending on the phase of the moon or similar factors. However, all * these strings are architecture-independent and will result in the * original data when decompressed using lzf_decompress. * * The buffers must not be overlapping.
* * If the option LZF_STATE_ARG is enabled, an extra argument must be * supplied which is not reflected in this header file. Refer to lzfP.h * and lzf_c.c. * */ unsigned int lzf_compress (const void *const in_data, unsigned int in_len, void *out_data, unsigned int out_len); /* * Decompress data compressed with some version of the lzf_compress * function and stored at location in_data and length in_len. The result * will be stored at out_data up to a maximum of out_len characters. * * If the output buffer is not large enough to hold the decompressed * data, a 0 is returned and errno is set to E2BIG. Otherwise the number * of decompressed bytes (i.e. the original length of the data) is * returned. * * If an error in the compressed data is detected, a zero is returned and * errno is set to EINVAL. * * This function is very fast, about as fast as a copying loop. */ unsigned int lzf_decompress (const void *const in_data, unsigned int in_len, void *out_data, unsigned int out_len); /* * Size of hashtable is (1 << HLOG) * sizeof (char *) * decompression is independent of the hash table size * the difference between 15 and 14 is very small * for small blocks (and 14 is usually a bit faster). * For a low-memory/faster configuration, use HLOG == 13; * For best compression, use 15 or 16 (or more, up to 23). */ #ifndef HLOG # define HLOG 16 #endif /* * Sacrifice very little compression quality in favour of compression speed. * This gives almost the same compression as the default code, and is * (very roughly) 15% faster. This is the preferred mode of operation. */ #ifndef VERY_FAST # define VERY_FAST 1 #endif /* * Sacrifice some more compression quality in favour of compression speed. * (roughly 1-2% worse compression for large blocks and * 9-10% for small, redundant, blocks and >>20% better speed in both cases) * In short: when in need of speed, enable this for binary data, * possibly disable this for text data. */ #ifndef ULTRA_FAST # define ULTRA_FAST 0 #endif /* * Unconditionally aligning does not cost very much, so do it if unsure */ #ifndef STRICT_ALIGN -# define STRICT_ALIGN !(defined(__i386) || defined (__amd64)) +# if !(defined(__i386) || defined (__amd64)) +# define STRICT_ALIGN 1 +# else +# define STRICT_ALIGN 0 +# endif #endif /* * You may choose to pre-set the hash table (might be faster on some * modern cpus and large (>>64k) blocks, and also makes compression * deterministic/repeatable when the configuration otherwise is the same). */ #ifndef INIT_HTAB # define INIT_HTAB 1 #endif /* * Avoid assigning values to the errno variable? For some embedding purposes * (the Linux kernel, for example), this is necessary. NOTE: this breaks * the documentation in lzf.h. */ #ifndef AVOID_ERRNO # define AVOID_ERRNO 0 #endif /* * Whether to pass the LZF_STATE variable as argument, or allocate it * on the stack. For small-stack environments, define this to 1. * NOTE: this breaks the prototype in lzf.h. */ #ifndef LZF_STATE_ARG # define LZF_STATE_ARG 0 #endif /* * Whether to add extra checks for input validity in lzf_decompress * and return EINVAL if the input stream has been corrupted. This * only shields against overflowing the input buffer and will not * detect most corrupted streams. * This check is not normally noticeable on modern hardware * (<1% slowdown), but might slow down older cpus considerably.
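The tuning macros above are all guarded by #ifndef, so a build can override them on the compiler command line; a sketch assuming the lzf_c.c translation unit referenced in the comments:
# Low-memory/faster configuration, per the HLOG comment above:
cc -O2 -c -DHLOG=13 lzf_c.c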
*/ #ifndef CHECK_INPUT # define CHECK_INPUT 1 #endif /*****************************************************************************/ /* nothing should be changed below */ typedef unsigned char u8; typedef const u8 *LZF_STATE[1 << (HLOG)]; #if !STRICT_ALIGN /* for unaligned accesses we need a 16 bit datatype. */ # include # if USHRT_MAX == 65535 typedef unsigned short u16; # elif UINT_MAX == 65535 typedef unsigned int u16; # else # undef STRICT_ALIGN # define STRICT_ALIGN 1 # endif #endif #if ULTRA_FAST # if defined(VERY_FAST) # undef VERY_FAST # endif #endif #if INIT_HTAB # ifdef __cplusplus # include # else # include # endif #endif #endif Index: projects/clang390-import/share/mk/bsd.dep.mk =================================================================== --- projects/clang390-import/share/mk/bsd.dep.mk (revision 305016) +++ projects/clang390-import/share/mk/bsd.dep.mk (revision 305017) @@ -1,312 +1,314 @@ # $FreeBSD$ # # The include file handles Makefile dependencies. # # # +++ variables +++ # # CLEANDEPENDDIRS Additional directories to remove for the cleandepend # target. # # CLEANDEPENDFILES Additional files to remove for the cleandepend target. # # CTAGS A tags file generation program [gtags] # # CTAGSFLAGS Options for ctags(1) [not set] # # DEPENDFILE dependencies file [.depend] # # GTAGSFLAGS Options for gtags(1) [-o] # # HTAGSFLAGS Options for htags(1) [not set] # # SRCS List of source files (c, c++, assembler) # # DPSRCS List of source files which are needed for generating # dependencies, ${SRCS} are always part of it. # # +++ targets +++ # # cleandepend: # remove ${CLEANDEPENDFILES}; remove ${CLEANDEPENDDIRS} and all # contents. # # depend: # Make the dependencies for the source files, and store # them in the file ${DEPENDFILE}. # # tags: # In "ctags" mode, create a tags file for the source files. # In "gtags" mode, create a (GLOBAL) gtags file for the # source files. If HTML is defined, htags(1) is also run # after gtags(1). .if !target(____) .error bsd.dep.mk cannot be included directly. .endif CTAGS?= gtags CTAGSFLAGS?= GTAGSFLAGS?= -o HTAGSFLAGS?= .if ${MK_DIRDEPS_BUILD} == "no" .MAKE.DEPENDFILE= ${DEPENDFILE} .endif CLEANDEPENDFILES+= ${DEPENDFILE} ${DEPENDFILE}.* .if ${MK_META_MODE} == "yes" CLEANDEPENDFILES+= *.meta .endif # Keep `tags' here, before SRCS are mangled below for `depend'. .if !target(tags) && defined(SRCS) && !defined(NO_TAGS) tags: ${SRCS} .if ${CTAGS:T} == "gtags" @cd ${.CURDIR} && ${CTAGS} ${GTAGSFLAGS} ${.OBJDIR} .if defined(HTML) @cd ${.CURDIR} && htags ${HTAGSFLAGS} -d ${.OBJDIR} ${.OBJDIR} .endif .else @${CTAGS} ${CTAGSFLAGS} -f /dev/stdout \ ${.ALLSRC:N*.h} | sed "s;${.CURDIR}/;;" > ${.TARGET} .endif .endif .if !empty(.MAKE.MODE:Mmeta) && empty(.MAKE.MODE:Mnofilemon) _meta_filemon= 1 .endif # Skip reading .depend when not needed to speed up tree-walks and simple # lookups. For install, only do this if no other targets are specified. # Also skip generating or including .depend.* files if in meta+filemon mode # since it will track dependencies itself. OBJS_DEPEND_GUESS is still used. 
.if !empty(.MAKEFLAGS:M-V${_V_READ_DEPEND}) || make(obj) || make(clean*) || \ ${.TARGETS:M*install*} == ${.TARGETS} || \ make(analyze) || defined(_meta_filemon) _SKIP_READ_DEPEND= 1 .if ${MK_DIRDEPS_BUILD} == "no" .MAKE.DEPENDFILE= /dev/null .endif .endif .if defined(SRCS) CLEANFILES?= .for _S in ${SRCS:N*.[dhly]} OBJS_DEPEND_GUESS.${_S:R}.o+= ${_S} .endfor # Lexical analyzers .for _LSRC in ${SRCS:M*.l:N*/*} .for _LC in ${_LSRC:R}.c ${_LC}: ${_LSRC} ${LEX} ${LFLAGS} -o${.TARGET} ${.ALLSRC} OBJS_DEPEND_GUESS.${_LC:R}.o+= ${_LC} SRCS:= ${SRCS:S/${_LSRC}/${_LC}/} CLEANFILES+= ${_LC} .endfor .endfor # Yacc grammars .for _YSRC in ${SRCS:M*.y:N*/*} .for _YC in ${_YSRC:R}.c SRCS:= ${SRCS:S/${_YSRC}/${_YC}/} CLEANFILES+= ${_YC} .if !empty(YFLAGS:M-d) && !empty(SRCS:My.tab.h) .ORDER: ${_YC} y.tab.h y.tab.h: .NOMETA ${_YC} y.tab.h: ${_YSRC} ${YACC} ${YFLAGS} ${.ALLSRC} cp y.tab.c ${_YC} CLEANFILES+= y.tab.c y.tab.h .elif !empty(YFLAGS:M-d) .for _YH in ${_YC:R}.h .ORDER: ${_YC} ${_YH} ${_YH}: .NOMETA ${_YC} ${_YH}: ${_YSRC} ${YACC} ${YFLAGS} -o ${_YC} ${.ALLSRC} SRCS+= ${_YH} CLEANFILES+= ${_YH} .endfor .else ${_YC}: ${_YSRC} ${YACC} ${YFLAGS} -o ${_YC} ${.ALLSRC} .endif OBJS_DEPEND_GUESS.${_YC:R}.o+= ${_YC} .endfor .endfor # DTrace probe definitions .if ${SRCS:M*.d} CFLAGS+= -I${.OBJDIR} .endif .for _DSRC in ${SRCS:M*.d:N*/*} .for _D in ${_DSRC:R} SRCS+= ${_D}.h ${_D}.h: ${_DSRC} ${DTRACE} ${DTRACEFLAGS} -h -s ${.ALLSRC} SRCS:= ${SRCS:S/^${_DSRC}$//} OBJS+= ${_D}.o CLEANFILES+= ${_D}.h ${_D}.o ${_D}.o: ${_DSRC} ${OBJS:S/^${_D}.o$//} @rm -f ${.TARGET} ${DTRACE} ${DTRACEFLAGS} -G -o ${.TARGET} -s ${.ALLSRC:N*.h} .if defined(LIB) CLEANFILES+= ${_D}.So ${_D}.po ${_D}.So: ${_DSRC} ${SOBJS:S/^${_D}.So$//} @rm -f ${.TARGET} ${DTRACE} ${DTRACEFLAGS} -G -o ${.TARGET} -s ${.ALLSRC:N*.h} ${_D}.po: ${_DSRC} ${POBJS:S/^${_D}.po$//} @rm -f ${.TARGET} ${DTRACE} ${DTRACEFLAGS} -G -o ${.TARGET} -s ${.ALLSRC:N*.h} .endif .endfor .endfor .if ${MAKE_VERSION} < 20160220 DEPEND_MP?= -MP .endif # Handle OBJS=../somefile.o hacks. Just replace '/' rather than use :T to # avoid collisions. DEPEND_FILTER= C,/,_,g DEPENDSRCS= ${SRCS:M*.[cSC]} ${SRCS:M*.cxx} ${SRCS:M*.cpp} ${SRCS:M*.cc} .if !empty(DEPENDSRCS) DEPENDOBJS+= ${DEPENDSRCS:R:S,$,.o,} .endif DEPENDFILES_OBJS= ${DEPENDOBJS:O:u:${DEPEND_FILTER}:C/^/${DEPENDFILE}./} DEPEND_CFLAGS+= -MD ${DEPEND_MP} -MF${DEPENDFILE}.${.TARGET:${DEPEND_FILTER}} DEPEND_CFLAGS+= -MT${.TARGET} .if !defined(_meta_filemon) .if defined(.PARSEDIR) # Only add in DEPEND_CFLAGS for CFLAGS on files we expect from DEPENDOBJS # as those are the only ones we will include. DEPEND_CFLAGS_CONDITION= "${DEPENDOBJS:M${.TARGET:${DEPEND_FILTER}}}" != "" CFLAGS+= ${${DEPEND_CFLAGS_CONDITION}:?${DEPEND_CFLAGS}:} .else CFLAGS+= ${DEPEND_CFLAGS} .endif .if !defined(_SKIP_READ_DEPEND) .for __depend_obj in ${DEPENDFILES_OBJS} .if ${MAKE_VERSION} < 20160220 .sinclude "${.OBJDIR}/${__depend_obj}" .else .dinclude "${.OBJDIR}/${__depend_obj}" .endif .endfor .endif # !defined(_SKIP_READ_DEPEND) .endif # !defined(_meta_filemon) .endif # defined(SRCS) .if ${MK_DIRDEPS_BUILD} == "yes" # Prevent meta.autodep.mk from tracking "local dependencies". .depend: .include # If using filemon then _EXTRADEPEND is skipped since it is not needed. .if defined(_meta_filemon) # this depend: bypasses that below # the dependency helps when bootstrapping depend: beforedepend ${DPSRCS} ${SRCS} afterdepend beforedepend: afterdepend: beforedepend .endif .endif # Guess some dependencies for when no ${DEPENDFILE}.OBJ is generated yet. 
# For meta+filemon the .meta file is checked for since it is the dependency # file used. .for __obj in ${DEPENDOBJS:O:u} .if (defined(_meta_filemon) && !exists(${.OBJDIR}/${__obj}.meta)) || \ (!defined(_meta_filemon) && !exists(${.OBJDIR}/${DEPENDFILE}.${__obj})) ${__obj}: ${OBJS_DEPEND_GUESS} ${__obj}: ${OBJS_DEPEND_GUESS.${__obj}} .elif defined(_meta_filemon) # For meta mode we still need to know which file to depend on to avoid # ambiguous suffix transformation rules from .PATH. Meta mode does not # use .depend files. We really only need source files, not headers since # they are typically in SRCS/beforebuild already. For target-specific # guesses do include headers though since they may not be in SRCS. ${__obj}: ${OBJS_DEPEND_GUESS:N*.h} ${__obj}: ${OBJS_DEPEND_GUESS.${__obj}} .endif .endfor # Always run 'make depend' to generate dependencies early and to avoid the # need for manually running it. The dirdeps build should only do this in # sub-makes though since MAKELEVEL0 is for dirdeps calculations. .if ${MK_DIRDEPS_BUILD} == "no" || ${.MAKE.LEVEL} > 0 beforebuild: depend .endif .if !target(depend) .if defined(SRCS) depend: beforedepend ${DEPENDFILE} afterdepend # Tell bmake not to look for generated files via .PATH .NOPATH: ${DEPENDFILE} ${DEPENDFILES_OBJS} DPSRCS+= ${SRCS} # A .depend file will only be generated if there are commands in # beforedepend/_EXTRADEPEND/afterdepend. The _EXTRADEPEND target is # ignored if using meta+filemon since it handles all dependencies. The other # targets are kept as they may be used for generating something. The target is # kept to allow 'make depend' to generate files. ${DEPENDFILE}: ${DPSRCS} .if exists(${.OBJDIR}/${DEPENDFILE}) || \ ((commands(beforedepend) || \ (!defined(_meta_filemon) && commands(_EXTRADEPEND)) || \ commands(afterdepend)) && !empty(.MAKE.MODE:Mmeta)) rm -f ${DEPENDFILE} .endif .if !defined(_meta_filemon) && target(_EXTRADEPEND) _EXTRADEPEND: .USE ${DEPENDFILE}: _EXTRADEPEND .endif .ORDER: ${DEPENDFILE} afterdepend .else depend: beforedepend afterdepend .endif .if !target(beforedepend) beforedepend: .else .ORDER: beforedepend ${DEPENDFILE} .ORDER: beforedepend afterdepend .endif .if !target(afterdepend) afterdepend: .endif .endif .if defined(SRCS) .if ${CTAGS:T} == "gtags" CLEANDEPENDFILES+= GPATH GRTAGS GSYMS GTAGS .if defined(HTML) CLEANDEPENDDIRS+= HTML .endif .else CLEANDEPENDFILES+= tags .endif .endif .if !target(cleandepend) cleandepend: .if !empty(CLEANDEPENDFILES) rm -f ${CLEANDEPENDFILES} .endif .if !empty(CLEANDEPENDDIRS) rm -rf ${CLEANDEPENDDIRS} .endif .endif +.ORDER: cleandepend all +.ORDER: cleandepend depend .if !target(checkdpadd) && (defined(DPADD) || defined(LDADD)) _LDADD_FROM_DPADD= ${DPADD:R:T:C;^lib(.*)$;-l\1;g} # Ignore -Wl,--start-group/-Wl,--end-group as it might be required in the # LDADD list due to unresolved symbols _LDADD_CANONICALIZED= ${LDADD:N:R:T:C;^lib(.*)$;-l\1;g:N-Wl,--[es]*-group} checkdpadd: .if ${_LDADD_FROM_DPADD} != ${_LDADD_CANONICALIZED} @echo ${.CURDIR} @echo "DPADD -> ${_LDADD_FROM_DPADD}" @echo "LDADD -> ${_LDADD_CANONICALIZED}" .endif .endif Index: projects/clang390-import/share/mk/bsd.obj.mk =================================================================== --- projects/clang390-import/share/mk/bsd.obj.mk (revision 305016) +++ projects/clang390-import/share/mk/bsd.obj.mk (revision 305017) @@ -1,211 +1,212 @@ # $FreeBSD$ # # The include file handles creating the 'obj' directory # and cleaning up object files, etc.
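The .ORDER: cleandepend lines added above serialize that target against all and depend when they are requested together, which keeps combined invocations safe under parallel make; a usage sketch:
make -j8 cleandepend depend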
# # +++ variables +++ # # CLEANDIRS Additional directories to remove for the clean target. # # CLEANFILES Additional files to remove for the clean target. # # MAKEOBJDIR A pathname for the directory where the targets # are built. Note: MAKEOBJDIR is an *environment* variable # and works properly only if set as an environment variable, # not as a global or command line variable! # # E.g. use `env MAKEOBJDIR=temp-obj make' # # MAKEOBJDIRPREFIX Specifies somewhere other than /usr/obj to root the object # tree. Note: MAKEOBJDIRPREFIX is an *environment* variable # and works properly only if set as an environment variable, # not as a global or command line variable! # # E.g. use `env MAKEOBJDIRPREFIX=/somewhere/obj make' # # NO_OBJ Do not create object directories. This should not be set # if anything is built. # # +++ targets +++ # # clean: # remove ${CLEANFILES}; remove ${CLEANDIRS} and all contents. # # cleandir: # remove the build directory (and all its contents) created by obj # # obj: # create build directory. # .if !target(____) ____: .include .if ${MK_AUTO_OBJ} == "yes" # it is done by now objwarn: obj: CANONICALOBJDIR= ${.OBJDIR} .if defined(NO_OBJ) # but this makefile does not want it! .OBJDIR: ${.CURDIR} .endif .elif defined(MAKEOBJDIRPREFIX) CANONICALOBJDIR:=${MAKEOBJDIRPREFIX}${.CURDIR} .elif defined(MAKEOBJDIR) && ${MAKEOBJDIR:M/*} != "" CANONICALOBJDIR:=${MAKEOBJDIR} OBJTOP?= ${MAKEOBJDIR} .else CANONICALOBJDIR:=/usr/obj${.CURDIR} .endif OBJTOP?= ${.OBJDIR:S,${.CURDIR},,}${SRCTOP} # # Warn of unorthodox object directory. # # The following directories are tried in order for ${.OBJDIR}: # # 1. ${MAKEOBJDIRPREFIX}/`pwd` # 2. ${MAKEOBJDIR} # 3. obj.${MACHINE} # 4. obj # 5. /usr/obj/`pwd` # 6. ${.CURDIR} # # If ${.OBJDIR} is constructed using canonical cases 1 or 5, or # case 2 (using MAKEOBJDIR), don't issue a warning. Otherwise, # issue a warning differentiating between cases 6 and (3 or 4). # objwarn: .if !defined(NO_OBJ) && ${.OBJDIR} != ${CANONICALOBJDIR} && \ !(defined(MAKEOBJDIRPREFIX) && exists(${CANONICALOBJDIR}/)) && \ !(defined(MAKEOBJDIR) && exists(${MAKEOBJDIR}/)) .if ${.OBJDIR} == ${.CURDIR} @${ECHO} "Warning: Object directory not changed from original ${.CURDIR}" .elif exists(${.CURDIR}/obj.${MACHINE}/) || exists(${.CURDIR}/obj/) @${ECHO} "Warning: Using ${.OBJDIR} as object directory instead of\ canonical ${CANONICALOBJDIR}" .endif .endif beforebuild: objwarn .if !defined(NO_OBJ) .if !target(obj) obj: .PHONY @if ! test -d ${CANONICALOBJDIR}/; then \ mkdir -p ${CANONICALOBJDIR}; \ if ! test -d ${CANONICALOBJDIR}/; then \ ${ECHO} "Unable to create ${CANONICALOBJDIR}."; \ exit 1; \ fi; \ ${ECHO} "${CANONICALOBJDIR} created for ${.CURDIR}"; \ fi .for dir in ${SRCS:H:O:u} ${DPSRCS:H:O:u} @if ! test -d ${CANONICALOBJDIR}/${dir}/; then \ mkdir -p ${CANONICALOBJDIR}/${dir}; \ if ! test -d ${CANONICALOBJDIR}/${dir}/; then \ ${ECHO} "Unable to create ${CANONICALOBJDIR}/${dir}."; \ exit 1; \ fi; \ ${ECHO} "${CANONICALOBJDIR}/${dir} created for ${.CURDIR}"; \ fi .endfor .endif .if !target(objlink) objlink: @if test -d ${CANONICALOBJDIR}/; then \ rm -f ${.CURDIR}/obj; \ ln -s ${CANONICALOBJDIR} ${.CURDIR}/obj; \ else \ echo "No ${CANONICALOBJDIR} to link to - do a make obj."; \ fi .endif .endif # !defined(NO_OBJ) # # where would that obj directory be? 
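Similarly, the .ORDER: clean all addition above makes the common clean-and-rebuild invocation safe with -j; a sketch, with MAKEOBJDIRPREFIX used as documented in the file's own comments:
env MAKEOBJDIRPREFIX=/usr/obj make -j8 clean all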
# .if !target(whereobj) whereobj: @echo ${.OBJDIR} .endif .if ${CANONICALOBJDIR} != ${.CURDIR} && exists(${CANONICALOBJDIR}/) cleanobj: @-rm -rf ${CANONICALOBJDIR} .else cleanobj: clean cleandepend .endif @if [ -L ${.CURDIR}/obj ]; then rm -f ${.CURDIR}/obj; fi # Tell bmake not to look for generated files via .PATH NOPATH_FILES+= ${CLEANFILES} .if !empty(NOPATH_FILES) .NOPATH: ${NOPATH_FILES} .endif .if !target(clean) clean: .if defined(CLEANFILES) && !empty(CLEANFILES) rm -f ${CLEANFILES} .endif .if defined(CLEANDIRS) && !empty(CLEANDIRS) -rm -rf ${CLEANDIRS} .endif .endif +.ORDER: clean all cleandir: cleanobj .include .if make(destroy*) && defined(OBJROOT) # this (rm -rf objdir) is much faster and more reliable than cleaning. # just in case we are playing games with these... _OBJDIR?= ${.OBJDIR} _CURDIR?= ${.CURDIR} # destroy almost everything destroy: destroy-all destroy-all: # just remove our objdir destroy-arch: .NOMETA .if ${_OBJDIR} != ${_CURDIR} cd ${_CURDIR} && rm -rf ${_OBJDIR} .endif .if defined(HOST_OBJTOP) destroy-host: destroy.host destroy.host: .NOMETA cd ${_CURDIR} && rm -rf ${HOST_OBJTOP}/${RELDIR:N.} .endif .if make(destroy-all) && ${RELDIR} == "." destroy-all: destroy-stage .endif # remove the stage tree destroy-stage: .NOMETA .if defined(STAGE_ROOT) cd ${_CURDIR} && rm -rf ${STAGE_ROOT} .endif # allow parallel destruction _destroy_machine_list = common host ${ALL_MACHINE_LIST} .for m in ${_destroy_machine_list:O:u} destroy-all: destroy.$m .if !target(destroy.$m) destroy.$m: .NOMETA .if ${_OBJDIR} != ${_CURDIR} cd ${_CURDIR} && rm -rf ${OBJROOT}$m*/${RELDIR:N.} .endif .endif .endfor .endif .endif # !target(__<bsd.obj.mk>__) Index: projects/clang390-import/sys/amd64/amd64/pmap.c =================================================================== --- projects/clang390-import/sys/amd64/amd64/pmap.c (revision 305016) +++ projects/clang390-import/sys/amd64/amd64/pmap.c (revision 305017) @@ -1,7272 +1,7265 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2003 Peter Wemm * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #define AMD64_NPT_AWARE #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. 
*/ #include "opt_pmap.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif static __inline boolean_t pmap_type_guest(pmap_t pmap) { return ((pmap->pm_type == PT_EPT) || (pmap->pm_type == PT_RVI)); } static __inline boolean_t pmap_emulate_ad_bits(pmap_t pmap) { return ((pmap->pm_flags & PMAP_EMULATE_AD_BITS) != 0); } static __inline pt_entry_t pmap_valid_bit(pmap_t pmap) { pt_entry_t mask; switch (pmap->pm_type) { case PT_X86: case PT_RVI: mask = X86_PG_V; break; case PT_EPT: if (pmap_emulate_ad_bits(pmap)) mask = EPT_PG_EMUL_V; else mask = EPT_PG_READ; break; default: panic("pmap_valid_bit: invalid pm_type %d", pmap->pm_type); } return (mask); } static __inline pt_entry_t pmap_rw_bit(pmap_t pmap) { pt_entry_t mask; switch (pmap->pm_type) { case PT_X86: case PT_RVI: mask = X86_PG_RW; break; case PT_EPT: if (pmap_emulate_ad_bits(pmap)) mask = EPT_PG_EMUL_RW; else mask = EPT_PG_WRITE; break; default: panic("pmap_rw_bit: invalid pm_type %d", pmap->pm_type); } return (mask); } static __inline pt_entry_t pmap_global_bit(pmap_t pmap) { pt_entry_t mask; switch (pmap->pm_type) { case PT_X86: mask = X86_PG_G; break; case PT_RVI: case PT_EPT: mask = 0; break; default: panic("pmap_global_bit: invalid pm_type %d", pmap->pm_type); } return (mask); } static __inline pt_entry_t pmap_accessed_bit(pmap_t pmap) { pt_entry_t mask; switch (pmap->pm_type) { case PT_X86: case PT_RVI: mask = X86_PG_A; break; case PT_EPT: if (pmap_emulate_ad_bits(pmap)) mask = EPT_PG_READ; else mask = EPT_PG_A; break; default: panic("pmap_accessed_bit: invalid pm_type %d", pmap->pm_type); } return (mask); } static __inline pt_entry_t pmap_modified_bit(pmap_t pmap) { pt_entry_t mask; switch (pmap->pm_type) { case PT_X86: case PT_RVI: mask = X86_PG_M; break; case PT_EPT: if (pmap_emulate_ad_bits(pmap)) mask = EPT_PG_WRITE; else mask = EPT_PG_M; break; default: panic("pmap_modified_bit: invalid pm_type %d", pmap->pm_type); } return (mask); } extern struct pcpu __pcpu[]; #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pa_index(pa) ((pa) >> PDRSHIFT) #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) #define NPV_LIST_LOCKS MAXCPU #define PHYS_TO_PV_LIST_LOCK(pa) \ (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ struct rwlock **_lockp = (lockp); \ struct rwlock *_new_lock; \ \ _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ if (_new_lock != *_lockp) { \ if (*_lockp != NULL) \ rw_wunlock(*_lockp); \ *_lockp = _new_lock; \ rw_wlock(*_lockp); \ } \ } while (0) #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) #define RELEASE_PV_LIST_LOCK(lockp) do { \ struct rwlock **_lockp = (lockp); \ \ if (*_lockp != NULL) { \ rw_wunlock(*_lockp); \ *_lockp = NULL; \ } \ } while (0) #define VM_PAGE_TO_PV_LIST_LOCK(m) \ PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) struct pmap kernel_pmap_store; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 
vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ int nkpt; SYSCTL_INT(_machdep, OID_AUTO, nkpt, CTLFLAG_RD, &nkpt, 0, "Number of kernel page table pages allocated on bootup"); static int ndmpdp; vm_paddr_t dmaplimit; vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS; pt_entry_t pg_nx; static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); static int pat_works = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1, "Is page attribute table fully functional?"); static int pg_ps_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pg_ps_enabled, 0, "Are large page mappings enabled?"); #define PAT_INDEX_SIZE 8 static int pat_index[PAT_INDEX_SIZE]; /* cache mode to PAT index conversion */ static u_int64_t KPTphys; /* phys addr of kernel level 1 */ static u_int64_t KPDphys; /* phys addr of kernel level 2 */ u_int64_t KPDPphys; /* phys addr of kernel level 3 */ u_int64_t KPML4phys; /* phys addr of kernel level 4 */ static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */ static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */ static int ndmpdpphys; /* number of DMPDPphys pages */ /* * pmap_mapdev support pre initialization (i.e. console) */ #define PMAP_PREINIT_MAPPING_COUNT 8 static struct pmap_preinit_mapping { vm_paddr_t pa; vm_offset_t va; vm_size_t sz; int mode; } pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT]; static int pmap_initialized; /* * Data for the pv entry allocation mechanism. * Updates to pv_invl_gen are protected by the pv_list_locks[] * elements, but reads are not. */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static struct mtx pv_chunks_mutex; static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; static u_long pv_invl_gen[NPV_LIST_LOCKS]; static struct md_page *pv_table; static struct md_page pv_dummy; /* * All those kernel PT submaps that BSD is so fond of */ pt_entry_t *CMAP1 = 0; caddr_t CADDR1 = 0; static vm_offset_t qframe = 0; static struct mtx qframe_mtx; static int pmap_flags = PMAP_PDE_SUPERPAGE; /* flags for x86 pmaps */ int pmap_pcid_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pmap_pcid_enabled, 0, "Is TLB Context ID enabled ?"); int invpcid_works = 0; SYSCTL_INT(_vm_pmap, OID_AUTO, invpcid_works, CTLFLAG_RD, &invpcid_works, 0, "Is the invpcid instruction available ?"); static int pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS) { int i; uint64_t res; res = 0; CPU_FOREACH(i) { res += cpuid_to_pcpu[i]->pc_pm_save_cnt; } return (sysctl_handle_64(oidp, &res, 0, req)); } SYSCTL_PROC(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU", "Count of saved TLB context on switch"); static LIST_HEAD(, pmap_invl_gen) pmap_invl_gen_tracker = LIST_HEAD_INITIALIZER(&pmap_invl_gen_tracker); static struct mtx invl_gen_mtx; static u_long pmap_invl_gen = 0; /* Fake lock object to satisfy turnstiles interface. */ static struct lock_object invl_gen_ts = { .lo_name = "invlts", }; #define PMAP_ASSERT_NOT_IN_DI() \ KASSERT(curthread->td_md.md_invl_gen.gen == 0, ("DI already started")) /* * Start a new Delayed Invalidation (DI) block of code, executed by * the current thread. 
Within a DI block, the current thread may * destroy both the page table and PV list entries for a mapping and * then release the corresponding PV list lock before ensuring that * the mapping is flushed from the TLBs of any processors with the * pmap active. */ static void pmap_delayed_invl_started(void) { struct pmap_invl_gen *invl_gen; u_long currgen; invl_gen = &curthread->td_md.md_invl_gen; PMAP_ASSERT_NOT_IN_DI(); mtx_lock(&invl_gen_mtx); if (LIST_EMPTY(&pmap_invl_gen_tracker)) currgen = pmap_invl_gen; else currgen = LIST_FIRST(&pmap_invl_gen_tracker)->gen; invl_gen->gen = currgen + 1; LIST_INSERT_HEAD(&pmap_invl_gen_tracker, invl_gen, link); mtx_unlock(&invl_gen_mtx); } /* * Finish the DI block, previously started by the current thread. All * required TLB flushes for the pages marked by * pmap_delayed_invl_page() must be finished before this function is * called. * * This function works by bumping the global DI generation number to * the generation number of the current thread's DI, unless there is a * pending DI that started earlier. In the latter case, bumping the * global DI generation number would incorrectly signal that the * earlier DI had finished. Instead, this function bumps the earlier * DI's generation number to match the generation number of the * current thread's DI. */ static void pmap_delayed_invl_finished(void) { struct pmap_invl_gen *invl_gen, *next; struct turnstile *ts; invl_gen = &curthread->td_md.md_invl_gen; KASSERT(invl_gen->gen != 0, ("missed invl_started")); mtx_lock(&invl_gen_mtx); next = LIST_NEXT(invl_gen, link); if (next == NULL) { turnstile_chain_lock(&invl_gen_ts); ts = turnstile_lookup(&invl_gen_ts); pmap_invl_gen = invl_gen->gen; if (ts != NULL) { turnstile_broadcast(ts, TS_SHARED_QUEUE); turnstile_unpend(ts, TS_SHARED_LOCK); } turnstile_chain_unlock(&invl_gen_ts); } else { next->gen = invl_gen->gen; } LIST_REMOVE(invl_gen, link); mtx_unlock(&invl_gen_mtx); invl_gen->gen = 0; } #ifdef PV_STATS static long invl_wait; SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait, CTLFLAG_RD, &invl_wait, 0, "Number of times DI invalidation blocked pmap_remove_all/write"); #endif static u_long * pmap_delayed_invl_genp(vm_page_t m) { return (&pv_invl_gen[pa_index(VM_PAGE_TO_PHYS(m)) % NPV_LIST_LOCKS]); } /* * Ensure that all currently executing DI blocks, that need to flush * TLB for the given page m, actually flushed the TLB at the time the * function returned. If the page m has an empty PV list and we call * pmap_delayed_invl_wait(), upon its return we know that no CPU has a * valid mapping for the page m in either its page table or TLB. * * This function works by blocking until the global DI generation * number catches up with the generation number associated with the * given page m and its PV list. Since this function's callers * typically own an object lock and sometimes own a page lock, it * cannot sleep. Instead, it blocks on a turnstile to relinquish the * processor. */ static void pmap_delayed_invl_wait(vm_page_t m) { struct thread *td; struct turnstile *ts; u_long *m_gen; #ifdef PV_STATS bool accounted = false; #endif td = curthread; m_gen = pmap_delayed_invl_genp(m); while (*m_gen > pmap_invl_gen) { #ifdef PV_STATS if (!accounted) { atomic_add_long(&invl_wait, 1); accounted = true; } #endif ts = turnstile_trywait(&invl_gen_ts); if (*m_gen > pmap_invl_gen) turnstile_wait(ts, NULL, TS_SHARED_QUEUE); else turnstile_cancel(ts); } } /* * Mark the page m's PV list as participating in the current thread's * DI block. 
Any threads concurrently using m's PV list to remove or * restrict all mappings to m will wait for the current thread's DI * block to complete before proceeding. * * The function works by setting the DI generation number for m's PV * list to at least the DI generation number of the current thread. * This forces a caller of pmap_delayed_invl_wait() to block until * current thread calls pmap_delayed_invl_finished(). */ static void pmap_delayed_invl_page(vm_page_t m) { u_long gen, *m_gen; rw_assert(VM_PAGE_TO_PV_LIST_LOCK(m), RA_WLOCKED); gen = curthread->td_md.md_invl_gen.gen; if (gen == 0) return; m_gen = pmap_delayed_invl_genp(m); if (*m_gen < gen) *m_gen = gen; } /* * Crashdump maps. */ static caddr_t crashdumpmap; static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); static int popcnt_pc_map_pq(uint64_t *map); static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); static void reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp); static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp); static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp); static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode); static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static boolean_t pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp); static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va); static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock **lockp); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va); static void pmap_pde_attr(pd_entry_t *pde, int cache_bits, int mask); static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp); static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot); static void pmap_pte_attr(pt_entry_t *pte, int cache_bits, int mask); static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, struct spglist *free, struct rwlock **lockp); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, struct spglist *free); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde); static void pmap_update_pde_invalidate(pmap_t, vm_offset_t va, pd_entry_t pde); static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock 
**lockp); static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, struct rwlock **lockp); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp); static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free); static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); /* * Move the kernel virtual free pointer to the next * 2MB. This is used to help improve performance * by using a large (2MB) page for much of the kernel * (.text, .data, .bss) */ static vm_offset_t pmap_kmem_choose(vm_offset_t addr) { vm_offset_t newaddr = addr; newaddr = roundup2(addr, NBPDR); return (newaddr); } /********************/ /* Inline functions */ /********************/ /* Return a non-clipped PD index for a given VA */ static __inline vm_pindex_t pmap_pde_pindex(vm_offset_t va) { return (va >> PDRSHIFT); } /* Return various clipped indexes for a given VA */ static __inline vm_pindex_t pmap_pte_index(vm_offset_t va) { return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1)); } static __inline vm_pindex_t pmap_pde_index(vm_offset_t va) { return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); } static __inline vm_pindex_t pmap_pdpe_index(vm_offset_t va) { return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1)); } static __inline vm_pindex_t pmap_pml4e_index(vm_offset_t va) { return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1)); } /* Return a pointer to the PML4 slot that corresponds to a VA */ static __inline pml4_entry_t * pmap_pml4e(pmap_t pmap, vm_offset_t va) { return (&pmap->pm_pml4[pmap_pml4e_index(va)]); } /* Return a pointer to the PDP slot that corresponds to a VA */ static __inline pdp_entry_t * pmap_pml4e_to_pdpe(pml4_entry_t *pml4e, vm_offset_t va) { pdp_entry_t *pdpe; pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & PG_FRAME); return (&pdpe[pmap_pdpe_index(va)]); } /* Return a pointer to the PDP slot that corresponds to a VA */ static __inline pdp_entry_t * pmap_pdpe(pmap_t pmap, vm_offset_t va) { pml4_entry_t *pml4e; pt_entry_t PG_V; PG_V = pmap_valid_bit(pmap); pml4e = pmap_pml4e(pmap, va); if ((*pml4e & PG_V) == 0) return (NULL); return (pmap_pml4e_to_pdpe(pml4e, va)); } /* Return a pointer to the PD slot that corresponds to a VA */ static __inline pd_entry_t * pmap_pdpe_to_pde(pdp_entry_t *pdpe, vm_offset_t va) { pd_entry_t *pde; pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & PG_FRAME); return (&pde[pmap_pde_index(va)]); } /* Return a pointer to the PD slot that corresponds to a VA */ static __inline pd_entry_t * pmap_pde(pmap_t pmap, vm_offset_t va) { pdp_entry_t *pdpe; pt_entry_t PG_V; PG_V = pmap_valid_bit(pmap); pdpe = pmap_pdpe(pmap, va); if (pdpe == NULL || (*pdpe & PG_V) == 0) return (NULL); return (pmap_pdpe_to_pde(pdpe, va)); } /* Return a pointer to the PT slot that corresponds to a VA */ static __inline pt_entry_t * pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) { pt_entry_t *pte; pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME); return (&pte[pmap_pte_index(va)]); } /* Return a pointer to the PT slot that corresponds to a VA */ static __inline pt_entry_t * pmap_pte(pmap_t pmap, vm_offset_t va) { pd_entry_t *pde; pt_entry_t PG_V; PG_V = pmap_valid_bit(pmap); pde = pmap_pde(pmap, va); if (pde == NULL || (*pde & PG_V) == 0) return (NULL); if ((*pde & PG_PS) != 0) /* compat with i386 pmap_pte() */ return ((pt_entry_t *)pde); return (pmap_pde_to_pte(pde, va)); } static __inline void pmap_resident_count_inc(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, 
MA_OWNED); pmap->pm_stats.resident_count += count; } static __inline void pmap_resident_count_dec(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(pmap->pm_stats.resident_count >= count, ("pmap %p resident count underflow %ld %d", pmap, pmap->pm_stats.resident_count, count)); pmap->pm_stats.resident_count -= count; } PMAP_INLINE pt_entry_t * vtopte(vm_offset_t va) { u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1); KASSERT(va >= VM_MAXUSER_ADDRESS, ("vtopte on a uva/gpa 0x%0lx", va)); return (PTmap + ((va >> PAGE_SHIFT) & mask)); } static __inline pd_entry_t * vtopde(vm_offset_t va) { u_int64_t mask = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1); KASSERT(va >= VM_MAXUSER_ADDRESS, ("vtopde on a uva/gpa 0x%0lx", va)); return (PDmap + ((va >> PDRSHIFT) & mask)); } static u_int64_t allocpages(vm_paddr_t *firstaddr, int n) { u_int64_t ret; ret = *firstaddr; bzero((void *)ret, n * PAGE_SIZE); *firstaddr += n * PAGE_SIZE; return (ret); } CTASSERT(powerof2(NDMPML4E)); /* number of kernel PDP slots */ #define NKPDPE(ptpgs) howmany(ptpgs, NPDEPG) static void nkpt_init(vm_paddr_t addr) { int pt_pages; #ifdef NKPT pt_pages = NKPT; #else pt_pages = howmany(addr, 1 << PDRSHIFT); pt_pages += NKPDPE(pt_pages); /* * Add some slop beyond the bare minimum required for bootstrapping * the kernel. * * This is quite important when allocating KVA for kernel modules. * The modules are required to be linked in the negative 2GB of * the address space. If we run out of KVA in this region then * pmap_growkernel() will need to allocate page table pages to map * the entire 512GB of KVA space which is an unnecessary tax on * physical memory. * * Secondly, device memory mapped as part of setting up the low- * level console(s) is taken from KVA, starting at virtual_avail. * This is because cninit() is called after pmap_bootstrap() but * before vm_init() and pmap_init(). 20MB for a frame buffer is * not uncommon. */ pt_pages += 32; /* 64MB additional slop. */ #endif nkpt = pt_pages; } static void create_pagetables(vm_paddr_t *firstaddr) { int i, j, ndm1g, nkpdpe; pt_entry_t *pt_p; pd_entry_t *pd_p; pdp_entry_t *pdp_p; pml4_entry_t *p4_p; /* Allocate page table pages for the direct map */ ndmpdp = howmany(ptoa(Maxmem), NBPDP); if (ndmpdp < 4) /* Minimum 4GB of dirmap */ ndmpdp = 4; ndmpdpphys = howmany(ndmpdp, NPDPEPG); if (ndmpdpphys > NDMPML4E) { /* * Each NDMPML4E allows 512 GB, so limit to that, * and then readjust ndmpdp and ndmpdpphys. */ printf("NDMPML4E limits system to %d GB\n", NDMPML4E * 512); Maxmem = atop(NDMPML4E * NBPML4); ndmpdpphys = NDMPML4E; ndmpdp = NDMPML4E * NPDEPG; } DMPDPphys = allocpages(firstaddr, ndmpdpphys); ndm1g = 0; if ((amd_feature & AMDID_PAGE1GB) != 0) ndm1g = ptoa(Maxmem) >> PDPSHIFT; if (ndm1g < ndmpdp) DMPDphys = allocpages(firstaddr, ndmpdp - ndm1g); dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT; /* Allocate pages */ KPML4phys = allocpages(firstaddr, 1); KPDPphys = allocpages(firstaddr, NKPML4E); /* * Allocate the initial number of kernel page table pages required to * bootstrap. We defer this until after all memory-size dependent * allocations are done (e.g. direct map), so that we don't have to * build in too much slop in our estimate. * * Note that when NKPML4E > 1, we have an empty page underneath * all but the KPML4I'th one, so we need NKPML4E-1 extra (zeroed) * pages. (pmap_enter requires a PD page to exist for each KPML4E.) 
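* As a worked example (editorial illustration, not in the original comment): if the early allocations end at 128MB, nkpt_init() computes pt_pages = howmany(128MB, 2MB) = 64, adds NKPDPE(64) = 1 page directory page and 32 pages of slop, giving nkpt = 97.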
*/ nkpt_init(*firstaddr); nkpdpe = NKPDPE(nkpt); KPTphys = allocpages(firstaddr, nkpt); KPDphys = allocpages(firstaddr, nkpdpe); /* Fill in the underlying page table pages */ /* Nominally read-only (but really R/W) from zero to physfree */ /* XXX not fully used, underneath 2M pages */ pt_p = (pt_entry_t *)KPTphys; for (i = 0; ptoa(i) < *firstaddr; i++) pt_p[i] = ptoa(i) | X86_PG_RW | X86_PG_V | X86_PG_G; /* Now map the page tables at their location within PTmap */ pd_p = (pd_entry_t *)KPDphys; for (i = 0; i < nkpt; i++) pd_p[i] = (KPTphys + ptoa(i)) | X86_PG_RW | X86_PG_V; /* Map from zero to end of allocations under 2M pages */ /* This replaces some of the KPTphys entries above */ for (i = 0; (i << PDRSHIFT) < *firstaddr; i++) pd_p[i] = (i << PDRSHIFT) | X86_PG_RW | X86_PG_V | PG_PS | X86_PG_G; /* And connect up the PD to the PDP (leaving room for L4 pages) */ pdp_p = (pdp_entry_t *)(KPDPphys + ptoa(KPML4I - KPML4BASE)); for (i = 0; i < nkpdpe; i++) pdp_p[i + KPDPI] = (KPDphys + ptoa(i)) | X86_PG_RW | X86_PG_V | PG_U; /* * Now, set up the direct map region using 2MB and/or 1GB pages. If * the end of physical memory is not aligned to a 1GB page boundary, * then the residual physical memory is mapped with 2MB pages. Later, * if pmap_mapdev{_attr}() uses the direct map for non-write-back * memory, pmap_change_attr() will demote any 2MB or 1GB page mappings * that are partially used. */ pd_p = (pd_entry_t *)DMPDphys; for (i = NPDEPG * ndm1g, j = 0; i < NPDEPG * ndmpdp; i++, j++) { pd_p[j] = (vm_paddr_t)i << PDRSHIFT; /* Preset PG_M and PG_A because demotion expects it. */ pd_p[j] |= X86_PG_RW | X86_PG_V | PG_PS | X86_PG_G | X86_PG_M | X86_PG_A; } pdp_p = (pdp_entry_t *)DMPDPphys; for (i = 0; i < ndm1g; i++) { pdp_p[i] = (vm_paddr_t)i << PDPSHIFT; /* Preset PG_M and PG_A because demotion expects it. */ pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_PS | X86_PG_G | X86_PG_M | X86_PG_A; } for (j = 0; i < ndmpdp; i++, j++) { pdp_p[i] = DMPDphys + ptoa(j); pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_U; } /* And recursively map PML4 to itself in order to get PTmap */ p4_p = (pml4_entry_t *)KPML4phys; p4_p[PML4PML4I] = KPML4phys; p4_p[PML4PML4I] |= X86_PG_RW | X86_PG_V | PG_U; /* Connect the Direct Map slot(s) up to the PML4. */ for (i = 0; i < ndmpdpphys; i++) { p4_p[DMPML4I + i] = DMPDPphys + ptoa(i); p4_p[DMPML4I + i] |= X86_PG_RW | X86_PG_V | PG_U; } /* Connect the KVA slots up to the PML4 */ for (i = 0; i < NKPML4E; i++) { p4_p[KPML4BASE + i] = KPDPphys + ptoa(i); p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V | PG_U; } } /* * Bootstrap the system enough to run with virtual memory. * * On amd64 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ void pmap_bootstrap(vm_paddr_t *firstaddr) { vm_offset_t va; pt_entry_t *pte; int i; /* * Create an initial set of page tables to run the kernel in. */ create_pagetables(firstaddr); /* * Add a physical memory segment (vm_phys_seg) corresponding to the * preallocated kernel page table pages so that vm_page structures * representing these pages will be created. The vm_page structures * are required for promotion of the corresponding kernel virtual * addresses to superpage mappings. 
*/ vm_phys_add_seg(KPTphys, KPTphys + ptoa(nkpt)); virtual_avail = (vm_offset_t) KERNBASE + *firstaddr; virtual_avail = pmap_kmem_choose(virtual_avail); virtual_end = VM_MAX_KERNEL_ADDRESS; /* XXX do %cr0 as well */ load_cr4(rcr4() | CR4_PGE); load_cr3(KPML4phys); if (cpu_stdext_feature & CPUID_STDEXT_SMEP) load_cr4(rcr4() | CR4_SMEP); /* * Initialize the kernel pmap (which is statically allocated). */ PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys); kernel_pmap->pm_cr3 = KPML4phys; CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); kernel_pmap->pm_flags = pmap_flags; /* * Initialize the TLB invalidations generation number lock. */ mtx_init(&invl_gen_mtx, "invlgn", NULL, MTX_DEF); /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = vtopte(va); /* * Crashdump maps. The first page is reused as CMAP1 for the * memory test. */ SYSMAP(caddr_t, CMAP1, crashdumpmap, MAXDUMPPGS) CADDR1 = crashdumpmap; virtual_avail = va; /* Initialize the PAT MSR. */ pmap_init_pat(); /* Initialize TLB Context Id. */ TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled); if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) { /* Check for INVPCID support */ invpcid_works = (cpu_stdext_feature & CPUID_STDEXT_INVPCID) != 0; for (i = 0; i < MAXCPU; i++) { kernel_pmap->pm_pcids[i].pm_pcid = PMAP_PCID_KERN; kernel_pmap->pm_pcids[i].pm_gen = 1; } __pcpu[0].pc_pcid_next = PMAP_PCID_KERN + 1; __pcpu[0].pc_pcid_gen = 1; /* * pcpu area for APs is zeroed during AP startup. * pc_pcid_next and pc_pcid_gen are initialized by AP * during pcpu setup. */ load_cr4(rcr4() | CR4_PCIDE); } else { pmap_pcid_enabled = 0; } } /* * Setup the PAT MSR. */ void pmap_init_pat(void) { int pat_table[PAT_INDEX_SIZE]; uint64_t pat_msr; u_long cr0, cr4; int i; /* Bail if this CPU doesn't implement PAT. */ if ((cpu_feature & CPUID_PAT) == 0) panic("no PAT??"); /* Set default PAT index table. */ for (i = 0; i < PAT_INDEX_SIZE; i++) pat_table[i] = -1; pat_table[PAT_WRITE_BACK] = 0; pat_table[PAT_WRITE_THROUGH] = 1; pat_table[PAT_UNCACHEABLE] = 3; pat_table[PAT_WRITE_COMBINING] = 3; pat_table[PAT_WRITE_PROTECTED] = 3; pat_table[PAT_UNCACHED] = 3; /* Initialize default PAT entries. */ pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) | PAT_VALUE(1, PAT_WRITE_THROUGH) | PAT_VALUE(2, PAT_UNCACHED) | PAT_VALUE(3, PAT_UNCACHEABLE) | PAT_VALUE(4, PAT_WRITE_BACK) | PAT_VALUE(5, PAT_WRITE_THROUGH) | PAT_VALUE(6, PAT_UNCACHED) | PAT_VALUE(7, PAT_UNCACHEABLE); if (pat_works) { /* * Leave the indices 0-3 at the default of WB, WT, UC-, and UC. * Program 5 and 6 as WP and WC. * Leave 4 and 7 as WB and UC. */ pat_msr &= ~(PAT_MASK(5) | PAT_MASK(6)); pat_msr |= PAT_VALUE(5, PAT_WRITE_PROTECTED) | PAT_VALUE(6, PAT_WRITE_COMBINING); pat_table[PAT_UNCACHED] = 2; pat_table[PAT_WRITE_PROTECTED] = 5; pat_table[PAT_WRITE_COMBINING] = 6; } else { /* * Just replace PAT Index 2 with WC instead of UC-. */ pat_msr &= ~PAT_MASK(2); pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); pat_table[PAT_WRITE_COMBINING] = 2; } /* Disable PGE. */ cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); /* Disable caches (CD = 1, NW = 0). */ cr0 = rcr0(); load_cr0((cr0 & ~CR0_NW) | CR0_CD); /* Flushes caches and TLBs. */ wbinvd(); invltlb(); /* Update PAT and index table. 
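* (Editorial illustration: in the pat_works case the MSR written below encodes 0=WB, 1=WT, 2=UC-, 3=UC, 4=WB, 5=WP, 6=WC, 7=UC, matching the pat_table updates above.)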
*/ wrmsr(MSR_PAT, pat_msr); for (i = 0; i < PAT_INDEX_SIZE; i++) pat_index[i] = pat_table[i]; /* Flush caches and TLBs again. */ wbinvd(); invltlb(); /* Restore caches and PGE. */ load_cr0(cr0); load_cr4(cr4); } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pat_mode = PAT_WRITE_BACK; } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { struct pmap_preinit_mapping *ppim; vm_page_t mpte; vm_size_t s; int error, i, pv_npg; /* * Initialize the vm page array entries for the kernel pmap's * page table pages. */ for (i = 0; i < nkpt; i++) { mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT)); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_init: page table page is out of range")); mpte->pindex = pmap_pde_pindex(KERNBASE) + i; mpte->phys_addr = KPTphys + (i << PAGE_SHIFT); } /* * If the kernel is running on a virtual machine, then it must assume * that MCA is enabled by the hypervisor. Moreover, the kernel must * be prepared for the hypervisor changing the vendor and family that * are reported by CPUID. Consequently, the workaround for AMD Family * 10h Erratum 383 is enabled if the processor's feature set does not * include at least one feature that is only supported by older Intel * or newer AMD processors. */ if (vm_guest != VM_GUEST_NO && (cpu_feature & CPUID_SS) == 0 && (cpu_feature2 & (CPUID2_SSSE3 | CPUID2_SSE41 | CPUID2_AESNI | CPUID2_AVX | CPUID2_XSAVE)) == 0 && (amd_feature2 & (AMDID2_XOP | AMDID2_FMA4)) == 0) workaround_erratum383 = 1; /* * Are large page mappings enabled? */ TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled); if (pg_ps_enabled) { KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, ("pmap_init: can't assign to pagesizes[1]")); pagesizes[1] = NBPDR; } /* * Initialize the pv chunk list mutex. */ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); /* * Initialize the pool of pv list locks. */ for (i = 0; i < NPV_LIST_LOCKS; i++) rw_init(&pv_list_locks[i], "pmap pv list"); /* * Calculate the size of the pv head table for superpages. */ pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, NBPDR); /* * Allocate memory for the pv head table for superpages. 
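* For example (editorial illustration): with physical memory ending at 16GB, pv_npg = howmany(16GB, NBPDR) = 8192, i.e. one struct md_page per 2MB frame.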
*/ s = (vm_size_t)(pv_npg * sizeof(struct md_page)); s = round_page(s); pv_table = (struct md_page *)kmem_malloc(kernel_arena, s, M_WAITOK | M_ZERO); for (i = 0; i < pv_npg; i++) TAILQ_INIT(&pv_table[i].pv_list); TAILQ_INIT(&pv_dummy.pv_list); pmap_initialized = 1; for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == 0) continue; /* Make the direct map consistent */ if (ppim->pa < dmaplimit && ppim->pa + ppim->sz < dmaplimit) { (void)pmap_change_attr(PHYS_TO_DMAP(ppim->pa), ppim->sz, ppim->mode); } if (!bootverbose) continue; printf("PPIM %u: PA=%#lx, VA=%#lx, size=%#lx, mode=%#x\n", i, ppim->pa, ppim->va, ppim->sz, ppim->mode); } mtx_init(&qframe_mtx, "qfrmlk", NULL, MTX_SPIN); error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK, (vmem_addr_t *)&qframe); if (error != 0) panic("qframe allocation failed"); } static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, "2MB page mapping counters"); static u_long pmap_pde_demotions; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD, &pmap_pde_demotions, 0, "2MB page demotions"); static u_long pmap_pde_mappings; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, &pmap_pde_mappings, 0, "2MB page mappings"); static u_long pmap_pde_p_failures; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD, &pmap_pde_p_failures, 0, "2MB page promotion failures"); static u_long pmap_pde_promotions; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD, &pmap_pde_promotions, 0, "2MB page promotions"); static SYSCTL_NODE(_vm_pmap, OID_AUTO, pdpe, CTLFLAG_RD, 0, "1GB page mapping counters"); static u_long pmap_pdpe_demotions; SYSCTL_ULONG(_vm_pmap_pdpe, OID_AUTO, demotions, CTLFLAG_RD, &pmap_pdpe_demotions, 0, "1GB page demotions"); /*************************************************** * Low level helper routines..... ***************************************************/ static pt_entry_t pmap_swap_pat(pmap_t pmap, pt_entry_t entry) { int x86_pat_bits = X86_PG_PTE_PAT | X86_PG_PDE_PAT; switch (pmap->pm_type) { case PT_X86: case PT_RVI: /* Verify that both PAT bits are not set at the same time */ KASSERT((entry & x86_pat_bits) != x86_pat_bits, ("Invalid PAT bits in entry %#lx", entry)); /* Swap the PAT bits if one of them is set */ if ((entry & x86_pat_bits) != 0) entry ^= x86_pat_bits; break; case PT_EPT: /* * Nothing to do - the memory attributes are represented * the same way for regular pages and superpages. */ break; default: panic("pmap_switch_pat_bits: bad pm_type %d", pmap->pm_type); } return (entry); } /* * Determine the appropriate bits to set in a PTE or PDE for a specified * caching mode. */ static int pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde) { int cache_bits, pat_flag, pat_idx; if (mode < 0 || mode >= PAT_INDEX_SIZE || pat_index[mode] < 0) panic("Unknown caching mode %d\n", mode); switch (pmap->pm_type) { case PT_X86: case PT_RVI: /* The PAT bit is different for PTE's and PDE's. */ pat_flag = is_pde ? X86_PG_PDE_PAT : X86_PG_PTE_PAT; /* Map the caching mode to a PAT index. */ pat_idx = pat_index[mode]; /* Map the 3-bit index value into the PAT, PCD, and PWT bits. 
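* For example (editorial illustration): in the pat_works case PAT_WRITE_COMBINING maps to pat_idx 6 (binary 110), which sets the PAT and PCD bits and leaves PWT clear.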
*/ cache_bits = 0; if (pat_idx & 0x4) cache_bits |= pat_flag; if (pat_idx & 0x2) cache_bits |= PG_NC_PCD; if (pat_idx & 0x1) cache_bits |= PG_NC_PWT; break; case PT_EPT: cache_bits = EPT_PG_IGNORE_PAT | EPT_PG_MEMORY_TYPE(mode); break; default: panic("unsupported pmap type %d", pmap->pm_type); } return (cache_bits); } static int pmap_cache_mask(pmap_t pmap, boolean_t is_pde) { int mask; switch (pmap->pm_type) { case PT_X86: case PT_RVI: mask = is_pde ? X86_PG_PDE_CACHE : X86_PG_PTE_CACHE; break; case PT_EPT: mask = EPT_PG_IGNORE_PAT | EPT_PG_MEMORY_TYPE(0x7); break; default: panic("pmap_cache_mask: invalid pm_type %d", pmap->pm_type); } return (mask); } static __inline boolean_t pmap_ps_enabled(pmap_t pmap) { return (pg_ps_enabled && (pmap->pm_flags & PMAP_PDE_SUPERPAGE) != 0); } static void pmap_update_pde_store(pmap_t pmap, pd_entry_t *pde, pd_entry_t newpde) { switch (pmap->pm_type) { case PT_X86: break; case PT_RVI: case PT_EPT: /* * XXX * This is a little bogus since the generation number is * supposed to be bumped up when a region of the address * space is invalidated in the page tables. * * In this case the old PDE entry is valid but yet we want * to make sure that any mappings using the old entry are * invalidated in the TLB. * * The reason this works as expected is because we rendezvous * "all" host cpus and force any vcpu context to exit as a * side-effect. */ atomic_add_acq_long(&pmap->pm_eptgen, 1); break; default: panic("pmap_update_pde_store: bad pm_type %d", pmap->pm_type); } pde_store(pde, newpde); } /* * After changing the page size for the specified virtual address in the page * table, flush the corresponding entries from the processor's TLB. Only the * calling processor's TLB is affected. * * The calling thread must be pinned to a processor. */ static void pmap_update_pde_invalidate(pmap_t pmap, vm_offset_t va, pd_entry_t newpde) { pt_entry_t PG_G; if (pmap_type_guest(pmap)) return; KASSERT(pmap->pm_type == PT_X86, ("pmap_update_pde_invalidate: invalid type %d", pmap->pm_type)); PG_G = pmap_global_bit(pmap); if ((newpde & PG_PS) == 0) /* Demotion: flush a specific 2MB page mapping. */ invlpg(va); else if ((newpde & PG_G) == 0) /* * Promotion: flush every 4KB page mapping from the TLB * because there are too many to flush individually. */ invltlb(); else { /* * Promotion: flush every 4KB page mapping from the TLB, * including any global (PG_G) mappings. */ invltlb_glob(); } } #ifdef SMP /* * For SMP, these functions have to use the IPI mechanism for coherence. * * N.B.: Before calling any of the following TLB invalidation functions, * the calling processor must ensure that all stores updating a non- * kernel page table are globally performed. Otherwise, another * processor could cache an old, pre-update entry without being * invalidated. This can happen one of two ways: (1) The pmap becomes * active on another processor after its pm_active field is checked by * one of the following functions but before a store updating the page * table is globally performed. (2) The pmap becomes active on another * processor before its pm_active field is checked but due to * speculative loads one of the following functions still reads the * pmap as inactive on the other processor. * * The kernel page table is exempt because its pm_active field is * immutable. The kernel page table is always active on every * processor. */ /* * Interrupt the cpus that are executing in the guest context.
* This will force the vcpu to exit and the cached EPT mappings * will be invalidated by the host before the next vmresume. */ static __inline void pmap_invalidate_ept(pmap_t pmap) { int ipinum; sched_pin(); KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active), ("pmap_invalidate_ept: absurd pm_active")); /* * The TLB mappings associated with a vcpu context are not * flushed each time a different vcpu is chosen to execute. * * This is in contrast with a process's vtop mappings that * are flushed from the TLB on each context switch. * * Therefore we need to do more than just a TLB shootdown on * the active cpus in 'pmap->pm_active'. To do this we keep * track of the number of invalidations performed on this pmap. * * Each vcpu keeps a cache of this counter and compares it * just before a vmresume. If the counter is out-of-date an * invept will be done to flush stale mappings from the TLB. */ atomic_add_acq_long(&pmap->pm_eptgen, 1); /* * Force the vcpu to exit and trap back into the hypervisor. */ ipinum = pmap->pm_flags & PMAP_NESTED_IPIMASK; ipi_selected(pmap->pm_active, ipinum); sched_unpin(); } void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { cpuset_t *mask; u_int cpuid, i; if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_page: invalid type %d", pmap->pm_type)); sched_pin(); if (pmap == kernel_pmap) { invlpg(va); mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); if (pmap == PCPU_GET(curpmap)) invlpg(va); else if (pmap_pcid_enabled) pmap->pm_pcids[cpuid].pm_gen = 0; if (pmap_pcid_enabled) { CPU_FOREACH(i) { if (cpuid != i) pmap->pm_pcids[i].pm_gen = 0; } } mask = &pmap->pm_active; } smp_masked_invlpg(*mask, va); sched_unpin(); } /* 4k PTEs -- Chosen to exceed the total size of Broadwell L2 TLB */ #define PMAP_INVLPG_THRESHOLD (4 * 1024 * PAGE_SIZE) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { cpuset_t *mask; vm_offset_t addr; u_int cpuid, i; if (eva - sva >= PMAP_INVLPG_THRESHOLD) { pmap_invalidate_all(pmap); return; } if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_range: invalid type %d", pmap->pm_type)); sched_pin(); cpuid = PCPU_GET(cpuid); if (pmap == kernel_pmap) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); mask = &all_cpus; } else { if (pmap == PCPU_GET(curpmap)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } else if (pmap_pcid_enabled) { pmap->pm_pcids[cpuid].pm_gen = 0; } if (pmap_pcid_enabled) { CPU_FOREACH(i) { if (cpuid != i) pmap->pm_pcids[i].pm_gen = 0; } } mask = &pmap->pm_active; } smp_masked_invlpg_range(*mask, sva, eva); sched_unpin(); } void pmap_invalidate_all(pmap_t pmap) { cpuset_t *mask; struct invpcid_descr d; u_int cpuid, i; if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_all: invalid type %d", pmap->pm_type)); sched_pin(); if (pmap == kernel_pmap) { if (pmap_pcid_enabled && invpcid_works) { bzero(&d, sizeof(d)); invpcid(&d, INVPCID_CTXGLOB); } else { invltlb_glob(); } mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); if (pmap == PCPU_GET(curpmap)) { if (pmap_pcid_enabled) { if (invpcid_works) { d.pcid = pmap->pm_pcids[cpuid].pm_pcid; d.pad = 0; d.addr = 0; invpcid(&d, INVPCID_CTX); } else { load_cr3(pmap->pm_cr3 | pmap->pm_pcids [PCPU_GET(cpuid)].pm_pcid); } } else { invltlb(); } } else if (pmap_pcid_enabled) { pmap->pm_pcids[cpuid].pm_gen = 0; } if (pmap_pcid_enabled) { CPU_FOREACH(i) { if 
(cpuid != i) pmap->pm_pcids[i].pm_gen = 0; } } mask = &pmap->pm_active; } smp_masked_invltlb(*mask, pmap); sched_unpin(); } void pmap_invalidate_cache(void) { sched_pin(); wbinvd(); smp_cache_flush(); sched_unpin(); } struct pde_action { cpuset_t invalidate; /* processors that invalidate their TLB */ pmap_t pmap; vm_offset_t va; pd_entry_t *pde; pd_entry_t newpde; u_int store; /* processor that updates the PDE */ }; static void pmap_update_pde_action(void *arg) { struct pde_action *act = arg; if (act->store == PCPU_GET(cpuid)) pmap_update_pde_store(act->pmap, act->pde, act->newpde); } static void pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; if (CPU_ISSET(PCPU_GET(cpuid), &act->invalidate)) pmap_update_pde_invalidate(act->pmap, act->va, act->newpde); } /* * Change the page size for the specified virtual address in a way that * prevents any possibility of the TLB ever having two entries that map the * same virtual address using different page sizes. This is the recommended * workaround for Erratum 383 on AMD Family 10h processors. It prevents a * machine check exception for a TLB state that is improperly diagnosed as a * hardware error. */ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; cpuset_t active, other_cpus; u_int cpuid; sched_pin(); cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); if (pmap == kernel_pmap || pmap_type_guest(pmap)) active = all_cpus; else { active = pmap->pm_active; } if (CPU_OVERLAP(&active, &other_cpus)) { act.store = cpuid; act.invalidate = active; act.va = va; act.pmap = pmap; act.pde = pde; act.newpde = newpde; CPU_SET(cpuid, &active); smp_rendezvous_cpus(active, smp_no_rendevous_barrier, pmap_update_pde_action, pmap_update_pde_teardown, &act); } else { pmap_update_pde_store(pmap, pde, newpde); if (CPU_ISSET(cpuid, &active)) pmap_update_pde_invalidate(pmap, va, newpde); } sched_unpin(); } #else /* !SMP */ /* * Normal, non-SMP, invalidation functions. */ void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) { pmap->pm_eptgen++; return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_range: unknown type %d", pmap->pm_type)); if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) invlpg(va); else if (pmap_pcid_enabled) pmap->pm_pcids[0].pm_gen = 0; } void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) { pmap->pm_eptgen++; return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_range: unknown type %d", pmap->pm_type)); if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } else if (pmap_pcid_enabled) { pmap->pm_pcids[0].pm_gen = 0; } } void pmap_invalidate_all(pmap_t pmap) { struct invpcid_descr d; if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) { pmap->pm_eptgen++; return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_all: unknown type %d", pmap->pm_type)); if (pmap == kernel_pmap) { if (pmap_pcid_enabled && invpcid_works) { bzero(&d, sizeof(d)); invpcid(&d, INVPCID_CTXGLOB); } else { invltlb_glob(); } } else if (pmap == PCPU_GET(curpmap)) { if (pmap_pcid_enabled) { if (invpcid_works) { d.pcid = pmap->pm_pcids[0].pm_pcid; d.pad = 0; d.addr = 0; invpcid(&d, INVPCID_CTX); } else { load_cr3(pmap->pm_cr3 | pmap->pm_pcids[0]. 
pm_pcid); } } else { invltlb(); } } else if (pmap_pcid_enabled) { pmap->pm_pcids[0].pm_gen = 0; } } PMAP_INLINE void pmap_invalidate_cache(void) { wbinvd(); } static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { pmap_update_pde_store(pmap, pde, newpde); if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) pmap_update_pde_invalidate(pmap, va, newpde); else pmap->pm_pcids[0].pm_gen = 0; } #endif /* !SMP */ #define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024) void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force) { if (force) { sva &= ~(vm_offset_t)cpu_clflush_line_size; } else { KASSERT((sva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: sva not page-aligned")); KASSERT((eva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: eva not page-aligned")); } if ((cpu_feature & CPUID_SS) != 0 && !force) ; /* If "Self Snoop" is supported and allowed, do nothing. */ else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 && eva - sva < PMAP_CLFLUSH_THRESHOLD) { /* * XXX: Some CPUs fault, hang, or trash the local APIC * registers if we use CLFLUSH on the local APIC * range. The local APIC is always uncached, so we * don't need to flush for that range anyway. */ if (pmap_kextract(sva) == lapic_paddr) return; /* * Otherwise, do per-cache line flush. Use the mfence * instruction to insure that previous stores are * included in the write-back. The processor * propagates flush to other processors in the cache * coherence domain. */ mfence(); for (; sva < eva; sva += cpu_clflush_line_size) clflushopt(sva); mfence(); } else if ((cpu_feature & CPUID_CLFSH) != 0 && eva - sva < PMAP_CLFLUSH_THRESHOLD) { if (pmap_kextract(sva) == lapic_paddr) return; /* * Writes are ordered by CLFLUSH on Intel CPUs. */ if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); for (; sva < eva; sva += cpu_clflush_line_size) clflush(sva); if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); } else { /* * No targeted cache flush methods are supported by CPU, * or the supplied range is bigger than 2MB. * Globally invalidate cache. */ pmap_invalidate_cache(); } } /* * Remove the specified set of pages from the data and instruction caches. * * In contrast to pmap_invalidate_cache_range(), this function does not * rely on the CPU's self-snoop feature, because it is intended for use * when moving pages into a different cache domain. */ void pmap_invalidate_cache_pages(vm_page_t *pages, int count) { vm_offset_t daddr, eva; int i; bool useclflushopt; useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0; if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE || ((cpu_feature & CPUID_CLFSH) == 0 && !useclflushopt)) pmap_invalidate_cache(); else { if (useclflushopt || cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); for (i = 0; i < count; i++) { daddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pages[i])); eva = daddr + PAGE_SIZE; for (; daddr < eva; daddr += cpu_clflush_line_size) { if (useclflushopt) clflushopt(daddr); else clflush(daddr); } } if (useclflushopt || cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); } } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. 
*/ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { pdp_entry_t *pdpe; pd_entry_t *pde; pt_entry_t *pte, PG_V; vm_paddr_t pa; pa = 0; PG_V = pmap_valid_bit(pmap); PMAP_LOCK(pmap); pdpe = pmap_pdpe(pmap, va); if (pdpe != NULL && (*pdpe & PG_V) != 0) { if ((*pdpe & PG_PS) != 0) pa = (*pdpe & PG_PS_FRAME) | (va & PDPMASK); else { pde = pmap_pdpe_to_pde(pdpe, va); if ((*pde & PG_V) != 0) { if ((*pde & PG_PS) != 0) { pa = (*pde & PG_PS_FRAME) | (va & PDRMASK); } else { pte = pmap_pde_to_pte(pde, va); pa = (*pte & PG_FRAME) | (va & PAGE_MASK); } } } } PMAP_UNLOCK(pmap); return (pa); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pd_entry_t pde, *pdep; pt_entry_t pte, PG_RW, PG_V; vm_paddr_t pa; vm_page_t m; pa = 0; m = NULL; PG_RW = pmap_rw_bit(pmap); PG_V = pmap_valid_bit(pmap); PMAP_LOCK(pmap); retry: pdep = pmap_pde(pmap, va); if (pdep != NULL && (pde = *pdep)) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | (va & PDRMASK), &pa)) goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); } } else { pte = *pmap_pde_to_pte(pdep, va); if ((pte & PG_V) && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } } } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } vm_paddr_t pmap_kextract(vm_offset_t va) { pd_entry_t pde; vm_paddr_t pa; if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { pa = DMAP_TO_PHYS(va); } else { pde = *vtopde(va); if (pde & PG_PS) { pa = (pde & PG_PS_FRAME) | (va & PDRMASK); } else { /* * Beware of a concurrent promotion that changes the * PDE at this point! For example, vtopte() must not * be used to access the PTE because it would use the * new PDE. It is, however, safe to use the old PDE * because the page table page is preserved by the * promotion. */ pa = *pmap_pde_to_pte(&pde, va); pa = (pa & PG_FRAME) | (va & PAGE_MASK); } } return (pa); } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * Add a wired page to the kva. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { pt_entry_t *pte; pte = vtopte(va); pte_store(pte, pa | X86_PG_RW | X86_PG_V | X86_PG_G); } static __inline void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) { pt_entry_t *pte; int cache_bits; pte = vtopte(va); cache_bits = pmap_cache_bits(kernel_pmap, mode, 0); pte_store(pte, pa | X86_PG_RW | X86_PG_V | X86_PG_G | cache_bits); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *pte; pte = vtopte(va); pte_clear(pte); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. 
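* (Editorial note: amd64 takes the direct-mapped path described above; the body below simply returns PHYS_TO_DMAP(start) and leaves '*virt' unchanged.)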
*/ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { return PHYS_TO_DMAP(start); } /* * Add a list of wired pages to the kva. * This routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pt_entry_t *endpte, oldpte, pa, *pte; vm_page_t m; int cache_bits; oldpte = 0; pte = vtopte(sva); endpte = pte + count; while (pte < endpte) { m = *ma++; cache_bits = pmap_cache_bits(kernel_pmap, m->md.pat_mode, 0); pa = VM_PAGE_TO_PHYS(m) | cache_bits; if ((*pte & (PG_FRAME | X86_PG_PTE_CACHE)) != pa) { oldpte |= *pte; pte_store(pte, pa | X86_PG_G | X86_PG_RW | X86_PG_V); } pte++; } if (__predict_false((oldpte & X86_PG_V) != 0)) pmap_invalidate_range(kernel_pmap, sva, sva + count * PAGE_SIZE); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { KASSERT(va >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", va)); pmap_kremove(va); va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /*************************************************** * Page table page management routines..... ***************************************************/ static __inline void pmap_free_zero_pages(struct spglist *free) { vm_page_t m; while ((m = SLIST_FIRST(free)) != NULL) { SLIST_REMOVE_HEAD(free, plinks.s.ss); /* Preserve the page's PG_ZERO setting. */ vm_page_free_toq(m); } } /* * Schedule the specified unused page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } /* * Inserts the specified page table page into the specified pmap's collection * of idle page table pages. Each of a pmap's page table pages is responsible * for mapping a distinct range of virtual addresses. The pmap's collection is * ordered by this virtual address range. */ static __inline int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); return (vm_radix_insert(&pmap->pm_root, mpte)); } /* * Looks for a page table page mapping the specified virtual address in the * specified pmap's collection of idle page table pages. Returns NULL if there * is no page table page corresponding to the specified virtual address. */ static __inline vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); return (vm_radix_lookup(&pmap->pm_root, pmap_pde_pindex(va))); } /* * Removes the specified page table page from the specified pmap's collection * of idle page table pages. The specified page table page must be a member of * the pmap's collection. */ static __inline void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); vm_radix_remove(&pmap->pm_root, mpte->pindex); } /* * Decrements a page table page's wire count, which is used to record the * number of valid page table entries within the page.
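* (A fully populated page table page thus has a wire count of NPTEPG, i.e. 512.)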
If the wire count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. */ static inline boolean_t pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { --m->wire_count; if (m->wire_count == 0) { _pmap_unwire_ptp(pmap, va, m, free); return (TRUE); } else return (FALSE); } static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * unmap the page table page */ if (m->pindex >= (NUPDE + NUPDPE)) { /* PDP page */ pml4_entry_t *pml4; pml4 = pmap_pml4e(pmap, va); *pml4 = 0; } else if (m->pindex >= NUPDE) { /* PD page */ pdp_entry_t *pdp; pdp = pmap_pdpe(pmap, va); *pdp = 0; } else { /* PTE page */ pd_entry_t *pd; pd = pmap_pde(pmap, va); *pd = 0; } pmap_resident_count_dec(pmap, 1); if (m->pindex < NUPDE) { /* We just released a PT, unhold the matching PD */ vm_page_t pdpg; pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & PG_FRAME); pmap_unwire_ptp(pmap, va, pdpg, free); } if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) { /* We just released a PD, unhold the matching PDP */ vm_page_t pdppg; pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & PG_FRAME); pmap_unwire_ptp(pmap, va, pdppg, free); } /* * This is a release store so that the ordinary store unmapping * the page table page is globally performed before TLB shoot- * down is begun. */ atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ pmap_add_delayed_free_list(m, free, TRUE); } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, struct spglist *free) { vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); return (pmap_unwire_ptp(pmap, va, mpte, free)); } void pmap_pinit0(pmap_t pmap) { int i; PMAP_LOCK_INIT(pmap); pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); pmap->pm_cr3 = KPML4phys; pmap->pm_root.rt_root = 0; CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); pmap->pm_flags = pmap_flags; CPU_FOREACH(i) { pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE; pmap->pm_pcids[i].pm_gen = 0; } PCPU_SET(curpmap, kernel_pmap); pmap_activate(curthread); CPU_FILL(&kernel_pmap->pm_active); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ int pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags) { vm_page_t pml4pg; vm_paddr_t pml4phys; int i; /* * allocate the page directory page */ while ((pml4pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) VM_WAIT; pml4phys = VM_PAGE_TO_PHYS(pml4pg); pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(pml4phys); CPU_FOREACH(i) { pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE; pmap->pm_pcids[i].pm_gen = 0; } pmap->pm_cr3 = ~0; /* initialize to an invalid value */ if ((pml4pg->flags & PG_ZERO) == 0) pagezero(pmap->pm_pml4); /* * Do not install the host kernel mappings in the nested page * tables. These mappings are meaningless in the guest physical * address space. */ if ((pmap->pm_type = pm_type) == PT_X86) { pmap->pm_cr3 = pml4phys; /* Wire in kernel global address entries. 
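* These are the NKPML4E kernel map slots, the ndmpdpphys direct map slots, and the recursive slot at PML4PML4I; the underlying page directory pointer pages are shared by all PT_X86 pmaps.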
*/ for (i = 0; i < NKPML4E; i++) { pmap->pm_pml4[KPML4BASE + i] = (KPDPphys + ptoa(i)) | X86_PG_RW | X86_PG_V | PG_U; } for (i = 0; i < ndmpdpphys; i++) { pmap->pm_pml4[DMPML4I + i] = (DMPDPphys + ptoa(i)) | X86_PG_RW | X86_PG_V | PG_U; } /* install self-referential address mapping entry(s) */ pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; } pmap->pm_root.rt_root = 0; CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); pmap->pm_flags = flags; pmap->pm_eptgen = 0; return (1); } int pmap_pinit(pmap_t pmap) { return (pmap_pinit_type(pmap, PT_X86, pmap_flags)); } /* * This routine is called if the desired page table page does not exist. * * If page table page allocation fails, this routine may sleep before * returning NULL. It sleeps only if a lock pointer was given. * * Note: If a page allocation fails at page table level two or three, * one or two pages may be held during the wait, only to be released * afterwards. This conservative approach is easily argued to avoid * race conditions. */ static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) { vm_page_t m, pdppg, pdpg; pt_entry_t PG_A, PG_M, PG_RW, PG_V; PMAP_LOCK_ASSERT(pmap, MA_OWNED); PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if (lockp != NULL) { RELEASE_PV_LIST_LOCK(lockp); PMAP_UNLOCK(pmap); PMAP_ASSERT_NOT_IN_DI(); VM_WAIT; PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ if (ptepindex >= (NUPDE + NUPDPE)) { pml4_entry_t *pml4; vm_pindex_t pml4index; /* Wire up a new PDPE page */ pml4index = ptepindex - (NUPDE + NUPDPE); pml4 = &pmap->pm_pml4[pml4index]; *pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } else if (ptepindex >= NUPDE) { vm_pindex_t pml4index; vm_pindex_t pdpindex; pml4_entry_t *pml4; pdp_entry_t *pdp; /* Wire up a new PDE page */ pdpindex = ptepindex - NUPDE; pml4index = pdpindex >> NPML4EPGSHIFT; pml4 = &pmap->pm_pml4[pml4index]; if ((*pml4 & PG_V) == 0) { /* Have to allocate a new pdp, recurse */ if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index, lockp) == NULL) { --m->wire_count; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } } else { /* Add reference to pdp page */ pdppg = PHYS_TO_VM_PAGE(*pml4 & PG_FRAME); pdppg->wire_count++; } pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); /* Now find the pdp page */ pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; *pdp = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } else { vm_pindex_t pml4index; vm_pindex_t pdpindex; pml4_entry_t *pml4; pdp_entry_t *pdp; pd_entry_t *pd; /* Wire up a new PTE page */ pdpindex = ptepindex >> NPDPEPGSHIFT; pml4index = pdpindex >> NPML4EPGSHIFT; /* First, find the pdp and check that it's valid.
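* The index arithmetic above mirrors the 512-entry (9-bit) fan-out of each level: pdpindex = ptepindex >> NPDPEPGSHIFT, pml4index = pdpindex >> NPML4EPGSHIFT.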
*/ pml4 = &pmap->pm_pml4[pml4index]; if ((*pml4 & PG_V) == 0) { /* Have to allocate a new pd, recurse */ if (_pmap_allocpte(pmap, NUPDE + pdpindex, lockp) == NULL) { --m->wire_count; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; } else { pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; if ((*pdp & PG_V) == 0) { /* Have to allocate a new pd, recurse */ if (_pmap_allocpte(pmap, NUPDE + pdpindex, lockp) == NULL) { --m->wire_count; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } } else { /* Add reference to the pd page */ pdpg = PHYS_TO_VM_PAGE(*pdp & PG_FRAME); pdpg->wire_count++; } } pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & PG_FRAME); /* Now we know where the page directory page is */ pd = &pd[ptepindex & ((1ul << NPDEPGSHIFT) - 1)]; *pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } pmap_resident_count_inc(pmap, 1); return (m); } static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t pdpindex, ptepindex; pdp_entry_t *pdpe, PG_V; vm_page_t pdpg; PG_V = pmap_valid_bit(pmap); retry: pdpe = pmap_pdpe(pmap, va); if (pdpe != NULL && (*pdpe & PG_V) != 0) { /* Add a reference to the pd page. */ pdpg = PHYS_TO_VM_PAGE(*pdpe & PG_FRAME); pdpg->wire_count++; } else { /* Allocate a pd page. */ ptepindex = pmap_pde_pindex(va); pdpindex = ptepindex >> NPDPEPGSHIFT; pdpg = _pmap_allocpte(pmap, NUPDE + pdpindex, lockp); if (pdpg == NULL && lockp != NULL) goto retry; } return (pdpg); } static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t ptepindex; pd_entry_t *pd, PG_V; vm_page_t m; PG_V = pmap_valid_bit(pmap); /* * Calculate pagetable page index */ ptepindex = pmap_pde_pindex(va); retry: /* * Get the page directory entry */ pd = pmap_pde(pmap, va); /* * This supports switching from a 2MB page to a * normal 4K page. */ if (pd != NULL && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) { if (!pmap_demote_pde_locked(pmap, pd, va, lockp)) { /* * Invalidation of the 2MB page mapping may have caused * the deallocation of the underlying PD page. */ pd = NULL; } } /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (pd != NULL && (*pd & PG_V) != 0) { m = PHYS_TO_VM_PAGE(*pd & PG_FRAME); m->wire_count++; } else { /* * Here if the pte page isn't mapped, or if it has been * deallocated. */ m = _pmap_allocpte(pmap, ptepindex, lockp); if (m == NULL && lockp != NULL) goto retry; } return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
*/ void pmap_release(pmap_t pmap) { vm_page_t m; int i; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); KASSERT(vm_radix_is_empty(&pmap->pm_root), ("pmap_release: pmap has reserved page table page(s)")); KASSERT(CPU_EMPTY(&pmap->pm_active), ("releasing active pmap %p", pmap)); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4)); for (i = 0; i < NKPML4E; i++) /* KVA */ pmap->pm_pml4[KPML4BASE + i] = 0; for (i = 0; i < ndmpdpphys; i++)/* Direct Map */ pmap->pm_pml4[DMPML4I + i] = 0; pmap->pm_pml4[PML4PML4I] = 0; /* Recursive Mapping */ m->wire_count--; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); } static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; return sysctl_handle_long(oidp, &ksize, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "LU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "LU", "Amount of KVM free"); /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { vm_paddr_t paddr; vm_page_t nkpg; pd_entry_t *pde, newpdir; pdp_entry_t *pdpe; mtx_assert(&kernel_map->system_mtx, MA_OWNED); /* * Return if "addr" is within the range of kernel page table pages * that were preallocated during pmap bootstrap. Moreover, leave * "kernel_vm_end" and the kernel page table as they were. * * The correctness of this action is based on the following * argument: vm_map_insert() allocates contiguous ranges of the * kernel virtual address space. It calls this function if a range * ends after "kernel_vm_end". If the kernel is mapped between * "kernel_vm_end" and "addr", then the range cannot begin at * "kernel_vm_end". In fact, its beginning address cannot be less * than the kernel. Thus, there is no immediate need to allocate * any new kernel page table pages between "kernel_vm_end" and * "KERNBASE". 
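* The loop below grows the map in 2MB (NBPDR) steps; when the covering PDP entry is invalid, a new page directory page is allocated first and the step is retried.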
*/ if (KERNBASE < addr && addr <= KERNBASE + nkpt * NBPDR) return; addr = roundup2(addr, NBPDR); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { pdpe = pmap_pdpe(kernel_pmap, kernel_vm_end); if ((*pdpe & X86_PG_V) == 0) { /* We need a new PDP entry */ nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDPSHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); *pdpe = (pdp_entry_t)(paddr | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M); continue; /* try again */ } pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end); if ((*pde & X86_PG_V) != 0) { kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } continue; } nkpg = vm_page_alloc(NULL, pmap_pde_pindex(kernel_vm_end), VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); newpdir = paddr | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; pde_store(pde, newpdir); kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } } } /*************************************************** * page management routines. ***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 3); CTASSERT(_NPCPV == 168); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0 0xfffffffffffffffful #define PC_FREE1 0xfffffffffffffffful #define PC_FREE2 0x000000fffffffffful static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs, pv_entry_count; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. * * Returns NULL if PV entries were reclaimed from the specified pmap. 
* * We do not, however, unmap 2mpages because subsequent accesses will * allocate per-page pv entries until repromotion occurs, thereby * exacerbating the shortage of free pv entries. */ static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) { struct pch new_tail; struct pv_chunk *pc; struct md_page *pvh; pd_entry_t *pde; pmap_t pmap; pt_entry_t *pte, tpte; pt_entry_t PG_G, PG_A, PG_M, PG_RW; pv_entry_t pv; vm_offset_t va; vm_page_t m, m_pc; struct spglist free; uint64_t inuse; int bit, field, freed; PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL")); pmap = NULL; m_pc = NULL; PG_G = PG_A = PG_M = PG_RW = 0; SLIST_INIT(&free); TAILQ_INIT(&new_tail); pmap_delayed_invl_started(); mtx_lock(&pv_chunks_mutex); while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && SLIST_EMPTY(&free)) { TAILQ_REMOVE(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); if (pmap != pc->pc_pmap) { if (pmap != NULL) { pmap_invalidate_all(pmap); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } pmap_delayed_invl_finished(); pmap_delayed_invl_started(); pmap = pc->pc_pmap; /* Avoid deadlock and lock recursion. */ if (pmap > locked_pmap) { RELEASE_PV_LIST_LOCK(lockp); PMAP_LOCK(pmap); } else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { pmap = NULL; TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); mtx_lock(&pv_chunks_mutex); continue; } PG_G = pmap_global_bit(pmap); PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); } /* * Destroy every non-wired, 4 KB page mapping in the chunk. */ freed = 0; for (field = 0; field < _NPCM; field++) { for (inuse = ~pc->pc_map[field] & pc_freemask[field]; inuse != 0; inuse &= ~(1UL << bit)) { bit = bsfq(inuse); pv = &pc->pc_pventry[field * 64 + bit]; va = pv->pv_va; pde = pmap_pde(pmap, va); if ((*pde & PG_PS) != 0) continue; pte = pmap_pde_to_pte(pde, va); if ((*pte & PG_W) != 0) continue; tpte = pte_load_clear(pte); if ((tpte & PG_G) != 0) pmap_invalidate_page(pmap, va); m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if ((tpte & PG_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) { vm_page_aflag_clear(m, PGA_WRITEABLE); } } pmap_delayed_invl_page(m); pc->pc_map[field] |= 1UL << bit; pmap_unuse_pt(pmap, va, *pde, &free); freed++; } } if (freed == 0) { TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); mtx_lock(&pv_chunks_mutex); continue; } /* Every freed mapping is for a 4 KB page. */ pmap_resident_count_dec(pmap, freed); PV_STAT(atomic_add_long(&pv_entry_frees, freed)); PV_STAT(atomic_add_int(&pv_entry_spare, freed)); PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 && pc->pc_map[2] == PC_FREE2) { PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); /* Entire chunk is free; return it. */ m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m_pc->phys_addr); mtx_lock(&pv_chunks_mutex); break; } TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); mtx_lock(&pv_chunks_mutex); /* One freed pv entry in locked_pmap is sufficient. 
*/ if (pmap == locked_pmap) break; } TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); mtx_unlock(&pv_chunks_mutex); if (pmap != NULL) { pmap_invalidate_all(pmap); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } pmap_delayed_invl_finished(); if (m_pc == NULL && !SLIST_EMPTY(&free)) { m_pc = SLIST_FIRST(&free); SLIST_REMOVE_HEAD(&free, plinks.s.ss); /* Recycle a freed page table page. */ m_pc->wire_count = 1; atomic_add_int(&vm_cnt.v_wire_count, 1); } pmap_free_zero_pages(&free); return (m_pc); } /* * free the pv_entry back to the free list */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { struct pv_chunk *pc; int idx, field, bit; PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_frees, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, 1)); PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 64; bit = idx % 64; pc->pc_map[field] |= 1ul << bit; if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || pc->pc_map[2] != PC_FREE2) { /* 98% of the time, pc is already at the head of the list. */ if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; mtx_lock(&pv_chunks_mutex); TAILQ_REMOVE(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); vm_page_unwire(m, PQ_NONE); vm_page_free(m); } /* * Returns a new PV entry, allocating a new PV chunk from the system when * needed. If this PV chunk allocation fails and a PV list lock pointer was * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is * returned. * * The given PV list lock may be released. 
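* Free slots are tracked by the three-word pc_map bitmap: 168 entries = 64 + 64 + 40 bits, which is why PC_FREE2 keeps only its low 40 bits set; an allocation takes the lowest set bit (bsfq), yielding entry index field * 64 + bit.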
*/ static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp) { int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = bsfq(pc->pc_map[field]); break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 64 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); return (pv); } } /* No free items, allocate another chunk */ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { if (lockp == NULL) { PV_STAT(pc_chunk_tryfail++); return (NULL); } m = reclaim_pv_chunk(pmap, lockp); if (m == NULL) goto retry; } PV_STAT(atomic_add_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); dump_add_page(m->phys_addr); pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; mtx_lock(&pv_chunks_mutex); TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); return (pv); } /* * Returns the number of one bits within the given PV chunk map. * * The errata for Intel processors state that "POPCNT Instruction May * Take Longer to Execute Than Expected". It is believed that the * issue is the spurious dependency on the destination register. * Provide a hint to the register rename logic that the destination * value is overwritten, by clearing it, as suggested in the * optimization manual. It should be cheap for unaffected processors * as well. * * Reference numbers for the errata are * 4th Gen Core: HSD146 * 5th Gen Core: BDM85 * 6th Gen Core: SKL029 */ static int popcnt_pc_map_pq(uint64_t *map) { u_long result, tmp; __asm __volatile("xorl %k0,%k0;popcntq %2,%0;" "xorl %k1,%k1;popcntq %3,%1;addl %k1,%k0;" "xorl %k1,%k1;popcntq %4,%1;addl %k1,%k0" : "=&r" (result), "=&r" (tmp) : "m" (map[0]), "m" (map[1]), "m" (map[2])); return (result); } /* * Ensure that the number of spare PV entries in the specified pmap meets or * exceeds the given count, "needed". * * The given PV list lock may be released. */ static void reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp) { struct pch new_tail; struct pv_chunk *pc; int avail, free; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL")); /* * Newly allocated PV chunks must be stored in a private list until * the required number of PV chunks have been allocated. Otherwise, * reclaim_pv_chunk() could recycle one of these chunks. In * contrast, these chunks must be added to the pmap upon allocation.
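* For example, demoting a 2MB mapping requires NPTEPG - 1 == 511 spare entries, i.e. up to four new chunks of _NPCPV == 168 entries each when no spares exist.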
*/ TAILQ_INIT(&new_tail); retry: avail = 0; TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) { #ifndef __POPCNT__ if ((cpu_feature2 & CPUID2_POPCNT) == 0) bit_count((bitstr_t *)pc->pc_map, 0, sizeof(pc->pc_map) * NBBY, &free); else #endif free = popcnt_pc_map_pq(pc->pc_map); if (free == 0) break; avail += free; if (avail >= needed) break; } for (; avail < needed; avail += _NPCPV) { m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { m = reclaim_pv_chunk(pmap, lockp); if (m == NULL) goto retry; } PV_STAT(atomic_add_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); dump_add_page(m->phys_addr); pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0; pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV)); } if (!TAILQ_EMPTY(&new_tail)) { mtx_lock(&pv_chunks_mutex); TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); mtx_unlock(&pv_chunks_mutex); } } /* * First find and then remove the pv entry for the specified pmap and virtual * address from the specified pv list. Returns the pv entry if found and NULL * otherwise. This operation can be performed on pv lists for either 4KB or * 2MB page mappings. */ static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; break; } } return (pv); } /* * After demotion from a 2MB page mapping to 512 4KB page mappings, * destroy the pv entry for the 2MB page mapping and reinstantiate the pv * entries for each of the 4KB page mappings. */ static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp) { struct md_page *pvh; struct pv_chunk *pc; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; int bit, field; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_demote_pde: pa is not 2mpage aligned")); CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); /* * Transfer the 2mpage's pv entry for this mapping to the first * page's pv list. Once this transfer begins, the pv list lock * must not be released until the last pv entry is reinstantiated. */ pvh = pa_to_pvh(pa); va = trunc_2mpage(va); pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); m = PHYS_TO_VM_PAGE(pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; /* Instantiate the remaining NPTEPG - 1 pv entries. 
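* One entry is created for each 4KB page from va + PAGE_SIZE through va + NBPDR - PAGE_SIZE; the 2mpage's own entry was transferred to the first page above.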
*/ PV_STAT(atomic_add_long(&pv_entry_allocs, NPTEPG - 1)); va_last = va + NBPDR - PAGE_SIZE; for (;;) { pc = TAILQ_FIRST(&pmap->pm_pvchunk); KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 || pc->pc_map[2] != 0, ("pmap_pv_demote_pde: missing spare")); for (field = 0; field < _NPCM; field++) { while (pc->pc_map[field]) { bit = bsfq(pc->pc_map[field]); pc->pc_map[field] &= ~(1ul << bit); pv = &pc->pc_pventry[field * 64 + bit]; va += PAGE_SIZE; pv->pv_va = va; m++; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_pv_demote_pde: page %p is not managed", m)); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if (va == va_last) goto out; } } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } out: if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } PV_STAT(atomic_add_long(&pv_entry_count, NPTEPG - 1)); PV_STAT(atomic_subtract_int(&pv_entry_spare, NPTEPG - 1)); } /* * After promotion from 512 4KB page mappings to a single 2MB page mapping, * replace the many pv entries for the 4KB page mappings by a single pv entry * for the 2MB page mapping. */ static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; KASSERT((pa & PDRMASK) == 0, ("pmap_pv_promote_pde: pa is not 2mpage aligned")); CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); /* * Transfer the first page's pv entry for this mapping to the 2mpage's * pv list. Aside from avoiding the cost of a call to get_pv_entry(), * a transfer avoids the possibility that get_pv_entry() calls * reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the * mappings that is being promoted. */ m = PHYS_TO_VM_PAGE(pa); va = trunc_2mpage(va); pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; /* Free the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { m++; va += PAGE_SIZE; pmap_pvh_free(&m->md, pmap, va); } while (va < va_last); } /* * First find and then destroy the pv entry for the specified pmap and virtual * address. This operation can be performed on pv lists for either 4KB or 2MB * page mappings. */ static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } /* * Conditionally create the PV entry for a 4KB page mapping if the required * memory can be allocated without resorting to reclamation. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp) { pv_entry_t pv; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, NULL)) != NULL) { pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; return (TRUE); } else return (FALSE); } /* * Conditionally create the PV entry for a 2MB page mapping if the required * memory can be allocated without resorting to reclamation. 
*/ static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp) { struct md_page *pvh; pv_entry_t pv; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, NULL)) != NULL) { pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; return (TRUE); } else return (FALSE); } /* * Fills a page table page with mappings to consecutive physical pages. */ static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte) { pt_entry_t *pte; for (pte = firstpte; pte < firstpte + NPTEPG; pte++) { *pte = newpte; newpte += PAGE_SIZE; } } /* * Tries to demote a 2MB page mapping. If demotion fails, the 2MB page * mapping is invalidated. */ static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { struct rwlock *lock; boolean_t rv; lock = NULL; rv = pmap_demote_pde_locked(pmap, pde, va, &lock); if (lock != NULL) rw_wunlock(lock); return (rv); } static boolean_t pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp) { pd_entry_t newpde, oldpde; pt_entry_t *firstpte, newpte; pt_entry_t PG_A, PG_G, PG_M, PG_RW, PG_V; vm_paddr_t mptepa; vm_page_t mpte; struct spglist free; int PG_PTE_CACHE; PG_G = pmap_global_bit(pmap); PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_PTE_CACHE = pmap_cache_mask(pmap, 0); PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpde = *pde; KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); if ((oldpde & PG_A) != 0 && (mpte = pmap_lookup_pt_page(pmap, va)) != NULL) pmap_remove_pt_page(pmap, mpte); else { KASSERT((oldpde & PG_W) == 0, ("pmap_demote_pde: page table page for a wired mapping" " is missing")); /* * Invalidate the 2MB page mapping and return "failure" if the * mapping was never accessed or the allocation of the new * page table page fails. If the 2MB page mapping belongs to * the direct map region of the kernel's address space, then * the page allocation request specifies the highest possible * priority (VM_ALLOC_INTERRUPT). Otherwise, the priority is * normal. Page table pages are preallocated for every other * part of the kernel address space, so the direct map region * is the only part of the kernel address space that must be * handled here. */ if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL, pmap_pde_pindex(va), (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { SLIST_INIT(&free); pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free, lockp); pmap_invalidate_page(pmap, trunc_2mpage(va)); pmap_free_zero_pages(&free); CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } if (va < VM_MAXUSER_ADDRESS) pmap_resident_count_inc(pmap, 1); } mptepa = VM_PAGE_TO_PHYS(mpte); firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa); newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; KASSERT((oldpde & PG_A) != 0, ("pmap_demote_pde: oldpde is missing PG_A")); KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, ("pmap_demote_pde: oldpde is missing PG_M")); newpte = oldpde & ~PG_PS; newpte = pmap_swap_pat(pmap, newpte); /* * If the page table page is new, initialize it. 
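* Initialization fills all NPTEPG slots with 4KB PTEs reproducing the 2MB mapping; pmap_fill_ptp() advances the physical frame by PAGE_SIZE per slot.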
*/ if (mpte->wire_count == 1) { mpte->wire_count = NPTEPG; pmap_fill_ptp(firstpte, newpte); } KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), ("pmap_demote_pde: firstpte and newpte map different physical" " addresses")); /* * If the mapping has changed attributes, update the page table * entries. */ if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) pmap_fill_ptp(firstpte, newpte); /* * The spare PV entries must be reserved prior to demoting the * mapping, that is, prior to changing the PDE. Otherwise, the state * of the PDE and the PV lists will be inconsistent, which can result * in reclaim_pv_chunk() attempting to remove a PV entry from the * wrong PV list and pmap_pv_demote_pde() failing to find the expected * PV entry for the 2MB page mapping that is being demoted. */ if ((oldpde & PG_MANAGED) != 0) reserve_pv_entries(pmap, NPTEPG - 1, lockp); /* * Demote the mapping. This pmap is locked. The old PDE has * PG_A set. If the old PDE has PG_RW set, it also has PG_M * set. Thus, there is no danger of a race with another * processor changing the setting of PG_A and/or PG_M between * the read above and the store below. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, newpde); else pde_store(pde, newpde); /* * Invalidate a stale recursive mapping of the page table page. */ if (va >= VM_MAXUSER_ADDRESS) pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); /* * Demote the PV entry. */ if ((oldpde & PG_MANAGED) != 0) pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME, lockp); atomic_add_long(&pmap_pde_demotions, 1); CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx" " in pmap %p", va, pmap); return (TRUE); } /* * pmap_remove_kernel_pde: Remove a kernel superpage mapping. */ static void pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde; vm_paddr_t mptepa; vm_page_t mpte; KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); mpte = pmap_lookup_pt_page(pmap, va); if (mpte == NULL) panic("pmap_remove_kernel_pde: Missing pt page."); pmap_remove_pt_page(pmap, mpte); mptepa = VM_PAGE_TO_PHYS(mpte); newpde = mptepa | X86_PG_M | X86_PG_A | X86_PG_RW | X86_PG_V; /* * Initialize the page table page. */ pagezero((void *)PHYS_TO_DMAP(mptepa)); /* * Demote the mapping. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, newpde); else pde_store(pde, newpde); /* * Invalidate a stale recursive mapping of the page table page. */ pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); } /* * pmap_remove_pde: do the things to unmap a superpage in a process */ static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; pd_entry_t oldpde; vm_offset_t eva, va; vm_page_t m, mpte; pt_entry_t PG_G, PG_A, PG_M, PG_RW; PG_G = pmap_global_bit(pmap); PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_remove_pde: sva is not 2mpage aligned")); oldpde = pte_load_clear(pdq); if (oldpde & PG_W) pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; /* * Machines that don't support invlpg also don't support * PG_G.
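* Hence, when PG_G is set here, the mapping belongs to the kernel pmap and a single targeted invalidation of the kernel mapping suffices, as below.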
*/ if (oldpde & PG_G) pmap_invalidate_page(kernel_pmap, sva); pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE); if (oldpde & PG_MANAGED) { CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, oldpde & PG_PS_FRAME); pvh = pa_to_pvh(oldpde & PG_PS_FRAME); pmap_pvh_free(pvh, pmap, sva); eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) { if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpde & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); if (TAILQ_EMPTY(&m->md.pv_list) && TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); pmap_delayed_invl_page(m); } } if (pmap == kernel_pmap) { pmap_remove_kernel_pde(pmap, pdq, sva); } else { mpte = pmap_lookup_pt_page(pmap, sva); if (mpte != NULL) { pmap_remove_pt_page(pmap, mpte); pmap_resident_count_dec(pmap, 1); KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pde: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, free, FALSE); atomic_subtract_int(&vm_cnt.v_wire_count, 1); } } return (pmap_unuse_pt(pmap, sva, *pmap_pdpe(pmap, sva), free)); } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; pt_entry_t oldpte, PG_A, PG_M, PG_RW; vm_page_t m; PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpte = pte_load_clear(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; pmap_resident_count_dec(pmap, 1); if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpte & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); pmap_pvh_free(&m->md, pmap, va); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } pmap_delayed_invl_page(m); } return (pmap_unuse_pt(pmap, va, ptepde, free)); } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, struct spglist *free) { struct rwlock *lock; pt_entry_t *pte, PG_V; PG_V = pmap_valid_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((*pde & PG_V) == 0) return; pte = pmap_pde_to_pte(pde, va); if ((*pte & PG_V) == 0) return; lock = NULL; pmap_remove_pte(pmap, pte, va, *pde, free, &lock); if (lock != NULL) rw_wunlock(lock); pmap_invalidate_page(pmap, va); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct rwlock *lock; vm_offset_t va, va_next; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t ptpaddr, *pde; pt_entry_t *pte, PG_G, PG_V; struct spglist free; int anyvalid; PG_G = pmap_global_bit(pmap); PG_V = pmap_valid_bit(pmap); /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; anyvalid = 0; SLIST_INIT(&free); pmap_delayed_invl_started(); PMAP_LOCK(pmap); /* * Special handling of removing one page: a very * common operation for which we can short-circuit some * code.
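* The one-page case below skips the full PML4/PDP/PD walk and the ranged-invalidation bookkeeping of the main loop.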
*/ if (sva + PAGE_SIZE == eva) { pde = pmap_pde(pmap, sva); if (pde && (*pde & PG_PS) == 0) { pmap_remove_page(pmap, sva, pde, &free); goto out; } } lock = NULL; for (; sva < eva; sva = va_next) { if (pmap->pm_stats.resident_count == 0) break; pml4e = pmap_pml4e(pmap, sva); if ((*pml4e & PG_V) == 0) { va_next = (sva + NBPML4) & ~PML4MASK; if (va_next < sva) va_next = eva; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, sva); if ((*pdpe & PG_V) == 0) { va_next = (sva + NBPDP) & ~PDPMASK; if (va_next < sva) va_next = eva; continue; } /* * Calculate index for next page table. */ va_next = (sva + NBPDR) & ~PDRMASK; if (va_next < sva) va_next = eva; pde = pmap_pdpe_to_pde(pdpe, sva); ptpaddr = *pde; /* * Weed out invalid mappings. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { /* * Are we removing the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == va_next && eva >= va_next) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_remove_pde(). */ if ((ptpaddr & PG_G) == 0) anyvalid = 1; pmap_remove_pde(pmap, pde, sva, &free, &lock); continue; } else if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) { /* The large page mapping was destroyed. */ continue; } else ptpaddr = *pde; } /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (va_next > eva) va_next = eva; va = va_next; for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, sva += PAGE_SIZE) { if (*pte == 0) { if (va != va_next) { pmap_invalidate_range(pmap, va, sva); va = va_next; } continue; } if ((*pte & PG_G) == 0) anyvalid = 1; else if (va == va_next) va = sva; if (pmap_remove_pte(pmap, pte, sva, ptpaddr, &free, &lock)) { sva += PAGE_SIZE; break; } } if (va != va_next) pmap_invalidate_range(pmap, va, sva); } if (lock != NULL) rw_wunlock(lock); out: if (anyvalid) pmap_invalidate_all(pmap); PMAP_UNLOCK(pmap); pmap_delayed_invl_finished(); pmap_free_zero_pages(&free); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { struct md_page *pvh; pv_entry_t pv; pmap_t pmap; struct rwlock *lock; pt_entry_t *pte, tpte, PG_A, PG_M, PG_RW; pd_entry_t *pde; vm_offset_t va; struct spglist free; int pvh_gen, md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); lock = VM_PAGE_TO_PV_LIST_LOCK(m); pvh = (m->flags & PG_FICTITIOUS) != 0 ? 
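/* fictitious pages have no 2mpage pv list; use the shared dummy head */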
&pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); retry: rw_wlock(lock); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { rw_wunlock(lock); PMAP_UNLOCK(pmap); goto retry; } } va = pv->pv_va; pde = pmap_pde(pmap, va); (void)pmap_demote_pde_locked(pmap, pde, va, &lock); PMAP_UNLOCK(pmap); } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { rw_wunlock(lock); PMAP_UNLOCK(pmap); goto retry; } } PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); pmap_resident_count_dec(pmap, 1); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found" " a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); tpte = pte_load_clear(pte); if (tpte & PG_W) pmap->pm_stats.wired_count--; if (tpte & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, *pde, &free); pmap_invalidate_page(pmap, pv->pv_va); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(lock); pmap_delayed_invl_wait(m); pmap_free_zero_pages(&free); } /* * pmap_protect_pde: do the things to protect a 2mpage in a process */ static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot) { pd_entry_t newpde, oldpde; vm_offset_t eva, va; vm_page_t m; boolean_t anychanged; pt_entry_t PG_G, PG_M, PG_RW; PG_G = pmap_global_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_protect_pde: sva is not 2mpage aligned")); anychanged = FALSE; retry: oldpde = newpde = *pde; if (oldpde & PG_MANAGED) { eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); } if ((prot & VM_PROT_WRITE) == 0) newpde &= ~(PG_RW | PG_M); if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; if (newpde != oldpde) { if (!atomic_cmpset_long(pde, oldpde, newpde)) goto retry; if (oldpde & PG_G) pmap_invalidate_page(pmap, sva); else anychanged = TRUE; } return (anychanged); } /* * Set the physical protection on the * specified range of this map as requested. 
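* VM_PROT_NONE is delegated to pmap_remove(); a request that leaves both write and execute permission enabled requires no PTE change and returns at once.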
*/ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t va_next; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t ptpaddr, *pde; pt_entry_t *pte, PG_G, PG_M, PG_RW, PG_V; boolean_t anychanged; KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); if (prot == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == (VM_PROT_WRITE|VM_PROT_EXECUTE)) return; PG_G = pmap_global_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); anychanged = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { pml4e = pmap_pml4e(pmap, sva); if ((*pml4e & PG_V) == 0) { va_next = (sva + NBPML4) & ~PML4MASK; if (va_next < sva) va_next = eva; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, sva); if ((*pdpe & PG_V) == 0) { va_next = (sva + NBPDP) & ~PDPMASK; if (va_next < sva) va_next = eva; continue; } va_next = (sva + NBPDR) & ~PDRMASK; if (va_next < sva) va_next = eva; pde = pmap_pdpe_to_pde(pdpe, sva); ptpaddr = *pde; /* * Weed out invalid mappings. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { /* * Are we protecting the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == va_next && eva >= va_next) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_protect_pde(). */ if (pmap_protect_pde(pmap, pde, sva, prot)) anychanged = TRUE; continue; } else if (!pmap_demote_pde(pmap, pde, sva)) { /* * The large page mapping was destroyed. */ continue; } } if (va_next > eva) va_next = eva; for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, sva += PAGE_SIZE) { pt_entry_t obits, pbits; vm_page_t m; retry: obits = pbits = *pte; if ((pbits & PG_V) == 0) continue; if ((prot & VM_PROT_WRITE) == 0) { if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == (PG_MANAGED | PG_M | PG_RW)) { m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); vm_page_dirty(m); } pbits &= ~(PG_RW | PG_M); } if ((prot & VM_PROT_EXECUTE) == 0) pbits |= pg_nx; if (pbits != obits) { if (!atomic_cmpset_long(pte, obits, pbits)) goto retry; if (obits & PG_G) pmap_invalidate_page(pmap, sva); else anychanged = TRUE; } } } if (anychanged) pmap_invalidate_all(pmap); PMAP_UNLOCK(pmap); } /* * Tries to promote the 512, contiguous 4KB page mappings that are within a * single page table page (PTP) to a single 2MB page mapping. For promotion * to occur, two conditions must be met: (1) the 4KB page mappings must map * aligned, contiguous physical memory and (2) the 4KB page mappings must have * identical characteristics. */ static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp) { pd_entry_t newpde; pt_entry_t *firstpte, oldpte, pa, *pte; pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V; vm_page_t mpte; int PG_PTE_CACHE; PG_A = pmap_accessed_bit(pmap); PG_G = pmap_global_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); PG_PTE_CACHE = pmap_cache_mask(pmap, 0); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Examine the first PTE in the specified PTP. Abort if this PTE is * either invalid, unused, or does not map the first 4KB physical page * within a 2MB page. 
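* Concretely, (newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) must equal (PG_A | PG_V): valid, accessed, and with a 2MB-aligned physical frame.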
*/ firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME); setpde: newpde = *firstpte; if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) { atomic_add_long(&pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" " in pmap %p", va, pmap); return; } if ((newpde & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared without * a TLB invalidation. */ if (!atomic_cmpset_long(firstpte, newpde, newpde & ~PG_RW)) goto setpde; newpde &= ~PG_RW; } /* * Examine each of the other PTEs in the specified PTP. Abort if this * PTE maps an unexpected 4KB physical page or does not have identical * characteristics to the first PTE. */ pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE; for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { setpte: oldpte = *pte; if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) { atomic_add_long(&pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" " in pmap %p", va, pmap); return; } if ((oldpte & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared * without a TLB invalidation. */ if (!atomic_cmpset_long(pte, oldpte, oldpte & ~PG_RW)) goto setpte; oldpte &= ~PG_RW; CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#lx" " in pmap %p", (oldpte & PG_FRAME & PDRMASK) | (va & ~PDRMASK), pmap); } if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) { atomic_add_long(&pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" " in pmap %p", va, pmap); return; } pa -= PAGE_SIZE; } /* * Save the page table page in its current state until the PDE * mapping the superpage is demoted by pmap_demote_pde() or * destroyed by pmap_remove_pde(). */ mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_promote_pde: page table page is out of range")); KASSERT(mpte->pindex == pmap_pde_pindex(va), ("pmap_promote_pde: page table page's pindex is wrong")); if (pmap_insert_pt_page(pmap, mpte)) { atomic_add_long(&pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx in pmap %p", va, pmap); return; } /* * Promote the pv entries. */ if ((newpde & PG_MANAGED) != 0) pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME, lockp); /* * Propagate the PAT index to its proper position. */ newpde = pmap_swap_pat(pmap, newpde); /* * Map the superpage. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, PG_PS | newpde); else pde_store(pde, PG_PS | newpde); atomic_add_long(&pmap_pde_promotions, 1); CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx" " in pmap %p", va, pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. * * When destroying both a page table and PV entry, this function * performs the TLB invalidation before releasing the PV list * lock, so we do not need pmap_delayed_invl_page() calls here. 
*/ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind __unused) { struct rwlock *lock; pd_entry_t *pde; pt_entry_t *pte, PG_G, PG_A, PG_M, PG_RW, PG_V; pt_entry_t newpte, origpte; pv_entry_t pv; vm_paddr_t opa, pa; vm_page_t mpte, om; boolean_t nosleep; PG_A = pmap_accessed_bit(pmap); PG_G = pmap_global_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); va = trunc_page(va); KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)", va)); KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva || va >= kmi.clean_eva, ("pmap_enter: managed mapping within the clean submap")); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); pa = VM_PAGE_TO_PHYS(m); newpte = (pt_entry_t)(pa | PG_A | PG_V); if ((flags & VM_PROT_WRITE) != 0) newpte |= PG_M; if ((prot & VM_PROT_WRITE) != 0) newpte |= PG_RW; KASSERT((newpte & (PG_M | PG_RW)) != PG_M, ("pmap_enter: flags includes VM_PROT_WRITE but prot doesn't")); if ((prot & VM_PROT_EXECUTE) == 0) newpte |= pg_nx; if ((flags & PMAP_ENTER_WIRED) != 0) newpte |= PG_W; if (va < VM_MAXUSER_ADDRESS) newpte |= PG_U; if (pmap == kernel_pmap) newpte |= PG_G; newpte |= pmap_cache_bits(pmap, m->md.pat_mode, 0); /* * Set modified bit gratuitously for writeable mappings if * the page is unmanaged. We do not want to take a fault * to do the dirty bit accounting for these mappings. */ if ((m->oflags & VPO_UNMANAGED) != 0) { if ((newpte & PG_RW) != 0) newpte |= PG_M; } mpte = NULL; lock = NULL; PMAP_LOCK(pmap); /* * In the case that a page table page is not * resident, we are creating it here. */ retry: pde = pmap_pde(pmap, va); if (pde != NULL && (*pde & PG_V) != 0 && ((*pde & PG_PS) == 0 || pmap_demote_pde_locked(pmap, pde, va, &lock))) { pte = pmap_pde_to_pte(pde, va); if (va < VM_MAXUSER_ADDRESS && mpte == NULL) { mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); mpte->wire_count++; } } else if (va < VM_MAXUSER_ADDRESS) { /* * Here if the pte page isn't mapped, or if it has been * deallocated. */ nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; mpte = _pmap_allocpte(pmap, pmap_pde_pindex(va), nosleep ? NULL : &lock); if (mpte == NULL && nosleep) { if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); return (KERN_RESOURCE_SHORTAGE); } goto retry; } else panic("pmap_enter: invalid page directory va=%#lx", va); origpte = *pte; /* * Is the specified virtual address already mapped? */ if ((origpte & PG_V) != 0) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if ((newpte & PG_W) != 0 && (origpte & PG_W) == 0) pmap->pm_stats.wired_count++; else if ((newpte & PG_W) == 0 && (origpte & PG_W) != 0) pmap->pm_stats.wired_count--; /* * Remove the extra PT page reference. */ if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%lx", va)); } /* * Has the physical page changed? */ opa = origpte & PG_FRAME; if (opa == pa) { /* * No, might be a protection or wiring change. 
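* If the old and new PTEs differ only in PG_M and/or PG_A, the entry is left untouched via the "unchanged" path below.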
*/ if ((origpte & PG_MANAGED) != 0) { newpte |= PG_MANAGED; if ((newpte & PG_RW) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); } if (((origpte ^ newpte) & ~(PG_M | PG_A)) == 0) goto unchanged; goto validate; } } else { /* * Increment the counters. */ if ((newpte & PG_W) != 0) pmap->pm_stats.wired_count++; pmap_resident_count_inc(pmap, 1); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0) { newpte |= PG_MANAGED; pv = get_pv_entry(pmap, &lock); pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if ((newpte & PG_RW) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); } /* * Update the PTE. */ if ((origpte & PG_V) != 0) { validate: origpte = pte_load_store(pte, newpte); opa = origpte & PG_FRAME; if (opa != pa) { if ((origpte & PG_MANAGED) != 0) { om = PHYS_TO_VM_PAGE(opa); if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(om); if ((origpte & PG_A) != 0) vm_page_aflag_set(om, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); pmap_pvh_free(&om->md, pmap, va); if ((om->aflags & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&om->md.pv_list) && ((om->flags & PG_FICTITIOUS) != 0 || TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) vm_page_aflag_clear(om, PGA_WRITEABLE); } } else if ((newpte & PG_M) == 0 && (origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((origpte & PG_MANAGED) != 0) vm_page_dirty(m); /* * Although the PTE may still have PG_RW set, TLB * invalidation may nonetheless be required because * the PTE no longer has PG_M set. */ } else if ((origpte & PG_NX) != 0 || (newpte & PG_NX) == 0) { /* * This PTE change does not require TLB invalidation. */ goto unchanged; } if ((origpte & PG_A) != 0) pmap_invalidate_page(pmap, va); } else pte_store(pte, newpte); unchanged: /* * If both the page table page and the reservation are fully * populated, then attempt promotion. */ if ((mpte == NULL || mpte->wire_count == NPTEPG) && pmap_ps_enabled(pmap) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) pmap_promote_pde(pmap, pde, va, &lock); if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } /* * Tries to create a 2MB page mapping. Returns TRUE if successful and FALSE * otherwise. Fails if (1) a page table page cannot be allocated without * blocking, (2) a mapping already exists at the specified virtual address, or * (3) a pv entry cannot be allocated without reclaiming another pv entry. */ static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock **lockp) { pd_entry_t *pde, newpde; pt_entry_t PG_V; vm_page_t mpde; struct spglist free; PG_V = pmap_valid_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((mpde = pmap_allocpde(pmap, va, NULL)) == NULL) { CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpde)); pde = &pde[pmap_pde_index(va)]; if ((*pde & PG_V) != 0) { KASSERT(mpde->wire_count > 1, ("pmap_enter_pde: mpde's wire count is too low")); mpde->wire_count--; CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(pmap, m->md.pat_mode, 1) | PG_PS | PG_V; if ((m->oflags & VPO_UNMANAGED) == 0) { newpde |= PG_MANAGED; /* * Abort this mapping if its PV entry could not be created. 
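 * pmap_pv_insert_pde() allocates the entry only if it
 * can do so without reclaiming an existing PV entry,
 * which is failure mode (3) in the function header
 * above.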
*/ if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m), lockp)) { SLIST_INIT(&free); if (pmap_unwire_ptp(pmap, va, mpde, &free)) { /* * Although "va" is not mapped, paging- * structure caches could nonetheless have * entries that refer to the freed page table * pages. Invalidate those entries. */ pmap_invalidate_page(pmap, va); pmap_free_zero_pages(&free); } CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } } if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; if (va < VM_MAXUSER_ADDRESS) newpde |= PG_U; /* * Increment counters. */ pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE); /* * Map the superpage. */ pde_store(pde, newpde); atomic_add_long(&pmap_pde_mappings, 1); CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx" " in pmap %p", va, pmap); return (TRUE); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { struct rwlock *lock; vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpte = NULL; m = m_start; lock = NULL; PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); if ((va & PDRMASK) == 0 && va + NBPDR <= end && m->psind == 1 && pmap_ps_enabled(pmap) && pmap_enter_pde(pmap, va, m, prot, &lock)) m = &m[NBPDR / PAGE_SIZE - 1]; else mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); m = TAILQ_NEXT(m, listq); } if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { struct rwlock *lock; lock = NULL; PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) { struct spglist free; pt_entry_t *pte, PG_V; vm_paddr_t pa; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); PG_V = pmap_valid_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { vm_pindex_t ptepindex; pd_entry_t *ptepa; /* * Calculate pagetable page index */ ptepindex = pmap_pde_pindex(va); if (mpte && (mpte->pindex == ptepindex)) { mpte->wire_count++; } else { /* * Get the page directory entry */ ptepa = pmap_pde(pmap, va); /* * If the page table page is mapped, we just increment * the hold count, and activate it. 
Otherwise, we * attempt to allocate a page table page. If this * attempt fails, we don't retry. Instead, we give up. */ if (ptepa && (*ptepa & PG_V) != 0) { if (*ptepa & PG_PS) return (NULL); mpte = PHYS_TO_VM_PAGE(*ptepa & PG_FRAME); mpte->wire_count++; } else { /* * Pass NULL instead of the PV list lock * pointer, because we don't intend to sleep. */ mpte = _pmap_allocpte(pmap, ptepindex, NULL); if (mpte == NULL) return (mpte); } } pte = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); pte = &pte[pmap_pte_index(va)]; } else { mpte = NULL; pte = vtopte(va); } if (*pte) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { if (mpte != NULL) { SLIST_INIT(&free); if (pmap_unwire_ptp(pmap, va, mpte, &free)) { /* * Although "va" is not mapped, paging- * structure caches could nonetheless have * entries that refer to the freed page table * pages. Invalidate those entries. */ pmap_invalidate_page(pmap, va); pmap_free_zero_pages(&free); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap_resident_count_inc(pmap, 1); pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(pmap, m->md.pat_mode, 0); if ((prot & VM_PROT_EXECUTE) == 0) pa |= pg_nx; /* * Now validate mapping with RO protection */ if ((m->oflags & VPO_UNMANAGED) != 0) pte_store(pte, pa | PG_V | PG_U); else pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); return (mpte); } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. */ void * pmap_kenter_temporary(vm_paddr_t pa, int i) { vm_offset_t va; va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); invlpg(va); return ((void *)crashdumpmap); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { pd_entry_t *pde; pt_entry_t PG_A, PG_M, PG_RW, PG_V; vm_paddr_t pa, ptepa; vm_page_t p, pdpg; int pat_mode; PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); if ((addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { if (!pmap_ps_enabled(pmap)) return; if (!vm_object_populate(object, pindex, pindex + atop(size))) return; p = vm_page_lookup(object, pindex); KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); pat_mode = p->md.pat_mode; /* * Abort the mapping if the first page is not physically * aligned to a 2MB page boundary. */ ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) return; /* * Skip the first page. Abort the mapping if the rest of * the pages are not physically contiguous or have differing * memory attributes. */ p = TAILQ_NEXT(p, listq); for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; pa += PAGE_SIZE) { KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); if (pa != VM_PAGE_TO_PHYS(p) || pat_mode != p->md.pat_mode) return; p = TAILQ_NEXT(p, listq); } /* * Map using 2MB pages. Since "ptepa" is 2M aligned and * "size" is a multiple of 2M, adding the PAT setting to "pa" * will not affect the termination of this loop. 
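 *
 * As a concrete illustration (numbers chosen only for this example):
 * with ptepa == 0x40000000 and size == 0x800000, the bound is
 * 0x40800000 and the loop runs four times.  The cache bits returned by
 * pmap_cache_bits() lie well below the 2MB frame, so "pa" crosses the
 * 2MB-aligned bound only through the NBPDR increments, never because
 * of those low-order bits.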
*/ PMAP_LOCK(pmap); for (pa = ptepa | pmap_cache_bits(pmap, pat_mode, 1); pa < ptepa + size; pa += NBPDR) { pdpg = pmap_allocpde(pmap, addr, NULL); if (pdpg == NULL) { /* * The creation of mappings below is only an * optimization. If a page directory page * cannot be allocated without blocking, * continue on to the next mapping rather than * blocking. */ addr += NBPDR; continue; } pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg)); pde = &pde[pmap_pde_index(addr)]; if ((*pde & PG_V) == 0) { pde_store(pde, pa | PG_PS | PG_M | PG_A | PG_U | PG_RW | PG_V); pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE); atomic_add_long(&pmap_pde_mappings, 1); } else { /* Continue on if the PDE is already valid. */ pdpg->wire_count--; KASSERT(pdpg->wire_count > 0, ("pmap_object_init_pt: missing reference " "to page directory page, va: 0x%lx", addr)); } addr += NBPDR; } PMAP_UNLOCK(pmap); } } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware * feature, so there is no need to invalidate any TLB entries. * Since pmap_demote_pde() for the wired entry must never fail, * pmap_delayed_invl_started()/finished() calls around the * function are not needed. */ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va_next; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t *pde; pt_entry_t *pte, PG_V; PG_V = pmap_valid_bit(pmap); PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { pml4e = pmap_pml4e(pmap, sva); if ((*pml4e & PG_V) == 0) { va_next = (sva + NBPML4) & ~PML4MASK; if (va_next < sva) va_next = eva; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, sva); if ((*pdpe & PG_V) == 0) { va_next = (sva + NBPDP) & ~PDPMASK; if (va_next < sva) va_next = eva; continue; } va_next = (sva + NBPDR) & ~PDRMASK; if (va_next < sva) va_next = eva; pde = pmap_pdpe_to_pde(pdpe, sva); if ((*pde & PG_V) == 0) continue; if ((*pde & PG_PS) != 0) { if ((*pde & PG_W) == 0) panic("pmap_unwire: pde %#jx is missing PG_W", (uintmax_t)*pde); /* * Are we unwiring the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == va_next && eva >= va_next) { atomic_clear_long(pde, PG_W); pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; continue; } else if (!pmap_demote_pde(pmap, pde, sva)) panic("pmap_unwire: demotion failed"); } if (va_next > eva) va_next = eva; for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, sva += PAGE_SIZE) { if ((*pte & PG_V) == 0) continue; if ((*pte & PG_W) == 0) panic("pmap_unwire: pte %#jx is missing PG_W", (uintmax_t)*pte); /* * PG_W must be cleared atomically. Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. */ atomic_clear_long(pte, PG_W); pmap->pm_stats.wired_count--; } } PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. 
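 *
 * (The expected caller is fork-style address space duplication, in
 * which dst_addr == src_addr; presumably this is why the code below
 * simply returns when the two addresses differ.)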
*/ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { struct rwlock *lock; struct spglist free; vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t va_next; pt_entry_t PG_A, PG_M, PG_V; if (dst_addr != src_addr) return; if (dst_pmap->pm_type != src_pmap->pm_type) return; /* * EPT page table entries that require emulation of A/D bits are * sensitive to clearing the PG_A bit (aka EPT_PG_READ). Although * we clear PG_M (aka EPT_PG_WRITE) concomitantly, the PG_U bit * (aka EPT_PG_EXECUTE) could still be set. Since some EPT * implementations flag an EPT misconfiguration for exec-only * mappings we skip this function entirely for emulated pmaps. */ if (pmap_emulate_ad_bits(dst_pmap)) return; lock = NULL; if (dst_pmap < src_pmap) { PMAP_LOCK(dst_pmap); PMAP_LOCK(src_pmap); } else { PMAP_LOCK(src_pmap); PMAP_LOCK(dst_pmap); } PG_A = pmap_accessed_bit(dst_pmap); PG_M = pmap_modified_bit(dst_pmap); PG_V = pmap_valid_bit(dst_pmap); for (addr = src_addr; addr < end_addr; addr = va_next) { pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpde, dstmpte, srcmpte; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t srcptepaddr, *pde; KASSERT(addr < UPT_MIN_ADDRESS, ("pmap_copy: invalid to pmap_copy page tables")); pml4e = pmap_pml4e(src_pmap, addr); if ((*pml4e & PG_V) == 0) { va_next = (addr + NBPML4) & ~PML4MASK; if (va_next < addr) va_next = end_addr; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, addr); if ((*pdpe & PG_V) == 0) { va_next = (addr + NBPDP) & ~PDPMASK; if (va_next < addr) va_next = end_addr; continue; } va_next = (addr + NBPDR) & ~PDRMASK; if (va_next < addr) va_next = end_addr; pde = pmap_pdpe_to_pde(pdpe, addr); srcptepaddr = *pde; if (srcptepaddr == 0) continue; if (srcptepaddr & PG_PS) { if ((addr & PDRMASK) != 0 || addr + NBPDR > end_addr) continue; dstmpde = pmap_allocpde(dst_pmap, addr, NULL); if (dstmpde == NULL) break; pde = (pd_entry_t *) PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpde)); pde = &pde[pmap_pde_index(addr)]; if (*pde == 0 && ((srcptepaddr & PG_MANAGED) == 0 || pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr & PG_PS_FRAME, &lock))) { *pde = srcptepaddr & ~PG_W; pmap_resident_count_inc(dst_pmap, NBPDR / PAGE_SIZE); atomic_add_long(&pmap_pde_mappings, 1); } else dstmpde->wire_count--; continue; } srcptepaddr &= PG_FRAME; srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); KASSERT(srcmpte->wire_count > 0, ("pmap_copy: source page table page is unused")); if (va_next > end_addr) va_next = end_addr; src_pte = (pt_entry_t *)PHYS_TO_DMAP(srcptepaddr); src_pte = &src_pte[pmap_pte_index(addr)]; dstmpte = NULL; while (addr < va_next) { pt_entry_t ptetemp; ptetemp = *src_pte; /* * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { if (dstmpte != NULL && dstmpte->pindex == pmap_pde_pindex(addr)) dstmpte->wire_count++; else if ((dstmpte = pmap_allocpte(dst_pmap, addr, NULL)) == NULL) goto out; dst_pte = (pt_entry_t *) PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte)); dst_pte = &dst_pte[pmap_pte_index(addr)]; if (*dst_pte == 0 && pmap_try_insert_pv_entry(dst_pmap, addr, PHYS_TO_VM_PAGE(ptetemp & PG_FRAME), &lock)) { /* * Clear the wired, modified, and * accessed (referenced) bits * during the copy. */ *dst_pte = ptetemp & ~(PG_W | PG_M | PG_A); pmap_resident_count_inc(dst_pmap, 1); } else { SLIST_INIT(&free); if (pmap_unwire_ptp(dst_pmap, addr, dstmpte, &free)) { /* * Although "addr" is not * mapped, paging-structure * caches could nonetheless * have entries that refer to * the freed page table pages. 
* Invalidate those entries. */ pmap_invalidate_page(dst_pmap, addr); pmap_free_zero_pages(&free); } goto out; } if (dstmpte->wire_count >= srcmpte->wire_count) break; } addr += PAGE_SIZE; src_pte++; } } out: if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(src_pmap); PMAP_UNLOCK(dst_pmap); } /* - * pmap_zero_page zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. + * Zero the specified hardware page. */ void pmap_zero_page(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* - * pmap_zero_page_area zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. - * - * off and size may not cover an area beyond a single hardware page. + * Zero an area within a single hardware page. off and size must not + * cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); if (off == 0 && size == PAGE_SIZE) pagezero((void *)va); else bzero((char *)va + off, size); } /* - * pmap_zero_page_idle zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. This - * is intended to be called from the vm_pagezero process only and - * outside of Giant. + * Zero the specified hardware page in a way that minimizes cache thrashing. + * This is intended to be called from the vm_pagezero process only and + * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); - pagezero((void *)va); + sse2_pagezero((void *)va); } /* - * pmap_copy_page copies the specified (machine independent) - * page by mapping the page into virtual memory and using - * bcopy to copy the page, one machine dependent page at a - * time. + * Copy one hardware page to another. */ void pmap_copy_page(vm_page_t msrc, vm_page_t mdst) { vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); pagecopy((void *)src, (void *)dst); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_page_t pages[2]; vm_offset_t vaddr[2], a_pg_offset, b_pg_offset; int cnt; boolean_t mapped; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; pages[0] = ma[a_offset >> PAGE_SHIFT]; b_pg_offset = b_offset & PAGE_MASK; pages[1] = mb[b_offset >> PAGE_SHIFT]; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); cnt = min(cnt, PAGE_SIZE - b_pg_offset); mapped = pmap_map_io_transient(pages, vaddr, 2, FALSE); a_cp = (char *)vaddr[0] + a_pg_offset; b_cp = (char *)vaddr[1] + b_pg_offset; bcopy(a_cp, b_cp, cnt); if (__predict_false(mapped)) pmap_unmap_io_transient(pages, vaddr, 2, FALSE); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging.
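 *
 * (Both the page's own pv list and, for non-fictitious pages, the pv
 * list of the containing 2MB page are scanned below; "loops" counts
 * entries from the two lists against the same cap of 16.)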
*/ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct md_page *pvh; struct rwlock *lock; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } } rw_runlock(lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { struct rwlock *lock; struct md_page *pvh; pmap_t pmap; pt_entry_t *pte; pv_entry_t pv; int count, md_gen, pvh_gen; if ((m->oflags & VPO_UNMANAGED) != 0) return (0); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: count = 0; TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pte(pmap, pv->pv_va); if ((*pte & PG_W) != 0) count++; PMAP_UNLOCK(pmap); } if ((m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen || pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pde(pmap, pv->pv_va); if ((*pte & PG_W) != 0) count++; PMAP_UNLOCK(pmap); } } rw_runlock(lock); return (count); } /* * Returns TRUE if the given page is mapped individually or as part of * a 2mpage. Otherwise, returns FALSE. */ boolean_t pmap_page_is_mapped(vm_page_t m) { struct rwlock *lock; boolean_t rv; if ((m->oflags & VPO_UNMANAGED) != 0) return (FALSE); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); rv = !TAILQ_EMPTY(&m->md.pv_list) || ((m->flags & PG_FICTITIOUS) == 0 && !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); rw_runlock(lock); return (rv); } /* * Destroy all managed, non-wired mappings in the given user-space * pmap. This pmap cannot be active on any processor besides the * caller. * * This function cannot be applied to the kernel pmap. Moreover, it * is not intended for general use. It is only to be used during * process termination. Consequently, it can be implemented in ways * that make it faster than pmap_remove(). First, it can more quickly * destroy mappings by iterating over the pmap's collection of PV * entries, rather than searching the page table. Second, it doesn't * have to test and clear the page table entries atomically, because * no processor is currently accessing the user address space. In * particular, a page table entry's dirty bit won't change state once * this function starts. */ void pmap_remove_pages(pmap_t pmap) { pd_entry_t ptepde; pt_entry_t *pte, tpte; pt_entry_t PG_M, PG_RW, PG_V; struct spglist free; vm_page_t m, mpte, mt; pv_entry_t pv; struct md_page *pvh; struct pv_chunk *pc, *npc; struct rwlock *lock; int64_t bit; uint64_t inuse, bitmask; int allfree, field, freed, idx; boolean_t superpage; vm_paddr_t pa; /* * Assert that the given pmap is only active on the current * CPU. 
Unfortunately, we cannot block another CPU from * activating the pmap while this function is executing. */ KASSERT(pmap == PCPU_GET(curpmap), ("non-current pmap %p", pmap)); #ifdef INVARIANTS { cpuset_t other_cpus; other_cpus = all_cpus; critical_enter(); CPU_CLR(PCPU_GET(cpuid), &other_cpus); CPU_AND(&other_cpus, &pmap->pm_active); critical_exit(); KASSERT(CPU_EMPTY(&other_cpus), ("pmap active %p", pmap)); } #endif lock = NULL; PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); SLIST_INIT(&free); PMAP_LOCK(pmap); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; freed = 0; for (field = 0; field < _NPCM; field++) { inuse = ~pc->pc_map[field] & pc_freemask[field]; while (inuse != 0) { bit = bsfq(inuse); bitmask = 1UL << bit; idx = field * 64 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; pte = pmap_pdpe(pmap, pv->pv_va); ptepde = *pte; pte = pmap_pdpe_to_pde(pte, pv->pv_va); tpte = *pte; if ((tpte & (PG_PS | PG_V)) == PG_V) { superpage = FALSE; ptepde = tpte; pte = (pt_entry_t *)PHYS_TO_DMAP(tpte & PG_FRAME); pte = &pte[pmap_pte_index(pv->pv_va)]; tpte = *pte; } else { /* * Keep track whether 'tpte' is a * superpage explicitly instead of * relying on PG_PS being set. * * This is because PG_PS is numerically * identical to PG_PTE_PAT and thus a * regular page could be mistaken for * a superpage. */ superpage = TRUE; } if ((tpte & PG_V) == 0) { panic("bad pte va %lx pte %lx", pv->pv_va, tpte); } /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & PG_W) { allfree = 0; continue; } if (superpage) pa = tpte & PG_PS_FRAME; else pa = tpte & PG_FRAME; m = PHYS_TO_VM_PAGE(pa); KASSERT(m->phys_addr == pa, ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); pte_clear(pte); /* * Update the vm_page_t clean/reference bits. 
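 * A PTE is treated as dirty only when PG_M and PG_RW
 * are both set.  The paths that write-protect a
 * mapping clear the two bits together, so a PG_M bit
 * observed without PG_RW is presumably stale and is
 * ignored here.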
*/ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if (superpage) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) vm_page_dirty(mt); } else vm_page_dirty(m); } CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); /* Mark free */ pc->pc_map[field] |= bitmask; if (superpage) { pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE); pvh = pa_to_pvh(tpte & PG_PS_FRAME); TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) if ((mt->aflags & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&mt->md.pv_list)) vm_page_aflag_clear(mt, PGA_WRITEABLE); } mpte = pmap_lookup_pt_page(pmap, pv->pv_va); if (mpte != NULL) { pmap_remove_pt_page(pmap, mpte); pmap_resident_count_dec(pmap, 1); KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pages: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, &free, FALSE); atomic_subtract_int(&vm_cnt.v_wire_count, 1); } } else { pmap_resident_count_dec(pmap, 1); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if ((m->aflags & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } pmap_unuse_pt(pmap, pv->pv_va, ptepde, &free); freed++; } } PV_STAT(atomic_add_long(&pv_entry_frees, freed)); PV_STAT(atomic_add_int(&pv_entry_spare, freed)); PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } if (lock != NULL) rw_wunlock(lock); pmap_invalidate_all(pmap); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } static boolean_t pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) { struct rwlock *lock; pv_entry_t pv; struct md_page *pvh; pt_entry_t *pte, mask; pt_entry_t PG_A, PG_M, PG_RW, PG_V; pmap_t pmap; int md_gen, pvh_gen; boolean_t rv; rv = FALSE; lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pte(pmap, pv->pv_va); mask = 0; if (modified) { PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); mask |= PG_RW | PG_M; } if (accessed) { PG_A = pmap_accessed_bit(pmap); PG_V = pmap_valid_bit(pmap); mask |= PG_V | PG_A; } rv = (*pte & mask) == mask; PMAP_UNLOCK(pmap); if (rv) goto out; } if ((m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen || pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pde(pmap, pv->pv_va); mask = 0; if (modified) { PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); mask |= PG_RW | PG_M; } if (accessed) { PG_A = pmap_accessed_bit(pmap); PG_V = pmap_valid_bit(pmap); mask |= PG_V | PG_A; } rv = (*pte & mask) == mask; PMAP_UNLOCK(pmap); if (rv) goto out; } } out: rw_runlock(lock); return (rv); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. 
*/ boolean_t pmap_is_modified(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); return (pmap_page_test_mappings(m, FALSE, TRUE)); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is eligible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pd_entry_t *pde; pt_entry_t *pte, PG_V; boolean_t rv; PG_V = pmap_valid_bit(pmap); rv = FALSE; PMAP_LOCK(pmap); pde = pmap_pde(pmap, addr); if (pde != NULL && (*pde & (PG_PS | PG_V)) == PG_V) { pte = pmap_pde_to_pte(pde, addr); rv = (*pte & PG_V) == 0; } PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); return (pmap_page_test_mappings(m, TRUE, FALSE)); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { struct md_page *pvh; pmap_t pmap; struct rwlock *lock; pv_entry_t next_pv, pv; pd_entry_t *pde; pt_entry_t oldpte, *pte, PG_M, PG_RW; vm_offset_t va; int pvh_gen, md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; lock = VM_PAGE_TO_PV_LIST_LOCK(m); pvh = (m->flags & PG_FICTITIOUS) != 0 ? 
&pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); retry_pv_loop: rw_wlock(lock); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; } } PG_RW = pmap_rw_bit(pmap); va = pv->pv_va; pde = pmap_pde(pmap, va); if ((*pde & PG_RW) != 0) (void)pmap_demote_pde_locked(pmap, pde, va, &lock); KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); PMAP_UNLOCK(pmap); } TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; } } PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_remove_write: found a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); retry: oldpte = *pte; if (oldpte & PG_RW) { if (!atomic_cmpset_long(pte, oldpte, oldpte & ~(PG_RW | PG_M))) goto retry; if ((oldpte & PG_M) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } rw_wunlock(lock); vm_page_aflag_clear(m, PGA_WRITEABLE); pmap_delayed_invl_wait(m); } static __inline boolean_t safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) { if (!pmap_emulate_ad_bits(pmap)) return (TRUE); KASSERT(pmap->pm_type == PT_EPT, ("invalid pm_type %d", pmap->pm_type)); /* * XWR = 010 or 110 will cause an unconditional EPT misconfiguration * so we don't let the referenced (aka EPT_PG_READ) bit to be cleared * if the EPT_PG_WRITE bit is set. */ if ((pte & EPT_PG_WRITE) != 0) return (FALSE); /* * XWR = 100 is allowed only if the PMAP_SUPPORTS_EXEC_ONLY is set. */ if ((pte & EPT_PG_EXECUTE) == 0 || ((pmap->pm_flags & PMAP_SUPPORTS_EXEC_ONLY) != 0)) return (TRUE); else return (FALSE); } #define PMAP_TS_REFERENCED_MAX 5 /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. * * A DI block is not needed within this function, because * invalidations are performed before the PV list lock is * released. */ int pmap_ts_referenced(vm_page_t m) { struct md_page *pvh; pv_entry_t pv, pvf; pmap_t pmap; struct rwlock *lock; pd_entry_t oldpde, *pde; pt_entry_t *pte, PG_A; vm_offset_t va; vm_paddr_t pa; int cleared, md_gen, not_cleared, pvh_gen; struct spglist free; boolean_t demoted; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); SLIST_INIT(&free); cleared = 0; pa = VM_PAGE_TO_PHYS(m); lock = PHYS_TO_PV_LIST_LOCK(pa); pvh = (m->flags & PG_FICTITIOUS) != 0 ? 
&pv_dummy : pa_to_pvh(pa); rw_wlock(lock); retry: not_cleared = 0; if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) goto small_mappings; pv = pvf; do { if (pvf == NULL) pvf = pv; pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto retry; } } PG_A = pmap_accessed_bit(pmap); va = pv->pv_va; pde = pmap_pde(pmap, pv->pv_va); oldpde = *pde; if ((*pde & PG_A) != 0) { /* * Since this reference bit is shared by 512 4KB * pages, it should not be cleared every time it is * tested. Apply a simple "hash" function on the * physical page number, the virtual superpage number, * and the pmap address to select one 4KB page out of * the 512 on which testing the reference bit will * result in clearing that reference bit. This * function is designed to avoid the selection of the * same 4KB page for every 2MB page mapping. * * On demotion, a mapping that hasn't been referenced * is simply destroyed. To avoid the possibility of a * subsequent page fault on a demoted wired mapping, * always leave its reference bit set. Moreover, * since the superpage is wired, the current state of * its reference bit won't affect page replacement. */ if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PDRSHIFT) ^ (uintptr_t)pmap) & (NPTEPG - 1)) == 0 && (*pde & PG_W) == 0) { if (safe_to_clear_referenced(pmap, oldpde)) { atomic_clear_long(pde, PG_A); pmap_invalidate_page(pmap, pv->pv_va); demoted = FALSE; } else if (pmap_demote_pde_locked(pmap, pde, pv->pv_va, &lock)) { /* * Remove the mapping to a single page * so that a subsequent access may * repromote. Since the underlying * page table page is fully populated, * this removal never frees a page * table page. */ demoted = TRUE; va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME); pte = pmap_pde_to_pte(pde, va); pmap_remove_pte(pmap, pte, va, *pde, NULL, &lock); pmap_invalidate_page(pmap, va); } else demoted = TRUE; if (demoted) { /* * The superpage mapping was removed * entirely and therefore 'pv' is no * longer valid. */ if (pvf == pv) pvf = NULL; pv = NULL; } cleared++; KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); } else not_cleared++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; } if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX) goto out; } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); small_mappings: if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { if (pvf == NULL) pvf = pv; pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto retry; } } PG_A = pmap_accessed_bit(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced: found a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); if ((*pte & PG_A) != 0) { if (safe_to_clear_referenced(pmap, *pte)) { atomic_clear_long(pte, PG_A); pmap_invalidate_page(pmap, pv->pv_va); cleared++; } else if ((*pte & PG_W) == 0) { /* * Wired pages cannot be paged out so * doing accessed bit emulation for * them is wasted effort. We do the * hard work for unwired pages only. 
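 * Removing the 4KB mapping is what emulates the
 * clearing: the next access faults, and re-entering
 * the mapping sets the accessed state again, just as
 * the hardware would re-set a cleared PG_A.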
*/ pmap_remove_pte(pmap, pte, pv->pv_va, *pde, &free, &lock); pmap_invalidate_page(pmap, pv->pv_va); cleared++; if (pvf == pv) pvf = NULL; pv = NULL; KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); } else not_cleared++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + not_cleared < PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); pmap_free_zero_pages(&free); return (cleared + not_cleared); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. */ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { struct rwlock *lock; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t oldpde, *pde; pt_entry_t *pte, PG_A, PG_G, PG_M, PG_RW, PG_V; vm_offset_t va_next; vm_page_t m; boolean_t anychanged; if (advice != MADV_DONTNEED && advice != MADV_FREE) return; /* * A/D bit emulation requires an alternate code path when clearing * the modified and accessed bits below. Since this function is * advisory in nature we skip it entirely for pmaps that require * A/D bit emulation. */ if (pmap_emulate_ad_bits(pmap)) return; PG_A = pmap_accessed_bit(pmap); PG_G = pmap_global_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); anychanged = FALSE; pmap_delayed_invl_started(); PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { pml4e = pmap_pml4e(pmap, sva); if ((*pml4e & PG_V) == 0) { va_next = (sva + NBPML4) & ~PML4MASK; if (va_next < sva) va_next = eva; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, sva); if ((*pdpe & PG_V) == 0) { va_next = (sva + NBPDP) & ~PDPMASK; if (va_next < sva) va_next = eva; continue; } va_next = (sva + NBPDR) & ~PDRMASK; if (va_next < sva) va_next = eva; pde = pmap_pdpe_to_pde(pdpe, sva); oldpde = *pde; if ((oldpde & PG_V) == 0) continue; else if ((oldpde & PG_PS) != 0) { if ((oldpde & PG_MANAGED) == 0) continue; lock = NULL; if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) { if (lock != NULL) rw_wunlock(lock); /* * The large page mapping was destroyed. */ continue; } /* * Unless the page mappings are wired, remove the * mapping to a single page so that a subsequent * access may repromote. Since the underlying page * table page is fully populated, this removal never * frees a page table page. */ if ((oldpde & PG_W) == 0) { pte = pmap_pde_to_pte(pde, sva); KASSERT((*pte & PG_V) != 0, ("pmap_advise: invalid PTE")); pmap_remove_pte(pmap, pte, sva, *pde, NULL, &lock); anychanged = TRUE; } if (lock != NULL) rw_wunlock(lock); } if (va_next > eva) va_next = eva; for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, sva += PAGE_SIZE) { if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED | PG_V)) continue; else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if (advice == MADV_DONTNEED) { /* * Future calls to pmap_is_modified() * can be avoided by making the page * dirty now. 
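 * vm_page_dirty() records the modification in the
 * vm_page before the atomic_clear_long() below
 * discards PG_M from the PTE.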
*/ m = PHYS_TO_VM_PAGE(*pte & PG_FRAME); vm_page_dirty(m); } atomic_clear_long(pte, PG_M | PG_A); } else if ((*pte & PG_A) != 0) atomic_clear_long(pte, PG_A); else continue; if ((*pte & PG_G) != 0) pmap_invalidate_page(pmap, sva); else anychanged = TRUE; } } if (anychanged) pmap_invalidate_all(pmap); PMAP_UNLOCK(pmap); pmap_delayed_invl_finished(); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { struct md_page *pvh; pmap_t pmap; pv_entry_t next_pv, pv; pd_entry_t oldpde, *pde; pt_entry_t oldpte, *pte, PG_M, PG_RW, PG_V; struct rwlock *lock; vm_offset_t va; int md_gen, pvh_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_wlock(lock); restart: TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); va = pv->pv_va; pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_RW) != 0) { if (pmap_demote_pde_locked(pmap, pde, va, &lock)) { if ((oldpde & PG_W) == 0) { /* * Write protect the mapping to a * single page so that a subsequent * write access may repromote. */ va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME); pte = pmap_pde_to_pte(pde, va); oldpte = *pte; if ((oldpte & PG_V) != 0) { while (!atomic_cmpset_long(pte, oldpte, oldpte & ~(PG_M | PG_RW))) oldpte = *pte; vm_page_dirty(m); pmap_invalidate_page(pmap, va); } } } } PMAP_UNLOCK(pmap); } TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found" " a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { atomic_clear_long(pte, PG_M); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } rw_wunlock(lock); } /* * Miscellaneous support routines follow */ /* Adjust the cache mode for a 4KB page mapped via a PTE. */ static __inline void pmap_pte_attr(pt_entry_t *pte, int cache_bits, int mask) { u_int opte, npte; /* * The cache mode bits are all in the low 32-bits of the * PTE, so we can just spin on updating the low 32-bits. */ do { opte = *(u_int *)pte; npte = opte & ~mask; npte |= cache_bits; } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte)); } /* Adjust the cache mode for a 2MB page mapped via a PDE. */ static __inline void pmap_pde_attr(pd_entry_t *pde, int cache_bits, int mask) { u_int opde, npde; /* * The cache mode bits are all in the low 32-bits of the * PDE, so we can just spin on updating the low 32-bits. 
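 *
 * (For reference: on x86 the PDE cache-control bits are PG_NC_PWT
 * (bit 3), PG_NC_PCD (bit 4), and PG_PDE_PAT (bit 12), all within the
 * low 32 bits, which is what makes the 32-bit compare-and-swap below
 * sufficient.)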
*/ do { opde = *(u_int *)pde; npde = opde & ~mask; npde |= cache_bits; } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde)); } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. */ void * pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) { struct pmap_preinit_mapping *ppim; vm_offset_t va, offset; vm_size_t tmpsize; int i; offset = pa & PAGE_MASK; size = round_page(offset + size); pa = trunc_page(pa); if (!pmap_initialized) { va = 0; for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == 0) { ppim->pa = pa; ppim->sz = size; ppim->mode = mode; ppim->va = virtual_avail; virtual_avail += size; va = ppim->va; break; } } if (va == 0) panic("%s: too many preinit mappings", __func__); } else { /* * If we have a preinit mapping, re-use it. */ for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->pa == pa && ppim->sz == size && ppim->mode == mode) return ((void *)(ppim->va + offset)); } /* * If the specified range of physical addresses fits within * the direct map window, use the direct map. */ if (pa < dmaplimit && pa + size < dmaplimit) { va = PHYS_TO_DMAP(pa); if (!pmap_change_attr(va, size, mode)) return ((void *)(va + offset)); } va = kva_alloc(size); if (va == 0) panic("%s: Couldn't allocate KVA", __func__); } for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); pmap_invalidate_range(kernel_pmap, va, va + tmpsize); pmap_invalidate_cache_range(va, va + tmpsize, FALSE); return ((void *)(va + offset)); } void * pmap_mapdev(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); } void pmap_unmapdev(vm_offset_t va, vm_size_t size) { struct pmap_preinit_mapping *ppim; vm_offset_t offset; int i; /* If we gave a direct map region in pmap_mapdev, do nothing */ if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) return; offset = va & PAGE_MASK; size = round_page(offset + size); va = trunc_page(va); for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == va && ppim->sz == size) { if (pmap_initialized) return; ppim->pa = 0; ppim->va = 0; ppim->sz = 0; ppim->mode = 0; if (va + size == virtual_avail) virtual_avail = va; return; } } if (pmap_initialized) kva_free(va, size); } /* * Tries to demote a 1GB page mapping. 
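 * On success, the 1GB PDPE is replaced by a freshly allocated page
 * directory page whose 512 PDEs each map a 2MB slice of the original
 * 1GB frame with the same protection and attribute bits.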
*/ static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va) { pdp_entry_t newpdpe, oldpdpe; pd_entry_t *firstpde, newpde, *pde; pt_entry_t PG_A, PG_M, PG_RW, PG_V; vm_paddr_t mpdepa; vm_page_t mpde; PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpdpe = *pdpe; KASSERT((oldpdpe & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pdpe: oldpdpe is missing PG_PS and/or PG_V")); if ((mpde = vm_page_alloc(NULL, va >> PDPSHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { CTR2(KTR_PMAP, "pmap_demote_pdpe: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } mpdepa = VM_PAGE_TO_PHYS(mpde); firstpde = (pd_entry_t *)PHYS_TO_DMAP(mpdepa); newpdpe = mpdepa | PG_M | PG_A | (oldpdpe & PG_U) | PG_RW | PG_V; KASSERT((oldpdpe & PG_A) != 0, ("pmap_demote_pdpe: oldpdpe is missing PG_A")); KASSERT((oldpdpe & (PG_M | PG_RW)) != PG_RW, ("pmap_demote_pdpe: oldpdpe is missing PG_M")); newpde = oldpdpe; /* * Initialize the page directory page. */ for (pde = firstpde; pde < firstpde + NPDEPG; pde++) { *pde = newpde; newpde += NBPDR; } /* * Demote the mapping. */ *pdpe = newpdpe; /* * Invalidate a stale recursive mapping of the page directory page. */ pmap_invalidate_page(pmap, (vm_offset_t)vtopde(va)); pmap_pdpe_demotions++; CTR2(KTR_PMAP, "pmap_demote_pdpe: success for va %#lx" " in pmap %p", va, pmap); return (TRUE); } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { m->md.pat_mode = ma; /* * If "m" is a normal page, update its direct mapping. This update * can be relied upon to perform any cache operations that are * required for data coherence. */ if ((m->flags & PG_FICTITIOUS) == 0 && pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE, m->md.pat_mode)) panic("memory attribute change on the direct map failed"); } /* * Changes the specified virtual address range's memory type to that given by * the parameter "mode". The specified virtual address range must be * completely contained within either the direct map or the kernel map. If * the virtual address range is contained within the kernel map, then the * memory type for each of the corresponding ranges of the direct map is also * changed. (The corresponding ranges of the direct map are those ranges that * map the same physical pages as the specified virtual address range.) These * changes to the direct map are necessary because Intel describes the * behavior of their processors as "undefined" if two or more mappings to the * same physical page have different memory types. * * Returns zero if the change completed successfully, and either EINVAL or * ENOMEM if the change failed. Specifically, EINVAL is returned if some part * of the virtual address range was not mapped, and ENOMEM is returned if * there was insufficient memory available to complete the change. In the * latter case, the memory type may have been changed on some part of the * virtual address range or the direct map. 
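 *
 * Illustrative (hypothetical) use: a driver that has mapped a frame
 * buffer at "va" might switch it to write-combining with
 *
 *	error = pmap_change_attr(va, size, PAT_WRITE_COMBINING);
 *
 * subject to the containment requirement described above.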
*/ int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) { int error; PMAP_LOCK(kernel_pmap); error = pmap_change_attr_locked(va, size, mode); PMAP_UNLOCK(kernel_pmap); return (error); } static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode) { vm_offset_t base, offset, tmpva; vm_paddr_t pa_start, pa_end, pa_end1; pdp_entry_t *pdpe; pd_entry_t *pde; pt_entry_t *pte; int cache_bits_pte, cache_bits_pde, error; boolean_t changed; PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED); base = trunc_page(va); offset = va & PAGE_MASK; size = round_page(offset + size); /* * Only supported on kernel virtual addresses, including the direct * map but excluding the recursive map. */ if (base < DMAP_MIN_ADDRESS) return (EINVAL); cache_bits_pde = pmap_cache_bits(kernel_pmap, mode, 1); cache_bits_pte = pmap_cache_bits(kernel_pmap, mode, 0); changed = FALSE; /* * Pages that aren't mapped aren't supported. Also break down 2MB pages * into 4KB pages if required. */ for (tmpva = base; tmpva < base + size; ) { pdpe = pmap_pdpe(kernel_pmap, tmpva); if (pdpe == NULL || *pdpe == 0) return (EINVAL); if (*pdpe & PG_PS) { /* * If the current 1GB page already has the required * memory type, then we need not demote this page. Just * increment tmpva to the next 1GB page frame. */ if ((*pdpe & X86_PG_PDE_CACHE) == cache_bits_pde) { tmpva = trunc_1gpage(tmpva) + NBPDP; continue; } /* * If the current offset aligns with a 1GB page frame * and there is at least 1GB left within the range, then * we need not break down this page into 2MB pages. */ if ((tmpva & PDPMASK) == 0 && tmpva + PDPMASK < base + size) { tmpva += NBPDP; continue; } if (!pmap_demote_pdpe(kernel_pmap, pdpe, tmpva)) return (ENOMEM); } pde = pmap_pdpe_to_pde(pdpe, tmpva); if (*pde == 0) return (EINVAL); if (*pde & PG_PS) { /* * If the current 2MB page already has the required * memory type, then we need not demote this page. Just * increment tmpva to the next 2MB page frame. */ if ((*pde & X86_PG_PDE_CACHE) == cache_bits_pde) { tmpva = trunc_2mpage(tmpva) + NBPDR; continue; } /* * If the current offset aligns with a 2MB page frame * and there is at least 2MB left within the range, then * we need not break down this page into 4KB pages. */ if ((tmpva & PDRMASK) == 0 && tmpva + PDRMASK < base + size) { tmpva += NBPDR; continue; } if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) return (ENOMEM); } pte = pmap_pde_to_pte(pde, tmpva); if (*pte == 0) return (EINVAL); tmpva += PAGE_SIZE; } error = 0; /* * Ok, all the pages exist, so run through them updating their * cache mode if required. */ pa_start = pa_end = 0; for (tmpva = base; tmpva < base + size; ) { pdpe = pmap_pdpe(kernel_pmap, tmpva); if (*pdpe & PG_PS) { if ((*pdpe & X86_PG_PDE_CACHE) != cache_bits_pde) { pmap_pde_attr(pdpe, cache_bits_pde, X86_PG_PDE_CACHE); changed = TRUE; } if (tmpva >= VM_MIN_KERNEL_ADDRESS && (*pdpe & PG_PS_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. */ pa_start = *pdpe & PG_PS_FRAME; pa_end = pa_start + NBPDP; } else if (pa_end == (*pdpe & PG_PS_FRAME)) pa_end += NBPDP; else { /* Run ended, update direct map. */ error = pmap_change_attr_locked( PHYS_TO_DMAP(pa_start), pa_end - pa_start, mode); if (error != 0) break; /* Start physical address run. 
*/ pa_start = *pdpe & PG_PS_FRAME; pa_end = pa_start + NBPDP; } } tmpva = trunc_1gpage(tmpva) + NBPDP; continue; } pde = pmap_pdpe_to_pde(pdpe, tmpva); if (*pde & PG_PS) { if ((*pde & X86_PG_PDE_CACHE) != cache_bits_pde) { pmap_pde_attr(pde, cache_bits_pde, X86_PG_PDE_CACHE); changed = TRUE; } if (tmpva >= VM_MIN_KERNEL_ADDRESS && (*pde & PG_PS_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. */ pa_start = *pde & PG_PS_FRAME; pa_end = pa_start + NBPDR; } else if (pa_end == (*pde & PG_PS_FRAME)) pa_end += NBPDR; else { /* Run ended, update direct map. */ error = pmap_change_attr_locked( PHYS_TO_DMAP(pa_start), pa_end - pa_start, mode); if (error != 0) break; /* Start physical address run. */ pa_start = *pde & PG_PS_FRAME; pa_end = pa_start + NBPDR; } } tmpva = trunc_2mpage(tmpva) + NBPDR; } else { pte = pmap_pde_to_pte(pde, tmpva); if ((*pte & X86_PG_PTE_CACHE) != cache_bits_pte) { pmap_pte_attr(pte, cache_bits_pte, X86_PG_PTE_CACHE); changed = TRUE; } if (tmpva >= VM_MIN_KERNEL_ADDRESS && (*pte & PG_PS_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. */ pa_start = *pte & PG_FRAME; pa_end = pa_start + PAGE_SIZE; } else if (pa_end == (*pte & PG_FRAME)) pa_end += PAGE_SIZE; else { /* Run ended, update direct map. */ error = pmap_change_attr_locked( PHYS_TO_DMAP(pa_start), pa_end - pa_start, mode); if (error != 0) break; /* Start physical address run. */ pa_start = *pte & PG_FRAME; pa_end = pa_start + PAGE_SIZE; } } tmpva += PAGE_SIZE; } } if (error == 0 && pa_start != pa_end && pa_start < dmaplimit) { pa_end1 = MIN(pa_end, dmaplimit); if (pa_start != pa_end1) error = pmap_change_attr_locked(PHYS_TO_DMAP(pa_start), pa_end1 - pa_start, mode); } /* * Flush CPU caches if required to make sure any data isn't cached that * shouldn't be, etc. */ if (changed) { pmap_invalidate_range(kernel_pmap, base, tmpva); pmap_invalidate_cache_range(base, tmpva, FALSE); } return (error); } /* * Demotes any mapping within the direct map region that covers more than the * specified range of physical addresses. This range's size must be a power * of two and its starting address must be a multiple of its size. Since the * demotion does not change any attributes of the mapping, a TLB invalidation * is not mandatory. The caller may, however, request a TLB invalidation. 
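 *
 * For example (illustrative values), pmap_demote_DMAP(pa, NBPDR, FALSE)
 * with a 2MB-aligned "pa" breaks up a covering 1GB mapping into 2MB
 * mappings but leaves an exact 2MB mapping alone; a "len" of PAGE_SIZE
 * additionally breaks a covering 2MB mapping into 4KB pages.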
*/ void pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, boolean_t invalidate) { pdp_entry_t *pdpe; pd_entry_t *pde; vm_offset_t va; boolean_t changed; if (len == 0) return; KASSERT(powerof2(len), ("pmap_demote_DMAP: len is not a power of 2")); KASSERT((base & (len - 1)) == 0, ("pmap_demote_DMAP: base is not a multiple of len")); if (len < NBPDP && base < dmaplimit) { va = PHYS_TO_DMAP(base); changed = FALSE; PMAP_LOCK(kernel_pmap); pdpe = pmap_pdpe(kernel_pmap, va); if ((*pdpe & X86_PG_V) == 0) panic("pmap_demote_DMAP: invalid PDPE"); if ((*pdpe & PG_PS) != 0) { if (!pmap_demote_pdpe(kernel_pmap, pdpe, va)) panic("pmap_demote_DMAP: PDPE failed"); changed = TRUE; } if (len < NBPDR) { pde = pmap_pdpe_to_pde(pdpe, va); if ((*pde & X86_PG_V) == 0) panic("pmap_demote_DMAP: invalid PDE"); if ((*pde & PG_PS) != 0) { if (!pmap_demote_pde(kernel_pmap, pde, va)) panic("pmap_demote_DMAP: PDE failed"); changed = TRUE; } } if (changed && invalidate) pmap_invalidate_page(kernel_pmap, va); PMAP_UNLOCK(kernel_pmap); } } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pd_entry_t *pdep; pt_entry_t pte, PG_A, PG_M, PG_RW, PG_V; vm_paddr_t pa; int val; PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); PMAP_LOCK(pmap); retry: pdep = pmap_pde(pmap, addr); if (pdep != NULL && (*pdep & PG_V)) { if (*pdep & PG_PS) { pte = *pdep; /* Compute the physical address of the 4KB page. */ pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) & PG_FRAME; val = MINCORE_SUPER; } else { pte = *pmap_pde_to_pte(pdep, addr); pa = pte & PG_FRAME; val = 0; } } else { pte = 0; pa = 0; val = 0; } if ((pte & PG_V) != 0) { val |= MINCORE_INCORE; if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((pte & PG_A) != 0) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. 
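 * vm_page_pa_tryrelock() returns nonzero when it had to drop the pmap
 * lock to acquire the page lock, in which case the PTE read above may
 * be stale and the lookup is retried.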
*/ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } static uint64_t pmap_pcid_alloc(pmap_t pmap, u_int cpuid) { uint32_t gen, new_gen, pcid_next; CRITICAL_ASSERT(curthread); gen = PCPU_GET(pcid_gen); if (pmap->pm_pcids[cpuid].pm_pcid == PMAP_PCID_KERN || pmap->pm_pcids[cpuid].pm_gen == gen) return (CR3_PCID_SAVE); pcid_next = PCPU_GET(pcid_next); KASSERT(pcid_next <= PMAP_PCID_OVERMAX, ("cpu %d pcid_next %#x", cpuid, pcid_next)); if (pcid_next == PMAP_PCID_OVERMAX) { new_gen = gen + 1; if (new_gen == 0) new_gen = 1; PCPU_SET(pcid_gen, new_gen); pcid_next = PMAP_PCID_KERN + 1; } else { new_gen = gen; } pmap->pm_pcids[cpuid].pm_pcid = pcid_next; pmap->pm_pcids[cpuid].pm_gen = new_gen; PCPU_SET(pcid_next, pcid_next + 1); return (0); } void pmap_activate_sw(struct thread *td) { pmap_t oldpmap, pmap; uint64_t cached, cr3; u_int cpuid; oldpmap = PCPU_GET(curpmap); pmap = vmspace_pmap(td->td_proc->p_vmspace); if (oldpmap == pmap) return; cpuid = PCPU_GET(cpuid); #ifdef SMP CPU_SET_ATOMIC(cpuid, &pmap->pm_active); #else CPU_SET(cpuid, &pmap->pm_active); #endif cr3 = rcr3(); if (pmap_pcid_enabled) { cached = pmap_pcid_alloc(pmap, cpuid); KASSERT(pmap->pm_pcids[cpuid].pm_pcid >= 0 && pmap->pm_pcids[cpuid].pm_pcid < PMAP_PCID_OVERMAX, ("pmap %p cpu %d pcid %#x", pmap, cpuid, pmap->pm_pcids[cpuid].pm_pcid)); KASSERT(pmap->pm_pcids[cpuid].pm_pcid != PMAP_PCID_KERN || pmap == kernel_pmap, ("non-kernel pmap thread %p pmap %p cpu %d pcid %#x", td, pmap, cpuid, pmap->pm_pcids[cpuid].pm_pcid)); if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3) { load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid | cached); if (cached) PCPU_INC(pm_save_cnt); } } else if (cr3 != pmap->pm_cr3) { load_cr3(pmap->pm_cr3); } PCPU_SET(curpmap, pmap); #ifdef SMP CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active); #else CPU_CLR(cpuid, &oldpmap->pm_active); #endif } void pmap_activate(struct thread *td) { critical_enter(); pmap_activate_sw(td); critical_exit(); } void pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) { } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. 
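/*
 * User-space sketch of the generation-count trick in pmap_pcid_alloc()
 * above: when the PCID namespace wraps, bumping the per-CPU generation
 * invalidates every cached (pcid, gen) pair at once instead of walking
 * all pmaps.  Generation zero is reserved to mean "never valid".  All
 * names below are illustrative, not the kernel's.
 */
#include <stdint.h>

#define	EX_ID_MAX	4095		/* last allocatable ID */

struct cached_id {
	uint32_t	id;
	uint32_t	gen;	/* valid only while it matches cpu_gen */
};

static uint32_t cpu_gen = 1, next_id = 1;

static uint32_t
id_alloc(struct cached_id *c)
{
	if (c->gen == cpu_gen)
		return (c->id);			/* cached ID still valid */
	if (next_id > EX_ID_MAX) {
		if (++cpu_gen == 0)		/* skip the reserved gen */
			cpu_gen = 1;
		next_id = 1;			/* all cached pairs stale */
	}
	c->id = next_id++;
	c->gen = cpu_gen;
	return (c->id);
}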
*/ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { vm_offset_t superpage_offset; if (size < NBPDR) return; if (object != NULL && (object->flags & OBJ_COLORED) != 0) offset += ptoa(object->pg_color); superpage_offset = offset & PDRMASK; if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR || (*addr & PDRMASK) == superpage_offset) return; if ((*addr & PDRMASK) < superpage_offset) *addr = (*addr & ~PDRMASK) + superpage_offset; else *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; } #ifdef INVARIANTS static unsigned long num_dirty_emulations; SYSCTL_ULONG(_vm_pmap, OID_AUTO, num_dirty_emulations, CTLFLAG_RW, &num_dirty_emulations, 0, NULL); static unsigned long num_accessed_emulations; SYSCTL_ULONG(_vm_pmap, OID_AUTO, num_accessed_emulations, CTLFLAG_RW, &num_accessed_emulations, 0, NULL); static unsigned long num_superpage_accessed_emulations; SYSCTL_ULONG(_vm_pmap, OID_AUTO, num_superpage_accessed_emulations, CTLFLAG_RW, &num_superpage_accessed_emulations, 0, NULL); static unsigned long ad_emulation_superpage_promotions; SYSCTL_ULONG(_vm_pmap, OID_AUTO, ad_emulation_superpage_promotions, CTLFLAG_RW, &ad_emulation_superpage_promotions, 0, NULL); #endif /* INVARIANTS */ int pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t va, int ftype) { int rv; struct rwlock *lock; vm_page_t m, mpte; pd_entry_t *pde; pt_entry_t *pte, PG_A, PG_M, PG_RW, PG_V; KASSERT(ftype == VM_PROT_READ || ftype == VM_PROT_WRITE, ("pmap_emulate_accessed_dirty: invalid fault type %d", ftype)); if (!pmap_emulate_ad_bits(pmap)) return (-1); PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); rv = -1; lock = NULL; PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); if (pde == NULL || (*pde & PG_V) == 0) goto done; if ((*pde & PG_PS) != 0) { if (ftype == VM_PROT_READ) { #ifdef INVARIANTS atomic_add_long(&num_superpage_accessed_emulations, 1); #endif *pde |= PG_A; rv = 0; } goto done; } pte = pmap_pde_to_pte(pde, va); if ((*pte & PG_V) == 0) goto done; if (ftype == VM_PROT_WRITE) { if ((*pte & PG_RW) == 0) goto done; /* * Set the modified and accessed bits simultaneously. * * Intel EPT PTEs that do software emulation of A/D bits map * PG_A and PG_M to EPT_PG_READ and EPT_PG_WRITE respectively. * An EPT misconfiguration is triggered if the PTE is writable * but not readable (WR=10). This is avoided by setting PG_A * and PG_M simultaneously. 
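/*
 * The core of pmap_align_superpage() above as a standalone sketch: shift
 * the mapping's start address so that its offset within a 2MB page equals
 * the backing object's offset, letting the interior of the mapping be
 * promoted to superpages.  The size checks done by the real function are
 * omitted here.
 */
#include <stdint.h>

#define	EX_NBPDR	(1ULL << 21)		/* bytes in a 2MB page */

static uint64_t
align_superpage(uint64_t addr, uint64_t offset)
{
	uint64_t mask, spoff;

	mask = EX_NBPDR - 1;
	spoff = offset & mask;
	if ((addr & mask) == spoff)
		return (addr);			/* already congruent */
	if ((addr & mask) < spoff)
		return ((addr & ~mask) + spoff);
	return (((addr + mask) & ~mask) + spoff);
}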
*/ *pte |= PG_M | PG_A; } else { *pte |= PG_A; } /* try to promote the mapping */ if (va < VM_MAXUSER_ADDRESS) mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); else mpte = NULL; m = PHYS_TO_VM_PAGE(*pte & PG_FRAME); if ((mpte == NULL || mpte->wire_count == NPTEPG) && pmap_ps_enabled(pmap) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) { pmap_promote_pde(pmap, pde, va, &lock); #ifdef INVARIANTS atomic_add_long(&ad_emulation_superpage_promotions, 1); #endif } #ifdef INVARIANTS if (ftype == VM_PROT_WRITE) atomic_add_long(&num_dirty_emulations, 1); else atomic_add_long(&num_accessed_emulations, 1); #endif rv = 0; /* success */ done: if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); return (rv); } void pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num) { pml4_entry_t *pml4; pdp_entry_t *pdp; pd_entry_t *pde; pt_entry_t *pte, PG_V; int idx; idx = 0; PG_V = pmap_valid_bit(pmap); PMAP_LOCK(pmap); pml4 = pmap_pml4e(pmap, va); ptr[idx++] = *pml4; if ((*pml4 & PG_V) == 0) goto done; pdp = pmap_pml4e_to_pdpe(pml4, va); ptr[idx++] = *pdp; if ((*pdp & PG_V) == 0 || (*pdp & PG_PS) != 0) goto done; pde = pmap_pdpe_to_pde(pdp, va); ptr[idx++] = *pde; if ((*pde & PG_V) == 0 || (*pde & PG_PS) != 0) goto done; pte = pmap_pde_to_pte(pde, va); ptr[idx++] = *pte; done: PMAP_UNLOCK(pmap); *num = idx; } /** * Get the kernel virtual address of a set of physical pages. If there are * physical addresses not covered by the DMAP perform a transient mapping * that will be removed when calling pmap_unmap_io_transient. * * \param page The pages the caller wishes to obtain the virtual * address on the kernel memory map. * \param vaddr On return contains the kernel virtual memory address * of the pages passed in the page parameter. * \param count Number of pages passed in. * \param can_fault TRUE if the thread using the mapped pages can take * page faults, FALSE otherwise. * * \returns TRUE if the caller must call pmap_unmap_io_transient when * finished or FALSE otherwise. * */ boolean_t pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; boolean_t needs_mapping; pt_entry_t *pte; int cache_bits, error, i; /* * Allocate any KVA space that we need, this is done in a separate * loop to prevent calling vmem_alloc while pinned. */ needs_mapping = FALSE; for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (__predict_false(paddr >= dmaplimit)) { error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK, &vaddr[i]); KASSERT(error == 0, ("vmem_alloc failed: %d", error)); needs_mapping = TRUE; } else { vaddr[i] = PHYS_TO_DMAP(paddr); } } /* Exit early if everything is covered by the DMAP */ if (!needs_mapping) return (FALSE); /* * NB: The sequence of updating a page table followed by accesses * to the corresponding pages used in the !DMAP case is subject to * the situation described in the "AMD64 Architecture Programmer's * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special * Coherency Considerations". Therefore, issuing the INVLPG right * after modifying the PTE bits is crucial. */ if (!can_fault) sched_pin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= dmaplimit) { if (can_fault) { /* * Slow path, since we can get page faults * while mappings are active don't pin the * thread to the CPU and instead add a global * mapping visible to all CPUs. 
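/*
 * Illustrative helpers for the four-level walk that pmap_get_mapping()
 * above performs: each paging level consumes nine bits of the virtual
 * address, from the PML4 (bits 47-39) down to the page table (bits 20-12).
 * These are plain bit extractions, not the kernel's accessor macros.
 */
#include <stdint.h>

static unsigned pml4_index(uint64_t va) { return ((va >> 39) & 0x1ff); }
static unsigned pdp_index(uint64_t va)  { return ((va >> 30) & 0x1ff); }
static unsigned pd_index(uint64_t va)   { return ((va >> 21) & 0x1ff); }
static unsigned pt_index(uint64_t va)   { return ((va >> 12) & 0x1ff); }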
*/ pmap_qenter(vaddr[i], &page[i], 1); } else { pte = vtopte(vaddr[i]); cache_bits = pmap_cache_bits(kernel_pmap, page[i]->md.pat_mode, 0); pte_store(pte, paddr | X86_PG_RW | X86_PG_V | cache_bits); invlpg(vaddr[i]); } } } return (needs_mapping); } void pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; int i; if (!can_fault) sched_unpin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= dmaplimit) { if (can_fault) pmap_qremove(vaddr[i], 1); vmem_free(kernel_arena, vaddr[i], PAGE_SIZE); } } } vm_offset_t pmap_quick_enter_page(vm_page_t m) { vm_paddr_t paddr; paddr = VM_PAGE_TO_PHYS(m); if (paddr < dmaplimit) return (PHYS_TO_DMAP(paddr)); mtx_lock_spin(&qframe_mtx); KASSERT(*vtopte(qframe) == 0, ("qframe busy")); pte_store(vtopte(qframe), paddr | X86_PG_RW | X86_PG_V | X86_PG_A | X86_PG_M | pmap_cache_bits(kernel_pmap, m->md.pat_mode, 0)); return (qframe); } void pmap_quick_remove_page(vm_offset_t addr) { if (addr != qframe) return; pte_store(vtopte(qframe), 0); invlpg(qframe); mtx_unlock_spin(&qframe_mtx); } #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(pte, pmap_print_pte) { pmap_t pmap; pml4_entry_t *pml4; pdp_entry_t *pdp; pd_entry_t *pde; pt_entry_t *pte, PG_V; vm_offset_t va; if (have_addr) { va = (vm_offset_t)addr; pmap = PCPU_GET(curpmap); /* XXX */ } else { db_printf("show pte addr\n"); return; } PG_V = pmap_valid_bit(pmap); pml4 = pmap_pml4e(pmap, va); db_printf("VA %#016lx pml4e %#016lx", va, *pml4); if ((*pml4 & PG_V) == 0) { db_printf("\n"); return; } pdp = pmap_pml4e_to_pdpe(pml4, va); db_printf(" pdpe %#016lx", *pdp); if ((*pdp & PG_V) == 0 || (*pdp & PG_PS) != 0) { db_printf("\n"); return; } pde = pmap_pdpe_to_pde(pdp, va); db_printf(" pde %#016lx", *pde); if ((*pde & PG_V) == 0 || (*pde & PG_PS) != 0) { db_printf("\n"); return; } pte = pmap_pde_to_pte(pde, va); db_printf(" pte %#016lx\n", *pte); } DB_SHOW_COMMAND(phys2dmap, pmap_phys2dmap) { vm_paddr_t a; if (have_addr) { a = (vm_paddr_t)addr; db_printf("0x%jx\n", (uintmax_t)PHYS_TO_DMAP(a)); } else { db_printf("show phys2dmap addr\n"); } } #endif Index: projects/clang390-import/sys/amd64/amd64/support.S =================================================================== --- projects/clang390-import/sys/amd64/amd64/support.S (revision 305016) +++ projects/clang390-import/sys/amd64/amd64/support.S (revision 305017) @@ -1,789 +1,813 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
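/*
 * The sse2_pagezero() routine added to support.S below zeroes a page with
 * MOVNTI non-temporal stores, so the zeroes bypass the cache instead of
 * evicting useful lines, and ends with SFENCE because non-temporal stores
 * are weakly ordered.  A user-space sketch of the same idea with compiler
 * intrinsics, assuming a 4096-byte page:
 */
#include <emmintrin.h>

#define	EX_PAGE_SIZE	4096

static void
stream_pagezero(void *page)
{
	long long *p = page;
	int i;

	for (i = 0; i < EX_PAGE_SIZE / 8; i += 4) {
		_mm_stream_si64(p + i, 0);	/* movnti */
		_mm_stream_si64(p + i + 1, 0);
		_mm_stream_si64(p + i + 2, 0);
		_mm_stream_si64(p + i + 3, 0);
	}
	_mm_sfence();		/* make the NT stores globally visible */
}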
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_ddb.h" #include #include #include "assym.s" .text /* * bcopy family * void bzero(void *buf, u_int len) */ /* done */ ENTRY(bzero) PUSH_FRAME_POINTER movq %rsi,%rcx xorl %eax,%eax shrq $3,%rcx cld rep stosq movq %rsi,%rcx andq $7,%rcx rep stosb POP_FRAME_POINTER ret END(bzero) /* Address: %rdi */ ENTRY(pagezero) PUSH_FRAME_POINTER movq $PAGE_SIZE/8,%rcx xorl %eax,%eax rep stosq POP_FRAME_POINTER ret END(pagezero) +/* Address: %rdi */ +ENTRY(sse2_pagezero) + PUSH_FRAME_POINTER + movq $-PAGE_SIZE,%rdx + subq %rdx,%rdi + xorl %eax,%eax + jmp 1f + /* + * The loop takes 29 bytes. Ensure that it doesn't cross a 32-byte + * cache line. + */ + .p2align 5,0x90 +1: + movnti %rax,(%rdi,%rdx) + movnti %rax,8(%rdi,%rdx) + movnti %rax,16(%rdi,%rdx) + movnti %rax,24(%rdi,%rdx) + addq $32,%rdx + jne 1b + sfence + POP_FRAME_POINTER + ret +END(sse2_pagezero) + ENTRY(bcmp) PUSH_FRAME_POINTER movq %rdx,%rcx shrq $3,%rcx cld /* compare forwards */ repe cmpsq jne 1f movq %rdx,%rcx andq $7,%rcx repe cmpsb 1: setne %al movsbl %al,%eax POP_FRAME_POINTER ret END(bcmp) /* * bcopy(src, dst, cnt) * rdi, rsi, rdx * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ ENTRY(bcopy) PUSH_FRAME_POINTER xchgq %rsi,%rdi movq %rdx,%rcx movq %rdi,%rax subq %rsi,%rax cmpq %rcx,%rax /* overlapping && src < dst? */ jb 1f shrq $3,%rcx /* copy by 64-bit words */ cld /* nope, copy forwards */ rep movsq movq %rdx,%rcx andq $7,%rcx /* any bytes left? */ rep movsb POP_FRAME_POINTER ret /* ALIGN_TEXT */ 1: addq %rcx,%rdi /* copy backwards */ addq %rcx,%rsi decq %rdi decq %rsi andq $7,%rcx /* any fractional bytes? */ std rep movsb movq %rdx,%rcx /* copy remainder by 32-bit words */ shrq $3,%rcx subq $7,%rsi subq $7,%rdi rep movsq cld POP_FRAME_POINTER ret END(bcopy) /* * Note: memcpy does not support overlapping copies */ ENTRY(memcpy) PUSH_FRAME_POINTER movq %rdi,%rax movq %rdx,%rcx shrq $3,%rcx /* copy by 64-bit words */ cld /* copy forwards */ rep movsq movq %rdx,%rcx andq $7,%rcx /* any bytes left? */ rep movsb POP_FRAME_POINTER ret END(memcpy) /* * pagecopy(%rdi=from, %rsi=to) */ ENTRY(pagecopy) PUSH_FRAME_POINTER movq $-PAGE_SIZE,%rax movq %rax,%rdx subq %rax,%rdi subq %rax,%rsi 1: prefetchnta (%rdi,%rax) addq $64,%rax jne 1b 2: movq (%rdi,%rdx),%rax movnti %rax,(%rsi,%rdx) movq 8(%rdi,%rdx),%rax movnti %rax,8(%rsi,%rdx) movq 16(%rdi,%rdx),%rax movnti %rax,16(%rsi,%rdx) movq 24(%rdi,%rdx),%rax movnti %rax,24(%rsi,%rdx) addq $32,%rdx jne 2b sfence POP_FRAME_POINTER ret END(pagecopy) /* fillw(pat, base, cnt) */ /* %rdi,%rsi, %rdx */ ENTRY(fillw) PUSH_FRAME_POINTER movq %rdi,%rax movq %rsi,%rdi movq %rdx,%rcx cld rep stosw POP_FRAME_POINTER ret END(fillw) /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ /* * Access user memory from inside the kernel. These routines should be * the only places that do this. 
* * These routines set curpcb->pcb_onfault for the time they execute. When a * protection violation occurs inside the functions, the trap handler * returns to *curpcb->pcb_onfault instead of the function. */ /* * copyout(from_kernel, to_user, len) - MP SAFE * %rdi, %rsi, %rdx */ ENTRY(copyout) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rax movq $copyout_fault,PCB_ONFAULT(%rax) testq %rdx,%rdx /* anything to do? */ jz done_copyout /* * Check explicitly for non-user addresses. If 486 write protection * is being used, this check is essential because we are in kernel * mode so the h/w does not provide any protection against writing * kernel addresses. */ /* * First, prevent address wrapping. */ movq %rsi,%rax addq %rdx,%rax jc copyout_fault /* * XXX STOP USING VM_MAXUSER_ADDRESS. * It is an end address, not a max, so every time it is used correctly it * looks like there is an off by one error, and of course it caused an off * by one error in several places. */ movq $VM_MAXUSER_ADDRESS,%rcx cmpq %rcx,%rax ja copyout_fault xchgq %rdi,%rsi /* bcopy(%rsi, %rdi, %rdx) */ movq %rdx,%rcx shrq $3,%rcx cld rep movsq movb %dl,%cl andb $7,%cl rep movsb done_copyout: xorl %eax,%eax movq PCPU(CURPCB),%rdx movq %rax,PCB_ONFAULT(%rdx) POP_FRAME_POINTER ret ALIGN_TEXT copyout_fault: movq PCPU(CURPCB),%rdx movq $0,PCB_ONFAULT(%rdx) movq $EFAULT,%rax POP_FRAME_POINTER ret END(copyout) /* * copyin(from_user, to_kernel, len) - MP SAFE * %rdi, %rsi, %rdx */ ENTRY(copyin) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rax movq $copyin_fault,PCB_ONFAULT(%rax) testq %rdx,%rdx /* anything to do? */ jz done_copyin /* * make sure address is valid */ movq %rdi,%rax addq %rdx,%rax jc copyin_fault movq $VM_MAXUSER_ADDRESS,%rcx cmpq %rcx,%rax ja copyin_fault xchgq %rdi,%rsi movq %rdx,%rcx movb %cl,%al shrq $3,%rcx /* copy longword-wise */ cld rep movsq movb %al,%cl andb $7,%cl /* copy remaining bytes */ rep movsb done_copyin: xorl %eax,%eax movq PCPU(CURPCB),%rdx movq %rax,PCB_ONFAULT(%rdx) POP_FRAME_POINTER ret ALIGN_TEXT copyin_fault: movq PCPU(CURPCB),%rdx movq $0,PCB_ONFAULT(%rdx) movq $EFAULT,%rax POP_FRAME_POINTER ret END(copyin) /* * casueword32. Compare and set user integer. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. * dst = %rdi, old = %esi, oldp = %rdx, new = %ecx */ ENTRY(casueword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movl %esi,%eax /* old */ #ifdef SMP lock #endif cmpxchgl %ecx,(%rdi) /* new = %ecx */ /* * The old value is in %eax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. Save %eax into %esi to prepare the return * value. */ movl %eax,%esi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) /* * Access the oldp after the pcb_onfault is cleared, to correctly * catch corrupted pointer. */ movl %esi,(%rdx) /* oldp = %rdx */ POP_FRAME_POINTER ret END(casueword32) /* * casueword. Compare and set user long. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. * dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx */ ENTRY(casueword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movq %rsi,%rax /* old */ #ifdef SMP lock #endif cmpxchgq %rcx,(%rdi) /* new = %rcx */ /* * The old value is in %rax. 
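/*
 * The two-step validity test in copyout()/copyin() above, rendered in C:
 * reject ranges whose end wraps past zero, then ranges extending beyond
 * the user address space.  VM_MAXUSER_ADDRESS is an end address, so
 * "end == limit" is still acceptable; the value below is illustrative.
 */
#include <stdbool.h>
#include <stdint.h>

#define	EX_VM_MAXUSER_ADDRESS	0x0000800000000000ULL

static bool
user_range_ok(uint64_t uaddr, uint64_t len)
{
	uint64_t end = uaddr + len;

	if (end < uaddr)
		return (false);		/* address wrapped (jc in asm) */
	return (end <= EX_VM_MAXUSER_ADDRESS);	/* else ja faults */
}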
If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. */ movq %rax,%rsi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) movq %rsi,(%rdx) POP_FRAME_POINTER ret END(casueword) /* * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit * byte from user memory. * addr = %rdi, valp = %rsi */ ALTENTRY(fueword64) ENTRY(fueword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax movq (%rdi),%r11 movq %rax,PCB_ONFAULT(%rcx) movq %r11,(%rsi) POP_FRAME_POINTER ret END(fueword64) END(fueword) ENTRY(fueword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax movl (%rdi),%r11d movq %rax,PCB_ONFAULT(%rcx) movl %r11d,(%rsi) POP_FRAME_POINTER ret END(fueword32) /* * fuswintr() and suswintr() are specialized variants of fuword16() and * suword16(), respectively. They are called from the profiling code, * potentially at interrupt time. If they fail, that's okay; good things * will happen later. They always fail for now, until the trap code is * able to deal with this. */ ALTENTRY(suswintr) ENTRY(fuswintr) movq $-1,%rax ret END(suswintr) END(fuswintr) ENTRY(fuword16) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi ja fusufault movzwl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fuword16) ENTRY(fubyte) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi ja fusufault movzbl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fubyte) ALIGN_TEXT fusufault: movq PCPU(CURPCB),%rcx xorl %eax,%eax movq %rax,PCB_ONFAULT(%rcx) decq %rax POP_FRAME_POINTER ret /* * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to * user memory. All these functions are MPSAFE. 
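/*
 * The casueword32() contract above, sketched with a C11 atomic standing
 * in for user memory: attempt to replace *p == old with new, always hand
 * back the value actually observed, and let the caller infer success by
 * comparing *oldp with old.  Faults obviously cannot be modeled here.
 */
#include <stdatomic.h>
#include <stdint.h>

static int
casuword32_sketch(_Atomic uint32_t *p, uint32_t old, uint32_t *oldp,
    uint32_t new)
{
	uint32_t expected = old;

	atomic_compare_exchange_strong(p, &expected, new);
	*oldp = expected;	/* equals old exactly when the CAS won */
	return (0);		/* the real routine returns -1 on fault */
}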
* addr = %rdi, value = %rsi */ ALTENTRY(suword64) ENTRY(suword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movq %rsi,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword64) END(suword) ENTRY(suword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword32) ENTRY(suword16) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movw %si,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx /* restore trashed register */ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword16) ENTRY(subyte) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,%eax movb %al,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx /* restore trashed register */ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(subyte) /* * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE * %rdi, %rsi, %rdx, %rcx * * copy a string from from to to, stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. */ ENTRY(copyinstr) PUSH_FRAME_POINTER movq %rdx,%r8 /* %r8 = maxlen */ movq %rcx,%r9 /* %r9 = *len */ xchgq %rdi,%rsi /* %rdi = from, %rsi = to */ movq PCPU(CURPCB),%rcx movq $cpystrflt,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS,%rax /* make sure 'from' is within bounds */ subq %rsi,%rax jbe cpystrflt /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ cmpq %rdx,%rax jae 1f movq %rax,%rdx movq %rax,%r8 1: incq %rdx cld 2: decq %rdx jz 3f lodsb stosb orb %al,%al jnz 2b /* Success -- 0 byte reached */ decq %rdx xorl %eax,%eax jmp cpystrflt_x 3: /* rdx is zero - return ENAMETOOLONG or EFAULT */ movq $VM_MAXUSER_ADDRESS,%rax cmpq %rax,%rsi jae cpystrflt 4: movq $ENAMETOOLONG,%rax jmp cpystrflt_x cpystrflt: movq $EFAULT,%rax cpystrflt_x: /* set *lencopied and return %eax */ movq PCPU(CURPCB),%rcx movq $0,PCB_ONFAULT(%rcx) testq %r9,%r9 jz 1f subq %rdx,%r8 movq %r8,(%r9) 1: POP_FRAME_POINTER ret END(copyinstr) /* * copystr(from, to, maxlen, int *lencopied) - MP SAFE * %rdi, %rsi, %rdx, %rcx */ ENTRY(copystr) PUSH_FRAME_POINTER movq %rdx,%r8 /* %r8 = maxlen */ xchgq %rdi,%rsi incq %rdx cld 1: decq %rdx jz 4f lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decq %rdx xorl %eax,%eax jmp 6f 4: /* rdx is zero -- return ENAMETOOLONG */ movq $ENAMETOOLONG,%rax 6: testq %rcx,%rcx jz 7f /* set *lencopied and return %rax */ subq %rdx,%r8 movq %r8,(%rcx) 7: POP_FRAME_POINTER ret END(copystr) /* * Handling of special amd64 registers and descriptor tables etc * %rdi */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) /* reload the descriptor table */ lgdt (%rdi) /* flush the prefetch q */ jmp 1f nop 1: movl $KDSEL,%eax movl %eax,%ds movl %eax,%es movl %eax,%fs /* Beware, use wrmsr to set 64 bit base */ movl %eax,%gs movl %eax,%ss /* reload code selector by turning return into intersegmental return */ popq %rax pushq $KCSEL pushq %rax MEXITCOUNT lretq END(lgdt) 
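/*
 * A C rendering of the copystr() loop above: copy bytes until a NUL or
 * until maxlen is exhausted, report ENAMETOOLONG in the latter case, and
 * optionally return how many bytes were copied (including the NUL).
 */
#include <errno.h>
#include <stddef.h>

static int
copystr_sketch(const char *from, char *to, size_t maxlen, size_t *lencopied)
{
	size_t n;
	int error;

	error = ENAMETOOLONG;
	for (n = 0; n < maxlen; n++) {
		if ((to[n] = from[n]) == '\0') {
			n++;		/* count the terminating NUL */
			error = 0;
			break;
		}
	}
	if (lencopied != NULL)
		*lencopied = n;
	return (error);
}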
/*****************************************************************************/ /* setjmp, longjmp */ /*****************************************************************************/ ENTRY(setjmp) movq %rbx,0(%rdi) /* save rbx */ movq %rsp,8(%rdi) /* save rsp */ movq %rbp,16(%rdi) /* save rbp */ movq %r12,24(%rdi) /* save r12 */ movq %r13,32(%rdi) /* save r13 */ movq %r14,40(%rdi) /* save r14 */ movq %r15,48(%rdi) /* save r15 */ movq 0(%rsp),%rdx /* get rta */ movq %rdx,56(%rdi) /* save rip */ xorl %eax,%eax /* return(0); */ ret END(setjmp) ENTRY(longjmp) movq 0(%rdi),%rbx /* restore rbx */ movq 8(%rdi),%rsp /* restore rsp */ movq 16(%rdi),%rbp /* restore rbp */ movq 24(%rdi),%r12 /* restore r12 */ movq 32(%rdi),%r13 /* restore r13 */ movq 40(%rdi),%r14 /* restore r14 */ movq 48(%rdi),%r15 /* restore r15 */ movq 56(%rdi),%rdx /* get rta */ movq %rdx,0(%rsp) /* put in return frame */ xorl %eax,%eax /* return(1); */ incl %eax ret END(longjmp) /* * Support for reading MSRs in the safe manner. */ ENTRY(rdmsr_safe) /* int rdmsr_safe(u_int msr, uint64_t *data) */ PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $msr_onfault,PCB_ONFAULT(%r8) movl %edi,%ecx rdmsr /* Read MSR pointed by %ecx. Returns high 32 bits in %edx, low 32 bits in %eax */ salq $32,%rdx /* shift %rdx left by 32 bits */ movl %eax,%eax /* zero-extend %eax -> %rax */ orq %rdx,%rax movq %rax,(%rsi) xorq %rax,%rax movq %rax,PCB_ONFAULT(%r8) POP_FRAME_POINTER ret /* * Support for writing MSRs in the safe manner. */ ENTRY(wrmsr_safe) /* int wrmsr_safe(u_int msr, uint64_t data) */ PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $msr_onfault,PCB_ONFAULT(%r8) movl %edi,%ecx movl %esi,%eax sarq $32,%rsi movl %esi,%edx wrmsr /* Write MSR pointed by %ecx. Accepts high 32 bits in %edx, low 32 bits in %eax. */ xorq %rax,%rax movq %rax,PCB_ONFAULT(%r8) POP_FRAME_POINTER ret /* * MSR operations fault handler */ ALIGN_TEXT msr_onfault: movq $0,PCB_ONFAULT(%r8) movl $EFAULT,%eax POP_FRAME_POINTER ret Index: projects/clang390-import/sys/amd64/include/md_var.h =================================================================== --- projects/clang390-import/sys/amd64/include/md_var.h (revision 305016) +++ projects/clang390-import/sys/amd64/include/md_var.h (revision 305017) @@ -1,63 +1,64 @@ /*- * Copyright (c) 1995 Bruce D. Evans. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
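/*
 * What rdmsr_safe() above assembles once RDMSR succeeds: the instruction
 * leaves the high 32 bits of the MSR in %edx and the low 32 bits in %eax,
 * which the code merges into a single 64-bit result.
 */
#include <stdint.h>

static uint64_t
msr_merge(uint32_t hi, uint32_t lo)
{
	return (((uint64_t)hi << 32) | lo);
}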
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_MD_VAR_H_ #define _MACHINE_MD_VAR_H_ #include extern uint64_t *vm_page_dump; struct savefpu; void amd64_db_resume_dbreg(void); void amd64_syscall(struct thread *td, int traced); void doreti_iret(void) __asm(__STRING(doreti_iret)); void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault)); void ld_ds(void) __asm(__STRING(ld_ds)); void ld_es(void) __asm(__STRING(ld_es)); void ld_fs(void) __asm(__STRING(ld_fs)); void ld_gs(void) __asm(__STRING(ld_gs)); void ld_fsbase(void) __asm(__STRING(ld_fsbase)); void ld_gsbase(void) __asm(__STRING(ld_gsbase)); void ds_load_fault(void) __asm(__STRING(ds_load_fault)); void es_load_fault(void) __asm(__STRING(es_load_fault)); void fs_load_fault(void) __asm(__STRING(fs_load_fault)); void gs_load_fault(void) __asm(__STRING(gs_load_fault)); void fsbase_load_fault(void) __asm(__STRING(fsbase_load_fault)); void gsbase_load_fault(void) __asm(__STRING(gsbase_load_fault)); void fpstate_drop(struct thread *td); void pagezero(void *addr); void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int ist); +void sse2_pagezero(void *addr); struct savefpu *get_pcb_user_save_td(struct thread *td); struct savefpu *get_pcb_user_save_pcb(struct pcb *pcb); #endif /* !_MACHINE_MD_VAR_H_ */ Index: projects/clang390-import/sys/boot/i386/libi386/biosdisk.c =================================================================== --- projects/clang390-import/sys/boot/i386/libi386/biosdisk.c (revision 305016) +++ projects/clang390-import/sys/boot/i386/libi386/biosdisk.c (revision 305017) @@ -1,921 +1,922 @@ /*- * Copyright (c) 1998 Michael Smith * Copyright (c) 2012 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * BIOS disk device handling. 
* * Ideas and algorithms from: * * - NetBSD libi386/biosdisk.c * - FreeBSD biosboot/disk.c * */ #include #include #include #include #include #include #include #include "disk.h" #include "libi386.h" #ifdef LOADER_GELI_SUPPORT #include "cons.h" #include "drv.h" #include "gpt.h" #include "part.h" #include struct pentry { struct ptable_entry part; uint64_t flags; union { uint8_t bsd; uint8_t mbr; uuid_t gpt; uint16_t vtoc8; } type; STAILQ_ENTRY(pentry) entry; }; struct ptable { enum ptable_type type; uint16_t sectorsize; uint64_t sectors; STAILQ_HEAD(, pentry) entries; }; #include "geliboot.c" #endif /* LOADER_GELI_SUPPORT */ CTASSERT(sizeof(struct i386_devdesc) >= sizeof(struct disk_devdesc)); #define BIOS_NUMDRIVES 0x475 #define BIOSDISK_SECSIZE 512 #define BUFSIZE (1 * BIOSDISK_SECSIZE) #define DT_ATAPI 0x10 /* disk type for ATAPI floppies */ #define WDMAJOR 0 /* major numbers for devices we frontend for */ #define WFDMAJOR 1 #define FDMAJOR 2 #define DAMAJOR 4 #ifdef DISK_DEBUG # define DEBUG(fmt, args...) printf("%s: " fmt "\n" , __func__ , ## args) #else # define DEBUG(fmt, args...) #endif /* * List of BIOS devices, translation from disk unit number to * BIOS unit number. */ static struct bdinfo { int bd_unit; /* BIOS unit number */ int bd_cyl; /* BIOS geometry */ int bd_hds; int bd_sec; int bd_flags; #define BD_MODEINT13 0x0000 #define BD_MODEEDD1 0x0001 #define BD_MODEEDD3 0x0002 #define BD_MODEMASK 0x0003 #define BD_FLOPPY 0x0004 int bd_type; /* BIOS 'drive type' (floppy only) */ uint16_t bd_sectorsize; /* Sector size */ uint64_t bd_sectors; /* Disk size */ int bd_open; /* reference counter */ void *bd_bcache; /* buffer cache data */ } bdinfo [MAXBDDEV]; static int nbdinfo = 0; #define BD(dev) (bdinfo[(dev)->d_unit]) static int bd_read(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest); static int bd_write(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest); static int bd_int13probe(struct bdinfo *bd); static int bd_init(void); static int bd_strategy(void *devdata, int flag, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize); static int bd_realstrategy(void *devdata, int flag, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize); static int bd_open(struct open_file *f, ...); static int bd_close(struct open_file *f); static int bd_ioctl(struct open_file *f, u_long cmd, void *data); static void bd_print(int verbose); static void bd_cleanup(void); #ifdef LOADER_GELI_SUPPORT static enum isgeli { ISGELI_UNKNOWN, ISGELI_NO, ISGELI_YES }; static enum isgeli geli_status[MAXBDDEV][MAXTBLENTS]; int bios_read(void *vdev __unused, struct dsk *priv, off_t off, char *buf, size_t bytes); #endif /* LOADER_GELI_SUPPORT */ struct devsw biosdisk = { "disk", DEVT_DISK, bd_init, bd_strategy, bd_open, bd_close, bd_ioctl, bd_print, bd_cleanup }; /* * Translate between BIOS device numbers and our private unit numbers. */ int bd_bios2unit(int biosdev) { int i; DEBUG("looking for bios device 0x%x", biosdev); for (i = 0; i < nbdinfo; i++) { DEBUG("bd unit %d is BIOS device 0x%x", i, bdinfo[i].bd_unit); if (bdinfo[i].bd_unit == biosdev) return (i); } return (-1); } int bd_unit2bios(int unit) { if ((unit >= 0) && (unit < nbdinfo)) return (bdinfo[unit].bd_unit); return (-1); } /* * Quiz the BIOS for disk devices, save a little info about them. 
*/ static int bd_init(void) { int base, unit, nfd = 0; #ifdef LOADER_GELI_SUPPORT geli_init(); #endif /* sequence 0, 0x80 */ for (base = 0; base <= 0x80; base += 0x80) { for (unit = base; (nbdinfo < MAXBDDEV); unit++) { #ifndef VIRTUALBOX /* * Check the BIOS equipment list for number * of fixed disks. */ if(base == 0x80 && (nfd >= *(unsigned char *)PTOV(BIOS_NUMDRIVES))) break; #endif bdinfo[nbdinfo].bd_open = 0; bdinfo[nbdinfo].bd_bcache = NULL; bdinfo[nbdinfo].bd_unit = unit; bdinfo[nbdinfo].bd_flags = unit < 0x80 ? BD_FLOPPY: 0; if (!bd_int13probe(&bdinfo[nbdinfo])) break; /* XXX we need "disk aliases" to make this simpler */ printf("BIOS drive %c: is disk%d\n", (unit < 0x80) ? ('A' + unit): ('C' + unit - 0x80), nbdinfo); nbdinfo++; if (base == 0x80) nfd++; } } bcache_add_dev(nbdinfo); return(0); } static void bd_cleanup(void) { disk_cleanup(&biosdisk); } /* * Try to detect a device supported by the legacy int13 BIOS */ static int bd_int13probe(struct bdinfo *bd) { struct edd_params params; v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x800; v86.edx = bd->bd_unit; v86int(); if (V86_CY(v86.efl) || /* carry set */ (v86.ecx & 0x3f) == 0 || /* absurd sector number */ (v86.edx & 0xff) <= (unsigned)(bd->bd_unit & 0x7f)) /* unit # bad */ return (0); /* skip device */ /* Convert max cyl # -> # of cylinders */ bd->bd_cyl = ((v86.ecx & 0xc0) << 2) + ((v86.ecx & 0xff00) >> 8) + 1; /* Convert max head # -> # of heads */ bd->bd_hds = ((v86.edx & 0xff00) >> 8) + 1; bd->bd_sec = v86.ecx & 0x3f; bd->bd_type = v86.ebx & 0xff; bd->bd_flags |= BD_MODEINT13; /* Calculate sectors count from the geometry */ bd->bd_sectors = bd->bd_cyl * bd->bd_hds * bd->bd_sec; bd->bd_sectorsize = BIOSDISK_SECSIZE; DEBUG("unit 0x%x geometry %d/%d/%d", bd->bd_unit, bd->bd_cyl, bd->bd_hds, bd->bd_sec); /* Determine if we can use EDD with this device. */ v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x4100; v86.edx = bd->bd_unit; v86.ebx = 0x55aa; v86int(); if (V86_CY(v86.efl) || /* carry set */ (v86.ebx & 0xffff) != 0xaa55 || /* signature */ (v86.ecx & EDD_INTERFACE_FIXED_DISK) == 0) return (1); /* EDD supported */ bd->bd_flags |= BD_MODEEDD1; if ((v86.eax & 0xff00) >= 0x3000) bd->bd_flags |= BD_MODEEDD3; /* Get disk params */ params.len = sizeof(struct edd_params); v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x4800; v86.edx = bd->bd_unit; v86.ds = VTOPSEG(¶ms); v86.esi = VTOPOFF(¶ms); v86int(); if (!V86_CY(v86.efl)) { bd->bd_sectors = params.sectors; bd->bd_sectorsize = params.sector_size; } DEBUG("unit 0x%x flags %x, sectors %llu, sectorsize %u", bd->bd_unit, bd->bd_flags, bd->bd_sectors, bd->bd_sectorsize); return (1); } /* * Print information about disks */ static void bd_print(int verbose) { static char line[80]; struct disk_devdesc dev; int i; pager_open(); for (i = 0; i < nbdinfo; i++) { sprintf(line, " disk%d: BIOS drive %c (%ju X %u):\n", i, (bdinfo[i].bd_unit < 0x80) ? ('A' + bdinfo[i].bd_unit): ('C' + bdinfo[i].bd_unit - 0x80), (uintmax_t)bdinfo[i].bd_sectors, bdinfo[i].bd_sectorsize); if (pager_output(line)) break; dev.d_dev = &biosdisk; dev.d_unit = i; dev.d_slice = -1; dev.d_partition = -1; if (disk_open(&dev, bdinfo[i].bd_sectorsize * bdinfo[i].bd_sectors, bdinfo[i].bd_sectorsize, (bdinfo[i].bd_flags & BD_FLOPPY) ? DISK_F_NOCACHE: 0) == 0) { sprintf(line, " disk%d", i); disk_print(&dev, line, verbose); disk_close(&dev); } } pager_close(); } /* * Attempt to open the disk described by (dev) for use by (f). * * Note that the philosophy here is "give them exactly what * they ask for". 
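/*
 * Sketch of the INT 13h AH=08h decoding in bd_int13probe() above: the
 * maximum cylinder number is split across CX (high two bits in bits 7:6
 * of CL, low eight bits in CH), the sectors-per-track count occupies bits
 * 5:0 of CL, and the maximum head number is in DH.  Maxima become counts
 * by adding one; sector numbers are already 1-based.
 */
#include <stdint.h>

struct chs_geom {
	unsigned cyls, heads, secs;
};

static struct chs_geom
decode_chs(uint32_t ecx, uint32_t edx)
{
	struct chs_geom g;

	g.cyls = (((ecx & 0xc0) << 2) | ((ecx & 0xff00) >> 8)) + 1;
	g.heads = ((edx & 0xff00) >> 8) + 1;
	g.secs = ecx & 0x3f;
	return (g);
}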
This is necessary because being too "smart" * about what the user might want leads to complications. * (eg. given no slice or partition value, with a disk that is * sliced - are they after the first BSD slice, or the DOS * slice before it?) */ static int bd_open(struct open_file *f, ...) { struct disk_devdesc *dev, rdev; int err, g_err; va_list ap; va_start(ap, f); dev = va_arg(ap, struct disk_devdesc *); va_end(ap); if (dev->d_unit < 0 || dev->d_unit >= nbdinfo) return (EIO); BD(dev).bd_open++; if (BD(dev).bd_bcache == NULL) BD(dev).bd_bcache = bcache_allocate(); err = disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize, BD(dev).bd_sectorsize, (BD(dev).bd_flags & BD_FLOPPY) ? DISK_F_NOCACHE: 0); #ifdef LOADER_GELI_SUPPORT static char gelipw[GELI_PW_MAXLEN]; char *passphrase; if (err) return (err); /* if we already know there is no GELI, skip the rest */ if (geli_status[dev->d_unit][dev->d_slice] != ISGELI_UNKNOWN) return (err); struct dsk dskp; struct ptable *table = NULL; struct ptable_entry part; struct pentry *entry; int geli_part = 0; dskp.drive = bd_unit2bios(dev->d_unit); dskp.type = dev->d_type; dskp.unit = dev->d_unit; dskp.slice = dev->d_slice; dskp.part = dev->d_partition; dskp.start = dev->d_offset; memcpy(&rdev, dev, sizeof(rdev)); /* to read the GPT table, we need to read the first sector */ rdev.d_offset = 0; /* We need the LBA of the end of the partition */ table = ptable_open(&rdev, BD(dev).bd_sectors, BD(dev).bd_sectorsize, ptblread); if (table == NULL) { DEBUG("Can't read partition table"); /* soft failure, return the exit status of disk_open */ return (err); } if (table->type == PTABLE_GPT) dskp.part = 255; STAILQ_FOREACH(entry, &table->entries, entry) { dskp.slice = entry->part.index; dskp.start = entry->part.start; if (is_geli(&dskp) == 0) { geli_status[dev->d_unit][dskp.slice] = ISGELI_YES; return (0); } if (geli_taste(bios_read, &dskp, entry->part.end - entry->part.start) == 0) { if ((passphrase = getenv("kern.geom.eli.passphrase")) != NULL) { /* Use the cached passphrase */ bcopy(passphrase, &gelipw, GELI_PW_MAXLEN); } if (geli_passphrase(&gelipw, dskp.unit, 'p', (dskp.slice > 0 ? 
dskp.slice : dskp.part), &dskp) == 0) { setenv("kern.geom.eli.passphrase", &gelipw, 1); bzero(gelipw, sizeof(gelipw)); geli_status[dev->d_unit][dskp.slice] = ISGELI_YES; geli_part++; } } else geli_status[dev->d_unit][dskp.slice] = ISGELI_NO; } /* none of the partitions on this disk have GELI */ if (geli_part == 0) { /* found no GELI */ geli_status[dev->d_unit][dev->d_slice] = ISGELI_NO; } #endif /* LOADER_GELI_SUPPORT */ return (err); } static int bd_close(struct open_file *f) { struct disk_devdesc *dev; dev = (struct disk_devdesc *)f->f_devdata; BD(dev).bd_open--; if (BD(dev).bd_open == 0) { bcache_free(BD(dev).bd_bcache); BD(dev).bd_bcache = NULL; } return (disk_close(dev)); } static int bd_ioctl(struct open_file *f, u_long cmd, void *data) { struct disk_devdesc *dev; dev = (struct disk_devdesc *)f->f_devdata; switch (cmd) { case DIOCGSECTORSIZE: *(u_int *)data = BD(dev).bd_sectorsize; break; case DIOCGMEDIASIZE: *(off_t *)data = BD(dev).bd_sectors * BD(dev).bd_sectorsize; break; default: return (ENOTTY); } return (0); } static int bd_strategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize) { struct bcache_devdata bcd; struct disk_devdesc *dev; dev = (struct disk_devdesc *)devdata; bcd.dv_strategy = bd_realstrategy; bcd.dv_devdata = devdata; bcd.dv_cache = BD(dev).bd_bcache; return (bcache_strategy(&bcd, rw, dblk + dev->d_offset, offset, size, buf, rsize)); } static int bd_realstrategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize) { struct disk_devdesc *dev = (struct disk_devdesc *)devdata; - int blks; + int blks, remaining; #ifdef BD_SUPPORT_FRAGS /* XXX: sector size */ char fragbuf[BIOSDISK_SECSIZE]; size_t fragsize; fragsize = size % BIOSDISK_SECSIZE; #else if (size % BD(dev).bd_sectorsize) panic("bd_strategy: %d bytes I/O not multiple of block size", size); #endif DEBUG("open_disk %p", dev); blks = size / BD(dev).bd_sectorsize; if (rsize) *rsize = 0; - if (dblk >= BD(dev).bd_sectors) { - DEBUG("IO past disk end %llu", (unsigned long long)dblk); - return (EIO); - } - - if (dblk + blks > BD(dev).bd_sectors) { - /* perform partial read */ - blks = BD(dev).bd_sectors - dblk; + /* + * Perform partial read to prevent read-ahead crossing + * the end of disk - or any 32 bit aliases of the end. + * Signed arithmetic is used to handle wrap-around cases + * like we do for TCP sequence numbers. 
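/*
 * The truncated-signed comparison applied just below, as a standalone
 * sketch: (int)(sectors - dblk) goes negative or zero when the request
 * starts at or past the end of the disk, including when a bad 64-bit dblk
 * aliases the end modulo 2^32, so only a genuinely short in-range request
 * is trimmed.
 */
#include <stdint.h>

static int
clip_blks(uint64_t sectors, uint64_t dblk, int blks)
{
	int remaining = (int)(sectors - dblk);	/* deliberate truncation */

	if (remaining > 0 && remaining < blks)
		return (remaining);	/* partial read at end of disk */
	return (blks);			/* unchanged; BIOS may still fail */
}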
+ */ + remaining = (int)(BD(dev).bd_sectors - dblk); /* truncate */ + if (remaining > 0 && remaining < blks) { + blks = remaining; size = blks * BD(dev).bd_sectorsize; DEBUG("short read %d", blks); } switch(rw){ case F_READ: DEBUG("read %d from %lld to %p", blks, dblk, buf); if (blks && bd_read(dev, dblk, blks, buf)) { DEBUG("read error"); return (EIO); } #ifdef BD_SUPPORT_FRAGS /* XXX: sector size */ DEBUG("bd_strategy: frag read %d from %d+%d to %p", fragsize, dblk, blks, buf + (blks * BIOSDISK_SECSIZE)); if (fragsize && bd_read(od, dblk + blks, 1, fragsize)) { DEBUG("frag read error"); return(EIO); } bcopy(fragbuf, buf + (blks * BIOSDISK_SECSIZE), fragsize); #endif break; case F_WRITE : DEBUG("write %d from %d to %p", blks, dblk, buf); if (blks && bd_write(dev, dblk, blks, buf)) { DEBUG("write error"); return (EIO); } #ifdef BD_SUPPORT_FRAGS if(fragsize) { DEBUG("Attempted to write a frag"); return (EIO); } #endif break; default: /* DO NOTHING */ return (EROFS); } if (rsize) *rsize = size; return (0); } /* Max number of sectors to bounce-buffer if the request crosses a 64k boundary */ #define FLOPPY_BOUNCEBUF 18 static int bd_edd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest, int write) { static struct edd_packet packet; packet.len = sizeof(struct edd_packet); packet.count = blks; packet.off = VTOPOFF(dest); packet.seg = VTOPSEG(dest); packet.lba = dblk; v86.ctl = V86_FLAGS; v86.addr = 0x13; if (write) /* Should we Write with verify ?? 0x4302 ? */ v86.eax = 0x4300; else v86.eax = 0x4200; v86.edx = BD(dev).bd_unit; v86.ds = VTOPSEG(&packet); v86.esi = VTOPOFF(&packet); v86int(); return (V86_CY(v86.efl)); } static int bd_chs_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest, int write) { u_int x, bpc, cyl, hd, sec; bpc = BD(dev).bd_sec * BD(dev).bd_hds; /* blocks per cylinder */ x = dblk; cyl = x / bpc; /* block # / blocks per cylinder */ x %= bpc; /* block offset into cylinder */ hd = x / BD(dev).bd_sec; /* offset / blocks per track */ sec = x % BD(dev).bd_sec; /* offset into track */ /* correct sector number for 1-based BIOS numbering */ sec++; if (cyl > 1023) /* CHS doesn't support cylinders > 1023. */ return (1); v86.ctl = V86_FLAGS; v86.addr = 0x13; if (write) v86.eax = 0x300 | blks; else v86.eax = 0x200 | blks; v86.ecx = ((cyl & 0xff) << 8) | ((cyl & 0x300) >> 2) | sec; v86.edx = (hd << 8) | BD(dev).bd_unit; v86.es = VTOPSEG(dest); v86.ebx = VTOPOFF(dest); v86int(); return (V86_CY(v86.efl)); } static int bd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest, int write) { u_int x, sec, result, resid, retry, maxfer; caddr_t p, xp, bbuf, breg; /* Just in case some idiot actually tries to read/write -1 blocks... */ if (blks < 0) return (-1); resid = blks; p = dest; /* Decide whether we have to bounce */ if (VTOP(dest) >> 20 != 0 || (BD(dev).bd_unit < 0x80 && (VTOP(dest) >> 16) != (VTOP(dest + blks * BD(dev).bd_sectorsize) >> 16))) { /* * There is a 64k physical boundary somewhere in the * destination buffer, or the destination buffer is above * first 1MB of physical memory so we have to arrange a * suitable bounce buffer. Allocate a buffer twice as large * as we need to. Use the bottom half unless there is a break * there, in which case we use the top half. 
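/*
 * The bounce-buffer decision in bd_io() above as C: legacy INT 13h
 * transfers cannot reach above the first 1MB of physical memory, and
 * floppy transfers additionally must not cross a 64KB physical boundary.
 * Physical addresses are passed in directly here in place of VTOP().
 */
#include <stdbool.h>
#include <stdint.h>

static bool
needs_bounce(uint32_t pa, uint32_t len, bool floppy)
{
	if ((pa >> 20) != 0)
		return (true);		/* buffer above first 1MB */
	if (floppy && (pa >> 16) != ((pa + len) >> 16))
		return (true);		/* crosses a 64KB boundary */
	return (false);
}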
*/ x = min(FLOPPY_BOUNCEBUF, (unsigned)blks); bbuf = alloca(x * 2 * BD(dev).bd_sectorsize); if (((u_int32_t)VTOP(bbuf) & 0xffff0000) == ((u_int32_t)VTOP(bbuf + x * BD(dev).bd_sectorsize) & 0xffff0000)) { breg = bbuf; } else { breg = bbuf + x * BD(dev).bd_sectorsize; } maxfer = x; /* limit transfers to bounce region size */ } else { breg = bbuf = NULL; maxfer = 0; } while (resid > 0) { /* * Play it safe and don't cross track boundaries. * (XXX this is probably unnecessary) */ sec = dblk % BD(dev).bd_sec; /* offset into track */ x = min(BD(dev).bd_sec - sec, resid); if (maxfer > 0) x = min(x, maxfer); /* fit bounce buffer */ /* where do we transfer to? */ xp = bbuf == NULL ? p : breg; /* * Put your Data In, Put your Data out, * Put your Data In, and shake it all about */ if (write && bbuf != NULL) bcopy(p, breg, x * BD(dev).bd_sectorsize); /* * Loop retrying the operation a couple of times. The BIOS * may also retry. */ for (retry = 0; retry < 3; retry++) { /* if retrying, reset the drive */ if (retry > 0) { v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0; v86.edx = BD(dev).bd_unit; v86int(); } if (BD(dev).bd_flags & BD_MODEEDD1) result = bd_edd_io(dev, dblk, x, xp, write); else result = bd_chs_io(dev, dblk, x, xp, write); if (result == 0) break; } if (write) DEBUG("Write %d sector(s) from %p (0x%x) to %lld %s", x, p, VTOP(p), dblk, result ? "failed" : "ok"); else DEBUG("Read %d sector(s) from %lld to %p (0x%x) %s", x, dblk, p, VTOP(p), result ? "failed" : "ok"); if (result) { return(-1); } if (!write && bbuf != NULL) bcopy(breg, p, x * BD(dev).bd_sectorsize); p += (x * BD(dev).bd_sectorsize); dblk += x; resid -= x; } /* hexdump(dest, (blks * BD(dev).bd_sectorsize)); */ return(0); } static int bd_read(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest) { #ifdef LOADER_GELI_SUPPORT struct dsk dskp; off_t p_off, diff; daddr_t alignlba; int err, n, alignblks; char *tmpbuf; /* if we already know there is no GELI, skip the rest */ if (geli_status[dev->d_unit][dev->d_slice] != ISGELI_YES) return (bd_io(dev, dblk, blks, dest, 0)); if (geli_status[dev->d_unit][dev->d_slice] == ISGELI_YES) { /* * Align reads to DEV_GELIBOOT_BSIZE bytes because partial * sectors cannot be decrypted. Round the requested LBA down to * nearest multiple of DEV_GELIBOOT_BSIZE bytes. */ alignlba = rounddown2(dblk * BD(dev).bd_sectorsize, DEV_GELIBOOT_BSIZE) / BD(dev).bd_sectorsize; /* * Round number of blocks to read up to nearest multiple of * DEV_GELIBOOT_BSIZE */ diff = (dblk - alignlba) * BD(dev).bd_sectorsize; alignblks = roundup2(blks * BD(dev).bd_sectorsize + diff, DEV_GELIBOOT_BSIZE) / BD(dev).bd_sectorsize; /* * If the read is rounded up to a larger size, use a temporary * buffer here because the buffer provided by the caller may be * too small. 
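/*
 * The alignment arithmetic from bd_read()'s GELI path above, standalone:
 * round the starting LBA down and the length up so that the transfer
 * covers whole crypto blocks, since partial GELI sectors cannot be
 * decrypted.  4096 stands in for DEV_GELIBOOT_BSIZE.
 */
#include <stdint.h>

#define	EX_GELI_BSIZE	4096ULL

static void
geli_align(uint64_t dblk, uint64_t blks, uint64_t secsz,
    uint64_t *alignlba, uint64_t *alignblks, uint64_t *diff)
{
	*alignlba = ((dblk * secsz) & ~(EX_GELI_BSIZE - 1)) / secsz;
	*diff = (dblk - *alignlba) * secsz;
	*alignblks = ((blks * secsz + *diff + EX_GELI_BSIZE - 1) &
	    ~(EX_GELI_BSIZE - 1)) / secsz;
}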
*/ if (diff == 0) { tmpbuf = dest; } else { tmpbuf = malloc(alignblks * BD(dev).bd_sectorsize); if (tmpbuf == NULL) { return (-1); } } err = bd_io(dev, alignlba, alignblks, tmpbuf, 0); if (err) return (err); dskp.drive = bd_unit2bios(dev->d_unit); dskp.type = dev->d_type; dskp.unit = dev->d_unit; dskp.slice = dev->d_slice; dskp.part = dev->d_partition; dskp.start = dev->d_offset; /* GELI needs the offset relative to the partition start */ p_off = alignlba - dskp.start; err = geli_read(&dskp, p_off * BD(dev).bd_sectorsize, tmpbuf, alignblks * BD(dev).bd_sectorsize); if (err) return (err); if (tmpbuf != dest) { bcopy(tmpbuf + diff, dest, blks * BD(dev).bd_sectorsize); free(tmpbuf); } return (0); } #endif /* LOADER_GELI_SUPPORT */ return (bd_io(dev, dblk, blks, dest, 0)); } static int bd_write(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest) { return (bd_io(dev, dblk, blks, dest, 1)); } /* * Return the BIOS geometry of a given "fixed drive" in a format * suitable for the legacy bootinfo structure. Since the kernel is * expecting raw int 0x13/0x8 values for N_BIOS_GEOM drives, we * prefer to get the information directly, rather than rely on being * able to put it together from information already maintained for * different purposes and for a probably different number of drives. * * For valid drives, the geometry is expected in the format (31..0) * "000000cc cccccccc hhhhhhhh 00ssssss"; and invalid drives are * indicated by returning the geometry of a "1.2M" PC-format floppy * disk. And, incidentally, what is returned is not the geometry as * such but the highest valid cylinder, head, and sector numbers. */ u_int32_t bd_getbigeom(int bunit) { v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x800; v86.edx = 0x80 + bunit; v86int(); if (V86_CY(v86.efl)) return 0x4f010f; return ((v86.ecx & 0xc0) << 18) | ((v86.ecx & 0xff00) << 8) | (v86.edx & 0xff00) | (v86.ecx & 0x3f); } /* * Return a suitable dev_t value for (dev). * * In the case where it looks like (dev) is a SCSI disk, we allow the number of * IDE disks to be specified in $num_ide_disks. There should be a Better Way. */ int bd_getdev(struct i386_devdesc *d) { struct disk_devdesc *dev; int biosdev; int major; int rootdev; char *nip, *cp; int i, unit; dev = (struct disk_devdesc *)d; biosdev = bd_unit2bios(dev->d_unit); DEBUG("unit %d BIOS device %d", dev->d_unit, biosdev); if (biosdev == -1) /* not a BIOS device */ return(-1); if (disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize, BD(dev).bd_sectorsize,(BD(dev).bd_flags & BD_FLOPPY) ? 
DISK_F_NOCACHE: 0) != 0) /* oops, not a viable device */ return (-1); else disk_close(dev); if (biosdev < 0x80) { /* floppy (or emulated floppy) or ATAPI device */ if (bdinfo[dev->d_unit].bd_type == DT_ATAPI) { /* is an ATAPI disk */ major = WFDMAJOR; } else { /* is a floppy disk */ major = FDMAJOR; } } else { /* assume an IDE disk */ major = WDMAJOR; } /* default root disk unit number */ unit = biosdev & 0x7f; /* XXX a better kludge to set the root disk unit number */ if ((nip = getenv("root_disk_unit")) != NULL) { i = strtol(nip, &cp, 0); /* check for parse error */ if ((cp != nip) && (*cp == 0)) unit = i; } rootdev = MAKEBOOTDEV(major, dev->d_slice + 1, unit, dev->d_partition); DEBUG("dev is 0x%x\n", rootdev); return(rootdev); } #ifdef LOADER_GELI_SUPPORT int bios_read(void *vdev __unused, struct dsk *priv, off_t off, char *buf, size_t bytes) { struct disk_devdesc dev; dev.d_dev = &biosdisk; dev.d_type = priv->type; dev.d_unit = priv->unit; dev.d_slice = priv->slice; dev.d_partition = priv->part; dev.d_offset = priv->start; off = off / BD(&dev).bd_sectorsize; /* GELI gives us the offset relative to the partition start */ off += dev.d_offset; bytes = bytes / BD(&dev).bd_sectorsize; return (bd_io(&dev, off, bytes, buf, 0)); } #endif /* LOADER_GELI_SUPPORT */ Index: projects/clang390-import/sys/dev/bhnd/bhnd.h =================================================================== --- projects/clang390-import/sys/dev/bhnd/bhnd.h (revision 305016) +++ projects/clang390-import/sys/dev/bhnd/bhnd.h (revision 305017) @@ -1,1225 +1,1229 @@ /*- * Copyright (c) 2015 Landon Fuller * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. * * $FreeBSD$ */ #ifndef _BHND_BHND_H_ #define _BHND_BHND_H_ #include #include #include #include "bhnd_ids.h" #include "bhnd_types.h" #include "bhnd_debug.h" #include "bhnd_bus_if.h" #include "bhnd_match.h" #include "nvram/bhnd_nvram.h" extern devclass_t bhnd_devclass; extern devclass_t bhnd_hostb_devclass; extern devclass_t bhnd_nvram_devclass; #define BHND_CHIPID_MAX_NAMELEN 32 /**< maximum buffer required for a bhnd_format_chip_id() */ /** * bhnd child instance variables */ enum bhnd_device_vars { BHND_IVAR_VENDOR, /**< Designer's JEP-106 manufacturer ID. 
*/ BHND_IVAR_DEVICE, /**< Part number */ BHND_IVAR_HWREV, /**< Core revision */ BHND_IVAR_DEVICE_CLASS, /**< Core class (@sa bhnd_devclass_t) */ BHND_IVAR_VENDOR_NAME, /**< Core vendor name */ BHND_IVAR_DEVICE_NAME, /**< Core name */ BHND_IVAR_CORE_INDEX, /**< Bus-assigned core number */ BHND_IVAR_CORE_UNIT, /**< Bus-assigned core unit number, assigned sequentially (starting at 0) for each vendor/device pair. */ }; /** * bhnd device probe priority bands. */ enum { BHND_PROBE_ROOT = 0, /**< Nexus or host bridge */ BHND_PROBE_BUS = 1000, /**< Busses and bridges */ BHND_PROBE_CPU = 2000, /**< CPU devices */ BHND_PROBE_INTERRUPT = 3000, /**< Interrupt controllers. */ BHND_PROBE_TIMER = 4000, /**< Timers and clocks. */ BHND_PROBE_RESOURCE = 5000, /**< Resource discovery (including NVRAM/SPROM) */ BHND_PROBE_DEFAULT = 6000, /**< Default device priority */ }; /** * Constants defining fine grained ordering within a BHND_PROBE_* priority band. * * Example: * @code * BHND_PROBE_BUS + BHND_PROBE_ORDER_FIRST * @endcode */ enum { BHND_PROBE_ORDER_FIRST = 0, BHND_PROBE_ORDER_EARLY = 25, BHND_PROBE_ORDER_MIDDLE = 50, BHND_PROBE_ORDER_LATE = 75, BHND_PROBE_ORDER_LAST = 100 }; /* * Simplified accessors for bhnd device ivars */ #define BHND_ACCESSOR(var, ivar, type) \ __BUS_ACCESSOR(bhnd, var, BHND, ivar, type) BHND_ACCESSOR(vendor, VENDOR, uint16_t); BHND_ACCESSOR(device, DEVICE, uint16_t); BHND_ACCESSOR(hwrev, HWREV, uint8_t); BHND_ACCESSOR(class, DEVICE_CLASS, bhnd_devclass_t); BHND_ACCESSOR(vendor_name, VENDOR_NAME, const char *); BHND_ACCESSOR(device_name, DEVICE_NAME, const char *); BHND_ACCESSOR(core_index, CORE_INDEX, u_int); BHND_ACCESSOR(core_unit, CORE_UNIT, int); #undef BHND_ACCESSOR /** * A bhnd(4) board descriptor. */ struct bhnd_board_info { uint16_t board_vendor; /**< PCI-SIG vendor ID (even on non-PCI * devices). * * On PCI devices, this will generally * be the subsystem vendor ID, but the * value may be overridden in device * NVRAM. */ uint16_t board_type; /**< Board type (See BHND_BOARD_*) * * On PCI devices, this will generally * be the subsystem device ID, but the * value may be overridden in device * NVRAM. */ uint16_t board_rev; /**< Board revision. */ uint8_t board_srom_rev; /**< Board SROM format revision */ uint32_t board_flags; /**< Board flags (see BHND_BFL_*) */ uint32_t board_flags2; /**< Board flags 2 (see BHND_BFL2_*) */ uint32_t board_flags3; /**< Board flags 3 (see BHND_BFL3_*) */ }; /** * Chip Identification * * This is read from the ChipCommon ID register; on earlier bhnd(4) devices * where ChipCommon is unavailable, known values must be supplied. */ struct bhnd_chipid { uint16_t chip_id; /**< chip id (BHND_CHIPID_*) */ uint8_t chip_rev; /**< chip revision */ uint8_t chip_pkg; /**< chip package (BHND_PKGID_*) */ uint8_t chip_type; /**< chip type (BHND_CHIPTYPE_*) */ bhnd_addr_t enum_addr; /**< chip_type-specific enumeration * address; either the siba(4) base * core register block, or the bcma(4) * EROM core address. */ uint8_t ncores; /**< number of cores, if known. 0 if * not available. */ }; /** * A bhnd(4) core descriptor. */ struct bhnd_core_info { uint16_t vendor; /**< JEP-106 vendor (BHND_MFGID_*) */ uint16_t device; /**< device */ uint16_t hwrev; /**< hardware revision */ u_int core_idx; /**< bus-assigned core index */ int unit; /**< bus-assigned core unit */ }; /** * A bhnd(4) bus resource. * * This provides an abstract interface to per-core resources that may require * bus-level remapping of address windows prior to access. 
*/ struct bhnd_resource { struct resource *res; /**< the system resource. */ bool direct; /**< false if the resource requires * bus window remapping before it * is MMIO accessible. */ }; /** * Device quirk table descriptor. */ struct bhnd_device_quirk { struct bhnd_device_match desc; /**< device match descriptor */ uint32_t quirks; /**< quirk flags */ }; #define BHND_CORE_QUIRK(_rev, _flags) \ {{ BHND_MATCH_CORE_REV(_rev) }, (_flags) } #define BHND_CHIP_QUIRK(_chip, _rev, _flags) \ {{ BHND_CHIP_IR(BCM ## _chip, _rev) }, (_flags) } #define BHND_PKG_QUIRK(_chip, _pkg, _flags) \ {{ BHND_CHIP_IP(BCM ## _chip, BCM ## _chip ## _pkg) }, (_flags) } #define BHND_BOARD_QUIRK(_board, _flags) \ {{ BHND_MATCH_BOARD_TYPE(_board) }, \ (_flags) } #define BHND_DEVICE_QUIRK_END { { BHND_MATCH_ANY }, 0 } #define BHND_DEVICE_QUIRK_IS_END(_q) \ (((_q)->desc.m.match_flags == 0) && (_q)->quirks == 0) enum { BHND_DF_ANY = 0, BHND_DF_HOSTB = (1<<0), /**< core is serving as the bus' host * bridge. implies BHND_DF_ADAPTER */ BHND_DF_SOC = (1<<1), /**< core is attached to a native bus (BHND_ATTACH_NATIVE) */ BHND_DF_ADAPTER = (1<<2), /**< core is attached to a bridged * adapter (BHND_ATTACH_ADAPTER) */ }; /** Device probe table descriptor */ struct bhnd_device { const struct bhnd_device_match core; /**< core match descriptor */ const char *desc; /**< device description, or NULL. */ const struct bhnd_device_quirk *quirks_table; /**< quirks table for this device, or NULL */ uint32_t device_flags; /**< required BHND_DF_* flags */ }; #define _BHND_DEVICE(_vendor, _device, _desc, _quirks, \ _flags, ...) \ { { BHND_MATCH_CORE(BHND_MFGID_ ## _vendor, \ BHND_COREID_ ## _device) }, _desc, _quirks, \ _flags } #define BHND_DEVICE(_vendor, _device, _desc, _quirks, ...) \ _BHND_DEVICE(_vendor, _device, _desc, _quirks, \ ## __VA_ARGS__, 0) #define BHND_DEVICE_END { { BHND_MATCH_ANY }, NULL, NULL, 0 } #define BHND_DEVICE_IS_END(_d) \ (BHND_MATCH_IS_ANY(&(_d)->core) && (_d)->desc == NULL) const char *bhnd_vendor_name(uint16_t vendor); const char *bhnd_port_type_name(bhnd_port_type port_type); const char *bhnd_nvram_src_name(bhnd_nvram_src nvram_src); const char *bhnd_find_core_name(uint16_t vendor, uint16_t device); bhnd_devclass_t bhnd_find_core_class(uint16_t vendor, uint16_t device); const char *bhnd_core_name(const struct bhnd_core_info *ci); bhnd_devclass_t bhnd_core_class(const struct bhnd_core_info *ci); int bhnd_format_chip_id(char *buffer, size_t size, uint16_t chip_id); device_t bhnd_match_child(device_t dev, const struct bhnd_core_match *desc); device_t bhnd_find_child(device_t dev, bhnd_devclass_t class, int unit); device_t bhnd_find_bridge_root(device_t dev, devclass_t bus_class); const struct bhnd_core_info *bhnd_match_core( const struct bhnd_core_info *cores, u_int num_cores, const struct bhnd_core_match *desc); const struct bhnd_core_info *bhnd_find_core( const struct bhnd_core_info *cores, u_int num_cores, bhnd_devclass_t class); bool bhnd_core_matches( const struct bhnd_core_info *core, const struct bhnd_core_match *desc); bool bhnd_chip_matches( const struct bhnd_chipid *chipid, const struct bhnd_chip_match *desc); bool bhnd_board_matches( const struct bhnd_board_info *info, const struct bhnd_board_match *desc); bool bhnd_hwrev_matches(uint16_t hwrev, const struct bhnd_hwrev_match *desc); bool bhnd_device_matches(device_t dev, const struct bhnd_device_match *desc); const struct bhnd_device *bhnd_device_lookup(device_t dev, const struct bhnd_device *table, size_t entry_size); uint32_t bhnd_device_quirks(device_t 
dev, const struct bhnd_device *table, size_t entry_size); struct bhnd_core_info bhnd_get_core_info(device_t dev); int bhnd_alloc_resources(device_t dev, struct resource_spec *rs, struct bhnd_resource **res); void bhnd_release_resources(device_t dev, const struct resource_spec *rs, struct bhnd_resource **res); struct bhnd_chipid bhnd_parse_chipid(uint32_t idreg, bhnd_addr_t enum_addr); +int bhnd_chipid_fixed_ncores( + const struct bhnd_chipid *cid, + uint16_t chipc_hwrev, uint8_t *ncores); + int bhnd_read_chipid(device_t dev, struct resource_spec *rs, bus_size_t chipc_offset, struct bhnd_chipid *result); void bhnd_set_custom_core_desc(device_t dev, const char *name); void bhnd_set_default_core_desc(device_t dev); void bhnd_set_default_bus_desc(device_t dev, const struct bhnd_chipid *chip_id); int bhnd_nvram_getvar_str(device_t dev, const char *name, char *buf, size_t len, size_t *rlen); int bhnd_nvram_getvar_uint(device_t dev, const char *name, void *value, int width); int bhnd_nvram_getvar_uint8(device_t dev, const char *name, uint8_t *value); int bhnd_nvram_getvar_uint16(device_t dev, const char *name, uint16_t *value); int bhnd_nvram_getvar_uint32(device_t dev, const char *name, uint32_t *value); int bhnd_nvram_getvar_int(device_t dev, const char *name, void *value, int width); int bhnd_nvram_getvar_int8(device_t dev, const char *name, int8_t *value); int bhnd_nvram_getvar_int16(device_t dev, const char *name, int16_t *value); int bhnd_nvram_getvar_int32(device_t dev, const char *name, int32_t *value); int bhnd_nvram_getvar_array(device_t dev, const char *name, void *buf, size_t count, bhnd_nvram_type type); bool bhnd_bus_generic_is_hw_disabled(device_t dev, device_t child); bool bhnd_bus_generic_is_region_valid(device_t dev, device_t child, bhnd_port_type type, u_int port, u_int region); int bhnd_bus_generic_get_nvram_var(device_t dev, device_t child, const char *name, void *buf, size_t *size, bhnd_nvram_type type); const struct bhnd_chipid *bhnd_bus_generic_get_chipid(device_t dev, device_t child); int bhnd_bus_generic_read_board_info(device_t dev, device_t child, struct bhnd_board_info *info); struct bhnd_resource *bhnd_bus_generic_alloc_resource (device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags); int bhnd_bus_generic_release_resource (device_t dev, device_t child, int type, int rid, struct bhnd_resource *r); int bhnd_bus_generic_activate_resource (device_t dev, device_t child, int type, int rid, struct bhnd_resource *r); int bhnd_bus_generic_deactivate_resource (device_t dev, device_t child, int type, int rid, struct bhnd_resource *r); bhnd_attach_type bhnd_bus_generic_get_attach_type(device_t dev, device_t child); /** * Return the active host bridge core for the bhnd bus, if any, or NULL if * not found. * * @param dev A bhnd bus device. */ static inline device_t bhnd_find_hostb_device(device_t dev) { return (BHND_BUS_FIND_HOSTB_DEVICE(dev)); } /** * Return true if the hardware components required by @p dev are known to be * unpopulated or otherwise unusable. * * In some cases, enumerated devices may have pins that are left floating, or * the hardware may otherwise be non-functional; this method allows a parent * device to explicitly specify if a successfully enumerated @p dev should * be disabled. * * @param dev A bhnd bus child device. 
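 * * A minimal sketch of the intended use (@c my_probe is a hypothetical * driver probe routine; the bus itself also normally performs this check * when enumerating children): * @code * static int * my_probe(device_t dev) * { * if (bhnd_is_hw_disabled(dev)) * return (ENXIO); * * return (BUS_PROBE_DEFAULT); * } * @endcode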
*/ static inline bool bhnd_is_hw_disabled(device_t dev) { return (BHND_BUS_IS_HW_DISABLED(device_get_parent(dev), dev)); } /** * Return the BHND chip identification info for the bhnd bus. * * @param dev A bhnd bus child device. */ static inline const struct bhnd_chipid * bhnd_get_chipid(device_t dev) { return (BHND_BUS_GET_CHIPID(device_get_parent(dev), dev)); }; /** * Get a list of all cores discoverable on the bhnd bus. * * Enumerates all cores discoverable on @p dev, returning the list in * @p cores and the count in @p num_cores. * * The memory allocated for the list should be freed using * `free(*cores, M_BHND)`. @p cores and @p num_cores are not changed * when an error is returned. * * @param dev A bhnd bus child device. * @param[out] cores The table of core descriptors. * @param[out] num_cores The number of core descriptors in @p cores. * * @retval 0 success * @retval non-zero if an error occurs enumerating @p dev, a regular UNIX * error code should be returned. */ static inline int bhnd_get_core_table(device_t dev, struct bhnd_core_info **cores, u_int *num_cores) { return (BHND_BUS_GET_CORE_TABLE(device_get_parent(dev), dev, cores, num_cores)); } /** * If supported by the chipset, return the clock source for the given clock. * * This function is only supported on early PWRCTL-equipped chipsets * that expose clock management via their host bridge interface. Currently, * this includes PCI (not PCIe) devices, with ChipCommon core revisions 0-9. * * @param dev A bhnd bus child device. * @param clock The clock for which a clock source will be returned. * * @retval bhnd_clksrc The clock source for @p clock. * @retval BHND_CLKSRC_UNKNOWN If @p clock is unsupported, or its * clock source is not known to the bus. */ static inline bhnd_clksrc bhnd_pwrctl_get_clksrc(device_t dev, bhnd_clock clock) { return (BHND_BUS_PWRCTL_GET_CLKSRC(device_get_parent(dev), dev, clock)); } /** * If supported by the chipset, gate @p clock * * This function is only supported on early PWRCTL-equipped chipsets * that expose clock management via their host bridge interface. Currently, * this includes PCI (not PCIe) devices, with ChipCommon core revisions 0-9. * * @param dev A bhnd bus child device. * @param clock The clock to be disabled. * * @retval 0 success * @retval ENODEV If bus-level clock source management is not supported. * @retval ENXIO If bus-level management of @p clock is not supported. */ static inline int bhnd_pwrctl_gate_clock(device_t dev, bhnd_clock clock) { return (BHND_BUS_PWRCTL_GATE_CLOCK(device_get_parent(dev), dev, clock)); } /** * If supported by the chipset, ungate @p clock * * This function is only supported on early PWRCTL-equipped chipsets * that expose clock management via their host bridge interface. Currently, * this includes PCI (not PCIe) devices, with ChipCommon core revisions 0-9. * * @param dev A bhnd bus child device. * @param clock The clock to be enabled. * * @retval 0 success * @retval ENODEV If bus-level clock source management is not supported. * @retval ENXIO If bus-level management of @p clock is not supported. */ static inline int bhnd_pwrctl_ungate_clock(device_t dev, bhnd_clock clock) { return (BHND_BUS_PWRCTL_UNGATE_CLOCK(device_get_parent(dev), dev, clock)); } /** * Return the BHND attachment type of the parent bhnd bus. * * @param dev A bhnd bus child device. * * @retval BHND_ATTACH_ADAPTER if the bus is resident on a bridged adapter, * such as a WiFi chipset. 
* @retval BHND_ATTACH_NATIVE if the bus provides hardware services (clock, * CPU, etc) to a directly attached native host. */ static inline bhnd_attach_type bhnd_get_attach_type (device_t dev) { return (BHND_BUS_GET_ATTACH_TYPE(device_get_parent(dev), dev)); } /** * Attempt to read the BHND board identification from the bhnd bus. * * This relies on NVRAM access, and will fail if a valid NVRAM device cannot * be found, or is not yet attached. * * @param dev The bhnd device requesting board info. * @param[out] info On success, will be populated with the bhnd(4) device's * board information. * * @retval 0 success * @retval ENODEV No valid NVRAM source could be found. * @retval non-zero If reading the board information otherwise fails, a * regular unix error code will be returned. */ static inline int bhnd_read_board_info(device_t dev, struct bhnd_board_info *info) { return (BHND_BUS_READ_BOARD_INFO(device_get_parent(dev), dev, info)); } /** * Allocate and enable per-core PMU request handling for @p dev. * * The region containing the core's PMU register block (if any) must be * allocated via bus_alloc_resource(9) (or bhnd_alloc_resource) before * calling bhnd_alloc_pmu(), and must not be released until after * calling bhnd_release_pmu(). * * @param dev The requesting bhnd device. * * @retval 0 success * @retval non-zero If allocating PMU request state otherwise fails, a * regular unix error code will be returned. */ static inline int bhnd_alloc_pmu(device_t dev) { return (BHND_BUS_ALLOC_PMU(device_get_parent(dev), dev)); } /** * Release any per-core PMU resources allocated for @p dev. Any outstanding * PMU requests are discarded. * * @param dev The requesting bhnd device. * * @retval 0 success * @retval non-zero If releasing PMU request state otherwise fails, a * regular unix error code will be returned, and * the core state will be left unmodified. */ static inline int bhnd_release_pmu(device_t dev) { return (BHND_BUS_RELEASE_PMU(device_get_parent(dev), dev)); } /** * Request that @p clock (or faster) be routed to @p dev. * * A driver must ask the bhnd bus to allocate clock request state * via bhnd_alloc_pmu() before it can request clock resources. * * Request multiplexing is managed by the bus. * * @param dev The bhnd(4) device to which @p clock should be routed. * @param clock The requested clock source. * * @retval 0 success * @retval ENODEV If an unsupported clock was requested. * @retval ENXIO If the PMU has not been initialized or is otherwise unavailable. */ static inline int bhnd_request_clock(device_t dev, bhnd_clock clock) { return (BHND_BUS_REQUEST_CLOCK(device_get_parent(dev), dev, clock)); } /** * Request that @p clocks be powered on behalf of @p dev. * * This will power any clock sources (e.g. XTAL, PLL, etc) required for * @p clocks and wait until they are ready, discarding any previous * requests by @p dev. * * Request multiplexing is managed by the bus. * * A driver must ask the bhnd bus to allocate clock request state * via bhnd_alloc_pmu() before it can request clock resources. * * @param dev The requesting bhnd(4) device. * @param clocks The clock(s) to be enabled. * * @retval 0 success * @retval ENODEV If an unsupported clock was requested. * @retval ENXIO If the PMU has not been initialized or is otherwise unavailable.
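 * * A minimal request sequence (sketch; assumes the underlying chipset * supports the HT clock and that @p dev has not yet allocated PMU state): * @code * int error; * * if ((error = bhnd_alloc_pmu(dev))) * return (error); * * if ((error = bhnd_enable_clocks(dev, BHND_CLOCK_HT))) * return (error); * @endcode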
*/ static inline int bhnd_enable_clocks(device_t dev, uint32_t clocks) { return (BHND_BUS_ENABLE_CLOCKS(device_get_parent(dev), dev, clocks)); } /** * Power up an external PMU-managed resource assigned to @p dev. * * A driver must ask the bhnd bus to allocate PMU request state * via bhnd_alloc_pmu() before it can request PMU resources. * * @param dev The requesting bhnd(4) device. * @param rsrc The core-specific external resource identifier. * * @retval 0 success * @retval ENODEV If the PMU does not support @p rsrc. * @retval ENXIO If the PMU has not been initialized or is otherwise unavailable. */ static inline int bhnd_request_ext_rsrc(device_t dev, u_int rsrc) { return (BHND_BUS_REQUEST_EXT_RSRC(device_get_parent(dev), dev, rsrc)); } /** * Power down an external PMU-managed resource assigned to @p dev. * * A driver must ask the bhnd bus to allocate PMU request state * via bhnd_alloc_pmu() before it can request PMU resources. * * @param dev The requesting bhnd(4) device. * @param rsrc The core-specific external resource identifier. * * @retval 0 success * @retval ENODEV If the PMU does not support @p rsrc. * @retval ENXIO If the PMU has not been initialized or is otherwise unavailable. */ static inline int bhnd_release_ext_rsrc(device_t dev, u_int rsrc) { return (BHND_BUS_RELEASE_EXT_RSRC(device_get_parent(dev), dev, rsrc)); } /** * Read @p width bytes at @p offset from the bus-specific agent/config * space of @p dev. * * @param dev The bhnd device for which @p offset should be read. * @param offset The offset to be read. * @param width The size of the access. Must be 1, 2 or 4 bytes. * * The exact behavior of this method is bus-specific. In the case of * bcma(4), this method provides access to the first agent port of @p dev. * * @note Device drivers should only use this API for functionality * that is not available via another bhnd(4) function. */ static inline uint32_t bhnd_read_config(device_t dev, bus_size_t offset, u_int width) { return (BHND_BUS_READ_CONFIG(device_get_parent(dev), dev, offset, width)); } /** * Write @p width bytes at @p offset to the bus-specific agent/config * space of @p dev. * * @param dev The bhnd device for which @p offset should be written. * @param offset The offset to be written. * @param width The size of the access. Must be 1, 2 or 4 bytes. * * The exact behavior of this method is bus-specific. In the case of * bcma(4), this method provides access to the first agent port of @p dev. * * @note Device drivers should only use this API for functionality * that is not available via another bhnd(4) function. */ static inline void bhnd_write_config(device_t dev, bus_size_t offset, uint32_t val, u_int width) { BHND_BUS_WRITE_CONFIG(device_get_parent(dev), dev, offset, val, width); } /** * Read an NVRAM variable, coerced to the requested @p type. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] buf A buffer large enough to hold @p len bytes. On * success, the requested value will be written to * this buffer. This argument may be NULL if * the value is not desired. * @param[in,out] len The maximum capacity of @p buf. On success, * will be set to the actual size of the requested * value. * @param type The desired data representation to be written * to @p buf. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval ENOMEM If a buffer of @p len is too small to hold the * requested value.
* @retval EOPNOTSUPP If the value cannot be coerced to @p type. * @retval ERANGE If value coercion would overflow @p type. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ static inline int bhnd_nvram_getvar(device_t dev, const char *name, void *buf, size_t *len, bhnd_nvram_type type) { return (BHND_BUS_GET_NVRAM_VAR(device_get_parent(dev), dev, name, buf, len, type)); } /** * Allocate a resource from a device's parent bhnd(4) bus. * * @param dev The device requesting resource ownership. * @param type The type of resource to allocate. This may be any type supported * by the standard bus APIs. * @param rid The bus-specific handle identifying the resource being allocated. * @param start The start address of the resource. * @param end The end address of the resource. * @param count The size of the resource. * @param flags The flags for the resource to be allocated. These may be any * values supported by the standard bus APIs. * * To request the resource's default addresses, pass @p start and * @p end values of @c 0 and @c ~0, respectively, and * a @p count of @c 1. * * @retval NULL The resource could not be allocated. * @retval resource The allocated resource. */ static inline struct bhnd_resource * bhnd_alloc_resource(device_t dev, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { return BHND_BUS_ALLOC_RESOURCE(device_get_parent(dev), dev, type, rid, start, end, count, flags); } /** * Allocate a resource from a device's parent bhnd(4) bus, using the * resource's default start, end, and count values. * * @param dev The device requesting resource ownership. * @param type The type of resource to allocate. This may be any type supported * by the standard bus APIs. * @param rid The bus-specific handle identifying the resource being allocated. * @param flags The flags for the resource to be allocated. These may be any * values supported by the standard bus APIs. * * @retval NULL The resource could not be allocated. * @retval resource The allocated resource. */ static inline struct bhnd_resource * bhnd_alloc_resource_any(device_t dev, int type, int *rid, u_int flags) { return bhnd_alloc_resource(dev, type, rid, 0, ~0, 1, flags); } /** * Activate a previously allocated bhnd resource. * * @param dev The device holding ownership of the allocated resource. * @param type The type of the resource. * @param rid The bus-specific handle identifying the resource. * @param r A pointer to the resource returned by bhnd_alloc_resource or * BHND_BUS_ALLOC_RESOURCE. * * @retval 0 success * @retval non-zero an error occurred while activating the resource. */ static inline int bhnd_activate_resource(device_t dev, int type, int rid, struct bhnd_resource *r) { return BHND_BUS_ACTIVATE_RESOURCE(device_get_parent(dev), dev, type, rid, r); } /** * Deactivate a previously activated bhnd resource. * * @param dev The device holding ownership of the activated resource. * @param type The type of the resource. * @param rid The bus-specific handle identifying the resource. * @param r A pointer to the resource returned by bhnd_alloc_resource or * BHND_BUS_ALLOC_RESOURCE. * * @retval 0 success * @retval non-zero an error occurred while deactivating the resource. */ static inline int bhnd_deactivate_resource(device_t dev, int type, int rid, struct bhnd_resource *r) { return BHND_BUS_DEACTIVATE_RESOURCE(device_get_parent(dev), dev, type, rid, r); } /** * Free a resource allocated by bhnd_alloc_resource().
* * @param dev The device holding ownership of the resource. * @param type The type of the resource. * @param rid The bus-specific handle identifying the resource. * @param r A pointer to the resource returned by bhnd_alloc_resource or * BHND_BUS_ALLOC_RESOURCE. * * @retval 0 success * @retval non-zero an error occurred while releasing the resource. */ static inline int bhnd_release_resource(device_t dev, int type, int rid, struct bhnd_resource *r) { return BHND_BUS_RELEASE_RESOURCE(device_get_parent(dev), dev, type, rid, r); } /** * Return true if @p region_num is a valid region on @p port_num of * @p type attached to @p dev. * * @param dev A bhnd bus child device. * @param type The port type being queried. * @param port_num The port number being queried. * @param region_num The region number being queried. */ static inline bool bhnd_is_region_valid(device_t dev, bhnd_port_type type, u_int port_num, u_int region_num) { return (BHND_BUS_IS_REGION_VALID(device_get_parent(dev), dev, type, port_num, region_num)); } /** * Return the number of ports of type @p type attached to @p dev. * * @param dev A bhnd bus child device. * @param type The port type being queried. */ static inline u_int bhnd_get_port_count(device_t dev, bhnd_port_type type) { return (BHND_BUS_GET_PORT_COUNT(device_get_parent(dev), dev, type)); } /** * Return the number of memory regions mapped to port @p port of * type @p type on @p dev. * * @param dev A bhnd bus child device. * @param port The port number being queried. * @param type The port type being queried. */ static inline u_int bhnd_get_region_count(device_t dev, bhnd_port_type type, u_int port) { return (BHND_BUS_GET_REGION_COUNT(device_get_parent(dev), dev, type, port)); } /** * Return the resource-ID for a memory region on the given device port. * * @param dev A bhnd bus child device. * @param type The port type. * @param port The port identifier. * @param region The identifier of the memory region on @p port. * * @retval int The RID for the given @p port and @p region on @p dev. * @retval -1 No such port/region found. */ static inline int bhnd_get_port_rid(device_t dev, bhnd_port_type type, u_int port, u_int region) { return BHND_BUS_GET_PORT_RID(device_get_parent(dev), dev, type, port, region); } /** * Decode a port / region pair on @p dev defined by @p rid. * * @param dev A bhnd bus child device. * @param type The resource type. * @param rid The resource identifier. * @param[out] port_type The decoded port type. * @param[out] port The decoded port identifier. * @param[out] region The decoded region identifier. * * @retval 0 success * @retval non-zero No matching port/region found. */ static inline int bhnd_decode_port_rid(device_t dev, int type, int rid, bhnd_port_type *port_type, u_int *port, u_int *region) { return BHND_BUS_DECODE_PORT_RID(device_get_parent(dev), dev, type, rid, port_type, port, region); } /** * Get the address and size of @p region on @p port. * * @param dev A bhnd bus child device. * @param port_type The port type. * @param port The port identifier. * @param region The identifier of the memory region on @p port. * @param[out] region_addr The region's base address. * @param[out] region_size The region's size. * * @retval 0 success * @retval non-zero No matching port/region found.
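 * * A minimal sketch querying the first region of the first device port: * @code * bhnd_addr_t addr; * bhnd_size_t size; * int error; * * error = bhnd_get_region_addr(dev, BHND_PORT_DEVICE, 0, 0, &addr, * &size); * if (error) * device_printf(dev, "no device0.0 register block\n"); * @endcode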
*/ static inline int bhnd_get_region_addr(device_t dev, bhnd_port_type port_type, u_int port, u_int region, bhnd_addr_t *region_addr, bhnd_size_t *region_size) { return BHND_BUS_GET_REGION_ADDR(device_get_parent(dev), dev, port_type, port, region, region_addr, region_size); } /* * bhnd bus-level equivalents of the bus_(read|write|set|barrier|...) * macros (compatible with bhnd_resource). * * Generated with bhnd/tools/bus_macro.sh */ #define bhnd_bus_barrier(r, o, l, f) \ ((r)->direct) ? \ bus_barrier((r)->res, (o), (l), (f)) : \ BHND_BUS_BARRIER( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (l), (f)) #define bhnd_bus_read_1(r, o) \ ((r)->direct) ? \ bus_read_1((r)->res, (o)) : \ BHND_BUS_READ_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o)) #define bhnd_bus_read_multi_1(r, o, d, c) \ ((r)->direct) ? \ bus_read_multi_1((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_MULTI_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_region_1(r, o, d, c) \ ((r)->direct) ? \ bus_read_region_1((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_REGION_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_1(r, o, v) \ ((r)->direct) ? \ bus_write_1((r)->res, (o), (v)) : \ BHND_BUS_WRITE_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v)) #define bhnd_bus_write_multi_1(r, o, d, c) \ ((r)->direct) ? \ bus_write_multi_1((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_MULTI_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_region_1(r, o, d, c) \ ((r)->direct) ? \ bus_write_region_1((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_REGION_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_stream_1(r, o) \ ((r)->direct) ? \ bus_read_stream_1((r)->res, (o)) : \ BHND_BUS_READ_STREAM_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o)) #define bhnd_bus_read_multi_stream_1(r, o, d, c) \ ((r)->direct) ? \ bus_read_multi_stream_1((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_MULTI_STREAM_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_region_stream_1(r, o, d, c) \ ((r)->direct) ? \ bus_read_region_stream_1((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_REGION_STREAM_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_stream_1(r, o, v) \ ((r)->direct) ? \ bus_write_stream_1((r)->res, (o), (v)) : \ BHND_BUS_WRITE_STREAM_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v)) #define bhnd_bus_write_multi_stream_1(r, o, d, c) \ ((r)->direct) ? \ bus_write_multi_stream_1((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_MULTI_STREAM_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_region_stream_1(r, o, d, c) \ ((r)->direct) ? \ bus_write_region_stream_1((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_REGION_STREAM_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_set_multi_1(r, o, v, c) \ ((r)->direct) ? 
\ bus_set_multi_1((r)->res, (o), (v), (c)) : \ BHND_BUS_SET_MULTI_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v), (c)) #define bhnd_bus_set_region_1(r, o, v, c) \ ((r)->direct) ? \ bus_set_region_1((r)->res, (o), (v), (c)) : \ BHND_BUS_SET_REGION_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v), (c)) #define bhnd_bus_read_2(r, o) \ ((r)->direct) ? \ bus_read_2((r)->res, (o)) : \ BHND_BUS_READ_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o)) #define bhnd_bus_read_multi_2(r, o, d, c) \ ((r)->direct) ? \ bus_read_multi_2((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_MULTI_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_region_2(r, o, d, c) \ ((r)->direct) ? \ bus_read_region_2((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_REGION_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_2(r, o, v) \ ((r)->direct) ? \ bus_write_2((r)->res, (o), (v)) : \ BHND_BUS_WRITE_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v)) #define bhnd_bus_write_multi_2(r, o, d, c) \ ((r)->direct) ? \ bus_write_multi_2((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_MULTI_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_region_2(r, o, d, c) \ ((r)->direct) ? \ bus_write_region_2((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_REGION_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_stream_2(r, o) \ ((r)->direct) ? \ bus_read_stream_2((r)->res, (o)) : \ BHND_BUS_READ_STREAM_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o)) #define bhnd_bus_read_multi_stream_2(r, o, d, c) \ ((r)->direct) ? \ bus_read_multi_stream_2((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_MULTI_STREAM_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_region_stream_2(r, o, d, c) \ ((r)->direct) ? \ bus_read_region_stream_2((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_REGION_STREAM_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_stream_2(r, o, v) \ ((r)->direct) ? \ bus_write_stream_2((r)->res, (o), (v)) : \ BHND_BUS_WRITE_STREAM_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v)) #define bhnd_bus_write_multi_stream_2(r, o, d, c) \ ((r)->direct) ? \ bus_write_multi_stream_2((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_MULTI_STREAM_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_region_stream_2(r, o, d, c) \ ((r)->direct) ? \ bus_write_region_stream_2((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_REGION_STREAM_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_set_multi_2(r, o, v, c) \ ((r)->direct) ? \ bus_set_multi_2((r)->res, (o), (v), (c)) : \ BHND_BUS_SET_MULTI_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v), (c)) #define bhnd_bus_set_region_2(r, o, v, c) \ ((r)->direct) ? 
\ bus_set_region_2((r)->res, (o), (v), (c)) : \ BHND_BUS_SET_REGION_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v), (c)) #define bhnd_bus_read_4(r, o) \ ((r)->direct) ? \ bus_read_4((r)->res, (o)) : \ BHND_BUS_READ_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o)) #define bhnd_bus_read_multi_4(r, o, d, c) \ ((r)->direct) ? \ bus_read_multi_4((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_MULTI_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_region_4(r, o, d, c) \ ((r)->direct) ? \ bus_read_region_4((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_REGION_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_4(r, o, v) \ ((r)->direct) ? \ bus_write_4((r)->res, (o), (v)) : \ BHND_BUS_WRITE_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v)) #define bhnd_bus_write_multi_4(r, o, d, c) \ ((r)->direct) ? \ bus_write_multi_4((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_MULTI_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_region_4(r, o, d, c) \ ((r)->direct) ? \ bus_write_region_4((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_REGION_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_stream_4(r, o) \ ((r)->direct) ? \ bus_read_stream_4((r)->res, (o)) : \ BHND_BUS_READ_STREAM_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o)) #define bhnd_bus_read_multi_stream_4(r, o, d, c) \ ((r)->direct) ? \ bus_read_multi_stream_4((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_MULTI_STREAM_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_region_stream_4(r, o, d, c) \ ((r)->direct) ? \ bus_read_region_stream_4((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_REGION_STREAM_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_stream_4(r, o, v) \ ((r)->direct) ? \ bus_write_stream_4((r)->res, (o), (v)) : \ BHND_BUS_WRITE_STREAM_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v)) #define bhnd_bus_write_multi_stream_4(r, o, d, c) \ ((r)->direct) ? \ bus_write_multi_stream_4((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_MULTI_STREAM_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_region_stream_4(r, o, d, c) \ ((r)->direct) ? \ bus_write_region_stream_4((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_REGION_STREAM_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_set_multi_4(r, o, v, c) \ ((r)->direct) ? \ bus_set_multi_4((r)->res, (o), (v), (c)) : \ BHND_BUS_SET_MULTI_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v), (c)) #define bhnd_bus_set_region_4(r, o, v, c) \ ((r)->direct) ? 
\ bus_set_region_4((r)->res, (o), (v), (c)) : \ BHND_BUS_SET_REGION_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v), (c)) #endif /* _BHND_BHND_H_ */ Index: projects/clang390-import/sys/dev/bhnd/bhnd_subr.c =================================================================== --- projects/clang390-import/sys/dev/bhnd/bhnd_subr.c (revision 305016) +++ projects/clang390-import/sys/dev/bhnd/bhnd_subr.c (revision 305017) @@ -1,1532 +1,1612 @@ /*- * Copyright (c) 2015 Landon Fuller * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include +#include + #include #include "nvram/bhnd_nvram.h" #include "bhnd_chipc_if.h" #include "bhnd_nvram_if.h" #include "bhnd_nvram_map.h" #include "bhndreg.h" #include "bhndvar.h" /* BHND core device description table. 
*/ static const struct bhnd_core_desc { uint16_t vendor; uint16_t device; bhnd_devclass_t class; const char *desc; } bhnd_core_descs[] = { #define BHND_CDESC(_mfg, _cid, _cls, _desc) \ { BHND_MFGID_ ## _mfg, BHND_COREID_ ## _cid, \ BHND_DEVCLASS_ ## _cls, _desc } BHND_CDESC(BCM, CC, CC, "ChipCommon I/O Controller"), BHND_CDESC(BCM, ILINE20, OTHER, "iLine20 HPNA"), BHND_CDESC(BCM, SRAM, RAM, "SRAM"), BHND_CDESC(BCM, SDRAM, RAM, "SDRAM"), BHND_CDESC(BCM, PCI, PCI, "PCI Bridge"), BHND_CDESC(BCM, MIPS, CPU, "MIPS Core"), BHND_CDESC(BCM, ENET, ENET_MAC, "Fast Ethernet MAC"), BHND_CDESC(BCM, CODEC, OTHER, "V.90 Modem Codec"), BHND_CDESC(BCM, USB, OTHER, "USB 1.1 Device/Host Controller"), BHND_CDESC(BCM, ADSL, OTHER, "ADSL Core"), BHND_CDESC(BCM, ILINE100, OTHER, "iLine100 HPNA"), BHND_CDESC(BCM, IPSEC, OTHER, "IPsec Accelerator"), BHND_CDESC(BCM, UTOPIA, OTHER, "UTOPIA ATM Core"), BHND_CDESC(BCM, PCMCIA, PCCARD, "PCMCIA Bridge"), BHND_CDESC(BCM, SOCRAM, RAM, "Internal Memory"), BHND_CDESC(BCM, MEMC, MEMC, "MEMC SDRAM Controller"), BHND_CDESC(BCM, OFDM, OTHER, "OFDM PHY"), BHND_CDESC(BCM, EXTIF, OTHER, "External Interface"), BHND_CDESC(BCM, D11, WLAN, "802.11 MAC/PHY/Radio"), BHND_CDESC(BCM, APHY, WLAN_PHY, "802.11a PHY"), BHND_CDESC(BCM, BPHY, WLAN_PHY, "802.11b PHY"), BHND_CDESC(BCM, GPHY, WLAN_PHY, "802.11g PHY"), BHND_CDESC(BCM, MIPS33, CPU, "MIPS3302 Core"), BHND_CDESC(BCM, USB11H, OTHER, "USB 1.1 Host Controller"), BHND_CDESC(BCM, USB11D, OTHER, "USB 1.1 Device Core"), BHND_CDESC(BCM, USB20H, OTHER, "USB 2.0 Host Controller"), BHND_CDESC(BCM, USB20D, OTHER, "USB 2.0 Device Core"), BHND_CDESC(BCM, SDIOH, OTHER, "SDIO Host Controller"), BHND_CDESC(BCM, ROBO, OTHER, "RoboSwitch"), BHND_CDESC(BCM, ATA100, OTHER, "Parallel ATA Controller"), BHND_CDESC(BCM, SATAXOR, OTHER, "SATA DMA/XOR Controller"), BHND_CDESC(BCM, GIGETH, ENET_MAC, "Gigabit Ethernet MAC"), BHND_CDESC(BCM, PCIE, PCIE, "PCIe Bridge"), BHND_CDESC(BCM, NPHY, WLAN_PHY, "802.11n 2x2 PHY"), BHND_CDESC(BCM, SRAMC, MEMC, "SRAM Controller"), BHND_CDESC(BCM, MINIMAC, OTHER, "MINI MAC/PHY"), BHND_CDESC(BCM, ARM11, CPU, "ARM1176 CPU"), BHND_CDESC(BCM, ARM7S, CPU, "ARM7TDMI-S CPU"), BHND_CDESC(BCM, LPPHY, WLAN_PHY, "802.11a/b/g PHY"), BHND_CDESC(BCM, PMU, PMU, "PMU"), BHND_CDESC(BCM, SSNPHY, WLAN_PHY, "802.11n Single-Stream PHY"), BHND_CDESC(BCM, SDIOD, OTHER, "SDIO Device Core"), BHND_CDESC(BCM, ARMCM3, CPU, "ARM Cortex-M3 CPU"), BHND_CDESC(BCM, HTPHY, WLAN_PHY, "802.11n 4x4 PHY"), BHND_CDESC(MIPS,MIPS74K, CPU, "MIPS74k CPU"), BHND_CDESC(BCM, GMAC, ENET_MAC, "Gigabit MAC core"), BHND_CDESC(BCM, DMEMC, MEMC, "DDR1/DDR2 Memory Controller"), BHND_CDESC(BCM, PCIERC, OTHER, "PCIe Root Complex"), BHND_CDESC(BCM, OCP, SOC_BRIDGE, "OCP to OCP Bridge"), BHND_CDESC(BCM, SC, OTHER, "Shared Common Core"), BHND_CDESC(BCM, AHB, SOC_BRIDGE, "OCP to AHB Bridge"), BHND_CDESC(BCM, SPIH, OTHER, "SPI Host Controller"), BHND_CDESC(BCM, I2S, OTHER, "I2S Digital Audio Interface"), BHND_CDESC(BCM, DMEMS, MEMC, "SDR/DDR1 Memory Controller"), BHND_CDESC(BCM, UBUS_SHIM, OTHER, "BCM6362/UBUS WLAN SHIM"), BHND_CDESC(BCM, PCIE2, PCIE, "PCIe Bridge (Gen2)"), BHND_CDESC(ARM, APB_BRIDGE, SOC_BRIDGE, "BP135 AMBA3 AXI to APB Bridge"), BHND_CDESC(ARM, PL301, SOC_ROUTER, "PL301 AMBA3 Interconnect"), BHND_CDESC(ARM, EROM, EROM, "PL366 Device Enumeration ROM"), BHND_CDESC(ARM, OOB_ROUTER, OTHER, "PL367 OOB Interrupt Router"), BHND_CDESC(ARM, AXI_UNMAPPED, OTHER, "Unmapped Address Ranges"), BHND_CDESC(BCM, 4706_CC, CC, "ChipCommon I/O Controller"), BHND_CDESC(BCM, NS_PCIE2, PCIE, 
"PCIe Bridge (Gen2)"), BHND_CDESC(BCM, NS_DMA, OTHER, "DMA engine"), BHND_CDESC(BCM, NS_SDIO, OTHER, "SDIO 3.0 Host Controller"), BHND_CDESC(BCM, NS_USB20H, OTHER, "USB 2.0 Host Controller"), BHND_CDESC(BCM, NS_USB30H, OTHER, "USB 3.0 Host Controller"), BHND_CDESC(BCM, NS_A9JTAG, OTHER, "ARM Cortex A9 JTAG Interface"), BHND_CDESC(BCM, NS_DDR23_MEMC, MEMC, "Denali DDR2/DD3 Memory Controller"), BHND_CDESC(BCM, NS_ROM, NVRAM, "System ROM"), BHND_CDESC(BCM, NS_NAND, NVRAM, "NAND Flash Controller"), BHND_CDESC(BCM, NS_QSPI, NVRAM, "QSPI Flash Controller"), BHND_CDESC(BCM, NS_CC_B, CC_B, "ChipCommon B Auxiliary I/O Controller"), BHND_CDESC(BCM, 4706_SOCRAM, RAM, "Internal Memory"), BHND_CDESC(BCM, IHOST_ARMCA9, CPU, "ARM Cortex A9 CPU"), BHND_CDESC(BCM, 4706_GMAC_CMN, ENET, "Gigabit MAC (Common)"), BHND_CDESC(BCM, 4706_GMAC, ENET_MAC, "Gigabit MAC"), BHND_CDESC(BCM, AMEMC, MEMC, "Denali DDR1/DDR2 Memory Controller"), #undef BHND_CDESC /* Derived from inspection of the BCM4331 cores that provide PrimeCell * IDs. Due to lack of documentation, the surmised device name/purpose * provided here may be incorrect. */ { BHND_MFGID_ARM, BHND_PRIMEID_EROM, BHND_DEVCLASS_OTHER, "PL364 Device Enumeration ROM" }, { BHND_MFGID_ARM, BHND_PRIMEID_SWRAP, BHND_DEVCLASS_OTHER, "PL368 Device Management Interface" }, { BHND_MFGID_ARM, BHND_PRIMEID_MWRAP, BHND_DEVCLASS_OTHER, "PL369 Device Management Interface" }, { 0, 0, 0, NULL } }; /** * Return the name for a given JEP106 manufacturer ID. * * @param vendor A JEP106 Manufacturer ID, including the non-standard ARM 4-bit * JEP106 continuation code. */ const char * bhnd_vendor_name(uint16_t vendor) { switch (vendor) { case BHND_MFGID_ARM: return "ARM"; case BHND_MFGID_BCM: return "Broadcom"; case BHND_MFGID_MIPS: return "MIPS"; default: return "unknown"; } } /** * Return the name of a port type. */ const char * bhnd_port_type_name(bhnd_port_type port_type) { switch (port_type) { case BHND_PORT_DEVICE: return ("device"); case BHND_PORT_BRIDGE: return ("bridge"); case BHND_PORT_AGENT: return ("agent"); default: return "unknown"; } } /** * Return the name of an NVRAM source. */ const char * bhnd_nvram_src_name(bhnd_nvram_src nvram_src) { switch (nvram_src) { case BHND_NVRAM_SRC_FLASH: return ("flash"); case BHND_NVRAM_SRC_OTP: return ("OTP"); case BHND_NVRAM_SRC_SPROM: return ("SPROM"); case BHND_NVRAM_SRC_UNKNOWN: return ("none"); default: return ("unknown"); } } static const struct bhnd_core_desc * bhnd_find_core_desc(uint16_t vendor, uint16_t device) { for (u_int i = 0; bhnd_core_descs[i].desc != NULL; i++) { if (bhnd_core_descs[i].vendor != vendor) continue; if (bhnd_core_descs[i].device != device) continue; return (&bhnd_core_descs[i]); } return (NULL); } /** * Return a human-readable name for a BHND core. * * @param vendor The core designer's JEDEC-106 Manufacturer ID * @param device The core identifier. */ const char * bhnd_find_core_name(uint16_t vendor, uint16_t device) { const struct bhnd_core_desc *desc; if ((desc = bhnd_find_core_desc(vendor, device)) == NULL) return ("unknown"); return desc->desc; } /** * Return the device class for a BHND core. * * @param vendor The core designer's JEDEC-106 Manufacturer ID * @param device The core identifier. */ bhnd_devclass_t bhnd_find_core_class(uint16_t vendor, uint16_t device) { const struct bhnd_core_desc *desc; if ((desc = bhnd_find_core_desc(vendor, device)) == NULL) return (BHND_DEVCLASS_OTHER); return desc->class; } /** * Return a human-readable name for a BHND core. * * @param ci The core's info record. 
*/ const char * bhnd_core_name(const struct bhnd_core_info *ci) { return bhnd_find_core_name(ci->vendor, ci->device); } /** * Return the device class for a BHND core. * * @param ci The core's info record. */ bhnd_devclass_t bhnd_core_class(const struct bhnd_core_info *ci) { return bhnd_find_core_class(ci->vendor, ci->device); } /** * Write a human-readable name representation of the given * BHND_CHIPID_* constant to @p buffer. * * @param buffer Output buffer, or NULL to compute the required size. * @param size Capacity of @p buffer, in bytes. * @param chip_id Chip ID to be formatted. * * @return Returns the required number of bytes on success, or a negative * integer on failure. No more than @p size-1 characters will be written, with * the @p size'th set to '\0'. * * @sa BHND_CHIPID_MAX_NAMELEN */ int bhnd_format_chip_id(char *buffer, size_t size, uint16_t chip_id) { /* All hex formatted IDs are within the range of 0x4000-0x9C3F (40000-1) */ if (chip_id >= 0x4000 && chip_id <= 0x9C3F) return (snprintf(buffer, size, "BCM%hX", chip_id)); else return (snprintf(buffer, size, "BCM%hu", chip_id)); } /** * Initialize a core info record with data from a bhnd-attached @p dev. * * @param dev A bhnd device. * * @return The initialized core info record. */ struct bhnd_core_info bhnd_get_core_info(device_t dev) { return (struct bhnd_core_info) { .vendor = bhnd_get_vendor(dev), .device = bhnd_get_device(dev), .hwrev = bhnd_get_hwrev(dev), .core_idx = bhnd_get_core_index(dev), .unit = bhnd_get_core_unit(dev) }; } /** * Find a @p class child device with @p unit on @p dev. * * @param dev The bhnd-compatible bus to be searched. * @param class The device class to match on. * @param unit The core unit number; specify -1 to return the first match * regardless of unit number. * * @retval device_t if a matching child device is found. * @retval NULL if no matching child device is found. */ device_t bhnd_find_child(device_t dev, bhnd_devclass_t class, int unit) { struct bhnd_core_match md = { BHND_MATCH_CORE_CLASS(class), BHND_MATCH_CORE_UNIT(unit) }; if (unit == -1) md.m.match.core_unit = 0; return bhnd_match_child(dev, &md); } /** * Find the first child device on @p dev that matches @p desc. * * @param dev The bhnd-compatible bus to be searched. * @param desc A match descriptor. * * @retval device_t if a matching child device is found. * @retval NULL if no matching child device is found. */ device_t bhnd_match_child(device_t dev, const struct bhnd_core_match *desc) { device_t *devlistp; device_t match; int devcnt; int error; error = device_get_children(dev, &devlistp, &devcnt); if (error != 0) return (NULL); match = NULL; for (int i = 0; i < devcnt; i++) { struct bhnd_core_info ci = bhnd_get_core_info(devlistp[i]); if (bhnd_core_matches(&ci, desc)) { match = devlistp[i]; goto done; } } done: free(devlistp, M_TEMP); return match; } /** * Walk up the bhnd device hierarchy to locate the root device * to which the bhndb bridge is attached. * * This can be used from within bhnd host bridge drivers to locate the * actual upstream host device. * * @param dev A bhnd device. * @param bus_class The expected bus (e.g. "pci") to which the bridge root * should be attached. * * @retval device_t if a matching parent device is found. * @retval NULL @p dev is not attached via a bhndb bus * @retval NULL no parent device is attached via @p bus_class.
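 * * A minimal sketch locating the PCI device hosting a bridged bhnd(4) bus: * @code * device_t root; * * root = bhnd_find_bridge_root(dev, devclass_find("pci")); * if (root == NULL) * device_printf(dev, "not bridged via pci\n"); * @endcode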
*/ device_t bhnd_find_bridge_root(device_t dev, devclass_t bus_class) { devclass_t bhndb_class; device_t parent; KASSERT(device_get_devclass(device_get_parent(dev)) == bhnd_devclass, ("%s not a bhnd device", device_get_nameunit(dev))); bhndb_class = devclass_find("bhndb"); /* Walk the device tree until we hit a bridge */ parent = dev; while ((parent = device_get_parent(parent)) != NULL) { if (device_get_devclass(parent) == bhndb_class) break; } /* No bridge? */ if (parent == NULL) return (NULL); /* Search for a parent attached to the expected bus class */ while ((parent = device_get_parent(parent)) != NULL) { device_t bus; bus = device_get_parent(parent); if (bus != NULL && device_get_devclass(bus) == bus_class) return (parent); } /* Not found */ return (NULL); } /** * Find the first core in @p cores that matches @p desc. * * @param cores The table to search. * @param num_cores The length of @p cores. * @param desc A match descriptor. * * @retval bhnd_core_info if a matching core is found. * @retval NULL if no matching core is found. */ const struct bhnd_core_info * bhnd_match_core(const struct bhnd_core_info *cores, u_int num_cores, const struct bhnd_core_match *desc) { for (u_int i = 0; i < num_cores; i++) { if (bhnd_core_matches(&cores[i], desc)) return &cores[i]; } return (NULL); } /** * Find the first core in @p cores with the given @p class. * * @param cores The table to search. * @param num_cores The length of @p cores. * @param desc A match descriptor. * * @retval bhnd_core_info if a matching core is found. * @retval NULL if no matching core is found. */ const struct bhnd_core_info * bhnd_find_core(const struct bhnd_core_info *cores, u_int num_cores, bhnd_devclass_t class) { struct bhnd_core_match md = { BHND_MATCH_CORE_CLASS(class) }; return bhnd_match_core(cores, num_cores, &md); } /** * Return true if the @p core matches @p desc. * * @param core A bhnd core descriptor. * @param desc A match descriptor to compare against @p core. * * @retval true if @p core matches @p match * @retval false if @p core does not match @p match. */ bool bhnd_core_matches(const struct bhnd_core_info *core, const struct bhnd_core_match *desc) { if (desc->m.match.core_vendor && desc->core_vendor != core->vendor) return (false); if (desc->m.match.core_id && desc->core_id != core->device) return (false); if (desc->m.match.core_unit && desc->core_unit != core->unit) return (false); if (desc->m.match.core_rev && !bhnd_hwrev_matches(core->hwrev, &desc->core_rev)) return (false); if (desc->m.match.core_class && desc->core_class != bhnd_core_class(core)) return (false); return true; } /** * Return true if the @p chip matches @p desc. * * @param chip A bhnd chip identifier. * @param desc A match descriptor to compare against @p chip. * * @retval true if @p chip matches @p match * @retval false if @p chip does not match @p match. */ bool bhnd_chip_matches(const struct bhnd_chipid *chip, const struct bhnd_chip_match *desc) { if (desc->m.match.chip_id && chip->chip_id != desc->chip_id) return (false); if (desc->m.match.chip_pkg && chip->chip_pkg != desc->chip_pkg) return (false); if (desc->m.match.chip_rev && !bhnd_hwrev_matches(chip->chip_rev, &desc->chip_rev)) return (false); return (true); } /** * Return true if the @p board matches @p desc. * * @param board The bhnd board info. * @param desc A match descriptor to compare against @p board. * * @retval true if @p chip matches @p match * @retval false if @p chip does not match @p match. 
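 * * A minimal sketch (MYBOARD and MY_QUIRK are hypothetical constants; the * exact argument naming follows the match macros in bhnd_match.h): * @code * struct bhnd_board_match md = { * BHND_MATCH_BOARD_TYPE(MYBOARD) * }; * * if (bhnd_board_matches(&board, &md)) * quirks |= MY_QUIRK; * @endcode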
*/ bool bhnd_board_matches(const struct bhnd_board_info *board, const struct bhnd_board_match *desc) { if (desc->m.match.board_srom_rev && !bhnd_hwrev_matches(board->board_srom_rev, &desc->board_srom_rev)) return (false); if (desc->m.match.board_vendor && board->board_vendor != desc->board_vendor) return (false); if (desc->m.match.board_type && board->board_type != desc->board_type) return (false); if (desc->m.match.board_rev && !bhnd_hwrev_matches(board->board_rev, &desc->board_rev)) return (false); return (true); } /** * Return true if the @p hwrev matches @p desc. * * @param hwrev A bhnd hardware revision. * @param desc A match descriptor to compare against @p core. * * @retval true if @p hwrev matches @p match * @retval false if @p hwrev does not match @p match. */ bool bhnd_hwrev_matches(uint16_t hwrev, const struct bhnd_hwrev_match *desc) { if (desc->start != BHND_HWREV_INVALID && desc->start > hwrev) return false; if (desc->end != BHND_HWREV_INVALID && desc->end < hwrev) return false; return true; } /** * Return true if the @p dev matches @p desc. * * @param dev A bhnd device. * @param desc A match descriptor to compare against @p dev. * * @retval true if @p dev matches @p match * @retval false if @p dev does not match @p match. */ bool bhnd_device_matches(device_t dev, const struct bhnd_device_match *desc) { struct bhnd_core_info core; const struct bhnd_chipid *chip; struct bhnd_board_info board; device_t parent; int error; /* Construct individual match descriptors */ struct bhnd_core_match m_core = { _BHND_CORE_MATCH_COPY(desc) }; struct bhnd_chip_match m_chip = { _BHND_CHIP_MATCH_COPY(desc) }; struct bhnd_board_match m_board = { _BHND_BOARD_MATCH_COPY(desc) }; /* Fetch and match core info */ if (m_core.m.match_flags) { /* Only applicable to bhnd-attached cores */ parent = device_get_parent(dev); if (device_get_devclass(parent) != bhnd_devclass) { device_printf(dev, "attempting to match core " "attributes against non-core device\n"); return (false); } core = bhnd_get_core_info(dev); if (!bhnd_core_matches(&core, &m_core)) return (false); } /* Fetch and match chip info */ if (m_chip.m.match_flags) { chip = bhnd_get_chipid(dev); if (!bhnd_chip_matches(chip, &m_chip)) return (false); } /* Fetch and match board info. * * This is not available until after NVRAM is up; earlier device * matches should not include board requirements */ if (m_board.m.match_flags) { if ((error = bhnd_read_board_info(dev, &board))) { device_printf(dev, "failed to read required board info " "during device matching: %d\n", error); return (false); } if (!bhnd_board_matches(&board, &m_board)) return (false); } /* All matched */ return (true); } /** * Search @p table for an entry matching @p dev. * * @param dev A bhnd device to match against @p table. * @param table The device table to search. * @param entry_size The @p table entry size, in bytes. * * @retval bhnd_device the first matching device, if any. * @retval NULL if no matching device is found in @p table. 
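 * * A typical use is a driver-private device table (sketch; descriptions and * quirk tables are omitted): * @code * static const struct bhnd_device my_devs[] = { * BHND_DEVICE(BCM, PCI, NULL, NULL), * BHND_DEVICE(BCM, PCIE, NULL, NULL), * BHND_DEVICE_END * }; * * const struct bhnd_device *ent; * * ent = bhnd_device_lookup(dev, my_devs, sizeof(my_devs[0])); * @endcode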
*/ const struct bhnd_device * bhnd_device_lookup(device_t dev, const struct bhnd_device *table, size_t entry_size) { const struct bhnd_device *entry; device_t hostb, parent; bhnd_attach_type attach_type; uint32_t dflags; parent = device_get_parent(dev); hostb = bhnd_find_hostb_device(parent); attach_type = bhnd_get_attach_type(dev); for (entry = table; !BHND_DEVICE_IS_END(entry); entry = (const struct bhnd_device *) ((const char *) entry + entry_size)) { /* match core info */ if (!bhnd_device_matches(dev, &entry->core)) continue; /* match device flags */ dflags = entry->device_flags; /* hostb implies BHND_ATTACH_ADAPTER requirement */ if (dflags & BHND_DF_HOSTB) dflags |= BHND_DF_ADAPTER; if (dflags & BHND_DF_ADAPTER) if (attach_type != BHND_ATTACH_ADAPTER) continue; if (dflags & BHND_DF_HOSTB) if (dev != hostb) continue; if (dflags & BHND_DF_SOC) if (attach_type != BHND_ATTACH_NATIVE) continue; /* device found */ return (entry); } /* not found */ return (NULL); } /** * Scan the device @p table for all quirk flags applicable to @p dev. * * @param dev A bhnd device to match against @p table. * @param table The device table to search. * * @return returns all matching quirk flags. */ uint32_t bhnd_device_quirks(device_t dev, const struct bhnd_device *table, size_t entry_size) { const struct bhnd_device *dent; const struct bhnd_device_quirk *qent, *qtable; uint32_t quirks; /* Locate the device entry */ if ((dent = bhnd_device_lookup(dev, table, entry_size)) == NULL) return (0); /* Quirks table is optional */ qtable = dent->quirks_table; if (qtable == NULL) return (0); /* Collect matching device quirk entries */ quirks = 0; for (qent = qtable; !BHND_DEVICE_QUIRK_IS_END(qent); qent++) { if (bhnd_device_matches(dev, &qent->desc)) quirks |= qent->quirks; } return (quirks); } /** * Allocate bhnd(4) resources defined in @p rs from a parent bus. * * @param dev The device requesting ownership of the resources. * @param rs A standard bus resource specification. This will be updated * with the allocated resource's RIDs. * @param res On success, the allocated bhnd resources. * * @retval 0 success * @retval non-zero if allocation of any non-RF_OPTIONAL resource fails, * all allocated resources will be released and a regular * unix error code will be returned. */ int bhnd_alloc_resources(device_t dev, struct resource_spec *rs, struct bhnd_resource **res) { /* Initialize output array */ for (u_int i = 0; rs[i].type != -1; i++) res[i] = NULL; for (u_int i = 0; rs[i].type != -1; i++) { res[i] = bhnd_alloc_resource_any(dev, rs[i].type, &rs[i].rid, rs[i].flags); /* Clean up all allocations on failure */ if (res[i] == NULL && !(rs[i].flags & RF_OPTIONAL)) { bhnd_release_resources(dev, rs, res); return (ENXIO); } } return (0); }; /** * Release bhnd(4) resources defined in @p rs from a parent bus. * * @param dev The device that owns the resources. * @param rs A standard bus resource specification previously initialized * by @p bhnd_alloc_resources. * @param res The bhnd resources to be released. */ void bhnd_release_resources(device_t dev, const struct resource_spec *rs, struct bhnd_resource **res) { for (u_int i = 0; rs[i].type != -1; i++) { if (res[i] == NULL) continue; bhnd_release_resource(dev, rs[i].type, rs[i].rid, res[i]); res[i] = NULL; } } /** * Parse the CHIPC_ID_* fields from the ChipCommon CHIPC_ID * register, returning its bhnd_chipid representation. * * @param idreg The CHIPC_ID register value. * @param enum_addr The enumeration address to include in the result. 
* * @warning * On early siba(4) devices, the ChipCommon core does not provide * a valid CHIPC_ID_NUMCORE field. On these ChipCommon revisions * (see CHIPC_NCORES_MIN_HWREV()), this function will parse and return * an invalid `ncores` value. */ struct bhnd_chipid bhnd_parse_chipid(uint32_t idreg, bhnd_addr_t enum_addr) { struct bhnd_chipid result; /* Fetch the basic chip info */ result.chip_id = CHIPC_GET_BITS(idreg, CHIPC_ID_CHIP); result.chip_pkg = CHIPC_GET_BITS(idreg, CHIPC_ID_PKG); result.chip_rev = CHIPC_GET_BITS(idreg, CHIPC_ID_REV); result.chip_type = CHIPC_GET_BITS(idreg, CHIPC_ID_BUS); result.ncores = CHIPC_GET_BITS(idreg, CHIPC_ID_NUMCORE); result.enum_addr = enum_addr; return (result); } + /** + * Determine the correct core count for a chip identification value that + * may contain an invalid core count. + * + * On some early siba(4) devices (see CHIPC_NCORES_MIN_HWREV()), the ChipCommon + * core does not provide a valid CHIPC_ID_NUMCORE field. + * + * @param cid The chip identification to be queried. + * @param chipc_hwrev The hardware revision of the ChipCommon core from which + * @p cid was parsed. + * @param[out] ncores On success, will be set to the correct core count. + * + * @retval 0 If the core count is already correct, or was mapped to a + * correct value. + * @retval EINVAL If the core count is incorrect, but the chip was not + * recognized. + */ +int +bhnd_chipid_fixed_ncores(const struct bhnd_chipid *cid, uint16_t chipc_hwrev, + uint8_t *ncores) +{ + /* bcma(4), and most siba(4) devices */ + if (CHIPC_NCORES_MIN_HWREV(chipc_hwrev)) { + *ncores = cid->ncores; + return (0); + } + + /* broken siba(4) chipsets */ + switch (cid->chip_id) { + case BHND_CHIPID_BCM4306: + *ncores = 6; + break; + case BHND_CHIPID_BCM4704: + *ncores = 9; + break; + case BHND_CHIPID_BCM5365: + /* + * BCM5365 does support ID_NUMCORE in at least + * some of its revisions, but for unknown + * reasons, Broadcom's drivers always exclude + * the ChipCommon revision (0x5) used by BCM5365 + * from the set of revisions supporting + * ID_NUMCORE, and instead supply a fixed value. + * + * Presumably, at least some of these devices + * shipped with a broken ID_NUMCORE value. + */ + *ncores = 7; + break; + default: + return (EINVAL); + } + + return (0); +} + +/** * Allocate the resource defined by @p rs via @p dev, use it * to read the ChipCommon ID register relative to @p chipc_offset, * then release the resource. * * @param dev The device owning @p rs. * @param rs A resource spec that encompasses the ChipCommon register block. * @param chipc_offset The offset of the ChipCommon registers within @p rs. * @param[out] result the chip identification data. * * @retval 0 success * @retval non-zero if the ChipCommon identification data could not be read.
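 * * A minimal sketch (the resource spec and ChipCommon offset are * bridge-specific; RID 0 and a zero offset are assumed here): * @code * struct resource_spec rs = { SYS_RES_MEMORY, 0, RF_ACTIVE }; * struct bhnd_chipid cid; * int error; * * error = bhnd_read_chipid(dev, &rs, 0, &cid); * @endcode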
*/ int bhnd_read_chipid(device_t dev, struct resource_spec *rs, bus_size_t chipc_offset, struct bhnd_chipid *result) { struct resource *res; bhnd_addr_t enum_addr; uint32_t reg; uint8_t chip_type; int error, rid, rtype; rid = rs->rid; rtype = rs->type; error = 0; /* Allocate the ChipCommon window resource and fetch the chipid data */ res = bus_alloc_resource_any(dev, rtype, &rid, RF_ACTIVE); if (res == NULL) { device_printf(dev, "failed to allocate bhnd chipc resource\n"); return (ENXIO); } /* Fetch the basic chip info */ reg = bus_read_4(res, chipc_offset + CHIPC_ID); chip_type = CHIPC_GET_BITS(reg, CHIPC_ID_BUS); /* Fetch the EROMPTR */ if (BHND_CHIPTYPE_HAS_EROM(chip_type)) { enum_addr = bus_read_4(res, chipc_offset + CHIPC_EROMPTR); } else if (chip_type == BHND_CHIPTYPE_SIBA) { /* siba(4) uses the ChipCommon base address as the enumeration * address */ enum_addr = rman_get_start(res) + chipc_offset; } else { device_printf(dev, "unknown chip type %hhu\n", chip_type); error = ENODEV; goto cleanup; } *result = bhnd_parse_chipid(reg, enum_addr); + + /* Fix the core count on early siba(4) devices */ + if (chip_type == BHND_CHIPTYPE_SIBA) { + uint32_t idh; + uint16_t chipc_hwrev; + + /* + * We need the ChipCommon revision to determine whether + * the ncore field is valid. + * + * We can safely assume the siba IDHIGH register is mapped + * within the chipc register block. + */ + idh = bus_read_4(res, SB0_REG_ABS(SIBA_CFG0_IDHIGH)); + chipc_hwrev = SIBA_IDH_CORE_REV(idh); + + error = bhnd_chipid_fixed_ncores(result, chipc_hwrev, + &result->ncores); + if (error) + goto cleanup; + } cleanup: /* Clean up */ bus_release_resource(dev, rtype, rid, res); return (error); } /** * Read an NVRAM variable's NUL-terminated string value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] buf A buffer large enough to hold @p len bytes. On * success, the NUL-terminated string value will be * written to this buffer. This argument may be NULL if * the value is not desired. * @param len The maximum capacity of @p buf. * @param[out] rlen On success, will be set to the actual size of * the requested value (including NUL termination). This * argument may be NULL if the size is not desired. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval ENOMEM If @p buf is non-NULL and a buffer of @p len is too * small to hold the requested value. * @retval EFTYPE If the variable data cannot be coerced to a valid * string representation. * @retval ERANGE If value coercion would overflow @p type. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_str(device_t dev, const char *name, char *buf, size_t len, size_t *rlen) { size_t larg; int error; larg = len; error = bhnd_nvram_getvar(dev, name, buf, &larg, BHND_NVRAM_TYPE_CSTR); if (rlen != NULL) *rlen = larg; return (error); } /** * Read an NVRAM variable's unsigned integer value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] value On success, the requested value will be written * to this pointer. * @param width The output integer type width (1, 2, or * 4 bytes). * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval EFTYPE If the variable data cannot be coerced to * a valid unsigned integer representation.
* @retval ERANGE If value coercion would overflow (or underflow) an * unsigned representation of the given @p width. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_uint(device_t dev, const char *name, void *value, int width) { bhnd_nvram_type type; size_t len; switch (width) { case 1: type = BHND_NVRAM_TYPE_UINT8; break; case 2: type = BHND_NVRAM_TYPE_UINT16; break; case 4: type = BHND_NVRAM_TYPE_UINT32; break; default: device_printf(dev, "unsupported NVRAM integer width: %d\n", width); return (EINVAL); } len = width; return (bhnd_nvram_getvar(dev, name, value, &len, type)); } /** * Read an NVRAM variable's unsigned 8-bit integer value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] value On success, the requested value will be written * to this pointer. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval EFTYPE If the variable data cannot be coerced to * a valid unsigned integer representation. * @retval ERANGE If value coercion would overflow (or underflow) uint8_t. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_uint8(device_t dev, const char *name, uint8_t *value) { return (bhnd_nvram_getvar_uint(dev, name, value, sizeof(*value))); } /** * Read an NVRAM variable's unsigned 16-bit integer value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] value On success, the requested value will be written * to this pointer. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval EFTYPE If the variable data cannot be coerced to * a valid unsigned integer representation. * @retval ERANGE If value coercion would overflow (or underflow) * uint16_t. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_uint16(device_t dev, const char *name, uint16_t *value) { return (bhnd_nvram_getvar_uint(dev, name, value, sizeof(*value))); } /** * Read an NVRAM variable's unsigned 32-bit integer value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] value On success, the requested value will be written * to this pointer. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval EFTYPE If the variable data cannot be coerced to * a valid unsigned integer representation. * @retval ERANGE If value coercion would overflow (or underflow) * uint32_t. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_uint32(device_t dev, const char *name, uint32_t *value) { return (bhnd_nvram_getvar_uint(dev, name, value, sizeof(*value))); } /** * Read an NVRAM variable's signed integer value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] value On success, the requested value will be written * to this pointer. * @param width The output integer type width (1, 2, or * 4 bytes). * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval EFTYPE If the variable data cannot be coerced to * a valid integer representation.
* @retval ERANGE If value coercion would overflow (or underflow) a * signed representation of the given @p width. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_int(device_t dev, const char *name, void *value, int width) { bhnd_nvram_type type; size_t len; switch (width) { case 1: type = BHND_NVRAM_TYPE_INT8; break; case 2: type = BHND_NVRAM_TYPE_INT16; break; case 4: type = BHND_NVRAM_TYPE_INT32; break; default: device_printf(dev, "unsupported NVRAM integer width: %d\n", width); return (EINVAL); } len = width; return (bhnd_nvram_getvar(dev, name, value, &len, type)); } /** * Read an NVRAM variable's signed 8-bit integer value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] value On success, the requested value will be written * to this pointer. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval EFTYPE If the variable data cannot be coerced to * a valid integer representation. * @retval ERANGE If value coercion would overflow (or underflow) int8_t. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_int8(device_t dev, const char *name, int8_t *value) { return (bhnd_nvram_getvar_int(dev, name, value, sizeof(*value))); } /** * Read an NVRAM variable's signed 16-bit integer value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] value On success, the requested value will be written * to this pointer. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval EFTYPE If the variable data cannot be coerced to * a valid integer representation. * @retval ERANGE If value coercion would overflow (or underflow) * int16_t. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_int16(device_t dev, const char *name, int16_t *value) { return (bhnd_nvram_getvar_int(dev, name, value, sizeof(*value))); } /** * Read an NVRAM variable's signed 32-bit integer value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] value On success, the requested value will be written * to this pointer. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval EFTYPE If the variable data cannot be coerced to * a valid integer representation. * @retval ERANGE If value coercion would overflow (or underflow) * int32_t. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_int32(device_t dev, const char *name, int32_t *value) { return (bhnd_nvram_getvar_int(dev, name, value, sizeof(*value))); } /** * Read an NVRAM variable's array value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] buf A buffer large enough to hold @p size bytes. * On success, the requested value will be written * to this buffer. * @param[in,out] size The required number of bytes to write to * @p buf. * @param type The desired array element data representation. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found.
* @retval ENXIO If less than @p size bytes are available. * @retval ENOMEM If a buffer of @p size is too small to hold the * requested value. * @retval EFTYPE If the variable data cannot be coerced to * a valid instance of @p type. * @retval ERANGE If value coercion would overflow (or underflow) a * representation of @p type. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_array(device_t dev, const char *name, void *buf, size_t size, bhnd_nvram_type type) { size_t nbytes; int error; /* Attempt read */ nbytes = size; if ((error = bhnd_nvram_getvar(dev, name, buf, &nbytes, type))) return (error); /* Verify that the expected number of bytes were fetched */ if (nbytes < size) return (ENXIO); return (0); } /** * Using the bhnd(4) bus-level core information and a custom core name, * populate @p dev's device description. * * @param dev A bhnd-bus attached device. * @param dev_name The core's name (e.g. "SDIO Device Core") */ void bhnd_set_custom_core_desc(device_t dev, const char *dev_name) { const char *vendor_name; char *desc; vendor_name = bhnd_get_vendor_name(dev); asprintf(&desc, M_BHND, "%s %s, rev %hhu", vendor_name, dev_name, bhnd_get_hwrev(dev)); if (desc != NULL) { device_set_desc_copy(dev, desc); free(desc, M_BHND); } else { device_set_desc(dev, dev_name); } } /** * Using the bhnd(4) bus-level core information, populate @p dev's device * description. * * @param dev A bhnd-bus attached device. */ void bhnd_set_default_core_desc(device_t dev) { bhnd_set_custom_core_desc(dev, bhnd_get_device_name(dev)); } /** * Using the bhnd @p chip_id, populate the bhnd(4) bus @p dev's device * description. * * @param dev A bhnd-bus attached device. * @param chip_id The chip identification used to derive the description. */ void bhnd_set_default_bus_desc(device_t dev, const struct bhnd_chipid *chip_id) { const char *bus_name; char *desc; char chip_name[BHND_CHIPID_MAX_NAMELEN]; /* Determine chip type's bus name */ switch (chip_id->chip_type) { case BHND_CHIPTYPE_SIBA: bus_name = "SIBA bus"; break; case BHND_CHIPTYPE_BCMA: case BHND_CHIPTYPE_BCMA_ALT: bus_name = "BCMA bus"; break; case BHND_CHIPTYPE_UBUS: bus_name = "UBUS bus"; break; default: bus_name = "Unknown Type"; break; } /* Format chip name */ bhnd_format_chip_id(chip_name, sizeof(chip_name), chip_id->chip_id); /* Format and set device description */ asprintf(&desc, M_BHND, "%s %s", chip_name, bus_name); if (desc != NULL) { device_set_desc_copy(dev, desc); free(desc, M_BHND); } else { device_set_desc(dev, bus_name); } } /** * Helper function for implementing BHND_BUS_IS_HW_DISABLED(). * * If a parent device is available, this implementation delegates the * request to the BHND_BUS_IS_HW_DISABLED() method on the parent of @p dev. * * If no parent device is available (i.e. on the bus root), the hardware * is assumed to be usable and false is returned. */ bool bhnd_bus_generic_is_hw_disabled(device_t dev, device_t child) { if (device_get_parent(dev) != NULL) return (BHND_BUS_IS_HW_DISABLED(device_get_parent(dev), child)); return (false); } /** * Helper function for implementing BHND_BUS_GET_CHIPID(). * * This implementation delegates the request to the BHND_BUS_GET_CHIPID() * method on the parent of @p dev. If no parent exists, the implementation * will panic.
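 *
 * Bus drivers would typically install this helper in their method table;
 * a sketch only:
 *
 * @code
 * DEVMETHOD(bhnd_bus_get_chipid, bhnd_bus_generic_get_chipid),
 * @endcode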
*/ const struct bhnd_chipid * bhnd_bus_generic_get_chipid(device_t dev, device_t child) { if (device_get_parent(dev) != NULL) return (BHND_BUS_GET_CHIPID(device_get_parent(dev), child)); panic("missing BHND_BUS_GET_CHIPID()"); } /* nvram board_info population macros for bhnd_bus_generic_read_board_info() */ #define BHND_GV(_dest, _name) \ bhnd_nvram_getvar_uint(child, BHND_NVAR_ ## _name, &_dest, \ sizeof(_dest)) #define REQ_BHND_GV(_dest, _name) do { \ if ((error = BHND_GV(_dest, _name))) { \ device_printf(dev, \ "error reading " __STRING(_name) ": %d\n", error); \ return (error); \ } \ } while(0) #define OPT_BHND_GV(_dest, _name, _default) do { \ if ((error = BHND_GV(_dest, _name))) { \ if (error != ENOENT) { \ device_printf(dev, \ "error reading " \ __STRING(_name) ": %d\n", error); \ return (error); \ } \ _dest = _default; \ } \ } while(0) /** * Helper function for implementing BHND_BUS_READ_BOARDINFO(). * * This implementation populates @p info with information from NVRAM, * defaulting board_vendor and board_type fields to 0 if the * requested variables cannot be found. * * This behavior is correct for most SoCs, but must be overridden on * bridged (PCI, PCMCIA, etc) devices to produce a complete bhnd_board_info * result. */ int bhnd_bus_generic_read_board_info(device_t dev, device_t child, struct bhnd_board_info *info) { int error; OPT_BHND_GV(info->board_vendor, BOARDVENDOR, 0); OPT_BHND_GV(info->board_type, BOARDTYPE, 0); /* srom >= 2 */ REQ_BHND_GV(info->board_rev, BOARDREV); OPT_BHND_GV(info->board_srom_rev, SROMREV, 0); /* missing in some SoC NVRAM */ REQ_BHND_GV(info->board_flags, BOARDFLAGS); OPT_BHND_GV(info->board_flags2, BOARDFLAGS2, 0); /* srom >= 4 */ OPT_BHND_GV(info->board_flags3, BOARDFLAGS3, 0); /* srom >= 11 */ return (0); } #undef BHND_GV #undef REQ_BHND_GV #undef OPT_BHND_GV /** * Helper function for implementing BHND_BUS_GET_NVRAM_VAR(). * * This implementation searches @p dev for a usable NVRAM child device. * * If no usable child device is found on @p dev, the request is delegated to * the BHND_BUS_GET_NVRAM_VAR() method on the parent of @p dev. */ int bhnd_bus_generic_get_nvram_var(device_t dev, device_t child, const char *name, void *buf, size_t *size, bhnd_nvram_type type) { device_t nvram; device_t parent; /* Make sure we're holding Giant for newbus */ GIANT_REQUIRED; /* Look for a directly-attached NVRAM child */ if ((nvram = device_find_child(dev, "bhnd_nvram", -1)) != NULL) return (BHND_NVRAM_GETVAR(nvram, name, buf, size, type)); /* Try to delegate to parent */ if ((parent = device_get_parent(dev)) == NULL) return (ENODEV); return (BHND_BUS_GET_NVRAM_VAR(parent, child, name, buf, size, type)); } /** * Helper function for implementing BHND_BUS_ALLOC_RESOURCE(). * * This implementation of BHND_BUS_ALLOC_RESOURCE() delegates allocation * of the underlying resource to BUS_ALLOC_RESOURCE(), and activation * to @p dev's BHND_BUS_ACTIVATE_RESOURCE(). */ struct bhnd_resource * bhnd_bus_generic_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct bhnd_resource *br; struct resource *res; int error; br = NULL; res = NULL; /* Allocate the real bus resource (without activating it) */ res = BUS_ALLOC_RESOURCE(dev, child, type, rid, start, end, count, (flags & ~RF_ACTIVE)); if (res == NULL) return (NULL); /* Allocate our bhnd resource wrapper.
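 *
 * (Descriptive aside: the wrapper starts out marked indirect, i.e.
 * direct = false; if RF_ACTIVE was requested, the activation call below
 * may promote it to direct access, depending on the bus implementation.)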
*/ br = malloc(sizeof(struct bhnd_resource), M_BHND, M_NOWAIT); if (br == NULL) goto failed; br->direct = false; br->res = res; /* Attempt activation */ if (flags & RF_ACTIVE) { error = BHND_BUS_ACTIVATE_RESOURCE(dev, child, type, *rid, br); if (error) goto failed; } return (br); failed: if (res != NULL) BUS_RELEASE_RESOURCE(dev, child, type, *rid, res); free(br, M_BHND); return (NULL); } /** * Helper function for implementing BHND_BUS_RELEASE_RESOURCE(). * * This implementation of BHND_BUS_RELEASE_RESOURCE() delegates release of * the backing resource to BUS_RELEASE_RESOURCE(). */ int bhnd_bus_generic_release_resource(device_t dev, device_t child, int type, int rid, struct bhnd_resource *r) { int error; if ((error = BUS_RELEASE_RESOURCE(dev, child, type, rid, r->res))) return (error); free(r, M_BHND); return (0); } /** * Helper function for implementing BHND_BUS_ACTIVATE_RESOURCE(). * * This implementation of BHND_BUS_ACTIVATE_RESOURCE() simply calls the * BHND_BUS_ACTIVATE_RESOURCE() method of the parent of @p dev. */ int bhnd_bus_generic_activate_resource(device_t dev, device_t child, int type, int rid, struct bhnd_resource *r) { /* Try to delegate to the parent */ if (device_get_parent(dev) != NULL) return (BHND_BUS_ACTIVATE_RESOURCE(device_get_parent(dev), child, type, rid, r)); return (EINVAL); } /** * Helper function for implementing BHND_BUS_DEACTIVATE_RESOURCE(). * * This implementation of BHND_BUS_DEACTIVATE_RESOURCE() simply calls the * BHND_BUS_DEACTIVATE_RESOURCE() method of the parent of @p dev. */ int bhnd_bus_generic_deactivate_resource(device_t dev, device_t child, int type, int rid, struct bhnd_resource *r) { if (device_get_parent(dev) != NULL) return (BHND_BUS_DEACTIVATE_RESOURCE(device_get_parent(dev), child, type, rid, r)); return (EINVAL); } Index: projects/clang390-import/sys/dev/bhnd/siba/siba.c =================================================================== --- projects/clang390-import/sys/dev/bhnd/siba/siba.c (revision 305016) +++ projects/clang390-import/sys/dev/bhnd/siba/siba.c (revision 305017) @@ -1,788 +1,765 @@ /*- * Copyright (c) 2015 Landon Fuller * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES.
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "sibareg.h" #include "sibavar.h" int siba_probe(device_t dev) { device_set_desc(dev, "SIBA BHND bus"); return (BUS_PROBE_DEFAULT); } int siba_attach(device_t dev) { struct siba_devinfo *dinfo; struct siba_softc *sc; device_t *devs; int ndevs; int error; sc = device_get_softc(dev); sc->dev = dev; /* Fetch references to the siba SIBA_CFG* blocks for all * registered devices */ if ((error = device_get_children(dev, &devs, &ndevs))) return (error); for (int i = 0; i < ndevs; i++) { struct siba_addrspace *addrspace; dinfo = device_get_ivars(devs[i]); KASSERT(!device_is_suspended(devs[i]), ("siba(4) stateful suspend handling requires that devices " "not be suspended before siba_attach()")); /* Fetch the core register address space */ addrspace = siba_find_addrspace(dinfo, BHND_PORT_DEVICE, 0, 0); if (addrspace == NULL) { device_printf(dev, "missing device registers for core %d\n", i); error = ENXIO; goto cleanup; } /* * Map the per-core configuration blocks */ KASSERT(dinfo->core_id.num_cfg_blocks <= SIBA_MAX_CFG, ("config block count %u out of range", dinfo->core_id.num_cfg_blocks)); for (u_int cfgidx = 0; cfgidx < dinfo->core_id.num_cfg_blocks; cfgidx++) { rman_res_t r_start, r_count, r_end; /* Determine the config block's address range; configuration * blocks are allocated starting at SIBA_CFG0_OFFSET, * growing downwards. */ r_start = addrspace->sa_base + SIBA_CFG0_OFFSET; r_start -= cfgidx * SIBA_CFG_SIZE; r_count = SIBA_CFG_SIZE; r_end = r_start + r_count - 1; /* Allocate the config resource */ dinfo->cfg_rid[cfgidx] = 0; dinfo->cfg[cfgidx] = BHND_BUS_ALLOC_RESOURCE(dev, dev, SYS_RES_MEMORY, &dinfo->cfg_rid[cfgidx], r_start, r_end, r_count, RF_ACTIVE); if (dinfo->cfg[cfgidx] == NULL) { device_printf(dev, "failed allocating CFG_%u for " "core %d\n", cfgidx, i); error = ENXIO; goto cleanup; } } } cleanup: free(devs, M_BHND); if (error) return (error); /* Delegate remainder to standard bhnd method implementation */ return (bhnd_generic_attach(dev)); } int siba_detach(device_t dev) { return (bhnd_generic_detach(dev)); } int siba_resume(device_t dev) { return (bhnd_generic_resume(dev)); } int siba_suspend(device_t dev) { return (bhnd_generic_suspend(dev)); } static int siba_read_ivar(device_t dev, device_t child, int index, uintptr_t *result) { const struct siba_devinfo *dinfo; const struct bhnd_core_info *cfg; dinfo = device_get_ivars(child); cfg = &dinfo->core_id.core_info; switch (index) { case BHND_IVAR_VENDOR: *result = cfg->vendor; return (0); case BHND_IVAR_DEVICE: *result = cfg->device; return (0); case BHND_IVAR_HWREV: *result = cfg->hwrev; return (0); case BHND_IVAR_DEVICE_CLASS: *result = bhnd_core_class(cfg); return (0); case BHND_IVAR_VENDOR_NAME: *result = (uintptr_t) bhnd_vendor_name(cfg->vendor); return (0); case BHND_IVAR_DEVICE_NAME: *result = (uintptr_t) bhnd_core_name(cfg); return (0); case BHND_IVAR_CORE_INDEX: *result = cfg->core_idx; return (0); case BHND_IVAR_CORE_UNIT: *result = cfg->unit; return (0); default: return (ENOENT); } } static int siba_write_ivar(device_t dev, device_t child, int index, uintptr_t value) { switch (index) { case BHND_IVAR_VENDOR: case BHND_IVAR_DEVICE: case BHND_IVAR_HWREV: case BHND_IVAR_DEVICE_CLASS: case BHND_IVAR_VENDOR_NAME: case BHND_IVAR_DEVICE_NAME: case BHND_IVAR_CORE_INDEX: case BHND_IVAR_CORE_UNIT: return (EINVAL); default: return (ENOENT); } } static struct resource_list * siba_get_resource_list(device_t dev, device_t 
child) { struct siba_devinfo *dinfo = device_get_ivars(child); return (&dinfo->resources); } static device_t siba_find_hostb_device(device_t dev) { struct siba_softc *sc = device_get_softc(dev); /* This is set (or not) by the concrete siba driver subclass. */ return (sc->hostb_dev); } static int siba_reset_core(device_t dev, device_t child, uint16_t flags) { struct siba_devinfo *dinfo; /* Delegate non-bus-attached devices to our parent */ if (device_get_parent(child) != dev) return (BHND_BUS_RESET_CORE(device_get_parent(dev), child, flags)); dinfo = device_get_ivars(child); /* Can't reset the core without access to the CFG0 registers */ if (dinfo->cfg[0] == NULL) return (ENODEV); // TODO - perform reset return (ENXIO); } static int siba_suspend_core(device_t dev, device_t child) { struct siba_devinfo *dinfo; /* Delegate non-bus-attached devices to our parent */ if (device_get_parent(child) != dev) return (BHND_BUS_SUSPEND_CORE(device_get_parent(dev), child)); dinfo = device_get_ivars(child); /* Can't suspend the core without access to the CFG0 registers */ if (dinfo->cfg[0] == NULL) return (ENODEV); // TODO - perform suspend return (ENXIO); } static uint32_t siba_read_config(device_t dev, device_t child, bus_size_t offset, u_int width) { struct siba_devinfo *dinfo; rman_res_t r_size; /* Must be directly attached */ if (device_get_parent(child) != dev) return (UINT32_MAX); /* CFG0 registers must be available */ dinfo = device_get_ivars(child); if (dinfo->cfg[0] == NULL) return (UINT32_MAX); /* Offset must fall within CFG0 */ r_size = rman_get_size(dinfo->cfg[0]->res); if (r_size < offset || r_size - offset < width) return (UINT32_MAX); switch (width) { case 1: return (bhnd_bus_read_1(dinfo->cfg[0], offset)); case 2: return (bhnd_bus_read_2(dinfo->cfg[0], offset)); case 4: return (bhnd_bus_read_4(dinfo->cfg[0], offset)); } /* Unsupported */ return (UINT32_MAX); } static void siba_write_config(device_t dev, device_t child, bus_size_t offset, uint32_t val, u_int width) { struct siba_devinfo *dinfo; rman_res_t r_size; /* Must be directly attached */ if (device_get_parent(child) != dev) return; /* CFG0 registers must be available */ dinfo = device_get_ivars(child); if (dinfo->cfg[0] == NULL) return; /* Offset must fall within CFG0 */ r_size = rman_get_size(dinfo->cfg[0]->res); if (r_size < offset || r_size - offset < width) return; switch (width) { case 1: bhnd_bus_write_1(dinfo->cfg[0], offset, val); break; case 2: bhnd_bus_write_2(dinfo->cfg[0], offset, val); break; case 4: bhnd_bus_write_4(dinfo->cfg[0], offset, val); break; } } static u_int siba_get_port_count(device_t dev, device_t child, bhnd_port_type type) { struct siba_devinfo *dinfo; /* delegate non-bus-attached devices to our parent */ if (device_get_parent(child) != dev) return (BHND_BUS_GET_PORT_COUNT(device_get_parent(dev), child, type)); dinfo = device_get_ivars(child); return (siba_addrspace_port_count(dinfo)); } static u_int siba_get_region_count(device_t dev, device_t child, bhnd_port_type type, u_int port) { struct siba_devinfo *dinfo; /* delegate non-bus-attached devices to our parent */ if (device_get_parent(child) != dev) return (BHND_BUS_GET_REGION_COUNT(device_get_parent(dev), child, type, port)); dinfo = device_get_ivars(child); if (!siba_is_port_valid(dinfo, type, port)) return (0); return (siba_addrspace_region_count(dinfo, port)); } static int siba_get_port_rid(device_t dev, device_t child, bhnd_port_type port_type, u_int port_num, u_int region_num) { struct siba_devinfo *dinfo; struct siba_addrspace *addrspace; /* delegate non-bus-attached devices to our parent */ if (device_get_parent(child) != dev) return (BHND_BUS_GET_PORT_RID(device_get_parent(dev), child,
port_type, port_num, region_num)); dinfo = device_get_ivars(child); addrspace = siba_find_addrspace(dinfo, port_type, port_num, region_num); if (addrspace == NULL) return (-1); return (addrspace->sa_rid); } static int siba_decode_port_rid(device_t dev, device_t child, int type, int rid, bhnd_port_type *port_type, u_int *port_num, u_int *region_num) { struct siba_devinfo *dinfo; /* delegate non-bus-attached devices to our parent */ if (device_get_parent(child) != dev) return (BHND_BUS_DECODE_PORT_RID(device_get_parent(dev), child, type, rid, port_type, port_num, region_num)); dinfo = device_get_ivars(child); /* Ports are always memory mapped */ if (type != SYS_RES_MEMORY) return (EINVAL); for (int i = 0; i < dinfo->core_id.num_addrspace; i++) { if (dinfo->addrspace[i].sa_rid != rid) continue; *port_type = BHND_PORT_DEVICE; *port_num = siba_addrspace_port(i); *region_num = siba_addrspace_region(i); return (0); } /* Not found */ return (ENOENT); } static int siba_get_region_addr(device_t dev, device_t child, bhnd_port_type port_type, u_int port_num, u_int region_num, bhnd_addr_t *addr, bhnd_size_t *size) { struct siba_devinfo *dinfo; struct siba_addrspace *addrspace; /* delegate non-bus-attached devices to our parent */ if (device_get_parent(child) != dev) { return (BHND_BUS_GET_REGION_ADDR(device_get_parent(dev), child, port_type, port_num, region_num, addr, size)); } dinfo = device_get_ivars(child); addrspace = siba_find_addrspace(dinfo, port_type, port_num, region_num); if (addrspace == NULL) return (ENOENT); *addr = addrspace->sa_base; *size = addrspace->sa_size - addrspace->sa_bus_reserved; return (0); } /** * Register all address space mappings for @p di. * * @param dev The siba bus device. * @param di The device info instance on which to register all address * space entries. * @param r A resource mapping the enumeration table block for @p di. */ static int siba_register_addrspaces(device_t dev, struct siba_devinfo *di, struct resource *r) { struct siba_core_id *cid; uint32_t addr; uint32_t size; int error; cid = &di->core_id; /* Register the device address space entries */ for (uint8_t i = 0; i < di->core_id.num_addrspace; i++) { uint32_t adm; u_int adm_offset; uint32_t bus_reserved; /* Determine the register offset */ adm_offset = siba_admatch_offset(i); if (adm_offset == 0) { device_printf(dev, "addrspace %hhu is unsupported\n", i); return (ENODEV); } /* Fetch the address match register value */ adm = bus_read_4(r, adm_offset); /* Parse the value */ if ((error = siba_parse_admatch(adm, &addr, &size))) { device_printf(dev, "failed to decode address " "match register value 0x%x\n", adm); return (error); } /* If this is the device's core/enumeration address space, * reserve the Sonics configuration register blocks for the * use of our bus.
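 *
 * (That is, num_cfg_blocks * SIBA_CFG_SIZE bytes at the top of the
 * region are withheld via bus_reserved below, so that children cannot
 * map the bus-owned CFG register blocks.)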
*/ bus_reserved = 0; if (i == SIBA_CORE_ADDRSPACE) bus_reserved = cid->num_cfg_blocks * SIBA_CFG_SIZE; /* Append the region info */ error = siba_append_dinfo_region(di, i, addr, size, bus_reserved); if (error) return (error); } return (0); } static struct bhnd_devinfo * siba_alloc_bhnd_dinfo(device_t dev) { struct siba_devinfo *dinfo = siba_alloc_dinfo(dev); return ((struct bhnd_devinfo *)dinfo); } static void siba_free_bhnd_dinfo(device_t dev, struct bhnd_devinfo *dinfo) { siba_free_dinfo(dev, (struct siba_devinfo *)dinfo); } static int siba_get_core_table(device_t dev, device_t child, struct bhnd_core_info **cores, u_int *num_cores) { const struct bhnd_chipid *chipid; struct bhnd_core_info *table; struct bhnd_resource *r; int error; int rid; /* Fetch the core count from our chip identification */ chipid = BHND_BUS_GET_CHIPID(dev, dev); /* Allocate our local core table */ table = malloc(sizeof(*table) * chipid->ncores, M_BHND, M_NOWAIT); if (table == NULL) return (ENOMEM); /* Enumerate all cores. */ for (u_int i = 0; i < chipid->ncores; i++) { struct siba_core_id cid; uint32_t idhigh, idlow; /* Map the core's register block */ rid = 0; r = bhnd_alloc_resource(dev, SYS_RES_MEMORY, &rid, SIBA_CORE_ADDR(i), SIBA_CORE_ADDR(i) + SIBA_CORE_SIZE - 1, SIBA_CORE_SIZE, RF_ACTIVE); if (r == NULL) { error = ENXIO; goto failed; } /* Read the core info */ idhigh = bhnd_bus_read_4(r, SB0_REG_ABS(SIBA_CFG0_IDHIGH)); idlow = bhnd_bus_read_4(r, SB0_REG_ABS(SIBA_CFG0_IDLOW)); cid = siba_parse_core_id(idhigh, idlow, i, 0); table[i] = cid.core_info; /* Determine unit number */ for (u_int j = 0; j < i; j++) { if (table[j].vendor == table[i].vendor && table[j].device == table[i].device) table[i].unit++; } /* Release our resource */ bhnd_release_resource(dev, SYS_RES_MEMORY, rid, r); r = NULL; } /* Provide the result values (performed last to avoid modifying * cores/num_cores if enumeration failed). */ *cores = table; *num_cores = chipid->ncores; return (0); failed: if (table != NULL) free(table, M_BHND); if (r != NULL) bhnd_release_resource(dev, SYS_RES_MEMORY, rid, r); return (error); } /** * Scan the core table and add all valid discovered cores to * the bus. * * @param dev The siba bus device. * @param chipid The chip identifier, if the device does not provide a * ChipCommon core. Should be NULL otherwise. */ int siba_add_children(device_t dev, const struct bhnd_chipid *chipid) { struct bhnd_chipid ccid; struct bhnd_core_info *cores; struct siba_devinfo *dinfo; struct resource *r; int rid; int error; dinfo = NULL; cores = NULL; r = NULL; /* * Try to determine the number of device cores via the ChipCommon * identification registers. * * A small number of very early devices do not include a ChipCommon * core, in which case our caller must supply the chip identification * information via a non-NULL chipid parameter. */ if (chipid == NULL) { uint32_t idhigh, ccreg; uint16_t vendor, device; uint8_t ccrev; /* Map the first core's register block. If the ChipCommon core * exists, it will always be the first core.
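 *
 * (Core register blocks are laid out contiguously; SIBA_CORE_ADDR(i)
 * yields the base of the i'th SIBA_CORE_SIZE-sized block, as used
 * below.)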
*/ rid = 0; r = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, SIBA_CORE_ADDR(0), SIBA_CORE_ADDR(0) + SIBA_CORE_SIZE - 1, SIBA_CORE_SIZE, RF_ACTIVE); if (r == NULL) { /* Bail out if the register block could not be mapped */ error = ENXIO; goto cleanup; } /* Identify the core */ idhigh = bus_read_4(r, SB0_REG_ABS(SIBA_CFG0_IDHIGH)); vendor = SIBA_REG_GET(idhigh, IDH_VENDOR); device = SIBA_REG_GET(idhigh, IDH_DEVICE); ccrev = SIBA_IDH_CORE_REV(idhigh); if (vendor != OCP_VENDOR_BCM || device != BHND_COREID_CC) { device_printf(dev, "cannot identify device: no chipcommon core " "found\n"); error = ENXIO; goto cleanup; } /* Identify the chipset */ ccreg = bus_read_4(r, CHIPC_ID); ccid = bhnd_parse_chipid(ccreg, SIBA_ENUM_ADDR); - if (!CHIPC_NCORES_MIN_HWREV(ccrev)) { - switch (ccid.chip_id) { - case BHND_CHIPID_BCM4306: - ccid.ncores = 6; - break; - case BHND_CHIPID_BCM4704: - ccid.ncores = 9; - break; - case BHND_CHIPID_BCM5365: - /* - * BCM5365 does support ID_NUMCORE in at least - * some of its revisions, but for unknown - * reasons, Broadcom's drivers always exclude - * the ChipCommon revision (0x5) used by BCM5365 - * from the set of revisions supporting - * ID_NUMCORE, and instead supply a fixed value. - * - * Presumably, at least some of these devices - * shipped with a broken ID_NUMCORE value. - */ - ccid.ncores = 7; - break; - default: - device_printf(dev, "unable to determine core " - "count for unrecognized chipset 0x%hx\n", - ccid.chip_id); - error = ENXIO; - goto cleanup; - } + /* Fix up the core count */ + error = bhnd_chipid_fixed_ncores(&ccid, ccrev, &ccid.ncores); + if (error) { + device_printf(dev, "unable to determine core count for " + "chipset 0x%hx\n", ccid.chip_id); + goto cleanup; } chipid = &ccid; bus_release_resource(dev, SYS_RES_MEMORY, rid, r); } /* Allocate our temporary core table and enumerate all cores */ cores = malloc(sizeof(*cores) * chipid->ncores, M_BHND, M_NOWAIT); if (cores == NULL) return (ENOMEM); /* Add all cores. */ for (u_int i = 0; i < chipid->ncores; i++) { struct siba_core_id cid; device_t child; uint32_t idhigh, idlow; rman_res_t r_count, r_end, r_start; /* Map the core's register block */ rid = 0; r_start = SIBA_CORE_ADDR(i); r_count = SIBA_CORE_SIZE; r_end = r_start + SIBA_CORE_SIZE - 1; r = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, r_start, r_end, r_count, RF_ACTIVE); if (r == NULL) { error = ENXIO; goto cleanup; } /* Add the child device */ child = BUS_ADD_CHILD(dev, 0, NULL, -1); if (child == NULL) { error = ENXIO; goto cleanup; } /* Read the core info */ idhigh = bus_read_4(r, SB0_REG_ABS(SIBA_CFG0_IDHIGH)); idlow = bus_read_4(r, SB0_REG_ABS(SIBA_CFG0_IDLOW)); cid = siba_parse_core_id(idhigh, idlow, i, 0); cores[i] = cid.core_info; /* Determine unit number */ for (u_int j = 0; j < i; j++) { if (cores[j].vendor == cores[i].vendor && cores[j].device == cores[i].device) cores[i].unit++; } /* Initialize per-device bus info */ if ((dinfo = device_get_ivars(child)) == NULL) { error = ENXIO; goto cleanup; } if ((error = siba_init_dinfo(dev, dinfo, &cid))) goto cleanup; /* Register the core's address space(s). */ if ((error = siba_register_addrspaces(dev, dinfo, r))) goto cleanup; /* If pins are floating or the hardware is otherwise * unpopulated, the device shouldn't be used. */ if (bhnd_is_hw_disabled(child)) device_disable(child); /* Release our resource */ bus_release_resource(dev, SYS_RES_MEMORY, rid, r); r = NULL; /* Issue bus callback for fully initialized child.
*/ BHND_BUS_CHILD_ADDED(dev, child); } cleanup: if (cores != NULL) free(cores, M_BHND); if (r != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rid, r); return (error); } static device_method_t siba_methods[] = { /* Device interface */ DEVMETHOD(device_probe, siba_probe), DEVMETHOD(device_attach, siba_attach), DEVMETHOD(device_detach, siba_detach), DEVMETHOD(device_resume, siba_resume), DEVMETHOD(device_suspend, siba_suspend), /* Bus interface */ DEVMETHOD(bus_read_ivar, siba_read_ivar), DEVMETHOD(bus_write_ivar, siba_write_ivar), DEVMETHOD(bus_get_resource_list, siba_get_resource_list), /* BHND interface */ DEVMETHOD(bhnd_bus_find_hostb_device, siba_find_hostb_device), DEVMETHOD(bhnd_bus_get_core_table, siba_get_core_table), DEVMETHOD(bhnd_bus_alloc_devinfo, siba_alloc_bhnd_dinfo), DEVMETHOD(bhnd_bus_free_devinfo, siba_free_bhnd_dinfo), DEVMETHOD(bhnd_bus_reset_core, siba_reset_core), DEVMETHOD(bhnd_bus_suspend_core, siba_suspend_core), DEVMETHOD(bhnd_bus_read_config, siba_read_config), DEVMETHOD(bhnd_bus_write_config, siba_write_config), DEVMETHOD(bhnd_bus_get_port_count, siba_get_port_count), DEVMETHOD(bhnd_bus_get_region_count, siba_get_region_count), DEVMETHOD(bhnd_bus_get_port_rid, siba_get_port_rid), DEVMETHOD(bhnd_bus_decode_port_rid, siba_decode_port_rid), DEVMETHOD(bhnd_bus_get_region_addr, siba_get_region_addr), DEVMETHOD_END }; DEFINE_CLASS_1(bhnd, siba_driver, siba_methods, sizeof(struct siba_softc), bhnd_driver); MODULE_VERSION(siba, 1); MODULE_DEPEND(siba, bhnd, 1, 1, 1); Index: projects/clang390-import/sys/dev/hyperv/netvsc/hv_net_vsc.h =================================================================== --- projects/clang390-import/sys/dev/hyperv/netvsc/hv_net_vsc.h (revision 305016) +++ projects/clang390-import/sys/dev/hyperv/netvsc/hv_net_vsc.h (revision 305017) @@ -1,407 +1,411 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ /* * Hyper-V vmbus (virtual machine bus) network VSC (virtual services client) * header file * * (Updated from unencumbered NvspProtocol.h) */ #ifndef __HV_NET_VSC_H__ #define __HV_NET_VSC_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include + #define HN_USE_TXDESC_BUFRING MALLOC_DECLARE(M_NETVSC); #define NVSP_INVALID_PROTOCOL_VERSION (0xFFFFFFFF) #define NVSP_PROTOCOL_VERSION_1 2 #define NVSP_PROTOCOL_VERSION_2 0x30002 #define NVSP_PROTOCOL_VERSION_4 0x40000 #define NVSP_PROTOCOL_VERSION_5 0x50000 #define NVSP_MIN_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_1) #define NVSP_MAX_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_2) #define NVSP_PROTOCOL_VERSION_CURRENT NVSP_PROTOCOL_VERSION_2 #define VERSION_4_OFFLOAD_SIZE 22 #define NVSP_OPERATIONAL_STATUS_OK (0x00000000) #define NVSP_OPERATIONAL_STATUS_DEGRADED (0x00000001) #define NVSP_OPERATIONAL_STATUS_NONRECOVERABLE (0x00000002) #define NVSP_OPERATIONAL_STATUS_NO_CONTACT (0x00000003) #define NVSP_OPERATIONAL_STATUS_LOST_COMMUNICATION (0x00000004) /* * Maximum number of transfer pages (packets) the VSP will use on a receive */ #define NVSP_MAX_PACKETS_PER_RECEIVE 375 /* vRSS stuff */ #define RNDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88 #define RNDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89 #define RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2 #define RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2 struct rndis_obj_header { uint8_t type; uint8_t rev; uint16_t size; } __packed; /* rndis_recv_scale_cap/cap_flag */ #define RNDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000 #define RNDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000 #define RNDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000 #define RNDIS_RSS_CAPS_USING_MSI_X 0x08000000 #define RNDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000 #define RNDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400 /* RNDIS_RECEIVE_SCALE_CAPABILITIES */ struct rndis_recv_scale_cap { struct rndis_obj_header hdr; uint32_t cap_flag; uint32_t num_int_msg; uint32_t num_recv_que; uint16_t num_indirect_tabent; } __packed; /* rndis_recv_scale_param flags */ #define RNDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001 #define RNDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002 #define RNDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004 #define RNDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008 #define RNDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010 /* Hash info bits */ #define RNDIS_HASH_FUNC_TOEPLITZ 0x00000001 #define RNDIS_HASH_IPV4 0x00000100 #define RNDIS_HASH_TCP_IPV4 0x00000200 #define RNDIS_HASH_IPV6 0x00000400 #define RNDIS_HASH_IPV6_EX 0x00000800 #define RNDIS_HASH_TCP_IPV6 0x00001000 #define RNDIS_HASH_TCP_IPV6_EX 0x00002000 #define RNDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4) #define RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40 #define ITAB_NUM 128 #define HASH_KEYLEN RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 /* RNDIS_RECEIVE_SCALE_PARAMETERS */ typedef struct rndis_recv_scale_param_ { struct rndis_obj_header hdr; /* Qualifies the rest of the information */ uint16_t flag; /* The base CPU number to do receive processing.
not used */ uint16_t base_cpu_number; /* This describes the hash function and type being enabled */ uint32_t hashinfo; /* The size of indirection table array */ uint16_t indirect_tabsize; /* The offset of the indirection table from the beginning of this * structure */ uint32_t indirect_taboffset; /* The size of the hash secret key */ uint16_t hashkey_size; /* The offset of the secret key from the beginning of this structure */ uint32_t hashkey_offset; uint32_t processor_masks_offset; uint32_t num_processor_masks; uint32_t processor_masks_entry_size; } rndis_recv_scale_param; /* * The following arguably belongs in a separate header file */ /* * Defines */ #define NETVSC_SEND_BUFFER_SIZE (1024*1024*15) /* 15M */ #define NETVSC_SEND_BUFFER_ID 0xface #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ #define NETVSC_RECEIVE_BUFFER_ID 0xcafe #define NETVSC_RECEIVE_SG_COUNT 1 /* Preallocated receive packets */ #define NETVSC_RECEIVE_PACKETLIST_COUNT 256 /* * Maximum MTU we permit to be configured for a netvsc interface. * When the code was developed, a max MTU of 12232 was tested and * proven to work. 9K is a reasonable maximum for an Ethernet. */ #define NETVSC_MAX_CONFIGURABLE_MTU (9 * 1024) #define NETVSC_PACKET_SIZE PAGE_SIZE #define VRSS_SEND_TABLE_SIZE 16 /* * Data types */ struct vmbus_channel; typedef void (*pfn_on_send_rx_completion)(struct vmbus_channel *, void *); #define NETVSC_DEVICE_RING_BUFFER_SIZE (128 * PAGE_SIZE) #define NETVSC_PACKET_MAXPAGE 32 #define NETVSC_VLAN_PRIO_MASK 0xe000 #define NETVSC_VLAN_PRIO_SHIFT 13 #define NETVSC_VLAN_VID_MASK 0x0fff #define TYPE_IPV4 2 #define TYPE_IPV6 4 #define TYPE_TCP 2 #define TYPE_UDP 4 #define TRANSPORT_TYPE_NOT_IP 0 #define TRANSPORT_TYPE_IPV4_TCP ((TYPE_IPV4 << 16) | TYPE_TCP) #define TRANSPORT_TYPE_IPV4_UDP ((TYPE_IPV4 << 16) | TYPE_UDP) #define TRANSPORT_TYPE_IPV6_TCP ((TYPE_IPV6 << 16) | TYPE_TCP) #define TRANSPORT_TYPE_IPV6_UDP ((TYPE_IPV6 << 16) | TYPE_UDP) typedef struct { uint8_t mac_addr[6]; /* Assumption unsigned long */ uint8_t link_state; } netvsc_device_info; #define HN_XACT_REQ_PGCNT 2 #define HN_XACT_RESP_PGCNT 2 #define HN_XACT_REQ_SIZE (HN_XACT_REQ_PGCNT * PAGE_SIZE) #define HN_XACT_RESP_SIZE (HN_XACT_RESP_PGCNT * PAGE_SIZE) #ifndef HN_USE_TXDESC_BUFRING struct hn_txdesc; SLIST_HEAD(hn_txdesc_list, hn_txdesc); #else struct buf_ring; #endif struct hn_tx_ring; struct hn_rx_ring { struct ifnet *hn_ifp; struct hn_tx_ring *hn_txr; void *hn_rdbuf; uint8_t *hn_rxbuf; /* shadow sc->hn_rxbuf */ int hn_rx_idx; /* Trust csum verification on host side */ int hn_trust_hcsum; /* HN_TRUST_HCSUM_ */ struct lro_ctrl hn_lro; u_long hn_csum_ip; u_long hn_csum_tcp; u_long hn_csum_udp; u_long hn_csum_trusted; u_long hn_lro_tried; u_long hn_small_pkts; u_long hn_pkts; u_long hn_rss_pkts; /* Rarely used stuffs */ struct sysctl_oid *hn_rx_sysctl_tree; int hn_rx_flags; } __aligned(CACHE_LINE_SIZE); #define HN_TRUST_HCSUM_IP 0x0001 #define HN_TRUST_HCSUM_TCP 0x0002 #define HN_TRUST_HCSUM_UDP 0x0004 #define HN_RX_FLAG_ATTACHED 0x1 struct hn_tx_ring { #ifndef HN_USE_TXDESC_BUFRING struct mtx hn_txlist_spin; struct hn_txdesc_list hn_txlist; #else struct buf_ring *hn_txdesc_br; #endif int hn_txdesc_cnt; int hn_txdesc_avail; u_short hn_has_txeof; u_short hn_txdone_cnt; int hn_sched_tx; void (*hn_txeof)(struct hn_tx_ring *); struct taskqueue *hn_tx_taskq; struct task hn_tx_task; struct task hn_txeof_task; struct buf_ring *hn_mbuf_br; int hn_oactive; int hn_tx_idx; struct mtx 
hn_tx_lock; struct hn_softc *hn_sc; struct vmbus_channel *hn_chan; int hn_direct_tx_size; int hn_chim_size; bus_dma_tag_t hn_tx_data_dtag; uint64_t hn_csum_assist; int hn_gpa_cnt; struct vmbus_gpa hn_gpa[NETVSC_PACKET_MAXPAGE]; u_long hn_no_txdescs; u_long hn_send_failed; u_long hn_txdma_failed; u_long hn_tx_collapsed; u_long hn_tx_chimney_tried; u_long hn_tx_chimney; u_long hn_pkts; /* Rarely used stuffs */ struct hn_txdesc *hn_txdesc; bus_dma_tag_t hn_tx_rndis_dtag; struct sysctl_oid *hn_tx_sysctl_tree; int hn_tx_flags; } __aligned(CACHE_LINE_SIZE); #define HN_TX_FLAG_ATTACHED 0x1 /* * Device-specific softc structure */ typedef struct hn_softc { struct ifnet *hn_ifp; struct ifmedia hn_media; device_t hn_dev; uint8_t hn_unit; int hn_carrier; int hn_if_flags; struct mtx hn_lock; int hn_initdone; /* See hv_netvsc_drv_freebsd.c for rules on how to use */ int temp_unusable; struct rndis_device_ *rndis_dev; struct vmbus_channel *hn_prichan; int hn_rx_ring_cnt; int hn_rx_ring_inuse; struct hn_rx_ring *hn_rx_ring; int hn_tx_ring_cnt; int hn_tx_ring_inuse; struct hn_tx_ring *hn_tx_ring; uint8_t *hn_chim; u_long *hn_chim_bmap; int hn_chim_bmap_cnt; int hn_chim_cnt; int hn_chim_szmax; int hn_cpu; struct taskqueue *hn_tx_taskq; struct sysctl_oid *hn_tx_sysctl_tree; struct sysctl_oid *hn_rx_sysctl_tree; struct vmbus_xact_ctx *hn_xact; uint32_t hn_nvs_ver; uint32_t hn_flags; void *hn_rxbuf; uint32_t hn_rxbuf_gpadl; struct hyperv_dma hn_rxbuf_dma; uint32_t hn_chim_gpadl; struct hyperv_dma hn_chim_dma; uint32_t hn_rndis_rid; uint32_t hn_ndis_ver; + + struct ndis_rssprm_toeplitz hn_rss; } hn_softc_t; #define HN_FLAG_RXBUF_CONNECTED 0x0001 #define HN_FLAG_CHIM_CONNECTED 0x0002 /* * Externs */ extern int hv_promisc_mode; struct hn_send_ctx; void netvsc_linkstatus_callback(struct hn_softc *sc, uint32_t status); int hv_nv_on_device_add(struct hn_softc *sc, struct hn_rx_ring *rxr); int hv_nv_on_device_remove(struct hn_softc *sc, boolean_t destroy_channel); int hv_nv_on_send(struct vmbus_channel *chan, uint32_t rndis_mtype, struct hn_send_ctx *sndc, struct vmbus_gpa *gpa, int gpa_cnt); void hv_nv_subchan_attach(struct vmbus_channel *chan, struct hn_rx_ring *rxr); #endif /* __HV_NET_VSC_H__ */ Index: projects/clang390-import/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c =================================================================== --- projects/clang390-import/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (revision 305016) +++ projects/clang390-import/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (revision 305017) @@ -1,3096 +1,3098 @@ /*- * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2004-2006 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include "hv_net_vsc.h" -#include "hv_rndis.h" -#include "hv_rndis_filter.h" +#include +#include +#include +#include + #include "vmbus_if.h" /* Short for Hyper-V network interface */ #define NETVSC_DEVNAME "hn" /* * It looks like offset 0 of buf is reserved to hold the softc pointer. * The sc pointer evidently not needed, and is not presently populated. * The packet offset is where the netvsc_packet starts in the buffer. 
*/ #define HV_NV_SC_PTR_OFFSET_IN_BUF 0 #define HV_NV_PACKET_OFFSET_IN_BUF 16 /* YYY should get it from the underlying channel */ #define HN_TX_DESC_CNT 512 #define HN_LROENT_CNT_DEF 128 #define HN_RING_CNT_DEF_MAX 8 #define HN_RNDIS_MSG_LEN \ (sizeof(rndis_msg) + \ RNDIS_HASHVAL_PPI_SIZE + \ RNDIS_VLAN_PPI_SIZE + \ RNDIS_TSO_PPI_SIZE + \ RNDIS_CSUM_PPI_SIZE) #define HN_RNDIS_MSG_BOUNDARY PAGE_SIZE #define HN_RNDIS_MSG_ALIGN CACHE_LINE_SIZE #define HN_TX_DATA_BOUNDARY PAGE_SIZE #define HN_TX_DATA_MAXSIZE IP_MAXPACKET #define HN_TX_DATA_SEGSIZE PAGE_SIZE #define HN_TX_DATA_SEGCNT_MAX \ (NETVSC_PACKET_MAXPAGE - HV_RF_NUM_TX_RESERVED_PAGE_BUFS) #define HN_DIRECT_TX_SIZE_DEF 128 #define HN_EARLY_TXEOF_THRESH 8 struct hn_txdesc { #ifndef HN_USE_TXDESC_BUFRING SLIST_ENTRY(hn_txdesc) link; #endif struct mbuf *m; struct hn_tx_ring *txr; int refs; uint32_t flags; /* HN_TXD_FLAG_ */ struct hn_send_ctx send_ctx; bus_dmamap_t data_dmap; bus_addr_t rndis_msg_paddr; rndis_msg *rndis_msg; bus_dmamap_t rndis_msg_dmap; }; #define HN_TXD_FLAG_ONLIST 0x1 #define HN_TXD_FLAG_DMAMAP 0x2 /* * Only enable UDP checksum offloading when it is on 2012R2 or * later. UDP checksum offloading doesn't work on earlier * Windows releases. */ #define HN_CSUM_ASSIST_WIN8 (CSUM_IP | CSUM_TCP) #define HN_CSUM_ASSIST (CSUM_IP | CSUM_UDP | CSUM_TCP) #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) /* YYY 2*MTU is a bit rough, but should be good enough. */ #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) #define HN_LRO_ACKCNT_DEF 1 /* * Be aware that this sleepable mutex will exhibit WITNESS errors when * certain TCP and ARP code paths are taken. This appears to be a * well-known condition, as all other drivers checked use a sleeping * mutex to protect their transmit paths. * Also be aware that mutexes do not play well with semaphores, and there * is a conflicting semaphore in a certain channel code path. */ #define NV_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF) #define NV_LOCK(_sc) mtx_lock(&(_sc)->hn_lock) #define NV_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->hn_lock, MA_OWNED) #define NV_UNLOCK(_sc) mtx_unlock(&(_sc)->hn_lock) #define NV_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->hn_lock) /* * Globals */ int hv_promisc_mode = 0; /* normal mode by default */ SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Hyper-V network interface"); /* Trust tcp segment verification on host side. */ static int hn_trust_hosttcp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, &hn_trust_hosttcp, 0, "Trust tcp segment verification on host side, " "when csum info is missing (global setting)"); /* Trust udp datagram verification on host side. */ static int hn_trust_hostudp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, &hn_trust_hostudp, 0, "Trust udp datagram verification on host side, " "when csum info is missing (global setting)"); /* Trust ip packet verification on host side.
*/ static int hn_trust_hostip = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, &hn_trust_hostip, 0, "Trust ip packet verification on host side, " "when csum info is missing (global setting)"); #if __FreeBSD_version >= 1100045 /* Limit TSO burst size */ static int hn_tso_maxlen = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, &hn_tso_maxlen, 0, "TSO burst limit"); #endif /* Limit chimney send size */ static int hn_tx_chimney_size = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, &hn_tx_chimney_size, 0, "Chimney send packet size limit"); /* Limit the size of packet for direct transmission */ static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 static int hn_lro_entry_count = HN_LROENT_CNT_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, &hn_lro_entry_count, 0, "LRO entry count"); #endif #endif static int hn_share_tx_taskq = 0; SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN, &hn_share_tx_taskq, 0, "Enable shared TX taskqueue"); static struct taskqueue *hn_tx_taskq; #ifndef HN_USE_TXDESC_BUFRING static int hn_use_txdesc_bufring = 0; #else static int hn_use_txdesc_bufring = 1; #endif SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); static int hn_bind_tx_taskq = -1; SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN, &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu"); static int hn_use_if_start = 0; SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, &hn_use_if_start, 0, "Use if_start TX method"); static int hn_chan_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, &hn_chan_cnt, 0, "# of channels to use; each channel has one RX ring and one TX ring"); static int hn_tx_ring_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, &hn_tx_ring_cnt, 0, "# of TX rings to use"); static int hn_tx_swq_depth = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); #if __FreeBSD_version >= 1100095 static u_int hn_lro_mbufq_depth = 0; SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue"); #endif static u_int hn_cpu_index; /* * Forward declarations */ static void hn_stop(hn_softc_t *sc); static void hn_ifinit_locked(hn_softc_t *sc); static void hn_ifinit(void *xsc); static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int hn_start_locked(struct hn_tx_ring *txr, int len); static void hn_start(struct ifnet *ifp); static void hn_start_txeof(struct hn_tx_ring *); static int hn_ifmedia_upd(struct ifnet *ifp); static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); static int hn_check_iplen(const struct mbuf *, int); static int hn_create_tx_ring(struct 
hn_softc *, int); static void hn_destroy_tx_ring(struct hn_tx_ring *); static int hn_create_tx_data(struct hn_softc *, int); static void hn_destroy_tx_data(struct hn_softc *); static void hn_start_taskfunc(void *, int); static void hn_start_txeof_taskfunc(void *, int); static void hn_stop_tx_tasks(struct hn_softc *); static int hn_encap(struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **); static int hn_create_rx_data(struct hn_softc *sc, int); static void hn_destroy_rx_data(struct hn_softc *sc); static void hn_set_chim_size(struct hn_softc *, int); static void hn_channel_attach(struct hn_softc *, struct vmbus_channel *); static void hn_subchan_attach(struct hn_softc *, struct vmbus_channel *); static void hn_subchan_setup(struct hn_softc *); static int hn_transmit(struct ifnet *, struct mbuf *); static void hn_xmit_qflush(struct ifnet *); static int hn_xmit(struct hn_tx_ring *, int); static void hn_xmit_txeof(struct hn_tx_ring *); static void hn_xmit_taskfunc(void *, int); static void hn_xmit_txeof_taskfunc(void *, int); #if __FreeBSD_version >= 1100099 static void hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) { int i; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; } #endif static int hn_get_txswq_depth(const struct hn_tx_ring *txr) { KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); if (hn_tx_swq_depth < txr->hn_txdesc_cnt) return txr->hn_txdesc_cnt; return hn_tx_swq_depth; } static int hn_ifmedia_upd(struct ifnet *ifp __unused) { return EOPNOTSUPP; } static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct hn_softc *sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!sc->hn_carrier) { ifmr->ifm_active |= IFM_NONE; return; } ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_10G_T | IFM_FDX; } /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ static const struct hyperv_guid g_net_vsc_device_type = { .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} }; /* * Standard probe entry point. * */ static int netvsc_probe(device_t dev) { if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &g_net_vsc_device_type) == 0) { device_set_desc(dev, "Hyper-V Network Interface"); return BUS_PROBE_DEFAULT; } return ENXIO; } /* * Standard attach entry point. * * Called when the driver is loaded. It allocates needed resources, * and initializes the "hardware" and software. 
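 */

/*
 * The VMBUS device type above is stored in Hyper-V's mixed-endian wire
 * layout: the first three fields of {F8615163-DF3E-46c5-913F-F2D2F965ED0E}
 * are little-endian, the last two stay in byte order.  A self-contained
 * sketch (guid_to_text() is hypothetical) that formats such a 16-byte
 * array back into canonical text; illustrative only, not part of the
 * driver:
 */
#if 0
#include <stdio.h>
#include <stdint.h>

static void
guid_to_text(const uint8_t g[16], char *buf, size_t len)
{
	snprintf(buf, len,
	    "%02X%02X%02X%02X-%02X%02X-%02X%02X-%02X%02X-"
	    "%02X%02X%02X%02X%02X%02X",
	    g[3], g[2], g[1], g[0],	/* 32-bit field, little-endian */
	    g[5], g[4],			/* 16-bit field, little-endian */
	    g[7], g[6],			/* 16-bit field, little-endian */
	    g[8], g[9],			/* clock sequence, byte order */
	    g[10], g[11], g[12], g[13], g[14], g[15]);	/* node, byte order */
}
#endif

/* netvsc_attach() follows.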
*/ static int netvsc_attach(device_t dev) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; netvsc_device_info device_info; hn_softc_t *sc; int unit = device_get_unit(dev); struct ifnet *ifp = NULL; int error, ring_cnt, tx_ring_cnt; #if __FreeBSD_version >= 1100045 int tso_maxlen; #endif sc = device_get_softc(dev); sc->hn_unit = unit; sc->hn_dev = dev; sc->hn_prichan = vmbus_get_channel(dev); if (hn_tx_taskq == NULL) { sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&sc->hn_tx_taskq, 1, PI_NET, &cpu_set, "%s tx", device_get_nameunit(dev)); } else { taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx", device_get_nameunit(dev)); } } else { sc->hn_tx_taskq = hn_tx_taskq; } NV_LOCK_INIT(sc, "NetVSCLock"); ifp = sc->hn_ifp = if_alloc(IFT_ETHER); ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); /* * Figure out the # of RX rings (ring_cnt) and the # of TX rings * to use (tx_ring_cnt). * * NOTE: * The # of RX rings to use is same as the # of channels to use. */ ring_cnt = hn_chan_cnt; if (ring_cnt <= 0) { /* Default */ ring_cnt = mp_ncpus; if (ring_cnt > HN_RING_CNT_DEF_MAX) ring_cnt = HN_RING_CNT_DEF_MAX; } else if (ring_cnt > mp_ncpus) { ring_cnt = mp_ncpus; } tx_ring_cnt = hn_tx_ring_cnt; if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) tx_ring_cnt = ring_cnt; if (hn_use_if_start) { /* ifnet.if_start only needs one TX ring. */ tx_ring_cnt = 1; } /* * Set the leader CPU for channels. */ sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; error = hn_create_tx_data(sc, tx_ring_cnt); if (error) goto failed; error = hn_create_rx_data(sc, ring_cnt); if (error) goto failed; /* * Associate the first TX/RX ring w/ the primary channel. */ hn_channel_attach(sc, sc->hn_prichan); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = hn_ioctl; ifp->if_init = hn_ifinit; /* needed by hv_rf_on_device_add() code */ ifp->if_mtu = ETHERMTU; if (hn_use_if_start) { int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); ifp->if_start = hn_start; IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); ifp->if_snd.ifq_drv_maxlen = qdepth - 1; IFQ_SET_READY(&ifp->if_snd); } else { ifp->if_transmit = hn_transmit; ifp->if_qflush = hn_xmit_qflush; } ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); /* XXX ifmedia_set really should do this for us */ sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; /* * Tell upper layers that we support full VLAN capability. 
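 */

/*
 * The ring sizing earlier in netvsc_attach() reduces to a few clamps.
 * A distilled sketch of the same policy (pick_ring_counts() and its
 * parameter names are local to this sketch); illustrative only, not
 * part of the driver:
 */
#if 0
static void
pick_ring_counts(int chan_tunable, int txr_tunable, int ncpus,
    int use_if_start, int *ring_cnt, int *tx_ring_cnt)
{
	/*
	 * The # of RX rings equals the # of channels; the default is
	 * min(ncpus, HN_RING_CNT_DEF_MAX).
	 */
	*ring_cnt = chan_tunable;
	if (*ring_cnt <= 0) {
		*ring_cnt = ncpus;
		if (*ring_cnt > HN_RING_CNT_DEF_MAX)
			*ring_cnt = HN_RING_CNT_DEF_MAX;
	} else if (*ring_cnt > ncpus) {
		*ring_cnt = ncpus;
	}

	/* TX rings can never outnumber the channels. */
	*tx_ring_cnt = txr_tunable;
	if (*tx_ring_cnt <= 0 || *tx_ring_cnt > *ring_cnt)
		*tx_ring_cnt = *ring_cnt;

	/* The legacy if_start path is single-queue by design. */
	if (use_if_start)
		*tx_ring_cnt = 1;
}
#endif

/* Full VLAN capability is advertised below, per the note above.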
*/ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO; ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist | CSUM_TSO; sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); if (sc->hn_xact == NULL) goto failed; error = hv_rf_on_device_add(sc, &device_info, &ring_cnt, &sc->hn_rx_ring[0]); if (error) goto failed; KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_inuse, ("invalid channel count %d, should be less than %d", ring_cnt, sc->hn_rx_ring_inuse)); /* * Set the # of TX/RX rings that could be used according to * the # of channels that host offered. */ if (sc->hn_tx_ring_inuse > ring_cnt) sc->hn_tx_ring_inuse = ring_cnt; sc->hn_rx_ring_inuse = ring_cnt; device_printf(dev, "%d TX ring, %d RX ring\n", sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); if (sc->hn_rx_ring_inuse > 1) hn_subchan_setup(sc); #if __FreeBSD_version >= 1100099 if (sc->hn_rx_ring_inuse > 1) { /* * Reduce TCP segment aggregation limit for multiple * RX rings to increase ACK timeliness. */ hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); } #endif - if (device_info.link_state == 0) { + if (device_info.link_state == NDIS_MEDIA_STATE_CONNECTED) { sc->hn_carrier = 1; } #if __FreeBSD_version >= 1100045 tso_maxlen = hn_tso_maxlen; if (tso_maxlen <= 0 || tso_maxlen > IP_MAXPACKET) tso_maxlen = IP_MAXPACKET; ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); #endif ether_ifattach(ifp, device_info.mac_addr); #if __FreeBSD_version >= 1100045 if_printf(ifp, "TSO: %u/%u/%u\n", ifp->if_hw_tsomax, ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); #endif hn_set_chim_size(sc, sc->hn_chim_szmax); if (hn_tx_chimney_size > 0 && hn_tx_chimney_size < sc->hn_chim_szmax) hn_set_chim_size(sc, hn_tx_chimney_size); ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, &sc->hn_nvs_ver, 0, "NVS version"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_ndis_version_sysctl, "A", "NDIS version"); return (0); failed: hn_destroy_tx_data(sc); if (ifp != NULL) if_free(ifp); return (error); } /* * Standard detach entry point */ static int netvsc_detach(device_t dev) { struct hn_softc *sc = device_get_softc(dev); if (bootverbose) printf("netvsc_detach\n"); /* * XXXKYS: Need to clean up all our * driver state; this is the driver * unloading. */ /* * XXXKYS: Need to stop outgoing traffic and unregister * the netdevice. 
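 */

/*
 * The TSO limits configured in netvsc_attach() above amount to:
 * clamp the hw.hn.tso_maxlen tunable to IP_MAXPACKET, then reserve
 * room for the largest Ethernet header.  Worked out as a sketch
 * (hn_tso_burst_limit() is hypothetical); illustrative only, not part
 * of the driver:
 */
#if 0
static u_int
hn_tso_burst_limit(int tunable)
{
	int tso_maxlen = tunable;

	if (tso_maxlen <= 0 || tso_maxlen > IP_MAXPACKET)
		tso_maxlen = IP_MAXPACKET;	/* 65535 */

	/* e.g. 65535 - (14 + 4) = 65517 bytes per TSO burst */
	return (tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
}
#endif

/* Device teardown continues below.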
*/ hv_rf_on_device_remove(sc, HV_RF_NV_DESTROY_CHANNEL); hn_stop_tx_tasks(sc); ifmedia_removeall(&sc->hn_media); hn_destroy_rx_data(sc); hn_destroy_tx_data(sc); if (sc->hn_tx_taskq != hn_tx_taskq) taskqueue_free(sc->hn_tx_taskq); vmbus_xact_ctx_destroy(sc->hn_xact); return (0); } /* * Standard shutdown entry point */ static int netvsc_shutdown(device_t dev) { return (0); } static __inline int hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) { struct mbuf *m = *m_head; int error; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { struct mbuf *m_new; m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); if (m_new == NULL) return ENOBUFS; else *m_head = m = m_new; txr->hn_tx_collapsed++; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); } if (!error) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_PREWRITE); txd->flags |= HN_TXD_FLAG_DMAMAP; } return error; } static __inline void hn_txdesc_dmamap_unload(struct hn_tx_ring *txr, struct hn_txdesc *txd) { if (txd->flags & HN_TXD_FLAG_DMAMAP) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->hn_tx_data_dtag, txd->data_dmap); txd->flags &= ~HN_TXD_FLAG_DMAMAP; } } static __inline int hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, ("put an onlist txd %#x", txd->flags)); KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); if (atomic_fetchadd_int(&txd->refs, -1) != 1) return 0; hn_txdesc_dmamap_unload(txr, txd); if (txd->m != NULL) { m_freem(txd->m); txd->m = NULL; } txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); KASSERT(txr->hn_txdesc_avail >= 0 && txr->hn_txdesc_avail < txr->hn_txdesc_cnt, ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail++; SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); mtx_unlock_spin(&txr->hn_txlist_spin); #else atomic_add_int(&txr->hn_txdesc_avail, 1); buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif return 1; } static __inline struct hn_txdesc * hn_txdesc_get(struct hn_tx_ring *txr) { struct hn_txdesc *txd; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); txd = SLIST_FIRST(&txr->hn_txlist); if (txd != NULL) { KASSERT(txr->hn_txdesc_avail > 0, ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail--; SLIST_REMOVE_HEAD(&txr->hn_txlist, link); } mtx_unlock_spin(&txr->hn_txlist_spin); #else txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); #endif if (txd != NULL) { #ifdef HN_USE_TXDESC_BUFRING atomic_subtract_int(&txr->hn_txdesc_avail, 1); #endif KASSERT(txd->m == NULL && txd->refs == 0 && (txd->flags & HN_TXD_FLAG_ONLIST), ("invalid txd")); txd->flags &= ~HN_TXD_FLAG_ONLIST; txd->refs = 1; } return txd; } static __inline void hn_txdesc_hold(struct hn_txdesc *txd) { /* 0->1 transition will never work */ KASSERT(txd->refs > 0, ("invalid refs %d", txd->refs)); atomic_add_int(&txd->refs, 1); } static __inline void hn_txeof(struct hn_tx_ring *txr) { txr->hn_has_txeof = 0; txr->hn_txeof(txr); } static void hn_tx_done(struct hn_send_ctx *sndc, struct hn_softc *sc, struct vmbus_channel *chan, const void *data __unused, int dlen __unused) { struct hn_txdesc *txd = sndc->hn_cbarg; struct hn_tx_ring *txr; if (sndc->hn_chim_idx != HN_NVS_CHIM_IDX_INVALID) hn_chim_free(sc, 
sndc->hn_chim_idx); txr = txd->txr; KASSERT(txr->hn_chan == chan, ("channel mismatch, on chan%u, should be chan%u", vmbus_chan_subidx(chan), vmbus_chan_subidx(txr->hn_chan))); txr->hn_has_txeof = 1; hn_txdesc_put(txr, txd); ++txr->hn_txdone_cnt; if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { txr->hn_txdone_cnt = 0; if (txr->hn_oactive) hn_txeof(txr); } } void netvsc_channel_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) { #if defined(INET) || defined(INET6) tcp_lro_flush_all(&rxr->hn_lro); #endif /* * NOTE: * 'txr' could be NULL, if multiple channels and * ifnet.if_start method are enabled. */ if (txr == NULL || !txr->hn_has_txeof) return; txr->hn_txdone_cnt = 0; hn_txeof(txr); } /* * NOTE: * If this function fails, then both txd and m_head0 will be freed. */ static int hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) { bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; int error, nsegs, i; struct mbuf *m_head = *m_head0; rndis_msg *rndis_mesg; rndis_packet *rndis_pkt; rndis_per_packet_info *rppi; struct rndis_hash_value *hash_value; uint32_t rndis_msg_size, tot_data_buf_len, send_buf_section_idx; int send_buf_section_size; tot_data_buf_len = m_head->m_pkthdr.len; /* * extension points to the area reserved for the * rndis_filter_packet, which is placed just after * the netvsc_packet (and rppi struct, if present; * length is updated later). */ rndis_mesg = txd->rndis_msg; /* XXX not necessary */ memset(rndis_mesg, 0, HN_RNDIS_MSG_LEN); rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG; rndis_pkt = &rndis_mesg->msg.packet; rndis_pkt->data_offset = sizeof(rndis_packet); rndis_pkt->data_length = tot_data_buf_len; rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet); rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet); /* * Set the hash value for this packet, so that the host could * dispatch the TX done event for this packet back to this TX * ring's channel. 
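 */

/*
 * The txdesc lifetime management above is a plain reference count:
 * hn_txdesc_get() hands out a descriptor with refs == 1,
 * hn_txdesc_hold() bumps it, and hn_txdesc_put() only recycles the
 * descriptor when the atomic decrement drops the last reference.  A
 * self-contained sketch of that release pattern with C11 atomics;
 * illustrative only, not part of the driver:
 */
#if 0
#include <stdatomic.h>
#include <stdbool.h>

struct ref_obj {
	atomic_int refs;
};

/* Returns true when the caller dropped the final reference. */
static bool
ref_put(struct ref_obj *o)
{
	/* atomic_fetch_sub() returns the value before the decrement. */
	if (atomic_fetch_sub(&o->refs, 1) != 1)
		return (false);
	/* Last reference: safe to tear down or recycle the object. */
	return (true);
}
#endif

/* Hash-value PPI setup follows.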
*/ rndis_msg_size += RNDIS_HASHVAL_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_HASHVAL_PPI_SIZE, nbl_hash_value); hash_value = (struct rndis_hash_value *)((uint8_t *)rppi + rppi->per_packet_info_offset); hash_value->hash_value = txr->hn_tx_idx; if (m_head->m_flags & M_VLANTAG) { ndis_8021q_info *rppi_vlan_info; rndis_msg_size += RNDIS_VLAN_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE, ieee_8021q_info); rppi_vlan_info = (ndis_8021q_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); rppi_vlan_info->u1.s1.vlan_id = m_head->m_pkthdr.ether_vtag & 0xfff; } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { rndis_tcp_tso_info *tso_info; struct ether_vlan_header *eh; int ether_len; /* * XXX need m_pullup and use mtodo */ eh = mtod(m_head, struct ether_vlan_header*); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; else ether_len = ETHER_HDR_LEN; rndis_msg_size += RNDIS_TSO_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_TSO_PPI_SIZE, tcp_large_send_info); tso_info = (rndis_tcp_tso_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); tso_info->lso_v2_xmit.type = RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; #ifdef INET if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { struct ip *ip = (struct ip *)(m_head->m_data + ether_len); unsigned long iph_len = ip->ip_hl << 2; struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iph_len); tso_info->lso_v2_xmit.ip_version = RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; ip->ip_len = 0; ip->ip_sum = 0; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { struct ip6_hdr *ip6 = (struct ip6_hdr *) (m_head->m_data + ether_len); struct tcphdr *th = (struct tcphdr *)(ip6 + 1); tso_info->lso_v2_xmit.ip_version = RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; ip6->ip6_plen = 0; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); } #endif tso_info->lso_v2_xmit.tcp_header_offset = 0; tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz; } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { rndis_tcp_ip_csum_info *csum_info; rndis_msg_size += RNDIS_CSUM_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_CSUM_PPI_SIZE, tcpip_chksum_info); csum_info = (rndis_tcp_ip_csum_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); csum_info->xmit.is_ipv4 = 1; if (m_head->m_pkthdr.csum_flags & CSUM_IP) csum_info->xmit.ip_header_csum = 1; if (m_head->m_pkthdr.csum_flags & CSUM_TCP) { csum_info->xmit.tcp_csum = 1; csum_info->xmit.tcp_header_offset = 0; } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { csum_info->xmit.udp_csum = 1; } } rndis_mesg->msg_len = tot_data_buf_len + rndis_msg_size; tot_data_buf_len = rndis_mesg->msg_len; /* * Chimney send, if the packet could fit into one chimney buffer. */ if (tot_data_buf_len < txr->hn_chim_size) { txr->hn_tx_chimney_tried++; send_buf_section_idx = hn_chim_alloc(txr->hn_sc); if (send_buf_section_idx != HN_NVS_CHIM_IDX_INVALID) { uint8_t *dest = txr->hn_sc->hn_chim + (send_buf_section_idx * txr->hn_sc->hn_chim_szmax); memcpy(dest, rndis_mesg, rndis_msg_size); dest += rndis_msg_size; m_copydata(m_head, 0, m_head->m_pkthdr.len, dest); send_buf_section_size = tot_data_buf_len; txr->hn_gpa_cnt = 0; txr->hn_tx_chimney++; goto done; } } error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); if (error) { int freed; /* * This mbuf is not linked w/ the txd yet, so free it now. 
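 */

/*
 * The chimney decision earlier in hn_encap() is: if the RNDIS header
 * plus the whole frame fits in one pre-posted send-buffer section,
 * copy both there and skip the scatter/gather path entirely.  The
 * core of it, condensed as a sketch (hn_chim_alloc() and the buffer
 * layout are as in the driver); illustrative only:
 */
#if 0
	if (rndis_msg_size + m_head->m_pkthdr.len < txr->hn_chim_size) {
		uint32_t idx = hn_chim_alloc(txr->hn_sc);

		if (idx != HN_NVS_CHIM_IDX_INVALID) {
			uint8_t *dst = txr->hn_sc->hn_chim +
			    idx * txr->hn_sc->hn_chim_szmax;

			memcpy(dst, rndis_mesg, rndis_msg_size);
			m_copydata(m_head, 0, m_head->m_pkthdr.len,
			    dst + rndis_msg_size);
			/* No gather entries are needed for chimney sends. */
			txr->hn_gpa_cnt = 0;
		}
	}
#endif

/* On DMA-load failure the mbuf is freed, per the note above.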
*/ m_freem(m_head); *m_head0 = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon txdma error")); txr->hn_txdma_failed++; if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1); return error; } *m_head0 = m_head; txr->hn_gpa_cnt = nsegs + HV_RF_NUM_TX_RESERVED_PAGE_BUFS; /* send packet with page buffer */ txr->hn_gpa[0].gpa_page = atop(txd->rndis_msg_paddr); txr->hn_gpa[0].gpa_ofs = txd->rndis_msg_paddr & PAGE_MASK; txr->hn_gpa[0].gpa_len = rndis_msg_size; /* * Fill the page buffers with mbuf info starting at index * HV_RF_NUM_TX_RESERVED_PAGE_BUFS. */ for (i = 0; i < nsegs; ++i) { struct vmbus_gpa *gpa = &txr->hn_gpa[ i + HV_RF_NUM_TX_RESERVED_PAGE_BUFS]; gpa->gpa_page = atop(segs[i].ds_addr); gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; gpa->gpa_len = segs[i].ds_len; } send_buf_section_idx = HN_NVS_CHIM_IDX_INVALID; send_buf_section_size = 0; done: txd->m = m_head; /* Set the completion routine */ hn_send_ctx_init(&txd->send_ctx, hn_tx_done, txd, send_buf_section_idx, send_buf_section_size); return 0; } /* * NOTE: * If this function fails, then txd will be freed, but the mbuf * associated w/ the txd will _not_ be freed. */ static int hn_send_pkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) { int error, send_failed = 0; again: /* * Make sure that txd is not freed before ETHER_BPF_MTAP. */ hn_txdesc_hold(txd); error = hv_nv_on_send(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA, &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt); if (!error) { ETHER_BPF_MTAP(ifp, txd->m); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (!hn_use_if_start) { if_inc_counter(ifp, IFCOUNTER_OBYTES, txd->m->m_pkthdr.len); if (txd->m->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); } txr->hn_pkts++; } hn_txdesc_put(txr, txd); if (__predict_false(error)) { int freed; /* * This should "really rarely" happen. * * XXX Too many RX to be acked or too many sideband * commands to run? Ask netvsc_channel_rollup() * to kick start later. */ txr->hn_has_txeof = 1; if (!send_failed) { txr->hn_send_failed++; send_failed = 1; /* * Try sending again after set hn_has_txeof; * in case that we missed the last * netvsc_channel_rollup(). */ goto again; } if_printf(ifp, "send failed\n"); /* * Caller will perform further processing on the * associated mbuf, so don't free it in hn_txdesc_put(); * only unload it from the DMA map in hn_txdesc_put(), * if it was loaded. */ txd->m = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon send error")); txr->hn_send_failed++; } return error; } /* * Start a transmit of one or more packets */ static int hn_start_locked(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(hn_use_if_start, ("hn_start_locked is called, when if_start is disabled")); KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); mtx_assert(&txr->hn_tx_lock, MA_OWNED); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return 0; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { struct hn_txdesc *txd; struct mbuf *m_head; int error; IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. 
*/ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); return 1; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } error = hn_encap(txr, txd, &m_head); if (error) { /* Both txd and m_head are freed */ continue; } error = hn_send_pkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } return 0; } /* * Link up/down notification */ void netvsc_linkstatus_callback(struct hn_softc *sc, uint32_t status) { if (status == 1) { sc->hn_carrier = 1; } else { sc->hn_carrier = 0; } } /* * Append the specified data to the indicated mbuf chain, * Extend the mbuf chain if the new data does not fit in * existing space. * * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. * There should be an equivalent in the kernel mbuf code, * but there does not appear to be one yet. * * Differs from m_append() in that additional mbufs are * allocated with cluster size MJUMPAGESIZE, and filled * accordingly. * * Return 1 if able to complete the job; otherwise 0. */ static int hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) { struct mbuf *m, *n; int remainder, space; for (m = m0; m->m_next != NULL; m = m->m_next) ; remainder = len; space = M_TRAILINGSPACE(m); if (space > 0) { /* * Copy into available space. */ if (space > remainder) space = remainder; bcopy(cp, mtod(m, caddr_t) + m->m_len, space); m->m_len += space; cp += space; remainder -= space; } while (remainder > 0) { /* * Allocate a new mbuf; could check space * and allocate a cluster instead. */ n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); if (n == NULL) break; n->m_len = min(MJUMPAGESIZE, remainder); bcopy(cp, mtod(n, caddr_t), n->m_len); cp += n->m_len; remainder -= n->m_len; m->m_next = n; m = n; } if (m0->m_flags & M_PKTHDR) m0->m_pkthdr.len += len - remainder; return (remainder == 0); } #if defined(INET) || defined(INET6) static __inline int hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) { #if __FreeBSD_version >= 1100095 if (hn_lro_mbufq_depth) { tcp_lro_queue_mbuf(lc, m); return 0; } #endif return tcp_lro_rx(lc, m, 0); } #endif /* * Called when we receive a data packet from the "wire" on the * specified device * * Note: This is no longer used as a callback */ int netvsc_recv(struct hn_rx_ring *rxr, const void *data, int dlen, const struct hn_recvinfo *info) { struct ifnet *ifp = rxr->hn_ifp; struct mbuf *m_new; int size, do_lro = 0, do_csum = 1; int hash_type = M_HASHTYPE_OPAQUE_HASH; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return (0); /* * Bail out if packet contains more data than configured MTU. */ if (dlen > (ifp->if_mtu + ETHER_HDR_LEN)) { return (0); } else if (dlen <= MHLEN) { m_new = m_gethdr(M_NOWAIT, MT_DATA); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } memcpy(mtod(m_new, void *), data, dlen); m_new->m_pkthdr.len = m_new->m_len = dlen; rxr->hn_small_pkts++; } else { /* * Get an mbuf with a cluster. For packets 2K or less, * get a standard 2K cluster. For anything larger, get a * 4K cluster. Any buffers larger than 4K can cause problems * if looped around to the Hyper-V TX channel, so avoid them. 
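 */

/*
 * hv_m_append() above is the generic "fill trailing space, then chain
 * fixed-size buffers" pattern.  The same idea outside the mbuf world,
 * over a simple chained-buffer type (chunk and chain_append() are
 * hypothetical); a self-contained sketch, illustrative only:
 */
#if 0
#include <stdlib.h>
#include <string.h>

#define CHUNK_SIZE	4096	/* analogous to MJUMPAGESIZE */

struct chunk {
	struct chunk	*next;
	size_t		 used;
	char		 data[CHUNK_SIZE];
};

/*
 * Append len bytes at the tail, growing the chain as needed.
 * Returns the number of bytes left uncopied (0 on full success),
 * mirroring hv_m_append()'s partial-copy semantics.
 */
static size_t
chain_append(struct chunk *m, const char *cp, size_t len)
{
	while (m->next != NULL)		/* find the tail */
		m = m->next;

	while (len > 0) {
		size_t space = CHUNK_SIZE - m->used;

		if (space == 0) {
			struct chunk *n = calloc(1, sizeof(*n));

			if (n == NULL)
				break;	/* report the remainder */
			m->next = n;
			m = n;
			space = CHUNK_SIZE;
		}
		if (space > len)
			space = len;
		memcpy(m->data + m->used, cp, space);
		m->used += space;
		cp += space;
		len -= space;
	}
	return (len);
}
#endif

/* Cluster size selection follows, per the note above.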
*/ size = MCLBYTES; if (dlen > MCLBYTES) { /* 4096 */ size = MJUMPAGESIZE; } m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } hv_m_append(m_new, dlen, data); } m_new->m_pkthdr.rcvif = ifp; if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0)) do_csum = 0; /* receive side checksum offload */ if (info->csum_info != NULL) { /* IP csum offload */ if (info->csum_info->receive.ip_csum_succeeded && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); rxr->hn_csum_ip++; } /* TCP/UDP csum offload */ if ((info->csum_info->receive.tcp_csum_succeeded || info->csum_info->receive.udp_csum_succeeded) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; if (info->csum_info->receive.tcp_csum_succeeded) rxr->hn_csum_tcp++; else rxr->hn_csum_udp++; } if (info->csum_info->receive.ip_csum_succeeded && info->csum_info->receive.tcp_csum_succeeded) do_lro = 1; } else { const struct ether_header *eh; uint16_t etype; int hoff; hoff = sizeof(*eh); if (m_new->m_len < hoff) goto skip; eh = mtod(m_new, struct ether_header *); etype = ntohs(eh->ether_type); if (etype == ETHERTYPE_VLAN) { const struct ether_vlan_header *evl; hoff = sizeof(*evl); if (m_new->m_len < hoff) goto skip; evl = mtod(m_new, struct ether_vlan_header *); etype = ntohs(evl->evl_proto); } if (etype == ETHERTYPE_IP) { int pr; pr = hn_check_iplen(m_new, hoff); if (pr == IPPROTO_TCP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_TCP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } do_lro = 1; } else if (pr == IPPROTO_UDP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_UDP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } } else if (pr != IPPROTO_DONE && do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); } } } skip: if (info->vlan_info != NULL) { m_new->m_pkthdr.ether_vtag = info->vlan_info->u1.s1.vlan_id; m_new->m_flags |= M_VLANTAG; } if (info->hash_info != NULL && info->hash_value != NULL) { rxr->hn_rss_pkts++; m_new->m_pkthdr.flowid = info->hash_value->hash_value; if ((info->hash_info->hash_info & NDIS_HASH_FUNCTION_MASK) == NDIS_HASH_FUNCTION_TOEPLITZ) { uint32_t type = (info->hash_info->hash_info & NDIS_HASH_TYPE_MASK); switch (type) { case NDIS_HASH_IPV4: hash_type = M_HASHTYPE_RSS_IPV4; break; case NDIS_HASH_TCP_IPV4: hash_type = M_HASHTYPE_RSS_TCP_IPV4; break; case NDIS_HASH_IPV6: hash_type = M_HASHTYPE_RSS_IPV6; break; case NDIS_HASH_IPV6_EX: hash_type = M_HASHTYPE_RSS_IPV6_EX; break; case NDIS_HASH_TCP_IPV6: hash_type = M_HASHTYPE_RSS_TCP_IPV6; break; case NDIS_HASH_TCP_IPV6_EX: hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; break; } } } else { if (info->hash_value != NULL) { m_new->m_pkthdr.flowid = info->hash_value->hash_value; } else { m_new->m_pkthdr.flowid = rxr->hn_rx_idx; hash_type = M_HASHTYPE_OPAQUE; } } M_HASHTYPE_SET(m_new, hash_type); /* * Note: Moved RX completion back to hv_nv_on_receive() so all * messages (not just data messages) will trigger a response. 
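 */

/*
 * The receive-side checksum policy above, reduced to a decision table:
 * host-supplied csum_info wins; without it, the hw.hn.trust_host*
 * knobs decide whether the host's verification is assumed for TCP,
 * UDP, or other IP packets.  A simplified sketch of that fallback
 * using the driver's flag names (rx_fallback_csum_flags() is
 * hypothetical); illustrative only:
 */
#if 0
static int
rx_fallback_csum_flags(int proto, int do_csum, int trust_mask)
{
	if (!do_csum)		/* IFCAP_RXCSUM disabled */
		return (0);
	switch (proto) {
	case IPPROTO_TCP:
		if (trust_mask & HN_TRUST_HCSUM_TCP)
			return (CSUM_IP_CHECKED | CSUM_IP_VALID |
			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		break;
	case IPPROTO_UDP:
		if (trust_mask & HN_TRUST_HCSUM_UDP)
			return (CSUM_IP_CHECKED | CSUM_IP_VALID |
			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		break;
	default:
		if (trust_mask & HN_TRUST_HCSUM_IP)
			return (CSUM_IP_CHECKED | CSUM_IP_VALID);
		break;
	}
	return (0);
}
#endif

/* Count the packet and hand it up the stack.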
*/ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); rxr->hn_pkts++; if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { #if defined(INET) || defined(INET6) struct lro_ctrl *lro = &rxr->hn_lro; if (lro->lro_cnt) { rxr->hn_lro_tried++; if (hn_lro_rx(lro, m_new) == 0) { /* DONE! */ return 0; } } #endif } /* We're not holding the lock here, so don't release it */ (*ifp->if_input)(ifp, m_new); return (0); } /* * Rules for using sc->temp_unusable: * 1. sc->temp_unusable can only be read or written while holding NV_LOCK() * 2. code reading sc->temp_unusable under NV_LOCK(), and finding * sc->temp_unusable set, must release NV_LOCK() and exit * 3. to retain exclusive control of the interface, * sc->temp_unusable must be set by code before releasing NV_LOCK() * 4. only code setting sc->temp_unusable can clear sc->temp_unusable * 5. code setting sc->temp_unusable must eventually clear sc->temp_unusable */ /* * Standard ioctl entry point. Called when the user wants to configure * the interface. */ static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { hn_softc_t *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; #ifdef INET struct ifaddr *ifa = (struct ifaddr *)data; #endif netvsc_device_info device_info; int mask, error = 0, ring_cnt; int retry_cnt = 500; switch(cmd) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) hn_ifinit(sc); arp_ifinit(ifp, ifa); } else #endif error = ether_ioctl(ifp, cmd, data); break; case SIOCSIFMTU: /* Check MTU value change */ if (ifp->if_mtu == ifr->ifr_mtu) break; if (ifr->ifr_mtu > NETVSC_MAX_CONFIGURABLE_MTU) { error = EINVAL; break; } /* Obtain and record requested MTU */ ifp->if_mtu = ifr->ifr_mtu; #if __FreeBSD_version >= 1100099 /* * Make sure that LRO aggregation length limit is still * valid, after the MTU change. */ NV_LOCK(sc); if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); NV_UNLOCK(sc); #endif do { NV_LOCK(sc); if (!sc->temp_unusable) { sc->temp_unusable = TRUE; retry_cnt = -1; } NV_UNLOCK(sc); if (retry_cnt > 0) { retry_cnt--; DELAY(5 * 1000); } } while (retry_cnt > 0); if (retry_cnt == 0) { error = EINVAL; break; } /* We must remove and add back the device to cause the new * MTU to take effect. This includes tearing down, but not * deleting the channel, then bringing it back up. */ error = hv_rf_on_device_remove(sc, HV_RF_NV_RETAIN_CHANNEL); if (error) { NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; } /* Wait for subchannels to be destroyed */ vmbus_subchan_drain(sc->hn_prichan); ring_cnt = sc->hn_rx_ring_inuse; error = hv_rf_on_device_add(sc, &device_info, &ring_cnt, &sc->hn_rx_ring[0]); if (error) { NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; } /* # of channels can _not_ be changed */ KASSERT(sc->hn_rx_ring_inuse == ring_cnt, ("RX ring count %d and channel count %u mismatch", sc->hn_rx_ring_cnt, ring_cnt)); if (sc->hn_rx_ring_inuse > 1) { int r; /* * Skip the rings on primary channel; they are * handled by the hv_rf_on_device_add() above. 
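 */

/*
 * The temp_unusable dance above (see rules 1-5 before hn_ioctl()) is
 * a poll-with-timeout try-lock: attempt to claim the flag under
 * NV_LOCK(), back off 5ms between attempts, and give up after 500
 * tries.  Factored out as a sketch (claim_temp_unusable() is
 * hypothetical, not a driver function); illustrative only:
 */
#if 0
static int
claim_temp_unusable(hn_softc_t *sc)
{
	int retry_cnt = 500;

	do {
		NV_LOCK(sc);
		if (!sc->temp_unusable) {
			sc->temp_unusable = TRUE;	/* claimed */
			retry_cnt = -1;
		}
		NV_UNLOCK(sc);
		if (retry_cnt > 0) {
			retry_cnt--;
			DELAY(5 * 1000);	/* 5ms between attempts */
		}
	} while (retry_cnt > 0);

	/* retry_cnt == 0 means timeout; the ioctl paths return EINVAL. */
	return (retry_cnt != 0);
}
#endif

/* The subchannel rings are reattached below, per the note above.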
*/ for (r = 1; r < sc->hn_rx_ring_cnt; ++r) { sc->hn_rx_ring[r].hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; } for (r = 1; r < sc->hn_tx_ring_cnt; ++r) { sc->hn_tx_ring[r].hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; } hn_subchan_setup(sc); } if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) hn_set_chim_size(sc, sc->hn_chim_szmax); hn_ifinit_locked(sc); NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; case SIOCSIFFLAGS: do { NV_LOCK(sc); if (!sc->temp_unusable) { sc->temp_unusable = TRUE; retry_cnt = -1; } NV_UNLOCK(sc); if (retry_cnt > 0) { retry_cnt--; DELAY(5 * 1000); } } while (retry_cnt > 0); if (retry_cnt == 0) { error = EINVAL; break; } if (ifp->if_flags & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. */ #ifdef notyet /* Fixme: Promiscuous mode? */ if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->hn_if_flags & IFF_PROMISC)) { /* do something here for Hyper-V */ } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->hn_if_flags & IFF_PROMISC) { /* do something here for Hyper-V */ } else #endif hn_ifinit_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { hn_stop(sc); } } NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); sc->hn_if_flags = ifp->if_flags; error = 0; break; case SIOCSIFCAP: NV_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= sc->hn_tx_ring[0].hn_csum_assist; } else { ifp->if_hwassist &= ~sc->hn_tx_ring[0].hn_csum_assist; } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; else ifp->if_hwassist &= ~CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; else ifp->if_hwassist &= ~CSUM_IP6_TSO; } NV_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: #ifdef notyet /* Fixme: Multicast mode? 
*/ if (ifp->if_drv_flags & IFF_DRV_RUNNING) { NV_LOCK(sc); netvsc_setmulti(sc); NV_UNLOCK(sc); error = 0; } #endif error = EINVAL; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } /* * */ static void hn_stop(hn_softc_t *sc) { struct ifnet *ifp; int ret, i; ifp = sc->hn_ifp; if (bootverbose) printf(" Closing Device ...\n"); atomic_clear_int(&ifp->if_drv_flags, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; if_link_state_change(ifp, LINK_STATE_DOWN); sc->hn_initdone = 0; ret = hv_rf_on_close(sc); } /* * FreeBSD transmit entry point */ static void hn_start(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } static void hn_start_txeof(struct hn_tx_ring *txr) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the OACTIVE earlier, with the hope, that * others could catch up. The task will clear the * flag again with the hn_tx_lock to avoid possible * races. */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } /* * */ static void hn_ifinit_locked(hn_softc_t *sc) { struct ifnet *ifp; int ret, i; ifp = sc->hn_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { return; } hv_promisc_mode = 1; ret = hv_rf_on_open(sc); if (ret != 0) { return; } else { sc->hn_initdone = 1; } atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); if_link_state_change(ifp, LINK_STATE_UP); } /* * */ static void hn_ifinit(void *xsc) { hn_softc_t *sc = xsc; NV_LOCK(sc); if (sc->temp_unusable) { NV_UNLOCK(sc); return; } sc->temp_unusable = TRUE; NV_UNLOCK(sc); hn_ifinit_locked(sc); NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); } #ifdef LATER /* * */ static void hn_watchdog(struct ifnet *ifp) { hn_softc_t *sc; sc = ifp->if_softc; printf("hn%d: watchdog timeout -- resetting\n", sc->hn_unit); hn_ifinit(sc); /*???*/ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } #endif #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; unsigned int lenlim; int error; lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; error = sysctl_handle_int(oidp, &lenlim, 0, req); if (error || req->newptr == NULL) return error; if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || lenlim > TCP_LRO_LENGTH_MAX) return EINVAL; NV_LOCK(sc); hn_set_lro_lenlim(sc, lenlim); NV_UNLOCK(sc); return 0; } static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ackcnt, error, i; /* * lro_ackcnt_lim is append count limit, * +1 to turn it into aggregation limit. 
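 */

/*
 * Worked example of the conversion below: with lro_ackcnt_lim == 1
 * (HN_LRO_ACKCNT_DEF), userland reads "2", i.e. at most two ACKs are
 * aggregated; writing "2" stores 1 again.  As a pair of hypothetical
 * helpers; illustrative only:
 */
#if 0
static int ackcnt_to_user(int append_lim)	{ return (append_lim + 1); }
static int ackcnt_from_user(int agg_lim)	{ return (agg_lim - 1); }
#endif

/* Convert to the user-visible aggregation limit, as noted above.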
*/ ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; error = sysctl_handle_int(oidp, &ackcnt, 0, req); if (error || req->newptr == NULL) return error; if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) return EINVAL; /* * Convert aggregation limit back to append * count limit. */ --ackcnt; NV_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_inuse; ++i) sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; NV_UNLOCK(sc); return 0; } #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int hcsum = arg2; int on, error, i; on = 0; if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) on = 1; error = sysctl_handle_int(oidp, &on, 0, req); if (error || req->newptr == NULL) return error; NV_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (on) rxr->hn_trust_hcsum |= hcsum; else rxr->hn_trust_hcsum &= ~hcsum; } NV_UNLOCK(sc); return 0; } static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int chim_size, error; chim_size = sc->hn_tx_ring[0].hn_chim_size; error = sysctl_handle_int(oidp, &chim_size, 0, req); if (error || req->newptr == NULL) return error; if (chim_size > sc->hn_chim_szmax || chim_size <= 0) return EINVAL; hn_set_chim_size(sc, chim_size); return 0; } static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; u_long stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((u_long *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; *((u_long *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; uint64_t stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((uint64_t *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_64(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_tx_ring *txr; u_long stat; stat = 0; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; stat += *((u_long *)((uint8_t *)txr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. 
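 */

/*
 * All of the stat handlers above share one trick: the sysctl's arg2
 * is a byte offset into the per-ring structure, so a single handler
 * can sum (and reset) any u_long or uint64_t field across rings.  The
 * core of it as a self-contained sketch with a hypothetical ring
 * type; illustrative only:
 */
#if 0
#include <stddef.h>
#include <stdint.h>

struct ring {
	unsigned long	pkts;
	unsigned long	drops;
};

static unsigned long
sum_ring_stat(struct ring *rings, int nrings, size_t ofs)
{
	unsigned long total = 0;
	int i;

	for (i = 0; i < nrings; ++i)
		total += *(unsigned long *)((uint8_t *)&rings[i] + ofs);
	return (total);
}

/* usage: sum_ring_stat(rings, n, offsetof(struct ring, drops)) */
#endif

/* Zero the per-ring copies of the stat, as noted above.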
 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];
		*((u_long *)((uint8_t *)txr + ofs)) = 0;
	}
	return 0;
}

static int
hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	int ofs = arg2, i, error, conf;
	struct hn_tx_ring *txr;

	txr = &sc->hn_tx_ring[0];
	conf = *((int *)((uint8_t *)txr + ofs));

	error = sysctl_handle_int(oidp, &conf, 0, req);
	if (error || req->newptr == NULL)
		return error;

	NV_LOCK(sc);
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];
		*((int *)((uint8_t *)txr + ofs)) = conf;
	}
	NV_UNLOCK(sc);

	return 0;
}

static int
hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	char verstr[16];

	snprintf(verstr, sizeof(verstr), "%u.%u",
	    NDIS_VERSION_MAJOR(sc->hn_ndis_ver),
	    NDIS_VERSION_MINOR(sc->hn_ndis_ver));
	return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
}

static int
hn_check_iplen(const struct mbuf *m, int hoff)
{
	const struct ip *ip;
	int len, iphlen, iplen;
	const struct tcphdr *th;
	int thoff;				/* TCP data offset */

	len = hoff + sizeof(struct ip);

	/* The packet must be at least the size of an IP header. */
	if (m->m_pkthdr.len < len)
		return IPPROTO_DONE;

	/* The fixed IP header must reside completely in the first mbuf. */
	if (m->m_len < len)
		return IPPROTO_DONE;

	ip = mtodo(m, hoff);

	/* Bound check the packet's stated IP header length. */
	iphlen = ip->ip_hl << 2;
	if (iphlen < sizeof(struct ip))		/* minimum header length */
		return IPPROTO_DONE;

	/* The full IP header must reside completely in the one mbuf. */
	if (m->m_len < hoff + iphlen)
		return IPPROTO_DONE;

	iplen = ntohs(ip->ip_len);

	/*
	 * Check that the amount of data in the buffers is at least as much
	 * as the IP header would have us expect.
	 */
	if (m->m_pkthdr.len < hoff + iplen)
		return IPPROTO_DONE;

	/*
	 * Ignore IP fragments.
	 */
	if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
		return IPPROTO_DONE;

	/*
	 * The TCP/IP or UDP/IP header must be entirely contained within
	 * the first fragment of a packet.
	 */
	switch (ip->ip_p) {
	case IPPROTO_TCP:
		if (iplen < iphlen + sizeof(struct tcphdr))
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
			return IPPROTO_DONE;
		th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
		thoff = th->th_off << 2;
		if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + thoff)
			return IPPROTO_DONE;
		break;
	case IPPROTO_UDP:
		if (iplen < iphlen + sizeof(struct udphdr))
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
			return IPPROTO_DONE;
		break;
	default:
		if (iplen < iphlen)
			return IPPROTO_DONE;
		break;
	}

	return ip->ip_p;
}

static int
hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
{
	struct sysctl_oid_list *child;
	struct sysctl_ctx_list *ctx;
	device_t dev = sc->hn_dev;
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
	int lroent_cnt;
#endif
#endif
	int i;

	/*
	 * Create RXBUF for reception.
	 *
	 * NOTE:
	 * - It is shared by all channels.
	 * - A large enough buffer is allocated; certain versions of NVS
	 *   may further limit the usable space.
*/ sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), PAGE_SIZE, 0, NETVSC_RECEIVE_BUFFER_SIZE, &sc->hn_rxbuf_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->hn_rxbuf == NULL) { device_printf(sc->hn_dev, "allocate rxbuf failed\n"); return (ENOMEM); } sc->hn_rx_ring_cnt = ring_cnt; sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, M_NETVSC, M_WAITOK | M_ZERO); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 lroent_cnt = hn_lro_entry_count; if (lroent_cnt < TCP_LRO_ENTRIES) lroent_cnt = TCP_LRO_ENTRIES; device_printf(dev, "LRO: entry count %d\n", lroent_cnt); #endif #endif /* INET || INET6 */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); /* Create dev.hn.UNIT.rx sysctl tree */ sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (hn_trust_hosttcp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; if (hn_trust_hostudp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; if (hn_trust_hostip) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; rxr->hn_ifp = sc->hn_ifp; if (i < sc->hn_tx_ring_cnt) rxr->hn_txr = &sc->hn_tx_ring[i]; rxr->hn_rdbuf = malloc(NETVSC_PACKET_SIZE, M_NETVSC, M_WAITOK); rxr->hn_rx_idx = i; rxr->hn_rxbuf = sc->hn_rxbuf; /* * Initialize LRO. */ #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, hn_lro_mbufq_depth); #else tcp_lro_init(&rxr->hn_lro); rxr->hn_lro.ifp = sc->hn_ifp; #endif #if __FreeBSD_version >= 1100099 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; #endif #endif /* INET || INET6 */ if (sc->hn_rx_sysctl_tree != NULL) { char name[16]; /* * Create per RX ring sysctl tree: * dev.hn.UNIT.rx.RINGID */ snprintf(name, sizeof(name), "%d", i); rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (rxr->hn_rx_sysctl_tree != NULL) { SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "packets", CTLFLAG_RW, &rxr->hn_pkts, "# of packets received"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "rss_pkts", CTLFLAG_RW, &rxr->hn_rss_pkts, "# of packets w/ RSS info received"); } } } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_queued), hn_rx_stat_u64_sysctl, "LU", "LRO queued"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), hn_rx_stat_u64_sysctl, "LU", "LRO flushed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro_tried), hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); #if __FreeBSD_version >= 1100099 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_lenlim_sysctl, "IU", "Max # of data bytes to be aggregated by LRO"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_ackcnt_sysctl, "I", "Max # of ACKs to be aggregated by LRO"); #endif SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, 
hn_trust_hcsum_sysctl, "I", "Trust tcp segement verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, hn_trust_hcsum_sysctl, "I", "Trust udp datagram verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, hn_trust_hcsum_sysctl, "I", "Trust ip packet verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_ip), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_tcp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_udp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_trusted), hn_rx_stat_ulong_sysctl, "LU", "# of packets that we trust host's csum verification"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_small_pkts), hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); return (0); } static void hn_destroy_rx_data(struct hn_softc *sc) { int i; if (sc->hn_rxbuf != NULL) { hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); sc->hn_rxbuf = NULL; } if (sc->hn_rx_ring_cnt == 0) return; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; #if defined(INET) || defined(INET6) tcp_lro_free(&rxr->hn_lro); #endif free(rxr->hn_rdbuf, M_NETVSC); } free(sc->hn_rx_ring, M_NETVSC); sc->hn_rx_ring = NULL; sc->hn_rx_ring_cnt = 0; sc->hn_rx_ring_inuse = 0; } static int hn_create_tx_ring(struct hn_softc *sc, int id) { struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; device_t dev = sc->hn_dev; bus_dma_tag_t parent_dtag; int error, i; uint32_t version; txr->hn_sc = sc; txr->hn_tx_idx = id; #ifndef HN_USE_TXDESC_BUFRING mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); #endif mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); txr->hn_txdesc_cnt = HN_TX_DESC_CNT; txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK | M_ZERO); #ifndef HN_USE_TXDESC_BUFRING SLIST_INIT(&txr->hn_txlist); #else txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK, &txr->hn_tx_lock); #endif txr->hn_tx_taskq = sc->hn_tx_taskq; if (hn_use_if_start) { txr->hn_txeof = hn_start_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); } else { int br_depth; txr->hn_txeof = hn_xmit_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); br_depth = hn_get_txswq_depth(txr); txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_NETVSC, M_WAITOK, &txr->hn_tx_lock); } txr->hn_direct_tx_size = hn_direct_tx_size; version = 
VMBUS_GET_VERSION(device_get_parent(dev), dev); if (version >= VMBUS_VERSION_WIN8_1) { txr->hn_csum_assist = HN_CSUM_ASSIST; } else { txr->hn_csum_assist = HN_CSUM_ASSIST_WIN8; if (id == 0) { device_printf(dev, "bus version %u.%u, " "no UDP checksum offloading\n", VMBUS_VERSION_MAJOR(version), VMBUS_VERSION_MINOR(version)); } } /* * Always schedule transmission instead of trying to do direct * transmission. This one gives the best performance so far. */ txr->hn_sched_tx = 1; parent_dtag = bus_get_dma_tag(dev); /* DMA tag for RNDIS messages. */ error = bus_dma_tag_create(parent_dtag, /* parent */ HN_RNDIS_MSG_ALIGN, /* alignment */ HN_RNDIS_MSG_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_RNDIS_MSG_LEN, /* maxsize */ 1, /* nsegments */ HN_RNDIS_MSG_LEN, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_rndis_dtag); if (error) { device_printf(dev, "failed to create rndis dmatag\n"); return error; } /* DMA tag for data. */ error = bus_dma_tag_create(parent_dtag, /* parent */ 1, /* alignment */ HN_TX_DATA_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_TX_DATA_MAXSIZE, /* maxsize */ HN_TX_DATA_SEGCNT_MAX, /* nsegments */ HN_TX_DATA_SEGSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_data_dtag); if (error) { device_printf(dev, "failed to create data dmatag\n"); return error; } for (i = 0; i < txr->hn_txdesc_cnt; ++i) { struct hn_txdesc *txd = &txr->hn_txdesc[i]; txd->txr = txr; /* * Allocate and load RNDIS messages. */ error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, (void **)&txd->rndis_msg, BUS_DMA_WAITOK | BUS_DMA_COHERENT, &txd->rndis_msg_dmap); if (error) { device_printf(dev, "failed to allocate rndis_msg, %d\n", i); return error; } error = bus_dmamap_load(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap, txd->rndis_msg, HN_RNDIS_MSG_LEN, hyperv_dma_map_paddr, &txd->rndis_msg_paddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "failed to load rndis_msg, %d\n", i); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); return error; } /* DMA map for TX data. 
*/ error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, &txd->data_dmap); if (error) { device_printf(dev, "failed to allocate tx data dmamap\n"); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); return error; } /* All set, put it to list */ txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); #else buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif } txr->hn_txdesc_avail = txr->hn_txdesc_cnt; if (sc->hn_tx_sysctl_tree != NULL) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; char name[16]; /* * Create per TX ring sysctl tree: * dev.hn.UNIT.tx.RINGID */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); snprintf(name, sizeof(name), "%d", id); txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (txr->hn_tx_sysctl_tree != NULL) { child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", CTLFLAG_RD, &txr->hn_txdesc_avail, 0, "# of available TX descs"); if (!hn_use_if_start) { SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", CTLFLAG_RD, &txr->hn_oactive, 0, "over active"); } SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", CTLFLAG_RW, &txr->hn_pkts, "# of packets transmitted"); } } return 0; } static void hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) { struct hn_tx_ring *txr = txd->txr; KASSERT(txd->m == NULL, ("still has mbuf installed")); KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); } static void hn_destroy_tx_ring(struct hn_tx_ring *txr) { struct hn_txdesc *txd; if (txr->hn_txdesc == NULL) return; #ifndef HN_USE_TXDESC_BUFRING while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) { SLIST_REMOVE_HEAD(&txr->hn_txlist, link); hn_txdesc_dmamap_destroy(txd); } #else mtx_lock(&txr->hn_tx_lock); while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL) hn_txdesc_dmamap_destroy(txd); mtx_unlock(&txr->hn_tx_lock); #endif if (txr->hn_tx_data_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_data_dtag); if (txr->hn_tx_rndis_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); #ifdef HN_USE_TXDESC_BUFRING buf_ring_free(txr->hn_txdesc_br, M_NETVSC); #endif free(txr->hn_txdesc, M_NETVSC); txr->hn_txdesc = NULL; if (txr->hn_mbuf_br != NULL) buf_ring_free(txr->hn_mbuf_br, M_NETVSC); #ifndef HN_USE_TXDESC_BUFRING mtx_destroy(&txr->hn_txlist_spin); #endif mtx_destroy(&txr->hn_tx_lock); } static int hn_create_tx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; int i; /* * Create TXBUF for chimney sending. * * NOTE: It is shared by all channels. 
*/ sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), PAGE_SIZE, 0, NETVSC_SEND_BUFFER_SIZE, &sc->hn_chim_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->hn_chim == NULL) { device_printf(sc->hn_dev, "allocate txbuf failed\n"); return (ENOMEM); } sc->hn_tx_ring_cnt = ring_cnt; sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, M_NETVSC, M_WAITOK | M_ZERO); ctx = device_get_sysctl_ctx(sc->hn_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); /* Create dev.hn.UNIT.tx sysctl tree */ sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { int error; error = hn_create_tx_ring(sc, i); if (error) return error; } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_no_txdescs), hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_send_failed), hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_txdma_failed), hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_collapsed), hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, "# of total TX descs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", CTLFLAG_RD, &sc->hn_chim_szmax, 0, "Chimney send packet size upper boundary"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_chim_size_sysctl, "I", "Chimney send packet size limit"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_direct_tx_size), hn_tx_conf_int_sysctl, "I", "Size of the packet for direct transmission"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_sched_tx), hn_tx_conf_int_sysctl, "I", "Always schedule transmission " "instead of doing direct transmission"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); return 0; } static void hn_set_chim_size(struct hn_softc *sc, int chim_size) { int i; NV_LOCK(sc); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_chim_size = chim_size; NV_UNLOCK(sc); } static void hn_destroy_tx_data(struct hn_softc *sc) { int i; if (sc->hn_chim != NULL) { hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); sc->hn_chim = NULL; } if 
(sc->hn_tx_ring_cnt == 0) return; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) hn_destroy_tx_ring(&sc->hn_tx_ring[i]); free(sc->hn_tx_ring, M_NETVSC); sc->hn_tx_ring = NULL; sc->hn_tx_ring_cnt = 0; sc->hn_tx_ring_inuse = 0; } static void hn_start_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_start_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_stop_tx_tasks(struct hn_softc *sc) { int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static int hn_xmit(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; struct mbuf *m_head; mtx_assert(&txr->hn_tx_lock, MA_OWNED); KASSERT(hn_use_if_start == 0, ("hn_xmit is called, when if_start is enabled")); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) return 0; while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { struct hn_txdesc *txd; int error; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); return 1; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } error = hn_encap(txr, txd, &m_head); if (error) { /* Both txd and m_head are freed; discard */ drbr_advance(ifp, txr->hn_mbuf_br); continue; } error = hn_send_pkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } /* Sent */ drbr_advance(ifp, txr->hn_mbuf_br); } return 0; } static int hn_transmit(struct ifnet *ifp, struct mbuf *m) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr; int error, idx = 0; /* * Select the TX ring based on flowid */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; txr = &sc->hn_tx_ring[idx]; error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); if (error) { if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); return error; } if (txr->hn_oactive) return 0; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return 0; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); return 0; } static void hn_xmit_qflush(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; struct mbuf *m; mtx_lock(&txr->hn_tx_lock); while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) m_freem(m); mtx_unlock(&txr->hn_tx_lock); } if_qflush(ifp); } static void hn_xmit_txeof(struct hn_tx_ring *txr) { if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; txr->hn_oactive = 0; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the oactive earlier, with 
the hope that * others could catch up. The task will clear the * oactive again with the hn_tx_lock to avoid possible * races. */ txr->hn_oactive = 0; taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_xmit_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); txr->hn_oactive = 0; hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_channel_attach(struct hn_softc *sc, struct vmbus_channel *chan) { struct hn_rx_ring *rxr; int idx; idx = vmbus_chan_subidx(chan); KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, ("invalid channel index %d, should be >= 0 and < %d", idx, sc->hn_rx_ring_inuse)); rxr = &sc->hn_rx_ring[idx]; KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, ("RX ring %d already attached", idx)); rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; if (bootverbose) { if_printf(sc->hn_ifp, "link RX ring %d to channel %u\n", idx, vmbus_chan_id(chan)); } if (idx < sc->hn_tx_ring_inuse) { struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, ("TX ring %d already attached", idx)); txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; txr->hn_chan = chan; if (bootverbose) { if_printf(sc->hn_ifp, "link TX ring %d to channel %u\n", idx, vmbus_chan_id(chan)); } } /* Bind channel to a proper CPU */ vmbus_chan_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus); } static void hn_subchan_attach(struct hn_softc *sc, struct vmbus_channel *chan) { KASSERT(!vmbus_chan_is_primary(chan), ("subchannel callback on primary channel")); hn_channel_attach(sc, chan); } static void hn_subchan_setup(struct hn_softc *sc) { struct vmbus_channel **subchans; int subchan_cnt = sc->hn_rx_ring_inuse - 1; int i; /* Wait for sub-channels setup to complete. */ subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); /* Attach the sub-channels. */ for (i = 0; i < subchan_cnt; ++i) { struct vmbus_channel *subchan = subchans[i]; /* NOTE: Calling order is critical. 
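 * The ring-to-channel linkage established by hn_channel_attach() has to
 * be in place before the NVS layer starts feeding the sub-channel,
 * presumably so no packet can arrive on a ring that is not linked yet;
 * hence the fixed order of the two calls that follow:
 *
 *	hn_subchan_attach(sc, subchan);       - link RX/TX ring, bind CPU
 *	hv_nv_subchan_attach(subchan, rxr);   - only then attach NVS side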
*/ hn_subchan_attach(sc, subchan); hv_nv_subchan_attach(subchan, &sc->hn_rx_ring[vmbus_chan_subidx(subchan)]); } /* Release the sub-channels */ vmbus_subchan_rel(subchans, subchan_cnt); if_printf(sc->hn_ifp, "%d sub-channels setup done\n", subchan_cnt); } static void hn_tx_taskq_create(void *arg __unused) { if (!hn_share_tx_taskq) return; hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&hn_tx_taskq, 1, PI_NET, &cpu_set, "hn tx"); } else { taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx"); } } SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_FIRST, hn_tx_taskq_create, NULL); static void hn_tx_taskq_destroy(void *arg __unused) { if (hn_tx_taskq != NULL) taskqueue_free(hn_tx_taskq); } SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_FIRST, hn_tx_taskq_destroy, NULL); static device_method_t netvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, netvsc_probe), DEVMETHOD(device_attach, netvsc_attach), DEVMETHOD(device_detach, netvsc_detach), DEVMETHOD(device_shutdown, netvsc_shutdown), { 0, 0 } }; static driver_t netvsc_driver = { NETVSC_DEVNAME, netvsc_methods, sizeof(hn_softc_t) }; static devclass_t netvsc_devclass; DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0); MODULE_VERSION(hn, 1); MODULE_DEPEND(hn, vmbus, 1, 1, 1); Index: projects/clang390-import/sys/dev/hyperv/netvsc/hv_rndis.h =================================================================== --- projects/clang390-import/sys/dev/hyperv/netvsc/hv_rndis.h (revision 305016) +++ projects/clang390-import/sys/dev/hyperv/netvsc/hv_rndis.h (revision 305017) @@ -1,937 +1,923 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef __HV_RNDIS_H__ #define __HV_RNDIS_H__ #include /* * NDIS protocol version numbers */ #define NDIS_VERSION_5_0 0x00050000 #define NDIS_VERSION_5_1 0x00050001 #define NDIS_VERSION_6_0 0x00060000 #define NDIS_VERSION_6_1 0x00060001 #define NDIS_VERSION_6_30 0x0006001e #define NDIS_VERSION_MAJOR(ver) (((ver) & 0xffff0000) >> 16) #define NDIS_VERSION_MINOR(ver) ((ver) & 0xffff) /* * Object Identifiers used by NdisRequest Query/Set Information */ /* * General Objects */ #define RNDIS_OID_GEN_SUPPORTED_LIST 0x00010101 #define RNDIS_OID_GEN_HARDWARE_STATUS 0x00010102 #define RNDIS_OID_GEN_MEDIA_SUPPORTED 0x00010103 #define RNDIS_OID_GEN_MEDIA_IN_USE 0x00010104 #define RNDIS_OID_GEN_MAXIMUM_LOOKAHEAD 0x00010105 #define RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE 0x00010106 #define RNDIS_OID_GEN_LINK_SPEED 0x00010107 #define RNDIS_OID_GEN_TRANSMIT_BUFFER_SPACE 0x00010108 #define RNDIS_OID_GEN_RECEIVE_BUFFER_SPACE 0x00010109 #define RNDIS_OID_GEN_TRANSMIT_BLOCK_SIZE 0x0001010A #define RNDIS_OID_GEN_RECEIVE_BLOCK_SIZE 0x0001010B #define RNDIS_OID_GEN_VENDOR_ID 0x0001010C #define RNDIS_OID_GEN_VENDOR_DESCRIPTION 0x0001010D #define RNDIS_OID_GEN_CURRENT_PACKET_FILTER 0x0001010E #define RNDIS_OID_GEN_CURRENT_LOOKAHEAD 0x0001010F #define RNDIS_OID_GEN_DRIVER_VERSION 0x00010110 #define RNDIS_OID_GEN_MAXIMUM_TOTAL_SIZE 0x00010111 #define RNDIS_OID_GEN_PROTOCOL_OPTIONS 0x00010112 #define RNDIS_OID_GEN_MAC_OPTIONS 0x00010113 #define RNDIS_OID_GEN_MEDIA_CONNECT_STATUS 0x00010114 #define RNDIS_OID_GEN_MAXIMUM_SEND_PACKETS 0x00010115 #define RNDIS_OID_GEN_VENDOR_DRIVER_VERSION 0x00010116 #define RNDIS_OID_GEN_NETWORK_LAYER_ADDRESSES 0x00010118 #define RNDIS_OID_GEN_TRANSPORT_HEADER_OFFSET 0x00010119 #define RNDIS_OID_GEN_MACHINE_NAME 0x0001021A #define RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER 0x0001021B /* * For receive side scale */ /* Query only */ #define RNDIS_OID_GEN_RSS_CAPABILITIES 0x00010203 /* Query and set */ #define RNDIS_OID_GEN_RSS_PARAMETERS 0x00010204 #define RNDIS_OID_GEN_XMIT_OK 0x00020101 #define RNDIS_OID_GEN_RCV_OK 0x00020102 #define RNDIS_OID_GEN_XMIT_ERROR 0x00020103 #define RNDIS_OID_GEN_RCV_ERROR 0x00020104 #define RNDIS_OID_GEN_RCV_NO_BUFFER 0x00020105 #define RNDIS_OID_GEN_DIRECTED_BYTES_XMIT 0x00020201 #define RNDIS_OID_GEN_DIRECTED_FRAMES_XMIT 0x00020202 #define RNDIS_OID_GEN_MULTICAST_BYTES_XMIT 0x00020203 #define RNDIS_OID_GEN_MULTICAST_FRAMES_XMIT 0x00020204 #define RNDIS_OID_GEN_BROADCAST_BYTES_XMIT 0x00020205 #define RNDIS_OID_GEN_BROADCAST_FRAMES_XMIT 0x00020206 #define RNDIS_OID_GEN_DIRECTED_BYTES_RCV 0x00020207 #define RNDIS_OID_GEN_DIRECTED_FRAMES_RCV 0x00020208 #define RNDIS_OID_GEN_MULTICAST_BYTES_RCV 0x00020209 #define RNDIS_OID_GEN_MULTICAST_FRAMES_RCV 0x0002020A #define RNDIS_OID_GEN_BROADCAST_BYTES_RCV 0x0002020B #define RNDIS_OID_GEN_BROADCAST_FRAMES_RCV 0x0002020C #define RNDIS_OID_GEN_RCV_CRC_ERROR 0x0002020D #define RNDIS_OID_GEN_TRANSMIT_QUEUE_LENGTH 0x0002020E #define RNDIS_OID_GEN_GET_TIME_CAPS 0x0002020F #define RNDIS_OID_GEN_GET_NETCARD_TIME 0x00020210 /* * These are connection-oriented general OIDs. * These replace the above OIDs for connection-oriented media. 
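 * (Note that "replace" means the numeric values are reused: for example,
 * RNDIS_OID_GEN_CO_SUPPORTED_LIST below is 0x00010101, identical to
 * RNDIS_OID_GEN_SUPPORTED_LIST above; which interpretation applies is
 * determined by the medium being connection-oriented.)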
*/ #define RNDIS_OID_GEN_CO_SUPPORTED_LIST 0x00010101 #define RNDIS_OID_GEN_CO_HARDWARE_STATUS 0x00010102 #define RNDIS_OID_GEN_CO_MEDIA_SUPPORTED 0x00010103 #define RNDIS_OID_GEN_CO_MEDIA_IN_USE 0x00010104 #define RNDIS_OID_GEN_CO_LINK_SPEED 0x00010105 #define RNDIS_OID_GEN_CO_VENDOR_ID 0x00010106 #define RNDIS_OID_GEN_CO_VENDOR_DESCRIPTION 0x00010107 #define RNDIS_OID_GEN_CO_DRIVER_VERSION 0x00010108 #define RNDIS_OID_GEN_CO_PROTOCOL_OPTIONS 0x00010109 #define RNDIS_OID_GEN_CO_MAC_OPTIONS 0x0001010A #define RNDIS_OID_GEN_CO_MEDIA_CONNECT_STATUS 0x0001010B #define RNDIS_OID_GEN_CO_VENDOR_DRIVER_VERSION 0x0001010C #define RNDIS_OID_GEN_CO_MINIMUM_LINK_SPEED 0x0001010D #define RNDIS_OID_GEN_CO_GET_TIME_CAPS 0x00010201 #define RNDIS_OID_GEN_CO_GET_NETCARD_TIME 0x00010202 /* * These are connection-oriented statistics OIDs. */ #define RNDIS_OID_GEN_CO_XMIT_PDUS_OK 0x00020101 #define RNDIS_OID_GEN_CO_RCV_PDUS_OK 0x00020102 #define RNDIS_OID_GEN_CO_XMIT_PDUS_ERROR 0x00020103 #define RNDIS_OID_GEN_CO_RCV_PDUS_ERROR 0x00020104 #define RNDIS_OID_GEN_CO_RCV_PDUS_NO_BUFFER 0x00020105 #define RNDIS_OID_GEN_CO_RCV_CRC_ERROR 0x00020201 #define RNDIS_OID_GEN_CO_TRANSMIT_QUEUE_LENGTH 0x00020202 #define RNDIS_OID_GEN_CO_BYTES_XMIT 0x00020203 #define RNDIS_OID_GEN_CO_BYTES_RCV 0x00020204 #define RNDIS_OID_GEN_CO_BYTES_XMIT_OUTSTANDING 0x00020205 #define RNDIS_OID_GEN_CO_NETCARD_LOAD 0x00020206 /* * These are objects for Connection-oriented media call-managers. */ #define RNDIS_OID_CO_ADD_PVC 0xFF000001 #define RNDIS_OID_CO_DELETE_PVC 0xFF000002 #define RNDIS_OID_CO_GET_CALL_INFORMATION 0xFF000003 #define RNDIS_OID_CO_ADD_ADDRESS 0xFF000004 #define RNDIS_OID_CO_DELETE_ADDRESS 0xFF000005 #define RNDIS_OID_CO_GET_ADDRESSES 0xFF000006 #define RNDIS_OID_CO_ADDRESS_CHANGE 0xFF000007 #define RNDIS_OID_CO_SIGNALING_ENABLED 0xFF000008 #define RNDIS_OID_CO_SIGNALING_DISABLED 0xFF000009 /* * 802.3 Objects (Ethernet) */ #define RNDIS_OID_802_3_PERMANENT_ADDRESS 0x01010101 #define RNDIS_OID_802_3_CURRENT_ADDRESS 0x01010102 #define RNDIS_OID_802_3_MULTICAST_LIST 0x01010103 #define RNDIS_OID_802_3_MAXIMUM_LIST_SIZE 0x01010104 #define RNDIS_OID_802_3_MAC_OPTIONS 0x01010105 /* * */ #define NDIS_802_3_MAC_OPTION_PRIORITY 0x00000001 #define RNDIS_OID_802_3_RCV_ERROR_ALIGNMENT 0x01020101 #define RNDIS_OID_802_3_XMIT_ONE_COLLISION 0x01020102 #define RNDIS_OID_802_3_XMIT_MORE_COLLISIONS 0x01020103 #define RNDIS_OID_802_3_XMIT_DEFERRED 0x01020201 #define RNDIS_OID_802_3_XMIT_MAX_COLLISIONS 0x01020202 #define RNDIS_OID_802_3_RCV_OVERRUN 0x01020203 #define RNDIS_OID_802_3_XMIT_UNDERRUN 0x01020204 #define RNDIS_OID_802_3_XMIT_HEARTBEAT_FAILURE 0x01020205 #define RNDIS_OID_802_3_XMIT_TIMES_CRS_LOST 0x01020206 #define RNDIS_OID_802_3_XMIT_LATE_COLLISIONS 0x01020207 /* * RNDIS MP custom OID for test */ #define OID_RNDISMP_GET_RECEIVE_BUFFERS 0xFFA0C90D // Query only /* * Remote NDIS offload parameters */ #define RNDIS_OBJECT_TYPE_DEFAULT 0x80 #define RNDIS_OFFLOAD_PARAMETERS_REVISION_3 3 #define RNDIS_OFFLOAD_PARAMETERS_NO_CHANGE 0 #define RNDIS_OFFLOAD_PARAMETERS_LSOV2_DISABLED 1 #define RNDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_LSOV1_ENABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_RSC_DISABLED 1 #define RNDIS_OFFLOAD_PARAMETERS_RSC_ENABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED 1 #define RNDIS_OFFLOAD_PARAMETERS_TX_ENABLED_RX_DISABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_RX_ENABLED_TX_DISABLED 3 #define RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED 4 #define RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE 1 #define 
RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4 0 #define RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6 1 #define RNDIS_OID_TCP_OFFLOAD_CURRENT_CONFIG 0xFC01020B /* query only */ #define RNDIS_OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C /* set only */ #define RNDIS_OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020D/* query only */ #define RNDIS_OID_TCP_CONNECTION_OFFLOAD_CURRENT_CONFIG 0xFC01020E /* query only */ #define RNDIS_OID_TCP_CONNECTION_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020F /* query */ #define RNDIS_OID_OFFLOAD_ENCAPSULATION 0x0101010A /* set/query */ /* * NdisInitialize message */ typedef struct rndis_initialize_request_ { /* RNDIS request ID */ uint32_t request_id; uint32_t major_version; uint32_t minor_version; uint32_t max_xfer_size; } rndis_initialize_request; /* * Response to NdisInitialize */ typedef struct rndis_initialize_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; uint32_t major_version; uint32_t minor_version; uint32_t device_flags; /* RNDIS medium */ uint32_t medium; uint32_t max_pkts_per_msg; uint32_t max_xfer_size; uint32_t pkt_align_factor; uint32_t af_list_offset; uint32_t af_list_size; } rndis_initialize_complete; /* * Call manager devices only: Information about an address family * supported by the device is appended to the response to NdisInitialize. */ typedef struct rndis_co_address_family_ { /* RNDIS AF */ uint32_t address_family; uint32_t major_version; uint32_t minor_version; } rndis_co_address_family; /* * NdisHalt message */ typedef struct rndis_halt_request_ { /* RNDIS request ID */ uint32_t request_id; } rndis_halt_request; /* * NdisQueryRequest message */ typedef struct rndis_query_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS OID */ uint32_t oid; uint32_t info_buffer_length; uint32_t info_buffer_offset; /* RNDIS handle */ uint32_t device_vc_handle; } rndis_query_request; /* * Response to NdisQueryRequest */ typedef struct rndis_query_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; uint32_t info_buffer_length; uint32_t info_buffer_offset; } rndis_query_complete; /* * NdisSetRequest message */ typedef struct rndis_set_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS OID */ uint32_t oid; uint32_t info_buffer_length; uint32_t info_buffer_offset; /* RNDIS handle */ uint32_t device_vc_handle; } rndis_set_request; /* * Response to NdisSetRequest */ typedef struct rndis_set_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rndis_set_complete; /* * NdisReset message */ typedef struct rndis_reset_request_ { uint32_t reserved; } rndis_reset_request; /* * Response to NdisReset */ typedef struct rndis_reset_complete_ { /* RNDIS status */ uint32_t status; uint32_t addressing_reset; } rndis_reset_complete; /* * NdisMIndicateStatus message */ typedef struct rndis_indicate_status_ { /* RNDIS status */ uint32_t status; uint32_t status_buf_length; uint32_t status_buf_offset; } rndis_indicate_status; /* * Diagnostic information passed as the status buffer in * rndis_indicate_status messages signifying error conditions. 
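 * A sketch of how such a status buffer would be located, mirroring the
 * MESSAGE_TO_STATUS_BUFFER() macro defined later in this header
 * (assuming, as that macro does, that the offset is taken relative to
 * the message pointer):
 *
 *	buf = (uint8_t *)msg + msg->status_buf_offset;
 *	len = msg->status_buf_length;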
*/ typedef struct rndis_diagnostic_info_ { /* RNDIS status */ uint32_t diag_status; uint32_t error_offset; } rndis_diagnostic_info; /* * NdisKeepAlive message */ typedef struct rndis_keepalive_request_ { /* RNDIS request ID */ uint32_t request_id; } rndis_keepalive_request; /* * Response to NdisKeepAlive */ typedef struct rndis_keepalive_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rndis_keepalive_complete; /* * Data message. All offset fields contain byte offsets from the beginning * of the rndis_packet structure. All length fields are in bytes. * VcHandle is set to 0 for connectionless data, otherwise it * contains the VC handle. */ typedef struct rndis_packet_ { uint32_t data_offset; uint32_t data_length; uint32_t oob_data_offset; uint32_t oob_data_length; uint32_t num_oob_data_elements; uint32_t per_pkt_info_offset; uint32_t per_pkt_info_length; /* RNDIS handle */ uint32_t vc_handle; uint32_t reserved; } rndis_packet; typedef struct rndis_packet_ex_ { uint32_t data_offset; uint32_t data_length; uint32_t oob_data_offset; uint32_t oob_data_length; uint32_t num_oob_data_elements; uint32_t per_pkt_info_offset; uint32_t per_pkt_info_length; /* RNDIS handle */ uint32_t vc_handle; uint32_t reserved; uint64_t data_buf_id; uint32_t data_buf_offset; uint64_t next_header_buf_id; uint32_t next_header_byte_offset; uint32_t next_header_byte_count; } rndis_packet_ex; /* * Optional Out of Band data associated with a Data message. */ typedef struct rndis_oobd_ { uint32_t size; /* RNDIS class ID */ uint32_t type; uint32_t class_info_offset; } rndis_oobd; /* * Packet extension field contents associated with a Data message. */ typedef struct rndis_per_packet_info_ { uint32_t size; uint32_t type; uint32_t per_packet_info_offset; } rndis_per_packet_info; typedef enum ndis_per_pkt_infotype_ { tcpip_chksum_info, ipsec_info, tcp_large_send_info, classification_handle_info, ndis_reserved, sgl_info, ieee_8021q_info, original_pkt_info, pkt_cancel_id, original_netbuf_list, cached_netbuf_list, short_pkt_padding_info, max_perpkt_info } ndis_per_pkt_infotype; #define nbl_hash_value pkt_cancel_id #define nbl_hash_info original_netbuf_list typedef struct ndis_8021q_info_ { union { struct { uint32_t user_pri : 3; /* User Priority */ uint32_t cfi : 1; /* Canonical Format ID */ uint32_t vlan_id : 12; uint32_t reserved : 16; } s1; uint32_t value; } u1; } ndis_8021q_info; struct rndis_object_header { uint8_t type; uint8_t revision; uint16_t size; }; typedef struct rndis_offload_params_ { struct rndis_object_header header; uint8_t ipv4_csum; uint8_t tcp_ipv4_csum; uint8_t udp_ipv4_csum; uint8_t tcp_ipv6_csum; uint8_t udp_ipv6_csum; uint8_t lso_v1; uint8_t ip_sec_v1; uint8_t lso_v2_ipv4; uint8_t lso_v2_ipv6; uint8_t tcp_connection_ipv4; uint8_t tcp_connection_ipv6; uint32_t flags; uint8_t ip_sec_v2; uint8_t ip_sec_v2_ipv4; struct { uint8_t rsc_ipv4; uint8_t rsc_ipv6; }; struct { uint8_t encapsulated_packet_task_offload; uint8_t encapsulation_types; }; } rndis_offload_params; typedef struct rndis_tcp_ip_csum_info_ { union { struct { uint32_t is_ipv4:1; uint32_t is_ipv6:1; uint32_t tcp_csum:1; uint32_t udp_csum:1; uint32_t ip_header_csum:1; uint32_t reserved:11; uint32_t tcp_header_offset:10; } xmit; struct { uint32_t tcp_csum_failed:1; uint32_t udp_csum_failed:1; uint32_t ip_csum_failed:1; uint32_t tcp_csum_succeeded:1; uint32_t udp_csum_succeeded:1; uint32_t ip_csum_succeeded:1; uint32_t loopback:1; uint32_t tcp_csum_value_invalid:1; uint32_t ip_csum_value_invalid:1; } receive; 
uint32_t value; }; } rndis_tcp_ip_csum_info; struct rndis_hash_value { uint32_t hash_value; } __packed; struct rndis_hash_info { uint32_t hash_info; } __packed; -#define NDIS_HASH_FUNCTION_MASK 0x000000FF /* see hash function */ -#define NDIS_HASH_TYPE_MASK 0x00FFFF00 /* see hash type */ - -/* hash function */ -#define NDIS_HASH_FUNCTION_TOEPLITZ 0x00000001 - -/* hash type */ -#define NDIS_HASH_IPV4 0x00000100 -#define NDIS_HASH_TCP_IPV4 0x00000200 -#define NDIS_HASH_IPV6 0x00000400 -#define NDIS_HASH_IPV6_EX 0x00000800 -#define NDIS_HASH_TCP_IPV6 0x00001000 -#define NDIS_HASH_TCP_IPV6_EX 0x00002000 - typedef struct rndis_tcp_tso_info_ { union { struct { uint32_t unused:30; uint32_t type:1; uint32_t reserved2:1; } xmit; struct { uint32_t mss:20; uint32_t tcp_header_offset:10; uint32_t type:1; uint32_t reserved2:1; } lso_v1_xmit; struct { uint32_t tcp_payload:30; uint32_t type:1; uint32_t reserved2:1; } lso_v1_xmit_complete; struct { uint32_t mss:20; uint32_t tcp_header_offset:10; uint32_t type:1; uint32_t ip_version:1; } lso_v2_xmit; struct { uint32_t reserved:30; uint32_t type:1; uint32_t reserved2:1; } lso_v2_xmit_complete; uint32_t value; }; } rndis_tcp_tso_info; #define RNDIS_HASHVAL_PPI_SIZE (sizeof(rndis_per_packet_info) + \ sizeof(struct rndis_hash_value)) #define RNDIS_VLAN_PPI_SIZE (sizeof(rndis_per_packet_info) + \ sizeof(ndis_8021q_info)) #define RNDIS_CSUM_PPI_SIZE (sizeof(rndis_per_packet_info) + \ sizeof(rndis_tcp_ip_csum_info)) #define RNDIS_TSO_PPI_SIZE (sizeof(rndis_per_packet_info) + \ sizeof(rndis_tcp_tso_info)) /* * Format of Information buffer passed in a SetRequest for the OID * OID_GEN_RNDIS_CONFIG_PARAMETER. */ typedef struct rndis_config_parameter_info_ { uint32_t parameter_name_offset; uint32_t parameter_name_length; uint32_t parameter_type; uint32_t parameter_value_offset; uint32_t parameter_value_length; } rndis_config_parameter_info; /* * Values for ParameterType in rndis_config_parameter_info */ #define RNDIS_CONFIG_PARAM_TYPE_INTEGER 0 #define RNDIS_CONFIG_PARAM_TYPE_STRING 2 /* * CONDIS Miniport messages for connection oriented devices * that do not implement a call manager. 
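 * (Arithmetic check for the PPI size macros above: rndis_per_packet_info
 * is three uint32_t fields, i.e. 12 bytes, and rndis_tcp_ip_csum_info is
 * a single 32-bit union, so RNDIS_CSUM_PPI_SIZE works out to
 * 12 + 4 = 16 bytes; RNDIS_VLAN_PPI_SIZE is likewise 16 bytes.)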
*/ /* * CoNdisMiniportCreateVc message */ typedef struct rcondis_mp_create_vc_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS handle */ uint32_t ndis_vc_handle; } rcondis_mp_create_vc; /* * Response to CoNdisMiniportCreateVc */ typedef struct rcondis_mp_create_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS handle */ uint32_t device_vc_handle; /* RNDIS status */ uint32_t status; } rcondis_mp_create_vc_complete; /* * CoNdisMiniportDeleteVc message */ typedef struct rcondis_mp_delete_vc_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS handle */ uint32_t device_vc_handle; } rcondis_mp_delete_vc; /* * Response to CoNdisMiniportDeleteVc */ typedef struct rcondis_mp_delete_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rcondis_mp_delete_vc_complete; /* * CoNdisMiniportQueryRequest message */ typedef struct rcondis_mp_query_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS request type */ uint32_t request_type; /* RNDIS OID */ uint32_t oid; /* RNDIS handle */ uint32_t device_vc_handle; uint32_t info_buf_length; uint32_t info_buf_offset; } rcondis_mp_query_request; /* * CoNdisMiniportSetRequest message */ typedef struct rcondis_mp_set_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS request type */ uint32_t request_type; /* RNDIS OID */ uint32_t oid; /* RNDIS handle */ uint32_t device_vc_handle; uint32_t info_buf_length; uint32_t info_buf_offset; } rcondis_mp_set_request; /* * CoNdisIndicateStatus message */ typedef struct rcondis_indicate_status_ { /* RNDIS handle */ uint32_t ndis_vc_handle; /* RNDIS status */ uint32_t status; uint32_t status_buf_length; uint32_t status_buf_offset; } rcondis_indicate_status; /* * CONDIS Call/VC parameters */ typedef struct rcondis_specific_parameters_ { uint32_t parameter_type; uint32_t parameter_length; uint32_t parameter_offset; } rcondis_specific_parameters; typedef struct rcondis_media_parameters_ { uint32_t flags; uint32_t reserved1; uint32_t reserved2; rcondis_specific_parameters media_specific; } rcondis_media_parameters; typedef struct rndis_flowspec_ { uint32_t token_rate; uint32_t token_bucket_size; uint32_t peak_bandwidth; uint32_t latency; uint32_t delay_variation; uint32_t service_type; uint32_t max_sdu_size; uint32_t minimum_policed_size; } rndis_flowspec; typedef struct rcondis_call_manager_parameters_ { rndis_flowspec transmit; rndis_flowspec receive; rcondis_specific_parameters call_mgr_specific; } rcondis_call_manager_parameters; /* * CoNdisMiniportActivateVc message */ typedef struct rcondis_mp_activate_vc_request_ { /* RNDIS request ID */ uint32_t request_id; uint32_t flags; /* RNDIS handle */ uint32_t device_vc_handle; uint32_t media_params_offset; uint32_t media_params_length; uint32_t call_mgr_params_offset; uint32_t call_mgr_params_length; } rcondis_mp_activate_vc_request; /* * Response to CoNdisMiniportActivateVc */ typedef struct rcondis_mp_activate_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rcondis_mp_activate_vc_complete; /* * CoNdisMiniportDeactivateVc message */ typedef struct rcondis_mp_deactivate_vc_request_ { /* RNDIS request ID */ uint32_t request_id; uint32_t flags; /* RNDIS handle */ uint32_t device_vc_handle; } rcondis_mp_deactivate_vc_request; /* * Response to CoNdisMiniportDeactivateVc */ typedef struct rcondis_mp_deactivate_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rcondis_mp_deactivate_vc_complete; /* * 
union with all of the RNDIS messages */ typedef union rndis_msg_container_ { rndis_packet packet; rndis_initialize_request init_request; rndis_halt_request halt_request; rndis_query_request query_request; rndis_set_request set_request; rndis_reset_request reset_request; rndis_keepalive_request keepalive_request; rndis_indicate_status indicate_status; rndis_initialize_complete init_complete; rndis_query_complete query_complete; rndis_set_complete set_complete; rndis_reset_complete reset_complete; rndis_keepalive_complete keepalive_complete; rcondis_mp_create_vc co_miniport_create_vc; rcondis_mp_delete_vc co_miniport_delete_vc; rcondis_indicate_status co_miniport_status; rcondis_mp_activate_vc_request co_miniport_activate_vc; rcondis_mp_deactivate_vc_request co_miniport_deactivate_vc; rcondis_mp_create_vc_complete co_miniport_create_vc_complete; rcondis_mp_delete_vc_complete co_miniport_delete_vc_complete; rcondis_mp_activate_vc_complete co_miniport_activate_vc_complete; rcondis_mp_deactivate_vc_complete co_miniport_deactivate_vc_complete; rndis_packet_ex packet_ex; } rndis_msg_container; /* * Remote NDIS message format */ typedef struct rndis_msg_ { uint32_t ndis_msg_type; /* * Total length of this message, from the beginning * of the rndis_msg struct, in bytes. */ uint32_t msg_len; /* Actual message */ rndis_msg_container msg; } rndis_msg; /* * Handy macros */ /* * get the size of an RNDIS message. Pass in the message type, * rndis_set_request, rndis_packet for example */ #define RNDIS_MESSAGE_SIZE(message) \ (sizeof(message) + (sizeof(rndis_msg) - sizeof(rndis_msg_container))) /* * get pointer to info buffer with message pointer */ #define MESSAGE_TO_INFO_BUFFER(message) \ (((PUCHAR)(message)) + message->InformationBufferOffset) /* * get pointer to status buffer with message pointer */ #define MESSAGE_TO_STATUS_BUFFER(message) \ (((PUCHAR)(message)) + message->StatusBufferOffset) /* * get pointer to OOBD buffer with message pointer */ #define MESSAGE_TO_OOBD_BUFFER(message) \ (((PUCHAR)(message)) + message->OOBDataOffset) /* * get pointer to data buffer with message pointer */ #define MESSAGE_TO_DATA_BUFFER(message) \ (((PUCHAR)(message)) + message->PerPacketInfoOffset) /* * get pointer to contained message from NDIS_MESSAGE pointer */ #define RNDIS_MESSAGE_PTR_TO_MESSAGE_PTR(rndis_message) \ ((void *) &rndis_message->Message) /* * get pointer to contained message from NDIS_MESSAGE pointer */ #define RNDIS_MESSAGE_RAW_PTR_TO_MESSAGE_PTR(rndis_message) \ ((void *) rndis_message) /* * Structures used in OID_RNDISMP_GET_RECEIVE_BUFFERS */ #define RNDISMP_RECEIVE_BUFFER_ELEM_FLAG_VMQ_RECEIVE_BUFFER 0x00000001 typedef struct rndismp_rx_buf_elem_ { uint32_t flags; uint32_t length; uint64_t rx_buf_id; uint32_t gpadl_handle; void *rx_buf; } rndismp_rx_buf_elem; typedef struct rndismp_rx_bufs_info_ { uint32_t num_rx_bufs; rndismp_rx_buf_elem rx_buf_elems[1]; } rndismp_rx_bufs_info; #define RNDIS_HEADER_SIZE (sizeof(rndis_msg) - sizeof(rndis_msg_container)) #define NDIS_PACKET_TYPE_DIRECTED 0x00000001 #define NDIS_PACKET_TYPE_MULTICAST 0x00000002 #define NDIS_PACKET_TYPE_ALL_MULTICAST 0x00000004 #define NDIS_PACKET_TYPE_BROADCAST 0x00000008 #define NDIS_PACKET_TYPE_SOURCE_ROUTING 0x00000010 #define NDIS_PACKET_TYPE_PROMISCUOUS 0x00000020 #define NDIS_PACKET_TYPE_SMT 0x00000040 #define NDIS_PACKET_TYPE_ALL_LOCAL 0x00000080 #define NDIS_PACKET_TYPE_GROUP 0x00000100 #define NDIS_PACKET_TYPE_ALL_FUNCTIONAL 0x00000200 #define NDIS_PACKET_TYPE_FUNCTIONAL 0x00000400 #define NDIS_PACKET_TYPE_MAC_FRAME 
0x00000800 /* * Externs */ struct hn_rx_ring; struct hn_tx_ring; struct hn_recvinfo; int netvsc_recv(struct hn_rx_ring *rxr, const void *data, int dlen, const struct hn_recvinfo *info); void netvsc_channel_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr); void* hv_set_rppi_data(rndis_msg *rndis_mesg, uint32_t rppi_size, int pkt_type); void* hv_get_ppi_data(rndis_packet *rpkt, uint32_t type); #endif /* __HV_RNDIS_H__ */ Index: projects/clang390-import/sys/dev/hyperv/netvsc/hv_rndis_filter.c =================================================================== --- projects/clang390-import/sys/dev/hyperv/netvsc/hv_rndis_filter.c (revision 305016) +++ projects/clang390-import/sys/dev/hyperv/netvsc/hv_rndis_filter.c (revision 305017) @@ -1,1551 +1,1490 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define HV_RF_RECVINFO_VLAN 0x1 #define HV_RF_RECVINFO_CSUM 0x2 #define HV_RF_RECVINFO_HASHINF 0x4 #define HV_RF_RECVINFO_HASHVAL 0x8 #define HV_RF_RECVINFO_ALL \ (HV_RF_RECVINFO_VLAN | \ HV_RF_RECVINFO_CSUM | \ HV_RF_RECVINFO_HASHINF | \ HV_RF_RECVINFO_HASHVAL) #define HN_RNDIS_RID_COMPAT_MASK 0xffff #define HN_RNDIS_RID_COMPAT_MAX HN_RNDIS_RID_COMPAT_MASK #define HN_RNDIS_XFER_SIZE 2048 /* * Forward declarations */ static int hv_rf_send_request(rndis_device *device, rndis_request *request, uint32_t message_type); static void hv_rf_receive_response(rndis_device *device, const rndis_msg *response); static void hv_rf_receive_indicate_status(rndis_device *device, const rndis_msg *response); static void hv_rf_receive_data(struct hn_rx_ring *rxr, const void *data, int dlen); -static int hv_rf_query_device(rndis_device *device, uint32_t oid, - void *result, uint32_t *result_size); static inline int hv_rf_query_device_mac(rndis_device *device); static inline int hv_rf_query_device_link_status(rndis_device *device); static int hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter); static int hv_rf_init_device(rndis_device *device); static int hv_rf_open_device(rndis_device *device); static int hv_rf_close_device(rndis_device *device); int hv_rf_send_offload_request(struct hn_softc *sc, rndis_offload_params *offloads); static void hn_rndis_sent_halt(struct hn_send_ctx *sndc, struct hn_softc *sc, struct vmbus_channel *chan, const void *data, int dlen); static void hn_rndis_sent_cb(struct hn_send_ctx *sndc, struct hn_softc *sc, struct vmbus_channel *chan, const void *data, int dlen); static int hn_rndis_query(struct hn_softc *sc, uint32_t oid, const void *idata, size_t idlen, void *odata, size_t *odlen0); static int hn_rndis_set(struct hn_softc *sc, uint32_t oid, const void *data, size_t dlen); static int hn_rndis_conf_offload(struct hn_softc *sc); +static int hn_rndis_get_rsscaps(struct hn_softc *sc, int *rxr_cnt); +static int hn_rndis_conf_rss(struct hn_softc *sc, int nchan); static __inline uint32_t hn_rndis_rid(struct hn_softc *sc) { uint32_t rid; again: rid = atomic_fetchadd_int(&sc->hn_rndis_rid, 1); if (rid == 0) goto again; /* Use upper 16 bits for non-compat RNDIS messages. */ return ((rid & 0xffff) << 16); } /* * Set the Per-Packet-Info with the specified type */ void * hv_set_rppi_data(rndis_msg *rndis_mesg, uint32_t rppi_size, int pkt_type) { rndis_packet *rndis_pkt; rndis_per_packet_info *rppi; rndis_pkt = &rndis_mesg->msg.packet; rndis_pkt->data_offset += rppi_size; rppi = (rndis_per_packet_info *)((char *)rndis_pkt + rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_length); rppi->size = rppi_size; rppi->type = pkt_type; rppi->per_packet_info_offset = sizeof(rndis_per_packet_info); rndis_pkt->per_pkt_info_length += rppi_size; return (rppi); } /* * Get the Per-Packet-Info with the specified type * return NULL if not found. 
*/ void * hv_get_ppi_data(rndis_packet *rpkt, uint32_t type) { rndis_per_packet_info *ppi; int len; if (rpkt->per_pkt_info_offset == 0) return (NULL); ppi = (rndis_per_packet_info *)((unsigned long)rpkt + rpkt->per_pkt_info_offset); len = rpkt->per_pkt_info_length; while (len > 0) { if (ppi->type == type) return (void *)((unsigned long)ppi + ppi->per_packet_info_offset); len -= ppi->size; ppi = (rndis_per_packet_info *)((unsigned long)ppi + ppi->size); } return (NULL); } /* * Allocate and initialize an rndis_device, including its request list and lock. */ static inline rndis_device * hv_get_rndis_device(void) { rndis_device *device; device = malloc(sizeof(rndis_device), M_NETVSC, M_WAITOK | M_ZERO); mtx_init(&device->req_lock, "HV-FRL", NULL, MTX_DEF); /* Same effect as STAILQ_HEAD_INITIALIZER() static initializer */ STAILQ_INIT(&device->myrequest_list); device->state = RNDIS_DEV_UNINITIALIZED; return (device); } /* * Tear down and free an rndis_device. */ static inline void hv_put_rndis_device(rndis_device *device) { mtx_destroy(&device->req_lock); free(device, M_NETVSC); } /* * Allocate an rndis_request, assign it a compat request id, and append it to the device's request list. */ static inline rndis_request * hv_rndis_request(rndis_device *device, uint32_t message_type, uint32_t message_length) { rndis_request *request; rndis_msg *rndis_mesg; rndis_set_request *set; request = malloc(sizeof(rndis_request), M_NETVSC, M_WAITOK | M_ZERO); sema_init(&request->wait_sema, 0, "rndis sema"); rndis_mesg = &request->request_msg; rndis_mesg->ndis_msg_type = message_type; rndis_mesg->msg_len = message_length; /* * Set the request id. This field is always after the rndis header * for request/response packet types, so we just use the set_request * as a template. */ set = &rndis_mesg->msg.set_request; set->request_id = atomic_fetchadd_int(&device->new_request_id, 1) & HN_RNDIS_RID_COMPAT_MASK; /* Add to the request list */ mtx_lock(&device->req_lock); STAILQ_INSERT_TAIL(&device->myrequest_list, request, mylist_entry); mtx_unlock(&device->req_lock); return (request); } /* * Remove an rndis_request from the device's request list and free it. */ static inline void hv_put_rndis_request(rndis_device *device, rndis_request *request) { mtx_lock(&device->req_lock); /* Fixme: Has O(n) performance */ /* * XXXKYS: Use Doubly linked lists. 
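 * With a TAILQ (doubly linked) head and entry in place of the STAILQ,
 * the removal below would become O(1); a minimal sketch of that
 * alternative, at the cost of one extra pointer per rndis_request:
 *
 *	TAILQ_REMOVE(&device->myrequest_list, request, mylist_entry);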
*/ STAILQ_REMOVE(&device->myrequest_list, request, rndis_request_, mylist_entry); mtx_unlock(&device->req_lock); sema_destroy(&request->wait_sema); free(request, M_NETVSC); } /* * */ static int hv_rf_send_request(rndis_device *device, rndis_request *request, uint32_t message_type) { struct hn_softc *sc = device->sc; uint32_t send_buf_section_idx, tot_data_buf_len; struct vmbus_gpa gpa[2]; int gpa_cnt, send_buf_section_size; hn_sent_callback_t cb; /* Set up the packet to send it */ tot_data_buf_len = request->request_msg.msg_len; gpa_cnt = 1; gpa[0].gpa_page = hv_get_phys_addr(&request->request_msg) >> PAGE_SHIFT; gpa[0].gpa_len = request->request_msg.msg_len; gpa[0].gpa_ofs = (unsigned long)&request->request_msg & (PAGE_SIZE - 1); if (gpa[0].gpa_ofs + gpa[0].gpa_len > PAGE_SIZE) { gpa_cnt = 2; gpa[0].gpa_len = PAGE_SIZE - gpa[0].gpa_ofs; gpa[1].gpa_page = hv_get_phys_addr((char*)&request->request_msg + gpa[0].gpa_len) >> PAGE_SHIFT; gpa[1].gpa_ofs = 0; gpa[1].gpa_len = request->request_msg.msg_len - gpa[0].gpa_len; } if (message_type != REMOTE_NDIS_HALT_MSG) cb = hn_rndis_sent_cb; else cb = hn_rndis_sent_halt; if (tot_data_buf_len < sc->hn_chim_szmax) { send_buf_section_idx = hn_chim_alloc(sc); if (send_buf_section_idx != HN_NVS_CHIM_IDX_INVALID) { uint8_t *dest = sc->hn_chim + (send_buf_section_idx * sc->hn_chim_szmax); memcpy(dest, &request->request_msg, request->request_msg.msg_len); send_buf_section_size = tot_data_buf_len; gpa_cnt = 0; goto sendit; } /* Failed to allocate chimney send buffer; move on */ } send_buf_section_idx = HN_NVS_CHIM_IDX_INVALID; send_buf_section_size = 0; sendit: hn_send_ctx_init(&request->send_ctx, cb, request, send_buf_section_idx, send_buf_section_size); return hv_nv_on_send(sc->hn_prichan, HN_NVS_RNDIS_MTYPE_CTRL, &request->send_ctx, gpa, gpa_cnt); } /* * RNDIS filter receive response */ static void hv_rf_receive_response(rndis_device *device, const rndis_msg *response) { rndis_request *request = NULL; rndis_request *next_request; boolean_t found = FALSE; mtx_lock(&device->req_lock); request = STAILQ_FIRST(&device->myrequest_list); while (request != NULL) { /* * All request/response message contains request_id as the * first field */ if (request->request_msg.msg.init_request.request_id == response->msg.init_complete.request_id) { found = TRUE; break; } next_request = STAILQ_NEXT(request, mylist_entry); request = next_request; } mtx_unlock(&device->req_lock); if (found) { if (response->msg_len <= sizeof(rndis_msg)) { memcpy(&request->response_msg, response, response->msg_len); } else { request->response_msg.msg.init_complete.status = RNDIS_STATUS_BUFFER_OVERFLOW; } sema_post(&request->wait_sema); } } int hv_rf_send_offload_request(struct hn_softc *sc, rndis_offload_params *offloads) { rndis_request *request; rndis_set_request *set; rndis_offload_params *offload_req; rndis_set_complete *set_complete; rndis_device *rndis_dev = sc->rndis_dev; device_t dev = sc->hn_dev; uint32_t extlen = sizeof(rndis_offload_params); int ret; if (sc->hn_nvs_ver <= NVSP_PROTOCOL_VERSION_4) { extlen = VERSION_4_OFFLOAD_SIZE; /* On NVSP_PROTOCOL_VERSION_4 and below, we do not support * UDP checksum offload. 
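 * Zeroing the two fields below suffices because 0 is
 * RNDIS_OFFLOAD_PARAMETERS_NO_CHANGE, i.e. the host is simply not
 * asked to change UDP checksum offload state.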
*/ offloads->udp_ipv4_csum = 0; offloads->udp_ipv6_csum = 0; } request = hv_rndis_request(rndis_dev, REMOTE_NDIS_SET_MSG, RNDIS_MESSAGE_SIZE(rndis_set_request) + extlen); if (!request) return (ENOMEM); set = &request->request_msg.msg.set_request; set->oid = RNDIS_OID_TCP_OFFLOAD_PARAMETERS; set->info_buffer_length = extlen; set->info_buffer_offset = sizeof(rndis_set_request); set->device_vc_handle = 0; offload_req = (rndis_offload_params *)((unsigned long)set + set->info_buffer_offset); *offload_req = *offloads; offload_req->header.type = RNDIS_OBJECT_TYPE_DEFAULT; offload_req->header.revision = RNDIS_OFFLOAD_PARAMETERS_REVISION_3; offload_req->header.size = extlen; ret = hv_rf_send_request(rndis_dev, request, REMOTE_NDIS_SET_MSG); if (ret != 0) { device_printf(dev, "hv send offload request failed, ret=%d!\n", ret); goto cleanup; } ret = sema_timedwait(&request->wait_sema, 5 * hz); if (ret != 0) { device_printf(dev, "hv send offload request timeout\n"); goto cleanup; } set_complete = &request->response_msg.msg.set_complete; if (set_complete->status == RNDIS_STATUS_SUCCESS) { device_printf(dev, "hv send offload request succeeded\n"); ret = 0; } else { if (set_complete->status == RNDIS_STATUS_NOT_SUPPORTED) { device_printf(dev, "HV Not support offload\n"); ret = 0; } else { ret = set_complete->status; } } cleanup: hv_put_rndis_request(rndis_dev, request); return (ret); } /* * RNDIS filter receive indicate status */ static void hv_rf_receive_indicate_status(rndis_device *device, const rndis_msg *response) { const rndis_indicate_status *indicate = &response->msg.indicate_status; switch(indicate->status) { case RNDIS_STATUS_MEDIA_CONNECT: netvsc_linkstatus_callback(device->sc, 1); break; case RNDIS_STATUS_MEDIA_DISCONNECT: netvsc_linkstatus_callback(device->sc, 0); break; default: /* TODO: */ device_printf(device->sc->hn_dev, "unknown status %d received\n", indicate->status); break; } } static int hv_rf_find_recvinfo(const rndis_packet *rpkt, struct hn_recvinfo *info) { const rndis_per_packet_info *ppi; uint32_t mask, len; info->vlan_info = NULL; info->csum_info = NULL; info->hash_info = NULL; info->hash_value = NULL; if (rpkt->per_pkt_info_offset == 0) return 0; ppi = (const rndis_per_packet_info *) ((const uint8_t *)rpkt + rpkt->per_pkt_info_offset); len = rpkt->per_pkt_info_length; mask = 0; while (len != 0) { const void *ppi_dptr; uint32_t ppi_dlen; if (__predict_false(ppi->size < ppi->per_packet_info_offset)) return EINVAL; ppi_dlen = ppi->size - ppi->per_packet_info_offset; ppi_dptr = (const uint8_t *)ppi + ppi->per_packet_info_offset; switch (ppi->type) { case ieee_8021q_info: if (__predict_false(ppi_dlen < sizeof(ndis_8021q_info))) return EINVAL; info->vlan_info = ppi_dptr; mask |= HV_RF_RECVINFO_VLAN; break; case tcpip_chksum_info: if (__predict_false(ppi_dlen < sizeof(rndis_tcp_ip_csum_info))) return EINVAL; info->csum_info = ppi_dptr; mask |= HV_RF_RECVINFO_CSUM; break; case nbl_hash_value: if (__predict_false(ppi_dlen < sizeof(struct rndis_hash_value))) return EINVAL; info->hash_value = ppi_dptr; mask |= HV_RF_RECVINFO_HASHVAL; break; case nbl_hash_info: if (__predict_false(ppi_dlen < sizeof(struct rndis_hash_info))) return EINVAL; info->hash_info = ppi_dptr; mask |= HV_RF_RECVINFO_HASHINF; break; default: goto skip; } if (mask == HV_RF_RECVINFO_ALL) { /* All found; done */ break; } skip: if (__predict_false(len < ppi->size)) return EINVAL; len -= ppi->size; ppi = (const rndis_per_packet_info *) ((const uint8_t *)ppi + ppi->size); } return 0; } /* * RNDIS filter receive data */ 
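/*
 * Layout note for the function below: per the rndis_packet definition in
 * hv_rndis.h, data_offset is a byte offset from the beginning of the
 * rndis_packet structure, so the payload of the whole message starts at
 *
 *	(const uint8_t *)data + RNDIS_HEADER_SIZE + rndis_pkt->data_offset
 *
 * which is exactly how data_offset is formed in the code that follows.
 */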
static void hv_rf_receive_data(struct hn_rx_ring *rxr, const void *data, int dlen) { const rndis_msg *message = data; const rndis_packet *rndis_pkt; uint32_t data_offset; struct hn_recvinfo info; rndis_pkt = &message->msg.packet; /* * Fixme: Handle multiple rndis pkt msgs that may be enclosed in this * netvsc packet (ie tot_data_buf_len != message_length) */ /* Remove rndis header, then pass data packet up the stack */ data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset; dlen -= data_offset; if (dlen < rndis_pkt->data_length) { if_printf(rxr->hn_ifp, "total length %u is less than data length %u\n", dlen, rndis_pkt->data_length); return; } dlen = rndis_pkt->data_length; data = (const uint8_t *)data + data_offset; if (hv_rf_find_recvinfo(rndis_pkt, &info)) { if_printf(rxr->hn_ifp, "recvinfo parsing failed\n"); return; } netvsc_recv(rxr, data, dlen, &info); } /* * RNDIS filter on receive */ int hv_rf_on_receive(struct hn_softc *sc, struct hn_rx_ring *rxr, const void *data, int dlen) { rndis_device *rndis_dev; const rndis_msg *rndis_hdr; const struct rndis_comp_hdr *comp; rndis_dev = sc->rndis_dev; if (rndis_dev->state == RNDIS_DEV_UNINITIALIZED) return (EINVAL); rndis_hdr = data; switch (rndis_hdr->ndis_msg_type) { /* data message */ case REMOTE_NDIS_PACKET_MSG: hv_rf_receive_data(rxr, data, dlen); break; /* completion messages */ case REMOTE_NDIS_INITIALIZE_CMPLT: case REMOTE_NDIS_QUERY_CMPLT: case REMOTE_NDIS_SET_CMPLT: case REMOTE_NDIS_KEEPALIVE_CMPLT: comp = data; if (comp->rm_rid <= HN_RNDIS_RID_COMPAT_MAX) { /* Transition time compat code */ hv_rf_receive_response(rndis_dev, rndis_hdr); } else { vmbus_xact_ctx_wakeup(sc->hn_xact, data, dlen); } break; /* notification message */ case REMOTE_NDIS_INDICATE_STATUS_MSG: hv_rf_receive_indicate_status(rndis_dev, rndis_hdr); break; case REMOTE_NDIS_RESET_CMPLT: /* * Reset completed, no rid. * * NOTE: * RESET is not issued by hn(4), so this message should * _not_ be observed. 
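 * (On the rid split used in the dispatch above: hn_rndis_rid() returns
 * its counter shifted into the upper 16 bits, e.g. counter value 5
 * becomes rid 0x00050000, while the legacy rndis_request path masks its
 * ids with HN_RNDIS_RID_COMPAT_MASK, keeping them at or below 0xffff;
 * that is what makes the rm_rid comparison unambiguous.)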
*/ if_printf(sc->hn_ifp, "RESET CMPLT received\n"); break; default: if_printf(sc->hn_ifp, "unknown RNDIS message 0x%x\n", rndis_hdr->ndis_msg_type); break; } return (0); } /* - * RNDIS filter query device - */ -static int -hv_rf_query_device(rndis_device *device, uint32_t oid, void *result, - uint32_t *result_size) -{ - rndis_request *request; - uint32_t in_result_size = *result_size; - rndis_query_request *query; - rndis_query_complete *query_complete; - int ret = 0; - - *result_size = 0; - request = hv_rndis_request(device, REMOTE_NDIS_QUERY_MSG, - RNDIS_MESSAGE_SIZE(rndis_query_request)); - if (request == NULL) { - ret = -1; - goto cleanup; - } - - /* Set up the rndis query */ - query = &request->request_msg.msg.query_request; - query->oid = oid; - query->info_buffer_offset = sizeof(rndis_query_request); - query->info_buffer_length = 0; - query->device_vc_handle = 0; - - if (oid == RNDIS_OID_GEN_RSS_CAPABILITIES) { - struct rndis_recv_scale_cap *cap; - - request->request_msg.msg_len += - sizeof(struct rndis_recv_scale_cap); - query->info_buffer_length = sizeof(struct rndis_recv_scale_cap); - cap = (struct rndis_recv_scale_cap *)((unsigned long)query + - query->info_buffer_offset); - cap->hdr.type = RNDIS_OBJECT_TYPE_RSS_CAPABILITIES; - cap->hdr.rev = RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2; - cap->hdr.size = sizeof(struct rndis_recv_scale_cap); - } - - ret = hv_rf_send_request(device, request, REMOTE_NDIS_QUERY_MSG); - if (ret != 0) { - /* Fixme: printf added */ - printf("RNDISFILTER request failed to Send!\n"); - goto cleanup; - } - - sema_wait(&request->wait_sema); - - /* Copy the response back */ - query_complete = &request->response_msg.msg.query_complete; - - if (query_complete->info_buffer_length > in_result_size) { - ret = EINVAL; - goto cleanup; - } - - memcpy(result, (void *)((unsigned long)query_complete + - query_complete->info_buffer_offset), - query_complete->info_buffer_length); - - *result_size = query_complete->info_buffer_length; - -cleanup: - if (request != NULL) - hv_put_rndis_request(device, request); - - return (ret); -} - -/* * RNDIS filter query device MAC address */ static int hv_rf_query_device_mac(rndis_device *device) { struct hn_softc *sc = device->sc; size_t hwaddr_len; int error; hwaddr_len = ETHER_ADDR_LEN; error = hn_rndis_query(sc, OID_802_3_PERMANENT_ADDRESS, NULL, 0, device->hw_mac_addr, &hwaddr_len); if (error) - return error; + return (error); if (hwaddr_len != ETHER_ADDR_LEN) { if_printf(sc->hn_ifp, "invalid hwaddr len %zu\n", hwaddr_len); - return EINVAL; + return (EINVAL); } - return 0; + return (0); } /* * RNDIS filter query device link status */ static inline int hv_rf_query_device_link_status(rndis_device *device) { - uint32_t size = sizeof(uint32_t); + struct hn_softc *sc = device->sc; + size_t size; + int error; - return (hv_rf_query_device(device, - RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, &device->link_status, &size)); + size = sizeof(uint32_t); + error = hn_rndis_query(sc, OID_GEN_MEDIA_CONNECT_STATUS, NULL, 0, + &device->link_status, &size); + if (error) + return (error); + if (size != sizeof(uint32_t)) { + if_printf(sc->hn_ifp, "invalid link status len %zu\n", size); + return (EINVAL); + } + return (0); } -static uint8_t netvsc_hash_key[HASH_KEYLEN] = { +static uint8_t netvsc_hash_key[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 
0xac, 0x01, 0xfa }; /* - * RNDIS set vRSS parameters - */ -static int -hv_rf_set_rss_param(rndis_device *device, int num_queue) -{ - rndis_request *request; - rndis_set_request *set; - rndis_set_complete *set_complete; - rndis_recv_scale_param *rssp; - uint32_t extlen = sizeof(rndis_recv_scale_param) + - (4 * ITAB_NUM) + HASH_KEYLEN; - uint32_t *itab, status; - uint8_t *keyp; - int i, ret; - - - request = hv_rndis_request(device, REMOTE_NDIS_SET_MSG, - RNDIS_MESSAGE_SIZE(rndis_set_request) + extlen); - if (request == NULL) { - if (bootverbose) - printf("Netvsc: No memory to set vRSS parameters.\n"); - ret = -1; - goto cleanup; - } - - set = &request->request_msg.msg.set_request; - set->oid = RNDIS_OID_GEN_RSS_PARAMETERS; - set->info_buffer_length = extlen; - set->info_buffer_offset = sizeof(rndis_set_request); - set->device_vc_handle = 0; - - /* Fill out the rssp parameter structure */ - rssp = (rndis_recv_scale_param *)(set + 1); - rssp->hdr.type = RNDIS_OBJECT_TYPE_RSS_PARAMETERS; - rssp->hdr.rev = RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2; - rssp->hdr.size = sizeof(rndis_recv_scale_param); - rssp->flag = 0; - rssp->hashinfo = RNDIS_HASH_FUNC_TOEPLITZ | RNDIS_HASH_IPV4 | - RNDIS_HASH_TCP_IPV4 | RNDIS_HASH_IPV6 | RNDIS_HASH_TCP_IPV6; - rssp->indirect_tabsize = 4 * ITAB_NUM; - rssp->indirect_taboffset = sizeof(rndis_recv_scale_param); - rssp->hashkey_size = HASH_KEYLEN; - rssp->hashkey_offset = rssp->indirect_taboffset + - rssp->indirect_tabsize; - - /* Set indirection table entries */ - itab = (uint32_t *)(rssp + 1); - for (i = 0; i < ITAB_NUM; i++) - itab[i] = i % num_queue; - - /* Set hash key values */ - keyp = (uint8_t *)((unsigned long)rssp + rssp->hashkey_offset); - for (i = 0; i < HASH_KEYLEN; i++) - keyp[i] = netvsc_hash_key[i]; - - ret = hv_rf_send_request(device, request, REMOTE_NDIS_SET_MSG); - if (ret != 0) { - goto cleanup; - } - - /* - * Wait for the response from the host. Another thread will signal - * us when the response has arrived. In the failure case, - * sema_timedwait() returns a non-zero status after waiting 5 seconds. - */ - ret = sema_timedwait(&request->wait_sema, 5 * hz); - if (ret == 0) { - /* Response received, check status */ - set_complete = &request->response_msg.msg.set_complete; - status = set_complete->status; - if (status != RNDIS_STATUS_SUCCESS) { - /* Bad response status, return error */ - if (bootverbose) - printf("Netvsc: Failed to set vRSS " - "parameters.\n"); - ret = -2; - } else { - if (bootverbose) - printf("Netvsc: Successfully set vRSS " - "parameters.\n"); - } - } else { - /* - * We cannot deallocate the request since we may still - * receive a send completion for it. - */ - printf("Netvsc: vRSS set timeout, id = %u, ret = %d\n", - request->request_msg.msg.init_request.request_id, ret); - goto exit; - } - -cleanup: - if (request != NULL) { - hv_put_rndis_request(device, request); - } -exit: - return (ret); -} - -/* * RNDIS filter set packet filter * Sends an rndis request with the new filter, then waits for a response * from the host. * Returns zero on success, non-zero on failure. 
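 * A typical caller-side sketch (hypothetical filter value, built from
 * the NDIS_PACKET_TYPE_* flags in hv_rndis.h):
 *
 *	uint32_t filter = NDIS_PACKET_TYPE_DIRECTED |
 *	    NDIS_PACKET_TYPE_ALL_MULTICAST | NDIS_PACKET_TYPE_BROADCAST;
 *	ret = hv_rf_set_packet_filter(device, filter);
 *	if (ret != 0)
 *		... the host did not accept the new filter ...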
*/ static int hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter) { rndis_request *request; rndis_set_request *set; rndis_set_complete *set_complete; uint32_t status; int ret; request = hv_rndis_request(device, REMOTE_NDIS_SET_MSG, RNDIS_MESSAGE_SIZE(rndis_set_request) + sizeof(uint32_t)); if (request == NULL) { ret = -1; goto cleanup; } /* Set up the rndis set */ set = &request->request_msg.msg.set_request; set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER; set->info_buffer_length = sizeof(uint32_t); set->info_buffer_offset = sizeof(rndis_set_request); memcpy((void *)((unsigned long)set + sizeof(rndis_set_request)), &new_filter, sizeof(uint32_t)); ret = hv_rf_send_request(device, request, REMOTE_NDIS_SET_MSG); if (ret != 0) { goto cleanup; } /* * Wait for the response from the host. Another thread will signal * us when the response has arrived. In the failure case, * sema_timedwait() returns a non-zero status after waiting 5 seconds. */ ret = sema_timedwait(&request->wait_sema, 5 * hz); if (ret == 0) { /* Response received, check status */ set_complete = &request->response_msg.msg.set_complete; status = set_complete->status; if (status != RNDIS_STATUS_SUCCESS) { /* Bad response status, return error */ ret = -2; } } else { /* * We cannot deallocate the request since we may still * receive a send completion for it. */ goto exit; } cleanup: if (request != NULL) { hv_put_rndis_request(device, request); } exit: return (ret); } static const void * hn_rndis_xact_execute(struct hn_softc *sc, struct vmbus_xact *xact, uint32_t rid, size_t reqlen, size_t *comp_len0, uint32_t comp_type) { struct vmbus_gpa gpa[HN_XACT_REQ_PGCNT]; const struct rndis_comp_hdr *comp; bus_addr_t paddr; size_t comp_len, min_complen = *comp_len0; int gpa_cnt, error; KASSERT(rid > HN_RNDIS_RID_COMPAT_MAX, ("invalid rid %u\n", rid)); KASSERT(reqlen <= HN_XACT_REQ_SIZE && reqlen > 0, ("invalid request length %zu", reqlen)); KASSERT(min_complen >= sizeof(*comp), ("invalid minimum complete len %zu", min_complen)); /* * Setup the SG list. */ paddr = vmbus_xact_req_paddr(xact); KASSERT((paddr & PAGE_MASK) == 0, ("vmbus xact request is not page aligned 0x%jx", (uintmax_t)paddr)); for (gpa_cnt = 0; gpa_cnt < HN_XACT_REQ_PGCNT; ++gpa_cnt) { int len = PAGE_SIZE; if (reqlen == 0) break; if (reqlen < len) len = reqlen; gpa[gpa_cnt].gpa_page = atop(paddr) + gpa_cnt; gpa[gpa_cnt].gpa_len = len; gpa[gpa_cnt].gpa_ofs = 0; reqlen -= len; } KASSERT(reqlen == 0, ("still have %zu request data left", reqlen)); /* * Send this RNDIS control message and wait for its completion * message. */ vmbus_xact_activate(xact); error = hv_nv_on_send(sc->hn_prichan, HN_NVS_RNDIS_MTYPE_CTRL, &hn_send_ctx_none, gpa, gpa_cnt); if (error) { vmbus_xact_deactivate(xact); if_printf(sc->hn_ifp, "RNDIS ctrl send failed: %d\n", error); return (NULL); } comp = vmbus_xact_wait(xact, &comp_len); /* * Check this RNDIS complete message. 
*/ if (comp_len < min_complen) { if (comp_len >= sizeof(*comp)) { /* rm_status field is valid */ if_printf(sc->hn_ifp, "invalid RNDIS comp len %zu, " "status 0x%08x\n", comp_len, comp->rm_status); } else { if_printf(sc->hn_ifp, "invalid RNDIS comp len %zu\n", comp_len); } return (NULL); } if (comp->rm_len < min_complen) { if_printf(sc->hn_ifp, "invalid RNDIS comp msglen %u\n", comp->rm_len); return (NULL); } if (comp->rm_type != comp_type) { if_printf(sc->hn_ifp, "unexpected RNDIS comp 0x%08x, " "expect 0x%08x\n", comp->rm_type, comp_type); return (NULL); } if (comp->rm_rid != rid) { if_printf(sc->hn_ifp, "RNDIS comp rid mismatch %u, " "expect %u\n", comp->rm_rid, rid); return (NULL); } /* All pass! */ *comp_len0 = comp_len; return (comp); } static int hn_rndis_query(struct hn_softc *sc, uint32_t oid, const void *idata, size_t idlen, void *odata, size_t *odlen0) { struct rndis_query_req *req; const struct rndis_query_comp *comp; struct vmbus_xact *xact; size_t reqlen, odlen = *odlen0, comp_len; int error, ofs; uint32_t rid; reqlen = sizeof(*req) + idlen; xact = vmbus_xact_get(sc->hn_xact, reqlen); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS query 0x%08x\n", oid); return (ENXIO); } rid = hn_rndis_rid(sc); req = vmbus_xact_req_data(xact); req->rm_type = REMOTE_NDIS_QUERY_MSG; req->rm_len = reqlen; req->rm_rid = rid; req->rm_oid = oid; /* * XXX * This is _not_ RNDIS Spec conforming: * "This MUST be set to 0 when there is no input data * associated with the OID." * * If this field was set to 0 according to the RNDIS Spec, * Hyper-V would set non-SUCCESS status in the query * completion. */ req->rm_infobufoffset = RNDIS_QUERY_REQ_INFOBUFOFFSET; if (idlen > 0) { req->rm_infobuflen = idlen; /* Input data immediately follows RNDIS query. */ memcpy(req + 1, idata, idlen); } comp_len = sizeof(*comp) + odlen; comp = hn_rndis_xact_execute(sc, xact, rid, reqlen, &comp_len, REMOTE_NDIS_QUERY_CMPLT); if (comp == NULL) { if_printf(sc->hn_ifp, "exec RNDIS query 0x%08x failed\n", oid); error = EIO; goto done; } if (comp->rm_status != RNDIS_STATUS_SUCCESS) { if_printf(sc->hn_ifp, "RNDIS query 0x%08x failed: " "status 0x%08x\n", oid, comp->rm_status); error = EIO; goto done; } if (comp->rm_infobuflen == 0 || comp->rm_infobufoffset == 0) { /* No output data! */ if_printf(sc->hn_ifp, "RNDIS query 0x%08x, no data\n", oid); *odlen0 = 0; error = 0; goto done; } /* * Check output data length and offset. */ /* ofs is the offset from the beginning of comp. */ ofs = RNDIS_QUERY_COMP_INFOBUFABS(comp->rm_infobufoffset); if (ofs < sizeof(*comp) || ofs + comp->rm_infobuflen > comp_len) { if_printf(sc->hn_ifp, "RNDIS query invalid comp ib off/len, " "%u/%u\n", comp->rm_infobufoffset, comp->rm_infobuflen); error = EINVAL; goto done; } /* * Save output data. */ if (comp->rm_infobuflen < odlen) odlen = comp->rm_infobuflen; memcpy(odata, ((const uint8_t *)comp) + ofs, odlen); *odlen0 = odlen; error = 0; done: vmbus_xact_put(xact); return (error); } static int +hn_rndis_get_rsscaps(struct hn_softc *sc, int *rxr_cnt) +{ + struct ndis_rss_caps in, caps; + size_t caps_len; + int error; + + /* + * Only NDIS 6.30+ is supported. 
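 */

/*
 * A sketch of the bounds checks hn_rndis_query() applies before trusting
 * the completion's embedded info buffer: the driver first converts the
 * wire-relative offset to an absolute one, then requires that it lands
 * past the fixed completion header and that offset + length stays inside
 * the bytes actually received.  ex_infobuf_ok() is a hypothetical helper
 * written in an overflow-safe form.
 */
#include <stddef.h>

static int
ex_infobuf_ok(size_t comp_len, size_t comp_hdrlen, size_t ofs, size_t len)
{
	if (ofs < comp_hdrlen)
		return (0);	/* overlaps the completion header */
	if (len > comp_len || ofs > comp_len - len)
		return (0);	/* runs past the received bytes */
	return (1);
}

/*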
+ */ + KASSERT(sc->hn_ndis_ver >= NDIS_VERSION_6_30, + ("NDIS 6.30+ is required, NDIS version 0x%08x", sc->hn_ndis_ver)); + *rxr_cnt = 0; + + memset(&in, 0, sizeof(in)); + in.ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_CAPS; + in.ndis_hdr.ndis_rev = NDIS_RSS_CAPS_REV_2; + in.ndis_hdr.ndis_size = NDIS_RSS_CAPS_SIZE; + + caps_len = NDIS_RSS_CAPS_SIZE; + error = hn_rndis_query(sc, OID_GEN_RSS_CAPABILITIES, + &in, NDIS_RSS_CAPS_SIZE, &caps, &caps_len); + if (error) + return (error); + if (caps_len < NDIS_RSS_CAPS_SIZE_6_0) { + if_printf(sc->hn_ifp, "invalid NDIS RSS caps len %zu", + caps_len); + return (EINVAL); + } + + if (caps.ndis_nrxr == 0) { + if_printf(sc->hn_ifp, "0 RX rings!?\n"); + return (EINVAL); + } + *rxr_cnt = caps.ndis_nrxr; + + if (caps_len == NDIS_RSS_CAPS_SIZE) { + if (bootverbose) { + if_printf(sc->hn_ifp, "RSS indirect table size %u\n", + caps.ndis_nind); + } + } + return (0); +} + +static int hn_rndis_set(struct hn_softc *sc, uint32_t oid, const void *data, size_t dlen) { struct rndis_set_req *req; const struct rndis_set_comp *comp; struct vmbus_xact *xact; size_t reqlen, comp_len; uint32_t rid; int error; KASSERT(dlen > 0, ("invalid dlen %zu", dlen)); reqlen = sizeof(*req) + dlen; xact = vmbus_xact_get(sc->hn_xact, reqlen); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS set 0x%08x\n", oid); return (ENXIO); } rid = hn_rndis_rid(sc); req = vmbus_xact_req_data(xact); req->rm_type = REMOTE_NDIS_SET_MSG; req->rm_len = reqlen; req->rm_rid = rid; req->rm_oid = oid; req->rm_infobuflen = dlen; req->rm_infobufoffset = RNDIS_SET_REQ_INFOBUFOFFSET; /* Data immediately follows RNDIS set. */ memcpy(req + 1, data, dlen); comp_len = sizeof(*comp); comp = hn_rndis_xact_execute(sc, xact, rid, reqlen, &comp_len, REMOTE_NDIS_SET_CMPLT); if (comp == NULL) { if_printf(sc->hn_ifp, "exec RNDIS set 0x%08x failed\n", oid); error = EIO; goto done; } if (comp->rm_status != RNDIS_STATUS_SUCCESS) { if_printf(sc->hn_ifp, "RNDIS set 0x%08x failed: " "status 0x%08x\n", oid, comp->rm_status); error = EIO; goto done; } error = 0; done: vmbus_xact_put(xact); return (error); } static int hn_rndis_conf_offload(struct hn_softc *sc) { struct ndis_offload_params params; size_t paramsz; int error; /* NOTE: 0 means "no change" */ memset(¶ms, 0, sizeof(params)); params.ndis_hdr.ndis_type = NDIS_OBJTYPE_DEFAULT; if (sc->hn_ndis_ver < NDIS_VERSION_6_30) { params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_2; paramsz = NDIS_OFFLOAD_PARAMS_SIZE_6_1; } else { params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_3; paramsz = NDIS_OFFLOAD_PARAMS_SIZE; } params.ndis_hdr.ndis_size = paramsz; params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TXRX; params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TXRX; params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TXRX; if (sc->hn_ndis_ver >= NDIS_VERSION_6_30) { params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TXRX; params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TXRX; } params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_ON; /* XXX ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_ON */ error = hn_rndis_set(sc, OID_TCP_OFFLOAD_PARAMETERS, ¶ms, paramsz); if (error) { if_printf(sc->hn_ifp, "offload config failed: %d\n", error); } else { if (bootverbose) if_printf(sc->hn_ifp, "offload config done\n"); } return (error); } +static int +hn_rndis_conf_rss(struct hn_softc *sc, int nchan) +{ + struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; + struct ndis_rss_params *prm = &rss->rss_params; + int i, error; + + /* + * Only NDIS 6.30+ is supported. 
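 */

/*
 * A sketch of the revision/size selection hn_rndis_conf_offload() above
 * performs: pre-6.30 hosts get the REV_2 structure truncated before the
 * NDIS 6.30-only fields, newer hosts get the full REV_3 structure.  The
 * stand-in struct and the (major << 16 | minor) version packing are
 * assumptions mirroring the driver's NDIS_VERSION_* constants.
 */
#include <stddef.h>
#include <stdint.h>

struct ex_offload_params {
	uint8_t  ip4csum;
	uint8_t  tcp4csum;
	uint32_t flags;
	/* Fields below exist only for NDIS >= 6.30. */
	uint8_t  rsc_ip4;
	uint8_t  rsc_ip6;
};

#define EX_NDIS_6_30	((6 << 16) | 30)
#define EX_SIZE_FULL	sizeof(struct ex_offload_params)
#define EX_SIZE_6_1	offsetof(struct ex_offload_params, rsc_ip4)

static size_t
ex_offload_size(uint32_t ndis_ver, uint8_t *rev)
{
	if (ndis_ver < EX_NDIS_6_30) {
		*rev = 2;		/* like NDIS_OFFLOAD_PARAMS_REV_2 */
		return (EX_SIZE_6_1);	/* drop the 6.30-only tail */
	}
	*rev = 3;			/* like NDIS_OFFLOAD_PARAMS_REV_3 */
	return (EX_SIZE_FULL);
}

/*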
+ */ + KASSERT(sc->hn_ndis_ver >= NDIS_VERSION_6_30, + ("NDIS 6.30+ is required, NDIS version 0x%08x", sc->hn_ndis_ver)); + + memset(rss, 0, sizeof(*rss)); + prm->ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_PARAMS; + prm->ndis_hdr.ndis_rev = NDIS_RSS_PARAMS_REV_2; + prm->ndis_hdr.ndis_size = sizeof(*rss); + prm->ndis_hash = NDIS_HASH_FUNCTION_TOEPLITZ | + NDIS_HASH_IPV4 | NDIS_HASH_TCP_IPV4 | + NDIS_HASH_IPV6 | NDIS_HASH_TCP_IPV6; + /* TODO: Take ndis_rss_caps.ndis_nind into account */ + prm->ndis_indsize = sizeof(rss->rss_ind); + prm->ndis_indoffset = + __offsetof(struct ndis_rssprm_toeplitz, rss_ind[0]); + prm->ndis_keysize = sizeof(rss->rss_key); + prm->ndis_keyoffset = + __offsetof(struct ndis_rssprm_toeplitz, rss_key[0]); + + /* Setup RSS key */ + memcpy(rss->rss_key, netvsc_hash_key, sizeof(rss->rss_key)); + + /* Setup RSS indirect table */ + /* TODO: Take ndis_rss_caps.ndis_nind into account */ + for (i = 0; i < NDIS_HASH_INDCNT; ++i) + rss->rss_ind[i] = i % nchan; + + error = hn_rndis_set(sc, OID_GEN_RSS_PARAMETERS, rss, sizeof(*rss)); + if (error) { + if_printf(sc->hn_ifp, "RSS config failed: %d\n", error); + } else { + if (bootverbose) + if_printf(sc->hn_ifp, "RSS config done\n"); + } + return (error); +} + /* * RNDIS filter init device */ static int hv_rf_init_device(rndis_device *device) { struct hn_softc *sc = device->sc; struct rndis_init_req *req; const struct rndis_init_comp *comp; struct vmbus_xact *xact; size_t comp_len; uint32_t rid; int error; /* XXX */ device->state = RNDIS_DEV_INITIALIZED; xact = vmbus_xact_get(sc->hn_xact, sizeof(*req)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS init\n"); return (ENXIO); } rid = hn_rndis_rid(sc); req = vmbus_xact_req_data(xact); req->rm_type = REMOTE_NDIS_INITIALIZE_MSG; req->rm_len = sizeof(*req); req->rm_rid = rid; req->rm_ver_major = RNDIS_VERSION_MAJOR; req->rm_ver_minor = RNDIS_VERSION_MINOR; req->rm_max_xfersz = HN_RNDIS_XFER_SIZE; comp_len = RNDIS_INIT_COMP_SIZE_MIN; comp = hn_rndis_xact_execute(sc, xact, rid, sizeof(*req), &comp_len, REMOTE_NDIS_INITIALIZE_CMPLT); if (comp == NULL) { if_printf(sc->hn_ifp, "exec RNDIS init failed\n"); error = EIO; goto done; } if (comp->rm_status != RNDIS_STATUS_SUCCESS) { if_printf(sc->hn_ifp, "RNDIS init failed: status 0x%08x\n", comp->rm_status); error = EIO; goto done; } if (bootverbose) { if_printf(sc->hn_ifp, "RNDIS ver %u.%u, pktsz %u, pktcnt %u\n", comp->rm_ver_major, comp->rm_ver_minor, comp->rm_pktmaxsz, comp->rm_pktmaxcnt); } error = 0; done: if (xact != NULL) vmbus_xact_put(xact); return (error); } #define HALT_COMPLETION_WAIT_COUNT 25 /* * RNDIS filter halt device */ static int hv_rf_halt_device(rndis_device *device) { rndis_request *request; int i, ret; /* Attempt to do a rndis device halt */ request = hv_rndis_request(device, REMOTE_NDIS_HALT_MSG, RNDIS_MESSAGE_SIZE(rndis_halt_request)); if (request == NULL) { return (-1); } /* initialize "poor man's semaphore" */ request->halt_complete_flag = 0; ret = hv_rf_send_request(device, request, REMOTE_NDIS_HALT_MSG); if (ret != 0) { return (-1); } /* * Wait for halt response from halt callback. We must wait for * the transaction response before freeing the request and other * resources. 
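 */

/*
 * A sketch of how hn_rndis_conf_rss() above makes the RSS message
 * self-describing: the key and indirection-table offsets stored in the
 * header are byte offsets from the start of the whole message, computed
 * with offsetof, so the host never needs the C struct layout.  The ex_*
 * types are trimmed stand-ins for ndis_rss_params and
 * ndis_rssprm_toeplitz.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define EX_KEYSZ	40	/* NDIS_HASH_KEYSIZE_TOEPLITZ */
#define EX_INDCNT	128	/* NDIS_HASH_INDCNT */

struct ex_rss_params {
	uint16_t indsize;
	uint32_t indoffset;
	uint16_t keysize;
	uint32_t keyoffset;
};

struct ex_rssprm_toeplitz {
	struct ex_rss_params prm;
	uint8_t  key[EX_KEYSZ];
	uint32_t ind[EX_INDCNT];
};

static void
ex_rss_selfdescribe(struct ex_rssprm_toeplitz *rss, const uint8_t *hashkey,
    int nchan)
{
	int i;

	memset(rss, 0, sizeof(*rss));
	rss->prm.indsize = sizeof(rss->ind);
	rss->prm.indoffset = offsetof(struct ex_rssprm_toeplitz, ind[0]);
	rss->prm.keysize = sizeof(rss->key);
	rss->prm.keyoffset = offsetof(struct ex_rssprm_toeplitz, key[0]);
	memcpy(rss->key, hashkey, EX_KEYSZ);
	/* Same round-robin spread over channels as the driver uses. */
	for (i = 0; i < EX_INDCNT; i++)
		rss->ind[i] = i % nchan;
}

/*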
*/ for (i=HALT_COMPLETION_WAIT_COUNT; i > 0; i--) { if (request->halt_complete_flag != 0) { break; } DELAY(400); } if (i == 0) { return (-1); } device->state = RNDIS_DEV_UNINITIALIZED; hv_put_rndis_request(device, request); return (0); } /* * RNDIS filter open device */ static int hv_rf_open_device(rndis_device *device) { int ret; if (device->state != RNDIS_DEV_INITIALIZED) { return (0); } if (hv_promisc_mode != 1) { ret = hv_rf_set_packet_filter(device, NDIS_PACKET_TYPE_BROADCAST | NDIS_PACKET_TYPE_ALL_MULTICAST | NDIS_PACKET_TYPE_DIRECTED); } else { ret = hv_rf_set_packet_filter(device, NDIS_PACKET_TYPE_PROMISCUOUS); } if (ret == 0) { device->state = RNDIS_DEV_DATAINITIALIZED; } return (ret); } /* * RNDIS filter close device */ static int hv_rf_close_device(rndis_device *device) { int ret; if (device->state != RNDIS_DEV_DATAINITIALIZED) { return (0); } ret = hv_rf_set_packet_filter(device, 0); if (ret == 0) { device->state = RNDIS_DEV_INITIALIZED; } return (ret); } /* * RNDIS filter on device add */ int hv_rf_on_device_add(struct hn_softc *sc, void *additl_info, int *nchan0, struct hn_rx_ring *rxr) { int ret; rndis_device *rndis_dev; - struct rndis_recv_scale_cap rsscaps; - uint32_t rsscaps_size = sizeof(struct rndis_recv_scale_cap); netvsc_device_info *dev_info = (netvsc_device_info *)additl_info; device_t dev = sc->hn_dev; struct hn_nvs_subch_req *req; const struct hn_nvs_subch_resp *resp; size_t resp_len; struct vmbus_xact *xact = NULL; uint32_t status, nsubch; int nchan = *nchan0; + int rxr_cnt; rndis_dev = hv_get_rndis_device(); if (rndis_dev == NULL) { return (ENOMEM); } sc->rndis_dev = rndis_dev; rndis_dev->sc = sc; /* * Let the inner driver handle this first to create the netvsc channel * NOTE! Once the channel is created, we may get a receive callback * (hv_rf_on_receive()) before this call is completed. * Note: Earlier code used a function pointer here. */ ret = hv_nv_on_device_add(sc, rxr); if (ret != 0) { hv_put_rndis_device(rndis_dev); return (ret); } /* * Initialize the rndis device */ /* Send the rndis initialization message */ ret = hv_rf_init_device(rndis_dev); if (ret != 0) { /* * TODO: If rndis init failed, we will need to shut down * the channel */ } /* Get the mac address */ ret = hv_rf_query_device_mac(rndis_dev); if (ret != 0) { /* TODO: shut down rndis device and the channel */ } /* Configure NDIS offload settings */ hn_rndis_conf_offload(sc); memcpy(dev_info->mac_addr, rndis_dev->hw_mac_addr, ETHER_ADDR_LEN); hv_rf_query_device_link_status(rndis_dev); dev_info->link_state = rndis_dev->link_status; - if (sc->hn_nvs_ver < NVSP_PROTOCOL_VERSION_5 || nchan == 1) + if (sc->hn_ndis_ver < NDIS_VERSION_6_30 || nchan == 1) { + /* + * Either RSS is not supported, or multiple RX/TX rings + * are not requested. + */ + *nchan0 = 1; return (0); + } - memset(&rsscaps, 0, rsscaps_size); - ret = hv_rf_query_device(rndis_dev, - RNDIS_OID_GEN_RSS_CAPABILITIES, - &rsscaps, &rsscaps_size); - if ((ret != 0) || (rsscaps.num_recv_que < 2)) { - device_printf(dev, "hv_rf_query_device failed or " - "rsscaps.num_recv_que < 2 \n"); - goto out; + /* + * Get RSS capabilities, e.g. # of RX rings, and # of indirect + * table entries. + */ + ret = hn_rndis_get_rsscaps(sc, &rxr_cnt); + if (ret) { + /* No RSS; this is benign. 
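 */

/*
 * A sketch of the "poor man's semaphore" wait used by hv_rf_halt_device()
 * in this hunk: busy-poll a completion flag a bounded number of times,
 * since the request and its resources may only be freed after the halt
 * completion has been observed.  ex_delay_us() is a hypothetical
 * stand-in for the kernel's DELAY().
 */
extern void ex_delay_us(int us);

static int
ex_poll_flag(volatile int *flag, int tries, int us_per_try)
{
	int i;

	for (i = tries; i > 0; i--) {
		if (*flag != 0)
			return (0);	/* completion observed */
		ex_delay_us(us_per_try);
	}
	return (-1);	/* gave up; the caller must not free the request */
}

/*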
*/ + *nchan0 = 1; + return (0); } - device_printf(dev, "channel, offered %u, requested %d\n", - rsscaps.num_recv_que, nchan); - if (nchan > rsscaps.num_recv_que) - nchan = rsscaps.num_recv_que; + if (nchan > rxr_cnt) + nchan = rxr_cnt; + if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", + rxr_cnt, nchan); if (nchan == 1) { device_printf(dev, "only 1 channel is supported, no vRSS\n"); goto out; } /* * Ask NVS to allocate sub-channels. */ xact = vmbus_xact_get(sc->hn_xact, sizeof(*req)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for nvs subch req\n"); ret = ENXIO; goto out; } req = vmbus_xact_req_data(xact); req->nvs_type = HN_NVS_TYPE_SUBCH_REQ; req->nvs_op = HN_NVS_SUBCH_OP_ALLOC; req->nvs_nsubch = nchan - 1; resp = hn_nvs_xact_execute(sc, xact, req, sizeof(*req), &resp_len); if (resp == NULL) { if_printf(sc->hn_ifp, "exec subch failed\n"); ret = EIO; goto out; } if (resp_len < sizeof(*resp)) { if_printf(sc->hn_ifp, "invalid subch resp length %zu\n", resp_len); ret = EINVAL; goto out; } if (resp->nvs_type != HN_NVS_TYPE_SUBCH_RESP) { if_printf(sc->hn_ifp, "not subch resp, type %u\n", resp->nvs_type); ret = EINVAL; goto out; } status = resp->nvs_status; nsubch = resp->nvs_nsubch; vmbus_xact_put(xact); xact = NULL; if (status != HN_NVS_STATUS_OK) { if_printf(sc->hn_ifp, "subch req failed: %x\n", status); ret = EIO; goto out; } if (nsubch > nchan - 1) { if_printf(sc->hn_ifp, "%u subchans are allocated, requested %u\n", nsubch, nchan - 1); nsubch = nchan - 1; } nchan = nsubch + 1; - ret = hv_rf_set_rss_param(rndis_dev, nchan); - *nchan0 = nchan; + ret = hn_rndis_conf_rss(sc, nchan); + if (ret != 0) + *nchan0 = 1; + else + *nchan0 = nchan; out: if (xact != NULL) vmbus_xact_put(xact); return (ret); } /* * RNDIS filter on device remove */ int hv_rf_on_device_remove(struct hn_softc *sc, boolean_t destroy_channel) { rndis_device *rndis_dev = sc->rndis_dev; int ret; /* Halt and release the rndis device */ ret = hv_rf_halt_device(rndis_dev); sc->rndis_dev = NULL; hv_put_rndis_device(rndis_dev); /* Pass control to inner driver to remove the device */ ret |= hv_nv_on_device_remove(sc, destroy_channel); return (ret); } /* * RNDIS filter on open */ int hv_rf_on_open(struct hn_softc *sc) { return (hv_rf_open_device(sc->rndis_dev)); } /* * RNDIS filter on close */ int hv_rf_on_close(struct hn_softc *sc) { return (hv_rf_close_device(sc->rndis_dev)); } static void hn_rndis_sent_cb(struct hn_send_ctx *sndc, struct hn_softc *sc, struct vmbus_channel *chan __unused, const void *data __unused, int dlen __unused) { if (sndc->hn_chim_idx != HN_NVS_CHIM_IDX_INVALID) hn_chim_free(sc, sndc->hn_chim_idx); } static void hn_rndis_sent_halt(struct hn_send_ctx *sndc, struct hn_softc *sc, struct vmbus_channel *chan __unused, const void *data __unused, int dlen __unused) { rndis_request *request = sndc->hn_cbarg; if (sndc->hn_chim_idx != HN_NVS_CHIM_IDX_INVALID) hn_chim_free(sc, sndc->hn_chim_idx); /* * Notify hv_rf_halt_device() about halt completion. * The halt code must wait for completion before freeing * the transaction resources. 
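 */

/*
 * A sketch of the channel-count negotiation in hv_rf_on_device_add()
 * above: the request is clamped to the RX rings the host offers, the
 * host may grant fewer subchannels than the nchan - 1 asked for, and
 * the usable channel count is whatever was granted plus the primary
 * channel.  ex_negotiate_channels() is a hypothetical helper.
 */
static int
ex_negotiate_channels(int requested, int offered_rxr, unsigned granted_subch)
{
	int nchan = requested;

	if (nchan > offered_rxr)
		nchan = offered_rxr;	/* cannot exceed host RX rings */
	if (nchan <= 1)
		return (1);		/* no vRSS; primary channel only */
	if (granted_subch > (unsigned)(nchan - 1))
		granted_subch = nchan - 1;
	return ((int)granted_subch + 1);	/* subchannels + primary */
}

/*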
*/ request->halt_complete_flag = 1; } void hv_rf_channel_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) { netvsc_channel_rollup(rxr, txr); } Index: projects/clang390-import/sys/dev/hyperv/netvsc/ndis.h =================================================================== --- projects/clang390-import/sys/dev/hyperv/netvsc/ndis.h (revision 305016) +++ projects/clang390-import/sys/dev/hyperv/netvsc/ndis.h (revision 305017) @@ -1,118 +1,217 @@ /*- * Copyright (c) 2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NET_NDIS_H_ #define _NET_NDIS_H_ +#define NDIS_MEDIA_STATE_CONNECTED 0 +#define NDIS_MEDIA_STATE_DISCONNECTED 1 + +#define OID_GEN_RSS_CAPABILITIES 0x00010203 +#define OID_GEN_RSS_PARAMETERS 0x00010204 #define OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C #define NDIS_OBJTYPE_DEFAULT 0x80 +#define NDIS_OBJTYPE_RSS_CAPS 0x88 +#define NDIS_OBJTYPE_RSS_PARAMS 0x89 /* common_set */ #define NDIS_OFFLOAD_SET_NOCHG 0 #define NDIS_OFFLOAD_SET_ON 1 #define NDIS_OFFLOAD_SET_OFF 2 /* a.k.a GRE MAC */ #define NDIS_ENCAP_TYPE_NVGRE 0x00000001 +#define NDIS_HASH_FUNCTION_MASK 0x000000FF /* see hash function */ +#define NDIS_HASH_TYPE_MASK 0x00FFFF00 /* see hash type */ + +/* hash function */ +#define NDIS_HASH_FUNCTION_TOEPLITZ 0x00000001 + +/* hash type */ +#define NDIS_HASH_IPV4 0x00000100 +#define NDIS_HASH_TCP_IPV4 0x00000200 +#define NDIS_HASH_IPV6 0x00000400 +#define NDIS_HASH_IPV6_EX 0x00000800 +#define NDIS_HASH_TCP_IPV6 0x00001000 +#define NDIS_HASH_TCP_IPV6_EX 0x00002000 + +#define NDIS_HASH_KEYSIZE_TOEPLITZ 40 +#define NDIS_HASH_INDCNT 128 + struct ndis_object_hdr { uint8_t ndis_type; /* NDIS_OBJTYPE_ */ uint8_t ndis_rev; /* type specific */ uint16_t ndis_size; /* incl. 
this hdr */ }; -/* OID_TCP_OFFLOAD_PARAMETERS */ +/* + * OID_TCP_OFFLOAD_PARAMETERS + * ndis_type: NDIS_OBJTYPE_DEFAULT + */ struct ndis_offload_params { struct ndis_object_hdr ndis_hdr; uint8_t ndis_ip4csum; /* param_set */ uint8_t ndis_tcp4csum; /* param_set */ uint8_t ndis_udp4csum; /* param_set */ uint8_t ndis_tcp6csum; /* param_set */ uint8_t ndis_udp6csum; /* param_set */ uint8_t ndis_lsov1; /* lsov1_set */ uint8_t ndis_ipsecv1; /* ipsecv1_set */ uint8_t ndis_lsov2_ip4; /* lsov2_set */ uint8_t ndis_lsov2_ip6; /* lsov2_set */ uint8_t ndis_tcp4conn; /* PARAM_NOCHG */ uint8_t ndis_tcp6conn; /* PARAM_NOCHG */ uint32_t ndis_flags; /* 0 */ /* NDIS >= 6.1 */ uint8_t ndis_ipsecv2; /* ipsecv2_set */ uint8_t ndis_ipsecv2_ip4; /* ipsecv2_set */ /* NDIS >= 6.30 */ uint8_t ndis_rsc_ip4; /* rsc_set */ uint8_t ndis_rsc_ip6; /* rsc_set */ uint8_t ndis_encap; /* common_set */ uint8_t ndis_encap_types; /* NDIS_ENCAP_TYPE_ */ }; #define NDIS_OFFLOAD_PARAMS_SIZE sizeof(struct ndis_offload_params) #define NDIS_OFFLOAD_PARAMS_SIZE_6_1 \ __offsetof(struct ndis_offload_params, ndis_rsc_ip4) #define NDIS_OFFLOAD_PARAMS_REV_2 2 /* NDIS 6.1 */ #define NDIS_OFFLOAD_PARAMS_REV_3 3 /* NDIS 6.30 */ /* param_set */ #define NDIS_OFFLOAD_PARAM_NOCHG 0 /* common to all sets */ #define NDIS_OFFLOAD_PARAM_OFF 1 #define NDIS_OFFLOAD_PARAM_TX 2 #define NDIS_OFFLOAD_PARAM_RX 3 #define NDIS_OFFLOAD_PARAM_TXRX 4 /* lsov1_set */ /* NDIS_OFFLOAD_PARAM_NOCHG */ #define NDIS_OFFLOAD_LSOV1_OFF 1 #define NDIS_OFFLOAD_LSOV1_ON 2 /* ipsecv1_set */ /* NDIS_OFFLOAD_PARAM_NOCHG */ #define NDIS_OFFLOAD_IPSECV1_OFF 1 #define NDIS_OFFLOAD_IPSECV1_AH 2 #define NDIS_OFFLOAD_IPSECV1_ESP 3 #define NDIS_OFFLOAD_IPSECV1_AH_ESP 4 /* lsov2_set */ /* NDIS_OFFLOAD_PARAM_NOCHG */ #define NDIS_OFFLOAD_LSOV2_OFF 1 #define NDIS_OFFLOAD_LSOV2_ON 2 /* ipsecv2_set */ /* NDIS_OFFLOAD_PARAM_NOCHG */ #define NDIS_OFFLOAD_IPSECV2_OFF 1 #define NDIS_OFFLOAD_IPSECV2_AH 2 #define NDIS_OFFLOAD_IPSECV2_ESP 3 #define NDIS_OFFLOAD_IPSECV2_AH_ESP 4 /* rsc_set */ /* NDIS_OFFLOAD_PARAM_NOCHG */ #define NDIS_OFFLOAD_RSC_OFF 1 #define NDIS_OFFLOAD_RSC_ON 2 + +/* + * OID_GEN_RSS_CAPABILITIES + * ndis_type: NDIS_OBJTYPE_RSS_CAPS + */ +struct ndis_rss_caps { + struct ndis_object_hdr ndis_hdr; + uint32_t ndis_flags; /* NDIS_RSS_CAP_ */ + uint32_t ndis_nmsi; /* # of MSIs */ + uint32_t ndis_nrxr; /* # of RX rings */ + /* NDIS >= 6.30 */ + uint16_t ndis_nind; /* # of indtbl ent. 
*/ + uint16_t ndis_pad; +}; + +#define NDIS_RSS_CAPS_SIZE \ + __offsetof(struct ndis_rss_caps, ndis_pad) +#define NDIS_RSS_CAPS_SIZE_6_0 \ + __offsetof(struct ndis_rss_caps, ndis_nind) + +#define NDIS_RSS_CAPS_REV_1 1 /* NDIS 6.{0,1,20} */ +#define NDIS_RSS_CAPS_REV_2 2 /* NDIS 6.30 */ + +#define NDIS_RSS_CAP_MSI 0x01000000 +#define NDIS_RSS_CAP_CLASSIFY_ISR 0x02000000 +#define NDIS_RSS_CAP_CLASSIFY_DPC 0x04000000 +#define NDIS_RSS_CAP_MSIX 0x08000000 +#define NDIS_RSS_CAP_IPV4 0x00000100 +#define NDIS_RSS_CAP_IPV6 0x00000200 +#define NDIS_RSS_CAP_IPV6_EX 0x00000400 +#define NDIS_RSS_CAP_HASH_TOEPLITZ 0x00000001 + +/* + * OID_GEN_RSS_PARAMETERS + * ndis_type: NDIS_OBJTYPE_RSS_PARAMS + */ +struct ndis_rss_params { + struct ndis_object_hdr ndis_hdr; + uint16_t ndis_flags; /* NDIS_RSS_FLAG_ */ + uint16_t ndis_bcpu; /* base cpu 0 */ + uint32_t ndis_hash; /* NDIS_HASH_ */ + uint16_t ndis_indsize; /* indirect table */ + uint32_t ndis_indoffset; + uint16_t ndis_keysize; /* hash key */ + uint32_t ndis_keyoffset; + /* NDIS >= 6.20 */ + uint32_t ndis_cpumaskoffset; + uint32_t ndis_cpumaskcnt; + uint32_t ndis_cpumaskentsz; +}; + +#define NDIS_RSS_PARAMS_SIZE sizeof(struct ndis_rss_params) +#define NDIS_RSS_PARAMS_SIZE_6_0 \ + __offsetof(struct ndis_rss_params, ndis_cpumaskoffset) + +#define NDIS_RSS_PARAMS_REV_1 1 /* NDIS 6.0 */ +#define NDIS_RSS_PARAMS_REV_2 2 /* NDIS 6.20 */ + +#define NDIS_RSS_FLAG_BCPU_UNCHG 0x0001 +#define NDIS_RSS_FLAG_HASH_UNCHG 0x0002 +#define NDIS_RSS_FLAG_IND_UNCHG 0x0004 +#define NDIS_RSS_FLAG_KEY_UNCHG 0x0008 +#define NDIS_RSS_FLAG_DISABLE 0x0010 + +/* non-standard convenient struct */ +struct ndis_rssprm_toeplitz { + struct ndis_rss_params rss_params; + /* Toeplitz hash key */ + uint8_t rss_key[NDIS_HASH_KEYSIZE_TOEPLITZ]; + /* Indirect table */ + uint32_t rss_ind[NDIS_HASH_INDCNT]; +}; #endif /* !_NET_NDIS_H_ */ Index: projects/clang390-import/sys/dev/mfi/mfi.c =================================================================== --- projects/clang390-import/sys/dev/mfi/mfi.c (revision 305016) +++ projects/clang390-import/sys/dev/mfi/mfi.c (revision 305017) @@ -1,3797 +1,3797 @@ /*- * Copyright (c) 2006 IronPort Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Copyright (c) 2007 LSI Corp. * Copyright (c) 2007 Rajesh Prabhakaran. 
* All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_mfi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int mfi_alloc_commands(struct mfi_softc *); static int mfi_comms_init(struct mfi_softc *); static int mfi_get_controller_info(struct mfi_softc *); static int mfi_get_log_state(struct mfi_softc *, struct mfi_evt_log_state **); static int mfi_parse_entries(struct mfi_softc *, int, int); static void mfi_data_cb(void *, bus_dma_segment_t *, int, int); static void mfi_startup(void *arg); static void mfi_intr(void *arg); static void mfi_ldprobe(struct mfi_softc *sc); static void mfi_syspdprobe(struct mfi_softc *sc); static void mfi_handle_evt(void *context, int pending); static int mfi_aen_register(struct mfi_softc *sc, int seq, int locale); static void mfi_aen_complete(struct mfi_command *); static int mfi_add_ld(struct mfi_softc *sc, int); static void mfi_add_ld_complete(struct mfi_command *); static int mfi_add_sys_pd(struct mfi_softc *sc, int); static void mfi_add_sys_pd_complete(struct mfi_command *); static struct mfi_command * mfi_bio_command(struct mfi_softc *); static void mfi_bio_complete(struct mfi_command *); static struct mfi_command *mfi_build_ldio(struct mfi_softc *,struct bio*); static struct mfi_command *mfi_build_syspdio(struct mfi_softc *,struct bio*); static int mfi_send_frame(struct mfi_softc *, struct mfi_command *); static int mfi_std_send_frame(struct mfi_softc *, struct mfi_command *); static int mfi_abort(struct mfi_softc *, struct mfi_command **); static int mfi_linux_ioctl_int(struct cdev *, u_long, caddr_t, int, struct thread *); static void mfi_timeout(void *); static int mfi_user_command(struct mfi_softc *, struct mfi_ioc_passthru *); static void mfi_enable_intr_xscale(struct mfi_softc *sc); static void mfi_enable_intr_ppc(struct mfi_softc *sc); static int32_t mfi_read_fw_status_xscale(struct mfi_softc *sc); static int32_t mfi_read_fw_status_ppc(struct mfi_softc *sc); static int mfi_check_clear_intr_xscale(struct mfi_softc *sc); static int mfi_check_clear_intr_ppc(struct mfi_softc *sc); static 
void mfi_issue_cmd_xscale(struct mfi_softc *sc, bus_addr_t bus_add, uint32_t frame_cnt); static void mfi_issue_cmd_ppc(struct mfi_softc *sc, bus_addr_t bus_add, uint32_t frame_cnt); static int mfi_config_lock(struct mfi_softc *sc, uint32_t opcode); static void mfi_config_unlock(struct mfi_softc *sc, int locked); static int mfi_check_command_pre(struct mfi_softc *sc, struct mfi_command *cm); static void mfi_check_command_post(struct mfi_softc *sc, struct mfi_command *cm); static int mfi_check_for_sscd(struct mfi_softc *sc, struct mfi_command *cm); SYSCTL_NODE(_hw, OID_AUTO, mfi, CTLFLAG_RD, 0, "MFI driver parameters"); static int mfi_event_locale = MFI_EVT_LOCALE_ALL; SYSCTL_INT(_hw_mfi, OID_AUTO, event_locale, CTLFLAG_RWTUN, &mfi_event_locale, 0, "event message locale"); static int mfi_event_class = MFI_EVT_CLASS_INFO; SYSCTL_INT(_hw_mfi, OID_AUTO, event_class, CTLFLAG_RWTUN, &mfi_event_class, 0, "event message class"); static int mfi_max_cmds = 128; SYSCTL_INT(_hw_mfi, OID_AUTO, max_cmds, CTLFLAG_RDTUN, &mfi_max_cmds, 0, "Max commands limit (-1 = controller limit)"); static int mfi_detect_jbod_change = 1; SYSCTL_INT(_hw_mfi, OID_AUTO, detect_jbod_change, CTLFLAG_RWTUN, &mfi_detect_jbod_change, 0, "Detect a change to a JBOD"); int mfi_polled_cmd_timeout = MFI_POLL_TIMEOUT_SECS; SYSCTL_INT(_hw_mfi, OID_AUTO, polled_cmd_timeout, CTLFLAG_RWTUN, &mfi_polled_cmd_timeout, 0, "Polled command timeout - used for firmware flash etc (in seconds)"); static int mfi_cmd_timeout = MFI_CMD_TIMEOUT; SYSCTL_INT(_hw_mfi, OID_AUTO, cmd_timeout, CTLFLAG_RWTUN, &mfi_cmd_timeout, 0, "Command timeout (in seconds)"); /* Management interface */ static d_open_t mfi_open; static d_close_t mfi_close; static d_ioctl_t mfi_ioctl; static d_poll_t mfi_poll; static struct cdevsw mfi_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_open = mfi_open, .d_close = mfi_close, .d_ioctl = mfi_ioctl, .d_poll = mfi_poll, .d_name = "mfi", }; MALLOC_DEFINE(M_MFIBUF, "mfibuf", "Buffers for the MFI driver"); #define MFI_INQ_LENGTH SHORT_INQUIRY_LENGTH struct mfi_skinny_dma_info mfi_skinny; static void mfi_enable_intr_xscale(struct mfi_softc *sc) { MFI_WRITE4(sc, MFI_OMSK, 0x01); } static void mfi_enable_intr_ppc(struct mfi_softc *sc) { if (sc->mfi_flags & MFI_FLAGS_1078) { MFI_WRITE4(sc, MFI_ODCR0, 0xFFFFFFFF); MFI_WRITE4(sc, MFI_OMSK, ~MFI_1078_EIM); } else if (sc->mfi_flags & MFI_FLAGS_GEN2) { MFI_WRITE4(sc, MFI_ODCR0, 0xFFFFFFFF); MFI_WRITE4(sc, MFI_OMSK, ~MFI_GEN2_EIM); } else if (sc->mfi_flags & MFI_FLAGS_SKINNY) { MFI_WRITE4(sc, MFI_OMSK, ~0x00000001); } } static int32_t mfi_read_fw_status_xscale(struct mfi_softc *sc) { return MFI_READ4(sc, MFI_OMSG0); } static int32_t mfi_read_fw_status_ppc(struct mfi_softc *sc) { return MFI_READ4(sc, MFI_OSP0); } static int mfi_check_clear_intr_xscale(struct mfi_softc *sc) { int32_t status; status = MFI_READ4(sc, MFI_OSTS); if ((status & MFI_OSTS_INTR_VALID) == 0) return 1; MFI_WRITE4(sc, MFI_OSTS, status); return 0; } static int mfi_check_clear_intr_ppc(struct mfi_softc *sc) { int32_t status; status = MFI_READ4(sc, MFI_OSTS); if (sc->mfi_flags & MFI_FLAGS_1078) { if (!(status & MFI_1078_RM)) { return 1; } } else if (sc->mfi_flags & MFI_FLAGS_GEN2) { if (!(status & MFI_GEN2_RM)) { return 1; } } else if (sc->mfi_flags & MFI_FLAGS_SKINNY) { if (!(status & MFI_SKINNY_RM)) { return 1; } } if (sc->mfi_flags & MFI_FLAGS_SKINNY) MFI_WRITE4(sc, MFI_OSTS, status); else MFI_WRITE4(sc, MFI_ODCR0, status); return 0; } static void mfi_issue_cmd_xscale(struct mfi_softc *sc, bus_addr_t bus_add, uint32_t 
frame_cnt) { MFI_WRITE4(sc, MFI_IQP,(bus_add >>3)|frame_cnt); } static void mfi_issue_cmd_ppc(struct mfi_softc *sc, bus_addr_t bus_add, uint32_t frame_cnt) { if (sc->mfi_flags & MFI_FLAGS_SKINNY) { MFI_WRITE4(sc, MFI_IQPL, (bus_add | frame_cnt <<1)|1 ); MFI_WRITE4(sc, MFI_IQPH, 0x00000000); } else { MFI_WRITE4(sc, MFI_IQP, (bus_add | frame_cnt <<1)|1 ); } } int mfi_transition_firmware(struct mfi_softc *sc) { uint32_t fw_state, cur_state; int max_wait, i; uint32_t cur_abs_reg_val = 0; uint32_t prev_abs_reg_val = 0; cur_abs_reg_val = sc->mfi_read_fw_status(sc); fw_state = cur_abs_reg_val & MFI_FWSTATE_MASK; while (fw_state != MFI_FWSTATE_READY) { if (bootverbose) device_printf(sc->mfi_dev, "Waiting for firmware to " "become ready\n"); cur_state = fw_state; switch (fw_state) { case MFI_FWSTATE_FAULT: device_printf(sc->mfi_dev, "Firmware fault\n"); return (ENXIO); case MFI_FWSTATE_WAIT_HANDSHAKE: if (sc->mfi_flags & MFI_FLAGS_SKINNY || sc->mfi_flags & MFI_FLAGS_TBOLT) MFI_WRITE4(sc, MFI_SKINNY_IDB, MFI_FWINIT_CLEAR_HANDSHAKE); else MFI_WRITE4(sc, MFI_IDB, MFI_FWINIT_CLEAR_HANDSHAKE); max_wait = MFI_RESET_WAIT_TIME; break; case MFI_FWSTATE_OPERATIONAL: if (sc->mfi_flags & MFI_FLAGS_SKINNY || sc->mfi_flags & MFI_FLAGS_TBOLT) MFI_WRITE4(sc, MFI_SKINNY_IDB, 7); else MFI_WRITE4(sc, MFI_IDB, MFI_FWINIT_READY); max_wait = MFI_RESET_WAIT_TIME; break; case MFI_FWSTATE_UNDEFINED: case MFI_FWSTATE_BB_INIT: max_wait = MFI_RESET_WAIT_TIME; break; case MFI_FWSTATE_FW_INIT_2: max_wait = MFI_RESET_WAIT_TIME; break; case MFI_FWSTATE_FW_INIT: case MFI_FWSTATE_FLUSH_CACHE: max_wait = MFI_RESET_WAIT_TIME; break; case MFI_FWSTATE_DEVICE_SCAN: max_wait = MFI_RESET_WAIT_TIME; /* wait for 180 seconds */ prev_abs_reg_val = cur_abs_reg_val; break; case MFI_FWSTATE_BOOT_MESSAGE_PENDING: if (sc->mfi_flags & MFI_FLAGS_SKINNY || sc->mfi_flags & MFI_FLAGS_TBOLT) MFI_WRITE4(sc, MFI_SKINNY_IDB, MFI_FWINIT_HOTPLUG); else MFI_WRITE4(sc, MFI_IDB, MFI_FWINIT_HOTPLUG); max_wait = MFI_RESET_WAIT_TIME; break; default: device_printf(sc->mfi_dev, "Unknown firmware state %#x\n", fw_state); return (ENXIO); } for (i = 0; i < (max_wait * 10); i++) { cur_abs_reg_val = sc->mfi_read_fw_status(sc); fw_state = cur_abs_reg_val & MFI_FWSTATE_MASK; if (fw_state == cur_state) DELAY(100000); else break; } if (fw_state == MFI_FWSTATE_DEVICE_SCAN) { /* Check the device scanning progress */ if (prev_abs_reg_val != cur_abs_reg_val) { continue; } } if (fw_state == cur_state) { device_printf(sc->mfi_dev, "Firmware stuck in state " "%#x\n", fw_state); return (ENXIO); } } return (0); } static void mfi_addr_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { bus_addr_t *addr; addr = arg; *addr = segs[0].ds_addr; } int mfi_attach(struct mfi_softc *sc) { uint32_t status; int error, commsz, framessz, sensesz; int frames, unit, max_fw_sge, max_fw_cmds; uint32_t tb_mem_size = 0; struct cdev *dev_t; if (sc == NULL) return EINVAL; device_printf(sc->mfi_dev, "Megaraid SAS driver Ver %s \n", MEGASAS_VERSION); mtx_init(&sc->mfi_io_lock, "MFI I/O lock", NULL, MTX_DEF); sx_init(&sc->mfi_config_lock, "MFI config"); TAILQ_INIT(&sc->mfi_ld_tqh); TAILQ_INIT(&sc->mfi_syspd_tqh); TAILQ_INIT(&sc->mfi_ld_pend_tqh); TAILQ_INIT(&sc->mfi_syspd_pend_tqh); TAILQ_INIT(&sc->mfi_evt_queue); TASK_INIT(&sc->mfi_evt_task, 0, mfi_handle_evt, sc); TASK_INIT(&sc->mfi_map_sync_task, 0, mfi_handle_map_sync, sc); TAILQ_INIT(&sc->mfi_aen_pids); TAILQ_INIT(&sc->mfi_cam_ccbq); mfi_initq_free(sc); mfi_initq_ready(sc); mfi_initq_busy(sc); mfi_initq_bio(sc); sc->adpreset = 0; 
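/*
 * Illustrative sketch, excluded from the build: the shape of the
 * per-state wait in mfi_transition_firmware() above.  Each state gets
 * max_wait seconds, polled at 100ms granularity, and the loop exits as
 * soon as the state field moves.  The ex_*() names are hypothetical.
 */
#if 0
extern void ex_delay_us(int us);

static int
ex_wait_state_change(uint32_t (*read_status)(void *), void *sc,
    uint32_t cur_state, uint32_t state_mask, int max_wait_secs)
{
	uint32_t fw_state = cur_state;
	int i;

	for (i = 0; i < max_wait_secs * 10; i++) {
		fw_state = read_status(sc) & state_mask;
		if (fw_state != cur_state)
			break;
		ex_delay_us(100000);	/* DELAY(100000): 100ms */
	}
	return (fw_state == cur_state ? -1 : 0);	/* -1: stuck */
}
#endif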
sc->last_seq_num = 0; sc->disableOnlineCtrlReset = 1; sc->issuepend_done = 1; sc->hw_crit_error = 0; if (sc->mfi_flags & MFI_FLAGS_1064R) { sc->mfi_enable_intr = mfi_enable_intr_xscale; sc->mfi_read_fw_status = mfi_read_fw_status_xscale; sc->mfi_check_clear_intr = mfi_check_clear_intr_xscale; sc->mfi_issue_cmd = mfi_issue_cmd_xscale; } else if (sc->mfi_flags & MFI_FLAGS_TBOLT) { sc->mfi_enable_intr = mfi_tbolt_enable_intr_ppc; sc->mfi_disable_intr = mfi_tbolt_disable_intr_ppc; sc->mfi_read_fw_status = mfi_tbolt_read_fw_status_ppc; sc->mfi_check_clear_intr = mfi_tbolt_check_clear_intr_ppc; sc->mfi_issue_cmd = mfi_tbolt_issue_cmd_ppc; sc->mfi_adp_reset = mfi_tbolt_adp_reset; sc->mfi_tbolt = 1; TAILQ_INIT(&sc->mfi_cmd_tbolt_tqh); } else { sc->mfi_enable_intr = mfi_enable_intr_ppc; sc->mfi_read_fw_status = mfi_read_fw_status_ppc; sc->mfi_check_clear_intr = mfi_check_clear_intr_ppc; sc->mfi_issue_cmd = mfi_issue_cmd_ppc; } /* Before we get too far, see if the firmware is working */ if ((error = mfi_transition_firmware(sc)) != 0) { device_printf(sc->mfi_dev, "Firmware not in READY state, " "error %d\n", error); return (ENXIO); } /* Start: LSIP200113393 */ if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MEGASAS_MAX_NAME*sizeof(bus_addr_t), /* maxsize */ 1, /* msegments */ MEGASAS_MAX_NAME*sizeof(bus_addr_t), /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->verbuf_h_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate verbuf_h_dmat DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->verbuf_h_dmat, (void **)&sc->verbuf, BUS_DMA_NOWAIT, &sc->verbuf_h_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate verbuf_h_dmamap memory\n"); return (ENOMEM); } bzero(sc->verbuf, MEGASAS_MAX_NAME*sizeof(bus_addr_t)); bus_dmamap_load(sc->verbuf_h_dmat, sc->verbuf_h_dmamap, sc->verbuf, MEGASAS_MAX_NAME*sizeof(bus_addr_t), mfi_addr_cb, &sc->verbuf_h_busaddr, 0); /* End: LSIP200113393 */ /* * Get information needed for sizing the contiguous memory for the * frame pool. Size down the sgl parameter since we know that * we will never need more than what's required for MAXPHYS. * It would be nice if these constants were available at runtime * instead of compile time. 
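 */

/*
 * A sketch of the decode performed right after this comment: the
 * firmware status register packs the command-slot count in its low bits
 * and the S/G limit above bit 16.  The EX_* masks are placeholders for
 * the MFI_FWSTATE_MAXCMD_MASK and MFI_FWSTATE_MAXSGL_MASK values in the
 * real headers.
 */
#include <stdint.h>

#define EX_MAXCMD_MASK	0x0000ffffu	/* placeholder */
#define EX_MAXSGL_MASK	0x00ff0000u	/* placeholder */

static void
ex_decode_fw_status(uint32_t status, int *max_cmds, int *max_sge)
{
	*max_cmds = (int)(status & EX_MAXCMD_MASK);
	*max_sge = (int)((status & EX_MAXSGL_MASK) >> 16);
}

/*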
*/ status = sc->mfi_read_fw_status(sc); max_fw_cmds = status & MFI_FWSTATE_MAXCMD_MASK; if (mfi_max_cmds > 0 && mfi_max_cmds < max_fw_cmds) { device_printf(sc->mfi_dev, "FW MaxCmds = %d, limiting to %d\n", max_fw_cmds, mfi_max_cmds); sc->mfi_max_fw_cmds = mfi_max_cmds; } else { sc->mfi_max_fw_cmds = max_fw_cmds; } max_fw_sge = (status & MFI_FWSTATE_MAXSGL_MASK) >> 16; sc->mfi_max_sge = min(max_fw_sge, ((MFI_MAXPHYS / PAGE_SIZE) + 1)); /* ThunderBolt Support get the contiguous memory */ if (sc->mfi_flags & MFI_FLAGS_TBOLT) { mfi_tbolt_init_globals(sc); device_printf(sc->mfi_dev, "MaxCmd = %d, Drv MaxCmd = %d, " "MaxSgl = %d, state = %#x\n", max_fw_cmds, sc->mfi_max_fw_cmds, sc->mfi_max_sge, status); tb_mem_size = mfi_tbolt_get_memory_requirement(sc); if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ tb_mem_size, /* maxsize */ 1, /* msegments */ tb_mem_size, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_tb_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate comms DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_tb_dmat, (void **)&sc->request_message_pool, BUS_DMA_NOWAIT, &sc->mfi_tb_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate comms memory\n"); return (ENOMEM); } bzero(sc->request_message_pool, tb_mem_size); bus_dmamap_load(sc->mfi_tb_dmat, sc->mfi_tb_dmamap, sc->request_message_pool, tb_mem_size, mfi_addr_cb, &sc->mfi_tb_busaddr, 0); /* For ThunderBolt memory init */ if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 0x100, 0, /* alignmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MFI_FRAME_SIZE, /* maxsize */ 1, /* msegments */ MFI_FRAME_SIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_tb_init_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate init DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_tb_init_dmat, (void **)&sc->mfi_tb_init, BUS_DMA_NOWAIT, &sc->mfi_tb_init_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate init memory\n"); return (ENOMEM); } bzero(sc->mfi_tb_init, MFI_FRAME_SIZE); bus_dmamap_load(sc->mfi_tb_init_dmat, sc->mfi_tb_init_dmamap, sc->mfi_tb_init, MFI_FRAME_SIZE, mfi_addr_cb, &sc->mfi_tb_init_busaddr, 0); if (mfi_tbolt_init_desc_pool(sc, sc->request_message_pool, tb_mem_size)) { device_printf(sc->mfi_dev, "Thunderbolt pool preparation error\n"); return 0; } /* Allocate DMA memory mapping for MPI2 IOC Init descriptor, we are taking it different from what we have allocated for Request and reply descriptors to avoid confusion later */ tb_mem_size = sizeof(struct MPI2_IOC_INIT_REQUEST); if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ tb_mem_size, /* maxsize */ 1, /* msegments */ tb_mem_size, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_tb_ioc_init_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate comms DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_tb_ioc_init_dmat, (void **)&sc->mfi_tb_ioc_init_desc, BUS_DMA_NOWAIT, &sc->mfi_tb_ioc_init_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate comms memory\n"); return (ENOMEM); } bzero(sc->mfi_tb_ioc_init_desc, tb_mem_size); bus_dmamap_load(sc->mfi_tb_ioc_init_dmat, sc->mfi_tb_ioc_init_dmamap, 
sc->mfi_tb_ioc_init_desc, tb_mem_size, mfi_addr_cb, &sc->mfi_tb_ioc_init_busaddr, 0); } /* * Create the dma tag for data buffers. Used both for block I/O * and for various internal data queries. */ if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ sc->mfi_max_sge, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ busdma_lock_mutex, /* lockfunc */ &sc->mfi_io_lock, /* lockfuncarg */ &sc->mfi_buffer_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate buffer DMA tag\n"); return (ENOMEM); } /* * Allocate DMA memory for the comms queues. Keep it under 4GB for * efficiency. The mfi_hwcomms struct includes space for 1 reply queue * entry, so the calculated size here will be will be 1 more than * mfi_max_fw_cmds. This is apparently a requirement of the hardware. */ commsz = (sizeof(uint32_t) * sc->mfi_max_fw_cmds) + sizeof(struct mfi_hwcomms); if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ commsz, /* maxsize */ 1, /* msegments */ commsz, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_comms_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate comms DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_comms_dmat, (void **)&sc->mfi_comms, BUS_DMA_NOWAIT, &sc->mfi_comms_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate comms memory\n"); return (ENOMEM); } bzero(sc->mfi_comms, commsz); bus_dmamap_load(sc->mfi_comms_dmat, sc->mfi_comms_dmamap, sc->mfi_comms, commsz, mfi_addr_cb, &sc->mfi_comms_busaddr, 0); /* * Allocate DMA memory for the command frames. Keep them in the * lower 4GB for efficiency. Calculate the size of the commands at * the same time; each command is one 64 byte frame plus a set of * additional frames for holding sg lists or other data. * The assumption here is that the SG list will start at the second * frame and not use the unused bytes in the first frame. While this * isn't technically correct, it simplifies the calculation and allows * for command frames that might be larger than an mfi_io_frame. 
*/ if (sizeof(bus_addr_t) == 8) { sc->mfi_sge_size = sizeof(struct mfi_sg64); sc->mfi_flags |= MFI_FLAGS_SG64; } else { sc->mfi_sge_size = sizeof(struct mfi_sg32); } if (sc->mfi_flags & MFI_FLAGS_SKINNY) sc->mfi_sge_size = sizeof(struct mfi_sg_skinny); frames = (sc->mfi_sge_size * sc->mfi_max_sge - 1) / MFI_FRAME_SIZE + 2; sc->mfi_cmd_size = frames * MFI_FRAME_SIZE; framessz = sc->mfi_cmd_size * sc->mfi_max_fw_cmds; if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 64, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ framessz, /* maxsize */ 1, /* nsegments */ framessz, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_frames_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate frame DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_frames_dmat, (void **)&sc->mfi_frames, BUS_DMA_NOWAIT, &sc->mfi_frames_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate frames memory\n"); return (ENOMEM); } bzero(sc->mfi_frames, framessz); bus_dmamap_load(sc->mfi_frames_dmat, sc->mfi_frames_dmamap, sc->mfi_frames, framessz, mfi_addr_cb, &sc->mfi_frames_busaddr,0); /* * Allocate DMA memory for the frame sense data. Keep them in the * lower 4GB for efficiency */ sensesz = sc->mfi_max_fw_cmds * MFI_SENSE_LEN; if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 4, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sensesz, /* maxsize */ 1, /* nsegments */ sensesz, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_sense_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate sense DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_sense_dmat, (void **)&sc->mfi_sense, BUS_DMA_NOWAIT, &sc->mfi_sense_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate sense memory\n"); return (ENOMEM); } bus_dmamap_load(sc->mfi_sense_dmat, sc->mfi_sense_dmamap, sc->mfi_sense, sensesz, mfi_addr_cb, &sc->mfi_sense_busaddr, 0); if ((error = mfi_alloc_commands(sc)) != 0) return (error); /* Before moving the FW to operational state, check whether * hostmemory is required by the FW or not */ /* ThunderBolt MFI_IOC2 INIT */ if (sc->mfi_flags & MFI_FLAGS_TBOLT) { sc->mfi_disable_intr(sc); mtx_lock(&sc->mfi_io_lock); if ((error = mfi_tbolt_init_MFI_queue(sc)) != 0) { device_printf(sc->mfi_dev, "TB Init has failed with error %d\n",error); mtx_unlock(&sc->mfi_io_lock); return error; } mtx_unlock(&sc->mfi_io_lock); if ((error = mfi_tbolt_alloc_cmd(sc)) != 0) return error; if (bus_setup_intr(sc->mfi_dev, sc->mfi_irq, INTR_MPSAFE|INTR_TYPE_BIO, NULL, mfi_intr_tbolt, sc, &sc->mfi_intr)) { device_printf(sc->mfi_dev, "Cannot set up interrupt\n"); return (EINVAL); } sc->mfi_intr_ptr = mfi_intr_tbolt; sc->mfi_enable_intr(sc); } else { if ((error = mfi_comms_init(sc)) != 0) return (error); if (bus_setup_intr(sc->mfi_dev, sc->mfi_irq, INTR_MPSAFE|INTR_TYPE_BIO, NULL, mfi_intr, sc, &sc->mfi_intr)) { device_printf(sc->mfi_dev, "Cannot set up interrupt\n"); return (EINVAL); } sc->mfi_intr_ptr = mfi_intr; sc->mfi_enable_intr(sc); } if ((error = mfi_get_controller_info(sc)) != 0) return (error); sc->disableOnlineCtrlReset = 0; /* Register a config hook to probe the bus for arrays */ sc->mfi_ich.ich_func = mfi_startup; sc->mfi_ich.ich_arg = sc; if (config_intrhook_establish(&sc->mfi_ich) != 0) { device_printf(sc->mfi_dev, "Cannot establish configuration " "hook\n"); return (EINVAL); } 
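/*
 * Illustrative sketch, excluded from the build: the per-command frame
 * count computed earlier in mfi_attach().  The S/G list is assumed to
 * begin at the second 64-byte frame, so its bytes are rounded up to
 * whole frames and one extra frame is added for the command header.
 */
#if 0
static int
ex_frames_per_command(int sge_size, int max_sge, int frame_size)
{
	/* ceil(sgl bytes / frame size) + 1 header frame */
	return ((sge_size * max_sge - 1) / frame_size + 2);
}
#endif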
mtx_lock(&sc->mfi_io_lock); if ((error = mfi_aen_setup(sc, 0), 0) != 0) { mtx_unlock(&sc->mfi_io_lock); return (error); } mtx_unlock(&sc->mfi_io_lock); /* * Register a shutdown handler. */ if ((sc->mfi_eh = EVENTHANDLER_REGISTER(shutdown_final, mfi_shutdown, sc, SHUTDOWN_PRI_DEFAULT)) == NULL) { device_printf(sc->mfi_dev, "Warning: shutdown event " "registration failed\n"); } /* * Create the control device for doing management */ unit = device_get_unit(sc->mfi_dev); sc->mfi_cdev = make_dev(&mfi_cdevsw, unit, UID_ROOT, GID_OPERATOR, 0640, "mfi%d", unit); if (unit == 0) make_dev_alias_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev_t, sc->mfi_cdev, "%s", "megaraid_sas_ioctl_node"); if (sc->mfi_cdev != NULL) sc->mfi_cdev->si_drv1 = sc; SYSCTL_ADD_INT(device_get_sysctl_ctx(sc->mfi_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(sc->mfi_dev)), OID_AUTO, "delete_busy_volumes", CTLFLAG_RW, &sc->mfi_delete_busy_volumes, 0, "Allow removal of busy volumes"); SYSCTL_ADD_INT(device_get_sysctl_ctx(sc->mfi_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(sc->mfi_dev)), OID_AUTO, "keep_deleted_volumes", CTLFLAG_RW, &sc->mfi_keep_deleted_volumes, 0, "Don't detach the mfid device for a busy volume that is deleted"); device_add_child(sc->mfi_dev, "mfip", -1); bus_generic_attach(sc->mfi_dev); /* Start the timeout watchdog */ callout_init(&sc->mfi_watchdog_callout, 1); callout_reset(&sc->mfi_watchdog_callout, mfi_cmd_timeout * hz, mfi_timeout, sc); if (sc->mfi_flags & MFI_FLAGS_TBOLT) { mtx_lock(&sc->mfi_io_lock); mfi_tbolt_sync_map_info(sc); mtx_unlock(&sc->mfi_io_lock); } return (0); } static int mfi_alloc_commands(struct mfi_softc *sc) { struct mfi_command *cm; int i, j; /* * XXX Should we allocate all the commands up front, or allocate on * demand later like 'aac' does? */ sc->mfi_commands = malloc(sizeof(sc->mfi_commands[0]) * sc->mfi_max_fw_cmds, M_MFIBUF, M_WAITOK | M_ZERO); for (i = 0; i < sc->mfi_max_fw_cmds; i++) { cm = &sc->mfi_commands[i]; cm->cm_frame = (union mfi_frame *)((uintptr_t)sc->mfi_frames + sc->mfi_cmd_size * i); cm->cm_frame_busaddr = sc->mfi_frames_busaddr + sc->mfi_cmd_size * i; cm->cm_frame->header.context = i; cm->cm_sense = &sc->mfi_sense[i]; cm->cm_sense_busaddr= sc->mfi_sense_busaddr + MFI_SENSE_LEN * i; cm->cm_sc = sc; cm->cm_index = i; if (bus_dmamap_create(sc->mfi_buffer_dmat, 0, &cm->cm_dmamap) == 0) { mtx_lock(&sc->mfi_io_lock); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); } else { device_printf(sc->mfi_dev, "Failed to allocate %d " "command blocks, only allocated %d\n", sc->mfi_max_fw_cmds, i - 1); for (j = 0; j < i; j++) { cm = &sc->mfi_commands[i]; bus_dmamap_destroy(sc->mfi_buffer_dmat, cm->cm_dmamap); } free(sc->mfi_commands, M_MFIBUF); sc->mfi_commands = NULL; return (ENOMEM); } } return (0); } void mfi_release_command(struct mfi_command *cm) { struct mfi_frame_header *hdr; uint32_t *hdr_data; mtx_assert(&cm->cm_sc->mfi_io_lock, MA_OWNED); /* * Zero out the important fields of the frame, but make sure the * context field is preserved. For efficiency, handle the fields * as 32 bit words. Clear out the first S/G entry too for safety. */ hdr = &cm->cm_frame->header; if (cm->cm_data != NULL && hdr->sg_count) { cm->cm_sg->sg32[0].len = 0; cm->cm_sg->sg32[0].addr = 0; } /* * Command may be on other queues e.g. 
busy queue depending on the * flow of a previous call to mfi_mapcmd, so ensure its dequeued * properly */ if ((cm->cm_flags & MFI_ON_MFIQ_BUSY) != 0) mfi_remove_busy(cm); if ((cm->cm_flags & MFI_ON_MFIQ_READY) != 0) mfi_remove_ready(cm); /* We're not expecting it to be on any other queue but check */ if ((cm->cm_flags & MFI_ON_MFIQ_MASK) != 0) { panic("Command %p is still on another queue, flags = %#x", cm, cm->cm_flags); } /* tbolt cleanup */ if ((cm->cm_flags & MFI_CMD_TBOLT) != 0) { mfi_tbolt_return_cmd(cm->cm_sc, cm->cm_sc->mfi_cmd_pool_tbolt[cm->cm_extra_frames - 1], cm); } hdr_data = (uint32_t *)cm->cm_frame; hdr_data[0] = 0; /* cmd, sense_len, cmd_status, scsi_status */ hdr_data[1] = 0; /* target_id, lun_id, cdb_len, sg_count */ hdr_data[4] = 0; /* flags, timeout */ hdr_data[5] = 0; /* data_len */ cm->cm_extra_frames = 0; cm->cm_flags = 0; cm->cm_complete = NULL; cm->cm_private = NULL; cm->cm_data = NULL; cm->cm_sg = 0; cm->cm_total_frame_size = 0; cm->retry_for_fw_reset = 0; mfi_enqueue_free(cm); } int mfi_dcmd_command(struct mfi_softc *sc, struct mfi_command **cmp, uint32_t opcode, void **bufp, size_t bufsize) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd; void *buf = NULL; uint32_t context = 0; mtx_assert(&sc->mfi_io_lock, MA_OWNED); cm = mfi_dequeue_free(sc); if (cm == NULL) return (EBUSY); /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; if ((bufsize > 0) && (bufp != NULL)) { if (*bufp == NULL) { buf = malloc(bufsize, M_MFIBUF, M_NOWAIT|M_ZERO); if (buf == NULL) { mfi_release_command(cm); return (ENOMEM); } *bufp = buf; } else { buf = *bufp; } } dcmd = &cm->cm_frame->dcmd; bzero(dcmd->mbox, MFI_MBOX_SIZE); dcmd->header.cmd = MFI_CMD_DCMD; dcmd->header.timeout = 0; dcmd->header.flags = 0; dcmd->header.data_len = bufsize; dcmd->header.scsi_status = 0; dcmd->opcode = opcode; cm->cm_sg = &dcmd->sgl; cm->cm_total_frame_size = MFI_DCMD_FRAME_SIZE; cm->cm_flags = 0; cm->cm_data = buf; cm->cm_private = buf; cm->cm_len = bufsize; *cmp = cm; if ((bufp != NULL) && (*bufp == NULL) && (buf != NULL)) *bufp = buf; return (0); } static int mfi_comms_init(struct mfi_softc *sc) { struct mfi_command *cm; struct mfi_init_frame *init; struct mfi_init_qinfo *qinfo; int error; uint32_t context = 0; mtx_lock(&sc->mfi_io_lock); if ((cm = mfi_dequeue_free(sc)) == NULL) { mtx_unlock(&sc->mfi_io_lock); return (EBUSY); } /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; /* * Abuse the SG list area of the frame to hold the init_qinfo * object; */ init = &cm->cm_frame->init; qinfo = (struct mfi_init_qinfo *)((uintptr_t)init + MFI_FRAME_SIZE); bzero(qinfo, sizeof(struct mfi_init_qinfo)); qinfo->rq_entries = sc->mfi_max_fw_cmds + 1; qinfo->rq_addr_lo = sc->mfi_comms_busaddr + offsetof(struct mfi_hwcomms, hw_reply_q); qinfo->pi_addr_lo = sc->mfi_comms_busaddr + offsetof(struct mfi_hwcomms, hw_pi); qinfo->ci_addr_lo = sc->mfi_comms_busaddr + offsetof(struct mfi_hwcomms, hw_ci); init->header.cmd = MFI_CMD_INIT; init->header.data_len = sizeof(struct mfi_init_qinfo); init->qinfo_new_addr_lo = cm->cm_frame_busaddr + MFI_FRAME_SIZE; cm->cm_data = NULL; cm->cm_flags = MFI_CMD_POLLED; if ((error = mfi_mapcmd(sc, cm)) != 0) device_printf(sc->mfi_dev, "failed to send init command\n"); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); return (error); } static int mfi_get_controller_info(struct 
mfi_softc *sc) { struct mfi_command *cm = NULL; struct mfi_ctrl_info *ci = NULL; uint32_t max_sectors_1, max_sectors_2; int error; mtx_lock(&sc->mfi_io_lock); error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_GETINFO, (void **)&ci, sizeof(*ci)); if (error) goto out; cm->cm_flags = MFI_CMD_DATAIN | MFI_CMD_POLLED; if ((error = mfi_mapcmd(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Failed to get controller info\n"); sc->mfi_max_io = (sc->mfi_max_sge - 1) * PAGE_SIZE / MFI_SECTOR_LEN; error = 0; goto out; } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); max_sectors_1 = (1 << ci->stripe_sz_ops.max) * ci->max_strips_per_io; max_sectors_2 = ci->max_request_size; sc->mfi_max_io = min(max_sectors_1, max_sectors_2); sc->disableOnlineCtrlReset = ci->properties.OnOffProperties.disableOnlineCtrlReset; out: if (ci) free(ci, M_MFIBUF); if (cm) mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); return (error); } static int mfi_get_log_state(struct mfi_softc *sc, struct mfi_evt_log_state **log_state) { struct mfi_command *cm = NULL; int error; mtx_assert(&sc->mfi_io_lock, MA_OWNED); error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_EVENT_GETINFO, (void **)log_state, sizeof(**log_state)); if (error) goto out; cm->cm_flags = MFI_CMD_DATAIN | MFI_CMD_POLLED; if ((error = mfi_mapcmd(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Failed to get log state\n"); goto out; } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); out: if (cm) mfi_release_command(cm); return (error); } int mfi_aen_setup(struct mfi_softc *sc, uint32_t seq_start) { struct mfi_evt_log_state *log_state = NULL; union mfi_evt class_locale; int error = 0; uint32_t seq; mtx_assert(&sc->mfi_io_lock, MA_OWNED); class_locale.members.reserved = 0; class_locale.members.locale = mfi_event_locale; class_locale.members.evt_class = mfi_event_class; if (seq_start == 0) { if ((error = mfi_get_log_state(sc, &log_state)) != 0) goto out; sc->mfi_boot_seq_num = log_state->boot_seq_num; /* * Walk through any events that fired since the last * shutdown. */ if ((error = mfi_parse_entries(sc, log_state->shutdown_seq_num, log_state->newest_seq_num)) != 0) goto out; seq = log_state->newest_seq_num; } else seq = seq_start; error = mfi_aen_register(sc, seq, class_locale.word); out: free(log_state, M_MFIBUF); return (error); } int mfi_wait_command(struct mfi_softc *sc, struct mfi_command *cm) { mtx_assert(&sc->mfi_io_lock, MA_OWNED); cm->cm_complete = NULL; /* * MegaCli can issue a DCMD of 0. 
In this case do nothing * and return 0 to it as status */ if (cm->cm_frame->dcmd.opcode == 0) { cm->cm_frame->header.cmd_status = MFI_STAT_OK; cm->cm_error = 0; return (cm->cm_error); } mfi_enqueue_ready(cm); mfi_startio(sc); if ((cm->cm_flags & MFI_CMD_COMPLETED) == 0) msleep(cm, &sc->mfi_io_lock, PRIBIO, "mfiwait", 0); return (cm->cm_error); } void mfi_free(struct mfi_softc *sc) { struct mfi_command *cm; int i; callout_drain(&sc->mfi_watchdog_callout); if (sc->mfi_cdev != NULL) destroy_dev(sc->mfi_cdev); if (sc->mfi_commands != NULL) { for (i = 0; i < sc->mfi_max_fw_cmds; i++) { cm = &sc->mfi_commands[i]; bus_dmamap_destroy(sc->mfi_buffer_dmat, cm->cm_dmamap); } free(sc->mfi_commands, M_MFIBUF); sc->mfi_commands = NULL; } if (sc->mfi_intr) bus_teardown_intr(sc->mfi_dev, sc->mfi_irq, sc->mfi_intr); if (sc->mfi_irq != NULL) bus_release_resource(sc->mfi_dev, SYS_RES_IRQ, sc->mfi_irq_rid, sc->mfi_irq); if (sc->mfi_sense_busaddr != 0) bus_dmamap_unload(sc->mfi_sense_dmat, sc->mfi_sense_dmamap); if (sc->mfi_sense != NULL) bus_dmamem_free(sc->mfi_sense_dmat, sc->mfi_sense, sc->mfi_sense_dmamap); if (sc->mfi_sense_dmat != NULL) bus_dma_tag_destroy(sc->mfi_sense_dmat); if (sc->mfi_frames_busaddr != 0) bus_dmamap_unload(sc->mfi_frames_dmat, sc->mfi_frames_dmamap); if (sc->mfi_frames != NULL) bus_dmamem_free(sc->mfi_frames_dmat, sc->mfi_frames, sc->mfi_frames_dmamap); if (sc->mfi_frames_dmat != NULL) bus_dma_tag_destroy(sc->mfi_frames_dmat); if (sc->mfi_comms_busaddr != 0) bus_dmamap_unload(sc->mfi_comms_dmat, sc->mfi_comms_dmamap); if (sc->mfi_comms != NULL) bus_dmamem_free(sc->mfi_comms_dmat, sc->mfi_comms, sc->mfi_comms_dmamap); if (sc->mfi_comms_dmat != NULL) bus_dma_tag_destroy(sc->mfi_comms_dmat); /* ThunderBolt contiguous memory free here */ if (sc->mfi_flags & MFI_FLAGS_TBOLT) { if (sc->mfi_tb_busaddr != 0) bus_dmamap_unload(sc->mfi_tb_dmat, sc->mfi_tb_dmamap); if (sc->request_message_pool != NULL) bus_dmamem_free(sc->mfi_tb_dmat, sc->request_message_pool, sc->mfi_tb_dmamap); if (sc->mfi_tb_dmat != NULL) bus_dma_tag_destroy(sc->mfi_tb_dmat); /* Version buffer memory free */ /* Start LSIP200113393 */ if (sc->verbuf_h_busaddr != 0) bus_dmamap_unload(sc->verbuf_h_dmat, sc->verbuf_h_dmamap); if (sc->verbuf != NULL) bus_dmamem_free(sc->verbuf_h_dmat, sc->verbuf, sc->verbuf_h_dmamap); if (sc->verbuf_h_dmat != NULL) bus_dma_tag_destroy(sc->verbuf_h_dmat); /* End LSIP200113393 */ /* ThunderBolt INIT packet memory Free */ if (sc->mfi_tb_init_busaddr != 0) bus_dmamap_unload(sc->mfi_tb_init_dmat, sc->mfi_tb_init_dmamap); if (sc->mfi_tb_init != NULL) bus_dmamem_free(sc->mfi_tb_init_dmat, sc->mfi_tb_init, sc->mfi_tb_init_dmamap); if (sc->mfi_tb_init_dmat != NULL) bus_dma_tag_destroy(sc->mfi_tb_init_dmat); /* ThunderBolt IOC Init Desc memory free here */ if (sc->mfi_tb_ioc_init_busaddr != 0) bus_dmamap_unload(sc->mfi_tb_ioc_init_dmat, sc->mfi_tb_ioc_init_dmamap); if (sc->mfi_tb_ioc_init_desc != NULL) bus_dmamem_free(sc->mfi_tb_ioc_init_dmat, sc->mfi_tb_ioc_init_desc, sc->mfi_tb_ioc_init_dmamap); if (sc->mfi_tb_ioc_init_dmat != NULL) bus_dma_tag_destroy(sc->mfi_tb_ioc_init_dmat); if (sc->mfi_cmd_pool_tbolt != NULL) { for (int i = 0; i < sc->mfi_max_fw_cmds; i++) { if (sc->mfi_cmd_pool_tbolt[i] != NULL) { free(sc->mfi_cmd_pool_tbolt[i], M_MFIBUF); sc->mfi_cmd_pool_tbolt[i] = NULL; } } free(sc->mfi_cmd_pool_tbolt, M_MFIBUF); sc->mfi_cmd_pool_tbolt = NULL; } if (sc->request_desc_pool != NULL) { free(sc->request_desc_pool, M_MFIBUF); sc->request_desc_pool = NULL; } } if (sc->mfi_buffer_dmat != NULL) 
bus_dma_tag_destroy(sc->mfi_buffer_dmat); if (sc->mfi_parent_dmat != NULL) bus_dma_tag_destroy(sc->mfi_parent_dmat); if (mtx_initialized(&sc->mfi_io_lock)) { mtx_destroy(&sc->mfi_io_lock); sx_destroy(&sc->mfi_config_lock); } return; } static void mfi_startup(void *arg) { struct mfi_softc *sc; sc = (struct mfi_softc *)arg; config_intrhook_disestablish(&sc->mfi_ich); sc->mfi_enable_intr(sc); sx_xlock(&sc->mfi_config_lock); mtx_lock(&sc->mfi_io_lock); mfi_ldprobe(sc); if (sc->mfi_flags & MFI_FLAGS_SKINNY) mfi_syspdprobe(sc); mtx_unlock(&sc->mfi_io_lock); sx_xunlock(&sc->mfi_config_lock); } static void mfi_intr(void *arg) { struct mfi_softc *sc; struct mfi_command *cm; uint32_t pi, ci, context; sc = (struct mfi_softc *)arg; if (sc->mfi_check_clear_intr(sc)) return; restart: pi = sc->mfi_comms->hw_pi; ci = sc->mfi_comms->hw_ci; mtx_lock(&sc->mfi_io_lock); while (ci != pi) { context = sc->mfi_comms->hw_reply_q[ci]; if (context < sc->mfi_max_fw_cmds) { cm = &sc->mfi_commands[context]; mfi_remove_busy(cm); cm->cm_error = 0; mfi_complete(sc, cm); } if (++ci == (sc->mfi_max_fw_cmds + 1)) ci = 0; } sc->mfi_comms->hw_ci = ci; /* Give deferred I/O a chance to run */ sc->mfi_flags &= ~MFI_FLAGS_QFRZN; mfi_startio(sc); mtx_unlock(&sc->mfi_io_lock); /* * Dummy read to flush the bus; this ensures that the indexes are up * to date. Restart processing if more commands have come in. */ (void)sc->mfi_read_fw_status(sc); if (pi != sc->mfi_comms->hw_pi) goto restart; return; } int mfi_shutdown(struct mfi_softc *sc) { struct mfi_dcmd_frame *dcmd; struct mfi_command *cm; int error; if (sc->mfi_aen_cm != NULL) { sc->cm_aen_abort = 1; mfi_abort(sc, &sc->mfi_aen_cm); } if (sc->mfi_map_sync_cm != NULL) { sc->cm_map_abort = 1; mfi_abort(sc, &sc->mfi_map_sync_cm); } mtx_lock(&sc->mfi_io_lock); error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_SHUTDOWN, NULL, 0); if (error) { mtx_unlock(&sc->mfi_io_lock); return (error); } dcmd = &cm->cm_frame->dcmd; dcmd->header.flags = MFI_FRAME_DIR_NONE; cm->cm_flags = MFI_CMD_POLLED; cm->cm_data = NULL; if ((error = mfi_mapcmd(sc, cm)) != 0) device_printf(sc->mfi_dev, "Failed to shutdown controller\n"); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); return (error); } static void mfi_syspdprobe(struct mfi_softc *sc) { struct mfi_frame_header *hdr; struct mfi_command *cm = NULL; struct mfi_pd_list *pdlist = NULL; struct mfi_system_pd *syspd, *tmp; struct mfi_system_pending *syspd_pend; int error, i, found; sx_assert(&sc->mfi_config_lock, SA_XLOCKED); mtx_assert(&sc->mfi_io_lock, MA_OWNED); /* Add SYSTEM PD's */ error = mfi_dcmd_command(sc, &cm, MFI_DCMD_PD_LIST_QUERY, (void **)&pdlist, sizeof(*pdlist)); if (error) { device_printf(sc->mfi_dev, "Error while forming SYSTEM PD list\n"); goto out; } cm->cm_flags = MFI_CMD_DATAIN | MFI_CMD_POLLED; cm->cm_frame->dcmd.mbox[0] = MR_PD_QUERY_TYPE_EXPOSED_TO_HOST; cm->cm_frame->dcmd.mbox[1] = 0; if (mfi_mapcmd(sc, cm) != 0) { device_printf(sc->mfi_dev, "Failed to get syspd device listing\n"); goto out; } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); hdr = &cm->cm_frame->header; if (hdr->cmd_status != MFI_STAT_OK) { device_printf(sc->mfi_dev, "MFI_DCMD_PD_LIST_QUERY failed %x\n", hdr->cmd_status); goto out; } /* Get each PD and add it to the system */ for (i = 0; i < pdlist->count; i++) { if (pdlist->addr[i].device_id == pdlist->addr[i].encl_device_id) continue; found = 0; TAILQ_FOREACH(syspd, &sc->mfi_syspd_tqh, pd_link) { if (syspd->pd_id == 
pdlist->addr[i].device_id) found = 1; } TAILQ_FOREACH(syspd_pend, &sc->mfi_syspd_pend_tqh, pd_link) { if (syspd_pend->pd_id == pdlist->addr[i].device_id) found = 1; } if (found == 0) mfi_add_sys_pd(sc, pdlist->addr[i].device_id); } /* Delete SYSPD's whose state has been changed */ TAILQ_FOREACH_SAFE(syspd, &sc->mfi_syspd_tqh, pd_link, tmp) { found = 0; for (i = 0; i < pdlist->count; i++) { if (syspd->pd_id == pdlist->addr[i].device_id) { found = 1; break; } } if (found == 0) { printf("DELETE\n"); mtx_unlock(&sc->mfi_io_lock); mtx_lock(&Giant); device_delete_child(sc->mfi_dev, syspd->pd_dev); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); } } out: if (pdlist) free(pdlist, M_MFIBUF); if (cm) mfi_release_command(cm); return; } static void mfi_ldprobe(struct mfi_softc *sc) { struct mfi_frame_header *hdr; struct mfi_command *cm = NULL; struct mfi_ld_list *list = NULL; struct mfi_disk *ld; struct mfi_disk_pending *ld_pend; int error, i; sx_assert(&sc->mfi_config_lock, SA_XLOCKED); mtx_assert(&sc->mfi_io_lock, MA_OWNED); error = mfi_dcmd_command(sc, &cm, MFI_DCMD_LD_GET_LIST, (void **)&list, sizeof(*list)); if (error) goto out; cm->cm_flags = MFI_CMD_DATAIN; if (mfi_wait_command(sc, cm) != 0) { device_printf(sc->mfi_dev, "Failed to get device listing\n"); goto out; } hdr = &cm->cm_frame->header; if (hdr->cmd_status != MFI_STAT_OK) { device_printf(sc->mfi_dev, "MFI_DCMD_LD_GET_LIST failed %x\n", hdr->cmd_status); goto out; } for (i = 0; i < list->ld_count; i++) { TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { if (ld->ld_id == list->ld_list[i].ld.v.target_id) goto skip_add; } TAILQ_FOREACH(ld_pend, &sc->mfi_ld_pend_tqh, ld_link) { if (ld_pend->ld_id == list->ld_list[i].ld.v.target_id) goto skip_add; } mfi_add_ld(sc, list->ld_list[i].ld.v.target_id); skip_add:; } out: if (list) free(list, M_MFIBUF); if (cm) mfi_release_command(cm); return; } /* * The timestamp is the number of seconds since 00:00 Jan 1, 2000. If * the bits in 24-31 are all set, then it is the number of seconds since * boot. 
*/ static const char * format_timestamp(uint32_t timestamp) { static char buffer[32]; if ((timestamp & 0xff000000) == 0xff000000) snprintf(buffer, sizeof(buffer), "boot + %us", timestamp & 0x00ffffff); else snprintf(buffer, sizeof(buffer), "%us", timestamp); return (buffer); } static const char * format_class(int8_t class) { static char buffer[6]; switch (class) { case MFI_EVT_CLASS_DEBUG: return ("debug"); case MFI_EVT_CLASS_PROGRESS: return ("progress"); case MFI_EVT_CLASS_INFO: return ("info"); case MFI_EVT_CLASS_WARNING: return ("WARN"); case MFI_EVT_CLASS_CRITICAL: return ("CRIT"); case MFI_EVT_CLASS_FATAL: return ("FATAL"); case MFI_EVT_CLASS_DEAD: return ("DEAD"); default: snprintf(buffer, sizeof(buffer), "%d", class); return (buffer); } } static void mfi_decode_evt(struct mfi_softc *sc, struct mfi_evt_detail *detail) { struct mfi_system_pd *syspd = NULL; device_printf(sc->mfi_dev, "%d (%s/0x%04x/%s) - %s\n", detail->seq, format_timestamp(detail->time), detail->evt_class.members.locale, format_class(detail->evt_class.members.evt_class), detail->description); /* Don't act on old AEN's or while shutting down */ if (detail->seq < sc->mfi_boot_seq_num || sc->mfi_detaching) return; switch (detail->arg_type) { case MR_EVT_ARGS_NONE: if (detail->code == MR_EVT_CTRL_HOST_BUS_SCAN_REQUESTED) { device_printf(sc->mfi_dev, "HostBus scan raised\n"); if (mfi_detect_jbod_change) { /* * Probe for new SYSPD's and delete * invalid SYSPD's */ sx_xlock(&sc->mfi_config_lock); mtx_lock(&sc->mfi_io_lock); mfi_syspdprobe(sc); mtx_unlock(&sc->mfi_io_lock); sx_xunlock(&sc->mfi_config_lock); } } break; case MR_EVT_ARGS_LD_STATE: /* At load time the driver reads all the events starting * from the one that was logged after shutdown. Avoid * acting on these old events. */ if (detail->args.ld_state.new_state == MFI_LD_STATE_OFFLINE) { /* Remove the LD */ struct mfi_disk *ld; TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { if (ld->ld_id == detail->args.ld_state.ld.target_id) break; } /* Fix for kernel panics when SSCD is removed: KASSERT(ld != NULL, ("volume disappeared")); */ if (ld != NULL) { mtx_lock(&Giant); device_delete_child(sc->mfi_dev, ld->ld_dev); mtx_unlock(&Giant); } } break; case MR_EVT_ARGS_PD: if (detail->code == MR_EVT_PD_REMOVED) { if (mfi_detect_jbod_change) { /* * If the removed device is a SYSPD then * delete it */ TAILQ_FOREACH(syspd, &sc->mfi_syspd_tqh, pd_link) { if (syspd->pd_id == detail->args.pd.device_id) { mtx_lock(&Giant); device_delete_child( sc->mfi_dev, syspd->pd_dev); mtx_unlock(&Giant); break; } } } } if (detail->code == MR_EVT_PD_INSERTED) { if (mfi_detect_jbod_change) { /* Probe for new SYSPD's */ sx_xlock(&sc->mfi_config_lock); mtx_lock(&sc->mfi_io_lock); mfi_syspdprobe(sc); mtx_unlock(&sc->mfi_io_lock); sx_xunlock(&sc->mfi_config_lock); } } if (sc->mfi_cam_rescan_cb != NULL && (detail->code == MR_EVT_PD_INSERTED || detail->code == MR_EVT_PD_REMOVED)) { sc->mfi_cam_rescan_cb(sc, detail->args.pd.device_id); } break; } } static void mfi_queue_evt(struct mfi_softc *sc, struct mfi_evt_detail *detail) { struct mfi_evt_queue_elm *elm; mtx_assert(&sc->mfi_io_lock, MA_OWNED); elm = malloc(sizeof(*elm), M_MFIBUF, M_NOWAIT|M_ZERO); if (elm == NULL) return; memcpy(&elm->detail, detail, sizeof(*detail)); TAILQ_INSERT_TAIL(&sc->mfi_evt_queue, elm, link); taskqueue_enqueue(taskqueue_swi, &sc->mfi_evt_task); } static void mfi_handle_evt(void *context, int pending) { TAILQ_HEAD(,mfi_evt_queue_elm) queue; struct mfi_softc *sc; struct mfi_evt_queue_elm *elm; sc = context; TAILQ_INIT(&queue); 
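/* Steal the entire pending event list while holding the I/O lock, then decode the events after dropping it; mfi_decode_evt() may need to acquire Giant to delete child devices. */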
mtx_lock(&sc->mfi_io_lock); TAILQ_CONCAT(&queue, &sc->mfi_evt_queue, link); mtx_unlock(&sc->mfi_io_lock); while ((elm = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, elm, link); mfi_decode_evt(sc, &elm->detail); free(elm, M_MFIBUF); } } static int mfi_aen_register(struct mfi_softc *sc, int seq, int locale) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd; union mfi_evt current_aen, prior_aen; struct mfi_evt_detail *ed = NULL; int error = 0; mtx_assert(&sc->mfi_io_lock, MA_OWNED); current_aen.word = locale; if (sc->mfi_aen_cm != NULL) { prior_aen.word = ((uint32_t *)&sc->mfi_aen_cm->cm_frame->dcmd.mbox)[1]; if (prior_aen.members.evt_class <= current_aen.members.evt_class && !((prior_aen.members.locale & current_aen.members.locale) ^current_aen.members.locale)) { return (0); } else { prior_aen.members.locale |= current_aen.members.locale; if (prior_aen.members.evt_class < current_aen.members.evt_class) current_aen.members.evt_class = prior_aen.members.evt_class; mfi_abort(sc, &sc->mfi_aen_cm); } } error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_EVENT_WAIT, (void **)&ed, sizeof(*ed)); if (error) goto out; dcmd = &cm->cm_frame->dcmd; ((uint32_t *)&dcmd->mbox)[0] = seq; ((uint32_t *)&dcmd->mbox)[1] = locale; cm->cm_flags = MFI_CMD_DATAIN; cm->cm_complete = mfi_aen_complete; sc->last_seq_num = seq; sc->mfi_aen_cm = cm; mfi_enqueue_ready(cm); mfi_startio(sc); out: return (error); } static void mfi_aen_complete(struct mfi_command *cm) { struct mfi_frame_header *hdr; struct mfi_softc *sc; struct mfi_evt_detail *detail; struct mfi_aen *mfi_aen_entry, *tmp; int seq = 0, aborted = 0; sc = cm->cm_sc; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if (sc->mfi_aen_cm == NULL) return; hdr = &cm->cm_frame->header; if (sc->cm_aen_abort || hdr->cmd_status == MFI_STAT_INVALID_STATUS) { sc->cm_aen_abort = 0; aborted = 1; } else { sc->mfi_aen_triggered = 1; if (sc->mfi_poll_waiting) { sc->mfi_poll_waiting = 0; selwakeup(&sc->mfi_select); } detail = cm->cm_data; mfi_queue_evt(sc, detail); seq = detail->seq + 1; TAILQ_FOREACH_SAFE(mfi_aen_entry, &sc->mfi_aen_pids, aen_link, tmp) { TAILQ_REMOVE(&sc->mfi_aen_pids, mfi_aen_entry, aen_link); PROC_LOCK(mfi_aen_entry->p); kern_psignal(mfi_aen_entry->p, SIGIO); PROC_UNLOCK(mfi_aen_entry->p); free(mfi_aen_entry, M_MFIBUF); } } free(cm->cm_data, M_MFIBUF); wakeup(&sc->mfi_aen_cm); sc->mfi_aen_cm = NULL; mfi_release_command(cm); /* set it up again so the driver can catch more events */ if (!aborted) mfi_aen_setup(sc, seq); } #define MAX_EVENTS 15 static int mfi_parse_entries(struct mfi_softc *sc, int start_seq, int stop_seq) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd; struct mfi_evt_list *el; union mfi_evt class_locale; int error, i, seq, size; mtx_assert(&sc->mfi_io_lock, MA_OWNED); class_locale.members.reserved = 0; class_locale.members.locale = mfi_event_locale; class_locale.members.evt_class = mfi_event_class; size = sizeof(struct mfi_evt_list) + sizeof(struct mfi_evt_detail) * (MAX_EVENTS - 1); el = malloc(size, M_MFIBUF, M_NOWAIT | M_ZERO); if (el == NULL) return (ENOMEM); for (seq = start_seq;;) { if ((cm = mfi_dequeue_free(sc)) == NULL) { free(el, M_MFIBUF); return (EBUSY); } dcmd = &cm->cm_frame->dcmd; bzero(dcmd->mbox, MFI_MBOX_SIZE); dcmd->header.cmd = MFI_CMD_DCMD; dcmd->header.timeout = 0; dcmd->header.data_len = size; dcmd->opcode = MFI_DCMD_CTRL_EVENT_GET; ((uint32_t *)&dcmd->mbox)[0] = seq; ((uint32_t *)&dcmd->mbox)[1] = class_locale.word; cm->cm_sg = &dcmd->sgl; cm->cm_total_frame_size = MFI_DCMD_FRAME_SIZE; cm->cm_flags = MFI_CMD_DATAIN | 
MFI_CMD_POLLED; cm->cm_data = el; cm->cm_len = size; if ((error = mfi_mapcmd(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Failed to get controller entries\n"); mfi_release_command(cm); break; } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); if (dcmd->header.cmd_status == MFI_STAT_NOT_FOUND) { mfi_release_command(cm); break; } if (dcmd->header.cmd_status != MFI_STAT_OK) { device_printf(sc->mfi_dev, "Error %d fetching controller entries\n", dcmd->header.cmd_status); mfi_release_command(cm); error = EIO; break; } mfi_release_command(cm); for (i = 0; i < el->count; i++) { /* * If this event is newer than 'stop_seq' then * break out of the loop. Note that the log * is a circular buffer so we have to handle * the case that our stop point is earlier in * the buffer than our start point. */ if (el->event[i].seq >= stop_seq) { if (start_seq <= stop_seq) break; else if (el->event[i].seq < start_seq) break; } mfi_queue_evt(sc, &el->event[i]); } seq = el->event[el->count - 1].seq + 1; } free(el, M_MFIBUF); return (error); } static int mfi_add_ld(struct mfi_softc *sc, int id) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd = NULL; struct mfi_ld_info *ld_info = NULL; struct mfi_disk_pending *ld_pend; int error; mtx_assert(&sc->mfi_io_lock, MA_OWNED); ld_pend = malloc(sizeof(*ld_pend), M_MFIBUF, M_NOWAIT | M_ZERO); if (ld_pend != NULL) { ld_pend->ld_id = id; TAILQ_INSERT_TAIL(&sc->mfi_ld_pend_tqh, ld_pend, ld_link); } error = mfi_dcmd_command(sc, &cm, MFI_DCMD_LD_GET_INFO, (void **)&ld_info, sizeof(*ld_info)); if (error) { device_printf(sc->mfi_dev, "Failed to allocate for MFI_DCMD_LD_GET_INFO %d\n", error); if (ld_info) free(ld_info, M_MFIBUF); return (error); } cm->cm_flags = MFI_CMD_DATAIN; dcmd = &cm->cm_frame->dcmd; dcmd->mbox[0] = id; if (mfi_wait_command(sc, cm) != 0) { device_printf(sc->mfi_dev, "Failed to get logical drive: %d\n", id); free(ld_info, M_MFIBUF); return (0); } if (ld_info->ld_config.params.isSSCD != 1) mfi_add_ld_complete(cm); else { mfi_release_command(cm); if (ld_info) /* SSCD drives ld_info free here */ free(ld_info, M_MFIBUF); } return (0); } static void mfi_add_ld_complete(struct mfi_command *cm) { struct mfi_frame_header *hdr; struct mfi_ld_info *ld_info; struct mfi_softc *sc; device_t child; sc = cm->cm_sc; hdr = &cm->cm_frame->header; ld_info = cm->cm_private; if (sc->cm_map_abort || hdr->cmd_status != MFI_STAT_OK) { free(ld_info, M_MFIBUF); wakeup(&sc->mfi_map_sync_cm); mfi_release_command(cm); return; } wakeup(&sc->mfi_map_sync_cm); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); mtx_lock(&Giant); if ((child = device_add_child(sc->mfi_dev, "mfid", -1)) == NULL) { device_printf(sc->mfi_dev, "Failed to add logical disk\n"); free(ld_info, M_MFIBUF); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); return; } device_set_ivars(child, ld_info); device_set_desc(child, "MFI Logical Disk"); bus_generic_attach(sc->mfi_dev); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); } static int mfi_add_sys_pd(struct mfi_softc *sc, int id) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd = NULL; struct mfi_pd_info *pd_info = NULL; struct mfi_system_pending *syspd_pend; int error; mtx_assert(&sc->mfi_io_lock, MA_OWNED); syspd_pend = malloc(sizeof(*syspd_pend), M_MFIBUF, M_NOWAIT | M_ZERO); if (syspd_pend != NULL) { syspd_pend->pd_id = id; TAILQ_INSERT_TAIL(&sc->mfi_syspd_pend_tqh, syspd_pend, pd_link); } error = mfi_dcmd_command(sc, &cm, MFI_DCMD_PD_GET_INFO, (void **)&pd_info, sizeof(*pd_info)); 
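/* The DCMD is issued below as a polled command; on success, mfi_add_sys_pd_complete() attaches the mfisyspd child device. */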
if (error) { device_printf(sc->mfi_dev, "Failed to allocate for MFI_DCMD_PD_GET_INFO %d\n", error); if (pd_info) free(pd_info, M_MFIBUF); return (error); } cm->cm_flags = MFI_CMD_DATAIN | MFI_CMD_POLLED; dcmd = &cm->cm_frame->dcmd; dcmd->mbox[0] = id; dcmd->header.scsi_status = 0; dcmd->header.pad0 = 0; if ((error = mfi_mapcmd(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Failed to get physical drive info %d\n", id); free(pd_info, M_MFIBUF); mfi_release_command(cm); return (error); } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); mfi_add_sys_pd_complete(cm); return (0); } static void mfi_add_sys_pd_complete(struct mfi_command *cm) { struct mfi_frame_header *hdr; struct mfi_pd_info *pd_info; struct mfi_softc *sc; device_t child; sc = cm->cm_sc; hdr = &cm->cm_frame->header; pd_info = cm->cm_private; if (hdr->cmd_status != MFI_STAT_OK) { free(pd_info, M_MFIBUF); mfi_release_command(cm); return; } if (pd_info->fw_state != MFI_PD_STATE_SYSTEM) { device_printf(sc->mfi_dev, "PD=%x is not SYSTEM PD\n", pd_info->ref.v.device_id); free(pd_info, M_MFIBUF); mfi_release_command(cm); return; } mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); mtx_lock(&Giant); if ((child = device_add_child(sc->mfi_dev, "mfisyspd", -1)) == NULL) { device_printf(sc->mfi_dev, "Failed to add system pd\n"); free(pd_info, M_MFIBUF); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); return; } device_set_ivars(child, pd_info); device_set_desc(child, "MFI System PD"); bus_generic_attach(sc->mfi_dev); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); } static struct mfi_command * mfi_bio_command(struct mfi_softc *sc) { struct bio *bio; struct mfi_command *cm = NULL; /* Reserve two commands to avoid starvation for IOCTLs */ if (sc->mfi_qstat[MFIQ_FREE].q_length < 2) { return (NULL); } if ((bio = mfi_dequeue_bio(sc)) == NULL) { return (NULL); } if ((uintptr_t)bio->bio_driver2 == MFI_LD_IO) { cm = mfi_build_ldio(sc, bio); } else if ((uintptr_t)bio->bio_driver2 == MFI_SYS_PD_IO) { cm = mfi_build_syspdio(sc, bio); } if (!cm) mfi_enqueue_bio(sc, bio); return (cm); } /* * mostly copied from cam/scsi/scsi_all.c:scsi_read_write */ int mfi_build_cdb(int readop, uint8_t byte2, u_int64_t lba, u_int32_t block_count, uint8_t *cdb) { int cdb_len; if (((lba & 0x1fffff) == lba) && ((block_count & 0xff) == block_count) && (byte2 == 0)) { /* We can fit in a 6 byte cdb */ struct scsi_rw_6 *scsi_cmd; scsi_cmd = (struct scsi_rw_6 *)cdb; scsi_cmd->opcode = readop ? READ_6 : WRITE_6; scsi_ulto3b(lba, scsi_cmd->addr); scsi_cmd->length = block_count & 0xff; scsi_cmd->control = 0; cdb_len = sizeof(*scsi_cmd); } else if (((block_count & 0xffff) == block_count) && ((lba & 0xffffffff) == lba)) { /* Need a 10 byte CDB */ struct scsi_rw_10 *scsi_cmd; scsi_cmd = (struct scsi_rw_10 *)cdb; scsi_cmd->opcode = readop ? READ_10 : WRITE_10; scsi_cmd->byte2 = byte2; scsi_ulto4b(lba, scsi_cmd->addr); scsi_cmd->reserved = 0; scsi_ulto2b(block_count, scsi_cmd->length); scsi_cmd->control = 0; cdb_len = sizeof(*scsi_cmd); } else if (((block_count & 0xffffffff) == block_count) && ((lba & 0xffffffff) == lba)) { /* Block count is too big for a 10 byte CDB; use a 12 byte CDB */ struct scsi_rw_12 *scsi_cmd; scsi_cmd = (struct scsi_rw_12 *)cdb; scsi_cmd->opcode = readop ? READ_12 : WRITE_12; scsi_cmd->byte2 = byte2; scsi_ulto4b(lba, scsi_cmd->addr); scsi_cmd->reserved = 0; scsi_ulto4b(block_count, scsi_cmd->length); scsi_cmd->control = 0; cdb_len = sizeof(*scsi_cmd); } else { /* * 16 byte CDB. 
We'll only get here if the LBA is larger * than 2^32 */ struct scsi_rw_16 *scsi_cmd; scsi_cmd = (struct scsi_rw_16 *)cdb; scsi_cmd->opcode = readop ? READ_16 : WRITE_16; scsi_cmd->byte2 = byte2; scsi_u64to8b(lba, scsi_cmd->addr); scsi_cmd->reserved = 0; scsi_ulto4b(block_count, scsi_cmd->length); scsi_cmd->control = 0; cdb_len = sizeof(*scsi_cmd); } return cdb_len; } extern char *unmapped_buf; static struct mfi_command * mfi_build_syspdio(struct mfi_softc *sc, struct bio *bio) { struct mfi_command *cm; struct mfi_pass_frame *pass; uint32_t context = 0; int flags = 0, blkcount = 0, readop; uint8_t cdb_len; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if ((cm = mfi_dequeue_free(sc)) == NULL) return (NULL); /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; pass = &cm->cm_frame->pass; bzero(pass->cdb, 16); pass->header.cmd = MFI_CMD_PD_SCSI_IO; switch (bio->bio_cmd) { case BIO_READ: flags = MFI_CMD_DATAIN | MFI_CMD_BIO; readop = 1; break; case BIO_WRITE: flags = MFI_CMD_DATAOUT | MFI_CMD_BIO; readop = 0; break; default: /* TODO: what about BIO_DELETE??? */ panic("Unsupported bio command %x\n", bio->bio_cmd); } /* Cheat with the sector length to avoid a non-constant division */ blkcount = howmany(bio->bio_bcount, MFI_SECTOR_LEN); /* Fill the LBA and Transfer length in CDB */ cdb_len = mfi_build_cdb(readop, 0, bio->bio_pblkno, blkcount, pass->cdb); pass->header.target_id = (uintptr_t)bio->bio_driver1; pass->header.lun_id = 0; pass->header.timeout = 0; pass->header.flags = 0; pass->header.scsi_status = 0; pass->header.sense_len = MFI_SENSE_LEN; pass->header.data_len = bio->bio_bcount; pass->header.cdb_len = cdb_len; pass->sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; pass->sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); cm->cm_complete = mfi_bio_complete; cm->cm_private = bio; cm->cm_data = unmapped_buf; cm->cm_len = bio->bio_bcount; cm->cm_sg = &pass->sgl; cm->cm_total_frame_size = MFI_PASS_FRAME_SIZE; cm->cm_flags = flags; return (cm); } static struct mfi_command * mfi_build_ldio(struct mfi_softc *sc, struct bio *bio) { struct mfi_io_frame *io; struct mfi_command *cm; int flags; uint32_t blkcount; uint32_t context = 0; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if ((cm = mfi_dequeue_free(sc)) == NULL) return (NULL); /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; io = &cm->cm_frame->io; switch (bio->bio_cmd) { case BIO_READ: io->header.cmd = MFI_CMD_LD_READ; flags = MFI_CMD_DATAIN | MFI_CMD_BIO; break; case BIO_WRITE: io->header.cmd = MFI_CMD_LD_WRITE; flags = MFI_CMD_DATAOUT | MFI_CMD_BIO; break; default: /* TODO: what about BIO_DELETE??? 
*/ panic("Unsupported bio command %x\n", bio->bio_cmd); } /* Cheat with the sector length to avoid a non-constant division */ blkcount = howmany(bio->bio_bcount, MFI_SECTOR_LEN); io->header.target_id = (uintptr_t)bio->bio_driver1; io->header.timeout = 0; io->header.flags = 0; io->header.scsi_status = 0; io->header.sense_len = MFI_SENSE_LEN; io->header.data_len = blkcount; io->sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; io->sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); io->lba_hi = (bio->bio_pblkno & 0xffffffff00000000) >> 32; io->lba_lo = bio->bio_pblkno & 0xffffffff; cm->cm_complete = mfi_bio_complete; cm->cm_private = bio; cm->cm_data = unmapped_buf; cm->cm_len = bio->bio_bcount; cm->cm_sg = &io->sgl; cm->cm_total_frame_size = MFI_IO_FRAME_SIZE; cm->cm_flags = flags; return (cm); } static void mfi_bio_complete(struct mfi_command *cm) { struct bio *bio; struct mfi_frame_header *hdr; struct mfi_softc *sc; bio = cm->cm_private; hdr = &cm->cm_frame->header; sc = cm->cm_sc; if ((hdr->cmd_status != MFI_STAT_OK) || (hdr->scsi_status != 0)) { bio->bio_flags |= BIO_ERROR; bio->bio_error = EIO; device_printf(sc->mfi_dev, "I/O error, cmd=%p, status=%#x, " "scsi_status=%#x\n", cm, hdr->cmd_status, hdr->scsi_status); mfi_print_sense(cm->cm_sc, cm->cm_sense); } else if (cm->cm_error != 0) { bio->bio_flags |= BIO_ERROR; bio->bio_error = cm->cm_error; device_printf(sc->mfi_dev, "I/O error, cmd=%p, error=%#x\n", cm, cm->cm_error); } mfi_release_command(cm); mfi_disk_complete(bio); } void mfi_startio(struct mfi_softc *sc) { struct mfi_command *cm; struct ccb_hdr *ccbh; for (;;) { /* Don't bother if we're short on resources */ if (sc->mfi_flags & MFI_FLAGS_QFRZN) break; /* Try a command that has already been prepared */ cm = mfi_dequeue_ready(sc); if (cm == NULL) { if ((ccbh = TAILQ_FIRST(&sc->mfi_cam_ccbq)) != NULL) cm = sc->mfi_cam_start(ccbh); } /* Nope, so look for work on the bioq */ if (cm == NULL) cm = mfi_bio_command(sc); /* No work available, so exit */ if (cm == NULL) break; /* Send the command to the controller */ if (mfi_mapcmd(sc, cm) != 0) { device_printf(sc->mfi_dev, "Failed to startio\n"); mfi_requeue_ready(cm); break; } } } int mfi_mapcmd(struct mfi_softc *sc, struct mfi_command *cm) { int error, polled; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if ((cm->cm_data != NULL) && (cm->cm_frame->header.cmd != MFI_CMD_STP)) { polled = (cm->cm_flags & MFI_CMD_POLLED) ? BUS_DMA_NOWAIT : 0; if (cm->cm_flags & MFI_CMD_CCB) error = bus_dmamap_load_ccb(sc->mfi_buffer_dmat, cm->cm_dmamap, cm->cm_data, mfi_data_cb, cm, polled); else if (cm->cm_flags & MFI_CMD_BIO) error = bus_dmamap_load_bio(sc->mfi_buffer_dmat, cm->cm_dmamap, cm->cm_private, mfi_data_cb, cm, polled); else error = bus_dmamap_load(sc->mfi_buffer_dmat, cm->cm_dmamap, cm->cm_data, cm->cm_len, mfi_data_cb, cm, polled); if (error == EINPROGRESS) { sc->mfi_flags |= MFI_FLAGS_QFRZN; return (0); } } else { error = mfi_send_frame(sc, cm); } return (error); } static void mfi_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct mfi_frame_header *hdr; struct mfi_command *cm; union mfi_sgl *sgl; struct mfi_softc *sc; int i, j, first, dir; int sge_size, locked; cm = (struct mfi_command *)arg; sc = cm->cm_sc; hdr = &cm->cm_frame->header; sgl = cm->cm_sg; /* * We need to check if we have the lock as this is an async * callback so even though our caller mfi_mapcmd asserts * it has the lock, there is no guarantee that it hasn't been * dropped if bus_dmamap_load returned prior to our * completion. 
*/ if ((locked = mtx_owned(&sc->mfi_io_lock)) == 0) mtx_lock(&sc->mfi_io_lock); if (error) { printf("error %d in callback\n", error); cm->cm_error = error; mfi_complete(sc, cm); goto out; } /* Use IEEE sgl only for IO's on a SKINNY controller * For other commands on a SKINNY controller use either * sg32 or sg64 based on the sizeof(bus_addr_t). * Also calculate the total frame size based on the type * of SGL used. */ if (((cm->cm_frame->header.cmd == MFI_CMD_PD_SCSI_IO) || (cm->cm_frame->header.cmd == MFI_CMD_LD_READ) || (cm->cm_frame->header.cmd == MFI_CMD_LD_WRITE)) && (sc->mfi_flags & MFI_FLAGS_SKINNY)) { for (i = 0; i < nsegs; i++) { sgl->sg_skinny[i].addr = segs[i].ds_addr; sgl->sg_skinny[i].len = segs[i].ds_len; sgl->sg_skinny[i].flag = 0; } hdr->flags |= MFI_FRAME_IEEE_SGL | MFI_FRAME_SGL64; sge_size = sizeof(struct mfi_sg_skinny); hdr->sg_count = nsegs; } else { j = 0; if (cm->cm_frame->header.cmd == MFI_CMD_STP) { first = cm->cm_stp_len; if ((sc->mfi_flags & MFI_FLAGS_SG64) == 0) { sgl->sg32[j].addr = segs[0].ds_addr; sgl->sg32[j++].len = first; } else { sgl->sg64[j].addr = segs[0].ds_addr; sgl->sg64[j++].len = first; } } else first = 0; if ((sc->mfi_flags & MFI_FLAGS_SG64) == 0) { for (i = 0; i < nsegs; i++) { sgl->sg32[j].addr = segs[i].ds_addr + first; sgl->sg32[j++].len = segs[i].ds_len - first; first = 0; } } else { for (i = 0; i < nsegs; i++) { sgl->sg64[j].addr = segs[i].ds_addr + first; sgl->sg64[j++].len = segs[i].ds_len - first; first = 0; } hdr->flags |= MFI_FRAME_SGL64; } hdr->sg_count = j; sge_size = sc->mfi_sge_size; } dir = 0; if (cm->cm_flags & MFI_CMD_DATAIN) { dir |= BUS_DMASYNC_PREREAD; hdr->flags |= MFI_FRAME_DIR_READ; } if (cm->cm_flags & MFI_CMD_DATAOUT) { dir |= BUS_DMASYNC_PREWRITE; hdr->flags |= MFI_FRAME_DIR_WRITE; } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, dir); cm->cm_flags |= MFI_CMD_MAPPED; /* * Instead of calculating the total number of frames in the * compound frame, it's already assumed that there will be at * least 1 frame, so don't compensate for the modulo of the * following division. */ cm->cm_total_frame_size += (sc->mfi_sge_size * nsegs); cm->cm_extra_frames = (cm->cm_total_frame_size - 1) / MFI_FRAME_SIZE; if ((error = mfi_send_frame(sc, cm)) != 0) { printf("error %d in callback from mfi_send_frame\n", error); cm->cm_error = error; mfi_complete(sc, cm); goto out; } out: /* leave the lock in the state we found it */ if (locked == 0) mtx_unlock(&sc->mfi_io_lock); return; } static int mfi_send_frame(struct mfi_softc *sc, struct mfi_command *cm) { int error; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if (sc->MFA_enabled) error = mfi_tbolt_send_frame(sc, cm); else error = mfi_std_send_frame(sc, cm); if (error != 0 && (cm->cm_flags & MFI_ON_MFIQ_BUSY) != 0) mfi_remove_busy(cm); return (error); } static int mfi_std_send_frame(struct mfi_softc *sc, struct mfi_command *cm) { struct mfi_frame_header *hdr; int tm = mfi_polled_cmd_timeout * 1000; hdr = &cm->cm_frame->header; if ((cm->cm_flags & MFI_CMD_POLLED) == 0) { cm->cm_timestamp = time_uptime; mfi_enqueue_busy(cm); } else { hdr->cmd_status = MFI_STAT_INVALID_STATUS; hdr->flags |= MFI_FRAME_DONT_POST_IN_REPLY_QUEUE; } /* * The bus address of the command is aligned on a 64 byte boundary, * leaving the least 6 bits as zero. For whatever reason, the * hardware wants the address shifted right by three, leaving just * 3 zero bits. These three bits are then used as a prefetching * hint for the hardware to predict how many frames need to be * fetched across the bus. 
If a command has more than 8 frames * then the 3 bits are set to 0x7 and the firmware uses other * information in the command to determine the total amount to fetch. * However, FreeBSD doesn't support I/O larger than 128K, so 8 frames * is enough for both 32bit and 64bit systems. */ if (cm->cm_extra_frames > 7) cm->cm_extra_frames = 7; sc->mfi_issue_cmd(sc, cm->cm_frame_busaddr, cm->cm_extra_frames); if ((cm->cm_flags & MFI_CMD_POLLED) == 0) return (0); /* This is a polled command, so busy-wait for it to complete. */ while (hdr->cmd_status == MFI_STAT_INVALID_STATUS) { DELAY(1000); tm -= 1; if (tm <= 0) break; } if (hdr->cmd_status == MFI_STAT_INVALID_STATUS) { device_printf(sc->mfi_dev, "Frame %p timed out " "command 0x%X\n", hdr, cm->cm_frame->dcmd.opcode); return (ETIMEDOUT); } return (0); } void mfi_complete(struct mfi_softc *sc, struct mfi_command *cm) { int dir; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if ((cm->cm_flags & MFI_CMD_MAPPED) != 0) { dir = 0; if ((cm->cm_flags & MFI_CMD_DATAIN) || (cm->cm_frame->header.cmd == MFI_CMD_STP)) dir |= BUS_DMASYNC_POSTREAD; if (cm->cm_flags & MFI_CMD_DATAOUT) dir |= BUS_DMASYNC_POSTWRITE; bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, dir); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); cm->cm_flags &= ~MFI_CMD_MAPPED; } cm->cm_flags |= MFI_CMD_COMPLETED; if (cm->cm_complete != NULL) cm->cm_complete(cm); else wakeup(cm); } static int mfi_abort(struct mfi_softc *sc, struct mfi_command **cm_abort) { struct mfi_command *cm; struct mfi_abort_frame *abort; int i = 0, error; uint32_t context = 0; mtx_lock(&sc->mfi_io_lock); if ((cm = mfi_dequeue_free(sc)) == NULL) { mtx_unlock(&sc->mfi_io_lock); return (EBUSY); } /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; abort = &cm->cm_frame->abort; abort->header.cmd = MFI_CMD_ABORT; abort->header.flags = 0; abort->header.scsi_status = 0; abort->abort_context = (*cm_abort)->cm_frame->header.context; abort->abort_mfi_addr_lo = (uint32_t)(*cm_abort)->cm_frame_busaddr; abort->abort_mfi_addr_hi = (uint32_t)((uint64_t)(*cm_abort)->cm_frame_busaddr >> 32); cm->cm_data = NULL; cm->cm_flags = MFI_CMD_POLLED; if ((error = mfi_mapcmd(sc, cm)) != 0) device_printf(sc->mfi_dev, "failed to abort command\n"); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); while (i < 5 && *cm_abort != NULL) { tsleep(cm_abort, 0, "mfiabort", 5 * hz); i++; } if (*cm_abort != NULL) { /* Force a complete if command didn't abort */ mtx_lock(&sc->mfi_io_lock); (*cm_abort)->cm_complete(*cm_abort); mtx_unlock(&sc->mfi_io_lock); } return (error); } int mfi_dump_blocks(struct mfi_softc *sc, int id, uint64_t lba, void *virt, int len) { struct mfi_command *cm; struct mfi_io_frame *io; int error; uint32_t context = 0; if ((cm = mfi_dequeue_free(sc)) == NULL) return (EBUSY); /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; io = &cm->cm_frame->io; io->header.cmd = MFI_CMD_LD_WRITE; io->header.target_id = id; io->header.timeout = 0; io->header.flags = 0; io->header.scsi_status = 0; io->header.sense_len = MFI_SENSE_LEN; io->header.data_len = howmany(len, MFI_SECTOR_LEN); io->sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; io->sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); io->lba_hi = (lba & 0xffffffff00000000) >> 32; io->lba_lo = lba & 0xffffffff; cm->cm_data = virt; cm->cm_len = len; cm->cm_sg = &io->sgl; 
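/* This path is used while the kernel is dumping and cannot sleep, so the write is issued as a polled, data-out command. */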
cm->cm_total_frame_size = MFI_IO_FRAME_SIZE; cm->cm_flags = MFI_CMD_POLLED | MFI_CMD_DATAOUT; if ((error = mfi_mapcmd(sc, cm)) != 0) device_printf(sc->mfi_dev, "failed dump blocks\n"); bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); mfi_release_command(cm); return (error); } int mfi_dump_syspd_blocks(struct mfi_softc *sc, int id, uint64_t lba, void *virt, int len) { struct mfi_command *cm; struct mfi_pass_frame *pass; int error, readop, cdb_len; uint32_t blkcount; if ((cm = mfi_dequeue_free(sc)) == NULL) return (EBUSY); pass = &cm->cm_frame->pass; bzero(pass->cdb, 16); pass->header.cmd = MFI_CMD_PD_SCSI_IO; readop = 0; blkcount = howmany(len, MFI_SECTOR_LEN); cdb_len = mfi_build_cdb(readop, 0, lba, blkcount, pass->cdb); pass->header.target_id = id; pass->header.timeout = 0; pass->header.flags = 0; pass->header.scsi_status = 0; pass->header.sense_len = MFI_SENSE_LEN; pass->header.data_len = len; pass->header.cdb_len = cdb_len; pass->sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; pass->sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); cm->cm_data = virt; cm->cm_len = len; cm->cm_sg = &pass->sgl; cm->cm_total_frame_size = MFI_PASS_FRAME_SIZE; cm->cm_flags = MFI_CMD_POLLED | MFI_CMD_DATAOUT | MFI_CMD_SCSI; if ((error = mfi_mapcmd(sc, cm)) != 0) device_printf(sc->mfi_dev, "failed dump blocks\n"); bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); mfi_release_command(cm); return (error); } static int mfi_open(struct cdev *dev, int flags, int fmt, struct thread *td) { struct mfi_softc *sc; int error; sc = dev->si_drv1; mtx_lock(&sc->mfi_io_lock); if (sc->mfi_detaching) error = ENXIO; else { sc->mfi_flags |= MFI_FLAGS_OPEN; error = 0; } mtx_unlock(&sc->mfi_io_lock); return (error); } static int mfi_close(struct cdev *dev, int flags, int fmt, struct thread *td) { struct mfi_softc *sc; struct mfi_aen *mfi_aen_entry, *tmp; sc = dev->si_drv1; mtx_lock(&sc->mfi_io_lock); sc->mfi_flags &= ~MFI_FLAGS_OPEN; TAILQ_FOREACH_SAFE(mfi_aen_entry, &sc->mfi_aen_pids, aen_link, tmp) { if (mfi_aen_entry->p == curproc) { TAILQ_REMOVE(&sc->mfi_aen_pids, mfi_aen_entry, aen_link); free(mfi_aen_entry, M_MFIBUF); } } mtx_unlock(&sc->mfi_io_lock); return (0); } static int mfi_config_lock(struct mfi_softc *sc, uint32_t opcode) { switch (opcode) { case MFI_DCMD_LD_DELETE: case MFI_DCMD_CFG_ADD: case MFI_DCMD_CFG_CLEAR: case MFI_DCMD_CFG_FOREIGN_IMPORT: sx_xlock(&sc->mfi_config_lock); return (1); default: return (0); } } static void mfi_config_unlock(struct mfi_softc *sc, int locked) { if (locked) sx_xunlock(&sc->mfi_config_lock); } /* * Perform pre-issue checks on commands from userland and possibly veto * them. 
static int mfi_check_command_pre(struct mfi_softc *sc, struct mfi_command *cm) { struct mfi_disk *ld, *ld2; int error; struct mfi_system_pd *syspd = NULL; uint16_t syspd_id; uint16_t *mbox; mtx_assert(&sc->mfi_io_lock, MA_OWNED); error = 0; switch (cm->cm_frame->dcmd.opcode) { case MFI_DCMD_LD_DELETE: TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { if (ld->ld_id == cm->cm_frame->dcmd.mbox[0]) break; } if (ld == NULL) error = ENOENT; else error = mfi_disk_disable(ld); break; case MFI_DCMD_CFG_CLEAR: TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { error = mfi_disk_disable(ld); if (error) break; } if (error) { TAILQ_FOREACH(ld2, &sc->mfi_ld_tqh, ld_link) { if (ld2 == ld) break; mfi_disk_enable(ld2); } } break; case MFI_DCMD_PD_STATE_SET: mbox = (uint16_t *) cm->cm_frame->dcmd.mbox; syspd_id = mbox[0]; if (mbox[2] == MFI_PD_STATE_UNCONFIGURED_GOOD) { TAILQ_FOREACH(syspd, &sc->mfi_syspd_tqh, pd_link) { if (syspd->pd_id == syspd_id) break; } } else break; if (syspd) error = mfi_syspd_disable(syspd); break; default: break; } return (error); } /* Perform post-issue checks on commands from userland. */ static void mfi_check_command_post(struct mfi_softc *sc, struct mfi_command *cm) { struct mfi_disk *ld, *ldn; struct mfi_system_pd *syspd = NULL; uint16_t syspd_id; uint16_t *mbox; switch (cm->cm_frame->dcmd.opcode) { case MFI_DCMD_LD_DELETE: TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { if (ld->ld_id == cm->cm_frame->dcmd.mbox[0]) break; } KASSERT(ld != NULL, ("volume disappeared")); if (cm->cm_frame->header.cmd_status == MFI_STAT_OK) { mtx_unlock(&sc->mfi_io_lock); mtx_lock(&Giant); device_delete_child(sc->mfi_dev, ld->ld_dev); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); } else mfi_disk_enable(ld); break; case MFI_DCMD_CFG_CLEAR: if (cm->cm_frame->header.cmd_status == MFI_STAT_OK) { mtx_unlock(&sc->mfi_io_lock); mtx_lock(&Giant); TAILQ_FOREACH_SAFE(ld, &sc->mfi_ld_tqh, ld_link, ldn) { device_delete_child(sc->mfi_dev, ld->ld_dev); } mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); } else { TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) mfi_disk_enable(ld); } break; case MFI_DCMD_CFG_ADD: mfi_ldprobe(sc); break; case MFI_DCMD_CFG_FOREIGN_IMPORT: mfi_ldprobe(sc); break; case MFI_DCMD_PD_STATE_SET: mbox = (uint16_t *) cm->cm_frame->dcmd.mbox; syspd_id = mbox[0]; if (mbox[2] == MFI_PD_STATE_UNCONFIGURED_GOOD) { TAILQ_FOREACH(syspd, &sc->mfi_syspd_tqh, pd_link) { if (syspd->pd_id == syspd_id) break; } } else break; /* If the transition fails then enable the syspd again */ if (syspd && cm->cm_frame->header.cmd_status != MFI_STAT_OK) mfi_syspd_enable(syspd); break; } } static int mfi_check_for_sscd(struct mfi_softc *sc, struct mfi_command *cm) { struct mfi_config_data *conf_data; struct mfi_command *ld_cm = NULL; struct mfi_ld_info *ld_info = NULL; struct mfi_ld_config *ld; char *p; int error = 0; conf_data = (struct mfi_config_data *)cm->cm_data; if (cm->cm_frame->dcmd.opcode == MFI_DCMD_CFG_ADD) { p = (char *)conf_data->array; p += conf_data->array_size * conf_data->array_count; ld = (struct mfi_ld_config *)p; if (ld->params.isSSCD == 1) error = 1; } else if (cm->cm_frame->dcmd.opcode == MFI_DCMD_LD_DELETE) { error = mfi_dcmd_command(sc, &ld_cm, MFI_DCMD_LD_GET_INFO, (void **)&ld_info, sizeof(*ld_info)); if (error) { device_printf(sc->mfi_dev, "Failed to allocate " "MFI_DCMD_LD_GET_INFO %d\n", error); if (ld_info) free(ld_info, M_MFIBUF); return 0; } ld_cm->cm_flags = MFI_CMD_DATAIN; ld_cm->cm_frame->dcmd.mbox[0] = cm->cm_frame->dcmd.mbox[0]; ld_cm->cm_frame->header.target_id = cm->cm_frame->dcmd.mbox[0]; if 
(mfi_wait_command(sc, ld_cm) != 0) { device_printf(sc->mfi_dev, "failed to get log drv\n"); mfi_release_command(ld_cm); free(ld_info, M_MFIBUF); return 0; } if (ld_cm->cm_frame->header.cmd_status != MFI_STAT_OK) { free(ld_info, M_MFIBUF); mfi_release_command(ld_cm); return 0; } else ld_info = (struct mfi_ld_info *)ld_cm->cm_private; if (ld_info->ld_config.params.isSSCD == 1) error = 1; mfi_release_command(ld_cm); free(ld_info, M_MFIBUF); } return error; } static int mfi_stp_cmd(struct mfi_softc *sc, struct mfi_command *cm,caddr_t arg) { uint8_t i; struct mfi_ioc_packet *ioc; ioc = (struct mfi_ioc_packet *)arg; int sge_size, error; struct megasas_sge *kern_sge; memset(sc->kbuff_arr, 0, sizeof(sc->kbuff_arr)); kern_sge =(struct megasas_sge *) ((uintptr_t)cm->cm_frame + ioc->mfi_sgl_off); cm->cm_frame->header.sg_count = ioc->mfi_sge_count; if (sizeof(bus_addr_t) == 8) { cm->cm_frame->header.flags |= MFI_FRAME_SGL64; cm->cm_extra_frames = 2; sge_size = sizeof(struct mfi_sg64); } else { cm->cm_extra_frames = (cm->cm_total_frame_size - 1) / MFI_FRAME_SIZE; sge_size = sizeof(struct mfi_sg32); } cm->cm_total_frame_size += (sge_size * ioc->mfi_sge_count); for (i = 0; i < ioc->mfi_sge_count; i++) { if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ioc->mfi_sgl[i].iov_len,/* maxsize */ 2, /* nsegments */ ioc->mfi_sgl[i].iov_len,/* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_kbuff_arr_dmat[i])) { device_printf(sc->mfi_dev, "Cannot allocate mfi_kbuff_arr_dmat tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_kbuff_arr_dmat[i], (void **)&sc->kbuff_arr[i], BUS_DMA_NOWAIT, &sc->mfi_kbuff_arr_dmamap[i])) { device_printf(sc->mfi_dev, "Cannot allocate mfi_kbuff_arr_dmamap memory\n"); return (ENOMEM); } bus_dmamap_load(sc->mfi_kbuff_arr_dmat[i], sc->mfi_kbuff_arr_dmamap[i], sc->kbuff_arr[i], ioc->mfi_sgl[i].iov_len, mfi_addr_cb, &sc->mfi_kbuff_arr_busaddr[i], 0); if (!sc->kbuff_arr[i]) { device_printf(sc->mfi_dev, "Could not allocate memory for kbuff_arr info\n"); return -1; } kern_sge[i].phys_addr = sc->mfi_kbuff_arr_busaddr[i]; kern_sge[i].length = ioc->mfi_sgl[i].iov_len; if (sizeof(bus_addr_t) == 8) { cm->cm_frame->stp.sgl.sg64[i].addr = kern_sge[i].phys_addr; cm->cm_frame->stp.sgl.sg64[i].len = ioc->mfi_sgl[i].iov_len; } else { cm->cm_frame->stp.sgl.sg32[i].addr = kern_sge[i].phys_addr; cm->cm_frame->stp.sgl.sg32[i].len = ioc->mfi_sgl[i].iov_len; } error = copyin(ioc->mfi_sgl[i].iov_base, sc->kbuff_arr[i], ioc->mfi_sgl[i].iov_len); if (error != 0) { device_printf(sc->mfi_dev, "Copy in failed\n"); return error; } } cm->cm_flags |=MFI_CMD_MAPPED; return 0; } static int mfi_user_command(struct mfi_softc *sc, struct mfi_ioc_passthru *ioc) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd; void *ioc_buf = NULL; uint32_t context; int error = 0, locked; if (ioc->buf_size > 0) { if (ioc->buf_size > 1024 * 1024) return (ENOMEM); ioc_buf = malloc(ioc->buf_size, M_MFIBUF, M_WAITOK); error = copyin(ioc->buf, ioc_buf, ioc->buf_size); if (error) { device_printf(sc->mfi_dev, "failed to copyin\n"); free(ioc_buf, M_MFIBUF); return (error); } } locked = mfi_config_lock(sc, ioc->ioc_frame.opcode); mtx_lock(&sc->mfi_io_lock); while ((cm = mfi_dequeue_free(sc)) == NULL) msleep(mfi_user_command, &sc->mfi_io_lock, 0, "mfiioc", hz); /* Save context for later */ context = cm->cm_frame->header.context; dcmd = &cm->cm_frame->dcmd; 
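/* Copy the user-supplied DCMD frame over the driver's frame; the context saved above is restored below because the bcopy clobbers it. */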
bcopy(&ioc->ioc_frame, dcmd, sizeof(struct mfi_dcmd_frame)); cm->cm_sg = &dcmd->sgl; cm->cm_total_frame_size = MFI_DCMD_FRAME_SIZE; cm->cm_data = ioc_buf; cm->cm_len = ioc->buf_size; /* restore context */ cm->cm_frame->header.context = context; /* Cheat since we don't know if we're writing or reading */ cm->cm_flags = MFI_CMD_DATAIN | MFI_CMD_DATAOUT; error = mfi_check_command_pre(sc, cm); if (error) goto out; error = mfi_wait_command(sc, cm); if (error) { device_printf(sc->mfi_dev, "ioctl failed %d\n", error); goto out; } bcopy(dcmd, &ioc->ioc_frame, sizeof(struct mfi_dcmd_frame)); mfi_check_command_post(sc, cm); out: mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); mfi_config_unlock(sc, locked); if (ioc->buf_size > 0) error = copyout(ioc_buf, ioc->buf, ioc->buf_size); if (ioc_buf) free(ioc_buf, M_MFIBUF); return (error); } #define PTRIN(p) ((void *)(uintptr_t)(p)) static int mfi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag, struct thread *td) { struct mfi_softc *sc; union mfi_statrequest *ms; struct mfi_ioc_packet *ioc; #ifdef COMPAT_FREEBSD32 struct mfi_ioc_packet32 *ioc32; #endif struct mfi_ioc_aen *aen; struct mfi_command *cm = NULL; uint32_t context = 0; union mfi_sense_ptr sense_ptr; uint8_t *data = NULL, *temp, *addr, skip_pre_post = 0; size_t len; int i, res; struct mfi_ioc_passthru *iop = (struct mfi_ioc_passthru *)arg; #ifdef COMPAT_FREEBSD32 struct mfi_ioc_passthru32 *iop32 = (struct mfi_ioc_passthru32 *)arg; struct mfi_ioc_passthru iop_swab; #endif int error, locked; union mfi_sgl *sgl; sc = dev->si_drv1; error = 0; if (sc->adpreset) return EBUSY; if (sc->hw_crit_error) return EBUSY; if (sc->issuepend_done == 0) return EBUSY; switch (cmd) { case MFIIO_STATS: ms = (union mfi_statrequest *)arg; switch (ms->ms_item) { case MFIQ_FREE: case MFIQ_BIO: case MFIQ_READY: case MFIQ_BUSY: bcopy(&sc->mfi_qstat[ms->ms_item], &ms->ms_qstat, sizeof(struct mfi_qstat)); break; default: error = ENOIOCTL; break; } break; case MFIIO_QUERY_DISK: { struct mfi_query_disk *qd; struct mfi_disk *ld; qd = (struct mfi_query_disk *)arg; mtx_lock(&sc->mfi_io_lock); TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { if (ld->ld_id == qd->array_id) break; } if (ld == NULL) { qd->present = 0; mtx_unlock(&sc->mfi_io_lock); return (0); } qd->present = 1; if (ld->ld_flags & MFI_DISK_FLAGS_OPEN) qd->open = 1; bzero(qd->devname, SPECNAMELEN + 1); snprintf(qd->devname, SPECNAMELEN, "mfid%d", ld->ld_unit); mtx_unlock(&sc->mfi_io_lock); break; } case MFI_CMD: #ifdef COMPAT_FREEBSD32 case MFI_CMD32: #endif { devclass_t devclass; ioc = (struct mfi_ioc_packet *)arg; int adapter; adapter = ioc->mfi_adapter_no; if (device_get_unit(sc->mfi_dev) == 0 && adapter != 0) { devclass = devclass_find("mfi"); sc = devclass_get_softc(devclass, adapter); } mtx_lock(&sc->mfi_io_lock); if ((cm = mfi_dequeue_free(sc)) == NULL) { mtx_unlock(&sc->mfi_io_lock); return (EBUSY); } mtx_unlock(&sc->mfi_io_lock); locked = 0; /* * save off original context since copying from user * will clobber some data */ context = cm->cm_frame->header.context; cm->cm_frame->header.context = cm->cm_index; bcopy(ioc->mfi_frame.raw, cm->cm_frame, 2 * MEGAMFI_FRAME_SIZE); cm->cm_total_frame_size = (sizeof(union mfi_sgl) * ioc->mfi_sge_count) + ioc->mfi_sgl_off; cm->cm_frame->header.scsi_status = 0; cm->cm_frame->header.pad0 = 0; if (ioc->mfi_sge_count) { cm->cm_sg = (union mfi_sgl *)&cm->cm_frame->bytes[ioc->mfi_sgl_off]; } sgl = cm->cm_sg; cm->cm_flags = 0; if (cm->cm_frame->header.flags & MFI_FRAME_DATAIN) cm->cm_flags |= MFI_CMD_DATAIN; if 
(cm->cm_frame->header.flags & MFI_FRAME_DATAOUT) cm->cm_flags |= MFI_CMD_DATAOUT; /* Legacy app shim */ if (cm->cm_flags == 0) cm->cm_flags |= MFI_CMD_DATAIN | MFI_CMD_DATAOUT; cm->cm_len = cm->cm_frame->header.data_len; if (cm->cm_frame->header.cmd == MFI_CMD_STP) { #ifdef COMPAT_FREEBSD32 if (cmd == MFI_CMD) { #endif /* Native */ cm->cm_stp_len = ioc->mfi_sgl[0].iov_len; #ifdef COMPAT_FREEBSD32 } else { /* 32bit on 64bit */ ioc32 = (struct mfi_ioc_packet32 *)ioc; cm->cm_stp_len = ioc32->mfi_sgl[0].iov_len; } #endif cm->cm_len += cm->cm_stp_len; } if (cm->cm_len && (cm->cm_flags & (MFI_CMD_DATAIN | MFI_CMD_DATAOUT))) { cm->cm_data = data = malloc(cm->cm_len, M_MFIBUF, M_WAITOK | M_ZERO); } else { cm->cm_data = 0; } /* restore header context */ cm->cm_frame->header.context = context; if (cm->cm_frame->header.cmd == MFI_CMD_STP) { res = mfi_stp_cmd(sc, cm, arg); if (res != 0) goto out; } else { temp = data; if ((cm->cm_flags & MFI_CMD_DATAOUT) || (cm->cm_frame->header.cmd == MFI_CMD_STP)) { for (i = 0; i < ioc->mfi_sge_count; i++) { #ifdef COMPAT_FREEBSD32 if (cmd == MFI_CMD) { #endif /* Native */ addr = ioc->mfi_sgl[i].iov_base; len = ioc->mfi_sgl[i].iov_len; #ifdef COMPAT_FREEBSD32 } else { /* 32bit on 64bit */ ioc32 = (struct mfi_ioc_packet32 *)ioc; addr = PTRIN(ioc32->mfi_sgl[i].iov_base); len = ioc32->mfi_sgl[i].iov_len; } #endif error = copyin(addr, temp, len); if (error != 0) { device_printf(sc->mfi_dev, "Copy in failed\n"); goto out; } temp = &temp[len]; } } } if (cm->cm_frame->header.cmd == MFI_CMD_DCMD) locked = mfi_config_lock(sc, cm->cm_frame->dcmd.opcode); if (cm->cm_frame->header.cmd == MFI_CMD_PD_SCSI_IO) { cm->cm_frame->pass.sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; cm->cm_frame->pass.sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); } mtx_lock(&sc->mfi_io_lock); skip_pre_post = mfi_check_for_sscd (sc, cm); if (!skip_pre_post) { error = mfi_check_command_pre(sc, cm); if (error) { mtx_unlock(&sc->mfi_io_lock); goto out; } } if ((error = mfi_wait_command(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Controller polled failed\n"); mtx_unlock(&sc->mfi_io_lock); goto out; } if (!skip_pre_post) { mfi_check_command_post(sc, cm); } mtx_unlock(&sc->mfi_io_lock); if (cm->cm_frame->header.cmd != MFI_CMD_STP) { temp = data; if ((cm->cm_flags & MFI_CMD_DATAIN) || (cm->cm_frame->header.cmd == MFI_CMD_STP)) { for (i = 0; i < ioc->mfi_sge_count; i++) { #ifdef COMPAT_FREEBSD32 if (cmd == MFI_CMD) { #endif /* Native */ addr = ioc->mfi_sgl[i].iov_base; len = ioc->mfi_sgl[i].iov_len; #ifdef COMPAT_FREEBSD32 } else { /* 32bit on 64bit */ ioc32 = (struct mfi_ioc_packet32 *)ioc; addr = PTRIN(ioc32->mfi_sgl[i].iov_base); len = ioc32->mfi_sgl[i].iov_len; } #endif error = copyout(temp, addr, len); if (error != 0) { device_printf(sc->mfi_dev, "Copy out failed\n"); goto out; } temp = &temp[len]; } } } if (ioc->mfi_sense_len) { /* get user-space sense ptr then copy out sense */ bcopy(&ioc->mfi_frame.raw[ioc->mfi_sense_off], &sense_ptr.sense_ptr_data[0], sizeof(sense_ptr.sense_ptr_data)); #ifdef COMPAT_FREEBSD32 if (cmd != MFI_CMD) { /* * not 64bit native so zero out any address * over 32bit */ sense_ptr.addr.high = 0; } #endif error = copyout(cm->cm_sense, sense_ptr.user_space, ioc->mfi_sense_len); if (error != 0) { device_printf(sc->mfi_dev, "Copy out failed\n"); goto out; } } ioc->mfi_frame.hdr.cmd_status = cm->cm_frame->header.cmd_status; out: mfi_config_unlock(sc, locked); if (data) free(data, M_MFIBUF); if (cm->cm_frame->header.cmd == MFI_CMD_STP) { for (i = 0; i < 2; i++) { if 
(sc->kbuff_arr[i]) { - if (sc->mfi_kbuff_arr_busaddr != 0) + if (sc->mfi_kbuff_arr_busaddr[i] != 0) bus_dmamap_unload( sc->mfi_kbuff_arr_dmat[i], sc->mfi_kbuff_arr_dmamap[i] ); if (sc->kbuff_arr[i] != NULL) bus_dmamem_free( sc->mfi_kbuff_arr_dmat[i], sc->kbuff_arr[i], sc->mfi_kbuff_arr_dmamap[i] ); if (sc->mfi_kbuff_arr_dmat[i] != NULL) bus_dma_tag_destroy( sc->mfi_kbuff_arr_dmat[i]); } } } if (cm) { mtx_lock(&sc->mfi_io_lock); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); } break; } case MFI_SET_AEN: aen = (struct mfi_ioc_aen *)arg; mtx_lock(&sc->mfi_io_lock); error = mfi_aen_register(sc, aen->aen_seq_num, aen->aen_class_locale); mtx_unlock(&sc->mfi_io_lock); break; case MFI_LINUX_CMD_2: /* Firmware Linux ioctl shim */ { devclass_t devclass; struct mfi_linux_ioc_packet l_ioc; int adapter; devclass = devclass_find("mfi"); if (devclass == NULL) return (ENOENT); error = copyin(arg, &l_ioc, sizeof(l_ioc)); if (error) return (error); adapter = l_ioc.lioc_adapter_no; sc = devclass_get_softc(devclass, adapter); if (sc == NULL) return (ENOENT); return (mfi_linux_ioctl_int(sc->mfi_cdev, cmd, arg, flag, td)); break; } case MFI_LINUX_SET_AEN_2: /* AEN Linux ioctl shim */ { devclass_t devclass; struct mfi_linux_ioc_aen l_aen; int adapter; devclass = devclass_find("mfi"); if (devclass == NULL) return (ENOENT); error = copyin(arg, &l_aen, sizeof(l_aen)); if (error) return (error); adapter = l_aen.laen_adapter_no; sc = devclass_get_softc(devclass, adapter); if (sc == NULL) return (ENOENT); return (mfi_linux_ioctl_int(sc->mfi_cdev, cmd, arg, flag, td)); break; } #ifdef COMPAT_FREEBSD32 case MFIIO_PASSTHRU32: if (!SV_CURPROC_FLAG(SV_ILP32)) { error = ENOTTY; break; } iop_swab.ioc_frame = iop32->ioc_frame; iop_swab.buf_size = iop32->buf_size; iop_swab.buf = PTRIN(iop32->buf); iop = &iop_swab; /* FALLTHROUGH */ #endif case MFIIO_PASSTHRU: error = mfi_user_command(sc, iop); #ifdef COMPAT_FREEBSD32 if (cmd == MFIIO_PASSTHRU32) iop32->ioc_frame = iop_swab.ioc_frame; #endif break; default: device_printf(sc->mfi_dev, "IOCTL 0x%lx not handled\n", cmd); error = ENOTTY; break; } return (error); } static int mfi_linux_ioctl_int(struct cdev *dev, u_long cmd, caddr_t arg, int flag, struct thread *td) { struct mfi_softc *sc; struct mfi_linux_ioc_packet l_ioc; struct mfi_linux_ioc_aen l_aen; struct mfi_command *cm = NULL; struct mfi_aen *mfi_aen_entry; union mfi_sense_ptr sense_ptr; uint32_t context = 0; uint8_t *data = NULL, *temp; int i; int error, locked; sc = dev->si_drv1; error = 0; switch (cmd) { case MFI_LINUX_CMD_2: /* Firmware Linux ioctl shim */ error = copyin(arg, &l_ioc, sizeof(l_ioc)); if (error != 0) return (error); if (l_ioc.lioc_sge_count > MAX_LINUX_IOCTL_SGE) { return (EINVAL); } mtx_lock(&sc->mfi_io_lock); if ((cm = mfi_dequeue_free(sc)) == NULL) { mtx_unlock(&sc->mfi_io_lock); return (EBUSY); } mtx_unlock(&sc->mfi_io_lock); locked = 0; /* * save off original context since copying from user * will clobber some data */ context = cm->cm_frame->header.context; bcopy(l_ioc.lioc_frame.raw, cm->cm_frame, 2 * MFI_DCMD_FRAME_SIZE); /* this isn't quite right */ cm->cm_total_frame_size = (sizeof(union mfi_sgl) * l_ioc.lioc_sge_count) + l_ioc.lioc_sgl_off; cm->cm_frame->header.scsi_status = 0; cm->cm_frame->header.pad0 = 0; if (l_ioc.lioc_sge_count) cm->cm_sg = (union mfi_sgl *)&cm->cm_frame->bytes[l_ioc.lioc_sgl_off]; cm->cm_flags = 0; if (cm->cm_frame->header.flags & MFI_FRAME_DATAIN) cm->cm_flags |= MFI_CMD_DATAIN; if (cm->cm_frame->header.flags & MFI_FRAME_DATAOUT) cm->cm_flags |= MFI_CMD_DATAOUT; 
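/* Size the kernel bounce buffer from the frame's data_len; the SG entries are copied in and out around the command according to the direction flags. */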
cm->cm_len = cm->cm_frame->header.data_len; if (cm->cm_len && (cm->cm_flags & (MFI_CMD_DATAIN | MFI_CMD_DATAOUT))) { cm->cm_data = data = malloc(cm->cm_len, M_MFIBUF, M_WAITOK | M_ZERO); } else { cm->cm_data = 0; } /* restore header context */ cm->cm_frame->header.context = context; temp = data; if (cm->cm_flags & MFI_CMD_DATAOUT) { for (i = 0; i < l_ioc.lioc_sge_count; i++) { error = copyin(PTRIN(l_ioc.lioc_sgl[i].iov_base), temp, l_ioc.lioc_sgl[i].iov_len); if (error != 0) { device_printf(sc->mfi_dev, "Copy in failed\n"); goto out; } temp = &temp[l_ioc.lioc_sgl[i].iov_len]; } } if (cm->cm_frame->header.cmd == MFI_CMD_DCMD) locked = mfi_config_lock(sc, cm->cm_frame->dcmd.opcode); if (cm->cm_frame->header.cmd == MFI_CMD_PD_SCSI_IO) { cm->cm_frame->pass.sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; cm->cm_frame->pass.sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); } mtx_lock(&sc->mfi_io_lock); error = mfi_check_command_pre(sc, cm); if (error) { mtx_unlock(&sc->mfi_io_lock); goto out; } if ((error = mfi_wait_command(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Controller polled failed\n"); mtx_unlock(&sc->mfi_io_lock); goto out; } mfi_check_command_post(sc, cm); mtx_unlock(&sc->mfi_io_lock); temp = data; if (cm->cm_flags & MFI_CMD_DATAIN) { for (i = 0; i < l_ioc.lioc_sge_count; i++) { error = copyout(temp, PTRIN(l_ioc.lioc_sgl[i].iov_base), l_ioc.lioc_sgl[i].iov_len); if (error != 0) { device_printf(sc->mfi_dev, "Copy out failed\n"); goto out; } temp = &temp[l_ioc.lioc_sgl[i].iov_len]; } } if (l_ioc.lioc_sense_len) { /* get user-space sense ptr then copy out sense */ bcopy(&((struct mfi_linux_ioc_packet*)arg) ->lioc_frame.raw[l_ioc.lioc_sense_off], &sense_ptr.sense_ptr_data[0], sizeof(sense_ptr.sense_ptr_data)); #ifdef __amd64__ /* * only 32bit Linux support so zero out any * address over 32bit */ sense_ptr.addr.high = 0; #endif error = copyout(cm->cm_sense, sense_ptr.user_space, l_ioc.lioc_sense_len); if (error != 0) { device_printf(sc->mfi_dev, "Copy out failed\n"); goto out; } } error = copyout(&cm->cm_frame->header.cmd_status, &((struct mfi_linux_ioc_packet*)arg) ->lioc_frame.hdr.cmd_status, 1); if (error != 0) { device_printf(sc->mfi_dev, "Copy out failed\n"); goto out; } out: mfi_config_unlock(sc, locked); if (data) free(data, M_MFIBUF); if (cm) { mtx_lock(&sc->mfi_io_lock); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); } return (error); case MFI_LINUX_SET_AEN_2: /* AEN Linux ioctl shim */ error = copyin(arg, &l_aen, sizeof(l_aen)); if (error != 0) return (error); printf("AEN IMPLEMENTED for pid %d\n", curproc->p_pid); mfi_aen_entry = malloc(sizeof(struct mfi_aen), M_MFIBUF, M_WAITOK); mtx_lock(&sc->mfi_io_lock); if (mfi_aen_entry != NULL) { mfi_aen_entry->p = curproc; TAILQ_INSERT_TAIL(&sc->mfi_aen_pids, mfi_aen_entry, aen_link); } error = mfi_aen_register(sc, l_aen.laen_seq_num, l_aen.laen_class_locale); if (error != 0) { TAILQ_REMOVE(&sc->mfi_aen_pids, mfi_aen_entry, aen_link); free(mfi_aen_entry, M_MFIBUF); } mtx_unlock(&sc->mfi_io_lock); return (error); default: device_printf(sc->mfi_dev, "IOCTL 0x%lx not handled\n", cmd); error = ENOENT; break; } return (error); } static int mfi_poll(struct cdev *dev, int poll_events, struct thread *td) { struct mfi_softc *sc; int revents = 0; sc = dev->si_drv1; if (poll_events & (POLLIN | POLLRDNORM)) { if (sc->mfi_aen_triggered != 0) { revents |= poll_events & (POLLIN | POLLRDNORM); sc->mfi_aen_triggered = 0; } if (sc->mfi_aen_triggered == 0 && sc->mfi_aen_cm == NULL) { revents |= POLLERR; } } if (revents == 0) { if 
(poll_events & (POLLIN | POLLRDNORM)) { sc->mfi_poll_waiting = 1; selrecord(td, &sc->mfi_select); } } return revents; } static void mfi_dump_all(void) { struct mfi_softc *sc; struct mfi_command *cm; devclass_t dc; time_t deadline; int timedout; int i; dc = devclass_find("mfi"); if (dc == NULL) { printf("No mfi dev class\n"); return; } for (i = 0; ; i++) { sc = devclass_get_softc(dc, i); if (sc == NULL) break; device_printf(sc->mfi_dev, "Dumping\n\n"); timedout = 0; deadline = time_uptime - mfi_cmd_timeout; mtx_lock(&sc->mfi_io_lock); TAILQ_FOREACH(cm, &sc->mfi_busy, cm_link) { if (cm->cm_timestamp <= deadline) { device_printf(sc->mfi_dev, "COMMAND %p TIMEOUT AFTER %d SECONDS\n", cm, (int)(time_uptime - cm->cm_timestamp)); MFI_PRINT_CMD(cm); timedout++; } } #if 0 if (timedout) MFI_DUMP_CMDS(sc); #endif mtx_unlock(&sc->mfi_io_lock); } return; } static void mfi_timeout(void *data) { struct mfi_softc *sc = (struct mfi_softc *)data; struct mfi_command *cm, *tmp; time_t deadline; int timedout = 0; deadline = time_uptime - mfi_cmd_timeout; if (sc->adpreset == 0) { if (!mfi_tbolt_reset(sc)) { callout_reset(&sc->mfi_watchdog_callout, mfi_cmd_timeout * hz, mfi_timeout, sc); return; } } mtx_lock(&sc->mfi_io_lock); TAILQ_FOREACH_SAFE(cm, &sc->mfi_busy, cm_link, tmp) { if (sc->mfi_aen_cm == cm || sc->mfi_map_sync_cm == cm) continue; if (cm->cm_timestamp <= deadline) { if (sc->adpreset != 0 && sc->issuepend_done == 0) { cm->cm_timestamp = time_uptime; } else { device_printf(sc->mfi_dev, "COMMAND %p TIMEOUT AFTER %d SECONDS\n", cm, (int)(time_uptime - cm->cm_timestamp) ); MFI_PRINT_CMD(cm); MFI_VALIDATE_CMD(sc, cm); /* * While commands can get stuck forever we do * not fail them as there is no way to tell if * the controller has actually processed them * or not. * * In addition its very likely that force * failing a command here would cause a panic * e.g. in UFS. */ timedout++; } } } #if 0 if (timedout) MFI_DUMP_CMDS(sc); #endif mtx_unlock(&sc->mfi_io_lock); callout_reset(&sc->mfi_watchdog_callout, mfi_cmd_timeout * hz, mfi_timeout, sc); if (0) mfi_dump_all(); return; } Index: projects/clang390-import/sys/dev/ofw/ofw_bus_subr.c =================================================================== --- projects/clang390-import/sys/dev/ofw/ofw_bus_subr.c (revision 305016) +++ projects/clang390-import/sys/dev/ofw/ofw_bus_subr.c (revision 305017) @@ -1,929 +1,951 @@ /*- * Copyright (c) 2001 - 2003 by Thomas Moestl . * Copyright (c) 2005 Marius Strobl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include "ofw_bus_if.h" +#define OFW_COMPAT_LEN 255 + int ofw_bus_gen_setup_devinfo(struct ofw_bus_devinfo *obd, phandle_t node) { if (obd == NULL) return (ENOMEM); /* The 'name' property is considered mandatory. */ if ((OF_getprop_alloc(node, "name", 1, (void **)&obd->obd_name)) == -1) return (EINVAL); OF_getprop_alloc(node, "compatible", 1, (void **)&obd->obd_compat); OF_getprop_alloc(node, "device_type", 1, (void **)&obd->obd_type); OF_getprop_alloc(node, "model", 1, (void **)&obd->obd_model); OF_getprop_alloc(node, "status", 1, (void **)&obd->obd_status); obd->obd_node = node; return (0); } void ofw_bus_gen_destroy_devinfo(struct ofw_bus_devinfo *obd) { if (obd == NULL) return; if (obd->obd_compat != NULL) free(obd->obd_compat, M_OFWPROP); if (obd->obd_model != NULL) free(obd->obd_model, M_OFWPROP); if (obd->obd_name != NULL) free(obd->obd_name, M_OFWPROP); if (obd->obd_type != NULL) free(obd->obd_type, M_OFWPROP); if (obd->obd_status != NULL) free(obd->obd_status, M_OFWPROP); } int ofw_bus_gen_child_pnpinfo_str(device_t cbdev, device_t child, char *buf, size_t buflen) { if (ofw_bus_get_name(child) != NULL) { strlcat(buf, "name=", buflen); strlcat(buf, ofw_bus_get_name(child), buflen); } if (ofw_bus_get_compat(child) != NULL) { strlcat(buf, " compat=", buflen); strlcat(buf, ofw_bus_get_compat(child), buflen); } return (0); }; const char * ofw_bus_gen_get_compat(device_t bus, device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(bus, dev); if (obd == NULL) return (NULL); return (obd->obd_compat); } const char * ofw_bus_gen_get_model(device_t bus, device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(bus, dev); if (obd == NULL) return (NULL); return (obd->obd_model); } const char * ofw_bus_gen_get_name(device_t bus, device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(bus, dev); if (obd == NULL) return (NULL); return (obd->obd_name); } phandle_t ofw_bus_gen_get_node(device_t bus, device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(bus, dev); if (obd == NULL) return (0); return (obd->obd_node); } const char * ofw_bus_gen_get_type(device_t bus, device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(bus, dev); if (obd == NULL) return (NULL); return (obd->obd_type); } const char * ofw_bus_get_status(device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(device_get_parent(dev), dev); if (obd == NULL) return (NULL); return (obd->obd_status); } int ofw_bus_status_okay(device_t dev) { const char *status; status = ofw_bus_get_status(dev); if (status == NULL || strcmp(status, "okay") == 0 || strcmp(status, "ok") == 0) return (1); return (0); } static int -ofw_bus_node_is_compatible(const char *compat, int len, const char *onecompat) +ofw_bus_node_is_compatible_int(const char *compat, int len, + const char 
*onecompat) { int onelen, l, ret; onelen = strlen(onecompat); ret = 0; while (len > 0) { if (strlen(compat) == onelen && strncasecmp(compat, onecompat, onelen) == 0) { /* Found it. */ ret = 1; break; } /* Slide to the next sub-string. */ l = strlen(compat) + 1; compat += l; len -= l; } return (ret); } int +ofw_bus_node_is_compatible(phandle_t node, const char *compatstr) +{ + char compat[OFW_COMPAT_LEN]; + int len, rv; + + if ((len = OF_getproplen(node, "compatible")) <= 0) + return (0); + + bzero(compat, OFW_COMPAT_LEN); + + if (OF_getprop(node, "compatible", compat, OFW_COMPAT_LEN) < 0) + return (0); + + rv = ofw_bus_node_is_compatible_int(compat, len, compatstr); + + return (rv); +} + +int ofw_bus_is_compatible(device_t dev, const char *onecompat) { phandle_t node; const char *compat; int len; if ((compat = ofw_bus_get_compat(dev)) == NULL) return (0); if ((node = ofw_bus_get_node(dev)) == -1) return (0); /* Get total 'compatible' prop len */ if ((len = OF_getproplen(node, "compatible")) <= 0) return (0); - return (ofw_bus_node_is_compatible(compat, len, onecompat)); + return (ofw_bus_node_is_compatible_int(compat, len, onecompat)); } int ofw_bus_is_compatible_strict(device_t dev, const char *compatible) { const char *compat; size_t len; if ((compat = ofw_bus_get_compat(dev)) == NULL) return (0); len = strlen(compatible); if (strlen(compat) == len && strncasecmp(compat, compatible, len) == 0) return (1); return (0); } const struct ofw_compat_data * ofw_bus_search_compatible(device_t dev, const struct ofw_compat_data *compat) { if (compat == NULL) return NULL; for (; compat->ocd_str != NULL; ++compat) { if (ofw_bus_is_compatible(dev, compat->ocd_str)) break; } return (compat); } int ofw_bus_has_prop(device_t dev, const char *propname) { phandle_t node; if ((node = ofw_bus_get_node(dev)) == -1) return (0); return (OF_hasprop(node, propname)); } void ofw_bus_setup_iinfo(phandle_t node, struct ofw_bus_iinfo *ii, int intrsz) { pcell_t addrc; int msksz; if (OF_getencprop(node, "#address-cells", &addrc, sizeof(addrc)) == -1) addrc = 2; ii->opi_addrc = addrc * sizeof(pcell_t); ii->opi_imapsz = OF_getencprop_alloc(node, "interrupt-map", 1, (void **)&ii->opi_imap); if (ii->opi_imapsz > 0) { msksz = OF_getencprop_alloc(node, "interrupt-map-mask", 1, (void **)&ii->opi_imapmsk); /* * Failure to get the mask is ignored; a full mask is used * then. We barf on bad mask sizes, however. */ if (msksz != -1 && msksz != ii->opi_addrc + intrsz) panic("ofw_bus_setup_iinfo: bad interrupt-map-mask " "property!"); } } int ofw_bus_lookup_imap(phandle_t node, struct ofw_bus_iinfo *ii, void *reg, int regsz, void *pintr, int pintrsz, void *mintr, int mintrsz, phandle_t *iparent) { uint8_t maskbuf[regsz + pintrsz]; int rv; if (ii->opi_imapsz <= 0) return (0); KASSERT(regsz >= ii->opi_addrc, ("ofw_bus_lookup_imap: register size too small: %d < %d", regsz, ii->opi_addrc)); if (node != -1) { rv = OF_getencprop(node, "reg", reg, regsz); if (rv < regsz) panic("ofw_bus_lookup_imap: cannot get reg property"); } return (ofw_bus_search_intrmap(pintr, pintrsz, reg, ii->opi_addrc, ii->opi_imap, ii->opi_imapsz, ii->opi_imapmsk, maskbuf, mintr, mintrsz, iparent)); } /* * Map an interrupt using the firmware reg, interrupt-map and * interrupt-map-mask properties. * The interrupt property to be mapped must be of size intrsz, and pointed to * by intr. The regs property of the node for which the mapping is done must * be passed as regs. 
This property is an array of register specifications;
 * the size of the address part of such a specification must be passed as
 * physsz. Only the first element of the property is used.
 * imap and imapsz hold the interrupt map and its size.
 * imapmsk is a pointer to the interrupt-map-mask property, which must have
 * a size of physsz + intrsz; it may be NULL, in which case a full mask is
 * assumed.
 * maskbuf must point to a buffer of length physsz + intrsz.
 * The interrupt is returned in result, which must point to a buffer of length
 * rintrsz (which gives the expected size of the mapped interrupt).
 * Returns the number of cells in the interrupt if a mapping was found,
 * 0 otherwise.
 */
int
ofw_bus_search_intrmap(void *intr, int intrsz, void *regs, int physsz,
    void *imap, int imapsz, void *imapmsk, void *maskbuf, void *result,
    int rintrsz, phandle_t *iparent)
{
	phandle_t parent;
	uint8_t *ref = maskbuf;
	uint8_t *uiintr = intr;
	uint8_t *uiregs = regs;
	uint8_t *uiimapmsk = imapmsk;
	uint8_t *mptr;
	pcell_t paddrsz;
	pcell_t pintrsz;
	int i, rsz, tsz;

	rsz = -1;
	if (imapmsk != NULL) {
		for (i = 0; i < physsz; i++)
			ref[i] = uiregs[i] & uiimapmsk[i];
		for (i = 0; i < intrsz; i++)
			ref[physsz + i] = uiintr[i] & uiimapmsk[physsz + i];
	} else {
		bcopy(regs, ref, physsz);
		bcopy(intr, ref + physsz, intrsz);
	}

	mptr = imap;
	i = imapsz;
	paddrsz = 0;
	while (i > 0) {
		bcopy(mptr + physsz + intrsz, &parent, sizeof(parent));
#ifndef OFW_IMAP_NO_IPARENT_ADDR_CELLS
		/*
		 * Find if we need to read the parent address data.
		 * CHRP-derived OF bindings, including ePAPR-compliant FDTs,
		 * use this as an optional part of the specifier.
		 */
		if (OF_getencprop(OF_node_from_xref(parent),
		    "#address-cells", &paddrsz, sizeof(paddrsz)) == -1)
			paddrsz = 0;	/* default */
		paddrsz *= sizeof(pcell_t);
#endif
		if (OF_searchencprop(OF_node_from_xref(parent),
		    "#interrupt-cells", &pintrsz, sizeof(pintrsz)) == -1)
			pintrsz = 1;	/* default */
		pintrsz *= sizeof(pcell_t);

		/* Compute the map stride size.
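		 * One map entry consists of: the child unit address
		 * (physsz bytes), the child interrupt specifier (intrsz
		 * bytes), the parent phandle, an optional parent unit
		 * address (paddrsz bytes) and the parent interrupt
		 * specifier (pintrsz bytes).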
*/ tsz = physsz + intrsz + sizeof(phandle_t) + paddrsz + pintrsz; KASSERT(i >= tsz, ("ofw_bus_search_intrmap: truncated map")); if (bcmp(ref, mptr, physsz + intrsz) == 0) { bcopy(mptr + physsz + intrsz + sizeof(parent) + paddrsz, result, MIN(rintrsz, pintrsz)); if (iparent != NULL) *iparent = parent; return (pintrsz/sizeof(pcell_t)); } mptr += tsz; i -= tsz; } return (0); } int ofw_bus_msimap(phandle_t node, uint16_t pci_rid, phandle_t *msi_parent, uint32_t *msi_rid) { pcell_t *map, mask, msi_base, rid_base, rid_length; ssize_t len; uint32_t masked_rid, rid; int err, i; /* TODO: This should be OF_searchprop_alloc if we had it */ len = OF_getencprop_alloc(node, "msi-map", sizeof(*map), (void **)&map); if (len < 0) { if (msi_parent != NULL) { *msi_parent = 0; OF_getencprop(node, "msi-parent", msi_parent, sizeof(*msi_parent)); } if (msi_rid != NULL) *msi_rid = pci_rid; return (0); } err = ENOENT; rid = 0; mask = 0xffffffff; OF_getencprop(node, "msi-map-mask", &mask, sizeof(mask)); masked_rid = pci_rid & mask; for (i = 0; i < len; i += 4) { rid_base = map[i + 0]; rid_length = map[i + 3]; if (masked_rid < rid_base || masked_rid >= (rid_base + rid_length)) continue; msi_base = map[i + 2]; if (msi_parent != NULL) *msi_parent = map[i + 1]; if (msi_rid != NULL) *msi_rid = masked_rid - rid_base + msi_base; err = 0; break; } free(map, M_OFWPROP); return (err); } int ofw_bus_reg_to_rl(device_t dev, phandle_t node, pcell_t acells, pcell_t scells, struct resource_list *rl) { uint64_t phys, size; ssize_t i, j, rid, nreg, ret; uint32_t *reg; char *name; /* * This may be just redundant when having ofw_bus_devinfo * but makes this routine independent of it. */ ret = OF_getprop_alloc(node, "name", sizeof(*name), (void **)&name); if (ret == -1) name = NULL; ret = OF_getencprop_alloc(node, "reg", sizeof(*reg), (void **)®); nreg = (ret == -1) ? 0 : ret; if (nreg % (acells + scells) != 0) { if (bootverbose) device_printf(dev, "Malformed reg property on <%s>\n", (name == NULL) ? "unknown" : name); nreg = 0; } for (i = 0, rid = 0; i < nreg; i += acells + scells, rid++) { phys = size = 0; for (j = 0; j < acells; j++) { phys <<= 32; phys |= reg[i + j]; } for (j = 0; j < scells; j++) { size <<= 32; size |= reg[i + acells + j]; } /* Skip the dummy reg property of glue devices like ssm(4). */ if (size != 0) resource_list_add(rl, SYS_RES_MEMORY, rid, phys, phys + size - 1, size); } free(name, M_OFWPROP); free(reg, M_OFWPROP); return (0); } /* * Get interrupt parent for given node. * Returns 0 if interrupt parent doesn't exist. 
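 * The explicit "interrupt-parent" property is honored first; failing
 * that, the node's ancestors are walked until one carrying an
 * "interrupt-controller" property is found, whose xref handle is
 * then returned.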
*/ phandle_t ofw_bus_find_iparent(phandle_t node) { phandle_t iparent; if (OF_searchencprop(node, "interrupt-parent", &iparent, sizeof(iparent)) == -1) { for (iparent = node; iparent != 0; iparent = OF_parent(iparent)) { if (OF_hasprop(iparent, "interrupt-controller")) break; } iparent = OF_xref_from_node(iparent); } return (iparent); } int ofw_bus_intr_to_rl(device_t dev, phandle_t node, struct resource_list *rl, int *rlen) { phandle_t iparent; uint32_t icells, *intr; int err, i, irqnum, nintr, rid; boolean_t extended; nintr = OF_getencprop_alloc(node, "interrupts", sizeof(*intr), (void **)&intr); if (nintr > 0) { iparent = ofw_bus_find_iparent(node); if (iparent == 0) { device_printf(dev, "No interrupt-parent found, " "assuming direct parent\n"); iparent = OF_parent(node); iparent = OF_xref_from_node(iparent); } if (OF_searchencprop(OF_node_from_xref(iparent), "#interrupt-cells", &icells, sizeof(icells)) == -1) { device_printf(dev, "Missing #interrupt-cells " "property, assuming <1>\n"); icells = 1; } if (icells < 1 || icells > nintr) { device_printf(dev, "Invalid #interrupt-cells property " "value <%d>, assuming <1>\n", icells); icells = 1; } extended = false; } else { nintr = OF_getencprop_alloc(node, "interrupts-extended", sizeof(*intr), (void **)&intr); if (nintr <= 0) return (0); extended = true; } err = 0; rid = 0; for (i = 0; i < nintr; i += icells) { if (extended) { iparent = intr[i++]; if (OF_searchencprop(OF_node_from_xref(iparent), "#interrupt-cells", &icells, sizeof(icells)) == -1) { device_printf(dev, "Missing #interrupt-cells " "property\n"); err = ENOENT; break; } if (icells < 1 || (i + icells) > nintr) { device_printf(dev, "Invalid #interrupt-cells " "property value <%d>\n", icells); err = ERANGE; break; } } irqnum = ofw_bus_map_intr(dev, iparent, icells, &intr[i]); resource_list_add(rl, SYS_RES_IRQ, rid++, irqnum, irqnum, 1); } if (rlen != NULL) *rlen = rid; free(intr, M_OFWPROP); return (err); } int ofw_bus_intr_by_rid(device_t dev, phandle_t node, int wanted_rid, phandle_t *producer, int *ncells, pcell_t **cells) { phandle_t iparent; uint32_t icells, *intr; int err, i, nintr, rid; boolean_t extended; nintr = OF_getencprop_alloc(node, "interrupts", sizeof(*intr), (void **)&intr); if (nintr > 0) { iparent = ofw_bus_find_iparent(node); if (iparent == 0) { device_printf(dev, "No interrupt-parent found, " "assuming direct parent\n"); iparent = OF_parent(node); iparent = OF_xref_from_node(iparent); } if (OF_searchencprop(OF_node_from_xref(iparent), "#interrupt-cells", &icells, sizeof(icells)) == -1) { device_printf(dev, "Missing #interrupt-cells " "property, assuming <1>\n"); icells = 1; } if (icells < 1 || icells > nintr) { device_printf(dev, "Invalid #interrupt-cells property " "value <%d>, assuming <1>\n", icells); icells = 1; } extended = false; } else { nintr = OF_getencprop_alloc(node, "interrupts-extended", sizeof(*intr), (void **)&intr); if (nintr <= 0) return (ESRCH); extended = true; } err = ESRCH; rid = 0; for (i = 0; i < nintr; i += icells, rid++) { if (extended) { iparent = intr[i++]; if (OF_searchencprop(OF_node_from_xref(iparent), "#interrupt-cells", &icells, sizeof(icells)) == -1) { device_printf(dev, "Missing #interrupt-cells " "property\n"); err = ENOENT; break; } if (icells < 1 || (i + icells) > nintr) { device_printf(dev, "Invalid #interrupt-cells " "property value <%d>\n", icells); err = ERANGE; break; } } if (rid == wanted_rid) { *cells = malloc(icells * sizeof(**cells), M_OFWPROP, M_WAITOK); *producer = iparent; *ncells= icells; memcpy(*cells, intr + 
i, icells * sizeof(**cells));
			err = 0;
			break;
		}
	}
	free(intr, M_OFWPROP);
	return (err);
}

phandle_t
ofw_bus_find_child(phandle_t start, const char *child_name)
{
	char *name;
	int ret;
	phandle_t child;

	for (child = OF_child(start); child != 0; child = OF_peer(child)) {
		ret = OF_getprop_alloc(child, "name", sizeof(*name),
		    (void **)&name);
		if (ret == -1)
			continue;
		if (strcmp(name, child_name) == 0) {
			free(name, M_OFWPROP);
			return (child);
		}
		free(name, M_OFWPROP);
	}

	return (0);
}

phandle_t
ofw_bus_find_compatible(phandle_t node, const char *onecompat)
{
	phandle_t child, ret;
	void *compat;
	int len;

	/*
	 * Traverse all children of 'node', and find the first one with
	 * a matching 'compatible' property.
	 */
	for (child = OF_child(node); child != 0; child = OF_peer(child)) {
		len = OF_getprop_alloc(child, "compatible", 1, &compat);
		if (len >= 0) {
-			ret = ofw_bus_node_is_compatible(compat, len,
+			ret = ofw_bus_node_is_compatible_int(compat, len,
			    onecompat);
			free(compat, M_OFWPROP);
			if (ret != 0)
				return (child);
		}
		ret = ofw_bus_find_compatible(child, onecompat);
		if (ret != 0)
			return (ret);
	}
	return (0);
}

/**
 * @brief Return child of bus whose phandle is node
 *
 * A direct child of @p bus will be returned if its phandle in the
 * OFW tree is @p node. Otherwise, NULL is returned.
 *
 * @param bus The bus to examine
 * @param node The phandle_t to look for.
 */
device_t
ofw_bus_find_child_device_by_phandle(device_t bus, phandle_t node)
{
	device_t *children, retval, child;
	int nkid, i;

	/*
	 * Nothing can match the flag value for no node.
	 */
	if (node == -1)
		return (NULL);

	/*
	 * Search the children for a match. We microoptimize
	 * a bit by not using ofw_bus_get since we already know
	 * the parent. We do not recurse.
	 */
	if (device_get_children(bus, &children, &nkid) != 0)
		return (NULL);
	retval = NULL;
	for (i = 0; i < nkid; i++) {
		child = children[i];
		if (OFW_BUS_GET_NODE(bus, child) == node) {
			retval = child;
			break;
		}
	}
	free(children, M_TEMP);

	return (retval);
}

/*
 * Parse a property that contains a list of xrefs and values
 * (like the standard "clocks" and "resets" properties)
 * Input arguments:
 * node - consumer's device node
 * list_name - name of parsed list - "clocks"
 * cells_name - name of size property - "#clock-cells"
 * idx - the index of the requested list entry, or, if -1, an indication
 *       to return the number of entries in the parsed list.
 * Output arguments:
 * producer - handle of producer
 * ncells - number of cells in result or the number of items in the list when
 *          idx == -1.
 * cells - array of decoded cells
 */
static int
ofw_bus_parse_xref_list_internal(phandle_t node, const char *list_name,
    const char *cells_name, int idx, phandle_t *producer, int *ncells,
    pcell_t **cells)
{
	phandle_t pnode;
	phandle_t *elems;
	uint32_t pcells;
	int rv, i, j, nelems, cnt;

	elems = NULL;
	nelems = OF_getencprop_alloc(node, list_name, sizeof(*elems),
	    (void **)&elems);
	if (nelems <= 0)
		return (ENOENT);
	rv = (idx == -1) ? 0 : ENOENT;
	for (i = 0, cnt = 0; i < nelems; i += pcells, cnt++) {
		pnode = elems[i++];
		if (OF_getencprop(OF_node_from_xref(pnode),
		    cells_name, &pcells, sizeof(pcells)) == -1) {
			printf("Missing %s property\n", cells_name);
			rv = ENOENT;
			break;
		}
		if ((i + pcells) > nelems) {
			printf("Invalid %s property value <%d>\n", cells_name,
			    pcells);
			rv = ERANGE;
			break;
		}
		if (cnt == idx) {
			*cells = malloc(pcells * sizeof(**cells), M_OFWPROP,
			    M_WAITOK);
			*producer = pnode;
			*ncells = pcells;
			for (j = 0; j < pcells; j++)
				(*cells)[j] = elems[i + j];
			rv = 0;
			break;
		}
	}
	if (elems != NULL)
		free(elems, M_OFWPROP);
	if (idx == -1 && rv == 0)
		*ncells = cnt;
	return (rv);
}

/*
 * Parse a property that contains a list of xrefs and values
 * (like the standard "clocks" and "resets" properties)
 * Input arguments:
 * node - consumer's device node
 * list_name - name of parsed list - "clocks"
 * cells_name - name of size property - "#clock-cells"
 * idx - the index of the requested list entry (>= 0)
 * Output arguments:
 * producer - handle of producer
 * ncells - number of cells in result
 * cells - array of decoded cells
 */
int
ofw_bus_parse_xref_list_alloc(phandle_t node, const char *list_name,
    const char *cells_name, int idx, phandle_t *producer, int *ncells,
    pcell_t **cells)
{

	KASSERT(idx >= 0,
	    ("ofw_bus_parse_xref_list_alloc: negative index supplied"));

	return (ofw_bus_parse_xref_list_internal(node, list_name, cells_name,
	    idx, producer, ncells, cells));
}

/*
 * Parse a property that contains a list of xrefs and values
 * (like the standard "clocks" and "resets" properties)
 * and determine the number of items in the list
 * Input arguments:
 * node - consumer's device node
 * list_name - name of parsed list - "clocks"
 * cells_name - name of size property - "#clock-cells"
 * Output arguments:
 * count - number of items in list
 */
int
ofw_bus_parse_xref_list_get_length(phandle_t node, const char *list_name,
    const char *cells_name, int *count)
{

	return (ofw_bus_parse_xref_list_internal(node, list_name, cells_name,
	    -1, NULL, count, NULL));
}

/*
 * Find the index of a string in a string list property (case sensitive).
 */
int
ofw_bus_find_string_index(phandle_t node, const char *list_name,
    const char *name, int *idx)
{
	char *elems;
	int rv, i, cnt, nelems;

	elems = NULL;
	nelems = OF_getprop_alloc(node, list_name, 1, (void **)&elems);
	if (nelems <= 0)
		return (ENOENT);

	rv = ENOENT;
	for (i = 0, cnt = 0; i < nelems; cnt++) {
		if (strcmp(elems + i, name) == 0) {
			*idx = cnt;
			rv = 0;
			break;
		}
		i += strlen(elems + i) + 1;
	}

	if (elems != NULL)
		free(elems, M_OFWPROP);
	return (rv);
}

/*
 * Create a zero-terminated array of strings from a string list property.
 */
int
ofw_bus_string_list_to_array(phandle_t node, const char *list_name,
    const char ***out_array)
{
	char *elems, *tptr;
	const char **array;
	int i, cnt, nelems, len;

	elems = NULL;
	nelems = OF_getprop_alloc(node, list_name, 1, (void **)&elems);
	if (nelems <= 0)
		return (nelems);

	/* Count number of strings. */
	for (i = 0, cnt = 0; i < nelems; cnt++)
		i += strlen(elems + i) + 1;

	/* Allocate space for arrays and all strings. */
	array = malloc((cnt + 1) * sizeof(char *) + nelems, M_OFWPROP,
	    M_WAITOK);

	/* Get address of first string. */
	tptr = (char *)(array + cnt + 1);

	/* Copy strings. */
	memcpy(tptr, elems, nelems);
	free(elems, M_OFWPROP);

	/* Fill string pointers.
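	 * Each of the cnt used slots points into the string block that was
	 * copied just past the pointer table; the extra slot allocated
	 * above becomes the terminating NULL entry.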
*/ for (i = 0, cnt = 0; i < nelems; cnt++) { len = strlen(tptr) + 1; array[cnt] = tptr; i += len; tptr += len; } array[cnt] = 0; *out_array = array; return (cnt); } Index: projects/clang390-import/sys/dev/ofw/ofw_bus_subr.h =================================================================== --- projects/clang390-import/sys/dev/ofw/ofw_bus_subr.h (revision 305016) +++ projects/clang390-import/sys/dev/ofw/ofw_bus_subr.h (revision 305017) @@ -1,144 +1,145 @@ /*- * Copyright (c) 2005 Marius Strobl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _DEV_OFW_OFW_BUS_SUBR_H_ #define _DEV_OFW_OFW_BUS_SUBR_H_ #include #ifdef INTRNG #include #endif #include #include "ofw_bus_if.h" #define ORIP_NOINT -1 #define ORIR_NOTFOUND 0xffffffff struct ofw_bus_iinfo { uint8_t *opi_imap; uint8_t *opi_imapmsk; int opi_imapsz; pcell_t opi_addrc; }; struct ofw_compat_data { const char *ocd_str; uintptr_t ocd_data; }; #ifdef INTRNG struct intr_map_data_fdt { struct intr_map_data hdr; phandle_t iparent; u_int ncells; pcell_t cells[]; }; #endif #define SIMPLEBUS_PNP_DESCR "Z:compat;P:private;" #define SIMPLEBUS_PNP_INFO(t) \ MODULE_PNP_INFO(SIMPLEBUS_PNP_DESCR, simplebus, t, t, sizeof(t[0]), sizeof(t) / sizeof(t[0])); /* Generic implementation of ofw_bus_if.m methods and helper routines */ int ofw_bus_gen_setup_devinfo(struct ofw_bus_devinfo *, phandle_t); void ofw_bus_gen_destroy_devinfo(struct ofw_bus_devinfo *); ofw_bus_get_compat_t ofw_bus_gen_get_compat; ofw_bus_get_model_t ofw_bus_gen_get_model; ofw_bus_get_name_t ofw_bus_gen_get_name; ofw_bus_get_node_t ofw_bus_gen_get_node; ofw_bus_get_type_t ofw_bus_gen_get_type; /* Helper method to report interesting OF properties in pnpinfo */ bus_child_pnpinfo_str_t ofw_bus_gen_child_pnpinfo_str; /* Routines for processing firmware interrupt maps */ void ofw_bus_setup_iinfo(phandle_t, struct ofw_bus_iinfo *, int); int ofw_bus_lookup_imap(phandle_t, struct ofw_bus_iinfo *, void *, int, void *, int, void *, int, phandle_t *); int ofw_bus_search_intrmap(void *, int, void *, int, void *, int, void *, void *, void *, int, phandle_t *); /* Routines for processing msi maps */ int ofw_bus_msimap(phandle_t, uint16_t, phandle_t *, uint32_t *); /* Routines for parsing device-tree data into resource lists. */ int ofw_bus_reg_to_rl(device_t, phandle_t, pcell_t, pcell_t, struct resource_list *); int ofw_bus_intr_to_rl(device_t, phandle_t, struct resource_list *, int *); int ofw_bus_intr_by_rid(device_t, phandle_t, int, phandle_t *, int *, pcell_t **); /* Helper to get device status property */ const char *ofw_bus_get_status(device_t dev); int ofw_bus_status_okay(device_t dev); /* Helper to get node's interrupt parent */ phandle_t ofw_bus_find_iparent(phandle_t); /* Helper routine for checking compat prop */ int ofw_bus_is_compatible(device_t, const char *); int ofw_bus_is_compatible_strict(device_t, const char *); +int ofw_bus_node_is_compatible(phandle_t, const char *); /* * Helper routine to search a list of compat properties. The table is * terminated by an entry with a NULL compat-string pointer; a pointer to that * table entry is returned if none of the compat strings match for the device, * giving you control over the not-found value. Will not return NULL unless the * provided table pointer is NULL. 
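 *
 * A minimal, hypothetical usage sketch (the device table and names are
 * illustrative only, not part of this change):
 *
 *	static struct ofw_compat_data compat_data[] = {
 *		{ "myvendor,mydev-v1",	1 },
 *		{ "myvendor,mydev-v2",	2 },
 *		{ NULL,			0 }
 *	};
 *	...
 *	hwrev = ofw_bus_search_compatible(dev, compat_data)->ocd_data;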
*/ const struct ofw_compat_data * ofw_bus_search_compatible(device_t, const struct ofw_compat_data *); /* Helper routine for checking existence of a prop */ int ofw_bus_has_prop(device_t, const char *); /* Helper to search for a child with a given compat prop */ phandle_t ofw_bus_find_compatible(phandle_t, const char *); /* Helper to search for a child with a given name */ phandle_t ofw_bus_find_child(phandle_t, const char *); /* Helper routine to find a device_t child matching a given phandle_t */ device_t ofw_bus_find_child_device_by_phandle(device_t bus, phandle_t node); /* Helper routines for parsing lists */ int ofw_bus_parse_xref_list_alloc(phandle_t node, const char *list_name, const char *cells_name, int idx, phandle_t *producer, int *ncells, pcell_t **cells); int ofw_bus_parse_xref_list_get_length(phandle_t node, const char *list_name, const char *cells_name, int *count); int ofw_bus_find_string_index(phandle_t node, const char *list_name, const char *name, int *idx); int ofw_bus_string_list_to_array(phandle_t node, const char *list_name, const char ***array); #endif /* !_DEV_OFW_OFW_BUS_SUBR_H_ */ Index: projects/clang390-import/sys/dev/syscons/syscons.c =================================================================== --- projects/clang390-import/sys/dev/syscons/syscons.c (revision 305016) +++ projects/clang390-import/sys/dev/syscons/syscons.c (revision 305017) @@ -1,3983 +1,4004 @@ /*- * Copyright (c) 1992-1998 Søren Schmidt * All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Sascha Wildner * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_syscons.h" #include "opt_splash.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__arm__) || defined(__mips__) || \ defined(__powerpc__) || defined(__sparc64__) #include #else #include #endif #if defined( __i386__) || defined(__amd64__) #include #include #endif #include #include #include #include #include #define COLD 0 #define WARM 1 #define DEFAULT_BLANKTIME (5*60) /* 5 minutes */ #define MAX_BLANKTIME (7*24*60*60) /* 7 days!? */ #define KEYCODE_BS 0x0e /* "<-- Backspace" key, XXX */ /* NULL-safe version of "tty_opened()" */ #define tty_opened_ns(tp) ((tp) != NULL && tty_opened(tp)) typedef struct default_attr { int std_color; /* normal hardware color */ int rev_color; /* reverse hardware color */ } default_attr; static default_attr user_default = { SC_NORM_ATTR, SC_NORM_REV_ATTR, }; static int sc_console_unit = -1; static int sc_saver_keyb_only = 1; static scr_stat *sc_console; static struct consdev *sc_consptr; static scr_stat main_console; static struct tty *main_devs[MAXCONS]; static char init_done = COLD; static int shutdown_in_progress = FALSE; static int suspend_in_progress = FALSE; static char sc_malloc = FALSE; static int saver_mode = CONS_NO_SAVER; /* LKM/user saver */ static int run_scrn_saver = FALSE; /* should run the saver? */ static int enable_bell = TRUE; /* enable beeper */ #ifndef SC_DISABLE_REBOOT static int enable_reboot = TRUE; /* enable keyboard reboot */ #endif #ifndef SC_DISABLE_KDBKEY static int enable_kdbkey = TRUE; /* enable keyboard debug */ #endif static long scrn_blank_time = 0; /* screen saver timeout value */ #ifdef DEV_SPLASH static int scrn_blanked; /* # of blanked screen */ static int sticky_splash = FALSE; static void none_saver(sc_softc_t *sc, int blank) { } static void (*current_saver)(sc_softc_t *, int) = none_saver; #endif #ifdef SC_NO_SUSPEND_VTYSWITCH static int sc_no_suspend_vtswitch = 1; #else static int sc_no_suspend_vtswitch = 0; #endif static int sc_susp_scr; static SYSCTL_NODE(_hw, OID_AUTO, syscons, CTLFLAG_RD, 0, "syscons"); static SYSCTL_NODE(_hw_syscons, OID_AUTO, saver, CTLFLAG_RD, 0, "saver"); SYSCTL_INT(_hw_syscons_saver, OID_AUTO, keybonly, CTLFLAG_RW, &sc_saver_keyb_only, 0, "screen saver interrupted by input only"); SYSCTL_INT(_hw_syscons, OID_AUTO, bell, CTLFLAG_RW, &enable_bell, 0, "enable bell"); #ifndef SC_DISABLE_REBOOT SYSCTL_INT(_hw_syscons, OID_AUTO, kbd_reboot, CTLFLAG_RW|CTLFLAG_SECURE, &enable_reboot, 0, "enable keyboard reboot"); #endif #ifndef SC_DISABLE_KDBKEY SYSCTL_INT(_hw_syscons, OID_AUTO, kbd_debug, CTLFLAG_RW|CTLFLAG_SECURE, &enable_kdbkey, 0, "enable keyboard debug"); #endif SYSCTL_INT(_hw_syscons, OID_AUTO, sc_no_suspend_vtswitch, CTLFLAG_RWTUN, &sc_no_suspend_vtswitch, 0, "Disable VT switch before suspend."); #if !defined(SC_NO_FONT_LOADING) && defined(SC_DFLT_FONT) #include "font.h" #endif tsw_ioctl_t *sc_user_ioctl; static bios_values_t bios_value; static int enable_panic_key; SYSCTL_INT(_machdep, OID_AUTO, enable_panic_key, CTLFLAG_RW, &enable_panic_key, 0, "Enable panic via keypress specified in kbdmap(5)"); #define SC_CONSOLECTL 255 #define VTY_WCHAN(sc, vty) (&SC_DEV(sc, vty)) /* prototypes */ static int sc_allocate_keyboard(sc_softc_t *sc, int unit); static int scvidprobe(int unit, int flags, int cons); static int sckbdprobe(int 
unit, int flags, int cons); static void scmeminit(void *arg); static int scdevtounit(struct tty *tp); static kbd_callback_func_t sckbdevent; static void scinit(int unit, int flags); static scr_stat *sc_get_stat(struct tty *tp); static void scterm(int unit, int flags); static void scshutdown(void *, int); static void scsuspend(void *); static void scresume(void *); static u_int scgetc(sc_softc_t *sc, u_int flags, struct sc_cnstate *sp); static void sc_puts(scr_stat *scp, u_char *buf, int len, int kernel); #define SCGETC_CN 1 #define SCGETC_NONBLOCK 2 static void sccnupdate(scr_stat *scp); static scr_stat *alloc_scp(sc_softc_t *sc, int vty); static void init_scp(sc_softc_t *sc, int vty, scr_stat *scp); static timeout_t scrn_timer; static int and_region(int *s1, int *e1, int s2, int e2); static void scrn_update(scr_stat *scp, int show_cursor); #ifdef DEV_SPLASH static int scsplash_callback(int event, void *arg); static void scsplash_saver(sc_softc_t *sc, int show); static int add_scrn_saver(void (*this_saver)(sc_softc_t *, int)); static int remove_scrn_saver(void (*this_saver)(sc_softc_t *, int)); static int set_scrn_saver_mode(scr_stat *scp, int mode, u_char *pal, int border); static int restore_scrn_saver_mode(scr_stat *scp, int changemode); static void stop_scrn_saver(sc_softc_t *sc, void (*saver)(sc_softc_t *, int)); static int wait_scrn_saver_stop(sc_softc_t *sc); #define scsplash_stick(stick) (sticky_splash = (stick)) #else /* !DEV_SPLASH */ #define scsplash_stick(stick) #endif /* DEV_SPLASH */ static int do_switch_scr(sc_softc_t *sc, int s); static int vt_proc_alive(scr_stat *scp); static int signal_vt_rel(scr_stat *scp); static int signal_vt_acq(scr_stat *scp); static int finish_vt_rel(scr_stat *scp, int release, int *s); static int finish_vt_acq(scr_stat *scp); static void exchange_scr(sc_softc_t *sc); static void update_cursor_image(scr_stat *scp); static void change_cursor_shape(scr_stat *scp, int flags, int base, int height); static void update_font(scr_stat *); static int save_kbd_state(scr_stat *scp); static int update_kbd_state(scr_stat *scp, int state, int mask); static int update_kbd_leds(scr_stat *scp, int which); static timeout_t blink_screen; static struct tty *sc_alloc_tty(int, int); static cn_probe_t sc_cnprobe; static cn_init_t sc_cninit; static cn_term_t sc_cnterm; static cn_getc_t sc_cngetc; static cn_putc_t sc_cnputc; static cn_grab_t sc_cngrab; static cn_ungrab_t sc_cnungrab; CONSOLE_DRIVER(sc); static tsw_open_t sctty_open; static tsw_close_t sctty_close; static tsw_outwakeup_t sctty_outwakeup; static tsw_ioctl_t sctty_ioctl; static tsw_mmap_t sctty_mmap; static struct ttydevsw sc_ttydevsw = { .tsw_open = sctty_open, .tsw_close = sctty_close, .tsw_outwakeup = sctty_outwakeup, .tsw_ioctl = sctty_ioctl, .tsw_mmap = sctty_mmap, }; static d_ioctl_t consolectl_ioctl; static d_close_t consolectl_close; static struct cdevsw consolectl_devsw = { .d_version = D_VERSION, .d_flags = D_NEEDGIANT | D_TRACKCLOSE, .d_ioctl = consolectl_ioctl, .d_close = consolectl_close, .d_name = "consolectl", }; int sc_probe_unit(int unit, int flags) { if (!vty_enabled(VTY_SC)) return ENXIO; if (!scvidprobe(unit, flags, FALSE)) { if (bootverbose) printf("%s%d: no video adapter found.\n", SC_DRIVER_NAME, unit); return ENXIO; } /* syscons will be attached even when there is no keyboard */ sckbdprobe(unit, flags, FALSE); return 0; } /* probe video adapters, return TRUE if found */ static int scvidprobe(int unit, int flags, int cons) { /* * Access the video adapter driver through the back door! 
* Video adapter drivers need to be configured before syscons. * However, when syscons is being probed as the low-level console, * they have not been initialized yet. We force them to initialize * themselves here. XXX */ vid_configure(cons ? VIO_PROBE_ONLY : 0); return (vid_find_adapter("*", unit) >= 0); } /* probe the keyboard, return TRUE if found */ static int sckbdprobe(int unit, int flags, int cons) { /* access the keyboard driver through the backdoor! */ kbd_configure(cons ? KB_CONF_PROBE_ONLY : 0); return (kbd_find_keyboard("*", unit) >= 0); } static char *adapter_name(video_adapter_t *adp) { static struct { int type; char *name[2]; } names[] = { { KD_MONO, { "MDA", "MDA" } }, { KD_HERCULES, { "Hercules", "Hercules" } }, { KD_CGA, { "CGA", "CGA" } }, { KD_EGA, { "EGA", "EGA (mono)" } }, { KD_VGA, { "VGA", "VGA (mono)" } }, { KD_PC98, { "PC-98x1", "PC-98x1" } }, { KD_TGA, { "TGA", "TGA" } }, { -1, { "Unknown", "Unknown" } }, }; int i; for (i = 0; names[i].type != -1; ++i) if (names[i].type == adp->va_type) break; return names[i].name[(adp->va_flags & V_ADP_COLOR) ? 0 : 1]; } static void sctty_outwakeup(struct tty *tp) { size_t len; u_char buf[PCBURST]; scr_stat *scp = sc_get_stat(tp); if (scp->status & SLKED || (scp == scp->sc->cur_scp && scp->sc->blink_in_progress)) return; for (;;) { len = ttydisc_getc(tp, buf, sizeof buf); if (len == 0) break; SC_VIDEO_LOCK(scp->sc); sc_puts(scp, buf, len, 0); SC_VIDEO_UNLOCK(scp->sc); } } static struct tty * sc_alloc_tty(int index, int devnum) { struct sc_ttysoftc *stc; struct tty *tp; /* Allocate TTY object and softc to store unit number. */ stc = malloc(sizeof(struct sc_ttysoftc), M_DEVBUF, M_WAITOK); stc->st_index = index; stc->st_stat = NULL; tp = tty_alloc_mutex(&sc_ttydevsw, stc, &Giant); /* Create device node. */ tty_makedev(tp, NULL, "v%r", devnum); return (tp); } #ifdef SC_PIXEL_MODE static void sc_set_vesa_mode(scr_stat *scp, sc_softc_t *sc, int unit) { video_info_t info; u_char *font; int depth; int fontsize; int i; int vmode; vmode = 0; (void)resource_int_value("sc", unit, "vesa_mode", &vmode); if (vmode < M_VESA_BASE || vmode > M_VESA_MODE_MAX || vidd_get_info(sc->adp, vmode, &info) != 0 || !sc_support_pixel_mode(&info)) vmode = 0; /* * If the mode is unset or unsupported, search for an available * 800x600 graphics mode with the highest color depth. 
*/ if (vmode == 0) { for (depth = 0, i = M_VESA_BASE; i <= M_VESA_MODE_MAX; i++) if (vidd_get_info(sc->adp, i, &info) == 0 && info.vi_width == 800 && info.vi_height == 600 && sc_support_pixel_mode(&info) && info.vi_depth > depth) { vmode = i; depth = info.vi_depth; } if (vmode == 0) return; vidd_get_info(sc->adp, vmode, &info); } #if !defined(SC_NO_FONT_LOADING) && defined(SC_DFLT_FONT) fontsize = info.vi_cheight; #else fontsize = scp->font_size; #endif if (fontsize < 14) fontsize = 8; else if (fontsize >= 16) fontsize = 16; else fontsize = 14; #ifndef SC_NO_FONT_LOADING switch (fontsize) { case 8: if ((sc->fonts_loaded & FONT_8) == 0) return; font = sc->font_8; break; case 14: if ((sc->fonts_loaded & FONT_14) == 0) return; font = sc->font_14; break; case 16: if ((sc->fonts_loaded & FONT_16) == 0) return; font = sc->font_16; break; } #else font = NULL; #endif #ifdef DEV_SPLASH if ((sc->flags & SC_SPLASH_SCRN) != 0) splash_term(sc->adp); #endif #ifndef SC_NO_HISTORY if (scp->history != NULL) { sc_vtb_append(&scp->vtb, 0, scp->history, scp->ypos * scp->xsize + scp->xpos); scp->history_pos = sc_vtb_tail(scp->history); } #endif vidd_set_mode(sc->adp, vmode); scp->status |= (UNKNOWN_MODE | PIXEL_MODE | MOUSE_HIDDEN); scp->status &= ~(GRAPHICS_MODE | MOUSE_VISIBLE); scp->xpixel = info.vi_width; scp->ypixel = info.vi_height; scp->xsize = scp->xpixel / 8; scp->ysize = scp->ypixel / fontsize; scp->xpos = 0; scp->ypos = scp->ysize - 1; scp->xoff = scp->yoff = 0; scp->font = font; scp->font_size = fontsize; scp->font_width = 8; scp->start = scp->xsize * scp->ysize - 1; scp->end = 0; scp->cursor_pos = scp->cursor_oldpos = scp->xsize * scp->xsize; scp->mode = sc->initial_mode = vmode; #ifndef __sparc64__ sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, scp->xsize, scp->ysize, (void *)sc->adp->va_window, FALSE); #endif sc_alloc_scr_buffer(scp, FALSE, FALSE); sc_init_emulator(scp, NULL); #ifndef SC_NO_CUTPASTE sc_alloc_cut_buffer(scp, FALSE); #endif #ifndef SC_NO_HISTORY sc_alloc_history_buffer(scp, 0, 0, FALSE); #endif sc_set_border(scp, scp->border); sc_set_cursor_image(scp); scp->status &= ~UNKNOWN_MODE; #ifdef DEV_SPLASH if ((sc->flags & SC_SPLASH_SCRN) != 0) splash_init(sc->adp, scsplash_callback, sc); #endif } #endif int sc_attach_unit(int unit, int flags) { sc_softc_t *sc; scr_stat *scp; struct cdev *dev; int vc; if (!vty_enabled(VTY_SC)) return ENXIO; flags &= ~SC_KERNEL_CONSOLE; if (sc_console_unit == unit) { /* * If this unit is being used as the system console, we need to * adjust some variables and buffers before and after scinit(). 
*/ /* assert(sc_console != NULL) */ flags |= SC_KERNEL_CONSOLE; scmeminit(NULL); } scinit(unit, flags); sc = sc_get_softc(unit, flags & SC_KERNEL_CONSOLE); sc->config = flags; callout_init(&sc->ctimeout, 0); callout_init(&sc->cblink, 0); scp = sc_get_stat(sc->dev[0]); if (sc_console == NULL) /* sc_console_unit < 0 */ sc_console = scp; #ifdef SC_PIXEL_MODE if ((sc->config & SC_VESAMODE) != 0) sc_set_vesa_mode(scp, sc, unit); #endif /* SC_PIXEL_MODE */ /* initialize cursor */ if (!ISGRAPHSC(scp)) update_cursor_image(scp); /* get screen update going */ scrn_timer(sc); /* set up the keyboard */ (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&scp->kbd_mode); update_kbd_state(scp, scp->status, LOCK_MASK); printf("%s%d: %s <%d virtual consoles, flags=0x%x>\n", SC_DRIVER_NAME, unit, adapter_name(sc->adp), sc->vtys, sc->config); if (bootverbose) { printf("%s%d:", SC_DRIVER_NAME, unit); if (sc->adapter >= 0) printf(" fb%d", sc->adapter); if (sc->keyboard >= 0) printf(", kbd%d", sc->keyboard); if (scp->tsw) printf(", terminal emulator: %s (%s)", scp->tsw->te_name, scp->tsw->te_desc); printf("\n"); } /* Register suspend/resume/shutdown callbacks for the kernel console. */ if (sc_console_unit == unit) { EVENTHANDLER_REGISTER(power_suspend_early, scsuspend, NULL, EVENTHANDLER_PRI_ANY); EVENTHANDLER_REGISTER(power_resume, scresume, NULL, EVENTHANDLER_PRI_ANY); EVENTHANDLER_REGISTER(shutdown_pre_sync, scshutdown, NULL, SHUTDOWN_PRI_DEFAULT); } for (vc = 0; vc < sc->vtys; vc++) { if (sc->dev[vc] == NULL) { sc->dev[vc] = sc_alloc_tty(vc, vc + unit * MAXCONS); if (vc == 0 && sc->dev == main_devs) SC_STAT(sc->dev[0]) = &main_console; } /* * The first vty already has struct tty and scr_stat initialized * in scinit(). The other vtys will have these structs when * first opened. */ } dev = make_dev(&consolectl_devsw, 0, UID_ROOT, GID_WHEEL, 0600, "consolectl"); dev->si_drv1 = sc->dev[0]; return 0; } static void scmeminit(void *arg) { if (!vty_enabled(VTY_SC)) return; if (sc_malloc) return; sc_malloc = TRUE; /* * As soon as malloc() becomes functional, we had better allocate * various buffers for the kernel console. */ if (sc_console_unit < 0) /* sc_console == NULL */ return; /* copy the temporary buffer to the final buffer */ sc_alloc_scr_buffer(sc_console, FALSE, FALSE); #ifndef SC_NO_CUTPASTE sc_alloc_cut_buffer(sc_console, FALSE); #endif #ifndef SC_NO_HISTORY /* initialize history buffer & pointers */ sc_alloc_history_buffer(sc_console, 0, 0, FALSE); #endif } /* XXX */ SYSINIT(sc_mem, SI_SUB_KMEM, SI_ORDER_ANY, scmeminit, NULL); static int scdevtounit(struct tty *tp) { int vty = SC_VTY(tp); if (vty == SC_CONSOLECTL) return ((sc_console != NULL) ? sc_console->sc->unit : -1); else if ((vty < 0) || (vty >= MAXCONS*sc_max_unit())) return -1; else return vty/MAXCONS; } static int sctty_open(struct tty *tp) { int unit = scdevtounit(tp); sc_softc_t *sc; scr_stat *scp; #ifndef __sparc64__ keyarg_t key; #endif DPRINTF(5, ("scopen: dev:%s, unit:%d, vty:%d\n", devtoname(tp->t_dev), unit, SC_VTY(tp))); sc = sc_get_softc(unit, (sc_console_unit == unit) ? SC_KERNEL_CONSOLE : 0); if (sc == NULL) return ENXIO; if (!tty_opened(tp)) { /* Use the current setting of the <-- key as default VERASE. 
*/ /* If the Delete key is preferable, an stty is necessary */ #ifndef __sparc64__ if (sc->kbd != NULL) { key.keynum = KEYCODE_BS; (void)kbdd_ioctl(sc->kbd, GIO_KEYMAPENT, (caddr_t)&key); tp->t_termios.c_cc[VERASE] = key.key.map[0]; } #endif } scp = sc_get_stat(tp); if (scp == NULL) { scp = SC_STAT(tp) = alloc_scp(sc, SC_VTY(tp)); if (ISGRAPHSC(scp)) sc_set_pixel_mode(scp, NULL, 0, 0, 16, 8); } if (!tp->t_winsize.ws_col && !tp->t_winsize.ws_row) { tp->t_winsize.ws_col = scp->xsize; tp->t_winsize.ws_row = scp->ysize; } return (0); } static void sctty_close(struct tty *tp) { scr_stat *scp; int s; if (SC_VTY(tp) != SC_CONSOLECTL) { scp = sc_get_stat(tp); /* were we in the middle of the VT switching process? */ DPRINTF(5, ("sc%d: scclose(), ", scp->sc->unit)); s = spltty(); if ((scp == scp->sc->cur_scp) && (scp->sc->unit == sc_console_unit)) cnavailable(sc_consptr, TRUE); if (finish_vt_rel(scp, TRUE, &s) == 0) /* force release */ DPRINTF(5, ("reset WAIT_REL, ")); if (finish_vt_acq(scp) == 0) /* force acknowledge */ DPRINTF(5, ("reset WAIT_ACQ, ")); #ifdef not_yet_done if (scp == &main_console) { scp->pid = 0; scp->proc = NULL; scp->smode.mode = VT_AUTO; } else { sc_vtb_destroy(&scp->vtb); #ifndef __sparc64__ sc_vtb_destroy(&scp->scr); #endif sc_free_history_buffer(scp, scp->ysize); SC_STAT(tp) = NULL; free(scp, M_DEVBUF); } #else scp->pid = 0; scp->proc = NULL; scp->smode.mode = VT_AUTO; #endif scp->kbd_mode = K_XLATE; if (scp == scp->sc->cur_scp) (void)kbdd_ioctl(scp->sc->kbd, KDSKBMODE, (caddr_t)&scp->kbd_mode); DPRINTF(5, ("done.\n")); } } #if 0 /* XXX mpsafetty: fix screensaver. What about outwakeup? */ static int scread(struct cdev *dev, struct uio *uio, int flag) { if (!sc_saver_keyb_only) sc_touch_scrn_saver(); return ttyread(dev, uio, flag); } #endif static int sckbdevent(keyboard_t *thiskbd, int event, void *arg) { sc_softc_t *sc; struct tty *cur_tty; int c, error = 0; size_t len; const u_char *cp; sc = (sc_softc_t *)arg; /* assert(thiskbd == sc->kbd) */ mtx_lock(&Giant); switch (event) { case KBDIO_KEYINPUT: break; case KBDIO_UNLOADING: sc->kbd = NULL; sc->keyboard = -1; kbd_release(thiskbd, (void *)&sc->keyboard); goto done; default: error = EINVAL; goto done; } /* * Loop while there is still input to get from the keyboard. * I don't think this is nessesary, and it doesn't fix * the Xaccel-2.1 keyboard hang, but it can't hurt. 
XXX */ while ((c = scgetc(sc, SCGETC_NONBLOCK, NULL)) != NOKEY) { cur_tty = SC_DEV(sc, sc->cur_scp->index); if (!tty_opened_ns(cur_tty)) continue; if ((*sc->cur_scp->tsw->te_input)(sc->cur_scp, c, cur_tty)) continue; switch (KEYFLAGS(c)) { case 0x0000: /* normal key */ ttydisc_rint(cur_tty, KEYCHAR(c), 0); break; case FKEY: /* function key, return string */ cp = (*sc->cur_scp->tsw->te_fkeystr)(sc->cur_scp, c); if (cp != NULL) { ttydisc_rint_simple(cur_tty, cp, strlen(cp)); break; } cp = kbdd_get_fkeystr(thiskbd, KEYCHAR(c), &len); if (cp != NULL) ttydisc_rint_simple(cur_tty, cp, len); break; case MKEY: /* meta is active, prepend ESC */ ttydisc_rint(cur_tty, 0x1b, 0); ttydisc_rint(cur_tty, KEYCHAR(c), 0); break; case BKEY: /* backtab fixed sequence (esc [ Z) */ ttydisc_rint_simple(cur_tty, "\x1B[Z", 3); break; } ttydisc_rint_done(cur_tty); } sc->cur_scp->status |= MOUSE_HIDDEN; done: mtx_unlock(&Giant); return (error); } static int sctty_ioctl(struct tty *tp, u_long cmd, caddr_t data, struct thread *td) { int error; int i; sc_softc_t *sc; scr_stat *scp; int s; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) int ival; #endif /* If there is a user_ioctl function call that first */ if (sc_user_ioctl) { error = (*sc_user_ioctl)(tp, cmd, data, td); if (error != ENOIOCTL) return error; } error = sc_vid_ioctl(tp, cmd, data, td); if (error != ENOIOCTL) return error; #ifndef SC_NO_HISTORY error = sc_hist_ioctl(tp, cmd, data, td); if (error != ENOIOCTL) return error; #endif #ifndef SC_NO_SYSMOUSE error = sc_mouse_ioctl(tp, cmd, data, td); if (error != ENOIOCTL) return error; #endif scp = sc_get_stat(tp); /* assert(scp != NULL) */ /* scp is sc_console, if SC_VTY(dev) == SC_CONSOLECTL. */ sc = scp->sc; if (scp->tsw) { error = (*scp->tsw->te_ioctl)(scp, tp, cmd, data, td); if (error != ENOIOCTL) return error; } switch (cmd) { /* process console hardware related ioctl's */ case GIO_ATTR: /* get current attributes */ /* this ioctl is not processed here, but in the terminal emulator */ return ENOTTY; case GIO_COLOR: /* is this a color console ? */ *(int *)data = (sc->adp->va_flags & V_ADP_COLOR) ? 
1 : 0; return 0; case CONS_BLANKTIME: /* set screen saver timeout (0 = no saver) */ if (*(int *)data < 0 || *(int *)data > MAX_BLANKTIME) return EINVAL; s = spltty(); scrn_blank_time = *(int *)data; run_scrn_saver = (scrn_blank_time != 0); splx(s); return 0; case CONS_CURSORTYPE: /* set cursor type (obsolete) */ s = spltty(); *(int *)data &= CONS_CURSOR_ATTRS; sc_change_cursor_shape(scp, *(int *)data, -1, -1); splx(s); return 0; case CONS_GETCURSORSHAPE: /* get cursor shape (new interface) */ if (((int *)data)[0] & CONS_LOCAL_CURSOR) { ((int *)data)[0] = scp->curr_curs_attr.flags; ((int *)data)[1] = scp->curr_curs_attr.base; ((int *)data)[2] = scp->curr_curs_attr.height; } else { ((int *)data)[0] = sc->curs_attr.flags; ((int *)data)[1] = sc->curs_attr.base; ((int *)data)[2] = sc->curs_attr.height; } return 0; case CONS_SETCURSORSHAPE: /* set cursor shape (new interface) */ s = spltty(); sc_change_cursor_shape(scp, ((int *)data)[0], ((int *)data)[1], ((int *)data)[2]); splx(s); return 0; case CONS_BELLTYPE: /* set bell type sound/visual */ if ((*(int *)data) & CONS_VISUAL_BELL) sc->flags |= SC_VISUAL_BELL; else sc->flags &= ~SC_VISUAL_BELL; if ((*(int *)data) & CONS_QUIET_BELL) sc->flags |= SC_QUIET_BELL; else sc->flags &= ~SC_QUIET_BELL; return 0; case CONS_GETINFO: /* get current (virtual) console info */ { vid_info_t *ptr = (vid_info_t*)data; if (ptr->size == sizeof(struct vid_info)) { ptr->m_num = sc->cur_scp->index; ptr->font_size = scp->font_size; ptr->mv_col = scp->xpos; ptr->mv_row = scp->ypos; ptr->mv_csz = scp->xsize; ptr->mv_rsz = scp->ysize; ptr->mv_hsz = (scp->history != NULL) ? scp->history->vtb_rows : 0; /* * The following fields are filled by the terminal emulator. XXX * * ptr->mv_norm.fore * ptr->mv_norm.back * ptr->mv_rev.fore * ptr->mv_rev.back */ ptr->mv_grfc.fore = 0; /* not supported */ ptr->mv_grfc.back = 0; /* not supported */ ptr->mv_ovscan = scp->border; if (scp == sc->cur_scp) save_kbd_state(scp); ptr->mk_keylock = scp->status & LOCK_MASK; return 0; } return EINVAL; } case CONS_GETVERS: /* get version number */ *(int*)data = 0x200; /* version 2.0 */ return 0; case CONS_IDLE: /* see if the screen has been idle */ /* * When the screen is in the GRAPHICS_MODE or UNKNOWN_MODE, * the user process may have been writing something on the * screen and syscons is not aware of it. Declare the screen * is NOT idle if it is in one of these modes. But there is * an exception to it; if a screen saver is running in the * graphics mode in the current screen, we should say that the * screen has been idle. */ *(int *)data = (sc->flags & SC_SCRN_IDLE) && (!ISGRAPHSC(sc->cur_scp) || (sc->cur_scp->status & SAVER_RUNNING)); return 0; case CONS_SAVERMODE: /* set saver mode */ switch(*(int *)data) { case CONS_NO_SAVER: case CONS_USR_SAVER: /* if a LKM screen saver is running, stop it first. */ scsplash_stick(FALSE); saver_mode = *(int *)data; s = spltty(); #ifdef DEV_SPLASH if ((error = wait_scrn_saver_stop(NULL))) { splx(s); return error; } #endif run_scrn_saver = TRUE; if (saver_mode == CONS_USR_SAVER) scp->status |= SAVER_RUNNING; else scp->status &= ~SAVER_RUNNING; scsplash_stick(TRUE); splx(s); break; case CONS_LKM_SAVER: s = spltty(); if ((saver_mode == CONS_USR_SAVER) && (scp->status & SAVER_RUNNING)) scp->status &= ~SAVER_RUNNING; saver_mode = *(int *)data; splx(s); break; default: return EINVAL; } return 0; case CONS_SAVERSTART: /* immediately start/stop the screen saver */ /* * Note that this ioctl does not guarantee the screen saver * actually starts or stops. 
It merely attempts to do so... */ s = spltty(); run_scrn_saver = (*(int *)data != 0); if (run_scrn_saver) sc->scrn_time_stamp -= scrn_blank_time; splx(s); return 0; case CONS_SCRSHOT: /* get a screen shot */ { int retval, hist_rsz; size_t lsize, csize; vm_offset_t frbp, hstp; unsigned lnum; scrshot_t *ptr = (scrshot_t *)data; void *outp = ptr->buf; if (ptr->x < 0 || ptr->y < 0 || ptr->xsize < 0 || ptr->ysize < 0) return EINVAL; s = spltty(); if (ISGRAPHSC(scp)) { splx(s); return EOPNOTSUPP; } hist_rsz = (scp->history != NULL) ? scp->history->vtb_rows : 0; if (((u_int)ptr->x + ptr->xsize) > scp->xsize || ((u_int)ptr->y + ptr->ysize) > (scp->ysize + hist_rsz)) { splx(s); return EINVAL; } lsize = scp->xsize * sizeof(u_int16_t); csize = ptr->xsize * sizeof(u_int16_t); /* Pointer to the last line of framebuffer */ frbp = scp->vtb.vtb_buffer + scp->ysize * lsize + ptr->x * sizeof(u_int16_t); /* Pointer to the last line of target buffer */ outp = (char *)outp + ptr->ysize * csize; /* Pointer to the last line of history buffer */ if (scp->history != NULL) hstp = scp->history->vtb_buffer + sc_vtb_tail(scp->history) * sizeof(u_int16_t) + ptr->x * sizeof(u_int16_t); else hstp = 0; retval = 0; for (lnum = 0; lnum < (ptr->y + ptr->ysize); lnum++) { if (lnum < scp->ysize) { frbp -= lsize; } else { hstp -= lsize; if (hstp < scp->history->vtb_buffer) hstp += scp->history->vtb_rows * lsize; frbp = hstp; } if (lnum < ptr->y) continue; outp = (char *)outp - csize; retval = copyout((void *)frbp, outp, csize); if (retval != 0) break; } splx(s); return retval; } case VT_SETMODE: /* set screen switcher mode */ { struct vt_mode *mode; struct proc *p1; mode = (struct vt_mode *)data; DPRINTF(5, ("%s%d: VT_SETMODE ", SC_DRIVER_NAME, sc->unit)); if (scp->smode.mode == VT_PROCESS) { p1 = pfind(scp->pid); if (scp->proc == p1 && scp->proc != td->td_proc) { if (p1) PROC_UNLOCK(p1); DPRINTF(5, ("error EPERM\n")); return EPERM; } if (p1) PROC_UNLOCK(p1); } s = spltty(); if (mode->mode == VT_AUTO) { scp->smode.mode = VT_AUTO; scp->proc = NULL; scp->pid = 0; DPRINTF(5, ("VT_AUTO, ")); if ((scp == sc->cur_scp) && (sc->unit == sc_console_unit)) cnavailable(sc_consptr, TRUE); /* were we in the middle of the vty switching process? */ if (finish_vt_rel(scp, TRUE, &s) == 0) DPRINTF(5, ("reset WAIT_REL, ")); if (finish_vt_acq(scp) == 0) DPRINTF(5, ("reset WAIT_ACQ, ")); } else { if (!ISSIGVALID(mode->relsig) || !ISSIGVALID(mode->acqsig) || !ISSIGVALID(mode->frsig)) { splx(s); DPRINTF(5, ("error EINVAL\n")); return EINVAL; } DPRINTF(5, ("VT_PROCESS %d, ", td->td_proc->p_pid)); bcopy(data, &scp->smode, sizeof(struct vt_mode)); scp->proc = td->td_proc; scp->pid = scp->proc->p_pid; if ((scp == sc->cur_scp) && (sc->unit == sc_console_unit)) cnavailable(sc_consptr, FALSE); } splx(s); DPRINTF(5, ("\n")); return 0; } case VT_GETMODE: /* get screen switcher mode */ bcopy(&scp->smode, data, sizeof(struct vt_mode)); return 0; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('v', 4): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case VT_RELDISP: /* screen switcher ioctl */ s = spltty(); /* * This must be the current vty which is in the VT_PROCESS * switching mode... */ if ((scp != sc->cur_scp) || (scp->smode.mode != VT_PROCESS)) { splx(s); return EINVAL; } /* ...and this process is controlling it. 
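* I.e., it must be the process that put this vty into VT_PROCESS mode * with VT_SETMODE above, which recorded it in scp->proc.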
*/ if (scp->proc != td->td_proc) { splx(s); return EPERM; } error = EINVAL; switch(*(int *)data) { case VT_FALSE: /* user refuses to release screen, abort */ if ((error = finish_vt_rel(scp, FALSE, &s)) == 0) DPRINTF(5, ("%s%d: VT_FALSE\n", SC_DRIVER_NAME, sc->unit)); break; case VT_TRUE: /* user has released screen, go on */ if ((error = finish_vt_rel(scp, TRUE, &s)) == 0) DPRINTF(5, ("%s%d: VT_TRUE\n", SC_DRIVER_NAME, sc->unit)); break; case VT_ACKACQ: /* acquire acknowledged, switch completed */ if ((error = finish_vt_acq(scp)) == 0) DPRINTF(5, ("%s%d: VT_ACKACQ\n", SC_DRIVER_NAME, sc->unit)); break; default: break; } splx(s); return error; case VT_OPENQRY: /* return free virtual console */ for (i = sc->first_vty; i < sc->first_vty + sc->vtys; i++) { tp = SC_DEV(sc, i); if (!tty_opened_ns(tp)) { *(int *)data = i + 1; return 0; } } return EINVAL; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('v', 5): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case VT_ACTIVATE: /* switch to screen *data */ i = (*(int *)data == 0) ? scp->index : (*(int *)data - 1); s = spltty(); error = sc_clean_up(sc->cur_scp); splx(s); if (error) return error; error = sc_switch_scr(sc, i); return (error); #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('v', 6): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case VT_WAITACTIVE: /* wait for switch to occur */ i = (*(int *)data == 0) ? scp->index : (*(int *)data - 1); if ((i < sc->first_vty) || (i >= sc->first_vty + sc->vtys)) return EINVAL; if (i == sc->cur_scp->index) return 0; error = tsleep(VTY_WCHAN(sc, i), (PZERO + 1) | PCATCH, "waitvt", 0); return error; case VT_GETACTIVE: /* get active vty # */ *(int *)data = sc->cur_scp->index + 1; return 0; case VT_GETINDEX: /* get this vty # */ *(int *)data = scp->index + 1; return 0; case VT_LOCKSWITCH: /* prevent vty switching */ if ((*(int *)data) & 0x01) sc->flags |= SC_SCRN_VTYLOCK; else sc->flags &= ~SC_SCRN_VTYLOCK; return 0; case KDENABIO: /* allow io operations */ error = priv_check(td, PRIV_IO); if (error != 0) return error; error = securelevel_gt(td->td_ucred, 0); if (error != 0) return error; #ifdef __i386__ td->td_frame->tf_eflags |= PSL_IOPL; #elif defined(__amd64__) td->td_frame->tf_rflags |= PSL_IOPL; #endif return 0; case KDDISABIO: /* disallow io operations (default) */ #ifdef __i386__ td->td_frame->tf_eflags &= ~PSL_IOPL; #elif defined(__amd64__) td->td_frame->tf_rflags &= ~PSL_IOPL; #endif return 0; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 20): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDSKBSTATE: /* set keyboard state (locks) */ if (*(int *)data & ~LOCK_MASK) return EINVAL; scp->status &= ~LOCK_MASK; scp->status |= *(int *)data; if (scp == sc->cur_scp) update_kbd_state(scp, scp->status, LOCK_MASK); return 0; case KDGKBSTATE: /* get keyboard state (locks) */ if (scp == sc->cur_scp) save_kbd_state(scp); *(int *)data = scp->status & LOCK_MASK; return 0; case KDGETREPEAT: /* get keyboard repeat & delay rates */ case KDSETREPEAT: /* set keyboard repeat & delay rates (new) */ error = kbdd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) error = ENODEV; return error; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 67): ival = 
IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDSETRAD: /* set keyboard repeat & delay rates (old) */ if (*(int *)data & ~0x7f) return EINVAL; error = kbdd_ioctl(sc->kbd, KDSETRAD, data); if (error == ENOIOCTL) error = ENODEV; return error; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 7): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDSKBMODE: /* set keyboard mode */ switch (*(int *)data) { case K_XLATE: /* switch to XLT ascii mode */ case K_RAW: /* switch to RAW scancode mode */ case K_CODE: /* switch to CODE mode */ scp->kbd_mode = *(int *)data; if (scp == sc->cur_scp) (void)kbdd_ioctl(sc->kbd, KDSKBMODE, data); return 0; default: return EINVAL; } /* NOT REACHED */ case KDGKBMODE: /* get keyboard mode */ *(int *)data = scp->kbd_mode; return 0; case KDGKBINFO: error = kbdd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) error = ENODEV; return error; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 8): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDMKTONE: /* sound the bell */ if (*(int*)data) sc_bell(scp, (*(int*)data)&0xffff, (((*(int*)data)>>16)&0xffff)*hz/1000); else sc_bell(scp, scp->bell_pitch, scp->bell_duration); return 0; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 63): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KIOCSOUND: /* make tone (*data) hz */ if (scp == sc->cur_scp) { if (*(int *)data) return sc_tone(*(int *)data); else return sc_tone(0); } return 0; case KDGKBTYPE: /* get keyboard type */ error = kbdd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) { /* always return something? XXX */ *(int *)data = 0; } return 0; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 66): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDSETLED: /* set keyboard LED status */ if (*(int *)data & ~LED_MASK) /* FIXME: LOCK_MASK? 
*/ return EINVAL; scp->status &= ~LED_MASK; scp->status |= *(int *)data; if (scp == sc->cur_scp) update_kbd_leds(scp, scp->status); return 0; case KDGETLED: /* get keyboard LED status */ if (scp == sc->cur_scp) save_kbd_state(scp); *(int *)data = scp->status & LED_MASK; return 0; case KBADDKBD: /* add/remove keyboard to/from mux */ case KBRELKBD: error = kbdd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) error = ENODEV; return error; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('c', 110): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case CONS_SETKBD: /* set the new keyboard */ { keyboard_t *newkbd; s = spltty(); newkbd = kbd_get_keyboard(*(int *)data); if (newkbd == NULL) { splx(s); return EINVAL; } error = 0; if (sc->kbd != newkbd) { i = kbd_allocate(newkbd->kb_name, newkbd->kb_unit, (void *)&sc->keyboard, sckbdevent, sc); /* i == newkbd->kb_index */ if (i >= 0) { if (sc->kbd != NULL) { save_kbd_state(sc->cur_scp); kbd_release(sc->kbd, (void *)&sc->keyboard); } sc->kbd = kbd_get_keyboard(i); /* sc->kbd == newkbd */ sc->keyboard = i; (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&sc->cur_scp->kbd_mode); update_kbd_state(sc->cur_scp, sc->cur_scp->status, LOCK_MASK); } else { error = EPERM; /* XXX */ } } splx(s); return error; } case CONS_RELKBD: /* release the current keyboard */ s = spltty(); error = 0; if (sc->kbd != NULL) { save_kbd_state(sc->cur_scp); error = kbd_release(sc->kbd, (void *)&sc->keyboard); if (error == 0) { sc->kbd = NULL; sc->keyboard = -1; } } splx(s); return error; case CONS_GETTERM: /* get the current terminal emulator info */ { sc_term_sw_t *sw; if (((term_info_t *)data)->ti_index == 0) { sw = scp->tsw; } else { sw = sc_term_match_by_number(((term_info_t *)data)->ti_index); } if (sw != NULL) { strncpy(((term_info_t *)data)->ti_name, sw->te_name, sizeof(((term_info_t *)data)->ti_name)); strncpy(((term_info_t *)data)->ti_desc, sw->te_desc, sizeof(((term_info_t *)data)->ti_desc)); ((term_info_t *)data)->ti_flags = 0; return 0; } else { ((term_info_t *)data)->ti_name[0] = '\0'; ((term_info_t *)data)->ti_desc[0] = '\0'; ((term_info_t *)data)->ti_flags = 0; return EINVAL; } } case CONS_SETTERM: /* set the current terminal emulator */ s = spltty(); error = sc_init_emulator(scp, ((term_info_t *)data)->ti_name); /* FIXME: what if scp == sc_console! XXX */ splx(s); return error; case GIO_SCRNMAP: /* get output translation table */ bcopy(&sc->scr_map, data, sizeof(sc->scr_map)); return 0; case PIO_SCRNMAP: /* set output translation table */ bcopy(data, &sc->scr_map, sizeof(sc->scr_map)); for (i=0; i<sizeof(sc->scr_map); i++) { sc->scr_rmap[sc->scr_map[i]] = i; } return 0; case GIO_KEYMAP: /* get keyboard translation table */ case PIO_KEYMAP: /* set keyboard translation table */ case OGIO_KEYMAP: /* get keyboard translation table (compat) */ case OPIO_KEYMAP: /* set keyboard translation table (compat) */ case GIO_DEADKEYMAP: /* get accent key translation table */ case PIO_DEADKEYMAP: /* set accent key translation table */ case GETFKEY: /* get function key string */ case SETFKEY: /* set function key string */ error = kbdd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) error = ENODEV; return error; #ifndef SC_NO_FONT_LOADING case PIO_FONT8x8: /* set 8x8 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; bcopy(data, sc->font_8, 8*256); sc->fonts_loaded |= FONT_8; /* * FONT KLUDGE * Always use the font page #0. XXX * Don't load if the current font size is not 8x8.
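* (The new font is still copied into sc->font_8 and flagged in * fonts_loaded above, so it remains available after a later mode or * font-size change.)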
*/ if (ISTEXTSC(sc->cur_scp) && (sc->cur_scp->font_size < 14)) sc_load_font(sc->cur_scp, 0, 8, 8, sc->font_8, 0, 256); return 0; case GIO_FONT8x8: /* get 8x8 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; if (sc->fonts_loaded & FONT_8) { bcopy(sc->font_8, data, 8*256); return 0; } else return ENXIO; case PIO_FONT8x14: /* set 8x14 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; bcopy(data, sc->font_14, 14*256); sc->fonts_loaded |= FONT_14; /* * FONT KLUDGE * Always use the font page #0. XXX * Don't load if the current font size is not 8x14. */ if (ISTEXTSC(sc->cur_scp) && (sc->cur_scp->font_size >= 14) && (sc->cur_scp->font_size < 16)) sc_load_font(sc->cur_scp, 0, 14, 8, sc->font_14, 0, 256); return 0; case GIO_FONT8x14: /* get 8x14 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; if (sc->fonts_loaded & FONT_14) { bcopy(sc->font_14, data, 14*256); return 0; } else return ENXIO; case PIO_FONT8x16: /* set 8x16 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; bcopy(data, sc->font_16, 16*256); sc->fonts_loaded |= FONT_16; /* * FONT KLUDGE * Always use the font page #0. XXX * Don't load if the current font size is not 8x16. */ if (ISTEXTSC(sc->cur_scp) && (sc->cur_scp->font_size >= 16)) sc_load_font(sc->cur_scp, 0, 16, 8, sc->font_16, 0, 256); return 0; case GIO_FONT8x16: /* get 8x16 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; if (sc->fonts_loaded & FONT_16) { bcopy(sc->font_16, data, 16*256); return 0; } else return ENXIO; #endif /* SC_NO_FONT_LOADING */ default: break; } return (ENOIOCTL); } static int consolectl_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) { return sctty_ioctl(dev->si_drv1, cmd, data, td); } static int consolectl_close(struct cdev *dev, int flags, int mode, struct thread *td) { #ifndef SC_NO_SYSMOUSE mouse_info_t info; memset(&info, 0, sizeof(info)); info.operation = MOUSE_ACTION; /* * Make sure all buttons are released when moused and other * console daemons exit, so that no buttons are left pressed. 
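* The memset() plus operation = MOUSE_ACTION above describe "no buttons * down, no movement", so the CONS_MOUSECTL call below releases any * button a daemon may have left pressed.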
*/ (void) sctty_ioctl(dev->si_drv1, CONS_MOUSECTL, (caddr_t)&info, td); #endif return (0); } static void sc_cnprobe(struct consdev *cp) { int unit; int flags; if (!vty_enabled(VTY_SC)) { cp->cn_pri = CN_DEAD; return; } cp->cn_pri = sc_get_cons_priority(&unit, &flags); /* a video card is always required */ if (!scvidprobe(unit, flags, TRUE)) cp->cn_pri = CN_DEAD; /* syscons will become console even when there is no keyboard */ sckbdprobe(unit, flags, TRUE); if (cp->cn_pri == CN_DEAD) return; /* initialize required fields */ strcpy(cp->cn_name, "ttyv0"); } static void sc_cninit(struct consdev *cp) { int unit; int flags; sc_get_cons_priority(&unit, &flags); scinit(unit, flags | SC_KERNEL_CONSOLE); sc_console_unit = unit; sc_console = sc_get_stat(sc_get_softc(unit, SC_KERNEL_CONSOLE)->dev[0]); sc_consptr = cp; } static void sc_cnterm(struct consdev *cp) { /* we are not the kernel console any more, release everything */ if (sc_console_unit < 0) return; /* shouldn't happen */ #if 0 /* XXX */ sc_clear_screen(sc_console); sccnupdate(sc_console); #endif scterm(sc_console_unit, SC_KERNEL_CONSOLE); sc_console_unit = -1; sc_console = NULL; } static void sccnclose(sc_softc_t *sc, struct sc_cnstate *sp); static void sccnopen(sc_softc_t *sc, struct sc_cnstate *sp, int flags); +static void sccnscrlock(sc_softc_t *sc, struct sc_cnstate *sp); +static void sccnscrunlock(sc_softc_t *sc, struct sc_cnstate *sp); static void +sccnscrlock(sc_softc_t *sc, struct sc_cnstate *sp) +{ + SC_VIDEO_LOCK(sc); +} + +static void +sccnscrunlock(sc_softc_t *sc, struct sc_cnstate *sp) +{ + SC_VIDEO_UNLOCK(sc); +} + +static void sccnopen(sc_softc_t *sc, struct sc_cnstate *sp, int flags) { int kbd_mode; /* assert(sc_console_unit >= 0) */ sp->kbd_opened = FALSE; sp->scr_opened = FALSE; /* Opening the keyboard is optional. */ if (!(flags & 1) || sc->kbd == NULL) goto over_keyboard; /* * Make sure the keyboard is accessible even when the kbd device * driver is disabled. */ kbdd_enable(sc->kbd); /* Switch the keyboard to console mode (K_XLATE, polled) on all scp's. */ kbd_mode = K_XLATE; (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&kbd_mode); sc->kbd_open_level++; kbdd_poll(sc->kbd, TRUE); sp->kbd_opened = TRUE; over_keyboard: ; /* The screen is opened iff locking it succeeds. */ + sccnscrlock(sc, sp); sp->scr_opened = TRUE; /* The screen switch is optional. */ if (!(flags & 2)) return; /* try to switch to the kernel console screen */ if (!cold && sc->cur_scp->index != sc_console->index && sc->cur_scp->smode.mode == VT_AUTO && sc_console->smode.mode == VT_AUTO) sc_switch_scr(sc, sc_console->index); } static void sccnclose(sc_softc_t *sc, struct sc_cnstate *sp) { sp->scr_opened = FALSE; + sccnscrunlock(sc, sp); if (!sp->kbd_opened) return; /* Restore keyboard mode (for the current, possibly-changed scp). */ kbdd_poll(sc->kbd, FALSE); if (--sc->kbd_open_level == 0) (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&sc->cur_scp->kbd_mode); kbdd_disable(sc->kbd); sp->kbd_opened = FALSE; } /* * Grabbing switches the screen and keyboard focus to sc_console and the * keyboard mode to (K_XLATE, polled). Only switching to polled mode is * essential (for preventing the interrupt handler from eating input * between polls). Focus is part of the UI, and the other switches * work just as well when they are done on every entry and exit. * * Screen switches while grabbed are supported, and to maintain focus for * this, ungrabbing and closing only restore the polling state and then * the keyboard mode if on the original screen.
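* * As a rough sketch (editor's illustration, not from the source): each * cngrab()/cnungrab() pair at nesting level 0 or 1 opens and later * closes keyboard and screen state in grab_state[lev]; deeper nested * grabs only bump grab_level and keep no state of their own.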
*/ static void sc_cngrab(struct consdev *cp) { sc_softc_t *sc; int lev; sc = sc_console->sc; lev = atomic_fetchadd_int(&sc->grab_level, 1); - if (lev >= 0 && lev < 2) + if (lev >= 0 && lev < 2) { sccnopen(sc, &sc->grab_state[lev], 1 | 2); + sccnscrunlock(sc, &sc->grab_state[lev]); + } } static void sc_cnungrab(struct consdev *cp) { sc_softc_t *sc; int lev; sc = sc_console->sc; lev = atomic_load_acq_int(&sc->grab_level) - 1; - if (lev >= 0 && lev < 2) + if (lev >= 0 && lev < 2) { + sccnscrlock(sc, &sc->grab_state[lev]); sccnclose(sc, &sc->grab_state[lev]); + } atomic_add_int(&sc->grab_level, -1); } static void sc_cnputc(struct consdev *cd, int c) { + struct sc_cnstate st; u_char buf[1]; scr_stat *scp = sc_console; #ifndef SC_NO_HISTORY #if 0 struct tty *tp; #endif #endif /* !SC_NO_HISTORY */ int s; /* assert(sc_console != NULL) */ - SC_VIDEO_LOCK(scp->sc); + sccnopen(scp->sc, &st, 0); #ifndef SC_NO_HISTORY if (scp == scp->sc->cur_scp && scp->status & SLKED) { scp->status &= ~SLKED; update_kbd_state(scp, scp->status, SLKED); if (scp->status & BUFFER_SAVED) { if (!sc_hist_restore(scp)) sc_remove_cutmarking(scp); scp->status &= ~BUFFER_SAVED; scp->status |= CURSOR_ENABLED; sc_draw_cursor_image(scp); } #if 0 /* * XXX: Now that TTY's have their own locks, we cannot process * any data after disabling scroll lock. cnputs already holds a * spinlock. */ tp = SC_DEV(scp->sc, scp->index); /* XXX "tp" can be NULL */ tty_lock(tp); if (tty_opened(tp)) sctty_outwakeup(tp); tty_unlock(tp); #endif } #endif /* !SC_NO_HISTORY */ buf[0] = c; sc_puts(scp, buf, 1, 1); s = spltty(); /* block sckbdevent and scrn_timer */ sccnupdate(scp); splx(s); - SC_VIDEO_UNLOCK(scp->sc); + sccnclose(scp->sc, &st); } static int sc_cngetc(struct consdev *cd) { static struct fkeytab fkey; static int fkeycp; scr_stat *scp; const u_char *p; int s = spltty(); /* block sckbdevent and scrn_timer while we poll */ int c; /* assert(sc_console != NULL) */ /* * Stop the screen saver and update the screen if necessary. * What if we have been running in the screen saver code... XXX */ sc_touch_scrn_saver(); scp = sc_console->sc->cur_scp; /* XXX */ sccnupdate(scp); if (fkeycp < fkey.len) { splx(s); return fkey.str[fkeycp++]; } if (scp->sc->kbd == NULL) { splx(s); return -1; } c = scgetc(scp->sc, SCGETC_CN | SCGETC_NONBLOCK, NULL); switch (KEYFLAGS(c)) { case 0: /* normal char */ return KEYCHAR(c); case FKEY: /* function key */ p = (*scp->tsw->te_fkeystr)(scp, c); if (p != NULL) { fkey.len = strlen(p); bcopy(p, fkey.str, fkey.len); fkeycp = 1; return fkey.str[0]; } p = kbdd_get_fkeystr(scp->sc->kbd, KEYCHAR(c), (size_t *)&fkeycp); fkey.len = fkeycp; if ((p != NULL) && (fkey.len > 0)) { bcopy(p, fkey.str, fkey.len); fkeycp = 1; return fkey.str[0]; } return c; /* XXX */ case NOKEY: case ERRKEY: default: return -1; } /* NOT REACHED */ } static void sccnupdate(scr_stat *scp) { /* this is a cut-down version of scrn_timer()... */ if (suspend_in_progress || scp->sc->font_loading_in_progress) return; if (kdb_active || panicstr || shutdown_in_progress) { sc_touch_scrn_saver(); } else if (scp != scp->sc->cur_scp) { return; } if (!run_scrn_saver) scp->sc->flags &= ~SC_SCRN_IDLE; #ifdef DEV_SPLASH if ((saver_mode != CONS_LKM_SAVER) || !(scp->sc->flags & SC_SCRN_IDLE)) if (scp->sc->flags & SC_SCRN_BLANKED) stop_scrn_saver(scp->sc, current_saver); #endif if (scp != scp->sc->cur_scp || scp->sc->blink_in_progress || scp->sc->switch_in_progress) return; /* * FIXME: unlike scrn_timer(), we call scrn_update() from here even * when write_in_progress is non-zero. 
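* (scrn_timer() instead skips its update entirely while * write_in_progress is set; see the early "goto done" checks there.)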
XXX */ if (!ISGRAPHSC(scp) && !(scp->sc->flags & SC_SCRN_BLANKED)) scrn_update(scp, TRUE); } static void scrn_timer(void *arg) { #ifndef PC98 static time_t kbd_time_stamp = 0; #endif sc_softc_t *sc; scr_stat *scp; int again, rate; again = (arg != NULL); if (arg != NULL) sc = (sc_softc_t *)arg; else if (sc_console != NULL) sc = sc_console->sc; else return; /* find the vty to update */ scp = sc->cur_scp; /* don't do anything when we are performing some I/O operations */ if (suspend_in_progress || sc->font_loading_in_progress) goto done; #ifndef PC98 if ((sc->kbd == NULL) && (sc->config & SC_AUTODETECT_KBD)) { /* try to allocate a keyboard automatically */ if (kbd_time_stamp != time_uptime) { kbd_time_stamp = time_uptime; sc->keyboard = sc_allocate_keyboard(sc, -1); if (sc->keyboard >= 0) { sc->kbd = kbd_get_keyboard(sc->keyboard); (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&sc->cur_scp->kbd_mode); update_kbd_state(sc->cur_scp, sc->cur_scp->status, LOCK_MASK); } } } #endif /* PC98 */ /* should we stop the screen saver? */ if (kdb_active || panicstr || shutdown_in_progress) sc_touch_scrn_saver(); if (run_scrn_saver) { if (time_uptime > sc->scrn_time_stamp + scrn_blank_time) sc->flags |= SC_SCRN_IDLE; else sc->flags &= ~SC_SCRN_IDLE; } else { sc->scrn_time_stamp = time_uptime; sc->flags &= ~SC_SCRN_IDLE; if (scrn_blank_time > 0) run_scrn_saver = TRUE; } #ifdef DEV_SPLASH if ((saver_mode != CONS_LKM_SAVER) || !(sc->flags & SC_SCRN_IDLE)) if (sc->flags & SC_SCRN_BLANKED) stop_scrn_saver(sc, current_saver); #endif /* should we just return ? */ if (sc->blink_in_progress || sc->switch_in_progress || sc->write_in_progress) goto done; /* Update the screen */ scp = sc->cur_scp; /* cur_scp may have changed... */ if (!ISGRAPHSC(scp) && !(sc->flags & SC_SCRN_BLANKED)) scrn_update(scp, TRUE); #ifdef DEV_SPLASH /* should we activate the screen saver? */ if ((saver_mode == CONS_LKM_SAVER) && (sc->flags & SC_SCRN_IDLE)) if (!ISGRAPHSC(scp) || (sc->flags & SC_SCRN_BLANKED)) (*current_saver)(sc, TRUE); #endif done: if (again) { /* * Use reduced "refresh" rate if we are in graphics and that is not a * graphical screen saver. In such case we just have nothing to do. 
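* (With the callout_reset_sbt() call below this works out to 2 Hz * instead of the normal 30 Hz refresh.)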
*/ if (ISGRAPHSC(scp) && !(sc->flags & SC_SCRN_BLANKED)) rate = 2; else rate = 30; callout_reset_sbt(&sc->ctimeout, SBT_1S / rate, 0, scrn_timer, sc, C_PREL(1)); } } static int and_region(int *s1, int *e1, int s2, int e2) { if (*e1 < s2 || e2 < *s1) return FALSE; *s1 = imax(*s1, s2); *e1 = imin(*e1, e2); return TRUE; } static void scrn_update(scr_stat *scp, int show_cursor) { int start; int end; int s; int e; /* assert(scp == scp->sc->cur_scp) */ SC_VIDEO_LOCK(scp->sc); #ifndef SC_NO_CUTPASTE /* remove the previous mouse pointer image if necessary */ if (scp->status & MOUSE_VISIBLE) { s = scp->mouse_pos; e = scp->mouse_pos + scp->xsize + 1; if ((scp->status & (MOUSE_MOVED | MOUSE_HIDDEN)) || and_region(&s, &e, scp->start, scp->end) || ((scp->status & CURSOR_ENABLED) && (scp->cursor_pos != scp->cursor_oldpos) && (and_region(&s, &e, scp->cursor_pos, scp->cursor_pos) || and_region(&s, &e, scp->cursor_oldpos, scp->cursor_oldpos)))) { sc_remove_mouse_image(scp); if (scp->end >= scp->xsize*scp->ysize) scp->end = scp->xsize*scp->ysize - 1; } } #endif /* !SC_NO_CUTPASTE */ #if 1 /* debug: XXX */ if (scp->end >= scp->xsize*scp->ysize) { printf("scrn_update(): scp->end %d > size_of_screen!!\n", scp->end); scp->end = scp->xsize*scp->ysize - 1; } if (scp->start < 0) { printf("scrn_update(): scp->start %d < 0\n", scp->start); scp->start = 0; } #endif /* update screen image */ if (scp->start <= scp->end) { if (scp->mouse_cut_end >= 0) { /* there is a marked region for cut & paste */ if (scp->mouse_cut_start <= scp->mouse_cut_end) { start = scp->mouse_cut_start; end = scp->mouse_cut_end; } else { start = scp->mouse_cut_end; end = scp->mouse_cut_start - 1; } s = start; e = end; /* does the cut-mark region overlap with the update region? */ if (and_region(&s, &e, scp->start, scp->end)) { (*scp->rndr->draw)(scp, s, e - s + 1, TRUE); s = 0; e = start - 1; if (and_region(&s, &e, scp->start, scp->end)) (*scp->rndr->draw)(scp, s, e - s + 1, FALSE); s = end + 1; e = scp->xsize*scp->ysize - 1; if (and_region(&s, &e, scp->start, scp->end)) (*scp->rndr->draw)(scp, s, e - s + 1, FALSE); } else { (*scp->rndr->draw)(scp, scp->start, scp->end - scp->start + 1, FALSE); } } else { (*scp->rndr->draw)(scp, scp->start, scp->end - scp->start + 1, FALSE); } } /* we are not to show the cursor and the mouse pointer... */ if (!show_cursor) { scp->end = 0; scp->start = scp->xsize*scp->ysize - 1; SC_VIDEO_UNLOCK(scp->sc); return; } /* update cursor image */ if (scp->status & CURSOR_ENABLED) { s = scp->start; e = scp->end; /* did cursor move since last time ? */ if (scp->cursor_pos != scp->cursor_oldpos) { /* do we need to remove old cursor image ? 
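* (An explicit removal is needed only when the update region did not * already overwrite the old cursor position.)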
*/ if (!and_region(&s, &e, scp->cursor_oldpos, scp->cursor_oldpos)) sc_remove_cursor_image(scp); sc_draw_cursor_image(scp); } else { if (and_region(&s, &e, scp->cursor_pos, scp->cursor_pos)) /* cursor didn't move, but has been overwritten */ sc_draw_cursor_image(scp); else if (scp->curs_attr.flags & CONS_BLINK_CURSOR) /* if it's a blinking cursor, update it */ (*scp->rndr->blink_cursor)(scp, scp->cursor_pos, sc_inside_cutmark(scp, scp->cursor_pos)); } } #ifndef SC_NO_CUTPASTE /* update "pseudo" mouse pointer image */ if (scp->sc->flags & SC_MOUSE_ENABLED) { if (!(scp->status & (MOUSE_VISIBLE | MOUSE_HIDDEN))) { scp->status &= ~MOUSE_MOVED; sc_draw_mouse_image(scp); } } #endif /* SC_NO_CUTPASTE */ scp->end = 0; scp->start = scp->xsize*scp->ysize - 1; SC_VIDEO_UNLOCK(scp->sc); } #ifdef DEV_SPLASH static int scsplash_callback(int event, void *arg) { sc_softc_t *sc; int error; sc = (sc_softc_t *)arg; switch (event) { case SPLASH_INIT: if (add_scrn_saver(scsplash_saver) == 0) { sc->flags &= ~SC_SAVER_FAILED; run_scrn_saver = TRUE; if (cold && !(boothowto & RB_VERBOSE)) { scsplash_stick(TRUE); (*current_saver)(sc, TRUE); } } return 0; case SPLASH_TERM: if (current_saver == scsplash_saver) { scsplash_stick(FALSE); error = remove_scrn_saver(scsplash_saver); if (error) return error; } return 0; default: return EINVAL; } } static void scsplash_saver(sc_softc_t *sc, int show) { static int busy = FALSE; scr_stat *scp; if (busy) return; busy = TRUE; scp = sc->cur_scp; if (show) { if (!(sc->flags & SC_SAVER_FAILED)) { if (!(sc->flags & SC_SCRN_BLANKED)) set_scrn_saver_mode(scp, -1, NULL, 0); switch (splash(sc->adp, TRUE)) { case 0: /* succeeded */ break; case EAGAIN: /* try later */ restore_scrn_saver_mode(scp, FALSE); sc_touch_scrn_saver(); /* XXX */ break; default: sc->flags |= SC_SAVER_FAILED; scsplash_stick(FALSE); restore_scrn_saver_mode(scp, TRUE); printf("scsplash_saver(): failed to put up the image\n"); break; } } } else if (!sticky_splash) { if ((sc->flags & SC_SCRN_BLANKED) && (splash(sc->adp, FALSE) == 0)) restore_scrn_saver_mode(scp, TRUE); } busy = FALSE; } static int add_scrn_saver(void (*this_saver)(sc_softc_t *, int)) { #if 0 int error; if (current_saver != none_saver) { error = remove_scrn_saver(current_saver); if (error) return error; } #endif if (current_saver != none_saver) return EBUSY; run_scrn_saver = FALSE; saver_mode = CONS_LKM_SAVER; current_saver = this_saver; return 0; } static int remove_scrn_saver(void (*this_saver)(sc_softc_t *, int)) { if (current_saver != this_saver) return EINVAL; #if 0 /* * In order to prevent `current_saver' from being called by * the timeout routine `scrn_timer()' while we manipulate * the saver list, we shall set `current_saver' to `none_saver' * before stopping the current saver, rather than blocking by `splXX()'. 
*/ current_saver = none_saver; if (scrn_blanked) stop_scrn_saver(this_saver); #endif /* unblank all blanked screens */ wait_scrn_saver_stop(NULL); if (scrn_blanked) return EBUSY; current_saver = none_saver; return 0; } static int set_scrn_saver_mode(scr_stat *scp, int mode, u_char *pal, int border) { int s; /* assert(scp == scp->sc->cur_scp) */ s = spltty(); if (!ISGRAPHSC(scp)) sc_remove_cursor_image(scp); scp->splash_save_mode = scp->mode; scp->splash_save_status = scp->status & (GRAPHICS_MODE | PIXEL_MODE); scp->status &= ~(GRAPHICS_MODE | PIXEL_MODE); scp->status |= (UNKNOWN_MODE | SAVER_RUNNING); scp->sc->flags |= SC_SCRN_BLANKED; ++scrn_blanked; splx(s); if (mode < 0) return 0; scp->mode = mode; if (set_mode(scp) == 0) { if (scp->sc->adp->va_info.vi_flags & V_INFO_GRAPHICS) scp->status |= GRAPHICS_MODE; #ifndef SC_NO_PALETTE_LOADING if (pal != NULL) vidd_load_palette(scp->sc->adp, pal); #endif sc_set_border(scp, border); return 0; } else { s = spltty(); scp->mode = scp->splash_save_mode; scp->status &= ~(UNKNOWN_MODE | SAVER_RUNNING); scp->status |= scp->splash_save_status; splx(s); return 1; } } static int restore_scrn_saver_mode(scr_stat *scp, int changemode) { int mode; int status; int s; /* assert(scp == scp->sc->cur_scp) */ s = spltty(); mode = scp->mode; status = scp->status; scp->mode = scp->splash_save_mode; scp->status &= ~(UNKNOWN_MODE | SAVER_RUNNING); scp->status |= scp->splash_save_status; scp->sc->flags &= ~SC_SCRN_BLANKED; if (!changemode) { if (!ISGRAPHSC(scp)) sc_draw_cursor_image(scp); --scrn_blanked; splx(s); return 0; } if (set_mode(scp) == 0) { #ifndef SC_NO_PALETTE_LOADING #ifdef SC_PIXEL_MODE if (scp->sc->adp->va_info.vi_mem_model == V_INFO_MM_DIRECT) vidd_load_palette(scp->sc->adp, scp->sc->palette2); else #endif vidd_load_palette(scp->sc->adp, scp->sc->palette); #endif --scrn_blanked; splx(s); return 0; } else { scp->mode = mode; scp->status = status; splx(s); return 1; } } static void stop_scrn_saver(sc_softc_t *sc, void (*saver)(sc_softc_t *, int)) { (*saver)(sc, FALSE); run_scrn_saver = FALSE; /* the screen saver may have chosen not to stop after all... */ if (sc->flags & SC_SCRN_BLANKED) return; mark_all(sc->cur_scp); if (sc->delayed_next_scr) sc_switch_scr(sc, sc->delayed_next_scr - 1); if (!kdb_active) wakeup(&scrn_blanked); } static int wait_scrn_saver_stop(sc_softc_t *sc) { int error = 0; while (scrn_blanked > 0) { run_scrn_saver = FALSE; if (sc && !(sc->flags & SC_SCRN_BLANKED)) { error = 0; break; } error = tsleep(&scrn_blanked, PZERO | PCATCH, "scrsav", 0); if ((error != 0) && (error != ERESTART)) break; } run_scrn_saver = FALSE; return error; } #endif /* DEV_SPLASH */ void sc_touch_scrn_saver(void) { scsplash_stick(FALSE); run_scrn_saver = FALSE; } int sc_switch_scr(sc_softc_t *sc, u_int next_scr) { scr_stat *cur_scp; struct tty *tp; struct proc *p; int s; DPRINTF(5, ("sc0: sc_switch_scr() %d ", next_scr + 1)); if (sc->cur_scp == NULL) return (0); /* prevent switch if previously requested */ if (sc->flags & SC_SCRN_VTYLOCK) { sc_bell(sc->cur_scp, sc->cur_scp->bell_pitch, sc->cur_scp->bell_duration); return EPERM; } /* delay switch if the screen is blanked or being updated */ if ((sc->flags & SC_SCRN_BLANKED) || sc->write_in_progress || sc->blink_in_progress) { sc->delayed_next_scr = next_scr + 1; sc_touch_scrn_saver(); DPRINTF(5, ("switch delayed\n")); return 0; } sc->delayed_next_scr = 0; s = spltty(); cur_scp = sc->cur_scp; /* we are in the middle of the vty switching process... 
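* (an earlier switch has signalled a VT_PROCESS controller and is * still waiting in SWITCH_WAIT_REL or SWITCH_WAIT_ACQ for its answer)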
*/ if (sc->switch_in_progress && (cur_scp->smode.mode == VT_PROCESS) && cur_scp->proc) { p = pfind(cur_scp->pid); if (cur_scp->proc != p) { if (p) PROC_UNLOCK(p); /* * The controlling process has died!!. Do some clean up. * NOTE:`cur_scp->proc' and `cur_scp->smode.mode' * are not reset here yet; they will be cleared later. */ DPRINTF(5, ("cur_scp controlling process %d died, ", cur_scp->pid)); if (cur_scp->status & SWITCH_WAIT_REL) { /* * Force the previous switch to finish, but return now * with error. */ DPRINTF(5, ("reset WAIT_REL, ")); finish_vt_rel(cur_scp, TRUE, &s); splx(s); DPRINTF(5, ("finishing previous switch\n")); return EINVAL; } else if (cur_scp->status & SWITCH_WAIT_ACQ) { /* let's assume screen switch has been completed. */ DPRINTF(5, ("reset WAIT_ACQ, ")); finish_vt_acq(cur_scp); } else { /* * We are in between screen release and acquisition, and * reached here via scgetc() or scrn_timer() which has * interrupted exchange_scr(). Don't do anything stupid. */ DPRINTF(5, ("waiting nothing, ")); } } else { if (p) PROC_UNLOCK(p); /* * The controlling process is alive, but not responding... * It is either buggy or it may be just taking time. * The following code is a gross kludge to cope with this * problem for which there is no clean solution. XXX */ if (cur_scp->status & SWITCH_WAIT_REL) { switch (sc->switch_in_progress++) { case 1: break; case 2: DPRINTF(5, ("sending relsig again, ")); signal_vt_rel(cur_scp); break; case 3: break; case 4: default: /* * Act as if the controlling program returned * VT_FALSE. */ DPRINTF(5, ("force reset WAIT_REL, ")); finish_vt_rel(cur_scp, FALSE, &s); splx(s); DPRINTF(5, ("act as if VT_FALSE was seen\n")); return EINVAL; } } else if (cur_scp->status & SWITCH_WAIT_ACQ) { switch (sc->switch_in_progress++) { case 1: break; case 2: DPRINTF(5, ("sending acqsig again, ")); signal_vt_acq(cur_scp); break; case 3: break; case 4: default: /* clear the flag and finish the previous switch */ DPRINTF(5, ("force reset WAIT_ACQ, ")); finish_vt_acq(cur_scp); break; } } } } /* * Return error if an invalid argument is given, or vty switch * is still in progress. */ if ((next_scr < sc->first_vty) || (next_scr >= sc->first_vty + sc->vtys) || sc->switch_in_progress) { splx(s); sc_bell(cur_scp, bios_value.bell_pitch, BELL_DURATION); DPRINTF(5, ("error 1\n")); return EINVAL; } /* * Don't allow switching away from the graphics mode vty * if the switch mode is VT_AUTO, unless the next vty is the same * as the current or the current vty has been closed (but showing). */ tp = SC_DEV(sc, cur_scp->index); if ((cur_scp->index != next_scr) && tty_opened_ns(tp) && (cur_scp->smode.mode == VT_AUTO) && ISGRAPHSC(cur_scp)) { splx(s); sc_bell(cur_scp, bios_value.bell_pitch, BELL_DURATION); DPRINTF(5, ("error, graphics mode\n")); return EINVAL; } /* * Is the wanted vty open? Don't allow switching to a closed vty. * If we are in DDB, don't switch to a vty in the VT_PROCESS mode. * Note that we always allow the user to switch to the kernel * console even if it is closed. */ if ((sc_console == NULL) || (next_scr != sc_console->index)) { tp = SC_DEV(sc, next_scr); if (!tty_opened_ns(tp)) { splx(s); sc_bell(cur_scp, bios_value.bell_pitch, BELL_DURATION); DPRINTF(5, ("error 2, requested vty isn't open!\n")); return EINVAL; } if (kdb_active && SC_STAT(tp)->smode.mode == VT_PROCESS) { splx(s); DPRINTF(5, ("error 3, requested vty is in the VT_PROCESS mode\n")); return EINVAL; } } /* this is the start of vty switching process... 
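* * As a sketch of the full VT_PROCESS handshake (pieced together from * the handlers above, not spelled out in the source): signal_vt_rel() * sends smode.relsig to the old screen's controlling process; that * process answers with ioctl(fd, VT_RELDISP, VT_TRUE), which runs * finish_vt_rel() and exchange_scr(); signal_vt_acq() then sends * smode.acqsig to the new screen's controller, which completes the * switch with ioctl(fd, VT_RELDISP, VT_ACKACQ), i.e. finish_vt_acq().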
*/ ++sc->switch_in_progress; sc->old_scp = cur_scp; sc->new_scp = sc_get_stat(SC_DEV(sc, next_scr)); if (sc->new_scp == sc->old_scp) { sc->switch_in_progress = 0; /* * XXX wakeup() locks the scheduler lock which will hang if * the lock is in an in-between state, e.g., when we stop at * a breakpoint at fork_exit. It has always been wrong to call * wakeup() when the debugger is active. In RELENG_4, wakeup() * is supposed to be locked by splhigh(), but the debugger may * be invoked at splhigh(). */ if (!kdb_active) wakeup(VTY_WCHAN(sc,next_scr)); splx(s); DPRINTF(5, ("switch done (new == old)\n")); return 0; } /* has controlling process died? */ vt_proc_alive(sc->old_scp); vt_proc_alive(sc->new_scp); /* wait for the controlling process to release the screen, if necessary */ if (signal_vt_rel(sc->old_scp)) { splx(s); return 0; } /* go set up the new vty screen */ splx(s); exchange_scr(sc); s = spltty(); /* wake up processes waiting for this vty */ if (!kdb_active) wakeup(VTY_WCHAN(sc,next_scr)); /* wait for the controlling process to acknowledge, if necessary */ if (signal_vt_acq(sc->cur_scp)) { splx(s); return 0; } sc->switch_in_progress = 0; if (sc->unit == sc_console_unit) cnavailable(sc_consptr, TRUE); splx(s); DPRINTF(5, ("switch done\n")); return 0; } static int do_switch_scr(sc_softc_t *sc, int s) { vt_proc_alive(sc->new_scp); splx(s); exchange_scr(sc); s = spltty(); /* sc->cur_scp == sc->new_scp */ wakeup(VTY_WCHAN(sc,sc->cur_scp->index)); /* wait for the controlling process to acknowledge, if necessary */ if (!signal_vt_acq(sc->cur_scp)) { sc->switch_in_progress = 0; if (sc->unit == sc_console_unit) cnavailable(sc_consptr, TRUE); } return s; } static int vt_proc_alive(scr_stat *scp) { struct proc *p; if (scp->proc) { if ((p = pfind(scp->pid)) != NULL) PROC_UNLOCK(p); if (scp->proc == p) return TRUE; scp->proc = NULL; scp->smode.mode = VT_AUTO; DPRINTF(5, ("vt controlling process %d died\n", scp->pid)); } return FALSE; } static int signal_vt_rel(scr_stat *scp) { if (scp->smode.mode != VT_PROCESS) return FALSE; scp->status |= SWITCH_WAIT_REL; PROC_LOCK(scp->proc); kern_psignal(scp->proc, scp->smode.relsig); PROC_UNLOCK(scp->proc); DPRINTF(5, ("sending relsig to %d\n", scp->pid)); return TRUE; } static int signal_vt_acq(scr_stat *scp) { if (scp->smode.mode != VT_PROCESS) return FALSE; if (scp->sc->unit == sc_console_unit) cnavailable(sc_consptr, FALSE); scp->status |= SWITCH_WAIT_ACQ; PROC_LOCK(scp->proc); kern_psignal(scp->proc, scp->smode.acqsig); PROC_UNLOCK(scp->proc); DPRINTF(5, ("sending acqsig to %d\n", scp->pid)); return TRUE; } static int finish_vt_rel(scr_stat *scp, int release, int *s) { if (scp == scp->sc->old_scp && scp->status & SWITCH_WAIT_REL) { scp->status &= ~SWITCH_WAIT_REL; if (release) *s = do_switch_scr(scp->sc, *s); else scp->sc->switch_in_progress = 0; return 0; } return EINVAL; } static int finish_vt_acq(scr_stat *scp) { if (scp == scp->sc->new_scp && scp->status & SWITCH_WAIT_ACQ) { scp->status &= ~SWITCH_WAIT_ACQ; scp->sc->switch_in_progress = 0; return 0; } return EINVAL; } static void exchange_scr(sc_softc_t *sc) { scr_stat *scp; /* save the current state of video and keyboard */ sc_move_cursor(sc->old_scp, sc->old_scp->xpos, sc->old_scp->ypos); if (!ISGRAPHSC(sc->old_scp)) sc_remove_cursor_image(sc->old_scp); if (sc->old_scp->kbd_mode == K_XLATE) save_kbd_state(sc->old_scp); /* set up the video for the new screen */ scp = sc->cur_scp = sc->new_scp; #ifdef PC98 if (sc->old_scp->mode != scp->mode || ISUNKNOWNSC(sc->old_scp) || ISUNKNOWNSC(sc->new_scp)) #else 
if (sc->old_scp->mode != scp->mode || ISUNKNOWNSC(sc->old_scp)) #endif set_mode(scp); #ifndef __sparc64__ else sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, scp->xsize, scp->ysize, (void *)sc->adp->va_window, FALSE); #endif scp->status |= MOUSE_HIDDEN; sc_move_cursor(scp, scp->xpos, scp->ypos); if (!ISGRAPHSC(scp)) sc_set_cursor_image(scp); #ifndef SC_NO_PALETTE_LOADING if (ISGRAPHSC(sc->old_scp)) { #ifdef SC_PIXEL_MODE if (sc->adp->va_info.vi_mem_model == V_INFO_MM_DIRECT) vidd_load_palette(sc->adp, sc->palette2); else #endif vidd_load_palette(sc->adp, sc->palette); } #endif sc_set_border(scp, scp->border); /* set up the keyboard for the new screen */ if (sc->kbd_open_level == 0 && sc->old_scp->kbd_mode != scp->kbd_mode) (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&scp->kbd_mode); update_kbd_state(scp, scp->status, LOCK_MASK); mark_all(scp); } static void sc_puts(scr_stat *scp, u_char *buf, int len, int kernel) { #ifdef DEV_SPLASH /* make screensaver happy */ if (!sticky_splash && scp == scp->sc->cur_scp && !sc_saver_keyb_only) run_scrn_saver = FALSE; #endif if (scp->tsw) (*scp->tsw->te_puts)(scp, buf, len, kernel); if (scp->sc->delayed_next_scr) sc_switch_scr(scp->sc, scp->sc->delayed_next_scr - 1); } void sc_draw_cursor_image(scr_stat *scp) { /* assert(scp == scp->sc->cur_scp); */ SC_VIDEO_LOCK(scp->sc); (*scp->rndr->draw_cursor)(scp, scp->cursor_pos, scp->curs_attr.flags & CONS_BLINK_CURSOR, TRUE, sc_inside_cutmark(scp, scp->cursor_pos)); scp->cursor_oldpos = scp->cursor_pos; SC_VIDEO_UNLOCK(scp->sc); } void sc_remove_cursor_image(scr_stat *scp) { /* assert(scp == scp->sc->cur_scp); */ SC_VIDEO_LOCK(scp->sc); (*scp->rndr->draw_cursor)(scp, scp->cursor_oldpos, scp->curs_attr.flags & CONS_BLINK_CURSOR, FALSE, sc_inside_cutmark(scp, scp->cursor_oldpos)); SC_VIDEO_UNLOCK(scp->sc); } static void update_cursor_image(scr_stat *scp) { /* assert(scp == scp->sc->cur_scp); */ sc_remove_cursor_image(scp); sc_set_cursor_image(scp); sc_draw_cursor_image(scp); } void sc_set_cursor_image(scr_stat *scp) { scp->curs_attr.flags = scp->curr_curs_attr.flags; if (scp->curs_attr.flags & CONS_HIDDEN_CURSOR) { /* hidden cursor is internally represented as zero-height underline */ scp->curs_attr.flags = CONS_CHAR_CURSOR; scp->curs_attr.base = scp->curs_attr.height = 0; } else if (scp->curs_attr.flags & CONS_CHAR_CURSOR) { scp->curs_attr.base = imin(scp->curr_curs_attr.base, scp->font_size - 1); scp->curs_attr.height = imin(scp->curr_curs_attr.height, scp->font_size - scp->curs_attr.base); } else { /* block cursor */ scp->curs_attr.base = 0; scp->curs_attr.height = scp->font_size; } /* assert(scp == scp->sc->cur_scp); */ SC_VIDEO_LOCK(scp->sc); (*scp->rndr->set_cursor)(scp, scp->curs_attr.base, scp->curs_attr.height, scp->curs_attr.flags & CONS_BLINK_CURSOR); SC_VIDEO_UNLOCK(scp->sc); } static void change_cursor_shape(scr_stat *scp, int flags, int base, int height) { if ((scp == scp->sc->cur_scp) && !ISGRAPHSC(scp)) sc_remove_cursor_image(scp); if (base >= 0) scp->curr_curs_attr.base = base; if (height >= 0) scp->curr_curs_attr.height = height; if (flags & CONS_RESET_CURSOR) scp->curr_curs_attr = scp->dflt_curs_attr; else scp->curr_curs_attr.flags = flags & CONS_CURSOR_ATTRS; if ((scp == scp->sc->cur_scp) && !ISGRAPHSC(scp)) { sc_set_cursor_image(scp); sc_draw_cursor_image(scp); } } void sc_change_cursor_shape(scr_stat *scp, int flags, int base, int height) { sc_softc_t *sc; struct tty *tp; int s; int i; s = spltty(); if ((flags != -1) && (flags & CONS_LOCAL_CURSOR)) { /* local (per vty) change */ 
change_cursor_shape(scp, flags, base, height); splx(s); return; } /* global change */ sc = scp->sc; if (base >= 0) sc->curs_attr.base = base; if (height >= 0) sc->curs_attr.height = height; if (flags != -1) { if (flags & CONS_RESET_CURSOR) sc->curs_attr = sc->dflt_curs_attr; else sc->curs_attr.flags = flags & CONS_CURSOR_ATTRS; } for (i = sc->first_vty; i < sc->first_vty + sc->vtys; ++i) { if ((tp = SC_DEV(sc, i)) == NULL) continue; if ((scp = sc_get_stat(tp)) == NULL) continue; scp->dflt_curs_attr = sc->curs_attr; change_cursor_shape(scp, CONS_RESET_CURSOR, -1, -1); } splx(s); } static void scinit(int unit, int flags) { /* * When syscons is being initialized as the kernel console, malloc() * is not yet functional, because various kernel structures have not been * fully initialized yet. Therefore, we need to declare the following * static buffers for the console. This is less than ideal, * but is a necessary evil for the time being. XXX */ #ifdef PC98 static u_short sc_buffer[ROW*COL*2];/* XXX */ #else static u_short sc_buffer[ROW*COL]; /* XXX */ #endif #ifndef SC_NO_FONT_LOADING static u_char font_8[256*8]; static u_char font_14[256*14]; static u_char font_16[256*16]; #endif sc_softc_t *sc; scr_stat *scp; video_adapter_t *adp; int col; int row; int i; /* one time initialization */ if (init_done == COLD) sc_get_bios_values(&bios_value); init_done = WARM; /* * Allocate resources. Even if we are being called for the second * time, we must allocate them again, because they might have * disappeared... */ sc = sc_get_softc(unit, flags & SC_KERNEL_CONSOLE); if ((sc->flags & SC_INIT_DONE) == 0) SC_VIDEO_LOCKINIT(sc); adp = NULL; if (sc->adapter >= 0) { vid_release(sc->adp, (void *)&sc->adapter); adp = sc->adp; sc->adp = NULL; } if (sc->keyboard >= 0) { DPRINTF(5, ("sc%d: releasing kbd%d\n", unit, sc->keyboard)); i = kbd_release(sc->kbd, (void *)&sc->keyboard); DPRINTF(5, ("sc%d: kbd_release returned %d\n", unit, i)); if (sc->kbd != NULL) { DPRINTF(5, ("sc%d: kbd != NULL!, index:%d, unit:%d, flags:0x%x\n", unit, sc->kbd->kb_index, sc->kbd->kb_unit, sc->kbd->kb_flags)); } sc->kbd = NULL; } sc->adapter = vid_allocate("*", unit, (void *)&sc->adapter); sc->adp = vid_get_adapter(sc->adapter); /* assert((sc->adapter >= 0) && (sc->adp != NULL)) */ sc->keyboard = sc_allocate_keyboard(sc, unit); DPRINTF(1, ("sc%d: keyboard %d\n", unit, sc->keyboard)); sc->kbd = kbd_get_keyboard(sc->keyboard); if (sc->kbd != NULL) { DPRINTF(1, ("sc%d: kbd index:%d, unit:%d, flags:0x%x\n", unit, sc->kbd->kb_index, sc->kbd->kb_unit, sc->kbd->kb_flags)); } if (!(sc->flags & SC_INIT_DONE) || (adp != sc->adp)) { sc->initial_mode = sc->adp->va_initial_mode; #ifndef SC_NO_FONT_LOADING if (flags & SC_KERNEL_CONSOLE) { sc->font_8 = font_8; sc->font_14 = font_14; sc->font_16 = font_16; } else if (sc->font_8 == NULL) { /* assert(sc_malloc) */ sc->font_8 = malloc(sizeof(font_8), M_DEVBUF, M_WAITOK); sc->font_14 = malloc(sizeof(font_14), M_DEVBUF, M_WAITOK); sc->font_16 = malloc(sizeof(font_16), M_DEVBUF, M_WAITOK); } #endif /* extract the hardware cursor location and hide the cursor for now */ vidd_read_hw_cursor(sc->adp, &col, &row); vidd_set_hw_cursor(sc->adp, -1, -1); /* set up the first console */ sc->first_vty = unit*MAXCONS; sc->vtys = MAXCONS; /* XXX: should be configurable */ if (flags & SC_KERNEL_CONSOLE) { /* * Set up devs structure but don't use it yet, calling make_dev() * might panic the kernel. Wait for sc_attach_unit() to actually * create the devices.
*/ sc->dev = main_devs; scp = &main_console; init_scp(sc, sc->first_vty, scp); sc_vtb_init(&scp->vtb, VTB_MEMORY, scp->xsize, scp->ysize, (void *)sc_buffer, FALSE); /* move cursors to the initial positions */ if (col >= scp->xsize) col = 0; if (row >= scp->ysize) row = scp->ysize - 1; scp->xpos = col; scp->ypos = row; scp->cursor_pos = scp->cursor_oldpos = row*scp->xsize + col; if (sc_init_emulator(scp, SC_DFLT_TERM)) sc_init_emulator(scp, "*"); (*scp->tsw->te_default_attr)(scp, user_default.std_color, user_default.rev_color); } else { /* assert(sc_malloc) */ sc->dev = malloc(sizeof(struct tty *)*sc->vtys, M_DEVBUF, M_WAITOK|M_ZERO); sc->dev[0] = sc_alloc_tty(0, unit * MAXCONS); scp = alloc_scp(sc, sc->first_vty); SC_STAT(sc->dev[0]) = scp; } sc->cur_scp = scp; #ifndef __sparc64__ /* copy screen to temporary buffer */ sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, scp->xsize, scp->ysize, (void *)scp->sc->adp->va_window, FALSE); if (ISTEXTSC(scp)) sc_vtb_copy(&scp->scr, 0, &scp->vtb, 0, scp->xsize*scp->ysize); #endif if (bios_value.cursor_end < scp->font_size) sc->dflt_curs_attr.base = scp->font_size - bios_value.cursor_end - 1; else sc->dflt_curs_attr.base = 0; i = bios_value.cursor_end - bios_value.cursor_start + 1; sc->dflt_curs_attr.height = imin(i, scp->font_size); sc->dflt_curs_attr.flags = 0; sc->curs_attr = sc->dflt_curs_attr; scp->curr_curs_attr = scp->dflt_curs_attr = sc->curs_attr; #ifndef SC_NO_SYSMOUSE sc_mouse_move(scp, scp->xpixel/2, scp->ypixel/2); #endif if (!ISGRAPHSC(scp)) { sc_set_cursor_image(scp); sc_draw_cursor_image(scp); } /* save font and palette */ #ifndef SC_NO_FONT_LOADING sc->fonts_loaded = 0; if (ISFONTAVAIL(sc->adp->va_flags)) { #ifdef SC_DFLT_FONT bcopy(dflt_font_8, sc->font_8, sizeof(dflt_font_8)); bcopy(dflt_font_14, sc->font_14, sizeof(dflt_font_14)); bcopy(dflt_font_16, sc->font_16, sizeof(dflt_font_16)); sc->fonts_loaded = FONT_16 | FONT_14 | FONT_8; if (scp->font_size < 14) { sc_load_font(scp, 0, 8, 8, sc->font_8, 0, 256); } else if (scp->font_size >= 16) { sc_load_font(scp, 0, 16, 8, sc->font_16, 0, 256); } else { sc_load_font(scp, 0, 14, 8, sc->font_14, 0, 256); } #else /* !SC_DFLT_FONT */ if (scp->font_size < 14) { sc_save_font(scp, 0, 8, 8, sc->font_8, 0, 256); sc->fonts_loaded = FONT_8; } else if (scp->font_size >= 16) { sc_save_font(scp, 0, 16, 8, sc->font_16, 0, 256); sc->fonts_loaded = FONT_16; } else { sc_save_font(scp, 0, 14, 8, sc->font_14, 0, 256); sc->fonts_loaded = FONT_14; } #endif /* SC_DFLT_FONT */ /* FONT KLUDGE: always use the font page #0. XXX */ sc_show_font(scp, 0); } #endif /* !SC_NO_FONT_LOADING */ #ifndef SC_NO_PALETTE_LOADING vidd_save_palette(sc->adp, sc->palette); #ifdef SC_PIXEL_MODE for (i = 0; i < sizeof(sc->palette2); i++) sc->palette2[i] = i / 3; #endif #endif #ifdef DEV_SPLASH if (!(sc->flags & SC_SPLASH_SCRN)) { /* we are ready to put up the splash image! 
*/ splash_init(sc->adp, scsplash_callback, sc); sc->flags |= SC_SPLASH_SCRN; } #endif } /* the rest is not necessary, if we have done it once */ if (sc->flags & SC_INIT_DONE) return; /* initialize mapscrn arrays to a one to one map */ for (i = 0; i < sizeof(sc->scr_map); i++) sc->scr_map[i] = sc->scr_rmap[i] = i; #ifdef PC98 sc->scr_map[0x5c] = (u_char)0xfc; /* for backslash */ #endif sc->flags |= SC_INIT_DONE; } static void scterm(int unit, int flags) { sc_softc_t *sc; scr_stat *scp; sc = sc_get_softc(unit, flags & SC_KERNEL_CONSOLE); if (sc == NULL) return; /* shouldn't happen */ #ifdef DEV_SPLASH /* this console is no longer available for the splash screen */ if (sc->flags & SC_SPLASH_SCRN) { splash_term(sc->adp); sc->flags &= ~SC_SPLASH_SCRN; } #endif #if 0 /* XXX */ /* move the hardware cursor to the upper-left corner */ vidd_set_hw_cursor(sc->adp, 0, 0); #endif /* release the keyboard and the video card */ if (sc->keyboard >= 0) kbd_release(sc->kbd, &sc->keyboard); if (sc->adapter >= 0) vid_release(sc->adp, &sc->adapter); /* stop the terminal emulator, if any */ scp = sc_get_stat(sc->dev[0]); if (scp->tsw) (*scp->tsw->te_term)(scp, &scp->ts); if (scp->ts != NULL) free(scp->ts, M_DEVBUF); mtx_destroy(&sc->video_mtx); /* clear the structure */ if (!(flags & SC_KERNEL_CONSOLE)) { /* XXX: We need delete_dev() for this */ free(sc->dev, M_DEVBUF); #if 0 /* XXX: We need a ttyunregister for this */ free(sc->tty, M_DEVBUF); #endif #ifndef SC_NO_FONT_LOADING free(sc->font_8, M_DEVBUF); free(sc->font_14, M_DEVBUF); free(sc->font_16, M_DEVBUF); #endif /* XXX vtb, history */ } bzero(sc, sizeof(*sc)); sc->keyboard = -1; sc->adapter = -1; } static void scshutdown(__unused void *arg, __unused int howto) { KASSERT(sc_console != NULL, ("sc_console != NULL")); KASSERT(sc_console->sc != NULL, ("sc_console->sc != NULL")); KASSERT(sc_console->sc->cur_scp != NULL, ("sc_console->sc->cur_scp != NULL")); sc_touch_scrn_saver(); if (!cold && sc_console->sc->cur_scp->index != sc_console->index && sc_console->sc->cur_scp->smode.mode == VT_AUTO && sc_console->smode.mode == VT_AUTO) sc_switch_scr(sc_console->sc, sc_console->index); shutdown_in_progress = TRUE; } static void scsuspend(__unused void *arg) { int retry; KASSERT(sc_console != NULL, ("sc_console != NULL")); KASSERT(sc_console->sc != NULL, ("sc_console->sc != NULL")); KASSERT(sc_console->sc->cur_scp != NULL, ("sc_console->sc->cur_scp != NULL")); sc_susp_scr = sc_console->sc->cur_scp->index; if (sc_no_suspend_vtswitch || sc_susp_scr == sc_console->index) { sc_touch_scrn_saver(); sc_susp_scr = -1; return; } for (retry = 0; retry < 10; retry++) { sc_switch_scr(sc_console->sc, sc_console->index); if (!sc_console->sc->switch_in_progress) break; pause("scsuspend", hz); } suspend_in_progress = TRUE; } static void scresume(__unused void *arg) { KASSERT(sc_console != NULL, ("sc_console != NULL")); KASSERT(sc_console->sc != NULL, ("sc_console->sc != NULL")); KASSERT(sc_console->sc->cur_scp != NULL, ("sc_console->sc->cur_scp != NULL")); suspend_in_progress = FALSE; if (sc_susp_scr < 0) { update_font(sc_console->sc->cur_scp); return; } sc_switch_scr(sc_console->sc, sc_susp_scr); } int sc_clean_up(scr_stat *scp) { #ifdef DEV_SPLASH int error; #endif if (scp->sc->flags & SC_SCRN_BLANKED) { sc_touch_scrn_saver(); #ifdef DEV_SPLASH if ((error = wait_scrn_saver_stop(scp->sc))) return error; #endif } scp->status |= MOUSE_HIDDEN; sc_remove_mouse_image(scp); sc_remove_cutmarking(scp); return 0; } void sc_alloc_scr_buffer(scr_stat *scp, int wait, int discard) { sc_vtb_t new; 
sc_vtb_t old; old = scp->vtb; sc_vtb_init(&new, VTB_MEMORY, scp->xsize, scp->ysize, NULL, wait); if (!discard && (old.vtb_flags & VTB_VALID)) { /* retain the current cursor position and buffer contents */ scp->cursor_oldpos = scp->cursor_pos; /* * This works only if the old buffer has the same size as or larger * than the new one. XXX */ sc_vtb_copy(&old, 0, &new, 0, scp->xsize*scp->ysize); scp->vtb = new; } else { scp->vtb = new; sc_vtb_destroy(&old); } #ifndef SC_NO_SYSMOUSE /* move the mouse cursor to the center of the screen */ sc_mouse_move(scp, scp->xpixel / 2, scp->ypixel / 2); #endif } static scr_stat *alloc_scp(sc_softc_t *sc, int vty) { scr_stat *scp; /* assert(sc_malloc) */ scp = (scr_stat *)malloc(sizeof(scr_stat), M_DEVBUF, M_WAITOK); init_scp(sc, vty, scp); sc_alloc_scr_buffer(scp, TRUE, TRUE); if (sc_init_emulator(scp, SC_DFLT_TERM)) sc_init_emulator(scp, "*"); #ifndef SC_NO_CUTPASTE sc_alloc_cut_buffer(scp, TRUE); #endif #ifndef SC_NO_HISTORY sc_alloc_history_buffer(scp, 0, 0, TRUE); #endif return scp; } static void init_scp(sc_softc_t *sc, int vty, scr_stat *scp) { video_info_t info; bzero(scp, sizeof(*scp)); scp->index = vty; scp->sc = sc; scp->status = 0; scp->mode = sc->initial_mode; vidd_get_info(sc->adp, scp->mode, &info); if (info.vi_flags & V_INFO_GRAPHICS) { scp->status |= GRAPHICS_MODE; scp->xpixel = info.vi_width; scp->ypixel = info.vi_height; scp->xsize = info.vi_width/info.vi_cwidth; scp->ysize = info.vi_height/info.vi_cheight; scp->font_size = 0; scp->font = NULL; } else { scp->xsize = info.vi_width; scp->ysize = info.vi_height; scp->xpixel = scp->xsize*info.vi_cwidth; scp->ypixel = scp->ysize*info.vi_cheight; } scp->font_size = info.vi_cheight; scp->font_width = info.vi_cwidth; #ifndef SC_NO_FONT_LOADING if (info.vi_cheight < 14) scp->font = sc->font_8; else if (info.vi_cheight >= 16) scp->font = sc->font_16; else scp->font = sc->font_14; #else scp->font = NULL; #endif sc_vtb_init(&scp->vtb, VTB_MEMORY, 0, 0, NULL, FALSE); #ifndef __sparc64__ sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, 0, 0, NULL, FALSE); #endif scp->xoff = scp->yoff = 0; scp->xpos = scp->ypos = 0; scp->start = scp->xsize * scp->ysize - 1; scp->end = 0; scp->tsw = NULL; scp->ts = NULL; scp->rndr = NULL; scp->border = (SC_NORM_ATTR >> 4) & 0x0f; scp->curr_curs_attr = scp->dflt_curs_attr = sc->curs_attr; scp->mouse_cut_start = scp->xsize*scp->ysize; scp->mouse_cut_end = -1; scp->mouse_signal = 0; scp->mouse_pid = 0; scp->mouse_proc = NULL; scp->kbd_mode = K_XLATE; scp->bell_pitch = bios_value.bell_pitch; scp->bell_duration = BELL_DURATION; scp->status |= (bios_value.shift_state & NLKED); scp->status |= CURSOR_ENABLED | MOUSE_HIDDEN; scp->pid = 0; scp->proc = NULL; scp->smode.mode = VT_AUTO; scp->history = NULL; scp->history_pos = 0; scp->history_size = 0; } int sc_init_emulator(scr_stat *scp, char *name) { sc_term_sw_t *sw; sc_rndr_sw_t *rndr; void *p; int error; if (name == NULL) /* if no name is given, use the current emulator */ sw = scp->tsw; else /* ...otherwise find the named emulator */ sw = sc_term_match(name); if (sw == NULL) return EINVAL; rndr = NULL; if (strcmp(sw->te_renderer, "*") != 0) { rndr = sc_render_match(scp, sw->te_renderer, scp->status & (GRAPHICS_MODE | PIXEL_MODE)); } if (rndr == NULL) { rndr = sc_render_match(scp, scp->sc->adp->va_name, scp->status & (GRAPHICS_MODE | PIXEL_MODE)); if (rndr == NULL) return ENODEV; } if (sw == scp->tsw) { error = (*sw->te_init)(scp, &scp->ts, SC_TE_WARM_INIT); scp->rndr = rndr; scp->rndr->init(scp); sc_clear_screen(scp); /* assert(error ==
0); */ return error; } if (sc_malloc && (sw->te_size > 0)) p = malloc(sw->te_size, M_DEVBUF, M_NOWAIT); else p = NULL; error = (*sw->te_init)(scp, &p, SC_TE_COLD_INIT); if (error) return error; if (scp->tsw) (*scp->tsw->te_term)(scp, &scp->ts); if (scp->ts != NULL) free(scp->ts, M_DEVBUF); scp->tsw = sw; scp->ts = p; scp->rndr = rndr; scp->rndr->init(scp); /* XXX */ (*sw->te_default_attr)(scp, user_default.std_color, user_default.rev_color); sc_clear_screen(scp); return 0; } /* * scgetc(flags) - get character from keyboard. * If flags & SCGETC_CN, then avoid harmful side effects. * If flags & SCGETC_NONBLOCK, then return NOKEY if there is nothing there, * else wait until a key is pressed. */ static u_int scgetc(sc_softc_t *sc, u_int flags, struct sc_cnstate *sp) { scr_stat *scp; #ifndef SC_NO_HISTORY struct tty *tp; #endif u_int c; int this_scr; int f; int i; if (sc->kbd == NULL) return NOKEY; next_code: #if 1 /* I don't like this, but... XXX */ if (flags & SCGETC_CN) sccnupdate(sc->cur_scp); #endif scp = sc->cur_scp; /* first see if there is something in the keyboard port */ for (;;) { c = kbdd_read_char(sc->kbd, !(flags & SCGETC_NONBLOCK)); if (c == ERRKEY) { if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); } else if (c == NOKEY) return c; else break; } /* make screensaver happy */ if (!(c & RELKEY)) sc_touch_scrn_saver(); if (!(flags & SCGETC_CN)) random_harvest_queue(&c, sizeof(c), 1, RANDOM_KEYBOARD); if (sc->kbd_open_level == 0 && scp->kbd_mode != K_XLATE) return KEYCHAR(c); /* if scroll-lock pressed allow history browsing */ if (!ISGRAPHSC(scp) && scp->history && scp->status & SLKED) { scp->status &= ~CURSOR_ENABLED; sc_remove_cursor_image(scp); #ifndef SC_NO_HISTORY if (!(scp->status & BUFFER_SAVED)) { scp->status |= BUFFER_SAVED; sc_hist_save(scp); } switch (c) { /* FIXME: key codes */ case SPCLKEY | FKEY | F(49): /* home key */ sc_remove_cutmarking(scp); sc_hist_home(scp); goto next_code; case SPCLKEY | FKEY | F(57): /* end key */ sc_remove_cutmarking(scp); sc_hist_end(scp); goto next_code; case SPCLKEY | FKEY | F(50): /* up arrow key */ sc_remove_cutmarking(scp); if (sc_hist_up_line(scp)) if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); goto next_code; case SPCLKEY | FKEY | F(58): /* down arrow key */ sc_remove_cutmarking(scp); if (sc_hist_down_line(scp)) if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); goto next_code; case SPCLKEY | FKEY | F(51): /* page up key */ sc_remove_cutmarking(scp); for (i = 0; i < scp->ysize; i++) if (sc_hist_up_line(scp)) { if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); break; } goto next_code; case SPCLKEY | FKEY | F(59): /* page down key */ sc_remove_cutmarking(scp); for (i = 0; i < scp->ysize; i++) if (sc_hist_down_line(scp)) { if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); break; } goto next_code; } #endif /* SC_NO_HISTORY */ } /* * Process and consume special keys here. Return a plain char code * or a char code with the META flag or a function key code. 
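* (In the returned code, RELKEY marks a key release, SPCLKEY a special * key, FKEY a function key, and MKEY a Meta-modified character; KEYCHAR() * recovers the plain character value.)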
*/ if (c & RELKEY) { /* key released */ /* goto next_code */ } else { /* key pressed */ if (c & SPCLKEY) { c &= ~SPCLKEY; switch (KEYCHAR(c)) { /* LOCKING KEYS */ case NLK: case CLK: case ALK: break; case SLK: (void)kbdd_ioctl(sc->kbd, KDGKBSTATE, (caddr_t)&f); if (f & SLKED) { scp->status |= SLKED; } else { if (scp->status & SLKED) { scp->status &= ~SLKED; #ifndef SC_NO_HISTORY if (scp->status & BUFFER_SAVED) { if (!sc_hist_restore(scp)) sc_remove_cutmarking(scp); scp->status &= ~BUFFER_SAVED; scp->status |= CURSOR_ENABLED; sc_draw_cursor_image(scp); } /* Only safe in Giant-locked context. */ tp = SC_DEV(sc, scp->index); if (!(flags & SCGETC_CN) && tty_opened_ns(tp)) sctty_outwakeup(tp); #endif } } break; case PASTE: #ifndef SC_NO_CUTPASTE sc_mouse_paste(scp); #endif break; /* NON-LOCKING KEYS */ case NOP: case LSH: case RSH: case LCTR: case RCTR: case LALT: case RALT: case ASH: case META: break; case BTAB: if (!(sc->flags & SC_SCRN_BLANKED)) return c; break; case SPSC: #ifdef DEV_SPLASH /* force activation/deactivation of the screen saver */ if (!(sc->flags & SC_SCRN_BLANKED)) { run_scrn_saver = TRUE; sc->scrn_time_stamp -= scrn_blank_time; } if (cold) { /* * While devices are being probed, the screen saver needs * to be invoked explicitly. XXX */ if (sc->flags & SC_SCRN_BLANKED) { scsplash_stick(FALSE); stop_scrn_saver(sc, current_saver); } else { if (!ISGRAPHSC(scp)) { scsplash_stick(TRUE); (*current_saver)(sc, TRUE); } } } #endif /* DEV_SPLASH */ break; case RBT: #ifndef SC_DISABLE_REBOOT if (enable_reboot && !(flags & SCGETC_CN)) shutdown_nice(0); #endif break; case HALT: #ifndef SC_DISABLE_REBOOT if (enable_reboot && !(flags & SCGETC_CN)) shutdown_nice(RB_HALT); #endif break; case PDWN: #ifndef SC_DISABLE_REBOOT if (enable_reboot && !(flags & SCGETC_CN)) shutdown_nice(RB_HALT|RB_POWEROFF); #endif break; case SUSP: power_pm_suspend(POWER_SLEEP_STATE_SUSPEND); break; case STBY: power_pm_suspend(POWER_SLEEP_STATE_STANDBY); break; case DBG: #ifndef SC_DISABLE_KDBKEY if (enable_kdbkey) kdb_break(); #endif break; case PNC: if (enable_panic_key) panic("Forced by the panic key"); break; case NEXT: this_scr = scp->index; for (i = (this_scr - sc->first_vty + 1)%sc->vtys; sc->first_vty + i != this_scr; i = (i + 1)%sc->vtys) { struct tty *tp = SC_DEV(sc, sc->first_vty + i); if (tty_opened_ns(tp)) { sc_switch_scr(scp->sc, sc->first_vty + i); break; } } break; case PREV: this_scr = scp->index; for (i = (this_scr - sc->first_vty + sc->vtys - 1)%sc->vtys; sc->first_vty + i != this_scr; i = (i + sc->vtys - 1)%sc->vtys) { struct tty *tp = SC_DEV(sc, sc->first_vty + i); if (tty_opened_ns(tp)) { sc_switch_scr(scp->sc, sc->first_vty + i); break; } } break; default: if (KEYCHAR(c) >= F_SCR && KEYCHAR(c) <= L_SCR) { sc_switch_scr(scp->sc, sc->first_vty + KEYCHAR(c) - F_SCR); break; } /* assert(c & FKEY) */ if (!(sc->flags & SC_SCRN_BLANKED)) return c; break; } /* goto next_code */ } else { /* regular keys (maybe MKEY is set) */ #if !defined(SC_DISABLE_KDBKEY) && defined(KDB) if (enable_kdbkey) kdb_alt_break(c, &sc->sc_altbrk); #endif if (!(sc->flags & SC_SCRN_BLANKED)) return c; } } goto next_code; } static int sctty_mmap(struct tty *tp, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr) { scr_stat *scp; scp = sc_get_stat(tp); if (scp != scp->sc->cur_scp) return -1; return vidd_mmap(scp->sc->adp, offset, paddr, nprot, memattr); } static void update_font(scr_stat *scp) { #ifndef SC_NO_FONT_LOADING /* load appropriate font */ if (!(scp->status & GRAPHICS_MODE)) { if (!(scp->status & 
PIXEL_MODE) && ISFONTAVAIL(scp->sc->adp->va_flags)) { if (scp->font_size < 14) { if (scp->sc->fonts_loaded & FONT_8) sc_load_font(scp, 0, 8, 8, scp->sc->font_8, 0, 256); } else if (scp->font_size >= 16) { if (scp->sc->fonts_loaded & FONT_16) sc_load_font(scp, 0, 16, 8, scp->sc->font_16, 0, 256); } else { if (scp->sc->fonts_loaded & FONT_14) sc_load_font(scp, 0, 14, 8, scp->sc->font_14, 0, 256); } /* * FONT KLUDGE: * This is an interim kludge to display correct font. * Always use the font page #0 on the video plane 2. * Somehow we cannot show the font in other font pages on * some video cards... XXX */ sc_show_font(scp, 0); } mark_all(scp); } #endif /* !SC_NO_FONT_LOADING */ } static int save_kbd_state(scr_stat *scp) { int state; int error; error = kbdd_ioctl(scp->sc->kbd, KDGKBSTATE, (caddr_t)&state); if (error == ENOIOCTL) error = ENODEV; if (error == 0) { scp->status &= ~LOCK_MASK; scp->status |= state; } return error; } static int update_kbd_state(scr_stat *scp, int new_bits, int mask) { int state; int error; if (mask != LOCK_MASK) { error = kbdd_ioctl(scp->sc->kbd, KDGKBSTATE, (caddr_t)&state); if (error == ENOIOCTL) error = ENODEV; if (error) return error; state &= ~mask; state |= new_bits & mask; } else { state = new_bits & LOCK_MASK; } error = kbdd_ioctl(scp->sc->kbd, KDSKBSTATE, (caddr_t)&state); if (error == ENOIOCTL) error = ENODEV; return error; } static int update_kbd_leds(scr_stat *scp, int which) { int error; which &= LOCK_MASK; error = kbdd_ioctl(scp->sc->kbd, KDSETLED, (caddr_t)&which); if (error == ENOIOCTL) error = ENODEV; return error; } int set_mode(scr_stat *scp) { video_info_t info; /* reject unsupported mode */ if (vidd_get_info(scp->sc->adp, scp->mode, &info)) return 1; /* if this vty is not currently showing, do nothing */ if (scp != scp->sc->cur_scp) return 0; /* setup video hardware for the given mode */ vidd_set_mode(scp->sc->adp, scp->mode); scp->rndr->init(scp); #ifndef __sparc64__ sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, scp->xsize, scp->ysize, (void *)scp->sc->adp->va_window, FALSE); #endif update_font(scp); sc_set_border(scp, scp->border); sc_set_cursor_image(scp); return 0; } void sc_set_border(scr_stat *scp, int color) { SC_VIDEO_LOCK(scp->sc); (*scp->rndr->draw_border)(scp, color); SC_VIDEO_UNLOCK(scp->sc); } #ifndef SC_NO_FONT_LOADING void sc_load_font(scr_stat *scp, int page, int size, int width, u_char *buf, int base, int count) { sc_softc_t *sc; sc = scp->sc; sc->font_loading_in_progress = TRUE; vidd_load_font(sc->adp, page, size, width, buf, base, count); sc->font_loading_in_progress = FALSE; } void sc_save_font(scr_stat *scp, int page, int size, int width, u_char *buf, int base, int count) { sc_softc_t *sc; sc = scp->sc; sc->font_loading_in_progress = TRUE; vidd_save_font(sc->adp, page, size, width, buf, base, count); sc->font_loading_in_progress = FALSE; } void sc_show_font(scr_stat *scp, int page) { vidd_show_font(scp->sc->adp, page); } #endif /* !SC_NO_FONT_LOADING */ void sc_paste(scr_stat *scp, const u_char *p, int count) { struct tty *tp; u_char *rmap; tp = SC_DEV(scp->sc, scp->sc->cur_scp->index); if (!tty_opened_ns(tp)) return; rmap = scp->sc->scr_rmap; for (; count > 0; --count) ttydisc_rint(tp, rmap[*p++], 0); ttydisc_rint_done(tp); } void sc_respond(scr_stat *scp, const u_char *p, int count, int wakeup) { struct tty *tp; tp = SC_DEV(scp->sc, scp->sc->cur_scp->index); if (!tty_opened_ns(tp)) return; ttydisc_rint_simple(tp, p, count); if (wakeup) { /* XXX: we can't always call ttydisc_rint_done() here! 
*/ ttydisc_rint_done(tp); } } void sc_bell(scr_stat *scp, int pitch, int duration) { if (cold || kdb_active || shutdown_in_progress || !enable_bell) return; if (scp != scp->sc->cur_scp && (scp->sc->flags & SC_QUIET_BELL)) return; if (scp->sc->flags & SC_VISUAL_BELL) { if (scp->sc->blink_in_progress) return; scp->sc->blink_in_progress = 3; if (scp != scp->sc->cur_scp) scp->sc->blink_in_progress += 2; blink_screen(scp->sc->cur_scp); } else if (duration != 0 && pitch != 0) { if (scp != scp->sc->cur_scp) pitch *= 2; sysbeep(1193182 / pitch, duration); } } static void blink_screen(void *arg) { scr_stat *scp = arg; struct tty *tp; if (ISGRAPHSC(scp) || (scp->sc->blink_in_progress <= 1)) { scp->sc->blink_in_progress = 0; mark_all(scp); tp = SC_DEV(scp->sc, scp->index); if (tty_opened_ns(tp)) sctty_outwakeup(tp); if (scp->sc->delayed_next_scr) sc_switch_scr(scp->sc, scp->sc->delayed_next_scr - 1); } else { (*scp->rndr->draw)(scp, 0, scp->xsize*scp->ysize, scp->sc->blink_in_progress & 1); scp->sc->blink_in_progress--; callout_reset_sbt(&scp->sc->cblink, SBT_1S / 15, 0, blink_screen, scp, C_PREL(0)); } } /* * Until sc_attach_unit() gets called no dev structures will be available * to store the per-screen current status. This is the case when the * kernel is initially booting and needs access to its console. During * this early phase of booting the console's current status is kept in * one statically defined scr_stat structure, and any pointers to the * dev structures will be NULL. */ static scr_stat * sc_get_stat(struct tty *tp) { if (tp == NULL) return (&main_console); return (SC_STAT(tp)); } /* * Allocate active keyboard. Try to allocate "kbdmux" keyboard first, and, * if found, add all non-busy keyboards to "kbdmux". Otherwise look for * any keyboard. */ static int sc_allocate_keyboard(sc_softc_t *sc, int unit) { int idx0, idx; keyboard_t *k0, *k; keyboard_info_t ki; idx0 = kbd_allocate("kbdmux", -1, (void *)&sc->keyboard, sckbdevent, sc); if (idx0 != -1) { k0 = kbd_get_keyboard(idx0); for (idx = kbd_find_keyboard2("*", -1, 0); idx != -1; idx = kbd_find_keyboard2("*", -1, idx + 1)) { k = kbd_get_keyboard(idx); if (idx == idx0 || KBD_IS_BUSY(k)) continue; bzero(&ki, sizeof(ki)); strcpy(ki.kb_name, k->kb_name); ki.kb_unit = k->kb_unit; (void)kbdd_ioctl(k0, KBADDKBD, (caddr_t) &ki); } } else idx0 = kbd_allocate("*", unit, (void *)&sc->keyboard, sckbdevent, sc); return (idx0); } Index: projects/clang390-import/sys/dev/uart/uart_cpu_powerpc.c =================================================================== --- projects/clang390-import/sys/dev/uart/uart_cpu_powerpc.c (revision 305016) +++ projects/clang390-import/sys/dev/uart/uart_cpu_powerpc.c (revision 305017) @@ -1,203 +1,203 @@ /*- * Copyright (c) 2006 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include +#include #include #include #include bus_space_tag_t uart_bus_space_io = &bs_le_tag; bus_space_tag_t uart_bus_space_mem = &bs_le_tag; int uart_cpu_eqres(struct uart_bas *b1, struct uart_bas *b2) { return ((pmap_kextract(b1->bsh) == pmap_kextract(b2->bsh)) ? 1 : 0); } static int ofw_get_uart_console(phandle_t opts, phandle_t *result, const char *inputdev, const char *outputdev) { char buf[64]; phandle_t input; if (OF_getprop(opts, inputdev, buf, sizeof(buf)) == -1) return (ENXIO); input = OF_finddevice(buf); if (input == -1) return (ENXIO); if (outputdev != NULL) { if (OF_getprop(opts, outputdev, buf, sizeof(buf)) == -1) return (ENXIO); if (OF_finddevice(buf) != input) return (ENXIO); } *result = input; return (0); } static int ofw_get_console_phandle_path(phandle_t node, phandle_t *result, const char *prop) { union { char buf[64]; phandle_t ref; } field; phandle_t output; ssize_t size; size = OF_getproplen(node, prop); if (size == -1) return (ENXIO); OF_getprop(node, prop, &field, sizeof(field)); /* This property might be either a ihandle or path. Hooray. */ output = -1; if (field.buf[size - 1] == 0) output = OF_finddevice(field.buf); if (output == -1 && size == 4) output = OF_instance_to_package(field.ref); if (output != -1) { *result = output; return (0); } return (ENXIO); } int uart_cpu_getdev(int devtype, struct uart_devinfo *di) { char buf[64]; struct uart_class *class; phandle_t input, opts, chosen; int error; opts = OF_finddevice("/options"); chosen = OF_finddevice("/chosen"); switch (devtype) { case UART_DEV_CONSOLE: error = ENXIO; if (chosen != -1 && error != 0) error = ofw_get_uart_console(chosen, &input, "stdout-path", NULL); if (chosen != -1 && error != 0) error = ofw_get_uart_console(chosen, &input, "linux,stdout-path", NULL); if (chosen != -1 && error != 0) error = ofw_get_console_phandle_path(chosen, &input, "stdout"); if (chosen != -1 && error != 0) error = ofw_get_uart_console(chosen, &input, "stdin-path", NULL); if (chosen != -1 && error != 0) error = ofw_get_console_phandle_path(chosen, &input, "stdin"); if (opts != -1 && error != 0) error = ofw_get_uart_console(opts, &input, "input-device", "output-device"); if (opts != -1 && error != 0) error = ofw_get_uart_console(opts, &input, "input-device-1", "output-device-1"); if (error != 0) { input = OF_finddevice("serial0"); /* Last ditch */ if (input == -1) error = (ENXIO); } if (error != 0) return (error); break; case UART_DEV_DBGPORT: if (!getenv_string("hw.uart.dbgport", buf, sizeof(buf))) return (ENXIO); input = OF_finddevice(buf); if (input == -1) return (ENXIO); break; default: return (EINVAL); } if (OF_getprop(input, "device_type", buf, sizeof(buf)) == -1) return (ENXIO); if (strcmp(buf, "serial") != 0) return (ENXIO); - if (OF_getprop(input, "compatible", buf, sizeof(buf)) == -1) - return (ENXIO); - if (strncmp(buf, "chrp,es", 7) == 0) { + if (ofw_bus_node_is_compatible(input, "chrp,es")) { class = &uart_z8530_class; di->bas.regshft = 4; 
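/* A register shift of 4 spaces the Z8530 registers 16 bytes apart. */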
di->bas.chan = 1; - } else if (strcmp(buf,"ns16550") == 0 || strcmp(buf,"ns8250") == 0) { + } else if (ofw_bus_node_is_compatible(input,"ns16550") || + ofw_bus_node_is_compatible(input,"ns8250")) { class = &uart_ns8250_class; di->bas.regshft = 0; di->bas.chan = 0; } else return (ENXIO); if (class == NULL) return (ENXIO); error = OF_decode_addr(input, 0, &di->bas.bst, &di->bas.bsh, NULL); if (error) return (error); di->ops = uart_getops(class); if (OF_getprop(input, "clock-frequency", &di->bas.rclk, sizeof(di->bas.rclk)) == -1) di->bas.rclk = 230400; if (OF_getprop(input, "current-speed", &di->baudrate, sizeof(di->baudrate)) == -1) di->baudrate = 0; OF_getprop(input, "reg-shift", &di->bas.regshft, sizeof(di->bas.regshft)); di->databits = 8; di->stopbits = 1; di->parity = UART_PARITY_NONE; return (0); } Index: projects/clang390-import/sys/i386/i386/pmap.c =================================================================== --- projects/clang390-import/sys/i386/i386/pmap.c (revision 305016) +++ projects/clang390-import/sys/i386/i386/pmap.c (revision 305017) @@ -1,5628 +1,5624 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. 
* * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. 
*/ #include "opt_apic.h" #include "opt_cpu.h" #include "opt_pmap.h" #include "opt_smp.h" #include "opt_xbox.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_APIC #include #include #include #endif #include #include #include #include #include #ifdef SMP #include #endif #ifdef XBOX #include #endif #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pa_index(pa) ((pa) >> PDRSHIFT) #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) /* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) #define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_set_w(pte, v) ((v) ? atomic_set_int((u_int *)(pte), PG_W) : \ atomic_clear_int((u_int *)(pte), PG_W)) #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) struct pmap kernel_pmap_store; LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps; static struct mtx allpmaps_lock; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ int pgeflag = 0; /* PG_G or-in */ int pseflag = 0; /* PG_PS or-in */ static int nkpt = NKPT; vm_offset_t kernel_vm_end = KERNBASE + NKPT * NBPDR; extern u_int32_t KERNend; extern u_int32_t KPTphys; #if defined(PAE) || defined(PAE_TABLES) pt_entry_t pg_nx; static uma_zone_t pdptzone; #endif static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); static int pat_works = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1, "Is page attribute table fully functional?"); static int pg_ps_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pg_ps_enabled, 0, "Are large page mappings enabled?"); #define PAT_INDEX_SIZE 8 static int pat_index[PAT_INDEX_SIZE]; /* cache mode to PAT index conversion */ /* * pmap_mapdev support pre initialization (i.e. 
console) */ #define PMAP_PREINIT_MAPPING_COUNT 8 static struct pmap_preinit_mapping { vm_paddr_t pa; vm_offset_t va; vm_size_t sz; int mode; } pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT]; static int pmap_initialized; static struct rwlock_padalign pvh_global_lock; /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; static struct md_page *pv_table; static int shpgperproc = PMAP_SHPGPERPROC; struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ int pv_maxchunks; /* How many chunks we have KVA for */ vm_offset_t pv_vafree; /* freelist stored in the PTE */ /* * All those kernel PT submaps that BSD is so fond of */ struct sysmaps { struct mtx lock; pt_entry_t *CMAP1; pt_entry_t *CMAP2; caddr_t CADDR1; caddr_t CADDR2; }; static struct sysmaps sysmaps_pcpu[MAXCPU]; pt_entry_t *CMAP3; static pd_entry_t *KPTD; caddr_t ptvmmap = 0; caddr_t CADDR3; struct msgbuf *msgbufp = NULL; /* * Crashdump maps. */ static caddr_t crashdumpmap; static pt_entry_t *PMAP1 = NULL, *PMAP2; static pt_entry_t *PADDR1 = NULL, *PADDR2; #ifdef SMP static int PMAP1cpu; static int PMAP1changedcpu; SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, &PMAP1changedcpu, 0, "Number of times pmap_pte_quick changed CPU with same PMAP1"); #endif static int PMAP1changed; SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, &PMAP1changed, 0, "Number of times pmap_pte_quick changed PMAP1"); static int PMAP1unchanged; SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, &PMAP1unchanged, 0, "Number of times pmap_pte_quick didn't change PMAP1"); static struct mtx PMAP2mutex; static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try); static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static int pmap_pvh_wired_mappings(struct md_page *pvh, int count); static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte); static void pmap_flush_page(vm_page_t m); static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); static boolean_t pmap_is_modified_pvh(struct md_page *pvh); static boolean_t pmap_is_referenced_pvh(struct md_page *pvh); static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde); static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va); static void pmap_pde_attr(pd_entry_t *pde, int cache_bits); static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot); static void pmap_pte_attr(pt_entry_t *pte, int cache_bits); static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, struct spglist *free); static int 
pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, struct spglist *free); static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_remove_page(struct pmap *pmap, vm_offset_t va, struct spglist *free); static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde); static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags); static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags); static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free); static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); static void pmap_pte_release(pt_entry_t *pte); static int pmap_unuse_pt(pmap_t, vm_offset_t, struct spglist *); #if defined(PAE) || defined(PAE_TABLES) static void *pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait); #endif static void pmap_set_pg(void); static __inline void pagezero(void *page); CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); /* * If you get an error here, then you set KVA_PAGES wrong! See the * description of KVA_PAGES in sys/i386/include/pmap.h. It must be * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE. */ CTASSERT(KERNBASE % (1 << 24) == 0); /* * Bootstrap the system enough to run with virtual memory. * * On the i386 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ void pmap_bootstrap(vm_paddr_t firstaddr) { vm_offset_t va; pt_entry_t *pte, *unused; struct sysmaps *sysmaps; int i; /* * Add a physical memory segment (vm_phys_seg) corresponding to the * preallocated kernel page table pages so that vm_page structures * representing these pages will be created. The vm_page structures * are required for promotion of the corresponding kernel virtual * addresses to superpage mappings. */ vm_phys_add_seg(KPTphys, KPTphys + ptoa(nkpt)); /* * Initialize the first available kernel virtual address. However, * using "firstaddr" may waste a few pages of the kernel virtual * address space, because locore may not have mapped every physical * page that it allocated. Preferably, locore would provide a first * unused virtual address in addition to "firstaddr". */ virtual_avail = (vm_offset_t) KERNBASE + firstaddr; virtual_end = VM_MAX_KERNEL_ADDRESS; /* * Initialize the kernel pmap (which is statically allocated). */ PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); #if defined(PAE) || defined(PAE_TABLES) kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); #endif CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); /* * Initialize the global pv list lock. */ rw_init(&pvh_global_lock, "pmap pv global"); LIST_INIT(&allpmaps); /* * Request a spin mutex so that changes to allpmaps cannot be * preempted by smp_rendezvous_cpus(). 
Otherwise, * pmap_update_pde_kernel() could access allpmaps while it is * being changed. */ mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = vtopte(va); /* * CMAP1/CMAP2 are used for zeroing and copying pages. * CMAP3 is used for the idle process page zeroing. */ for (i = 0; i < MAXCPU; i++) { sysmaps = &sysmaps_pcpu[i]; mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF); SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1) SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1) } SYSMAP(caddr_t, CMAP3, CADDR3, 1) /* * Crashdump maps. */ SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS) /* * ptvmmap is used for reading arbitrary physical pages via /dev/mem. */ SYSMAP(caddr_t, unused, ptvmmap, 1) /* * msgbufp is used to map the system message buffer. */ SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(msgbufsize))) /* * KPTmap is used by pmap_kextract(). * * KPTmap is first initialized by locore. However, that initial * KPTmap can only support NKPT page table pages. Here, a larger * KPTmap is created that can support KVA_PAGES page table pages. */ SYSMAP(pt_entry_t *, KPTD, KPTmap, KVA_PAGES) for (i = 0; i < NKPT; i++) KPTD[i] = (KPTphys + (i << PAGE_SHIFT)) | pgeflag | PG_RW | PG_V; /* * Adjust the start of the KPTD and KPTmap so that the implementation * of pmap_kextract() and pmap_growkernel() can be made simpler. */ KPTD -= KPTDI; KPTmap -= i386_btop(KPTDI << PDRSHIFT); /* * PADDR1 and PADDR2 are used by pmap_pte_quick() and pmap_pte(), * respectively. */ SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1) SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1) mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF); virtual_avail = va; /* * Leave in place an identity mapping (virt == phys) for the low 1 MB * physical memory region that is used by the ACPI wakeup code. This * mapping must not have PG_G set. */ #ifdef XBOX /* FIXME: This is gross, but needed for the XBOX. Since we are at such * an early stage, we cannot yet neatly map video memory ... :-( * Better fixes are very welcome! */ if (!arch_i386_is_xbox) #endif for (i = 1; i < NKPT; i++) PTD[i] = 0; /* Initialize the PAT MSR if present. */ pmap_init_pat(); /* Turn on PG_G on kernel page(s) */ pmap_set_pg(); } static void pmap_init_qpages(void) { struct pcpu *pc; int i; CPU_FOREACH(i) { pc = pcpu_find(i); pc->pc_qmap_addr = kva_alloc(PAGE_SIZE); if (pc->pc_qmap_addr == 0) panic("pmap_init_qpages: unable to allocate KVA"); } } SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, pmap_init_qpages, NULL); /* * Set up the PAT MSR. */ void pmap_init_pat(void) { int pat_table[PAT_INDEX_SIZE]; uint64_t pat_msr; u_long cr0, cr4; int i; /* Set default PAT index table. */ for (i = 0; i < PAT_INDEX_SIZE; i++) pat_table[i] = -1; pat_table[PAT_WRITE_BACK] = 0; pat_table[PAT_WRITE_THROUGH] = 1; pat_table[PAT_UNCACHEABLE] = 3; pat_table[PAT_WRITE_COMBINING] = 3; pat_table[PAT_WRITE_PROTECTED] = 3; pat_table[PAT_UNCACHED] = 3; /* Bail if this CPU doesn't implement PAT. */ if ((cpu_feature & CPUID_PAT) == 0) { for (i = 0; i < PAT_INDEX_SIZE; i++) pat_index[i] = pat_table[i]; pat_works = 0; return; } /* * Due to some Intel errata, we can only safely use the lower 4 * PAT entries. 
* * Intel Pentium III Processor Specification Update * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B * or Mode C Paging) * * Intel Pentium IV Processor Specification Update * Errata N46 (PAT Index MSB May Be Calculated Incorrectly) */ if (cpu_vendor_id == CPU_VENDOR_INTEL && !(CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe)) pat_works = 0; /* Initialize default PAT entries. */ pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) | PAT_VALUE(1, PAT_WRITE_THROUGH) | PAT_VALUE(2, PAT_UNCACHED) | PAT_VALUE(3, PAT_UNCACHEABLE) | PAT_VALUE(4, PAT_WRITE_BACK) | PAT_VALUE(5, PAT_WRITE_THROUGH) | PAT_VALUE(6, PAT_UNCACHED) | PAT_VALUE(7, PAT_UNCACHEABLE); if (pat_works) { /* * Leave the indices 0-3 at the default of WB, WT, UC-, and UC. * Program 5 and 6 as WP and WC. * Leave 4 and 7 as WB and UC. */ pat_msr &= ~(PAT_MASK(5) | PAT_MASK(6)); pat_msr |= PAT_VALUE(5, PAT_WRITE_PROTECTED) | PAT_VALUE(6, PAT_WRITE_COMBINING); pat_table[PAT_UNCACHED] = 2; pat_table[PAT_WRITE_PROTECTED] = 5; pat_table[PAT_WRITE_COMBINING] = 6; } else { /* * Just replace PAT Index 2 with WC instead of UC-. */ pat_msr &= ~PAT_MASK(2); pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); pat_table[PAT_WRITE_COMBINING] = 2; } /* Disable PGE. */ cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); /* Disable caches (CD = 1, NW = 0). */ cr0 = rcr0(); load_cr0((cr0 & ~CR0_NW) | CR0_CD); /* Flushes caches and TLBs. */ wbinvd(); invltlb(); /* Update PAT and index table. */ wrmsr(MSR_PAT, pat_msr); for (i = 0; i < PAT_INDEX_SIZE; i++) pat_index[i] = pat_table[i]; /* Flush caches and TLBs again. */ wbinvd(); invltlb(); /* Restore caches and PGE. */ load_cr0(cr0); load_cr4(cr4); } /* * Set PG_G on kernel pages. Only the BSP calls this when SMP is turned on. */ static void pmap_set_pg(void) { pt_entry_t *pte; vm_offset_t va, endva; if (pgeflag == 0) return; endva = KERNBASE + KERNend; if (pseflag) { va = KERNBASE + KERNLOAD; while (va < endva) { pdir_pde(PTD, va) |= pgeflag; invltlb(); /* Flush non-PG_G entries. */ va += NBPDR; } } else { va = (vm_offset_t)btext; while (va < endva) { pte = vtopte(va); if (*pte) *pte |= pgeflag; invltlb(); /* Flush non-PG_G entries. */ va += PAGE_SIZE; } } } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pat_mode = PAT_WRITE_BACK; } #if defined(PAE) || defined(PAE_TABLES) static void * pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) { /* Inform UMA that this allocator uses kernel_map/object. */ *flags = UMA_SLAB_KERNEL; return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait, 0x0ULL, 0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT)); } #endif /* * Abuse the pte nodes for unmapped kva to thread a kva freelist through. * Requirements: * - Must deal with pages in order to ensure that none of the PG_* bits * are ever set, PG_V in particular. * - Assumes we can write to ptes without pte_store() atomic ops, even * on PAE systems. This should be ok. * - Assumes nothing will ever test these addresses for 0 to indicate * no mapping instead of correctly checking PG_V. * - Assumes a vm_offset_t will fit in a pte (true for i386). * Because PG_V is never set, there can be no mappings to invalidate. 
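* For example, pmap_ptelist_init(&head, base, 3) leaves head == base, with * base's pte slot holding base + PAGE_SIZE, that page's slot holding * base + 2 * PAGE_SIZE, and a 0 in the last slot to terminate the list.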
*/ static vm_offset_t pmap_ptelist_alloc(vm_offset_t *head) { pt_entry_t *pte; vm_offset_t va; va = *head; if (va == 0) panic("pmap_ptelist_alloc: exhausted ptelist KVA"); pte = vtopte(va); *head = *pte; if (*head & PG_V) panic("pmap_ptelist_alloc: va with PG_V set!"); *pte = 0; return (va); } static void pmap_ptelist_free(vm_offset_t *head, vm_offset_t va) { pt_entry_t *pte; if (va & PG_V) panic("pmap_ptelist_free: freeing va with PG_V set!"); pte = vtopte(va); *pte = *head; /* virtual! PG_V is 0 though */ *head = va; } static void pmap_ptelist_init(vm_offset_t *head, void *base, int npages) { int i; vm_offset_t va; *head = 0; for (i = npages - 1; i >= 0; i--) { va = (vm_offset_t)base + i * PAGE_SIZE; pmap_ptelist_free(head, va); } } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { struct pmap_preinit_mapping *ppim; vm_page_t mpte; vm_size_t s; int i, pv_npg; /* * Initialize the vm page array entries for the kernel pmap's * page table pages. */ for (i = 0; i < NKPT; i++) { mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT)); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_init: page table page is out of range")); mpte->pindex = i + KPTDI; mpte->phys_addr = KPTphys + (i << PAGE_SHIFT); } /* * Initialize the address space (zone) for the pv entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); pv_entry_max = roundup(pv_entry_max, _NPCPV); pv_entry_high_water = 9 * (pv_entry_max / 10); /* * If the kernel is running on a virtual machine, then it must assume * that MCA is enabled by the hypervisor. Moreover, the kernel must * be prepared for the hypervisor changing the vendor and family that * are reported by CPUID. Consequently, the workaround for AMD Family * 10h Erratum 383 is enabled if the processor's feature set does not * include at least one feature that is only supported by older Intel * or newer AMD processors. */ if (vm_guest != VM_GUEST_NO && (cpu_feature & CPUID_SS) == 0 && (cpu_feature2 & (CPUID2_SSSE3 | CPUID2_SSE41 | CPUID2_AESNI | CPUID2_AVX | CPUID2_XSAVE)) == 0 && (amd_feature2 & (AMDID2_XOP | AMDID2_FMA4)) == 0) workaround_erratum383 = 1; /* * Are large page mappings supported and enabled? */ TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled); if (pseflag == 0) pg_ps_enabled = 0; else if (pg_ps_enabled) { KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, ("pmap_init: can't assign to pagesizes[1]")); pagesizes[1] = NBPDR; } /* * Calculate the size of the pv head table for superpages. * Handle the possibility that "vm_phys_segs[...].end" is zero. */ pv_npg = trunc_4mpage(vm_phys_segs[vm_phys_nsegs - 1].end - PAGE_SIZE) / NBPDR + 1; /* * Allocate memory for the pv head table for superpages. 
*/ s = (vm_size_t)(pv_npg * sizeof(struct md_page)); s = round_page(s); pv_table = (struct md_page *)kmem_malloc(kernel_arena, s, M_WAITOK | M_ZERO); for (i = 0; i < pv_npg; i++) TAILQ_INIT(&pv_table[i].pv_list); pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc); pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks); if (pv_chunkbase == NULL) panic("pmap_init: not enough kvm for pv chunks"); pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks); #if defined(PAE) || defined(PAE_TABLES) pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, UMA_ZONE_VM | UMA_ZONE_NOFREE); uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); #endif pmap_initialized = 1; if (!bootverbose) return; for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == 0) continue; printf("PPIM %u: PA=%#jx, VA=%#x, size=%#x, mode=%#x\n", i, (uintmax_t)ppim->pa, ppim->va, ppim->sz, ppim->mode); } } SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, "Max number of PV entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, "Page share factor per proc"); static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, "2/4MB page mapping counters"); static u_long pmap_pde_demotions; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD, &pmap_pde_demotions, 0, "2/4MB page demotions"); static u_long pmap_pde_mappings; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, &pmap_pde_mappings, 0, "2/4MB page mappings"); static u_long pmap_pde_p_failures; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD, &pmap_pde_p_failures, 0, "2/4MB page promotion failures"); static u_long pmap_pde_promotions; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD, &pmap_pde_promotions, 0, "2/4MB page promotions"); /*************************************************** * Low level helper routines..... ***************************************************/ /* * Determine the appropriate bits to set in a PTE or PDE for a specified * caching mode. */ int pmap_cache_bits(int mode, boolean_t is_pde) { int cache_bits, pat_flag, pat_idx; if (mode < 0 || mode >= PAT_INDEX_SIZE || pat_index[mode] < 0) panic("Unknown caching mode %d\n", mode); /* The PAT bit is different for PTE's and PDE's. */ pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT; /* Map the caching mode to a PAT index. */ pat_idx = pat_index[mode]; /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */ cache_bits = 0; if (pat_idx & 0x4) cache_bits |= pat_flag; if (pat_idx & 0x2) cache_bits |= PG_NC_PCD; if (pat_idx & 0x1) cache_bits |= PG_NC_PWT; return (cache_bits); } /* * The caller is responsible for maintaining TLB consistency. */ static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde) { pd_entry_t *pde; pmap_t pmap; boolean_t PTD_updated; PTD_updated = FALSE; mtx_lock_spin(&allpmaps_lock); LIST_FOREACH(pmap, &allpmaps, pm_list) { if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)) PTD_updated = TRUE; pde = pmap_pde(pmap, va); pde_store(pde, newpde); } mtx_unlock_spin(&allpmaps_lock); KASSERT(PTD_updated, ("pmap_kenter_pde: current page table is not in allpmaps")); } /* * After changing the page size for the specified virtual address in the page * table, flush the corresponding entries from the processor's TLB. Only the * calling processor's TLB is affected. * * The calling thread must be pinned to a processor. 
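* (The SMP version of pmap_update_pde() below, for example, brackets the * call with sched_pin()/sched_unpin().)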
*/ static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde) { u_long cr4; if ((newpde & PG_PS) == 0) /* Demotion: flush a specific 2MB page mapping. */ invlpg(va); else if ((newpde & PG_G) == 0) /* * Promotion: flush every 4KB page mapping from the TLB * because there are too many to flush individually. */ invltlb(); else { /* * Promotion: flush every 4KB page mapping from the TLB, * including any global (PG_G) mappings. */ cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); /* * Although preemption at this point could be detrimental to * performance, it would not lead to an error. PG_G is simply * ignored if CR4.PGE is clear. Moreover, in case this block * is re-entered, the load_cr4() either above or below will * modify CR4.PGE flushing the TLB. */ load_cr4(cr4 | CR4_PGE); } } void invltlb_glob(void) { uint64_t cr4; if (pgeflag == 0) { invltlb(); } else { cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); load_cr4(cr4 | CR4_PGE); } } #ifdef SMP /* * For SMP, these functions have to use the IPI mechanism for coherence. * * N.B.: Before calling any of the following TLB invalidation functions, * the calling processor must ensure that all stores updating a non- * kernel page table are globally performed. Otherwise, another * processor could cache an old, pre-update entry without being * invalidated. This can happen one of two ways: (1) The pmap becomes * active on another processor after its pm_active field is checked by * one of the following functions but before a store updating the page * table is globally performed. (2) The pmap becomes active on another * processor before its pm_active field is checked but due to * speculative loads one of the following functions still reads the * pmap as inactive on the other processor. * * The kernel page table is exempt because its pm_active field is * immutable. The kernel page table is always active on every * processor. 
*/ void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { cpuset_t *mask, other_cpus; u_int cpuid; sched_pin(); if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invlpg(va); mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); if (CPU_ISSET(cpuid, &pmap->pm_active)) invlpg(va); CPU_AND(&other_cpus, &pmap->pm_active); mask = &other_cpus; } smp_masked_invlpg(*mask, va); sched_unpin(); } /* 4k PTEs -- Chosen to exceed the total size of Broadwell L2 TLB */ #define PMAP_INVLPG_THRESHOLD (4 * 1024 * PAGE_SIZE) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { cpuset_t *mask, other_cpus; vm_offset_t addr; u_int cpuid; if (eva - sva >= PMAP_INVLPG_THRESHOLD) { pmap_invalidate_all(pmap); return; } sched_pin(); if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); if (CPU_ISSET(cpuid, &pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); CPU_AND(&other_cpus, &pmap->pm_active); mask = &other_cpus; } smp_masked_invlpg_range(*mask, sva, eva); sched_unpin(); } void pmap_invalidate_all(pmap_t pmap) { cpuset_t *mask, other_cpus; u_int cpuid; sched_pin(); if (pmap == kernel_pmap) { invltlb_glob(); mask = &all_cpus; } else if (!CPU_CMP(&pmap->pm_active, &all_cpus)) { invltlb(); mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); if (CPU_ISSET(cpuid, &pmap->pm_active)) invltlb(); CPU_AND(&other_cpus, &pmap->pm_active); mask = &other_cpus; } smp_masked_invltlb(*mask, pmap); sched_unpin(); } void pmap_invalidate_cache(void) { sched_pin(); wbinvd(); smp_cache_flush(); sched_unpin(); } struct pde_action { cpuset_t invalidate; /* processors that invalidate their TLB */ vm_offset_t va; pd_entry_t *pde; pd_entry_t newpde; u_int store; /* processor that updates the PDE */ }; static void pmap_update_pde_kernel(void *arg) { struct pde_action *act = arg; pd_entry_t *pde; pmap_t pmap; if (act->store == PCPU_GET(cpuid)) { /* * Elsewhere, this operation requires allpmaps_lock for * synchronization. Here, it does not because it is being * performed in the context of an all_cpus rendezvous. */ LIST_FOREACH(pmap, &allpmaps, pm_list) { pde = pmap_pde(pmap, act->va); pde_store(pde, act->newpde); } } } static void pmap_update_pde_user(void *arg) { struct pde_action *act = arg; if (act->store == PCPU_GET(cpuid)) pde_store(act->pde, act->newpde); } static void pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; if (CPU_ISSET(PCPU_GET(cpuid), &act->invalidate)) pmap_update_pde_invalidate(act->va, act->newpde); } /* * Change the page size for the specified virtual address in a way that * prevents any possibility of the TLB ever having two entries that map the * same virtual address using different page sizes. This is the recommended * workaround for Erratum 383 on AMD Family 10h processors. It prevents a * machine check exception for a TLB state that is improperly diagnosed as a * hardware error. 
*/ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; cpuset_t active, other_cpus; u_int cpuid; sched_pin(); cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); if (pmap == kernel_pmap) active = all_cpus; else active = pmap->pm_active; if (CPU_OVERLAP(&active, &other_cpus)) { act.store = cpuid; act.invalidate = active; act.va = va; act.pde = pde; act.newpde = newpde; CPU_SET(cpuid, &active); smp_rendezvous_cpus(active, smp_no_rendevous_barrier, pmap == kernel_pmap ? pmap_update_pde_kernel : pmap_update_pde_user, pmap_update_pde_teardown, &act); } else { if (pmap == kernel_pmap) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); if (CPU_ISSET(cpuid, &active)) pmap_update_pde_invalidate(va, newpde); } sched_unpin(); } #else /* !SMP */ /* * Normal, non-SMP, 486+ invalidation functions. * We inline these within pmap.c for speed. */ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { if (pmap == kernel_pmap) invltlb_glob(); else if (!CPU_EMPTY(&pmap->pm_active)) invltlb(); } PMAP_INLINE void pmap_invalidate_cache(void) { wbinvd(); } static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { if (pmap == kernel_pmap) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) pmap_update_pde_invalidate(va, newpde); } #endif /* !SMP */ #define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024) void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force) { if (force) { sva &= ~(vm_offset_t)cpu_clflush_line_size; } else { KASSERT((sva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: sva not page-aligned")); KASSERT((eva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: eva not page-aligned")); } if ((cpu_feature & CPUID_SS) != 0 && !force) ; /* If "Self Snoop" is supported and allowed, do nothing. */ else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 && eva - sva < PMAP_CLFLUSH_THRESHOLD) { #ifdef DEV_APIC /* * XXX: Some CPUs fault, hang, or trash the local APIC * registers if we use CLFLUSH on the local APIC * range. The local APIC is always uncached, so we * don't need to flush for that range anyway. */ if (pmap_kextract(sva) == lapic_paddr) return; #endif /* * Otherwise, do per-cache line flush. Use the mfence * instruction to insure that previous stores are * included in the write-back. The processor * propagates flush to other processors in the cache * coherence domain. */ mfence(); for (; sva < eva; sva += cpu_clflush_line_size) clflushopt(sva); mfence(); } else if ((cpu_feature & CPUID_CLFSH) != 0 && eva - sva < PMAP_CLFLUSH_THRESHOLD) { #ifdef DEV_APIC if (pmap_kextract(sva) == lapic_paddr) return; #endif /* * Writes are ordered by CLFLUSH on Intel CPUs. */ if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); for (; sva < eva; sva += cpu_clflush_line_size) clflush(sva); if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); } else { /* * No targeted cache flush methods are supported by CPU, * or the supplied range is bigger than 2MB. * Globally invalidate cache. 
*/ pmap_invalidate_cache(); } } void pmap_invalidate_cache_pages(vm_page_t *pages, int count) { int i; if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE || (cpu_feature & CPUID_CLFSH) == 0) { pmap_invalidate_cache(); } else { for (i = 0; i < count; i++) pmap_flush_page(pages[i]); } } /* * Are we current address space or kernel? */ static __inline int pmap_is_current(pmap_t pmap) { return (pmap == kernel_pmap || pmap == vmspace_pmap(curthread->td_proc->p_vmspace)); } /* * If the given pmap is not the current or kernel pmap, the returned pte must * be released by passing it to pmap_pte_release(). */ pt_entry_t * pmap_pte(pmap_t pmap, vm_offset_t va) { pd_entry_t newpf; pd_entry_t *pde; pde = pmap_pde(pmap, va); if (*pde & PG_PS) return (pde); if (*pde != 0) { /* are we current address space or kernel? */ if (pmap_is_current(pmap)) return (vtopte(va)); mtx_lock(&PMAP2mutex); newpf = *pde & PG_FRAME; if ((*PMAP2 & PG_FRAME) != newpf) { *PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M; pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); } return (PADDR2 + (i386_btop(va) & (NPTEPG - 1))); } return (NULL); } /* * Releases a pte that was obtained from pmap_pte(). Be prepared for the pte * being NULL. */ static __inline void pmap_pte_release(pt_entry_t *pte) { if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) mtx_unlock(&PMAP2mutex); } /* * NB: The sequence of updating a page table followed by accesses to the * corresponding pages is subject to the situation described in the "AMD64 * Architecture Programmer's Manual Volume 2: System Programming" rev. 3.23, * "7.3.1 Special Coherency Considerations". Therefore, issuing the INVLPG * right after modifying the PTE bits is crucial. */ static __inline void invlcaddr(void *caddr) { invlpg((u_int)caddr); } /* * Super fast pmap_pte routine best used when scanning * the pv lists. This eliminates many coarse-grained * invltlb calls. Note that many of the pv list * scans are across different pmaps. It is very wasteful * to do an entire invltlb for checking a single mapping. * * If the given pmap is not the current pmap, pvh_global_lock * must be held and curthread pinned to a CPU. */ static pt_entry_t * pmap_pte_quick(pmap_t pmap, vm_offset_t va) { pd_entry_t newpf; pd_entry_t *pde; pde = pmap_pde(pmap, va); if (*pde & PG_PS) return (pde); if (*pde != 0) { /* are we current address space or kernel? */ if (pmap_is_current(pmap)) return (vtopte(va)); rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); newpf = *pde & PG_FRAME; if ((*PMAP1 & PG_FRAME) != newpf) { *PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M; #ifdef SMP PMAP1cpu = PCPU_GET(cpuid); #endif invlcaddr(PADDR1); PMAP1changed++; } else #ifdef SMP if (PMAP1cpu != PCPU_GET(cpuid)) { PMAP1cpu = PCPU_GET(cpuid); invlcaddr(PADDR1); PMAP1changedcpu++; } else #endif PMAP1unchanged++; return (PADDR1 + (i386_btop(va) & (NPTEPG - 1))); } return (0); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. 
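* A 2/4MB page mapping (PG_PS) is resolved by combining the PDE's frame * address with the low PDRMASK bits of the virtual address. */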
*/ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { vm_paddr_t rtval; pt_entry_t *pte; pd_entry_t pde; rtval = 0; PMAP_LOCK(pmap); pde = pmap->pm_pdir[va >> PDRSHIFT]; if (pde != 0) { if ((pde & PG_PS) != 0) rtval = (pde & PG_PS_FRAME) | (va & PDRMASK); else { pte = pmap_pte(pmap, va); rtval = (*pte & PG_FRAME) | (va & PAGE_MASK); pmap_pte_release(pte); } } PMAP_UNLOCK(pmap); return (rtval); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pd_entry_t pde; pt_entry_t pte, *ptep; vm_page_t m; vm_paddr_t pa; pa = 0; m = NULL; PMAP_LOCK(pmap); retry: pde = *pmap_pde(pmap, va); if (pde != 0) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | (va & PDRMASK), &pa)) goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); } } else { ptep = pmap_pte(pmap, va); pte = *ptep; pmap_pte_release(ptep); if (pte != 0 && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } } } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * Add a wired page to the kva. * Note: not SMP coherent. * * This function may be used before pmap_bootstrap() is called. */ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { pt_entry_t *pte; pte = vtopte(va); pte_store(pte, pa | PG_RW | PG_V | pgeflag); } static __inline void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) { pt_entry_t *pte; pte = vtopte(va); pte_store(pte, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0)); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. * * This function may be used before pmap_bootstrap() is called. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *pte; pte = vtopte(va); pte_clear(pte); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { vm_offset_t va, sva; vm_paddr_t superpage_offset; pd_entry_t newpde; va = *virt; /* * Does the physical address range's size and alignment permit at * least one superpage mapping to be created? */ superpage_offset = start & PDRMASK; if ((end - start) - ((NBPDR - superpage_offset) & PDRMASK) >= NBPDR) { /* * Increase the starting virtual address so that its alignment * does not preclude the use of superpage mappings. 
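		 *
		 * (Worked example, non-PAE 4MB superpages: start =
		 * 0x00601000 gives superpage_offset = 0x201000.  A va at
		 * offset 0x100000 within its 4MB region is advanced to
		 * offset 0x201000 of that same region, making va and
		 * start congruent modulo NBPDR so that whole superpages
		 * can be mapped below.)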
*/ if ((va & PDRMASK) < superpage_offset) va = (va & ~PDRMASK) + superpage_offset; else if ((va & PDRMASK) > superpage_offset) va = ((va + PDRMASK) & ~PDRMASK) + superpage_offset; } sva = va; while (start < end) { if ((start & PDRMASK) == 0 && end - start >= NBPDR && pseflag) { KASSERT((va & PDRMASK) == 0, ("pmap_map: misaligned va %#x", va)); newpde = start | PG_PS | pgeflag | PG_RW | PG_V; pmap_kenter_pde(va, newpde); va += NBPDR; start += NBPDR; } else { pmap_kenter(va, start); va += PAGE_SIZE; start += PAGE_SIZE; } } pmap_invalidate_range(kernel_pmap, sva, va); *virt = va; return (sva); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pt_entry_t *endpte, oldpte, pa, *pte; vm_page_t m; oldpte = 0; pte = vtopte(sva); endpte = pte + count; while (pte < endpte) { m = *ma++; pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0); if ((*pte & (PG_FRAME | PG_PTE_CACHE)) != pa) { oldpte |= *pte; pte_store(pte, pa | pgeflag | PG_RW | PG_V); } pte++; } if (__predict_false((oldpte & PG_V) != 0)) pmap_invalidate_range(kernel_pmap, sva, sva + count * PAGE_SIZE); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { pmap_kremove(va); va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /*************************************************** * Page table page management routines..... ***************************************************/ static __inline void pmap_free_zero_pages(struct spglist *free) { vm_page_t m; while ((m = SLIST_FIRST(free)) != NULL) { SLIST_REMOVE_HEAD(free, plinks.s.ss); /* Preserve the page's PG_ZERO setting. */ vm_page_free_toq(m); } } /* * Schedule the specified unused page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } /* * Inserts the specified page table page into the specified pmap's collection * of idle page table pages. Each of a pmap's page table pages is responsible * for mapping a distinct range of virtual addresses. The pmap's collection is * ordered by this virtual address range. */ static __inline int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); return (vm_radix_insert(&pmap->pm_root, mpte)); } /* * Looks for a page table page mapping the specified virtual address in the * specified pmap's collection of idle page table pages. Returns NULL if there * is no page table page corresponding to the specified virtual address. */ static __inline vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); return (vm_radix_lookup(&pmap->pm_root, va >> PDRSHIFT)); } /* * Removes the specified page table page from the specified pmap's collection * of idle page table pages. 
The specified page table page must be a member of * the pmap's collection. */ static __inline void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); vm_radix_remove(&pmap->pm_root, mpte->pindex); } /* * Decrements a page table page's wire count, which is used to record the * number of valid page table entries within the page. If the wire count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. */ static inline boolean_t pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free) { --m->wire_count; if (m->wire_count == 0) { _pmap_unwire_ptp(pmap, m, free); return (TRUE); } else return (FALSE); } static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free) { vm_offset_t pteva; /* * unmap the page table page */ pmap->pm_pdir[m->pindex] = 0; --pmap->pm_stats.resident_count; /* * This is a release store so that the ordinary store unmapping * the page table page is globally performed before TLB shoot- * down is begun. */ atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); /* * Do an invltlb to make the invalidated mapping * take effect immediately. */ pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); pmap_invalidate_page(pmap, pteva); /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ pmap_add_delayed_free_list(m, free, TRUE); } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, struct spglist *free) { pd_entry_t ptepde; vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); ptepde = *pmap_pde(pmap, va); mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); return (pmap_unwire_ptp(pmap, mpte, free)); } /* * Initialize the pmap for the swapper process. */ void pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); /* * Since the page table directory is shared with the kernel pmap, * which is already included in the list "allpmaps", this pmap does * not need to be inserted into that list. */ pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); #if defined(PAE) || defined(PAE_TABLES) pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); #endif pmap->pm_root.rt_root = 0; CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ int pmap_pinit(pmap_t pmap) { vm_page_t m, ptdpg[NPGPTD]; vm_paddr_t pa; int i; /* * No need to allocate page table space yet but we do need a valid * page directory table. 
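	 * (NBPTD is NPGPTD << PAGE_SHIFT bytes of KVA: NPGPTD is four
	 * page directory pages under PAE/PAE_TABLES and one otherwise.)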
*/ if (pmap->pm_pdir == NULL) { pmap->pm_pdir = (pd_entry_t *)kva_alloc(NBPTD); if (pmap->pm_pdir == NULL) return (0); #if defined(PAE) || defined(PAE_TABLES) pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); KASSERT(((vm_offset_t)pmap->pm_pdpt & ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, ("pmap_pinit: pdpt misaligned")); KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), ("pmap_pinit: pdpt above 4g")); #endif pmap->pm_root.rt_root = 0; } KASSERT(vm_radix_is_empty(&pmap->pm_root), ("pmap_pinit: pmap has reserved page table page(s)")); /* * allocate the page directory page(s) */ for (i = 0; i < NPGPTD;) { m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (m == NULL) VM_WAIT; else { ptdpg[i++] = m; } } pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); for (i = 0; i < NPGPTD; i++) if ((ptdpg[i]->flags & PG_ZERO) == 0) pagezero(pmap->pm_pdir + (i * NPDEPG)); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); /* Copy the kernel page table directory entries. */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); mtx_unlock_spin(&allpmaps_lock); /* install self-referential address mapping entry(s) */ for (i = 0; i < NPGPTD; i++) { pa = VM_PAGE_TO_PHYS(ptdpg[i]); pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; #if defined(PAE) || defined(PAE_TABLES) pmap->pm_pdpt[i] = pa | PG_V; #endif } CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); return (1); } /* * this routine is called if the page table page is not * mapped correctly. */ static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags) { vm_paddr_t ptepa; vm_page_t m; /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if ((flags & PMAP_ENTER_NOSLEEP) == 0) { PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); VM_WAIT; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ pmap->pm_stats.resident_count++; ptepa = VM_PAGE_TO_PHYS(m); pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); return (m); } static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags) { u_int ptepindex; pd_entry_t ptepa; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; retry: /* * Get the page directory entry */ ptepa = pmap->pm_pdir[ptepindex]; /* * This supports switching from a 4MB page to a * normal 4K page. */ if (ptepa & PG_PS) { (void)pmap_demote_pde(pmap, &pmap->pm_pdir[ptepindex], va); ptepa = pmap->pm_pdir[ptepindex]; } /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (ptepa) { m = PHYS_TO_VM_PAGE(ptepa & PG_FRAME); m->wire_count++; } else { /* * Here if the pte page isn't mapped, or if it has * been deallocated. */ m = _pmap_allocpte(pmap, ptepindex, flags); if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0) goto retry; } return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. 
* Should only be called if the map contains no valid mappings. */ void pmap_release(pmap_t pmap) { vm_page_t m, ptdpg[NPGPTD]; int i; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); KASSERT(vm_radix_is_empty(&pmap->pm_root), ("pmap_release: pmap has reserved page table page(s)")); KASSERT(CPU_EMPTY(&pmap->pm_active), ("releasing active pmap %p", pmap)); mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); for (i = 0; i < NPGPTD; i++) ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i] & PG_FRAME); bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) * sizeof(*pmap->pm_pdir)); pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); for (i = 0; i < NPGPTD; i++) { m = ptdpg[i]; #if defined(PAE) || defined(PAE_TABLES) KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), ("pmap_release: got wrong ptd page")); #endif m->wire_count--; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); } } static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; return (sysctl_handle_long(oidp, &ksize, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "IU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return (sysctl_handle_long(oidp, &kfree, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "IU", "Amount of KVM free"); /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { vm_paddr_t ptppaddr; vm_page_t nkpg; pd_entry_t newpdir; mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, NBPDR); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } continue; } nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); nkpt++; if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); ptppaddr = VM_PAGE_TO_PHYS(nkpg); newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); pdir_pde(KPTD, kernel_vm_end) = pgeflag | newpdir; pmap_kenter_pde(kernel_vm_end, newpdir); kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } } } /*************************************************** * page management routines. 
***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 11); CTASSERT(_NPCPV == 336); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ #define PC_FREE10 0x0000fffful /* Free values for index 10 */ static const uint32_t pc_freemask[_NPCM] = { PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE10 }; SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. */ static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap) { struct pch newtail; struct pv_chunk *pc; struct md_page *pvh; pd_entry_t *pde; pmap_t pmap; pt_entry_t *pte, tpte; pv_entry_t pv; vm_offset_t va; vm_page_t m, m_pc; struct spglist free; uint32_t inuse; int bit, field, freed; PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); pmap = NULL; m_pc = NULL; SLIST_INIT(&free); TAILQ_INIT(&newtail); while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 || SLIST_EMPTY(&free))) { TAILQ_REMOVE(&pv_chunks, pc, pc_lru); if (pmap != pc->pc_pmap) { if (pmap != NULL) { pmap_invalidate_all(pmap); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } pmap = pc->pc_pmap; /* Avoid deadlock and lock recursion. */ if (pmap > locked_pmap) PMAP_LOCK(pmap); else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { pmap = NULL; TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); continue; } } /* * Destroy every non-wired, 4 KB page mapping in the chunk. 
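		 *
		 * (A set bit in pc_map marks a free pv entry, so
		 * ~pc->pc_map[field] & pc_freemask[field] is the mask of
		 * entries in use, and bsfl() picks off its lowest set
		 * bit.  E.g. pc_map[0] == 0xfffffff0 means slots 0-3 of
		 * field 0 are live and are visited in ascending order.)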
 */
		freed = 0;
		for (field = 0; field < _NPCM; field++) {
			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
			    inuse != 0; inuse &= ~(1UL << bit)) {
				bit = bsfl(inuse);
				pv = &pc->pc_pventry[field * 32 + bit];
				va = pv->pv_va;
				pde = pmap_pde(pmap, va);
				if ((*pde & PG_PS) != 0)
					continue;
				pte = pmap_pte(pmap, va);
				tpte = *pte;
				if ((tpte & PG_W) == 0)
					tpte = pte_load_clear(pte);
				pmap_pte_release(pte);
				if ((tpte & PG_W) != 0)
					continue;
				KASSERT(tpte != 0,
				    ("pmap_pv_reclaim: pmap %p va %x zero pte",
				    pmap, va));
				if ((tpte & PG_G) != 0)
					pmap_invalidate_page(pmap, va);
				m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
				if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
					vm_page_dirty(m);
				if ((tpte & PG_A) != 0)
					vm_page_aflag_set(m, PGA_REFERENCED);
				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
				if (TAILQ_EMPTY(&m->md.pv_list) &&
				    (m->flags & PG_FICTITIOUS) == 0) {
					pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
					if (TAILQ_EMPTY(&pvh->pv_list)) {
						vm_page_aflag_clear(m,
						    PGA_WRITEABLE);
					}
				}
				pc->pc_map[field] |= 1UL << bit;
				pmap_unuse_pt(pmap, va, &free);
				freed++;
			}
		}
		if (freed == 0) {
			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
			continue;
		}
		/* Every freed mapping is for a 4 KB page. */
		pmap->pm_stats.resident_count -= freed;
		PV_STAT(pv_entry_frees += freed);
		PV_STAT(pv_entry_spare += freed);
		pv_entry_count -= freed;
		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
		for (field = 0; field < _NPCM; field++)
			if (pc->pc_map[field] != pc_freemask[field]) {
				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
				    pc_list);
				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);

				/*
				 * One freed pv entry in locked_pmap is
				 * sufficient.
				 */
				if (pmap == locked_pmap)
					goto out;
				break;
			}
		if (field == _NPCM) {
			PV_STAT(pv_entry_spare -= _NPCPV);
			PV_STAT(pc_chunk_count--);
			PV_STAT(pc_chunk_frees++);
			/* Entire chunk is free; return it. */
			m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
			pmap_qremove((vm_offset_t)pc, 1);
			pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
			break;
		}
	}
out:
	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
	if (pmap != NULL) {
		pmap_invalidate_all(pmap);
		if (pmap != locked_pmap)
			PMAP_UNLOCK(pmap);
	}
	if (m_pc == NULL && pv_vafree != 0 && !SLIST_EMPTY(&free)) {
		m_pc = SLIST_FIRST(&free);
		SLIST_REMOVE_HEAD(&free, plinks.s.ss);
		/* Recycle a freed page table page. */
		m_pc->wire_count = 1;
		atomic_add_int(&vm_cnt.v_wire_count, 1);
	}
	pmap_free_zero_pages(&free);
	return (m_pc);
}

/*
 * free the pv_entry back to the free list
 */
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
	struct pv_chunk *pc;
	int idx, field, bit;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(pv_entry_frees++);
	PV_STAT(pv_entry_spare++);
	pv_entry_count--;
	pc = pv_to_chunk(pv);
	idx = pv - &pc->pc_pventry[0];
	field = idx / 32;
	bit = idx % 32;
	pc->pc_map[field] |= 1ul << bit;
	for (idx = 0; idx < _NPCM; idx++)
		if (pc->pc_map[idx] != pc_freemask[idx]) {
			/*
			 * 98% of the time, pc is already at the head of the
			 * list.  If it isn't already, move it to the head.
*/ if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) != pc)) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; TAILQ_REMOVE(&pv_chunks, pc, pc_lru); PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); vm_page_unwire(m, PQ_NONE); vm_page_free(m); pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); } /* * get a new pv_entry, allocating a block from the system * when needed. */ static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try) { static const struct timeval printinterval = { 60, 0 }; static struct timeval lastprint; int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(pv_entry_allocs++); pv_entry_count++; if (pv_entry_count > pv_entry_high_water) if (ratecheck(&lastprint, &printinterval)) printf("Approaching the limit on PV entries, consider " "increasing either the vm.pmap.shpgperproc or the " "vm.pmap.pv_entry_max tunable.\n"); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = bsfl(pc->pc_map[field]); break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 32 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ for (field = 0; field < _NPCM; field++) if (pc->pc_map[field] != 0) { PV_STAT(pv_entry_spare--); return (pv); /* not full, return */ } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare--); return (pv); } } /* * Access to the ptelist "pv_vafree" is synchronized by the pvh * global lock. If "pv_vafree" is currently non-empty, it will * remain non-empty until pmap_ptelist_alloc() completes. */ if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { if (try) { pv_entry_count--; PV_STAT(pc_chunk_tryfail++); return (NULL); } m = pmap_pv_reclaim(pmap); if (m == NULL) goto retry; } PV_STAT(pc_chunk_count++); PV_STAT(pc_chunk_allocs++); pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree); pmap_qenter((vm_offset_t)pc, &m, 1); pc->pc_pmap = pmap; pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ for (field = 1; field < _NPCM; field++) pc->pc_map[field] = pc_freemask[field]; TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare += _NPCPV - 1); return (pv); } static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); break; } } return (pv); } static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_demote_pde: pa is not 4mpage aligned")); /* * Transfer the 4mpage's pv entry for this mapping to the first * page's pv list. 
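	 *
	 * (For a non-PAE 4MB mapping at va 0xc0400000, for instance, the
	 * single superpage pv entry moves to the pv list of the page at
	 * pa, and the loop below then creates pv entries for the other
	 * NPTEPG - 1 pages at va 0xc0401000 through 0xc07ff000.)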
*/ pvh = pa_to_pvh(pa); va = trunc_4mpage(va); pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); m = PHYS_TO_VM_PAGE(pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); /* Instantiate the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { m++; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_pv_demote_pde: page %p is not managed", m)); va += PAGE_SIZE; pmap_insert_entry(pmap, va, m); } while (va < va_last); } static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_promote_pde: pa is not 4mpage aligned")); /* * Transfer the first page's pv entry for this mapping to the * 4mpage's pv list. Aside from avoiding the cost of a call * to get_pv_entry(), a transfer avoids the possibility that * get_pv_entry() calls pmap_collect() and that pmap_collect() * removes one of the mappings that is being promoted. */ m = PHYS_TO_VM_PAGE(pa); va = trunc_4mpage(va); pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); /* Free the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { m++; va += PAGE_SIZE; pmap_pvh_free(&m->md, pmap, va); } while (va < va_last); } static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } static void pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) { struct md_page *pvh; rw_assert(&pvh_global_lock, RA_WLOCKED); pmap_pvh_free(&m->md, pmap, va); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } /* * Create a pv entry for page at pa for * (pmap, va). */ static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } /* * Conditionally create a pv entry. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); return (TRUE); } else return (FALSE); } /* * Create the pv entries for each of the pages within a superpage. */ static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); return (TRUE); } else return (FALSE); } /* * Fills a page table page with mappings to consecutive physical pages. */ static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte) { pt_entry_t *pte; for (pte = firstpte; pte < firstpte + NPTEPG; pte++) { *pte = newpte; newpte += PAGE_SIZE; } } /* * Tries to demote a 2- or 4MB page mapping. If demotion fails, the * 2- or 4MB page mapping is invalidated. 
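 */

/*
 * A minimal sketch (hypothetical helper, not part of this file) of the
 * PAT-bit relocation that pmap_demote_pde() performs below: in a 2/4MB
 * PDE the PAT selector is PG_PDE_PAT (bit 12), while in a 4KB PTE it is
 * PG_PTE_PAT (bit 7).  XORing with both masks moves a set bit from the
 * PDE position to the PTE position without disturbing other attributes.
 */
static __inline pt_entry_t
example_pde_attrs_to_pte(pd_entry_t pde)
{
	pt_entry_t pte;

	pte = pde & ~PG_PS;		/* a 4KB PTE has no PG_PS bit */
	if ((pte & PG_PDE_PAT) != 0)
		pte ^= PG_PDE_PAT | PG_PTE_PAT;	/* clear bit 12, set bit 7 */
	return (pte);
}

/*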
*/ static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde, oldpde; pt_entry_t *firstpte, newpte; vm_paddr_t mptepa; vm_page_t mpte; struct spglist free; PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpde = *pde; KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); if ((oldpde & PG_A) != 0 && (mpte = pmap_lookup_pt_page(pmap, va)) != NULL) pmap_remove_pt_page(pmap, mpte); else { KASSERT((oldpde & PG_W) == 0, ("pmap_demote_pde: page table page for a wired mapping" " is missing")); /* * Invalidate the 2- or 4MB page mapping and return * "failure" if the mapping was never accessed or the * allocation of the new page table page fails. */ if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL, va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED)) == NULL) { SLIST_INIT(&free); pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free); pmap_invalidate_page(pmap, trunc_4mpage(va)); pmap_free_zero_pages(&free); CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x" " in pmap %p", va, pmap); return (FALSE); } if (va < VM_MAXUSER_ADDRESS) pmap->pm_stats.resident_count++; } mptepa = VM_PAGE_TO_PHYS(mpte); /* * If the page mapping is in the kernel's address space, then the * KPTmap can provide access to the page table page. Otherwise, * temporarily map the page table page (mpte) into the kernel's * address space at either PADDR1 or PADDR2. */ if (va >= KERNBASE) firstpte = &KPTmap[i386_btop(trunc_4mpage(va))]; else if (curthread->td_pinned > 0 && rw_wowned(&pvh_global_lock)) { if ((*PMAP1 & PG_FRAME) != mptepa) { *PMAP1 = mptepa | PG_RW | PG_V | PG_A | PG_M; #ifdef SMP PMAP1cpu = PCPU_GET(cpuid); #endif invlcaddr(PADDR1); PMAP1changed++; } else #ifdef SMP if (PMAP1cpu != PCPU_GET(cpuid)) { PMAP1cpu = PCPU_GET(cpuid); invlcaddr(PADDR1); PMAP1changedcpu++; } else #endif PMAP1unchanged++; firstpte = PADDR1; } else { mtx_lock(&PMAP2mutex); if ((*PMAP2 & PG_FRAME) != mptepa) { *PMAP2 = mptepa | PG_RW | PG_V | PG_A | PG_M; pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); } firstpte = PADDR2; } newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; KASSERT((oldpde & PG_A) != 0, ("pmap_demote_pde: oldpde is missing PG_A")); KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, ("pmap_demote_pde: oldpde is missing PG_M")); newpte = oldpde & ~PG_PS; if ((newpte & PG_PDE_PAT) != 0) newpte ^= PG_PDE_PAT | PG_PTE_PAT; /* * If the page table page is new, initialize it. */ if (mpte->wire_count == 1) { mpte->wire_count = NPTEPG; pmap_fill_ptp(firstpte, newpte); } KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), ("pmap_demote_pde: firstpte and newpte map different physical" " addresses")); /* * If the mapping has changed attributes, update the page table * entries. */ if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) pmap_fill_ptp(firstpte, newpte); /* * Demote the mapping. This pmap is locked. The old PDE has * PG_A set. If the old PDE has PG_RW set, it also has PG_M * set. Thus, there is no danger of a race with another * processor changing the setting of PG_A and/or PG_M between * the read above and the store below. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, newpde); else if (pmap == kernel_pmap) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); if (firstpte == PADDR2) mtx_unlock(&PMAP2mutex); /* * Invalidate the recursive mapping of the page table page. */ pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); /* * Demote the pv entry. 
This depends on the earlier demotion * of the mapping. Specifically, the (re)creation of a per- * page pv entry might trigger the execution of pmap_collect(), * which might reclaim a newly (re)created per-page pv entry * and destroy the associated mapping. In order to destroy * the mapping, the PDE must have already changed from mapping * the 2mpage to referencing the page table page. */ if ((oldpde & PG_MANAGED) != 0) pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME); pmap_pde_demotions++; CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#x" " in pmap %p", va, pmap); return (TRUE); } /* * Removes a 2- or 4MB page mapping from the kernel pmap. */ static void pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde; vm_paddr_t mptepa; vm_page_t mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); mpte = pmap_lookup_pt_page(pmap, va); if (mpte == NULL) panic("pmap_remove_kernel_pde: Missing pt page."); pmap_remove_pt_page(pmap, mpte); mptepa = VM_PAGE_TO_PHYS(mpte); newpde = mptepa | PG_M | PG_A | PG_RW | PG_V; /* * Initialize the page table page. */ pagezero((void *)&KPTmap[i386_btop(trunc_4mpage(va))]); /* * Remove the mapping. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, newpde); else pmap_kenter_pde(va, newpde); /* * Invalidate the recursive mapping of the page table page. */ pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); } /* * pmap_remove_pde: do the things to unmap a superpage in a process */ static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, struct spglist *free) { struct md_page *pvh; pd_entry_t oldpde; vm_offset_t eva, va; vm_page_t m, mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_remove_pde: sva is not 4mpage aligned")); oldpde = pte_load_clear(pdq); if (oldpde & PG_W) pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; /* * Machines that don't support invlpg, also don't support * PG_G. */ if (oldpde & PG_G) pmap_invalidate_page(kernel_pmap, sva); pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; if (oldpde & PG_MANAGED) { pvh = pa_to_pvh(oldpde & PG_PS_FRAME); pmap_pvh_free(pvh, pmap, sva); eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) { if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpde & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); if (TAILQ_EMPTY(&m->md.pv_list) && TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } if (pmap == kernel_pmap) { pmap_remove_kernel_pde(pmap, pdq, sva); } else { mpte = pmap_lookup_pt_page(pmap, sva); if (mpte != NULL) { pmap_remove_pt_page(pmap, mpte); pmap->pm_stats.resident_count--; KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pde: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, free, FALSE); atomic_subtract_int(&vm_cnt.v_wire_count, 1); } } } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, struct spglist *free) { pt_entry_t oldpte; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpte = pte_load_clear(ptq); KASSERT(oldpte != 0, ("pmap_remove_pte: pmap %p va %x zero pte", pmap, va)); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* * Machines that don't support invlpg, also don't support * PG_G. 
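 *
 * (PG_G mappings survive the implicit TLB flush of a CR3 reload and so
 * must be removed with an explicit invlpg; CPUs old enough to lack
 * invlpg also lack PGE, so PG_G is never set on them.)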
*/ if (oldpte & PG_G) pmap_invalidate_page(kernel_pmap, va); pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpte & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); pmap_remove_entry(pmap, m, va); } return (pmap_unuse_pt(pmap, va, free)); } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va, struct spglist *free) { pt_entry_t *pte; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) return; pmap_remove_pte(pmap, pte, va, free); pmap_invalidate_page(pmap, va); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t pdnxt; pd_entry_t ptpaddr; pt_entry_t *pte; struct spglist free; int anyvalid; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; anyvalid = 0; SLIST_INIT(&free); rw_wlock(&pvh_global_lock); sched_pin(); PMAP_LOCK(pmap); /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. */ if ((sva + PAGE_SIZE == eva) && ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { pmap_remove_page(pmap, sva, &free); goto out; } for (; sva < eva; sva = pdnxt) { u_int pdirindex; /* * Calculate index for next page table. */ pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; if (pmap->pm_stats.resident_count == 0) break; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { /* * Are we removing the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == pdnxt && eva >= pdnxt) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_remove_pde(). */ if ((ptpaddr & PG_G) == 0) anyvalid = 1; pmap_remove_pde(pmap, &pmap->pm_pdir[pdirindex], sva, &free); continue; } else if (!pmap_demote_pde(pmap, &pmap->pm_pdir[pdirindex], sva)) { /* The large page mapping was destroyed. */ continue; } } /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (pdnxt > eva) pdnxt = eva; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { if (*pte == 0) continue; /* * The TLB entry for a PG_G mapping is invalidated * by pmap_remove_pte(). */ if ((*pte & PG_G) == 0) anyvalid = 1; if (pmap_remove_pte(pmap, pte, sva, &free)) break; } } out: sched_unpin(); if (anyvalid) pmap_invalidate_all(pmap); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) 
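 */

/*
 * Hypothetical caller sketch (illustrative, not from this file): a page
 * being reclaimed has every mapping torn down first, after which it can
 * no longer be referenced or dirtied through any pmap.
 */
static void
example_strip_page(vm_page_t m)
{

	pmap_remove_all(m);	/* also clears PGA_WRITEABLE */
	KASSERT(!pmap_page_is_mapped(m),
	    ("example_strip_page: %p is still mapped", m));
}

/*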
*/ void pmap_remove_all(vm_page_t m) { struct md_page *pvh; pv_entry_t pv; pmap_t pmap; pt_entry_t *pte, tpte; pd_entry_t *pde; vm_offset_t va; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); (void)pmap_demote_pde(pmap, pde, va); PMAP_UNLOCK(pmap); } small_mappings: while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap->pm_stats.resident_count--; pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found" " a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); tpte = pte_load_clear(pte); KASSERT(tpte != 0, ("pmap_remove_all: pmap %p va %x zero pte", pmap, pv->pv_va)); if (tpte & PG_W) pmap->pm_stats.wired_count--; if (tpte & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, &free); pmap_invalidate_page(pmap, pv->pv_va); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); sched_unpin(); rw_wunlock(&pvh_global_lock); pmap_free_zero_pages(&free); } /* * pmap_protect_pde: do the things to protect a 4mpage in a process */ static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot) { pd_entry_t newpde, oldpde; vm_offset_t eva, va; vm_page_t m; boolean_t anychanged; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_protect_pde: sva is not 4mpage aligned")); anychanged = FALSE; retry: oldpde = newpde = *pde; if (oldpde & PG_MANAGED) { eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); } if ((prot & VM_PROT_WRITE) == 0) newpde &= ~(PG_RW | PG_M); #if defined(PAE) || defined(PAE_TABLES) if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; #endif if (newpde != oldpde) { if (!pde_cmpset(pde, oldpde, newpde)) goto retry; if (oldpde & PG_G) pmap_invalidate_page(pmap, sva); else anychanged = TRUE; } return (anychanged); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t pdnxt; pd_entry_t ptpaddr; pt_entry_t *pte; boolean_t anychanged, pv_lists_locked; KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); if (prot == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } #if defined(PAE) || defined(PAE_TABLES) if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == (VM_PROT_WRITE|VM_PROT_EXECUTE)) return; #else if (prot & VM_PROT_WRITE) return; #endif if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } anychanged = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pt_entry_t obits, pbits; u_int pdirindex; pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. 
*/ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { /* * Are we protecting the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == pdnxt && eva >= pdnxt) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_protect_pde(). */ if (pmap_protect_pde(pmap, &pmap->pm_pdir[pdirindex], sva, prot)) anychanged = TRUE; continue; } else { if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { if (anychanged) pmap_invalidate_all( pmap); PMAP_UNLOCK(pmap); goto resume; } sched_pin(); } if (!pmap_demote_pde(pmap, &pmap->pm_pdir[pdirindex], sva)) { /* * The large page mapping was * destroyed. */ continue; } } } if (pdnxt > eva) pdnxt = eva; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { vm_page_t m; retry: /* * Regardless of whether a pte is 32 or 64 bits in * size, PG_RW, PG_A, and PG_M are among the least * significant 32 bits. */ obits = pbits = *pte; if ((pbits & PG_V) == 0) continue; if ((prot & VM_PROT_WRITE) == 0) { if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == (PG_MANAGED | PG_M | PG_RW)) { m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); vm_page_dirty(m); } pbits &= ~(PG_RW | PG_M); } #if defined(PAE) || defined(PAE_TABLES) if ((prot & VM_PROT_EXECUTE) == 0) pbits |= pg_nx; #endif if (pbits != obits) { #if defined(PAE) || defined(PAE_TABLES) if (!atomic_cmpset_64(pte, obits, pbits)) goto retry; #else if (!atomic_cmpset_int((u_int *)pte, obits, pbits)) goto retry; #endif if (obits & PG_G) pmap_invalidate_page(pmap, sva); else anychanged = TRUE; } } } if (anychanged) pmap_invalidate_all(pmap); if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } /* * Tries to promote the 512 or 1024, contiguous 4KB page mappings that are * within a single page table page (PTP) to a single 2- or 4MB page mapping. * For promotion to occur, two conditions must be met: (1) the 4KB page * mappings must map aligned, contiguous physical memory and (2) the 4KB page * mappings must have identical characteristics. * * Managed (PG_MANAGED) mappings within the kernel address space are not * promoted. The reason is that kernel PDEs are replicated in each pmap but * pmap_clear_ptes() and pmap_ts_referenced() only read the PDE from the kernel * pmap. */ static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde; pt_entry_t *firstpte, oldpte, pa, *pte; vm_offset_t oldpteva; vm_page_t mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Examine the first PTE in the specified PTP. Abort if this PTE is * either invalid, unused, or does not map the first 4KB physical page * within a 2- or 4MB page. */ firstpte = pmap_pte_quick(pmap, trunc_4mpage(va)); setpde: newpde = *firstpte; if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } if ((*firstpte & PG_MANAGED) != 0 && pmap == kernel_pmap) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } if ((newpde & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared without * a TLB invalidation. */ if (!atomic_cmpset_int((u_int *)firstpte, newpde, newpde & ~PG_RW)) goto setpde; newpde &= ~PG_RW; } /* * Examine each of the other PTEs in the specified PTP. Abort if this * PTE maps an unexpected 4KB physical page or does not have identical * characteristics to the first PTE. 
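	/*
	 * (The scan below runs backwards: "pa" starts at the frame of the
	 * last 4KB page of the prospective superpage, with the required
	 * PG_A and PG_V bits folded in, and drops by PAGE_SIZE as "pte"
	 * walks down toward firstpte, so each PTE must match its expected
	 * frame and attributes exactly.)
	 */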
*/ pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE; for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { setpte: oldpte = *pte; if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } if ((oldpte & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared * without a TLB invalidation. */ if (!atomic_cmpset_int((u_int *)pte, oldpte, oldpte & ~PG_RW)) goto setpte; oldpte &= ~PG_RW; oldpteva = (oldpte & PG_FRAME & PDRMASK) | (va & ~PDRMASK); CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#x" " in pmap %p", oldpteva, pmap); } if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } pa -= PAGE_SIZE; } /* * Save the page table page in its current state until the PDE * mapping the superpage is demoted by pmap_demote_pde() or * destroyed by pmap_remove_pde(). */ mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_promote_pde: page table page is out of range")); KASSERT(mpte->pindex == va >> PDRSHIFT, ("pmap_promote_pde: page table page's pindex is wrong")); if (pmap_insert_pt_page(pmap, mpte)) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x in pmap %p", va, pmap); return; } /* * Promote the pv entries. */ if ((newpde & PG_MANAGED) != 0) pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME); /* * Propagate the PAT index to its proper position. */ if ((newpde & PG_PTE_PAT) != 0) newpde ^= PG_PDE_PAT | PG_PTE_PAT; /* * Map the superpage. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, PG_PS | newpde); else if (pmap == kernel_pmap) pmap_kenter_pde(va, PG_PS | newpde); else pde_store(pde, PG_PS | newpde); pmap_pde_promotions++; CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x" " in pmap %p", va, pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind) { pd_entry_t *pde; pt_entry_t *pte; pt_entry_t newpte, origpte; pv_entry_t pv; vm_paddr_t opa, pa; vm_page_t mpte, om; boolean_t invlva, wired; va = trunc_page(va); mpte = NULL; wired = (flags & PMAP_ENTER_WIRED) != 0; KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va)); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); sched_pin(); /* * In the case that a page table page is not * resident, we are creating it here. 
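	 * (Only user addresses take this path: kernel page table pages
	 * are allocated up front by pmap_growkernel() and are shared by
	 * every pmap, so they are always resident.)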
*/ if (va < VM_MAXUSER_ADDRESS) { mpte = pmap_allocpte(pmap, va, flags); if (mpte == NULL) { KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0, ("pmap_allocpte failed with sleep allowed")); sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_RESOURCE_SHORTAGE); } } pde = pmap_pde(pmap, va); if ((*pde & PG_PS) != 0) panic("pmap_enter: attempted pmap_enter on 4MB page"); pte = pmap_pte_quick(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ if (pte == NULL) { panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x", (uintmax_t)pmap->pm_pdir[PTDPTDI], va); } pa = VM_PAGE_TO_PHYS(m); om = NULL; origpte = *pte; opa = origpte & PG_FRAME; /* * Mapping has not changed, must be protection or wiring change. */ if (origpte && (opa == pa)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if (wired && ((origpte & PG_W) == 0)) pmap->pm_stats.wired_count++; else if (!wired && (origpte & PG_W)) pmap->pm_stats.wired_count--; /* * Remove extra pte reference */ if (mpte) mpte->wire_count--; if (origpte & PG_MANAGED) { om = m; pa |= PG_MANAGED; } goto validate; } pv = NULL; /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ if (opa) { if (origpte & PG_W) pmap->pm_stats.wired_count--; if (origpte & PG_MANAGED) { om = PHYS_TO_VM_PAGE(opa); pv = pmap_pvh_remove(&om->md, pmap, va); } if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%x", va)); } } else pmap->pm_stats.resident_count++; /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0) { KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, ("pmap_enter: managed mapping within the clean submap")); if (pv == NULL) pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); pa |= PG_MANAGED; } else if (pv != NULL) free_pv_entry(pmap, pv); /* * Increment counters */ if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. */ newpte = (pt_entry_t)(pa | pmap_cache_bits(m->md.pat_mode, 0) | PG_V); if ((prot & VM_PROT_WRITE) != 0) { newpte |= PG_RW; if ((newpte & PG_MANAGED) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); } #if defined(PAE) || defined(PAE_TABLES) if ((prot & VM_PROT_EXECUTE) == 0) newpte |= pg_nx; #endif if (wired) newpte |= PG_W; if (va < VM_MAXUSER_ADDRESS) newpte |= PG_U; if (pmap == kernel_pmap) newpte |= pgeflag; /* * if the mapping or permission bits are different, we need * to update the pte. 
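	 * (PG_M and PG_A are masked out of the comparison because the
	 * hardware may set them asynchronously; a difference only in
	 * those bits does not require rewriting the PTE.)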
*/ if ((origpte & ~(PG_M|PG_A)) != newpte) { newpte |= PG_A; if ((flags & VM_PROT_WRITE) != 0) newpte |= PG_M; if (origpte & PG_V) { invlva = FALSE; origpte = pte_load_store(pte, newpte); if (origpte & PG_A) { if (origpte & PG_MANAGED) vm_page_aflag_set(om, PGA_REFERENCED); if (opa != VM_PAGE_TO_PHYS(m)) invlva = TRUE; #if defined(PAE) || defined(PAE_TABLES) if ((origpte & PG_NX) == 0 && (newpte & PG_NX) != 0) invlva = TRUE; #endif } if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((origpte & PG_MANAGED) != 0) vm_page_dirty(om); if ((prot & VM_PROT_WRITE) == 0) invlva = TRUE; } if ((origpte & PG_MANAGED) != 0 && TAILQ_EMPTY(&om->md.pv_list) && ((om->flags & PG_FICTITIOUS) != 0 || TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) vm_page_aflag_clear(om, PGA_WRITEABLE); if (invlva) pmap_invalidate_page(pmap, va); } else pte_store(pte, newpte); } /* * If both the page table page and the reservation are fully * populated, then attempt promotion. */ if ((mpte == NULL || mpte->wire_count == NPTEPG) && pg_ps_enabled && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) pmap_promote_pde(pmap, pde, va); sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } /* * Tries to create a 2- or 4MB page mapping. Returns TRUE if successful and * FALSE otherwise. Fails if (1) a page table page cannot be allocated without * blocking, (2) a mapping already exists at the specified virtual address, or * (3) a pv entry cannot be allocated without reclaiming another pv entry. */ static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { pd_entry_t *pde, newpde; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); pde = pmap_pde(pmap, va); if (*pde != 0) { CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 1) | PG_PS | PG_V; if ((m->oflags & VPO_UNMANAGED) == 0) { newpde |= PG_MANAGED; /* * Abort this mapping if its PV entry could not be created. */ if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m))) { CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } } #if defined(PAE) || defined(PAE_TABLES) if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; #endif if (va < VM_MAXUSER_ADDRESS) newpde |= PG_U; /* * Increment counters. */ pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; /* * Map the superpage. */ pde_store(pde, newpde); pmap_pde_mappings++; CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx" " in pmap %p", va, pmap); return (TRUE); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. 
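 */

/*
 * Hypothetical caller sketch (illustrative only; the names, locking, and
 * the premise that the first resident page sits exactly at "pindex" are
 * assumptions, not part of this file): pre-map whatever pages of
 * "object" are already resident so later faults on the range are cheap.
 */
static void
example_premap(pmap_t pmap, vm_offset_t start, vm_offset_t end,
    vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	VM_OBJECT_RLOCK(object);
	m = vm_page_find_least(object, pindex);
	if (m != NULL && m->pindex == pindex)
		pmap_enter_object(pmap, start, end, m, VM_PROT_READ);
	VM_OBJECT_RUNLOCK(object);
}

/*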
*/ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpte = NULL; m = m_start; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); if ((va & PDRMASK) == 0 && va + NBPDR <= end && m->psind == 1 && pg_ps_enabled && pmap_enter_pde(pmap, va, m, prot)) m = &m[NBPDR / PAGE_SIZE - 1]; else mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte); m = TAILQ_NEXT(m, listq); } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte) { pt_entry_t *pte; vm_paddr_t pa; struct spglist free; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { u_int ptepindex; pd_entry_t ptepa; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; if (mpte && (mpte->pindex == ptepindex)) { mpte->wire_count++; } else { /* * Get the page directory entry */ ptepa = pmap->pm_pdir[ptepindex]; /* * If the page table page is mapped, we just increment * the hold count, and activate it. */ if (ptepa) { if (ptepa & PG_PS) return (NULL); mpte = PHYS_TO_VM_PAGE(ptepa & PG_FRAME); mpte->wire_count++; } else { mpte = _pmap_allocpte(pmap, ptepindex, PMAP_ENTER_NOSLEEP); if (mpte == NULL) return (mpte); } } } else { mpte = NULL; } /* * This call to vtopte makes the assumption that we are * entering the page into the current pmap. In order to support * quick entry into any pmap, one would likely use pmap_pte_quick. * But that isn't as quick as vtopte. */ pte = vtopte(va); if (*pte) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m)) { if (mpte != NULL) { SLIST_INIT(&free); if (pmap_unwire_ptp(pmap, mpte, &free)) { pmap_invalidate_page(pmap, va); pmap_free_zero_pages(&free); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap->pm_stats.resident_count++; pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0); #if defined(PAE) || defined(PAE_TABLES) if ((prot & VM_PROT_EXECUTE) == 0) pa |= pg_nx; #endif /* * Now validate mapping with RO protection */ if ((m->oflags & VPO_UNMANAGED) != 0) pte_store(pte, pa | PG_V | PG_U); else pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); return (mpte); } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. 
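 */

/*
 * Hypothetical use (illustrative, not from this file): a crash dump
 * routine that copies one physical page at a time through the
 * crashdumpmap window.
 */
static void
example_dump_copy(vm_paddr_t pa, void *buf)
{
	void *va;

	va = pmap_kenter_temporary(pa, 0);
	bcopy(va, buf, PAGE_SIZE);
}

/*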
*/ void * pmap_kenter_temporary(vm_paddr_t pa, int i) { vm_offset_t va; va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); invlpg(va); return ((void *)crashdumpmap); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { pd_entry_t *pde; vm_paddr_t pa, ptepa; vm_page_t p; int pat_mode; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); if (pseflag && (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { if (!vm_object_populate(object, pindex, pindex + atop(size))) return; p = vm_page_lookup(object, pindex); KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); pat_mode = p->md.pat_mode; /* * Abort the mapping if the first page is not physically * aligned to a 2/4MB page boundary. */ ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) return; /* * Skip the first page. Abort the mapping if the rest of * the pages are not physically contiguous or have differing * memory attributes. */ p = TAILQ_NEXT(p, listq); for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; pa += PAGE_SIZE) { KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); if (pa != VM_PAGE_TO_PHYS(p) || pat_mode != p->md.pat_mode) return; p = TAILQ_NEXT(p, listq); } /* * Map using 2/4MB pages. Since "ptepa" is 2/4M aligned and * "size" is a multiple of 2/4M, adding the PAT setting to * "pa" will not affect the termination of this loop. */ PMAP_LOCK(pmap); for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa + size; pa += NBPDR) { pde = pmap_pde(pmap, addr); if (*pde == 0) { pde_store(pde, pa | PG_PS | PG_M | PG_A | PG_U | PG_RW | PG_V); pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; pmap_pde_mappings++; } /* Else continue on if the PDE is already valid. */ addr += NBPDR; } PMAP_UNLOCK(pmap); } } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware feature, * so there is no need to invalidate any TLB entries. */ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t pdnxt; pd_entry_t *pde; pt_entry_t *pte; boolean_t pv_lists_locked; if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; pde = pmap_pde(pmap, sva); if ((*pde & PG_V) == 0) continue; if ((*pde & PG_PS) != 0) { if ((*pde & PG_W) == 0) panic("pmap_unwire: pde %#jx is missing PG_W", (uintmax_t)*pde); /* * Are we unwiring the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == pdnxt && eva >= pdnxt) { /* * Regardless of whether a pde (or pte) is 32 * or 64 bits in size, PG_W is among the least * significant 32 bits. */ atomic_clear_int((u_int *)pde, PG_W); pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; continue; } else { if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { PMAP_UNLOCK(pmap); /* Repeat sva. 
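The try-lock on the pvh_global_lock failed while the pmap lock was held; the pmap lock is dropped, the locks are reacquired in the safe order at "resume", and this PDE is re-examined starting from sva.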
*/ goto resume; } sched_pin(); } if (!pmap_demote_pde(pmap, pde, sva)) panic("pmap_unwire: demotion failed"); } } if (pdnxt > eva) pdnxt = eva; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { if ((*pte & PG_V) == 0) continue; if ((*pte & PG_W) == 0) panic("pmap_unwire: pte %#jx is missing PG_W", (uintmax_t)*pte); /* * PG_W must be cleared atomically. Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. * * PG_W is among the least significant 32 bits. */ atomic_clear_int((u_int *)pte, PG_W); pmap->pm_stats.wired_count--; } } if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { struct spglist free; vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t pdnxt; if (dst_addr != src_addr) return; if (!pmap_is_current(src_pmap)) return; rw_wlock(&pvh_global_lock); if (dst_pmap < src_pmap) { PMAP_LOCK(dst_pmap); PMAP_LOCK(src_pmap); } else { PMAP_LOCK(src_pmap); PMAP_LOCK(dst_pmap); } sched_pin(); for (addr = src_addr; addr < end_addr; addr = pdnxt) { pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; pd_entry_t srcptepaddr; u_int ptepindex; KASSERT(addr < UPT_MIN_ADDRESS, ("pmap_copy: invalid to pmap_copy page tables")); pdnxt = (addr + NBPDR) & ~PDRMASK; if (pdnxt < addr) pdnxt = end_addr; ptepindex = addr >> PDRSHIFT; srcptepaddr = src_pmap->pm_pdir[ptepindex]; if (srcptepaddr == 0) continue; if (srcptepaddr & PG_PS) { if ((addr & PDRMASK) != 0 || addr + NBPDR > end_addr) continue; if (dst_pmap->pm_pdir[ptepindex] == 0 && ((srcptepaddr & PG_MANAGED) == 0 || pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr & PG_PS_FRAME))) { dst_pmap->pm_pdir[ptepindex] = srcptepaddr & ~PG_W; dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; pmap_pde_mappings++; } continue; } srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME); KASSERT(srcmpte->wire_count > 0, ("pmap_copy: source page table page is unused")); if (pdnxt > end_addr) pdnxt = end_addr; src_pte = vtopte(addr); while (addr < pdnxt) { pt_entry_t ptetemp; ptetemp = *src_pte; /* * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { dstmpte = pmap_allocpte(dst_pmap, addr, PMAP_ENTER_NOSLEEP); if (dstmpte == NULL) goto out; dst_pte = pmap_pte_quick(dst_pmap, addr); if (*dst_pte == 0 && pmap_try_insert_pv_entry(dst_pmap, addr, PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) { /* * Clear the wired, modified, and * accessed (referenced) bits * during the copy. */ *dst_pte = ptetemp & ~(PG_W | PG_M | PG_A); dst_pmap->pm_stats.resident_count++; } else { SLIST_INIT(&free); if (pmap_unwire_ptp(dst_pmap, dstmpte, &free)) { pmap_invalidate_page(dst_pmap, addr); pmap_free_zero_pages(&free); } goto out; } if (dstmpte->wire_count >= srcmpte->wire_count) break; } addr += PAGE_SIZE; src_pte++; } } out: sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(src_pmap); PMAP_UNLOCK(dst_pmap); } +/* + * Zero 1 page of virtual memory mapped from a hardware page by the caller. 
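+ * The fastest method available is chosen at run time: on SSE2-capable
+ * CPUs sse2_pagezero() uses non-temporal stores that bypass the caches,
+ * other 686-class CPUs use i686_pagezero(), which avoids rewriting words
+ * that are already zero, and everything else falls back to bzero().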
+ */ static __inline void pagezero(void *page) { #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) { #if defined(CPU_ENABLE_SSE) if (cpu_feature & CPUID_SSE2) sse2_pagezero(page); else #endif i686_pagezero(page); } else #endif bzero(page, PAGE_SIZE); } /* - * pmap_zero_page zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. + * Zero the specified hardware page. */ void pmap_zero_page(vm_page_t m) { struct sysmaps *sysmaps; sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP2) panic("pmap_zero_page: CMAP2 busy"); sched_pin(); *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0); invlcaddr(sysmaps->CADDR2); pagezero(sysmaps->CADDR2); *sysmaps->CMAP2 = 0; sched_unpin(); mtx_unlock(&sysmaps->lock); } /* - * pmap_zero_page_area zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. - * - * off and size may not cover an area beyond a single hardware page. + * Zero an area within a single hardware page. off and size must not + * cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { struct sysmaps *sysmaps; sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP2) panic("pmap_zero_page_area: CMAP2 busy"); sched_pin(); *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0); invlcaddr(sysmaps->CADDR2); if (off == 0 && size == PAGE_SIZE) pagezero(sysmaps->CADDR2); else bzero((char *)sysmaps->CADDR2 + off, size); *sysmaps->CMAP2 = 0; sched_unpin(); mtx_unlock(&sysmaps->lock); } /* - * pmap_zero_page_idle zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. This - * is intended to be called from the vm_pagezero process only and - * outside of Giant. + * Zero the specified hardware page in a way that minimizes cache thrashing. + * This is intended to be called from the vm_pagezero process only and + * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { if (*CMAP3) panic("pmap_zero_page_idle: CMAP3 busy"); sched_pin(); *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0); invlcaddr(CADDR3); pagezero(CADDR3); *CMAP3 = 0; sched_unpin(); } /* - * pmap_copy_page copies the specified (machine independent) - * page by mapping the page into virtual memory and using - * bcopy to copy the page, one machine dependent page at a - * time. + * Copy 1 specified hardware page to another. 
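+ * The source page is mapped read-only at the per-CPU CADDR1 address and
+ * the destination read-write at CADDR2; sched_pin() keeps the thread on
+ * one CPU so that those per-CPU mappings remain valid across the bcopy().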
*/ void pmap_copy_page(vm_page_t src, vm_page_t dst) { struct sysmaps *sysmaps; sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP1) panic("pmap_copy_page: CMAP1 busy"); if (*sysmaps->CMAP2) panic("pmap_copy_page: CMAP2 busy"); sched_pin(); *sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A | pmap_cache_bits(src->md.pat_mode, 0); invlcaddr(sysmaps->CADDR1); *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M | pmap_cache_bits(dst->md.pat_mode, 0); invlcaddr(sysmaps->CADDR2); bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE); *sysmaps->CMAP1 = 0; *sysmaps->CMAP2 = 0; sched_unpin(); mtx_unlock(&sysmaps->lock); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { struct sysmaps *sysmaps; vm_page_t a_pg, b_pg; char *a_cp, *b_cp; vm_offset_t a_pg_offset, b_pg_offset; int cnt; sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP1 != 0) panic("pmap_copy_pages: CMAP1 busy"); if (*sysmaps->CMAP2 != 0) panic("pmap_copy_pages: CMAP2 busy"); sched_pin(); while (xfersize > 0) { a_pg = ma[a_offset >> PAGE_SHIFT]; a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); b_pg = mb[b_offset >> PAGE_SHIFT]; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); *sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(a_pg) | PG_A | pmap_cache_bits(a_pg->md.pat_mode, 0); invlcaddr(sysmaps->CADDR1); *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(b_pg) | PG_A | PG_M | pmap_cache_bits(b_pg->md.pat_mode, 0); invlcaddr(sysmaps->CADDR2); a_cp = sysmaps->CADDR1 + a_pg_offset; b_cp = sysmaps->CADDR2 + b_pg_offset; bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } *sysmaps->CMAP1 = 0; *sysmaps->CMAP2 = 0; sched_unpin(); mtx_unlock(&sysmaps->lock); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct md_page *pvh; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } } rw_wunlock(&pvh_global_lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { int count; count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); rw_wlock(&pvh_global_lock); count = pmap_pvh_wired_mappings(&m->md, count); if ((m->flags & PG_FICTITIOUS) == 0) { count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), count); } rw_wunlock(&pvh_global_lock); return (count); } /* * pmap_pvh_wired_mappings: * * Return the updated number "count" of managed mappings that are wired. 
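 * That is, the caller passes in a running total and gets it back
 * incremented by the number of wired mappings found on the given pv list.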
*/ static int pmap_pvh_wired_mappings(struct md_page *pvh, int count) { pmap_t pmap; pt_entry_t *pte; pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & PG_W) != 0) count++; PMAP_UNLOCK(pmap); } sched_unpin(); return (count); } /* * Returns TRUE if the given page is mapped individually or as part of * a 4mpage. Otherwise, returns FALSE. */ boolean_t pmap_page_is_mapped(vm_page_t m) { boolean_t rv; if ((m->oflags & VPO_UNMANAGED) != 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = !TAILQ_EMPTY(&m->md.pv_list) || ((m->flags & PG_FICTITIOUS) == 0 && !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); rw_wunlock(&pvh_global_lock); return (rv); } /* * Remove all pages from specified address space * this aids process exit speeds. Also, this code * is special cased for current process only, but * can have the more generic (and slightly slower) * mode enabled. This is much faster than pmap_remove * in the case of running down an entire address space. */ void pmap_remove_pages(pmap_t pmap) { pt_entry_t *pte, tpte; vm_page_t m, mpte, mt; pv_entry_t pv; struct md_page *pvh; struct pv_chunk *pc, *npc; struct spglist free; int field, idx; int32_t bit; uint32_t inuse, bitmask; int allfree; if (pmap != PCPU_GET(curpmap)) { printf("warning: pmap_remove_pages called with non-current pmap\n"); return; } SLIST_INIT(&free); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); sched_pin(); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { KASSERT(pc->pc_pmap == pmap, ("Wrong pmap %p %p", pmap, pc->pc_pmap)); allfree = 1; for (field = 0; field < _NPCM; field++) { inuse = ~pc->pc_map[field] & pc_freemask[field]; while (inuse != 0) { bit = bsfl(inuse); bitmask = 1UL << bit; idx = field * 32 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; pte = pmap_pde(pmap, pv->pv_va); tpte = *pte; if ((tpte & PG_PS) == 0) { pte = vtopte(pv->pv_va); tpte = *pte & ~PG_PTE_PAT; } if (tpte == 0) { printf( "TPTE at %p IS ZERO @ VA %08x\n", pte, pv->pv_va); panic("bad pte"); } /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & PG_W) { allfree = 0; continue; } m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); KASSERT(m->phys_addr == (tpte & PG_FRAME), ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); pte_clear(pte); /* * Update the vm_page_t clean/reference bits. 
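 * A mapping is considered to have dirtied its page only when both PG_M
 * and PG_RW are set; a PG_M bit observed on a write-protected mapping is
 * not trusted and is ignored.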
*/ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((tpte & PG_PS) != 0) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) vm_page_dirty(mt); } else vm_page_dirty(m); } /* Mark free */ PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; pc->pc_map[field] |= bitmask; if ((tpte & PG_PS) != 0) { pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; pvh = pa_to_pvh(tpte & PG_PS_FRAME); TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) if (TAILQ_EMPTY(&mt->md.pv_list)) vm_page_aflag_clear(mt, PGA_WRITEABLE); } mpte = pmap_lookup_pt_page(pmap, pv->pv_va); if (mpte != NULL) { pmap_remove_pt_page(pmap, mpte); pmap->pm_stats.resident_count--; KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pages: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, &free, FALSE); atomic_subtract_int(&vm_cnt.v_wire_count, 1); } } else { pmap->pm_stats.resident_count--; TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } pmap_unuse_pt(pmap, pv->pv_va, &free); } } } if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } sched_unpin(); pmap_invalidate_all(pmap); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = pmap_is_modified_pvh(&m->md) || ((m->flags & PG_FICTITIOUS) == 0 && pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); rw_wunlock(&pvh_global_lock); return (rv); } /* * Returns TRUE if any of the given mappings were used to modify * physical memory. Otherwise, returns FALSE. Both page and 4mpage * mappings are supported. */ static boolean_t pmap_is_modified_pvh(struct md_page *pvh) { pv_entry_t pv; pt_entry_t *pte; pmap_t pmap; boolean_t rv; rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); rv = (*pte & (PG_M | PG_RW)) == (PG_M | PG_RW); PMAP_UNLOCK(pmap); if (rv) break; } sched_unpin(); return (rv); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is eligible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pd_entry_t *pde; pt_entry_t *pte; boolean_t rv; rv = FALSE; PMAP_LOCK(pmap); pde = pmap_pde(pmap, addr); if (*pde != 0 && (*pde & PG_PS) == 0) { pte = vtopte(addr); rv = *pte == 0; } PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. 
*/ boolean_t pmap_is_referenced(vm_page_t m) { boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); rw_wlock(&pvh_global_lock); rv = pmap_is_referenced_pvh(&m->md) || ((m->flags & PG_FICTITIOUS) == 0 && pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); rw_wunlock(&pvh_global_lock); return (rv); } /* * Returns TRUE if any of the given mappings were referenced and FALSE * otherwise. Both page and 4mpage mappings are supported. */ static boolean_t pmap_is_referenced_pvh(struct md_page *pvh) { pv_entry_t pv; pt_entry_t *pte; pmap_t pmap; boolean_t rv; rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V); PMAP_UNLOCK(pmap); if (rv) break; } sched_unpin(); return (rv); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { struct md_page *pvh; pv_entry_t next_pv, pv; pmap_t pmap; pd_entry_t *pde; pt_entry_t oldpte, *pte; vm_offset_t va; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); if ((*pde & PG_RW) != 0) (void)pmap_demote_pde(pmap, pde, va); PMAP_UNLOCK(pmap); } small_mappings: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_clear_write: found" " a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); retry: oldpte = *pte; if ((oldpte & PG_RW) != 0) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_RW and PG_M are among the least * significant 32 bits. */ if (!atomic_cmpset_int((u_int *)pte, oldpte, oldpte & ~(PG_RW | PG_M))) goto retry; if ((oldpte & PG_M) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); sched_unpin(); rw_wunlock(&pvh_global_lock); } #define PMAP_TS_REFERENCED_MAX 5 /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. 
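 *
 * As an illustration, if a page is mapped by three pmaps and PG_A is set
 * in two of those mappings, a return value of 2 is expected, subject to
 * the superpage policy described in the function body and to the
 * PMAP_TS_REFERENCED_MAX cutoff.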
*/ int pmap_ts_referenced(vm_page_t m) { struct md_page *pvh; pv_entry_t pv, pvf; pmap_t pmap; pd_entry_t *pde; pt_entry_t *pte; vm_paddr_t pa; int rtval = 0; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); pa = VM_PAGE_TO_PHYS(m); pvh = pa_to_pvh(pa); rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0 || (pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) goto small_mappings; pv = pvf; do { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); if ((*pde & PG_A) != 0) { /* * Since this reference bit is shared by either 1024 * or 512 4KB pages, it should not be cleared every * time it is tested. Apply a simple "hash" function * on the physical page number, the virtual superpage * number, and the pmap address to select one 4KB page * out of the 1024 or 512 on which testing the * reference bit will result in clearing that bit. * This function is designed to avoid the selection of * the same 4KB page for every 2- or 4MB page mapping. * * On demotion, a mapping that hasn't been referenced * is simply destroyed. To avoid the possibility of a * subsequent page fault on a demoted wired mapping, * always leave its reference bit set. Moreover, * since the superpage is wired, the current state of * its reference bit won't affect page replacement. */ if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PDRSHIFT) ^ (uintptr_t)pmap) & (NPTEPG - 1)) == 0 && (*pde & PG_W) == 0) { atomic_clear_int((u_int *)pde, PG_A); pmap_invalidate_page(pmap, pv->pv_va); } rtval++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); } if (rtval >= PMAP_TS_REFERENCED_MAX) goto out; } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); small_mappings: if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced: found a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & PG_A) != 0) { atomic_clear_int((u_int *)pte, PG_A); pmap_invalidate_page(pmap, pv->pv_va); rtval++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && rtval < PMAP_TS_REFERENCED_MAX); out: sched_unpin(); rw_wunlock(&pvh_global_lock); return (rtval); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. 
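 *
 * In outline: only MADV_DONTNEED and MADV_FREE are acted upon.  For a
 * mapping that is both writeable and modified, PG_M and PG_A are cleared,
 * and MADV_DONTNEED first transfers the modification to the page's dirty
 * field so that later pmap_is_modified() calls can be avoided; for a
 * mapping that is merely referenced, only PG_A is cleared.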
*/ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { pd_entry_t oldpde, *pde; pt_entry_t *pte; vm_offset_t pdnxt; vm_page_t m; boolean_t anychanged, pv_lists_locked; if (advice != MADV_DONTNEED && advice != MADV_FREE) return; if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } anychanged = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; pde = pmap_pde(pmap, sva); oldpde = *pde; if ((oldpde & PG_V) == 0) continue; else if ((oldpde & PG_PS) != 0) { if ((oldpde & PG_MANAGED) == 0) continue; if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { if (anychanged) pmap_invalidate_all(pmap); PMAP_UNLOCK(pmap); goto resume; } sched_pin(); } if (!pmap_demote_pde(pmap, pde, sva)) { /* * The large page mapping was destroyed. */ continue; } /* * Unless the page mappings are wired, remove the * mapping to a single page so that a subsequent * access may repromote. Since the underlying page * table page is fully populated, this removal never * frees a page table page. */ if ((oldpde & PG_W) == 0) { pte = pmap_pte_quick(pmap, sva); KASSERT((*pte & PG_V) != 0, ("pmap_advise: invalid PTE")); pmap_remove_pte(pmap, pte, sva, NULL); anychanged = TRUE; } } if (pdnxt > eva) pdnxt = eva; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED | PG_V)) continue; else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if (advice == MADV_DONTNEED) { /* * Future calls to pmap_is_modified() * can be avoided by making the page * dirty now. */ m = PHYS_TO_VM_PAGE(*pte & PG_FRAME); vm_page_dirty(m); } atomic_clear_int((u_int *)pte, PG_M | PG_A); } else if ((*pte & PG_A) != 0) atomic_clear_int((u_int *)pte, PG_A); else continue; if ((*pte & PG_G) != 0) pmap_invalidate_page(pmap, sva); else anychanged = TRUE; } } if (anychanged) pmap_invalidate_all(pmap); if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { struct md_page *pvh; pv_entry_t next_pv, pv; pmap_t pmap; pd_entry_t oldpde, *pde; pt_entry_t oldpte, *pte; vm_offset_t va; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_RW) != 0) { if (pmap_demote_pde(pmap, pde, va)) { if ((oldpde & PG_W) == 0) { /* * Write protect the mapping to a * single page so that a subsequent * write access may repromote. */ va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME); pte = pmap_pte_quick(pmap, va); oldpte = *pte; if ((oldpte & PG_V) != 0) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_RW and PG_M are among the least * significant 32 bits. 
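 * The loop below is a standard compare-and-set retry: it re-reads the
 * PTE and retries until PG_RW and PG_M are cleared without losing a
 * concurrent hardware update of the entry.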
*/ while (!atomic_cmpset_int((u_int *)pte, oldpte, oldpte & ~(PG_M | PG_RW))) oldpte = *pte; vm_page_dirty(m); pmap_invalidate_page(pmap, va); } } } } PMAP_UNLOCK(pmap); } small_mappings: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found" " a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_M is among the least significant * 32 bits. */ atomic_clear_int((u_int *)pte, PG_M); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } sched_unpin(); rw_wunlock(&pvh_global_lock); } /* * Miscellaneous support routines follow */ /* Adjust the cache mode for a 4KB page mapped via a PTE. */ static __inline void pmap_pte_attr(pt_entry_t *pte, int cache_bits) { u_int opte, npte; /* * The cache mode bits are all in the low 32-bits of the * PTE, so we can just spin on updating the low 32-bits. */ do { opte = *(u_int *)pte; npte = opte & ~PG_PTE_CACHE; npte |= cache_bits; } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte)); } /* Adjust the cache mode for a 2/4MB page mapped via a PDE. */ static __inline void pmap_pde_attr(pd_entry_t *pde, int cache_bits) { u_int opde, npde; /* * The cache mode bits are all in the low 32-bits of the * PDE, so we can just spin on updating the low 32-bits. */ do { opde = *(u_int *)pde; npde = opde & ~PG_PDE_CACHE; npde |= cache_bits; } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde)); } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. */ void * pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) { struct pmap_preinit_mapping *ppim; vm_offset_t va, offset; vm_size_t tmpsize; int i; offset = pa & PAGE_MASK; size = round_page(offset + size); pa = pa & PG_FRAME; if (pa < KERNLOAD && pa + size <= KERNLOAD) va = KERNBASE + pa; else if (!pmap_initialized) { va = 0; for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == 0) { ppim->pa = pa; ppim->sz = size; ppim->mode = mode; ppim->va = virtual_avail; virtual_avail += size; va = ppim->va; break; } } if (va == 0) panic("%s: too many preinit mappings", __func__); } else { /* * If we have a preinit mapping, re-use it. 
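 * A preinit mapping is reused only on an exact match of physical
 * address, size, and cache mode; otherwise fresh KVA is allocated.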
*/ for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->pa == pa && ppim->sz == size && ppim->mode == mode) return ((void *)(ppim->va + offset)); } va = kva_alloc(size); if (va == 0) panic("%s: Couldn't allocate KVA", __func__); } for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); pmap_invalidate_range(kernel_pmap, va, va + tmpsize); pmap_invalidate_cache_range(va, va + size, FALSE); return ((void *)(va + offset)); } void * pmap_mapdev(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); } void pmap_unmapdev(vm_offset_t va, vm_size_t size) { struct pmap_preinit_mapping *ppim; vm_offset_t offset; int i; if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD) return; offset = va & PAGE_MASK; size = round_page(offset + size); va = trunc_page(va); for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == va && ppim->sz == size) { if (pmap_initialized) return; ppim->pa = 0; ppim->va = 0; ppim->sz = 0; ppim->mode = 0; if (va + size == virtual_avail) virtual_avail = va; return; } } if (pmap_initialized) kva_free(va, size); } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { m->md.pat_mode = ma; if ((m->flags & PG_FICTITIOUS) != 0) return; /* * If "m" is a normal page, flush it from the cache. * See pmap_invalidate_cache_range(). * * First, try to find an existing mapping of the page by sf * buffer. sf_buf_invalidate_cache() modifies mapping and * flushes the cache. */ if (sf_buf_invalidate_cache(m)) return; /* * If page is not mapped by sf buffer, but CPU does not * support self snoop, map the page transient and do * invalidation. In the worst case, whole cache is flushed by * pmap_invalidate_cache_range(). */ if ((cpu_feature & CPUID_SS) == 0) pmap_flush_page(m); } static void pmap_flush_page(vm_page_t m) { struct sysmaps *sysmaps; vm_offset_t sva, eva; bool useclflushopt; useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0; if (useclflushopt || (cpu_feature & CPUID_CLFSH) != 0) { sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP2) panic("pmap_flush_page: CMAP2 busy"); sched_pin(); *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0); invlcaddr(sysmaps->CADDR2); sva = (vm_offset_t)sysmaps->CADDR2; eva = sva + PAGE_SIZE; /* * Use mfence despite the ordering implied by * mtx_{un,}lock() because clflush on non-Intel CPUs * and clflushopt are not guaranteed to be ordered by * any other instruction. */ if (useclflushopt || cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); for (; sva < eva; sva += cpu_clflush_line_size) { if (useclflushopt) clflushopt(sva); else clflush(sva); } if (useclflushopt || cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); *sysmaps->CMAP2 = 0; sched_unpin(); mtx_unlock(&sysmaps->lock); } else pmap_invalidate_cache(); } /* * Changes the specified virtual address range's memory type to that given by * the parameter "mode". The specified virtual address range must be * completely contained within either the kernel map. * * Returns zero if the change completed successfully, and either EINVAL or * ENOMEM if the change failed. 
Specifically, EINVAL is returned if some part * of the virtual address range was not mapped, and ENOMEM is returned if * there was insufficient memory available to complete the change. */ int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) { vm_offset_t base, offset, tmpva; pd_entry_t *pde; pt_entry_t *pte; int cache_bits_pte, cache_bits_pde; boolean_t changed; base = trunc_page(va); offset = va & PAGE_MASK; size = round_page(offset + size); /* * Only supported on kernel virtual addresses above the recursive map. */ if (base < VM_MIN_KERNEL_ADDRESS) return (EINVAL); cache_bits_pde = pmap_cache_bits(mode, 1); cache_bits_pte = pmap_cache_bits(mode, 0); changed = FALSE; /* * Pages that aren't mapped aren't supported. Also break down * 2/4MB pages into 4KB pages if required. */ PMAP_LOCK(kernel_pmap); for (tmpva = base; tmpva < base + size; ) { pde = pmap_pde(kernel_pmap, tmpva); if (*pde == 0) { PMAP_UNLOCK(kernel_pmap); return (EINVAL); } if (*pde & PG_PS) { /* * If the current 2/4MB page already has * the required memory type, then we need not * demote this page. Just increment tmpva to * the next 2/4MB page frame. */ if ((*pde & PG_PDE_CACHE) == cache_bits_pde) { tmpva = trunc_4mpage(tmpva) + NBPDR; continue; } /* * If the current offset aligns with a 2/4MB * page frame and there is at least 2/4MB left * within the range, then we need not break * down this page into 4KB pages. */ if ((tmpva & PDRMASK) == 0 && tmpva + PDRMASK < base + size) { tmpva += NBPDR; continue; } if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) { PMAP_UNLOCK(kernel_pmap); return (ENOMEM); } } pte = vtopte(tmpva); if (*pte == 0) { PMAP_UNLOCK(kernel_pmap); return (EINVAL); } tmpva += PAGE_SIZE; } PMAP_UNLOCK(kernel_pmap); /* * Ok, all the pages exist, so run through them updating their * cache mode if required. */ for (tmpva = base; tmpva < base + size; ) { pde = pmap_pde(kernel_pmap, tmpva); if (*pde & PG_PS) { if ((*pde & PG_PDE_CACHE) != cache_bits_pde) { pmap_pde_attr(pde, cache_bits_pde); changed = TRUE; } tmpva = trunc_4mpage(tmpva) + NBPDR; } else { pte = vtopte(tmpva); if ((*pte & PG_PTE_CACHE) != cache_bits_pte) { pmap_pte_attr(pte, cache_bits_pte); changed = TRUE; } tmpva += PAGE_SIZE; } } /* * Flush CPU caches to make sure any data isn't cached that * shouldn't be, etc. */ if (changed) { pmap_invalidate_range(kernel_pmap, base, tmpva); pmap_invalidate_cache_range(base, tmpva, FALSE); } return (0); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pd_entry_t *pdep; pt_entry_t *ptep, pte; vm_paddr_t pa; int val; PMAP_LOCK(pmap); retry: pdep = pmap_pde(pmap, addr); if (*pdep != 0) { if (*pdep & PG_PS) { pte = *pdep; /* Compute the physical address of the 4KB page. */ pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) & PG_FRAME; val = MINCORE_SUPER; } else { ptep = pmap_pte(pmap, addr); pte = *ptep; pmap_pte_release(ptep); pa = pte & PG_FRAME; val = 0; } } else { pte = 0; pa = 0; val = 0; } if ((pte & PG_V) != 0) { val |= MINCORE_INCORE; if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((pte & PG_A) != 0) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. 
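 * If vm_page_pa_tryrelock() had to drop the pmap lock in order to switch
 * page locks, the PDE/PTE may have changed underneath us, so the lookup
 * is redone from "retry".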
*/ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } void pmap_activate(struct thread *td) { pmap_t pmap, oldpmap; u_int cpuid; u_int32_t cr3; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); cpuid = PCPU_GET(cpuid); #if defined(SMP) CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active); CPU_SET_ATOMIC(cpuid, &pmap->pm_active); #else CPU_CLR(cpuid, &oldpmap->pm_active); CPU_SET(cpuid, &pmap->pm_active); #endif #if defined(PAE) || defined(PAE_TABLES) cr3 = vtophys(pmap->pm_pdpt); #else cr3 = vtophys(pmap->pm_pdir); #endif /* * pmap_activate is for the current thread on the current cpu */ td->td_pcb->pcb_cr3 = cr3; load_cr3(cr3); PCPU_SET(curpmap, pmap); critical_exit(); } void pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) { } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { vm_offset_t superpage_offset; if (size < NBPDR) return; if (object != NULL && (object->flags & OBJ_COLORED) != 0) offset += ptoa(object->pg_color); superpage_offset = offset & PDRMASK; if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR || (*addr & PDRMASK) == superpage_offset) return; if ((*addr & PDRMASK) < superpage_offset) *addr = (*addr & ~PDRMASK) + superpage_offset; else *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; } vm_offset_t pmap_quick_enter_page(vm_page_t m) { vm_offset_t qaddr; pt_entry_t *pte; critical_enter(); qaddr = PCPU_GET(qmap_addr); pte = vtopte(qaddr); KASSERT(*pte == 0, ("pmap_quick_enter_page: PTE busy")); *pte = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(pmap_page_get_memattr(m), 0); invlpg(qaddr); return (qaddr); } void pmap_quick_remove_page(vm_offset_t addr) { vm_offset_t qaddr; pt_entry_t *pte; qaddr = PCPU_GET(qmap_addr); pte = vtopte(qaddr); KASSERT(*pte != 0, ("pmap_quick_remove_page: PTE not in use")); KASSERT(addr == qaddr, ("pmap_quick_remove_page: invalid address")); *pte = 0; critical_exit(); } #if defined(PMAP_DEBUG) pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte = 0; int index; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { if (p->p_pid != pid) continue; if (p->p_vmspace) { int i,j; index = 0; pmap = vmspace_pmap(p->p_vmspace); for (i = 0; i < NPDEPTD; i++) { pd_entry_t *pde; pt_entry_t *pte; vm_offset_t base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { for (j = 0; j < NPTEPG; j++) { vm_offset_t va = base + (j << PAGE_SHIFT); if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } sx_sunlock(&allproc_lock); return (npte); } pte = pmap_pte(pmap, va); if (pte && pmap_pte_v(pte)) { pt_entry_t pa; vm_page_t m; pa = *pte; m = PHYS_TO_VM_PAGE(pa & PG_FRAME); printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } } } sx_sunlock(&allproc_lock); return (npte); } #endif Index: projects/clang390-import/sys/i386/i386/support.s =================================================================== --- projects/clang390-import/sys/i386/i386/support.s (revision 305016) +++ projects/clang390-import/sys/i386/i386/support.s (revision 305017) @@ -1,834 +1,841 @@ /*- * Copyright (c) 1993 The Regents of the University of California. 
* All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_npx.h" #include #include #include #include #include "assym.s" #define IDXSHIFT 10 .text /* * bcopy family * void bzero(void *buf, u_int len) */ ENTRY(bzero) pushl %edi movl 8(%esp),%edi movl 12(%esp),%ecx xorl %eax,%eax shrl $2,%ecx cld rep stosl movl 12(%esp),%ecx andl $3,%ecx rep stosb popl %edi ret END(bzero) ENTRY(sse2_pagezero) pushl %ebx movl 8(%esp),%ecx movl %ecx,%eax addl $4096,%eax xor %ebx,%ebx + jmp 1f + /* + * The loop takes 14 bytes. Ensure that it doesn't cross a 16-byte + * cache line. + */ + .p2align 4,0x90 1: movnti %ebx,(%ecx) - addl $4,%ecx + movnti %ebx,4(%ecx) + addl $8,%ecx cmpl %ecx,%eax jne 1b sfence popl %ebx ret END(sse2_pagezero) ENTRY(i686_pagezero) pushl %edi pushl %ebx movl 12(%esp),%edi movl $1024,%ecx cld ALIGN_TEXT 1: xorl %eax,%eax repe scasl jnz 2f popl %ebx popl %edi ret ALIGN_TEXT 2: incl %ecx subl $4,%edi movl %ecx,%edx cmpl $16,%ecx jge 3f movl %edi,%ebx andl $0x3f,%ebx shrl %ebx shrl %ebx movl $16,%ecx subl %ebx,%ecx 3: subl %ecx,%edx rep stosl movl %edx,%ecx testl %edx,%edx jnz 1b popl %ebx popl %edi ret END(i686_pagezero) /* fillw(pat, base, cnt) */ ENTRY(fillw) pushl %edi movl 8(%esp),%eax movl 12(%esp),%edi movl 16(%esp),%ecx cld rep stosw popl %edi ret END(fillw) ENTRY(bcopyb) pushl %esi pushl %edi movl 12(%esp),%esi movl 16(%esp),%edi movl 20(%esp),%ecx movl %edi,%eax subl %esi,%eax cmpl %ecx,%eax /* overlapping && src < dst? */ jb 1f cld /* nope, copy forwards */ rep movsb popl %edi popl %esi ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards. */ addl %ecx,%esi decl %edi decl %esi std rep movsb popl %edi popl %esi cld ret END(bcopyb) /* * bcopy(src, dst, cnt) * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ ENTRY(bcopy) pushl %ebp movl %esp,%ebp pushl %esi pushl %edi movl 8(%ebp),%esi movl 12(%ebp),%edi movl 16(%ebp),%ecx movl %edi,%eax subl %esi,%eax cmpl %ecx,%eax /* overlapping && src < dst? */ jb 1f shrl $2,%ecx /* copy by 32-bit words */ cld /* nope, copy forwards */ rep movsl movl 16(%ebp),%ecx andl $3,%ecx /* any bytes left? 
*/ rep movsb popl %edi popl %esi popl %ebp ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards */ addl %ecx,%esi decl %edi decl %esi andl $3,%ecx /* any fractional bytes? */ std rep movsb movl 16(%ebp),%ecx /* copy remainder by 32-bit words */ shrl $2,%ecx subl $3,%esi subl $3,%edi rep movsl popl %edi popl %esi cld popl %ebp ret END(bcopy) /* * Note: memcpy does not support overlapping copies */ ENTRY(memcpy) pushl %edi pushl %esi movl 12(%esp),%edi movl 16(%esp),%esi movl 20(%esp),%ecx movl %edi,%eax shrl $2,%ecx /* copy by 32-bit words */ cld /* nope, copy forwards */ rep movsl movl 20(%esp),%ecx andl $3,%ecx /* any bytes left? */ rep movsb popl %esi popl %edi ret END(memcpy) /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ /* * Access user memory from inside the kernel. These routines and possibly * the math- and DOS emulators should be the only places that do this. * * We have to access the memory with user's permissions, so use a segment * selector with RPL 3. For writes to user space we have to additionally * check the PTE for write permission, because the 386 does not check * write permissions when we are executing with EPL 0. The 486 does check * this if the WP bit is set in CR0, so we can use a simpler version here. * * These routines set curpcb->pcb_onfault for the time they execute. When a * protection violation occurs inside the functions, the trap handler * returns to *curpcb->pcb_onfault instead of the function. */ /* * copyout(from_kernel, to_user, len) - MP SAFE */ ENTRY(copyout) movl PCPU(CURPCB),%eax movl $copyout_fault,PCB_ONFAULT(%eax) pushl %esi pushl %edi pushl %ebx movl 16(%esp),%esi movl 20(%esp),%edi movl 24(%esp),%ebx testl %ebx,%ebx /* anything to do? */ jz done_copyout /* * Check explicitly for non-user addresses. If 486 write protection * is being used, this check is essential because we are in kernel * mode so the h/w does not provide any protection against writing * kernel addresses. */ /* * First, prevent address wrapping. */ movl %edi,%eax addl %ebx,%eax jc copyout_fault /* * XXX STOP USING VM_MAXUSER_ADDRESS. * It is an end address, not a max, so every time it is used correctly it * looks like there is an off by one error, and of course it caused an off * by one error in several places. 
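 * In the check below %eax holds "to + len", and the strictly-above test
 * (ja) therefore accepts a copy that ends exactly at VM_MAXUSER_ADDRESS,
 * which is consistent with it being an end address.
 */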
*/ cmpl $VM_MAXUSER_ADDRESS,%eax ja copyout_fault /* bcopy(%esi, %edi, %ebx) */ movl %ebx,%ecx shrl $2,%ecx cld rep movsl movb %bl,%cl andb $3,%cl rep movsb done_copyout: popl %ebx popl %edi popl %esi xorl %eax,%eax movl PCPU(CURPCB),%edx movl %eax,PCB_ONFAULT(%edx) ret END(copyout) ALIGN_TEXT copyout_fault: popl %ebx popl %edi popl %esi movl PCPU(CURPCB),%edx movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax ret /* * copyin(from_user, to_kernel, len) - MP SAFE */ ENTRY(copyin) movl PCPU(CURPCB),%eax movl $copyin_fault,PCB_ONFAULT(%eax) pushl %esi pushl %edi movl 12(%esp),%esi /* caddr_t from */ movl 16(%esp),%edi /* caddr_t to */ movl 20(%esp),%ecx /* size_t len */ /* * make sure address is valid */ movl %esi,%edx addl %ecx,%edx jc copyin_fault cmpl $VM_MAXUSER_ADDRESS,%edx ja copyin_fault movb %cl,%al shrl $2,%ecx /* copy longword-wise */ cld rep movsl movb %al,%cl andb $3,%cl /* copy remaining bytes */ rep movsb popl %edi popl %esi xorl %eax,%eax movl PCPU(CURPCB),%edx movl %eax,PCB_ONFAULT(%edx) ret END(copyin) ALIGN_TEXT copyin_fault: popl %edi popl %esi movl PCPU(CURPCB),%edx movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax ret /* * casueword. Compare and set user word. Returns -1 on fault, * 0 on non-faulting access. The current value is in *oldp. */ ALTENTRY(casueword32) ENTRY(casueword) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx /* dst */ movl 8(%esp),%eax /* old */ movl 16(%esp),%ecx /* new */ cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ ja fusufault #ifdef SMP lock #endif cmpxchgl %ecx,(%edx) /* Compare and set. */ /* * The old value is in %eax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. */ movl PCPU(CURPCB),%ecx movl $0,PCB_ONFAULT(%ecx) movl 12(%esp),%edx /* oldp */ movl %eax,(%edx) xorl %eax,%eax ret END(casueword32) END(casueword) /* * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user * memory. */ ALTENTRY(fueword32) ENTRY(fueword) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx /* from */ cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ ja fusufault movl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) movl 8(%esp),%edx movl %eax,(%edx) xorl %eax,%eax ret END(fueword32) END(fueword) /* * fuswintr() and suswintr() are specialized variants of fuword16() and * suword16(), respectively. They are called from the profiling code, * potentially at interrupt time. If they fail, that's okay; good things * will happen later. They always fail for now, until the trap code is * able to deal with this. */ ALTENTRY(suswintr) ENTRY(fuswintr) movl $-1,%eax ret END(suswintr) END(fuswintr) ENTRY(fuword16) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx ja fusufault movzwl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret END(fuword16) ENTRY(fubyte) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-1,%edx ja fusufault movzbl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret END(fubyte) ALIGN_TEXT fusufault: movl PCPU(CURPCB),%ecx xorl %eax,%eax movl %eax,PCB_ONFAULT(%ecx) decl %eax ret /* * Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory. * All these functions are MPSAFE. 
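 *
 * On this 32-bit architecture a "word" is 32 bits, so suword() and
 * suword32() (like fueword()/fueword32() above) share one implementation,
 * entered via ALTENTRY().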
*/ ALTENTRY(suword32) ENTRY(suword) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */ ja fusufault movl 8(%esp),%eax movl %eax,(%edx) xorl %eax,%eax movl PCPU(CURPCB),%ecx movl %eax,PCB_ONFAULT(%ecx) ret END(suword32) END(suword) ENTRY(suword16) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx /* verify address validity */ ja fusufault movw 8(%esp),%ax movw %ax,(%edx) xorl %eax,%eax movl PCPU(CURPCB),%ecx /* restore trashed register */ movl %eax,PCB_ONFAULT(%ecx) ret END(suword16) ENTRY(subyte) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-1,%edx /* verify address validity */ ja fusufault movb 8(%esp),%al movb %al,(%edx) xorl %eax,%eax movl PCPU(CURPCB),%ecx /* restore trashed register */ movl %eax,PCB_ONFAULT(%ecx) ret END(subyte) /* * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE * * copy a string from from to to, stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. */ ENTRY(copyinstr) pushl %esi pushl %edi movl PCPU(CURPCB),%ecx movl $cpystrflt,PCB_ONFAULT(%ecx) movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ movl $VM_MAXUSER_ADDRESS,%eax /* make sure 'from' is within bounds */ subl %esi,%eax jbe cpystrflt /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ cmpl %edx,%eax jae 1f movl %eax,%edx movl %eax,20(%esp) 1: incl %edx cld 2: decl %edx jz 3f lodsb stosb orb %al,%al jnz 2b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp cpystrflt_x 3: /* edx is zero - return ENAMETOOLONG or EFAULT */ cmpl $VM_MAXUSER_ADDRESS,%esi jae cpystrflt 4: movl $ENAMETOOLONG,%eax jmp cpystrflt_x cpystrflt: movl $EFAULT,%eax cpystrflt_x: /* set *lencopied and return %eax */ movl PCPU(CURPCB),%ecx movl $0,PCB_ONFAULT(%ecx) movl 20(%esp),%ecx subl %edx,%ecx movl 24(%esp),%edx testl %edx,%edx jz 1f movl %ecx,(%edx) 1: popl %edi popl %esi ret END(copyinstr) /* * copystr(from, to, maxlen, int *lencopied) - MP SAFE */ ENTRY(copystr) pushl %esi pushl %edi movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ incl %edx cld 1: decl %edx jz 4f lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax 6: /* set *lencopied and return %eax */ movl 20(%esp),%ecx subl %edx,%ecx movl 24(%esp),%edx testl %edx,%edx jz 7f movl %ecx,(%edx) 7: popl %edi popl %esi ret END(copystr) ENTRY(bcmp) pushl %edi pushl %esi movl 12(%esp),%edi movl 16(%esp),%esi movl 20(%esp),%edx movl %edx,%ecx shrl $2,%ecx cld /* compare forwards */ repe cmpsl jne 1f movl %edx,%ecx andl $3,%ecx repe cmpsb 1: setne %al movsbl %al,%eax popl %esi popl %edi ret END(bcmp) /* * Handling of special 386 registers and descriptor tables etc */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) /* reload the descriptor table */ movl 4(%esp),%eax lgdt (%eax) /* flush the prefetch q */ jmp 1f nop 1: /* reload "stale" selectors */ movl $KDSEL,%eax movl %eax,%ds movl %eax,%es movl %eax,%gs movl %eax,%ss movl $KPSEL,%eax movl %eax,%fs /* reload code selector by turning return into intersegmental return */ movl (%esp),%eax pushl %eax movl $KCSEL,4(%esp) MEXITCOUNT lret END(lgdt) /* 
ssdtosd(*ssdp,*sdp) */ ENTRY(ssdtosd) pushl %ebx movl 8(%esp),%ecx movl 8(%ecx),%ebx shll $16,%ebx movl (%ecx),%edx roll $16,%edx movb %dh,%bl movb %dl,%bh rorl $8,%ebx movl 4(%ecx),%eax movw %ax,%dx andl $0xf0000,%eax orl %eax,%ebx movl 12(%esp),%ecx movl %edx,(%ecx) movl %ebx,4(%ecx) popl %ebx ret END(ssdtosd) /* void reset_dbregs() */ ENTRY(reset_dbregs) movl $0,%eax movl %eax,%dr7 /* disable all breakpoints first */ movl %eax,%dr0 movl %eax,%dr1 movl %eax,%dr2 movl %eax,%dr3 movl %eax,%dr6 ret END(reset_dbregs) /*****************************************************************************/ /* setjump, longjump */ /*****************************************************************************/ ENTRY(setjmp) movl 4(%esp),%eax movl %ebx,(%eax) /* save ebx */ movl %esp,4(%eax) /* save esp */ movl %ebp,8(%eax) /* save ebp */ movl %esi,12(%eax) /* save esi */ movl %edi,16(%eax) /* save edi */ movl (%esp),%edx /* get rta */ movl %edx,20(%eax) /* save eip */ xorl %eax,%eax /* return(0); */ ret END(setjmp) ENTRY(longjmp) movl 4(%esp),%eax movl (%eax),%ebx /* restore ebx */ movl 4(%eax),%esp /* restore esp */ movl 8(%eax),%ebp /* restore ebp */ movl 12(%eax),%esi /* restore esi */ movl 16(%eax),%edi /* restore edi */ movl 20(%eax),%edx /* get rta */ movl %edx,(%esp) /* put in return frame */ xorl %eax,%eax /* return(1); */ incl %eax ret END(longjmp) /* * Support for reading MSRs in the safe manner. */ ENTRY(rdmsr_safe) /* int rdmsr_safe(u_int msr, uint64_t *data) */ movl PCPU(CURPCB),%ecx movl $msr_onfault,PCB_ONFAULT(%ecx) movl 4(%esp),%ecx rdmsr movl 8(%esp),%ecx movl %eax,(%ecx) movl %edx,4(%ecx) xorl %eax,%eax movl PCPU(CURPCB),%ecx movl %eax,PCB_ONFAULT(%ecx) ret /* * Support for writing MSRs in the safe manner. */ ENTRY(wrmsr_safe) /* int wrmsr_safe(u_int msr, uint64_t data) */ movl PCPU(CURPCB),%ecx movl $msr_onfault,PCB_ONFAULT(%ecx) movl 4(%esp),%ecx movl 8(%esp),%eax movl 12(%esp),%edx wrmsr xorl %eax,%eax movl PCPU(CURPCB),%ecx movl %eax,PCB_ONFAULT(%ecx) ret /* * MSR operations fault handler */ ALIGN_TEXT msr_onfault: movl PCPU(CURPCB),%ecx movl $0,PCB_ONFAULT(%ecx) movl $EFAULT,%eax ret Index: projects/clang390-import/sys/modules/cloudabi32/Makefile =================================================================== --- projects/clang390-import/sys/modules/cloudabi32/Makefile (revision 305016) +++ projects/clang390-import/sys/modules/cloudabi32/Makefile (revision 305017) @@ -1,37 +1,38 @@ # $FreeBSD$ SYSDIR?=${.CURDIR}/../.. 
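# Note: per-architecture source directories are named sometimes after
# MACHINE_CPUARCH and sometimes after MACHINE (the two can differ, e.g.
# arm64 vs. aarch64), so both are added to the search path below.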
.PATH: ${SYSDIR}/compat/cloudabi32 .PATH: ${SYSDIR}/${MACHINE_CPUARCH}/cloudabi32 +.PATH: ${SYSDIR}/${MACHINE}/cloudabi32 KMOD= cloudabi32 SRCS= cloudabi32_fd.c cloudabi32_module.c cloudabi32_poll.c \ cloudabi32_sock.c cloudabi32_syscalls.c cloudabi32_sysent.c \ cloudabi32_sysvec.c cloudabi32_thread.c OBJS= cloudabi32_vdso_blob.o CLEANFILES=cloudabi32_vdso.o .if ${MACHINE_CPUARCH} == "i386" VDSO_SRCS=${SYSDIR}/contrib/cloudabi/cloudabi_vdso_i686.S OUTPUT_TARGET=elf32-i386-freebsd BINARY_ARCHITECTURE=aarch32 .elif ${MACHINE_CPUARCH} == "amd64" VDSO_SRCS=${SYSDIR}/contrib/cloudabi/cloudabi_vdso_i686_on_64bit.S OUTPUT_TARGET=elf64-x86-64-freebsd BINARY_ARCHITECTURE=i386 .endif cloudabi32_vdso.o: ${VDSO_SRCS} ${CC} -x assembler-with-cpp -m32 -shared -nostdinc -nostdlib \ -Wl,-T${SYSDIR}/compat/cloudabi/cloudabi_vdso.lds \ ${VDSO_SRCS} -o ${.TARGET} cloudabi32_vdso_blob.o: cloudabi32_vdso.o ${OBJCOPY} --input-target binary \ --output-target ${OUTPUT_TARGET} \ --binary-architecture ${BINARY_ARCHITECTURE} \ cloudabi32_vdso.o ${.TARGET} .include Index: projects/clang390-import/sys/modules/cloudabi64/Makefile =================================================================== --- projects/clang390-import/sys/modules/cloudabi64/Makefile (revision 305016) +++ projects/clang390-import/sys/modules/cloudabi64/Makefile (revision 305017) @@ -1,37 +1,38 @@ # $FreeBSD$ SYSDIR?=${.CURDIR}/../.. .PATH: ${SYSDIR}/compat/cloudabi64 .PATH: ${SYSDIR}/${MACHINE_CPUARCH}/cloudabi64 +.PATH: ${SYSDIR}/${MACHINE}/cloudabi64 KMOD= cloudabi64 SRCS= cloudabi64_fd.c cloudabi64_module.c cloudabi64_poll.c \ cloudabi64_sock.c cloudabi64_syscalls.c cloudabi64_sysent.c \ cloudabi64_sysvec.c cloudabi64_thread.c OBJS= cloudabi64_vdso_blob.o CLEANFILES=cloudabi64_vdso.o .if ${MACHINE_CPUARCH} == "aarch64" VDSO_SRCS=${SYSDIR}/contrib/cloudabi/cloudabi_vdso_aarch64.S OUTPUT_TARGET=elf64-littleaarch64 BINARY_ARCHITECTURE=aarch64 .elif ${MACHINE_CPUARCH} == "amd64" VDSO_SRCS=${SYSDIR}/contrib/cloudabi/cloudabi_vdso_x86_64.S OUTPUT_TARGET=elf64-x86-64-freebsd BINARY_ARCHITECTURE=i386 .endif cloudabi64_vdso.o: ${VDSO_SRCS} ${CC} -x assembler-with-cpp -shared -nostdinc -nostdlib \ -Wl,-T${SYSDIR}/compat/cloudabi/cloudabi_vdso.lds \ ${VDSO_SRCS} -o ${.TARGET} cloudabi64_vdso_blob.o: cloudabi64_vdso.o ${OBJCOPY} --input-target binary \ --output-target ${OUTPUT_TARGET} \ --binary-architecture ${BINARY_ARCHITECTURE} \ cloudabi64_vdso.o ${.TARGET} .include Index: projects/clang390-import/usr.sbin/newsyslog/tests/legacy_test.sh =================================================================== --- projects/clang390-import/usr.sbin/newsyslog/tests/legacy_test.sh (revision 305016) +++ projects/clang390-import/usr.sbin/newsyslog/tests/legacy_test.sh (revision 305017) @@ -1,444 +1,457 @@ #!/bin/sh # $FreeBSD$ COUNT=0 TMPDIR=$(pwd)/work if [ $? -ne 0 ]; then echo "$0: Can't create temp dir, exiting..." 
Index: projects/clang390-import/usr.sbin/newsyslog/tests/legacy_test.sh
===================================================================
--- projects/clang390-import/usr.sbin/newsyslog/tests/legacy_test.sh	(revision 305016)
+++ projects/clang390-import/usr.sbin/newsyslog/tests/legacy_test.sh	(revision 305017)
@@ -1,444 +1,457 @@
#!/bin/sh
# $FreeBSD$

COUNT=0
TMPDIR=$(pwd)/work
if [ $? -ne 0 ]; then
	echo "$0: Can't create temp dir, exiting..."
	exit 1
fi

# Begin an individual test
begin()
{
	COUNT=`expr $COUNT + 1`
	OK=1
	NAME="$1"
}

# End an individual test
end()
{
+	local message
+
	if [ $OK = 1 ]
	then
-		printf 'ok '
+		message='ok '
	else
-		printf 'not ok '
+		message='not ok '
	fi
-	echo "$COUNT - $NAME"
+
+	message="$message$COUNT - $NAME"
+	if [ -n "$TODO" ]
+	then
+		message="$message # TODO $TODO"
+	fi
+
+	echo "$message"
}

# Make a file that can later be verified
mkf()
{
	CN=`basename $1`
	echo "$CN-$CN" >$1
}

# Verify that the file specified is correct
ckf()
{
	if [ -f $2 ] && echo "$1-$1" | diff - $2 >/dev/null
	then
		ok
	else
		notok
	fi
}

# Check that a file exists
ckfe()
{
	if [ -f $1 ]
	then
		ok
	else
		notok
	fi
}

# Verify that the specified file does not exist
# (is not there)
cknt()
{
	if [ -r $1 ]
	then
		notok
	else
		ok
	fi
}

# Check whether a file should be present or not, depending on how many
# log files we are supposed to keep vs. how many we actually keep.
ckntfe()
{
	curcnt=$1
	keepcnt=$2
	f=$3

	if [ $curcnt -le $keepcnt ]
	then
		#echo Assuming file there
		ckfe $f
	else
		#echo Assuming file NOT there
		cknt $f
	fi
}

# A part of a test succeeds
ok()
{
	:
}

# A part of a test fails
notok()
{
	OK=0
}

# Verify that the exit code passed is for unsuccessful termination
ckfail()
{
	if [ $1 -gt 0 ]
	then
		ok
	else
		notok
	fi
}

# Verify that the exit code passed is for successful termination
ckok()
{
	if [ $1 -eq 0 ]
	then
		ok
	else
		notok
	fi
}

# Check that there are X files which match expr
chkfcnt()
{
	cnt=$1; shift
	if [ $cnt -eq `echo "$@" | wc -w` ]
	then
		ok
	else
		notok
	fi
}

# Check that two strings are alike
ckstr()
{
	if [ "$1" = "$2" ]
	then
		ok
	else
		notok
	fi
}

tmpdir_create()
{
	mkdir -p ${TMPDIR}/log ${TMPDIR}/alog
	cd ${TMPDIR}/log
}

tmpdir_clean()
{
	cd ${TMPDIR}
	rm -rf "${TMPDIR}/log" "${TMPDIR}/alog" newsyslog.conf
}

run_newsyslog()
{
	newsyslog -f ../newsyslog.conf -F -r "$@"
}

tests_normal_rotate()
{
	ext="$1"
	dir="$2"

	if [ -n "$dir" ]; then
		newsyslog_args=" -a ${dir}"
		name_postfix="${ext} archive dir"
	else
		newsyslog_args=""
		name_postfix="${ext}"
	fi

	tmpdir_create

	begin "create file ${name_postfix}" -newdir
	run_newsyslog -C
	ckfe $LOGFNAME
	cknt ${dir}${LOGFNAME}.0${ext}
	end

	begin "rotate normal 1 ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckfe ${dir}${LOGFNAME}.0${ext}
	cknt ${dir}${LOGFNAME}.1${ext}
	end

	begin "rotate normal 2 ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckfe ${dir}${LOGFNAME}.0${ext}
	ckfe ${dir}${LOGFNAME}.1${ext}
	cknt ${dir}${LOGFNAME}.2${ext}
	end

	begin "rotate normal 3 ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckfe ${dir}${LOGFNAME}.0${ext}
	ckfe ${dir}${LOGFNAME}.1${ext}
	ckfe ${dir}${LOGFNAME}.2${ext}
	cknt ${dir}${LOGFNAME}.3${ext}
	end

	begin "rotate normal 4 ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckfe ${dir}${LOGFNAME}.0${ext}
	ckfe ${dir}${LOGFNAME}.1${ext}
	ckfe ${dir}${LOGFNAME}.2${ext}
	cknt ${dir}${LOGFNAME}.4${ext}
	end

	begin "rotate normal 5 ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckfe ${dir}${LOGFNAME}.0${ext}
	ckfe ${dir}${LOGFNAME}.1${ext}
	ckfe ${dir}${LOGFNAME}.2${ext}
	cknt ${dir}${LOGFNAME}.4${ext}
	end

	# Wait a bit so we can see if the noaction test rotates files
	sleep 1.1

	begin "noaction ${name_postfix}"
	ofiles=`ls -Tl ${dir}${LOGFNAME}.*${ext} | tr -d '\n'`
	run_newsyslog ${newsyslog_args} -n >/dev/null
	ckfe ${LOGFNAME}
	ckstr "$ofiles" "`ls -lT ${dir}${LOGFNAME}.*${ext} | tr -d '\n'`"
	end

	tmpdir_clean
}
tests_normal_rotate_keepn()
{
	cnt="$1"
	ext="$2"
	dir="$3"

	if [ -n "$dir" ]; then
		newsyslog_args=" -a ${dir}"
		name_postfix="${ext} archive dir"
	else
		newsyslog_args=""
		name_postfix="${ext}"
	fi

	tmpdir_create

	begin "create file ${name_postfix}" -newdir
	run_newsyslog -C
	ckfe $LOGFNAME
	cknt ${dir}${LOGFNAME}.0${ext}
	end

	begin "rotate normal 1 cnt=$cnt ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckntfe 1 $cnt ${dir}${LOGFNAME}.0${ext}
	cknt ${dir}${LOGFNAME}.1${ext}
	end

	begin "rotate normal 2 cnt=$cnt ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckntfe 1 $cnt ${dir}${LOGFNAME}.0${ext}
	ckntfe 2 $cnt ${dir}${LOGFNAME}.1${ext}
	cknt ${dir}${LOGFNAME}.2${ext}
	end

	begin "rotate normal 3 cnt=$cnt ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckntfe 1 $cnt ${dir}${LOGFNAME}.0${ext}
	ckntfe 2 $cnt ${dir}${LOGFNAME}.1${ext}
	ckntfe 3 $cnt ${dir}${LOGFNAME}.2${ext}
	cknt ${dir}${LOGFNAME}.3${ext}
	end

	begin "rotate normal 4 cnt=$cnt ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckntfe 1 $cnt ${dir}${LOGFNAME}.0${ext}
	ckntfe 2 $cnt ${dir}${LOGFNAME}.1${ext}
	ckntfe 3 $cnt ${dir}${LOGFNAME}.2${ext}
	ckntfe 4 $cnt ${dir}${LOGFNAME}.3${ext}
	cknt ${dir}${LOGFNAME}.4${ext}
	end

	# Wait a bit so we can see if the noaction test rotates files
	sleep 1.1

	begin "noaction ${name_postfix}"
	osum=`md5 ${dir}${LOGFNAME} | tr -d '\n'`
	run_newsyslog ${newsyslog_args} -n >/dev/null
	ckfe ${LOGFNAME}
	ckstr "$osum" "`md5 ${dir}${LOGFNAME} | tr -d '\n'`"
	end

	tmpdir_clean
}

tests_time_rotate()
{
	ext="$1"
	dir="$2"

	if [ -n "$dir" ]; then
		newsyslog_args="-t DEFAULT -a ${dir}"
		name_postfix="${ext} archive dir"
	else
		newsyslog_args="-t DEFAULT"
		name_postfix="${ext}"
	fi

	tmpdir_create

	begin "create file ${name_postfix}" -newdir
	run_newsyslog -C ${newsyslog_args}
	ckfe ${LOGFNAME}
	end

	begin "rotate time 1 ${name_postfix}"
	run_newsyslog ${newsyslog_args}
	ckfe ${LOGFNAME}
	chkfcnt 1 ${dir}${LOGFNAME}.*${ext}
	end

	sleep 1.1
+	(
+	TODO="rotate time 2-4 fail today; bug 212160"
+
	begin "rotate time 2 ${name_postfix}"
	run_newsyslog ${newsyslog_args}
	ckfe ${LOGFNAME}
	chkfcnt 2 ${dir}${LOGFNAME}.*${ext}
	end

	sleep 1.1
	begin "rotate time 3 ${name_postfix}"
	run_newsyslog ${newsyslog_args}
	ckfe ${LOGFNAME}
	chkfcnt 3 ${dir}${LOGFNAME}.*${ext}
	end

	sleep 1.1
	begin "rotate time 4 ${name_postfix}"
	run_newsyslog ${newsyslog_args}
	ckfe ${LOGFNAME}
	chkfcnt 3 ${dir}${LOGFNAME}.*${ext}
	end
+	)

	begin "noaction ${name_postfix}"
	ofiles=`ls -1 ${dir}${LOGFNAME}.*${ext} | tr -d '\n'`
	run_newsyslog ${newsyslog_args} -n >/dev/null
	ckfe ${LOGFNAME}
	ckstr "$ofiles" "`ls -1 ${dir}${LOGFNAME}.*${ext} | tr -d '\n'`"
	end

	tmpdir_clean
}
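[Editor's illustration] The added ( ... ) subshell scopes TODO to the three time-based rotations that currently fail (bug 212160), so the trailing "noaction" check is still reported as a hard pass or fail. With the new end() plumbing, a TAP harness such as prove(1) would see lines like these (test numbers hypothetical):

	not ok 103 - rotate time 2 gz # TODO rotate time 2-4 fail today; bug 212160
	ok 106 - noaction gz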
echo "$LOGFPATH 640 3 * @T00 NCJ" > newsyslog.conf tests_normal_rotate ".bz2" # Normal, archive dir echo "$LOGFPATH 640 3 * @T00 NC" > newsyslog.conf tests_normal_rotate "" "${TMPDIR}/alog/" echo "$LOGFPATH 640 3 * @T00 NCZ" > newsyslog.conf tests_normal_rotate ".gz" "${TMPDIR}/alog/" echo "$LOGFPATH 640 3 * @T00 NCJ" > newsyslog.conf tests_normal_rotate ".bz2" "${TMPDIR}/alog/" # Time based, no archive dir echo "$LOGFPATH 640 3 * @T00 NC" > newsyslog.conf tests_time_rotate echo "$LOGFPATH 640 3 * @T00 NCZ" > newsyslog.conf tests_time_rotate "gz" "" echo "$LOGFPATH 640 3 * @T00 NCJ" > newsyslog.conf tests_time_rotate "bz2" "" # Time based, archive dir echo "$LOGFPATH 640 3 * @T00 NC" > newsyslog.conf tests_time_rotate "" "${TMPDIR}/alog/" echo "$LOGFPATH 640 3 * @T00 NCZ" > newsyslog.conf tests_time_rotate "gz" "${TMPDIR}/alog/" echo "$LOGFPATH 640 3 * @T00 NCJ" > newsyslog.conf tests_time_rotate "bz2" "${TMPDIR}/alog/" rm -rf "${TMPDIR}" Index: projects/clang390-import =================================================================== --- projects/clang390-import (revision 305016) +++ projects/clang390-import (revision 305017) Property changes on: projects/clang390-import ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r304965-305016