Index: projects/clang-trunk/contrib/llvm/tools/lldb/FREEBSD-Xlist =================================================================== --- projects/clang-trunk/contrib/llvm/tools/lldb/FREEBSD-Xlist (revision 287501) +++ projects/clang-trunk/contrib/llvm/tools/lldb/FREEBSD-Xlist (revision 287502) @@ -1,186 +1,212 @@ # $FreeBSD$ .arcconfig .clang-format .gitignore CMakeLists.txt +CODE_OWNERS.txt INSTALL.txt Makefile cmake/ docs/CMakeLists.txt docs/building-with-debug-llvm.txt docs/code-signing.txt docs/doxygen.cfg.in docs/doxygen.footer docs/doxygen.header docs/doxygen.intro docs/lldb-for-gdb-users.txt docs/lldb-gdb-remote.txt docs/testsuite/ examples/ gtest/ include/Makefile include/lldb/Host/android/ include/lldb/Host/linux/ include/lldb/Host/macosx/ include/lldb/Host/mingw/ include/lldb/Host/msvc/ include/lldb/Host/windows/ include/lldb/Makefile lib/ +lit/ lldb.xcodeproj/ lldb.xcworkspace/ resources/ scripts/ source/API/CMakeLists.txt source/API/Makefile source/Breakpoint/CMakeLists.txt source/Breakpoint/Makefile source/CMakeLists.txt source/Commands/CMakeLists.txt source/Commands/Makefile source/Core/CMakeLists.txt source/Core/Makefile source/DataFormatters/CMakeLists.txt source/DataFormatters/Makefile source/Expression/CMakeLists.txt source/Expression/Makefile source/Host/CMakeLists.txt source/Host/Makefile +source/Host/android/ source/Host/common/Makefile source/Host/freebsd/Makefile source/Host/linux/ source/Host/macosx/ source/Host/posix/Makefile source/Host/windows/ +source/Initialization/CMakeLists.txt +source/Initialization/Makefile source/Interpreter/CMakeLists.txt source/Interpreter/Makefile source/Makefile source/Plugins/ABI/CMakeLists.txt source/Plugins/ABI/MacOSX-arm/CMakeLists.txt source/Plugins/ABI/MacOSX-arm/Makefile source/Plugins/ABI/MacOSX-arm64/CMakeLists.txt source/Plugins/ABI/MacOSX-arm64/Makefile source/Plugins/ABI/MacOSX-i386/CMakeLists.txt source/Plugins/ABI/MacOSX-i386/Makefile +source/Plugins/ABI/SysV-arm/CMakeLists.txt +source/Plugins/ABI/SysV-arm/Makefile +source/Plugins/ABI/SysV-arm64/CMakeLists.txt +source/Plugins/ABI/SysV-arm64/Makefile source/Plugins/ABI/SysV-hexagon/CMakeLists.txt source/Plugins/ABI/SysV-hexagon/Makefile +source/Plugins/ABI/SysV-i386/CMakeLists.txt +source/Plugins/ABI/SysV-i386/Makefile +source/Plugins/ABI/SysV-mips/CMakeLists.txt +source/Plugins/ABI/SysV-mips/Makefile +source/Plugins/ABI/SysV-mips64/CMakeLists.txt +source/Plugins/ABI/SysV-mips64/Makefile source/Plugins/ABI/SysV-ppc/CMakeLists.txt source/Plugins/ABI/SysV-ppc/Makefile source/Plugins/ABI/SysV-ppc64/CMakeLists.txt source/Plugins/ABI/SysV-ppc64/Makefile source/Plugins/ABI/SysV-x86_64/CMakeLists.txt source/Plugins/ABI/SysV-x86_64/Makefile source/Plugins/CMakeLists.txt source/Plugins/Disassembler/CMakeLists.txt source/Plugins/Disassembler/llvm/CMakeLists.txt source/Plugins/Disassembler/llvm/Makefile source/Plugins/DynamicLoader/CMakeLists.txt source/Plugins/DynamicLoader/Darwin-Kernel/ source/Plugins/DynamicLoader/Hexagon-DYLD/CMakeLists.txt source/Plugins/DynamicLoader/Hexagon-DYLD/Makefile source/Plugins/DynamicLoader/MacOSX-DYLD/ source/Plugins/DynamicLoader/POSIX-DYLD/CMakeLists.txt source/Plugins/DynamicLoader/POSIX-DYLD/Makefile source/Plugins/DynamicLoader/Static/CMakeLists.txt source/Plugins/DynamicLoader/Static/Makefile source/Plugins/Instruction/ARM/CMakeLists.txt source/Plugins/Instruction/ARM/Makefile source/Plugins/Instruction/ARM64/CMakeLists.txt source/Plugins/Instruction/ARM64/Makefile source/Plugins/Instruction/CMakeLists.txt +source/Plugins/Instruction/MIPS/CMakeLists.txt +source/Plugins/Instruction/MIPS/Makefile +source/Plugins/Instruction/MIPS64/CMakeLists.txt +source/Plugins/Instruction/MIPS64/Makefile source/Plugins/InstrumentationRuntime/AddressSanitizer/CMakeLists.txt source/Plugins/InstrumentationRuntime/AddressSanitizer/Makefile source/Plugins/InstrumentationRuntime/CMakeLists.txt source/Plugins/JITLoader/CMakeLists.txt source/Plugins/JITLoader/GDB/CMakeLists.txt source/Plugins/JITLoader/GDB/Makefile source/Plugins/LanguageRuntime/CMakeLists.txt source/Plugins/LanguageRuntime/CPlusPlus/CMakeLists.txt source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/CMakeLists.txt source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/Makefile source/Plugins/LanguageRuntime/ObjC/ +source/Plugins/LanguageRuntime/RenderScript/CMakeLists.txt +source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/CMakeLists.txt +source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/Makefile source/Plugins/Makefile source/Plugins/MemoryHistory/CMakeLists.txt source/Plugins/MemoryHistory/asan/CMakeLists.txt source/Plugins/MemoryHistory/asan/Makefile source/Plugins/ObjectContainer/BSD-Archive/CMakeLists.txt source/Plugins/ObjectContainer/BSD-Archive/Makefile source/Plugins/ObjectContainer/CMakeLists.txt source/Plugins/ObjectContainer/Universal-Mach-O/ source/Plugins/ObjectFile/CMakeLists.txt source/Plugins/ObjectFile/ELF/CMakeLists.txt source/Plugins/ObjectFile/ELF/Makefile source/Plugins/ObjectFile/JIT/CMakeLists.txt source/Plugins/ObjectFile/JIT/Makefile source/Plugins/ObjectFile/Mach-O/ source/Plugins/ObjectFile/PECOFF/ source/Plugins/OperatingSystem/CMakeLists.txt source/Plugins/OperatingSystem/Python/CMakeLists.txt source/Plugins/OperatingSystem/Python/Makefile +source/Plugins/Platform/Android/ source/Plugins/Platform/CMakeLists.txt source/Plugins/Platform/FreeBSD/CMakeLists.txt source/Plugins/Platform/FreeBSD/Makefile source/Plugins/Platform/Kalimba/ source/Plugins/Platform/Linux/ source/Plugins/Platform/MacOSX/ source/Plugins/Platform/Makefile source/Plugins/Platform/POSIX/CMakeLists.txt source/Plugins/Platform/POSIX/Makefile source/Plugins/Platform/Windows/ source/Plugins/Platform/gdb-server/CMakeLists.txt source/Plugins/Platform/gdb-server/Makefile source/Plugins/Process/CMakeLists.txt source/Plugins/Process/FreeBSD/CMakeLists.txt source/Plugins/Process/FreeBSD/Makefile source/Plugins/Process/Linux/ source/Plugins/Process/MacOSX-Kernel/ source/Plugins/Process/POSIX/CMakeLists.txt source/Plugins/Process/POSIX/Makefile source/Plugins/Process/Utility/CMakeLists.txt source/Plugins/Process/Utility/Makefile source/Plugins/Process/Windows/ source/Plugins/Process/elf-core/CMakeLists.txt source/Plugins/Process/elf-core/Makefile source/Plugins/Process/gdb-remote/CMakeLists.txt source/Plugins/Process/gdb-remote/Makefile source/Plugins/Process/mach-core/ source/Plugins/SymbolFile/CMakeLists.txt source/Plugins/SymbolFile/DWARF/CMakeLists.txt source/Plugins/SymbolFile/DWARF/Makefile source/Plugins/SymbolFile/Symtab/CMakeLists.txt source/Plugins/SymbolFile/Symtab/Makefile source/Plugins/SymbolVendor/CMakeLists.txt source/Plugins/SymbolVendor/ELF/CMakeLists.txt source/Plugins/SymbolVendor/ELF/Makefile source/Plugins/SymbolVendor/MacOSX/ source/Plugins/SystemRuntime/ source/Plugins/UnwindAssembly/CMakeLists.txt source/Plugins/UnwindAssembly/InstEmulation/CMakeLists.txt source/Plugins/UnwindAssembly/InstEmulation/Makefile source/Plugins/UnwindAssembly/x86/CMakeLists.txt source/Plugins/UnwindAssembly/x86/Makefile source/Symbol/CMakeLists.txt source/Symbol/Makefile source/Target/CMakeLists.txt source/Target/Makefile source/Utility/CMakeLists.txt source/Utility/Makefile test/ tools/CMakeLists.txt tools/Makefile +tools/argdumper/CMakeLists.txt tools/darwin-debug/ tools/darwin-threads/ tools/debugserver/ tools/driver/CMakeLists.txt tools/driver/Makefile tools/driver/lldb-Info.plist tools/install-headers/ tools/lldb-gdbserver/ tools/lldb-mi/CMakeLists.txt tools/lldb-mi/Makefile tools/lldb-mi/lldb-Info.plist tools/lldb-perf/ -tools/lldb-platform/CMakeLists.txt -tools/lldb-platform/Makefile +tools/lldb-platform/ +tools/lldb-server/CMakeLists.txt +tools/lldb-server/Makefile +unittests/ utils/ www/ Index: projects/clang-trunk/contrib/llvm/tools/lldb =================================================================== --- projects/clang-trunk/contrib/llvm/tools/lldb (revision 287501) +++ projects/clang-trunk/contrib/llvm/tools/lldb (revision 287502) Property changes on: projects/clang-trunk/contrib/llvm/tools/lldb ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/llvm/tools/lldb:r286422-287501 Index: projects/clang-trunk/contrib/llvm =================================================================== --- projects/clang-trunk/contrib/llvm (revision 287501) +++ projects/clang-trunk/contrib/llvm (revision 287502) Property changes on: projects/clang-trunk/contrib/llvm ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/llvm:r286422-287501 Index: projects/clang-trunk/sys/cam/ctl/ctl.c =================================================================== --- projects/clang-trunk/sys/cam/ctl/ctl.c (revision 287501) +++ projects/clang-trunk/sys/cam/ctl/ctl.c (revision 287502) @@ -1,13829 +1,13816 @@ /*- * Copyright (c) 2003-2009 Silicon Graphics International Corp. * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Edward Tomasz Napierala * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * $Id$ */ /* * CAM Target Layer, a SCSI device emulation subsystem. * * Author: Ken Merry */ #define _CTL_C #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct ctl_softc *control_softc = NULL; /* * Size and alignment macros needed for Copan-specific HA hardware. These * can go away when the HA code is re-written, and uses busdma for any * hardware. */ #define CTL_ALIGN_8B(target, source, type) \ if (((uint32_t)source & 0x7) != 0) \ target = (type)(source + (0x8 - ((uint32_t)source & 0x7)));\ else \ target = (type)source; #define CTL_SIZE_8B(target, size) \ if ((size & 0x7) != 0) \ target = size + (0x8 - (size & 0x7)); \ else \ target = size; #define CTL_ALIGN_8B_MARGIN 16 /* * Template mode pages. */ /* * Note that these are default values only. The actual values will be * filled in when the user does a mode sense. */ const static struct copan_debugconf_subpage debugconf_page_default = { DBGCNF_PAGE_CODE | SMPH_SPF, /* page_code */ DBGCNF_SUBPAGE_CODE, /* subpage */ {(sizeof(struct copan_debugconf_subpage) - 4) >> 8, (sizeof(struct copan_debugconf_subpage) - 4) >> 0}, /* page_length */ DBGCNF_VERSION, /* page_version */ {CTL_TIME_IO_DEFAULT_SECS>>8, CTL_TIME_IO_DEFAULT_SECS>>0}, /* ctl_time_io_secs */ }; const static struct copan_debugconf_subpage debugconf_page_changeable = { DBGCNF_PAGE_CODE | SMPH_SPF, /* page_code */ DBGCNF_SUBPAGE_CODE, /* subpage */ {(sizeof(struct copan_debugconf_subpage) - 4) >> 8, (sizeof(struct copan_debugconf_subpage) - 4) >> 0}, /* page_length */ 0, /* page_version */ {0xff,0xff}, /* ctl_time_io_secs */ }; const static struct scsi_da_rw_recovery_page rw_er_page_default = { /*page_code*/SMS_RW_ERROR_RECOVERY_PAGE, /*page_length*/sizeof(struct scsi_da_rw_recovery_page) - 2, /*byte3*/SMS_RWER_AWRE|SMS_RWER_ARRE, /*read_retry_count*/0, /*correction_span*/0, /*head_offset_count*/0, /*data_strobe_offset_cnt*/0, /*byte8*/SMS_RWER_LBPERE, /*write_retry_count*/0, /*reserved2*/0, /*recovery_time_limit*/{0, 0}, }; const static struct scsi_da_rw_recovery_page rw_er_page_changeable = { /*page_code*/SMS_RW_ERROR_RECOVERY_PAGE, /*page_length*/sizeof(struct scsi_da_rw_recovery_page) - 2, /*byte3*/0, /*read_retry_count*/0, /*correction_span*/0, /*head_offset_count*/0, /*data_strobe_offset_cnt*/0, /*byte8*/0, /*write_retry_count*/0, /*reserved2*/0, /*recovery_time_limit*/{0, 0}, }; const static struct scsi_format_page format_page_default = { /*page_code*/SMS_FORMAT_DEVICE_PAGE, /*page_length*/sizeof(struct scsi_format_page) - 2, /*tracks_per_zone*/ {0, 0}, /*alt_sectors_per_zone*/ {0, 0}, /*alt_tracks_per_zone*/ {0, 0}, /*alt_tracks_per_lun*/ {0, 0}, /*sectors_per_track*/ {(CTL_DEFAULT_SECTORS_PER_TRACK >> 8) & 0xff, CTL_DEFAULT_SECTORS_PER_TRACK & 0xff}, /*bytes_per_sector*/ {0, 0}, /*interleave*/ {0, 0}, /*track_skew*/ {0, 0}, /*cylinder_skew*/ {0, 0}, /*flags*/ SFP_HSEC, /*reserved*/ {0, 0, 0} }; const static struct scsi_format_page format_page_changeable = { /*page_code*/SMS_FORMAT_DEVICE_PAGE, /*page_length*/sizeof(struct scsi_format_page) - 2, /*tracks_per_zone*/ {0, 0}, /*alt_sectors_per_zone*/ {0, 0}, /*alt_tracks_per_zone*/ {0, 0}, /*alt_tracks_per_lun*/ {0, 0}, /*sectors_per_track*/ {0, 0}, /*bytes_per_sector*/ {0, 0}, /*interleave*/ {0, 0}, /*track_skew*/ {0, 0}, /*cylinder_skew*/ {0, 0}, /*flags*/ 0, /*reserved*/ {0, 0, 0} }; const static struct scsi_rigid_disk_page rigid_disk_page_default = { /*page_code*/SMS_RIGID_DISK_PAGE, /*page_length*/sizeof(struct scsi_rigid_disk_page) - 2, /*cylinders*/ {0, 0, 0}, /*heads*/ CTL_DEFAULT_HEADS, /*start_write_precomp*/ {0, 0, 0}, /*start_reduced_current*/ {0, 0, 0}, /*step_rate*/ {0, 0}, /*landing_zone_cylinder*/ {0, 0, 0}, /*rpl*/ SRDP_RPL_DISABLED, /*rotational_offset*/ 0, /*reserved1*/ 0, /*rotation_rate*/ {(CTL_DEFAULT_ROTATION_RATE >> 8) & 0xff, CTL_DEFAULT_ROTATION_RATE & 0xff}, /*reserved2*/ {0, 0} }; const static struct scsi_rigid_disk_page rigid_disk_page_changeable = { /*page_code*/SMS_RIGID_DISK_PAGE, /*page_length*/sizeof(struct scsi_rigid_disk_page) - 2, /*cylinders*/ {0, 0, 0}, /*heads*/ 0, /*start_write_precomp*/ {0, 0, 0}, /*start_reduced_current*/ {0, 0, 0}, /*step_rate*/ {0, 0}, /*landing_zone_cylinder*/ {0, 0, 0}, /*rpl*/ 0, /*rotational_offset*/ 0, /*reserved1*/ 0, /*rotation_rate*/ {0, 0}, /*reserved2*/ {0, 0} }; const static struct scsi_caching_page caching_page_default = { /*page_code*/SMS_CACHING_PAGE, /*page_length*/sizeof(struct scsi_caching_page) - 2, /*flags1*/ SCP_DISC | SCP_WCE, /*ret_priority*/ 0, /*disable_pf_transfer_len*/ {0xff, 0xff}, /*min_prefetch*/ {0, 0}, /*max_prefetch*/ {0xff, 0xff}, /*max_pf_ceiling*/ {0xff, 0xff}, /*flags2*/ 0, /*cache_segments*/ 0, /*cache_seg_size*/ {0, 0}, /*reserved*/ 0, /*non_cache_seg_size*/ {0, 0, 0} }; const static struct scsi_caching_page caching_page_changeable = { /*page_code*/SMS_CACHING_PAGE, /*page_length*/sizeof(struct scsi_caching_page) - 2, /*flags1*/ SCP_WCE | SCP_RCD, /*ret_priority*/ 0, /*disable_pf_transfer_len*/ {0, 0}, /*min_prefetch*/ {0, 0}, /*max_prefetch*/ {0, 0}, /*max_pf_ceiling*/ {0, 0}, /*flags2*/ 0, /*cache_segments*/ 0, /*cache_seg_size*/ {0, 0}, /*reserved*/ 0, /*non_cache_seg_size*/ {0, 0, 0} }; const static struct scsi_control_page control_page_default = { /*page_code*/SMS_CONTROL_MODE_PAGE, /*page_length*/sizeof(struct scsi_control_page) - 2, /*rlec*/0, /*queue_flags*/SCP_QUEUE_ALG_RESTRICTED, /*eca_and_aen*/0, /*flags4*/SCP_TAS, /*aen_holdoff_period*/{0, 0}, /*busy_timeout_period*/{0, 0}, /*extended_selftest_completion_time*/{0, 0} }; const static struct scsi_control_page control_page_changeable = { /*page_code*/SMS_CONTROL_MODE_PAGE, /*page_length*/sizeof(struct scsi_control_page) - 2, /*rlec*/SCP_DSENSE, /*queue_flags*/SCP_QUEUE_ALG_MASK, /*eca_and_aen*/SCP_SWP, /*flags4*/0, /*aen_holdoff_period*/{0, 0}, /*busy_timeout_period*/{0, 0}, /*extended_selftest_completion_time*/{0, 0} }; const static struct scsi_info_exceptions_page ie_page_default = { /*page_code*/SMS_INFO_EXCEPTIONS_PAGE, /*page_length*/sizeof(struct scsi_info_exceptions_page) - 2, /*info_flags*/SIEP_FLAGS_DEXCPT, /*mrie*/0, /*interval_timer*/{0, 0, 0, 0}, /*report_count*/{0, 0, 0, 0} }; const static struct scsi_info_exceptions_page ie_page_changeable = { /*page_code*/SMS_INFO_EXCEPTIONS_PAGE, /*page_length*/sizeof(struct scsi_info_exceptions_page) - 2, /*info_flags*/0, /*mrie*/0, /*interval_timer*/{0, 0, 0, 0}, /*report_count*/{0, 0, 0, 0} }; #define CTL_LBPM_LEN (sizeof(struct ctl_logical_block_provisioning_page) - 4) const static struct ctl_logical_block_provisioning_page lbp_page_default = {{ /*page_code*/SMS_INFO_EXCEPTIONS_PAGE | SMPH_SPF, /*subpage_code*/0x02, /*page_length*/{CTL_LBPM_LEN >> 8, CTL_LBPM_LEN}, /*flags*/0, /*reserved*/{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /*descr*/{}}, {{/*flags*/0, /*resource*/0x01, /*reserved*/{0, 0}, /*count*/{0, 0, 0, 0}}, {/*flags*/0, /*resource*/0x02, /*reserved*/{0, 0}, /*count*/{0, 0, 0, 0}}, {/*flags*/0, /*resource*/0xf1, /*reserved*/{0, 0}, /*count*/{0, 0, 0, 0}}, {/*flags*/0, /*resource*/0xf2, /*reserved*/{0, 0}, /*count*/{0, 0, 0, 0}} } }; const static struct ctl_logical_block_provisioning_page lbp_page_changeable = {{ /*page_code*/SMS_INFO_EXCEPTIONS_PAGE | SMPH_SPF, /*subpage_code*/0x02, /*page_length*/{CTL_LBPM_LEN >> 8, CTL_LBPM_LEN}, /*flags*/0, /*reserved*/{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /*descr*/{}}, {{/*flags*/0, /*resource*/0, /*reserved*/{0, 0}, /*count*/{0, 0, 0, 0}}, {/*flags*/0, /*resource*/0, /*reserved*/{0, 0}, /*count*/{0, 0, 0, 0}}, {/*flags*/0, /*resource*/0, /*reserved*/{0, 0}, /*count*/{0, 0, 0, 0}}, {/*flags*/0, /*resource*/0, /*reserved*/{0, 0}, /*count*/{0, 0, 0, 0}} } }; /* * XXX KDM move these into the softc. */ static int rcv_sync_msg; static uint8_t ctl_pause_rtr; SYSCTL_NODE(_kern_cam, OID_AUTO, ctl, CTLFLAG_RD, 0, "CAM Target Layer"); static int worker_threads = -1; SYSCTL_INT(_kern_cam_ctl, OID_AUTO, worker_threads, CTLFLAG_RDTUN, &worker_threads, 1, "Number of worker threads"); static int ctl_debug = CTL_DEBUG_NONE; SYSCTL_INT(_kern_cam_ctl, OID_AUTO, debug, CTLFLAG_RWTUN, &ctl_debug, 0, "Enabled debug flags"); /* * Supported pages (0x00), Serial number (0x80), Device ID (0x83), * Extended INQUIRY Data (0x86), Mode Page Policy (0x87), * SCSI Ports (0x88), Third-party Copy (0x8F), Block limits (0xB0), * Block Device Characteristics (0xB1) and Logical Block Provisioning (0xB2) */ #define SCSI_EVPD_NUM_SUPPORTED_PAGES 10 #ifdef notyet static void ctl_isc_event_handler(ctl_ha_channel chanel, ctl_ha_event event, int param); static void ctl_copy_sense_data(union ctl_ha_msg *src, union ctl_io *dest); #endif static int ctl_init(void); void ctl_shutdown(void); static int ctl_open(struct cdev *dev, int flags, int fmt, struct thread *td); static int ctl_close(struct cdev *dev, int flags, int fmt, struct thread *td); static int ctl_serialize_other_sc_cmd(struct ctl_scsiio *ctsio); static int ctl_ioctl_fill_ooa(struct ctl_lun *lun, uint32_t *cur_fill_num, struct ctl_ooa *ooa_hdr, struct ctl_ooa_entry *kern_entries); static int ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td); static int ctl_alloc_lun(struct ctl_softc *ctl_softc, struct ctl_lun *lun, struct ctl_be_lun *be_lun); static int ctl_free_lun(struct ctl_lun *lun); static void ctl_create_lun(struct ctl_be_lun *be_lun); static struct ctl_port * ctl_io_port(struct ctl_io_hdr *io_hdr); /** static void ctl_failover_change_pages(struct ctl_softc *softc, struct ctl_scsiio *ctsio, int master); **/ static int ctl_do_mode_select(union ctl_io *io); static int ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun, uint64_t res_key, uint64_t sa_res_key, uint8_t type, uint32_t residx, struct ctl_scsiio *ctsio, struct scsi_per_res_out *cdb, struct scsi_per_res_out_parms* param); static void ctl_pro_preempt_other(struct ctl_lun *lun, union ctl_ha_msg *msg); static void ctl_hndl_per_res_out_on_other_sc(union ctl_ha_msg *msg); static int ctl_inquiry_evpd_supported(struct ctl_scsiio *ctsio, int alloc_len); static int ctl_inquiry_evpd_serial(struct ctl_scsiio *ctsio, int alloc_len); static int ctl_inquiry_evpd_devid(struct ctl_scsiio *ctsio, int alloc_len); static int ctl_inquiry_evpd_eid(struct ctl_scsiio *ctsio, int alloc_len); static int ctl_inquiry_evpd_mpp(struct ctl_scsiio *ctsio, int alloc_len); static int ctl_inquiry_evpd_scsi_ports(struct ctl_scsiio *ctsio, int alloc_len); static int ctl_inquiry_evpd_block_limits(struct ctl_scsiio *ctsio, int alloc_len); static int ctl_inquiry_evpd_bdc(struct ctl_scsiio *ctsio, int alloc_len); static int ctl_inquiry_evpd_lbp(struct ctl_scsiio *ctsio, int alloc_len); static int ctl_inquiry_evpd(struct ctl_scsiio *ctsio); static int ctl_inquiry_std(struct ctl_scsiio *ctsio); static int ctl_get_lba_len(union ctl_io *io, uint64_t *lba, uint64_t *len); static ctl_action ctl_extent_check(union ctl_io *io1, union ctl_io *io2, bool seq); static ctl_action ctl_extent_check_seq(union ctl_io *io1, union ctl_io *io2); static ctl_action ctl_check_for_blockage(struct ctl_lun *lun, union ctl_io *pending_io, union ctl_io *ooa_io); static ctl_action ctl_check_ooa(struct ctl_lun *lun, union ctl_io *pending_io, union ctl_io *starting_io); static int ctl_check_blocked(struct ctl_lun *lun); static int ctl_scsiio_lun_check(struct ctl_lun *lun, const struct ctl_cmd_entry *entry, struct ctl_scsiio *ctsio); //static int ctl_check_rtr(union ctl_io *pending_io, struct ctl_softc *softc); #ifdef notyet static void ctl_failover(void); #endif static void ctl_clr_ua_allluns(struct ctl_softc *ctl_softc, uint32_t initidx, ctl_ua_type ua_type); static int ctl_scsiio_precheck(struct ctl_softc *ctl_softc, struct ctl_scsiio *ctsio); static int ctl_scsiio(struct ctl_scsiio *ctsio); static int ctl_bus_reset(struct ctl_softc *ctl_softc, union ctl_io *io); static int ctl_target_reset(struct ctl_softc *ctl_softc, union ctl_io *io, ctl_ua_type ua_type); static int ctl_lun_reset(struct ctl_lun *lun, union ctl_io *io, ctl_ua_type ua_type); static int ctl_abort_task(union ctl_io *io); static int ctl_abort_task_set(union ctl_io *io); static int ctl_i_t_nexus_reset(union ctl_io *io); static void ctl_run_task(union ctl_io *io); #ifdef CTL_IO_DELAY static void ctl_datamove_timer_wakeup(void *arg); static void ctl_done_timer_wakeup(void *arg); #endif /* CTL_IO_DELAY */ static void ctl_send_datamove_done(union ctl_io *io, int have_lock); static void ctl_datamove_remote_write_cb(struct ctl_ha_dt_req *rq); static int ctl_datamove_remote_dm_write_cb(union ctl_io *io); static void ctl_datamove_remote_write(union ctl_io *io); static int ctl_datamove_remote_dm_read_cb(union ctl_io *io); static void ctl_datamove_remote_read_cb(struct ctl_ha_dt_req *rq); static int ctl_datamove_remote_sgl_setup(union ctl_io *io); static int ctl_datamove_remote_xfer(union ctl_io *io, unsigned command, ctl_ha_dt_cb callback); static void ctl_datamove_remote_read(union ctl_io *io); static void ctl_datamove_remote(union ctl_io *io); static int ctl_process_done(union ctl_io *io); static void ctl_lun_thread(void *arg); static void ctl_thresh_thread(void *arg); static void ctl_work_thread(void *arg); static void ctl_enqueue_incoming(union ctl_io *io); static void ctl_enqueue_rtr(union ctl_io *io); static void ctl_enqueue_done(union ctl_io *io); #ifdef notyet static void ctl_enqueue_isc(union ctl_io *io); #endif static const struct ctl_cmd_entry * ctl_get_cmd_entry(struct ctl_scsiio *ctsio, int *sa); static const struct ctl_cmd_entry * ctl_validate_command(struct ctl_scsiio *ctsio); static int ctl_cmd_applicable(uint8_t lun_type, const struct ctl_cmd_entry *entry); /* * Load the serialization table. This isn't very pretty, but is probably * the easiest way to do it. */ #include "ctl_ser_table.c" /* * We only need to define open, close and ioctl routines for this driver. */ static struct cdevsw ctl_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_open = ctl_open, .d_close = ctl_close, .d_ioctl = ctl_ioctl, .d_name = "ctl", }; MALLOC_DEFINE(M_CTL, "ctlmem", "Memory used for CTL"); static int ctl_module_event_handler(module_t, int /*modeventtype_t*/, void *); static moduledata_t ctl_moduledata = { "ctl", ctl_module_event_handler, NULL }; DECLARE_MODULE(ctl, ctl_moduledata, SI_SUB_CONFIGURE, SI_ORDER_THIRD); MODULE_VERSION(ctl, 1); #ifdef notyet static void ctl_isc_handler_finish_xfer(struct ctl_softc *ctl_softc, union ctl_ha_msg *msg_info) { struct ctl_scsiio *ctsio; if (msg_info->hdr.original_sc == NULL) { printf("%s: original_sc == NULL!\n", __func__); /* XXX KDM now what? */ return; } ctsio = &msg_info->hdr.original_sc->scsiio; ctsio->io_hdr.flags |= CTL_FLAG_IO_ACTIVE; ctsio->io_hdr.msg_type = CTL_MSG_FINISH_IO; ctsio->io_hdr.status = msg_info->hdr.status; ctsio->scsi_status = msg_info->scsi.scsi_status; ctsio->sense_len = msg_info->scsi.sense_len; ctsio->sense_residual = msg_info->scsi.sense_residual; ctsio->residual = msg_info->scsi.residual; memcpy(&ctsio->sense_data, &msg_info->scsi.sense_data, sizeof(ctsio->sense_data)); memcpy(&ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN].bytes, &msg_info->scsi.lbalen, sizeof(msg_info->scsi.lbalen)); ctl_enqueue_isc((union ctl_io *)ctsio); } static void ctl_isc_handler_finish_ser_only(struct ctl_softc *ctl_softc, union ctl_ha_msg *msg_info) { struct ctl_scsiio *ctsio; if (msg_info->hdr.serializing_sc == NULL) { printf("%s: serializing_sc == NULL!\n", __func__); /* XXX KDM now what? */ return; } ctsio = &msg_info->hdr.serializing_sc->scsiio; #if 0 /* * Attempt to catch the situation where an I/O has * been freed, and we're using it again. */ if (ctsio->io_hdr.io_type == 0xff) { union ctl_io *tmp_io; tmp_io = (union ctl_io *)ctsio; printf("%s: %p use after free!\n", __func__, ctsio); printf("%s: type %d msg %d cdb %x iptl: " "%d:%d:%d:%d tag 0x%04x " "flag %#x status %x\n", __func__, tmp_io->io_hdr.io_type, tmp_io->io_hdr.msg_type, tmp_io->scsiio.cdb[0], tmp_io->io_hdr.nexus.initid.id, tmp_io->io_hdr.nexus.targ_port, tmp_io->io_hdr.nexus.targ_target.id, tmp_io->io_hdr.nexus.targ_lun, (tmp_io->io_hdr.io_type == CTL_IO_TASK) ? tmp_io->taskio.tag_num : tmp_io->scsiio.tag_num, tmp_io->io_hdr.flags, tmp_io->io_hdr.status); } #endif ctsio->io_hdr.msg_type = CTL_MSG_FINISH_IO; ctl_enqueue_isc((union ctl_io *)ctsio); } /* * ISC (Inter Shelf Communication) event handler. Events from the HA * subsystem come in here. */ static void ctl_isc_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param) { struct ctl_softc *softc; union ctl_io *io; struct ctl_prio *presio; ctl_ha_status isc_status; softc = control_softc; io = NULL; #if 0 printf("CTL: Isc Msg event %d\n", event); #endif if (event == CTL_HA_EVT_MSG_RECV) { union ctl_ha_msg msg_info; isc_status = ctl_ha_msg_recv(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info), /*wait*/ 0); #if 0 printf("CTL: msg_type %d\n", msg_info.msg_type); #endif if (isc_status != 0) { printf("Error receiving message, status = %d\n", isc_status); return; } switch (msg_info.hdr.msg_type) { case CTL_MSG_SERIALIZE: #if 0 printf("Serialize\n"); #endif io = ctl_alloc_io_nowait(softc->othersc_pool); if (io == NULL) { printf("ctl_isc_event_handler: can't allocate " "ctl_io!\n"); /* Bad Juju */ /* Need to set busy and send msg back */ msg_info.hdr.msg_type = CTL_MSG_BAD_JUJU; msg_info.hdr.status = CTL_SCSI_ERROR; msg_info.scsi.scsi_status = SCSI_STATUS_BUSY; msg_info.scsi.sense_len = 0; if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info), 0) > CTL_HA_STATUS_SUCCESS){ } goto bailout; } ctl_zero_io(io); // populate ctsio from msg_info io->io_hdr.io_type = CTL_IO_SCSI; io->io_hdr.msg_type = CTL_MSG_SERIALIZE; io->io_hdr.original_sc = msg_info.hdr.original_sc; #if 0 printf("pOrig %x\n", (int)msg_info.original_sc); #endif io->io_hdr.flags |= CTL_FLAG_FROM_OTHER_SC | CTL_FLAG_IO_ACTIVE; /* * If we're in serialization-only mode, we don't * want to go through full done processing. Thus * the COPY flag. * * XXX KDM add another flag that is more specific. */ if (softc->ha_mode == CTL_HA_MODE_SER_ONLY) io->io_hdr.flags |= CTL_FLAG_INT_COPY; io->io_hdr.nexus = msg_info.hdr.nexus; #if 0 printf("targ %d, port %d, iid %d, lun %d\n", io->io_hdr.nexus.targ_target.id, io->io_hdr.nexus.targ_port, io->io_hdr.nexus.initid.id, io->io_hdr.nexus.targ_lun); #endif io->scsiio.tag_num = msg_info.scsi.tag_num; io->scsiio.tag_type = msg_info.scsi.tag_type; memcpy(io->scsiio.cdb, msg_info.scsi.cdb, CTL_MAX_CDBLEN); if (softc->ha_mode == CTL_HA_MODE_XFER) { const struct ctl_cmd_entry *entry; entry = ctl_get_cmd_entry(&io->scsiio, NULL); io->io_hdr.flags &= ~CTL_FLAG_DATA_MASK; io->io_hdr.flags |= entry->flags & CTL_FLAG_DATA_MASK; } ctl_enqueue_isc(io); break; /* Performed on the Originating SC, XFER mode only */ case CTL_MSG_DATAMOVE: { struct ctl_sg_entry *sgl; int i, j; io = msg_info.hdr.original_sc; if (io == NULL) { printf("%s: original_sc == NULL!\n", __func__); /* XXX KDM do something here */ break; } io->io_hdr.msg_type = CTL_MSG_DATAMOVE; io->io_hdr.flags |= CTL_FLAG_IO_ACTIVE; /* * Keep track of this, we need to send it back over * when the datamove is complete. */ io->io_hdr.serializing_sc = msg_info.hdr.serializing_sc; if (msg_info.dt.sg_sequence == 0) { /* * XXX KDM we use the preallocated S/G list * here, but we'll need to change this to * dynamic allocation if we need larger S/G * lists. */ if (msg_info.dt.kern_sg_entries > sizeof(io->io_hdr.remote_sglist) / sizeof(io->io_hdr.remote_sglist[0])) { printf("%s: number of S/G entries " "needed %u > allocated num %zd\n", __func__, msg_info.dt.kern_sg_entries, sizeof(io->io_hdr.remote_sglist)/ sizeof(io->io_hdr.remote_sglist[0])); /* * XXX KDM send a message back to * the other side to shut down the * DMA. The error will come back * through via the normal channel. */ break; } sgl = io->io_hdr.remote_sglist; memset(sgl, 0, sizeof(io->io_hdr.remote_sglist)); io->scsiio.kern_data_ptr = (uint8_t *)sgl; io->scsiio.kern_sg_entries = msg_info.dt.kern_sg_entries; io->scsiio.rem_sg_entries = msg_info.dt.kern_sg_entries; io->scsiio.kern_data_len = msg_info.dt.kern_data_len; io->scsiio.kern_total_len = msg_info.dt.kern_total_len; io->scsiio.kern_data_resid = msg_info.dt.kern_data_resid; io->scsiio.kern_rel_offset = msg_info.dt.kern_rel_offset; /* * Clear out per-DMA flags. */ io->io_hdr.flags &= ~CTL_FLAG_RDMA_MASK; /* * Add per-DMA flags that are set for this * particular DMA request. */ io->io_hdr.flags |= msg_info.dt.flags & CTL_FLAG_RDMA_MASK; } else sgl = (struct ctl_sg_entry *) io->scsiio.kern_data_ptr; for (i = msg_info.dt.sent_sg_entries, j = 0; i < (msg_info.dt.sent_sg_entries + msg_info.dt.cur_sg_entries); i++, j++) { sgl[i].addr = msg_info.dt.sg_list[j].addr; sgl[i].len = msg_info.dt.sg_list[j].len; #if 0 printf("%s: L: %p,%d -> %p,%d j=%d, i=%d\n", __func__, msg_info.dt.sg_list[j].addr, msg_info.dt.sg_list[j].len, sgl[i].addr, sgl[i].len, j, i); #endif } #if 0 memcpy(&sgl[msg_info.dt.sent_sg_entries], msg_info.dt.sg_list, sizeof(*sgl) * msg_info.dt.cur_sg_entries); #endif /* * If this is the last piece of the I/O, we've got * the full S/G list. Queue processing in the thread. * Otherwise wait for the next piece. */ if (msg_info.dt.sg_last != 0) ctl_enqueue_isc(io); break; } /* Performed on the Serializing (primary) SC, XFER mode only */ case CTL_MSG_DATAMOVE_DONE: { if (msg_info.hdr.serializing_sc == NULL) { printf("%s: serializing_sc == NULL!\n", __func__); /* XXX KDM now what? */ break; } /* * We grab the sense information here in case * there was a failure, so we can return status * back to the initiator. */ io = msg_info.hdr.serializing_sc; io->io_hdr.msg_type = CTL_MSG_DATAMOVE_DONE; io->io_hdr.status = msg_info.hdr.status; io->scsiio.scsi_status = msg_info.scsi.scsi_status; io->scsiio.sense_len = msg_info.scsi.sense_len; io->scsiio.sense_residual =msg_info.scsi.sense_residual; io->io_hdr.port_status = msg_info.scsi.fetd_status; io->scsiio.residual = msg_info.scsi.residual; memcpy(&io->scsiio.sense_data,&msg_info.scsi.sense_data, sizeof(io->scsiio.sense_data)); ctl_enqueue_isc(io); break; } /* Preformed on Originating SC, SER_ONLY mode */ case CTL_MSG_R2R: io = msg_info.hdr.original_sc; if (io == NULL) { printf("%s: Major Bummer\n", __func__); return; } else { #if 0 printf("pOrig %x\n",(int) ctsio); #endif } io->io_hdr.msg_type = CTL_MSG_R2R; io->io_hdr.serializing_sc = msg_info.hdr.serializing_sc; ctl_enqueue_isc(io); break; /* * Performed on Serializing(i.e. primary SC) SC in SER_ONLY * mode. * Performed on the Originating (i.e. secondary) SC in XFER * mode */ case CTL_MSG_FINISH_IO: if (softc->ha_mode == CTL_HA_MODE_XFER) ctl_isc_handler_finish_xfer(softc, &msg_info); else ctl_isc_handler_finish_ser_only(softc, &msg_info); break; /* Preformed on Originating SC */ case CTL_MSG_BAD_JUJU: io = msg_info.hdr.original_sc; if (io == NULL) { printf("%s: Bad JUJU!, original_sc is NULL!\n", __func__); break; } ctl_copy_sense_data(&msg_info, io); /* * IO should have already been cleaned up on other * SC so clear this flag so we won't send a message * back to finish the IO there. */ io->io_hdr.flags &= ~CTL_FLAG_SENT_2OTHER_SC; io->io_hdr.flags |= CTL_FLAG_IO_ACTIVE; /* io = msg_info.hdr.serializing_sc; */ io->io_hdr.msg_type = CTL_MSG_BAD_JUJU; ctl_enqueue_isc(io); break; /* Handle resets sent from the other side */ case CTL_MSG_MANAGE_TASKS: { struct ctl_taskio *taskio; taskio = (struct ctl_taskio *)ctl_alloc_io_nowait( softc->othersc_pool); if (taskio == NULL) { printf("ctl_isc_event_handler: can't allocate " "ctl_io!\n"); /* Bad Juju */ /* should I just call the proper reset func here??? */ goto bailout; } ctl_zero_io((union ctl_io *)taskio); taskio->io_hdr.io_type = CTL_IO_TASK; taskio->io_hdr.flags |= CTL_FLAG_FROM_OTHER_SC; taskio->io_hdr.nexus = msg_info.hdr.nexus; taskio->task_action = msg_info.task.task_action; taskio->tag_num = msg_info.task.tag_num; taskio->tag_type = msg_info.task.tag_type; #ifdef CTL_TIME_IO taskio->io_hdr.start_time = time_uptime; getbintime(&taskio->io_hdr.start_bt); #if 0 cs_prof_gettime(&taskio->io_hdr.start_ticks); #endif #endif /* CTL_TIME_IO */ ctl_run_task((union ctl_io *)taskio); break; } /* Persistent Reserve action which needs attention */ case CTL_MSG_PERS_ACTION: presio = (struct ctl_prio *)ctl_alloc_io_nowait( softc->othersc_pool); if (presio == NULL) { printf("ctl_isc_event_handler: can't allocate " "ctl_io!\n"); /* Bad Juju */ /* Need to set busy and send msg back */ goto bailout; } ctl_zero_io((union ctl_io *)presio); presio->io_hdr.msg_type = CTL_MSG_PERS_ACTION; presio->pr_msg = msg_info.pr; ctl_enqueue_isc((union ctl_io *)presio); break; case CTL_MSG_SYNC_FE: rcv_sync_msg = 1; break; default: printf("How did I get here?\n"); } } else if (event == CTL_HA_EVT_MSG_SENT) { if (param != CTL_HA_STATUS_SUCCESS) { printf("Bad status from ctl_ha_msg_send status %d\n", param); } return; } else if (event == CTL_HA_EVT_DISCONNECT) { printf("CTL: Got a disconnect from Isc\n"); return; } else { printf("ctl_isc_event_handler: Unknown event %d\n", event); return; } bailout: return; } static void ctl_copy_sense_data(union ctl_ha_msg *src, union ctl_io *dest) { struct scsi_sense_data *sense; sense = &dest->scsiio.sense_data; bcopy(&src->scsi.sense_data, sense, sizeof(*sense)); dest->scsiio.scsi_status = src->scsi.scsi_status; dest->scsiio.sense_len = src->scsi.sense_len; dest->io_hdr.status = src->hdr.status; } #endif static void ctl_est_ua(struct ctl_lun *lun, uint32_t initidx, ctl_ua_type ua) { ctl_ua_type *pu; mtx_assert(&lun->lun_lock, MA_OWNED); pu = lun->pending_ua[initidx / CTL_MAX_INIT_PER_PORT]; if (pu == NULL) return; pu[initidx % CTL_MAX_INIT_PER_PORT] |= ua; } static void ctl_est_ua_all(struct ctl_lun *lun, uint32_t except, ctl_ua_type ua) { int i, j; mtx_assert(&lun->lun_lock, MA_OWNED); for (i = 0; i < CTL_MAX_PORTS; i++) { if (lun->pending_ua[i] == NULL) continue; for (j = 0; j < CTL_MAX_INIT_PER_PORT; j++) { if (i * CTL_MAX_INIT_PER_PORT + j == except) continue; lun->pending_ua[i][j] |= ua; } } } static void ctl_clr_ua(struct ctl_lun *lun, uint32_t initidx, ctl_ua_type ua) { ctl_ua_type *pu; mtx_assert(&lun->lun_lock, MA_OWNED); pu = lun->pending_ua[initidx / CTL_MAX_INIT_PER_PORT]; if (pu == NULL) return; pu[initidx % CTL_MAX_INIT_PER_PORT] &= ~ua; } static void ctl_clr_ua_all(struct ctl_lun *lun, uint32_t except, ctl_ua_type ua) { int i, j; mtx_assert(&lun->lun_lock, MA_OWNED); for (i = 0; i < CTL_MAX_PORTS; i++) { if (lun->pending_ua[i] == NULL) continue; for (j = 0; j < CTL_MAX_INIT_PER_PORT; j++) { if (i * CTL_MAX_INIT_PER_PORT + j == except) continue; lun->pending_ua[i][j] &= ~ua; } } } static void ctl_clr_ua_allluns(struct ctl_softc *ctl_softc, uint32_t initidx, ctl_ua_type ua_type) { struct ctl_lun *lun; mtx_assert(&ctl_softc->ctl_lock, MA_OWNED); STAILQ_FOREACH(lun, &ctl_softc->lun_list, links) { mtx_lock(&lun->lun_lock); ctl_clr_ua(lun, initidx, ua_type); mtx_unlock(&lun->lun_lock); } } static int ctl_ha_state_sysctl(SYSCTL_HANDLER_ARGS) { struct ctl_softc *softc = (struct ctl_softc *)arg1; struct ctl_lun *lun; int error, value; if (softc->flags & CTL_FLAG_ACTIVE_SHELF) value = 0; else value = 1; error = sysctl_handle_int(oidp, &value, 0, req); if ((error != 0) || (req->newptr == NULL)) return (error); mtx_lock(&softc->ctl_lock); if (value == 0) softc->flags |= CTL_FLAG_ACTIVE_SHELF; else softc->flags &= ~CTL_FLAG_ACTIVE_SHELF; STAILQ_FOREACH(lun, &softc->lun_list, links) { mtx_lock(&lun->lun_lock); ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE); mtx_unlock(&lun->lun_lock); } mtx_unlock(&softc->ctl_lock); return (0); } static int ctl_init(void) { struct ctl_softc *softc; void *other_pool; int i, error, retval; //int isc_retval; retval = 0; ctl_pause_rtr = 0; rcv_sync_msg = 0; control_softc = malloc(sizeof(*control_softc), M_DEVBUF, M_WAITOK | M_ZERO); softc = control_softc; softc->dev = make_dev(&ctl_cdevsw, 0, UID_ROOT, GID_OPERATOR, 0600, "cam/ctl"); softc->dev->si_drv1 = softc; /* * By default, return a "bad LUN" peripheral qualifier for unknown * LUNs. The user can override this default using the tunable or * sysctl. See the comment in ctl_inquiry_std() for more details. */ softc->inquiry_pq_no_lun = 1; TUNABLE_INT_FETCH("kern.cam.ctl.inquiry_pq_no_lun", &softc->inquiry_pq_no_lun); sysctl_ctx_init(&softc->sysctl_ctx); softc->sysctl_tree = SYSCTL_ADD_NODE(&softc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_kern_cam), OID_AUTO, "ctl", CTLFLAG_RD, 0, "CAM Target Layer"); if (softc->sysctl_tree == NULL) { printf("%s: unable to allocate sysctl tree\n", __func__); destroy_dev(softc->dev); free(control_softc, M_DEVBUF); control_softc = NULL; return (ENOMEM); } SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "inquiry_pq_no_lun", CTLFLAG_RW, &softc->inquiry_pq_no_lun, 0, "Report no lun possible for invalid LUNs"); mtx_init(&softc->ctl_lock, "CTL mutex", NULL, MTX_DEF); softc->io_zone = uma_zcreate("CTL IO", sizeof(union ctl_io), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); softc->open_count = 0; /* * Default to actually sending a SYNCHRONIZE CACHE command down to * the drive. */ softc->flags = CTL_FLAG_REAL_SYNC; /* * In Copan's HA scheme, the "master" and "slave" roles are * figured out through the slot the controller is in. Although it * is an active/active system, someone has to be in charge. */ SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "ha_id", CTLFLAG_RDTUN, &softc->ha_id, 0, "HA head ID (0 - no HA)"); if (softc->ha_id == 0) { softc->flags |= CTL_FLAG_ACTIVE_SHELF; softc->is_single = 1; softc->port_offset = 0; } else softc->port_offset = (softc->ha_id - 1) * CTL_MAX_PORTS; softc->persis_offset = softc->port_offset * CTL_MAX_INIT_PER_PORT; STAILQ_INIT(&softc->lun_list); STAILQ_INIT(&softc->pending_lun_queue); STAILQ_INIT(&softc->fe_list); STAILQ_INIT(&softc->port_list); STAILQ_INIT(&softc->be_list); ctl_tpc_init(softc); if (ctl_pool_create(softc, "othersc", CTL_POOL_ENTRIES_OTHER_SC, &other_pool) != 0) { printf("ctl: can't allocate %d entry other SC pool, " "exiting\n", CTL_POOL_ENTRIES_OTHER_SC); return (ENOMEM); } softc->othersc_pool = other_pool; if (worker_threads <= 0) worker_threads = max(1, mp_ncpus / 4); if (worker_threads > CTL_MAX_THREADS) worker_threads = CTL_MAX_THREADS; for (i = 0; i < worker_threads; i++) { struct ctl_thread *thr = &softc->threads[i]; mtx_init(&thr->queue_lock, "CTL queue mutex", NULL, MTX_DEF); thr->ctl_softc = softc; STAILQ_INIT(&thr->incoming_queue); STAILQ_INIT(&thr->rtr_queue); STAILQ_INIT(&thr->done_queue); STAILQ_INIT(&thr->isc_queue); error = kproc_kthread_add(ctl_work_thread, thr, &softc->ctl_proc, &thr->thread, 0, 0, "ctl", "work%d", i); if (error != 0) { printf("error creating CTL work thread!\n"); ctl_pool_free(other_pool); return (error); } } error = kproc_kthread_add(ctl_lun_thread, softc, &softc->ctl_proc, NULL, 0, 0, "ctl", "lun"); if (error != 0) { printf("error creating CTL lun thread!\n"); ctl_pool_free(other_pool); return (error); } error = kproc_kthread_add(ctl_thresh_thread, softc, &softc->ctl_proc, NULL, 0, 0, "ctl", "thresh"); if (error != 0) { printf("error creating CTL threshold thread!\n"); ctl_pool_free(other_pool); return (error); } SYSCTL_ADD_PROC(&softc->sysctl_ctx,SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "ha_state", CTLTYPE_INT | CTLFLAG_RWTUN, softc, 0, ctl_ha_state_sysctl, "I", "HA state for this head"); #ifdef CTL_IO_DELAY if (sizeof(struct callout) > CTL_TIMER_BYTES) { printf("sizeof(struct callout) %zd > CTL_TIMER_BYTES %zd\n", sizeof(struct callout), CTL_TIMER_BYTES); return (EINVAL); } #endif /* CTL_IO_DELAY */ return (0); } void ctl_shutdown(void) { struct ctl_softc *softc; struct ctl_lun *lun, *next_lun; softc = (struct ctl_softc *)control_softc; mtx_lock(&softc->ctl_lock); /* * Free up each LUN. */ for (lun = STAILQ_FIRST(&softc->lun_list); lun != NULL; lun = next_lun){ next_lun = STAILQ_NEXT(lun, links); ctl_free_lun(lun); } mtx_unlock(&softc->ctl_lock); #if 0 ctl_shutdown_thread(softc->work_thread); mtx_destroy(&softc->queue_lock); #endif ctl_tpc_shutdown(softc); uma_zdestroy(softc->io_zone); mtx_destroy(&softc->ctl_lock); destroy_dev(softc->dev); sysctl_ctx_free(&softc->sysctl_ctx); free(control_softc, M_DEVBUF); control_softc = NULL; } static int ctl_module_event_handler(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: return (ctl_init()); case MOD_UNLOAD: return (EBUSY); default: return (EOPNOTSUPP); } } /* * XXX KDM should we do some access checks here? Bump a reference count to * prevent a CTL module from being unloaded while someone has it open? */ static int ctl_open(struct cdev *dev, int flags, int fmt, struct thread *td) { return (0); } static int ctl_close(struct cdev *dev, int flags, int fmt, struct thread *td) { return (0); } int ctl_port_enable(ctl_port_type port_type) { struct ctl_softc *softc = control_softc; struct ctl_port *port; if (softc->is_single == 0) { union ctl_ha_msg msg_info; int isc_retval; #if 0 printf("%s: HA mode, synchronizing frontend enable\n", __func__); #endif msg_info.hdr.msg_type = CTL_MSG_SYNC_FE; if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info), 1 )) > CTL_HA_STATUS_SUCCESS) { printf("Sync msg send error retval %d\n", isc_retval); } if (!rcv_sync_msg) { isc_retval=ctl_ha_msg_recv(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info), 1); } #if 0 printf("CTL:Frontend Enable\n"); } else { printf("%s: single mode, skipping frontend synchronization\n", __func__); #endif } STAILQ_FOREACH(port, &softc->port_list, links) { if (port_type & port->port_type) { #if 0 printf("port %d\n", port->targ_port); #endif ctl_port_online(port); } } return (0); } int ctl_port_disable(ctl_port_type port_type) { struct ctl_softc *softc; struct ctl_port *port; softc = control_softc; STAILQ_FOREACH(port, &softc->port_list, links) { if (port_type & port->port_type) ctl_port_offline(port); } return (0); } /* * Returns 0 for success, 1 for failure. * Currently the only failure mode is if there aren't enough entries * allocated. So, in case of a failure, look at num_entries_dropped, * reallocate and try again. */ int ctl_port_list(struct ctl_port_entry *entries, int num_entries_alloced, int *num_entries_filled, int *num_entries_dropped, ctl_port_type port_type, int no_virtual) { struct ctl_softc *softc; struct ctl_port *port; int entries_dropped, entries_filled; int retval; int i; softc = control_softc; retval = 0; entries_filled = 0; entries_dropped = 0; i = 0; mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(port, &softc->port_list, links) { struct ctl_port_entry *entry; if ((port->port_type & port_type) == 0) continue; if ((no_virtual != 0) && (port->virtual_port != 0)) continue; if (entries_filled >= num_entries_alloced) { entries_dropped++; continue; } entry = &entries[i]; entry->port_type = port->port_type; strlcpy(entry->port_name, port->port_name, sizeof(entry->port_name)); entry->physical_port = port->physical_port; entry->virtual_port = port->virtual_port; entry->wwnn = port->wwnn; entry->wwpn = port->wwpn; i++; entries_filled++; } mtx_unlock(&softc->ctl_lock); if (entries_dropped > 0) retval = 1; *num_entries_dropped = entries_dropped; *num_entries_filled = entries_filled; return (retval); } /* * Remove an initiator by port number and initiator ID. * Returns 0 for success, -1 for failure. */ int ctl_remove_initiator(struct ctl_port *port, int iid) { struct ctl_softc *softc = control_softc; mtx_assert(&softc->ctl_lock, MA_NOTOWNED); if (iid > CTL_MAX_INIT_PER_PORT) { printf("%s: initiator ID %u > maximun %u!\n", __func__, iid, CTL_MAX_INIT_PER_PORT); return (-1); } mtx_lock(&softc->ctl_lock); port->wwpn_iid[iid].in_use--; port->wwpn_iid[iid].last_use = time_uptime; mtx_unlock(&softc->ctl_lock); return (0); } /* * Add an initiator to the initiator map. * Returns iid for success, < 0 for failure. */ int ctl_add_initiator(struct ctl_port *port, int iid, uint64_t wwpn, char *name) { struct ctl_softc *softc = control_softc; time_t best_time; int i, best; mtx_assert(&softc->ctl_lock, MA_NOTOWNED); if (iid >= CTL_MAX_INIT_PER_PORT) { printf("%s: WWPN %#jx initiator ID %u > maximum %u!\n", __func__, wwpn, iid, CTL_MAX_INIT_PER_PORT); free(name, M_CTL); return (-1); } mtx_lock(&softc->ctl_lock); if (iid < 0 && (wwpn != 0 || name != NULL)) { for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) { if (wwpn != 0 && wwpn == port->wwpn_iid[i].wwpn) { iid = i; break; } if (name != NULL && port->wwpn_iid[i].name != NULL && strcmp(name, port->wwpn_iid[i].name) == 0) { iid = i; break; } } } if (iid < 0) { for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) { if (port->wwpn_iid[i].in_use == 0 && port->wwpn_iid[i].wwpn == 0 && port->wwpn_iid[i].name == NULL) { iid = i; break; } } } if (iid < 0) { best = -1; best_time = INT32_MAX; for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) { if (port->wwpn_iid[i].in_use == 0) { if (port->wwpn_iid[i].last_use < best_time) { best = i; best_time = port->wwpn_iid[i].last_use; } } } iid = best; } if (iid < 0) { mtx_unlock(&softc->ctl_lock); free(name, M_CTL); return (-2); } if (port->wwpn_iid[iid].in_use > 0 && (wwpn != 0 || name != NULL)) { /* * This is not an error yet. */ if (wwpn != 0 && wwpn == port->wwpn_iid[iid].wwpn) { #if 0 printf("%s: port %d iid %u WWPN %#jx arrived" " again\n", __func__, port->targ_port, iid, (uintmax_t)wwpn); #endif goto take; } if (name != NULL && port->wwpn_iid[iid].name != NULL && strcmp(name, port->wwpn_iid[iid].name) == 0) { #if 0 printf("%s: port %d iid %u name '%s' arrived" " again\n", __func__, port->targ_port, iid, name); #endif goto take; } /* * This is an error, but what do we do about it? The * driver is telling us we have a new WWPN for this * initiator ID, so we pretty much need to use it. */ printf("%s: port %d iid %u WWPN %#jx '%s' arrived," " but WWPN %#jx '%s' is still at that address\n", __func__, port->targ_port, iid, wwpn, name, (uintmax_t)port->wwpn_iid[iid].wwpn, port->wwpn_iid[iid].name); /* * XXX KDM clear have_ca and ua_pending on each LUN for * this initiator. */ } take: free(port->wwpn_iid[iid].name, M_CTL); port->wwpn_iid[iid].name = name; port->wwpn_iid[iid].wwpn = wwpn; port->wwpn_iid[iid].in_use++; mtx_unlock(&softc->ctl_lock); return (iid); } static int ctl_create_iid(struct ctl_port *port, int iid, uint8_t *buf) { int len; switch (port->port_type) { case CTL_PORT_FC: { struct scsi_transportid_fcp *id = (struct scsi_transportid_fcp *)buf; if (port->wwpn_iid[iid].wwpn == 0) return (0); memset(id, 0, sizeof(*id)); id->format_protocol = SCSI_PROTO_FC; scsi_u64to8b(port->wwpn_iid[iid].wwpn, id->n_port_name); return (sizeof(*id)); } case CTL_PORT_ISCSI: { struct scsi_transportid_iscsi_port *id = (struct scsi_transportid_iscsi_port *)buf; if (port->wwpn_iid[iid].name == NULL) return (0); memset(id, 0, 256); id->format_protocol = SCSI_TRN_ISCSI_FORMAT_PORT | SCSI_PROTO_ISCSI; len = strlcpy(id->iscsi_name, port->wwpn_iid[iid].name, 252) + 1; len = roundup2(min(len, 252), 4); scsi_ulto2b(len, id->additional_length); return (sizeof(*id) + len); } case CTL_PORT_SAS: { struct scsi_transportid_sas *id = (struct scsi_transportid_sas *)buf; if (port->wwpn_iid[iid].wwpn == 0) return (0); memset(id, 0, sizeof(*id)); id->format_protocol = SCSI_PROTO_SAS; scsi_u64to8b(port->wwpn_iid[iid].wwpn, id->sas_address); return (sizeof(*id)); } default: { struct scsi_transportid_spi *id = (struct scsi_transportid_spi *)buf; memset(id, 0, sizeof(*id)); id->format_protocol = SCSI_PROTO_SPI; scsi_ulto2b(iid, id->scsi_addr); scsi_ulto2b(port->targ_port, id->rel_trgt_port_id); return (sizeof(*id)); } } } /* * Serialize a command that went down the "wrong" side, and so was sent to * this controller for execution. The logic is a little different than the * standard case in ctl_scsiio_precheck(). Errors in this case need to get * sent back to the other side, but in the success case, we execute the * command on this side (XFER mode) or tell the other side to execute it * (SER_ONLY mode). */ static int ctl_serialize_other_sc_cmd(struct ctl_scsiio *ctsio) { struct ctl_softc *softc; union ctl_ha_msg msg_info; struct ctl_lun *lun; int retval = 0; uint32_t targ_lun; softc = control_softc; targ_lun = ctsio->io_hdr.nexus.targ_mapped_lun; lun = softc->ctl_luns[targ_lun]; if (lun==NULL) { /* * Why isn't LUN defined? The other side wouldn't * send a cmd if the LUN is undefined. */ printf("%s: Bad JUJU!, LUN is NULL!\n", __func__); /* "Logical unit not supported" */ ctl_set_sense_data(&msg_info.scsi.sense_data, lun, /*sense_format*/SSD_TYPE_NONE, /*current_error*/ 1, /*sense_key*/ SSD_KEY_ILLEGAL_REQUEST, /*asc*/ 0x25, /*ascq*/ 0x00, SSD_ELEM_NONE); msg_info.scsi.sense_len = SSD_FULL_SIZE; msg_info.scsi.scsi_status = SCSI_STATUS_CHECK_COND; msg_info.hdr.status = CTL_SCSI_ERROR | CTL_AUTOSENSE; msg_info.hdr.original_sc = ctsio->io_hdr.original_sc; msg_info.hdr.serializing_sc = NULL; msg_info.hdr.msg_type = CTL_MSG_BAD_JUJU; if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info), 0 ) > CTL_HA_STATUS_SUCCESS) { } return(1); } mtx_lock(&lun->lun_lock); TAILQ_INSERT_TAIL(&lun->ooa_queue, &ctsio->io_hdr, ooa_links); switch (ctl_check_ooa(lun, (union ctl_io *)ctsio, (union ctl_io *)TAILQ_PREV(&ctsio->io_hdr, ctl_ooaq, ooa_links))) { case CTL_ACTION_BLOCK: ctsio->io_hdr.flags |= CTL_FLAG_BLOCKED; TAILQ_INSERT_TAIL(&lun->blocked_queue, &ctsio->io_hdr, blocked_links); break; case CTL_ACTION_PASS: case CTL_ACTION_SKIP: if (softc->ha_mode == CTL_HA_MODE_XFER) { ctsio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; ctl_enqueue_rtr((union ctl_io *)ctsio); } else { /* send msg back to other side */ msg_info.hdr.original_sc = ctsio->io_hdr.original_sc; msg_info.hdr.serializing_sc = (union ctl_io *)ctsio; msg_info.hdr.msg_type = CTL_MSG_R2R; #if 0 printf("2. pOrig %x\n", (int)msg_info.hdr.original_sc); #endif if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info), 0 ) > CTL_HA_STATUS_SUCCESS) { } } break; case CTL_ACTION_OVERLAP: /* OVERLAPPED COMMANDS ATTEMPTED */ ctl_set_sense_data(&msg_info.scsi.sense_data, lun, /*sense_format*/SSD_TYPE_NONE, /*current_error*/ 1, /*sense_key*/ SSD_KEY_ILLEGAL_REQUEST, /*asc*/ 0x4E, /*ascq*/ 0x00, SSD_ELEM_NONE); msg_info.scsi.sense_len = SSD_FULL_SIZE; msg_info.scsi.scsi_status = SCSI_STATUS_CHECK_COND; msg_info.hdr.status = CTL_SCSI_ERROR | CTL_AUTOSENSE; msg_info.hdr.original_sc = ctsio->io_hdr.original_sc; msg_info.hdr.serializing_sc = NULL; msg_info.hdr.msg_type = CTL_MSG_BAD_JUJU; #if 0 printf("BAD JUJU:Major Bummer Overlap\n"); #endif TAILQ_REMOVE(&lun->ooa_queue, &ctsio->io_hdr, ooa_links); retval = 1; if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info), 0 ) > CTL_HA_STATUS_SUCCESS) { } break; case CTL_ACTION_OVERLAP_TAG: /* TAGGED OVERLAPPED COMMANDS (NN = QUEUE TAG) */ ctl_set_sense_data(&msg_info.scsi.sense_data, lun, /*sense_format*/SSD_TYPE_NONE, /*current_error*/ 1, /*sense_key*/ SSD_KEY_ILLEGAL_REQUEST, /*asc*/ 0x4D, /*ascq*/ ctsio->tag_num & 0xff, SSD_ELEM_NONE); msg_info.scsi.sense_len = SSD_FULL_SIZE; msg_info.scsi.scsi_status = SCSI_STATUS_CHECK_COND; msg_info.hdr.status = CTL_SCSI_ERROR | CTL_AUTOSENSE; msg_info.hdr.original_sc = ctsio->io_hdr.original_sc; msg_info.hdr.serializing_sc = NULL; msg_info.hdr.msg_type = CTL_MSG_BAD_JUJU; #if 0 printf("BAD JUJU:Major Bummer Overlap Tag\n"); #endif TAILQ_REMOVE(&lun->ooa_queue, &ctsio->io_hdr, ooa_links); retval = 1; if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info), 0 ) > CTL_HA_STATUS_SUCCESS) { } break; case CTL_ACTION_ERROR: default: /* "Internal target failure" */ ctl_set_sense_data(&msg_info.scsi.sense_data, lun, /*sense_format*/SSD_TYPE_NONE, /*current_error*/ 1, /*sense_key*/ SSD_KEY_HARDWARE_ERROR, /*asc*/ 0x44, /*ascq*/ 0x00, SSD_ELEM_NONE); msg_info.scsi.sense_len = SSD_FULL_SIZE; msg_info.scsi.scsi_status = SCSI_STATUS_CHECK_COND; msg_info.hdr.status = CTL_SCSI_ERROR | CTL_AUTOSENSE; msg_info.hdr.original_sc = ctsio->io_hdr.original_sc; msg_info.hdr.serializing_sc = NULL; msg_info.hdr.msg_type = CTL_MSG_BAD_JUJU; #if 0 printf("BAD JUJU:Major Bummer HW Error\n"); #endif TAILQ_REMOVE(&lun->ooa_queue, &ctsio->io_hdr, ooa_links); retval = 1; if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info), 0 ) > CTL_HA_STATUS_SUCCESS) { } break; } mtx_unlock(&lun->lun_lock); return (retval); } /* * Returns 0 for success, errno for failure. */ static int ctl_ioctl_fill_ooa(struct ctl_lun *lun, uint32_t *cur_fill_num, struct ctl_ooa *ooa_hdr, struct ctl_ooa_entry *kern_entries) { union ctl_io *io; int retval; retval = 0; mtx_lock(&lun->lun_lock); for (io = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue); (io != NULL); (*cur_fill_num)++, io = (union ctl_io *)TAILQ_NEXT(&io->io_hdr, ooa_links)) { struct ctl_ooa_entry *entry; /* * If we've got more than we can fit, just count the * remaining entries. */ if (*cur_fill_num >= ooa_hdr->alloc_num) continue; entry = &kern_entries[*cur_fill_num]; entry->tag_num = io->scsiio.tag_num; entry->lun_num = lun->lun; #ifdef CTL_TIME_IO entry->start_bt = io->io_hdr.start_bt; #endif bcopy(io->scsiio.cdb, entry->cdb, io->scsiio.cdb_len); entry->cdb_len = io->scsiio.cdb_len; if (io->io_hdr.flags & CTL_FLAG_BLOCKED) entry->cmd_flags |= CTL_OOACMD_FLAG_BLOCKED; if (io->io_hdr.flags & CTL_FLAG_DMA_INPROG) entry->cmd_flags |= CTL_OOACMD_FLAG_DMA; if (io->io_hdr.flags & CTL_FLAG_ABORT) entry->cmd_flags |= CTL_OOACMD_FLAG_ABORT; if (io->io_hdr.flags & CTL_FLAG_IS_WAS_ON_RTR) entry->cmd_flags |= CTL_OOACMD_FLAG_RTR; if (io->io_hdr.flags & CTL_FLAG_DMA_QUEUED) entry->cmd_flags |= CTL_OOACMD_FLAG_DMA_QUEUED; } mtx_unlock(&lun->lun_lock); return (retval); } static void * ctl_copyin_alloc(void *user_addr, int len, char *error_str, size_t error_str_len) { void *kptr; kptr = malloc(len, M_CTL, M_WAITOK | M_ZERO); if (copyin(user_addr, kptr, len) != 0) { snprintf(error_str, error_str_len, "Error copying %d bytes " "from user address %p to kernel address %p", len, user_addr, kptr); free(kptr, M_CTL); return (NULL); } return (kptr); } static void ctl_free_args(int num_args, struct ctl_be_arg *args) { int i; if (args == NULL) return; for (i = 0; i < num_args; i++) { free(args[i].kname, M_CTL); free(args[i].kvalue, M_CTL); } free(args, M_CTL); } static struct ctl_be_arg * ctl_copyin_args(int num_args, struct ctl_be_arg *uargs, char *error_str, size_t error_str_len) { struct ctl_be_arg *args; int i; args = ctl_copyin_alloc(uargs, num_args * sizeof(*args), error_str, error_str_len); if (args == NULL) goto bailout; for (i = 0; i < num_args; i++) { args[i].kname = NULL; args[i].kvalue = NULL; } for (i = 0; i < num_args; i++) { uint8_t *tmpptr; args[i].kname = ctl_copyin_alloc(args[i].name, args[i].namelen, error_str, error_str_len); if (args[i].kname == NULL) goto bailout; if (args[i].kname[args[i].namelen - 1] != '\0') { snprintf(error_str, error_str_len, "Argument %d " "name is not NUL-terminated", i); goto bailout; } if (args[i].flags & CTL_BEARG_RD) { tmpptr = ctl_copyin_alloc(args[i].value, args[i].vallen, error_str, error_str_len); if (tmpptr == NULL) goto bailout; if ((args[i].flags & CTL_BEARG_ASCII) && (tmpptr[args[i].vallen - 1] != '\0')) { snprintf(error_str, error_str_len, "Argument " "%d value is not NUL-terminated", i); goto bailout; } args[i].kvalue = tmpptr; } else { args[i].kvalue = malloc(args[i].vallen, M_CTL, M_WAITOK | M_ZERO); } } return (args); bailout: ctl_free_args(num_args, args); return (NULL); } static void ctl_copyout_args(int num_args, struct ctl_be_arg *args) { int i; for (i = 0; i < num_args; i++) { if (args[i].flags & CTL_BEARG_WR) copyout(args[i].kvalue, args[i].value, args[i].vallen); } } /* * Escape characters that are illegal or not recommended in XML. */ int ctl_sbuf_printf_esc(struct sbuf *sb, char *str, int size) { char *end = str + size; int retval; retval = 0; for (; *str && str < end; str++) { switch (*str) { case '&': retval = sbuf_printf(sb, "&"); break; case '>': retval = sbuf_printf(sb, ">"); break; case '<': retval = sbuf_printf(sb, "<"); break; default: retval = sbuf_putc(sb, *str); break; } if (retval != 0) break; } return (retval); } static void ctl_id_sbuf(struct ctl_devid *id, struct sbuf *sb) { struct scsi_vpd_id_descriptor *desc; int i; if (id == NULL || id->len < 4) return; desc = (struct scsi_vpd_id_descriptor *)id->data; switch (desc->id_type & SVPD_ID_TYPE_MASK) { case SVPD_ID_TYPE_T10: sbuf_printf(sb, "t10."); break; case SVPD_ID_TYPE_EUI64: sbuf_printf(sb, "eui."); break; case SVPD_ID_TYPE_NAA: sbuf_printf(sb, "naa."); break; case SVPD_ID_TYPE_SCSI_NAME: break; } switch (desc->proto_codeset & SVPD_ID_CODESET_MASK) { case SVPD_ID_CODESET_BINARY: for (i = 0; i < desc->length; i++) sbuf_printf(sb, "%02x", desc->identifier[i]); break; case SVPD_ID_CODESET_ASCII: sbuf_printf(sb, "%.*s", (int)desc->length, (char *)desc->identifier); break; case SVPD_ID_CODESET_UTF8: sbuf_printf(sb, "%s", (char *)desc->identifier); break; } } static int ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td) { struct ctl_softc *softc; int retval; softc = control_softc; retval = 0; switch (cmd) { case CTL_IO: retval = ctl_ioctl_io(dev, cmd, addr, flag, td); break; case CTL_ENABLE_PORT: case CTL_DISABLE_PORT: case CTL_SET_PORT_WWNS: { struct ctl_port *port; struct ctl_port_entry *entry; entry = (struct ctl_port_entry *)addr; mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(port, &softc->port_list, links) { int action, done; action = 0; done = 0; if ((entry->port_type == CTL_PORT_NONE) && (entry->targ_port == port->targ_port)) { /* * If the user only wants to enable or * disable or set WWNs on a specific port, * do the operation and we're done. */ action = 1; done = 1; } else if (entry->port_type & port->port_type) { /* * Compare the user's type mask with the * particular frontend type to see if we * have a match. */ action = 1; done = 0; /* * Make sure the user isn't trying to set * WWNs on multiple ports at the same time. */ if (cmd == CTL_SET_PORT_WWNS) { printf("%s: Can't set WWNs on " "multiple ports\n", __func__); retval = EINVAL; break; } } if (action != 0) { /* * XXX KDM we have to drop the lock here, * because the online/offline operations * can potentially block. We need to * reference count the frontends so they * can't go away, */ mtx_unlock(&softc->ctl_lock); if (cmd == CTL_ENABLE_PORT) { ctl_port_online(port); } else if (cmd == CTL_DISABLE_PORT) { ctl_port_offline(port); } mtx_lock(&softc->ctl_lock); if (cmd == CTL_SET_PORT_WWNS) ctl_port_set_wwns(port, (entry->flags & CTL_PORT_WWNN_VALID) ? 1 : 0, entry->wwnn, (entry->flags & CTL_PORT_WWPN_VALID) ? 1 : 0, entry->wwpn); } if (done != 0) break; } mtx_unlock(&softc->ctl_lock); break; } case CTL_GET_PORT_LIST: { struct ctl_port *port; struct ctl_port_list *list; int i; list = (struct ctl_port_list *)addr; if (list->alloc_len != (list->alloc_num * sizeof(struct ctl_port_entry))) { printf("%s: CTL_GET_PORT_LIST: alloc_len %u != " "alloc_num %u * sizeof(struct ctl_port_entry) " "%zu\n", __func__, list->alloc_len, list->alloc_num, sizeof(struct ctl_port_entry)); retval = EINVAL; break; } list->fill_len = 0; list->fill_num = 0; list->dropped_num = 0; i = 0; mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(port, &softc->port_list, links) { struct ctl_port_entry entry, *list_entry; if (list->fill_num >= list->alloc_num) { list->dropped_num++; continue; } entry.port_type = port->port_type; strlcpy(entry.port_name, port->port_name, sizeof(entry.port_name)); entry.targ_port = port->targ_port; entry.physical_port = port->physical_port; entry.virtual_port = port->virtual_port; entry.wwnn = port->wwnn; entry.wwpn = port->wwpn; if (port->status & CTL_PORT_STATUS_ONLINE) entry.online = 1; else entry.online = 0; list_entry = &list->entries[i]; retval = copyout(&entry, list_entry, sizeof(entry)); if (retval != 0) { printf("%s: CTL_GET_PORT_LIST: copyout " "returned %d\n", __func__, retval); break; } i++; list->fill_num++; list->fill_len += sizeof(entry); } mtx_unlock(&softc->ctl_lock); /* * If this is non-zero, we had a copyout fault, so there's * probably no point in attempting to set the status inside * the structure. */ if (retval != 0) break; if (list->dropped_num > 0) list->status = CTL_PORT_LIST_NEED_MORE_SPACE; else list->status = CTL_PORT_LIST_OK; break; } case CTL_DUMP_OOA: { struct ctl_lun *lun; union ctl_io *io; char printbuf[128]; struct sbuf sb; mtx_lock(&softc->ctl_lock); printf("Dumping OOA queues:\n"); STAILQ_FOREACH(lun, &softc->lun_list, links) { mtx_lock(&lun->lun_lock); for (io = (union ctl_io *)TAILQ_FIRST( &lun->ooa_queue); io != NULL; io = (union ctl_io *)TAILQ_NEXT(&io->io_hdr, ooa_links)) { sbuf_new(&sb, printbuf, sizeof(printbuf), SBUF_FIXEDLEN); sbuf_printf(&sb, "LUN %jd tag 0x%04x%s%s%s%s: ", (intmax_t)lun->lun, io->scsiio.tag_num, (io->io_hdr.flags & CTL_FLAG_BLOCKED) ? "" : " BLOCKED", (io->io_hdr.flags & CTL_FLAG_DMA_INPROG) ? " DMA" : "", (io->io_hdr.flags & CTL_FLAG_ABORT) ? " ABORT" : "", (io->io_hdr.flags & CTL_FLAG_IS_WAS_ON_RTR) ? " RTR" : ""); ctl_scsi_command_string(&io->scsiio, NULL, &sb); sbuf_finish(&sb); printf("%s\n", sbuf_data(&sb)); } mtx_unlock(&lun->lun_lock); } printf("OOA queues dump done\n"); mtx_unlock(&softc->ctl_lock); break; } case CTL_GET_OOA: { struct ctl_lun *lun; struct ctl_ooa *ooa_hdr; struct ctl_ooa_entry *entries; uint32_t cur_fill_num; ooa_hdr = (struct ctl_ooa *)addr; if ((ooa_hdr->alloc_len == 0) || (ooa_hdr->alloc_num == 0)) { printf("%s: CTL_GET_OOA: alloc len %u and alloc num %u " "must be non-zero\n", __func__, ooa_hdr->alloc_len, ooa_hdr->alloc_num); retval = EINVAL; break; } if (ooa_hdr->alloc_len != (ooa_hdr->alloc_num * sizeof(struct ctl_ooa_entry))) { printf("%s: CTL_GET_OOA: alloc len %u must be alloc " "num %d * sizeof(struct ctl_ooa_entry) %zd\n", __func__, ooa_hdr->alloc_len, ooa_hdr->alloc_num,sizeof(struct ctl_ooa_entry)); retval = EINVAL; break; } entries = malloc(ooa_hdr->alloc_len, M_CTL, M_WAITOK | M_ZERO); if (entries == NULL) { printf("%s: could not allocate %d bytes for OOA " "dump\n", __func__, ooa_hdr->alloc_len); retval = ENOMEM; break; } mtx_lock(&softc->ctl_lock); if (((ooa_hdr->flags & CTL_OOA_FLAG_ALL_LUNS) == 0) && ((ooa_hdr->lun_num >= CTL_MAX_LUNS) || (softc->ctl_luns[ooa_hdr->lun_num] == NULL))) { mtx_unlock(&softc->ctl_lock); free(entries, M_CTL); printf("%s: CTL_GET_OOA: invalid LUN %ju\n", __func__, (uintmax_t)ooa_hdr->lun_num); retval = EINVAL; break; } cur_fill_num = 0; if (ooa_hdr->flags & CTL_OOA_FLAG_ALL_LUNS) { STAILQ_FOREACH(lun, &softc->lun_list, links) { retval = ctl_ioctl_fill_ooa(lun, &cur_fill_num, ooa_hdr, entries); if (retval != 0) break; } if (retval != 0) { mtx_unlock(&softc->ctl_lock); free(entries, M_CTL); break; } } else { lun = softc->ctl_luns[ooa_hdr->lun_num]; retval = ctl_ioctl_fill_ooa(lun, &cur_fill_num,ooa_hdr, entries); } mtx_unlock(&softc->ctl_lock); ooa_hdr->fill_num = min(cur_fill_num, ooa_hdr->alloc_num); ooa_hdr->fill_len = ooa_hdr->fill_num * sizeof(struct ctl_ooa_entry); retval = copyout(entries, ooa_hdr->entries, ooa_hdr->fill_len); if (retval != 0) { printf("%s: error copying out %d bytes for OOA dump\n", __func__, ooa_hdr->fill_len); } getbintime(&ooa_hdr->cur_bt); if (cur_fill_num > ooa_hdr->alloc_num) { ooa_hdr->dropped_num = cur_fill_num -ooa_hdr->alloc_num; ooa_hdr->status = CTL_OOA_NEED_MORE_SPACE; } else { ooa_hdr->dropped_num = 0; ooa_hdr->status = CTL_OOA_OK; } free(entries, M_CTL); break; } case CTL_CHECK_OOA: { union ctl_io *io; struct ctl_lun *lun; struct ctl_ooa_info *ooa_info; ooa_info = (struct ctl_ooa_info *)addr; if (ooa_info->lun_id >= CTL_MAX_LUNS) { ooa_info->status = CTL_OOA_INVALID_LUN; break; } mtx_lock(&softc->ctl_lock); lun = softc->ctl_luns[ooa_info->lun_id]; if (lun == NULL) { mtx_unlock(&softc->ctl_lock); ooa_info->status = CTL_OOA_INVALID_LUN; break; } mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); ooa_info->num_entries = 0; for (io = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue); io != NULL; io = (union ctl_io *)TAILQ_NEXT( &io->io_hdr, ooa_links)) { ooa_info->num_entries++; } mtx_unlock(&lun->lun_lock); ooa_info->status = CTL_OOA_SUCCESS; break; } case CTL_DELAY_IO: { struct ctl_io_delay_info *delay_info; #ifdef CTL_IO_DELAY struct ctl_lun *lun; #endif /* CTL_IO_DELAY */ delay_info = (struct ctl_io_delay_info *)addr; #ifdef CTL_IO_DELAY mtx_lock(&softc->ctl_lock); if ((delay_info->lun_id >= CTL_MAX_LUNS) || (softc->ctl_luns[delay_info->lun_id] == NULL)) { delay_info->status = CTL_DELAY_STATUS_INVALID_LUN; } else { lun = softc->ctl_luns[delay_info->lun_id]; mtx_lock(&lun->lun_lock); delay_info->status = CTL_DELAY_STATUS_OK; switch (delay_info->delay_type) { case CTL_DELAY_TYPE_CONT: break; case CTL_DELAY_TYPE_ONESHOT: break; default: delay_info->status = CTL_DELAY_STATUS_INVALID_TYPE; break; } switch (delay_info->delay_loc) { case CTL_DELAY_LOC_DATAMOVE: lun->delay_info.datamove_type = delay_info->delay_type; lun->delay_info.datamove_delay = delay_info->delay_secs; break; case CTL_DELAY_LOC_DONE: lun->delay_info.done_type = delay_info->delay_type; lun->delay_info.done_delay = delay_info->delay_secs; break; default: delay_info->status = CTL_DELAY_STATUS_INVALID_LOC; break; } mtx_unlock(&lun->lun_lock); } mtx_unlock(&softc->ctl_lock); #else delay_info->status = CTL_DELAY_STATUS_NOT_IMPLEMENTED; #endif /* CTL_IO_DELAY */ break; } case CTL_REALSYNC_SET: { int *syncstate; syncstate = (int *)addr; mtx_lock(&softc->ctl_lock); switch (*syncstate) { case 0: softc->flags &= ~CTL_FLAG_REAL_SYNC; break; case 1: softc->flags |= CTL_FLAG_REAL_SYNC; break; default: retval = EINVAL; break; } mtx_unlock(&softc->ctl_lock); break; } case CTL_REALSYNC_GET: { int *syncstate; syncstate = (int*)addr; mtx_lock(&softc->ctl_lock); if (softc->flags & CTL_FLAG_REAL_SYNC) *syncstate = 1; else *syncstate = 0; mtx_unlock(&softc->ctl_lock); break; } case CTL_SETSYNC: case CTL_GETSYNC: { struct ctl_sync_info *sync_info; struct ctl_lun *lun; sync_info = (struct ctl_sync_info *)addr; mtx_lock(&softc->ctl_lock); lun = softc->ctl_luns[sync_info->lun_id]; if (lun == NULL) { mtx_unlock(&softc->ctl_lock); sync_info->status = CTL_GS_SYNC_NO_LUN; } /* * Get or set the sync interval. We're not bounds checking * in the set case, hopefully the user won't do something * silly. */ mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); if (cmd == CTL_GETSYNC) sync_info->sync_interval = lun->sync_interval; else lun->sync_interval = sync_info->sync_interval; mtx_unlock(&lun->lun_lock); sync_info->status = CTL_GS_SYNC_OK; break; } case CTL_GETSTATS: { struct ctl_stats *stats; struct ctl_lun *lun; int i; stats = (struct ctl_stats *)addr; if ((sizeof(struct ctl_lun_io_stats) * softc->num_luns) > stats->alloc_len) { stats->status = CTL_SS_NEED_MORE_SPACE; stats->num_luns = softc->num_luns; break; } /* * XXX KDM no locking here. If the LUN list changes, * things can blow up. */ for (i = 0, lun = STAILQ_FIRST(&softc->lun_list); lun != NULL; i++, lun = STAILQ_NEXT(lun, links)) { retval = copyout(&lun->stats, &stats->lun_stats[i], sizeof(lun->stats)); if (retval != 0) break; } stats->num_luns = softc->num_luns; stats->fill_len = sizeof(struct ctl_lun_io_stats) * softc->num_luns; stats->status = CTL_SS_OK; #ifdef CTL_TIME_IO stats->flags = CTL_STATS_FLAG_TIME_VALID; #else stats->flags = CTL_STATS_FLAG_NONE; #endif getnanouptime(&stats->timestamp); break; } case CTL_ERROR_INJECT: { struct ctl_error_desc *err_desc, *new_err_desc; struct ctl_lun *lun; err_desc = (struct ctl_error_desc *)addr; new_err_desc = malloc(sizeof(*new_err_desc), M_CTL, M_WAITOK | M_ZERO); bcopy(err_desc, new_err_desc, sizeof(*new_err_desc)); mtx_lock(&softc->ctl_lock); lun = softc->ctl_luns[err_desc->lun_id]; if (lun == NULL) { mtx_unlock(&softc->ctl_lock); free(new_err_desc, M_CTL); printf("%s: CTL_ERROR_INJECT: invalid LUN %ju\n", __func__, (uintmax_t)err_desc->lun_id); retval = EINVAL; break; } mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); /* * We could do some checking here to verify the validity * of the request, but given the complexity of error * injection requests, the checking logic would be fairly * complex. * * For now, if the request is invalid, it just won't get * executed and might get deleted. */ STAILQ_INSERT_TAIL(&lun->error_list, new_err_desc, links); /* * XXX KDM check to make sure the serial number is unique, * in case we somehow manage to wrap. That shouldn't * happen for a very long time, but it's the right thing to * do. */ new_err_desc->serial = lun->error_serial; err_desc->serial = lun->error_serial; lun->error_serial++; mtx_unlock(&lun->lun_lock); break; } case CTL_ERROR_INJECT_DELETE: { struct ctl_error_desc *delete_desc, *desc, *desc2; struct ctl_lun *lun; int delete_done; delete_desc = (struct ctl_error_desc *)addr; delete_done = 0; mtx_lock(&softc->ctl_lock); lun = softc->ctl_luns[delete_desc->lun_id]; if (lun == NULL) { mtx_unlock(&softc->ctl_lock); printf("%s: CTL_ERROR_INJECT_DELETE: invalid LUN %ju\n", __func__, (uintmax_t)delete_desc->lun_id); retval = EINVAL; break; } mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); STAILQ_FOREACH_SAFE(desc, &lun->error_list, links, desc2) { if (desc->serial != delete_desc->serial) continue; STAILQ_REMOVE(&lun->error_list, desc, ctl_error_desc, links); free(desc, M_CTL); delete_done = 1; } mtx_unlock(&lun->lun_lock); if (delete_done == 0) { printf("%s: CTL_ERROR_INJECT_DELETE: can't find " "error serial %ju on LUN %u\n", __func__, delete_desc->serial, delete_desc->lun_id); retval = EINVAL; break; } break; } case CTL_DUMP_STRUCTS: { int i, j, k; struct ctl_port *port; struct ctl_frontend *fe; mtx_lock(&softc->ctl_lock); printf("CTL Persistent Reservation information start:\n"); for (i = 0; i < CTL_MAX_LUNS; i++) { struct ctl_lun *lun; lun = softc->ctl_luns[i]; if ((lun == NULL) || ((lun->flags & CTL_LUN_DISABLED) != 0)) continue; for (j = 0; j < (CTL_MAX_PORTS * 2); j++) { if (lun->pr_keys[j] == NULL) continue; for (k = 0; k < CTL_MAX_INIT_PER_PORT; k++){ if (lun->pr_keys[j][k] == 0) continue; printf(" LUN %d port %d iid %d key " "%#jx\n", i, j, k, (uintmax_t)lun->pr_keys[j][k]); } } } printf("CTL Persistent Reservation information end\n"); printf("CTL Ports:\n"); STAILQ_FOREACH(port, &softc->port_list, links) { printf(" Port %d '%s' Frontend '%s' Type %u pp %d vp %d WWNN " "%#jx WWPN %#jx\n", port->targ_port, port->port_name, port->frontend->name, port->port_type, port->physical_port, port->virtual_port, (uintmax_t)port->wwnn, (uintmax_t)port->wwpn); for (j = 0; j < CTL_MAX_INIT_PER_PORT; j++) { if (port->wwpn_iid[j].in_use == 0 && port->wwpn_iid[j].wwpn == 0 && port->wwpn_iid[j].name == NULL) continue; printf(" iid %u use %d WWPN %#jx '%s'\n", j, port->wwpn_iid[j].in_use, (uintmax_t)port->wwpn_iid[j].wwpn, port->wwpn_iid[j].name); } } printf("CTL Port information end\n"); mtx_unlock(&softc->ctl_lock); /* * XXX KDM calling this without a lock. We'd likely want * to drop the lock before calling the frontend's dump * routine anyway. */ printf("CTL Frontends:\n"); STAILQ_FOREACH(fe, &softc->fe_list, links) { printf(" Frontend '%s'\n", fe->name); if (fe->fe_dump != NULL) fe->fe_dump(); } printf("CTL Frontend information end\n"); break; } case CTL_LUN_REQ: { struct ctl_lun_req *lun_req; struct ctl_backend_driver *backend; lun_req = (struct ctl_lun_req *)addr; backend = ctl_backend_find(lun_req->backend); if (backend == NULL) { lun_req->status = CTL_LUN_ERROR; snprintf(lun_req->error_str, sizeof(lun_req->error_str), "Backend \"%s\" not found.", lun_req->backend); break; } if (lun_req->num_be_args > 0) { lun_req->kern_be_args = ctl_copyin_args( lun_req->num_be_args, lun_req->be_args, lun_req->error_str, sizeof(lun_req->error_str)); if (lun_req->kern_be_args == NULL) { lun_req->status = CTL_LUN_ERROR; break; } } retval = backend->ioctl(dev, cmd, addr, flag, td); if (lun_req->num_be_args > 0) { ctl_copyout_args(lun_req->num_be_args, lun_req->kern_be_args); ctl_free_args(lun_req->num_be_args, lun_req->kern_be_args); } break; } case CTL_LUN_LIST: { struct sbuf *sb; struct ctl_lun *lun; struct ctl_lun_list *list; struct ctl_option *opt; list = (struct ctl_lun_list *)addr; /* * Allocate a fixed length sbuf here, based on the length * of the user's buffer. We could allocate an auto-extending * buffer, and then tell the user how much larger our * amount of data is than his buffer, but that presents * some problems: * * 1. The sbuf(9) routines use a blocking malloc, and so * we can't hold a lock while calling them with an * auto-extending buffer. * * 2. There is not currently a LUN reference counting * mechanism, outside of outstanding transactions on * the LUN's OOA queue. So a LUN could go away on us * while we're getting the LUN number, backend-specific * information, etc. Thus, given the way things * currently work, we need to hold the CTL lock while * grabbing LUN information. * * So, from the user's standpoint, the best thing to do is * allocate what he thinks is a reasonable buffer length, * and then if he gets a CTL_LUN_LIST_NEED_MORE_SPACE error, * double the buffer length and try again. (And repeat * that until he succeeds.) */ sb = sbuf_new(NULL, NULL, list->alloc_len, SBUF_FIXEDLEN); if (sb == NULL) { list->status = CTL_LUN_LIST_ERROR; snprintf(list->error_str, sizeof(list->error_str), "Unable to allocate %d bytes for LUN list", list->alloc_len); break; } sbuf_printf(sb, "\n"); mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(lun, &softc->lun_list, links) { mtx_lock(&lun->lun_lock); retval = sbuf_printf(sb, "\n", (uintmax_t)lun->lun); /* * Bail out as soon as we see that we've overfilled * the buffer. */ if (retval != 0) break; retval = sbuf_printf(sb, "\t%s" "\n", (lun->backend == NULL) ? "none" : lun->backend->name); if (retval != 0) break; retval = sbuf_printf(sb, "\t%d\n", lun->be_lun->lun_type); if (retval != 0) break; if (lun->backend == NULL) { retval = sbuf_printf(sb, "\n"); if (retval != 0) break; continue; } retval = sbuf_printf(sb, "\t%ju\n", (lun->be_lun->maxlba > 0) ? lun->be_lun->maxlba + 1 : 0); if (retval != 0) break; retval = sbuf_printf(sb, "\t%u\n", lun->be_lun->blocksize); if (retval != 0) break; retval = sbuf_printf(sb, "\t"); if (retval != 0) break; retval = ctl_sbuf_printf_esc(sb, lun->be_lun->serial_num, sizeof(lun->be_lun->serial_num)); if (retval != 0) break; retval = sbuf_printf(sb, "\n"); if (retval != 0) break; retval = sbuf_printf(sb, "\t"); if (retval != 0) break; retval = ctl_sbuf_printf_esc(sb, lun->be_lun->device_id, sizeof(lun->be_lun->device_id)); if (retval != 0) break; retval = sbuf_printf(sb, "\n"); if (retval != 0) break; if (lun->backend->lun_info != NULL) { retval = lun->backend->lun_info(lun->be_lun->be_lun, sb); if (retval != 0) break; } STAILQ_FOREACH(opt, &lun->be_lun->options, links) { retval = sbuf_printf(sb, "\t<%s>%s\n", opt->name, opt->value, opt->name); if (retval != 0) break; } retval = sbuf_printf(sb, "\n"); if (retval != 0) break; mtx_unlock(&lun->lun_lock); } if (lun != NULL) mtx_unlock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); if ((retval != 0) || ((retval = sbuf_printf(sb, "\n")) != 0)) { retval = 0; sbuf_delete(sb); list->status = CTL_LUN_LIST_NEED_MORE_SPACE; snprintf(list->error_str, sizeof(list->error_str), "Out of space, %d bytes is too small", list->alloc_len); break; } sbuf_finish(sb); retval = copyout(sbuf_data(sb), list->lun_xml, sbuf_len(sb) + 1); list->fill_len = sbuf_len(sb) + 1; list->status = CTL_LUN_LIST_OK; sbuf_delete(sb); break; } case CTL_ISCSI: { struct ctl_iscsi *ci; struct ctl_frontend *fe; ci = (struct ctl_iscsi *)addr; fe = ctl_frontend_find("iscsi"); if (fe == NULL) { ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "Frontend \"iscsi\" not found."); break; } retval = fe->ioctl(dev, cmd, addr, flag, td); break; } case CTL_PORT_REQ: { struct ctl_req *req; struct ctl_frontend *fe; req = (struct ctl_req *)addr; fe = ctl_frontend_find(req->driver); if (fe == NULL) { req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "Frontend \"%s\" not found.", req->driver); break; } if (req->num_args > 0) { req->kern_args = ctl_copyin_args(req->num_args, req->args, req->error_str, sizeof(req->error_str)); if (req->kern_args == NULL) { req->status = CTL_LUN_ERROR; break; } } retval = fe->ioctl(dev, cmd, addr, flag, td); if (req->num_args > 0) { ctl_copyout_args(req->num_args, req->kern_args); ctl_free_args(req->num_args, req->kern_args); } break; } case CTL_PORT_LIST: { struct sbuf *sb; struct ctl_port *port; struct ctl_lun_list *list; struct ctl_option *opt; int j; uint32_t plun; list = (struct ctl_lun_list *)addr; sb = sbuf_new(NULL, NULL, list->alloc_len, SBUF_FIXEDLEN); if (sb == NULL) { list->status = CTL_LUN_LIST_ERROR; snprintf(list->error_str, sizeof(list->error_str), "Unable to allocate %d bytes for LUN list", list->alloc_len); break; } sbuf_printf(sb, "\n"); mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(port, &softc->port_list, links) { retval = sbuf_printf(sb, "\n", (uintmax_t)port->targ_port); /* * Bail out as soon as we see that we've overfilled * the buffer. */ if (retval != 0) break; retval = sbuf_printf(sb, "\t%s" "\n", port->frontend->name); if (retval != 0) break; retval = sbuf_printf(sb, "\t%d\n", port->port_type); if (retval != 0) break; retval = sbuf_printf(sb, "\t%s\n", (port->status & CTL_PORT_STATUS_ONLINE) ? "YES" : "NO"); if (retval != 0) break; retval = sbuf_printf(sb, "\t%s\n", port->port_name); if (retval != 0) break; retval = sbuf_printf(sb, "\t%d\n", port->physical_port); if (retval != 0) break; retval = sbuf_printf(sb, "\t%d\n", port->virtual_port); if (retval != 0) break; if (port->target_devid != NULL) { sbuf_printf(sb, "\t"); ctl_id_sbuf(port->target_devid, sb); sbuf_printf(sb, "\n"); } if (port->port_devid != NULL) { sbuf_printf(sb, "\t"); ctl_id_sbuf(port->port_devid, sb); sbuf_printf(sb, "\n"); } if (port->port_info != NULL) { retval = port->port_info(port->onoff_arg, sb); if (retval != 0) break; } STAILQ_FOREACH(opt, &port->options, links) { retval = sbuf_printf(sb, "\t<%s>%s\n", opt->name, opt->value, opt->name); if (retval != 0) break; } if (port->lun_map != NULL) { sbuf_printf(sb, "\ton\n"); for (j = 0; j < CTL_MAX_LUNS; j++) { plun = ctl_lun_map_from_port(port, j); if (plun >= CTL_MAX_LUNS) continue; sbuf_printf(sb, "\t%u\n", j, plun); } } for (j = 0; j < CTL_MAX_INIT_PER_PORT; j++) { if (port->wwpn_iid[j].in_use == 0 || (port->wwpn_iid[j].wwpn == 0 && port->wwpn_iid[j].name == NULL)) continue; if (port->wwpn_iid[j].name != NULL) retval = sbuf_printf(sb, "\t%s\n", j, port->wwpn_iid[j].name); else retval = sbuf_printf(sb, "\tnaa.%08jx\n", j, port->wwpn_iid[j].wwpn); if (retval != 0) break; } if (retval != 0) break; retval = sbuf_printf(sb, "\n"); if (retval != 0) break; } mtx_unlock(&softc->ctl_lock); if ((retval != 0) || ((retval = sbuf_printf(sb, "\n")) != 0)) { retval = 0; sbuf_delete(sb); list->status = CTL_LUN_LIST_NEED_MORE_SPACE; snprintf(list->error_str, sizeof(list->error_str), "Out of space, %d bytes is too small", list->alloc_len); break; } sbuf_finish(sb); retval = copyout(sbuf_data(sb), list->lun_xml, sbuf_len(sb) + 1); list->fill_len = sbuf_len(sb) + 1; list->status = CTL_LUN_LIST_OK; sbuf_delete(sb); break; } case CTL_LUN_MAP: { struct ctl_lun_map *lm = (struct ctl_lun_map *)addr; struct ctl_port *port; mtx_lock(&softc->ctl_lock); if (lm->port >= CTL_MAX_PORTS || (port = softc->ctl_ports[lm->port]) == NULL) { mtx_unlock(&softc->ctl_lock); return (ENXIO); } mtx_unlock(&softc->ctl_lock); // XXX: port_enable sleeps if (lm->plun < CTL_MAX_LUNS) { if (lm->lun == UINT32_MAX) retval = ctl_lun_map_unset(port, lm->plun); else if (lm->lun < CTL_MAX_LUNS && softc->ctl_luns[lm->lun] != NULL) retval = ctl_lun_map_set(port, lm->plun, lm->lun); else return (ENXIO); } else if (lm->plun == UINT32_MAX) { if (lm->lun == UINT32_MAX) retval = ctl_lun_map_deinit(port); else retval = ctl_lun_map_init(port); } else return (ENXIO); break; } default: { /* XXX KDM should we fix this? */ #if 0 struct ctl_backend_driver *backend; unsigned int type; int found; found = 0; /* * We encode the backend type as the ioctl type for backend * ioctls. So parse it out here, and then search for a * backend of this type. */ type = _IOC_TYPE(cmd); STAILQ_FOREACH(backend, &softc->be_list, links) { if (backend->type == type) { found = 1; break; } } if (found == 0) { printf("ctl: unknown ioctl command %#lx or backend " "%d\n", cmd, type); retval = EINVAL; break; } retval = backend->ioctl(dev, cmd, addr, flag, td); #endif retval = ENOTTY; break; } } return (retval); } uint32_t ctl_get_initindex(struct ctl_nexus *nexus) { if (nexus->targ_port < CTL_MAX_PORTS) return (nexus->initid.id + (nexus->targ_port * CTL_MAX_INIT_PER_PORT)); else return (nexus->initid.id + ((nexus->targ_port - CTL_MAX_PORTS) * CTL_MAX_INIT_PER_PORT)); } uint32_t ctl_get_resindex(struct ctl_nexus *nexus) { return (nexus->initid.id + (nexus->targ_port * CTL_MAX_INIT_PER_PORT)); } uint32_t ctl_port_idx(int port_num) { if (port_num < CTL_MAX_PORTS) return(port_num); else return(port_num - CTL_MAX_PORTS); } int ctl_lun_map_init(struct ctl_port *port) { struct ctl_softc *softc = control_softc; struct ctl_lun *lun; uint32_t i; if (port->lun_map == NULL) port->lun_map = malloc(sizeof(uint32_t) * CTL_MAX_LUNS, M_CTL, M_NOWAIT); if (port->lun_map == NULL) return (ENOMEM); for (i = 0; i < CTL_MAX_LUNS; i++) port->lun_map[i] = UINT32_MAX; if (port->status & CTL_PORT_STATUS_ONLINE && port->lun_disable != NULL) { STAILQ_FOREACH(lun, &softc->lun_list, links) port->lun_disable(port->targ_lun_arg, lun->lun); } return (0); } int ctl_lun_map_deinit(struct ctl_port *port) { struct ctl_softc *softc = control_softc; struct ctl_lun *lun; if (port->lun_map == NULL) return (0); free(port->lun_map, M_CTL); port->lun_map = NULL; if (port->status & CTL_PORT_STATUS_ONLINE && port->lun_enable != NULL) { STAILQ_FOREACH(lun, &softc->lun_list, links) port->lun_enable(port->targ_lun_arg, lun->lun); } return (0); } int ctl_lun_map_set(struct ctl_port *port, uint32_t plun, uint32_t glun) { int status; uint32_t old; if (port->lun_map == NULL) { status = ctl_lun_map_init(port); if (status != 0) return (status); } old = port->lun_map[plun]; port->lun_map[plun] = glun; if ((port->status & CTL_PORT_STATUS_ONLINE) && old >= CTL_MAX_LUNS && port->lun_enable != NULL) port->lun_enable(port->targ_lun_arg, plun); return (0); } int ctl_lun_map_unset(struct ctl_port *port, uint32_t plun) { uint32_t old; if (port->lun_map == NULL) return (0); old = port->lun_map[plun]; port->lun_map[plun] = UINT32_MAX; if ((port->status & CTL_PORT_STATUS_ONLINE) && old < CTL_MAX_LUNS && port->lun_disable != NULL) port->lun_disable(port->targ_lun_arg, plun); return (0); } uint32_t ctl_lun_map_from_port(struct ctl_port *port, uint32_t lun_id) { if (port == NULL) return (UINT32_MAX); if (port->lun_map == NULL || lun_id >= CTL_MAX_LUNS) return (lun_id); return (port->lun_map[lun_id]); } uint32_t ctl_lun_map_to_port(struct ctl_port *port, uint32_t lun_id) { uint32_t i; if (port == NULL) return (UINT32_MAX); if (port->lun_map == NULL) return (lun_id); for (i = 0; i < CTL_MAX_LUNS; i++) { if (port->lun_map[i] == lun_id) return (i); } return (UINT32_MAX); } static struct ctl_port * ctl_io_port(struct ctl_io_hdr *io_hdr) { int port_num; port_num = io_hdr->nexus.targ_port; return (control_softc->ctl_ports[ctl_port_idx(port_num)]); } /* * Note: This only works for bitmask sizes that are at least 32 bits, and * that are a power of 2. */ int ctl_ffz(uint32_t *mask, uint32_t size) { uint32_t num_chunks, num_pieces; int i, j; num_chunks = (size >> 5); if (num_chunks == 0) num_chunks++; num_pieces = MIN((sizeof(uint32_t) * 8), size); for (i = 0; i < num_chunks; i++) { for (j = 0; j < num_pieces; j++) { if ((mask[i] & (1 << j)) == 0) return ((i << 5) + j); } } return (-1); } int ctl_set_mask(uint32_t *mask, uint32_t bit) { uint32_t chunk, piece; chunk = bit >> 5; piece = bit % (sizeof(uint32_t) * 8); if ((mask[chunk] & (1 << piece)) != 0) return (-1); else mask[chunk] |= (1 << piece); return (0); } int ctl_clear_mask(uint32_t *mask, uint32_t bit) { uint32_t chunk, piece; chunk = bit >> 5; piece = bit % (sizeof(uint32_t) * 8); if ((mask[chunk] & (1 << piece)) == 0) return (-1); else mask[chunk] &= ~(1 << piece); return (0); } int ctl_is_set(uint32_t *mask, uint32_t bit) { uint32_t chunk, piece; chunk = bit >> 5; piece = bit % (sizeof(uint32_t) * 8); if ((mask[chunk] & (1 << piece)) == 0) return (0); else return (1); } static uint64_t ctl_get_prkey(struct ctl_lun *lun, uint32_t residx) { uint64_t *t; t = lun->pr_keys[residx/CTL_MAX_INIT_PER_PORT]; if (t == NULL) return (0); return (t[residx % CTL_MAX_INIT_PER_PORT]); } static void ctl_clr_prkey(struct ctl_lun *lun, uint32_t residx) { uint64_t *t; t = lun->pr_keys[residx/CTL_MAX_INIT_PER_PORT]; if (t == NULL) return; t[residx % CTL_MAX_INIT_PER_PORT] = 0; } static void ctl_alloc_prkey(struct ctl_lun *lun, uint32_t residx) { uint64_t *p; u_int i; i = residx/CTL_MAX_INIT_PER_PORT; if (lun->pr_keys[i] != NULL) return; mtx_unlock(&lun->lun_lock); p = malloc(sizeof(uint64_t) * CTL_MAX_INIT_PER_PORT, M_CTL, M_WAITOK | M_ZERO); mtx_lock(&lun->lun_lock); if (lun->pr_keys[i] == NULL) lun->pr_keys[i] = p; else free(p, M_CTL); } static void ctl_set_prkey(struct ctl_lun *lun, uint32_t residx, uint64_t key) { uint64_t *t; t = lun->pr_keys[residx/CTL_MAX_INIT_PER_PORT]; KASSERT(t != NULL, ("prkey %d is not allocated", residx)); t[residx % CTL_MAX_INIT_PER_PORT] = key; } /* * ctl_softc, pool_name, total_ctl_io are passed in. * npool is passed out. */ int ctl_pool_create(struct ctl_softc *ctl_softc, const char *pool_name, uint32_t total_ctl_io, void **npool) { #ifdef IO_POOLS struct ctl_io_pool *pool; pool = (struct ctl_io_pool *)malloc(sizeof(*pool), M_CTL, M_NOWAIT | M_ZERO); if (pool == NULL) return (ENOMEM); snprintf(pool->name, sizeof(pool->name), "CTL IO %s", pool_name); pool->ctl_softc = ctl_softc; pool->zone = uma_zsecond_create(pool->name, NULL, NULL, NULL, NULL, ctl_softc->io_zone); /* uma_prealloc(pool->zone, total_ctl_io); */ *npool = pool; #else *npool = ctl_softc->io_zone; #endif return (0); } void ctl_pool_free(struct ctl_io_pool *pool) { if (pool == NULL) return; #ifdef IO_POOLS uma_zdestroy(pool->zone); free(pool, M_CTL); #endif } union ctl_io * ctl_alloc_io(void *pool_ref) { union ctl_io *io; #ifdef IO_POOLS struct ctl_io_pool *pool = (struct ctl_io_pool *)pool_ref; io = uma_zalloc(pool->zone, M_WAITOK); #else io = uma_zalloc((uma_zone_t)pool_ref, M_WAITOK); #endif if (io != NULL) io->io_hdr.pool = pool_ref; return (io); } union ctl_io * ctl_alloc_io_nowait(void *pool_ref) { union ctl_io *io; #ifdef IO_POOLS struct ctl_io_pool *pool = (struct ctl_io_pool *)pool_ref; io = uma_zalloc(pool->zone, M_NOWAIT); #else io = uma_zalloc((uma_zone_t)pool_ref, M_NOWAIT); #endif if (io != NULL) io->io_hdr.pool = pool_ref; return (io); } void ctl_free_io(union ctl_io *io) { #ifdef IO_POOLS struct ctl_io_pool *pool; #endif if (io == NULL) return; #ifdef IO_POOLS pool = (struct ctl_io_pool *)io->io_hdr.pool; uma_zfree(pool->zone, io); #else uma_zfree((uma_zone_t)io->io_hdr.pool, io); #endif } void ctl_zero_io(union ctl_io *io) { void *pool_ref; if (io == NULL) return; /* * May need to preserve linked list pointers at some point too. */ pool_ref = io->io_hdr.pool; memset(io, 0, sizeof(*io)); io->io_hdr.pool = pool_ref; } /* * This routine is currently used for internal copies of ctl_ios that need * to persist for some reason after we've already returned status to the * FETD. (Thus the flag set.) * * XXX XXX * Note that this makes a blind copy of all fields in the ctl_io, except * for the pool reference. This includes any memory that has been * allocated! That memory will no longer be valid after done has been * called, so this would be VERY DANGEROUS for command that actually does * any reads or writes. Right now (11/7/2005), this is only used for immediate * start and stop commands, which don't transfer any data, so this is not a * problem. If it is used for anything else, the caller would also need to * allocate data buffer space and this routine would need to be modified to * copy the data buffer(s) as well. */ void ctl_copy_io(union ctl_io *src, union ctl_io *dest) { void *pool_ref; if ((src == NULL) || (dest == NULL)) return; /* * May need to preserve linked list pointers at some point too. */ pool_ref = dest->io_hdr.pool; memcpy(dest, src, MIN(sizeof(*src), sizeof(*dest))); dest->io_hdr.pool = pool_ref; /* * We need to know that this is an internal copy, and doesn't need * to get passed back to the FETD that allocated it. */ dest->io_hdr.flags |= CTL_FLAG_INT_COPY; } int ctl_expand_number(const char *buf, uint64_t *num) { char *endptr; uint64_t number; unsigned shift; number = strtoq(buf, &endptr, 0); switch (tolower((unsigned char)*endptr)) { case 'e': shift = 60; break; case 'p': shift = 50; break; case 't': shift = 40; break; case 'g': shift = 30; break; case 'm': shift = 20; break; case 'k': shift = 10; break; case 'b': case '\0': /* No unit. */ *num = number; return (0); default: /* Unrecognized unit. */ return (-1); } if ((number << shift) >> shift != number) { /* Overflow */ return (-1); } *num = number << shift; return (0); } /* * This routine could be used in the future to load default and/or saved * mode page parameters for a particuar lun. */ static int ctl_init_page_index(struct ctl_lun *lun) { int i; struct ctl_page_index *page_index; const char *value; uint64_t ival; memcpy(&lun->mode_pages.index, page_index_template, sizeof(page_index_template)); for (i = 0; i < CTL_NUM_MODE_PAGES; i++) { page_index = &lun->mode_pages.index[i]; /* * If this is a disk-only mode page, there's no point in * setting it up. For some pages, we have to have some * basic information about the disk in order to calculate the * mode page data. */ if ((lun->be_lun->lun_type != T_DIRECT) && (page_index->page_flags & CTL_PAGE_FLAG_DISK_ONLY)) continue; switch (page_index->page_code & SMPH_PC_MASK) { case SMS_RW_ERROR_RECOVERY_PAGE: { if (page_index->subpage != SMS_SUBPAGE_PAGE_0) panic("subpage is incorrect!"); memcpy(&lun->mode_pages.rw_er_page[CTL_PAGE_CURRENT], &rw_er_page_default, sizeof(rw_er_page_default)); memcpy(&lun->mode_pages.rw_er_page[CTL_PAGE_CHANGEABLE], &rw_er_page_changeable, sizeof(rw_er_page_changeable)); memcpy(&lun->mode_pages.rw_er_page[CTL_PAGE_DEFAULT], &rw_er_page_default, sizeof(rw_er_page_default)); memcpy(&lun->mode_pages.rw_er_page[CTL_PAGE_SAVED], &rw_er_page_default, sizeof(rw_er_page_default)); page_index->page_data = (uint8_t *)lun->mode_pages.rw_er_page; break; } case SMS_FORMAT_DEVICE_PAGE: { struct scsi_format_page *format_page; if (page_index->subpage != SMS_SUBPAGE_PAGE_0) panic("subpage is incorrect!"); /* * Sectors per track are set above. Bytes per * sector need to be set here on a per-LUN basis. */ memcpy(&lun->mode_pages.format_page[CTL_PAGE_CURRENT], &format_page_default, sizeof(format_page_default)); memcpy(&lun->mode_pages.format_page[ CTL_PAGE_CHANGEABLE], &format_page_changeable, sizeof(format_page_changeable)); memcpy(&lun->mode_pages.format_page[CTL_PAGE_DEFAULT], &format_page_default, sizeof(format_page_default)); memcpy(&lun->mode_pages.format_page[CTL_PAGE_SAVED], &format_page_default, sizeof(format_page_default)); format_page = &lun->mode_pages.format_page[ CTL_PAGE_CURRENT]; scsi_ulto2b(lun->be_lun->blocksize, format_page->bytes_per_sector); format_page = &lun->mode_pages.format_page[ CTL_PAGE_DEFAULT]; scsi_ulto2b(lun->be_lun->blocksize, format_page->bytes_per_sector); format_page = &lun->mode_pages.format_page[ CTL_PAGE_SAVED]; scsi_ulto2b(lun->be_lun->blocksize, format_page->bytes_per_sector); page_index->page_data = (uint8_t *)lun->mode_pages.format_page; break; } case SMS_RIGID_DISK_PAGE: { struct scsi_rigid_disk_page *rigid_disk_page; uint32_t sectors_per_cylinder; uint64_t cylinders; #ifndef __XSCALE__ int shift; #endif /* !__XSCALE__ */ if (page_index->subpage != SMS_SUBPAGE_PAGE_0) panic("invalid subpage value %d", page_index->subpage); /* * Rotation rate and sectors per track are set * above. We calculate the cylinders here based on * capacity. Due to the number of heads and * sectors per track we're using, smaller arrays * may turn out to have 0 cylinders. Linux and * FreeBSD don't pay attention to these mode pages * to figure out capacity, but Solaris does. It * seems to deal with 0 cylinders just fine, and * works out a fake geometry based on the capacity. */ memcpy(&lun->mode_pages.rigid_disk_page[ CTL_PAGE_DEFAULT], &rigid_disk_page_default, sizeof(rigid_disk_page_default)); memcpy(&lun->mode_pages.rigid_disk_page[ CTL_PAGE_CHANGEABLE],&rigid_disk_page_changeable, sizeof(rigid_disk_page_changeable)); sectors_per_cylinder = CTL_DEFAULT_SECTORS_PER_TRACK * CTL_DEFAULT_HEADS; /* * The divide method here will be more accurate, * probably, but results in floating point being * used in the kernel on i386 (__udivdi3()). On the * XScale, though, __udivdi3() is implemented in * software. * * The shift method for cylinder calculation is * accurate if sectors_per_cylinder is a power of * 2. Otherwise it might be slightly off -- you * might have a bit of a truncation problem. */ #ifdef __XSCALE__ cylinders = (lun->be_lun->maxlba + 1) / sectors_per_cylinder; #else for (shift = 31; shift > 0; shift--) { if (sectors_per_cylinder & (1 << shift)) break; } cylinders = (lun->be_lun->maxlba + 1) >> shift; #endif /* * We've basically got 3 bytes, or 24 bits for the * cylinder size in the mode page. If we're over, * just round down to 2^24. */ if (cylinders > 0xffffff) cylinders = 0xffffff; rigid_disk_page = &lun->mode_pages.rigid_disk_page[ CTL_PAGE_DEFAULT]; scsi_ulto3b(cylinders, rigid_disk_page->cylinders); if ((value = ctl_get_opt(&lun->be_lun->options, "rpm")) != NULL) { scsi_ulto2b(strtol(value, NULL, 0), rigid_disk_page->rotation_rate); } memcpy(&lun->mode_pages.rigid_disk_page[CTL_PAGE_CURRENT], &lun->mode_pages.rigid_disk_page[CTL_PAGE_DEFAULT], sizeof(rigid_disk_page_default)); memcpy(&lun->mode_pages.rigid_disk_page[CTL_PAGE_SAVED], &lun->mode_pages.rigid_disk_page[CTL_PAGE_DEFAULT], sizeof(rigid_disk_page_default)); page_index->page_data = (uint8_t *)lun->mode_pages.rigid_disk_page; break; } case SMS_CACHING_PAGE: { struct scsi_caching_page *caching_page; if (page_index->subpage != SMS_SUBPAGE_PAGE_0) panic("invalid subpage value %d", page_index->subpage); memcpy(&lun->mode_pages.caching_page[CTL_PAGE_DEFAULT], &caching_page_default, sizeof(caching_page_default)); memcpy(&lun->mode_pages.caching_page[ CTL_PAGE_CHANGEABLE], &caching_page_changeable, sizeof(caching_page_changeable)); memcpy(&lun->mode_pages.caching_page[CTL_PAGE_SAVED], &caching_page_default, sizeof(caching_page_default)); caching_page = &lun->mode_pages.caching_page[ CTL_PAGE_SAVED]; value = ctl_get_opt(&lun->be_lun->options, "writecache"); if (value != NULL && strcmp(value, "off") == 0) caching_page->flags1 &= ~SCP_WCE; value = ctl_get_opt(&lun->be_lun->options, "readcache"); if (value != NULL && strcmp(value, "off") == 0) caching_page->flags1 |= SCP_RCD; memcpy(&lun->mode_pages.caching_page[CTL_PAGE_CURRENT], &lun->mode_pages.caching_page[CTL_PAGE_SAVED], sizeof(caching_page_default)); page_index->page_data = (uint8_t *)lun->mode_pages.caching_page; break; } case SMS_CONTROL_MODE_PAGE: { struct scsi_control_page *control_page; if (page_index->subpage != SMS_SUBPAGE_PAGE_0) panic("invalid subpage value %d", page_index->subpage); memcpy(&lun->mode_pages.control_page[CTL_PAGE_DEFAULT], &control_page_default, sizeof(control_page_default)); memcpy(&lun->mode_pages.control_page[ CTL_PAGE_CHANGEABLE], &control_page_changeable, sizeof(control_page_changeable)); memcpy(&lun->mode_pages.control_page[CTL_PAGE_SAVED], &control_page_default, sizeof(control_page_default)); control_page = &lun->mode_pages.control_page[ CTL_PAGE_SAVED]; value = ctl_get_opt(&lun->be_lun->options, "reordering"); if (value != NULL && strcmp(value, "unrestricted") == 0) { control_page->queue_flags &= ~SCP_QUEUE_ALG_MASK; control_page->queue_flags |= SCP_QUEUE_ALG_UNRESTRICTED; } memcpy(&lun->mode_pages.control_page[CTL_PAGE_CURRENT], &lun->mode_pages.control_page[CTL_PAGE_SAVED], sizeof(control_page_default)); page_index->page_data = (uint8_t *)lun->mode_pages.control_page; break; } case SMS_INFO_EXCEPTIONS_PAGE: { switch (page_index->subpage) { case SMS_SUBPAGE_PAGE_0: memcpy(&lun->mode_pages.ie_page[CTL_PAGE_CURRENT], &ie_page_default, sizeof(ie_page_default)); memcpy(&lun->mode_pages.ie_page[ CTL_PAGE_CHANGEABLE], &ie_page_changeable, sizeof(ie_page_changeable)); memcpy(&lun->mode_pages.ie_page[CTL_PAGE_DEFAULT], &ie_page_default, sizeof(ie_page_default)); memcpy(&lun->mode_pages.ie_page[CTL_PAGE_SAVED], &ie_page_default, sizeof(ie_page_default)); page_index->page_data = (uint8_t *)lun->mode_pages.ie_page; break; case 0x02: { struct ctl_logical_block_provisioning_page *page; memcpy(&lun->mode_pages.lbp_page[CTL_PAGE_DEFAULT], &lbp_page_default, sizeof(lbp_page_default)); memcpy(&lun->mode_pages.lbp_page[ CTL_PAGE_CHANGEABLE], &lbp_page_changeable, sizeof(lbp_page_changeable)); memcpy(&lun->mode_pages.lbp_page[CTL_PAGE_SAVED], &lbp_page_default, sizeof(lbp_page_default)); page = &lun->mode_pages.lbp_page[CTL_PAGE_SAVED]; value = ctl_get_opt(&lun->be_lun->options, "avail-threshold"); if (value != NULL && ctl_expand_number(value, &ival) == 0) { page->descr[0].flags |= SLBPPD_ENABLED | SLBPPD_ARMING_DEC; if (lun->be_lun->blocksize) ival /= lun->be_lun->blocksize; else ival /= 512; scsi_ulto4b(ival >> CTL_LBP_EXPONENT, page->descr[0].count); } value = ctl_get_opt(&lun->be_lun->options, "used-threshold"); if (value != NULL && ctl_expand_number(value, &ival) == 0) { page->descr[1].flags |= SLBPPD_ENABLED | SLBPPD_ARMING_INC; if (lun->be_lun->blocksize) ival /= lun->be_lun->blocksize; else ival /= 512; scsi_ulto4b(ival >> CTL_LBP_EXPONENT, page->descr[1].count); } value = ctl_get_opt(&lun->be_lun->options, "pool-avail-threshold"); if (value != NULL && ctl_expand_number(value, &ival) == 0) { page->descr[2].flags |= SLBPPD_ENABLED | SLBPPD_ARMING_DEC; if (lun->be_lun->blocksize) ival /= lun->be_lun->blocksize; else ival /= 512; scsi_ulto4b(ival >> CTL_LBP_EXPONENT, page->descr[2].count); } value = ctl_get_opt(&lun->be_lun->options, "pool-used-threshold"); if (value != NULL && ctl_expand_number(value, &ival) == 0) { page->descr[3].flags |= SLBPPD_ENABLED | SLBPPD_ARMING_INC; if (lun->be_lun->blocksize) ival /= lun->be_lun->blocksize; else ival /= 512; scsi_ulto4b(ival >> CTL_LBP_EXPONENT, page->descr[3].count); } memcpy(&lun->mode_pages.lbp_page[CTL_PAGE_CURRENT], &lun->mode_pages.lbp_page[CTL_PAGE_SAVED], sizeof(lbp_page_default)); page_index->page_data = (uint8_t *)lun->mode_pages.lbp_page; }} break; } case SMS_VENDOR_SPECIFIC_PAGE:{ switch (page_index->subpage) { case DBGCNF_SUBPAGE_CODE: { struct copan_debugconf_subpage *current_page, *saved_page; memcpy(&lun->mode_pages.debugconf_subpage[ CTL_PAGE_CURRENT], &debugconf_page_default, sizeof(debugconf_page_default)); memcpy(&lun->mode_pages.debugconf_subpage[ CTL_PAGE_CHANGEABLE], &debugconf_page_changeable, sizeof(debugconf_page_changeable)); memcpy(&lun->mode_pages.debugconf_subpage[ CTL_PAGE_DEFAULT], &debugconf_page_default, sizeof(debugconf_page_default)); memcpy(&lun->mode_pages.debugconf_subpage[ CTL_PAGE_SAVED], &debugconf_page_default, sizeof(debugconf_page_default)); page_index->page_data = (uint8_t *)lun->mode_pages.debugconf_subpage; current_page = (struct copan_debugconf_subpage *) (page_index->page_data + (page_index->page_len * CTL_PAGE_CURRENT)); saved_page = (struct copan_debugconf_subpage *) (page_index->page_data + (page_index->page_len * CTL_PAGE_SAVED)); break; } default: panic("invalid subpage value %d", page_index->subpage); break; } break; } default: panic("invalid page value %d", page_index->page_code & SMPH_PC_MASK); break; } } return (CTL_RETVAL_COMPLETE); } static int ctl_init_log_page_index(struct ctl_lun *lun) { struct ctl_page_index *page_index; int i, j, k, prev; memcpy(&lun->log_pages.index, log_page_index_template, sizeof(log_page_index_template)); prev = -1; for (i = 0, j = 0, k = 0; i < CTL_NUM_LOG_PAGES; i++) { page_index = &lun->log_pages.index[i]; /* * If this is a disk-only mode page, there's no point in * setting it up. For some pages, we have to have some * basic information about the disk in order to calculate the * mode page data. */ if ((lun->be_lun->lun_type != T_DIRECT) && (page_index->page_flags & CTL_PAGE_FLAG_DISK_ONLY)) continue; if (page_index->page_code == SLS_LOGICAL_BLOCK_PROVISIONING && lun->backend->lun_attr == NULL) continue; if (page_index->page_code != prev) { lun->log_pages.pages_page[j] = page_index->page_code; prev = page_index->page_code; j++; } lun->log_pages.subpages_page[k*2] = page_index->page_code; lun->log_pages.subpages_page[k*2+1] = page_index->subpage; k++; } lun->log_pages.index[0].page_data = &lun->log_pages.pages_page[0]; lun->log_pages.index[0].page_len = j; lun->log_pages.index[1].page_data = &lun->log_pages.subpages_page[0]; lun->log_pages.index[1].page_len = k * 2; lun->log_pages.index[2].page_data = &lun->log_pages.lbp_page[0]; lun->log_pages.index[2].page_len = 12*CTL_NUM_LBP_PARAMS; lun->log_pages.index[3].page_data = (uint8_t *)&lun->log_pages.stat_page; lun->log_pages.index[3].page_len = sizeof(lun->log_pages.stat_page); return (CTL_RETVAL_COMPLETE); } static int hex2bin(const char *str, uint8_t *buf, int buf_size) { int i; u_char c; memset(buf, 0, buf_size); while (isspace(str[0])) str++; if (str[0] == '0' && (str[1] == 'x' || str[1] == 'X')) str += 2; buf_size *= 2; for (i = 0; str[i] != 0 && i < buf_size; i++) { c = str[i]; if (isdigit(c)) c -= '0'; else if (isalpha(c)) c -= isupper(c) ? 'A' - 10 : 'a' - 10; else break; if (c >= 16) break; if ((i & 1) == 0) buf[i / 2] |= (c << 4); else buf[i / 2] |= c; } return ((i + 1) / 2); } /* * LUN allocation. * * Requirements: * - caller allocates and zeros LUN storage, or passes in a NULL LUN if he * wants us to allocate the LUN and he can block. * - ctl_softc is always set * - be_lun is set if the LUN has a backend (needed for disk LUNs) * * Returns 0 for success, non-zero (errno) for failure. */ static int ctl_alloc_lun(struct ctl_softc *ctl_softc, struct ctl_lun *ctl_lun, struct ctl_be_lun *const be_lun) { struct ctl_lun *nlun, *lun; struct scsi_vpd_id_descriptor *desc; struct scsi_vpd_id_t10 *t10id; - const char *eui, *naa, *scsiname, *vendor, *value; + const char *eui, *naa, *scsiname, *vendor; int lun_number, i, lun_malloced; int devidlen, idlen1, idlen2 = 0, len; if (be_lun == NULL) return (EINVAL); /* * We currently only support Direct Access or Processor LUN types. */ switch (be_lun->lun_type) { case T_DIRECT: break; case T_PROCESSOR: break; case T_SEQUENTIAL: case T_CHANGER: default: be_lun->lun_config_status(be_lun->be_lun, CTL_LUN_CONFIG_FAILURE); break; } if (ctl_lun == NULL) { lun = malloc(sizeof(*lun), M_CTL, M_WAITOK); lun_malloced = 1; } else { lun_malloced = 0; lun = ctl_lun; } memset(lun, 0, sizeof(*lun)); if (lun_malloced) lun->flags = CTL_LUN_MALLOCED; /* Generate LUN ID. */ devidlen = max(CTL_DEVID_MIN_LEN, strnlen(be_lun->device_id, CTL_DEVID_LEN)); idlen1 = sizeof(*t10id) + devidlen; len = sizeof(struct scsi_vpd_id_descriptor) + idlen1; scsiname = ctl_get_opt(&be_lun->options, "scsiname"); if (scsiname != NULL) { idlen2 = roundup2(strlen(scsiname) + 1, 4); len += sizeof(struct scsi_vpd_id_descriptor) + idlen2; } eui = ctl_get_opt(&be_lun->options, "eui"); if (eui != NULL) { len += sizeof(struct scsi_vpd_id_descriptor) + 16; } naa = ctl_get_opt(&be_lun->options, "naa"); if (naa != NULL) { len += sizeof(struct scsi_vpd_id_descriptor) + 16; } lun->lun_devid = malloc(sizeof(struct ctl_devid) + len, M_CTL, M_WAITOK | M_ZERO); desc = (struct scsi_vpd_id_descriptor *)lun->lun_devid->data; desc->proto_codeset = SVPD_ID_CODESET_ASCII; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_LUN | SVPD_ID_TYPE_T10; desc->length = idlen1; t10id = (struct scsi_vpd_id_t10 *)&desc->identifier[0]; memset(t10id->vendor, ' ', sizeof(t10id->vendor)); if ((vendor = ctl_get_opt(&be_lun->options, "vendor")) == NULL) { strncpy((char *)t10id->vendor, CTL_VENDOR, sizeof(t10id->vendor)); } else { strncpy(t10id->vendor, vendor, min(sizeof(t10id->vendor), strlen(vendor))); } strncpy((char *)t10id->vendor_spec_id, (char *)be_lun->device_id, devidlen); if (scsiname != NULL) { desc = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] + desc->length); desc->proto_codeset = SVPD_ID_CODESET_UTF8; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_LUN | SVPD_ID_TYPE_SCSI_NAME; desc->length = idlen2; strlcpy(desc->identifier, scsiname, idlen2); } if (eui != NULL) { desc = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] + desc->length); desc->proto_codeset = SVPD_ID_CODESET_BINARY; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_LUN | SVPD_ID_TYPE_EUI64; desc->length = hex2bin(eui, desc->identifier, 16); desc->length = desc->length > 12 ? 16 : (desc->length > 8 ? 12 : 8); len -= 16 - desc->length; } if (naa != NULL) { desc = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] + desc->length); desc->proto_codeset = SVPD_ID_CODESET_BINARY; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_LUN | SVPD_ID_TYPE_NAA; desc->length = hex2bin(naa, desc->identifier, 16); desc->length = desc->length > 8 ? 16 : 8; len -= 16 - desc->length; } lun->lun_devid->len = len; mtx_lock(&ctl_softc->ctl_lock); /* * See if the caller requested a particular LUN number. If so, see * if it is available. Otherwise, allocate the first available LUN. */ if (be_lun->flags & CTL_LUN_FLAG_ID_REQ) { if ((be_lun->req_lun_id > (CTL_MAX_LUNS - 1)) || (ctl_is_set(ctl_softc->ctl_lun_mask, be_lun->req_lun_id))) { mtx_unlock(&ctl_softc->ctl_lock); if (be_lun->req_lun_id > (CTL_MAX_LUNS - 1)) { printf("ctl: requested LUN ID %d is higher " "than CTL_MAX_LUNS - 1 (%d)\n", be_lun->req_lun_id, CTL_MAX_LUNS - 1); } else { /* * XXX KDM return an error, or just assign * another LUN ID in this case?? */ printf("ctl: requested LUN ID %d is already " "in use\n", be_lun->req_lun_id); } if (lun->flags & CTL_LUN_MALLOCED) free(lun, M_CTL); be_lun->lun_config_status(be_lun->be_lun, CTL_LUN_CONFIG_FAILURE); return (ENOSPC); } lun_number = be_lun->req_lun_id; } else { lun_number = ctl_ffz(ctl_softc->ctl_lun_mask, CTL_MAX_LUNS); if (lun_number == -1) { mtx_unlock(&ctl_softc->ctl_lock); printf("ctl: can't allocate LUN, out of LUNs\n"); if (lun->flags & CTL_LUN_MALLOCED) free(lun, M_CTL); be_lun->lun_config_status(be_lun->be_lun, CTL_LUN_CONFIG_FAILURE); return (ENOSPC); } } ctl_set_mask(ctl_softc->ctl_lun_mask, lun_number); mtx_init(&lun->lun_lock, "CTL LUN", NULL, MTX_DEF); lun->lun = lun_number; lun->be_lun = be_lun; /* * The processor LUN is always enabled. Disk LUNs come on line * disabled, and must be enabled by the backend. */ lun->flags |= CTL_LUN_DISABLED; lun->backend = be_lun->be; be_lun->ctl_lun = lun; be_lun->lun_id = lun_number; atomic_add_int(&be_lun->be->num_luns, 1); if (be_lun->flags & CTL_LUN_FLAG_OFFLINE) lun->flags |= CTL_LUN_OFFLINE; if (be_lun->flags & CTL_LUN_FLAG_POWERED_OFF) lun->flags |= CTL_LUN_STOPPED; if (be_lun->flags & CTL_LUN_FLAG_INOPERABLE) lun->flags |= CTL_LUN_INOPERABLE; if (be_lun->flags & CTL_LUN_FLAG_PRIMARY) lun->flags |= CTL_LUN_PRIMARY_SC; - value = ctl_get_opt(&be_lun->options, "readonly"); - if (value != NULL && strcmp(value, "on") == 0) - lun->flags |= CTL_LUN_READONLY; - - lun->serseq = CTL_LUN_SERSEQ_OFF; - if (be_lun->flags & CTL_LUN_FLAG_SERSEQ_READ) - lun->serseq = CTL_LUN_SERSEQ_READ; - value = ctl_get_opt(&be_lun->options, "serseq"); - if (value != NULL && strcmp(value, "on") == 0) - lun->serseq = CTL_LUN_SERSEQ_ON; - else if (value != NULL && strcmp(value, "read") == 0) - lun->serseq = CTL_LUN_SERSEQ_READ; - else if (value != NULL && strcmp(value, "off") == 0) - lun->serseq = CTL_LUN_SERSEQ_OFF; - lun->ctl_softc = ctl_softc; #ifdef CTL_TIME_IO lun->last_busy = getsbinuptime(); #endif TAILQ_INIT(&lun->ooa_queue); TAILQ_INIT(&lun->blocked_queue); STAILQ_INIT(&lun->error_list); ctl_tpc_lun_init(lun); /* * Initialize the mode and log page index. */ ctl_init_page_index(lun); ctl_init_log_page_index(lun); /* * Now, before we insert this lun on the lun list, set the lun * inventory changed UA for all other luns. */ STAILQ_FOREACH(nlun, &ctl_softc->lun_list, links) { mtx_lock(&nlun->lun_lock); ctl_est_ua_all(nlun, -1, CTL_UA_LUN_CHANGE); mtx_unlock(&nlun->lun_lock); } STAILQ_INSERT_TAIL(&ctl_softc->lun_list, lun, links); ctl_softc->ctl_luns[lun_number] = lun; ctl_softc->num_luns++; /* Setup statistics gathering */ lun->stats.device_type = be_lun->lun_type; lun->stats.lun_number = lun_number; if (lun->stats.device_type == T_DIRECT) lun->stats.blocksize = be_lun->blocksize; else lun->stats.flags = CTL_LUN_STATS_NO_BLOCKSIZE; for (i = 0;i < CTL_MAX_PORTS;i++) lun->stats.ports[i].targ_port = i; mtx_unlock(&ctl_softc->ctl_lock); lun->be_lun->lun_config_status(lun->be_lun->be_lun, CTL_LUN_CONFIG_OK); return (0); } /* * Delete a LUN. * Assumptions: * - LUN has already been marked invalid and any pending I/O has been taken * care of. */ static int ctl_free_lun(struct ctl_lun *lun) { struct ctl_softc *softc; struct ctl_lun *nlun; int i; softc = lun->ctl_softc; mtx_assert(&softc->ctl_lock, MA_OWNED); STAILQ_REMOVE(&softc->lun_list, lun, ctl_lun, links); ctl_clear_mask(softc->ctl_lun_mask, lun->lun); softc->ctl_luns[lun->lun] = NULL; if (!TAILQ_EMPTY(&lun->ooa_queue)) panic("Freeing a LUN %p with outstanding I/O!!\n", lun); softc->num_luns--; /* * Tell the backend to free resources, if this LUN has a backend. */ atomic_subtract_int(&lun->be_lun->be->num_luns, 1); lun->be_lun->lun_shutdown(lun->be_lun->be_lun); ctl_tpc_lun_shutdown(lun); mtx_destroy(&lun->lun_lock); free(lun->lun_devid, M_CTL); for (i = 0; i < CTL_MAX_PORTS; i++) free(lun->pending_ua[i], M_CTL); for (i = 0; i < 2 * CTL_MAX_PORTS; i++) free(lun->pr_keys[i], M_CTL); free(lun->write_buffer, M_CTL); if (lun->flags & CTL_LUN_MALLOCED) free(lun, M_CTL); STAILQ_FOREACH(nlun, &softc->lun_list, links) { mtx_lock(&nlun->lun_lock); ctl_est_ua_all(nlun, -1, CTL_UA_LUN_CHANGE); mtx_unlock(&nlun->lun_lock); } return (0); } static void ctl_create_lun(struct ctl_be_lun *be_lun) { struct ctl_softc *softc; softc = control_softc; /* * ctl_alloc_lun() should handle all potential failure cases. */ ctl_alloc_lun(softc, NULL, be_lun); } int ctl_add_lun(struct ctl_be_lun *be_lun) { struct ctl_softc *softc = control_softc; mtx_lock(&softc->ctl_lock); STAILQ_INSERT_TAIL(&softc->pending_lun_queue, be_lun, links); mtx_unlock(&softc->ctl_lock); wakeup(&softc->pending_lun_queue); return (0); } int ctl_enable_lun(struct ctl_be_lun *be_lun) { struct ctl_softc *softc; struct ctl_port *port, *nport; struct ctl_lun *lun; int retval; lun = (struct ctl_lun *)be_lun->ctl_lun; softc = lun->ctl_softc; mtx_lock(&softc->ctl_lock); mtx_lock(&lun->lun_lock); if ((lun->flags & CTL_LUN_DISABLED) == 0) { /* * eh? Why did we get called if the LUN is already * enabled? */ mtx_unlock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); return (0); } lun->flags &= ~CTL_LUN_DISABLED; mtx_unlock(&lun->lun_lock); for (port = STAILQ_FIRST(&softc->port_list); port != NULL; port = nport) { nport = STAILQ_NEXT(port, links); if ((port->status & CTL_PORT_STATUS_ONLINE) == 0 || port->lun_map != NULL || port->lun_enable == NULL) continue; /* * Drop the lock while we call the FETD's enable routine. * This can lead to a callback into CTL (at least in the * case of the internal initiator frontend. */ mtx_unlock(&softc->ctl_lock); retval = port->lun_enable(port->targ_lun_arg, lun->lun); mtx_lock(&softc->ctl_lock); if (retval != 0) { printf("%s: FETD %s port %d returned error " "%d for lun_enable on lun %jd\n", __func__, port->port_name, port->targ_port, retval, (intmax_t)lun->lun); } } mtx_unlock(&softc->ctl_lock); return (0); } int ctl_disable_lun(struct ctl_be_lun *be_lun) { struct ctl_softc *softc; struct ctl_port *port; struct ctl_lun *lun; int retval; lun = (struct ctl_lun *)be_lun->ctl_lun; softc = lun->ctl_softc; mtx_lock(&softc->ctl_lock); mtx_lock(&lun->lun_lock); if (lun->flags & CTL_LUN_DISABLED) { mtx_unlock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); return (0); } lun->flags |= CTL_LUN_DISABLED; mtx_unlock(&lun->lun_lock); STAILQ_FOREACH(port, &softc->port_list, links) { if ((port->status & CTL_PORT_STATUS_ONLINE) == 0 || port->lun_map != NULL || port->lun_disable == NULL) continue; /* * Drop the lock before we call the frontend's disable * routine, to avoid lock order reversals. * * XXX KDM what happens if the frontend list changes while * we're traversing it? It's unlikely, but should be handled. */ mtx_unlock(&softc->ctl_lock); retval = port->lun_disable(port->targ_lun_arg, lun->lun); mtx_lock(&softc->ctl_lock); if (retval != 0) { printf("%s: FETD %s port %d returned error " "%d for lun_disable on lun %jd\n", __func__, port->port_name, port->targ_port, retval, (intmax_t)lun->lun); } } mtx_unlock(&softc->ctl_lock); return (0); } int ctl_start_lun(struct ctl_be_lun *be_lun) { struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; mtx_lock(&lun->lun_lock); lun->flags &= ~CTL_LUN_STOPPED; mtx_unlock(&lun->lun_lock); return (0); } int ctl_stop_lun(struct ctl_be_lun *be_lun) { struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; mtx_lock(&lun->lun_lock); lun->flags |= CTL_LUN_STOPPED; mtx_unlock(&lun->lun_lock); return (0); } int ctl_lun_offline(struct ctl_be_lun *be_lun) { struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; mtx_lock(&lun->lun_lock); lun->flags |= CTL_LUN_OFFLINE; mtx_unlock(&lun->lun_lock); return (0); } int ctl_lun_online(struct ctl_be_lun *be_lun) { struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; mtx_lock(&lun->lun_lock); lun->flags &= ~CTL_LUN_OFFLINE; mtx_unlock(&lun->lun_lock); return (0); } int ctl_invalidate_lun(struct ctl_be_lun *be_lun) { struct ctl_softc *softc; struct ctl_lun *lun; lun = (struct ctl_lun *)be_lun->ctl_lun; softc = lun->ctl_softc; mtx_lock(&lun->lun_lock); /* * The LUN needs to be disabled before it can be marked invalid. */ if ((lun->flags & CTL_LUN_DISABLED) == 0) { mtx_unlock(&lun->lun_lock); return (-1); } /* * Mark the LUN invalid. */ lun->flags |= CTL_LUN_INVALID; /* * If there is nothing in the OOA queue, go ahead and free the LUN. * If we have something in the OOA queue, we'll free it when the * last I/O completes. */ if (TAILQ_EMPTY(&lun->ooa_queue)) { mtx_unlock(&lun->lun_lock); mtx_lock(&softc->ctl_lock); ctl_free_lun(lun); mtx_unlock(&softc->ctl_lock); } else mtx_unlock(&lun->lun_lock); return (0); } int ctl_lun_inoperable(struct ctl_be_lun *be_lun) { struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; mtx_lock(&lun->lun_lock); lun->flags |= CTL_LUN_INOPERABLE; mtx_unlock(&lun->lun_lock); return (0); } int ctl_lun_operable(struct ctl_be_lun *be_lun) { struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; mtx_lock(&lun->lun_lock); lun->flags &= ~CTL_LUN_INOPERABLE; mtx_unlock(&lun->lun_lock); return (0); } void ctl_lun_capacity_changed(struct ctl_be_lun *be_lun) { struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; mtx_lock(&lun->lun_lock); ctl_est_ua_all(lun, -1, CTL_UA_CAPACITY_CHANGED); mtx_unlock(&lun->lun_lock); } /* * Backend "memory move is complete" callback for requests that never * make it down to say RAIDCore's configuration code. */ int ctl_config_move_done(union ctl_io *io) { int retval; CTL_DEBUG_PRINT(("ctl_config_move_done\n")); KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Config I/O type isn't CTL_IO_SCSI (%d)!", io->io_hdr.io_type)); if ((io->io_hdr.port_status != 0) && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { /* * For hardware error sense keys, the sense key * specific value is defined to be a retry count, * but we use it to pass back an internal FETD * error code. XXX KDM Hopefully the FETD is only * using 16 bits for an error code, since that's * all the space we have in the sks field. */ ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ io->io_hdr.port_status); } if (((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS) || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)) { /* * XXX KDM just assuming a single pointer here, and not a * S/G list. If we start using S/G lists for config data, * we'll need to know how to clean them up here as well. */ if (io->io_hdr.flags & CTL_FLAG_ALLOCATED) free(io->scsiio.kern_data_ptr, M_CTL); ctl_done(io); retval = CTL_RETVAL_COMPLETE; } else { /* * XXX KDM now we need to continue data movement. Some * options: * - call ctl_scsiio() again? We don't do this for data * writes, because for those at least we know ahead of * time where the write will go and how long it is. For * config writes, though, that information is largely * contained within the write itself, thus we need to * parse out the data again. * * - Call some other function once the data is in? */ if (ctl_debug & CTL_DEBUG_CDB_DATA) ctl_data_print(io); /* * XXX KDM call ctl_scsiio() again for now, and check flag * bits to see whether we're allocated or not. */ retval = ctl_scsiio(&io->scsiio); } return (retval); } /* * This gets called by a backend driver when it is done with a * data_submit method. */ void ctl_data_submit_done(union ctl_io *io) { /* * If the IO_CONT flag is set, we need to call the supplied * function to continue processing the I/O, instead of completing * the I/O just yet. * * If there is an error, though, we don't want to keep processing. * Instead, just send status back to the initiator. */ if ((io->io_hdr.flags & CTL_FLAG_IO_CONT) && (io->io_hdr.flags & CTL_FLAG_ABORT) == 0 && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { io->scsiio.io_cont(io); return; } ctl_done(io); } /* * This gets called by a backend driver when it is done with a * configuration write. */ void ctl_config_write_done(union ctl_io *io) { uint8_t *buf; /* * If the IO_CONT flag is set, we need to call the supplied * function to continue processing the I/O, instead of completing * the I/O just yet. * * If there is an error, though, we don't want to keep processing. * Instead, just send status back to the initiator. */ if ((io->io_hdr.flags & CTL_FLAG_IO_CONT) && (io->io_hdr.flags & CTL_FLAG_ABORT) == 0 && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { io->scsiio.io_cont(io); return; } /* * Since a configuration write can be done for commands that actually * have data allocated, like write buffer, and commands that have * no data, like start/stop unit, we need to check here. */ if (io->io_hdr.flags & CTL_FLAG_ALLOCATED) buf = io->scsiio.kern_data_ptr; else buf = NULL; ctl_done(io); if (buf) free(buf, M_CTL); } void ctl_config_read_done(union ctl_io *io) { uint8_t *buf; /* * If there is some error -- we are done, skip data transfer. */ if ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { if (io->io_hdr.flags & CTL_FLAG_ALLOCATED) buf = io->scsiio.kern_data_ptr; else buf = NULL; ctl_done(io); if (buf) free(buf, M_CTL); return; } /* * If the IO_CONT flag is set, we need to call the supplied * function to continue processing the I/O, instead of completing * the I/O just yet. */ if (io->io_hdr.flags & CTL_FLAG_IO_CONT) { io->scsiio.io_cont(io); return; } ctl_datamove(io); } /* * SCSI release command. */ int ctl_scsi_release(struct ctl_scsiio *ctsio) { int length, longid, thirdparty_id, resv_id; struct ctl_lun *lun; uint32_t residx; length = 0; resv_id = 0; CTL_DEBUG_PRINT(("ctl_scsi_release\n")); residx = ctl_get_resindex(&ctsio->io_hdr.nexus); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; switch (ctsio->cdb[0]) { case RELEASE_10: { struct scsi_release_10 *cdb; cdb = (struct scsi_release_10 *)ctsio->cdb; if (cdb->byte2 & SR10_LONGID) longid = 1; else thirdparty_id = cdb->thirdparty_id; resv_id = cdb->resv_id; length = scsi_2btoul(cdb->length); break; } } /* * XXX KDM right now, we only support LUN reservation. We don't * support 3rd party reservations, or extent reservations, which * might actually need the parameter list. If we've gotten this * far, we've got a LUN reservation. Anything else got kicked out * above. So, according to SPC, ignore the length. */ length = 0; if (((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) && (length > 0)) { ctsio->kern_data_ptr = malloc(length, M_CTL, M_WAITOK); ctsio->kern_data_len = length; ctsio->kern_total_len = length; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } if (length > 0) thirdparty_id = scsi_8btou64(ctsio->kern_data_ptr); mtx_lock(&lun->lun_lock); /* * According to SPC, it is not an error for an intiator to attempt * to release a reservation on a LUN that isn't reserved, or that * is reserved by another initiator. The reservation can only be * released, though, by the initiator who made it or by one of * several reset type events. */ if ((lun->flags & CTL_LUN_RESERVED) && (lun->res_idx == residx)) lun->flags &= ~CTL_LUN_RESERVED; mtx_unlock(&lun->lun_lock); if (ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) { free(ctsio->kern_data_ptr, M_CTL); ctsio->io_hdr.flags &= ~CTL_FLAG_ALLOCATED; } ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_scsi_reserve(struct ctl_scsiio *ctsio) { int extent, thirdparty, longid; int resv_id, length; uint64_t thirdparty_id; struct ctl_lun *lun; uint32_t residx; extent = 0; thirdparty = 0; longid = 0; resv_id = 0; length = 0; thirdparty_id = 0; CTL_DEBUG_PRINT(("ctl_reserve\n")); residx = ctl_get_resindex(&ctsio->io_hdr.nexus); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; switch (ctsio->cdb[0]) { case RESERVE_10: { struct scsi_reserve_10 *cdb; cdb = (struct scsi_reserve_10 *)ctsio->cdb; if (cdb->byte2 & SR10_LONGID) longid = 1; else thirdparty_id = cdb->thirdparty_id; resv_id = cdb->resv_id; length = scsi_2btoul(cdb->length); break; } } /* * XXX KDM right now, we only support LUN reservation. We don't * support 3rd party reservations, or extent reservations, which * might actually need the parameter list. If we've gotten this * far, we've got a LUN reservation. Anything else got kicked out * above. So, according to SPC, ignore the length. */ length = 0; if (((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) && (length > 0)) { ctsio->kern_data_ptr = malloc(length, M_CTL, M_WAITOK); ctsio->kern_data_len = length; ctsio->kern_total_len = length; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } if (length > 0) thirdparty_id = scsi_8btou64(ctsio->kern_data_ptr); mtx_lock(&lun->lun_lock); if ((lun->flags & CTL_LUN_RESERVED) && (lun->res_idx != residx)) { ctl_set_reservation_conflict(ctsio); goto bailout; } lun->flags |= CTL_LUN_RESERVED; lun->res_idx = residx; ctl_set_success(ctsio); bailout: mtx_unlock(&lun->lun_lock); if (ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) { free(ctsio->kern_data_ptr, M_CTL); ctsio->io_hdr.flags &= ~CTL_FLAG_ALLOCATED; } ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_start_stop(struct ctl_scsiio *ctsio) { struct scsi_start_stop_unit *cdb; struct ctl_lun *lun; int retval; CTL_DEBUG_PRINT(("ctl_start_stop\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; retval = 0; cdb = (struct scsi_start_stop_unit *)ctsio->cdb; /* * XXX KDM * We don't support the immediate bit on a stop unit. In order to * do that, we would need to code up a way to know that a stop is * pending, and hold off any new commands until it completes, one * way or another. Then we could accept or reject those commands * depending on its status. We would almost need to do the reverse * of what we do below for an immediate start -- return the copy of * the ctl_io to the FETD with status to send to the host (and to * free the copy!) and then free the original I/O once the stop * actually completes. That way, the OOA queue mechanism can work * to block commands that shouldn't proceed. Another alternative * would be to put the copy in the queue in place of the original, * and return the original back to the caller. That could be * slightly safer.. */ if ((cdb->byte2 & SSS_IMMED) && ((cdb->how & SSS_START) == 0)) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } if ((lun->flags & CTL_LUN_PR_RESERVED) && ((cdb->how & SSS_START)==0)) { uint32_t residx; residx = ctl_get_resindex(&ctsio->io_hdr.nexus); if (ctl_get_prkey(lun, residx) == 0 || (lun->pr_res_idx!=residx && lun->res_type < 4)) { ctl_set_reservation_conflict(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } } /* * If there is no backend on this device, we can't start or stop * it. In theory we shouldn't get any start/stop commands in the * first place at this level if the LUN doesn't have a backend. * That should get stopped by the command decode code. */ if (lun->backend == NULL) { ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * XXX KDM Copan-specific offline behavior. * Figure out a reasonable way to port this? */ #ifdef NEEDTOPORT mtx_lock(&lun->lun_lock); if (((cdb->byte2 & SSS_ONOFFLINE) == 0) && (lun->flags & CTL_LUN_OFFLINE)) { /* * If the LUN is offline, and the on/offline bit isn't set, * reject the start or stop. Otherwise, let it through. */ mtx_unlock(&lun->lun_lock); ctl_set_lun_not_ready(ctsio); ctl_done((union ctl_io *)ctsio); } else { mtx_unlock(&lun->lun_lock); #endif /* NEEDTOPORT */ /* * This could be a start or a stop when we're online, * or a stop/offline or start/online. A start or stop when * we're offline is covered in the case above. */ /* * In the non-immediate case, we send the request to * the backend and return status to the user when * it is done. * * In the immediate case, we allocate a new ctl_io * to hold a copy of the request, and send that to * the backend. We then set good status on the * user's request and return it immediately. */ if (cdb->byte2 & SSS_IMMED) { union ctl_io *new_io; new_io = ctl_alloc_io(ctsio->io_hdr.pool); ctl_copy_io((union ctl_io *)ctsio, new_io); retval = lun->backend->config_write(new_io); ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); } else { retval = lun->backend->config_write( (union ctl_io *)ctsio); } #ifdef NEEDTOPORT } #endif return (retval); } /* * We support the SYNCHRONIZE CACHE command (10 and 16 byte versions), but * we don't really do anything with the LBA and length fields if the user * passes them in. Instead we'll just flush out the cache for the entire * LUN. */ int ctl_sync_cache(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; struct ctl_softc *softc; struct ctl_lba_len_flags *lbalen; uint64_t starting_lba; uint32_t block_count; int retval; uint8_t byte2; CTL_DEBUG_PRINT(("ctl_sync_cache\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; softc = lun->ctl_softc; retval = 0; switch (ctsio->cdb[0]) { case SYNCHRONIZE_CACHE: { struct scsi_sync_cache *cdb; cdb = (struct scsi_sync_cache *)ctsio->cdb; starting_lba = scsi_4btoul(cdb->begin_lba); block_count = scsi_2btoul(cdb->lb_count); byte2 = cdb->byte2; break; } case SYNCHRONIZE_CACHE_16: { struct scsi_sync_cache_16 *cdb; cdb = (struct scsi_sync_cache_16 *)ctsio->cdb; starting_lba = scsi_8btou64(cdb->begin_lba); block_count = scsi_4btoul(cdb->lb_count); byte2 = cdb->byte2; break; } default: ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); goto bailout; break; /* NOTREACHED */ } /* * We check the LBA and length, but don't do anything with them. * A SYNCHRONIZE CACHE will cause the entire cache for this lun to * get flushed. This check will just help satisfy anyone who wants * to see an error for an out of range LBA. */ if ((starting_lba + block_count) > (lun->be_lun->maxlba + 1)) { ctl_set_lba_out_of_range(ctsio); ctl_done((union ctl_io *)ctsio); goto bailout; } /* * If this LUN has no backend, we can't flush the cache anyway. */ if (lun->backend == NULL) { ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); goto bailout; } lbalen = (struct ctl_lba_len_flags *)&ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; lbalen->lba = starting_lba; lbalen->len = block_count; lbalen->flags = byte2; /* * Check to see whether we're configured to send the SYNCHRONIZE * CACHE command directly to the back end. */ mtx_lock(&lun->lun_lock); if ((softc->flags & CTL_FLAG_REAL_SYNC) && (++(lun->sync_count) >= lun->sync_interval)) { lun->sync_count = 0; mtx_unlock(&lun->lun_lock); retval = lun->backend->config_write((union ctl_io *)ctsio); } else { mtx_unlock(&lun->lun_lock); ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); } bailout: return (retval); } int ctl_format(struct ctl_scsiio *ctsio) { struct scsi_format *cdb; struct ctl_lun *lun; int length, defect_list_len; CTL_DEBUG_PRINT(("ctl_format\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; cdb = (struct scsi_format *)ctsio->cdb; length = 0; if (cdb->byte2 & SF_FMTDATA) { if (cdb->byte2 & SF_LONGLIST) length = sizeof(struct scsi_format_header_long); else length = sizeof(struct scsi_format_header_short); } if (((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) && (length > 0)) { ctsio->kern_data_ptr = malloc(length, M_CTL, M_WAITOK); ctsio->kern_data_len = length; ctsio->kern_total_len = length; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } defect_list_len = 0; if (cdb->byte2 & SF_FMTDATA) { if (cdb->byte2 & SF_LONGLIST) { struct scsi_format_header_long *header; header = (struct scsi_format_header_long *) ctsio->kern_data_ptr; defect_list_len = scsi_4btoul(header->defect_list_len); if (defect_list_len != 0) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0, /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0); goto bailout; } } else { struct scsi_format_header_short *header; header = (struct scsi_format_header_short *) ctsio->kern_data_ptr; defect_list_len = scsi_2btoul(header->defect_list_len); if (defect_list_len != 0) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0, /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0); goto bailout; } } } /* * The format command will clear out the "Medium format corrupted" * status if set by the configuration code. That status is really * just a way to notify the host that we have lost the media, and * get them to issue a command that will basically make them think * they're blowing away the media. */ mtx_lock(&lun->lun_lock); lun->flags &= ~CTL_LUN_INOPERABLE; mtx_unlock(&lun->lun_lock); ctl_set_success(ctsio); bailout: if (ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) { free(ctsio->kern_data_ptr, M_CTL); ctsio->io_hdr.flags &= ~CTL_FLAG_ALLOCATED; } ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_read_buffer(struct ctl_scsiio *ctsio) { struct scsi_read_buffer *cdb; struct ctl_lun *lun; int buffer_offset, len; static uint8_t descr[4]; static uint8_t echo_descr[4] = { 0 }; CTL_DEBUG_PRINT(("ctl_read_buffer\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; cdb = (struct scsi_read_buffer *)ctsio->cdb; if ((cdb->byte2 & RWB_MODE) != RWB_MODE_DATA && (cdb->byte2 & RWB_MODE) != RWB_MODE_ECHO_DESCR && (cdb->byte2 & RWB_MODE) != RWB_MODE_DESCR) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 4); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } len = scsi_3btoul(cdb->length); buffer_offset = scsi_3btoul(cdb->offset); if (buffer_offset + len > CTL_WRITE_BUFFER_SIZE) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 6, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } if ((cdb->byte2 & RWB_MODE) == RWB_MODE_DESCR) { descr[0] = 0; scsi_ulto3b(CTL_WRITE_BUFFER_SIZE, &descr[1]); ctsio->kern_data_ptr = descr; len = min(len, sizeof(descr)); } else if ((cdb->byte2 & RWB_MODE) == RWB_MODE_ECHO_DESCR) { ctsio->kern_data_ptr = echo_descr; len = min(len, sizeof(echo_descr)); } else { if (lun->write_buffer == NULL) { lun->write_buffer = malloc(CTL_WRITE_BUFFER_SIZE, M_CTL, M_WAITOK); } ctsio->kern_data_ptr = lun->write_buffer + buffer_offset; } ctsio->kern_data_len = len; ctsio->kern_total_len = len; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctl_set_success(ctsio); ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_write_buffer(struct ctl_scsiio *ctsio) { struct scsi_write_buffer *cdb; struct ctl_lun *lun; int buffer_offset, len; CTL_DEBUG_PRINT(("ctl_write_buffer\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; cdb = (struct scsi_write_buffer *)ctsio->cdb; if ((cdb->byte2 & RWB_MODE) != RWB_MODE_DATA) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 4); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } len = scsi_3btoul(cdb->length); buffer_offset = scsi_3btoul(cdb->offset); if (buffer_offset + len > CTL_WRITE_BUFFER_SIZE) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 6, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * If we've got a kernel request that hasn't been malloced yet, * malloc it and tell the caller the data buffer is here. */ if ((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) { if (lun->write_buffer == NULL) { lun->write_buffer = malloc(CTL_WRITE_BUFFER_SIZE, M_CTL, M_WAITOK); } ctsio->kern_data_ptr = lun->write_buffer + buffer_offset; ctsio->kern_data_len = len; ctsio->kern_total_len = len; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_write_same(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; struct ctl_lba_len_flags *lbalen; uint64_t lba; uint32_t num_blocks; int len, retval; uint8_t byte2; retval = CTL_RETVAL_COMPLETE; CTL_DEBUG_PRINT(("ctl_write_same\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; switch (ctsio->cdb[0]) { case WRITE_SAME_10: { struct scsi_write_same_10 *cdb; cdb = (struct scsi_write_same_10 *)ctsio->cdb; lba = scsi_4btoul(cdb->addr); num_blocks = scsi_2btoul(cdb->length); byte2 = cdb->byte2; break; } case WRITE_SAME_16: { struct scsi_write_same_16 *cdb; cdb = (struct scsi_write_same_16 *)ctsio->cdb; lba = scsi_8btou64(cdb->addr); num_blocks = scsi_4btoul(cdb->length); byte2 = cdb->byte2; break; } default: /* * We got a command we don't support. This shouldn't * happen, commands should be filtered out above us. */ ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); break; /* NOTREACHED */ } /* NDOB and ANCHOR flags can be used only together with UNMAP */ if ((byte2 & SWS_UNMAP) == 0 && (byte2 & (SWS_NDOB | SWS_ANCHOR)) != 0) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * The first check is to make sure we're in bounds, the second * check is to catch wrap-around problems. If the lba + num blocks * is less than the lba, then we've wrapped around and the block * range is invalid anyway. */ if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) || ((lba + num_blocks) < lba)) { ctl_set_lba_out_of_range(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* Zero number of blocks means "to the last logical block" */ if (num_blocks == 0) { if ((lun->be_lun->maxlba + 1) - lba > UINT32_MAX) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 0, /*command*/ 1, /*field*/ 0, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } num_blocks = (lun->be_lun->maxlba + 1) - lba; } len = lun->be_lun->blocksize; /* * If we've got a kernel request that hasn't been malloced yet, * malloc it and tell the caller the data buffer is here. */ if ((byte2 & SWS_NDOB) == 0 && (ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) { ctsio->kern_data_ptr = malloc(len, M_CTL, M_WAITOK);; ctsio->kern_data_len = len; ctsio->kern_total_len = len; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } lbalen = (struct ctl_lba_len_flags *)&ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; lbalen->lba = lba; lbalen->len = num_blocks; lbalen->flags = byte2; retval = lun->backend->config_write((union ctl_io *)ctsio); return (retval); } int ctl_unmap(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; struct scsi_unmap *cdb; struct ctl_ptr_len_flags *ptrlen; struct scsi_unmap_header *hdr; struct scsi_unmap_desc *buf, *end, *endnz, *range; uint64_t lba; uint32_t num_blocks; int len, retval; uint8_t byte2; retval = CTL_RETVAL_COMPLETE; CTL_DEBUG_PRINT(("ctl_unmap\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; cdb = (struct scsi_unmap *)ctsio->cdb; len = scsi_2btoul(cdb->length); byte2 = cdb->byte2; /* * If we've got a kernel request that hasn't been malloced yet, * malloc it and tell the caller the data buffer is here. */ if ((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) { ctsio->kern_data_ptr = malloc(len, M_CTL, M_WAITOK);; ctsio->kern_data_len = len; ctsio->kern_total_len = len; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } len = ctsio->kern_total_len - ctsio->kern_data_resid; hdr = (struct scsi_unmap_header *)ctsio->kern_data_ptr; if (len < sizeof (*hdr) || len < (scsi_2btoul(hdr->length) + sizeof(hdr->length)) || len < (scsi_2btoul(hdr->desc_length) + sizeof (*hdr)) || scsi_2btoul(hdr->desc_length) % sizeof(*buf) != 0) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 0, /*command*/ 0, /*field*/ 0, /*bit_valid*/ 0, /*bit*/ 0); goto done; } len = scsi_2btoul(hdr->desc_length); buf = (struct scsi_unmap_desc *)(hdr + 1); end = buf + len / sizeof(*buf); endnz = buf; for (range = buf; range < end; range++) { lba = scsi_8btou64(range->lba); num_blocks = scsi_4btoul(range->length); if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) || ((lba + num_blocks) < lba)) { ctl_set_lba_out_of_range(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } if (num_blocks != 0) endnz = range + 1; } /* * Block backend can not handle zero last range. * Filter it out and return if there is nothing left. */ len = (uint8_t *)endnz - (uint8_t *)buf; if (len == 0) { ctl_set_success(ctsio); goto done; } mtx_lock(&lun->lun_lock); ptrlen = (struct ctl_ptr_len_flags *) &ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; ptrlen->ptr = (void *)buf; ptrlen->len = len; ptrlen->flags = byte2; ctl_check_blocked(lun); mtx_unlock(&lun->lun_lock); retval = lun->backend->config_write((union ctl_io *)ctsio); return (retval); done: if (ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) { free(ctsio->kern_data_ptr, M_CTL); ctsio->io_hdr.flags &= ~CTL_FLAG_ALLOCATED; } ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * Note that this function currently doesn't actually do anything inside * CTL to enforce things if the DQue bit is turned on. * * Also note that this function can't be used in the default case, because * the DQue bit isn't set in the changeable mask for the control mode page * anyway. This is just here as an example for how to implement a page * handler, and a placeholder in case we want to allow the user to turn * tagged queueing on and off. * * The D_SENSE bit handling is functional, however, and will turn * descriptor sense on and off for a given LUN. */ int ctl_control_page_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, uint8_t *page_ptr) { struct scsi_control_page *current_cp, *saved_cp, *user_cp; struct ctl_lun *lun; int set_ua; uint32_t initidx; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; initidx = ctl_get_initindex(&ctsio->io_hdr.nexus); set_ua = 0; user_cp = (struct scsi_control_page *)page_ptr; current_cp = (struct scsi_control_page *) (page_index->page_data + (page_index->page_len * CTL_PAGE_CURRENT)); saved_cp = (struct scsi_control_page *) (page_index->page_data + (page_index->page_len * CTL_PAGE_SAVED)); mtx_lock(&lun->lun_lock); if (((current_cp->rlec & SCP_DSENSE) == 0) && ((user_cp->rlec & SCP_DSENSE) != 0)) { /* * Descriptor sense is currently turned off and the user * wants to turn it on. */ current_cp->rlec |= SCP_DSENSE; saved_cp->rlec |= SCP_DSENSE; lun->flags |= CTL_LUN_SENSE_DESC; set_ua = 1; } else if (((current_cp->rlec & SCP_DSENSE) != 0) && ((user_cp->rlec & SCP_DSENSE) == 0)) { /* * Descriptor sense is currently turned on, and the user * wants to turn it off. */ current_cp->rlec &= ~SCP_DSENSE; saved_cp->rlec &= ~SCP_DSENSE; lun->flags &= ~CTL_LUN_SENSE_DESC; set_ua = 1; } if ((current_cp->queue_flags & SCP_QUEUE_ALG_MASK) != (user_cp->queue_flags & SCP_QUEUE_ALG_MASK)) { current_cp->queue_flags &= ~SCP_QUEUE_ALG_MASK; current_cp->queue_flags |= user_cp->queue_flags & SCP_QUEUE_ALG_MASK; saved_cp->queue_flags &= ~SCP_QUEUE_ALG_MASK; saved_cp->queue_flags |= user_cp->queue_flags & SCP_QUEUE_ALG_MASK; set_ua = 1; } if ((current_cp->eca_and_aen & SCP_SWP) != (user_cp->eca_and_aen & SCP_SWP)) { current_cp->eca_and_aen &= ~SCP_SWP; current_cp->eca_and_aen |= user_cp->eca_and_aen & SCP_SWP; saved_cp->eca_and_aen &= ~SCP_SWP; saved_cp->eca_and_aen |= user_cp->eca_and_aen & SCP_SWP; set_ua = 1; } if (set_ua != 0) ctl_est_ua_all(lun, initidx, CTL_UA_MODE_CHANGE); mtx_unlock(&lun->lun_lock); return (0); } int ctl_caching_sp_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, uint8_t *page_ptr) { struct scsi_caching_page *current_cp, *saved_cp, *user_cp; struct ctl_lun *lun; int set_ua; uint32_t initidx; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; initidx = ctl_get_initindex(&ctsio->io_hdr.nexus); set_ua = 0; user_cp = (struct scsi_caching_page *)page_ptr; current_cp = (struct scsi_caching_page *) (page_index->page_data + (page_index->page_len * CTL_PAGE_CURRENT)); saved_cp = (struct scsi_caching_page *) (page_index->page_data + (page_index->page_len * CTL_PAGE_SAVED)); mtx_lock(&lun->lun_lock); if ((current_cp->flags1 & (SCP_WCE | SCP_RCD)) != (user_cp->flags1 & (SCP_WCE | SCP_RCD))) { current_cp->flags1 &= ~(SCP_WCE | SCP_RCD); current_cp->flags1 |= user_cp->flags1 & (SCP_WCE | SCP_RCD); saved_cp->flags1 &= ~(SCP_WCE | SCP_RCD); saved_cp->flags1 |= user_cp->flags1 & (SCP_WCE | SCP_RCD); set_ua = 1; } if (set_ua != 0) ctl_est_ua_all(lun, initidx, CTL_UA_MODE_CHANGE); mtx_unlock(&lun->lun_lock); return (0); } int ctl_debugconf_sp_select_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, uint8_t *page_ptr) { uint8_t *c; int i; c = ((struct copan_debugconf_subpage *)page_ptr)->ctl_time_io_secs; ctl_time_io_secs = (c[0] << 8) | (c[1] << 0) | 0; CTL_DEBUG_PRINT(("set ctl_time_io_secs to %d\n", ctl_time_io_secs)); printf("set ctl_time_io_secs to %d\n", ctl_time_io_secs); printf("page data:"); for (i=0; i<8; i++) printf(" %.2x",page_ptr[i]); printf("\n"); return (0); } int ctl_debugconf_sp_sense_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, int pc) { struct copan_debugconf_subpage *page; page = (struct copan_debugconf_subpage *)page_index->page_data + (page_index->page_len * pc); switch (pc) { case SMS_PAGE_CTRL_CHANGEABLE >> 6: case SMS_PAGE_CTRL_DEFAULT >> 6: case SMS_PAGE_CTRL_SAVED >> 6: /* * We don't update the changable or default bits for this page. */ break; case SMS_PAGE_CTRL_CURRENT >> 6: page->ctl_time_io_secs[0] = ctl_time_io_secs >> 8; page->ctl_time_io_secs[1] = ctl_time_io_secs >> 0; break; default: #ifdef NEEDTOPORT EPRINT(0, "Invalid PC %d!!", pc); #endif /* NEEDTOPORT */ break; } return (0); } static int ctl_do_mode_select(union ctl_io *io) { struct scsi_mode_page_header *page_header; struct ctl_page_index *page_index; struct ctl_scsiio *ctsio; int control_dev, page_len; int page_len_offset, page_len_size; union ctl_modepage_info *modepage_info; struct ctl_lun *lun; int *len_left, *len_used; int retval, i; ctsio = &io->scsiio; page_index = NULL; page_len = 0; retval = CTL_RETVAL_COMPLETE; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; if (lun->be_lun->lun_type != T_DIRECT) control_dev = 1; else control_dev = 0; modepage_info = (union ctl_modepage_info *) ctsio->io_hdr.ctl_private[CTL_PRIV_MODEPAGE].bytes; len_left = &modepage_info->header.len_left; len_used = &modepage_info->header.len_used; do_next_page: page_header = (struct scsi_mode_page_header *) (ctsio->kern_data_ptr + *len_used); if (*len_left == 0) { free(ctsio->kern_data_ptr, M_CTL); ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } else if (*len_left < sizeof(struct scsi_mode_page_header)) { free(ctsio->kern_data_ptr, M_CTL); ctl_set_param_len_error(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } else if ((page_header->page_code & SMPH_SPF) && (*len_left < sizeof(struct scsi_mode_page_header_sp))) { free(ctsio->kern_data_ptr, M_CTL); ctl_set_param_len_error(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * XXX KDM should we do something with the block descriptor? */ for (i = 0; i < CTL_NUM_MODE_PAGES; i++) { if ((control_dev != 0) && (lun->mode_pages.index[i].page_flags & CTL_PAGE_FLAG_DISK_ONLY)) continue; if ((lun->mode_pages.index[i].page_code & SMPH_PC_MASK) != (page_header->page_code & SMPH_PC_MASK)) continue; /* * If neither page has a subpage code, then we've got a * match. */ if (((lun->mode_pages.index[i].page_code & SMPH_SPF) == 0) && ((page_header->page_code & SMPH_SPF) == 0)) { page_index = &lun->mode_pages.index[i]; page_len = page_header->page_length; break; } /* * If both pages have subpages, then the subpage numbers * have to match. */ if ((lun->mode_pages.index[i].page_code & SMPH_SPF) && (page_header->page_code & SMPH_SPF)) { struct scsi_mode_page_header_sp *sph; sph = (struct scsi_mode_page_header_sp *)page_header; if (lun->mode_pages.index[i].subpage == sph->subpage) { page_index = &lun->mode_pages.index[i]; page_len = scsi_2btoul(sph->page_length); break; } } } /* * If we couldn't find the page, or if we don't have a mode select * handler for it, send back an error to the user. */ if ((page_index == NULL) || (page_index->select_handler == NULL)) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0, /*field*/ *len_used, /*bit_valid*/ 0, /*bit*/ 0); free(ctsio->kern_data_ptr, M_CTL); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } if (page_index->page_code & SMPH_SPF) { page_len_offset = 2; page_len_size = 2; } else { page_len_size = 1; page_len_offset = 1; } /* * If the length the initiator gives us isn't the one we specify in * the mode page header, or if they didn't specify enough data in * the CDB to avoid truncating this page, kick out the request. */ if ((page_len != (page_index->page_len - page_len_offset - page_len_size)) || (*len_left < page_index->page_len)) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0, /*field*/ *len_used + page_len_offset, /*bit_valid*/ 0, /*bit*/ 0); free(ctsio->kern_data_ptr, M_CTL); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * Run through the mode page, checking to make sure that the bits * the user changed are actually legal for him to change. */ for (i = 0; i < page_index->page_len; i++) { uint8_t *user_byte, *change_mask, *current_byte; int bad_bit; int j; user_byte = (uint8_t *)page_header + i; change_mask = page_index->page_data + (page_index->page_len * CTL_PAGE_CHANGEABLE) + i; current_byte = page_index->page_data + (page_index->page_len * CTL_PAGE_CURRENT) + i; /* * Check to see whether the user set any bits in this byte * that he is not allowed to set. */ if ((*user_byte & ~(*change_mask)) == (*current_byte & ~(*change_mask))) continue; /* * Go through bit by bit to determine which one is illegal. */ bad_bit = 0; for (j = 7; j >= 0; j--) { if ((((1 << i) & ~(*change_mask)) & *user_byte) != (((1 << i) & ~(*change_mask)) & *current_byte)) { bad_bit = i; break; } } ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0, /*field*/ *len_used + i, /*bit_valid*/ 1, /*bit*/ bad_bit); free(ctsio->kern_data_ptr, M_CTL); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * Decrement these before we call the page handler, since we may * end up getting called back one way or another before the handler * returns to this context. */ *len_left -= page_index->page_len; *len_used += page_index->page_len; retval = page_index->select_handler(ctsio, page_index, (uint8_t *)page_header); /* * If the page handler returns CTL_RETVAL_QUEUED, then we need to * wait until this queued command completes to finish processing * the mode page. If it returns anything other than * CTL_RETVAL_COMPLETE (e.g. CTL_RETVAL_ERROR), then it should have * already set the sense information, freed the data pointer, and * completed the io for us. */ if (retval != CTL_RETVAL_COMPLETE) goto bailout_no_done; /* * If the initiator sent us more than one page, parse the next one. */ if (*len_left > 0) goto do_next_page; ctl_set_success(ctsio); free(ctsio->kern_data_ptr, M_CTL); ctl_done((union ctl_io *)ctsio); bailout_no_done: return (CTL_RETVAL_COMPLETE); } int ctl_mode_select(struct ctl_scsiio *ctsio) { int param_len, pf, sp; int header_size, bd_len; int len_left, len_used; struct ctl_page_index *page_index; struct ctl_lun *lun; int control_dev, page_len; union ctl_modepage_info *modepage_info; int retval; pf = 0; sp = 0; page_len = 0; len_used = 0; len_left = 0; retval = 0; bd_len = 0; page_index = NULL; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; if (lun->be_lun->lun_type != T_DIRECT) control_dev = 1; else control_dev = 0; switch (ctsio->cdb[0]) { case MODE_SELECT_6: { struct scsi_mode_select_6 *cdb; cdb = (struct scsi_mode_select_6 *)ctsio->cdb; pf = (cdb->byte2 & SMS_PF) ? 1 : 0; sp = (cdb->byte2 & SMS_SP) ? 1 : 0; param_len = cdb->length; header_size = sizeof(struct scsi_mode_header_6); break; } case MODE_SELECT_10: { struct scsi_mode_select_10 *cdb; cdb = (struct scsi_mode_select_10 *)ctsio->cdb; pf = (cdb->byte2 & SMS_PF) ? 1 : 0; sp = (cdb->byte2 & SMS_SP) ? 1 : 0; param_len = scsi_2btoul(cdb->length); header_size = sizeof(struct scsi_mode_header_10); break; } default: ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); break; /* NOTREACHED */ } /* * From SPC-3: * "A parameter list length of zero indicates that the Data-Out Buffer * shall be empty. This condition shall not be considered as an error." */ if (param_len == 0) { ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * Since we'll hit this the first time through, prior to * allocation, we don't need to free a data buffer here. */ if (param_len < header_size) { ctl_set_param_len_error(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * Allocate the data buffer and grab the user's data. In theory, * we shouldn't have to sanity check the parameter list length here * because the maximum size is 64K. We should be able to malloc * that much without too many problems. */ if ((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) { ctsio->kern_data_ptr = malloc(param_len, M_CTL, M_WAITOK); ctsio->kern_data_len = param_len; ctsio->kern_total_len = param_len; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } switch (ctsio->cdb[0]) { case MODE_SELECT_6: { struct scsi_mode_header_6 *mh6; mh6 = (struct scsi_mode_header_6 *)ctsio->kern_data_ptr; bd_len = mh6->blk_desc_len; break; } case MODE_SELECT_10: { struct scsi_mode_header_10 *mh10; mh10 = (struct scsi_mode_header_10 *)ctsio->kern_data_ptr; bd_len = scsi_2btoul(mh10->blk_desc_len); break; } default: panic("Invalid CDB type %#x", ctsio->cdb[0]); break; } if (param_len < (header_size + bd_len)) { free(ctsio->kern_data_ptr, M_CTL); ctl_set_param_len_error(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * Set the IO_CONT flag, so that if this I/O gets passed to * ctl_config_write_done(), it'll get passed back to * ctl_do_mode_select() for further processing, or completion if * we're all done. */ ctsio->io_hdr.flags |= CTL_FLAG_IO_CONT; ctsio->io_cont = ctl_do_mode_select; modepage_info = (union ctl_modepage_info *) ctsio->io_hdr.ctl_private[CTL_PRIV_MODEPAGE].bytes; memset(modepage_info, 0, sizeof(*modepage_info)); len_left = param_len - header_size - bd_len; len_used = header_size + bd_len; modepage_info->header.len_left = len_left; modepage_info->header.len_used = len_used; return (ctl_do_mode_select((union ctl_io *)ctsio)); } int ctl_mode_sense(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; int pc, page_code, dbd, llba, subpage; int alloc_len, page_len, header_len, total_len; struct scsi_mode_block_descr *block_desc; struct ctl_page_index *page_index; int control_dev; dbd = 0; llba = 0; block_desc = NULL; page_index = NULL; CTL_DEBUG_PRINT(("ctl_mode_sense\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; if (lun->be_lun->lun_type != T_DIRECT) control_dev = 1; else control_dev = 0; switch (ctsio->cdb[0]) { case MODE_SENSE_6: { struct scsi_mode_sense_6 *cdb; cdb = (struct scsi_mode_sense_6 *)ctsio->cdb; header_len = sizeof(struct scsi_mode_hdr_6); if (cdb->byte2 & SMS_DBD) dbd = 1; else header_len += sizeof(struct scsi_mode_block_descr); pc = (cdb->page & SMS_PAGE_CTRL_MASK) >> 6; page_code = cdb->page & SMS_PAGE_CODE; subpage = cdb->subpage; alloc_len = cdb->length; break; } case MODE_SENSE_10: { struct scsi_mode_sense_10 *cdb; cdb = (struct scsi_mode_sense_10 *)ctsio->cdb; header_len = sizeof(struct scsi_mode_hdr_10); if (cdb->byte2 & SMS_DBD) dbd = 1; else header_len += sizeof(struct scsi_mode_block_descr); if (cdb->byte2 & SMS10_LLBAA) llba = 1; pc = (cdb->page & SMS_PAGE_CTRL_MASK) >> 6; page_code = cdb->page & SMS_PAGE_CODE; subpage = cdb->subpage; alloc_len = scsi_2btoul(cdb->length); break; } default: ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); break; /* NOTREACHED */ } /* * We have to make a first pass through to calculate the size of * the pages that match the user's query. Then we allocate enough * memory to hold it, and actually copy the data into the buffer. */ switch (page_code) { case SMS_ALL_PAGES_PAGE: { int i; page_len = 0; /* * At the moment, values other than 0 and 0xff here are * reserved according to SPC-3. */ if ((subpage != SMS_SUBPAGE_PAGE_0) && (subpage != SMS_SUBPAGE_ALL)) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 3, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } for (i = 0; i < CTL_NUM_MODE_PAGES; i++) { if ((control_dev != 0) && (lun->mode_pages.index[i].page_flags & CTL_PAGE_FLAG_DISK_ONLY)) continue; /* * We don't use this subpage if the user didn't * request all subpages. */ if ((lun->mode_pages.index[i].subpage != 0) && (subpage == SMS_SUBPAGE_PAGE_0)) continue; #if 0 printf("found page %#x len %d\n", lun->mode_pages.index[i].page_code & SMPH_PC_MASK, lun->mode_pages.index[i].page_len); #endif page_len += lun->mode_pages.index[i].page_len; } break; } default: { int i; page_len = 0; for (i = 0; i < CTL_NUM_MODE_PAGES; i++) { /* Look for the right page code */ if ((lun->mode_pages.index[i].page_code & SMPH_PC_MASK) != page_code) continue; /* Look for the right subpage or the subpage wildcard*/ if ((lun->mode_pages.index[i].subpage != subpage) && (subpage != SMS_SUBPAGE_ALL)) continue; /* Make sure the page is supported for this dev type */ if ((control_dev != 0) && (lun->mode_pages.index[i].page_flags & CTL_PAGE_FLAG_DISK_ONLY)) continue; #if 0 printf("found page %#x len %d\n", lun->mode_pages.index[i].page_code & SMPH_PC_MASK, lun->mode_pages.index[i].page_len); #endif page_len += lun->mode_pages.index[i].page_len; } if (page_len == 0) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 5); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } break; } } total_len = header_len + page_len; #if 0 printf("header_len = %d, page_len = %d, total_len = %d\n", header_len, page_len, total_len); #endif ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO); ctsio->kern_sg_entries = 0; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; if (total_len < alloc_len) { ctsio->residual = alloc_len - total_len; ctsio->kern_data_len = total_len; ctsio->kern_total_len = total_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } switch (ctsio->cdb[0]) { case MODE_SENSE_6: { struct scsi_mode_hdr_6 *header; header = (struct scsi_mode_hdr_6 *)ctsio->kern_data_ptr; header->datalen = MIN(total_len - 1, 254); if (control_dev == 0) { header->dev_specific = 0x10; /* DPOFUA */ - if ((lun->flags & CTL_LUN_READONLY) || + if ((lun->be_lun->flags & CTL_LUN_FLAG_READONLY) || (lun->mode_pages.control_page[CTL_PAGE_CURRENT] .eca_and_aen & SCP_SWP) != 0) header->dev_specific |= 0x80; /* WP */ } if (dbd) header->block_descr_len = 0; else header->block_descr_len = sizeof(struct scsi_mode_block_descr); block_desc = (struct scsi_mode_block_descr *)&header[1]; break; } case MODE_SENSE_10: { struct scsi_mode_hdr_10 *header; int datalen; header = (struct scsi_mode_hdr_10 *)ctsio->kern_data_ptr; datalen = MIN(total_len - 2, 65533); scsi_ulto2b(datalen, header->datalen); if (control_dev == 0) { header->dev_specific = 0x10; /* DPOFUA */ - if ((lun->flags & CTL_LUN_READONLY) || + if ((lun->be_lun->flags & CTL_LUN_FLAG_READONLY) || (lun->mode_pages.control_page[CTL_PAGE_CURRENT] .eca_and_aen & SCP_SWP) != 0) header->dev_specific |= 0x80; /* WP */ } if (dbd) scsi_ulto2b(0, header->block_descr_len); else scsi_ulto2b(sizeof(struct scsi_mode_block_descr), header->block_descr_len); block_desc = (struct scsi_mode_block_descr *)&header[1]; break; } default: panic("invalid CDB type %#x", ctsio->cdb[0]); break; /* NOTREACHED */ } /* * If we've got a disk, use its blocksize in the block * descriptor. Otherwise, just set it to 0. */ if (dbd == 0) { if (control_dev == 0) scsi_ulto3b(lun->be_lun->blocksize, block_desc->block_len); else scsi_ulto3b(0, block_desc->block_len); } switch (page_code) { case SMS_ALL_PAGES_PAGE: { int i, data_used; data_used = header_len; for (i = 0; i < CTL_NUM_MODE_PAGES; i++) { struct ctl_page_index *page_index; page_index = &lun->mode_pages.index[i]; if ((control_dev != 0) && (page_index->page_flags & CTL_PAGE_FLAG_DISK_ONLY)) continue; /* * We don't use this subpage if the user didn't * request all subpages. We already checked (above) * to make sure the user only specified a subpage * of 0 or 0xff in the SMS_ALL_PAGES_PAGE case. */ if ((page_index->subpage != 0) && (subpage == SMS_SUBPAGE_PAGE_0)) continue; /* * Call the handler, if it exists, to update the * page to the latest values. */ if (page_index->sense_handler != NULL) page_index->sense_handler(ctsio, page_index,pc); memcpy(ctsio->kern_data_ptr + data_used, page_index->page_data + (page_index->page_len * pc), page_index->page_len); data_used += page_index->page_len; } break; } default: { int i, data_used; data_used = header_len; for (i = 0; i < CTL_NUM_MODE_PAGES; i++) { struct ctl_page_index *page_index; page_index = &lun->mode_pages.index[i]; /* Look for the right page code */ if ((page_index->page_code & SMPH_PC_MASK) != page_code) continue; /* Look for the right subpage or the subpage wildcard*/ if ((page_index->subpage != subpage) && (subpage != SMS_SUBPAGE_ALL)) continue; /* Make sure the page is supported for this dev type */ if ((control_dev != 0) && (page_index->page_flags & CTL_PAGE_FLAG_DISK_ONLY)) continue; /* * Call the handler, if it exists, to update the * page to the latest values. */ if (page_index->sense_handler != NULL) page_index->sense_handler(ctsio, page_index,pc); memcpy(ctsio->kern_data_ptr + data_used, page_index->page_data + (page_index->page_len * pc), page_index->page_len); data_used += page_index->page_len; } break; } } ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_lbp_log_sense_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, int pc) { struct ctl_lun *lun; struct scsi_log_param_header *phdr; uint8_t *data; uint64_t val; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; data = page_index->page_data; if (lun->backend->lun_attr != NULL && (val = lun->backend->lun_attr(lun->be_lun->be_lun, "blocksavail")) != UINT64_MAX) { phdr = (struct scsi_log_param_header *)data; scsi_ulto2b(0x0001, phdr->param_code); phdr->param_control = SLP_LBIN | SLP_LP; phdr->param_len = 8; data = (uint8_t *)(phdr + 1); scsi_ulto4b(val >> CTL_LBP_EXPONENT, data); data[4] = 0x02; /* per-pool */ data += phdr->param_len; } if (lun->backend->lun_attr != NULL && (val = lun->backend->lun_attr(lun->be_lun->be_lun, "blocksused")) != UINT64_MAX) { phdr = (struct scsi_log_param_header *)data; scsi_ulto2b(0x0002, phdr->param_code); phdr->param_control = SLP_LBIN | SLP_LP; phdr->param_len = 8; data = (uint8_t *)(phdr + 1); scsi_ulto4b(val >> CTL_LBP_EXPONENT, data); data[4] = 0x01; /* per-LUN */ data += phdr->param_len; } if (lun->backend->lun_attr != NULL && (val = lun->backend->lun_attr(lun->be_lun->be_lun, "poolblocksavail")) != UINT64_MAX) { phdr = (struct scsi_log_param_header *)data; scsi_ulto2b(0x00f1, phdr->param_code); phdr->param_control = SLP_LBIN | SLP_LP; phdr->param_len = 8; data = (uint8_t *)(phdr + 1); scsi_ulto4b(val >> CTL_LBP_EXPONENT, data); data[4] = 0x02; /* per-pool */ data += phdr->param_len; } if (lun->backend->lun_attr != NULL && (val = lun->backend->lun_attr(lun->be_lun->be_lun, "poolblocksused")) != UINT64_MAX) { phdr = (struct scsi_log_param_header *)data; scsi_ulto2b(0x00f2, phdr->param_code); phdr->param_control = SLP_LBIN | SLP_LP; phdr->param_len = 8; data = (uint8_t *)(phdr + 1); scsi_ulto4b(val >> CTL_LBP_EXPONENT, data); data[4] = 0x02; /* per-pool */ data += phdr->param_len; } page_index->page_len = data - page_index->page_data; return (0); } int ctl_sap_log_sense_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, int pc) { struct ctl_lun *lun; struct stat_page *data; uint64_t rn, wn, rb, wb; struct bintime rt, wt; int i; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; data = (struct stat_page *)page_index->page_data; scsi_ulto2b(SLP_SAP, data->sap.hdr.param_code); data->sap.hdr.param_control = SLP_LBIN; data->sap.hdr.param_len = sizeof(struct scsi_log_stat_and_perf) - sizeof(struct scsi_log_param_header); rn = wn = rb = wb = 0; bintime_clear(&rt); bintime_clear(&wt); for (i = 0; i < CTL_MAX_PORTS; i++) { rn += lun->stats.ports[i].operations[CTL_STATS_READ]; wn += lun->stats.ports[i].operations[CTL_STATS_WRITE]; rb += lun->stats.ports[i].bytes[CTL_STATS_READ]; wb += lun->stats.ports[i].bytes[CTL_STATS_WRITE]; bintime_add(&rt, &lun->stats.ports[i].time[CTL_STATS_READ]); bintime_add(&wt, &lun->stats.ports[i].time[CTL_STATS_WRITE]); } scsi_u64to8b(rn, data->sap.read_num); scsi_u64to8b(wn, data->sap.write_num); if (lun->stats.blocksize > 0) { scsi_u64to8b(wb / lun->stats.blocksize, data->sap.recvieved_lba); scsi_u64to8b(rb / lun->stats.blocksize, data->sap.transmitted_lba); } scsi_u64to8b((uint64_t)rt.sec * 1000 + rt.frac / (UINT64_MAX / 1000), data->sap.read_int); scsi_u64to8b((uint64_t)wt.sec * 1000 + wt.frac / (UINT64_MAX / 1000), data->sap.write_int); scsi_u64to8b(0, data->sap.weighted_num); scsi_u64to8b(0, data->sap.weighted_int); scsi_ulto2b(SLP_IT, data->it.hdr.param_code); data->it.hdr.param_control = SLP_LBIN; data->it.hdr.param_len = sizeof(struct scsi_log_idle_time) - sizeof(struct scsi_log_param_header); #ifdef CTL_TIME_IO scsi_u64to8b(lun->idle_time / SBT_1MS, data->it.idle_int); #endif scsi_ulto2b(SLP_TI, data->ti.hdr.param_code); data->it.hdr.param_control = SLP_LBIN; data->ti.hdr.param_len = sizeof(struct scsi_log_time_interval) - sizeof(struct scsi_log_param_header); scsi_ulto4b(3, data->ti.exponent); scsi_ulto4b(1, data->ti.integer); page_index->page_len = sizeof(*data); return (0); } int ctl_log_sense(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; int i, pc, page_code, subpage; int alloc_len, total_len; struct ctl_page_index *page_index; struct scsi_log_sense *cdb; struct scsi_log_header *header; CTL_DEBUG_PRINT(("ctl_log_sense\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; cdb = (struct scsi_log_sense *)ctsio->cdb; pc = (cdb->page & SLS_PAGE_CTRL_MASK) >> 6; page_code = cdb->page & SLS_PAGE_CODE; subpage = cdb->subpage; alloc_len = scsi_2btoul(cdb->length); page_index = NULL; for (i = 0; i < CTL_NUM_LOG_PAGES; i++) { page_index = &lun->log_pages.index[i]; /* Look for the right page code */ if ((page_index->page_code & SL_PAGE_CODE) != page_code) continue; /* Look for the right subpage or the subpage wildcard*/ if (page_index->subpage != subpage) continue; break; } if (i >= CTL_NUM_LOG_PAGES) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } total_len = sizeof(struct scsi_log_header) + page_index->page_len; ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO); ctsio->kern_sg_entries = 0; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; if (total_len < alloc_len) { ctsio->residual = alloc_len - total_len; ctsio->kern_data_len = total_len; ctsio->kern_total_len = total_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } header = (struct scsi_log_header *)ctsio->kern_data_ptr; header->page = page_index->page_code; if (page_index->subpage) { header->page |= SL_SPF; header->subpage = page_index->subpage; } scsi_ulto2b(page_index->page_len, header->datalen); /* * Call the handler, if it exists, to update the * page to the latest values. */ if (page_index->sense_handler != NULL) page_index->sense_handler(ctsio, page_index, pc); memcpy(header + 1, page_index->page_data, page_index->page_len); ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_read_capacity(struct ctl_scsiio *ctsio) { struct scsi_read_capacity *cdb; struct scsi_read_capacity_data *data; struct ctl_lun *lun; uint32_t lba; CTL_DEBUG_PRINT(("ctl_read_capacity\n")); cdb = (struct scsi_read_capacity *)ctsio->cdb; lba = scsi_4btoul(cdb->addr); if (((cdb->pmi & SRC_PMI) == 0) && (lba != 0)) { ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; ctsio->kern_data_ptr = malloc(sizeof(*data), M_CTL, M_WAITOK | M_ZERO); data = (struct scsi_read_capacity_data *)ctsio->kern_data_ptr; ctsio->residual = 0; ctsio->kern_data_len = sizeof(*data); ctsio->kern_total_len = sizeof(*data); ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * If the maximum LBA is greater than 0xfffffffe, the user must * issue a SERVICE ACTION IN (16) command, with the read capacity * serivce action set. */ if (lun->be_lun->maxlba > 0xfffffffe) scsi_ulto4b(0xffffffff, data->addr); else scsi_ulto4b(lun->be_lun->maxlba, data->addr); /* * XXX KDM this may not be 512 bytes... */ scsi_ulto4b(lun->be_lun->blocksize, data->length); ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_read_capacity_16(struct ctl_scsiio *ctsio) { struct scsi_read_capacity_16 *cdb; struct scsi_read_capacity_data_long *data; struct ctl_lun *lun; uint64_t lba; uint32_t alloc_len; CTL_DEBUG_PRINT(("ctl_read_capacity_16\n")); cdb = (struct scsi_read_capacity_16 *)ctsio->cdb; alloc_len = scsi_4btoul(cdb->alloc_len); lba = scsi_8btou64(cdb->addr); if ((cdb->reladr & SRC16_PMI) && (lba != 0)) { ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; ctsio->kern_data_ptr = malloc(sizeof(*data), M_CTL, M_WAITOK | M_ZERO); data = (struct scsi_read_capacity_data_long *)ctsio->kern_data_ptr; if (sizeof(*data) < alloc_len) { ctsio->residual = alloc_len - sizeof(*data); ctsio->kern_data_len = sizeof(*data); ctsio->kern_total_len = sizeof(*data); } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; scsi_u64to8b(lun->be_lun->maxlba, data->addr); /* XXX KDM this may not be 512 bytes... */ scsi_ulto4b(lun->be_lun->blocksize, data->length); data->prot_lbppbe = lun->be_lun->pblockexp & SRC16_LBPPBE; scsi_ulto2b(lun->be_lun->pblockoff & SRC16_LALBA_A, data->lalba_lbp); if (lun->be_lun->flags & CTL_LUN_FLAG_UNMAP) data->lalba_lbp[0] |= SRC16_LBPME | SRC16_LBPRZ; ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_get_lba_status(struct ctl_scsiio *ctsio) { struct scsi_get_lba_status *cdb; struct scsi_get_lba_status_data *data; struct ctl_lun *lun; struct ctl_lba_len_flags *lbalen; uint64_t lba; uint32_t alloc_len, total_len; int retval; CTL_DEBUG_PRINT(("ctl_get_lba_status\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; cdb = (struct scsi_get_lba_status *)ctsio->cdb; lba = scsi_8btou64(cdb->addr); alloc_len = scsi_4btoul(cdb->alloc_len); if (lba > lun->be_lun->maxlba) { ctl_set_lba_out_of_range(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } total_len = sizeof(*data) + sizeof(data->descr[0]); ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO); data = (struct scsi_get_lba_status_data *)ctsio->kern_data_ptr; if (total_len < alloc_len) { ctsio->residual = alloc_len - total_len; ctsio->kern_data_len = total_len; ctsio->kern_total_len = total_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* Fill dummy data in case backend can't tell anything. */ scsi_ulto4b(4 + sizeof(data->descr[0]), data->length); scsi_u64to8b(lba, data->descr[0].addr); scsi_ulto4b(MIN(UINT32_MAX, lun->be_lun->maxlba + 1 - lba), data->descr[0].length); data->descr[0].status = 0; /* Mapped or unknown. */ ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; lbalen = (struct ctl_lba_len_flags *)&ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; lbalen->lba = lba; lbalen->len = total_len; lbalen->flags = 0; retval = lun->backend->config_read((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_read_defect(struct ctl_scsiio *ctsio) { struct scsi_read_defect_data_10 *ccb10; struct scsi_read_defect_data_12 *ccb12; struct scsi_read_defect_data_hdr_10 *data10; struct scsi_read_defect_data_hdr_12 *data12; uint32_t alloc_len, data_len; uint8_t format; CTL_DEBUG_PRINT(("ctl_read_defect\n")); if (ctsio->cdb[0] == READ_DEFECT_DATA_10) { ccb10 = (struct scsi_read_defect_data_10 *)&ctsio->cdb; format = ccb10->format; alloc_len = scsi_2btoul(ccb10->alloc_length); data_len = sizeof(*data10); } else { ccb12 = (struct scsi_read_defect_data_12 *)&ctsio->cdb; format = ccb12->format; alloc_len = scsi_4btoul(ccb12->alloc_length); data_len = sizeof(*data12); } if (alloc_len == 0) { ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO); if (data_len < alloc_len) { ctsio->residual = alloc_len - data_len; ctsio->kern_data_len = data_len; ctsio->kern_total_len = data_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; if (ctsio->cdb[0] == READ_DEFECT_DATA_10) { data10 = (struct scsi_read_defect_data_hdr_10 *) ctsio->kern_data_ptr; data10->format = format; scsi_ulto2b(0, data10->length); } else { data12 = (struct scsi_read_defect_data_hdr_12 *) ctsio->kern_data_ptr; data12->format = format; scsi_ulto2b(0, data12->generation); scsi_ulto4b(0, data12->length); } ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_report_tagret_port_groups(struct ctl_scsiio *ctsio) { struct scsi_maintenance_in *cdb; int retval; int alloc_len, ext, total_len = 0, g, p, pc, pg, gs, os; int num_target_port_groups, num_target_ports; struct ctl_lun *lun; struct ctl_softc *softc; struct ctl_port *port; struct scsi_target_group_data *rtg_ptr; struct scsi_target_group_data_extended *rtg_ext_ptr; struct scsi_target_port_group_descriptor *tpg_desc; CTL_DEBUG_PRINT(("ctl_report_tagret_port_groups\n")); cdb = (struct scsi_maintenance_in *)ctsio->cdb; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; softc = lun->ctl_softc; retval = CTL_RETVAL_COMPLETE; switch (cdb->byte2 & STG_PDF_MASK) { case STG_PDF_LENGTH: ext = 0; break; case STG_PDF_EXTENDED: ext = 1; break; default: ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 5); ctl_done((union ctl_io *)ctsio); return(retval); } if (softc->is_single) num_target_port_groups = 1; else num_target_port_groups = NUM_TARGET_PORT_GROUPS; num_target_ports = 0; mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(port, &softc->port_list, links) { if ((port->status & CTL_PORT_STATUS_ONLINE) == 0) continue; if (ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS) continue; num_target_ports++; } mtx_unlock(&softc->ctl_lock); if (ext) total_len = sizeof(struct scsi_target_group_data_extended); else total_len = sizeof(struct scsi_target_group_data); total_len += sizeof(struct scsi_target_port_group_descriptor) * num_target_port_groups + sizeof(struct scsi_target_port_descriptor) * num_target_ports * num_target_port_groups; alloc_len = scsi_4btoul(cdb->length); ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO); ctsio->kern_sg_entries = 0; if (total_len < alloc_len) { ctsio->residual = alloc_len - total_len; ctsio->kern_data_len = total_len; ctsio->kern_total_len = total_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; if (ext) { rtg_ext_ptr = (struct scsi_target_group_data_extended *) ctsio->kern_data_ptr; scsi_ulto4b(total_len - 4, rtg_ext_ptr->length); rtg_ext_ptr->format_type = 0x10; rtg_ext_ptr->implicit_transition_time = 0; tpg_desc = &rtg_ext_ptr->groups[0]; } else { rtg_ptr = (struct scsi_target_group_data *) ctsio->kern_data_ptr; scsi_ulto4b(total_len - 4, rtg_ptr->length); tpg_desc = &rtg_ptr->groups[0]; } mtx_lock(&softc->ctl_lock); pg = softc->port_offset / CTL_MAX_PORTS; if (softc->flags & CTL_FLAG_ACTIVE_SHELF) { if (softc->ha_mode == CTL_HA_MODE_ACT_STBY) { gs = TPG_ASYMMETRIC_ACCESS_OPTIMIZED; os = TPG_ASYMMETRIC_ACCESS_STANDBY; } else if (lun->flags & CTL_LUN_PRIMARY_SC) { gs = TPG_ASYMMETRIC_ACCESS_OPTIMIZED; os = TPG_ASYMMETRIC_ACCESS_NONOPTIMIZED; } else { gs = TPG_ASYMMETRIC_ACCESS_NONOPTIMIZED; os = TPG_ASYMMETRIC_ACCESS_OPTIMIZED; } } else { gs = TPG_ASYMMETRIC_ACCESS_STANDBY; os = TPG_ASYMMETRIC_ACCESS_OPTIMIZED; } for (g = 0; g < num_target_port_groups; g++) { tpg_desc->pref_state = (g == pg) ? gs : os; tpg_desc->support = TPG_AO_SUP | TPG_AN_SUP | TPG_S_SUP; scsi_ulto2b(g + 1, tpg_desc->target_port_group); tpg_desc->status = TPG_IMPLICIT; pc = 0; STAILQ_FOREACH(port, &softc->port_list, links) { if ((port->status & CTL_PORT_STATUS_ONLINE) == 0) continue; if (ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS) continue; p = port->targ_port % CTL_MAX_PORTS + g * CTL_MAX_PORTS; scsi_ulto2b(p, tpg_desc->descriptors[pc]. relative_target_port_identifier); pc++; } tpg_desc->target_port_count = pc; tpg_desc = (struct scsi_target_port_group_descriptor *) &tpg_desc->descriptors[pc]; } mtx_unlock(&softc->ctl_lock); ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return(retval); } int ctl_report_supported_opcodes(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; struct scsi_report_supported_opcodes *cdb; const struct ctl_cmd_entry *entry, *sentry; struct scsi_report_supported_opcodes_all *all; struct scsi_report_supported_opcodes_descr *descr; struct scsi_report_supported_opcodes_one *one; int retval; int alloc_len, total_len; int opcode, service_action, i, j, num; CTL_DEBUG_PRINT(("ctl_report_supported_opcodes\n")); cdb = (struct scsi_report_supported_opcodes *)ctsio->cdb; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; retval = CTL_RETVAL_COMPLETE; opcode = cdb->requested_opcode; service_action = scsi_2btoul(cdb->requested_service_action); switch (cdb->options & RSO_OPTIONS_MASK) { case RSO_OPTIONS_ALL: num = 0; for (i = 0; i < 256; i++) { entry = &ctl_cmd_table[i]; if (entry->flags & CTL_CMD_FLAG_SA5) { for (j = 0; j < 32; j++) { sentry = &((const struct ctl_cmd_entry *) entry->execute)[j]; if (ctl_cmd_applicable( lun->be_lun->lun_type, sentry)) num++; } } else { if (ctl_cmd_applicable(lun->be_lun->lun_type, entry)) num++; } } total_len = sizeof(struct scsi_report_supported_opcodes_all) + num * sizeof(struct scsi_report_supported_opcodes_descr); break; case RSO_OPTIONS_OC: if (ctl_cmd_table[opcode].flags & CTL_CMD_FLAG_SA5) { ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 2); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } total_len = sizeof(struct scsi_report_supported_opcodes_one) + 32; break; case RSO_OPTIONS_OC_SA: if ((ctl_cmd_table[opcode].flags & CTL_CMD_FLAG_SA5) == 0 || service_action >= 32) { ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 2); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } total_len = sizeof(struct scsi_report_supported_opcodes_one) + 32; break; default: ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 2); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } alloc_len = scsi_4btoul(cdb->length); ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO); ctsio->kern_sg_entries = 0; if (total_len < alloc_len) { ctsio->residual = alloc_len - total_len; ctsio->kern_data_len = total_len; ctsio->kern_total_len = total_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; switch (cdb->options & RSO_OPTIONS_MASK) { case RSO_OPTIONS_ALL: all = (struct scsi_report_supported_opcodes_all *) ctsio->kern_data_ptr; num = 0; for (i = 0; i < 256; i++) { entry = &ctl_cmd_table[i]; if (entry->flags & CTL_CMD_FLAG_SA5) { for (j = 0; j < 32; j++) { sentry = &((const struct ctl_cmd_entry *) entry->execute)[j]; if (!ctl_cmd_applicable( lun->be_lun->lun_type, sentry)) continue; descr = &all->descr[num++]; descr->opcode = i; scsi_ulto2b(j, descr->service_action); descr->flags = RSO_SERVACTV; scsi_ulto2b(sentry->length, descr->cdb_length); } } else { if (!ctl_cmd_applicable(lun->be_lun->lun_type, entry)) continue; descr = &all->descr[num++]; descr->opcode = i; scsi_ulto2b(0, descr->service_action); descr->flags = 0; scsi_ulto2b(entry->length, descr->cdb_length); } } scsi_ulto4b( num * sizeof(struct scsi_report_supported_opcodes_descr), all->length); break; case RSO_OPTIONS_OC: one = (struct scsi_report_supported_opcodes_one *) ctsio->kern_data_ptr; entry = &ctl_cmd_table[opcode]; goto fill_one; case RSO_OPTIONS_OC_SA: one = (struct scsi_report_supported_opcodes_one *) ctsio->kern_data_ptr; entry = &ctl_cmd_table[opcode]; entry = &((const struct ctl_cmd_entry *) entry->execute)[service_action]; fill_one: if (ctl_cmd_applicable(lun->be_lun->lun_type, entry)) { one->support = 3; scsi_ulto2b(entry->length, one->cdb_length); one->cdb_usage[0] = opcode; memcpy(&one->cdb_usage[1], entry->usage, entry->length - 1); } else one->support = 1; break; } ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return(retval); } int ctl_report_supported_tmf(struct ctl_scsiio *ctsio) { struct scsi_report_supported_tmf *cdb; struct scsi_report_supported_tmf_data *data; int retval; int alloc_len, total_len; CTL_DEBUG_PRINT(("ctl_report_supported_tmf\n")); cdb = (struct scsi_report_supported_tmf *)ctsio->cdb; retval = CTL_RETVAL_COMPLETE; total_len = sizeof(struct scsi_report_supported_tmf_data); alloc_len = scsi_4btoul(cdb->length); ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO); ctsio->kern_sg_entries = 0; if (total_len < alloc_len) { ctsio->residual = alloc_len - total_len; ctsio->kern_data_len = total_len; ctsio->kern_total_len = total_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; data = (struct scsi_report_supported_tmf_data *)ctsio->kern_data_ptr; data->byte1 |= RST_ATS | RST_ATSS | RST_CTSS | RST_LURS | RST_TRS; data->byte2 |= RST_ITNRS; ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (retval); } int ctl_report_timestamp(struct ctl_scsiio *ctsio) { struct scsi_report_timestamp *cdb; struct scsi_report_timestamp_data *data; struct timeval tv; int64_t timestamp; int retval; int alloc_len, total_len; CTL_DEBUG_PRINT(("ctl_report_timestamp\n")); cdb = (struct scsi_report_timestamp *)ctsio->cdb; retval = CTL_RETVAL_COMPLETE; total_len = sizeof(struct scsi_report_timestamp_data); alloc_len = scsi_4btoul(cdb->length); ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO); ctsio->kern_sg_entries = 0; if (total_len < alloc_len) { ctsio->residual = alloc_len - total_len; ctsio->kern_data_len = total_len; ctsio->kern_total_len = total_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; data = (struct scsi_report_timestamp_data *)ctsio->kern_data_ptr; scsi_ulto2b(sizeof(*data) - 2, data->length); data->origin = RTS_ORIG_OUTSIDE; getmicrotime(&tv); timestamp = (int64_t)tv.tv_sec * 1000 + tv.tv_usec / 1000; scsi_ulto4b(timestamp >> 16, data->timestamp); scsi_ulto2b(timestamp & 0xffff, &data->timestamp[4]); ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (retval); } int ctl_persistent_reserve_in(struct ctl_scsiio *ctsio) { struct scsi_per_res_in *cdb; int alloc_len, total_len = 0; /* struct scsi_per_res_in_rsrv in_data; */ struct ctl_lun *lun; struct ctl_softc *softc; uint64_t key; CTL_DEBUG_PRINT(("ctl_persistent_reserve_in\n")); cdb = (struct scsi_per_res_in *)ctsio->cdb; alloc_len = scsi_2btoul(cdb->length); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; softc = lun->ctl_softc; retry: mtx_lock(&lun->lun_lock); switch (cdb->action) { case SPRI_RK: /* read keys */ total_len = sizeof(struct scsi_per_res_in_keys) + lun->pr_key_count * sizeof(struct scsi_per_res_key); break; case SPRI_RR: /* read reservation */ if (lun->flags & CTL_LUN_PR_RESERVED) total_len = sizeof(struct scsi_per_res_in_rsrv); else total_len = sizeof(struct scsi_per_res_in_header); break; case SPRI_RC: /* report capabilities */ total_len = sizeof(struct scsi_per_res_cap); break; case SPRI_RS: /* read full status */ total_len = sizeof(struct scsi_per_res_in_header) + (sizeof(struct scsi_per_res_in_full_desc) + 256) * lun->pr_key_count; break; default: panic("Invalid PR type %x", cdb->action); } mtx_unlock(&lun->lun_lock); ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO); if (total_len < alloc_len) { ctsio->residual = alloc_len - total_len; ctsio->kern_data_len = total_len; ctsio->kern_total_len = total_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; mtx_lock(&lun->lun_lock); switch (cdb->action) { case SPRI_RK: { // read keys struct scsi_per_res_in_keys *res_keys; int i, key_count; res_keys = (struct scsi_per_res_in_keys*)ctsio->kern_data_ptr; /* * We had to drop the lock to allocate our buffer, which * leaves time for someone to come in with another * persistent reservation. (That is unlikely, though, * since this should be the only persistent reservation * command active right now.) */ if (total_len != (sizeof(struct scsi_per_res_in_keys) + (lun->pr_key_count * sizeof(struct scsi_per_res_key)))){ mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); printf("%s: reservation length changed, retrying\n", __func__); goto retry; } scsi_ulto4b(lun->PRGeneration, res_keys->header.generation); scsi_ulto4b(sizeof(struct scsi_per_res_key) * lun->pr_key_count, res_keys->header.length); for (i = 0, key_count = 0; i < 2*CTL_MAX_INITIATORS; i++) { if ((key = ctl_get_prkey(lun, i)) == 0) continue; /* * We used lun->pr_key_count to calculate the * size to allocate. If it turns out the number of * initiators with the registered flag set is * larger than that (i.e. they haven't been kept in * sync), we've got a problem. */ if (key_count >= lun->pr_key_count) { #ifdef NEEDTOPORT csevent_log(CSC_CTL | CSC_SHELF_SW | CTL_PR_ERROR, csevent_LogType_Fault, csevent_AlertLevel_Yellow, csevent_FRU_ShelfController, csevent_FRU_Firmware, csevent_FRU_Unknown, "registered keys %d >= key " "count %d", key_count, lun->pr_key_count); #endif key_count++; continue; } scsi_u64to8b(key, res_keys->keys[key_count].key); key_count++; } break; } case SPRI_RR: { // read reservation struct scsi_per_res_in_rsrv *res; int tmp_len, header_only; res = (struct scsi_per_res_in_rsrv *)ctsio->kern_data_ptr; scsi_ulto4b(lun->PRGeneration, res->header.generation); if (lun->flags & CTL_LUN_PR_RESERVED) { tmp_len = sizeof(struct scsi_per_res_in_rsrv); scsi_ulto4b(sizeof(struct scsi_per_res_in_rsrv_data), res->header.length); header_only = 0; } else { tmp_len = sizeof(struct scsi_per_res_in_header); scsi_ulto4b(0, res->header.length); header_only = 1; } /* * We had to drop the lock to allocate our buffer, which * leaves time for someone to come in with another * persistent reservation. (That is unlikely, though, * since this should be the only persistent reservation * command active right now.) */ if (tmp_len != total_len) { mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); printf("%s: reservation status changed, retrying\n", __func__); goto retry; } /* * No reservation held, so we're done. */ if (header_only != 0) break; /* * If the registration is an All Registrants type, the key * is 0, since it doesn't really matter. */ if (lun->pr_res_idx != CTL_PR_ALL_REGISTRANTS) { scsi_u64to8b(ctl_get_prkey(lun, lun->pr_res_idx), res->data.reservation); } res->data.scopetype = lun->res_type; break; } case SPRI_RC: //report capabilities { struct scsi_per_res_cap *res_cap; uint16_t type_mask; res_cap = (struct scsi_per_res_cap *)ctsio->kern_data_ptr; scsi_ulto2b(sizeof(*res_cap), res_cap->length); res_cap->flags2 |= SPRI_TMV | SPRI_ALLOW_5; type_mask = SPRI_TM_WR_EX_AR | SPRI_TM_EX_AC_RO | SPRI_TM_WR_EX_RO | SPRI_TM_EX_AC | SPRI_TM_WR_EX | SPRI_TM_EX_AC_AR; scsi_ulto2b(type_mask, res_cap->type_mask); break; } case SPRI_RS: { // read full status struct scsi_per_res_in_full *res_status; struct scsi_per_res_in_full_desc *res_desc; struct ctl_port *port; int i, len; res_status = (struct scsi_per_res_in_full*)ctsio->kern_data_ptr; /* * We had to drop the lock to allocate our buffer, which * leaves time for someone to come in with another * persistent reservation. (That is unlikely, though, * since this should be the only persistent reservation * command active right now.) */ if (total_len < (sizeof(struct scsi_per_res_in_header) + (sizeof(struct scsi_per_res_in_full_desc) + 256) * lun->pr_key_count)){ mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); printf("%s: reservation length changed, retrying\n", __func__); goto retry; } scsi_ulto4b(lun->PRGeneration, res_status->header.generation); res_desc = &res_status->desc[0]; for (i = 0; i < 2*CTL_MAX_INITIATORS; i++) { if ((key = ctl_get_prkey(lun, i)) == 0) continue; scsi_u64to8b(key, res_desc->res_key.key); if ((lun->flags & CTL_LUN_PR_RESERVED) && (lun->pr_res_idx == i || lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS)) { res_desc->flags = SPRI_FULL_R_HOLDER; res_desc->scopetype = lun->res_type; } scsi_ulto2b(i / CTL_MAX_INIT_PER_PORT, res_desc->rel_trgt_port_id); len = 0; port = softc->ctl_ports[ ctl_port_idx(i / CTL_MAX_INIT_PER_PORT)]; if (port != NULL) len = ctl_create_iid(port, i % CTL_MAX_INIT_PER_PORT, res_desc->transport_id); scsi_ulto4b(len, res_desc->additional_length); res_desc = (struct scsi_per_res_in_full_desc *) &res_desc->transport_id[len]; } scsi_ulto4b((uint8_t *)res_desc - (uint8_t *)&res_status->desc[0], res_status->header.length); break; } default: /* * This is a bug, because we just checked for this above, * and should have returned an error. */ panic("Invalid PR type %x", cdb->action); break; /* NOTREACHED */ } mtx_unlock(&lun->lun_lock); ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } static void ctl_est_res_ua(struct ctl_lun *lun, uint32_t residx, ctl_ua_type ua) { int off = lun->ctl_softc->persis_offset; if (residx >= off && residx < off + CTL_MAX_INITIATORS) ctl_est_ua(lun, residx - off, ua); } /* * Returns 0 if ctl_persistent_reserve_out() should continue, non-zero if * it should return. */ static int ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun, uint64_t res_key, uint64_t sa_res_key, uint8_t type, uint32_t residx, struct ctl_scsiio *ctsio, struct scsi_per_res_out *cdb, struct scsi_per_res_out_parms* param) { union ctl_ha_msg persis_io; int retval, i; int isc_retval; retval = 0; mtx_lock(&lun->lun_lock); if (sa_res_key == 0) { if (lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS) { /* validate scope and type */ if ((cdb->scope_type & SPR_SCOPE_MASK) != SPR_LU_SCOPE) { mtx_unlock(&lun->lun_lock); ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 4); ctl_done((union ctl_io *)ctsio); return (1); } if (type>8 || type==2 || type==4 || type==0) { mtx_unlock(&lun->lun_lock); ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (1); } /* * Unregister everybody else and build UA for * them */ for(i=0; i < 2*CTL_MAX_INITIATORS; i++) { if (i == residx || ctl_get_prkey(lun, i) == 0) continue; ctl_clr_prkey(lun, i); ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT); } lun->pr_key_count = 1; lun->res_type = type; if (lun->res_type != SPR_TYPE_WR_EX_AR && lun->res_type != SPR_TYPE_EX_AC_AR) lun->pr_res_idx = residx; /* send msg to other side */ persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_PREEMPT; persis_io.pr.pr_info.residx = lun->pr_res_idx; persis_io.pr.pr_info.res_type = type; memcpy(persis_io.pr.pr_info.sa_res_key, param->serv_act_res_key, sizeof(param->serv_act_res_key)); if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io), 0)) > CTL_HA_STATUS_SUCCESS) { printf("CTL:Persis Out error returned " "from ctl_ha_msg_send %d\n", isc_retval); } } else { /* not all registrants */ mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0, /*field*/ 8, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (1); } } else if (lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS || !(lun->flags & CTL_LUN_PR_RESERVED)) { int found = 0; if (res_key == sa_res_key) { /* special case */ /* * The spec implies this is not good but doesn't * say what to do. There are two choices either * generate a res conflict or check condition * with illegal field in parameter data. Since * that is what is done when the sa_res_key is * zero I'll take that approach since this has * to do with the sa_res_key. */ mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0, /*field*/ 8, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (1); } for (i=0; i < 2*CTL_MAX_INITIATORS; i++) { if (ctl_get_prkey(lun, i) != sa_res_key) continue; found = 1; ctl_clr_prkey(lun, i); lun->pr_key_count--; ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT); } if (!found) { mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_reservation_conflict(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* send msg to other side */ persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_PREEMPT; persis_io.pr.pr_info.residx = lun->pr_res_idx; persis_io.pr.pr_info.res_type = type; memcpy(persis_io.pr.pr_info.sa_res_key, param->serv_act_res_key, sizeof(param->serv_act_res_key)); if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io), 0)) > CTL_HA_STATUS_SUCCESS) { printf("CTL:Persis Out error returned from " "ctl_ha_msg_send %d\n", isc_retval); } } else { /* Reserved but not all registrants */ /* sa_res_key is res holder */ if (sa_res_key == ctl_get_prkey(lun, lun->pr_res_idx)) { /* validate scope and type */ if ((cdb->scope_type & SPR_SCOPE_MASK) != SPR_LU_SCOPE) { mtx_unlock(&lun->lun_lock); ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 4); ctl_done((union ctl_io *)ctsio); return (1); } if (type>8 || type==2 || type==4 || type==0) { mtx_unlock(&lun->lun_lock); ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (1); } /* * Do the following: * if sa_res_key != res_key remove all * registrants w/sa_res_key and generate UA * for these registrants(Registrations * Preempted) if it wasn't an exclusive * reservation generate UA(Reservations * Preempted) for all other registered nexuses * if the type has changed. Establish the new * reservation and holder. If res_key and * sa_res_key are the same do the above * except don't unregister the res holder. */ for(i=0; i < 2*CTL_MAX_INITIATORS; i++) { if (i == residx || ctl_get_prkey(lun, i) == 0) continue; if (sa_res_key == ctl_get_prkey(lun, i)) { ctl_clr_prkey(lun, i); lun->pr_key_count--; ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT); } else if (type != lun->res_type && (lun->res_type == SPR_TYPE_WR_EX_RO || lun->res_type ==SPR_TYPE_EX_AC_RO)){ ctl_est_res_ua(lun, i, CTL_UA_RES_RELEASE); } } lun->res_type = type; if (lun->res_type != SPR_TYPE_WR_EX_AR && lun->res_type != SPR_TYPE_EX_AC_AR) lun->pr_res_idx = residx; else lun->pr_res_idx = CTL_PR_ALL_REGISTRANTS; persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_PREEMPT; persis_io.pr.pr_info.residx = lun->pr_res_idx; persis_io.pr.pr_info.res_type = type; memcpy(persis_io.pr.pr_info.sa_res_key, param->serv_act_res_key, sizeof(param->serv_act_res_key)); if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io), 0)) > CTL_HA_STATUS_SUCCESS) { printf("CTL:Persis Out error returned " "from ctl_ha_msg_send %d\n", isc_retval); } } else { /* * sa_res_key is not the res holder just * remove registrants */ int found=0; for (i=0; i < 2*CTL_MAX_INITIATORS; i++) { if (sa_res_key != ctl_get_prkey(lun, i)) continue; found = 1; ctl_clr_prkey(lun, i); lun->pr_key_count--; ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT); } if (!found) { mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_reservation_conflict(ctsio); ctl_done((union ctl_io *)ctsio); return (1); } persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_PREEMPT; persis_io.pr.pr_info.residx = lun->pr_res_idx; persis_io.pr.pr_info.res_type = type; memcpy(persis_io.pr.pr_info.sa_res_key, param->serv_act_res_key, sizeof(param->serv_act_res_key)); if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io), 0)) > CTL_HA_STATUS_SUCCESS) { printf("CTL:Persis Out error returned " "from ctl_ha_msg_send %d\n", isc_retval); } } } lun->PRGeneration++; mtx_unlock(&lun->lun_lock); return (retval); } static void ctl_pro_preempt_other(struct ctl_lun *lun, union ctl_ha_msg *msg) { uint64_t sa_res_key; int i; sa_res_key = scsi_8btou64(msg->pr.pr_info.sa_res_key); if (lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS || lun->pr_res_idx == CTL_PR_NO_RESERVATION || sa_res_key != ctl_get_prkey(lun, lun->pr_res_idx)) { if (sa_res_key == 0) { /* * Unregister everybody else and build UA for * them */ for(i=0; i < 2*CTL_MAX_INITIATORS; i++) { if (i == msg->pr.pr_info.residx || ctl_get_prkey(lun, i) == 0) continue; ctl_clr_prkey(lun, i); ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT); } lun->pr_key_count = 1; lun->res_type = msg->pr.pr_info.res_type; if (lun->res_type != SPR_TYPE_WR_EX_AR && lun->res_type != SPR_TYPE_EX_AC_AR) lun->pr_res_idx = msg->pr.pr_info.residx; } else { for (i=0; i < 2*CTL_MAX_INITIATORS; i++) { if (sa_res_key == ctl_get_prkey(lun, i)) continue; ctl_clr_prkey(lun, i); lun->pr_key_count--; ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT); } } } else { for (i=0; i < 2*CTL_MAX_INITIATORS; i++) { if (i == msg->pr.pr_info.residx || ctl_get_prkey(lun, i) == 0) continue; if (sa_res_key == ctl_get_prkey(lun, i)) { ctl_clr_prkey(lun, i); lun->pr_key_count--; ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT); } else if (msg->pr.pr_info.res_type != lun->res_type && (lun->res_type == SPR_TYPE_WR_EX_RO || lun->res_type == SPR_TYPE_EX_AC_RO)) { ctl_est_res_ua(lun, i, CTL_UA_RES_RELEASE); } } lun->res_type = msg->pr.pr_info.res_type; if (lun->res_type != SPR_TYPE_WR_EX_AR && lun->res_type != SPR_TYPE_EX_AC_AR) lun->pr_res_idx = msg->pr.pr_info.residx; else lun->pr_res_idx = CTL_PR_ALL_REGISTRANTS; } lun->PRGeneration++; } int ctl_persistent_reserve_out(struct ctl_scsiio *ctsio) { int retval; int isc_retval; u_int32_t param_len; struct scsi_per_res_out *cdb; struct ctl_lun *lun; struct scsi_per_res_out_parms* param; struct ctl_softc *softc; uint32_t residx; uint64_t res_key, sa_res_key, key; uint8_t type; union ctl_ha_msg persis_io; int i; CTL_DEBUG_PRINT(("ctl_persistent_reserve_out\n")); retval = CTL_RETVAL_COMPLETE; cdb = (struct scsi_per_res_out *)ctsio->cdb; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; softc = lun->ctl_softc; /* * We only support whole-LUN scope. The scope & type are ignored for * register, register and ignore existing key and clear. * We sometimes ignore scope and type on preempts too!! * Verify reservation type here as well. */ type = cdb->scope_type & SPR_TYPE_MASK; if ((cdb->action == SPRO_RESERVE) || (cdb->action == SPRO_RELEASE)) { if ((cdb->scope_type & SPR_SCOPE_MASK) != SPR_LU_SCOPE) { ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 4); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } if (type>8 || type==2 || type==4 || type==0) { ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } } param_len = scsi_4btoul(cdb->length); if ((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) { ctsio->kern_data_ptr = malloc(param_len, M_CTL, M_WAITOK); ctsio->kern_data_len = param_len; ctsio->kern_total_len = param_len; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } param = (struct scsi_per_res_out_parms *)ctsio->kern_data_ptr; residx = ctl_get_resindex(&ctsio->io_hdr.nexus); res_key = scsi_8btou64(param->res_key.key); sa_res_key = scsi_8btou64(param->serv_act_res_key); /* * Validate the reservation key here except for SPRO_REG_IGNO * This must be done for all other service actions */ if ((cdb->action & SPRO_ACTION_MASK) != SPRO_REG_IGNO) { mtx_lock(&lun->lun_lock); if ((key = ctl_get_prkey(lun, residx)) != 0) { if (res_key != key) { /* * The current key passed in doesn't match * the one the initiator previously * registered. */ mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_reservation_conflict(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } } else if ((cdb->action & SPRO_ACTION_MASK) != SPRO_REGISTER) { /* * We are not registered */ mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_reservation_conflict(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } else if (res_key != 0) { /* * We are not registered and trying to register but * the register key isn't zero. */ mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_reservation_conflict(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } mtx_unlock(&lun->lun_lock); } switch (cdb->action & SPRO_ACTION_MASK) { case SPRO_REGISTER: case SPRO_REG_IGNO: { #if 0 printf("Registration received\n"); #endif /* * We don't support any of these options, as we report in * the read capabilities request (see * ctl_persistent_reserve_in(), above). */ if ((param->flags & SPR_SPEC_I_PT) || (param->flags & SPR_ALL_TG_PT) || (param->flags & SPR_APTPL)) { int bit_ptr; if (param->flags & SPR_APTPL) bit_ptr = 0; else if (param->flags & SPR_ALL_TG_PT) bit_ptr = 2; else /* SPR_SPEC_I_PT */ bit_ptr = 3; free(ctsio->kern_data_ptr, M_CTL); ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0, /*field*/ 20, /*bit_valid*/ 1, /*bit*/ bit_ptr); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } mtx_lock(&lun->lun_lock); /* * The initiator wants to clear the * key/unregister. */ if (sa_res_key == 0) { if ((res_key == 0 && (cdb->action & SPRO_ACTION_MASK) == SPRO_REGISTER) || ((cdb->action & SPRO_ACTION_MASK) == SPRO_REG_IGNO && ctl_get_prkey(lun, residx) == 0)) { mtx_unlock(&lun->lun_lock); goto done; } ctl_clr_prkey(lun, residx); lun->pr_key_count--; if (residx == lun->pr_res_idx) { lun->flags &= ~CTL_LUN_PR_RESERVED; lun->pr_res_idx = CTL_PR_NO_RESERVATION; if ((lun->res_type == SPR_TYPE_WR_EX_RO || lun->res_type == SPR_TYPE_EX_AC_RO) && lun->pr_key_count) { /* * If the reservation is a registrants * only type we need to generate a UA * for other registered inits. The * sense code should be RESERVATIONS * RELEASED */ for (i = 0; i < CTL_MAX_INITIATORS;i++){ if (ctl_get_prkey(lun, i + softc->persis_offset) == 0) continue; ctl_est_ua(lun, i, CTL_UA_RES_RELEASE); } } lun->res_type = 0; } else if (lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS) { if (lun->pr_key_count==0) { lun->flags &= ~CTL_LUN_PR_RESERVED; lun->res_type = 0; lun->pr_res_idx = CTL_PR_NO_RESERVATION; } } persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_UNREG_KEY; persis_io.pr.pr_info.residx = residx; if ((isc_retval = ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io), 0 )) > CTL_HA_STATUS_SUCCESS) { printf("CTL:Persis Out error returned from " "ctl_ha_msg_send %d\n", isc_retval); } } else /* sa_res_key != 0 */ { /* * If we aren't registered currently then increment * the key count and set the registered flag. */ ctl_alloc_prkey(lun, residx); if (ctl_get_prkey(lun, residx) == 0) lun->pr_key_count++; ctl_set_prkey(lun, residx, sa_res_key); persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_REG_KEY; persis_io.pr.pr_info.residx = residx; memcpy(persis_io.pr.pr_info.sa_res_key, param->serv_act_res_key, sizeof(param->serv_act_res_key)); if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io), 0)) > CTL_HA_STATUS_SUCCESS) { printf("CTL:Persis Out error returned from " "ctl_ha_msg_send %d\n", isc_retval); } } lun->PRGeneration++; mtx_unlock(&lun->lun_lock); break; } case SPRO_RESERVE: #if 0 printf("Reserve executed type %d\n", type); #endif mtx_lock(&lun->lun_lock); if (lun->flags & CTL_LUN_PR_RESERVED) { /* * if this isn't the reservation holder and it's * not a "all registrants" type or if the type is * different then we have a conflict */ if ((lun->pr_res_idx != residx && lun->pr_res_idx != CTL_PR_ALL_REGISTRANTS) || lun->res_type != type) { mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_reservation_conflict(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } mtx_unlock(&lun->lun_lock); } else /* create a reservation */ { /* * If it's not an "all registrants" type record * reservation holder */ if (type != SPR_TYPE_WR_EX_AR && type != SPR_TYPE_EX_AC_AR) lun->pr_res_idx = residx; /* Res holder */ else lun->pr_res_idx = CTL_PR_ALL_REGISTRANTS; lun->flags |= CTL_LUN_PR_RESERVED; lun->res_type = type; mtx_unlock(&lun->lun_lock); /* send msg to other side */ persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_RESERVE; persis_io.pr.pr_info.residx = lun->pr_res_idx; persis_io.pr.pr_info.res_type = type; if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io), 0)) > CTL_HA_STATUS_SUCCESS) { printf("CTL:Persis Out error returned from " "ctl_ha_msg_send %d\n", isc_retval); } } break; case SPRO_RELEASE: mtx_lock(&lun->lun_lock); if ((lun->flags & CTL_LUN_PR_RESERVED) == 0) { /* No reservation exists return good status */ mtx_unlock(&lun->lun_lock); goto done; } /* * Is this nexus a reservation holder? */ if (lun->pr_res_idx != residx && lun->pr_res_idx != CTL_PR_ALL_REGISTRANTS) { /* * not a res holder return good status but * do nothing */ mtx_unlock(&lun->lun_lock); goto done; } if (lun->res_type != type) { mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_illegal_pr_release(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* okay to release */ lun->flags &= ~CTL_LUN_PR_RESERVED; lun->pr_res_idx = CTL_PR_NO_RESERVATION; lun->res_type = 0; /* * if this isn't an exclusive access * res generate UA for all other * registrants. */ if (type != SPR_TYPE_EX_AC && type != SPR_TYPE_WR_EX) { for (i = 0; i < CTL_MAX_INITIATORS; i++) { if (i == residx || ctl_get_prkey(lun, i + softc->persis_offset) == 0) continue; ctl_est_ua(lun, i, CTL_UA_RES_RELEASE); } } mtx_unlock(&lun->lun_lock); /* Send msg to other side */ persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_RELEASE; if ((isc_retval=ctl_ha_msg_send( CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io), 0)) > CTL_HA_STATUS_SUCCESS) { printf("CTL:Persis Out error returned from " "ctl_ha_msg_send %d\n", isc_retval); } break; case SPRO_CLEAR: /* send msg to other side */ mtx_lock(&lun->lun_lock); lun->flags &= ~CTL_LUN_PR_RESERVED; lun->res_type = 0; lun->pr_key_count = 0; lun->pr_res_idx = CTL_PR_NO_RESERVATION; ctl_clr_prkey(lun, residx); for (i=0; i < 2*CTL_MAX_INITIATORS; i++) if (ctl_get_prkey(lun, i) != 0) { ctl_clr_prkey(lun, i); ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT); } lun->PRGeneration++; mtx_unlock(&lun->lun_lock); persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_CLEAR; if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io), 0)) > CTL_HA_STATUS_SUCCESS) { printf("CTL:Persis Out error returned from " "ctl_ha_msg_send %d\n", isc_retval); } break; case SPRO_PREEMPT: case SPRO_PRE_ABO: { int nretval; nretval = ctl_pro_preempt(softc, lun, res_key, sa_res_key, type, residx, ctsio, cdb, param); if (nretval != 0) return (CTL_RETVAL_COMPLETE); break; } default: panic("Invalid PR type %x", cdb->action); } done: free(ctsio->kern_data_ptr, M_CTL); ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (retval); } /* * This routine is for handling a message from the other SC pertaining to * persistent reserve out. All the error checking will have been done * so only perorming the action need be done here to keep the two * in sync. */ static void ctl_hndl_per_res_out_on_other_sc(union ctl_ha_msg *msg) { struct ctl_lun *lun; struct ctl_softc *softc; int i; uint32_t targ_lun; softc = control_softc; targ_lun = msg->hdr.nexus.targ_mapped_lun; lun = softc->ctl_luns[targ_lun]; mtx_lock(&lun->lun_lock); switch(msg->pr.pr_info.action) { case CTL_PR_REG_KEY: ctl_alloc_prkey(lun, msg->pr.pr_info.residx); if (ctl_get_prkey(lun, msg->pr.pr_info.residx) == 0) lun->pr_key_count++; ctl_set_prkey(lun, msg->pr.pr_info.residx, scsi_8btou64(msg->pr.pr_info.sa_res_key)); lun->PRGeneration++; break; case CTL_PR_UNREG_KEY: ctl_clr_prkey(lun, msg->pr.pr_info.residx); lun->pr_key_count--; /* XXX Need to see if the reservation has been released */ /* if so do we need to generate UA? */ if (msg->pr.pr_info.residx == lun->pr_res_idx) { lun->flags &= ~CTL_LUN_PR_RESERVED; lun->pr_res_idx = CTL_PR_NO_RESERVATION; if ((lun->res_type == SPR_TYPE_WR_EX_RO || lun->res_type == SPR_TYPE_EX_AC_RO) && lun->pr_key_count) { /* * If the reservation is a registrants * only type we need to generate a UA * for other registered inits. The * sense code should be RESERVATIONS * RELEASED */ for (i = 0; i < CTL_MAX_INITIATORS; i++) { if (ctl_get_prkey(lun, i + softc->persis_offset) == 0) continue; ctl_est_ua(lun, i, CTL_UA_RES_RELEASE); } } lun->res_type = 0; } else if (lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS) { if (lun->pr_key_count==0) { lun->flags &= ~CTL_LUN_PR_RESERVED; lun->res_type = 0; lun->pr_res_idx = CTL_PR_NO_RESERVATION; } } lun->PRGeneration++; break; case CTL_PR_RESERVE: lun->flags |= CTL_LUN_PR_RESERVED; lun->res_type = msg->pr.pr_info.res_type; lun->pr_res_idx = msg->pr.pr_info.residx; break; case CTL_PR_RELEASE: /* * if this isn't an exclusive access res generate UA for all * other registrants. */ if (lun->res_type != SPR_TYPE_EX_AC && lun->res_type != SPR_TYPE_WR_EX) { for (i = 0; i < CTL_MAX_INITIATORS; i++) if (ctl_get_prkey(lun, i + softc->persis_offset) != 0) ctl_est_ua(lun, i, CTL_UA_RES_RELEASE); } lun->flags &= ~CTL_LUN_PR_RESERVED; lun->pr_res_idx = CTL_PR_NO_RESERVATION; lun->res_type = 0; break; case CTL_PR_PREEMPT: ctl_pro_preempt_other(lun, msg); break; case CTL_PR_CLEAR: lun->flags &= ~CTL_LUN_PR_RESERVED; lun->res_type = 0; lun->pr_key_count = 0; lun->pr_res_idx = CTL_PR_NO_RESERVATION; for (i=0; i < 2*CTL_MAX_INITIATORS; i++) { if (ctl_get_prkey(lun, i) == 0) continue; ctl_clr_prkey(lun, i); ctl_est_res_ua(lun, i, CTL_UA_REG_PREEMPT); } lun->PRGeneration++; break; } mtx_unlock(&lun->lun_lock); } int ctl_read_write(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; struct ctl_lba_len_flags *lbalen; uint64_t lba; uint32_t num_blocks; int flags, retval; int isread; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; CTL_DEBUG_PRINT(("ctl_read_write: command: %#x\n", ctsio->cdb[0])); flags = 0; retval = CTL_RETVAL_COMPLETE; isread = ctsio->cdb[0] == READ_6 || ctsio->cdb[0] == READ_10 || ctsio->cdb[0] == READ_12 || ctsio->cdb[0] == READ_16; switch (ctsio->cdb[0]) { case READ_6: case WRITE_6: { struct scsi_rw_6 *cdb; cdb = (struct scsi_rw_6 *)ctsio->cdb; lba = scsi_3btoul(cdb->addr); /* only 5 bits are valid in the most significant address byte */ lba &= 0x1fffff; num_blocks = cdb->length; /* * This is correct according to SBC-2. */ if (num_blocks == 0) num_blocks = 256; break; } case READ_10: case WRITE_10: { struct scsi_rw_10 *cdb; cdb = (struct scsi_rw_10 *)ctsio->cdb; if (cdb->byte2 & SRW10_FUA) flags |= CTL_LLF_FUA; if (cdb->byte2 & SRW10_DPO) flags |= CTL_LLF_DPO; lba = scsi_4btoul(cdb->addr); num_blocks = scsi_2btoul(cdb->length); break; } case WRITE_VERIFY_10: { struct scsi_write_verify_10 *cdb; cdb = (struct scsi_write_verify_10 *)ctsio->cdb; flags |= CTL_LLF_FUA; if (cdb->byte2 & SWV_DPO) flags |= CTL_LLF_DPO; lba = scsi_4btoul(cdb->addr); num_blocks = scsi_2btoul(cdb->length); break; } case READ_12: case WRITE_12: { struct scsi_rw_12 *cdb; cdb = (struct scsi_rw_12 *)ctsio->cdb; if (cdb->byte2 & SRW12_FUA) flags |= CTL_LLF_FUA; if (cdb->byte2 & SRW12_DPO) flags |= CTL_LLF_DPO; lba = scsi_4btoul(cdb->addr); num_blocks = scsi_4btoul(cdb->length); break; } case WRITE_VERIFY_12: { struct scsi_write_verify_12 *cdb; cdb = (struct scsi_write_verify_12 *)ctsio->cdb; flags |= CTL_LLF_FUA; if (cdb->byte2 & SWV_DPO) flags |= CTL_LLF_DPO; lba = scsi_4btoul(cdb->addr); num_blocks = scsi_4btoul(cdb->length); break; } case READ_16: case WRITE_16: { struct scsi_rw_16 *cdb; cdb = (struct scsi_rw_16 *)ctsio->cdb; if (cdb->byte2 & SRW12_FUA) flags |= CTL_LLF_FUA; if (cdb->byte2 & SRW12_DPO) flags |= CTL_LLF_DPO; lba = scsi_8btou64(cdb->addr); num_blocks = scsi_4btoul(cdb->length); break; } case WRITE_ATOMIC_16: { struct scsi_rw_16 *cdb; if (lun->be_lun->atomicblock == 0) { ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } cdb = (struct scsi_rw_16 *)ctsio->cdb; if (cdb->byte2 & SRW12_FUA) flags |= CTL_LLF_FUA; if (cdb->byte2 & SRW12_DPO) flags |= CTL_LLF_DPO; lba = scsi_8btou64(cdb->addr); num_blocks = scsi_4btoul(cdb->length); if (num_blocks > lun->be_lun->atomicblock) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 12, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } break; } case WRITE_VERIFY_16: { struct scsi_write_verify_16 *cdb; cdb = (struct scsi_write_verify_16 *)ctsio->cdb; flags |= CTL_LLF_FUA; if (cdb->byte2 & SWV_DPO) flags |= CTL_LLF_DPO; lba = scsi_8btou64(cdb->addr); num_blocks = scsi_4btoul(cdb->length); break; } default: /* * We got a command we don't support. This shouldn't * happen, commands should be filtered out above us. */ ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); break; /* NOTREACHED */ } /* * The first check is to make sure we're in bounds, the second * check is to catch wrap-around problems. If the lba + num blocks * is less than the lba, then we've wrapped around and the block * range is invalid anyway. */ if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) || ((lba + num_blocks) < lba)) { ctl_set_lba_out_of_range(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * According to SBC-3, a transfer length of 0 is not an error. * Note that this cannot happen with WRITE(6) or READ(6), since 0 * translates to 256 blocks for those commands. */ if (num_blocks == 0) { ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* Set FUA and/or DPO if caches are disabled. */ if (isread) { if ((lun->mode_pages.caching_page[CTL_PAGE_CURRENT].flags1 & SCP_RCD) != 0) flags |= CTL_LLF_FUA | CTL_LLF_DPO; } else { if ((lun->mode_pages.caching_page[CTL_PAGE_CURRENT].flags1 & SCP_WCE) == 0) flags |= CTL_LLF_FUA; } lbalen = (struct ctl_lba_len_flags *) &ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; lbalen->lba = lba; lbalen->len = num_blocks; lbalen->flags = (isread ? CTL_LLF_READ : CTL_LLF_WRITE) | flags; ctsio->kern_total_len = num_blocks * lun->be_lun->blocksize; ctsio->kern_rel_offset = 0; CTL_DEBUG_PRINT(("ctl_read_write: calling data_submit()\n")); retval = lun->backend->data_submit((union ctl_io *)ctsio); return (retval); } static int ctl_cnw_cont(union ctl_io *io) { struct ctl_scsiio *ctsio; struct ctl_lun *lun; struct ctl_lba_len_flags *lbalen; int retval; ctsio = &io->scsiio; ctsio->io_hdr.status = CTL_STATUS_NONE; ctsio->io_hdr.flags &= ~CTL_FLAG_IO_CONT; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; lbalen = (struct ctl_lba_len_flags *) &ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; lbalen->flags &= ~CTL_LLF_COMPARE; lbalen->flags |= CTL_LLF_WRITE; CTL_DEBUG_PRINT(("ctl_cnw_cont: calling data_submit()\n")); retval = lun->backend->data_submit((union ctl_io *)ctsio); return (retval); } int ctl_cnw(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; struct ctl_lba_len_flags *lbalen; uint64_t lba; uint32_t num_blocks; int flags, retval; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; CTL_DEBUG_PRINT(("ctl_cnw: command: %#x\n", ctsio->cdb[0])); flags = 0; retval = CTL_RETVAL_COMPLETE; switch (ctsio->cdb[0]) { case COMPARE_AND_WRITE: { struct scsi_compare_and_write *cdb; cdb = (struct scsi_compare_and_write *)ctsio->cdb; if (cdb->byte2 & SRW10_FUA) flags |= CTL_LLF_FUA; if (cdb->byte2 & SRW10_DPO) flags |= CTL_LLF_DPO; lba = scsi_8btou64(cdb->addr); num_blocks = cdb->length; break; } default: /* * We got a command we don't support. This shouldn't * happen, commands should be filtered out above us. */ ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); break; /* NOTREACHED */ } /* * The first check is to make sure we're in bounds, the second * check is to catch wrap-around problems. If the lba + num blocks * is less than the lba, then we've wrapped around and the block * range is invalid anyway. */ if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) || ((lba + num_blocks) < lba)) { ctl_set_lba_out_of_range(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * According to SBC-3, a transfer length of 0 is not an error. */ if (num_blocks == 0) { ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* Set FUA if write cache is disabled. */ if ((lun->mode_pages.caching_page[CTL_PAGE_CURRENT].flags1 & SCP_WCE) == 0) flags |= CTL_LLF_FUA; ctsio->kern_total_len = 2 * num_blocks * lun->be_lun->blocksize; ctsio->kern_rel_offset = 0; /* * Set the IO_CONT flag, so that if this I/O gets passed to * ctl_data_submit_done(), it'll get passed back to * ctl_ctl_cnw_cont() for further processing. */ ctsio->io_hdr.flags |= CTL_FLAG_IO_CONT; ctsio->io_cont = ctl_cnw_cont; lbalen = (struct ctl_lba_len_flags *) &ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; lbalen->lba = lba; lbalen->len = num_blocks; lbalen->flags = CTL_LLF_COMPARE | flags; CTL_DEBUG_PRINT(("ctl_cnw: calling data_submit()\n")); retval = lun->backend->data_submit((union ctl_io *)ctsio); return (retval); } int ctl_verify(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; struct ctl_lba_len_flags *lbalen; uint64_t lba; uint32_t num_blocks; int bytchk, flags; int retval; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; CTL_DEBUG_PRINT(("ctl_verify: command: %#x\n", ctsio->cdb[0])); bytchk = 0; flags = CTL_LLF_FUA; retval = CTL_RETVAL_COMPLETE; switch (ctsio->cdb[0]) { case VERIFY_10: { struct scsi_verify_10 *cdb; cdb = (struct scsi_verify_10 *)ctsio->cdb; if (cdb->byte2 & SVFY_BYTCHK) bytchk = 1; if (cdb->byte2 & SVFY_DPO) flags |= CTL_LLF_DPO; lba = scsi_4btoul(cdb->addr); num_blocks = scsi_2btoul(cdb->length); break; } case VERIFY_12: { struct scsi_verify_12 *cdb; cdb = (struct scsi_verify_12 *)ctsio->cdb; if (cdb->byte2 & SVFY_BYTCHK) bytchk = 1; if (cdb->byte2 & SVFY_DPO) flags |= CTL_LLF_DPO; lba = scsi_4btoul(cdb->addr); num_blocks = scsi_4btoul(cdb->length); break; } case VERIFY_16: { struct scsi_rw_16 *cdb; cdb = (struct scsi_rw_16 *)ctsio->cdb; if (cdb->byte2 & SVFY_BYTCHK) bytchk = 1; if (cdb->byte2 & SVFY_DPO) flags |= CTL_LLF_DPO; lba = scsi_8btou64(cdb->addr); num_blocks = scsi_4btoul(cdb->length); break; } default: /* * We got a command we don't support. This shouldn't * happen, commands should be filtered out above us. */ ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * The first check is to make sure we're in bounds, the second * check is to catch wrap-around problems. If the lba + num blocks * is less than the lba, then we've wrapped around and the block * range is invalid anyway. */ if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) || ((lba + num_blocks) < lba)) { ctl_set_lba_out_of_range(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * According to SBC-3, a transfer length of 0 is not an error. */ if (num_blocks == 0) { ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } lbalen = (struct ctl_lba_len_flags *) &ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; lbalen->lba = lba; lbalen->len = num_blocks; if (bytchk) { lbalen->flags = CTL_LLF_COMPARE | flags; ctsio->kern_total_len = num_blocks * lun->be_lun->blocksize; } else { lbalen->flags = CTL_LLF_VERIFY | flags; ctsio->kern_total_len = 0; } ctsio->kern_rel_offset = 0; CTL_DEBUG_PRINT(("ctl_verify: calling data_submit()\n")); retval = lun->backend->data_submit((union ctl_io *)ctsio); return (retval); } int ctl_report_luns(struct ctl_scsiio *ctsio) { struct ctl_softc *softc = control_softc; struct scsi_report_luns *cdb; struct scsi_report_luns_data *lun_data; struct ctl_lun *lun, *request_lun; struct ctl_port *port; int num_luns, retval; uint32_t alloc_len, lun_datalen; int num_filled, well_known; uint32_t initidx, targ_lun_id, lun_id; retval = CTL_RETVAL_COMPLETE; well_known = 0; cdb = (struct scsi_report_luns *)ctsio->cdb; port = ctl_io_port(&ctsio->io_hdr); CTL_DEBUG_PRINT(("ctl_report_luns\n")); mtx_lock(&softc->ctl_lock); num_luns = 0; for (targ_lun_id = 0; targ_lun_id < CTL_MAX_LUNS; targ_lun_id++) { if (ctl_lun_map_from_port(port, targ_lun_id) < CTL_MAX_LUNS) num_luns++; } mtx_unlock(&softc->ctl_lock); switch (cdb->select_report) { case RPL_REPORT_DEFAULT: case RPL_REPORT_ALL: break; case RPL_REPORT_WELLKNOWN: well_known = 1; num_luns = 0; break; default: ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (retval); break; /* NOTREACHED */ } alloc_len = scsi_4btoul(cdb->length); /* * The initiator has to allocate at least 16 bytes for this request, * so he can at least get the header and the first LUN. Otherwise * we reject the request (per SPC-3 rev 14, section 6.21). */ if (alloc_len < (sizeof(struct scsi_report_luns_data) + sizeof(struct scsi_report_luns_lundata))) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 6, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (retval); } request_lun = (struct ctl_lun *) ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; lun_datalen = sizeof(*lun_data) + (num_luns * sizeof(struct scsi_report_luns_lundata)); ctsio->kern_data_ptr = malloc(lun_datalen, M_CTL, M_WAITOK | M_ZERO); lun_data = (struct scsi_report_luns_data *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; initidx = ctl_get_initindex(&ctsio->io_hdr.nexus); mtx_lock(&softc->ctl_lock); for (targ_lun_id = 0, num_filled = 0; targ_lun_id < CTL_MAX_LUNS && num_filled < num_luns; targ_lun_id++) { lun_id = ctl_lun_map_from_port(port, targ_lun_id); if (lun_id >= CTL_MAX_LUNS) continue; lun = softc->ctl_luns[lun_id]; if (lun == NULL) continue; if (targ_lun_id <= 0xff) { /* * Peripheral addressing method, bus number 0. */ lun_data->luns[num_filled].lundata[0] = RPL_LUNDATA_ATYP_PERIPH; lun_data->luns[num_filled].lundata[1] = targ_lun_id; num_filled++; } else if (targ_lun_id <= 0x3fff) { /* * Flat addressing method. */ lun_data->luns[num_filled].lundata[0] = RPL_LUNDATA_ATYP_FLAT | (targ_lun_id >> 8); lun_data->luns[num_filled].lundata[1] = (targ_lun_id & 0xff); num_filled++; } else if (targ_lun_id <= 0xffffff) { /* * Extended flat addressing method. */ lun_data->luns[num_filled].lundata[0] = RPL_LUNDATA_ATYP_EXTLUN | 0x12; scsi_ulto3b(targ_lun_id, &lun_data->luns[num_filled].lundata[1]); num_filled++; } else { printf("ctl_report_luns: bogus LUN number %jd, " "skipping\n", (intmax_t)targ_lun_id); } /* * According to SPC-3, rev 14 section 6.21: * * "The execution of a REPORT LUNS command to any valid and * installed logical unit shall clear the REPORTED LUNS DATA * HAS CHANGED unit attention condition for all logical * units of that target with respect to the requesting * initiator. A valid and installed logical unit is one * having a PERIPHERAL QUALIFIER of 000b in the standard * INQUIRY data (see 6.4.2)." * * If request_lun is NULL, the LUN this report luns command * was issued to is either disabled or doesn't exist. In that * case, we shouldn't clear any pending lun change unit * attention. */ if (request_lun != NULL) { mtx_lock(&lun->lun_lock); ctl_clr_ua(lun, initidx, CTL_UA_LUN_CHANGE); mtx_unlock(&lun->lun_lock); } } mtx_unlock(&softc->ctl_lock); /* * It's quite possible that we've returned fewer LUNs than we allocated * space for. Trim it. */ lun_datalen = sizeof(*lun_data) + (num_filled * sizeof(struct scsi_report_luns_lundata)); if (lun_datalen < alloc_len) { ctsio->residual = alloc_len - lun_datalen; ctsio->kern_data_len = lun_datalen; ctsio->kern_total_len = lun_datalen; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * We set this to the actual data length, regardless of how much * space we actually have to return results. If the user looks at * this value, he'll know whether or not he allocated enough space * and reissue the command if necessary. We don't support well * known logical units, so if the user asks for that, return none. */ scsi_ulto4b(lun_datalen - 8, lun_data->length); /* * We can only return SCSI_STATUS_CHECK_COND when we can't satisfy * this request. */ ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (retval); } int ctl_request_sense(struct ctl_scsiio *ctsio) { struct scsi_request_sense *cdb; struct scsi_sense_data *sense_ptr; struct ctl_softc *ctl_softc; struct ctl_lun *lun; uint32_t initidx; int have_error; scsi_sense_data_type sense_format; ctl_ua_type ua_type; cdb = (struct scsi_request_sense *)ctsio->cdb; ctl_softc = control_softc; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; CTL_DEBUG_PRINT(("ctl_request_sense\n")); /* * Determine which sense format the user wants. */ if (cdb->byte2 & SRS_DESC) sense_format = SSD_TYPE_DESC; else sense_format = SSD_TYPE_FIXED; ctsio->kern_data_ptr = malloc(sizeof(*sense_ptr), M_CTL, M_WAITOK); sense_ptr = (struct scsi_sense_data *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; /* * struct scsi_sense_data, which is currently set to 256 bytes, is * larger than the largest allowed value for the length field in the * REQUEST SENSE CDB, which is 252 bytes as of SPC-4. */ ctsio->residual = 0; ctsio->kern_data_len = cdb->length; ctsio->kern_total_len = cdb->length; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * If we don't have a LUN, we don't have any pending sense. */ if (lun == NULL) goto no_sense; have_error = 0; initidx = ctl_get_initindex(&ctsio->io_hdr.nexus); /* * Check for pending sense, and then for pending unit attentions. * Pending sense gets returned first, then pending unit attentions. */ mtx_lock(&lun->lun_lock); #ifdef CTL_WITH_CA if (ctl_is_set(lun->have_ca, initidx)) { scsi_sense_data_type stored_format; /* * Check to see which sense format was used for the stored * sense data. */ stored_format = scsi_sense_type(&lun->pending_sense[initidx]); /* * If the user requested a different sense format than the * one we stored, then we need to convert it to the other * format. If we're going from descriptor to fixed format * sense data, we may lose things in translation, depending * on what options were used. * * If the stored format is SSD_TYPE_NONE (i.e. invalid), * for some reason we'll just copy it out as-is. */ if ((stored_format == SSD_TYPE_FIXED) && (sense_format == SSD_TYPE_DESC)) ctl_sense_to_desc((struct scsi_sense_data_fixed *) &lun->pending_sense[initidx], (struct scsi_sense_data_desc *)sense_ptr); else if ((stored_format == SSD_TYPE_DESC) && (sense_format == SSD_TYPE_FIXED)) ctl_sense_to_fixed((struct scsi_sense_data_desc *) &lun->pending_sense[initidx], (struct scsi_sense_data_fixed *)sense_ptr); else memcpy(sense_ptr, &lun->pending_sense[initidx], MIN(sizeof(*sense_ptr), sizeof(lun->pending_sense[initidx]))); ctl_clear_mask(lun->have_ca, initidx); have_error = 1; } else #endif { ua_type = ctl_build_ua(lun, initidx, sense_ptr, sense_format); if (ua_type != CTL_UA_NONE) have_error = 1; if (ua_type == CTL_UA_LUN_CHANGE) { mtx_unlock(&lun->lun_lock); mtx_lock(&ctl_softc->ctl_lock); ctl_clr_ua_allluns(ctl_softc, initidx, ua_type); mtx_unlock(&ctl_softc->ctl_lock); mtx_lock(&lun->lun_lock); } } mtx_unlock(&lun->lun_lock); /* * We already have a pending error, return it. */ if (have_error != 0) { /* * We report the SCSI status as OK, since the status of the * request sense command itself is OK. * We report 0 for the sense length, because we aren't doing * autosense in this case. We're reporting sense as * parameter data. */ ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } no_sense: /* * No sense information to report, so we report that everything is * okay. */ ctl_set_sense_data(sense_ptr, lun, sense_format, /*current_error*/ 1, /*sense_key*/ SSD_KEY_NO_SENSE, /*asc*/ 0x00, /*ascq*/ 0x00, SSD_ELEM_NONE); /* * We report 0 for the sense length, because we aren't doing * autosense in this case. We're reporting sense as parameter data. */ ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_tur(struct ctl_scsiio *ctsio) { CTL_DEBUG_PRINT(("ctl_tur\n")); ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } #ifdef notyet static int ctl_cmddt_inquiry(struct ctl_scsiio *ctsio) { } #endif /* * SCSI VPD page 0x00, the Supported VPD Pages page. */ static int ctl_inquiry_evpd_supported(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_supported_pages *pages; int sup_page_size; struct ctl_lun *lun; int p; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; sup_page_size = sizeof(struct scsi_vpd_supported_pages) * SCSI_EVPD_NUM_SUPPORTED_PAGES; ctsio->kern_data_ptr = malloc(sup_page_size, M_CTL, M_WAITOK | M_ZERO); pages = (struct scsi_vpd_supported_pages *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; if (sup_page_size < alloc_len) { ctsio->residual = alloc_len - sup_page_size; ctsio->kern_data_len = sup_page_size; ctsio->kern_total_len = sup_page_size; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. Need to change this * to figure out whether the disk device is actually online or not. */ if (lun != NULL) pages->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else pages->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; p = 0; /* Supported VPD pages */ pages->page_list[p++] = SVPD_SUPPORTED_PAGES; /* Serial Number */ pages->page_list[p++] = SVPD_UNIT_SERIAL_NUMBER; /* Device Identification */ pages->page_list[p++] = SVPD_DEVICE_ID; /* Extended INQUIRY Data */ pages->page_list[p++] = SVPD_EXTENDED_INQUIRY_DATA; /* Mode Page Policy */ pages->page_list[p++] = SVPD_MODE_PAGE_POLICY; /* SCSI Ports */ pages->page_list[p++] = SVPD_SCSI_PORTS; /* Third-party Copy */ pages->page_list[p++] = SVPD_SCSI_TPC; if (lun != NULL && lun->be_lun->lun_type == T_DIRECT) { /* Block limits */ pages->page_list[p++] = SVPD_BLOCK_LIMITS; /* Block Device Characteristics */ pages->page_list[p++] = SVPD_BDC; /* Logical Block Provisioning */ pages->page_list[p++] = SVPD_LBP; } pages->length = p; ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * SCSI VPD page 0x80, the Unit Serial Number page. */ static int ctl_inquiry_evpd_serial(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_unit_serial_number *sn_ptr; struct ctl_lun *lun; int data_len; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; data_len = 4 + CTL_SN_LEN; ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO); sn_ptr = (struct scsi_vpd_unit_serial_number *)ctsio->kern_data_ptr; if (data_len < alloc_len) { ctsio->residual = alloc_len - data_len; ctsio->kern_data_len = data_len; ctsio->kern_total_len = data_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. Need to change this * to figure out whether the disk device is actually online or not. */ if (lun != NULL) sn_ptr->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else sn_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; sn_ptr->page_code = SVPD_UNIT_SERIAL_NUMBER; sn_ptr->length = CTL_SN_LEN; /* * If we don't have a LUN, we just leave the serial number as * all spaces. */ if (lun != NULL) { strncpy((char *)sn_ptr->serial_num, (char *)lun->be_lun->serial_num, CTL_SN_LEN); } else memset(sn_ptr->serial_num, 0x20, CTL_SN_LEN); ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * SCSI VPD page 0x86, the Extended INQUIRY Data page. */ static int ctl_inquiry_evpd_eid(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_extended_inquiry_data *eid_ptr; struct ctl_lun *lun; int data_len; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; data_len = sizeof(struct scsi_vpd_extended_inquiry_data); ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO); eid_ptr = (struct scsi_vpd_extended_inquiry_data *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; if (data_len < alloc_len) { ctsio->residual = alloc_len - data_len; ctsio->kern_data_len = data_len; ctsio->kern_total_len = data_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. */ if (lun != NULL) eid_ptr->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else eid_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; eid_ptr->page_code = SVPD_EXTENDED_INQUIRY_DATA; scsi_ulto2b(data_len - 4, eid_ptr->page_length); /* * We support head of queue, ordered and simple tags. */ eid_ptr->flags2 = SVPD_EID_HEADSUP | SVPD_EID_ORDSUP | SVPD_EID_SIMPSUP; /* * Volatile cache supported. */ eid_ptr->flags3 = SVPD_EID_V_SUP; /* * This means that we clear the REPORTED LUNS DATA HAS CHANGED unit * attention for a particular IT nexus on all LUNs once we report * it to that nexus once. This bit is required as of SPC-4. */ eid_ptr->flags4 = SVPD_EID_LUICLT; /* * XXX KDM in order to correctly answer this, we would need * information from the SIM to determine how much sense data it * can send. So this would really be a path inquiry field, most * likely. This can be set to a maximum of 252 according to SPC-4, * but the hardware may or may not be able to support that much. * 0 just means that the maximum sense data length is not reported. */ eid_ptr->max_sense_length = 0; ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } static int ctl_inquiry_evpd_mpp(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_mode_page_policy *mpp_ptr; struct ctl_lun *lun; int data_len; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; data_len = sizeof(struct scsi_vpd_mode_page_policy) + sizeof(struct scsi_vpd_mode_page_policy_descr); ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO); mpp_ptr = (struct scsi_vpd_mode_page_policy *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; if (data_len < alloc_len) { ctsio->residual = alloc_len - data_len; ctsio->kern_data_len = data_len; ctsio->kern_total_len = data_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. */ if (lun != NULL) mpp_ptr->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else mpp_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; mpp_ptr->page_code = SVPD_MODE_PAGE_POLICY; scsi_ulto2b(data_len - 4, mpp_ptr->page_length); mpp_ptr->descr[0].page_code = 0x3f; mpp_ptr->descr[0].subpage_code = 0xff; mpp_ptr->descr[0].policy = SVPD_MPP_SHARED; ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * SCSI VPD page 0x83, the Device Identification page. */ static int ctl_inquiry_evpd_devid(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_device_id *devid_ptr; struct scsi_vpd_id_descriptor *desc; struct ctl_softc *softc; struct ctl_lun *lun; struct ctl_port *port; int data_len; uint8_t proto; softc = control_softc; port = softc->ctl_ports[ctl_port_idx(ctsio->io_hdr.nexus.targ_port)]; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; data_len = sizeof(struct scsi_vpd_device_id) + sizeof(struct scsi_vpd_id_descriptor) + sizeof(struct scsi_vpd_id_rel_trgt_port_id) + sizeof(struct scsi_vpd_id_descriptor) + sizeof(struct scsi_vpd_id_trgt_port_grp_id); if (lun && lun->lun_devid) data_len += lun->lun_devid->len; if (port->port_devid) data_len += port->port_devid->len; if (port->target_devid) data_len += port->target_devid->len; ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO); devid_ptr = (struct scsi_vpd_device_id *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; if (data_len < alloc_len) { ctsio->residual = alloc_len - data_len; ctsio->kern_data_len = data_len; ctsio->kern_total_len = data_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. */ if (lun != NULL) devid_ptr->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else devid_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; devid_ptr->page_code = SVPD_DEVICE_ID; scsi_ulto2b(data_len - 4, devid_ptr->length); if (port->port_type == CTL_PORT_FC) proto = SCSI_PROTO_FC << 4; else if (port->port_type == CTL_PORT_ISCSI) proto = SCSI_PROTO_ISCSI << 4; else proto = SCSI_PROTO_SPI << 4; desc = (struct scsi_vpd_id_descriptor *)devid_ptr->desc_list; /* * We're using a LUN association here. i.e., this device ID is a * per-LUN identifier. */ if (lun && lun->lun_devid) { memcpy(desc, lun->lun_devid->data, lun->lun_devid->len); desc = (struct scsi_vpd_id_descriptor *)((uint8_t *)desc + lun->lun_devid->len); } /* * This is for the WWPN which is a port association. */ if (port->port_devid) { memcpy(desc, port->port_devid->data, port->port_devid->len); desc = (struct scsi_vpd_id_descriptor *)((uint8_t *)desc + port->port_devid->len); } /* * This is for the Relative Target Port(type 4h) identifier */ desc->proto_codeset = proto | SVPD_ID_CODESET_BINARY; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_PORT | SVPD_ID_TYPE_RELTARG; desc->length = 4; scsi_ulto2b(ctsio->io_hdr.nexus.targ_port, &desc->identifier[2]); desc = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] + sizeof(struct scsi_vpd_id_rel_trgt_port_id)); /* * This is for the Target Port Group(type 5h) identifier */ desc->proto_codeset = proto | SVPD_ID_CODESET_BINARY; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_PORT | SVPD_ID_TYPE_TPORTGRP; desc->length = 4; scsi_ulto2b(ctsio->io_hdr.nexus.targ_port / CTL_MAX_PORTS + 1, &desc->identifier[2]); desc = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] + sizeof(struct scsi_vpd_id_trgt_port_grp_id)); /* * This is for the Target identifier */ if (port->target_devid) { memcpy(desc, port->target_devid->data, port->target_devid->len); } ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } static int ctl_inquiry_evpd_scsi_ports(struct ctl_scsiio *ctsio, int alloc_len) { struct ctl_softc *softc = control_softc; struct scsi_vpd_scsi_ports *sp; struct scsi_vpd_port_designation *pd; struct scsi_vpd_port_designation_cont *pdc; struct ctl_lun *lun; struct ctl_port *port; int data_len, num_target_ports, iid_len, id_len, g, pg, p; int num_target_port_groups; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; if (softc->is_single) num_target_port_groups = 1; else num_target_port_groups = NUM_TARGET_PORT_GROUPS; num_target_ports = 0; iid_len = 0; id_len = 0; mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(port, &softc->port_list, links) { if ((port->status & CTL_PORT_STATUS_ONLINE) == 0) continue; if (lun != NULL && ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS) continue; num_target_ports++; if (port->init_devid) iid_len += port->init_devid->len; if (port->port_devid) id_len += port->port_devid->len; } mtx_unlock(&softc->ctl_lock); data_len = sizeof(struct scsi_vpd_scsi_ports) + num_target_port_groups * num_target_ports * (sizeof(struct scsi_vpd_port_designation) + sizeof(struct scsi_vpd_port_designation_cont)) + iid_len + id_len; ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO); sp = (struct scsi_vpd_scsi_ports *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; if (data_len < alloc_len) { ctsio->residual = alloc_len - data_len; ctsio->kern_data_len = data_len; ctsio->kern_total_len = data_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. Need to change this * to figure out whether the disk device is actually online or not. */ if (lun != NULL) sp->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else sp->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; sp->page_code = SVPD_SCSI_PORTS; scsi_ulto2b(data_len - sizeof(struct scsi_vpd_scsi_ports), sp->page_length); pd = &sp->design[0]; mtx_lock(&softc->ctl_lock); pg = softc->port_offset / CTL_MAX_PORTS; for (g = 0; g < num_target_port_groups; g++) { STAILQ_FOREACH(port, &softc->port_list, links) { if ((port->status & CTL_PORT_STATUS_ONLINE) == 0) continue; if (lun != NULL && ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS) continue; p = port->targ_port % CTL_MAX_PORTS + g * CTL_MAX_PORTS; scsi_ulto2b(p, pd->relative_port_id); if (port->init_devid && g == pg) { iid_len = port->init_devid->len; memcpy(pd->initiator_transportid, port->init_devid->data, port->init_devid->len); } else iid_len = 0; scsi_ulto2b(iid_len, pd->initiator_transportid_length); pdc = (struct scsi_vpd_port_designation_cont *) (&pd->initiator_transportid[iid_len]); if (port->port_devid && g == pg) { id_len = port->port_devid->len; memcpy(pdc->target_port_descriptors, port->port_devid->data, port->port_devid->len); } else id_len = 0; scsi_ulto2b(id_len, pdc->target_port_descriptors_length); pd = (struct scsi_vpd_port_designation *) ((uint8_t *)pdc->target_port_descriptors + id_len); } } mtx_unlock(&softc->ctl_lock); ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } static int ctl_inquiry_evpd_block_limits(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_block_limits *bl_ptr; struct ctl_lun *lun; int bs; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; ctsio->kern_data_ptr = malloc(sizeof(*bl_ptr), M_CTL, M_WAITOK | M_ZERO); bl_ptr = (struct scsi_vpd_block_limits *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; if (sizeof(*bl_ptr) < alloc_len) { ctsio->residual = alloc_len - sizeof(*bl_ptr); ctsio->kern_data_len = sizeof(*bl_ptr); ctsio->kern_total_len = sizeof(*bl_ptr); } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. Need to change this * to figure out whether the disk device is actually online or not. */ if (lun != NULL) bl_ptr->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else bl_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; bl_ptr->page_code = SVPD_BLOCK_LIMITS; scsi_ulto2b(sizeof(*bl_ptr) - 4, bl_ptr->page_length); bl_ptr->max_cmp_write_len = 0xff; scsi_ulto4b(0xffffffff, bl_ptr->max_txfer_len); if (lun != NULL) { bs = lun->be_lun->blocksize; scsi_ulto4b(lun->be_lun->opttxferlen, bl_ptr->opt_txfer_len); if (lun->be_lun->flags & CTL_LUN_FLAG_UNMAP) { scsi_ulto4b(0xffffffff, bl_ptr->max_unmap_lba_cnt); scsi_ulto4b(0xffffffff, bl_ptr->max_unmap_blk_cnt); if (lun->be_lun->ublockexp != 0) { scsi_ulto4b((1 << lun->be_lun->ublockexp), bl_ptr->opt_unmap_grain); scsi_ulto4b(0x80000000 | lun->be_lun->ublockoff, bl_ptr->unmap_grain_align); } } scsi_ulto4b(lun->be_lun->atomicblock, bl_ptr->max_atomic_transfer_length); scsi_ulto4b(0, bl_ptr->atomic_alignment); scsi_ulto4b(0, bl_ptr->atomic_transfer_length_granularity); } scsi_u64to8b(UINT64_MAX, bl_ptr->max_write_same_length); ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } static int ctl_inquiry_evpd_bdc(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_block_device_characteristics *bdc_ptr; struct ctl_lun *lun; const char *value; u_int i; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; ctsio->kern_data_ptr = malloc(sizeof(*bdc_ptr), M_CTL, M_WAITOK | M_ZERO); bdc_ptr = (struct scsi_vpd_block_device_characteristics *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; if (sizeof(*bdc_ptr) < alloc_len) { ctsio->residual = alloc_len - sizeof(*bdc_ptr); ctsio->kern_data_len = sizeof(*bdc_ptr); ctsio->kern_total_len = sizeof(*bdc_ptr); } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. Need to change this * to figure out whether the disk device is actually online or not. */ if (lun != NULL) bdc_ptr->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else bdc_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; bdc_ptr->page_code = SVPD_BDC; scsi_ulto2b(sizeof(*bdc_ptr) - 4, bdc_ptr->page_length); if (lun != NULL && (value = ctl_get_opt(&lun->be_lun->options, "rpm")) != NULL) i = strtol(value, NULL, 0); else i = CTL_DEFAULT_ROTATION_RATE; scsi_ulto2b(i, bdc_ptr->medium_rotation_rate); if (lun != NULL && (value = ctl_get_opt(&lun->be_lun->options, "formfactor")) != NULL) i = strtol(value, NULL, 0); else i = 0; bdc_ptr->wab_wac_ff = (i & 0x0f); bdc_ptr->flags = SVPD_FUAB | SVPD_VBULS; ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } static int ctl_inquiry_evpd_lbp(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_logical_block_prov *lbp_ptr; struct ctl_lun *lun; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; ctsio->kern_data_ptr = malloc(sizeof(*lbp_ptr), M_CTL, M_WAITOK | M_ZERO); lbp_ptr = (struct scsi_vpd_logical_block_prov *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; if (sizeof(*lbp_ptr) < alloc_len) { ctsio->residual = alloc_len - sizeof(*lbp_ptr); ctsio->kern_data_len = sizeof(*lbp_ptr); ctsio->kern_total_len = sizeof(*lbp_ptr); } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. Need to change this * to figure out whether the disk device is actually online or not. */ if (lun != NULL) lbp_ptr->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else lbp_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; lbp_ptr->page_code = SVPD_LBP; scsi_ulto2b(sizeof(*lbp_ptr) - 4, lbp_ptr->page_length); lbp_ptr->threshold_exponent = CTL_LBP_EXPONENT; if (lun != NULL && lun->be_lun->flags & CTL_LUN_FLAG_UNMAP) { lbp_ptr->flags = SVPD_LBP_UNMAP | SVPD_LBP_WS16 | SVPD_LBP_WS10 | SVPD_LBP_RZ | SVPD_LBP_ANC_SUP; lbp_ptr->prov_type = SVPD_LBP_THIN; } ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * INQUIRY with the EVPD bit set. */ static int ctl_inquiry_evpd(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; struct scsi_inquiry *cdb; int alloc_len, retval; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; cdb = (struct scsi_inquiry *)ctsio->cdb; alloc_len = scsi_2btoul(cdb->length); switch (cdb->page_code) { case SVPD_SUPPORTED_PAGES: retval = ctl_inquiry_evpd_supported(ctsio, alloc_len); break; case SVPD_UNIT_SERIAL_NUMBER: retval = ctl_inquiry_evpd_serial(ctsio, alloc_len); break; case SVPD_DEVICE_ID: retval = ctl_inquiry_evpd_devid(ctsio, alloc_len); break; case SVPD_EXTENDED_INQUIRY_DATA: retval = ctl_inquiry_evpd_eid(ctsio, alloc_len); break; case SVPD_MODE_PAGE_POLICY: retval = ctl_inquiry_evpd_mpp(ctsio, alloc_len); break; case SVPD_SCSI_PORTS: retval = ctl_inquiry_evpd_scsi_ports(ctsio, alloc_len); break; case SVPD_SCSI_TPC: retval = ctl_inquiry_evpd_tpc(ctsio, alloc_len); break; case SVPD_BLOCK_LIMITS: if (lun == NULL || lun->be_lun->lun_type != T_DIRECT) goto err; retval = ctl_inquiry_evpd_block_limits(ctsio, alloc_len); break; case SVPD_BDC: if (lun == NULL || lun->be_lun->lun_type != T_DIRECT) goto err; retval = ctl_inquiry_evpd_bdc(ctsio, alloc_len); break; case SVPD_LBP: if (lun == NULL || lun->be_lun->lun_type != T_DIRECT) goto err; retval = ctl_inquiry_evpd_lbp(ctsio, alloc_len); break; default: err: ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); retval = CTL_RETVAL_COMPLETE; break; } return (retval); } /* * Standard INQUIRY data. */ static int ctl_inquiry_std(struct ctl_scsiio *ctsio) { struct scsi_inquiry_data *inq_ptr; struct scsi_inquiry *cdb; struct ctl_softc *softc; struct ctl_lun *lun; char *val; uint32_t alloc_len, data_len; ctl_port_type port_type; softc = control_softc; /* * Figure out whether we're talking to a Fibre Channel port or not. * We treat the ioctl front end, and any SCSI adapters, as packetized * SCSI front ends. */ port_type = softc->ctl_ports[ ctl_port_idx(ctsio->io_hdr.nexus.targ_port)]->port_type; if (port_type == CTL_PORT_IOCTL || port_type == CTL_PORT_INTERNAL) port_type = CTL_PORT_SCSI; lun = ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; cdb = (struct scsi_inquiry *)ctsio->cdb; alloc_len = scsi_2btoul(cdb->length); /* * We malloc the full inquiry data size here and fill it * in. If the user only asks for less, we'll give him * that much. */ data_len = offsetof(struct scsi_inquiry_data, vendor_specific1); ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO); inq_ptr = (struct scsi_inquiry_data *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; if (data_len < alloc_len) { ctsio->residual = alloc_len - data_len; ctsio->kern_data_len = data_len; ctsio->kern_total_len = data_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } /* * If we have a LUN configured, report it as connected. Otherwise, * report that it is offline or no device is supported, depending * on the value of inquiry_pq_no_lun. * * According to the spec (SPC-4 r34), the peripheral qualifier * SID_QUAL_LU_OFFLINE (001b) is used in the following scenario: * * "A peripheral device having the specified peripheral device type * is not connected to this logical unit. However, the device * server is capable of supporting the specified peripheral device * type on this logical unit." * * According to the same spec, the peripheral qualifier * SID_QUAL_BAD_LU (011b) is used in this scenario: * * "The device server is not capable of supporting a peripheral * device on this logical unit. For this peripheral qualifier the * peripheral device type shall be set to 1Fh. All other peripheral * device type values are reserved for this peripheral qualifier." * * Given the text, it would seem that we probably want to report that * the LUN is offline here. There is no LUN connected, but we can * support a LUN at the given LUN number. * * In the real world, though, it sounds like things are a little * different: * * - Linux, when presented with a LUN with the offline peripheral * qualifier, will create an sg driver instance for it. So when * you attach it to CTL, you wind up with a ton of sg driver * instances. (One for every LUN that Linux bothered to probe.) * Linux does this despite the fact that it issues a REPORT LUNs * to LUN 0 to get the inventory of supported LUNs. * * - There is other anecdotal evidence (from Emulex folks) about * arrays that use the offline peripheral qualifier for LUNs that * are on the "passive" path in an active/passive array. * * So the solution is provide a hopefully reasonable default * (return bad/no LUN) and allow the user to change the behavior * with a tunable/sysctl variable. */ if (lun != NULL) inq_ptr->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else if (softc->inquiry_pq_no_lun == 0) inq_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; else inq_ptr->device = (SID_QUAL_BAD_LU << 5) | T_NODEVICE; /* RMB in byte 2 is 0 */ inq_ptr->version = SCSI_REV_SPC4; /* * According to SAM-3, even if a device only supports a single * level of LUN addressing, it should still set the HISUP bit: * * 4.9.1 Logical unit numbers overview * * All logical unit number formats described in this standard are * hierarchical in structure even when only a single level in that * hierarchy is used. The HISUP bit shall be set to one in the * standard INQUIRY data (see SPC-2) when any logical unit number * format described in this standard is used. Non-hierarchical * formats are outside the scope of this standard. * * Therefore we set the HiSup bit here. * * The reponse format is 2, per SPC-3. */ inq_ptr->response_format = SID_HiSup | 2; inq_ptr->additional_length = data_len - (offsetof(struct scsi_inquiry_data, additional_length) + 1); CTL_DEBUG_PRINT(("additional_length = %d\n", inq_ptr->additional_length)); inq_ptr->spc3_flags = SPC3_SID_3PC | SPC3_SID_TPGS_IMPLICIT; /* 16 bit addressing */ if (port_type == CTL_PORT_SCSI) inq_ptr->spc2_flags = SPC2_SID_ADDR16; /* XXX set the SID_MultiP bit here if we're actually going to respond on multiple ports */ inq_ptr->spc2_flags |= SPC2_SID_MultiP; /* 16 bit data bus, synchronous transfers */ if (port_type == CTL_PORT_SCSI) inq_ptr->flags = SID_WBus16 | SID_Sync; /* * XXX KDM do we want to support tagged queueing on the control * device at all? */ if ((lun == NULL) || (lun->be_lun->lun_type != T_PROCESSOR)) inq_ptr->flags |= SID_CmdQue; /* * Per SPC-3, unused bytes in ASCII strings are filled with spaces. * We have 8 bytes for the vendor name, and 16 bytes for the device * name and 4 bytes for the revision. */ if (lun == NULL || (val = ctl_get_opt(&lun->be_lun->options, "vendor")) == NULL) { strncpy(inq_ptr->vendor, CTL_VENDOR, sizeof(inq_ptr->vendor)); } else { memset(inq_ptr->vendor, ' ', sizeof(inq_ptr->vendor)); strncpy(inq_ptr->vendor, val, min(sizeof(inq_ptr->vendor), strlen(val))); } if (lun == NULL) { strncpy(inq_ptr->product, CTL_DIRECT_PRODUCT, sizeof(inq_ptr->product)); } else if ((val = ctl_get_opt(&lun->be_lun->options, "product")) == NULL) { switch (lun->be_lun->lun_type) { case T_DIRECT: strncpy(inq_ptr->product, CTL_DIRECT_PRODUCT, sizeof(inq_ptr->product)); break; case T_PROCESSOR: strncpy(inq_ptr->product, CTL_PROCESSOR_PRODUCT, sizeof(inq_ptr->product)); break; default: strncpy(inq_ptr->product, CTL_UNKNOWN_PRODUCT, sizeof(inq_ptr->product)); break; } } else { memset(inq_ptr->product, ' ', sizeof(inq_ptr->product)); strncpy(inq_ptr->product, val, min(sizeof(inq_ptr->product), strlen(val))); } /* * XXX make this a macro somewhere so it automatically gets * incremented when we make changes. */ if (lun == NULL || (val = ctl_get_opt(&lun->be_lun->options, "revision")) == NULL) { strncpy(inq_ptr->revision, "0001", sizeof(inq_ptr->revision)); } else { memset(inq_ptr->revision, ' ', sizeof(inq_ptr->revision)); strncpy(inq_ptr->revision, val, min(sizeof(inq_ptr->revision), strlen(val))); } /* * For parallel SCSI, we support double transition and single * transition clocking. We also support QAS (Quick Arbitration * and Selection) and Information Unit transfers on both the * control and array devices. */ if (port_type == CTL_PORT_SCSI) inq_ptr->spi3data = SID_SPI_CLOCK_DT_ST | SID_SPI_QAS | SID_SPI_IUS; /* SAM-5 (no version claimed) */ scsi_ulto2b(0x00A0, inq_ptr->version1); /* SPC-4 (no version claimed) */ scsi_ulto2b(0x0460, inq_ptr->version2); if (port_type == CTL_PORT_FC) { /* FCP-2 ANSI INCITS.350:2003 */ scsi_ulto2b(0x0917, inq_ptr->version3); } else if (port_type == CTL_PORT_SCSI) { /* SPI-4 ANSI INCITS.362:200x */ scsi_ulto2b(0x0B56, inq_ptr->version3); } else if (port_type == CTL_PORT_ISCSI) { /* iSCSI (no version claimed) */ scsi_ulto2b(0x0960, inq_ptr->version3); } else if (port_type == CTL_PORT_SAS) { /* SAS (no version claimed) */ scsi_ulto2b(0x0BE0, inq_ptr->version3); } if (lun == NULL) { /* SBC-4 (no version claimed) */ scsi_ulto2b(0x0600, inq_ptr->version4); } else { switch (lun->be_lun->lun_type) { case T_DIRECT: /* SBC-4 (no version claimed) */ scsi_ulto2b(0x0600, inq_ptr->version4); break; case T_PROCESSOR: default: break; } } ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_inquiry(struct ctl_scsiio *ctsio) { struct scsi_inquiry *cdb; int retval; CTL_DEBUG_PRINT(("ctl_inquiry\n")); cdb = (struct scsi_inquiry *)ctsio->cdb; if (cdb->byte2 & SI_EVPD) retval = ctl_inquiry_evpd(ctsio); else if (cdb->page_code == 0) retval = ctl_inquiry_std(ctsio); else { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } return (retval); } /* * For known CDB types, parse the LBA and length. */ static int ctl_get_lba_len(union ctl_io *io, uint64_t *lba, uint64_t *len) { if (io->io_hdr.io_type != CTL_IO_SCSI) return (1); switch (io->scsiio.cdb[0]) { case COMPARE_AND_WRITE: { struct scsi_compare_and_write *cdb; cdb = (struct scsi_compare_and_write *)io->scsiio.cdb; *lba = scsi_8btou64(cdb->addr); *len = cdb->length; break; } case READ_6: case WRITE_6: { struct scsi_rw_6 *cdb; cdb = (struct scsi_rw_6 *)io->scsiio.cdb; *lba = scsi_3btoul(cdb->addr); /* only 5 bits are valid in the most significant address byte */ *lba &= 0x1fffff; *len = cdb->length; break; } case READ_10: case WRITE_10: { struct scsi_rw_10 *cdb; cdb = (struct scsi_rw_10 *)io->scsiio.cdb; *lba = scsi_4btoul(cdb->addr); *len = scsi_2btoul(cdb->length); break; } case WRITE_VERIFY_10: { struct scsi_write_verify_10 *cdb; cdb = (struct scsi_write_verify_10 *)io->scsiio.cdb; *lba = scsi_4btoul(cdb->addr); *len = scsi_2btoul(cdb->length); break; } case READ_12: case WRITE_12: { struct scsi_rw_12 *cdb; cdb = (struct scsi_rw_12 *)io->scsiio.cdb; *lba = scsi_4btoul(cdb->addr); *len = scsi_4btoul(cdb->length); break; } case WRITE_VERIFY_12: { struct scsi_write_verify_12 *cdb; cdb = (struct scsi_write_verify_12 *)io->scsiio.cdb; *lba = scsi_4btoul(cdb->addr); *len = scsi_4btoul(cdb->length); break; } case READ_16: case WRITE_16: case WRITE_ATOMIC_16: { struct scsi_rw_16 *cdb; cdb = (struct scsi_rw_16 *)io->scsiio.cdb; *lba = scsi_8btou64(cdb->addr); *len = scsi_4btoul(cdb->length); break; } case WRITE_VERIFY_16: { struct scsi_write_verify_16 *cdb; cdb = (struct scsi_write_verify_16 *)io->scsiio.cdb; *lba = scsi_8btou64(cdb->addr); *len = scsi_4btoul(cdb->length); break; } case WRITE_SAME_10: { struct scsi_write_same_10 *cdb; cdb = (struct scsi_write_same_10 *)io->scsiio.cdb; *lba = scsi_4btoul(cdb->addr); *len = scsi_2btoul(cdb->length); break; } case WRITE_SAME_16: { struct scsi_write_same_16 *cdb; cdb = (struct scsi_write_same_16 *)io->scsiio.cdb; *lba = scsi_8btou64(cdb->addr); *len = scsi_4btoul(cdb->length); break; } case VERIFY_10: { struct scsi_verify_10 *cdb; cdb = (struct scsi_verify_10 *)io->scsiio.cdb; *lba = scsi_4btoul(cdb->addr); *len = scsi_2btoul(cdb->length); break; } case VERIFY_12: { struct scsi_verify_12 *cdb; cdb = (struct scsi_verify_12 *)io->scsiio.cdb; *lba = scsi_4btoul(cdb->addr); *len = scsi_4btoul(cdb->length); break; } case VERIFY_16: { struct scsi_verify_16 *cdb; cdb = (struct scsi_verify_16 *)io->scsiio.cdb; *lba = scsi_8btou64(cdb->addr); *len = scsi_4btoul(cdb->length); break; } case UNMAP: { *lba = 0; *len = UINT64_MAX; break; } case SERVICE_ACTION_IN: { /* GET LBA STATUS */ struct scsi_get_lba_status *cdb; cdb = (struct scsi_get_lba_status *)io->scsiio.cdb; *lba = scsi_8btou64(cdb->addr); *len = UINT32_MAX; break; } default: return (1); break; /* NOTREACHED */ } return (0); } static ctl_action ctl_extent_check_lba(uint64_t lba1, uint64_t len1, uint64_t lba2, uint64_t len2, bool seq) { uint64_t endlba1, endlba2; endlba1 = lba1 + len1 - (seq ? 0 : 1); endlba2 = lba2 + len2 - 1; if ((endlba1 < lba2) || (endlba2 < lba1)) return (CTL_ACTION_PASS); else return (CTL_ACTION_BLOCK); } static int ctl_extent_check_unmap(union ctl_io *io, uint64_t lba2, uint64_t len2) { struct ctl_ptr_len_flags *ptrlen; struct scsi_unmap_desc *buf, *end, *range; uint64_t lba; uint32_t len; /* If not UNMAP -- go other way. */ if (io->io_hdr.io_type != CTL_IO_SCSI || io->scsiio.cdb[0] != UNMAP) return (CTL_ACTION_ERROR); /* If UNMAP without data -- block and wait for data. */ ptrlen = (struct ctl_ptr_len_flags *) &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; if ((io->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0 || ptrlen->ptr == NULL) return (CTL_ACTION_BLOCK); /* UNMAP with data -- check for collision. */ buf = (struct scsi_unmap_desc *)ptrlen->ptr; end = buf + ptrlen->len / sizeof(*buf); for (range = buf; range < end; range++) { lba = scsi_8btou64(range->lba); len = scsi_4btoul(range->length); if ((lba < lba2 + len2) && (lba + len > lba2)) return (CTL_ACTION_BLOCK); } return (CTL_ACTION_PASS); } static ctl_action ctl_extent_check(union ctl_io *io1, union ctl_io *io2, bool seq) { uint64_t lba1, lba2; uint64_t len1, len2; int retval; if (ctl_get_lba_len(io2, &lba2, &len2) != 0) return (CTL_ACTION_ERROR); retval = ctl_extent_check_unmap(io1, lba2, len2); if (retval != CTL_ACTION_ERROR) return (retval); if (ctl_get_lba_len(io1, &lba1, &len1) != 0) return (CTL_ACTION_ERROR); return (ctl_extent_check_lba(lba1, len1, lba2, len2, seq)); } static ctl_action ctl_extent_check_seq(union ctl_io *io1, union ctl_io *io2) { uint64_t lba1, lba2; uint64_t len1, len2; if (ctl_get_lba_len(io1, &lba1, &len1) != 0) return (CTL_ACTION_ERROR); if (ctl_get_lba_len(io2, &lba2, &len2) != 0) return (CTL_ACTION_ERROR); if (lba1 + len1 == lba2) return (CTL_ACTION_BLOCK); return (CTL_ACTION_PASS); } static ctl_action ctl_check_for_blockage(struct ctl_lun *lun, union ctl_io *pending_io, union ctl_io *ooa_io) { const struct ctl_cmd_entry *pending_entry, *ooa_entry; ctl_serialize_action *serialize_row; /* * The initiator attempted multiple untagged commands at the same * time. Can't do that. */ if ((pending_io->scsiio.tag_type == CTL_TAG_UNTAGGED) && (ooa_io->scsiio.tag_type == CTL_TAG_UNTAGGED) && ((pending_io->io_hdr.nexus.targ_port == ooa_io->io_hdr.nexus.targ_port) && (pending_io->io_hdr.nexus.initid.id == ooa_io->io_hdr.nexus.initid.id)) && ((ooa_io->io_hdr.flags & (CTL_FLAG_ABORT | CTL_FLAG_STATUS_SENT)) == 0)) return (CTL_ACTION_OVERLAP); /* * The initiator attempted to send multiple tagged commands with * the same ID. (It's fine if different initiators have the same * tag ID.) * * Even if all of those conditions are true, we don't kill the I/O * if the command ahead of us has been aborted. We won't end up * sending it to the FETD, and it's perfectly legal to resend a * command with the same tag number as long as the previous * instance of this tag number has been aborted somehow. */ if ((pending_io->scsiio.tag_type != CTL_TAG_UNTAGGED) && (ooa_io->scsiio.tag_type != CTL_TAG_UNTAGGED) && (pending_io->scsiio.tag_num == ooa_io->scsiio.tag_num) && ((pending_io->io_hdr.nexus.targ_port == ooa_io->io_hdr.nexus.targ_port) && (pending_io->io_hdr.nexus.initid.id == ooa_io->io_hdr.nexus.initid.id)) && ((ooa_io->io_hdr.flags & (CTL_FLAG_ABORT | CTL_FLAG_STATUS_SENT)) == 0)) return (CTL_ACTION_OVERLAP_TAG); /* * If we get a head of queue tag, SAM-3 says that we should * immediately execute it. * * What happens if this command would normally block for some other * reason? e.g. a request sense with a head of queue tag * immediately after a write. Normally that would block, but this * will result in its getting executed immediately... * * We currently return "pass" instead of "skip", so we'll end up * going through the rest of the queue to check for overlapped tags. * * XXX KDM check for other types of blockage first?? */ if (pending_io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE) return (CTL_ACTION_PASS); /* * Ordered tags have to block until all items ahead of them * have completed. If we get called with an ordered tag, we always * block, if something else is ahead of us in the queue. */ if (pending_io->scsiio.tag_type == CTL_TAG_ORDERED) return (CTL_ACTION_BLOCK); /* * Simple tags get blocked until all head of queue and ordered tags * ahead of them have completed. I'm lumping untagged commands in * with simple tags here. XXX KDM is that the right thing to do? */ if (((pending_io->scsiio.tag_type == CTL_TAG_UNTAGGED) || (pending_io->scsiio.tag_type == CTL_TAG_SIMPLE)) && ((ooa_io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE) || (ooa_io->scsiio.tag_type == CTL_TAG_ORDERED))) return (CTL_ACTION_BLOCK); pending_entry = ctl_get_cmd_entry(&pending_io->scsiio, NULL); ooa_entry = ctl_get_cmd_entry(&ooa_io->scsiio, NULL); serialize_row = ctl_serialize_table[ooa_entry->seridx]; switch (serialize_row[pending_entry->seridx]) { case CTL_SER_BLOCK: return (CTL_ACTION_BLOCK); case CTL_SER_EXTENT: return (ctl_extent_check(ooa_io, pending_io, - (lun->serseq == CTL_LUN_SERSEQ_ON))); + (lun->be_lun && lun->be_lun->serseq == CTL_LUN_SERSEQ_ON))); case CTL_SER_EXTENTOPT: if ((lun->mode_pages.control_page[CTL_PAGE_CURRENT].queue_flags & SCP_QUEUE_ALG_MASK) != SCP_QUEUE_ALG_UNRESTRICTED) return (ctl_extent_check(ooa_io, pending_io, - (lun->serseq == CTL_LUN_SERSEQ_ON))); + (lun->be_lun && + lun->be_lun->serseq == CTL_LUN_SERSEQ_ON))); return (CTL_ACTION_PASS); case CTL_SER_EXTENTSEQ: - if (lun->serseq != CTL_LUN_SERSEQ_OFF) + if (lun->be_lun && lun->be_lun->serseq != CTL_LUN_SERSEQ_OFF) return (ctl_extent_check_seq(ooa_io, pending_io)); return (CTL_ACTION_PASS); case CTL_SER_PASS: return (CTL_ACTION_PASS); case CTL_SER_BLOCKOPT: if ((lun->mode_pages.control_page[CTL_PAGE_CURRENT].queue_flags & SCP_QUEUE_ALG_MASK) != SCP_QUEUE_ALG_UNRESTRICTED) return (CTL_ACTION_BLOCK); return (CTL_ACTION_PASS); case CTL_SER_SKIP: return (CTL_ACTION_SKIP); default: panic("invalid serialization value %d", serialize_row[pending_entry->seridx]); } return (CTL_ACTION_ERROR); } /* * Check for blockage or overlaps against the OOA (Order Of Arrival) queue. * Assumptions: * - pending_io is generally either incoming, or on the blocked queue * - starting I/O is the I/O we want to start the check with. */ static ctl_action ctl_check_ooa(struct ctl_lun *lun, union ctl_io *pending_io, union ctl_io *starting_io) { union ctl_io *ooa_io; ctl_action action; mtx_assert(&lun->lun_lock, MA_OWNED); /* * Run back along the OOA queue, starting with the current * blocked I/O and going through every I/O before it on the * queue. If starting_io is NULL, we'll just end up returning * CTL_ACTION_PASS. */ for (ooa_io = starting_io; ooa_io != NULL; ooa_io = (union ctl_io *)TAILQ_PREV(&ooa_io->io_hdr, ctl_ooaq, ooa_links)){ /* * This routine just checks to see whether * cur_blocked is blocked by ooa_io, which is ahead * of it in the queue. It doesn't queue/dequeue * cur_blocked. */ action = ctl_check_for_blockage(lun, pending_io, ooa_io); switch (action) { case CTL_ACTION_BLOCK: case CTL_ACTION_OVERLAP: case CTL_ACTION_OVERLAP_TAG: case CTL_ACTION_SKIP: case CTL_ACTION_ERROR: return (action); break; /* NOTREACHED */ case CTL_ACTION_PASS: break; default: panic("invalid action %d", action); break; /* NOTREACHED */ } } return (CTL_ACTION_PASS); } /* * Assumptions: * - An I/O has just completed, and has been removed from the per-LUN OOA * queue, so some items on the blocked queue may now be unblocked. */ static int ctl_check_blocked(struct ctl_lun *lun) { union ctl_io *cur_blocked, *next_blocked; mtx_assert(&lun->lun_lock, MA_OWNED); /* * Run forward from the head of the blocked queue, checking each * entry against the I/Os prior to it on the OOA queue to see if * there is still any blockage. * * We cannot use the TAILQ_FOREACH() macro, because it can't deal * with our removing a variable on it while it is traversing the * list. */ for (cur_blocked = (union ctl_io *)TAILQ_FIRST(&lun->blocked_queue); cur_blocked != NULL; cur_blocked = next_blocked) { union ctl_io *prev_ooa; ctl_action action; next_blocked = (union ctl_io *)TAILQ_NEXT(&cur_blocked->io_hdr, blocked_links); prev_ooa = (union ctl_io *)TAILQ_PREV(&cur_blocked->io_hdr, ctl_ooaq, ooa_links); /* * If cur_blocked happens to be the first item in the OOA * queue now, prev_ooa will be NULL, and the action * returned will just be CTL_ACTION_PASS. */ action = ctl_check_ooa(lun, cur_blocked, prev_ooa); switch (action) { case CTL_ACTION_BLOCK: /* Nothing to do here, still blocked */ break; case CTL_ACTION_OVERLAP: case CTL_ACTION_OVERLAP_TAG: /* * This shouldn't happen! In theory we've already * checked this command for overlap... */ break; case CTL_ACTION_PASS: case CTL_ACTION_SKIP: { const struct ctl_cmd_entry *entry; int isc_retval; /* * The skip case shouldn't happen, this transaction * should have never made it onto the blocked queue. */ /* * This I/O is no longer blocked, we can remove it * from the blocked queue. Since this is a TAILQ * (doubly linked list), we can do O(1) removals * from any place on the list. */ TAILQ_REMOVE(&lun->blocked_queue, &cur_blocked->io_hdr, blocked_links); cur_blocked->io_hdr.flags &= ~CTL_FLAG_BLOCKED; if (cur_blocked->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC){ /* * Need to send IO back to original side to * run */ union ctl_ha_msg msg_info; msg_info.hdr.original_sc = cur_blocked->io_hdr.original_sc; msg_info.hdr.serializing_sc = cur_blocked; msg_info.hdr.msg_type = CTL_MSG_R2R; if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info), 0)) > CTL_HA_STATUS_SUCCESS) { printf("CTL:Check Blocked error from " "ctl_ha_msg_send %d\n", isc_retval); } break; } entry = ctl_get_cmd_entry(&cur_blocked->scsiio, NULL); /* * Check this I/O for LUN state changes that may * have happened while this command was blocked. * The LUN state may have been changed by a command * ahead of us in the queue, so we need to re-check * for any states that can be caused by SCSI * commands. */ if (ctl_scsiio_lun_check(lun, entry, &cur_blocked->scsiio) == 0) { cur_blocked->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; ctl_enqueue_rtr(cur_blocked); } else ctl_done(cur_blocked); break; } default: /* * This probably shouldn't happen -- we shouldn't * get CTL_ACTION_ERROR, or anything else. */ break; } } return (CTL_RETVAL_COMPLETE); } /* * This routine (with one exception) checks LUN flags that can be set by * commands ahead of us in the OOA queue. These flags have to be checked * when a command initially comes in, and when we pull a command off the * blocked queue and are preparing to execute it. The reason we have to * check these flags for commands on the blocked queue is that the LUN * state may have been changed by a command ahead of us while we're on the * blocked queue. * * Ordering is somewhat important with these checks, so please pay * careful attention to the placement of any new checks. */ static int ctl_scsiio_lun_check(struct ctl_lun *lun, const struct ctl_cmd_entry *entry, struct ctl_scsiio *ctsio) { struct ctl_softc *softc = lun->ctl_softc; int retval; uint32_t residx; retval = 0; mtx_assert(&lun->lun_lock, MA_OWNED); /* * If this shelf is a secondary shelf controller, we have to reject * any media access commands. */ if ((softc->flags & CTL_FLAG_ACTIVE_SHELF) == 0 && (entry->flags & CTL_CMD_FLAG_OK_ON_SECONDARY) == 0) { ctl_set_lun_standby(ctsio); retval = 1; goto bailout; } if (entry->pattern & CTL_LUN_PAT_WRITE) { - if (lun->flags & CTL_LUN_READONLY) { + if (lun->be_lun && + lun->be_lun->flags & CTL_LUN_FLAG_READONLY) { ctl_set_sense(ctsio, /*current_error*/ 1, /*sense_key*/ SSD_KEY_DATA_PROTECT, /*asc*/ 0x27, /*ascq*/ 0x01, SSD_ELEM_NONE); retval = 1; goto bailout; } if ((lun->mode_pages.control_page[CTL_PAGE_CURRENT] .eca_and_aen & SCP_SWP) != 0) { ctl_set_sense(ctsio, /*current_error*/ 1, /*sense_key*/ SSD_KEY_DATA_PROTECT, /*asc*/ 0x27, /*ascq*/ 0x02, SSD_ELEM_NONE); retval = 1; goto bailout; } } /* * Check for a reservation conflict. If this command isn't allowed * even on reserved LUNs, and if this initiator isn't the one who * reserved us, reject the command with a reservation conflict. */ residx = ctl_get_resindex(&ctsio->io_hdr.nexus); if ((lun->flags & CTL_LUN_RESERVED) && ((entry->flags & CTL_CMD_FLAG_ALLOW_ON_RESV) == 0)) { if (lun->res_idx != residx) { ctl_set_reservation_conflict(ctsio); retval = 1; goto bailout; } } if ((lun->flags & CTL_LUN_PR_RESERVED) == 0 || (entry->flags & CTL_CMD_FLAG_ALLOW_ON_PR_RESV)) { /* No reservation or command is allowed. */; } else if ((entry->flags & CTL_CMD_FLAG_ALLOW_ON_PR_WRESV) && (lun->res_type == SPR_TYPE_WR_EX || lun->res_type == SPR_TYPE_WR_EX_RO || lun->res_type == SPR_TYPE_WR_EX_AR)) { /* The command is allowed for Write Exclusive resv. */; } else { /* * if we aren't registered or it's a res holder type * reservation and this isn't the res holder then set a * conflict. */ if (ctl_get_prkey(lun, residx) == 0 || (residx != lun->pr_res_idx && lun->res_type < 4)) { ctl_set_reservation_conflict(ctsio); retval = 1; goto bailout; } } if ((lun->flags & CTL_LUN_OFFLINE) && ((entry->flags & CTL_CMD_FLAG_OK_ON_OFFLINE) == 0)) { ctl_set_lun_not_ready(ctsio); retval = 1; goto bailout; } /* * If the LUN is stopped, see if this particular command is allowed * for a stopped lun. Otherwise, reject it with 0x04,0x02. */ if ((lun->flags & CTL_LUN_STOPPED) && ((entry->flags & CTL_CMD_FLAG_OK_ON_STOPPED) == 0)) { /* "Logical unit not ready, initializing cmd. required" */ ctl_set_lun_stopped(ctsio); retval = 1; goto bailout; } if ((lun->flags & CTL_LUN_INOPERABLE) && ((entry->flags & CTL_CMD_FLAG_OK_ON_INOPERABLE) == 0)) { /* "Medium format corrupted" */ ctl_set_medium_format_corrupted(ctsio); retval = 1; goto bailout; } bailout: return (retval); } static void ctl_failover_io(union ctl_io *io, int have_lock) { ctl_set_busy(&io->scsiio); ctl_done(io); } #ifdef notyet static void ctl_failover(void) { struct ctl_lun *lun; struct ctl_softc *softc; union ctl_io *next_io, *pending_io; union ctl_io *io; int lun_idx; softc = control_softc; mtx_lock(&softc->ctl_lock); /* * Remove any cmds from the other SC from the rtr queue. These * will obviously only be for LUNs for which we're the primary. * We can't send status or get/send data for these commands. * Since they haven't been executed yet, we can just remove them. * We'll either abort them or delete them below, depending on * which HA mode we're in. */ #ifdef notyet mtx_lock(&softc->queue_lock); for (io = (union ctl_io *)STAILQ_FIRST(&softc->rtr_queue); io != NULL; io = next_io) { next_io = (union ctl_io *)STAILQ_NEXT(&io->io_hdr, links); if (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) STAILQ_REMOVE(&softc->rtr_queue, &io->io_hdr, ctl_io_hdr, links); } mtx_unlock(&softc->queue_lock); #endif for (lun_idx=0; lun_idx < softc->num_luns; lun_idx++) { lun = softc->ctl_luns[lun_idx]; if (lun==NULL) continue; /* * Processor LUNs are primary on both sides. * XXX will this always be true? */ if (lun->be_lun->lun_type == T_PROCESSOR) continue; if ((lun->flags & CTL_LUN_PRIMARY_SC) && (softc->ha_mode == CTL_HA_MODE_SER_ONLY)) { printf("FAILOVER: primary lun %d\n", lun_idx); /* * Remove all commands from the other SC. First from the * blocked queue then from the ooa queue. Once we have * removed them. Call ctl_check_blocked to see if there * is anything that can run. */ for (io = (union ctl_io *)TAILQ_FIRST( &lun->blocked_queue); io != NULL; io = next_io) { next_io = (union ctl_io *)TAILQ_NEXT( &io->io_hdr, blocked_links); if (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) { TAILQ_REMOVE(&lun->blocked_queue, &io->io_hdr,blocked_links); io->io_hdr.flags &= ~CTL_FLAG_BLOCKED; TAILQ_REMOVE(&lun->ooa_queue, &io->io_hdr, ooa_links); ctl_free_io(io); } } for (io = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue); io != NULL; io = next_io) { next_io = (union ctl_io *)TAILQ_NEXT( &io->io_hdr, ooa_links); if (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) { TAILQ_REMOVE(&lun->ooa_queue, &io->io_hdr, ooa_links); ctl_free_io(io); } } ctl_check_blocked(lun); } else if ((lun->flags & CTL_LUN_PRIMARY_SC) && (softc->ha_mode == CTL_HA_MODE_XFER)) { printf("FAILOVER: primary lun %d\n", lun_idx); /* * Abort all commands from the other SC. We can't * send status back for them now. These should get * cleaned up when they are completed or come out * for a datamove operation. */ for (io = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue); io != NULL; io = next_io) { next_io = (union ctl_io *)TAILQ_NEXT( &io->io_hdr, ooa_links); if (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) io->io_hdr.flags |= CTL_FLAG_ABORT; } } else if (((lun->flags & CTL_LUN_PRIMARY_SC) == 0) && (softc->ha_mode == CTL_HA_MODE_XFER)) { printf("FAILOVER: secondary lun %d\n", lun_idx); lun->flags |= CTL_LUN_PRIMARY_SC; /* * We send all I/O that was sent to this controller * and redirected to the other side back with * busy status, and have the initiator retry it. * Figuring out how much data has been transferred, * etc. and picking up where we left off would be * very tricky. * * XXX KDM need to remove I/O from the blocked * queue as well! */ for (pending_io = (union ctl_io *)TAILQ_FIRST( &lun->ooa_queue); pending_io != NULL; pending_io = next_io) { next_io = (union ctl_io *)TAILQ_NEXT( &pending_io->io_hdr, ooa_links); pending_io->io_hdr.flags &= ~CTL_FLAG_SENT_2OTHER_SC; if (pending_io->io_hdr.flags & CTL_FLAG_IO_ACTIVE) { pending_io->io_hdr.flags |= CTL_FLAG_FAILOVER; } else { ctl_set_busy(&pending_io->scsiio); ctl_done(pending_io); } } ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE); } else if (((lun->flags & CTL_LUN_PRIMARY_SC) == 0) && (softc->ha_mode == CTL_HA_MODE_SER_ONLY)) { printf("FAILOVER: secondary lun %d\n", lun_idx); /* * if the first io on the OOA is not on the RtR queue * add it. */ lun->flags |= CTL_LUN_PRIMARY_SC; pending_io = (union ctl_io *)TAILQ_FIRST( &lun->ooa_queue); if (pending_io==NULL) { printf("Nothing on OOA queue\n"); continue; } pending_io->io_hdr.flags &= ~CTL_FLAG_SENT_2OTHER_SC; if ((pending_io->io_hdr.flags & CTL_FLAG_IS_WAS_ON_RTR) == 0) { pending_io->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; ctl_enqueue_rtr(pending_io); } #if 0 else { printf("Tag 0x%04x is running\n", pending_io->scsiio.tag_num); } #endif next_io = (union ctl_io *)TAILQ_NEXT( &pending_io->io_hdr, ooa_links); for (pending_io=next_io; pending_io != NULL; pending_io = next_io) { pending_io->io_hdr.flags &= ~CTL_FLAG_SENT_2OTHER_SC; next_io = (union ctl_io *)TAILQ_NEXT( &pending_io->io_hdr, ooa_links); if (pending_io->io_hdr.flags & CTL_FLAG_IS_WAS_ON_RTR) { #if 0 printf("Tag 0x%04x is running\n", pending_io->scsiio.tag_num); #endif continue; } switch (ctl_check_ooa(lun, pending_io, (union ctl_io *)TAILQ_PREV( &pending_io->io_hdr, ctl_ooaq, ooa_links))) { case CTL_ACTION_BLOCK: TAILQ_INSERT_TAIL(&lun->blocked_queue, &pending_io->io_hdr, blocked_links); pending_io->io_hdr.flags |= CTL_FLAG_BLOCKED; break; case CTL_ACTION_PASS: case CTL_ACTION_SKIP: pending_io->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; ctl_enqueue_rtr(pending_io); break; case CTL_ACTION_OVERLAP: ctl_set_overlapped_cmd( (struct ctl_scsiio *)pending_io); ctl_done(pending_io); break; case CTL_ACTION_OVERLAP_TAG: ctl_set_overlapped_tag( (struct ctl_scsiio *)pending_io, pending_io->scsiio.tag_num & 0xff); ctl_done(pending_io); break; case CTL_ACTION_ERROR: default: ctl_set_internal_failure( (struct ctl_scsiio *)pending_io, 0, // sks_valid 0); //retry count ctl_done(pending_io); break; } } ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE); } else { panic("Unhandled HA mode failover, LUN flags = %#x, " "ha_mode = #%x", lun->flags, softc->ha_mode); } } ctl_pause_rtr = 0; mtx_unlock(&softc->ctl_lock); } #endif static int ctl_scsiio_precheck(struct ctl_softc *softc, struct ctl_scsiio *ctsio) { struct ctl_lun *lun; const struct ctl_cmd_entry *entry; uint32_t initidx, targ_lun; int retval; retval = 0; lun = NULL; targ_lun = ctsio->io_hdr.nexus.targ_mapped_lun; if ((targ_lun < CTL_MAX_LUNS) && ((lun = softc->ctl_luns[targ_lun]) != NULL)) { /* * If the LUN is invalid, pretend that it doesn't exist. * It will go away as soon as all pending I/O has been * completed. */ mtx_lock(&lun->lun_lock); if (lun->flags & CTL_LUN_DISABLED) { mtx_unlock(&lun->lun_lock); lun = NULL; ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr = NULL; ctsio->io_hdr.ctl_private[CTL_PRIV_BACKEND_LUN].ptr = NULL; } else { ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr = lun; ctsio->io_hdr.ctl_private[CTL_PRIV_BACKEND_LUN].ptr = lun->be_lun; if (lun->be_lun->lun_type == T_PROCESSOR) { ctsio->io_hdr.flags |= CTL_FLAG_CONTROL_DEV; } /* * Every I/O goes into the OOA queue for a * particular LUN, and stays there until completion. */ #ifdef CTL_TIME_IO if (TAILQ_EMPTY(&lun->ooa_queue)) { lun->idle_time += getsbinuptime() - lun->last_busy; } #endif TAILQ_INSERT_TAIL(&lun->ooa_queue, &ctsio->io_hdr, ooa_links); } } else { ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr = NULL; ctsio->io_hdr.ctl_private[CTL_PRIV_BACKEND_LUN].ptr = NULL; } /* Get command entry and return error if it is unsuppotyed. */ entry = ctl_validate_command(ctsio); if (entry == NULL) { if (lun) mtx_unlock(&lun->lun_lock); return (retval); } ctsio->io_hdr.flags &= ~CTL_FLAG_DATA_MASK; ctsio->io_hdr.flags |= entry->flags & CTL_FLAG_DATA_MASK; /* * Check to see whether we can send this command to LUNs that don't * exist. This should pretty much only be the case for inquiry * and request sense. Further checks, below, really require having * a LUN, so we can't really check the command anymore. Just put * it on the rtr queue. */ if (lun == NULL) { if (entry->flags & CTL_CMD_FLAG_OK_ON_ALL_LUNS) { ctsio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; ctl_enqueue_rtr((union ctl_io *)ctsio); return (retval); } ctl_set_unsupported_lun(ctsio); ctl_done((union ctl_io *)ctsio); CTL_DEBUG_PRINT(("ctl_scsiio_precheck: bailing out due to invalid LUN\n")); return (retval); } else { /* * Make sure we support this particular command on this LUN. * e.g., we don't support writes to the control LUN. */ if (!ctl_cmd_applicable(lun->be_lun->lun_type, entry)) { mtx_unlock(&lun->lun_lock); ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (retval); } } initidx = ctl_get_initindex(&ctsio->io_hdr.nexus); #ifdef CTL_WITH_CA /* * If we've got a request sense, it'll clear the contingent * allegiance condition. Otherwise, if we have a CA condition for * this initiator, clear it, because it sent down a command other * than request sense. */ if ((ctsio->cdb[0] != REQUEST_SENSE) && (ctl_is_set(lun->have_ca, initidx))) ctl_clear_mask(lun->have_ca, initidx); #endif /* * If the command has this flag set, it handles its own unit * attention reporting, we shouldn't do anything. Otherwise we * check for any pending unit attentions, and send them back to the * initiator. We only do this when a command initially comes in, * not when we pull it off the blocked queue. * * According to SAM-3, section 5.3.2, the order that things get * presented back to the host is basically unit attentions caused * by some sort of reset event, busy status, reservation conflicts * or task set full, and finally any other status. * * One issue here is that some of the unit attentions we report * don't fall into the "reset" category (e.g. "reported luns data * has changed"). So reporting it here, before the reservation * check, may be technically wrong. I guess the only thing to do * would be to check for and report the reset events here, and then * check for the other unit attention types after we check for a * reservation conflict. * * XXX KDM need to fix this */ if ((entry->flags & CTL_CMD_FLAG_NO_SENSE) == 0) { ctl_ua_type ua_type; scsi_sense_data_type sense_format; if (lun->flags & CTL_LUN_SENSE_DESC) sense_format = SSD_TYPE_DESC; else sense_format = SSD_TYPE_FIXED; ua_type = ctl_build_ua(lun, initidx, &ctsio->sense_data, sense_format); if (ua_type != CTL_UA_NONE) { mtx_unlock(&lun->lun_lock); ctsio->scsi_status = SCSI_STATUS_CHECK_COND; ctsio->io_hdr.status = CTL_SCSI_ERROR | CTL_AUTOSENSE; ctsio->sense_len = SSD_FULL_SIZE; ctl_done((union ctl_io *)ctsio); return (retval); } } if (ctl_scsiio_lun_check(lun, entry, ctsio) != 0) { mtx_unlock(&lun->lun_lock); ctl_done((union ctl_io *)ctsio); return (retval); } /* * XXX CHD this is where we want to send IO to other side if * this LUN is secondary on this SC. We will need to make a copy * of the IO and flag the IO on this side as SENT_2OTHER and the flag * the copy we send as FROM_OTHER. * We also need to stuff the address of the original IO so we can * find it easily. Something similar will need be done on the other * side so when we are done we can find the copy. */ if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0) { union ctl_ha_msg msg_info; int isc_retval; ctsio->io_hdr.flags |= CTL_FLAG_SENT_2OTHER_SC; msg_info.hdr.msg_type = CTL_MSG_SERIALIZE; msg_info.hdr.original_sc = (union ctl_io *)ctsio; #if 0 printf("1. ctsio %p\n", ctsio); #endif msg_info.hdr.serializing_sc = NULL; msg_info.hdr.nexus = ctsio->io_hdr.nexus; msg_info.scsi.tag_num = ctsio->tag_num; msg_info.scsi.tag_type = ctsio->tag_type; memcpy(msg_info.scsi.cdb, ctsio->cdb, CTL_MAX_CDBLEN); ctsio->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE; if ((isc_retval=ctl_ha_msg_send(CTL_HA_CHAN_CTL, (void *)&msg_info, sizeof(msg_info), 0)) > CTL_HA_STATUS_SUCCESS) { printf("CTL:precheck, ctl_ha_msg_send returned %d\n", isc_retval); printf("CTL:opcode is %x\n", ctsio->cdb[0]); } else { #if 0 printf("CTL:Precheck sent msg, opcode is %x\n",opcode); #endif } /* * XXX KDM this I/O is off the incoming queue, but hasn't * been inserted on any other queue. We may need to come * up with a holding queue while we wait for serialization * so that we have an idea of what we're waiting for from * the other side. */ mtx_unlock(&lun->lun_lock); return (retval); } switch (ctl_check_ooa(lun, (union ctl_io *)ctsio, (union ctl_io *)TAILQ_PREV(&ctsio->io_hdr, ctl_ooaq, ooa_links))) { case CTL_ACTION_BLOCK: ctsio->io_hdr.flags |= CTL_FLAG_BLOCKED; TAILQ_INSERT_TAIL(&lun->blocked_queue, &ctsio->io_hdr, blocked_links); mtx_unlock(&lun->lun_lock); return (retval); case CTL_ACTION_PASS: case CTL_ACTION_SKIP: ctsio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; mtx_unlock(&lun->lun_lock); ctl_enqueue_rtr((union ctl_io *)ctsio); break; case CTL_ACTION_OVERLAP: mtx_unlock(&lun->lun_lock); ctl_set_overlapped_cmd(ctsio); ctl_done((union ctl_io *)ctsio); break; case CTL_ACTION_OVERLAP_TAG: mtx_unlock(&lun->lun_lock); ctl_set_overlapped_tag(ctsio, ctsio->tag_num & 0xff); ctl_done((union ctl_io *)ctsio); break; case CTL_ACTION_ERROR: default: mtx_unlock(&lun->lun_lock); ctl_set_internal_failure(ctsio, /*sks_valid*/ 0, /*retry_count*/ 0); ctl_done((union ctl_io *)ctsio); break; } return (retval); } const struct ctl_cmd_entry * ctl_get_cmd_entry(struct ctl_scsiio *ctsio, int *sa) { const struct ctl_cmd_entry *entry; int service_action; entry = &ctl_cmd_table[ctsio->cdb[0]]; if (sa) *sa = ((entry->flags & CTL_CMD_FLAG_SA5) != 0); if (entry->flags & CTL_CMD_FLAG_SA5) { service_action = ctsio->cdb[1] & SERVICE_ACTION_MASK; entry = &((const struct ctl_cmd_entry *) entry->execute)[service_action]; } return (entry); } const struct ctl_cmd_entry * ctl_validate_command(struct ctl_scsiio *ctsio) { const struct ctl_cmd_entry *entry; int i, sa; uint8_t diff; entry = ctl_get_cmd_entry(ctsio, &sa); if (entry->execute == NULL) { if (sa) ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 4); else ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (NULL); } KASSERT(entry->length > 0, ("Not defined length for command 0x%02x/0x%02x", ctsio->cdb[0], ctsio->cdb[1])); for (i = 1; i < entry->length; i++) { diff = ctsio->cdb[i] & ~entry->usage[i - 1]; if (diff == 0) continue; ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ i, /*bit_valid*/ 1, /*bit*/ fls(diff) - 1); ctl_done((union ctl_io *)ctsio); return (NULL); } return (entry); } static int ctl_cmd_applicable(uint8_t lun_type, const struct ctl_cmd_entry *entry) { switch (lun_type) { case T_PROCESSOR: if (((entry->flags & CTL_CMD_FLAG_OK_ON_PROC) == 0) && ((entry->flags & CTL_CMD_FLAG_OK_ON_ALL_LUNS) == 0)) return (0); break; case T_DIRECT: if (((entry->flags & CTL_CMD_FLAG_OK_ON_SLUN) == 0) && ((entry->flags & CTL_CMD_FLAG_OK_ON_ALL_LUNS) == 0)) return (0); break; default: return (0); } return (1); } static int ctl_scsiio(struct ctl_scsiio *ctsio) { int retval; const struct ctl_cmd_entry *entry; retval = CTL_RETVAL_COMPLETE; CTL_DEBUG_PRINT(("ctl_scsiio cdb[0]=%02X\n", ctsio->cdb[0])); entry = ctl_get_cmd_entry(ctsio, NULL); /* * If this I/O has been aborted, just send it straight to * ctl_done() without executing it. */ if (ctsio->io_hdr.flags & CTL_FLAG_ABORT) { ctl_done((union ctl_io *)ctsio); goto bailout; } /* * All the checks should have been handled by ctl_scsiio_precheck(). * We should be clear now to just execute the I/O. */ retval = entry->execute(ctsio); bailout: return (retval); } /* * Since we only implement one target right now, a bus reset simply resets * our single target. */ static int ctl_bus_reset(struct ctl_softc *softc, union ctl_io *io) { return(ctl_target_reset(softc, io, CTL_UA_BUS_RESET)); } static int ctl_target_reset(struct ctl_softc *softc, union ctl_io *io, ctl_ua_type ua_type) { struct ctl_lun *lun; int retval; if (!(io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)) { union ctl_ha_msg msg_info; io->io_hdr.flags |= CTL_FLAG_SENT_2OTHER_SC; msg_info.hdr.nexus = io->io_hdr.nexus; if (ua_type==CTL_UA_TARG_RESET) msg_info.task.task_action = CTL_TASK_TARGET_RESET; else msg_info.task.task_action = CTL_TASK_BUS_RESET; msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS; msg_info.hdr.original_sc = NULL; msg_info.hdr.serializing_sc = NULL; if (CTL_HA_STATUS_SUCCESS != ctl_ha_msg_send(CTL_HA_CHAN_CTL, (void *)&msg_info, sizeof(msg_info), 0)) { } } retval = 0; mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(lun, &softc->lun_list, links) retval += ctl_lun_reset(lun, io, ua_type); mtx_unlock(&softc->ctl_lock); return (retval); } /* * The LUN should always be set. The I/O is optional, and is used to * distinguish between I/Os sent by this initiator, and by other * initiators. We set unit attention for initiators other than this one. * SAM-3 is vague on this point. It does say that a unit attention should * be established for other initiators when a LUN is reset (see section * 5.7.3), but it doesn't specifically say that the unit attention should * be established for this particular initiator when a LUN is reset. Here * is the relevant text, from SAM-3 rev 8: * * 5.7.2 When a SCSI initiator port aborts its own tasks * * When a SCSI initiator port causes its own task(s) to be aborted, no * notification that the task(s) have been aborted shall be returned to * the SCSI initiator port other than the completion response for the * command or task management function action that caused the task(s) to * be aborted and notification(s) associated with related effects of the * action (e.g., a reset unit attention condition). * * XXX KDM for now, we're setting unit attention for all initiators. */ static int ctl_lun_reset(struct ctl_lun *lun, union ctl_io *io, ctl_ua_type ua_type) { union ctl_io *xio; #if 0 uint32_t initidx; #endif #ifdef CTL_WITH_CA int i; #endif mtx_lock(&lun->lun_lock); /* * Run through the OOA queue and abort each I/O. */ for (xio = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue); xio != NULL; xio = (union ctl_io *)TAILQ_NEXT(&xio->io_hdr, ooa_links)) { xio->io_hdr.flags |= CTL_FLAG_ABORT | CTL_FLAG_ABORT_STATUS; } /* * This version sets unit attention for every */ #if 0 initidx = ctl_get_initindex(&io->io_hdr.nexus); ctl_est_ua_all(lun, initidx, ua_type); #else ctl_est_ua_all(lun, -1, ua_type); #endif /* * A reset (any kind, really) clears reservations established with * RESERVE/RELEASE. It does not clear reservations established * with PERSISTENT RESERVE OUT, but we don't support that at the * moment anyway. See SPC-2, section 5.6. SPC-3 doesn't address * reservations made with the RESERVE/RELEASE commands, because * those commands are obsolete in SPC-3. */ lun->flags &= ~CTL_LUN_RESERVED; #ifdef CTL_WITH_CA for (i = 0; i < CTL_MAX_INITIATORS; i++) ctl_clear_mask(lun->have_ca, i); #endif mtx_unlock(&lun->lun_lock); return (0); } static void ctl_abort_tasks_lun(struct ctl_lun *lun, uint32_t targ_port, uint32_t init_id, int other_sc) { union ctl_io *xio; mtx_assert(&lun->lun_lock, MA_OWNED); /* * Run through the OOA queue and attempt to find the given I/O. * The target port, initiator ID, tag type and tag number have to * match the values that we got from the initiator. If we have an * untagged command to abort, simply abort the first untagged command * we come to. We only allow one untagged command at a time of course. */ for (xio = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue); xio != NULL; xio = (union ctl_io *)TAILQ_NEXT(&xio->io_hdr, ooa_links)) { if ((targ_port == UINT32_MAX || targ_port == xio->io_hdr.nexus.targ_port) && (init_id == UINT32_MAX || init_id == xio->io_hdr.nexus.initid.id)) { if (targ_port != xio->io_hdr.nexus.targ_port || init_id != xio->io_hdr.nexus.initid.id) xio->io_hdr.flags |= CTL_FLAG_ABORT_STATUS; xio->io_hdr.flags |= CTL_FLAG_ABORT; if (!other_sc && !(lun->flags & CTL_LUN_PRIMARY_SC)) { union ctl_ha_msg msg_info; msg_info.hdr.nexus = xio->io_hdr.nexus; msg_info.task.task_action = CTL_TASK_ABORT_TASK; msg_info.task.tag_num = xio->scsiio.tag_num; msg_info.task.tag_type = xio->scsiio.tag_type; msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS; msg_info.hdr.original_sc = NULL; msg_info.hdr.serializing_sc = NULL; ctl_ha_msg_send(CTL_HA_CHAN_CTL, (void *)&msg_info, sizeof(msg_info), 0); } } } } static int ctl_abort_task_set(union ctl_io *io) { struct ctl_softc *softc = control_softc; struct ctl_lun *lun; uint32_t targ_lun; /* * Look up the LUN. */ targ_lun = io->io_hdr.nexus.targ_mapped_lun; mtx_lock(&softc->ctl_lock); if ((targ_lun < CTL_MAX_LUNS) && (softc->ctl_luns[targ_lun] != NULL)) lun = softc->ctl_luns[targ_lun]; else { mtx_unlock(&softc->ctl_lock); return (1); } mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); if (io->taskio.task_action == CTL_TASK_ABORT_TASK_SET) { ctl_abort_tasks_lun(lun, io->io_hdr.nexus.targ_port, io->io_hdr.nexus.initid.id, (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) != 0); } else { /* CTL_TASK_CLEAR_TASK_SET */ ctl_abort_tasks_lun(lun, UINT32_MAX, UINT32_MAX, (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) != 0); } mtx_unlock(&lun->lun_lock); return (0); } static int ctl_i_t_nexus_reset(union ctl_io *io) { struct ctl_softc *softc = control_softc; struct ctl_lun *lun; uint32_t initidx, residx; initidx = ctl_get_initindex(&io->io_hdr.nexus); residx = ctl_get_resindex(&io->io_hdr.nexus); mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(lun, &softc->lun_list, links) { mtx_lock(&lun->lun_lock); ctl_abort_tasks_lun(lun, io->io_hdr.nexus.targ_port, io->io_hdr.nexus.initid.id, (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) != 0); #ifdef CTL_WITH_CA ctl_clear_mask(lun->have_ca, initidx); #endif if ((lun->flags & CTL_LUN_RESERVED) && (lun->res_idx == residx)) lun->flags &= ~CTL_LUN_RESERVED; ctl_est_ua(lun, initidx, CTL_UA_I_T_NEXUS_LOSS); mtx_unlock(&lun->lun_lock); } mtx_unlock(&softc->ctl_lock); return (0); } static int ctl_abort_task(union ctl_io *io) { union ctl_io *xio; struct ctl_lun *lun; struct ctl_softc *softc; #if 0 struct sbuf sb; char printbuf[128]; #endif int found; uint32_t targ_lun; softc = control_softc; found = 0; /* * Look up the LUN. */ targ_lun = io->io_hdr.nexus.targ_mapped_lun; mtx_lock(&softc->ctl_lock); if ((targ_lun < CTL_MAX_LUNS) && (softc->ctl_luns[targ_lun] != NULL)) lun = softc->ctl_luns[targ_lun]; else { mtx_unlock(&softc->ctl_lock); return (1); } #if 0 printf("ctl_abort_task: called for lun %lld, tag %d type %d\n", lun->lun, io->taskio.tag_num, io->taskio.tag_type); #endif mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); /* * Run through the OOA queue and attempt to find the given I/O. * The target port, initiator ID, tag type and tag number have to * match the values that we got from the initiator. If we have an * untagged command to abort, simply abort the first untagged command * we come to. We only allow one untagged command at a time of course. */ for (xio = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue); xio != NULL; xio = (union ctl_io *)TAILQ_NEXT(&xio->io_hdr, ooa_links)) { #if 0 sbuf_new(&sb, printbuf, sizeof(printbuf), SBUF_FIXEDLEN); sbuf_printf(&sb, "LUN %lld tag %d type %d%s%s%s%s: ", lun->lun, xio->scsiio.tag_num, xio->scsiio.tag_type, (xio->io_hdr.blocked_links.tqe_prev == NULL) ? "" : " BLOCKED", (xio->io_hdr.flags & CTL_FLAG_DMA_INPROG) ? " DMA" : "", (xio->io_hdr.flags & CTL_FLAG_ABORT) ? " ABORT" : "", (xio->io_hdr.flags & CTL_FLAG_IS_WAS_ON_RTR ? " RTR" : "")); ctl_scsi_command_string(&xio->scsiio, NULL, &sb); sbuf_finish(&sb); printf("%s\n", sbuf_data(&sb)); #endif if ((xio->io_hdr.nexus.targ_port != io->io_hdr.nexus.targ_port) || (xio->io_hdr.nexus.initid.id != io->io_hdr.nexus.initid.id) || (xio->io_hdr.flags & CTL_FLAG_ABORT)) continue; /* * If the abort says that the task is untagged, the * task in the queue must be untagged. Otherwise, * we just check to see whether the tag numbers * match. This is because the QLogic firmware * doesn't pass back the tag type in an abort * request. */ #if 0 if (((xio->scsiio.tag_type == CTL_TAG_UNTAGGED) && (io->taskio.tag_type == CTL_TAG_UNTAGGED)) || (xio->scsiio.tag_num == io->taskio.tag_num)) #endif /* * XXX KDM we've got problems with FC, because it * doesn't send down a tag type with aborts. So we * can only really go by the tag number... * This may cause problems with parallel SCSI. * Need to figure that out!! */ if (xio->scsiio.tag_num == io->taskio.tag_num) { xio->io_hdr.flags |= CTL_FLAG_ABORT; found = 1; if ((io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) == 0 && !(lun->flags & CTL_LUN_PRIMARY_SC)) { union ctl_ha_msg msg_info; io->io_hdr.flags |= CTL_FLAG_SENT_2OTHER_SC; msg_info.hdr.nexus = io->io_hdr.nexus; msg_info.task.task_action = CTL_TASK_ABORT_TASK; msg_info.task.tag_num = io->taskio.tag_num; msg_info.task.tag_type = io->taskio.tag_type; msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS; msg_info.hdr.original_sc = NULL; msg_info.hdr.serializing_sc = NULL; #if 0 printf("Sent Abort to other side\n"); #endif if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, (void *)&msg_info, sizeof(msg_info), 0) != CTL_HA_STATUS_SUCCESS) { } } #if 0 printf("ctl_abort_task: found I/O to abort\n"); #endif } } mtx_unlock(&lun->lun_lock); if (found == 0) { /* * This isn't really an error. It's entirely possible for * the abort and command completion to cross on the wire. * This is more of an informative/diagnostic error. */ #if 0 printf("ctl_abort_task: ABORT sent for nonexistent I/O: " "%d:%d:%d:%d tag %d type %d\n", io->io_hdr.nexus.initid.id, io->io_hdr.nexus.targ_port, io->io_hdr.nexus.targ_target.id, io->io_hdr.nexus.targ_lun, io->taskio.tag_num, io->taskio.tag_type); #endif } return (0); } static void ctl_run_task(union ctl_io *io) { struct ctl_softc *softc = control_softc; int retval = 1; const char *task_desc; CTL_DEBUG_PRINT(("ctl_run_task\n")); KASSERT(io->io_hdr.io_type == CTL_IO_TASK, ("ctl_run_task: Unextected io_type %d\n", io->io_hdr.io_type)); task_desc = ctl_scsi_task_string(&io->taskio); if (task_desc != NULL) { #ifdef NEEDTOPORT csevent_log(CSC_CTL | CSC_SHELF_SW | CTL_TASK_REPORT, csevent_LogType_Trace, csevent_Severity_Information, csevent_AlertLevel_Green, csevent_FRU_Firmware, csevent_FRU_Unknown, "CTL: received task: %s",task_desc); #endif } else { #ifdef NEEDTOPORT csevent_log(CSC_CTL | CSC_SHELF_SW | CTL_TASK_REPORT, csevent_LogType_Trace, csevent_Severity_Information, csevent_AlertLevel_Green, csevent_FRU_Firmware, csevent_FRU_Unknown, "CTL: received unknown task " "type: %d (%#x)", io->taskio.task_action, io->taskio.task_action); #endif } switch (io->taskio.task_action) { case CTL_TASK_ABORT_TASK: retval = ctl_abort_task(io); break; case CTL_TASK_ABORT_TASK_SET: case CTL_TASK_CLEAR_TASK_SET: retval = ctl_abort_task_set(io); break; case CTL_TASK_CLEAR_ACA: break; case CTL_TASK_I_T_NEXUS_RESET: retval = ctl_i_t_nexus_reset(io); break; case CTL_TASK_LUN_RESET: { struct ctl_lun *lun; uint32_t targ_lun; targ_lun = io->io_hdr.nexus.targ_mapped_lun; mtx_lock(&softc->ctl_lock); if ((targ_lun < CTL_MAX_LUNS) && (softc->ctl_luns[targ_lun] != NULL)) lun = softc->ctl_luns[targ_lun]; else { mtx_unlock(&softc->ctl_lock); retval = 1; break; } if (!(io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)) { union ctl_ha_msg msg_info; io->io_hdr.flags |= CTL_FLAG_SENT_2OTHER_SC; msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS; msg_info.hdr.nexus = io->io_hdr.nexus; msg_info.task.task_action = CTL_TASK_LUN_RESET; msg_info.hdr.original_sc = NULL; msg_info.hdr.serializing_sc = NULL; if (CTL_HA_STATUS_SUCCESS != ctl_ha_msg_send(CTL_HA_CHAN_CTL, (void *)&msg_info, sizeof(msg_info), 0)) { } } retval = ctl_lun_reset(lun, io, CTL_UA_LUN_RESET); mtx_unlock(&softc->ctl_lock); break; } case CTL_TASK_TARGET_RESET: retval = ctl_target_reset(softc, io, CTL_UA_TARG_RESET); break; case CTL_TASK_BUS_RESET: retval = ctl_bus_reset(softc, io); break; case CTL_TASK_PORT_LOGIN: break; case CTL_TASK_PORT_LOGOUT: break; default: printf("ctl_run_task: got unknown task management event %d\n", io->taskio.task_action); break; } if (retval == 0) io->io_hdr.status = CTL_SUCCESS; else io->io_hdr.status = CTL_ERROR; ctl_done(io); } /* * For HA operation. Handle commands that come in from the other * controller. */ static void ctl_handle_isc(union ctl_io *io) { int free_io; struct ctl_lun *lun; struct ctl_softc *softc; uint32_t targ_lun; softc = control_softc; targ_lun = io->io_hdr.nexus.targ_mapped_lun; lun = softc->ctl_luns[targ_lun]; switch (io->io_hdr.msg_type) { case CTL_MSG_SERIALIZE: free_io = ctl_serialize_other_sc_cmd(&io->scsiio); break; case CTL_MSG_R2R: { const struct ctl_cmd_entry *entry; /* * This is only used in SER_ONLY mode. */ free_io = 0; entry = ctl_get_cmd_entry(&io->scsiio, NULL); mtx_lock(&lun->lun_lock); if (ctl_scsiio_lun_check(lun, entry, (struct ctl_scsiio *)io) != 0) { mtx_unlock(&lun->lun_lock); ctl_done(io); break; } io->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; mtx_unlock(&lun->lun_lock); ctl_enqueue_rtr(io); break; } case CTL_MSG_FINISH_IO: if (softc->ha_mode == CTL_HA_MODE_XFER) { free_io = 0; ctl_done(io); } else { free_io = 1; mtx_lock(&lun->lun_lock); TAILQ_REMOVE(&lun->ooa_queue, &io->io_hdr, ooa_links); ctl_check_blocked(lun); mtx_unlock(&lun->lun_lock); } break; case CTL_MSG_PERS_ACTION: ctl_hndl_per_res_out_on_other_sc( (union ctl_ha_msg *)&io->presio.pr_msg); free_io = 1; break; case CTL_MSG_BAD_JUJU: free_io = 0; ctl_done(io); break; case CTL_MSG_DATAMOVE: /* Only used in XFER mode */ free_io = 0; ctl_datamove_remote(io); break; case CTL_MSG_DATAMOVE_DONE: /* Only used in XFER mode */ free_io = 0; io->scsiio.be_move_done(io); break; default: free_io = 1; printf("%s: Invalid message type %d\n", __func__, io->io_hdr.msg_type); break; } if (free_io) ctl_free_io(io); } /* * Returns the match type in the case of a match, or CTL_LUN_PAT_NONE if * there is no match. */ static ctl_lun_error_pattern ctl_cmd_pattern_match(struct ctl_scsiio *ctsio, struct ctl_error_desc *desc) { const struct ctl_cmd_entry *entry; ctl_lun_error_pattern filtered_pattern, pattern; pattern = desc->error_pattern; /* * XXX KDM we need more data passed into this function to match a * custom pattern, and we actually need to implement custom pattern * matching. */ if (pattern & CTL_LUN_PAT_CMD) return (CTL_LUN_PAT_CMD); if ((pattern & CTL_LUN_PAT_MASK) == CTL_LUN_PAT_ANY) return (CTL_LUN_PAT_ANY); entry = ctl_get_cmd_entry(ctsio, NULL); filtered_pattern = entry->pattern & pattern; /* * If the user requested specific flags in the pattern (e.g. * CTL_LUN_PAT_RANGE), make sure the command supports all of those * flags. * * If the user did not specify any flags, it doesn't matter whether * or not the command supports the flags. */ if ((filtered_pattern & ~CTL_LUN_PAT_MASK) != (pattern & ~CTL_LUN_PAT_MASK)) return (CTL_LUN_PAT_NONE); /* * If the user asked for a range check, see if the requested LBA * range overlaps with this command's LBA range. */ if (filtered_pattern & CTL_LUN_PAT_RANGE) { uint64_t lba1; uint64_t len1; ctl_action action; int retval; retval = ctl_get_lba_len((union ctl_io *)ctsio, &lba1, &len1); if (retval != 0) return (CTL_LUN_PAT_NONE); action = ctl_extent_check_lba(lba1, len1, desc->lba_range.lba, desc->lba_range.len, FALSE); /* * A "pass" means that the LBA ranges don't overlap, so * this doesn't match the user's range criteria. */ if (action == CTL_ACTION_PASS) return (CTL_LUN_PAT_NONE); } return (filtered_pattern); } static void ctl_inject_error(struct ctl_lun *lun, union ctl_io *io) { struct ctl_error_desc *desc, *desc2; mtx_assert(&lun->lun_lock, MA_OWNED); STAILQ_FOREACH_SAFE(desc, &lun->error_list, links, desc2) { ctl_lun_error_pattern pattern; /* * Check to see whether this particular command matches * the pattern in the descriptor. */ pattern = ctl_cmd_pattern_match(&io->scsiio, desc); if ((pattern & CTL_LUN_PAT_MASK) == CTL_LUN_PAT_NONE) continue; switch (desc->lun_error & CTL_LUN_INJ_TYPE) { case CTL_LUN_INJ_ABORTED: ctl_set_aborted(&io->scsiio); break; case CTL_LUN_INJ_MEDIUM_ERR: ctl_set_medium_error(&io->scsiio); break; case CTL_LUN_INJ_UA: /* 29h/00h POWER ON, RESET, OR BUS DEVICE RESET * OCCURRED */ ctl_set_ua(&io->scsiio, 0x29, 0x00); break; case CTL_LUN_INJ_CUSTOM: /* * We're assuming the user knows what he is doing. * Just copy the sense information without doing * checks. */ bcopy(&desc->custom_sense, &io->scsiio.sense_data, MIN(sizeof(desc->custom_sense), sizeof(io->scsiio.sense_data))); io->scsiio.scsi_status = SCSI_STATUS_CHECK_COND; io->scsiio.sense_len = SSD_FULL_SIZE; io->io_hdr.status = CTL_SCSI_ERROR | CTL_AUTOSENSE; break; case CTL_LUN_INJ_NONE: default: /* * If this is an error injection type we don't know * about, clear the continuous flag (if it is set) * so it will get deleted below. */ desc->lun_error &= ~CTL_LUN_INJ_CONTINUOUS; break; } /* * By default, each error injection action is a one-shot */ if (desc->lun_error & CTL_LUN_INJ_CONTINUOUS) continue; STAILQ_REMOVE(&lun->error_list, desc, ctl_error_desc, links); free(desc, M_CTL); } } #ifdef CTL_IO_DELAY static void ctl_datamove_timer_wakeup(void *arg) { union ctl_io *io; io = (union ctl_io *)arg; ctl_datamove(io); } #endif /* CTL_IO_DELAY */ void ctl_datamove(union ctl_io *io) { void (*fe_datamove)(union ctl_io *io); mtx_assert(&control_softc->ctl_lock, MA_NOTOWNED); CTL_DEBUG_PRINT(("ctl_datamove\n")); #ifdef CTL_TIME_IO if ((time_uptime - io->io_hdr.start_time) > ctl_time_io_secs) { char str[256]; char path_str[64]; struct sbuf sb; ctl_scsi_path_string(io, path_str, sizeof(path_str)); sbuf_new(&sb, str, sizeof(str), SBUF_FIXEDLEN); sbuf_cat(&sb, path_str); switch (io->io_hdr.io_type) { case CTL_IO_SCSI: ctl_scsi_command_string(&io->scsiio, NULL, &sb); sbuf_printf(&sb, "\n"); sbuf_cat(&sb, path_str); sbuf_printf(&sb, "Tag: 0x%04x, type %d\n", io->scsiio.tag_num, io->scsiio.tag_type); break; case CTL_IO_TASK: sbuf_printf(&sb, "Task I/O type: %d, Tag: 0x%04x, " "Tag Type: %d\n", io->taskio.task_action, io->taskio.tag_num, io->taskio.tag_type); break; default: printf("Invalid CTL I/O type %d\n", io->io_hdr.io_type); panic("Invalid CTL I/O type %d\n", io->io_hdr.io_type); break; } sbuf_cat(&sb, path_str); sbuf_printf(&sb, "ctl_datamove: %jd seconds\n", (intmax_t)time_uptime - io->io_hdr.start_time); sbuf_finish(&sb); printf("%s", sbuf_data(&sb)); } #endif /* CTL_TIME_IO */ #ifdef CTL_IO_DELAY if (io->io_hdr.flags & CTL_FLAG_DELAY_DONE) { struct ctl_lun *lun; lun =(struct ctl_lun *)io->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; io->io_hdr.flags &= ~CTL_FLAG_DELAY_DONE; } else { struct ctl_lun *lun; lun =(struct ctl_lun *)io->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; if ((lun != NULL) && (lun->delay_info.datamove_delay > 0)) { struct callout *callout; callout = (struct callout *)&io->io_hdr.timer_bytes; callout_init(callout, /*mpsafe*/ 1); io->io_hdr.flags |= CTL_FLAG_DELAY_DONE; callout_reset(callout, lun->delay_info.datamove_delay * hz, ctl_datamove_timer_wakeup, io); if (lun->delay_info.datamove_type == CTL_DELAY_TYPE_ONESHOT) lun->delay_info.datamove_delay = 0; return; } } #endif /* * This command has been aborted. Set the port status, so we fail * the data move. */ if (io->io_hdr.flags & CTL_FLAG_ABORT) { printf("ctl_datamove: tag 0x%04x on (%ju:%d:%ju:%d) aborted\n", io->scsiio.tag_num,(uintmax_t)io->io_hdr.nexus.initid.id, io->io_hdr.nexus.targ_port, (uintmax_t)io->io_hdr.nexus.targ_target.id, io->io_hdr.nexus.targ_lun); io->io_hdr.port_status = 31337; /* * Note that the backend, in this case, will get the * callback in its context. In other cases it may get * called in the frontend's interrupt thread context. */ io->scsiio.be_move_done(io); return; } /* Don't confuse frontend with zero length data move. */ if (io->scsiio.kern_data_len == 0) { io->scsiio.be_move_done(io); return; } /* * If we're in XFER mode and this I/O is from the other shelf * controller, we need to send the DMA to the other side to * actually transfer the data to/from the host. In serialize only * mode the transfer happens below CTL and ctl_datamove() is only * called on the machine that originally received the I/O. */ if ((control_softc->ha_mode == CTL_HA_MODE_XFER) && (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)) { union ctl_ha_msg msg; uint32_t sg_entries_sent; int do_sg_copy; int i; memset(&msg, 0, sizeof(msg)); msg.hdr.msg_type = CTL_MSG_DATAMOVE; msg.hdr.original_sc = io->io_hdr.original_sc; msg.hdr.serializing_sc = io; msg.hdr.nexus = io->io_hdr.nexus; msg.dt.flags = io->io_hdr.flags; /* * We convert everything into a S/G list here. We can't * pass by reference, only by value between controllers. * So we can't pass a pointer to the S/G list, only as many * S/G entries as we can fit in here. If it's possible for * us to get more than CTL_HA_MAX_SG_ENTRIES S/G entries, * then we need to break this up into multiple transfers. */ if (io->scsiio.kern_sg_entries == 0) { msg.dt.kern_sg_entries = 1; /* * If this is in cached memory, flush the cache * before we send the DMA request to the other * controller. We want to do this in either the * read or the write case. The read case is * straightforward. In the write case, we want to * make sure nothing is in the local cache that * could overwrite the DMAed data. */ if ((io->io_hdr.flags & CTL_FLAG_NO_DATASYNC) == 0) { /* * XXX KDM use bus_dmamap_sync() here. */ } /* * Convert to a physical address if this is a * virtual address. */ if (io->io_hdr.flags & CTL_FLAG_BUS_ADDR) { msg.dt.sg_list[0].addr = io->scsiio.kern_data_ptr; } else { /* * XXX KDM use busdma here! */ #if 0 msg.dt.sg_list[0].addr = (void *) vtophys(io->scsiio.kern_data_ptr); #endif } msg.dt.sg_list[0].len = io->scsiio.kern_data_len; do_sg_copy = 0; } else { struct ctl_sg_entry *sgl; do_sg_copy = 1; msg.dt.kern_sg_entries = io->scsiio.kern_sg_entries; sgl = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr; if ((io->io_hdr.flags & CTL_FLAG_NO_DATASYNC) == 0) { /* * XXX KDM use bus_dmamap_sync() here. */ } } msg.dt.kern_data_len = io->scsiio.kern_data_len; msg.dt.kern_total_len = io->scsiio.kern_total_len; msg.dt.kern_data_resid = io->scsiio.kern_data_resid; msg.dt.kern_rel_offset = io->scsiio.kern_rel_offset; msg.dt.sg_sequence = 0; /* * Loop until we've sent all of the S/G entries. On the * other end, we'll recompose these S/G entries into one * contiguous list before passing it to the */ for (sg_entries_sent = 0; sg_entries_sent < msg.dt.kern_sg_entries; msg.dt.sg_sequence++) { msg.dt.cur_sg_entries = MIN((sizeof(msg.dt.sg_list)/ sizeof(msg.dt.sg_list[0])), msg.dt.kern_sg_entries - sg_entries_sent); if (do_sg_copy != 0) { struct ctl_sg_entry *sgl; int j; sgl = (struct ctl_sg_entry *) io->scsiio.kern_data_ptr; /* * If this is in cached memory, flush the cache * before we send the DMA request to the other * controller. We want to do this in either * the * read or the write case. The read * case is straightforward. In the write * case, we want to make sure nothing is * in the local cache that could overwrite * the DMAed data. */ for (i = sg_entries_sent, j = 0; i < msg.dt.cur_sg_entries; i++, j++) { if ((io->io_hdr.flags & CTL_FLAG_NO_DATASYNC) == 0) { /* * XXX KDM use bus_dmamap_sync() */ } if ((io->io_hdr.flags & CTL_FLAG_BUS_ADDR) == 0) { /* * XXX KDM use busdma. */ #if 0 msg.dt.sg_list[j].addr =(void *) vtophys(sgl[i].addr); #endif } else { msg.dt.sg_list[j].addr = sgl[i].addr; } msg.dt.sg_list[j].len = sgl[i].len; } } sg_entries_sent += msg.dt.cur_sg_entries; if (sg_entries_sent >= msg.dt.kern_sg_entries) msg.dt.sg_last = 1; else msg.dt.sg_last = 0; /* * XXX KDM drop and reacquire the lock here? */ if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg, sizeof(msg), 0) > CTL_HA_STATUS_SUCCESS) { /* * XXX do something here. */ } msg.dt.sent_sg_entries = sg_entries_sent; } io->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE; if (io->io_hdr.flags & CTL_FLAG_FAILOVER) ctl_failover_io(io, /*have_lock*/ 0); } else { /* * Lookup the fe_datamove() function for this particular * front end. */ fe_datamove = control_softc->ctl_ports[ctl_port_idx(io->io_hdr.nexus.targ_port)]->fe_datamove; fe_datamove(io); } } static void ctl_send_datamove_done(union ctl_io *io, int have_lock) { union ctl_ha_msg msg; int isc_status; memset(&msg, 0, sizeof(msg)); msg.hdr.msg_type = CTL_MSG_DATAMOVE_DONE; msg.hdr.original_sc = io; msg.hdr.serializing_sc = io->io_hdr.serializing_sc; msg.hdr.nexus = io->io_hdr.nexus; msg.hdr.status = io->io_hdr.status; msg.scsi.tag_num = io->scsiio.tag_num; msg.scsi.tag_type = io->scsiio.tag_type; msg.scsi.scsi_status = io->scsiio.scsi_status; memcpy(&msg.scsi.sense_data, &io->scsiio.sense_data, sizeof(io->scsiio.sense_data)); msg.scsi.sense_len = io->scsiio.sense_len; msg.scsi.sense_residual = io->scsiio.sense_residual; msg.scsi.fetd_status = io->io_hdr.port_status; msg.scsi.residual = io->scsiio.residual; io->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE; if (io->io_hdr.flags & CTL_FLAG_FAILOVER) { ctl_failover_io(io, /*have_lock*/ have_lock); return; } isc_status = ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg, sizeof(msg), 0); if (isc_status > CTL_HA_STATUS_SUCCESS) { /* XXX do something if this fails */ } } /* * The DMA to the remote side is done, now we need to tell the other side * we're done so it can continue with its data movement. */ static void ctl_datamove_remote_write_cb(struct ctl_ha_dt_req *rq) { union ctl_io *io; io = rq->context; if (rq->ret != CTL_HA_STATUS_SUCCESS) { printf("%s: ISC DMA write failed with error %d", __func__, rq->ret); ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ rq->ret); } ctl_dt_req_free(rq); /* * In this case, we had to malloc the memory locally. Free it. */ if ((io->io_hdr.flags & CTL_FLAG_AUTO_MIRROR) == 0) { int i; for (i = 0; i < io->scsiio.kern_sg_entries; i++) free(io->io_hdr.local_sglist[i].addr, M_CTL); } /* * The data is in local and remote memory, so now we need to send * status (good or back) back to the other side. */ ctl_send_datamove_done(io, /*have_lock*/ 0); } /* * We've moved the data from the host/controller into local memory. Now we * need to push it over to the remote controller's memory. */ static int ctl_datamove_remote_dm_write_cb(union ctl_io *io) { int retval; retval = 0; retval = ctl_datamove_remote_xfer(io, CTL_HA_DT_CMD_WRITE, ctl_datamove_remote_write_cb); return (retval); } static void ctl_datamove_remote_write(union ctl_io *io) { int retval; void (*fe_datamove)(union ctl_io *io); /* * - Get the data from the host/HBA into local memory. * - DMA memory from the local controller to the remote controller. * - Send status back to the remote controller. */ retval = ctl_datamove_remote_sgl_setup(io); if (retval != 0) return; /* Switch the pointer over so the FETD knows what to do */ io->scsiio.kern_data_ptr = (uint8_t *)io->io_hdr.local_sglist; /* * Use a custom move done callback, since we need to send completion * back to the other controller, not to the backend on this side. */ io->scsiio.be_move_done = ctl_datamove_remote_dm_write_cb; fe_datamove = control_softc->ctl_ports[ctl_port_idx(io->io_hdr.nexus.targ_port)]->fe_datamove; fe_datamove(io); return; } static int ctl_datamove_remote_dm_read_cb(union ctl_io *io) { #if 0 char str[256]; char path_str[64]; struct sbuf sb; #endif /* * In this case, we had to malloc the memory locally. Free it. */ if ((io->io_hdr.flags & CTL_FLAG_AUTO_MIRROR) == 0) { int i; for (i = 0; i < io->scsiio.kern_sg_entries; i++) free(io->io_hdr.local_sglist[i].addr, M_CTL); } #if 0 scsi_path_string(io, path_str, sizeof(path_str)); sbuf_new(&sb, str, sizeof(str), SBUF_FIXEDLEN); sbuf_cat(&sb, path_str); scsi_command_string(&io->scsiio, NULL, &sb); sbuf_printf(&sb, "\n"); sbuf_cat(&sb, path_str); sbuf_printf(&sb, "Tag: 0x%04x, type %d\n", io->scsiio.tag_num, io->scsiio.tag_type); sbuf_cat(&sb, path_str); sbuf_printf(&sb, "%s: flags %#x, status %#x\n", __func__, io->io_hdr.flags, io->io_hdr.status); sbuf_finish(&sb); printk("%s", sbuf_data(&sb)); #endif /* * The read is done, now we need to send status (good or bad) back * to the other side. */ ctl_send_datamove_done(io, /*have_lock*/ 0); return (0); } static void ctl_datamove_remote_read_cb(struct ctl_ha_dt_req *rq) { union ctl_io *io; void (*fe_datamove)(union ctl_io *io); io = rq->context; if (rq->ret != CTL_HA_STATUS_SUCCESS) { printf("%s: ISC DMA read failed with error %d", __func__, rq->ret); ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ rq->ret); } ctl_dt_req_free(rq); /* Switch the pointer over so the FETD knows what to do */ io->scsiio.kern_data_ptr = (uint8_t *)io->io_hdr.local_sglist; /* * Use a custom move done callback, since we need to send completion * back to the other controller, not to the backend on this side. */ io->scsiio.be_move_done = ctl_datamove_remote_dm_read_cb; /* XXX KDM add checks like the ones in ctl_datamove? */ fe_datamove = control_softc->ctl_ports[ctl_port_idx(io->io_hdr.nexus.targ_port)]->fe_datamove; fe_datamove(io); } static int ctl_datamove_remote_sgl_setup(union ctl_io *io) { struct ctl_sg_entry *local_sglist, *remote_sglist; struct ctl_sg_entry *local_dma_sglist, *remote_dma_sglist; struct ctl_softc *softc; int retval; int i; retval = 0; softc = control_softc; local_sglist = io->io_hdr.local_sglist; local_dma_sglist = io->io_hdr.local_dma_sglist; remote_sglist = io->io_hdr.remote_sglist; remote_dma_sglist = io->io_hdr.remote_dma_sglist; if (io->io_hdr.flags & CTL_FLAG_AUTO_MIRROR) { for (i = 0; i < io->scsiio.kern_sg_entries; i++) { local_sglist[i].len = remote_sglist[i].len; /* * XXX Detect the situation where the RS-level I/O * redirector on the other side has already read the * data off of the AOR RS on this side, and * transferred it to remote (mirror) memory on the * other side. Since we already have the data in * memory here, we just need to use it. * * XXX KDM this can probably be removed once we * get the cache device code in and take the * current AOR implementation out. */ #ifdef NEEDTOPORT if ((remote_sglist[i].addr >= (void *)vtophys(softc->mirr->addr)) && (remote_sglist[i].addr < ((void *)vtophys(softc->mirr->addr) + CacheMirrorOffset))) { local_sglist[i].addr = remote_sglist[i].addr - CacheMirrorOffset; if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) io->io_hdr.flags |= CTL_FLAG_REDIR_DONE; } else { local_sglist[i].addr = remote_sglist[i].addr + CacheMirrorOffset; } #endif #if 0 printf("%s: local %p, remote %p, len %d\n", __func__, local_sglist[i].addr, remote_sglist[i].addr, local_sglist[i].len); #endif } } else { uint32_t len_to_go; /* * In this case, we don't have automatically allocated * memory for this I/O on this controller. This typically * happens with internal CTL I/O -- e.g. inquiry, mode * sense, etc. Anything coming from RAIDCore will have * a mirror area available. */ len_to_go = io->scsiio.kern_data_len; /* * Clear the no datasync flag, we have to use malloced * buffers. */ io->io_hdr.flags &= ~CTL_FLAG_NO_DATASYNC; /* * The difficult thing here is that the size of the various * S/G segments may be different than the size from the * remote controller. That'll make it harder when DMAing * the data back to the other side. */ for (i = 0; (i < sizeof(io->io_hdr.remote_sglist) / sizeof(io->io_hdr.remote_sglist[0])) && (len_to_go > 0); i++) { local_sglist[i].len = MIN(len_to_go, 131072); CTL_SIZE_8B(local_dma_sglist[i].len, local_sglist[i].len); local_sglist[i].addr = malloc(local_dma_sglist[i].len, M_CTL,M_WAITOK); local_dma_sglist[i].addr = local_sglist[i].addr; if (local_sglist[i].addr == NULL) { int j; printf("malloc failed for %zd bytes!", local_dma_sglist[i].len); for (j = 0; j < i; j++) { free(local_sglist[j].addr, M_CTL); } ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ 4857); retval = 1; goto bailout_error; } /* XXX KDM do we need a sync here? */ len_to_go -= local_sglist[i].len; } /* * Reset the number of S/G entries accordingly. The * original number of S/G entries is available in * rem_sg_entries. */ io->scsiio.kern_sg_entries = i; #if 0 printf("%s: kern_sg_entries = %d\n", __func__, io->scsiio.kern_sg_entries); for (i = 0; i < io->scsiio.kern_sg_entries; i++) printf("%s: sg[%d] = %p, %d (DMA: %d)\n", __func__, i, local_sglist[i].addr, local_sglist[i].len, local_dma_sglist[i].len); #endif } return (retval); bailout_error: ctl_send_datamove_done(io, /*have_lock*/ 0); return (retval); } static int ctl_datamove_remote_xfer(union ctl_io *io, unsigned command, ctl_ha_dt_cb callback) { struct ctl_ha_dt_req *rq; struct ctl_sg_entry *remote_sglist, *local_sglist; struct ctl_sg_entry *remote_dma_sglist, *local_dma_sglist; uint32_t local_used, remote_used, total_used; int retval; int i, j; retval = 0; rq = ctl_dt_req_alloc(); /* * If we failed to allocate the request, and if the DMA didn't fail * anyway, set busy status. This is just a resource allocation * failure. */ if ((rq == NULL) && ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) ctl_set_busy(&io->scsiio); if ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE) { if (rq != NULL) ctl_dt_req_free(rq); /* * The data move failed. We need to return status back * to the other controller. No point in trying to DMA * data to the remote controller. */ ctl_send_datamove_done(io, /*have_lock*/ 0); retval = 1; goto bailout; } local_sglist = io->io_hdr.local_sglist; local_dma_sglist = io->io_hdr.local_dma_sglist; remote_sglist = io->io_hdr.remote_sglist; remote_dma_sglist = io->io_hdr.remote_dma_sglist; local_used = 0; remote_used = 0; total_used = 0; if (io->io_hdr.flags & CTL_FLAG_REDIR_DONE) { rq->ret = CTL_HA_STATUS_SUCCESS; rq->context = io; callback(rq); goto bailout; } /* * Pull/push the data over the wire from/to the other controller. * This takes into account the possibility that the local and * remote sglists may not be identical in terms of the size of * the elements and the number of elements. * * One fundamental assumption here is that the length allocated for * both the local and remote sglists is identical. Otherwise, we've * essentially got a coding error of some sort. */ for (i = 0, j = 0; total_used < io->scsiio.kern_data_len; ) { int isc_ret; uint32_t cur_len, dma_length; uint8_t *tmp_ptr; rq->id = CTL_HA_DATA_CTL; rq->command = command; rq->context = io; /* * Both pointers should be aligned. But it is possible * that the allocation length is not. They should both * also have enough slack left over at the end, though, * to round up to the next 8 byte boundary. */ cur_len = MIN(local_sglist[i].len - local_used, remote_sglist[j].len - remote_used); /* * In this case, we have a size issue and need to decrease * the size, except in the case where we actually have less * than 8 bytes left. In that case, we need to increase * the DMA length to get the last bit. */ if ((cur_len & 0x7) != 0) { if (cur_len > 0x7) { cur_len = cur_len - (cur_len & 0x7); dma_length = cur_len; } else { CTL_SIZE_8B(dma_length, cur_len); } } else dma_length = cur_len; /* * If we had to allocate memory for this I/O, instead of using * the non-cached mirror memory, we'll need to flush the cache * before trying to DMA to the other controller. * * We could end up doing this multiple times for the same * segment if we have a larger local segment than remote * segment. That shouldn't be an issue. */ if ((io->io_hdr.flags & CTL_FLAG_NO_DATASYNC) == 0) { /* * XXX KDM use bus_dmamap_sync() here. */ } rq->size = dma_length; tmp_ptr = (uint8_t *)local_sglist[i].addr; tmp_ptr += local_used; /* Use physical addresses when talking to ISC hardware */ if ((io->io_hdr.flags & CTL_FLAG_BUS_ADDR) == 0) { /* XXX KDM use busdma */ #if 0 rq->local = vtophys(tmp_ptr); #endif } else rq->local = tmp_ptr; tmp_ptr = (uint8_t *)remote_sglist[j].addr; tmp_ptr += remote_used; rq->remote = tmp_ptr; rq->callback = NULL; local_used += cur_len; if (local_used >= local_sglist[i].len) { i++; local_used = 0; } remote_used += cur_len; if (remote_used >= remote_sglist[j].len) { j++; remote_used = 0; } total_used += cur_len; if (total_used >= io->scsiio.kern_data_len) rq->callback = callback; if ((rq->size & 0x7) != 0) { printf("%s: warning: size %d is not on 8b boundary\n", __func__, rq->size); } if (((uintptr_t)rq->local & 0x7) != 0) { printf("%s: warning: local %p not on 8b boundary\n", __func__, rq->local); } if (((uintptr_t)rq->remote & 0x7) != 0) { printf("%s: warning: remote %p not on 8b boundary\n", __func__, rq->local); } #if 0 printf("%s: %s: local %#x remote %#x size %d\n", __func__, (command == CTL_HA_DT_CMD_WRITE) ? "WRITE" : "READ", rq->local, rq->remote, rq->size); #endif isc_ret = ctl_dt_single(rq); if (isc_ret == CTL_HA_STATUS_WAIT) continue; if (isc_ret == CTL_HA_STATUS_DISCONNECT) { rq->ret = CTL_HA_STATUS_SUCCESS; } else { rq->ret = isc_ret; } callback(rq); goto bailout; } bailout: return (retval); } static void ctl_datamove_remote_read(union ctl_io *io) { int retval; int i; /* * This will send an error to the other controller in the case of a * failure. */ retval = ctl_datamove_remote_sgl_setup(io); if (retval != 0) return; retval = ctl_datamove_remote_xfer(io, CTL_HA_DT_CMD_READ, ctl_datamove_remote_read_cb); if ((retval != 0) && ((io->io_hdr.flags & CTL_FLAG_AUTO_MIRROR) == 0)) { /* * Make sure we free memory if there was an error.. The * ctl_datamove_remote_xfer() function will send the * datamove done message, or call the callback with an * error if there is a problem. */ for (i = 0; i < io->scsiio.kern_sg_entries; i++) free(io->io_hdr.local_sglist[i].addr, M_CTL); } return; } /* * Process a datamove request from the other controller. This is used for * XFER mode only, not SER_ONLY mode. For writes, we DMA into local memory * first. Once that is complete, the data gets DMAed into the remote * controller's memory. For reads, we DMA from the remote controller's * memory into our memory first, and then move it out to the FETD. */ static void ctl_datamove_remote(union ctl_io *io) { struct ctl_softc *softc; softc = control_softc; mtx_assert(&softc->ctl_lock, MA_NOTOWNED); /* * Note that we look for an aborted I/O here, but don't do some of * the other checks that ctl_datamove() normally does. * We don't need to run the datamove delay code, since that should * have been done if need be on the other controller. */ if (io->io_hdr.flags & CTL_FLAG_ABORT) { printf("%s: tag 0x%04x on (%d:%d:%d:%d) aborted\n", __func__, io->scsiio.tag_num, io->io_hdr.nexus.initid.id, io->io_hdr.nexus.targ_port, io->io_hdr.nexus.targ_target.id, io->io_hdr.nexus.targ_lun); io->io_hdr.port_status = 31338; ctl_send_datamove_done(io, /*have_lock*/ 0); return; } if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_OUT) { ctl_datamove_remote_write(io); } else if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN){ ctl_datamove_remote_read(io); } else { union ctl_ha_msg msg; struct scsi_sense_data *sense; uint8_t sks[3]; int retry_count; memset(&msg, 0, sizeof(msg)); msg.hdr.msg_type = CTL_MSG_BAD_JUJU; msg.hdr.status = CTL_SCSI_ERROR; msg.scsi.scsi_status = SCSI_STATUS_CHECK_COND; retry_count = 4243; sense = &msg.scsi.sense_data; sks[0] = SSD_SCS_VALID; sks[1] = (retry_count >> 8) & 0xff; sks[2] = retry_count & 0xff; /* "Internal target failure" */ scsi_set_sense_data(sense, /*sense_format*/ SSD_TYPE_NONE, /*current_error*/ 1, /*sense_key*/ SSD_KEY_HARDWARE_ERROR, /*asc*/ 0x44, /*ascq*/ 0x00, /*type*/ SSD_ELEM_SKS, /*size*/ sizeof(sks), /*data*/ sks, SSD_ELEM_NONE); io->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE; if (io->io_hdr.flags & CTL_FLAG_FAILOVER) { ctl_failover_io(io, /*have_lock*/ 1); return; } if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg, sizeof(msg), 0) > CTL_HA_STATUS_SUCCESS) { /* XXX KDM what to do if this fails? */ } return; } } static int ctl_process_done(union ctl_io *io) { struct ctl_lun *lun; struct ctl_softc *softc = control_softc; void (*fe_done)(union ctl_io *io); uint32_t targ_port = ctl_port_idx(io->io_hdr.nexus.targ_port); CTL_DEBUG_PRINT(("ctl_process_done\n")); fe_done = softc->ctl_ports[targ_port]->fe_done; #ifdef CTL_TIME_IO if ((time_uptime - io->io_hdr.start_time) > ctl_time_io_secs) { char str[256]; char path_str[64]; struct sbuf sb; ctl_scsi_path_string(io, path_str, sizeof(path_str)); sbuf_new(&sb, str, sizeof(str), SBUF_FIXEDLEN); sbuf_cat(&sb, path_str); switch (io->io_hdr.io_type) { case CTL_IO_SCSI: ctl_scsi_command_string(&io->scsiio, NULL, &sb); sbuf_printf(&sb, "\n"); sbuf_cat(&sb, path_str); sbuf_printf(&sb, "Tag: 0x%04x, type %d\n", io->scsiio.tag_num, io->scsiio.tag_type); break; case CTL_IO_TASK: sbuf_printf(&sb, "Task I/O type: %d, Tag: 0x%04x, " "Tag Type: %d\n", io->taskio.task_action, io->taskio.tag_num, io->taskio.tag_type); break; default: printf("Invalid CTL I/O type %d\n", io->io_hdr.io_type); panic("Invalid CTL I/O type %d\n", io->io_hdr.io_type); break; } sbuf_cat(&sb, path_str); sbuf_printf(&sb, "ctl_process_done: %jd seconds\n", (intmax_t)time_uptime - io->io_hdr.start_time); sbuf_finish(&sb); printf("%s", sbuf_data(&sb)); } #endif /* CTL_TIME_IO */ switch (io->io_hdr.io_type) { case CTL_IO_SCSI: break; case CTL_IO_TASK: if (ctl_debug & CTL_DEBUG_INFO) ctl_io_error_print(io, NULL); if (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) ctl_free_io(io); else fe_done(io); return (CTL_RETVAL_COMPLETE); default: panic("ctl_process_done: invalid io type %d\n", io->io_hdr.io_type); break; /* NOTREACHED */ } lun = (struct ctl_lun *)io->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; if (lun == NULL) { CTL_DEBUG_PRINT(("NULL LUN for lun %d\n", io->io_hdr.nexus.targ_mapped_lun)); goto bailout; } mtx_lock(&lun->lun_lock); /* * Check to see if we have any errors to inject here. We only * inject errors for commands that don't already have errors set. */ if ((STAILQ_FIRST(&lun->error_list) != NULL) && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) && ((io->io_hdr.flags & CTL_FLAG_STATUS_SENT) == 0)) ctl_inject_error(lun, io); /* * XXX KDM how do we treat commands that aren't completed * successfully? * * XXX KDM should we also track I/O latency? */ if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS && io->io_hdr.io_type == CTL_IO_SCSI) { #ifdef CTL_TIME_IO struct bintime cur_bt; #endif int type; if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) type = CTL_STATS_READ; else if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_OUT) type = CTL_STATS_WRITE; else type = CTL_STATS_NO_IO; lun->stats.ports[targ_port].bytes[type] += io->scsiio.kern_total_len; lun->stats.ports[targ_port].operations[type]++; #ifdef CTL_TIME_IO bintime_add(&lun->stats.ports[targ_port].dma_time[type], &io->io_hdr.dma_bt); lun->stats.ports[targ_port].num_dmas[type] += io->io_hdr.num_dmas; getbintime(&cur_bt); bintime_sub(&cur_bt, &io->io_hdr.start_bt); bintime_add(&lun->stats.ports[targ_port].time[type], &cur_bt); #endif } /* * Remove this from the OOA queue. */ TAILQ_REMOVE(&lun->ooa_queue, &io->io_hdr, ooa_links); #ifdef CTL_TIME_IO if (TAILQ_EMPTY(&lun->ooa_queue)) lun->last_busy = getsbinuptime(); #endif /* * Run through the blocked queue on this LUN and see if anything * has become unblocked, now that this transaction is done. */ ctl_check_blocked(lun); /* * If the LUN has been invalidated, free it if there is nothing * left on its OOA queue. */ if ((lun->flags & CTL_LUN_INVALID) && TAILQ_EMPTY(&lun->ooa_queue)) { mtx_unlock(&lun->lun_lock); mtx_lock(&softc->ctl_lock); ctl_free_lun(lun); mtx_unlock(&softc->ctl_lock); } else mtx_unlock(&lun->lun_lock); bailout: /* * If this command has been aborted, make sure we set the status * properly. The FETD is responsible for freeing the I/O and doing * whatever it needs to do to clean up its state. */ if (io->io_hdr.flags & CTL_FLAG_ABORT) ctl_set_task_aborted(&io->scsiio); /* * If enabled, print command error status. */ if ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS && (ctl_debug & CTL_DEBUG_INFO) != 0) ctl_io_error_print(io, NULL); /* * Tell the FETD or the other shelf controller we're done with this * command. Note that only SCSI commands get to this point. Task * management commands are completed above. * * We only send status to the other controller if we're in XFER * mode. In SER_ONLY mode, the I/O is done on the controller that * received the I/O (from CTL's perspective), and so the status is * generated there. * * XXX KDM if we hold the lock here, we could cause a deadlock * if the frontend comes back in in this context to queue * something. */ if ((softc->ha_mode == CTL_HA_MODE_XFER) && (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)) { union ctl_ha_msg msg; memset(&msg, 0, sizeof(msg)); msg.hdr.msg_type = CTL_MSG_FINISH_IO; msg.hdr.original_sc = io->io_hdr.original_sc; msg.hdr.nexus = io->io_hdr.nexus; msg.hdr.status = io->io_hdr.status; msg.scsi.scsi_status = io->scsiio.scsi_status; msg.scsi.tag_num = io->scsiio.tag_num; msg.scsi.tag_type = io->scsiio.tag_type; msg.scsi.sense_len = io->scsiio.sense_len; msg.scsi.sense_residual = io->scsiio.sense_residual; msg.scsi.residual = io->scsiio.residual; memcpy(&msg.scsi.sense_data, &io->scsiio.sense_data, sizeof(io->scsiio.sense_data)); /* * We copy this whether or not this is an I/O-related * command. Otherwise, we'd have to go and check to see * whether it's a read/write command, and it really isn't * worth it. */ memcpy(&msg.scsi.lbalen, &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN].bytes, sizeof(msg.scsi.lbalen)); if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg, sizeof(msg), 0) > CTL_HA_STATUS_SUCCESS) { /* XXX do something here */ } ctl_free_io(io); } else fe_done(io); return (CTL_RETVAL_COMPLETE); } #ifdef CTL_WITH_CA /* * Front end should call this if it doesn't do autosense. When the request * sense comes back in from the initiator, we'll dequeue this and send it. */ int ctl_queue_sense(union ctl_io *io) { struct ctl_lun *lun; struct ctl_port *port; struct ctl_softc *softc; uint32_t initidx, targ_lun; softc = control_softc; CTL_DEBUG_PRINT(("ctl_queue_sense\n")); /* * LUN lookup will likely move to the ctl_work_thread() once we * have our new queueing infrastructure (that doesn't put things on * a per-LUN queue initially). That is so that we can handle * things like an INQUIRY to a LUN that we don't have enabled. We * can't deal with that right now. */ mtx_lock(&softc->ctl_lock); /* * If we don't have a LUN for this, just toss the sense * information. */ port = ctl_io_port(&ctsio->io_hdr); targ_lun = ctl_lun_map_from_port(port, io->io_hdr.nexus.targ_lun); if ((targ_lun < CTL_MAX_LUNS) && (softc->ctl_luns[targ_lun] != NULL)) lun = softc->ctl_luns[targ_lun]; else goto bailout; initidx = ctl_get_initindex(&io->io_hdr.nexus); mtx_lock(&lun->lun_lock); /* * Already have CA set for this LUN...toss the sense information. */ if (ctl_is_set(lun->have_ca, initidx)) { mtx_unlock(&lun->lun_lock); goto bailout; } memcpy(&lun->pending_sense[initidx], &io->scsiio.sense_data, MIN(sizeof(lun->pending_sense[initidx]), sizeof(io->scsiio.sense_data))); ctl_set_mask(lun->have_ca, initidx); mtx_unlock(&lun->lun_lock); bailout: mtx_unlock(&softc->ctl_lock); ctl_free_io(io); return (CTL_RETVAL_COMPLETE); } #endif /* * Primary command inlet from frontend ports. All SCSI and task I/O * requests must go through this function. */ int ctl_queue(union ctl_io *io) { struct ctl_port *port; CTL_DEBUG_PRINT(("ctl_queue cdb[0]=%02X\n", io->scsiio.cdb[0])); #ifdef CTL_TIME_IO io->io_hdr.start_time = time_uptime; getbintime(&io->io_hdr.start_bt); #endif /* CTL_TIME_IO */ /* Map FE-specific LUN ID into global one. */ port = ctl_io_port(&io->io_hdr); io->io_hdr.nexus.targ_mapped_lun = ctl_lun_map_from_port(port, io->io_hdr.nexus.targ_lun); switch (io->io_hdr.io_type) { case CTL_IO_SCSI: case CTL_IO_TASK: if (ctl_debug & CTL_DEBUG_CDB) ctl_io_print(io); ctl_enqueue_incoming(io); break; default: printf("ctl_queue: unknown I/O type %d\n", io->io_hdr.io_type); return (EINVAL); } return (CTL_RETVAL_COMPLETE); } #ifdef CTL_IO_DELAY static void ctl_done_timer_wakeup(void *arg) { union ctl_io *io; io = (union ctl_io *)arg; ctl_done(io); } #endif /* CTL_IO_DELAY */ void ctl_done(union ctl_io *io) { /* * Enable this to catch duplicate completion issues. */ #if 0 if (io->io_hdr.flags & CTL_FLAG_ALREADY_DONE) { printf("%s: type %d msg %d cdb %x iptl: " "%d:%d:%d:%d tag 0x%04x " "flag %#x status %x\n", __func__, io->io_hdr.io_type, io->io_hdr.msg_type, io->scsiio.cdb[0], io->io_hdr.nexus.initid.id, io->io_hdr.nexus.targ_port, io->io_hdr.nexus.targ_target.id, io->io_hdr.nexus.targ_lun, (io->io_hdr.io_type == CTL_IO_TASK) ? io->taskio.tag_num : io->scsiio.tag_num, io->io_hdr.flags, io->io_hdr.status); } else io->io_hdr.flags |= CTL_FLAG_ALREADY_DONE; #endif /* * This is an internal copy of an I/O, and should not go through * the normal done processing logic. */ if (io->io_hdr.flags & CTL_FLAG_INT_COPY) return; /* * We need to send a msg to the serializing shelf to finish the IO * as well. We don't send a finish message to the other shelf if * this is a task management command. Task management commands * aren't serialized in the OOA queue, but rather just executed on * both shelf controllers for commands that originated on that * controller. */ if ((io->io_hdr.flags & CTL_FLAG_SENT_2OTHER_SC) && (io->io_hdr.io_type != CTL_IO_TASK)) { union ctl_ha_msg msg_io; msg_io.hdr.msg_type = CTL_MSG_FINISH_IO; msg_io.hdr.serializing_sc = io->io_hdr.serializing_sc; if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_io, sizeof(msg_io), 0 ) != CTL_HA_STATUS_SUCCESS) { } /* continue on to finish IO */ } #ifdef CTL_IO_DELAY if (io->io_hdr.flags & CTL_FLAG_DELAY_DONE) { struct ctl_lun *lun; lun =(struct ctl_lun *)io->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; io->io_hdr.flags &= ~CTL_FLAG_DELAY_DONE; } else { struct ctl_lun *lun; lun =(struct ctl_lun *)io->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; if ((lun != NULL) && (lun->delay_info.done_delay > 0)) { struct callout *callout; callout = (struct callout *)&io->io_hdr.timer_bytes; callout_init(callout, /*mpsafe*/ 1); io->io_hdr.flags |= CTL_FLAG_DELAY_DONE; callout_reset(callout, lun->delay_info.done_delay * hz, ctl_done_timer_wakeup, io); if (lun->delay_info.done_type == CTL_DELAY_TYPE_ONESHOT) lun->delay_info.done_delay = 0; return; } } #endif /* CTL_IO_DELAY */ ctl_enqueue_done(io); } int ctl_isc(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; int retval; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; CTL_DEBUG_PRINT(("ctl_isc: command: %02x\n", ctsio->cdb[0])); CTL_DEBUG_PRINT(("ctl_isc: calling data_submit()\n")); retval = lun->backend->data_submit((union ctl_io *)ctsio); return (retval); } static void ctl_work_thread(void *arg) { struct ctl_thread *thr = (struct ctl_thread *)arg; struct ctl_softc *softc = thr->ctl_softc; union ctl_io *io; int retval; CTL_DEBUG_PRINT(("ctl_work_thread starting\n")); for (;;) { retval = 0; /* * We handle the queues in this order: * - ISC * - done queue (to free up resources, unblock other commands) * - RtR queue * - incoming queue * * If those queues are empty, we break out of the loop and * go to sleep. */ mtx_lock(&thr->queue_lock); io = (union ctl_io *)STAILQ_FIRST(&thr->isc_queue); if (io != NULL) { STAILQ_REMOVE_HEAD(&thr->isc_queue, links); mtx_unlock(&thr->queue_lock); ctl_handle_isc(io); continue; } io = (union ctl_io *)STAILQ_FIRST(&thr->done_queue); if (io != NULL) { STAILQ_REMOVE_HEAD(&thr->done_queue, links); /* clear any blocked commands, call fe_done */ mtx_unlock(&thr->queue_lock); retval = ctl_process_done(io); continue; } io = (union ctl_io *)STAILQ_FIRST(&thr->incoming_queue); if (io != NULL) { STAILQ_REMOVE_HEAD(&thr->incoming_queue, links); mtx_unlock(&thr->queue_lock); if (io->io_hdr.io_type == CTL_IO_TASK) ctl_run_task(io); else ctl_scsiio_precheck(softc, &io->scsiio); continue; } if (!ctl_pause_rtr) { io = (union ctl_io *)STAILQ_FIRST(&thr->rtr_queue); if (io != NULL) { STAILQ_REMOVE_HEAD(&thr->rtr_queue, links); mtx_unlock(&thr->queue_lock); retval = ctl_scsiio(&io->scsiio); if (retval != CTL_RETVAL_COMPLETE) CTL_DEBUG_PRINT(("ctl_scsiio failed\n")); continue; } } /* Sleep until we have something to do. */ mtx_sleep(thr, &thr->queue_lock, PDROP | PRIBIO, "-", 0); } } static void ctl_lun_thread(void *arg) { struct ctl_softc *softc = (struct ctl_softc *)arg; struct ctl_be_lun *be_lun; int retval; CTL_DEBUG_PRINT(("ctl_lun_thread starting\n")); for (;;) { retval = 0; mtx_lock(&softc->ctl_lock); be_lun = STAILQ_FIRST(&softc->pending_lun_queue); if (be_lun != NULL) { STAILQ_REMOVE_HEAD(&softc->pending_lun_queue, links); mtx_unlock(&softc->ctl_lock); ctl_create_lun(be_lun); continue; } /* Sleep until we have something to do. */ mtx_sleep(&softc->pending_lun_queue, &softc->ctl_lock, PDROP | PRIBIO, "-", 0); } } static void ctl_thresh_thread(void *arg) { struct ctl_softc *softc = (struct ctl_softc *)arg; struct ctl_lun *lun; struct ctl_be_lun *be_lun; struct scsi_da_rw_recovery_page *rwpage; struct ctl_logical_block_provisioning_page *page; const char *attr; uint64_t thres, val; int i, e; CTL_DEBUG_PRINT(("ctl_thresh_thread starting\n")); for (;;) { mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(lun, &softc->lun_list, links) { be_lun = lun->be_lun; if ((lun->flags & CTL_LUN_DISABLED) || (lun->flags & CTL_LUN_OFFLINE) || lun->backend->lun_attr == NULL) continue; rwpage = &lun->mode_pages.rw_er_page[CTL_PAGE_CURRENT]; if ((rwpage->byte8 & SMS_RWER_LBPERE) == 0) continue; e = 0; page = &lun->mode_pages.lbp_page[CTL_PAGE_CURRENT]; for (i = 0; i < CTL_NUM_LBP_THRESH; i++) { if ((page->descr[i].flags & SLBPPD_ENABLED) == 0) continue; thres = scsi_4btoul(page->descr[i].count); thres <<= CTL_LBP_EXPONENT; switch (page->descr[i].resource) { case 0x01: attr = "blocksavail"; break; case 0x02: attr = "blocksused"; break; case 0xf1: attr = "poolblocksavail"; break; case 0xf2: attr = "poolblocksused"; break; default: continue; } mtx_unlock(&softc->ctl_lock); // XXX val = lun->backend->lun_attr( lun->be_lun->be_lun, attr); mtx_lock(&softc->ctl_lock); if (val == UINT64_MAX) continue; if ((page->descr[i].flags & SLBPPD_ARMING_MASK) == SLBPPD_ARMING_INC) e |= (val >= thres); else e |= (val <= thres); } mtx_lock(&lun->lun_lock); if (e) { if (lun->lasttpt == 0 || time_uptime - lun->lasttpt >= CTL_LBP_UA_PERIOD) { lun->lasttpt = time_uptime; ctl_est_ua_all(lun, -1, CTL_UA_THIN_PROV_THRES); } } else { lun->lasttpt = 0; ctl_clr_ua_all(lun, -1, CTL_UA_THIN_PROV_THRES); } mtx_unlock(&lun->lun_lock); } mtx_unlock(&softc->ctl_lock); pause("-", CTL_LBP_PERIOD * hz); } } static void ctl_enqueue_incoming(union ctl_io *io) { struct ctl_softc *softc = control_softc; struct ctl_thread *thr; u_int idx; idx = (io->io_hdr.nexus.targ_port * 127 + io->io_hdr.nexus.initid.id) % worker_threads; thr = &softc->threads[idx]; mtx_lock(&thr->queue_lock); STAILQ_INSERT_TAIL(&thr->incoming_queue, &io->io_hdr, links); mtx_unlock(&thr->queue_lock); wakeup(thr); } static void ctl_enqueue_rtr(union ctl_io *io) { struct ctl_softc *softc = control_softc; struct ctl_thread *thr; thr = &softc->threads[io->io_hdr.nexus.targ_mapped_lun % worker_threads]; mtx_lock(&thr->queue_lock); STAILQ_INSERT_TAIL(&thr->rtr_queue, &io->io_hdr, links); mtx_unlock(&thr->queue_lock); wakeup(thr); } static void ctl_enqueue_done(union ctl_io *io) { struct ctl_softc *softc = control_softc; struct ctl_thread *thr; thr = &softc->threads[io->io_hdr.nexus.targ_mapped_lun % worker_threads]; mtx_lock(&thr->queue_lock); STAILQ_INSERT_TAIL(&thr->done_queue, &io->io_hdr, links); mtx_unlock(&thr->queue_lock); wakeup(thr); } #ifdef notyet static void ctl_enqueue_isc(union ctl_io *io) { struct ctl_softc *softc = control_softc; struct ctl_thread *thr; thr = &softc->threads[io->io_hdr.nexus.targ_mapped_lun % worker_threads]; mtx_lock(&thr->queue_lock); STAILQ_INSERT_TAIL(&thr->isc_queue, &io->io_hdr, links); mtx_unlock(&thr->queue_lock); wakeup(thr); } /* Initialization and failover */ void ctl_init_isc_msg(void) { printf("CTL: Still calling this thing\n"); } /* * Init component * Initializes component into configuration defined by bootMode * (see hasc-sv.c) * returns hasc_Status: * OK * ERROR - fatal error */ static ctl_ha_comp_status ctl_isc_init(struct ctl_ha_component *c) { ctl_ha_comp_status ret = CTL_HA_COMP_STATUS_OK; c->status = ret; return ret; } /* Start component * Starts component in state requested. If component starts successfully, * it must set its own state to the requestrd state * When requested state is HASC_STATE_HA, the component may refine it * by adding _SLAVE or _MASTER flags. * Currently allowed state transitions are: * UNKNOWN->HA - initial startup * UNKNOWN->SINGLE - initial startup when no parter detected * HA->SINGLE - failover * returns ctl_ha_comp_status: * OK - component successfully started in requested state * FAILED - could not start the requested state, failover may * be possible * ERROR - fatal error detected, no future startup possible */ static ctl_ha_comp_status ctl_isc_start(struct ctl_ha_component *c, ctl_ha_state state) { ctl_ha_comp_status ret = CTL_HA_COMP_STATUS_OK; printf("%s: go\n", __func__); // UNKNOWN->HA or UNKNOWN->SINGLE (bootstrap) if (c->state == CTL_HA_STATE_UNKNOWN ) { control_softc->is_single = 0; if (ctl_ha_msg_create(CTL_HA_CHAN_CTL, ctl_isc_event_handler) != CTL_HA_STATUS_SUCCESS) { printf("ctl_isc_start: ctl_ha_msg_create failed.\n"); ret = CTL_HA_COMP_STATUS_ERROR; } } else if (CTL_HA_STATE_IS_HA(c->state) && CTL_HA_STATE_IS_SINGLE(state)){ // HA->SINGLE transition ctl_failover(); control_softc->is_single = 1; } else { printf("ctl_isc_start:Invalid state transition %X->%X\n", c->state, state); ret = CTL_HA_COMP_STATUS_ERROR; } if (CTL_HA_STATE_IS_SINGLE(state)) control_softc->is_single = 1; c->state = state; c->status = ret; return ret; } /* * Quiesce component * The component must clear any error conditions (set status to OK) and * prepare itself to another Start call * returns ctl_ha_comp_status: * OK * ERROR */ static ctl_ha_comp_status ctl_isc_quiesce(struct ctl_ha_component *c) { int ret = CTL_HA_COMP_STATUS_OK; ctl_pause_rtr = 1; c->status = ret; return ret; } struct ctl_ha_component ctl_ha_component_ctlisc = { .name = "CTL ISC", .state = CTL_HA_STATE_UNKNOWN, .init = ctl_isc_init, .start = ctl_isc_start, .quiesce = ctl_isc_quiesce }; #endif /* * vim: ts=8 */ Index: projects/clang-trunk/sys/cam/ctl/ctl.h =================================================================== --- projects/clang-trunk/sys/cam/ctl/ctl.h (revision 287501) +++ projects/clang-trunk/sys/cam/ctl/ctl.h (revision 287502) @@ -1,221 +1,223 @@ /*- * Copyright (c) 2003 Silicon Graphics International Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl.h#5 $ * $FreeBSD$ */ /* * Function definitions used both within CTL and potentially in various CTL * clients. * * Author: Ken Merry */ #ifndef _CTL_H_ #define _CTL_H_ #define CTL_RETVAL_COMPLETE 0 #define CTL_RETVAL_QUEUED 1 #define CTL_RETVAL_ALLOCATED 2 #define CTL_RETVAL_ERROR 3 typedef enum { CTL_PORT_NONE = 0x00, CTL_PORT_FC = 0x01, CTL_PORT_SCSI = 0x02, CTL_PORT_IOCTL = 0x04, CTL_PORT_INTERNAL = 0x08, CTL_PORT_ISCSI = 0x10, CTL_PORT_SAS = 0x20, CTL_PORT_ALL = 0xff, CTL_PORT_ISC = 0x100 // FC port for inter-shelf communication } ctl_port_type; struct ctl_port_entry { ctl_port_type port_type; char port_name[64]; int32_t targ_port; int physical_port; int virtual_port; u_int flags; #define CTL_PORT_WWNN_VALID 0x01 #define CTL_PORT_WWPN_VALID 0x02 uint64_t wwnn; uint64_t wwpn; int online; }; struct ctl_modepage_header { uint8_t page_code; uint8_t subpage; int32_t len_used; int32_t len_left; }; struct ctl_modepage_aps { struct ctl_modepage_header header; uint8_t lock_active; }; union ctl_modepage_info { struct ctl_modepage_header header; struct ctl_modepage_aps aps; }; /* * Serial number length, for VPD page 0x80. */ #define CTL_SN_LEN 16 /* * Device ID length, for VPD page 0x83. */ #define CTL_DEVID_LEN 64 #define CTL_DEVID_MIN_LEN 16 /* * WWPN length, for VPD page 0x83. */ #define CTL_WWPN_LEN 8 #define CTL_DRIVER_NAME_LEN 32 /* * Unit attention types. ASC/ASCQ values for these should be placed in * ctl_build_ua. These are also listed in order of reporting priority. * i.e. a poweron UA is reported first, bus reset second, etc. */ typedef enum { CTL_UA_NONE = 0x0000, CTL_UA_POWERON = 0x0001, CTL_UA_BUS_RESET = 0x0002, CTL_UA_TARG_RESET = 0x0004, CTL_UA_I_T_NEXUS_LOSS = 0x0008, CTL_UA_LUN_RESET = 0x0010, CTL_UA_LUN_CHANGE = 0x0020, CTL_UA_MODE_CHANGE = 0x0040, CTL_UA_LOG_CHANGE = 0x0080, CTL_UA_RES_PREEMPT = 0x0400, CTL_UA_RES_RELEASE = 0x0800, CTL_UA_REG_PREEMPT = 0x1000, CTL_UA_ASYM_ACC_CHANGE = 0x2000, CTL_UA_CAPACITY_CHANGED = 0x4000, CTL_UA_THIN_PROV_THRES = 0x8000 } ctl_ua_type; #ifdef _KERNEL MALLOC_DECLARE(M_CTL); struct ctl_page_index; #ifdef SYSCTL_DECL /* from sysctl.h */ SYSCTL_DECL(_kern_cam_ctl); #endif /* * Call these routines to enable or disable front end ports. */ int ctl_port_enable(ctl_port_type port_type); int ctl_port_disable(ctl_port_type port_type); /* * This routine grabs a list of frontend ports. */ int ctl_port_list(struct ctl_port_entry *entries, int num_entries_alloced, int *num_entries_filled, int *num_entries_dropped, ctl_port_type port_type, int no_virtual); /* * Put a string into an sbuf, escaping characters that are illegal or not * recommended in XML. Note this doesn't escape everything, just > < and &. */ int ctl_sbuf_printf_esc(struct sbuf *sb, char *str, int size); int ctl_ffz(uint32_t *mask, uint32_t size); int ctl_set_mask(uint32_t *mask, uint32_t bit); int ctl_clear_mask(uint32_t *mask, uint32_t bit); int ctl_is_set(uint32_t *mask, uint32_t bit); int ctl_caching_sp_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, uint8_t *page_ptr); int ctl_control_page_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, uint8_t *page_ptr); /** int ctl_failover_sp_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, uint8_t *page_ptr); **/ int ctl_debugconf_sp_sense_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, int pc); int ctl_debugconf_sp_select_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, uint8_t *page_ptr); int ctl_lbp_log_sense_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, int pc); int ctl_sap_log_sense_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, int pc); int ctl_config_move_done(union ctl_io *io); void ctl_datamove(union ctl_io *io); void ctl_done(union ctl_io *io); void ctl_data_submit_done(union ctl_io *io); void ctl_config_read_done(union ctl_io *io); void ctl_config_write_done(union ctl_io *io); void ctl_portDB_changed(int portnum); #ifdef notyet void ctl_init_isc_msg(void); #endif int ctl_ioctl_io(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td); /* * KPI to manipulate LUN/port options */ struct ctl_option { STAILQ_ENTRY(ctl_option) links; char *name; char *value; }; typedef STAILQ_HEAD(ctl_options, ctl_option) ctl_options_t; struct ctl_be_arg; void ctl_init_opts(ctl_options_t *opts, int num_args, struct ctl_be_arg *args); +void ctl_update_opts(ctl_options_t *opts, int num_args, + struct ctl_be_arg *args); void ctl_free_opts(ctl_options_t *opts); char * ctl_get_opt(ctl_options_t *opts, const char *name); int ctl_expand_number(const char *buf, uint64_t *num); #endif /* _KERNEL */ #endif /* _CTL_H_ */ /* * vim: ts=8 */ Index: projects/clang-trunk/sys/cam/ctl/ctl_backend.c =================================================================== --- projects/clang-trunk/sys/cam/ctl/ctl_backend.c (revision 287501) +++ projects/clang-trunk/sys/cam/ctl/ctl_backend.c (revision 287502) @@ -1,220 +1,253 @@ /*- * Copyright (c) 2003 Silicon Graphics International Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend.c#3 $ */ /* * CTL backend driver registration routines * * Author: Ken Merry */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern struct ctl_softc *control_softc; int ctl_backend_register(struct ctl_backend_driver *be) { struct ctl_softc *softc; struct ctl_backend_driver *be_tmp; softc = control_softc; mtx_lock(&softc->ctl_lock); /* * Sanity check, make sure this isn't a duplicate registration. */ STAILQ_FOREACH(be_tmp, &softc->be_list, links) { if (strcmp(be_tmp->name, be->name) == 0) { mtx_unlock(&softc->ctl_lock); return (-1); } } mtx_unlock(&softc->ctl_lock); /* * Call the backend's initialization routine. */ be->init(); mtx_lock(&softc->ctl_lock); STAILQ_INSERT_TAIL(&softc->be_list, be, links); softc->num_backends++; /* * Don't want to increment the usage count for internal consumers, * we won't be able to unload otherwise. */ /* XXX KDM find a substitute for this? */ #if 0 if ((be->flags & CTL_BE_FLAG_INTERNAL) == 0) MOD_INC_USE_COUNT; #endif #ifdef CS_BE_CONFIG_MOVE_DONE_IS_NOT_USED be->config_move_done = ctl_config_move_done; #endif /* XXX KDM fix this! */ be->num_luns = 0; #if 0 atomic_set(&be->num_luns, 0); #endif mtx_unlock(&softc->ctl_lock); return (0); } int ctl_backend_deregister(struct ctl_backend_driver *be) { struct ctl_softc *softc; softc = control_softc; mtx_lock(&softc->ctl_lock); #if 0 if (atomic_read(&be->num_luns) != 0) { #endif /* XXX KDM fix this! */ if (be->num_luns != 0) { mtx_unlock(&softc->ctl_lock); return (-1); } STAILQ_REMOVE(&softc->be_list, be, ctl_backend_driver, links); softc->num_backends--; /* XXX KDM find a substitute for this? */ #if 0 if ((be->flags & CTL_BE_FLAG_INTERNAL) == 0) MOD_DEC_USE_COUNT; #endif mtx_unlock(&softc->ctl_lock); return (0); } struct ctl_backend_driver * ctl_backend_find(char *backend_name) { struct ctl_softc *softc; struct ctl_backend_driver *be_tmp; softc = control_softc; mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(be_tmp, &softc->be_list, links) { if (strcmp(be_tmp->name, backend_name) == 0) { mtx_unlock(&softc->ctl_lock); return (be_tmp); } } mtx_unlock(&softc->ctl_lock); return (NULL); } void ctl_init_opts(ctl_options_t *opts, int num_args, struct ctl_be_arg *args) { struct ctl_option *opt; int i; STAILQ_INIT(opts); for (i = 0; i < num_args; i++) { if ((args[i].flags & CTL_BEARG_RD) == 0) continue; if ((args[i].flags & CTL_BEARG_ASCII) == 0) continue; opt = malloc(sizeof(*opt), M_CTL, M_WAITOK); - opt->name = malloc(strlen(args[i].kname) + 1, M_CTL, M_WAITOK); - strcpy(opt->name, args[i].kname); - opt->value = malloc(strlen(args[i].kvalue) + 1, M_CTL, M_WAITOK); - strcpy(opt->value, args[i].kvalue); + opt->name = strdup(args[i].kname, M_CTL); + opt->value = strdup(args[i].kvalue, M_CTL); STAILQ_INSERT_TAIL(opts, opt, links); + } +} + +void +ctl_update_opts(ctl_options_t *opts, int num_args, struct ctl_be_arg *args) +{ + struct ctl_option *opt; + int i; + + for (i = 0; i < num_args; i++) { + if ((args[i].flags & CTL_BEARG_RD) == 0) + continue; + if ((args[i].flags & CTL_BEARG_ASCII) == 0) + continue; + STAILQ_FOREACH(opt, opts, links) { + if (strcmp(opt->name, args[i].kname) == 0) + break; + } + if (args[i].kvalue != NULL && + ((char *)args[i].kvalue)[0] != 0) { + if (opt) { + free(opt->value, M_CTL); + opt->value = strdup(args[i].kvalue, M_CTL); + } else { + opt = malloc(sizeof(*opt), M_CTL, M_WAITOK); + opt->name = strdup(args[i].kname, M_CTL); + opt->value = strdup(args[i].kvalue, M_CTL); + STAILQ_INSERT_TAIL(opts, opt, links); + } + } else if (opt) { + STAILQ_REMOVE(opts, opt, ctl_option, links); + free(opt->name, M_CTL); + free(opt->value, M_CTL); + free(opt, M_CTL); + } } } void ctl_free_opts(ctl_options_t *opts) { struct ctl_option *opt; while ((opt = STAILQ_FIRST(opts)) != NULL) { STAILQ_REMOVE_HEAD(opts, links); free(opt->name, M_CTL); free(opt->value, M_CTL); free(opt, M_CTL); } } char * ctl_get_opt(ctl_options_t *opts, const char *name) { struct ctl_option *opt; STAILQ_FOREACH(opt, opts, links) { if (strcmp(opt->name, name) == 0) { return (opt->value); } } return (NULL); } Index: projects/clang-trunk/sys/cam/ctl/ctl_backend.h =================================================================== --- projects/clang-trunk/sys/cam/ctl/ctl_backend.h (revision 287501) +++ projects/clang-trunk/sys/cam/ctl/ctl_backend.h (revision 287502) @@ -1,313 +1,320 @@ /*- * Copyright (c) 2003 Silicon Graphics International Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend.h#2 $ * $FreeBSD$ */ /* * CTL backend driver definitions * * Author: Ken Merry */ #ifndef _CTL_BACKEND_H_ #define _CTL_BACKEND_H_ /* * XXX KDM move this to another header file? */ #define CTL_BE_NAME_LEN 32 /* * The ID_REQ flag is used to say that the caller has requested a * particular LUN ID in the req_lun_id field. If we cannot allocate that * LUN ID, the ctl_add_lun() call will fail. * * The POWERED_OFF flag tells us that the LUN should default to the powered * off state. It will return 0x04,0x02 until it is powered up. ("Logical * unit not ready, initializing command required.") * * The INOPERABLE flag tells us that this LUN is not operable for whatever * reason. This means that user data may have been (or has been?) lost. * We will return 0x31,0x00 ("Medium format corrupted") until the host * issues a FORMAT UNIT command to clear the error. * * The PRIMARY flag tells us that this LUN is registered as a Primary LUN * which is accessible via the Master shelf controller in an HA. This flag * being set indicates a Primary LUN. This flag being reset represents a * Secondary LUN controlled by the Secondary controller in an HA * configuration. Flag is applicable at this time to T_DIRECT types. * * The SERIAL_NUM flag tells us that the serial_num field is filled in and * valid for use in SCSI INQUIRY VPD page 0x80. * * The DEVID flag tells us that the device_id field is filled in and * valid for use in SCSI INQUIRY VPD page 0x83. * * The DEV_TYPE flag tells us that the device_type field is filled in. * * The UNMAP flag tells us that this LUN supports UNMAP. * * The OFFLINE flag tells us that this LUN can not access backing store. */ typedef enum { CTL_LUN_FLAG_ID_REQ = 0x01, CTL_LUN_FLAG_POWERED_OFF = 0x02, CTL_LUN_FLAG_INOPERABLE = 0x04, CTL_LUN_FLAG_PRIMARY = 0x08, CTL_LUN_FLAG_SERIAL_NUM = 0x10, CTL_LUN_FLAG_DEVID = 0x20, CTL_LUN_FLAG_DEV_TYPE = 0x40, CTL_LUN_FLAG_UNMAP = 0x80, CTL_LUN_FLAG_OFFLINE = 0x100, - CTL_LUN_FLAG_SERSEQ_READ = 0x200 + CTL_LUN_FLAG_READONLY = 0x200 } ctl_backend_lun_flags; +typedef enum { + CTL_LUN_SERSEQ_OFF, + CTL_LUN_SERSEQ_READ, + CTL_LUN_SERSEQ_ON +} ctl_lun_serseq; + #ifdef _KERNEL #define CTL_BACKEND_DECLARE(name, driver) \ static int name ## _modevent(module_t mod, int type, void *data) \ { \ switch (type) { \ case MOD_LOAD: \ ctl_backend_register( \ (struct ctl_backend_driver *)data); \ break; \ case MOD_UNLOAD: \ printf(#name " module unload - not possible for this module type\n"); \ return EINVAL; \ default: \ return EOPNOTSUPP; \ } \ return 0; \ } \ static moduledata_t name ## _mod = { \ #name, \ name ## _modevent, \ (void *)&driver \ }; \ DECLARE_MODULE(name, name ## _mod, SI_SUB_CONFIGURE, SI_ORDER_FOURTH); \ MODULE_DEPEND(name, ctl, 1, 1, 1); \ MODULE_DEPEND(name, cam, 1, 1, 1) typedef enum { CTL_LUN_CONFIG_OK, CTL_LUN_CONFIG_FAILURE } ctl_lun_config_status; typedef void (*be_callback_t)(void *be_lun); typedef void (*be_lun_config_t)(void *be_lun, ctl_lun_config_status status); /* * The lun_type field is the SCSI device type of this particular LUN. In * general, this should be T_DIRECT, although backends will want to create * a processor LUN, typically at LUN 0. See scsi_all.h for the defines for * the various SCSI device types. * * The flags are described above. * * The be_lun field is the backend driver's own context that will get * passsed back so that it can tell which LUN CTL is referencing. * * maxlba is the maximum accessible LBA on the LUN. Note that this is * different from the capacity of the array. capacity = maxlba + 1 * * blocksize is the size, in bytes, of each LBA on the LUN. In general * this should be 512. In theory CTL should be able to handle other block * sizes. Host application software may not deal with it very well, though. * * pblockexp is the log2() of number of LBAs on the LUN per physical sector. * * pblockoff is the lowest LBA on the LUN aligned to physical sector. * * ublockexp is the log2() of number of LBAs on the LUN per UNMAP block. * * ublockoff is the lowest LBA on the LUN aligned to UNMAP block. * * atomicblock is the number of blocks that can be written atomically. * * opttxferlen is the number of blocks that can be written in one operation. * * req_lun_id is the requested LUN ID. CTL only pays attention to this * field if the CTL_LUN_FLAG_ID_REQ flag is set. If the requested LUN ID is * not available, the LUN addition will fail. If a particular LUN ID isn't * requested, the first available LUN ID will be allocated. * * serial_num is the device serial number returned in the SCSI INQUIRY VPD * page 0x80. This should be a unique, per-shelf value. The data inside * this field should be ASCII only, left aligned, and any unused space * should be padded out with ASCII spaces. This field should NOT be NULL * terminated. * * device_id is the T10 device identifier returned in the SCSI INQUIRY VPD * page 0x83. This should be a unique, per-LUN value. The data inside * this field should be ASCII only, left aligned, and any unused space * should be padded with ASCII spaces. This field should NOT be NULL * terminated. * * The lun_shutdown() method is the callback for the ctl_invalidate_lun() * call. It is called when all outstanding I/O for that LUN has been * completed and CTL has deleted the resources for that LUN. When the CTL * backend gets this call, it can safely free its per-LUN resources. * * The lun_config_status() method is the callback for the ctl_add_lun() * call. It is called when the LUN is successfully added, or when LUN * addition fails. If the LUN is successfully added, the backend may call * the ctl_enable_lun() method to enable the LUN. * * The be field is a pointer to the ctl_backend_driver structure, which * contains the backend methods to be called by CTL. * * The ctl_lun field is for CTL internal use only, and should not be used * by the backend. * * The links field is for CTL internal use only, and should not be used by * the backend. */ struct ctl_be_lun { uint8_t lun_type; /* passed to CTL */ ctl_backend_lun_flags flags; /* passed to CTL */ + ctl_lun_serseq serseq; /* passed to CTL */ void *be_lun; /* passed to CTL */ uint64_t maxlba; /* passed to CTL */ uint32_t blocksize; /* passed to CTL */ uint16_t pblockexp; /* passed to CTL */ uint16_t pblockoff; /* passed to CTL */ uint16_t ublockexp; /* passed to CTL */ uint16_t ublockoff; /* passed to CTL */ uint32_t atomicblock; /* passed to CTL */ uint32_t opttxferlen; /* passed to CTL */ uint32_t req_lun_id; /* passed to CTL */ uint32_t lun_id; /* returned from CTL */ uint8_t serial_num[CTL_SN_LEN]; /* passed to CTL */ uint8_t device_id[CTL_DEVID_LEN];/* passed to CTL */ be_callback_t lun_shutdown; /* passed to CTL */ be_lun_config_t lun_config_status; /* passed to CTL */ struct ctl_backend_driver *be; /* passed to CTL */ void *ctl_lun; /* used by CTL */ ctl_options_t options; /* passed to CTL */ STAILQ_ENTRY(ctl_be_lun) links; /* used by CTL */ }; typedef enum { CTL_BE_FLAG_NONE = 0x00, /* no flags */ CTL_BE_FLAG_HAS_CONFIG = 0x01, /* can do config reads, writes */ CTL_BE_FLAG_INTERNAL = 0x02 /* don't inc mod refcount */ } ctl_backend_flags; typedef int (*be_init_t)(void); typedef int (*be_func_t)(union ctl_io *io); typedef void (*be_vfunc_t)(union ctl_io *io); typedef int (*be_ioctl_t)(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td); typedef int (*be_luninfo_t)(void *be_lun, struct sbuf *sb); typedef uint64_t (*be_lunattr_t)(void *be_lun, const char *attrname); struct ctl_backend_driver { char name[CTL_BE_NAME_LEN]; /* passed to CTL */ ctl_backend_flags flags; /* passed to CTL */ be_init_t init; /* passed to CTL */ be_func_t data_submit; /* passed to CTL */ be_func_t data_move_done; /* passed to CTL */ be_func_t config_read; /* passed to CTL */ be_func_t config_write; /* passed to CTL */ be_ioctl_t ioctl; /* passed to CTL */ be_luninfo_t lun_info; /* passed to CTL */ be_lunattr_t lun_attr; /* passed to CTL */ #ifdef CS_BE_CONFIG_MOVE_DONE_IS_NOT_USED be_func_t config_move_done; /* passed to backend */ #endif #if 0 be_vfunc_t config_write_done; /* passed to backend */ #endif u_int num_luns; /* used by CTL */ STAILQ_ENTRY(ctl_backend_driver) links; /* used by CTL */ }; int ctl_backend_register(struct ctl_backend_driver *be); int ctl_backend_deregister(struct ctl_backend_driver *be); struct ctl_backend_driver *ctl_backend_find(char *backend_name); /* * To add a LUN, first call ctl_add_lun(). You will get the lun_config_status() * callback when the LUN addition has either succeeded or failed. * * Once you get that callback, you can then call ctl_enable_lun() to enable * the LUN. */ int ctl_add_lun(struct ctl_be_lun *be_lun); int ctl_enable_lun(struct ctl_be_lun *be_lun); /* * To delete a LUN, first call ctl_disable_lun(), then * ctl_invalidate_lun(). You will get the lun_shutdown() callback when all * I/O to the LUN has completed and the LUN has been deleted. */ int ctl_disable_lun(struct ctl_be_lun *be_lun); int ctl_invalidate_lun(struct ctl_be_lun *be_lun); /* * To start a LUN (transition from powered off to powered on state) call * ctl_start_lun(). To stop a LUN (transition from powered on to powered * off state) call ctl_stop_lun(). */ int ctl_start_lun(struct ctl_be_lun *be_lun); int ctl_stop_lun(struct ctl_be_lun *be_lun); /* * If a LUN is inoperable, call ctl_lun_inoperable(). Generally the LUN * will become operable once again when the user issues the SCSI FORMAT UNIT * command. (CTL will automatically clear the inoperable flag.) If we * need to re-enable the LUN, we can call ctl_lun_operable() to enable it * without a SCSI command. */ int ctl_lun_inoperable(struct ctl_be_lun *be_lun); int ctl_lun_operable(struct ctl_be_lun *be_lun); /* * To take a LUN offline, call ctl_lun_offline(). Generally the LUN will * be online again once the user sends a SCSI START STOP UNIT command with * the start and on/offline bits set. The backend can bring the LUN back * online via the ctl_lun_online() function, if necessary. */ int ctl_lun_offline(struct ctl_be_lun *be_lun); int ctl_lun_online(struct ctl_be_lun *be_lun); /* * Let the backend notify the initiator about changed capacity. */ void ctl_lun_capacity_changed(struct ctl_be_lun *be_lun); #endif /* _KERNEL */ #endif /* _CTL_BACKEND_H_ */ /* * vim: ts=8 */ Index: projects/clang-trunk/sys/cam/ctl/ctl_backend_block.c =================================================================== --- projects/clang-trunk/sys/cam/ctl/ctl_backend_block.c (revision 287501) +++ projects/clang-trunk/sys/cam/ctl/ctl_backend_block.c (revision 287502) @@ -1,2909 +1,2876 @@ /*- * Copyright (c) 2003 Silicon Graphics International Corp. * Copyright (c) 2009-2011 Spectra Logic Corporation * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Edward Tomasz Napierala * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $ */ /* * CAM Target Layer driver backend for block devices. * * Author: Ken Merry */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * The idea here is that we'll allocate enough S/G space to hold a 1MB * I/O. If we get an I/O larger than that, we'll split it. */ #define CTLBLK_HALF_IO_SIZE (512 * 1024) #define CTLBLK_MAX_IO_SIZE (CTLBLK_HALF_IO_SIZE * 2) #define CTLBLK_MAX_SEG MAXPHYS #define CTLBLK_HALF_SEGS MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1) #define CTLBLK_MAX_SEGS (CTLBLK_HALF_SEGS * 2) #ifdef CTLBLK_DEBUG #define DPRINTF(fmt, args...) \ printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) #else #define DPRINTF(fmt, args...) do {} while(0) #endif #define PRIV(io) \ ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND]) #define ARGS(io) \ ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]) SDT_PROVIDER_DEFINE(cbb); typedef enum { CTL_BE_BLOCK_LUN_UNCONFIGURED = 0x01, CTL_BE_BLOCK_LUN_CONFIG_ERR = 0x02, CTL_BE_BLOCK_LUN_WAITING = 0x04, - CTL_BE_BLOCK_LUN_MULTI_THREAD = 0x08 } ctl_be_block_lun_flags; typedef enum { CTL_BE_BLOCK_NONE, CTL_BE_BLOCK_DEV, CTL_BE_BLOCK_FILE } ctl_be_block_type; struct ctl_be_block_devdata { struct cdev *cdev; struct cdevsw *csw; int dev_ref; }; struct ctl_be_block_filedata { struct ucred *cred; }; union ctl_be_block_bedata { struct ctl_be_block_devdata dev; struct ctl_be_block_filedata file; }; struct ctl_be_block_io; struct ctl_be_block_lun; typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun, const char *attrname); /* * Backend LUN structure. There is a 1:1 mapping between a block device * and a backend block LUN, and between a backend block LUN and a CTL LUN. */ struct ctl_be_block_lun { struct ctl_lun_create_params params; char lunname[32]; char *dev_path; ctl_be_block_type dev_type; struct vnode *vn; union ctl_be_block_bedata backend; cbb_dispatch_t dispatch; cbb_dispatch_t lun_flush; cbb_dispatch_t unmap; cbb_dispatch_t get_lba_status; cbb_getattr_t getattr; uma_zone_t lun_zone; uint64_t size_blocks; uint64_t size_bytes; - uint32_t blocksize; - uint16_t pblockexp; - uint16_t pblockoff; - uint16_t ublockexp; - uint16_t ublockoff; - uint32_t atomicblock; - uint32_t opttxferlen; struct ctl_be_block_softc *softc; struct devstat *disk_stats; ctl_be_block_lun_flags flags; STAILQ_ENTRY(ctl_be_block_lun) links; - struct ctl_be_lun ctl_be_lun; + struct ctl_be_lun cbe_lun; struct taskqueue *io_taskqueue; struct task io_task; int num_threads; STAILQ_HEAD(, ctl_io_hdr) input_queue; STAILQ_HEAD(, ctl_io_hdr) config_read_queue; STAILQ_HEAD(, ctl_io_hdr) config_write_queue; STAILQ_HEAD(, ctl_io_hdr) datamove_queue; struct mtx_padalign io_lock; struct mtx_padalign queue_lock; }; /* * Overall softc structure for the block backend module. */ struct ctl_be_block_softc { struct mtx lock; int num_luns; STAILQ_HEAD(, ctl_be_block_lun) lun_list; }; static struct ctl_be_block_softc backend_block_softc; /* * Per-I/O information. */ struct ctl_be_block_io { union ctl_io *io; struct ctl_sg_entry sg_segs[CTLBLK_MAX_SEGS]; struct iovec xiovecs[CTLBLK_MAX_SEGS]; int bio_cmd; int num_segs; int num_bios_sent; int num_bios_done; int send_complete; int num_errors; struct bintime ds_t0; devstat_tag_type ds_tag_type; devstat_trans_flags ds_trans_type; uint64_t io_len; uint64_t io_offset; int io_arg; struct ctl_be_block_softc *softc; struct ctl_be_block_lun *lun; void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */ }; static int cbb_num_threads = 14; SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0, "CAM Target Layer Block Backend"); SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN, &cbb_num_threads, 0, "Number of threads per backing file"); static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc); static void ctl_free_beio(struct ctl_be_block_io *beio); static void ctl_complete_beio(struct ctl_be_block_io *beio); static int ctl_be_block_move_done(union ctl_io *io); static void ctl_be_block_biodone(struct bio *bio); static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname); static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname); static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io); static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io); static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io); static void ctl_be_block_worker(void *context, int pending); static int ctl_be_block_submit(union ctl_io *io); static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td); static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req); static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req); static int ctl_be_block_close(struct ctl_be_block_lun *be_lun); static int ctl_be_block_open(struct ctl_be_block_softc *softc, struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req); static int ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req); static int ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req); static int ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req); static int ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req); static int ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req); static void ctl_be_block_lun_shutdown(void *be_lun); static void ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status); static int ctl_be_block_config_write(union ctl_io *io); static int ctl_be_block_config_read(union ctl_io *io); static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb); static uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname); int ctl_be_block_init(void); static struct ctl_backend_driver ctl_be_block_driver = { .name = "block", .flags = CTL_BE_FLAG_HAS_CONFIG, .init = ctl_be_block_init, .data_submit = ctl_be_block_submit, .data_move_done = ctl_be_block_move_done, .config_read = ctl_be_block_config_read, .config_write = ctl_be_block_config_write, .ioctl = ctl_be_block_ioctl, .lun_info = ctl_be_block_lun_info, .lun_attr = ctl_be_block_lun_attr }; MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend"); CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver); static uma_zone_t beio_zone; static struct ctl_be_block_io * ctl_alloc_beio(struct ctl_be_block_softc *softc) { struct ctl_be_block_io *beio; beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO); beio->softc = softc; return (beio); } static void ctl_free_beio(struct ctl_be_block_io *beio) { int duplicate_free; int i; duplicate_free = 0; for (i = 0; i < beio->num_segs; i++) { if (beio->sg_segs[i].addr == NULL) duplicate_free++; uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr); beio->sg_segs[i].addr = NULL; /* For compare we had two equal S/G lists. */ if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) { uma_zfree(beio->lun->lun_zone, beio->sg_segs[i + CTLBLK_HALF_SEGS].addr); beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL; } } if (duplicate_free > 0) { printf("%s: %d duplicate frees out of %d segments\n", __func__, duplicate_free, beio->num_segs); } uma_zfree(beio_zone, beio); } static void ctl_complete_beio(struct ctl_be_block_io *beio) { union ctl_io *io = beio->io; if (beio->beio_cont != NULL) { beio->beio_cont(beio); } else { ctl_free_beio(beio); ctl_data_submit_done(io); } } static int ctl_be_block_move_done(union ctl_io *io) { struct ctl_be_block_io *beio; struct ctl_be_block_lun *be_lun; struct ctl_lba_len_flags *lbalen; #ifdef CTL_TIME_IO struct bintime cur_bt; #endif int i; beio = (struct ctl_be_block_io *)PRIV(io)->ptr; be_lun = beio->lun; DPRINTF("entered\n"); #ifdef CTL_TIME_IO getbintime(&cur_bt); bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt); bintime_add(&io->io_hdr.dma_bt, &cur_bt); io->io_hdr.num_dmas++; #endif io->scsiio.kern_rel_offset += io->scsiio.kern_data_len; /* * We set status at this point for read commands, and write * commands with errors. */ if (io->io_hdr.flags & CTL_FLAG_ABORT) { ; } else if ((io->io_hdr.port_status == 0) && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) { lbalen = ARGS(beio->io); if (lbalen->flags & CTL_LLF_READ) { ctl_set_success(&io->scsiio); } else if (lbalen->flags & CTL_LLF_COMPARE) { /* We have two data blocks ready for comparison. */ for (i = 0; i < beio->num_segs; i++) { if (memcmp(beio->sg_segs[i].addr, beio->sg_segs[i + CTLBLK_HALF_SEGS].addr, beio->sg_segs[i].len) != 0) break; } if (i < beio->num_segs) ctl_set_sense(&io->scsiio, /*current_error*/ 1, /*sense_key*/ SSD_KEY_MISCOMPARE, /*asc*/ 0x1D, /*ascq*/ 0x00, SSD_ELEM_NONE); else ctl_set_success(&io->scsiio); } } else if ((io->io_hdr.port_status != 0) && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { /* * For hardware error sense keys, the sense key * specific value is defined to be a retry count, * but we use it to pass back an internal FETD * error code. XXX KDM Hopefully the FETD is only * using 16 bits for an error code, since that's * all the space we have in the sks field. */ ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ io->io_hdr.port_status); } /* * If this is a read, or a write with errors, it is done. */ if ((beio->bio_cmd == BIO_READ) || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0) || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) { ctl_complete_beio(beio); return (0); } /* * At this point, we have a write and the DMA completed * successfully. We now have to queue it to the task queue to * execute the backend I/O. That is because we do blocking * memory allocations, and in the file backing case, blocking I/O. * This move done routine is generally called in the SIM's * interrupt context, and therefore we cannot block. */ mtx_lock(&be_lun->queue_lock); /* * XXX KDM make sure that links is okay to use at this point. * Otherwise, we either need to add another field to ctl_io_hdr, * or deal with resource allocation here. */ STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links); mtx_unlock(&be_lun->queue_lock); taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); return (0); } static void ctl_be_block_biodone(struct bio *bio) { struct ctl_be_block_io *beio; struct ctl_be_block_lun *be_lun; union ctl_io *io; int error; beio = bio->bio_caller1; be_lun = beio->lun; io = beio->io; DPRINTF("entered\n"); error = bio->bio_error; mtx_lock(&be_lun->io_lock); if (error != 0) beio->num_errors++; beio->num_bios_done++; /* * XXX KDM will this cause WITNESS to complain? Holding a lock * during the free might cause it to complain. */ g_destroy_bio(bio); /* * If the send complete bit isn't set, or we aren't the last I/O to * complete, then we're done. */ if ((beio->send_complete == 0) || (beio->num_bios_done < beio->num_bios_sent)) { mtx_unlock(&be_lun->io_lock); return; } /* * At this point, we've verified that we are the last I/O to * complete, so it's safe to drop the lock. */ devstat_end_transaction(beio->lun->disk_stats, beio->io_len, beio->ds_tag_type, beio->ds_trans_type, /*now*/ NULL, /*then*/&beio->ds_t0); mtx_unlock(&be_lun->io_lock); /* * If there are any errors from the backing device, we fail the * entire I/O with a medium error. */ if (beio->num_errors > 0) { if (error == EOPNOTSUPP) { ctl_set_invalid_opcode(&io->scsiio); } else if (error == ENOSPC || error == EDQUOT) { ctl_set_space_alloc_fail(&io->scsiio); } else if (beio->bio_cmd == BIO_FLUSH) { /* XXX KDM is there is a better error here? */ ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ 0xbad2); } else ctl_set_medium_error(&io->scsiio); ctl_complete_beio(beio); return; } /* * If this is a write, a flush, a delete or verify, we're all done. * If this is a read, we can now send the data to the user. */ if ((beio->bio_cmd == BIO_WRITE) || (beio->bio_cmd == BIO_FLUSH) || (beio->bio_cmd == BIO_DELETE) || (ARGS(io)->flags & CTL_LLF_VERIFY)) { ctl_set_success(&io->scsiio); ctl_complete_beio(beio); } else { if ((ARGS(io)->flags & CTL_LLF_READ) && beio->beio_cont == NULL) ctl_set_success(&io->scsiio); #ifdef CTL_TIME_IO getbintime(&io->io_hdr.dma_start_bt); #endif ctl_datamove(io); } } static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { union ctl_io *io = beio->io; struct mount *mountpoint; int error, lock_flags; DPRINTF("entered\n"); binuptime(&beio->ds_t0); mtx_lock(&be_lun->io_lock); devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); mtx_unlock(&be_lun->io_lock); (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT); if (MNT_SHARED_WRITES(mountpoint) || ((mountpoint == NULL) && MNT_SHARED_WRITES(be_lun->vn->v_mount))) lock_flags = LK_SHARED; else lock_flags = LK_EXCLUSIVE; vn_lock(be_lun->vn, lock_flags | LK_RETRY); error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT, curthread); VOP_UNLOCK(be_lun->vn, 0); vn_finished_write(mountpoint); mtx_lock(&be_lun->io_lock); devstat_end_transaction(beio->lun->disk_stats, beio->io_len, beio->ds_tag_type, beio->ds_trans_type, /*now*/ NULL, /*then*/&beio->ds_t0); mtx_unlock(&be_lun->io_lock); if (error == 0) ctl_set_success(&io->scsiio); else { /* XXX KDM is there is a better error here? */ ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ 0xbad1); } ctl_complete_beio(beio); } SDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t"); SDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t"); SDT_PROBE_DEFINE1(cbb, kernel, read, file_done,"uint64_t"); SDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t"); static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { struct ctl_be_block_filedata *file_data; union ctl_io *io; struct uio xuio; struct iovec *xiovec; int flags; int error, i; DPRINTF("entered\n"); file_data = &be_lun->backend.file; io = beio->io; flags = 0; if (ARGS(io)->flags & CTL_LLF_DPO) flags |= IO_DIRECT; if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) flags |= IO_SYNC; bzero(&xuio, sizeof(xuio)); if (beio->bio_cmd == BIO_READ) { SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0); xuio.uio_rw = UIO_READ; } else { SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0); xuio.uio_rw = UIO_WRITE; } xuio.uio_offset = beio->io_offset; xuio.uio_resid = beio->io_len; xuio.uio_segflg = UIO_SYSSPACE; xuio.uio_iov = beio->xiovecs; xuio.uio_iovcnt = beio->num_segs; xuio.uio_td = curthread; for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { xiovec->iov_base = beio->sg_segs[i].addr; xiovec->iov_len = beio->sg_segs[i].len; } binuptime(&beio->ds_t0); mtx_lock(&be_lun->io_lock); devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); mtx_unlock(&be_lun->io_lock); if (beio->bio_cmd == BIO_READ) { vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); /* * UFS pays attention to IO_DIRECT for reads. If the * DIRECTIO option is configured into the kernel, it calls * ffs_rawread(). But that only works for single-segment * uios with user space addresses. In our case, with a * kernel uio, it still reads into the buffer cache, but it * will just try to release the buffer from the cache later * on in ffs_read(). * * ZFS does not pay attention to IO_DIRECT for reads. * * UFS does not pay attention to IO_SYNC for reads. * * ZFS pays attention to IO_SYNC (which translates into the * Solaris define FRSYNC for zfs_read()) for reads. It * attempts to sync the file before reading. */ error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred); VOP_UNLOCK(be_lun->vn, 0); SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0); } else { struct mount *mountpoint; int lock_flags; (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT); if (MNT_SHARED_WRITES(mountpoint) || ((mountpoint == NULL) && MNT_SHARED_WRITES(be_lun->vn->v_mount))) lock_flags = LK_SHARED; else lock_flags = LK_EXCLUSIVE; vn_lock(be_lun->vn, lock_flags | LK_RETRY); /* * UFS pays attention to IO_DIRECT for writes. The write * is done asynchronously. (Normally the write would just * get put into cache. * * UFS pays attention to IO_SYNC for writes. It will * attempt to write the buffer out synchronously if that * flag is set. * * ZFS does not pay attention to IO_DIRECT for writes. * * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC) * for writes. It will flush the transaction from the * cache before returning. */ error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred); VOP_UNLOCK(be_lun->vn, 0); vn_finished_write(mountpoint); SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0); } mtx_lock(&be_lun->io_lock); devstat_end_transaction(beio->lun->disk_stats, beio->io_len, beio->ds_tag_type, beio->ds_trans_type, /*now*/ NULL, /*then*/&beio->ds_t0); mtx_unlock(&be_lun->io_lock); /* * If we got an error, set the sense data to "MEDIUM ERROR" and * return the I/O to the user. */ if (error != 0) { char path_str[32]; ctl_scsi_path_string(io, path_str, sizeof(path_str)); printf("%s%s command returned errno %d\n", path_str, (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", error); if (error == ENOSPC || error == EDQUOT) { ctl_set_space_alloc_fail(&io->scsiio); } else ctl_set_medium_error(&io->scsiio); ctl_complete_beio(beio); return; } /* * If this is a write or a verify, we're all done. * If this is a read, we can now send the data to the user. */ if ((beio->bio_cmd == BIO_WRITE) || (ARGS(io)->flags & CTL_LLF_VERIFY)) { ctl_set_success(&io->scsiio); ctl_complete_beio(beio); } else { if ((ARGS(io)->flags & CTL_LLF_READ) && beio->beio_cont == NULL) ctl_set_success(&io->scsiio); #ifdef CTL_TIME_IO getbintime(&io->io_hdr.dma_start_bt); #endif ctl_datamove(io); } } static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { union ctl_io *io = beio->io; struct ctl_lba_len_flags *lbalen = ARGS(io); struct scsi_get_lba_status_data *data; off_t roff, off; int error, status; DPRINTF("entered\n"); - off = roff = ((off_t)lbalen->lba) * be_lun->blocksize; + off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize; vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off, 0, curthread->td_ucred, curthread); if (error == 0 && off > roff) status = 0; /* mapped up to off */ else { error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off, 0, curthread->td_ucred, curthread); if (error == 0 && off > roff) status = 1; /* deallocated up to off */ else { status = 0; /* unknown up to the end */ off = be_lun->size_bytes; } } VOP_UNLOCK(be_lun->vn, 0); data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; scsi_u64to8b(lbalen->lba, data->descr[0].addr); - scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->blocksize - lbalen->lba), - data->descr[0].length); + scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize - + lbalen->lba), data->descr[0].length); data->descr[0].status = status; ctl_complete_beio(beio); } static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname) { struct vattr vattr; struct statfs statfs; uint64_t val; int error; val = UINT64_MAX; if (be_lun->vn == NULL) return (val); vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); if (strcmp(attrname, "blocksused") == 0) { error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); if (error == 0) - val = vattr.va_bytes / be_lun->blocksize; + val = vattr.va_bytes / be_lun->cbe_lun.blocksize; } if (strcmp(attrname, "blocksavail") == 0 && (be_lun->vn->v_iflag & VI_DOOMED) == 0) { error = VFS_STATFS(be_lun->vn->v_mount, &statfs); if (error == 0) val = statfs.f_bavail * statfs.f_bsize / - be_lun->blocksize; + be_lun->cbe_lun.blocksize; } VOP_UNLOCK(be_lun->vn, 0); return (val); } static void ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { struct ctl_be_block_devdata *dev_data; union ctl_io *io; struct uio xuio; struct iovec *xiovec; int flags; int error, i; DPRINTF("entered\n"); dev_data = &be_lun->backend.dev; io = beio->io; flags = 0; if (ARGS(io)->flags & CTL_LLF_DPO) flags |= IO_DIRECT; if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) flags |= IO_SYNC; bzero(&xuio, sizeof(xuio)); if (beio->bio_cmd == BIO_READ) { SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0); xuio.uio_rw = UIO_READ; } else { SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0); xuio.uio_rw = UIO_WRITE; } xuio.uio_offset = beio->io_offset; xuio.uio_resid = beio->io_len; xuio.uio_segflg = UIO_SYSSPACE; xuio.uio_iov = beio->xiovecs; xuio.uio_iovcnt = beio->num_segs; xuio.uio_td = curthread; for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { xiovec->iov_base = beio->sg_segs[i].addr; xiovec->iov_len = beio->sg_segs[i].len; } binuptime(&beio->ds_t0); mtx_lock(&be_lun->io_lock); devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); mtx_unlock(&be_lun->io_lock); if (beio->bio_cmd == BIO_READ) { error = (*dev_data->csw->d_read)(dev_data->cdev, &xuio, flags); SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0); } else { error = (*dev_data->csw->d_write)(dev_data->cdev, &xuio, flags); SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0); } mtx_lock(&be_lun->io_lock); devstat_end_transaction(beio->lun->disk_stats, beio->io_len, beio->ds_tag_type, beio->ds_trans_type, /*now*/ NULL, /*then*/&beio->ds_t0); mtx_unlock(&be_lun->io_lock); /* * If we got an error, set the sense data to "MEDIUM ERROR" and * return the I/O to the user. */ if (error != 0) { if (error == ENOSPC || error == EDQUOT) { ctl_set_space_alloc_fail(&io->scsiio); } else ctl_set_medium_error(&io->scsiio); ctl_complete_beio(beio); return; } /* * If this is a write or a verify, we're all done. * If this is a read, we can now send the data to the user. */ if ((beio->bio_cmd == BIO_WRITE) || (ARGS(io)->flags & CTL_LLF_VERIFY)) { ctl_set_success(&io->scsiio); ctl_complete_beio(beio); } else { if ((ARGS(io)->flags & CTL_LLF_READ) && beio->beio_cont == NULL) ctl_set_success(&io->scsiio); #ifdef CTL_TIME_IO getbintime(&io->io_hdr.dma_start_bt); #endif ctl_datamove(io); } } static void ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { struct ctl_be_block_devdata *dev_data = &be_lun->backend.dev; union ctl_io *io = beio->io; struct ctl_lba_len_flags *lbalen = ARGS(io); struct scsi_get_lba_status_data *data; off_t roff, off; int error, status; DPRINTF("entered\n"); - off = roff = ((off_t)lbalen->lba) * be_lun->blocksize; + off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize; error = (*dev_data->csw->d_ioctl)(dev_data->cdev, FIOSEEKHOLE, (caddr_t)&off, FREAD, curthread); if (error == 0 && off > roff) status = 0; /* mapped up to off */ else { error = (*dev_data->csw->d_ioctl)(dev_data->cdev, FIOSEEKDATA, (caddr_t)&off, FREAD, curthread); if (error == 0 && off > roff) status = 1; /* deallocated up to off */ else { status = 0; /* unknown up to the end */ off = be_lun->size_bytes; } } data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; scsi_u64to8b(lbalen->lba, data->descr[0].addr); - scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->blocksize - lbalen->lba), - data->descr[0].length); + scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize - + lbalen->lba), data->descr[0].length); data->descr[0].status = status; ctl_complete_beio(beio); } static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { struct bio *bio; union ctl_io *io; struct ctl_be_block_devdata *dev_data; dev_data = &be_lun->backend.dev; io = beio->io; DPRINTF("entered\n"); /* This can't fail, it's a blocking allocation. */ bio = g_alloc_bio(); bio->bio_cmd = BIO_FLUSH; bio->bio_dev = dev_data->cdev; bio->bio_offset = 0; bio->bio_data = 0; bio->bio_done = ctl_be_block_biodone; bio->bio_caller1 = beio; bio->bio_pblkno = 0; /* * We don't need to acquire the LUN lock here, because we are only * sending one bio, and so there is no other context to synchronize * with. */ beio->num_bios_sent = 1; beio->send_complete = 1; binuptime(&beio->ds_t0); mtx_lock(&be_lun->io_lock); devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); mtx_unlock(&be_lun->io_lock); (*dev_data->csw->d_strategy)(bio); } static void ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio, uint64_t off, uint64_t len, int last) { struct bio *bio; struct ctl_be_block_devdata *dev_data; uint64_t maxlen; dev_data = &be_lun->backend.dev; - maxlen = LONG_MAX - (LONG_MAX % be_lun->blocksize); + maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize); while (len > 0) { bio = g_alloc_bio(); bio->bio_cmd = BIO_DELETE; bio->bio_dev = dev_data->cdev; bio->bio_offset = off; bio->bio_length = MIN(len, maxlen); bio->bio_data = 0; bio->bio_done = ctl_be_block_biodone; bio->bio_caller1 = beio; - bio->bio_pblkno = off / be_lun->blocksize; + bio->bio_pblkno = off / be_lun->cbe_lun.blocksize; off += bio->bio_length; len -= bio->bio_length; mtx_lock(&be_lun->io_lock); beio->num_bios_sent++; if (last && len == 0) beio->send_complete = 1; mtx_unlock(&be_lun->io_lock); (*dev_data->csw->d_strategy)(bio); } } static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { union ctl_io *io; struct ctl_be_block_devdata *dev_data; struct ctl_ptr_len_flags *ptrlen; struct scsi_unmap_desc *buf, *end; uint64_t len; dev_data = &be_lun->backend.dev; io = beio->io; DPRINTF("entered\n"); binuptime(&beio->ds_t0); mtx_lock(&be_lun->io_lock); devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); mtx_unlock(&be_lun->io_lock); if (beio->io_offset == -1) { beio->io_len = 0; ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; buf = (struct scsi_unmap_desc *)ptrlen->ptr; end = buf + ptrlen->len / sizeof(*buf); for (; buf < end; buf++) { len = (uint64_t)scsi_4btoul(buf->length) * - be_lun->blocksize; + be_lun->cbe_lun.blocksize; beio->io_len += len; ctl_be_block_unmap_dev_range(be_lun, beio, - scsi_8btou64(buf->lba) * be_lun->blocksize, len, - (end - buf < 2) ? TRUE : FALSE); + scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize, + len, (end - buf < 2) ? TRUE : FALSE); } } else ctl_be_block_unmap_dev_range(be_lun, beio, beio->io_offset, beio->io_len, TRUE); } static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); int i; struct bio *bio; struct ctl_be_block_devdata *dev_data; off_t cur_offset; int max_iosize; DPRINTF("entered\n"); dev_data = &be_lun->backend.dev; /* * We have to limit our I/O size to the maximum supported by the * backend device. Hopefully it is MAXPHYS. If the driver doesn't * set it properly, use DFLTPHYS. */ max_iosize = dev_data->cdev->si_iosize_max; if (max_iosize < PAGE_SIZE) max_iosize = DFLTPHYS; cur_offset = beio->io_offset; for (i = 0; i < beio->num_segs; i++) { size_t cur_size; uint8_t *cur_ptr; cur_size = beio->sg_segs[i].len; cur_ptr = beio->sg_segs[i].addr; while (cur_size > 0) { /* This can't fail, it's a blocking allocation. */ bio = g_alloc_bio(); KASSERT(bio != NULL, ("g_alloc_bio() failed!\n")); bio->bio_cmd = beio->bio_cmd; bio->bio_dev = dev_data->cdev; bio->bio_caller1 = beio; bio->bio_length = min(cur_size, max_iosize); bio->bio_offset = cur_offset; bio->bio_data = cur_ptr; bio->bio_done = ctl_be_block_biodone; - bio->bio_pblkno = cur_offset / be_lun->blocksize; + bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize; cur_offset += bio->bio_length; cur_ptr += bio->bio_length; cur_size -= bio->bio_length; TAILQ_INSERT_TAIL(&queue, bio, bio_queue); beio->num_bios_sent++; } } binuptime(&beio->ds_t0); mtx_lock(&be_lun->io_lock); devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); beio->send_complete = 1; mtx_unlock(&be_lun->io_lock); /* * Fire off all allocated requests! */ while ((bio = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, bio, bio_queue); (*dev_data->csw->d_strategy)(bio); } } static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname) { struct ctl_be_block_devdata *dev_data = &be_lun->backend.dev; struct diocgattr_arg arg; int error; if (dev_data->csw == NULL || dev_data->csw->d_ioctl == NULL) return (UINT64_MAX); strlcpy(arg.name, attrname, sizeof(arg.name)); arg.len = sizeof(arg.value.off); error = dev_data->csw->d_ioctl(dev_data->cdev, DIOCGATTR, (caddr_t)&arg, FREAD, curthread); if (error != 0) return (UINT64_MAX); return (arg.value.off); } static void ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun, union ctl_io *io) { + struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; struct ctl_be_block_io *beio; struct ctl_lba_len_flags *lbalen; DPRINTF("entered\n"); beio = (struct ctl_be_block_io *)PRIV(io)->ptr; lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; - beio->io_len = lbalen->len * be_lun->blocksize; - beio->io_offset = lbalen->lba * be_lun->blocksize; + beio->io_len = lbalen->len * cbe_lun->blocksize; + beio->io_offset = lbalen->lba * cbe_lun->blocksize; beio->io_arg = (lbalen->flags & SSC_IMMED) != 0; beio->bio_cmd = BIO_FLUSH; beio->ds_trans_type = DEVSTAT_NO_DATA; DPRINTF("SYNC\n"); be_lun->lun_flush(be_lun, beio); } static void ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio) { union ctl_io *io; io = beio->io; ctl_free_beio(beio); if ((io->io_hdr.flags & CTL_FLAG_ABORT) || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { ctl_config_write_done(io); return; } ctl_be_block_config_write(io); } static void ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun, union ctl_io *io) { + struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; struct ctl_be_block_io *beio; struct ctl_lba_len_flags *lbalen; uint64_t len_left, lba; uint32_t pb, pbo, adj; int i, seglen; uint8_t *buf, *end; DPRINTF("entered\n"); beio = (struct ctl_be_block_io *)PRIV(io)->ptr; lbalen = ARGS(beio->io); if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) || (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) { ctl_free_beio(beio); ctl_set_invalid_field(&io->scsiio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 0, /*bit*/ 0); ctl_config_write_done(io); return; } if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) { - beio->io_offset = lbalen->lba * be_lun->blocksize; - beio->io_len = (uint64_t)lbalen->len * be_lun->blocksize; + beio->io_offset = lbalen->lba * cbe_lun->blocksize; + beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize; beio->bio_cmd = BIO_DELETE; beio->ds_trans_type = DEVSTAT_FREE; be_lun->unmap(be_lun, beio); return; } beio->bio_cmd = BIO_WRITE; beio->ds_trans_type = DEVSTAT_WRITE; DPRINTF("WRITE SAME at LBA %jx len %u\n", (uintmax_t)lbalen->lba, lbalen->len); - pb = be_lun->blocksize << be_lun->pblockexp; - if (be_lun->pblockoff > 0) - pbo = pb - be_lun->blocksize * be_lun->pblockoff; + pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp; + if (be_lun->cbe_lun.pblockoff > 0) + pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff; else pbo = 0; - len_left = (uint64_t)lbalen->len * be_lun->blocksize; + len_left = (uint64_t)lbalen->len * cbe_lun->blocksize; for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) { /* * Setup the S/G entry for this chunk. */ seglen = MIN(CTLBLK_MAX_SEG, len_left); - if (pb > be_lun->blocksize) { - adj = ((lbalen->lba + lba) * be_lun->blocksize + + if (pb > cbe_lun->blocksize) { + adj = ((lbalen->lba + lba) * cbe_lun->blocksize + seglen - pbo) % pb; if (seglen > adj) seglen -= adj; else - seglen -= seglen % be_lun->blocksize; + seglen -= seglen % cbe_lun->blocksize; } else - seglen -= seglen % be_lun->blocksize; + seglen -= seglen % cbe_lun->blocksize; beio->sg_segs[i].len = seglen; beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK); DPRINTF("segment %d addr %p len %zd\n", i, beio->sg_segs[i].addr, beio->sg_segs[i].len); beio->num_segs++; len_left -= seglen; buf = beio->sg_segs[i].addr; end = buf + seglen; - for (; buf < end; buf += be_lun->blocksize) { - memcpy(buf, io->scsiio.kern_data_ptr, be_lun->blocksize); + for (; buf < end; buf += cbe_lun->blocksize) { + memcpy(buf, io->scsiio.kern_data_ptr, cbe_lun->blocksize); if (lbalen->flags & SWS_LBDATA) scsi_ulto4b(lbalen->lba + lba, buf); lba++; } } - beio->io_offset = lbalen->lba * be_lun->blocksize; - beio->io_len = lba * be_lun->blocksize; + beio->io_offset = lbalen->lba * cbe_lun->blocksize; + beio->io_len = lba * cbe_lun->blocksize; /* We can not do all in one run. Correct and schedule rerun. */ if (len_left > 0) { lbalen->lba += lba; lbalen->len -= lba; beio->beio_cont = ctl_be_block_cw_done_ws; } be_lun->dispatch(be_lun, beio); } static void ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun, union ctl_io *io) { struct ctl_be_block_io *beio; struct ctl_ptr_len_flags *ptrlen; DPRINTF("entered\n"); beio = (struct ctl_be_block_io *)PRIV(io)->ptr; ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) { ctl_free_beio(beio); ctl_set_invalid_field(&io->scsiio, /*sks_valid*/ 0, /*command*/ 1, /*field*/ 0, /*bit_valid*/ 0, /*bit*/ 0); ctl_config_write_done(io); return; } beio->io_len = 0; beio->io_offset = -1; beio->bio_cmd = BIO_DELETE; beio->ds_trans_type = DEVSTAT_FREE; DPRINTF("UNMAP\n"); be_lun->unmap(be_lun, beio); } static void ctl_be_block_cr_done(struct ctl_be_block_io *beio) { union ctl_io *io; io = beio->io; ctl_free_beio(beio); ctl_config_read_done(io); } static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io) { struct ctl_be_block_io *beio; struct ctl_be_block_softc *softc; DPRINTF("entered\n"); softc = be_lun->softc; beio = ctl_alloc_beio(softc); beio->io = io; beio->lun = be_lun; beio->beio_cont = ctl_be_block_cr_done; PRIV(io)->ptr = (void *)beio; switch (io->scsiio.cdb[0]) { case SERVICE_ACTION_IN: /* GET LBA STATUS */ beio->bio_cmd = -1; beio->ds_trans_type = DEVSTAT_NO_DATA; beio->ds_tag_type = DEVSTAT_TAG_ORDERED; beio->io_len = 0; if (be_lun->get_lba_status) be_lun->get_lba_status(be_lun, beio); else ctl_be_block_cr_done(beio); break; default: panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); break; } } static void ctl_be_block_cw_done(struct ctl_be_block_io *beio) { union ctl_io *io; io = beio->io; ctl_free_beio(beio); ctl_config_write_done(io); } static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io) { struct ctl_be_block_io *beio; struct ctl_be_block_softc *softc; DPRINTF("entered\n"); softc = be_lun->softc; beio = ctl_alloc_beio(softc); beio->io = io; beio->lun = be_lun; beio->beio_cont = ctl_be_block_cw_done; switch (io->scsiio.tag_type) { case CTL_TAG_ORDERED: beio->ds_tag_type = DEVSTAT_TAG_ORDERED; break; case CTL_TAG_HEAD_OF_QUEUE: beio->ds_tag_type = DEVSTAT_TAG_HEAD; break; case CTL_TAG_UNTAGGED: case CTL_TAG_SIMPLE: case CTL_TAG_ACA: default: beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; break; } PRIV(io)->ptr = (void *)beio; switch (io->scsiio.cdb[0]) { case SYNCHRONIZE_CACHE: case SYNCHRONIZE_CACHE_16: ctl_be_block_cw_dispatch_sync(be_lun, io); break; case WRITE_SAME_10: case WRITE_SAME_16: ctl_be_block_cw_dispatch_ws(be_lun, io); break; case UNMAP: ctl_be_block_cw_dispatch_unmap(be_lun, io); break; default: panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); break; } } SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t"); SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t"); SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t"); SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t"); static void ctl_be_block_next(struct ctl_be_block_io *beio) { struct ctl_be_block_lun *be_lun; union ctl_io *io; io = beio->io; be_lun = beio->lun; ctl_free_beio(beio); if ((io->io_hdr.flags & CTL_FLAG_ABORT) || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { ctl_data_submit_done(io); return; } io->io_hdr.status &= ~CTL_STATUS_MASK; io->io_hdr.status |= CTL_STATUS_NONE; mtx_lock(&be_lun->queue_lock); /* * XXX KDM make sure that links is okay to use at this point. * Otherwise, we either need to add another field to ctl_io_hdr, * or deal with resource allocation here. */ STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); mtx_unlock(&be_lun->queue_lock); taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); } static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io) { + struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; struct ctl_be_block_io *beio; struct ctl_be_block_softc *softc; struct ctl_lba_len_flags *lbalen; struct ctl_ptr_len_flags *bptrlen; uint64_t len_left, lbas; int i; softc = be_lun->softc; DPRINTF("entered\n"); lbalen = ARGS(io); if (lbalen->flags & CTL_LLF_WRITE) { SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0); } else { SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0); } beio = ctl_alloc_beio(softc); beio->io = io; beio->lun = be_lun; bptrlen = PRIV(io); bptrlen->ptr = (void *)beio; switch (io->scsiio.tag_type) { case CTL_TAG_ORDERED: beio->ds_tag_type = DEVSTAT_TAG_ORDERED; break; case CTL_TAG_HEAD_OF_QUEUE: beio->ds_tag_type = DEVSTAT_TAG_HEAD; break; case CTL_TAG_UNTAGGED: case CTL_TAG_SIMPLE: case CTL_TAG_ACA: default: beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; break; } if (lbalen->flags & CTL_LLF_WRITE) { beio->bio_cmd = BIO_WRITE; beio->ds_trans_type = DEVSTAT_WRITE; } else { beio->bio_cmd = BIO_READ; beio->ds_trans_type = DEVSTAT_READ; } DPRINTF("%s at LBA %jx len %u @%ju\n", (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len); if (lbalen->flags & CTL_LLF_COMPARE) lbas = CTLBLK_HALF_IO_SIZE; else lbas = CTLBLK_MAX_IO_SIZE; - lbas = MIN(lbalen->len - bptrlen->len, lbas / be_lun->blocksize); - beio->io_offset = (lbalen->lba + bptrlen->len) * be_lun->blocksize; - beio->io_len = lbas * be_lun->blocksize; + lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize); + beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize; + beio->io_len = lbas * cbe_lun->blocksize; bptrlen->len += lbas; for (i = 0, len_left = beio->io_len; len_left > 0; i++) { KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)", i, CTLBLK_MAX_SEGS)); /* * Setup the S/G entry for this chunk. */ beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left); beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK); DPRINTF("segment %d addr %p len %zd\n", i, beio->sg_segs[i].addr, beio->sg_segs[i].len); /* Set up second segment for compare operation. */ if (lbalen->flags & CTL_LLF_COMPARE) { beio->sg_segs[i + CTLBLK_HALF_SEGS].len = beio->sg_segs[i].len; beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK); } beio->num_segs++; len_left -= beio->sg_segs[i].len; } if (bptrlen->len < lbalen->len) beio->beio_cont = ctl_be_block_next; io->scsiio.be_move_done = ctl_be_block_move_done; /* For compare we have separate S/G lists for read and datamove. */ if (lbalen->flags & CTL_LLF_COMPARE) io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS]; else io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs; io->scsiio.kern_data_len = beio->io_len; io->scsiio.kern_data_resid = 0; io->scsiio.kern_sg_entries = beio->num_segs; io->io_hdr.flags |= CTL_FLAG_ALLOCATED | CTL_FLAG_KDPTR_SGLIST; /* * For the read case, we need to read the data into our buffers and * then we can send it back to the user. For the write case, we * need to get the data from the user first. */ if (beio->bio_cmd == BIO_READ) { SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0); be_lun->dispatch(be_lun, beio); } else { SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0); #ifdef CTL_TIME_IO getbintime(&io->io_hdr.dma_start_bt); #endif ctl_datamove(io); } } static void ctl_be_block_worker(void *context, int pending) { struct ctl_be_block_lun *be_lun; struct ctl_be_block_softc *softc; union ctl_io *io; be_lun = (struct ctl_be_block_lun *)context; softc = be_lun->softc; DPRINTF("entered\n"); mtx_lock(&be_lun->queue_lock); for (;;) { io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue); if (io != NULL) { struct ctl_be_block_io *beio; DPRINTF("datamove queue\n"); STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr, ctl_io_hdr, links); mtx_unlock(&be_lun->queue_lock); beio = (struct ctl_be_block_io *)PRIV(io)->ptr; be_lun->dispatch(be_lun, beio); mtx_lock(&be_lun->queue_lock); continue; } io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue); if (io != NULL) { DPRINTF("config write queue\n"); STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr, ctl_io_hdr, links); mtx_unlock(&be_lun->queue_lock); ctl_be_block_cw_dispatch(be_lun, io); mtx_lock(&be_lun->queue_lock); continue; } io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue); if (io != NULL) { DPRINTF("config read queue\n"); STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr, ctl_io_hdr, links); mtx_unlock(&be_lun->queue_lock); ctl_be_block_cr_dispatch(be_lun, io); mtx_lock(&be_lun->queue_lock); continue; } io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue); if (io != NULL) { DPRINTF("input queue\n"); STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr, ctl_io_hdr, links); mtx_unlock(&be_lun->queue_lock); /* * We must drop the lock, since this routine and * its children may sleep. */ ctl_be_block_dispatch(be_lun, io); mtx_lock(&be_lun->queue_lock); continue; } /* * If we get here, there is no work left in the queues, so * just break out and let the task queue go to sleep. */ break; } mtx_unlock(&be_lun->queue_lock); } /* * Entry point from CTL to the backend for I/O. We queue everything to a * work thread, so this just puts the I/O on a queue and wakes up the * thread. */ static int ctl_be_block_submit(union ctl_io *io) { struct ctl_be_block_lun *be_lun; - struct ctl_be_lun *ctl_be_lun; + struct ctl_be_lun *cbe_lun; DPRINTF("entered\n"); - ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ + cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ CTL_PRIV_BACKEND_LUN].ptr; - be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun; + be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun; /* * Make sure we only get SCSI I/O. */ KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type " "%#x) encountered", io->io_hdr.io_type)); PRIV(io)->len = 0; mtx_lock(&be_lun->queue_lock); /* * XXX KDM make sure that links is okay to use at this point. * Otherwise, we either need to add another field to ctl_io_hdr, * or deal with resource allocation here. */ STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); mtx_unlock(&be_lun->queue_lock); taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); return (CTL_RETVAL_COMPLETE); } static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td) { struct ctl_be_block_softc *softc; int error; softc = &backend_block_softc; error = 0; switch (cmd) { case CTL_LUN_REQ: { struct ctl_lun_req *lun_req; lun_req = (struct ctl_lun_req *)addr; switch (lun_req->reqtype) { case CTL_LUNREQ_CREATE: error = ctl_be_block_create(softc, lun_req); break; case CTL_LUNREQ_RM: error = ctl_be_block_rm(softc, lun_req); break; case CTL_LUNREQ_MODIFY: error = ctl_be_block_modify(softc, lun_req); break; default: lun_req->status = CTL_LUN_ERROR; snprintf(lun_req->error_str, sizeof(lun_req->error_str), "invalid LUN request type %d", lun_req->reqtype); break; } break; } default: error = ENOTTY; break; } return (error); } static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) { + struct ctl_be_lun *cbe_lun; struct ctl_be_block_filedata *file_data; struct ctl_lun_create_params *params; char *value; struct vattr vattr; off_t ps, pss, po, pos, us, uss, uo, uos; int error; error = 0; + cbe_lun = &be_lun->cbe_lun; file_data = &be_lun->backend.file; params = &be_lun->params; be_lun->dev_type = CTL_BE_BLOCK_FILE; be_lun->dispatch = ctl_be_block_dispatch_file; be_lun->lun_flush = ctl_be_block_flush_file; be_lun->get_lba_status = ctl_be_block_gls_file; be_lun->getattr = ctl_be_block_getattr_file; + be_lun->unmap = NULL; + cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP; error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); if (error != 0) { snprintf(req->error_str, sizeof(req->error_str), "error calling VOP_GETATTR() for file %s", be_lun->dev_path); return (error); } /* * Verify that we have the ability to upgrade to exclusive * access on this file so we can trap errors at open instead * of reporting them during first access. */ if (VOP_ISLOCKED(be_lun->vn) != LK_EXCLUSIVE) { vn_lock(be_lun->vn, LK_UPGRADE | LK_RETRY); if (be_lun->vn->v_iflag & VI_DOOMED) { error = EBADF; snprintf(req->error_str, sizeof(req->error_str), "error locking file %s", be_lun->dev_path); return (error); } } - file_data->cred = crhold(curthread->td_ucred); if (params->lun_size_bytes != 0) be_lun->size_bytes = params->lun_size_bytes; else be_lun->size_bytes = vattr.va_size; - /* - * We set the multi thread flag for file operations because all - * filesystems (in theory) are capable of allowing multiple readers - * of a file at once. So we want to get the maximum possible - * concurrency. - */ - be_lun->flags |= CTL_BE_BLOCK_LUN_MULTI_THREAD; /* * For files we can use any logical block size. Prefer 512 bytes * for compatibility reasons. If file's vattr.va_blocksize * (preferred I/O block size) is bigger and multiple to chosen * logical block size -- report it as physical block size. */ if (params->blocksize_bytes != 0) - be_lun->blocksize = params->blocksize_bytes; + cbe_lun->blocksize = params->blocksize_bytes; else - be_lun->blocksize = 512; + cbe_lun->blocksize = 512; + be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; + cbe_lun->maxlba = (be_lun->size_blocks == 0) ? + 0 : (be_lun->size_blocks - 1); us = ps = vattr.va_blocksize; uo = po = 0; - value = ctl_get_opt(&be_lun->ctl_be_lun.options, "pblocksize"); + value = ctl_get_opt(&cbe_lun->options, "pblocksize"); if (value != NULL) ctl_expand_number(value, &ps); - value = ctl_get_opt(&be_lun->ctl_be_lun.options, "pblockoffset"); + value = ctl_get_opt(&cbe_lun->options, "pblockoffset"); if (value != NULL) ctl_expand_number(value, &po); - pss = ps / be_lun->blocksize; - pos = po / be_lun->blocksize; - if ((pss > 0) && (pss * be_lun->blocksize == ps) && (pss >= pos) && - ((pss & (pss - 1)) == 0) && (pos * be_lun->blocksize == po)) { - be_lun->pblockexp = fls(pss) - 1; - be_lun->pblockoff = (pss - pos) % pss; + pss = ps / cbe_lun->blocksize; + pos = po / cbe_lun->blocksize; + if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && + ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { + cbe_lun->pblockexp = fls(pss) - 1; + cbe_lun->pblockoff = (pss - pos) % pss; } - value = ctl_get_opt(&be_lun->ctl_be_lun.options, "ublocksize"); + value = ctl_get_opt(&cbe_lun->options, "ublocksize"); if (value != NULL) ctl_expand_number(value, &us); - value = ctl_get_opt(&be_lun->ctl_be_lun.options, "ublockoffset"); + value = ctl_get_opt(&cbe_lun->options, "ublockoffset"); if (value != NULL) ctl_expand_number(value, &uo); - uss = us / be_lun->blocksize; - uos = uo / be_lun->blocksize; - if ((uss > 0) && (uss * be_lun->blocksize == us) && (uss >= uos) && - ((uss & (uss - 1)) == 0) && (uos * be_lun->blocksize == uo)) { - be_lun->ublockexp = fls(uss) - 1; - be_lun->ublockoff = (uss - uos) % uss; + uss = us / cbe_lun->blocksize; + uos = uo / cbe_lun->blocksize; + if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && + ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { + cbe_lun->ublockexp = fls(uss) - 1; + cbe_lun->ublockoff = (uss - uos) % uss; } /* * Sanity check. The media size has to be at least one * sector long. */ - if (be_lun->size_bytes < be_lun->blocksize) { + if (be_lun->size_bytes < cbe_lun->blocksize) { error = EINVAL; snprintf(req->error_str, sizeof(req->error_str), "file %s size %ju < block size %u", be_lun->dev_path, - (uintmax_t)be_lun->size_bytes, be_lun->blocksize); + (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize); } - be_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / be_lun->blocksize; + cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize; return (error); } static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) { + struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; struct ctl_lun_create_params *params; struct vattr vattr; struct cdev *dev; struct cdevsw *devsw; char *value; int error, atomic, maxio, unmap, tmp; off_t ps, pss, po, pos, us, uss, uo, uos, otmp; params = &be_lun->params; be_lun->dev_type = CTL_BE_BLOCK_DEV; be_lun->backend.dev.cdev = be_lun->vn->v_rdev; be_lun->backend.dev.csw = dev_refthread(be_lun->backend.dev.cdev, &be_lun->backend.dev.dev_ref); if (be_lun->backend.dev.csw == NULL) panic("Unable to retrieve device switch"); if (strcmp(be_lun->backend.dev.csw->d_name, "zvol") == 0) { be_lun->dispatch = ctl_be_block_dispatch_zvol; be_lun->get_lba_status = ctl_be_block_gls_zvol; atomic = maxio = CTLBLK_MAX_IO_SIZE; } else { be_lun->dispatch = ctl_be_block_dispatch_dev; + be_lun->get_lba_status = NULL; atomic = 0; maxio = be_lun->backend.dev.cdev->si_iosize_max; if (maxio <= 0) maxio = DFLTPHYS; if (maxio > CTLBLK_MAX_IO_SIZE) maxio = CTLBLK_MAX_IO_SIZE; } be_lun->lun_flush = ctl_be_block_flush_dev; be_lun->getattr = ctl_be_block_getattr_dev; + be_lun->unmap = ctl_be_block_unmap_dev; error = VOP_GETATTR(be_lun->vn, &vattr, NOCRED); if (error) { snprintf(req->error_str, sizeof(req->error_str), "error getting vnode attributes for device %s", be_lun->dev_path); return (error); } dev = be_lun->vn->v_rdev; devsw = dev->si_devsw; if (!devsw->d_ioctl) { snprintf(req->error_str, sizeof(req->error_str), "no d_ioctl for device %s!", be_lun->dev_path); return (ENODEV); } error = devsw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD, curthread); if (error) { snprintf(req->error_str, sizeof(req->error_str), "error %d returned for DIOCGSECTORSIZE ioctl " "on %s!", error, be_lun->dev_path); return (error); } /* * If the user has asked for a blocksize that is greater than the * backing device's blocksize, we can do it only if the blocksize * the user is asking for is an even multiple of the underlying * device's blocksize. */ if ((params->blocksize_bytes != 0) && (params->blocksize_bytes >= tmp)) { if (params->blocksize_bytes % tmp == 0) { - be_lun->blocksize = params->blocksize_bytes; + cbe_lun->blocksize = params->blocksize_bytes; } else { snprintf(req->error_str, sizeof(req->error_str), "requested blocksize %u is not an even " "multiple of backing device blocksize %u", params->blocksize_bytes, tmp); return (EINVAL); } } else if (params->blocksize_bytes != 0) { snprintf(req->error_str, sizeof(req->error_str), "requested blocksize %u < backing device " "blocksize %u", params->blocksize_bytes, tmp); return (EINVAL); } else - be_lun->blocksize = tmp; + cbe_lun->blocksize = tmp; error = devsw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD, curthread); if (error) { snprintf(req->error_str, sizeof(req->error_str), "error %d returned for DIOCGMEDIASIZE " " ioctl on %s!", error, be_lun->dev_path); return (error); } if (params->lun_size_bytes != 0) { if (params->lun_size_bytes > otmp) { snprintf(req->error_str, sizeof(req->error_str), "requested LUN size %ju > backing device " "size %ju", (uintmax_t)params->lun_size_bytes, (uintmax_t)otmp); return (EINVAL); } be_lun->size_bytes = params->lun_size_bytes; } else be_lun->size_bytes = otmp; + be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; + cbe_lun->maxlba = (be_lun->size_blocks == 0) ? + 0 : (be_lun->size_blocks - 1); error = devsw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD, curthread); if (error) ps = po = 0; else { error = devsw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po, FREAD, curthread); if (error) po = 0; } us = ps; uo = po; - value = ctl_get_opt(&be_lun->ctl_be_lun.options, "pblocksize"); + value = ctl_get_opt(&cbe_lun->options, "pblocksize"); if (value != NULL) ctl_expand_number(value, &ps); - value = ctl_get_opt(&be_lun->ctl_be_lun.options, "pblockoffset"); + value = ctl_get_opt(&cbe_lun->options, "pblockoffset"); if (value != NULL) ctl_expand_number(value, &po); - pss = ps / be_lun->blocksize; - pos = po / be_lun->blocksize; - if ((pss > 0) && (pss * be_lun->blocksize == ps) && (pss >= pos) && - ((pss & (pss - 1)) == 0) && (pos * be_lun->blocksize == po)) { - be_lun->pblockexp = fls(pss) - 1; - be_lun->pblockoff = (pss - pos) % pss; + pss = ps / cbe_lun->blocksize; + pos = po / cbe_lun->blocksize; + if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && + ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { + cbe_lun->pblockexp = fls(pss) - 1; + cbe_lun->pblockoff = (pss - pos) % pss; } - value = ctl_get_opt(&be_lun->ctl_be_lun.options, "ublocksize"); + value = ctl_get_opt(&cbe_lun->options, "ublocksize"); if (value != NULL) ctl_expand_number(value, &us); - value = ctl_get_opt(&be_lun->ctl_be_lun.options, "ublockoffset"); + value = ctl_get_opt(&cbe_lun->options, "ublockoffset"); if (value != NULL) ctl_expand_number(value, &uo); - uss = us / be_lun->blocksize; - uos = uo / be_lun->blocksize; - if ((uss > 0) && (uss * be_lun->blocksize == us) && (uss >= uos) && - ((uss & (uss - 1)) == 0) && (uos * be_lun->blocksize == uo)) { - be_lun->ublockexp = fls(uss) - 1; - be_lun->ublockoff = (uss - uos) % uss; + uss = us / cbe_lun->blocksize; + uos = uo / cbe_lun->blocksize; + if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && + ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { + cbe_lun->ublockexp = fls(uss) - 1; + cbe_lun->ublockoff = (uss - uos) % uss; } - be_lun->atomicblock = atomic / be_lun->blocksize; - be_lun->opttxferlen = maxio / be_lun->blocksize; + cbe_lun->atomicblock = atomic / cbe_lun->blocksize; + cbe_lun->opttxferlen = maxio / cbe_lun->blocksize; if (be_lun->dispatch == ctl_be_block_dispatch_zvol) { unmap = 1; } else { struct diocgattr_arg arg; strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); arg.len = sizeof(arg.value.i); error = devsw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD, curthread); unmap = (error == 0) ? arg.value.i : 0; } - value = ctl_get_opt(&be_lun->ctl_be_lun.options, "unmap"); + value = ctl_get_opt(&cbe_lun->options, "unmap"); if (value != NULL) unmap = (strcmp(value, "on") == 0); if (unmap) - be_lun->unmap = ctl_be_block_unmap_dev; + cbe_lun->flags |= CTL_LUN_FLAG_UNMAP; + else + cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP; return (0); } static int ctl_be_block_close(struct ctl_be_block_lun *be_lun) { - DROP_GIANT(); - if (be_lun->vn) { - int flags = FREAD | FWRITE; + struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; + int flags; + if (be_lun->vn) { switch (be_lun->dev_type) { case CTL_BE_BLOCK_DEV: if (be_lun->backend.dev.csw) { dev_relthread(be_lun->backend.dev.cdev, be_lun->backend.dev.dev_ref); be_lun->backend.dev.csw = NULL; be_lun->backend.dev.cdev = NULL; } break; case CTL_BE_BLOCK_FILE: break; case CTL_BE_BLOCK_NONE: break; default: panic("Unexpected backend type."); break; } + flags = FREAD; + if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0) + flags |= FWRITE; (void)vn_close(be_lun->vn, flags, NOCRED, curthread); be_lun->vn = NULL; switch (be_lun->dev_type) { case CTL_BE_BLOCK_DEV: break; case CTL_BE_BLOCK_FILE: if (be_lun->backend.file.cred != NULL) { crfree(be_lun->backend.file.cred); be_lun->backend.file.cred = NULL; } break; case CTL_BE_BLOCK_NONE: break; default: panic("Unexpected backend type."); break; } be_lun->dev_type = CTL_BE_BLOCK_NONE; } - PICKUP_GIANT(); - return (0); } static int ctl_be_block_open(struct ctl_be_block_softc *softc, - struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) + struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) { + struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; struct nameidata nd; - int flags; - int error; + char *value; + int error, flags; - /* - * XXX KDM allow a read-only option? - */ - flags = FREAD | FWRITE; error = 0; - if (rootvnode == NULL) { snprintf(req->error_str, sizeof(req->error_str), "Root filesystem is not mounted"); return (1); } - pwd_ensure_dirs(); - again: + value = ctl_get_opt(&cbe_lun->options, "file"); + if (value == NULL) { + snprintf(req->error_str, sizeof(req->error_str), + "no file argument specified"); + return (1); + } + free(be_lun->dev_path, M_CTLBLK); + be_lun->dev_path = strdup(value, M_CTLBLK); + + flags = FREAD; + value = ctl_get_opt(&cbe_lun->options, "readonly"); + if (value == NULL || strcmp(value, "on") != 0) + flags |= FWRITE; + +again: NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread); error = vn_open(&nd, &flags, 0, NULL); + if ((error == EROFS || error == EACCES) && (flags & FWRITE)) { + flags &= ~FWRITE; + goto again; + } if (error) { /* * This is the only reasonable guess we can make as far as * path if the user doesn't give us a fully qualified path. * If they want to specify a file, they need to specify the * full path. */ if (be_lun->dev_path[0] != '/') { - char *dev_path = "/dev/"; char *dev_name; - /* Try adding device path at beginning of name */ - dev_name = malloc(strlen(be_lun->dev_path) - + strlen(dev_path) + 1, - M_CTLBLK, M_WAITOK); - if (dev_name) { - sprintf(dev_name, "%s%s", dev_path, - be_lun->dev_path); - free(be_lun->dev_path, M_CTLBLK); - be_lun->dev_path = dev_name; - goto again; - } + asprintf(&dev_name, M_CTLBLK, "/dev/%s", + be_lun->dev_path); + free(be_lun->dev_path, M_CTLBLK); + be_lun->dev_path = dev_name; + goto again; } snprintf(req->error_str, sizeof(req->error_str), "error opening %s: %d", be_lun->dev_path, error); return (error); } + if (flags & FWRITE) + cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY; + else + cbe_lun->flags |= CTL_LUN_FLAG_READONLY; NDFREE(&nd, NDF_ONLY_PNBUF); - be_lun->vn = nd.ni_vp; /* We only support disks and files. */ if (vn_isdisk(be_lun->vn, &error)) { error = ctl_be_block_open_dev(be_lun, req); } else if (be_lun->vn->v_type == VREG) { error = ctl_be_block_open_file(be_lun, req); } else { error = EINVAL; snprintf(req->error_str, sizeof(req->error_str), "%s is not a disk or plain file", be_lun->dev_path); } VOP_UNLOCK(be_lun->vn, 0); if (error != 0) ctl_be_block_close(be_lun); + cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; + if (be_lun->dispatch != ctl_be_block_dispatch_dev) + cbe_lun->serseq = CTL_LUN_SERSEQ_READ; + value = ctl_get_opt(&cbe_lun->options, "serseq"); + if (value != NULL && strcmp(value, "on") == 0) + cbe_lun->serseq = CTL_LUN_SERSEQ_ON; + else if (value != NULL && strcmp(value, "read") == 0) + cbe_lun->serseq = CTL_LUN_SERSEQ_READ; + else if (value != NULL && strcmp(value, "off") == 0) + cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; return (0); } static int ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) { + struct ctl_be_lun *cbe_lun; struct ctl_be_block_lun *be_lun; struct ctl_lun_create_params *params; char num_thread_str[16]; char tmpstr[32]; char *value; int retval, num_threads; int tmp_num_threads; params = &req->reqdata.create; retval = 0; req->status = CTL_LUN_OK; - num_threads = cbb_num_threads; - be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK); - + cbe_lun = &be_lun->cbe_lun; + cbe_lun->be_lun = be_lun; be_lun->params = req->reqdata.create; be_lun->softc = softc; STAILQ_INIT(&be_lun->input_queue); STAILQ_INIT(&be_lun->config_read_queue); STAILQ_INIT(&be_lun->config_write_queue); STAILQ_INIT(&be_lun->datamove_queue); sprintf(be_lun->lunname, "cblk%d", softc->num_luns); mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF); mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF); - ctl_init_opts(&be_lun->ctl_be_lun.options, + ctl_init_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args); - be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG, NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); - if (be_lun->lun_zone == NULL) { snprintf(req->error_str, sizeof(req->error_str), "error allocating UMA zone"); goto bailout_error; } if (params->flags & CTL_LUN_FLAG_DEV_TYPE) - be_lun->ctl_be_lun.lun_type = params->device_type; + cbe_lun->lun_type = params->device_type; else - be_lun->ctl_be_lun.lun_type = T_DIRECT; + cbe_lun->lun_type = T_DIRECT; + be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED; + cbe_lun->flags = CTL_LUN_FLAG_PRIMARY; - if (be_lun->ctl_be_lun.lun_type == T_DIRECT) { - value = ctl_get_opt(&be_lun->ctl_be_lun.options, "file"); - if (value == NULL) { - snprintf(req->error_str, sizeof(req->error_str), - "no file argument specified"); - goto bailout_error; - } - be_lun->dev_path = strdup(value, M_CTLBLK); + if (cbe_lun->lun_type == T_DIRECT) { be_lun->size_bytes = params->lun_size_bytes; if (params->blocksize_bytes != 0) - be_lun->blocksize = params->blocksize_bytes; + cbe_lun->blocksize = params->blocksize_bytes; else - be_lun->blocksize = 512; + cbe_lun->blocksize = 512; + be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; + cbe_lun->maxlba = (be_lun->size_blocks == 0) ? + 0 : (be_lun->size_blocks - 1); retval = ctl_be_block_open(softc, be_lun, req); - be_lun->size_blocks = be_lun->size_bytes / be_lun->blocksize; if (retval != 0) { retval = 0; req->status = CTL_LUN_WARNING; } + num_threads = cbb_num_threads; } else { - /* - * For processor devices, we don't have any size. - */ - be_lun->blocksize = 0; - be_lun->pblockexp = 0; - be_lun->pblockoff = 0; - be_lun->ublockexp = 0; - be_lun->ublockoff = 0; - be_lun->size_blocks = 0; - be_lun->size_bytes = 0; - be_lun->ctl_be_lun.maxlba = 0; - - /* - * Default to just 1 thread for processor devices. - */ num_threads = 1; } /* * XXX This searching loop might be refactored to be combined with * the loop above, */ - value = ctl_get_opt(&be_lun->ctl_be_lun.options, "num_threads"); + value = ctl_get_opt(&cbe_lun->options, "num_threads"); if (value != NULL) { tmp_num_threads = strtol(value, NULL, 0); /* * We don't let the user specify less than one * thread, but hope he's clueful enough not to * specify 1000 threads. */ if (tmp_num_threads < 1) { snprintf(req->error_str, sizeof(req->error_str), "invalid number of threads %s", num_thread_str); goto bailout_error; } num_threads = tmp_num_threads; } - be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED; - be_lun->ctl_be_lun.flags = CTL_LUN_FLAG_PRIMARY; if (be_lun->vn == NULL) - be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_OFFLINE; - if (be_lun->unmap != NULL) - be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_UNMAP; - if (be_lun->dispatch != ctl_be_block_dispatch_dev) - be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_SERSEQ_READ; - be_lun->ctl_be_lun.be_lun = be_lun; - be_lun->ctl_be_lun.maxlba = (be_lun->size_blocks == 0) ? - 0 : (be_lun->size_blocks - 1); - be_lun->ctl_be_lun.blocksize = be_lun->blocksize; - be_lun->ctl_be_lun.pblockexp = be_lun->pblockexp; - be_lun->ctl_be_lun.pblockoff = be_lun->pblockoff; - be_lun->ctl_be_lun.ublockexp = be_lun->ublockexp; - be_lun->ctl_be_lun.ublockoff = be_lun->ublockoff; - be_lun->ctl_be_lun.atomicblock = be_lun->atomicblock; - be_lun->ctl_be_lun.opttxferlen = be_lun->opttxferlen; + cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE; /* Tell the user the blocksize we ended up using */ params->lun_size_bytes = be_lun->size_bytes; - params->blocksize_bytes = be_lun->blocksize; + params->blocksize_bytes = cbe_lun->blocksize; if (params->flags & CTL_LUN_FLAG_ID_REQ) { - be_lun->ctl_be_lun.req_lun_id = params->req_lun_id; - be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_ID_REQ; + cbe_lun->req_lun_id = params->req_lun_id; + cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ; } else - be_lun->ctl_be_lun.req_lun_id = 0; + cbe_lun->req_lun_id = 0; - be_lun->ctl_be_lun.lun_shutdown = ctl_be_block_lun_shutdown; - be_lun->ctl_be_lun.lun_config_status = - ctl_be_block_lun_config_status; - be_lun->ctl_be_lun.be = &ctl_be_block_driver; + cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown; + cbe_lun->lun_config_status = ctl_be_block_lun_config_status; + cbe_lun->be = &ctl_be_block_driver; if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) { snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d", softc->num_luns); - strncpy((char *)be_lun->ctl_be_lun.serial_num, tmpstr, - MIN(sizeof(be_lun->ctl_be_lun.serial_num), - sizeof(tmpstr))); + strncpy((char *)cbe_lun->serial_num, tmpstr, + MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr))); /* Tell the user what we used for a serial number */ strncpy((char *)params->serial_num, tmpstr, MIN(sizeof(params->serial_num), sizeof(tmpstr))); } else { - strncpy((char *)be_lun->ctl_be_lun.serial_num, - params->serial_num, - MIN(sizeof(be_lun->ctl_be_lun.serial_num), + strncpy((char *)cbe_lun->serial_num, params->serial_num, + MIN(sizeof(cbe_lun->serial_num), sizeof(params->serial_num))); } if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) { snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns); - strncpy((char *)be_lun->ctl_be_lun.device_id, tmpstr, - MIN(sizeof(be_lun->ctl_be_lun.device_id), - sizeof(tmpstr))); + strncpy((char *)cbe_lun->device_id, tmpstr, + MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr))); /* Tell the user what we used for a device ID */ strncpy((char *)params->device_id, tmpstr, MIN(sizeof(params->device_id), sizeof(tmpstr))); } else { - strncpy((char *)be_lun->ctl_be_lun.device_id, - params->device_id, - MIN(sizeof(be_lun->ctl_be_lun.device_id), + strncpy((char *)cbe_lun->device_id, params->device_id, + MIN(sizeof(cbe_lun->device_id), sizeof(params->device_id))); } TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun); be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK, taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue); if (be_lun->io_taskqueue == NULL) { snprintf(req->error_str, sizeof(req->error_str), "unable to create taskqueue"); goto bailout_error; } /* * Note that we start the same number of threads by default for * both the file case and the block device case. For the file * case, we need multiple threads to allow concurrency, because the * vnode interface is designed to be a blocking interface. For the * block device case, ZFS zvols at least will block the caller's * context in many instances, and so we need multiple threads to * overcome that problem. Other block devices don't need as many * threads, but they shouldn't cause too many problems. * * If the user wants to just have a single thread for a block * device, he can specify that when the LUN is created, or change * the tunable/sysctl to alter the default number of threads. */ retval = taskqueue_start_threads(&be_lun->io_taskqueue, /*num threads*/num_threads, /*priority*/PWAIT, /*thread name*/ "%s taskq", be_lun->lunname); if (retval != 0) goto bailout_error; be_lun->num_threads = num_threads; mtx_lock(&softc->lock); softc->num_luns++; STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links); mtx_unlock(&softc->lock); - retval = ctl_add_lun(&be_lun->ctl_be_lun); + retval = ctl_add_lun(&be_lun->cbe_lun); if (retval != 0) { mtx_lock(&softc->lock); STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links); softc->num_luns--; mtx_unlock(&softc->lock); snprintf(req->error_str, sizeof(req->error_str), "ctl_add_lun() returned error %d, see dmesg for " "details", retval); retval = 0; goto bailout_error; } mtx_lock(&softc->lock); /* * Tell the config_status routine that we're waiting so it won't * clean up the LUN in the event of an error. */ be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) { retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0); if (retval == EINTR) break; } be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) { snprintf(req->error_str, sizeof(req->error_str), "LUN configuration error, see dmesg for details"); STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links); softc->num_luns--; mtx_unlock(&softc->lock); goto bailout_error; } else { - params->req_lun_id = be_lun->ctl_be_lun.lun_id; + params->req_lun_id = cbe_lun->lun_id; } mtx_unlock(&softc->lock); be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id, - be_lun->blocksize, + cbe_lun->blocksize, DEVSTAT_ALL_SUPPORTED, - be_lun->ctl_be_lun.lun_type + cbe_lun->lun_type | DEVSTAT_TYPE_IF_OTHER, DEVSTAT_PRIORITY_OTHER); return (retval); bailout_error: req->status = CTL_LUN_ERROR; if (be_lun->io_taskqueue != NULL) taskqueue_free(be_lun->io_taskqueue); ctl_be_block_close(be_lun); if (be_lun->dev_path != NULL) free(be_lun->dev_path, M_CTLBLK); if (be_lun->lun_zone != NULL) uma_zdestroy(be_lun->lun_zone); - ctl_free_opts(&be_lun->ctl_be_lun.options); + ctl_free_opts(&cbe_lun->options); mtx_destroy(&be_lun->queue_lock); mtx_destroy(&be_lun->io_lock); free(be_lun, M_CTLBLK); return (retval); } static int ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) { struct ctl_lun_rm_params *params; struct ctl_be_block_lun *be_lun; int retval; params = &req->reqdata.rm; mtx_lock(&softc->lock); - - be_lun = NULL; - STAILQ_FOREACH(be_lun, &softc->lun_list, links) { - if (be_lun->ctl_be_lun.lun_id == params->lun_id) + if (be_lun->cbe_lun.lun_id == params->lun_id) break; } mtx_unlock(&softc->lock); if (be_lun == NULL) { snprintf(req->error_str, sizeof(req->error_str), "LUN %u is not managed by the block backend", params->lun_id); goto bailout_error; } - retval = ctl_disable_lun(&be_lun->ctl_be_lun); + retval = ctl_disable_lun(&be_lun->cbe_lun); if (retval != 0) { snprintf(req->error_str, sizeof(req->error_str), "error %d returned from ctl_disable_lun() for " "LUN %d", retval, params->lun_id); goto bailout_error; } - retval = ctl_invalidate_lun(&be_lun->ctl_be_lun); + retval = ctl_invalidate_lun(&be_lun->cbe_lun); if (retval != 0) { snprintf(req->error_str, sizeof(req->error_str), "error %d returned from ctl_invalidate_lun() for " "LUN %d", retval, params->lun_id); goto bailout_error; } mtx_lock(&softc->lock); be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0); if (retval == EINTR) break; } be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { snprintf(req->error_str, sizeof(req->error_str), "interrupted waiting for LUN to be freed"); mtx_unlock(&softc->lock); goto bailout_error; } STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links); softc->num_luns--; mtx_unlock(&softc->lock); taskqueue_drain(be_lun->io_taskqueue, &be_lun->io_task); taskqueue_free(be_lun->io_taskqueue); ctl_be_block_close(be_lun); if (be_lun->disk_stats != NULL) devstat_remove_entry(be_lun->disk_stats); uma_zdestroy(be_lun->lun_zone); - ctl_free_opts(&be_lun->ctl_be_lun.options); + ctl_free_opts(&be_lun->cbe_lun.options); free(be_lun->dev_path, M_CTLBLK); mtx_destroy(&be_lun->queue_lock); mtx_destroy(&be_lun->io_lock); free(be_lun, M_CTLBLK); req->status = CTL_LUN_OK; return (0); bailout_error: req->status = CTL_LUN_ERROR; return (0); } static int ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) { + struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; struct vattr vattr; int error; struct ctl_lun_create_params *params = &be_lun->params; if (params->lun_size_bytes != 0) { be_lun->size_bytes = params->lun_size_bytes; } else { vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); VOP_UNLOCK(be_lun->vn, 0); if (error != 0) { snprintf(req->error_str, sizeof(req->error_str), "error calling VOP_GETATTR() for file %s", be_lun->dev_path); return (error); } - be_lun->size_bytes = vattr.va_size; } - + be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; + cbe_lun->maxlba = (be_lun->size_blocks == 0) ? + 0 : (be_lun->size_blocks - 1); return (0); } static int ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) { + struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; struct ctl_be_block_devdata *dev_data; int error; struct ctl_lun_create_params *params = &be_lun->params; uint64_t size_bytes; dev_data = &be_lun->backend.dev; if (!dev_data->csw->d_ioctl) { snprintf(req->error_str, sizeof(req->error_str), "no d_ioctl for device %s!", be_lun->dev_path); return (ENODEV); } error = dev_data->csw->d_ioctl(dev_data->cdev, DIOCGMEDIASIZE, (caddr_t)&size_bytes, FREAD, curthread); if (error) { snprintf(req->error_str, sizeof(req->error_str), "error %d returned for DIOCGMEDIASIZE ioctl " "on %s!", error, be_lun->dev_path); return (error); } if (params->lun_size_bytes != 0) { if (params->lun_size_bytes > size_bytes) { snprintf(req->error_str, sizeof(req->error_str), "requested LUN size %ju > backing device " "size %ju", (uintmax_t)params->lun_size_bytes, (uintmax_t)size_bytes); return (EINVAL); } - be_lun->size_bytes = params->lun_size_bytes; } else { be_lun->size_bytes = size_bytes; } - + be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; + cbe_lun->maxlba = (be_lun->size_blocks == 0) ? + 0 : (be_lun->size_blocks - 1); return (0); } static int ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) { struct ctl_lun_modify_params *params; struct ctl_be_block_lun *be_lun; + struct ctl_be_lun *cbe_lun; uint64_t oldsize; int error; params = &req->reqdata.modify; mtx_lock(&softc->lock); - be_lun = NULL; STAILQ_FOREACH(be_lun, &softc->lun_list, links) { - if (be_lun->ctl_be_lun.lun_id == params->lun_id) + if (be_lun->cbe_lun.lun_id == params->lun_id) break; } mtx_unlock(&softc->lock); if (be_lun == NULL) { snprintf(req->error_str, sizeof(req->error_str), "LUN %u is not managed by the block backend", params->lun_id); goto bailout_error; } + cbe_lun = &be_lun->cbe_lun; - be_lun->params.lun_size_bytes = params->lun_size_bytes; + if (params->lun_size_bytes != 0) + be_lun->params.lun_size_bytes = params->lun_size_bytes; + ctl_update_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args); - oldsize = be_lun->size_bytes; + oldsize = be_lun->size_blocks; if (be_lun->vn == NULL) error = ctl_be_block_open(softc, be_lun, req); else if (vn_isdisk(be_lun->vn, &error)) error = ctl_be_block_modify_dev(be_lun, req); else if (be_lun->vn->v_type == VREG) error = ctl_be_block_modify_file(be_lun, req); else error = EINVAL; - be_lun->size_blocks = be_lun->size_bytes / be_lun->blocksize; - if (error == 0 && be_lun->size_bytes != oldsize) { - - /* - * The maximum LBA is the size - 1. - * - * XXX: Note that this field is being updated without locking, - * which might cause problems on 32-bit architectures. - */ - if (be_lun->unmap != NULL) - be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_UNMAP; - be_lun->ctl_be_lun.maxlba = (be_lun->size_blocks == 0) ? - 0 : (be_lun->size_blocks - 1); - be_lun->ctl_be_lun.blocksize = be_lun->blocksize; - be_lun->ctl_be_lun.pblockexp = be_lun->pblockexp; - be_lun->ctl_be_lun.pblockoff = be_lun->pblockoff; - be_lun->ctl_be_lun.ublockexp = be_lun->ublockexp; - be_lun->ctl_be_lun.ublockoff = be_lun->ublockoff; - be_lun->ctl_be_lun.atomicblock = be_lun->atomicblock; - be_lun->ctl_be_lun.opttxferlen = be_lun->opttxferlen; - ctl_lun_capacity_changed(&be_lun->ctl_be_lun); - if (oldsize == 0 && be_lun->size_blocks != 0) - ctl_lun_online(&be_lun->ctl_be_lun); + if (be_lun->size_blocks != oldsize) + ctl_lun_capacity_changed(cbe_lun); + if ((cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) && + be_lun->vn != NULL) { + cbe_lun->flags &= ~CTL_LUN_FLAG_OFFLINE; + ctl_lun_online(cbe_lun); } /* Tell the user the exact size we ended up using */ params->lun_size_bytes = be_lun->size_bytes; req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK; - return (0); bailout_error: req->status = CTL_LUN_ERROR; - return (0); } static void ctl_be_block_lun_shutdown(void *be_lun) { struct ctl_be_block_lun *lun; struct ctl_be_block_softc *softc; lun = (struct ctl_be_block_lun *)be_lun; softc = lun->softc; mtx_lock(&softc->lock); lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED; if (lun->flags & CTL_BE_BLOCK_LUN_WAITING) wakeup(lun); mtx_unlock(&softc->lock); } static void ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status) { struct ctl_be_block_lun *lun; struct ctl_be_block_softc *softc; lun = (struct ctl_be_block_lun *)be_lun; softc = lun->softc; if (status == CTL_LUN_CONFIG_OK) { mtx_lock(&softc->lock); lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED; if (lun->flags & CTL_BE_BLOCK_LUN_WAITING) wakeup(lun); mtx_unlock(&softc->lock); /* * We successfully added the LUN, attempt to enable it. */ - if (ctl_enable_lun(&lun->ctl_be_lun) != 0) { + if (ctl_enable_lun(&lun->cbe_lun) != 0) { printf("%s: ctl_enable_lun() failed!\n", __func__); - if (ctl_invalidate_lun(&lun->ctl_be_lun) != 0) { + if (ctl_invalidate_lun(&lun->cbe_lun) != 0) { printf("%s: ctl_invalidate_lun() failed!\n", __func__); } } return; } mtx_lock(&softc->lock); lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED; lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR; wakeup(lun); mtx_unlock(&softc->lock); } static int ctl_be_block_config_write(union ctl_io *io) { struct ctl_be_block_lun *be_lun; - struct ctl_be_lun *ctl_be_lun; + struct ctl_be_lun *cbe_lun; int retval; retval = 0; DPRINTF("entered\n"); - ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ + cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ CTL_PRIV_BACKEND_LUN].ptr; - be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun; + be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun; switch (io->scsiio.cdb[0]) { case SYNCHRONIZE_CACHE: case SYNCHRONIZE_CACHE_16: case WRITE_SAME_10: case WRITE_SAME_16: case UNMAP: /* * The upper level CTL code will filter out any CDBs with * the immediate bit set and return the proper error. * * We don't really need to worry about what LBA range the * user asked to be synced out. When they issue a sync * cache command, we'll sync out the whole thing. */ mtx_lock(&be_lun->queue_lock); STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr, links); mtx_unlock(&be_lun->queue_lock); taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); break; case START_STOP_UNIT: { struct scsi_start_stop_unit *cdb; cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb; if (cdb->how & SSS_START) - retval = ctl_start_lun(ctl_be_lun); + retval = ctl_start_lun(cbe_lun); else { - retval = ctl_stop_lun(ctl_be_lun); + retval = ctl_stop_lun(cbe_lun); /* * XXX KDM Copan-specific offline behavior. * Figure out a reasonable way to port this? */ #ifdef NEEDTOPORT if ((retval == 0) && (cdb->byte2 & SSS_ONOFFLINE)) - retval = ctl_lun_offline(ctl_be_lun); + retval = ctl_lun_offline(cbe_lun); #endif } /* * In general, the above routines should not fail. They * just set state for the LUN. So we've got something * pretty wrong here if we can't start or stop the LUN. */ if (retval != 0) { ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ 0xf051); retval = CTL_RETVAL_COMPLETE; } else { ctl_set_success(&io->scsiio); } ctl_config_write_done(io); break; } default: ctl_set_invalid_opcode(&io->scsiio); ctl_config_write_done(io); retval = CTL_RETVAL_COMPLETE; break; } return (retval); } static int ctl_be_block_config_read(union ctl_io *io) { struct ctl_be_block_lun *be_lun; - struct ctl_be_lun *ctl_be_lun; + struct ctl_be_lun *cbe_lun; int retval = 0; DPRINTF("entered\n"); - ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ + cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ CTL_PRIV_BACKEND_LUN].ptr; - be_lun = (struct ctl_be_block_lun *)ctl_be_lun->be_lun; + be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun; switch (io->scsiio.cdb[0]) { case SERVICE_ACTION_IN: if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) { mtx_lock(&be_lun->queue_lock); STAILQ_INSERT_TAIL(&be_lun->config_read_queue, &io->io_hdr, links); mtx_unlock(&be_lun->queue_lock); taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); retval = CTL_RETVAL_QUEUED; break; } ctl_set_invalid_field(&io->scsiio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 4); ctl_config_read_done(io); retval = CTL_RETVAL_COMPLETE; break; default: ctl_set_invalid_opcode(&io->scsiio); ctl_config_read_done(io); retval = CTL_RETVAL_COMPLETE; break; } return (retval); } static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb) { struct ctl_be_block_lun *lun; int retval; lun = (struct ctl_be_block_lun *)be_lun; retval = 0; retval = sbuf_printf(sb, "\t"); if (retval != 0) goto bailout; retval = sbuf_printf(sb, "%d", lun->num_threads); if (retval != 0) goto bailout; retval = sbuf_printf(sb, "\n"); bailout: return (retval); } static uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname) { struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)be_lun; if (lun->getattr == NULL) return (UINT64_MAX); return (lun->getattr(lun, attrname)); } int ctl_be_block_init(void) { struct ctl_be_block_softc *softc; int retval; softc = &backend_block_softc; retval = 0; mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF); beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); STAILQ_INIT(&softc->lun_list); return (retval); } Index: projects/clang-trunk/sys/cam/ctl/ctl_backend_ramdisk.c =================================================================== --- projects/clang-trunk/sys/cam/ctl/ctl_backend_ramdisk.c (revision 287501) +++ projects/clang-trunk/sys/cam/ctl/ctl_backend_ramdisk.c (revision 287502) @@ -1,996 +1,964 @@ /*- * Copyright (c) 2003, 2008 Silicon Graphics International Corp. * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Edward Tomasz Napierala * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_ramdisk.c#3 $ */ /* * CAM Target Layer backend for a "fake" ramdisk. * * Author: Ken Merry */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef enum { CTL_BE_RAMDISK_LUN_UNCONFIGURED = 0x01, CTL_BE_RAMDISK_LUN_CONFIG_ERR = 0x02, CTL_BE_RAMDISK_LUN_WAITING = 0x04 } ctl_be_ramdisk_lun_flags; struct ctl_be_ramdisk_lun { + struct ctl_lun_create_params params; char lunname[32]; uint64_t size_bytes; uint64_t size_blocks; struct ctl_be_ramdisk_softc *softc; ctl_be_ramdisk_lun_flags flags; STAILQ_ENTRY(ctl_be_ramdisk_lun) links; - struct ctl_be_lun ctl_be_lun; + struct ctl_be_lun cbe_lun; struct taskqueue *io_taskqueue; struct task io_task; STAILQ_HEAD(, ctl_io_hdr) cont_queue; struct mtx_padalign queue_lock; }; struct ctl_be_ramdisk_softc { struct mtx lock; int rd_size; #ifdef CTL_RAMDISK_PAGES uint8_t **ramdisk_pages; int num_pages; #else uint8_t *ramdisk_buffer; #endif int num_luns; STAILQ_HEAD(, ctl_be_ramdisk_lun) lun_list; }; static struct ctl_be_ramdisk_softc rd_softc; int ctl_backend_ramdisk_init(void); void ctl_backend_ramdisk_shutdown(void); static int ctl_backend_ramdisk_move_done(union ctl_io *io); static int ctl_backend_ramdisk_submit(union ctl_io *io); static void ctl_backend_ramdisk_continue(union ctl_io *io); static int ctl_backend_ramdisk_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td); static int ctl_backend_ramdisk_rm(struct ctl_be_ramdisk_softc *softc, struct ctl_lun_req *req); static int ctl_backend_ramdisk_create(struct ctl_be_ramdisk_softc *softc, - struct ctl_lun_req *req, int do_wait); + struct ctl_lun_req *req); static int ctl_backend_ramdisk_modify(struct ctl_be_ramdisk_softc *softc, struct ctl_lun_req *req); static void ctl_backend_ramdisk_worker(void *context, int pending); static void ctl_backend_ramdisk_lun_shutdown(void *be_lun); static void ctl_backend_ramdisk_lun_config_status(void *be_lun, ctl_lun_config_status status); static int ctl_backend_ramdisk_config_write(union ctl_io *io); static int ctl_backend_ramdisk_config_read(union ctl_io *io); static struct ctl_backend_driver ctl_be_ramdisk_driver = { .name = "ramdisk", .flags = CTL_BE_FLAG_HAS_CONFIG, .init = ctl_backend_ramdisk_init, .data_submit = ctl_backend_ramdisk_submit, .data_move_done = ctl_backend_ramdisk_move_done, .config_read = ctl_backend_ramdisk_config_read, .config_write = ctl_backend_ramdisk_config_write, .ioctl = ctl_backend_ramdisk_ioctl }; MALLOC_DEFINE(M_RAMDISK, "ramdisk", "Memory used for CTL RAMdisk"); CTL_BACKEND_DECLARE(cbr, ctl_be_ramdisk_driver); int ctl_backend_ramdisk_init(void) { struct ctl_be_ramdisk_softc *softc; #ifdef CTL_RAMDISK_PAGES int i; #endif softc = &rd_softc; memset(softc, 0, sizeof(*softc)); mtx_init(&softc->lock, "ctlramdisk", NULL, MTX_DEF); STAILQ_INIT(&softc->lun_list); softc->rd_size = 1024 * 1024; #ifdef CTL_RAMDISK_PAGES softc->num_pages = softc->rd_size / PAGE_SIZE; softc->ramdisk_pages = (uint8_t **)malloc(sizeof(uint8_t *) * softc->num_pages, M_RAMDISK, M_WAITOK); for (i = 0; i < softc->num_pages; i++) softc->ramdisk_pages[i] = malloc(PAGE_SIZE, M_RAMDISK,M_WAITOK); #else softc->ramdisk_buffer = (uint8_t *)malloc(softc->rd_size, M_RAMDISK, M_WAITOK); #endif return (0); } void ctl_backend_ramdisk_shutdown(void) { struct ctl_be_ramdisk_softc *softc; struct ctl_be_ramdisk_lun *lun, *next_lun; #ifdef CTL_RAMDISK_PAGES int i; #endif softc = &rd_softc; mtx_lock(&softc->lock); for (lun = STAILQ_FIRST(&softc->lun_list); lun != NULL; lun = next_lun){ /* * Grab the next LUN. The current LUN may get removed by * ctl_invalidate_lun(), which will call our LUN shutdown * routine, if there is no outstanding I/O for this LUN. */ next_lun = STAILQ_NEXT(lun, links); /* * Drop our lock here. Since ctl_invalidate_lun() can call * back into us, this could potentially lead to a recursive * lock of the same mutex, which would cause a hang. */ mtx_unlock(&softc->lock); - ctl_disable_lun(&lun->ctl_be_lun); - ctl_invalidate_lun(&lun->ctl_be_lun); + ctl_disable_lun(&lun->cbe_lun); + ctl_invalidate_lun(&lun->cbe_lun); mtx_lock(&softc->lock); } mtx_unlock(&softc->lock); #ifdef CTL_RAMDISK_PAGES for (i = 0; i < softc->num_pages; i++) free(softc->ramdisk_pages[i], M_RAMDISK); free(softc->ramdisk_pages, M_RAMDISK); #else free(softc->ramdisk_buffer, M_RAMDISK); #endif if (ctl_backend_deregister(&ctl_be_ramdisk_driver) != 0) { printf("ctl_backend_ramdisk_shutdown: " "ctl_backend_deregister() failed!\n"); } } static int ctl_backend_ramdisk_move_done(union ctl_io *io) { - struct ctl_be_lun *ctl_be_lun; + struct ctl_be_lun *cbe_lun; struct ctl_be_ramdisk_lun *be_lun; #ifdef CTL_TIME_IO struct bintime cur_bt; #endif CTL_DEBUG_PRINT(("ctl_backend_ramdisk_move_done\n")); - ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ + cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ CTL_PRIV_BACKEND_LUN].ptr; - be_lun = (struct ctl_be_ramdisk_lun *)ctl_be_lun->be_lun; + be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun->be_lun; #ifdef CTL_TIME_IO getbintime(&cur_bt); bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt); bintime_add(&io->io_hdr.dma_bt, &cur_bt); io->io_hdr.num_dmas++; #endif if (io->scsiio.kern_sg_entries > 0) free(io->scsiio.kern_data_ptr, M_RAMDISK); io->scsiio.kern_rel_offset += io->scsiio.kern_data_len; if (io->io_hdr.flags & CTL_FLAG_ABORT) { ; } else if ((io->io_hdr.port_status == 0) && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) { if (io->io_hdr.ctl_private[CTL_PRIV_BACKEND].integer > 0) { mtx_lock(&be_lun->queue_lock); STAILQ_INSERT_TAIL(&be_lun->cont_queue, &io->io_hdr, links); mtx_unlock(&be_lun->queue_lock); taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); return (0); } ctl_set_success(&io->scsiio); } else if ((io->io_hdr.port_status != 0) && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { /* * For hardware error sense keys, the sense key * specific value is defined to be a retry count, * but we use it to pass back an internal FETD * error code. XXX KDM Hopefully the FETD is only * using 16 bits for an error code, since that's * all the space we have in the sks field. */ ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ io->io_hdr.port_status); } ctl_data_submit_done(io); return(0); } static int ctl_backend_ramdisk_submit(union ctl_io *io) { - struct ctl_be_lun *ctl_be_lun; + struct ctl_be_lun *cbe_lun; struct ctl_lba_len_flags *lbalen; - ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ + cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ CTL_PRIV_BACKEND_LUN].ptr; lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; if (lbalen->flags & CTL_LLF_VERIFY) { ctl_set_success(&io->scsiio); ctl_data_submit_done(io); return (CTL_RETVAL_COMPLETE); } io->io_hdr.ctl_private[CTL_PRIV_BACKEND].integer = - lbalen->len * ctl_be_lun->blocksize; + lbalen->len * cbe_lun->blocksize; ctl_backend_ramdisk_continue(io); return (CTL_RETVAL_COMPLETE); } static void ctl_backend_ramdisk_continue(union ctl_io *io) { struct ctl_be_ramdisk_softc *softc; int len, len_filled, sg_filled; #ifdef CTL_RAMDISK_PAGES struct ctl_sg_entry *sg_entries; int i; #endif softc = &rd_softc; len = io->io_hdr.ctl_private[CTL_PRIV_BACKEND].integer; #ifdef CTL_RAMDISK_PAGES sg_filled = min(btoc(len), softc->num_pages); if (sg_filled > 1) { io->scsiio.kern_data_ptr = malloc(sizeof(struct ctl_sg_entry) * sg_filled, M_RAMDISK, M_WAITOK); sg_entries = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr; for (i = 0, len_filled = 0; i < sg_filled; i++) { sg_entries[i].addr = softc->ramdisk_pages[i]; sg_entries[i].len = MIN(PAGE_SIZE, len - len_filled); len_filled += sg_entries[i].len; } io->io_hdr.flags |= CTL_FLAG_KDPTR_SGLIST; } else { sg_filled = 0; len_filled = len; io->scsiio.kern_data_ptr = softc->ramdisk_pages[0]; } #else sg_filled = 0; len_filled = min(len, softc->rd_size); io->scsiio.kern_data_ptr = softc->ramdisk_buffer; #endif /* CTL_RAMDISK_PAGES */ io->scsiio.be_move_done = ctl_backend_ramdisk_move_done; io->scsiio.kern_data_resid = 0; io->scsiio.kern_data_len = len_filled; io->scsiio.kern_sg_entries = sg_filled; io->io_hdr.flags |= CTL_FLAG_ALLOCATED; io->io_hdr.ctl_private[CTL_PRIV_BACKEND].integer -= len_filled; #ifdef CTL_TIME_IO getbintime(&io->io_hdr.dma_start_bt); #endif ctl_datamove(io); } static void ctl_backend_ramdisk_worker(void *context, int pending) { struct ctl_be_ramdisk_softc *softc; struct ctl_be_ramdisk_lun *be_lun; union ctl_io *io; be_lun = (struct ctl_be_ramdisk_lun *)context; softc = be_lun->softc; mtx_lock(&be_lun->queue_lock); for (;;) { io = (union ctl_io *)STAILQ_FIRST(&be_lun->cont_queue); if (io != NULL) { STAILQ_REMOVE(&be_lun->cont_queue, &io->io_hdr, ctl_io_hdr, links); mtx_unlock(&be_lun->queue_lock); ctl_backend_ramdisk_continue(io); mtx_lock(&be_lun->queue_lock); continue; } /* * If we get here, there is no work left in the queues, so * just break out and let the task queue go to sleep. */ break; } mtx_unlock(&be_lun->queue_lock); } static int ctl_backend_ramdisk_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td) { struct ctl_be_ramdisk_softc *softc; int retval; retval = 0; softc = &rd_softc; switch (cmd) { case CTL_LUN_REQ: { struct ctl_lun_req *lun_req; lun_req = (struct ctl_lun_req *)addr; switch (lun_req->reqtype) { case CTL_LUNREQ_CREATE: - retval = ctl_backend_ramdisk_create(softc, lun_req, - /*do_wait*/ 1); + retval = ctl_backend_ramdisk_create(softc, lun_req); break; case CTL_LUNREQ_RM: retval = ctl_backend_ramdisk_rm(softc, lun_req); break; case CTL_LUNREQ_MODIFY: retval = ctl_backend_ramdisk_modify(softc, lun_req); break; default: lun_req->status = CTL_LUN_ERROR; snprintf(lun_req->error_str, sizeof(lun_req->error_str), "%s: invalid LUN request type %d", __func__, lun_req->reqtype); break; } break; } default: retval = ENOTTY; break; } return (retval); } static int ctl_backend_ramdisk_rm(struct ctl_be_ramdisk_softc *softc, struct ctl_lun_req *req) { struct ctl_be_ramdisk_lun *be_lun; struct ctl_lun_rm_params *params; int retval; retval = 0; params = &req->reqdata.rm; be_lun = NULL; mtx_lock(&softc->lock); STAILQ_FOREACH(be_lun, &softc->lun_list, links) { - if (be_lun->ctl_be_lun.lun_id == params->lun_id) + if (be_lun->cbe_lun.lun_id == params->lun_id) break; } mtx_unlock(&softc->lock); if (be_lun == NULL) { snprintf(req->error_str, sizeof(req->error_str), "%s: LUN %u is not managed by the ramdisk backend", __func__, params->lun_id); goto bailout_error; } - retval = ctl_disable_lun(&be_lun->ctl_be_lun); + retval = ctl_disable_lun(&be_lun->cbe_lun); if (retval != 0) { snprintf(req->error_str, sizeof(req->error_str), "%s: error %d returned from ctl_disable_lun() for " "LUN %d", __func__, retval, params->lun_id); goto bailout_error; } /* * Set the waiting flag before we invalidate the LUN. Our shutdown * routine can be called any time after we invalidate the LUN, * and can be called from our context. * * This tells the shutdown routine that we're waiting, or we're * going to wait for the shutdown to happen. */ mtx_lock(&softc->lock); be_lun->flags |= CTL_BE_RAMDISK_LUN_WAITING; mtx_unlock(&softc->lock); - retval = ctl_invalidate_lun(&be_lun->ctl_be_lun); + retval = ctl_invalidate_lun(&be_lun->cbe_lun); if (retval != 0) { snprintf(req->error_str, sizeof(req->error_str), "%s: error %d returned from ctl_invalidate_lun() for " "LUN %d", __func__, retval, params->lun_id); mtx_lock(&softc->lock); be_lun->flags &= ~CTL_BE_RAMDISK_LUN_WAITING; mtx_unlock(&softc->lock); goto bailout_error; } mtx_lock(&softc->lock); while ((be_lun->flags & CTL_BE_RAMDISK_LUN_UNCONFIGURED) == 0) { retval = msleep(be_lun, &softc->lock, PCATCH, "ctlram", 0); if (retval == EINTR) break; } be_lun->flags &= ~CTL_BE_RAMDISK_LUN_WAITING; /* * We only remove this LUN from the list and free it (below) if * retval == 0. If the user interrupted the wait, we just bail out * without actually freeing the LUN. We let the shutdown routine * free the LUN if that happens. */ if (retval == 0) { STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_ramdisk_lun, links); softc->num_luns--; } mtx_unlock(&softc->lock); if (retval == 0) { taskqueue_drain(be_lun->io_taskqueue, &be_lun->io_task); taskqueue_free(be_lun->io_taskqueue); - ctl_free_opts(&be_lun->ctl_be_lun.options); + ctl_free_opts(&be_lun->cbe_lun.options); mtx_destroy(&be_lun->queue_lock); free(be_lun, M_RAMDISK); } req->status = CTL_LUN_OK; return (retval); bailout_error: req->status = CTL_LUN_ERROR; return (0); } static int ctl_backend_ramdisk_create(struct ctl_be_ramdisk_softc *softc, - struct ctl_lun_req *req, int do_wait) + struct ctl_lun_req *req) { struct ctl_be_ramdisk_lun *be_lun; + struct ctl_be_lun *cbe_lun; struct ctl_lun_create_params *params; - uint32_t blocksize; char *value; char tmpstr[32]; - int retval, unmap; + int retval; retval = 0; params = &req->reqdata.create; - if (params->blocksize_bytes != 0) - blocksize = params->blocksize_bytes; - else - blocksize = 512; - be_lun = malloc(sizeof(*be_lun), M_RAMDISK, M_ZERO | (do_wait ? - M_WAITOK : M_NOWAIT)); - - if (be_lun == NULL) { - snprintf(req->error_str, sizeof(req->error_str), - "%s: error allocating %zd bytes", __func__, - sizeof(*be_lun)); - goto bailout_error; - } + be_lun = malloc(sizeof(*be_lun), M_RAMDISK, M_ZERO | M_WAITOK); + cbe_lun = &be_lun->cbe_lun; + cbe_lun->be_lun = be_lun; + be_lun->params = req->reqdata.create; + be_lun->softc = softc; sprintf(be_lun->lunname, "cram%d", softc->num_luns); - ctl_init_opts(&be_lun->ctl_be_lun.options, - req->num_be_args, req->kern_be_args); + ctl_init_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args); if (params->flags & CTL_LUN_FLAG_DEV_TYPE) - be_lun->ctl_be_lun.lun_type = params->device_type; + cbe_lun->lun_type = params->device_type; else - be_lun->ctl_be_lun.lun_type = T_DIRECT; + cbe_lun->lun_type = T_DIRECT; + be_lun->flags = CTL_BE_RAMDISK_LUN_UNCONFIGURED; + cbe_lun->flags = CTL_LUN_FLAG_PRIMARY; - if (be_lun->ctl_be_lun.lun_type == T_DIRECT) { - - if (params->lun_size_bytes < blocksize) { + if (cbe_lun->lun_type == T_DIRECT) { + if (params->blocksize_bytes != 0) + cbe_lun->blocksize = params->blocksize_bytes; + else + cbe_lun->blocksize = 512; + if (params->lun_size_bytes < cbe_lun->blocksize) { snprintf(req->error_str, sizeof(req->error_str), "%s: LUN size %ju < blocksize %u", __func__, - params->lun_size_bytes, blocksize); + params->lun_size_bytes, cbe_lun->blocksize); goto bailout_error; } - - be_lun->size_blocks = params->lun_size_bytes / blocksize; - be_lun->size_bytes = be_lun->size_blocks * blocksize; - - be_lun->ctl_be_lun.maxlba = be_lun->size_blocks - 1; - be_lun->ctl_be_lun.atomicblock = UINT32_MAX; - be_lun->ctl_be_lun.opttxferlen = softc->rd_size / blocksize; - } else { - be_lun->ctl_be_lun.maxlba = 0; - blocksize = 0; - be_lun->size_bytes = 0; - be_lun->size_blocks = 0; + be_lun->size_blocks = params->lun_size_bytes / cbe_lun->blocksize; + be_lun->size_bytes = be_lun->size_blocks * cbe_lun->blocksize; + cbe_lun->maxlba = be_lun->size_blocks - 1; + cbe_lun->atomicblock = UINT32_MAX; + cbe_lun->opttxferlen = softc->rd_size / cbe_lun->blocksize; } - be_lun->ctl_be_lun.blocksize = blocksize; - /* Tell the user the blocksize we ended up using */ - params->blocksize_bytes = blocksize; - - /* Tell the user the exact size we ended up using */ + params->blocksize_bytes = cbe_lun->blocksize; params->lun_size_bytes = be_lun->size_bytes; - be_lun->softc = softc; - - unmap = 1; - value = ctl_get_opt(&be_lun->ctl_be_lun.options, "unmap"); + value = ctl_get_opt(&cbe_lun->options, "unmap"); if (value != NULL && strcmp(value, "on") == 0) - unmap = (strcmp(value, "on") == 0); + cbe_lun->flags |= CTL_LUN_FLAG_UNMAP; + value = ctl_get_opt(&cbe_lun->options, "readonly"); + if (value != NULL && strcmp(value, "on") == 0) + cbe_lun->flags |= CTL_LUN_FLAG_READONLY; + cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; + value = ctl_get_opt(&cbe_lun->options, "serseq"); + if (value != NULL && strcmp(value, "on") == 0) + cbe_lun->serseq = CTL_LUN_SERSEQ_ON; + else if (value != NULL && strcmp(value, "read") == 0) + cbe_lun->serseq = CTL_LUN_SERSEQ_READ; + else if (value != NULL && strcmp(value, "off") == 0) + cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; - be_lun->flags = CTL_BE_RAMDISK_LUN_UNCONFIGURED; - be_lun->ctl_be_lun.flags = CTL_LUN_FLAG_PRIMARY; - if (unmap) - be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_UNMAP; - be_lun->ctl_be_lun.be_lun = be_lun; - if (params->flags & CTL_LUN_FLAG_ID_REQ) { - be_lun->ctl_be_lun.req_lun_id = params->req_lun_id; - be_lun->ctl_be_lun.flags |= CTL_LUN_FLAG_ID_REQ; + cbe_lun->req_lun_id = params->req_lun_id; + cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ; } else - be_lun->ctl_be_lun.req_lun_id = 0; + cbe_lun->req_lun_id = 0; - be_lun->ctl_be_lun.lun_shutdown = ctl_backend_ramdisk_lun_shutdown; - be_lun->ctl_be_lun.lun_config_status = - ctl_backend_ramdisk_lun_config_status; - be_lun->ctl_be_lun.be = &ctl_be_ramdisk_driver; + cbe_lun->lun_shutdown = ctl_backend_ramdisk_lun_shutdown; + cbe_lun->lun_config_status = ctl_backend_ramdisk_lun_config_status; + cbe_lun->be = &ctl_be_ramdisk_driver; if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) { snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d", softc->num_luns); - strncpy((char *)be_lun->ctl_be_lun.serial_num, tmpstr, - MIN(sizeof(be_lun->ctl_be_lun.serial_num), - sizeof(tmpstr))); + strncpy((char *)cbe_lun->serial_num, tmpstr, + MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr))); /* Tell the user what we used for a serial number */ strncpy((char *)params->serial_num, tmpstr, MIN(sizeof(params->serial_num), sizeof(tmpstr))); } else { - strncpy((char *)be_lun->ctl_be_lun.serial_num, - params->serial_num, - MIN(sizeof(be_lun->ctl_be_lun.serial_num), + strncpy((char *)cbe_lun->serial_num, params->serial_num, + MIN(sizeof(cbe_lun->serial_num), sizeof(params->serial_num))); } if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) { snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns); - strncpy((char *)be_lun->ctl_be_lun.device_id, tmpstr, - MIN(sizeof(be_lun->ctl_be_lun.device_id), - sizeof(tmpstr))); + strncpy((char *)cbe_lun->device_id, tmpstr, + MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr))); /* Tell the user what we used for a device ID */ strncpy((char *)params->device_id, tmpstr, MIN(sizeof(params->device_id), sizeof(tmpstr))); } else { - strncpy((char *)be_lun->ctl_be_lun.device_id, - params->device_id, - MIN(sizeof(be_lun->ctl_be_lun.device_id), + strncpy((char *)cbe_lun->device_id, params->device_id, + MIN(sizeof(cbe_lun->device_id), sizeof(params->device_id))); } STAILQ_INIT(&be_lun->cont_queue); mtx_init(&be_lun->queue_lock, "cram queue lock", NULL, MTX_DEF); TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_backend_ramdisk_worker, be_lun); be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK, taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue); if (be_lun->io_taskqueue == NULL) { snprintf(req->error_str, sizeof(req->error_str), "%s: Unable to create taskqueue", __func__); goto bailout_error; } retval = taskqueue_start_threads(&be_lun->io_taskqueue, /*num threads*/1, /*priority*/PWAIT, /*thread name*/ "%s taskq", be_lun->lunname); if (retval != 0) goto bailout_error; mtx_lock(&softc->lock); softc->num_luns++; STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links); mtx_unlock(&softc->lock); - retval = ctl_add_lun(&be_lun->ctl_be_lun); + retval = ctl_add_lun(&be_lun->cbe_lun); if (retval != 0) { mtx_lock(&softc->lock); STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_ramdisk_lun, links); softc->num_luns--; mtx_unlock(&softc->lock); snprintf(req->error_str, sizeof(req->error_str), "%s: ctl_add_lun() returned error %d, see dmesg for " "details", __func__, retval); retval = 0; goto bailout_error; } - if (do_wait == 0) - return (retval); - mtx_lock(&softc->lock); /* * Tell the config_status routine that we're waiting so it won't * clean up the LUN in the event of an error. */ be_lun->flags |= CTL_BE_RAMDISK_LUN_WAITING; while (be_lun->flags & CTL_BE_RAMDISK_LUN_UNCONFIGURED) { retval = msleep(be_lun, &softc->lock, PCATCH, "ctlram", 0); if (retval == EINTR) break; } be_lun->flags &= ~CTL_BE_RAMDISK_LUN_WAITING; if (be_lun->flags & CTL_BE_RAMDISK_LUN_CONFIG_ERR) { snprintf(req->error_str, sizeof(req->error_str), "%s: LUN configuration error, see dmesg for details", __func__); STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_ramdisk_lun, links); softc->num_luns--; mtx_unlock(&softc->lock); goto bailout_error; } else { - params->req_lun_id = be_lun->ctl_be_lun.lun_id; + params->req_lun_id = cbe_lun->lun_id; } mtx_unlock(&softc->lock); req->status = CTL_LUN_OK; return (retval); bailout_error: req->status = CTL_LUN_ERROR; if (be_lun != NULL) { if (be_lun->io_taskqueue != NULL) { taskqueue_free(be_lun->io_taskqueue); } - ctl_free_opts(&be_lun->ctl_be_lun.options); + ctl_free_opts(&cbe_lun->options); mtx_destroy(&be_lun->queue_lock); free(be_lun, M_RAMDISK); } return (retval); } static int ctl_backend_ramdisk_modify(struct ctl_be_ramdisk_softc *softc, struct ctl_lun_req *req) { struct ctl_be_ramdisk_lun *be_lun; + struct ctl_be_lun *cbe_lun; struct ctl_lun_modify_params *params; uint32_t blocksize; params = &req->reqdata.modify; - be_lun = NULL; - mtx_lock(&softc->lock); STAILQ_FOREACH(be_lun, &softc->lun_list, links) { - if (be_lun->ctl_be_lun.lun_id == params->lun_id) + if (be_lun->cbe_lun.lun_id == params->lun_id) break; } mtx_unlock(&softc->lock); if (be_lun == NULL) { snprintf(req->error_str, sizeof(req->error_str), "%s: LUN %u is not managed by the ramdisk backend", __func__, params->lun_id); goto bailout_error; } + cbe_lun = &be_lun->cbe_lun; - if (params->lun_size_bytes == 0) { - snprintf(req->error_str, sizeof(req->error_str), - "%s: LUN size \"auto\" not supported " - "by the ramdisk backend", __func__); - goto bailout_error; - } + if (params->lun_size_bytes != 0) + be_lun->params.lun_size_bytes = params->lun_size_bytes; + ctl_update_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args); + blocksize = be_lun->cbe_lun.blocksize; - blocksize = be_lun->ctl_be_lun.blocksize; - - if (params->lun_size_bytes < blocksize) { + if (be_lun->params.lun_size_bytes < blocksize) { snprintf(req->error_str, sizeof(req->error_str), "%s: LUN size %ju < blocksize %u", __func__, - params->lun_size_bytes, blocksize); + be_lun->params.lun_size_bytes, blocksize); goto bailout_error; } - be_lun->size_blocks = params->lun_size_bytes / blocksize; + be_lun->size_blocks = be_lun->params.lun_size_bytes / blocksize; be_lun->size_bytes = be_lun->size_blocks * blocksize; + be_lun->cbe_lun.maxlba = be_lun->size_blocks - 1; + ctl_lun_capacity_changed(&be_lun->cbe_lun); - /* - * The maximum LBA is the size - 1. - * - * XXX: Note that this field is being updated without locking, - * which might cause problems on 32-bit architectures. - */ - be_lun->ctl_be_lun.maxlba = be_lun->size_blocks - 1; - ctl_lun_capacity_changed(&be_lun->ctl_be_lun); - /* Tell the user the exact size we ended up using */ params->lun_size_bytes = be_lun->size_bytes; req->status = CTL_LUN_OK; return (0); bailout_error: req->status = CTL_LUN_ERROR; return (0); } static void ctl_backend_ramdisk_lun_shutdown(void *be_lun) { struct ctl_be_ramdisk_lun *lun; struct ctl_be_ramdisk_softc *softc; int do_free; lun = (struct ctl_be_ramdisk_lun *)be_lun; softc = lun->softc; do_free = 0; mtx_lock(&softc->lock); lun->flags |= CTL_BE_RAMDISK_LUN_UNCONFIGURED; if (lun->flags & CTL_BE_RAMDISK_LUN_WAITING) { wakeup(lun); } else { STAILQ_REMOVE(&softc->lun_list, lun, ctl_be_ramdisk_lun, links); softc->num_luns--; do_free = 1; } mtx_unlock(&softc->lock); if (do_free != 0) free(be_lun, M_RAMDISK); } static void ctl_backend_ramdisk_lun_config_status(void *be_lun, ctl_lun_config_status status) { struct ctl_be_ramdisk_lun *lun; struct ctl_be_ramdisk_softc *softc; lun = (struct ctl_be_ramdisk_lun *)be_lun; softc = lun->softc; if (status == CTL_LUN_CONFIG_OK) { mtx_lock(&softc->lock); lun->flags &= ~CTL_BE_RAMDISK_LUN_UNCONFIGURED; if (lun->flags & CTL_BE_RAMDISK_LUN_WAITING) wakeup(lun); mtx_unlock(&softc->lock); /* * We successfully added the LUN, attempt to enable it. */ - if (ctl_enable_lun(&lun->ctl_be_lun) != 0) { + if (ctl_enable_lun(&lun->cbe_lun) != 0) { printf("%s: ctl_enable_lun() failed!\n", __func__); - if (ctl_invalidate_lun(&lun->ctl_be_lun) != 0) { + if (ctl_invalidate_lun(&lun->cbe_lun) != 0) { printf("%s: ctl_invalidate_lun() failed!\n", __func__); } } return; } mtx_lock(&softc->lock); lun->flags &= ~CTL_BE_RAMDISK_LUN_UNCONFIGURED; /* * If we have a user waiting, let him handle the cleanup. If not, * clean things up here. */ if (lun->flags & CTL_BE_RAMDISK_LUN_WAITING) { lun->flags |= CTL_BE_RAMDISK_LUN_CONFIG_ERR; wakeup(lun); } else { STAILQ_REMOVE(&softc->lun_list, lun, ctl_be_ramdisk_lun, links); softc->num_luns--; free(lun, M_RAMDISK); } mtx_unlock(&softc->lock); } static int ctl_backend_ramdisk_config_write(union ctl_io *io) { struct ctl_be_ramdisk_softc *softc; int retval; retval = 0; softc = &rd_softc; switch (io->scsiio.cdb[0]) { case SYNCHRONIZE_CACHE: case SYNCHRONIZE_CACHE_16: /* * The upper level CTL code will filter out any CDBs with * the immediate bit set and return the proper error. It * will also not allow a sync cache command to go to a LUN * that is powered down. * * We don't really need to worry about what LBA range the * user asked to be synced out. When they issue a sync * cache command, we'll sync out the whole thing. * * This is obviously just a stubbed out implementation. * The real implementation will be in the RAIDCore/CTL * interface, and can only really happen when RAIDCore * implements a per-array cache sync. */ ctl_set_success(&io->scsiio); ctl_config_write_done(io); break; case START_STOP_UNIT: { struct scsi_start_stop_unit *cdb; - struct ctl_be_lun *ctl_be_lun; + struct ctl_be_lun *cbe_lun; struct ctl_be_ramdisk_lun *be_lun; cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb; - ctl_be_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ + cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ CTL_PRIV_BACKEND_LUN].ptr; - be_lun = (struct ctl_be_ramdisk_lun *)ctl_be_lun->be_lun; + be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun->be_lun; if (cdb->how & SSS_START) - retval = ctl_start_lun(ctl_be_lun); + retval = ctl_start_lun(cbe_lun); else { - retval = ctl_stop_lun(ctl_be_lun); + retval = ctl_stop_lun(cbe_lun); #ifdef NEEDTOPORT if ((retval == 0) && (cdb->byte2 & SSS_ONOFFLINE)) - retval = ctl_lun_offline(ctl_be_lun); + retval = ctl_lun_offline(cbe_lun); #endif } /* * In general, the above routines should not fail. They * just set state for the LUN. So we've got something * pretty wrong here if we can't start or stop the LUN. */ if (retval != 0) { ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ 0xf051); retval = CTL_RETVAL_COMPLETE; } else { ctl_set_success(&io->scsiio); } ctl_config_write_done(io); break; } case WRITE_SAME_10: case WRITE_SAME_16: case UNMAP: ctl_set_success(&io->scsiio); ctl_config_write_done(io); break; default: ctl_set_invalid_opcode(&io->scsiio); ctl_config_write_done(io); retval = CTL_RETVAL_COMPLETE; break; } return (retval); } static int ctl_backend_ramdisk_config_read(union ctl_io *io) { int retval = 0; switch (io->scsiio.cdb[0]) { case SERVICE_ACTION_IN: if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) { /* We have nothing to tell, leave default data. */ ctl_config_read_done(io); retval = CTL_RETVAL_COMPLETE; break; } ctl_set_invalid_field(&io->scsiio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 4); ctl_config_read_done(io); retval = CTL_RETVAL_COMPLETE; break; default: ctl_set_invalid_opcode(&io->scsiio); ctl_config_read_done(io); retval = CTL_RETVAL_COMPLETE; break; } return (retval); } Index: projects/clang-trunk/sys/cam/ctl/ctl_private.h =================================================================== --- projects/clang-trunk/sys/cam/ctl/ctl_private.h (revision 287501) +++ projects/clang-trunk/sys/cam/ctl/ctl_private.h (revision 287502) @@ -1,544 +1,537 @@ /*- * Copyright (c) 2003, 2004, 2005, 2008 Silicon Graphics International Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_private.h#7 $ * $FreeBSD$ */ /* * CAM Target Layer driver private data structures/definitions. * * Author: Ken Merry */ #ifndef _CTL_PRIVATE_H_ #define _CTL_PRIVATE_H_ /* * SCSI vendor and product names. */ #define CTL_VENDOR "FREEBSD " #define CTL_DIRECT_PRODUCT "CTLDISK " #define CTL_PROCESSOR_PRODUCT "CTLPROCESSOR " #define CTL_UNKNOWN_PRODUCT "CTLDEVICE " typedef enum { CTL_IOCTL_INPROG, CTL_IOCTL_DATAMOVE, CTL_IOCTL_DONE } ctl_fe_ioctl_state; struct ctl_fe_ioctl_params { struct cv sem; struct mtx ioctl_mtx; ctl_fe_ioctl_state state; }; #define CTL_POOL_ENTRIES_OTHER_SC 200 struct ctl_io_pool { char name[64]; uint32_t id; struct ctl_softc *ctl_softc; struct uma_zone *zone; }; typedef enum { CTL_SER_BLOCK, CTL_SER_BLOCKOPT, CTL_SER_EXTENT, CTL_SER_EXTENTOPT, CTL_SER_EXTENTSEQ, CTL_SER_PASS, CTL_SER_SKIP } ctl_serialize_action; typedef enum { CTL_ACTION_BLOCK, CTL_ACTION_OVERLAP, CTL_ACTION_OVERLAP_TAG, CTL_ACTION_PASS, CTL_ACTION_SKIP, CTL_ACTION_ERROR } ctl_action; /* * WARNING: Keep the bottom nibble here free, we OR in the data direction * flags for each command. * * Note: "OK_ON_ALL_LUNS" == we don't have to have a lun configured * "OK_ON_BOTH" == we have to have a lun configured * "SA5" == command has 5-bit service action at byte 1 */ typedef enum { CTL_CMD_FLAG_NONE = 0x0000, CTL_CMD_FLAG_NO_SENSE = 0x0010, CTL_CMD_FLAG_OK_ON_ALL_LUNS = 0x0020, CTL_CMD_FLAG_ALLOW_ON_RESV = 0x0040, CTL_CMD_FLAG_ALLOW_ON_PR_WRESV = 0x0080, CTL_CMD_FLAG_OK_ON_PROC = 0x0100, CTL_CMD_FLAG_OK_ON_SLUN = 0x0200, CTL_CMD_FLAG_OK_ON_BOTH = 0x0300, CTL_CMD_FLAG_OK_ON_STOPPED = 0x0400, CTL_CMD_FLAG_OK_ON_INOPERABLE = 0x0800, CTL_CMD_FLAG_OK_ON_OFFLINE = 0x1000, CTL_CMD_FLAG_OK_ON_SECONDARY = 0x2000, CTL_CMD_FLAG_ALLOW_ON_PR_RESV = 0x4000, CTL_CMD_FLAG_SA5 = 0x8000 } ctl_cmd_flags; typedef enum { CTL_SERIDX_TUR = 0, CTL_SERIDX_READ, CTL_SERIDX_WRITE, CTL_SERIDX_UNMAP, CTL_SERIDX_SYNC, CTL_SERIDX_MD_SNS, CTL_SERIDX_MD_SEL, CTL_SERIDX_RQ_SNS, CTL_SERIDX_INQ, CTL_SERIDX_RD_CAP, CTL_SERIDX_RES, CTL_SERIDX_LOG_SNS, CTL_SERIDX_FORMAT, CTL_SERIDX_START, /* TBD: others to be filled in as needed */ CTL_SERIDX_COUNT, /* LAST, not a normal code, provides # codes */ CTL_SERIDX_INVLD = CTL_SERIDX_COUNT } ctl_seridx; typedef int ctl_opfunc(struct ctl_scsiio *ctsio); struct ctl_cmd_entry { ctl_opfunc *execute; ctl_seridx seridx; ctl_cmd_flags flags; ctl_lun_error_pattern pattern; uint8_t length; /* CDB length */ uint8_t usage[15]; /* Mask of allowed CDB bits * after the opcode byte. */ }; typedef enum { CTL_LUN_NONE = 0x000, CTL_LUN_CONTROL = 0x001, CTL_LUN_RESERVED = 0x002, CTL_LUN_INVALID = 0x004, CTL_LUN_DISABLED = 0x008, CTL_LUN_MALLOCED = 0x010, CTL_LUN_STOPPED = 0x020, CTL_LUN_INOPERABLE = 0x040, CTL_LUN_OFFLINE = 0x080, CTL_LUN_PR_RESERVED = 0x100, CTL_LUN_PRIMARY_SC = 0x200, CTL_LUN_SENSE_DESC = 0x400, CTL_LUN_READONLY = 0x800 } ctl_lun_flags; typedef enum { - CTL_LUN_SERSEQ_OFF, - CTL_LUN_SERSEQ_READ, - CTL_LUN_SERSEQ_ON -} ctl_lun_serseq; - -typedef enum { CTLBLOCK_FLAG_NONE = 0x00, CTLBLOCK_FLAG_INVALID = 0x01 } ctlblock_flags; union ctl_softcs { struct ctl_softc *ctl_softc; struct ctlblock_softc *ctlblock_softc; }; /* * Mode page defaults. */ #if 0 /* * These values make Solaris trim off some of the capacity. */ #define CTL_DEFAULT_SECTORS_PER_TRACK 63 #define CTL_DEFAULT_HEADS 255 /* * These values seem to work okay. */ #define CTL_DEFAULT_SECTORS_PER_TRACK 63 #define CTL_DEFAULT_HEADS 16 /* * These values work reasonably well. */ #define CTL_DEFAULT_SECTORS_PER_TRACK 512 #define CTL_DEFAULT_HEADS 64 #endif /* * Solaris is somewhat picky about how many heads and sectors per track you * have defined in mode pages 3 and 4. These values seem to cause Solaris * to get the capacity more or less right when you run the format tool. * They still have problems when dealing with devices larger than 1TB, * but there isn't anything we can do about that. * * For smaller LUN sizes, this ends up causing the number of cylinders to * work out to 0. Solaris actually recognizes that and comes up with its * own bogus geometry to fit the actual capacity of the drive. They really * should just give up on geometry and stick to the read capacity * information alone for modern disk drives. * * One thing worth mentioning about Solaris' mkfs command is that it * doesn't like sectors per track values larger than 256. 512 seems to * work okay for format, but causes problems when you try to make a * filesystem. * * Another caveat about these values: the product of these two values * really should be a power of 2. This is because of the simplistic * shift-based calculation that we have to use on the i386 platform to * calculate the number of cylinders here. (If you use a divide, you end * up calling __udivdi3(), which is a hardware FP call on the PC. On the * XScale, it is done in software, so you can do that from inside the * kernel.) * * So for the current values (256 S/T, 128 H), we get 32768, which works * very nicely for calculating cylinders. * * If you want to change these values so that their product is no longer a * power of 2, re-visit the calculation in ctl_init_page_index(). You may * need to make it a bit more complicated to get the number of cylinders * right. */ #define CTL_DEFAULT_SECTORS_PER_TRACK 256 #define CTL_DEFAULT_HEADS 128 #define CTL_DEFAULT_ROTATION_RATE SVPD_NON_ROTATING struct ctl_page_index; typedef int ctl_modesen_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, int pc); typedef int ctl_modesel_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, uint8_t *page_ptr); typedef enum { CTL_PAGE_FLAG_NONE = 0x00, CTL_PAGE_FLAG_DISK_ONLY = 0x01 } ctl_page_flags; struct ctl_page_index { uint8_t page_code; uint8_t subpage; uint16_t page_len; uint8_t *page_data; ctl_page_flags page_flags; ctl_modesen_handler *sense_handler; ctl_modesel_handler *select_handler; }; #define CTL_PAGE_CURRENT 0x00 #define CTL_PAGE_CHANGEABLE 0x01 #define CTL_PAGE_DEFAULT 0x02 #define CTL_PAGE_SAVED 0x03 #define CTL_NUM_LBP_PARAMS 4 #define CTL_NUM_LBP_THRESH 4 #define CTL_LBP_EXPONENT 11 /* 2048 sectors */ #define CTL_LBP_PERIOD 10 /* 10 seconds */ #define CTL_LBP_UA_PERIOD 300 /* 5 minutes */ struct ctl_logical_block_provisioning_page { struct scsi_logical_block_provisioning_page main; struct scsi_logical_block_provisioning_page_descr descr[CTL_NUM_LBP_THRESH]; }; static const struct ctl_page_index page_index_template[] = { {SMS_RW_ERROR_RECOVERY_PAGE, 0, sizeof(struct scsi_da_rw_recovery_page), NULL, CTL_PAGE_FLAG_DISK_ONLY, NULL, NULL}, {SMS_FORMAT_DEVICE_PAGE, 0, sizeof(struct scsi_format_page), NULL, CTL_PAGE_FLAG_DISK_ONLY, NULL, NULL}, {SMS_RIGID_DISK_PAGE, 0, sizeof(struct scsi_rigid_disk_page), NULL, CTL_PAGE_FLAG_DISK_ONLY, NULL, NULL}, {SMS_CACHING_PAGE, 0, sizeof(struct scsi_caching_page), NULL, CTL_PAGE_FLAG_DISK_ONLY, NULL, ctl_caching_sp_handler}, {SMS_CONTROL_MODE_PAGE, 0, sizeof(struct scsi_control_page), NULL, CTL_PAGE_FLAG_NONE, NULL, ctl_control_page_handler}, {SMS_INFO_EXCEPTIONS_PAGE, 0, sizeof(struct scsi_info_exceptions_page), NULL, CTL_PAGE_FLAG_NONE, NULL, NULL}, {SMS_INFO_EXCEPTIONS_PAGE | SMPH_SPF, 0x02, sizeof(struct ctl_logical_block_provisioning_page), NULL, CTL_PAGE_FLAG_DISK_ONLY, NULL, NULL}, {SMS_VENDOR_SPECIFIC_PAGE | SMPH_SPF, DBGCNF_SUBPAGE_CODE, sizeof(struct copan_debugconf_subpage), NULL, CTL_PAGE_FLAG_NONE, ctl_debugconf_sp_sense_handler, ctl_debugconf_sp_select_handler}, }; #define CTL_NUM_MODE_PAGES sizeof(page_index_template)/ \ sizeof(page_index_template[0]) struct ctl_mode_pages { struct scsi_da_rw_recovery_page rw_er_page[4]; struct scsi_format_page format_page[4]; struct scsi_rigid_disk_page rigid_disk_page[4]; struct scsi_caching_page caching_page[4]; struct scsi_control_page control_page[4]; struct scsi_info_exceptions_page ie_page[4]; struct ctl_logical_block_provisioning_page lbp_page[4]; struct copan_debugconf_subpage debugconf_subpage[4]; struct ctl_page_index index[CTL_NUM_MODE_PAGES]; }; static const struct ctl_page_index log_page_index_template[] = { {SLS_SUPPORTED_PAGES_PAGE, 0, 0, NULL, CTL_PAGE_FLAG_NONE, NULL, NULL}, {SLS_SUPPORTED_PAGES_PAGE, SLS_SUPPORTED_SUBPAGES_SUBPAGE, 0, NULL, CTL_PAGE_FLAG_NONE, NULL, NULL}, {SLS_LOGICAL_BLOCK_PROVISIONING, 0, 0, NULL, CTL_PAGE_FLAG_NONE, ctl_lbp_log_sense_handler, NULL}, {SLS_STAT_AND_PERF, 0, 0, NULL, CTL_PAGE_FLAG_NONE, ctl_sap_log_sense_handler, NULL}, }; #define CTL_NUM_LOG_PAGES sizeof(log_page_index_template)/ \ sizeof(log_page_index_template[0]) struct ctl_log_pages { uint8_t pages_page[CTL_NUM_LOG_PAGES]; uint8_t subpages_page[CTL_NUM_LOG_PAGES * 2]; uint8_t lbp_page[12*CTL_NUM_LBP_PARAMS]; struct stat_page { struct scsi_log_stat_and_perf sap; struct scsi_log_idle_time it; struct scsi_log_time_interval ti; } stat_page; struct ctl_page_index index[CTL_NUM_LOG_PAGES]; }; struct ctl_lun_delay_info { ctl_delay_type datamove_type; uint32_t datamove_delay; ctl_delay_type done_type; uint32_t done_delay; }; typedef enum { CTL_ERR_INJ_NONE = 0x00, CTL_ERR_INJ_ABORTED = 0x01 } ctl_err_inject_flags; typedef enum { CTL_PR_FLAG_NONE = 0x00, CTL_PR_FLAG_REGISTERED = 0x01, CTL_PR_FLAG_ACTIVE_RES = 0x02 } ctl_per_res_flags; #define CTL_PR_ALL_REGISTRANTS 0xFFFFFFFF #define CTL_PR_NO_RESERVATION 0xFFFFFFF0 struct ctl_devid { int len; uint8_t data[]; }; /* * For report target port groups. */ #define NUM_TARGET_PORT_GROUPS 2 #define CTL_WRITE_BUFFER_SIZE 262144 struct tpc_list; struct ctl_lun { struct mtx lun_lock; uint64_t lun; ctl_lun_flags flags; - ctl_lun_serseq serseq; STAILQ_HEAD(,ctl_error_desc) error_list; uint64_t error_serial; struct ctl_softc *ctl_softc; struct ctl_be_lun *be_lun; struct ctl_backend_driver *backend; int io_count; struct ctl_lun_delay_info delay_info; int sync_interval; int sync_count; #ifdef CTL_TIME_IO sbintime_t idle_time; sbintime_t last_busy; #endif TAILQ_HEAD(ctl_ooaq, ctl_io_hdr) ooa_queue; TAILQ_HEAD(ctl_blockq,ctl_io_hdr) blocked_queue; STAILQ_ENTRY(ctl_lun) links; STAILQ_ENTRY(ctl_lun) run_links; #ifdef CTL_WITH_CA uint32_t have_ca[CTL_MAX_INITIATORS >> 5]; struct scsi_sense_data pending_sense[CTL_MAX_INITIATORS]; #endif ctl_ua_type *pending_ua[CTL_MAX_PORTS]; time_t lasttpt; struct ctl_mode_pages mode_pages; struct ctl_log_pages log_pages; struct ctl_lun_io_stats stats; uint32_t res_idx; unsigned int PRGeneration; uint64_t *pr_keys[2 * CTL_MAX_PORTS]; int pr_key_count; uint32_t pr_res_idx; uint8_t res_type; uint8_t *write_buffer; struct ctl_devid *lun_devid; TAILQ_HEAD(tpc_lists, tpc_list) tpc_lists; }; typedef enum { CTL_FLAG_REAL_SYNC = 0x02, CTL_FLAG_ACTIVE_SHELF = 0x04 } ctl_gen_flags; #define CTL_MAX_THREADS 16 struct ctl_thread { struct mtx_padalign queue_lock; struct ctl_softc *ctl_softc; struct thread *thread; STAILQ_HEAD(, ctl_io_hdr) incoming_queue; STAILQ_HEAD(, ctl_io_hdr) rtr_queue; STAILQ_HEAD(, ctl_io_hdr) done_queue; STAILQ_HEAD(, ctl_io_hdr) isc_queue; }; struct tpc_token; struct ctl_softc { struct mtx ctl_lock; struct cdev *dev; int open_count; int num_disks; int num_luns; ctl_gen_flags flags; ctl_ha_mode ha_mode; int ha_id; int ha_state; int is_single; int port_offset; int persis_offset; int inquiry_pq_no_lun; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; void *othersc_pool; struct proc *ctl_proc; int targ_online; uint32_t ctl_lun_mask[(CTL_MAX_LUNS + 31) / 32]; struct ctl_lun *ctl_luns[CTL_MAX_LUNS]; uint32_t ctl_port_mask[(CTL_MAX_PORTS + 31) / 32]; STAILQ_HEAD(, ctl_lun) lun_list; STAILQ_HEAD(, ctl_be_lun) pending_lun_queue; uint32_t num_frontends; STAILQ_HEAD(, ctl_frontend) fe_list; uint32_t num_ports; STAILQ_HEAD(, ctl_port) port_list; struct ctl_port *ctl_ports[CTL_MAX_PORTS]; uint32_t num_backends; STAILQ_HEAD(, ctl_backend_driver) be_list; struct uma_zone *io_zone; uint32_t cur_pool_id; struct ctl_thread threads[CTL_MAX_THREADS]; TAILQ_HEAD(tpc_tokens, tpc_token) tpc_tokens; struct callout tpc_timeout; struct mtx tpc_lock; }; #ifdef _KERNEL extern const struct ctl_cmd_entry ctl_cmd_table[256]; uint32_t ctl_get_initindex(struct ctl_nexus *nexus); uint32_t ctl_get_resindex(struct ctl_nexus *nexus); uint32_t ctl_port_idx(int port_num); int ctl_lun_map_init(struct ctl_port *port); int ctl_lun_map_deinit(struct ctl_port *port); int ctl_lun_map_set(struct ctl_port *port, uint32_t plun, uint32_t glun); int ctl_lun_map_unset(struct ctl_port *port, uint32_t plun); uint32_t ctl_lun_map_from_port(struct ctl_port *port, uint32_t plun); uint32_t ctl_lun_map_to_port(struct ctl_port *port, uint32_t glun); int ctl_pool_create(struct ctl_softc *ctl_softc, const char *pool_name, uint32_t total_ctl_io, void **npool); void ctl_pool_free(struct ctl_io_pool *pool); int ctl_scsi_release(struct ctl_scsiio *ctsio); int ctl_scsi_reserve(struct ctl_scsiio *ctsio); int ctl_start_stop(struct ctl_scsiio *ctsio); int ctl_sync_cache(struct ctl_scsiio *ctsio); int ctl_format(struct ctl_scsiio *ctsio); int ctl_read_buffer(struct ctl_scsiio *ctsio); int ctl_write_buffer(struct ctl_scsiio *ctsio); int ctl_write_same(struct ctl_scsiio *ctsio); int ctl_unmap(struct ctl_scsiio *ctsio); int ctl_mode_select(struct ctl_scsiio *ctsio); int ctl_mode_sense(struct ctl_scsiio *ctsio); int ctl_log_sense(struct ctl_scsiio *ctsio); int ctl_read_capacity(struct ctl_scsiio *ctsio); int ctl_read_capacity_16(struct ctl_scsiio *ctsio); int ctl_read_defect(struct ctl_scsiio *ctsio); int ctl_read_write(struct ctl_scsiio *ctsio); int ctl_cnw(struct ctl_scsiio *ctsio); int ctl_report_luns(struct ctl_scsiio *ctsio); int ctl_request_sense(struct ctl_scsiio *ctsio); int ctl_tur(struct ctl_scsiio *ctsio); int ctl_verify(struct ctl_scsiio *ctsio); int ctl_inquiry(struct ctl_scsiio *ctsio); int ctl_persistent_reserve_in(struct ctl_scsiio *ctsio); int ctl_persistent_reserve_out(struct ctl_scsiio *ctsio); int ctl_report_tagret_port_groups(struct ctl_scsiio *ctsio); int ctl_report_supported_opcodes(struct ctl_scsiio *ctsio); int ctl_report_supported_tmf(struct ctl_scsiio *ctsio); int ctl_report_timestamp(struct ctl_scsiio *ctsio); int ctl_isc(struct ctl_scsiio *ctsio); int ctl_get_lba_status(struct ctl_scsiio *ctsio); void ctl_tpc_init(struct ctl_softc *softc); void ctl_tpc_shutdown(struct ctl_softc *softc); void ctl_tpc_lun_init(struct ctl_lun *lun); void ctl_tpc_lun_shutdown(struct ctl_lun *lun); int ctl_inquiry_evpd_tpc(struct ctl_scsiio *ctsio, int alloc_len); int ctl_receive_copy_status_lid1(struct ctl_scsiio *ctsio); int ctl_receive_copy_failure_details(struct ctl_scsiio *ctsio); int ctl_receive_copy_status_lid4(struct ctl_scsiio *ctsio); int ctl_receive_copy_operating_parameters(struct ctl_scsiio *ctsio); int ctl_extended_copy_lid1(struct ctl_scsiio *ctsio); int ctl_extended_copy_lid4(struct ctl_scsiio *ctsio); int ctl_copy_operation_abort(struct ctl_scsiio *ctsio); int ctl_populate_token(struct ctl_scsiio *ctsio); int ctl_write_using_token(struct ctl_scsiio *ctsio); int ctl_receive_rod_token_information(struct ctl_scsiio *ctsio); int ctl_report_all_rod_tokens(struct ctl_scsiio *ctsio); #endif /* _KERNEL */ #endif /* _CTL_PRIVATE_H_ */ /* * vim: ts=8 */ Index: projects/clang-trunk/sys/dev/drm2/i915/intel_drv.h =================================================================== --- projects/clang-trunk/sys/dev/drm2/i915/intel_drv.h (revision 287501) +++ projects/clang-trunk/sys/dev/drm2/i915/intel_drv.h (revision 287502) @@ -1,508 +1,513 @@ /* * Copyright (c) 2006 Dave Airlie * Copyright (c) 2007-2008 Intel Corporation * Jesse Barnes * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * * $FreeBSD$ */ #ifndef DRM_INTEL_DRV_H #define DRM_INTEL_DRV_H #include #include #include #include #include #define _intel_wait_for(DEV, COND, MS, W, WMSG) \ ({ \ int end, ret; \ \ end = ticks + (MS) * hz / 1000; \ ret = 0; \ \ while (!(COND)) { \ if (time_after(ticks, end)) { \ ret = -ETIMEDOUT; \ break; \ } \ if (W) \ pause((WMSG), 1); \ else \ DELAY(1000); \ } \ \ ret; \ }) #define wait_for_atomic_us(COND, US) ({ \ int i, ret__ = -ETIMEDOUT; \ for (i = 0; i < (US); i++) { \ if ((COND)) { \ ret__ = 0; \ break; \ } \ DELAY(1); \ } \ ret__; \ }) #define wait_for(COND, MS) _intel_wait_for(NULL, COND, MS, 1, "915wfi") #define wait_for_atomic(COND, MS) _intel_wait_for(NULL, COND, MS, 0, "915wfa") #define KHz(x) (1000*x) #define MHz(x) KHz(1000*x) +/* + * Display related stuff + */ + /* store information about an Ixxx DVO */ /* The i830->i865 use multiple DVOs with multiple i2cs */ /* the i915, i945 have a single sDVO i2c bus - which is different */ #define MAX_OUTPUTS 6 /* maximum connectors per crtcs in the mode set */ #define INTELFB_CONN_LIMIT 4 #define INTEL_I2C_BUS_DVO 1 #define INTEL_I2C_BUS_SDVO 2 /* these are outputs from the chip - integrated only external chips are via DVO or SDVO output */ #define INTEL_OUTPUT_UNUSED 0 #define INTEL_OUTPUT_ANALOG 1 #define INTEL_OUTPUT_DVO 2 #define INTEL_OUTPUT_SDVO 3 #define INTEL_OUTPUT_LVDS 4 #define INTEL_OUTPUT_TVOUT 5 #define INTEL_OUTPUT_HDMI 6 #define INTEL_OUTPUT_DISPLAYPORT 7 #define INTEL_OUTPUT_EDP 8 +#define INTEL_OUTPUT_UNKNOWN 9 /* Intel Pipe Clone Bit */ #define INTEL_HDMIB_CLONE_BIT 1 #define INTEL_HDMIC_CLONE_BIT 2 #define INTEL_HDMID_CLONE_BIT 3 #define INTEL_HDMIE_CLONE_BIT 4 #define INTEL_HDMIF_CLONE_BIT 5 #define INTEL_SDVO_NON_TV_CLONE_BIT 6 #define INTEL_SDVO_TV_CLONE_BIT 7 #define INTEL_SDVO_LVDS_CLONE_BIT 8 #define INTEL_ANALOG_CLONE_BIT 9 #define INTEL_TV_CLONE_BIT 10 #define INTEL_DP_B_CLONE_BIT 11 #define INTEL_DP_C_CLONE_BIT 12 #define INTEL_DP_D_CLONE_BIT 13 #define INTEL_LVDS_CLONE_BIT 14 #define INTEL_DVO_TMDS_CLONE_BIT 15 #define INTEL_DVO_LVDS_CLONE_BIT 16 #define INTEL_EDP_CLONE_BIT 17 #define INTEL_DVO_CHIP_NONE 0 #define INTEL_DVO_CHIP_LVDS 1 #define INTEL_DVO_CHIP_TMDS 2 #define INTEL_DVO_CHIP_TVOUT 4 /* drm_display_mode->private_flags */ #define INTEL_MODE_PIXEL_MULTIPLIER_SHIFT (0x0) #define INTEL_MODE_PIXEL_MULTIPLIER_MASK (0xf << INTEL_MODE_PIXEL_MULTIPLIER_SHIFT) #define INTEL_MODE_DP_FORCE_6BPC (0x10) /* This flag must be set by the encoder's mode_fixup if it changes the crtc * timings in the mode to prevent the crtc fixup from overwriting them. * Currently only lvds needs that. */ #define INTEL_MODE_CRTC_TIMINGS_SET (0x20) static inline void intel_mode_set_pixel_multiplier(struct drm_display_mode *mode, int multiplier) { mode->clock *= multiplier; mode->private_flags |= multiplier; } static inline int intel_mode_get_pixel_multiplier(const struct drm_display_mode *mode) { return (mode->private_flags & INTEL_MODE_PIXEL_MULTIPLIER_MASK) >> INTEL_MODE_PIXEL_MULTIPLIER_SHIFT; } struct intel_framebuffer { struct drm_framebuffer base; struct drm_i915_gem_object *obj; }; struct intel_fbdev { struct drm_fb_helper helper; struct intel_framebuffer ifb; struct list_head fbdev_list; struct drm_display_mode *our_mode; }; struct intel_encoder { struct drm_encoder base; int type; bool needs_tv_clock; void (*hot_plug)(struct intel_encoder *); int crtc_mask; int clone_mask; }; struct intel_connector { struct drm_connector base; struct intel_encoder *encoder; }; struct intel_crtc { struct drm_crtc base; enum pipe pipe; enum plane plane; u8 lut_r[256], lut_g[256], lut_b[256]; int dpms_mode; bool active; /* is the crtc on? independent of the dpms mode */ bool busy; /* is scanout buffer being updated frequently? */ struct callout idle_callout; bool lowfreq_avail; struct intel_overlay *overlay; struct intel_unpin_work *unpin_work; int fdi_lanes; struct drm_i915_gem_object *cursor_bo; uint32_t cursor_addr; int16_t cursor_x, cursor_y; int16_t cursor_width, cursor_height; bool cursor_visible; unsigned int bpp; /* We can share PLLs across outputs if the timings match */ struct intel_pch_pll *pch_pll; }; struct intel_plane { struct drm_plane base; enum pipe pipe; struct drm_i915_gem_object *obj; bool primary_disabled; int max_downscale; u32 lut_r[1024], lut_g[1024], lut_b[1024]; void (*update_plane)(struct drm_plane *plane, struct drm_framebuffer *fb, struct drm_i915_gem_object *obj, int crtc_x, int crtc_y, unsigned int crtc_w, unsigned int crtc_h, uint32_t x, uint32_t y, uint32_t src_w, uint32_t src_h); void (*disable_plane)(struct drm_plane *plane); int (*update_colorkey)(struct drm_plane *plane, struct drm_intel_sprite_colorkey *key); void (*get_colorkey)(struct drm_plane *plane, struct drm_intel_sprite_colorkey *key); }; struct intel_watermark_params { unsigned long fifo_size; unsigned long max_wm; unsigned long default_wm; unsigned long guard_size; unsigned long cacheline_size; }; struct cxsr_latency { int is_desktop; int is_ddr3; unsigned long fsb_freq; unsigned long mem_freq; unsigned long display_sr; unsigned long display_hpll_disable; unsigned long cursor_sr; unsigned long cursor_hpll_disable; }; #define to_intel_crtc(x) container_of(x, struct intel_crtc, base) #define to_intel_connector(x) container_of(x, struct intel_connector, base) #define to_intel_encoder(x) container_of(x, struct intel_encoder, base) #define to_intel_framebuffer(x) container_of(x, struct intel_framebuffer, base) #define to_intel_plane(x) container_of(x, struct intel_plane, base) #define DIP_HEADER_SIZE 5 #define DIP_TYPE_AVI 0x82 #define DIP_VERSION_AVI 0x2 #define DIP_LEN_AVI 13 #define DIP_AVI_PR_1 0 #define DIP_AVI_PR_2 1 #define DIP_TYPE_SPD 0x83 #define DIP_VERSION_SPD 0x1 #define DIP_LEN_SPD 25 #define DIP_SPD_UNKNOWN 0 #define DIP_SPD_DSTB 0x1 #define DIP_SPD_DVDP 0x2 #define DIP_SPD_DVHS 0x3 #define DIP_SPD_HDDVR 0x4 #define DIP_SPD_DVC 0x5 #define DIP_SPD_DSC 0x6 #define DIP_SPD_VCD 0x7 #define DIP_SPD_GAME 0x8 #define DIP_SPD_PC 0x9 #define DIP_SPD_BD 0xa #define DIP_SPD_SCD 0xb struct dip_infoframe { uint8_t type; /* HB0 */ uint8_t ver; /* HB1 */ uint8_t len; /* HB2 - body len, not including checksum */ uint8_t ecc; /* Header ECC */ uint8_t checksum; /* PB0 */ union { struct { /* PB1 - Y 6:5, A 4:4, B 3:2, S 1:0 */ uint8_t Y_A_B_S; /* PB2 - C 7:6, M 5:4, R 3:0 */ uint8_t C_M_R; /* PB3 - ITC 7:7, EC 6:4, Q 3:2, SC 1:0 */ uint8_t ITC_EC_Q_SC; /* PB4 - VIC 6:0 */ uint8_t VIC; /* PB5 - YQ 7:6, CN 5:4, PR 3:0 */ uint8_t YQ_CN_PR; /* PB6 to PB13 */ uint16_t top_bar_end; uint16_t bottom_bar_start; uint16_t left_bar_end; uint16_t right_bar_start; } __attribute__ ((packed)) avi; struct { uint8_t vn[8]; uint8_t pd[16]; uint8_t sdi; } __attribute__ ((packed)) spd; uint8_t payload[27]; } __attribute__ ((packed)) body; } __attribute__((packed)); struct intel_hdmi { struct intel_encoder base; u32 sdvox_reg; int ddc_bus; int ddi_port; uint32_t color_range; bool has_hdmi_sink; bool has_audio; enum hdmi_force_audio force_audio; void (*write_infoframe)(struct drm_encoder *encoder, struct dip_infoframe *frame); }; static inline struct drm_crtc * intel_get_crtc_for_pipe(struct drm_device *dev, int pipe) { struct drm_i915_private *dev_priv = dev->dev_private; return dev_priv->pipe_to_crtc_mapping[pipe]; } static inline struct drm_crtc * intel_get_crtc_for_plane(struct drm_device *dev, int plane) { struct drm_i915_private *dev_priv = dev->dev_private; return dev_priv->plane_to_crtc_mapping[plane]; } struct intel_unpin_work { struct task task; struct drm_device *dev; struct drm_i915_gem_object *old_fb_obj; struct drm_i915_gem_object *pending_flip_obj; struct drm_pending_vblank_event *event; int pending; bool enable_stall_check; }; struct intel_fbc_work { struct timeout_task task; struct drm_crtc *crtc; struct drm_framebuffer *fb; int interval; }; int intel_ddc_get_modes(struct drm_connector *c, device_t adapter); extern bool intel_ddc_probe(struct intel_encoder *intel_encoder, int ddc_bus); extern void intel_attach_force_audio_property(struct drm_connector *connector); extern void intel_attach_broadcast_rgb_property(struct drm_connector *connector); extern void intel_crt_init(struct drm_device *dev); extern void intel_hdmi_init(struct drm_device *dev, int sdvox_reg); extern struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder); extern void intel_hdmi_set_avi_infoframe(struct drm_encoder *encoder, struct drm_display_mode *adjusted_mode); extern void intel_hdmi_set_spd_infoframe(struct drm_encoder *encoder); extern void intel_dip_infoframe_csum(struct dip_infoframe *avi_if); extern bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob); extern void intel_dvo_init(struct drm_device *dev); extern void intel_tv_init(struct drm_device *dev); extern void intel_mark_busy(struct drm_device *dev, struct drm_i915_gem_object *obj); extern bool intel_lvds_init(struct drm_device *dev); extern void intel_dp_init(struct drm_device *dev, int dp_reg); void intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); extern bool intel_dpd_is_edp(struct drm_device *dev); extern void intel_edp_link_config(struct intel_encoder *, int *, int *); extern bool intel_encoder_is_pch_edp(struct drm_encoder *encoder); extern int intel_plane_init(struct drm_device *dev, enum pipe pipe); extern void intel_flush_display_plane(struct drm_i915_private *dev_priv, enum plane plane); void intel_sanitize_pm(struct drm_device *dev); /* intel_panel.c */ extern void intel_fixed_panel_mode(struct drm_display_mode *fixed_mode, struct drm_display_mode *adjusted_mode); extern void intel_pch_panel_fitting(struct drm_device *dev, int fitting_mode, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); extern u32 intel_panel_get_max_backlight(struct drm_device *dev); extern u32 intel_panel_get_backlight(struct drm_device *dev); extern void intel_panel_set_backlight(struct drm_device *dev, u32 level); extern int intel_panel_setup_backlight(struct drm_device *dev); extern void intel_panel_enable_backlight(struct drm_device *dev); extern void intel_panel_disable_backlight(struct drm_device *dev); extern void intel_panel_destroy_backlight(struct drm_device *dev); extern enum drm_connector_status intel_panel_detect(struct drm_device *dev); extern void intel_crtc_load_lut(struct drm_crtc *crtc); extern void intel_encoder_prepare(struct drm_encoder *encoder); extern void intel_encoder_commit(struct drm_encoder *encoder); extern void intel_encoder_destroy(struct drm_encoder *encoder); static inline struct intel_encoder *intel_attached_encoder(struct drm_connector *connector) { return to_intel_connector(connector)->encoder; } extern void intel_connector_attach_encoder(struct intel_connector *connector, struct intel_encoder *encoder); extern struct drm_encoder *intel_best_encoder(struct drm_connector *connector); extern struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, struct drm_crtc *crtc); int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data, struct drm_file *file_priv); extern void intel_wait_for_vblank(struct drm_device *dev, int pipe); extern void intel_wait_for_pipe_off(struct drm_device *dev, int pipe); struct intel_load_detect_pipe { struct drm_framebuffer *release_fb; bool load_detect_temp; int dpms_mode; }; extern bool intel_get_load_detect_pipe(struct intel_encoder *intel_encoder, struct drm_connector *connector, struct drm_display_mode *mode, struct intel_load_detect_pipe *old); extern void intel_release_load_detect_pipe(struct intel_encoder *intel_encoder, struct drm_connector *connector, struct intel_load_detect_pipe *old); extern void intelfb_restore(void); extern void intel_crtc_fb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green, u16 blue, int regno); extern void intel_crtc_fb_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, int regno); extern void intel_enable_clock_gating(struct drm_device *dev); extern void ironlake_disable_rc6(struct drm_device *dev); extern void ironlake_enable_drps(struct drm_device *dev); extern void ironlake_disable_drps(struct drm_device *dev); extern void gen6_enable_rps(struct drm_i915_private *dev_priv); extern void gen6_update_ring_freq(struct drm_i915_private *dev_priv); extern void gen6_disable_rps(struct drm_device *dev); extern void intel_init_emon(struct drm_device *dev); extern int intel_enable_rc6(const struct drm_device *dev); extern void intel_ddi_dpms(struct drm_encoder *encoder, int mode); extern void intel_ddi_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); extern int intel_pin_and_fence_fb_obj(struct drm_device *dev, struct drm_i915_gem_object *obj, struct intel_ring_buffer *pipelined); extern void intel_unpin_fb_obj(struct drm_i915_gem_object *obj); extern int intel_framebuffer_init(struct drm_device *dev, struct intel_framebuffer *ifb, struct drm_mode_fb_cmd2 *mode_cmd, struct drm_i915_gem_object *obj); extern int intel_fbdev_init(struct drm_device *dev); extern void intel_fbdev_fini(struct drm_device *dev); extern void intel_prepare_page_flip(struct drm_device *dev, int plane); extern void intel_finish_page_flip(struct drm_device *dev, int pipe); extern void intel_finish_page_flip_plane(struct drm_device *dev, int plane); extern void intel_setup_overlay(struct drm_device *dev); extern void intel_cleanup_overlay(struct drm_device *dev); extern int intel_overlay_switch_off(struct intel_overlay *overlay); extern int intel_overlay_put_image(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int intel_overlay_attrs(struct drm_device *dev, void *data, struct drm_file *file_priv); extern void intel_fb_output_poll_changed(struct drm_device *dev); extern void intel_fb_restore_mode(struct drm_device *dev); extern void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool state); #define assert_pipe_enabled(d, p) assert_pipe(d, p, true) #define assert_pipe_disabled(d, p) assert_pipe(d, p, false) extern void intel_init_clock_gating(struct drm_device *dev); extern void intel_write_eld(struct drm_encoder *encoder, struct drm_display_mode *mode); extern void intel_cpt_verify_modeset(struct drm_device *dev, int pipe); extern void intel_prepare_ddi(struct drm_device *dev); extern void hsw_fdi_link_train(struct drm_crtc *crtc); extern void intel_ddi_init(struct drm_device *dev, enum port port); /* For use by IVB LP watermark workaround in intel_sprite.c */ extern void intel_update_watermarks(struct drm_device *dev); extern void intel_update_sprite_watermarks(struct drm_device *dev, int pipe, uint32_t sprite_width, int pixel_size); extern void intel_update_linetime_watermarks(struct drm_device *dev, int pipe, struct drm_display_mode *mode); extern int intel_sprite_set_colorkey(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int intel_sprite_get_colorkey(struct drm_device *dev, void *data, struct drm_file *file_priv); extern u32 intel_dpio_read(struct drm_i915_private *dev_priv, int reg); /* Power-related functions, located in intel_pm.c */ extern void intel_init_pm(struct drm_device *dev); /* FBC */ extern bool intel_fbc_enabled(struct drm_device *dev); extern void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval); extern void intel_update_fbc(struct drm_device *dev); #endif /* __INTEL_DRV_H__ */ Index: projects/clang-trunk/sys/dev/drm2/i915/intel_ringbuffer.h =================================================================== --- projects/clang-trunk/sys/dev/drm2/i915/intel_ringbuffer.h (revision 287501) +++ projects/clang-trunk/sys/dev/drm2/i915/intel_ringbuffer.h (revision 287502) @@ -1,215 +1,227 @@ /* * $FreeBSD$ */ #ifndef _INTEL_RINGBUFFER_H_ #define _INTEL_RINGBUFFER_H_ +/* + * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use" + * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use" + * Gen4+ BSpec "vol1c Memory Interface and Command Stream" / 5.3.4.5 "Ring Buffer Use" + * + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the same + * cacheline, the Head Pointer must not be greater than the Tail + * Pointer." + */ +#define I915_RING_FREE_SPACE 64 + struct intel_hw_status_page { u32 *page_addr; unsigned int gfx_addr; struct drm_i915_gem_object *obj; }; #define I915_READ_TAIL(ring) I915_READ(RING_TAIL((ring)->mmio_base)) #define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val) #define I915_READ_START(ring) I915_READ(RING_START((ring)->mmio_base)) #define I915_WRITE_START(ring, val) I915_WRITE(RING_START((ring)->mmio_base), val) #define I915_READ_HEAD(ring) I915_READ(RING_HEAD((ring)->mmio_base)) #define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD((ring)->mmio_base), val) #define I915_READ_CTL(ring) I915_READ(RING_CTL((ring)->mmio_base)) #define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL((ring)->mmio_base), val) #define I915_READ_IMR(ring) I915_READ(RING_IMR((ring)->mmio_base)) #define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val) #define I915_READ_NOPID(ring) I915_READ(RING_NOPID((ring)->mmio_base)) #define I915_READ_SYNC_0(ring) I915_READ(RING_SYNC_0((ring)->mmio_base)) #define I915_READ_SYNC_1(ring) I915_READ(RING_SYNC_1((ring)->mmio_base)) struct intel_ring_buffer { const char *name; enum intel_ring_id { RCS = 0x0, VCS, BCS, } id; #define I915_NUM_RINGS 3 u32 mmio_base; void *virtual_start; struct drm_device *dev; struct drm_i915_gem_object *obj; u32 head; u32 tail; int space; int size; int effective_size; struct intel_hw_status_page status_page; /** We track the position of the requests in the ring buffer, and * when each is retired we increment last_retired_head as the GPU * must have finished processing the request and so we know we * can advance the ringbuffer up to that position. * * last_retired_head is set to -1 after the value is consumed so * we can detect new retirements. */ u32 last_retired_head; - u32 irq_refcount; + u32 irq_refcount; /* protected by dev_priv->irq_lock */ u32 irq_enable_mask; /* bitmask to enable ring interrupt */ u32 trace_irq_seqno; u32 sync_seqno[I915_NUM_RINGS-1]; bool (*irq_get)(struct intel_ring_buffer *ring); void (*irq_put)(struct intel_ring_buffer *ring); int (*init)(struct intel_ring_buffer *ring); void (*write_tail)(struct intel_ring_buffer *ring, uint32_t value); int (*flush)(struct intel_ring_buffer *ring, uint32_t invalidate_domains, uint32_t flush_domains); int (*add_request)(struct intel_ring_buffer *ring, uint32_t *seqno); uint32_t (*get_seqno)(struct intel_ring_buffer *ring); int (*dispatch_execbuffer)(struct intel_ring_buffer *ring, uint32_t offset, uint32_t length); +#define I915_DISPATCH_SECURE 0x1 +#define I915_DISPATCH_PINNED 0x2 void (*cleanup)(struct intel_ring_buffer *ring); int (*sync_to)(struct intel_ring_buffer *ring, struct intel_ring_buffer *to, u32 seqno); - + u32 semaphore_register[3]; /*our mbox written by others */ u32 signal_mbox[2]; /* mboxes this ring signals to */ - /** * List of objects currently involved in rendering from the * ringbuffer. * * Includes buffers having the contents of their GPU caches * flushed, not necessarily primitives. last_rendering_seqno * represents when the rendering involved will be completed. * * A reference is held on the buffer while on this list. */ struct list_head active_list; /** * List of breadcrumbs associated with GPU requests currently * outstanding. */ struct list_head request_list; /** * List of objects currently pending a GPU write flush. * * All elements on this list will belong to either the * active_list or flushing_list, last_rendering_seqno can * be used to differentiate between the two elements. */ struct list_head gpu_write_list; /** * Do we have some not yet emitted requests outstanding? */ u32 outstanding_lazy_request; /** * Do an explicit TLB flush before MI_SET_CONTEXT */ bool itlb_before_ctx_switch; struct i915_hw_context *default_context; struct drm_i915_gem_object *last_context_obj; drm_local_map_t map; void *private; }; static inline bool intel_ring_initialized(struct intel_ring_buffer *ring) { return ring->obj != NULL; } static inline unsigned intel_ring_flag(struct intel_ring_buffer *ring) { return 1 << ring->id; } static inline uint32_t intel_ring_sync_index(struct intel_ring_buffer *ring, struct intel_ring_buffer *other) { int idx; /* * cs -> 0 = vcs, 1 = bcs * vcs -> 0 = bcs, 1 = cs, * bcs -> 0 = cs, 1 = vcs. */ idx = (other - ring) - 1; if (idx < 0) idx += I915_NUM_RINGS; return idx; } -static inline uint32_t -intel_read_status_page(struct intel_ring_buffer *ring, int reg) +static inline u32 +intel_read_status_page(struct intel_ring_buffer *ring, + int reg) { - /* Ensure that the compiler doesn't optimize away the load. */ __compiler_membar(); return (atomic_load_acq_32(ring->status_page.page_addr + reg)); } void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring); int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n); static inline int intel_wait_ring_idle(struct intel_ring_buffer *ring) { return (intel_wait_ring_buffer(ring, ring->size - 8)); } int intel_ring_begin(struct intel_ring_buffer *ring, int n); static inline void intel_ring_emit(struct intel_ring_buffer *ring, - uint32_t data) + u32 data) { *(volatile uint32_t *)((char *)ring->virtual_start + ring->tail) = data; ring->tail += 4; } void intel_ring_advance(struct intel_ring_buffer *ring); uint32_t intel_ring_get_seqno(struct intel_ring_buffer *ring); int intel_init_render_ring_buffer(struct drm_device *dev); int intel_init_bsd_ring_buffer(struct drm_device *dev); int intel_init_blt_ring_buffer(struct drm_device *dev); u32 intel_ring_get_active_head(struct intel_ring_buffer *ring); void intel_ring_setup_status_page(struct intel_ring_buffer *ring); static inline u32 intel_ring_get_tail(struct intel_ring_buffer *ring) { return ring->tail; } void i915_trace_irq_get(struct intel_ring_buffer *ring, uint32_t seqno); /* DRI warts */ int intel_render_ring_init_dri(struct drm_device *dev, uint64_t start, uint32_t size); #endif /* _INTEL_RINGBUFFER_H_ */ Index: projects/clang-trunk/sys/dev/drm2/i915/intel_sdvo_regs.h =================================================================== --- projects/clang-trunk/sys/dev/drm2/i915/intel_sdvo_regs.h (revision 287501) +++ projects/clang-trunk/sys/dev/drm2/i915/intel_sdvo_regs.h (revision 287502) @@ -1,725 +1,732 @@ /* * Copyright © 2006-2007 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Authors: * Eric Anholt * * $FreeBSD$ */ /** * @file SDVO command definitions and structures. */ #define SDVO_OUTPUT_FIRST (0) #define SDVO_OUTPUT_TMDS0 (1 << 0) #define SDVO_OUTPUT_RGB0 (1 << 1) #define SDVO_OUTPUT_CVBS0 (1 << 2) #define SDVO_OUTPUT_SVID0 (1 << 3) #define SDVO_OUTPUT_YPRPB0 (1 << 4) #define SDVO_OUTPUT_SCART0 (1 << 5) #define SDVO_OUTPUT_LVDS0 (1 << 6) #define SDVO_OUTPUT_TMDS1 (1 << 8) #define SDVO_OUTPUT_RGB1 (1 << 9) #define SDVO_OUTPUT_CVBS1 (1 << 10) #define SDVO_OUTPUT_SVID1 (1 << 11) #define SDVO_OUTPUT_YPRPB1 (1 << 12) #define SDVO_OUTPUT_SCART1 (1 << 13) #define SDVO_OUTPUT_LVDS1 (1 << 14) #define SDVO_OUTPUT_LAST (14) struct intel_sdvo_caps { u8 vendor_id; u8 device_id; u8 device_rev_id; u8 sdvo_version_major; u8 sdvo_version_minor; unsigned int sdvo_inputs_mask:2; unsigned int smooth_scaling:1; unsigned int sharp_scaling:1; unsigned int up_scaling:1; unsigned int down_scaling:1; unsigned int stall_support:1; unsigned int pad:1; u16 output_flags; } __attribute__((packed)); +/* Note: SDVO detailed timing flags match EDID misc flags. */ +#define DTD_FLAG_HSYNC_POSITIVE (1 << 1) +#define DTD_FLAG_VSYNC_POSITIVE (1 << 2) +#define DTD_FLAG_INTERLACE (1 << 7) + /** This matches the EDID DTD structure, more or less */ struct intel_sdvo_dtd { struct { u16 clock; /**< pixel clock, in 10kHz units */ u8 h_active; /**< lower 8 bits (pixels) */ u8 h_blank; /**< lower 8 bits (pixels) */ u8 h_high; /**< upper 4 bits each h_active, h_blank */ u8 v_active; /**< lower 8 bits (lines) */ u8 v_blank; /**< lower 8 bits (lines) */ u8 v_high; /**< upper 4 bits each v_active, v_blank */ } part1; struct { u8 h_sync_off; /**< lower 8 bits, from hblank start */ u8 h_sync_width; /**< lower 8 bits (pixels) */ /** lower 4 bits each vsync offset, vsync width */ u8 v_sync_off_width; /** * 2 high bits of hsync offset, 2 high bits of hsync width, * bits 4-5 of vsync offset, and 2 high bits of vsync width. */ u8 sync_off_width_high; u8 dtd_flags; u8 sdvo_flags; /** bits 6-7 of vsync offset at bits 6-7 */ u8 v_sync_off_high; u8 reserved; } part2; } __attribute__((packed)); struct intel_sdvo_pixel_clock_range { u16 min; /**< pixel clock, in 10kHz units */ u16 max; /**< pixel clock, in 10kHz units */ } __attribute__((packed)); struct intel_sdvo_preferred_input_timing_args { u16 clock; u16 width; u16 height; u8 interlace:1; u8 scaled:1; u8 pad:6; } __attribute__((packed)); /* I2C registers for SDVO */ #define SDVO_I2C_ARG_0 0x07 #define SDVO_I2C_ARG_1 0x06 #define SDVO_I2C_ARG_2 0x05 #define SDVO_I2C_ARG_3 0x04 #define SDVO_I2C_ARG_4 0x03 #define SDVO_I2C_ARG_5 0x02 #define SDVO_I2C_ARG_6 0x01 #define SDVO_I2C_ARG_7 0x00 #define SDVO_I2C_OPCODE 0x08 #define SDVO_I2C_CMD_STATUS 0x09 #define SDVO_I2C_RETURN_0 0x0a #define SDVO_I2C_RETURN_1 0x0b #define SDVO_I2C_RETURN_2 0x0c #define SDVO_I2C_RETURN_3 0x0d #define SDVO_I2C_RETURN_4 0x0e #define SDVO_I2C_RETURN_5 0x0f #define SDVO_I2C_RETURN_6 0x10 #define SDVO_I2C_RETURN_7 0x11 #define SDVO_I2C_VENDOR_BEGIN 0x20 /* Status results */ #define SDVO_CMD_STATUS_POWER_ON 0x0 #define SDVO_CMD_STATUS_SUCCESS 0x1 #define SDVO_CMD_STATUS_NOTSUPP 0x2 #define SDVO_CMD_STATUS_INVALID_ARG 0x3 #define SDVO_CMD_STATUS_PENDING 0x4 #define SDVO_CMD_STATUS_TARGET_NOT_SPECIFIED 0x5 #define SDVO_CMD_STATUS_SCALING_NOT_SUPP 0x6 /* SDVO commands, argument/result registers */ #define SDVO_CMD_RESET 0x01 /** Returns a struct intel_sdvo_caps */ #define SDVO_CMD_GET_DEVICE_CAPS 0x02 #define SDVO_CMD_GET_FIRMWARE_REV 0x86 # define SDVO_DEVICE_FIRMWARE_MINOR SDVO_I2C_RETURN_0 # define SDVO_DEVICE_FIRMWARE_MAJOR SDVO_I2C_RETURN_1 # define SDVO_DEVICE_FIRMWARE_PATCH SDVO_I2C_RETURN_2 /** * Reports which inputs are trained (managed to sync). * * Devices must have trained within 2 vsyncs of a mode change. */ #define SDVO_CMD_GET_TRAINED_INPUTS 0x03 struct intel_sdvo_get_trained_inputs_response { unsigned int input0_trained:1; unsigned int input1_trained:1; unsigned int pad:6; } __attribute__((packed)); /** Returns a struct intel_sdvo_output_flags of active outputs. */ #define SDVO_CMD_GET_ACTIVE_OUTPUTS 0x04 /** * Sets the current set of active outputs. * * Takes a struct intel_sdvo_output_flags. Must be preceded by a SET_IN_OUT_MAP * on multi-output devices. */ #define SDVO_CMD_SET_ACTIVE_OUTPUTS 0x05 /** * Returns the current mapping of SDVO inputs to outputs on the device. * * Returns two struct intel_sdvo_output_flags structures. */ #define SDVO_CMD_GET_IN_OUT_MAP 0x06 struct intel_sdvo_in_out_map { u16 in0, in1; }; /** * Sets the current mapping of SDVO inputs to outputs on the device. * * Takes two struct i380_sdvo_output_flags structures. */ #define SDVO_CMD_SET_IN_OUT_MAP 0x07 /** * Returns a struct intel_sdvo_output_flags of attached displays. */ #define SDVO_CMD_GET_ATTACHED_DISPLAYS 0x0b /** * Returns a struct intel_sdvo_ouptut_flags of displays supporting hot plugging. */ #define SDVO_CMD_GET_HOT_PLUG_SUPPORT 0x0c /** * Takes a struct intel_sdvo_output_flags. */ #define SDVO_CMD_SET_ACTIVE_HOT_PLUG 0x0d /** * Returns a struct intel_sdvo_output_flags of displays with hot plug * interrupts enabled. */ #define SDVO_CMD_GET_ACTIVE_HOT_PLUG 0x0e #define SDVO_CMD_GET_INTERRUPT_EVENT_SOURCE 0x0f struct intel_sdvo_get_interrupt_event_source_response { u16 interrupt_status; unsigned int ambient_light_interrupt:1; unsigned int hdmi_audio_encrypt_change:1; unsigned int pad:6; } __attribute__((packed)); /** * Selects which input is affected by future input commands. * * Commands affected include SET_INPUT_TIMINGS_PART[12], * GET_INPUT_TIMINGS_PART[12], GET_PREFERRED_INPUT_TIMINGS_PART[12], * GET_INPUT_PIXEL_CLOCK_RANGE, and CREATE_PREFERRED_INPUT_TIMINGS. */ #define SDVO_CMD_SET_TARGET_INPUT 0x10 struct intel_sdvo_set_target_input_args { unsigned int target_1:1; unsigned int pad:7; } __attribute__((packed)); /** * Takes a struct intel_sdvo_output_flags of which outputs are targeted by * future output commands. * * Affected commands inclue SET_OUTPUT_TIMINGS_PART[12], * GET_OUTPUT_TIMINGS_PART[12], and GET_OUTPUT_PIXEL_CLOCK_RANGE. */ #define SDVO_CMD_SET_TARGET_OUTPUT 0x11 #define SDVO_CMD_GET_INPUT_TIMINGS_PART1 0x12 #define SDVO_CMD_GET_INPUT_TIMINGS_PART2 0x13 #define SDVO_CMD_SET_INPUT_TIMINGS_PART1 0x14 #define SDVO_CMD_SET_INPUT_TIMINGS_PART2 0x15 #define SDVO_CMD_SET_OUTPUT_TIMINGS_PART1 0x16 #define SDVO_CMD_SET_OUTPUT_TIMINGS_PART2 0x17 #define SDVO_CMD_GET_OUTPUT_TIMINGS_PART1 0x18 #define SDVO_CMD_GET_OUTPUT_TIMINGS_PART2 0x19 /* Part 1 */ # define SDVO_DTD_CLOCK_LOW SDVO_I2C_ARG_0 # define SDVO_DTD_CLOCK_HIGH SDVO_I2C_ARG_1 # define SDVO_DTD_H_ACTIVE SDVO_I2C_ARG_2 # define SDVO_DTD_H_BLANK SDVO_I2C_ARG_3 # define SDVO_DTD_H_HIGH SDVO_I2C_ARG_4 # define SDVO_DTD_V_ACTIVE SDVO_I2C_ARG_5 # define SDVO_DTD_V_BLANK SDVO_I2C_ARG_6 # define SDVO_DTD_V_HIGH SDVO_I2C_ARG_7 /* Part 2 */ # define SDVO_DTD_HSYNC_OFF SDVO_I2C_ARG_0 # define SDVO_DTD_HSYNC_WIDTH SDVO_I2C_ARG_1 # define SDVO_DTD_VSYNC_OFF_WIDTH SDVO_I2C_ARG_2 # define SDVO_DTD_SYNC_OFF_WIDTH_HIGH SDVO_I2C_ARG_3 # define SDVO_DTD_DTD_FLAGS SDVO_I2C_ARG_4 # define SDVO_DTD_DTD_FLAG_INTERLACED (1 << 7) # define SDVO_DTD_DTD_FLAG_STEREO_MASK (3 << 5) # define SDVO_DTD_DTD_FLAG_INPUT_MASK (3 << 3) # define SDVO_DTD_DTD_FLAG_SYNC_MASK (3 << 1) # define SDVO_DTD_SDVO_FLAS SDVO_I2C_ARG_5 # define SDVO_DTD_SDVO_FLAG_STALL (1 << 7) # define SDVO_DTD_SDVO_FLAG_CENTERED (0 << 6) # define SDVO_DTD_SDVO_FLAG_UPPER_LEFT (1 << 6) # define SDVO_DTD_SDVO_FLAG_SCALING_MASK (3 << 4) # define SDVO_DTD_SDVO_FLAG_SCALING_NONE (0 << 4) # define SDVO_DTD_SDVO_FLAG_SCALING_SHARP (1 << 4) # define SDVO_DTD_SDVO_FLAG_SCALING_SMOOTH (2 << 4) # define SDVO_DTD_VSYNC_OFF_HIGH SDVO_I2C_ARG_6 /** * Generates a DTD based on the given width, height, and flags. * * This will be supported by any device supporting scaling or interlaced * modes. */ #define SDVO_CMD_CREATE_PREFERRED_INPUT_TIMING 0x1a # define SDVO_PREFERRED_INPUT_TIMING_CLOCK_LOW SDVO_I2C_ARG_0 # define SDVO_PREFERRED_INPUT_TIMING_CLOCK_HIGH SDVO_I2C_ARG_1 # define SDVO_PREFERRED_INPUT_TIMING_WIDTH_LOW SDVO_I2C_ARG_2 # define SDVO_PREFERRED_INPUT_TIMING_WIDTH_HIGH SDVO_I2C_ARG_3 # define SDVO_PREFERRED_INPUT_TIMING_HEIGHT_LOW SDVO_I2C_ARG_4 # define SDVO_PREFERRED_INPUT_TIMING_HEIGHT_HIGH SDVO_I2C_ARG_5 # define SDVO_PREFERRED_INPUT_TIMING_FLAGS SDVO_I2C_ARG_6 # define SDVO_PREFERRED_INPUT_TIMING_FLAGS_INTERLACED (1 << 0) # define SDVO_PREFERRED_INPUT_TIMING_FLAGS_SCALED (1 << 1) #define SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART1 0x1b #define SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART2 0x1c /** Returns a struct intel_sdvo_pixel_clock_range */ #define SDVO_CMD_GET_INPUT_PIXEL_CLOCK_RANGE 0x1d /** Returns a struct intel_sdvo_pixel_clock_range */ #define SDVO_CMD_GET_OUTPUT_PIXEL_CLOCK_RANGE 0x1e /** Returns a byte bitfield containing SDVO_CLOCK_RATE_MULT_* flags */ #define SDVO_CMD_GET_SUPPORTED_CLOCK_RATE_MULTS 0x1f /** Returns a byte containing a SDVO_CLOCK_RATE_MULT_* flag */ #define SDVO_CMD_GET_CLOCK_RATE_MULT 0x20 /** Takes a byte containing a SDVO_CLOCK_RATE_MULT_* flag */ #define SDVO_CMD_SET_CLOCK_RATE_MULT 0x21 # define SDVO_CLOCK_RATE_MULT_1X (1 << 0) # define SDVO_CLOCK_RATE_MULT_2X (1 << 1) # define SDVO_CLOCK_RATE_MULT_4X (1 << 3) #define SDVO_CMD_GET_SUPPORTED_TV_FORMATS 0x27 /** 6 bytes of bit flags for TV formats shared by all TV format functions */ struct intel_sdvo_tv_format { unsigned int ntsc_m:1; unsigned int ntsc_j:1; unsigned int ntsc_443:1; unsigned int pal_b:1; unsigned int pal_d:1; unsigned int pal_g:1; unsigned int pal_h:1; unsigned int pal_i:1; unsigned int pal_m:1; unsigned int pal_n:1; unsigned int pal_nc:1; unsigned int pal_60:1; unsigned int secam_b:1; unsigned int secam_d:1; unsigned int secam_g:1; unsigned int secam_k:1; unsigned int secam_k1:1; unsigned int secam_l:1; unsigned int secam_60:1; unsigned int hdtv_std_smpte_240m_1080i_59:1; unsigned int hdtv_std_smpte_240m_1080i_60:1; unsigned int hdtv_std_smpte_260m_1080i_59:1; unsigned int hdtv_std_smpte_260m_1080i_60:1; unsigned int hdtv_std_smpte_274m_1080i_50:1; unsigned int hdtv_std_smpte_274m_1080i_59:1; unsigned int hdtv_std_smpte_274m_1080i_60:1; unsigned int hdtv_std_smpte_274m_1080p_23:1; unsigned int hdtv_std_smpte_274m_1080p_24:1; unsigned int hdtv_std_smpte_274m_1080p_25:1; unsigned int hdtv_std_smpte_274m_1080p_29:1; unsigned int hdtv_std_smpte_274m_1080p_30:1; unsigned int hdtv_std_smpte_274m_1080p_50:1; unsigned int hdtv_std_smpte_274m_1080p_59:1; unsigned int hdtv_std_smpte_274m_1080p_60:1; unsigned int hdtv_std_smpte_295m_1080i_50:1; unsigned int hdtv_std_smpte_295m_1080p_50:1; unsigned int hdtv_std_smpte_296m_720p_59:1; unsigned int hdtv_std_smpte_296m_720p_60:1; unsigned int hdtv_std_smpte_296m_720p_50:1; unsigned int hdtv_std_smpte_293m_480p_59:1; unsigned int hdtv_std_smpte_170m_480i_59:1; unsigned int hdtv_std_iturbt601_576i_50:1; unsigned int hdtv_std_iturbt601_576p_50:1; unsigned int hdtv_std_eia_7702a_480i_60:1; unsigned int hdtv_std_eia_7702a_480p_60:1; unsigned int pad:3; } __attribute__((packed)); #define SDVO_CMD_GET_TV_FORMAT 0x28 #define SDVO_CMD_SET_TV_FORMAT 0x29 /** Returns the resolutiosn that can be used with the given TV format */ #define SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT 0x83 struct intel_sdvo_sdtv_resolution_request { unsigned int ntsc_m:1; unsigned int ntsc_j:1; unsigned int ntsc_443:1; unsigned int pal_b:1; unsigned int pal_d:1; unsigned int pal_g:1; unsigned int pal_h:1; unsigned int pal_i:1; unsigned int pal_m:1; unsigned int pal_n:1; unsigned int pal_nc:1; unsigned int pal_60:1; unsigned int secam_b:1; unsigned int secam_d:1; unsigned int secam_g:1; unsigned int secam_k:1; unsigned int secam_k1:1; unsigned int secam_l:1; unsigned int secam_60:1; unsigned int pad:5; } __attribute__((packed)); struct intel_sdvo_sdtv_resolution_reply { unsigned int res_320x200:1; unsigned int res_320x240:1; unsigned int res_400x300:1; unsigned int res_640x350:1; unsigned int res_640x400:1; unsigned int res_640x480:1; unsigned int res_704x480:1; unsigned int res_704x576:1; unsigned int res_720x350:1; unsigned int res_720x400:1; unsigned int res_720x480:1; unsigned int res_720x540:1; unsigned int res_720x576:1; unsigned int res_768x576:1; unsigned int res_800x600:1; unsigned int res_832x624:1; unsigned int res_920x766:1; unsigned int res_1024x768:1; unsigned int res_1280x1024:1; unsigned int pad:5; } __attribute__((packed)); /* Get supported resolution with squire pixel aspect ratio that can be scaled for the requested HDTV format */ #define SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT 0x85 struct intel_sdvo_hdtv_resolution_request { unsigned int hdtv_std_smpte_240m_1080i_59:1; unsigned int hdtv_std_smpte_240m_1080i_60:1; unsigned int hdtv_std_smpte_260m_1080i_59:1; unsigned int hdtv_std_smpte_260m_1080i_60:1; unsigned int hdtv_std_smpte_274m_1080i_50:1; unsigned int hdtv_std_smpte_274m_1080i_59:1; unsigned int hdtv_std_smpte_274m_1080i_60:1; unsigned int hdtv_std_smpte_274m_1080p_23:1; unsigned int hdtv_std_smpte_274m_1080p_24:1; unsigned int hdtv_std_smpte_274m_1080p_25:1; unsigned int hdtv_std_smpte_274m_1080p_29:1; unsigned int hdtv_std_smpte_274m_1080p_30:1; unsigned int hdtv_std_smpte_274m_1080p_50:1; unsigned int hdtv_std_smpte_274m_1080p_59:1; unsigned int hdtv_std_smpte_274m_1080p_60:1; unsigned int hdtv_std_smpte_295m_1080i_50:1; unsigned int hdtv_std_smpte_295m_1080p_50:1; unsigned int hdtv_std_smpte_296m_720p_59:1; unsigned int hdtv_std_smpte_296m_720p_60:1; unsigned int hdtv_std_smpte_296m_720p_50:1; unsigned int hdtv_std_smpte_293m_480p_59:1; unsigned int hdtv_std_smpte_170m_480i_59:1; unsigned int hdtv_std_iturbt601_576i_50:1; unsigned int hdtv_std_iturbt601_576p_50:1; unsigned int hdtv_std_eia_7702a_480i_60:1; unsigned int hdtv_std_eia_7702a_480p_60:1; unsigned int pad:6; } __attribute__((packed)); struct intel_sdvo_hdtv_resolution_reply { unsigned int res_640x480:1; unsigned int res_800x600:1; unsigned int res_1024x768:1; unsigned int res_1280x960:1; unsigned int res_1400x1050:1; unsigned int res_1600x1200:1; unsigned int res_1920x1440:1; unsigned int res_2048x1536:1; unsigned int res_2560x1920:1; unsigned int res_3200x2400:1; unsigned int res_3840x2880:1; unsigned int pad1:5; unsigned int res_848x480:1; unsigned int res_1064x600:1; unsigned int res_1280x720:1; unsigned int res_1360x768:1; unsigned int res_1704x960:1; unsigned int res_1864x1050:1; unsigned int res_1920x1080:1; unsigned int res_2128x1200:1; unsigned int res_2560x1400:1; unsigned int res_2728x1536:1; unsigned int res_3408x1920:1; unsigned int res_4264x2400:1; unsigned int res_5120x2880:1; unsigned int pad2:3; unsigned int res_768x480:1; unsigned int res_960x600:1; unsigned int res_1152x720:1; unsigned int res_1124x768:1; unsigned int res_1536x960:1; unsigned int res_1680x1050:1; unsigned int res_1728x1080:1; unsigned int res_1920x1200:1; unsigned int res_2304x1440:1; unsigned int res_2456x1536:1; unsigned int res_3072x1920:1; unsigned int res_3840x2400:1; unsigned int res_4608x2880:1; unsigned int pad3:3; unsigned int res_1280x1024:1; unsigned int pad4:7; unsigned int res_1280x768:1; unsigned int pad5:7; } __attribute__((packed)); /* Get supported power state returns info for encoder and monitor, rely on last SetTargetInput and SetTargetOutput calls */ #define SDVO_CMD_GET_SUPPORTED_POWER_STATES 0x2a /* Get power state returns info for encoder and monitor, rely on last SetTargetInput and SetTargetOutput calls */ #define SDVO_CMD_GET_POWER_STATE 0x2b #define SDVO_CMD_GET_ENCODER_POWER_STATE 0x2b #define SDVO_CMD_SET_ENCODER_POWER_STATE 0x2c # define SDVO_ENCODER_STATE_ON (1 << 0) # define SDVO_ENCODER_STATE_STANDBY (1 << 1) # define SDVO_ENCODER_STATE_SUSPEND (1 << 2) # define SDVO_ENCODER_STATE_OFF (1 << 3) # define SDVO_MONITOR_STATE_ON (1 << 4) # define SDVO_MONITOR_STATE_STANDBY (1 << 5) # define SDVO_MONITOR_STATE_SUSPEND (1 << 6) # define SDVO_MONITOR_STATE_OFF (1 << 7) #define SDVO_CMD_GET_MAX_PANEL_POWER_SEQUENCING 0x2d #define SDVO_CMD_GET_PANEL_POWER_SEQUENCING 0x2e #define SDVO_CMD_SET_PANEL_POWER_SEQUENCING 0x2f /** * The panel power sequencing parameters are in units of milliseconds. * The high fields are bits 8:9 of the 10-bit values. */ struct sdvo_panel_power_sequencing { u8 t0; u8 t1; u8 t2; u8 t3; u8 t4; unsigned int t0_high:2; unsigned int t1_high:2; unsigned int t2_high:2; unsigned int t3_high:2; unsigned int t4_high:2; unsigned int pad:6; } __attribute__((packed)); #define SDVO_CMD_GET_MAX_BACKLIGHT_LEVEL 0x30 struct sdvo_max_backlight_reply { u8 max_value; u8 default_value; } __attribute__((packed)); #define SDVO_CMD_GET_BACKLIGHT_LEVEL 0x31 #define SDVO_CMD_SET_BACKLIGHT_LEVEL 0x32 #define SDVO_CMD_GET_AMBIENT_LIGHT 0x33 struct sdvo_get_ambient_light_reply { u16 trip_low; u16 trip_high; u16 value; } __attribute__((packed)); #define SDVO_CMD_SET_AMBIENT_LIGHT 0x34 struct sdvo_set_ambient_light_reply { u16 trip_low; u16 trip_high; unsigned int enable:1; unsigned int pad:7; } __attribute__((packed)); /* Set display power state */ #define SDVO_CMD_SET_DISPLAY_POWER_STATE 0x7d # define SDVO_DISPLAY_STATE_ON (1 << 0) # define SDVO_DISPLAY_STATE_STANDBY (1 << 1) # define SDVO_DISPLAY_STATE_SUSPEND (1 << 2) # define SDVO_DISPLAY_STATE_OFF (1 << 3) #define SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS 0x84 struct intel_sdvo_enhancements_reply { unsigned int flicker_filter:1; unsigned int flicker_filter_adaptive:1; unsigned int flicker_filter_2d:1; unsigned int saturation:1; unsigned int hue:1; unsigned int brightness:1; unsigned int contrast:1; unsigned int overscan_h:1; unsigned int overscan_v:1; unsigned int hpos:1; unsigned int vpos:1; unsigned int sharpness:1; unsigned int dot_crawl:1; unsigned int dither:1; unsigned int tv_chroma_filter:1; unsigned int tv_luma_filter:1; } __attribute__((packed)); /* Picture enhancement limits below are dependent on the current TV format, * and thus need to be queried and set after it. */ #define SDVO_CMD_GET_MAX_FLICKER_FILTER 0x4d #define SDVO_CMD_GET_MAX_FLICKER_FILTER_ADAPTIVE 0x7b #define SDVO_CMD_GET_MAX_FLICKER_FILTER_2D 0x52 #define SDVO_CMD_GET_MAX_SATURATION 0x55 #define SDVO_CMD_GET_MAX_HUE 0x58 #define SDVO_CMD_GET_MAX_BRIGHTNESS 0x5b #define SDVO_CMD_GET_MAX_CONTRAST 0x5e #define SDVO_CMD_GET_MAX_OVERSCAN_H 0x61 #define SDVO_CMD_GET_MAX_OVERSCAN_V 0x64 #define SDVO_CMD_GET_MAX_HPOS 0x67 #define SDVO_CMD_GET_MAX_VPOS 0x6a #define SDVO_CMD_GET_MAX_SHARPNESS 0x6d #define SDVO_CMD_GET_MAX_TV_CHROMA_FILTER 0x74 #define SDVO_CMD_GET_MAX_TV_LUMA_FILTER 0x77 struct intel_sdvo_enhancement_limits_reply { u16 max_value; u16 default_value; } __attribute__((packed)); #define SDVO_CMD_GET_LVDS_PANEL_INFORMATION 0x7f #define SDVO_CMD_SET_LVDS_PANEL_INFORMATION 0x80 # define SDVO_LVDS_COLOR_DEPTH_18 (0 << 0) # define SDVO_LVDS_COLOR_DEPTH_24 (1 << 0) # define SDVO_LVDS_CONNECTOR_SPWG (0 << 2) # define SDVO_LVDS_CONNECTOR_OPENLDI (1 << 2) # define SDVO_LVDS_SINGLE_CHANNEL (0 << 4) # define SDVO_LVDS_DUAL_CHANNEL (1 << 4) #define SDVO_CMD_GET_FLICKER_FILTER 0x4e #define SDVO_CMD_SET_FLICKER_FILTER 0x4f #define SDVO_CMD_GET_FLICKER_FILTER_ADAPTIVE 0x50 #define SDVO_CMD_SET_FLICKER_FILTER_ADAPTIVE 0x51 #define SDVO_CMD_GET_FLICKER_FILTER_2D 0x53 #define SDVO_CMD_SET_FLICKER_FILTER_2D 0x54 #define SDVO_CMD_GET_SATURATION 0x56 #define SDVO_CMD_SET_SATURATION 0x57 #define SDVO_CMD_GET_HUE 0x59 #define SDVO_CMD_SET_HUE 0x5a #define SDVO_CMD_GET_BRIGHTNESS 0x5c #define SDVO_CMD_SET_BRIGHTNESS 0x5d #define SDVO_CMD_GET_CONTRAST 0x5f #define SDVO_CMD_SET_CONTRAST 0x60 #define SDVO_CMD_GET_OVERSCAN_H 0x62 #define SDVO_CMD_SET_OVERSCAN_H 0x63 #define SDVO_CMD_GET_OVERSCAN_V 0x65 #define SDVO_CMD_SET_OVERSCAN_V 0x66 #define SDVO_CMD_GET_HPOS 0x68 #define SDVO_CMD_SET_HPOS 0x69 #define SDVO_CMD_GET_VPOS 0x6b #define SDVO_CMD_SET_VPOS 0x6c #define SDVO_CMD_GET_SHARPNESS 0x6e #define SDVO_CMD_SET_SHARPNESS 0x6f #define SDVO_CMD_GET_TV_CHROMA_FILTER 0x75 #define SDVO_CMD_SET_TV_CHROMA_FILTER 0x76 #define SDVO_CMD_GET_TV_LUMA_FILTER 0x78 #define SDVO_CMD_SET_TV_LUMA_FILTER 0x79 struct intel_sdvo_enhancements_arg { u16 value; } __attribute__((packed)); #define SDVO_CMD_GET_DOT_CRAWL 0x70 #define SDVO_CMD_SET_DOT_CRAWL 0x71 # define SDVO_DOT_CRAWL_ON (1 << 0) # define SDVO_DOT_CRAWL_DEFAULT_ON (1 << 1) #define SDVO_CMD_GET_DITHER 0x72 #define SDVO_CMD_SET_DITHER 0x73 # define SDVO_DITHER_ON (1 << 0) # define SDVO_DITHER_DEFAULT_ON (1 << 1) #define SDVO_CMD_SET_CONTROL_BUS_SWITCH 0x7a # define SDVO_CONTROL_BUS_PROM (1 << 0) # define SDVO_CONTROL_BUS_DDC1 (1 << 1) # define SDVO_CONTROL_BUS_DDC2 (1 << 2) # define SDVO_CONTROL_BUS_DDC3 (1 << 3) /* HDMI op codes */ #define SDVO_CMD_GET_SUPP_ENCODE 0x9d #define SDVO_CMD_GET_ENCODE 0x9e #define SDVO_CMD_SET_ENCODE 0x9f #define SDVO_ENCODE_DVI 0x0 #define SDVO_ENCODE_HDMI 0x1 #define SDVO_CMD_SET_PIXEL_REPLI 0x8b #define SDVO_CMD_GET_PIXEL_REPLI 0x8c #define SDVO_CMD_GET_COLORIMETRY_CAP 0x8d #define SDVO_CMD_SET_COLORIMETRY 0x8e #define SDVO_COLORIMETRY_RGB256 0x0 #define SDVO_COLORIMETRY_RGB220 0x1 #define SDVO_COLORIMETRY_YCrCb422 0x3 #define SDVO_COLORIMETRY_YCrCb444 0x4 #define SDVO_CMD_GET_COLORIMETRY 0x8f #define SDVO_CMD_GET_AUDIO_ENCRYPT_PREFER 0x90 #define SDVO_CMD_SET_AUDIO_STAT 0x91 #define SDVO_CMD_GET_AUDIO_STAT 0x92 #define SDVO_CMD_SET_HBUF_INDEX 0x93 + #define SDVO_HBUF_INDEX_ELD 0 + #define SDVO_HBUF_INDEX_AVI_IF 1 #define SDVO_CMD_GET_HBUF_INDEX 0x94 #define SDVO_CMD_GET_HBUF_INFO 0x95 #define SDVO_CMD_SET_HBUF_AV_SPLIT 0x96 #define SDVO_CMD_GET_HBUF_AV_SPLIT 0x97 #define SDVO_CMD_SET_HBUF_DATA 0x98 #define SDVO_CMD_GET_HBUF_DATA 0x99 #define SDVO_CMD_SET_HBUF_TXRATE 0x9a #define SDVO_CMD_GET_HBUF_TXRATE 0x9b #define SDVO_HBUF_TX_DISABLED (0 << 6) #define SDVO_HBUF_TX_ONCE (2 << 6) #define SDVO_HBUF_TX_VSYNC (3 << 6) #define SDVO_CMD_GET_AUDIO_TX_INFO 0x9c #define SDVO_NEED_TO_STALL (1 << 7) struct intel_sdvo_encode { u8 dvi_rev; u8 hdmi_rev; } __attribute__ ((packed)); Index: projects/clang-trunk/sys/dev/e1000/e1000_80003es2lan.c =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_80003es2lan.c (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_80003es2lan.c (revision 287502) @@ -1,1526 +1,1513 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ /* 80003ES2LAN Gigabit Ethernet Controller (Copper) * 80003ES2LAN Gigabit Ethernet Controller (Serdes) */ #include "e1000_api.h" static s32 e1000_acquire_phy_80003es2lan(struct e1000_hw *hw); static void e1000_release_phy_80003es2lan(struct e1000_hw *hw); static s32 e1000_acquire_nvm_80003es2lan(struct e1000_hw *hw); static void e1000_release_nvm_80003es2lan(struct e1000_hw *hw); static s32 e1000_read_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, u32 offset, u16 *data); static s32 e1000_write_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, u32 offset, u16 data); static s32 e1000_write_nvm_80003es2lan(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); static s32 e1000_get_cfg_done_80003es2lan(struct e1000_hw *hw); static s32 e1000_phy_force_speed_duplex_80003es2lan(struct e1000_hw *hw); static s32 e1000_get_cable_length_80003es2lan(struct e1000_hw *hw); static s32 e1000_get_link_up_info_80003es2lan(struct e1000_hw *hw, u16 *speed, u16 *duplex); static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw); static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw); static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw); static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw); static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex); static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw); static s32 e1000_cfg_on_link_up_80003es2lan(struct e1000_hw *hw); static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, u16 *data); static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, u16 data); static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw); static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); static s32 e1000_read_mac_addr_80003es2lan(struct e1000_hw *hw); static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw); /* A table for the GG82563 cable length where the range is defined * with a lower bound at "index" and the upper bound at * "index + 5". */ static const u16 e1000_gg82563_cable_length_table[] = { 0, 60, 115, 150, 150, 60, 115, 150, 180, 180, 0xFF }; #define GG82563_CABLE_LENGTH_TABLE_SIZE \ (sizeof(e1000_gg82563_cable_length_table) / \ sizeof(e1000_gg82563_cable_length_table[0])) /** * e1000_init_phy_params_80003es2lan - Init ESB2 PHY func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_phy_params_80003es2lan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; DEBUGFUNC("e1000_init_phy_params_80003es2lan"); if (hw->phy.media_type != e1000_media_type_copper) { phy->type = e1000_phy_none; return E1000_SUCCESS; } else { phy->ops.power_up = e1000_power_up_phy_copper; phy->ops.power_down = e1000_power_down_phy_copper_80003es2lan; } phy->addr = 1; phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; phy->reset_delay_us = 100; phy->type = e1000_phy_gg82563; phy->ops.acquire = e1000_acquire_phy_80003es2lan; phy->ops.check_polarity = e1000_check_polarity_m88; phy->ops.check_reset_block = e1000_check_reset_block_generic; phy->ops.commit = e1000_phy_sw_reset_generic; phy->ops.get_cfg_done = e1000_get_cfg_done_80003es2lan; phy->ops.get_info = e1000_get_phy_info_m88; phy->ops.release = e1000_release_phy_80003es2lan; phy->ops.reset = e1000_phy_hw_reset_generic; phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_generic; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_80003es2lan; phy->ops.get_cable_length = e1000_get_cable_length_80003es2lan; phy->ops.read_reg = e1000_read_phy_reg_gg82563_80003es2lan; phy->ops.write_reg = e1000_write_phy_reg_gg82563_80003es2lan; phy->ops.cfg_on_link_up = e1000_cfg_on_link_up_80003es2lan; /* This can only be done after all function pointers are setup. */ ret_val = e1000_get_phy_id(hw); /* Verify phy id */ if (phy->id != GG82563_E_PHY_ID) return -E1000_ERR_PHY; return ret_val; } /** * e1000_init_nvm_params_80003es2lan - Init ESB2 NVM func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_nvm_params_80003es2lan(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; u32 eecd = E1000_READ_REG(hw, E1000_EECD); u16 size; DEBUGFUNC("e1000_init_nvm_params_80003es2lan"); nvm->opcode_bits = 8; nvm->delay_usec = 1; switch (nvm->override) { case e1000_nvm_override_spi_large: nvm->page_size = 32; nvm->address_bits = 16; break; case e1000_nvm_override_spi_small: nvm->page_size = 8; nvm->address_bits = 8; break; default: nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 16 : 8; break; } nvm->type = e1000_nvm_eeprom_spi; size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> E1000_EECD_SIZE_EX_SHIFT); /* Added to a constant, "size" becomes the left-shift value * for setting word_size. */ size += NVM_WORD_SIZE_BASE_SHIFT; /* EEPROM access above 16k is unsupported */ if (size > 14) size = 14; nvm->word_size = 1 << size; /* Function Pointers */ nvm->ops.acquire = e1000_acquire_nvm_80003es2lan; nvm->ops.read = e1000_read_nvm_eerd; nvm->ops.release = e1000_release_nvm_80003es2lan; nvm->ops.update = e1000_update_nvm_checksum_generic; nvm->ops.valid_led_default = e1000_valid_led_default_generic; nvm->ops.validate = e1000_validate_nvm_checksum_generic; nvm->ops.write = e1000_write_nvm_80003es2lan; return E1000_SUCCESS; } /** * e1000_init_mac_params_80003es2lan - Init ESB2 MAC func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_mac_params_80003es2lan(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; DEBUGFUNC("e1000_init_mac_params_80003es2lan"); /* Set media type and media-dependent function pointers */ switch (hw->device_id) { case E1000_DEV_ID_80003ES2LAN_SERDES_DPT: hw->phy.media_type = e1000_media_type_internal_serdes; mac->ops.check_for_link = e1000_check_for_serdes_link_generic; mac->ops.setup_physical_interface = e1000_setup_fiber_serdes_link_generic; break; default: hw->phy.media_type = e1000_media_type_copper; mac->ops.check_for_link = e1000_check_for_copper_link_generic; mac->ops.setup_physical_interface = e1000_setup_copper_link_80003es2lan; break; } /* Set mta register count */ mac->mta_reg_count = 128; /* Set rar entry count */ mac->rar_entry_count = E1000_RAR_ENTRIES; /* Set if part includes ASF firmware */ mac->asf_firmware_present = TRUE; /* FWSM register */ mac->has_fwsm = TRUE; /* ARC supported; valid only if manageability features are enabled. */ mac->arc_subsystem_valid = !!(E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK); /* Adaptive IFS not supported */ mac->adaptive_ifs = FALSE; /* Function pointers */ /* bus type/speed/width */ mac->ops.get_bus_info = e1000_get_bus_info_pcie_generic; /* reset */ mac->ops.reset_hw = e1000_reset_hw_80003es2lan; /* hw initialization */ mac->ops.init_hw = e1000_init_hw_80003es2lan; /* link setup */ mac->ops.setup_link = e1000_setup_link_generic; /* check management mode */ mac->ops.check_mng_mode = e1000_check_mng_mode_generic; /* multicast address update */ mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic; /* writing VFTA */ mac->ops.write_vfta = e1000_write_vfta_generic; /* clearing VFTA */ mac->ops.clear_vfta = e1000_clear_vfta_generic; /* read mac address */ mac->ops.read_mac_addr = e1000_read_mac_addr_80003es2lan; /* ID LED init */ mac->ops.id_led_init = e1000_id_led_init_generic; /* blink LED */ mac->ops.blink_led = e1000_blink_led_generic; /* setup LED */ mac->ops.setup_led = e1000_setup_led_generic; /* cleanup LED */ mac->ops.cleanup_led = e1000_cleanup_led_generic; /* turn on/off LED */ mac->ops.led_on = e1000_led_on_generic; mac->ops.led_off = e1000_led_off_generic; /* clear hardware counters */ mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_80003es2lan; /* link info */ mac->ops.get_link_up_info = e1000_get_link_up_info_80003es2lan; /* set lan id for port to determine which phy lock to use */ hw->mac.ops.set_lan_id(hw); return E1000_SUCCESS; } /** * e1000_init_function_pointers_80003es2lan - Init ESB2 func ptrs. * @hw: pointer to the HW structure * * Called to initialize all function pointers and parameters. **/ void e1000_init_function_pointers_80003es2lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_init_function_pointers_80003es2lan"); hw->mac.ops.init_params = e1000_init_mac_params_80003es2lan; hw->nvm.ops.init_params = e1000_init_nvm_params_80003es2lan; hw->phy.ops.init_params = e1000_init_phy_params_80003es2lan; } /** * e1000_acquire_phy_80003es2lan - Acquire rights to access PHY * @hw: pointer to the HW structure * * A wrapper to acquire access rights to the correct PHY. **/ static s32 e1000_acquire_phy_80003es2lan(struct e1000_hw *hw) { u16 mask; DEBUGFUNC("e1000_acquire_phy_80003es2lan"); mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; return e1000_acquire_swfw_sync_80003es2lan(hw, mask); } /** * e1000_release_phy_80003es2lan - Release rights to access PHY * @hw: pointer to the HW structure * * A wrapper to release access rights to the correct PHY. **/ static void e1000_release_phy_80003es2lan(struct e1000_hw *hw) { u16 mask; DEBUGFUNC("e1000_release_phy_80003es2lan"); mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; e1000_release_swfw_sync_80003es2lan(hw, mask); } /** * e1000_acquire_mac_csr_80003es2lan - Acquire right to access Kumeran register * @hw: pointer to the HW structure * * Acquire the semaphore to access the Kumeran interface. * **/ static s32 e1000_acquire_mac_csr_80003es2lan(struct e1000_hw *hw) { u16 mask; DEBUGFUNC("e1000_acquire_mac_csr_80003es2lan"); mask = E1000_SWFW_CSR_SM; return e1000_acquire_swfw_sync_80003es2lan(hw, mask); } /** * e1000_release_mac_csr_80003es2lan - Release right to access Kumeran Register * @hw: pointer to the HW structure * * Release the semaphore used to access the Kumeran interface **/ static void e1000_release_mac_csr_80003es2lan(struct e1000_hw *hw) { u16 mask; DEBUGFUNC("e1000_release_mac_csr_80003es2lan"); mask = E1000_SWFW_CSR_SM; e1000_release_swfw_sync_80003es2lan(hw, mask); } /** * e1000_acquire_nvm_80003es2lan - Acquire rights to access NVM * @hw: pointer to the HW structure * * Acquire the semaphore to access the EEPROM. **/ static s32 e1000_acquire_nvm_80003es2lan(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_acquire_nvm_80003es2lan"); ret_val = e1000_acquire_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); if (ret_val) return ret_val; ret_val = e1000_acquire_nvm_generic(hw); if (ret_val) e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); return ret_val; } /** * e1000_release_nvm_80003es2lan - Relinquish rights to access NVM * @hw: pointer to the HW structure * * Release the semaphore used to access the EEPROM. **/ static void e1000_release_nvm_80003es2lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_release_nvm_80003es2lan"); e1000_release_nvm_generic(hw); e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); } /** * e1000_acquire_swfw_sync_80003es2lan - Acquire SW/FW semaphore * @hw: pointer to the HW structure * @mask: specifies which semaphore to acquire * * Acquire the SW/FW semaphore to access the PHY or NVM. The mask * will also specify which port we're acquiring the lock for. **/ static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) { u32 swfw_sync; u32 swmask = mask; u32 fwmask = mask << 16; s32 i = 0; s32 timeout = 50; DEBUGFUNC("e1000_acquire_swfw_sync_80003es2lan"); while (i < timeout) { if (e1000_get_hw_semaphore_generic(hw)) return -E1000_ERR_SWFW_SYNC; swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); if (!(swfw_sync & (fwmask | swmask))) break; /* Firmware currently using resource (fwmask) * or other software thread using resource (swmask) */ e1000_put_hw_semaphore_generic(hw); msec_delay_irq(5); i++; } if (i == timeout) { DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); return -E1000_ERR_SWFW_SYNC; } swfw_sync |= swmask; E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); e1000_put_hw_semaphore_generic(hw); return E1000_SUCCESS; } /** * e1000_release_swfw_sync_80003es2lan - Release SW/FW semaphore * @hw: pointer to the HW structure * @mask: specifies which semaphore to acquire * * Release the SW/FW semaphore used to access the PHY or NVM. The mask * will also specify which port we're releasing the lock for. **/ static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) { u32 swfw_sync; DEBUGFUNC("e1000_release_swfw_sync_80003es2lan"); while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS) ; /* Empty */ swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); swfw_sync &= ~mask; E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); e1000_put_hw_semaphore_generic(hw); } /** * e1000_read_phy_reg_gg82563_80003es2lan - Read GG82563 PHY register * @hw: pointer to the HW structure * @offset: offset of the register to read * @data: pointer to the data returned from the operation * * Read the GG82563 PHY register. **/ static s32 e1000_read_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, u32 offset, u16 *data) { s32 ret_val; u32 page_select; u16 temp; DEBUGFUNC("e1000_read_phy_reg_gg82563_80003es2lan"); ret_val = e1000_acquire_phy_80003es2lan(hw); if (ret_val) return ret_val; /* Select Configuration Page */ if ((offset & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { page_select = GG82563_PHY_PAGE_SELECT; } else { /* Use Alternative Page Select register to access * registers 30 and 31 */ page_select = GG82563_PHY_PAGE_SELECT_ALT; } temp = (u16)((u16)offset >> GG82563_PAGE_SHIFT); ret_val = e1000_write_phy_reg_mdic(hw, page_select, temp); if (ret_val) { e1000_release_phy_80003es2lan(hw); return ret_val; } if (hw->dev_spec._80003es2lan.mdic_wa_enable) { /* The "ready" bit in the MDIC register may be incorrectly set * before the device has completed the "Page Select" MDI * transaction. So we wait 200us after each MDI command... */ usec_delay(200); /* ...and verify the command was successful. */ ret_val = e1000_read_phy_reg_mdic(hw, page_select, &temp); if (((u16)offset >> GG82563_PAGE_SHIFT) != temp) { e1000_release_phy_80003es2lan(hw); return -E1000_ERR_PHY; } usec_delay(200); ret_val = e1000_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, data); usec_delay(200); } else { ret_val = e1000_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, data); } e1000_release_phy_80003es2lan(hw); return ret_val; } /** * e1000_write_phy_reg_gg82563_80003es2lan - Write GG82563 PHY register * @hw: pointer to the HW structure * @offset: offset of the register to read * @data: value to write to the register * * Write to the GG82563 PHY register. **/ static s32 e1000_write_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, u32 offset, u16 data) { s32 ret_val; u32 page_select; u16 temp; DEBUGFUNC("e1000_write_phy_reg_gg82563_80003es2lan"); ret_val = e1000_acquire_phy_80003es2lan(hw); if (ret_val) return ret_val; /* Select Configuration Page */ if ((offset & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { page_select = GG82563_PHY_PAGE_SELECT; } else { /* Use Alternative Page Select register to access * registers 30 and 31 */ page_select = GG82563_PHY_PAGE_SELECT_ALT; } temp = (u16)((u16)offset >> GG82563_PAGE_SHIFT); ret_val = e1000_write_phy_reg_mdic(hw, page_select, temp); if (ret_val) { e1000_release_phy_80003es2lan(hw); return ret_val; } if (hw->dev_spec._80003es2lan.mdic_wa_enable) { /* The "ready" bit in the MDIC register may be incorrectly set * before the device has completed the "Page Select" MDI * transaction. So we wait 200us after each MDI command... */ usec_delay(200); /* ...and verify the command was successful. */ ret_val = e1000_read_phy_reg_mdic(hw, page_select, &temp); if (((u16)offset >> GG82563_PAGE_SHIFT) != temp) { e1000_release_phy_80003es2lan(hw); return -E1000_ERR_PHY; } usec_delay(200); ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, data); usec_delay(200); } else { ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, data); } e1000_release_phy_80003es2lan(hw); return ret_val; } /** * e1000_write_nvm_80003es2lan - Write to ESB2 NVM * @hw: pointer to the HW structure * @offset: offset of the register to read * @words: number of words to write * @data: buffer of data to write to the NVM * * Write "words" of data to the ESB2 NVM. **/ static s32 e1000_write_nvm_80003es2lan(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { DEBUGFUNC("e1000_write_nvm_80003es2lan"); return e1000_write_nvm_spi(hw, offset, words, data); } /** * e1000_get_cfg_done_80003es2lan - Wait for configuration to complete * @hw: pointer to the HW structure * * Wait a specific amount of time for manageability processes to complete. * This is a function pointer entry point called by the phy module. **/ static s32 e1000_get_cfg_done_80003es2lan(struct e1000_hw *hw) { s32 timeout = PHY_CFG_TIMEOUT; u32 mask = E1000_NVM_CFG_DONE_PORT_0; DEBUGFUNC("e1000_get_cfg_done_80003es2lan"); if (hw->bus.func == 1) mask = E1000_NVM_CFG_DONE_PORT_1; while (timeout) { if (E1000_READ_REG(hw, E1000_EEMNGCTL) & mask) break; msec_delay(1); timeout--; } if (!timeout) { DEBUGOUT("MNG configuration cycle has not completed.\n"); return -E1000_ERR_RESET; } return E1000_SUCCESS; } /** * e1000_phy_force_speed_duplex_80003es2lan - Force PHY speed and duplex * @hw: pointer to the HW structure * * Force the speed and duplex settings onto the PHY. This is a * function pointer entry point called by the phy module. **/ static s32 e1000_phy_force_speed_duplex_80003es2lan(struct e1000_hw *hw) { s32 ret_val; u16 phy_data; bool link; DEBUGFUNC("e1000_phy_force_speed_duplex_80003es2lan"); if (!(hw->phy.ops.read_reg)) return E1000_SUCCESS; /* Clear Auto-Crossover to force MDI manually. M88E1000 requires MDI * forced whenever speed and duplex are forced. */ ret_val = hw->phy.ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; phy_data &= ~GG82563_PSCR_CROSSOVER_MODE_AUTO; ret_val = hw->phy.ops.write_reg(hw, GG82563_PHY_SPEC_CTRL, phy_data); if (ret_val) return ret_val; DEBUGOUT1("GG82563 PSCR: %X\n", phy_data); ret_val = hw->phy.ops.read_reg(hw, PHY_CONTROL, &phy_data); if (ret_val) return ret_val; e1000_phy_force_speed_duplex_setup(hw, &phy_data); /* Reset the phy to commit changes. */ phy_data |= MII_CR_RESET; ret_val = hw->phy.ops.write_reg(hw, PHY_CONTROL, phy_data); if (ret_val) return ret_val; usec_delay(1); if (hw->phy.autoneg_wait_to_complete) { DEBUGOUT("Waiting for forced speed/duplex link on GG82563 phy.\n"); ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, 100000, &link); if (ret_val) return ret_val; if (!link) { /* We didn't get link. * Reset the DSP and cross our fingers. */ ret_val = e1000_phy_reset_dsp_generic(hw); if (ret_val) return ret_val; } /* Try once more */ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, 100000, &link); if (ret_val) return ret_val; } ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; /* Resetting the phy means we need to verify the TX_CLK corresponds * to the link speed. 10Mbps -> 2.5MHz, else 25MHz. */ phy_data &= ~GG82563_MSCR_TX_CLK_MASK; if (hw->mac.forced_speed_duplex & E1000_ALL_10_SPEED) phy_data |= GG82563_MSCR_TX_CLK_10MBPS_2_5; else phy_data |= GG82563_MSCR_TX_CLK_100MBPS_25; /* In addition, we must re-enable CRS on Tx for both half and full * duplex. */ phy_data |= GG82563_MSCR_ASSERT_CRS_ON_TX; ret_val = hw->phy.ops.write_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, phy_data); return ret_val; } /** * e1000_get_cable_length_80003es2lan - Set approximate cable length * @hw: pointer to the HW structure * * Find the approximate cable length as measured by the GG82563 PHY. * This is a function pointer entry point called by the phy module. **/ static s32 e1000_get_cable_length_80003es2lan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 phy_data, index; DEBUGFUNC("e1000_get_cable_length_80003es2lan"); if (!(hw->phy.ops.read_reg)) return E1000_SUCCESS; ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_DSP_DISTANCE, &phy_data); if (ret_val) return ret_val; index = phy_data & GG82563_DSPD_CABLE_LENGTH; if (index >= GG82563_CABLE_LENGTH_TABLE_SIZE - 5) return -E1000_ERR_PHY; phy->min_cable_length = e1000_gg82563_cable_length_table[index]; phy->max_cable_length = e1000_gg82563_cable_length_table[index + 5]; phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; return E1000_SUCCESS; } /** * e1000_get_link_up_info_80003es2lan - Report speed and duplex * @hw: pointer to the HW structure * @speed: pointer to speed buffer * @duplex: pointer to duplex buffer * * Retrieve the current speed and duplex configuration. **/ static s32 e1000_get_link_up_info_80003es2lan(struct e1000_hw *hw, u16 *speed, u16 *duplex) { s32 ret_val; DEBUGFUNC("e1000_get_link_up_info_80003es2lan"); if (hw->phy.media_type == e1000_media_type_copper) { ret_val = e1000_get_speed_and_duplex_copper_generic(hw, speed, duplex); hw->phy.ops.cfg_on_link_up(hw); } else { ret_val = e1000_get_speed_and_duplex_fiber_serdes_generic(hw, speed, duplex); } return ret_val; } /** * e1000_reset_hw_80003es2lan - Reset the ESB2 controller * @hw: pointer to the HW structure * * Perform a global reset to the ESB2 controller. **/ static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; u16 kum_reg_data; DEBUGFUNC("e1000_reset_hw_80003es2lan"); /* Prevent the PCI-E bus from sticking if there is no TLP connection * on the last TLP read/write transaction when MAC is reset. */ ret_val = e1000_disable_pcie_master_generic(hw); if (ret_val) DEBUGOUT("PCI-E Master disable polling has failed.\n"); DEBUGOUT("Masking off all interrupts\n"); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_WRITE_REG(hw, E1000_RCTL, 0); E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); E1000_WRITE_FLUSH(hw); msec_delay(10); ctrl = E1000_READ_REG(hw, E1000_CTRL); ret_val = e1000_acquire_phy_80003es2lan(hw); if (ret_val) return ret_val; DEBUGOUT("Issuing a global reset to MAC\n"); E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); e1000_release_phy_80003es2lan(hw); /* Disable IBIST slave mode (far-end loopback) */ - ret_val = e1000_read_kmrn_reg_80003es2lan(hw, - E1000_KMRNCTRLSTA_INBAND_PARAM, &kum_reg_data); - if (!ret_val) { - kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE; - ret_val = e1000_write_kmrn_reg_80003es2lan(hw, - E1000_KMRNCTRLSTA_INBAND_PARAM, - kum_reg_data); - if (ret_val) - DEBUGOUT("Error disabling far-end loopback\n"); - } else - DEBUGOUT("Error disabling far-end loopback\n"); + e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, + &kum_reg_data); + kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE; + e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, + kum_reg_data); ret_val = e1000_get_auto_rd_done_generic(hw); if (ret_val) /* We don't want to continue accessing MAC registers. */ return ret_val; /* Clear any pending interrupt events. */ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_READ_REG(hw, E1000_ICR); return e1000_check_alt_mac_addr_generic(hw); } /** * e1000_init_hw_80003es2lan - Initialize the ESB2 controller * @hw: pointer to the HW structure * * Initialize the hw bits, LED, VFTA, MTA, link and hw counters. **/ static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 reg_data; s32 ret_val; u16 kum_reg_data; u16 i; DEBUGFUNC("e1000_init_hw_80003es2lan"); e1000_initialize_hw_bits_80003es2lan(hw); /* Initialize identification LED */ ret_val = mac->ops.id_led_init(hw); /* An error is not fatal and we should not stop init due to this */ if (ret_val) DEBUGOUT("Error initializing identification LED\n"); /* Disabling VLAN filtering */ DEBUGOUT("Initializing the IEEE VLAN\n"); mac->ops.clear_vfta(hw); /* Setup the receive address. */ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); /* Zero out the Multicast HASH table */ DEBUGOUT("Zeroing the MTA\n"); for (i = 0; i < mac->mta_reg_count; i++) E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); /* Setup link and flow control */ ret_val = mac->ops.setup_link(hw); if (ret_val) return ret_val; /* Disable IBIST slave mode (far-end loopback) */ - ret_val = - e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, - &kum_reg_data); - if (!ret_val) { - kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE; - ret_val = e1000_write_kmrn_reg_80003es2lan(hw, - E1000_KMRNCTRLSTA_INBAND_PARAM, - kum_reg_data); - if (ret_val) - DEBUGOUT("Error disabling far-end loopback\n"); - } else - DEBUGOUT("Error disabling far-end loopback\n"); + e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, + &kum_reg_data); + kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE; + e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, + kum_reg_data); /* Set the transmit descriptor write-back policy */ reg_data = E1000_READ_REG(hw, E1000_TXDCTL(0)); reg_data = ((reg_data & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC); E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg_data); /* ...for both queues. */ reg_data = E1000_READ_REG(hw, E1000_TXDCTL(1)); reg_data = ((reg_data & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC); E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg_data); /* Enable retransmit on late collisions */ reg_data = E1000_READ_REG(hw, E1000_TCTL); reg_data |= E1000_TCTL_RTLC; E1000_WRITE_REG(hw, E1000_TCTL, reg_data); /* Configure Gigabit Carry Extend Padding */ reg_data = E1000_READ_REG(hw, E1000_TCTL_EXT); reg_data &= ~E1000_TCTL_EXT_GCEX_MASK; reg_data |= DEFAULT_TCTL_EXT_GCEX_80003ES2LAN; E1000_WRITE_REG(hw, E1000_TCTL_EXT, reg_data); /* Configure Transmit Inter-Packet Gap */ reg_data = E1000_READ_REG(hw, E1000_TIPG); reg_data &= ~E1000_TIPG_IPGT_MASK; reg_data |= DEFAULT_TIPG_IPGT_1000_80003ES2LAN; E1000_WRITE_REG(hw, E1000_TIPG, reg_data); reg_data = E1000_READ_REG_ARRAY(hw, E1000_FFLT, 0x0001); reg_data &= ~0x00100000; E1000_WRITE_REG_ARRAY(hw, E1000_FFLT, 0x0001, reg_data); /* default to TRUE to enable the MDIC W/A */ hw->dev_spec._80003es2lan.mdic_wa_enable = TRUE; ret_val = e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_OFFSET >> E1000_KMRNCTRLSTA_OFFSET_SHIFT, &i); if (!ret_val) { if ((i & E1000_KMRNCTRLSTA_OPMODE_MASK) == E1000_KMRNCTRLSTA_OPMODE_INBAND_MDIO) hw->dev_spec._80003es2lan.mdic_wa_enable = FALSE; } /* Clear all of the statistics registers (clear on read). It is * important that we do this after we have tried to establish link * because the symbol error count will increment wildly if there * is no link. */ e1000_clear_hw_cntrs_80003es2lan(hw); return ret_val; } /** * e1000_initialize_hw_bits_80003es2lan - Init hw bits of ESB2 * @hw: pointer to the HW structure * * Initializes required hardware-dependent bits needed for normal operation. **/ static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw) { u32 reg; DEBUGFUNC("e1000_initialize_hw_bits_80003es2lan"); /* Transmit Descriptor Control 0 */ reg = E1000_READ_REG(hw, E1000_TXDCTL(0)); reg |= (1 << 22); E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg); /* Transmit Descriptor Control 1 */ reg = E1000_READ_REG(hw, E1000_TXDCTL(1)); reg |= (1 << 22); E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg); /* Transmit Arbitration Control 0 */ reg = E1000_READ_REG(hw, E1000_TARC(0)); reg &= ~(0xF << 27); /* 30:27 */ if (hw->phy.media_type != e1000_media_type_copper) reg &= ~(1 << 20); E1000_WRITE_REG(hw, E1000_TARC(0), reg); /* Transmit Arbitration Control 1 */ reg = E1000_READ_REG(hw, E1000_TARC(1)); if (E1000_READ_REG(hw, E1000_TCTL) & E1000_TCTL_MULR) reg &= ~(1 << 28); else reg |= (1 << 28); E1000_WRITE_REG(hw, E1000_TARC(1), reg); /* Disable IPv6 extension header parsing because some malformed * IPv6 headers can hang the Rx. */ reg = E1000_READ_REG(hw, E1000_RFCTL); reg |= (E1000_RFCTL_IPV6_EX_DIS | E1000_RFCTL_NEW_IPV6_EXT_DIS); E1000_WRITE_REG(hw, E1000_RFCTL, reg); return; } /** * e1000_copper_link_setup_gg82563_80003es2lan - Configure GG82563 Link * @hw: pointer to the HW structure * * Setup some GG82563 PHY registers for obtaining link **/ static s32 e1000_copper_link_setup_gg82563_80003es2lan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u32 reg; u16 data; DEBUGFUNC("e1000_copper_link_setup_gg82563_80003es2lan"); ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, &data); if (ret_val) return ret_val; data |= GG82563_MSCR_ASSERT_CRS_ON_TX; /* Use 25MHz for both link down and 1000Base-T for Tx clock. */ data |= GG82563_MSCR_TX_CLK_1000MBPS_25; ret_val = hw->phy.ops.write_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, data); if (ret_val) return ret_val; /* Options: * MDI/MDI-X = 0 (default) * 0 - Auto for all speeds * 1 - MDI mode * 2 - MDI-X mode * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) */ ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_SPEC_CTRL, &data); if (ret_val) return ret_val; data &= ~GG82563_PSCR_CROSSOVER_MODE_MASK; switch (phy->mdix) { case 1: data |= GG82563_PSCR_CROSSOVER_MODE_MDI; break; case 2: data |= GG82563_PSCR_CROSSOVER_MODE_MDIX; break; case 0: default: data |= GG82563_PSCR_CROSSOVER_MODE_AUTO; break; } /* Options: * disable_polarity_correction = 0 (default) * Automatic Correction for Reversed Cable Polarity * 0 - Disabled * 1 - Enabled */ data &= ~GG82563_PSCR_POLARITY_REVERSAL_DISABLE; if (phy->disable_polarity_correction) data |= GG82563_PSCR_POLARITY_REVERSAL_DISABLE; ret_val = hw->phy.ops.write_reg(hw, GG82563_PHY_SPEC_CTRL, data); if (ret_val) return ret_val; /* SW Reset the PHY so all changes take effect */ ret_val = hw->phy.ops.commit(hw); if (ret_val) { DEBUGOUT("Error Resetting the PHY\n"); return ret_val; } /* Bypass Rx and Tx FIFO's */ reg = E1000_KMRNCTRLSTA_OFFSET_FIFO_CTRL; data = (E1000_KMRNCTRLSTA_FIFO_CTRL_RX_BYPASS | E1000_KMRNCTRLSTA_FIFO_CTRL_TX_BYPASS); ret_val = e1000_write_kmrn_reg_80003es2lan(hw, reg, data); if (ret_val) return ret_val; reg = E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE; ret_val = e1000_read_kmrn_reg_80003es2lan(hw, reg, &data); if (ret_val) return ret_val; data |= E1000_KMRNCTRLSTA_OPMODE_E_IDLE; ret_val = e1000_write_kmrn_reg_80003es2lan(hw, reg, data); if (ret_val) return ret_val; ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_SPEC_CTRL_2, &data); if (ret_val) return ret_val; data &= ~GG82563_PSCR2_REVERSE_AUTO_NEG; ret_val = hw->phy.ops.write_reg(hw, GG82563_PHY_SPEC_CTRL_2, data); if (ret_val) return ret_val; reg = E1000_READ_REG(hw, E1000_CTRL_EXT); reg &= ~E1000_CTRL_EXT_LINK_MODE_MASK; E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_PWR_MGMT_CTRL, &data); if (ret_val) return ret_val; /* Do not init these registers when the HW is in IAMT mode, since the * firmware will have already initialized them. We only initialize * them if the HW is not in IAMT mode. */ if (!hw->mac.ops.check_mng_mode(hw)) { /* Enable Electrical Idle on the PHY */ data |= GG82563_PMCR_ENABLE_ELECTRICAL_IDLE; ret_val = hw->phy.ops.write_reg(hw, GG82563_PHY_PWR_MGMT_CTRL, data); if (ret_val) return ret_val; ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, &data); if (ret_val) return ret_val; data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ret_val = hw->phy.ops.write_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, data); if (ret_val) return ret_val; } /* Workaround: Disable padding in Kumeran interface in the MAC * and in the PHY to avoid CRC errors. */ ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_INBAND_CTRL, &data); if (ret_val) return ret_val; data |= GG82563_ICR_DIS_PADDING; ret_val = hw->phy.ops.write_reg(hw, GG82563_PHY_INBAND_CTRL, data); if (ret_val) return ret_val; return E1000_SUCCESS; } /** * e1000_setup_copper_link_80003es2lan - Setup Copper Link for ESB2 * @hw: pointer to the HW structure * * Essentially a wrapper for setting up all things "copper" related. * This is a function pointer entry point called by the mac module. **/ static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; u16 reg_data; DEBUGFUNC("e1000_setup_copper_link_80003es2lan"); ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= E1000_CTRL_SLU; ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); /* Set the mac to wait the maximum time between each * iteration and increase the max iterations when * polling the phy; this fixes erroneous timeouts at 10Mbps. */ ret_val = e1000_write_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 4), 0xFFFF); if (ret_val) return ret_val; ret_val = e1000_read_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 9), ®_data); if (ret_val) return ret_val; reg_data |= 0x3F; ret_val = e1000_write_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 9), reg_data); if (ret_val) return ret_val; ret_val = e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_OFFSET_INB_CTRL, ®_data); if (ret_val) return ret_val; reg_data |= E1000_KMRNCTRLSTA_INB_CTRL_DIS_PADDING; ret_val = e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_OFFSET_INB_CTRL, reg_data); if (ret_val) return ret_val; ret_val = e1000_copper_link_setup_gg82563_80003es2lan(hw); if (ret_val) return ret_val; return e1000_setup_copper_link_generic(hw); } /** * e1000_cfg_on_link_up_80003es2lan - es2 link configuration after link-up * @hw: pointer to the HW structure * @duplex: current duplex setting * * Configure the KMRN interface by applying last minute quirks for * 10/100 operation. **/ static s32 e1000_cfg_on_link_up_80003es2lan(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 speed; u16 duplex; DEBUGFUNC("e1000_configure_on_link_up"); if (hw->phy.media_type == e1000_media_type_copper) { ret_val = e1000_get_speed_and_duplex_copper_generic(hw, &speed, &duplex); if (ret_val) return ret_val; if (speed == SPEED_1000) ret_val = e1000_cfg_kmrn_1000_80003es2lan(hw); else ret_val = e1000_cfg_kmrn_10_100_80003es2lan(hw, duplex); } return ret_val; } /** * e1000_cfg_kmrn_10_100_80003es2lan - Apply "quirks" for 10/100 operation * @hw: pointer to the HW structure * @duplex: current duplex setting * * Configure the KMRN interface by applying last minute quirks for * 10/100 operation. **/ static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex) { s32 ret_val; u32 tipg; u32 i = 0; u16 reg_data, reg_data2; DEBUGFUNC("e1000_configure_kmrn_for_10_100"); reg_data = E1000_KMRNCTRLSTA_HD_CTRL_10_100_DEFAULT; ret_val = e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_OFFSET_HD_CTRL, reg_data); if (ret_val) return ret_val; /* Configure Transmit Inter-Packet Gap */ tipg = E1000_READ_REG(hw, E1000_TIPG); tipg &= ~E1000_TIPG_IPGT_MASK; tipg |= DEFAULT_TIPG_IPGT_10_100_80003ES2LAN; E1000_WRITE_REG(hw, E1000_TIPG, tipg); do { ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data); if (ret_val) return ret_val; ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data2); if (ret_val) return ret_val; i++; } while ((reg_data != reg_data2) && (i < GG82563_MAX_KMRN_RETRY)); if (duplex == HALF_DUPLEX) reg_data |= GG82563_KMCR_PASS_FALSE_CARRIER; else reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; return hw->phy.ops.write_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data); } /** * e1000_cfg_kmrn_1000_80003es2lan - Apply "quirks" for gigabit operation * @hw: pointer to the HW structure * * Configure the KMRN interface by applying last minute quirks for * gigabit operation. **/ static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw) { s32 ret_val; u16 reg_data, reg_data2; u32 tipg; u32 i = 0; DEBUGFUNC("e1000_configure_kmrn_for_1000"); reg_data = E1000_KMRNCTRLSTA_HD_CTRL_1000_DEFAULT; ret_val = e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_OFFSET_HD_CTRL, reg_data); if (ret_val) return ret_val; /* Configure Transmit Inter-Packet Gap */ tipg = E1000_READ_REG(hw, E1000_TIPG); tipg &= ~E1000_TIPG_IPGT_MASK; tipg |= DEFAULT_TIPG_IPGT_1000_80003ES2LAN; E1000_WRITE_REG(hw, E1000_TIPG, tipg); do { ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data); if (ret_val) return ret_val; ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data2); if (ret_val) return ret_val; i++; } while ((reg_data != reg_data2) && (i < GG82563_MAX_KMRN_RETRY)); reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; return hw->phy.ops.write_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data); } /** * e1000_read_kmrn_reg_80003es2lan - Read kumeran register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Acquire semaphore, then read the PHY register at offset * using the kumeran interface. The information retrieved is stored in data. * Release the semaphore before exiting. **/ static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, u16 *data) { u32 kmrnctrlsta; s32 ret_val; DEBUGFUNC("e1000_read_kmrn_reg_80003es2lan"); ret_val = e1000_acquire_mac_csr_80003es2lan(hw); if (ret_val) return ret_val; kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta); E1000_WRITE_FLUSH(hw); usec_delay(2); kmrnctrlsta = E1000_READ_REG(hw, E1000_KMRNCTRLSTA); *data = (u16)kmrnctrlsta; e1000_release_mac_csr_80003es2lan(hw); return ret_val; } /** * e1000_write_kmrn_reg_80003es2lan - Write kumeran register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * * Acquire semaphore, then write the data to PHY register * at the offset using the kumeran interface. Release semaphore * before exiting. **/ static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, u16 data) { u32 kmrnctrlsta; s32 ret_val; DEBUGFUNC("e1000_write_kmrn_reg_80003es2lan"); ret_val = e1000_acquire_mac_csr_80003es2lan(hw); if (ret_val) return ret_val; kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & E1000_KMRNCTRLSTA_OFFSET) | data; E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta); E1000_WRITE_FLUSH(hw); usec_delay(2); e1000_release_mac_csr_80003es2lan(hw); return ret_val; } /** * e1000_read_mac_addr_80003es2lan - Read device MAC address * @hw: pointer to the HW structure **/ static s32 e1000_read_mac_addr_80003es2lan(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_read_mac_addr_80003es2lan"); /* If there's an alternate MAC address place it in RAR0 * so that it will override the Si installed default perm * address. */ ret_val = e1000_check_alt_mac_addr_generic(hw); if (ret_val) return ret_val; return e1000_read_mac_addr_generic(hw); } /** * e1000_power_down_phy_copper_80003es2lan - Remove link during PHY power down * @hw: pointer to the HW structure * * In the case of a PHY power down to save power, or to turn off link during a * driver unload, or wake on lan is not enabled, remove the link. **/ static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw) { /* If the management interface is not enabled, then power down */ if (!(hw->mac.ops.check_mng_mode(hw) || hw->phy.ops.check_reset_block(hw))) e1000_power_down_phy_copper(hw); return; } /** * e1000_clear_hw_cntrs_80003es2lan - Clear device specific hardware counters * @hw: pointer to the HW structure * * Clears the hardware counters by reading the counter registers. **/ static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_clear_hw_cntrs_80003es2lan"); e1000_clear_hw_cntrs_base_generic(hw); E1000_READ_REG(hw, E1000_PRC64); E1000_READ_REG(hw, E1000_PRC127); E1000_READ_REG(hw, E1000_PRC255); E1000_READ_REG(hw, E1000_PRC511); E1000_READ_REG(hw, E1000_PRC1023); E1000_READ_REG(hw, E1000_PRC1522); E1000_READ_REG(hw, E1000_PTC64); E1000_READ_REG(hw, E1000_PTC127); E1000_READ_REG(hw, E1000_PTC255); E1000_READ_REG(hw, E1000_PTC511); E1000_READ_REG(hw, E1000_PTC1023); E1000_READ_REG(hw, E1000_PTC1522); E1000_READ_REG(hw, E1000_ALGNERRC); E1000_READ_REG(hw, E1000_RXERRC); E1000_READ_REG(hw, E1000_TNCRS); E1000_READ_REG(hw, E1000_CEXTERR); E1000_READ_REG(hw, E1000_TSCTC); E1000_READ_REG(hw, E1000_TSCTFC); E1000_READ_REG(hw, E1000_MGTPRC); E1000_READ_REG(hw, E1000_MGTPDC); E1000_READ_REG(hw, E1000_MGTPTC); E1000_READ_REG(hw, E1000_IAC); E1000_READ_REG(hw, E1000_ICRXOC); E1000_READ_REG(hw, E1000_ICRXPTC); E1000_READ_REG(hw, E1000_ICRXATC); E1000_READ_REG(hw, E1000_ICTXPTC); E1000_READ_REG(hw, E1000_ICTXATC); E1000_READ_REG(hw, E1000_ICTXQEC); E1000_READ_REG(hw, E1000_ICTXQMTC); E1000_READ_REG(hw, E1000_ICRXDMTC); } Index: projects/clang-trunk/sys/dev/e1000/e1000_82540.c =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_82540.c (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_82540.c (revision 287502) @@ -1,718 +1,718 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ /* * 82540EM Gigabit Ethernet Controller * 82540EP Gigabit Ethernet Controller * 82545EM Gigabit Ethernet Controller (Copper) * 82545EM Gigabit Ethernet Controller (Fiber) * 82545GM Gigabit Ethernet Controller * 82546EB Gigabit Ethernet Controller (Copper) * 82546EB Gigabit Ethernet Controller (Fiber) * 82546GB Gigabit Ethernet Controller */ #include "e1000_api.h" static s32 e1000_init_phy_params_82540(struct e1000_hw *hw); static s32 e1000_init_nvm_params_82540(struct e1000_hw *hw); static s32 e1000_init_mac_params_82540(struct e1000_hw *hw); static s32 e1000_adjust_serdes_amplitude_82540(struct e1000_hw *hw); static void e1000_clear_hw_cntrs_82540(struct e1000_hw *hw); static s32 e1000_init_hw_82540(struct e1000_hw *hw); static s32 e1000_reset_hw_82540(struct e1000_hw *hw); static s32 e1000_set_phy_mode_82540(struct e1000_hw *hw); static s32 e1000_set_vco_speed_82540(struct e1000_hw *hw); static s32 e1000_setup_copper_link_82540(struct e1000_hw *hw); static s32 e1000_setup_fiber_serdes_link_82540(struct e1000_hw *hw); static void e1000_power_down_phy_copper_82540(struct e1000_hw *hw); static s32 e1000_read_mac_addr_82540(struct e1000_hw *hw); /** * e1000_init_phy_params_82540 - Init PHY func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_phy_params_82540(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; + s32 ret_val = E1000_SUCCESS; phy->addr = 1; phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; phy->reset_delay_us = 10000; phy->type = e1000_phy_m88; /* Function Pointers */ phy->ops.check_polarity = e1000_check_polarity_m88; phy->ops.commit = e1000_phy_sw_reset_generic; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; phy->ops.get_cable_length = e1000_get_cable_length_m88; phy->ops.get_cfg_done = e1000_get_cfg_done_generic; phy->ops.read_reg = e1000_read_phy_reg_m88; phy->ops.reset = e1000_phy_hw_reset_generic; phy->ops.write_reg = e1000_write_phy_reg_m88; phy->ops.get_info = e1000_get_phy_info_m88; phy->ops.power_up = e1000_power_up_phy_copper; phy->ops.power_down = e1000_power_down_phy_copper_82540; ret_val = e1000_get_phy_id(hw); if (ret_val) goto out; /* Verify phy id */ switch (hw->mac.type) { case e1000_82540: case e1000_82545: case e1000_82545_rev_3: case e1000_82546: case e1000_82546_rev_3: if (phy->id == M88E1011_I_PHY_ID) break; /* Fall Through */ default: ret_val = -E1000_ERR_PHY; goto out; break; } out: return ret_val; } /** * e1000_init_nvm_params_82540 - Init NVM func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_nvm_params_82540(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; u32 eecd = E1000_READ_REG(hw, E1000_EECD); DEBUGFUNC("e1000_init_nvm_params_82540"); nvm->type = e1000_nvm_eeprom_microwire; nvm->delay_usec = 50; nvm->opcode_bits = 3; switch (nvm->override) { case e1000_nvm_override_microwire_large: nvm->address_bits = 8; nvm->word_size = 256; break; case e1000_nvm_override_microwire_small: nvm->address_bits = 6; nvm->word_size = 64; break; default: nvm->address_bits = eecd & E1000_EECD_SIZE ? 8 : 6; nvm->word_size = eecd & E1000_EECD_SIZE ? 256 : 64; break; } /* Function Pointers */ nvm->ops.acquire = e1000_acquire_nvm_generic; nvm->ops.read = e1000_read_nvm_microwire; nvm->ops.release = e1000_release_nvm_generic; nvm->ops.update = e1000_update_nvm_checksum_generic; nvm->ops.valid_led_default = e1000_valid_led_default_generic; nvm->ops.validate = e1000_validate_nvm_checksum_generic; nvm->ops.write = e1000_write_nvm_microwire; return E1000_SUCCESS; } /** * e1000_init_mac_params_82540 - Init MAC func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_mac_params_82540(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_init_mac_params_82540"); /* Set media type */ switch (hw->device_id) { case E1000_DEV_ID_82545EM_FIBER: case E1000_DEV_ID_82545GM_FIBER: case E1000_DEV_ID_82546EB_FIBER: case E1000_DEV_ID_82546GB_FIBER: hw->phy.media_type = e1000_media_type_fiber; break; case E1000_DEV_ID_82545GM_SERDES: case E1000_DEV_ID_82546GB_SERDES: hw->phy.media_type = e1000_media_type_internal_serdes; break; default: hw->phy.media_type = e1000_media_type_copper; break; } /* Set mta register count */ mac->mta_reg_count = 128; /* Set rar entry count */ mac->rar_entry_count = E1000_RAR_ENTRIES; /* Function pointers */ /* bus type/speed/width */ mac->ops.get_bus_info = e1000_get_bus_info_pci_generic; /* function id */ mac->ops.set_lan_id = e1000_set_lan_id_multi_port_pci; /* reset */ mac->ops.reset_hw = e1000_reset_hw_82540; /* hw initialization */ mac->ops.init_hw = e1000_init_hw_82540; /* link setup */ mac->ops.setup_link = e1000_setup_link_generic; /* physical interface setup */ mac->ops.setup_physical_interface = (hw->phy.media_type == e1000_media_type_copper) ? e1000_setup_copper_link_82540 : e1000_setup_fiber_serdes_link_82540; /* check for link */ switch (hw->phy.media_type) { case e1000_media_type_copper: mac->ops.check_for_link = e1000_check_for_copper_link_generic; break; case e1000_media_type_fiber: mac->ops.check_for_link = e1000_check_for_fiber_link_generic; break; case e1000_media_type_internal_serdes: mac->ops.check_for_link = e1000_check_for_serdes_link_generic; break; default: ret_val = -E1000_ERR_CONFIG; goto out; break; } /* link info */ mac->ops.get_link_up_info = (hw->phy.media_type == e1000_media_type_copper) ? e1000_get_speed_and_duplex_copper_generic : e1000_get_speed_and_duplex_fiber_serdes_generic; /* multicast address update */ mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic; /* writing VFTA */ mac->ops.write_vfta = e1000_write_vfta_generic; /* clearing VFTA */ mac->ops.clear_vfta = e1000_clear_vfta_generic; /* read mac address */ mac->ops.read_mac_addr = e1000_read_mac_addr_82540; /* ID LED init */ mac->ops.id_led_init = e1000_id_led_init_generic; /* setup LED */ mac->ops.setup_led = e1000_setup_led_generic; /* cleanup LED */ mac->ops.cleanup_led = e1000_cleanup_led_generic; /* turn on/off LED */ mac->ops.led_on = e1000_led_on_generic; mac->ops.led_off = e1000_led_off_generic; /* clear hardware counters */ mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_82540; out: return ret_val; } /** * e1000_init_function_pointers_82540 - Init func ptrs. * @hw: pointer to the HW structure * * Called to initialize all function pointers and parameters. **/ void e1000_init_function_pointers_82540(struct e1000_hw *hw) { DEBUGFUNC("e1000_init_function_pointers_82540"); hw->mac.ops.init_params = e1000_init_mac_params_82540; hw->nvm.ops.init_params = e1000_init_nvm_params_82540; hw->phy.ops.init_params = e1000_init_phy_params_82540; } /** * e1000_reset_hw_82540 - Reset hardware * @hw: pointer to the HW structure * * This resets the hardware into a known state. **/ static s32 e1000_reset_hw_82540(struct e1000_hw *hw) { u32 ctrl, manc; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_reset_hw_82540"); DEBUGOUT("Masking off all interrupts\n"); E1000_WRITE_REG(hw, E1000_IMC, 0xFFFFFFFF); E1000_WRITE_REG(hw, E1000_RCTL, 0); E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); E1000_WRITE_FLUSH(hw); /* * Delay to allow any outstanding PCI transactions to complete * before resetting the device. */ msec_delay(10); ctrl = E1000_READ_REG(hw, E1000_CTRL); DEBUGOUT("Issuing a global reset to 82540/82545/82546 MAC\n"); switch (hw->mac.type) { case e1000_82545_rev_3: case e1000_82546_rev_3: E1000_WRITE_REG(hw, E1000_CTRL_DUP, ctrl | E1000_CTRL_RST); break; default: /* * These controllers can't ack the 64-bit write when * issuing the reset, so we use IO-mapping as a * workaround to issue the reset. */ E1000_WRITE_REG_IO(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); break; } /* Wait for EEPROM reload */ msec_delay(5); /* Disable HW ARPs on ASF enabled adapters */ manc = E1000_READ_REG(hw, E1000_MANC); manc &= ~E1000_MANC_ARP_EN; E1000_WRITE_REG(hw, E1000_MANC, manc); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_READ_REG(hw, E1000_ICR); return ret_val; } /** * e1000_init_hw_82540 - Initialize hardware * @hw: pointer to the HW structure * * This inits the hardware readying it for operation. **/ static s32 e1000_init_hw_82540(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 txdctl, ctrl_ext; - s32 ret_val; + s32 ret_val = E1000_SUCCESS; u16 i; DEBUGFUNC("e1000_init_hw_82540"); /* Initialize identification LED */ ret_val = mac->ops.id_led_init(hw); if (ret_val) { DEBUGOUT("Error initializing identification LED\n"); /* This is not fatal and we should not stop init due to this */ } /* Disabling VLAN filtering */ DEBUGOUT("Initializing the IEEE VLAN\n"); if (mac->type < e1000_82545_rev_3) E1000_WRITE_REG(hw, E1000_VET, 0); mac->ops.clear_vfta(hw); /* Setup the receive address. */ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); /* Zero out the Multicast HASH table */ DEBUGOUT("Zeroing the MTA\n"); for (i = 0; i < mac->mta_reg_count; i++) { E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); /* * Avoid back to back register writes by adding the register * read (flush). This is to protect against some strange * bridge configurations that may issue Memory Write Block * (MWB) to our register space. The *_rev_3 hardware at * least doesn't respond correctly to every other dword in an * MWB to our register space. */ E1000_WRITE_FLUSH(hw); } if (mac->type < e1000_82545_rev_3) e1000_pcix_mmrbc_workaround_generic(hw); /* Setup link and flow control */ ret_val = mac->ops.setup_link(hw); txdctl = E1000_READ_REG(hw, E1000_TXDCTL(0)); txdctl = (txdctl & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB; E1000_WRITE_REG(hw, E1000_TXDCTL(0), txdctl); /* * Clear all of the statistics registers (clear on read). It is * important that we do this after we have tried to establish link * because the symbol error count will increment wildly if there * is no link. */ e1000_clear_hw_cntrs_82540(hw); if ((hw->device_id == E1000_DEV_ID_82546GB_QUAD_COPPER) || (hw->device_id == E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3)) { ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); /* * Relaxed ordering must be disabled to avoid a parity * error crash in a PCI slot. */ ctrl_ext |= E1000_CTRL_EXT_RO_DIS; E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); } return ret_val; } /** * e1000_setup_copper_link_82540 - Configure copper link settings * @hw: pointer to the HW structure * * Calls the appropriate function to configure the link for auto-neg or forced * speed and duplex. Then we check for link, once link is established calls * to configure collision distance and flow control are called. If link is * not established, we return -E1000_ERR_PHY (-2). **/ static s32 e1000_setup_copper_link_82540(struct e1000_hw *hw) { u32 ctrl; - s32 ret_val; + s32 ret_val = E1000_SUCCESS; u16 data; DEBUGFUNC("e1000_setup_copper_link_82540"); ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= E1000_CTRL_SLU; ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ret_val = e1000_set_phy_mode_82540(hw); if (ret_val) goto out; if (hw->mac.type == e1000_82545_rev_3 || hw->mac.type == e1000_82546_rev_3) { ret_val = hw->phy.ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &data); if (ret_val) goto out; data |= 0x00000008; ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, data); if (ret_val) goto out; } ret_val = e1000_copper_link_setup_m88(hw); if (ret_val) goto out; ret_val = e1000_setup_copper_link_generic(hw); out: return ret_val; } /** * e1000_setup_fiber_serdes_link_82540 - Setup link for fiber/serdes * @hw: pointer to the HW structure * * Set the output amplitude to the value in the EEPROM and adjust the VCO * speed to improve Bit Error Rate (BER) performance. Configures collision * distance and flow control for fiber and serdes links. Upon successful * setup, poll for link. **/ static s32 e1000_setup_fiber_serdes_link_82540(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_setup_fiber_serdes_link_82540"); switch (mac->type) { case e1000_82545_rev_3: case e1000_82546_rev_3: if (hw->phy.media_type == e1000_media_type_internal_serdes) { /* * If we're on serdes media, adjust the output * amplitude to value set in the EEPROM. */ ret_val = e1000_adjust_serdes_amplitude_82540(hw); if (ret_val) goto out; } /* Adjust VCO speed to improve BER performance */ ret_val = e1000_set_vco_speed_82540(hw); if (ret_val) goto out; default: break; } ret_val = e1000_setup_fiber_serdes_link_generic(hw); out: return ret_val; } /** * e1000_adjust_serdes_amplitude_82540 - Adjust amplitude based on EEPROM * @hw: pointer to the HW structure * * Adjust the SERDES output amplitude based on the EEPROM settings. **/ static s32 e1000_adjust_serdes_amplitude_82540(struct e1000_hw *hw) { - s32 ret_val; + s32 ret_val = E1000_SUCCESS; u16 nvm_data; DEBUGFUNC("e1000_adjust_serdes_amplitude_82540"); ret_val = hw->nvm.ops.read(hw, NVM_SERDES_AMPLITUDE, 1, &nvm_data); if (ret_val) goto out; if (nvm_data != NVM_RESERVED_WORD) { /* Adjust serdes output amplitude only. */ nvm_data &= NVM_SERDES_AMPLITUDE_MASK; ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_EXT_CTRL, nvm_data); if (ret_val) goto out; } out: return ret_val; } /** * e1000_set_vco_speed_82540 - Set VCO speed for better performance * @hw: pointer to the HW structure * * Set the VCO speed to improve Bit Error Rate (BER) performance. **/ static s32 e1000_set_vco_speed_82540(struct e1000_hw *hw) { - s32 ret_val; + s32 ret_val = E1000_SUCCESS; u16 default_page = 0; u16 phy_data; DEBUGFUNC("e1000_set_vco_speed_82540"); /* Set PHY register 30, page 5, bit 8 to 0 */ ret_val = hw->phy.ops.read_reg(hw, M88E1000_PHY_PAGE_SELECT, &default_page); if (ret_val) goto out; ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0005); if (ret_val) goto out; ret_val = hw->phy.ops.read_reg(hw, M88E1000_PHY_GEN_CONTROL, &phy_data); if (ret_val) goto out; phy_data &= ~M88E1000_PHY_VCO_REG_BIT8; ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, phy_data); if (ret_val) goto out; /* Set PHY register 30, page 4, bit 11 to 1 */ ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0004); if (ret_val) goto out; ret_val = hw->phy.ops.read_reg(hw, M88E1000_PHY_GEN_CONTROL, &phy_data); if (ret_val) goto out; phy_data |= M88E1000_PHY_VCO_REG_BIT11; ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, phy_data); if (ret_val) goto out; ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_PAGE_SELECT, default_page); out: return ret_val; } /** * e1000_set_phy_mode_82540 - Set PHY to class A mode * @hw: pointer to the HW structure * * Sets the PHY to class A mode and assumes the following operations will * follow to enable the new class mode: * 1. Do a PHY soft reset. * 2. Restart auto-negotiation or force link. **/ static s32 e1000_set_phy_mode_82540(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 nvm_data; DEBUGFUNC("e1000_set_phy_mode_82540"); if (hw->mac.type != e1000_82545_rev_3) goto out; ret_val = hw->nvm.ops.read(hw, NVM_PHY_CLASS_WORD, 1, &nvm_data); if (ret_val) { ret_val = -E1000_ERR_PHY; goto out; } if ((nvm_data != NVM_RESERVED_WORD) && (nvm_data & NVM_PHY_CLASS_A)) { ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x000B); if (ret_val) { ret_val = -E1000_ERR_PHY; goto out; } ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0x8104); if (ret_val) { ret_val = -E1000_ERR_PHY; goto out; } } out: return ret_val; } /** * e1000_power_down_phy_copper_82540 - Remove link in case of PHY power down * @hw: pointer to the HW structure * * In the case of a PHY power down to save power, or to turn off link during a * driver unload, or wake on lan is not enabled, remove the link. **/ static void e1000_power_down_phy_copper_82540(struct e1000_hw *hw) { /* If the management interface is not enabled, then power down */ if (!(E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_SMBUS_EN)) e1000_power_down_phy_copper(hw); return; } /** * e1000_clear_hw_cntrs_82540 - Clear device specific hardware counters * @hw: pointer to the HW structure * * Clears the hardware counters by reading the counter registers. **/ static void e1000_clear_hw_cntrs_82540(struct e1000_hw *hw) { DEBUGFUNC("e1000_clear_hw_cntrs_82540"); e1000_clear_hw_cntrs_base_generic(hw); E1000_READ_REG(hw, E1000_PRC64); E1000_READ_REG(hw, E1000_PRC127); E1000_READ_REG(hw, E1000_PRC255); E1000_READ_REG(hw, E1000_PRC511); E1000_READ_REG(hw, E1000_PRC1023); E1000_READ_REG(hw, E1000_PRC1522); E1000_READ_REG(hw, E1000_PTC64); E1000_READ_REG(hw, E1000_PTC127); E1000_READ_REG(hw, E1000_PTC255); E1000_READ_REG(hw, E1000_PTC511); E1000_READ_REG(hw, E1000_PTC1023); E1000_READ_REG(hw, E1000_PTC1522); E1000_READ_REG(hw, E1000_ALGNERRC); E1000_READ_REG(hw, E1000_RXERRC); E1000_READ_REG(hw, E1000_TNCRS); E1000_READ_REG(hw, E1000_CEXTERR); E1000_READ_REG(hw, E1000_TSCTC); E1000_READ_REG(hw, E1000_TSCTFC); E1000_READ_REG(hw, E1000_MGTPRC); E1000_READ_REG(hw, E1000_MGTPDC); E1000_READ_REG(hw, E1000_MGTPTC); } /** * e1000_read_mac_addr_82540 - Read device MAC address * @hw: pointer to the HW structure * * Reads the device MAC address from the EEPROM and stores the value. * Since devices with two ports use the same EEPROM, we increment the * last bit in the MAC address for the second port. * * This version is being used over generic because of customer issues * with VmWare and Virtual Box when using generic. It seems in * the emulated 82545, RAR[0] does NOT have a valid address after a * reset, this older method works and using this breaks nothing for * these legacy adapters. **/ s32 e1000_read_mac_addr_82540(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 offset, nvm_data, i; DEBUGFUNC("e1000_read_mac_addr"); for (i = 0; i < ETH_ADDR_LEN; i += 2) { offset = i >> 1; ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); goto out; } hw->mac.perm_addr[i] = (u8)(nvm_data & 0xFF); hw->mac.perm_addr[i+1] = (u8)(nvm_data >> 8); } /* Flip last bit of mac address if we're on second port */ if (hw->bus.func == E1000_FUNC_1) hw->mac.perm_addr[5] ^= 1; for (i = 0; i < ETH_ADDR_LEN; i++) hw->mac.addr[i] = hw->mac.perm_addr[i]; out: return ret_val; } Index: projects/clang-trunk/sys/dev/e1000/e1000_82541.c =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_82541.c (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_82541.c (revision 287502) @@ -1,1304 +1,1303 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ /* * 82541EI Gigabit Ethernet Controller * 82541ER Gigabit Ethernet Controller * 82541GI Gigabit Ethernet Controller * 82541PI Gigabit Ethernet Controller * 82547EI Gigabit Ethernet Controller * 82547GI Gigabit Ethernet Controller */ #include "e1000_api.h" static s32 e1000_init_phy_params_82541(struct e1000_hw *hw); static s32 e1000_init_nvm_params_82541(struct e1000_hw *hw); static s32 e1000_init_mac_params_82541(struct e1000_hw *hw); static s32 e1000_reset_hw_82541(struct e1000_hw *hw); static s32 e1000_init_hw_82541(struct e1000_hw *hw); static s32 e1000_get_link_up_info_82541(struct e1000_hw *hw, u16 *speed, u16 *duplex); static s32 e1000_phy_hw_reset_82541(struct e1000_hw *hw); static s32 e1000_setup_copper_link_82541(struct e1000_hw *hw); static s32 e1000_check_for_link_82541(struct e1000_hw *hw); static s32 e1000_get_cable_length_igp_82541(struct e1000_hw *hw); static s32 e1000_set_d3_lplu_state_82541(struct e1000_hw *hw, bool active); static s32 e1000_setup_led_82541(struct e1000_hw *hw); static s32 e1000_cleanup_led_82541(struct e1000_hw *hw); static void e1000_clear_hw_cntrs_82541(struct e1000_hw *hw); static s32 e1000_read_mac_addr_82541(struct e1000_hw *hw); static s32 e1000_config_dsp_after_link_change_82541(struct e1000_hw *hw, bool link_up); static s32 e1000_phy_init_script_82541(struct e1000_hw *hw); static void e1000_power_down_phy_copper_82541(struct e1000_hw *hw); static const u16 e1000_igp_cable_length_table[] = { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 25, 25, 25, 25, 25, 25, 25, 30, 30, 30, 30, 40, 40, 40, 40, 40, 40, 40, 40, 40, 50, 50, 50, 50, 50, 50, 50, 60, 60, 60, 60, 60, 60, 60, 60, 60, 70, 70, 70, 70, 70, 70, 80, 80, 80, 80, 80, 80, 90, 90, 90, 90, 90, 90, 90, 90, 90, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120}; #define IGP01E1000_AGC_LENGTH_TABLE_SIZE \ (sizeof(e1000_igp_cable_length_table) / \ sizeof(e1000_igp_cable_length_table[0])) /** * e1000_init_phy_params_82541 - Init PHY func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_phy_params_82541(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; - s32 ret_val; + s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_init_phy_params_82541"); phy->addr = 1; phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; phy->reset_delay_us = 10000; phy->type = e1000_phy_igp; /* Function Pointers */ phy->ops.check_polarity = e1000_check_polarity_igp; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_igp; phy->ops.get_cable_length = e1000_get_cable_length_igp_82541; phy->ops.get_cfg_done = e1000_get_cfg_done_generic; phy->ops.get_info = e1000_get_phy_info_igp; phy->ops.read_reg = e1000_read_phy_reg_igp; phy->ops.reset = e1000_phy_hw_reset_82541; phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82541; phy->ops.write_reg = e1000_write_phy_reg_igp; phy->ops.power_up = e1000_power_up_phy_copper; phy->ops.power_down = e1000_power_down_phy_copper_82541; ret_val = e1000_get_phy_id(hw); if (ret_val) goto out; /* Verify phy id */ if (phy->id != IGP01E1000_I_PHY_ID) { ret_val = -E1000_ERR_PHY; goto out; } out: return ret_val; } /** * e1000_init_nvm_params_82541 - Init NVM func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_nvm_params_82541(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; s32 ret_val = E1000_SUCCESS; u32 eecd = E1000_READ_REG(hw, E1000_EECD); u16 size; DEBUGFUNC("e1000_init_nvm_params_82541"); switch (nvm->override) { case e1000_nvm_override_spi_large: nvm->type = e1000_nvm_eeprom_spi; eecd |= E1000_EECD_ADDR_BITS; break; case e1000_nvm_override_spi_small: nvm->type = e1000_nvm_eeprom_spi; eecd &= ~E1000_EECD_ADDR_BITS; break; case e1000_nvm_override_microwire_large: nvm->type = e1000_nvm_eeprom_microwire; eecd |= E1000_EECD_SIZE; break; case e1000_nvm_override_microwire_small: nvm->type = e1000_nvm_eeprom_microwire; eecd &= ~E1000_EECD_SIZE; break; default: nvm->type = eecd & E1000_EECD_TYPE ? e1000_nvm_eeprom_spi : e1000_nvm_eeprom_microwire; break; } if (nvm->type == e1000_nvm_eeprom_spi) { nvm->address_bits = (eecd & E1000_EECD_ADDR_BITS) ? 16 : 8; nvm->delay_usec = 1; nvm->opcode_bits = 8; nvm->page_size = (eecd & E1000_EECD_ADDR_BITS) ? 32 : 8; /* Function Pointers */ nvm->ops.acquire = e1000_acquire_nvm_generic; nvm->ops.read = e1000_read_nvm_spi; nvm->ops.release = e1000_release_nvm_generic; nvm->ops.update = e1000_update_nvm_checksum_generic; nvm->ops.valid_led_default = e1000_valid_led_default_generic; nvm->ops.validate = e1000_validate_nvm_checksum_generic; nvm->ops.write = e1000_write_nvm_spi; /* * nvm->word_size must be discovered after the pointers * are set so we can verify the size from the nvm image * itself. Temporarily set it to a dummy value so the * read will work. */ nvm->word_size = 64; ret_val = nvm->ops.read(hw, NVM_CFG, 1, &size); if (ret_val) goto out; size = (size & NVM_SIZE_MASK) >> NVM_SIZE_SHIFT; /* * if size != 0, it can be added to a constant and become * the left-shift value to set the word_size. Otherwise, * word_size stays at 64. */ if (size) { size += NVM_WORD_SIZE_BASE_SHIFT_82541; nvm->word_size = 1 << size; } } else { nvm->address_bits = (eecd & E1000_EECD_ADDR_BITS) ? 8 : 6; nvm->delay_usec = 50; nvm->opcode_bits = 3; nvm->word_size = (eecd & E1000_EECD_ADDR_BITS) ? 256 : 64; /* Function Pointers */ nvm->ops.acquire = e1000_acquire_nvm_generic; nvm->ops.read = e1000_read_nvm_microwire; nvm->ops.release = e1000_release_nvm_generic; nvm->ops.update = e1000_update_nvm_checksum_generic; nvm->ops.valid_led_default = e1000_valid_led_default_generic; nvm->ops.validate = e1000_validate_nvm_checksum_generic; nvm->ops.write = e1000_write_nvm_microwire; } out: return ret_val; } /** * e1000_init_mac_params_82541 - Init MAC func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_mac_params_82541(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; DEBUGFUNC("e1000_init_mac_params_82541"); /* Set media type */ hw->phy.media_type = e1000_media_type_copper; /* Set mta register count */ mac->mta_reg_count = 128; /* Set rar entry count */ mac->rar_entry_count = E1000_RAR_ENTRIES; /* Set if part includes ASF firmware */ mac->asf_firmware_present = TRUE; /* Function Pointers */ /* bus type/speed/width */ mac->ops.get_bus_info = e1000_get_bus_info_pci_generic; /* function id */ mac->ops.set_lan_id = e1000_set_lan_id_single_port; /* reset */ mac->ops.reset_hw = e1000_reset_hw_82541; /* hw initialization */ mac->ops.init_hw = e1000_init_hw_82541; /* link setup */ mac->ops.setup_link = e1000_setup_link_generic; /* physical interface link setup */ mac->ops.setup_physical_interface = e1000_setup_copper_link_82541; /* check for link */ mac->ops.check_for_link = e1000_check_for_link_82541; /* link info */ mac->ops.get_link_up_info = e1000_get_link_up_info_82541; /* multicast address update */ mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic; /* writing VFTA */ mac->ops.write_vfta = e1000_write_vfta_generic; /* clearing VFTA */ mac->ops.clear_vfta = e1000_clear_vfta_generic; /* read mac address */ mac->ops.read_mac_addr = e1000_read_mac_addr_82541; /* ID LED init */ mac->ops.id_led_init = e1000_id_led_init_generic; /* setup LED */ mac->ops.setup_led = e1000_setup_led_82541; /* cleanup LED */ mac->ops.cleanup_led = e1000_cleanup_led_82541; /* turn on/off LED */ mac->ops.led_on = e1000_led_on_generic; mac->ops.led_off = e1000_led_off_generic; /* clear hardware counters */ mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_82541; return E1000_SUCCESS; } /** * e1000_init_function_pointers_82541 - Init func ptrs. * @hw: pointer to the HW structure * * Called to initialize all function pointers and parameters. **/ void e1000_init_function_pointers_82541(struct e1000_hw *hw) { DEBUGFUNC("e1000_init_function_pointers_82541"); hw->mac.ops.init_params = e1000_init_mac_params_82541; hw->nvm.ops.init_params = e1000_init_nvm_params_82541; hw->phy.ops.init_params = e1000_init_phy_params_82541; } /** * e1000_reset_hw_82541 - Reset hardware * @hw: pointer to the HW structure * * This resets the hardware into a known state. **/ static s32 e1000_reset_hw_82541(struct e1000_hw *hw) { - u32 ledctl, ctrl, manc; + u32 ledctl, ctrl, icr, manc; DEBUGFUNC("e1000_reset_hw_82541"); DEBUGOUT("Masking off all interrupts\n"); E1000_WRITE_REG(hw, E1000_IMC, 0xFFFFFFFF); E1000_WRITE_REG(hw, E1000_RCTL, 0); E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); E1000_WRITE_FLUSH(hw); /* * Delay to allow any outstanding PCI transactions to complete * before resetting the device. */ msec_delay(10); ctrl = E1000_READ_REG(hw, E1000_CTRL); /* Must reset the Phy before resetting the MAC */ if ((hw->mac.type == e1000_82541) || (hw->mac.type == e1000_82547)) { E1000_WRITE_REG(hw, E1000_CTRL, (ctrl | E1000_CTRL_PHY_RST)); - E1000_WRITE_FLUSH(hw); msec_delay(5); } DEBUGOUT("Issuing a global reset to 82541/82547 MAC\n"); switch (hw->mac.type) { case e1000_82541: case e1000_82541_rev_2: /* * These controllers can't ack the 64-bit write when * issuing the reset, so we use IO-mapping as a * workaround to issue the reset. */ E1000_WRITE_REG_IO(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); break; default: E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); break; } /* Wait for NVM reload */ msec_delay(20); /* Disable HW ARPs on ASF enabled adapters */ manc = E1000_READ_REG(hw, E1000_MANC); manc &= ~E1000_MANC_ARP_EN; E1000_WRITE_REG(hw, E1000_MANC, manc); if ((hw->mac.type == e1000_82541) || (hw->mac.type == e1000_82547)) { e1000_phy_init_script_82541(hw); /* Configure activity LED after Phy reset */ ledctl = E1000_READ_REG(hw, E1000_LEDCTL); ledctl &= IGP_ACTIVITY_LED_MASK; ledctl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl); } /* Once again, mask the interrupts */ DEBUGOUT("Masking off all interrupts\n"); E1000_WRITE_REG(hw, E1000_IMC, 0xFFFFFFFF); /* Clear any pending interrupt events. */ - E1000_READ_REG(hw, E1000_ICR); + icr = E1000_READ_REG(hw, E1000_ICR); return E1000_SUCCESS; } /** * e1000_init_hw_82541 - Initialize hardware * @hw: pointer to the HW structure * * This inits the hardware readying it for operation. **/ static s32 e1000_init_hw_82541(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; struct e1000_dev_spec_82541 *dev_spec = &hw->dev_spec._82541; u32 i, txdctl; s32 ret_val; DEBUGFUNC("e1000_init_hw_82541"); /* Initialize identification LED */ ret_val = mac->ops.id_led_init(hw); if (ret_val) { DEBUGOUT("Error initializing identification LED\n"); /* This is not fatal and we should not stop init due to this */ } /* Storing the Speed Power Down value for later use */ ret_val = hw->phy.ops.read_reg(hw, IGP01E1000_GMII_FIFO, &dev_spec->spd_default); if (ret_val) goto out; /* Disabling VLAN filtering */ DEBUGOUT("Initializing the IEEE VLAN\n"); mac->ops.clear_vfta(hw); /* Setup the receive address. */ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); /* Zero out the Multicast HASH table */ DEBUGOUT("Zeroing the MTA\n"); for (i = 0; i < mac->mta_reg_count; i++) { E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); /* * Avoid back to back register writes by adding the register * read (flush). This is to protect against some strange * bridge configurations that may issue Memory Write Block * (MWB) to our register space. */ E1000_WRITE_FLUSH(hw); } /* Setup link and flow control */ ret_val = mac->ops.setup_link(hw); txdctl = E1000_READ_REG(hw, E1000_TXDCTL(0)); txdctl = (txdctl & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB; E1000_WRITE_REG(hw, E1000_TXDCTL(0), txdctl); /* * Clear all of the statistics registers (clear on read). It is * important that we do this after we have tried to establish link * because the symbol error count will increment wildly if there * is no link. */ e1000_clear_hw_cntrs_82541(hw); out: return ret_val; } /** * e1000_get_link_up_info_82541 - Report speed and duplex * @hw: pointer to the HW structure * @speed: pointer to speed buffer * @duplex: pointer to duplex buffer * * Retrieve the current speed and duplex configuration. **/ static s32 e1000_get_link_up_info_82541(struct e1000_hw *hw, u16 *speed, u16 *duplex) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 data; DEBUGFUNC("e1000_get_link_up_info_82541"); ret_val = e1000_get_speed_and_duplex_copper_generic(hw, speed, duplex); if (ret_val) goto out; if (!phy->speed_downgraded) goto out; /* * IGP01 PHY may advertise full duplex operation after speed * downgrade even if it is operating at half duplex. * Here we set the duplex settings to match the duplex in the * link partner's capabilities. */ ret_val = phy->ops.read_reg(hw, PHY_AUTONEG_EXP, &data); if (ret_val) goto out; if (!(data & NWAY_ER_LP_NWAY_CAPS)) { *duplex = HALF_DUPLEX; } else { ret_val = phy->ops.read_reg(hw, PHY_LP_ABILITY, &data); if (ret_val) goto out; if (*speed == SPEED_100) { if (!(data & NWAY_LPAR_100TX_FD_CAPS)) *duplex = HALF_DUPLEX; } else if (*speed == SPEED_10) { if (!(data & NWAY_LPAR_10T_FD_CAPS)) *duplex = HALF_DUPLEX; } } out: return ret_val; } /** * e1000_phy_hw_reset_82541 - PHY hardware reset * @hw: pointer to the HW structure * * Verify the reset block is not blocking us from resetting. Acquire * semaphore (if necessary) and read/set/write the device control reset * bit in the PHY. Wait the appropriate delay time for the device to * reset and release the semaphore (if necessary). **/ static s32 e1000_phy_hw_reset_82541(struct e1000_hw *hw) { s32 ret_val; u32 ledctl; DEBUGFUNC("e1000_phy_hw_reset_82541"); ret_val = e1000_phy_hw_reset_generic(hw); if (ret_val) goto out; e1000_phy_init_script_82541(hw); if ((hw->mac.type == e1000_82541) || (hw->mac.type == e1000_82547)) { /* Configure activity LED after PHY reset */ ledctl = E1000_READ_REG(hw, E1000_LEDCTL); ledctl &= IGP_ACTIVITY_LED_MASK; ledctl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl); } out: return ret_val; } /** * e1000_setup_copper_link_82541 - Configure copper link settings * @hw: pointer to the HW structure * * Calls the appropriate function to configure the link for auto-neg or forced * speed and duplex. Then we check for link, once link is established calls * to configure collision distance and flow control are called. If link is * not established, we return -E1000_ERR_PHY (-2). **/ static s32 e1000_setup_copper_link_82541(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; struct e1000_dev_spec_82541 *dev_spec = &hw->dev_spec._82541; s32 ret_val; u32 ctrl, ledctl; DEBUGFUNC("e1000_setup_copper_link_82541"); ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= E1000_CTRL_SLU; ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); /* Earlier revs of the IGP phy require us to force MDI. */ if (hw->mac.type == e1000_82541 || hw->mac.type == e1000_82547) { dev_spec->dsp_config = e1000_dsp_config_disabled; phy->mdix = 1; } else { dev_spec->dsp_config = e1000_dsp_config_enabled; } ret_val = e1000_copper_link_setup_igp(hw); if (ret_val) goto out; if (hw->mac.autoneg) { if (dev_spec->ffe_config == e1000_ffe_config_active) dev_spec->ffe_config = e1000_ffe_config_enabled; } /* Configure activity LED after Phy reset */ ledctl = E1000_READ_REG(hw, E1000_LEDCTL); ledctl &= IGP_ACTIVITY_LED_MASK; ledctl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl); ret_val = e1000_setup_copper_link_generic(hw); out: return ret_val; } /** * e1000_check_for_link_82541 - Check/Store link connection * @hw: pointer to the HW structure * * This checks the link condition of the adapter and stores the * results in the hw->mac structure. **/ static s32 e1000_check_for_link_82541(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; s32 ret_val; bool link; DEBUGFUNC("e1000_check_for_link_82541"); /* * We only want to go out to the PHY registers to see if Auto-Neg * has completed and/or if our link status has changed. The * get_link_status flag is set upon receiving a Link Status * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) { ret_val = E1000_SUCCESS; goto out; } /* * First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex * of the PHY. */ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); if (ret_val) goto out; if (!link) { ret_val = e1000_config_dsp_after_link_change_82541(hw, FALSE); goto out; /* No link detected */ } mac->get_link_status = FALSE; /* * Check if there was DownShift, must be checked * immediately after link-up */ e1000_check_downshift_generic(hw); /* * If we are forcing speed/duplex, then we simply return since * we have already determined whether we have link or not. */ if (!mac->autoneg) { ret_val = -E1000_ERR_CONFIG; goto out; } ret_val = e1000_config_dsp_after_link_change_82541(hw, TRUE); /* * Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to * configure Collision Distance in the MAC. */ mac->ops.config_collision_dist(hw); /* * Configure Flow Control now that Auto-Neg has completed. * First, we need to restore the desired flow control * settings because we may have had to re-autoneg with a * different link partner. */ ret_val = e1000_config_fc_after_link_up_generic(hw); if (ret_val) DEBUGOUT("Error configuring flow control\n"); out: return ret_val; } /** * e1000_config_dsp_after_link_change_82541 - Config DSP after link * @hw: pointer to the HW structure * @link_up: boolean flag for link up status * * Return E1000_ERR_PHY when failing to read/write the PHY, else E1000_SUCCESS * at any other case. * * 82541_rev_2 & 82547_rev_2 have the capability to configure the DSP when a * gigabit link is achieved to improve link quality. **/ static s32 e1000_config_dsp_after_link_change_82541(struct e1000_hw *hw, bool link_up) { struct e1000_phy_info *phy = &hw->phy; struct e1000_dev_spec_82541 *dev_spec = &hw->dev_spec._82541; s32 ret_val; u32 idle_errs = 0; u16 phy_data, phy_saved_data, speed, duplex, i; u16 ffe_idle_err_timeout = FFE_IDLE_ERR_COUNT_TIMEOUT_20; u16 dsp_reg_array[IGP01E1000_PHY_CHANNEL_NUM] = { IGP01E1000_PHY_AGC_PARAM_A, IGP01E1000_PHY_AGC_PARAM_B, IGP01E1000_PHY_AGC_PARAM_C, IGP01E1000_PHY_AGC_PARAM_D}; DEBUGFUNC("e1000_config_dsp_after_link_change_82541"); if (link_up) { ret_val = hw->mac.ops.get_link_up_info(hw, &speed, &duplex); if (ret_val) { DEBUGOUT("Error getting link speed and duplex\n"); goto out; } if (speed != SPEED_1000) { ret_val = E1000_SUCCESS; goto out; } ret_val = phy->ops.get_cable_length(hw); if (ret_val) goto out; if ((dev_spec->dsp_config == e1000_dsp_config_enabled) && phy->min_cable_length >= 50) { for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ret_val = phy->ops.read_reg(hw, dsp_reg_array[i], &phy_data); if (ret_val) goto out; phy_data &= ~IGP01E1000_PHY_EDAC_MU_INDEX; ret_val = phy->ops.write_reg(hw, dsp_reg_array[i], phy_data); if (ret_val) goto out; } dev_spec->dsp_config = e1000_dsp_config_activated; } if ((dev_spec->ffe_config != e1000_ffe_config_enabled) || (phy->min_cable_length >= 50)) { ret_val = E1000_SUCCESS; goto out; } /* clear previous idle error counts */ ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &phy_data); if (ret_val) goto out; for (i = 0; i < ffe_idle_err_timeout; i++) { usec_delay(1000); ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &phy_data); if (ret_val) goto out; idle_errs += (phy_data & SR_1000T_IDLE_ERROR_CNT); if (idle_errs > SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT) { dev_spec->ffe_config = e1000_ffe_config_active; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_DSP_FFE, IGP01E1000_PHY_DSP_FFE_CM_CP); if (ret_val) goto out; break; } if (idle_errs) ffe_idle_err_timeout = FFE_IDLE_ERR_COUNT_TIMEOUT_100; } } else { if (dev_spec->dsp_config == e1000_dsp_config_activated) { /* * Save off the current value of register 0x2F5B * to be restored at the end of the routines. */ ret_val = phy->ops.read_reg(hw, 0x2F5B, &phy_saved_data); if (ret_val) goto out; /* Disable the PHY transmitter */ ret_val = phy->ops.write_reg(hw, 0x2F5B, 0x0003); if (ret_val) goto out; msec_delay_irq(20); ret_val = phy->ops.write_reg(hw, 0x0000, IGP01E1000_IEEE_FORCE_GIG); if (ret_val) goto out; for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ret_val = phy->ops.read_reg(hw, dsp_reg_array[i], &phy_data); if (ret_val) goto out; phy_data &= ~IGP01E1000_PHY_EDAC_MU_INDEX; phy_data |= IGP01E1000_PHY_EDAC_SIGN_EXT_9_BITS; ret_val = phy->ops.write_reg(hw, dsp_reg_array[i], phy_data); if (ret_val) goto out; } ret_val = phy->ops.write_reg(hw, 0x0000, IGP01E1000_IEEE_RESTART_AUTONEG); if (ret_val) goto out; msec_delay_irq(20); /* Now enable the transmitter */ ret_val = phy->ops.write_reg(hw, 0x2F5B, phy_saved_data); if (ret_val) goto out; dev_spec->dsp_config = e1000_dsp_config_enabled; } if (dev_spec->ffe_config != e1000_ffe_config_active) { ret_val = E1000_SUCCESS; goto out; } /* * Save off the current value of register 0x2F5B * to be restored at the end of the routines. */ ret_val = phy->ops.read_reg(hw, 0x2F5B, &phy_saved_data); if (ret_val) goto out; /* Disable the PHY transmitter */ ret_val = phy->ops.write_reg(hw, 0x2F5B, 0x0003); if (ret_val) goto out; msec_delay_irq(20); ret_val = phy->ops.write_reg(hw, 0x0000, IGP01E1000_IEEE_FORCE_GIG); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_DSP_FFE, IGP01E1000_PHY_DSP_FFE_DEFAULT); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, 0x0000, IGP01E1000_IEEE_RESTART_AUTONEG); if (ret_val) goto out; msec_delay_irq(20); /* Now enable the transmitter */ ret_val = phy->ops.write_reg(hw, 0x2F5B, phy_saved_data); if (ret_val) goto out; dev_spec->ffe_config = e1000_ffe_config_enabled; } out: return ret_val; } /** * e1000_get_cable_length_igp_82541 - Determine cable length for igp PHY * @hw: pointer to the HW structure * * The automatic gain control (agc) normalizes the amplitude of the * received signal, adjusting for the attenuation produced by the * cable. By reading the AGC registers, which represent the * combination of coarse and fine gain value, the value can be put * into a lookup table to obtain the approximate cable length * for each channel. **/ static s32 e1000_get_cable_length_igp_82541(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; u16 i, data; u16 cur_agc_value, agc_value = 0; u16 min_agc_value = IGP01E1000_AGC_LENGTH_TABLE_SIZE; u16 agc_reg_array[IGP01E1000_PHY_CHANNEL_NUM] = {IGP01E1000_PHY_AGC_A, IGP01E1000_PHY_AGC_B, IGP01E1000_PHY_AGC_C, IGP01E1000_PHY_AGC_D}; DEBUGFUNC("e1000_get_cable_length_igp_82541"); /* Read the AGC registers for all channels */ for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ret_val = phy->ops.read_reg(hw, agc_reg_array[i], &data); if (ret_val) goto out; cur_agc_value = data >> IGP01E1000_AGC_LENGTH_SHIFT; /* Bounds checking */ if ((cur_agc_value >= IGP01E1000_AGC_LENGTH_TABLE_SIZE - 1) || (cur_agc_value == 0)) { ret_val = -E1000_ERR_PHY; goto out; } agc_value += cur_agc_value; if (min_agc_value > cur_agc_value) min_agc_value = cur_agc_value; } /* Remove the minimal AGC result for length < 50m */ if (agc_value < IGP01E1000_PHY_CHANNEL_NUM * 50) { agc_value -= min_agc_value; /* Average the three remaining channels for the length. */ agc_value /= (IGP01E1000_PHY_CHANNEL_NUM - 1); } else { /* Average the channels for the length. */ agc_value /= IGP01E1000_PHY_CHANNEL_NUM; } phy->min_cable_length = (e1000_igp_cable_length_table[agc_value] > IGP01E1000_AGC_RANGE) ? (e1000_igp_cable_length_table[agc_value] - IGP01E1000_AGC_RANGE) : 0; phy->max_cable_length = e1000_igp_cable_length_table[agc_value] + IGP01E1000_AGC_RANGE; phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; out: return ret_val; } /** * e1000_set_d3_lplu_state_82541 - Sets low power link up state for D3 * @hw: pointer to the HW structure * @active: boolean used to enable/disable lplu * * Success returns 0, Failure returns 1 * * The low power link up (lplu) state is set to the power management level D3 * and SmartSpeed is disabled when active is TRUE, else clear lplu for D3 * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU * is used during Dx states where the power conservation is most important. * During driver activity, SmartSpeed should be enabled so performance is * maintained. **/ static s32 e1000_set_d3_lplu_state_82541(struct e1000_hw *hw, bool active) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 data; DEBUGFUNC("e1000_set_d3_lplu_state_82541"); switch (hw->mac.type) { case e1000_82541_rev_2: case e1000_82547_rev_2: break; default: ret_val = e1000_set_d3_lplu_state_generic(hw, active); goto out; break; } ret_val = phy->ops.read_reg(hw, IGP01E1000_GMII_FIFO, &data); if (ret_val) goto out; if (!active) { data &= ~IGP01E1000_GMII_FLEX_SPD; ret_val = phy->ops.write_reg(hw, IGP01E1000_GMII_FIFO, data); if (ret_val) goto out; /* * LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. */ if (phy->smart_speed == e1000_smart_speed_on) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) goto out; data |= IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) goto out; } else if (phy->smart_speed == e1000_smart_speed_off) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) goto out; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) goto out; } } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { data |= IGP01E1000_GMII_FLEX_SPD; ret_val = phy->ops.write_reg(hw, IGP01E1000_GMII_FIFO, data); if (ret_val) goto out; /* When LPLU is enabled, we should disable SmartSpeed */ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) goto out; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); } out: return ret_val; } /** * e1000_setup_led_82541 - Configures SW controllable LED * @hw: pointer to the HW structure * * This prepares the SW controllable LED for use and saves the current state * of the LED so it can be later restored. **/ static s32 e1000_setup_led_82541(struct e1000_hw *hw) { struct e1000_dev_spec_82541 *dev_spec = &hw->dev_spec._82541; s32 ret_val; DEBUGFUNC("e1000_setup_led_82541"); ret_val = hw->phy.ops.read_reg(hw, IGP01E1000_GMII_FIFO, &dev_spec->spd_default); if (ret_val) goto out; ret_val = hw->phy.ops.write_reg(hw, IGP01E1000_GMII_FIFO, (u16)(dev_spec->spd_default & ~IGP01E1000_GMII_SPD)); if (ret_val) goto out; E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); out: return ret_val; } /** * e1000_cleanup_led_82541 - Set LED config to default operation * @hw: pointer to the HW structure * * Remove the current LED configuration and set the LED configuration * to the default value, saved from the EEPROM. **/ static s32 e1000_cleanup_led_82541(struct e1000_hw *hw) { struct e1000_dev_spec_82541 *dev_spec = &hw->dev_spec._82541; s32 ret_val; DEBUGFUNC("e1000_cleanup_led_82541"); ret_val = hw->phy.ops.write_reg(hw, IGP01E1000_GMII_FIFO, dev_spec->spd_default); if (ret_val) goto out; E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_default); out: return ret_val; } /** * e1000_phy_init_script_82541 - Initialize GbE PHY * @hw: pointer to the HW structure * * Initializes the IGP PHY. **/ static s32 e1000_phy_init_script_82541(struct e1000_hw *hw) { struct e1000_dev_spec_82541 *dev_spec = &hw->dev_spec._82541; u32 ret_val; u16 phy_saved_data; DEBUGFUNC("e1000_phy_init_script_82541"); if (!dev_spec->phy_init_script) { ret_val = E1000_SUCCESS; goto out; } /* Delay after phy reset to enable NVM configuration to load */ msec_delay(20); /* * Save off the current value of register 0x2F5B to be restored at * the end of this routine. */ ret_val = hw->phy.ops.read_reg(hw, 0x2F5B, &phy_saved_data); /* Disabled the PHY transmitter */ hw->phy.ops.write_reg(hw, 0x2F5B, 0x0003); msec_delay(20); hw->phy.ops.write_reg(hw, 0x0000, 0x0140); msec_delay(5); switch (hw->mac.type) { case e1000_82541: case e1000_82547: hw->phy.ops.write_reg(hw, 0x1F95, 0x0001); hw->phy.ops.write_reg(hw, 0x1F71, 0xBD21); hw->phy.ops.write_reg(hw, 0x1F79, 0x0018); hw->phy.ops.write_reg(hw, 0x1F30, 0x1600); hw->phy.ops.write_reg(hw, 0x1F31, 0x0014); hw->phy.ops.write_reg(hw, 0x1F32, 0x161C); hw->phy.ops.write_reg(hw, 0x1F94, 0x0003); hw->phy.ops.write_reg(hw, 0x1F96, 0x003F); hw->phy.ops.write_reg(hw, 0x2010, 0x0008); break; case e1000_82541_rev_2: case e1000_82547_rev_2: hw->phy.ops.write_reg(hw, 0x1F73, 0x0099); break; default: break; } hw->phy.ops.write_reg(hw, 0x0000, 0x3300); msec_delay(20); /* Now enable the transmitter */ hw->phy.ops.write_reg(hw, 0x2F5B, phy_saved_data); if (hw->mac.type == e1000_82547) { u16 fused, fine, coarse; /* Move to analog registers page */ hw->phy.ops.read_reg(hw, IGP01E1000_ANALOG_SPARE_FUSE_STATUS, &fused); if (!(fused & IGP01E1000_ANALOG_SPARE_FUSE_ENABLED)) { hw->phy.ops.read_reg(hw, IGP01E1000_ANALOG_FUSE_STATUS, &fused); fine = fused & IGP01E1000_ANALOG_FUSE_FINE_MASK; coarse = fused & IGP01E1000_ANALOG_FUSE_COARSE_MASK; if (coarse > IGP01E1000_ANALOG_FUSE_COARSE_THRESH) { coarse -= IGP01E1000_ANALOG_FUSE_COARSE_10; fine -= IGP01E1000_ANALOG_FUSE_FINE_1; } else if (coarse == IGP01E1000_ANALOG_FUSE_COARSE_THRESH) fine -= IGP01E1000_ANALOG_FUSE_FINE_10; fused = (fused & IGP01E1000_ANALOG_FUSE_POLY_MASK) | (fine & IGP01E1000_ANALOG_FUSE_FINE_MASK) | (coarse & IGP01E1000_ANALOG_FUSE_COARSE_MASK); hw->phy.ops.write_reg(hw, IGP01E1000_ANALOG_FUSE_CONTROL, fused); hw->phy.ops.write_reg(hw, IGP01E1000_ANALOG_FUSE_BYPASS, IGP01E1000_ANALOG_FUSE_ENABLE_SW_CONTROL); } } out: return ret_val; } /** * e1000_init_script_state_82541 - Enable/Disable PHY init script * @hw: pointer to the HW structure * @state: boolean value used to enable/disable PHY init script * * Allows the driver to enable/disable the PHY init script, if the PHY is an * IGP PHY. **/ void e1000_init_script_state_82541(struct e1000_hw *hw, bool state) { struct e1000_dev_spec_82541 *dev_spec = &hw->dev_spec._82541; DEBUGFUNC("e1000_init_script_state_82541"); if (hw->phy.type != e1000_phy_igp) { DEBUGOUT("Initialization script not necessary.\n"); goto out; } dev_spec->phy_init_script = state; out: return; } /** * e1000_power_down_phy_copper_82541 - Remove link in case of PHY power down * @hw: pointer to the HW structure * * In the case of a PHY power down to save power, or to turn off link during a * driver unload, or wake on lan is not enabled, remove the link. **/ static void e1000_power_down_phy_copper_82541(struct e1000_hw *hw) { /* If the management interface is not enabled, then power down */ if (!(E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_SMBUS_EN)) e1000_power_down_phy_copper(hw); return; } /** * e1000_clear_hw_cntrs_82541 - Clear device specific hardware counters * @hw: pointer to the HW structure * * Clears the hardware counters by reading the counter registers. **/ static void e1000_clear_hw_cntrs_82541(struct e1000_hw *hw) { DEBUGFUNC("e1000_clear_hw_cntrs_82541"); e1000_clear_hw_cntrs_base_generic(hw); E1000_READ_REG(hw, E1000_PRC64); E1000_READ_REG(hw, E1000_PRC127); E1000_READ_REG(hw, E1000_PRC255); E1000_READ_REG(hw, E1000_PRC511); E1000_READ_REG(hw, E1000_PRC1023); E1000_READ_REG(hw, E1000_PRC1522); E1000_READ_REG(hw, E1000_PTC64); E1000_READ_REG(hw, E1000_PTC127); E1000_READ_REG(hw, E1000_PTC255); E1000_READ_REG(hw, E1000_PTC511); E1000_READ_REG(hw, E1000_PTC1023); E1000_READ_REG(hw, E1000_PTC1522); E1000_READ_REG(hw, E1000_ALGNERRC); E1000_READ_REG(hw, E1000_RXERRC); E1000_READ_REG(hw, E1000_TNCRS); E1000_READ_REG(hw, E1000_CEXTERR); E1000_READ_REG(hw, E1000_TSCTC); E1000_READ_REG(hw, E1000_TSCTFC); E1000_READ_REG(hw, E1000_MGTPRC); E1000_READ_REG(hw, E1000_MGTPDC); E1000_READ_REG(hw, E1000_MGTPTC); } /** * e1000_read_mac_addr_82541 - Read device MAC address * @hw: pointer to the HW structure * * Reads the device MAC address from the EEPROM and stores the value. **/ static s32 e1000_read_mac_addr_82541(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 offset, nvm_data, i; DEBUGFUNC("e1000_read_mac_addr"); for (i = 0; i < ETH_ADDR_LEN; i += 2) { offset = i >> 1; ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); goto out; } hw->mac.perm_addr[i] = (u8)(nvm_data & 0xFF); hw->mac.perm_addr[i+1] = (u8)(nvm_data >> 8); } for (i = 0; i < ETH_ADDR_LEN; i++) hw->mac.addr[i] = hw->mac.perm_addr[i]; out: return ret_val; } Index: projects/clang-trunk/sys/dev/e1000/e1000_82542.c =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_82542.c (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_82542.c (revision 287502) @@ -1,591 +1,591 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ /* * 82542 Gigabit Ethernet Controller */ #include "e1000_api.h" static s32 e1000_init_phy_params_82542(struct e1000_hw *hw); static s32 e1000_init_nvm_params_82542(struct e1000_hw *hw); static s32 e1000_init_mac_params_82542(struct e1000_hw *hw); static s32 e1000_get_bus_info_82542(struct e1000_hw *hw); static s32 e1000_reset_hw_82542(struct e1000_hw *hw); static s32 e1000_init_hw_82542(struct e1000_hw *hw); static s32 e1000_setup_link_82542(struct e1000_hw *hw); static s32 e1000_led_on_82542(struct e1000_hw *hw); static s32 e1000_led_off_82542(struct e1000_hw *hw); static int e1000_rar_set_82542(struct e1000_hw *hw, u8 *addr, u32 index); static void e1000_clear_hw_cntrs_82542(struct e1000_hw *hw); static s32 e1000_read_mac_addr_82542(struct e1000_hw *hw); /** * e1000_init_phy_params_82542 - Init PHY func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_phy_params_82542(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_init_phy_params_82542"); phy->type = e1000_phy_none; return ret_val; } /** * e1000_init_nvm_params_82542 - Init NVM func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_nvm_params_82542(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; DEBUGFUNC("e1000_init_nvm_params_82542"); nvm->address_bits = 6; nvm->delay_usec = 50; nvm->opcode_bits = 3; nvm->type = e1000_nvm_eeprom_microwire; nvm->word_size = 64; /* Function Pointers */ nvm->ops.read = e1000_read_nvm_microwire; nvm->ops.release = e1000_stop_nvm; nvm->ops.write = e1000_write_nvm_microwire; nvm->ops.update = e1000_update_nvm_checksum_generic; nvm->ops.validate = e1000_validate_nvm_checksum_generic; return E1000_SUCCESS; } /** * e1000_init_mac_params_82542 - Init MAC func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_mac_params_82542(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; DEBUGFUNC("e1000_init_mac_params_82542"); /* Set media type */ hw->phy.media_type = e1000_media_type_fiber; /* Set mta register count */ mac->mta_reg_count = 128; /* Set rar entry count */ mac->rar_entry_count = E1000_RAR_ENTRIES; /* Function pointers */ /* bus type/speed/width */ mac->ops.get_bus_info = e1000_get_bus_info_82542; /* function id */ mac->ops.set_lan_id = e1000_set_lan_id_multi_port_pci; /* reset */ mac->ops.reset_hw = e1000_reset_hw_82542; /* hw initialization */ mac->ops.init_hw = e1000_init_hw_82542; /* link setup */ mac->ops.setup_link = e1000_setup_link_82542; /* phy/fiber/serdes setup */ mac->ops.setup_physical_interface = e1000_setup_fiber_serdes_link_generic; /* check for link */ mac->ops.check_for_link = e1000_check_for_fiber_link_generic; /* multicast address update */ mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic; /* writing VFTA */ mac->ops.write_vfta = e1000_write_vfta_generic; /* clearing VFTA */ mac->ops.clear_vfta = e1000_clear_vfta_generic; /* read mac address */ mac->ops.read_mac_addr = e1000_read_mac_addr_82542; /* set RAR */ mac->ops.rar_set = e1000_rar_set_82542; /* turn on/off LED */ mac->ops.led_on = e1000_led_on_82542; mac->ops.led_off = e1000_led_off_82542; /* clear hardware counters */ mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_82542; /* link info */ mac->ops.get_link_up_info = e1000_get_speed_and_duplex_fiber_serdes_generic; return E1000_SUCCESS; } /** * e1000_init_function_pointers_82542 - Init func ptrs. * @hw: pointer to the HW structure * * Called to initialize all function pointers and parameters. **/ void e1000_init_function_pointers_82542(struct e1000_hw *hw) { DEBUGFUNC("e1000_init_function_pointers_82542"); hw->mac.ops.init_params = e1000_init_mac_params_82542; hw->nvm.ops.init_params = e1000_init_nvm_params_82542; hw->phy.ops.init_params = e1000_init_phy_params_82542; } /** * e1000_get_bus_info_82542 - Obtain bus information for adapter * @hw: pointer to the HW structure * * This will obtain information about the HW bus for which the * adapter is attached and stores it in the hw structure. **/ static s32 e1000_get_bus_info_82542(struct e1000_hw *hw) { DEBUGFUNC("e1000_get_bus_info_82542"); hw->bus.type = e1000_bus_type_pci; hw->bus.speed = e1000_bus_speed_unknown; hw->bus.width = e1000_bus_width_unknown; return E1000_SUCCESS; } /** * e1000_reset_hw_82542 - Reset hardware * @hw: pointer to the HW structure * * This resets the hardware into a known state. **/ static s32 e1000_reset_hw_82542(struct e1000_hw *hw) { struct e1000_bus_info *bus = &hw->bus; s32 ret_val = E1000_SUCCESS; u32 ctrl; DEBUGFUNC("e1000_reset_hw_82542"); if (hw->revision_id == E1000_REVISION_2) { DEBUGOUT("Disabling MWI on 82542 rev 2\n"); e1000_pci_clear_mwi(hw); } DEBUGOUT("Masking off all interrupts\n"); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_WRITE_REG(hw, E1000_RCTL, 0); E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); E1000_WRITE_FLUSH(hw); /* * Delay to allow any outstanding PCI transactions to complete before * resetting the device */ msec_delay(10); ctrl = E1000_READ_REG(hw, E1000_CTRL); DEBUGOUT("Issuing a global reset to 82542/82543 MAC\n"); E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); hw->nvm.ops.reload(hw); msec_delay(2); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_READ_REG(hw, E1000_ICR); if (hw->revision_id == E1000_REVISION_2) { if (bus->pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_set_mwi(hw); } return ret_val; } /** * e1000_init_hw_82542 - Initialize hardware * @hw: pointer to the HW structure * * This inits the hardware readying it for operation. **/ static s32 e1000_init_hw_82542(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; struct e1000_dev_spec_82542 *dev_spec = &hw->dev_spec._82542; s32 ret_val = E1000_SUCCESS; u32 ctrl; u16 i; DEBUGFUNC("e1000_init_hw_82542"); /* Disabling VLAN filtering */ E1000_WRITE_REG(hw, E1000_VET, 0); mac->ops.clear_vfta(hw); /* For 82542 (rev 2.0), disable MWI and put the receiver into reset */ if (hw->revision_id == E1000_REVISION_2) { DEBUGOUT("Disabling MWI on 82542 rev 2.0\n"); e1000_pci_clear_mwi(hw); E1000_WRITE_REG(hw, E1000_RCTL, E1000_RCTL_RST); E1000_WRITE_FLUSH(hw); msec_delay(5); } /* Setup the receive address. */ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); /* For 82542 (rev 2.0), take the receiver out of reset and enable MWI */ if (hw->revision_id == E1000_REVISION_2) { E1000_WRITE_REG(hw, E1000_RCTL, 0); E1000_WRITE_FLUSH(hw); msec_delay(1); if (hw->bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_set_mwi(hw); } /* Zero out the Multicast HASH table */ DEBUGOUT("Zeroing the MTA\n"); for (i = 0; i < mac->mta_reg_count; i++) E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); /* * Set the PCI priority bit correctly in the CTRL register. This * determines if the adapter gives priority to receives, or if it * gives equal priority to transmits and receives. */ if (dev_spec->dma_fairness) { ctrl = E1000_READ_REG(hw, E1000_CTRL); E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_PRIOR); } /* Setup link and flow control */ ret_val = e1000_setup_link_82542(hw); /* * Clear all of the statistics registers (clear on read). It is * important that we do this after we have tried to establish link * because the symbol error count will increment wildly if there * is no link. */ e1000_clear_hw_cntrs_82542(hw); return ret_val; } /** * e1000_setup_link_82542 - Setup flow control and link settings * @hw: pointer to the HW structure * * Determines which flow control settings to use, then configures flow * control. Calls the appropriate media-specific link configuration * function. Assuming the adapter has a valid link partner, a valid link * should be established. Assumes the hardware has previously been reset * and the transmitter and receiver are not enabled. **/ static s32 e1000_setup_link_82542(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; - s32 ret_val; + s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_setup_link_82542"); ret_val = e1000_set_default_fc_generic(hw); if (ret_val) goto out; hw->fc.requested_mode &= ~e1000_fc_tx_pause; if (mac->report_tx_early) hw->fc.requested_mode &= ~e1000_fc_rx_pause; /* * Save off the requested flow control mode for use later. Depending * on the link partner's capabilities, we may or may not use this mode. */ hw->fc.current_mode = hw->fc.requested_mode; DEBUGOUT1("After fix-ups FlowControl is now = %x\n", hw->fc.current_mode); /* Call the necessary subroutine to configure the link. */ ret_val = mac->ops.setup_physical_interface(hw); if (ret_val) goto out; /* * Initialize the flow control address, type, and PAUSE timer * registers to their default values. This is done even if flow * control is disabled, because it does not hurt anything to * initialize these registers. */ DEBUGOUT("Initializing Flow Control address, type and timer regs\n"); E1000_WRITE_REG(hw, E1000_FCAL, FLOW_CONTROL_ADDRESS_LOW); E1000_WRITE_REG(hw, E1000_FCAH, FLOW_CONTROL_ADDRESS_HIGH); E1000_WRITE_REG(hw, E1000_FCT, FLOW_CONTROL_TYPE); E1000_WRITE_REG(hw, E1000_FCTTV, hw->fc.pause_time); ret_val = e1000_set_fc_watermarks_generic(hw); out: return ret_val; } /** * e1000_led_on_82542 - Turn on SW controllable LED * @hw: pointer to the HW structure * * Turns the SW defined LED on. **/ static s32 e1000_led_on_82542(struct e1000_hw *hw) { u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); DEBUGFUNC("e1000_led_on_82542"); ctrl |= E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; E1000_WRITE_REG(hw, E1000_CTRL, ctrl); return E1000_SUCCESS; } /** * e1000_led_off_82542 - Turn off SW controllable LED * @hw: pointer to the HW structure * * Turns the SW defined LED off. **/ static s32 e1000_led_off_82542(struct e1000_hw *hw) { u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); DEBUGFUNC("e1000_led_off_82542"); ctrl &= ~E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; E1000_WRITE_REG(hw, E1000_CTRL, ctrl); return E1000_SUCCESS; } /** * e1000_rar_set_82542 - Set receive address register * @hw: pointer to the HW structure * @addr: pointer to the receive address * @index: receive address array register * * Sets the receive address array register at index to the address passed * in by addr. **/ static int e1000_rar_set_82542(struct e1000_hw *hw, u8 *addr, u32 index) { u32 rar_low, rar_high; DEBUGFUNC("e1000_rar_set_82542"); /* * HW expects these in little endian so we reverse the byte order * from network order (big endian) to little endian */ rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); /* If MAC address zero, no need to set the AV bit */ if (rar_low || rar_high) rar_high |= E1000_RAH_AV; E1000_WRITE_REG_ARRAY(hw, E1000_RA, (index << 1), rar_low); E1000_WRITE_REG_ARRAY(hw, E1000_RA, ((index << 1) + 1), rar_high); return E1000_SUCCESS; } /** * e1000_translate_register_82542 - Translate the proper register offset * @reg: e1000 register to be read * * Registers in 82542 are located in different offsets than other adapters * even though they function in the same manner. This function takes in * the name of the register to read and returns the correct offset for * 82542 silicon. **/ u32 e1000_translate_register_82542(u32 reg) { /* * Some of the 82542 registers are located at different * offsets than they are in newer adapters. * Despite the difference in location, the registers * function in the same manner. */ switch (reg) { case E1000_RA: reg = 0x00040; break; case E1000_RDTR: reg = 0x00108; break; case E1000_RDBAL(0): reg = 0x00110; break; case E1000_RDBAH(0): reg = 0x00114; break; case E1000_RDLEN(0): reg = 0x00118; break; case E1000_RDH(0): reg = 0x00120; break; case E1000_RDT(0): reg = 0x00128; break; case E1000_RDBAL(1): reg = 0x00138; break; case E1000_RDBAH(1): reg = 0x0013C; break; case E1000_RDLEN(1): reg = 0x00140; break; case E1000_RDH(1): reg = 0x00148; break; case E1000_RDT(1): reg = 0x00150; break; case E1000_FCRTH: reg = 0x00160; break; case E1000_FCRTL: reg = 0x00168; break; case E1000_MTA: reg = 0x00200; break; case E1000_TDBAL(0): reg = 0x00420; break; case E1000_TDBAH(0): reg = 0x00424; break; case E1000_TDLEN(0): reg = 0x00428; break; case E1000_TDH(0): reg = 0x00430; break; case E1000_TDT(0): reg = 0x00438; break; case E1000_TIDV: reg = 0x00440; break; case E1000_VFTA: reg = 0x00600; break; case E1000_TDFH: reg = 0x08010; break; case E1000_TDFT: reg = 0x08018; break; default: break; } return reg; } /** * e1000_clear_hw_cntrs_82542 - Clear device specific hardware counters * @hw: pointer to the HW structure * * Clears the hardware counters by reading the counter registers. **/ static void e1000_clear_hw_cntrs_82542(struct e1000_hw *hw) { DEBUGFUNC("e1000_clear_hw_cntrs_82542"); e1000_clear_hw_cntrs_base_generic(hw); E1000_READ_REG(hw, E1000_PRC64); E1000_READ_REG(hw, E1000_PRC127); E1000_READ_REG(hw, E1000_PRC255); E1000_READ_REG(hw, E1000_PRC511); E1000_READ_REG(hw, E1000_PRC1023); E1000_READ_REG(hw, E1000_PRC1522); E1000_READ_REG(hw, E1000_PTC64); E1000_READ_REG(hw, E1000_PTC127); E1000_READ_REG(hw, E1000_PTC255); E1000_READ_REG(hw, E1000_PTC511); E1000_READ_REG(hw, E1000_PTC1023); E1000_READ_REG(hw, E1000_PTC1522); } /** * e1000_read_mac_addr_82542 - Read device MAC address * @hw: pointer to the HW structure * * Reads the device MAC address from the EEPROM and stores the value. **/ -s32 e1000_read_mac_addr_82542(struct e1000_hw *hw) +static s32 e1000_read_mac_addr_82542(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 offset, nvm_data, i; DEBUGFUNC("e1000_read_mac_addr"); for (i = 0; i < ETH_ADDR_LEN; i += 2) { offset = i >> 1; ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); goto out; } hw->mac.perm_addr[i] = (u8)(nvm_data & 0xFF); hw->mac.perm_addr[i+1] = (u8)(nvm_data >> 8); } for (i = 0; i < ETH_ADDR_LEN; i++) hw->mac.addr[i] = hw->mac.perm_addr[i]; out: return ret_val; } Index: projects/clang-trunk/sys/dev/e1000/e1000_82543.c =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_82543.c (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_82543.c (revision 287502) @@ -1,1596 +1,1596 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ /* * 82543GC Gigabit Ethernet Controller (Fiber) * 82543GC Gigabit Ethernet Controller (Copper) * 82544EI Gigabit Ethernet Controller (Copper) * 82544EI Gigabit Ethernet Controller (Fiber) * 82544GC Gigabit Ethernet Controller (Copper) * 82544GC Gigabit Ethernet Controller (LOM) */ #include "e1000_api.h" static s32 e1000_init_phy_params_82543(struct e1000_hw *hw); static s32 e1000_init_nvm_params_82543(struct e1000_hw *hw); static s32 e1000_init_mac_params_82543(struct e1000_hw *hw); static s32 e1000_read_phy_reg_82543(struct e1000_hw *hw, u32 offset, u16 *data); static s32 e1000_write_phy_reg_82543(struct e1000_hw *hw, u32 offset, u16 data); static s32 e1000_phy_force_speed_duplex_82543(struct e1000_hw *hw); static s32 e1000_phy_hw_reset_82543(struct e1000_hw *hw); static s32 e1000_reset_hw_82543(struct e1000_hw *hw); static s32 e1000_init_hw_82543(struct e1000_hw *hw); static s32 e1000_setup_link_82543(struct e1000_hw *hw); static s32 e1000_setup_copper_link_82543(struct e1000_hw *hw); static s32 e1000_setup_fiber_link_82543(struct e1000_hw *hw); static s32 e1000_check_for_copper_link_82543(struct e1000_hw *hw); static s32 e1000_check_for_fiber_link_82543(struct e1000_hw *hw); static s32 e1000_led_on_82543(struct e1000_hw *hw); static s32 e1000_led_off_82543(struct e1000_hw *hw); static void e1000_write_vfta_82543(struct e1000_hw *hw, u32 offset, u32 value); static void e1000_clear_hw_cntrs_82543(struct e1000_hw *hw); static s32 e1000_config_mac_to_phy_82543(struct e1000_hw *hw); static bool e1000_init_phy_disabled_82543(struct e1000_hw *hw); static void e1000_lower_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl); static s32 e1000_polarity_reversal_workaround_82543(struct e1000_hw *hw); static void e1000_raise_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl); static u16 e1000_shift_in_mdi_bits_82543(struct e1000_hw *hw); static void e1000_shift_out_mdi_bits_82543(struct e1000_hw *hw, u32 data, u16 count); static bool e1000_tbi_compatibility_enabled_82543(struct e1000_hw *hw); static void e1000_set_tbi_sbp_82543(struct e1000_hw *hw, bool state); static s32 e1000_read_mac_addr_82543(struct e1000_hw *hw); /** * e1000_init_phy_params_82543 - Init PHY func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_phy_params_82543(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_init_phy_params_82543"); if (hw->phy.media_type != e1000_media_type_copper) { phy->type = e1000_phy_none; goto out; } else { phy->ops.power_up = e1000_power_up_phy_copper; phy->ops.power_down = e1000_power_down_phy_copper; } phy->addr = 1; phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; phy->reset_delay_us = 10000; phy->type = e1000_phy_m88; /* Function Pointers */ phy->ops.check_polarity = e1000_check_polarity_m88; phy->ops.commit = e1000_phy_sw_reset_generic; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_82543; phy->ops.get_cable_length = e1000_get_cable_length_m88; phy->ops.get_cfg_done = e1000_get_cfg_done_generic; phy->ops.read_reg = (hw->mac.type == e1000_82543) ? e1000_read_phy_reg_82543 : e1000_read_phy_reg_m88; phy->ops.reset = (hw->mac.type == e1000_82543) ? e1000_phy_hw_reset_82543 : e1000_phy_hw_reset_generic; phy->ops.write_reg = (hw->mac.type == e1000_82543) ? e1000_write_phy_reg_82543 : e1000_write_phy_reg_m88; phy->ops.get_info = e1000_get_phy_info_m88; /* * The external PHY of the 82543 can be in a funky state. * Resetting helps us read the PHY registers for acquiring * the PHY ID. */ if (!e1000_init_phy_disabled_82543(hw)) { ret_val = phy->ops.reset(hw); if (ret_val) { DEBUGOUT("Resetting PHY during init failed.\n"); goto out; } msec_delay(20); } ret_val = e1000_get_phy_id(hw); if (ret_val) goto out; /* Verify phy id */ switch (hw->mac.type) { case e1000_82543: if (phy->id != M88E1000_E_PHY_ID) { ret_val = -E1000_ERR_PHY; goto out; } break; case e1000_82544: if (phy->id != M88E1000_I_PHY_ID) { ret_val = -E1000_ERR_PHY; goto out; } break; default: ret_val = -E1000_ERR_PHY; goto out; break; } out: return ret_val; } /** * e1000_init_nvm_params_82543 - Init NVM func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_nvm_params_82543(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; DEBUGFUNC("e1000_init_nvm_params_82543"); nvm->type = e1000_nvm_eeprom_microwire; nvm->word_size = 64; nvm->delay_usec = 50; nvm->address_bits = 6; nvm->opcode_bits = 3; /* Function Pointers */ nvm->ops.read = e1000_read_nvm_microwire; nvm->ops.update = e1000_update_nvm_checksum_generic; nvm->ops.valid_led_default = e1000_valid_led_default_generic; nvm->ops.validate = e1000_validate_nvm_checksum_generic; nvm->ops.write = e1000_write_nvm_microwire; return E1000_SUCCESS; } /** * e1000_init_mac_params_82543 - Init MAC func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_mac_params_82543(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; DEBUGFUNC("e1000_init_mac_params_82543"); /* Set media type */ switch (hw->device_id) { case E1000_DEV_ID_82543GC_FIBER: case E1000_DEV_ID_82544EI_FIBER: hw->phy.media_type = e1000_media_type_fiber; break; default: hw->phy.media_type = e1000_media_type_copper; break; } /* Set mta register count */ mac->mta_reg_count = 128; /* Set rar entry count */ mac->rar_entry_count = E1000_RAR_ENTRIES; /* Function pointers */ /* bus type/speed/width */ mac->ops.get_bus_info = e1000_get_bus_info_pci_generic; /* function id */ mac->ops.set_lan_id = e1000_set_lan_id_multi_port_pci; /* reset */ mac->ops.reset_hw = e1000_reset_hw_82543; /* hw initialization */ mac->ops.init_hw = e1000_init_hw_82543; /* link setup */ mac->ops.setup_link = e1000_setup_link_82543; /* physical interface setup */ mac->ops.setup_physical_interface = (hw->phy.media_type == e1000_media_type_copper) ? e1000_setup_copper_link_82543 : e1000_setup_fiber_link_82543; /* check for link */ mac->ops.check_for_link = (hw->phy.media_type == e1000_media_type_copper) ? e1000_check_for_copper_link_82543 : e1000_check_for_fiber_link_82543; /* link info */ mac->ops.get_link_up_info = (hw->phy.media_type == e1000_media_type_copper) ? e1000_get_speed_and_duplex_copper_generic : e1000_get_speed_and_duplex_fiber_serdes_generic; /* multicast address update */ mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic; /* writing VFTA */ mac->ops.write_vfta = e1000_write_vfta_82543; /* clearing VFTA */ mac->ops.clear_vfta = e1000_clear_vfta_generic; /* read mac address */ mac->ops.read_mac_addr = e1000_read_mac_addr_82543; /* turn on/off LED */ mac->ops.led_on = e1000_led_on_82543; mac->ops.led_off = e1000_led_off_82543; /* clear hardware counters */ mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_82543; /* Set tbi compatibility */ if ((hw->mac.type != e1000_82543) || (hw->phy.media_type == e1000_media_type_fiber)) e1000_set_tbi_compatibility_82543(hw, FALSE); return E1000_SUCCESS; } /** * e1000_init_function_pointers_82543 - Init func ptrs. * @hw: pointer to the HW structure * * Called to initialize all function pointers and parameters. **/ void e1000_init_function_pointers_82543(struct e1000_hw *hw) { DEBUGFUNC("e1000_init_function_pointers_82543"); hw->mac.ops.init_params = e1000_init_mac_params_82543; hw->nvm.ops.init_params = e1000_init_nvm_params_82543; hw->phy.ops.init_params = e1000_init_phy_params_82543; } /** * e1000_tbi_compatibility_enabled_82543 - Returns TBI compat status * @hw: pointer to the HW structure * * Returns the current status of 10-bit Interface (TBI) compatibility * (enabled/disabled). **/ static bool e1000_tbi_compatibility_enabled_82543(struct e1000_hw *hw) { struct e1000_dev_spec_82543 *dev_spec = &hw->dev_spec._82543; bool state = FALSE; DEBUGFUNC("e1000_tbi_compatibility_enabled_82543"); if (hw->mac.type != e1000_82543) { DEBUGOUT("TBI compatibility workaround for 82543 only.\n"); goto out; } state = !!(dev_spec->tbi_compatibility & TBI_COMPAT_ENABLED); out: return state; } /** * e1000_set_tbi_compatibility_82543 - Set TBI compatibility * @hw: pointer to the HW structure * @state: enable/disable TBI compatibility * * Enables or disabled 10-bit Interface (TBI) compatibility. **/ void e1000_set_tbi_compatibility_82543(struct e1000_hw *hw, bool state) { struct e1000_dev_spec_82543 *dev_spec = &hw->dev_spec._82543; DEBUGFUNC("e1000_set_tbi_compatibility_82543"); if (hw->mac.type != e1000_82543) { DEBUGOUT("TBI compatibility workaround for 82543 only.\n"); goto out; } if (state) dev_spec->tbi_compatibility |= TBI_COMPAT_ENABLED; else dev_spec->tbi_compatibility &= ~TBI_COMPAT_ENABLED; out: return; } /** * e1000_tbi_sbp_enabled_82543 - Returns TBI SBP status * @hw: pointer to the HW structure * * Returns the current status of 10-bit Interface (TBI) store bad packet (SBP) * (enabled/disabled). **/ bool e1000_tbi_sbp_enabled_82543(struct e1000_hw *hw) { struct e1000_dev_spec_82543 *dev_spec = &hw->dev_spec._82543; bool state = FALSE; DEBUGFUNC("e1000_tbi_sbp_enabled_82543"); if (hw->mac.type != e1000_82543) { DEBUGOUT("TBI compatibility workaround for 82543 only.\n"); goto out; } state = !!(dev_spec->tbi_compatibility & TBI_SBP_ENABLED); out: return state; } /** * e1000_set_tbi_sbp_82543 - Set TBI SBP * @hw: pointer to the HW structure * @state: enable/disable TBI store bad packet * * Enables or disabled 10-bit Interface (TBI) store bad packet (SBP). **/ static void e1000_set_tbi_sbp_82543(struct e1000_hw *hw, bool state) { struct e1000_dev_spec_82543 *dev_spec = &hw->dev_spec._82543; DEBUGFUNC("e1000_set_tbi_sbp_82543"); if (state && e1000_tbi_compatibility_enabled_82543(hw)) dev_spec->tbi_compatibility |= TBI_SBP_ENABLED; else dev_spec->tbi_compatibility &= ~TBI_SBP_ENABLED; return; } /** * e1000_init_phy_disabled_82543 - Returns init PHY status * @hw: pointer to the HW structure * * Returns the current status of whether PHY initialization is disabled. * True if PHY initialization is disabled else FALSE. **/ static bool e1000_init_phy_disabled_82543(struct e1000_hw *hw) { struct e1000_dev_spec_82543 *dev_spec = &hw->dev_spec._82543; bool ret_val; DEBUGFUNC("e1000_init_phy_disabled_82543"); if (hw->mac.type != e1000_82543) { ret_val = FALSE; goto out; } ret_val = dev_spec->init_phy_disabled; out: return ret_val; } /** * e1000_tbi_adjust_stats_82543 - Adjust stats when TBI enabled * @hw: pointer to the HW structure * @stats: Struct containing statistic register values * @frame_len: The length of the frame in question * @mac_addr: The Ethernet destination address of the frame in question * @max_frame_size: The maximum frame size * * Adjusts the statistic counters when a frame is accepted by TBI_ACCEPT **/ void e1000_tbi_adjust_stats_82543(struct e1000_hw *hw, struct e1000_hw_stats *stats, u32 frame_len, u8 *mac_addr, u32 max_frame_size) { if (!(e1000_tbi_sbp_enabled_82543(hw))) goto out; /* First adjust the frame length. */ frame_len--; /* * We need to adjust the statistics counters, since the hardware * counters overcount this packet as a CRC error and undercount * the packet as a good packet */ /* This packet should not be counted as a CRC error. */ stats->crcerrs--; /* This packet does count as a Good Packet Received. */ stats->gprc++; /* Adjust the Good Octets received counters */ stats->gorc += frame_len; /* * Is this a broadcast or multicast? Check broadcast first, * since the test for a multicast frame will test positive on * a broadcast frame. */ if ((mac_addr[0] == 0xff) && (mac_addr[1] == 0xff)) /* Broadcast packet */ stats->bprc++; else if (*mac_addr & 0x01) /* Multicast packet */ stats->mprc++; /* * In this case, the hardware has over counted the number of * oversize frames. */ if ((frame_len == max_frame_size) && (stats->roc > 0)) stats->roc--; /* * Adjust the bin counters when the extra byte put the frame in the * wrong bin. Remember that the frame_len was adjusted above. */ if (frame_len == 64) { stats->prc64++; stats->prc127--; } else if (frame_len == 127) { stats->prc127++; stats->prc255--; } else if (frame_len == 255) { stats->prc255++; stats->prc511--; } else if (frame_len == 511) { stats->prc511++; stats->prc1023--; } else if (frame_len == 1023) { stats->prc1023++; stats->prc1522--; } else if (frame_len == 1522) { stats->prc1522++; } out: return; } /** * e1000_read_phy_reg_82543 - Read PHY register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Reads the PHY at offset and stores the information read to data. **/ static s32 e1000_read_phy_reg_82543(struct e1000_hw *hw, u32 offset, u16 *data) { u32 mdic; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_read_phy_reg_82543"); if (offset > MAX_PHY_REG_ADDRESS) { DEBUGOUT1("PHY Address %d is out of range\n", offset); ret_val = -E1000_ERR_PARAM; goto out; } /* * We must first send a preamble through the MDIO pin to signal the * beginning of an MII instruction. This is done by sending 32 * consecutive "1" bits. */ e1000_shift_out_mdi_bits_82543(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE); /* * Now combine the next few fields that are required for a read * operation. We use this method instead of calling the * e1000_shift_out_mdi_bits routine five different times. The format * of an MII read instruction consists of a shift out of 14 bits and * is defined as follows: * * followed by a shift in of 18 bits. This first two bits shifted in * are TurnAround bits used to avoid contention on the MDIO pin when a * READ operation is performed. These two bits are thrown away * followed by a shift in of 16 bits which contains the desired data. */ mdic = (offset | (hw->phy.addr << 5) | (PHY_OP_READ << 10) | (PHY_SOF << 12)); e1000_shift_out_mdi_bits_82543(hw, mdic, 14); /* * Now that we've shifted out the read command to the MII, we need to * "shift in" the 16-bit value (18 total bits) of the requested PHY * register address. */ *data = e1000_shift_in_mdi_bits_82543(hw); out: return ret_val; } /** * e1000_write_phy_reg_82543 - Write PHY register * @hw: pointer to the HW structure * @offset: register offset to be written * @data: pointer to the data to be written at offset * * Writes data to the PHY at offset. **/ static s32 e1000_write_phy_reg_82543(struct e1000_hw *hw, u32 offset, u16 data) { u32 mdic; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_write_phy_reg_82543"); if (offset > MAX_PHY_REG_ADDRESS) { DEBUGOUT1("PHY Address %d is out of range\n", offset); ret_val = -E1000_ERR_PARAM; goto out; } /* * We'll need to use the SW defined pins to shift the write command * out to the PHY. We first send a preamble to the PHY to signal the * beginning of the MII instruction. This is done by sending 32 * consecutive "1" bits. */ e1000_shift_out_mdi_bits_82543(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE); /* * Now combine the remaining required fields that will indicate a * write operation. We use this method instead of calling the * e1000_shift_out_mdi_bits routine for each field in the command. The * format of a MII write instruction is as follows: * . */ mdic = ((PHY_TURNAROUND) | (offset << 2) | (hw->phy.addr << 7) | (PHY_OP_WRITE << 12) | (PHY_SOF << 14)); mdic <<= 16; mdic |= (u32)data; e1000_shift_out_mdi_bits_82543(hw, mdic, 32); out: return ret_val; } /** * e1000_raise_mdi_clk_82543 - Raise Management Data Input clock * @hw: pointer to the HW structure * @ctrl: pointer to the control register * * Raise the management data input clock by setting the MDC bit in the control * register. **/ static void e1000_raise_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl) { /* * Raise the clock input to the Management Data Clock (by setting the * MDC bit), and then delay a sufficient amount of time. */ E1000_WRITE_REG(hw, E1000_CTRL, (*ctrl | E1000_CTRL_MDC)); E1000_WRITE_FLUSH(hw); usec_delay(10); } /** * e1000_lower_mdi_clk_82543 - Lower Management Data Input clock * @hw: pointer to the HW structure * @ctrl: pointer to the control register * * Lower the management data input clock by clearing the MDC bit in the * control register. **/ static void e1000_lower_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl) { /* * Lower the clock input to the Management Data Clock (by clearing the * MDC bit), and then delay a sufficient amount of time. */ E1000_WRITE_REG(hw, E1000_CTRL, (*ctrl & ~E1000_CTRL_MDC)); E1000_WRITE_FLUSH(hw); usec_delay(10); } /** * e1000_shift_out_mdi_bits_82543 - Shift data bits our to the PHY * @hw: pointer to the HW structure * @data: data to send to the PHY * @count: number of bits to shift out * * We need to shift 'count' bits out to the PHY. So, the value in the * "data" parameter will be shifted out to the PHY one bit at a time. * In order to do this, "data" must be broken down into bits. **/ static void e1000_shift_out_mdi_bits_82543(struct e1000_hw *hw, u32 data, u16 count) { u32 ctrl, mask; /* * We need to shift "count" number of bits out to the PHY. So, the * value in the "data" parameter will be shifted out to the PHY one * bit at a time. In order to do this, "data" must be broken down * into bits. */ mask = 0x01; mask <<= (count - 1); ctrl = E1000_READ_REG(hw, E1000_CTRL); /* Set MDIO_DIR and MDC_DIR direction bits to be used as output pins. */ ctrl |= (E1000_CTRL_MDIO_DIR | E1000_CTRL_MDC_DIR); while (mask) { /* * A "1" is shifted out to the PHY by setting the MDIO bit to * "1" and then raising and lowering the Management Data Clock. * A "0" is shifted out to the PHY by setting the MDIO bit to * "0" and then raising and lowering the clock. */ if (data & mask) ctrl |= E1000_CTRL_MDIO; else ctrl &= ~E1000_CTRL_MDIO; E1000_WRITE_REG(hw, E1000_CTRL, ctrl); E1000_WRITE_FLUSH(hw); usec_delay(10); e1000_raise_mdi_clk_82543(hw, &ctrl); e1000_lower_mdi_clk_82543(hw, &ctrl); mask >>= 1; } } /** * e1000_shift_in_mdi_bits_82543 - Shift data bits in from the PHY * @hw: pointer to the HW structure * * In order to read a register from the PHY, we need to shift 18 bits * in from the PHY. Bits are "shifted in" by raising the clock input to * the PHY (setting the MDC bit), and then reading the value of the data out * MDIO bit. **/ static u16 e1000_shift_in_mdi_bits_82543(struct e1000_hw *hw) { u32 ctrl; u16 data = 0; u8 i; /* * In order to read a register from the PHY, we need to shift in a * total of 18 bits from the PHY. The first two bit (turnaround) * times are used to avoid contention on the MDIO pin when a read * operation is performed. These two bits are ignored by us and * thrown away. Bits are "shifted in" by raising the input to the * Management Data Clock (setting the MDC bit) and then reading the * value of the MDIO bit. */ ctrl = E1000_READ_REG(hw, E1000_CTRL); /* * Clear MDIO_DIR (SWDPIO1) to indicate this bit is to be used as * input. */ ctrl &= ~E1000_CTRL_MDIO_DIR; ctrl &= ~E1000_CTRL_MDIO; E1000_WRITE_REG(hw, E1000_CTRL, ctrl); E1000_WRITE_FLUSH(hw); /* * Raise and lower the clock before reading in the data. This accounts * for the turnaround bits. The first clock occurred when we clocked * out the last bit of the Register Address. */ e1000_raise_mdi_clk_82543(hw, &ctrl); e1000_lower_mdi_clk_82543(hw, &ctrl); for (data = 0, i = 0; i < 16; i++) { data <<= 1; e1000_raise_mdi_clk_82543(hw, &ctrl); ctrl = E1000_READ_REG(hw, E1000_CTRL); /* Check to see if we shifted in a "1". */ if (ctrl & E1000_CTRL_MDIO) data |= 1; e1000_lower_mdi_clk_82543(hw, &ctrl); } e1000_raise_mdi_clk_82543(hw, &ctrl); e1000_lower_mdi_clk_82543(hw, &ctrl); return data; } /** * e1000_phy_force_speed_duplex_82543 - Force speed/duplex for PHY * @hw: pointer to the HW structure * * Calls the function to force speed and duplex for the m88 PHY, and * if the PHY is not auto-negotiating and the speed is forced to 10Mbit, * then call the function for polarity reversal workaround. **/ static s32 e1000_phy_force_speed_duplex_82543(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_phy_force_speed_duplex_82543"); ret_val = e1000_phy_force_speed_duplex_m88(hw); if (ret_val) goto out; if (!hw->mac.autoneg && (hw->mac.forced_speed_duplex & E1000_ALL_10_SPEED)) ret_val = e1000_polarity_reversal_workaround_82543(hw); out: return ret_val; } /** * e1000_polarity_reversal_workaround_82543 - Workaround polarity reversal * @hw: pointer to the HW structure * * When forcing link to 10 Full or 10 Half, the PHY can reverse the polarity * inadvertently. To workaround the issue, we disable the transmitter on * the PHY until we have established the link partner's link parameters. **/ static s32 e1000_polarity_reversal_workaround_82543(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 mii_status_reg; u16 i; bool link; if (!(hw->phy.ops.write_reg)) goto out; /* Polarity reversal workaround for forced 10F/10H links. */ /* Disable the transmitter on the PHY */ ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019); if (ret_val) goto out; ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFFF); if (ret_val) goto out; ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000); if (ret_val) goto out; /* * This loop will early-out if the NO link condition has been met. * In other words, DO NOT use e1000_phy_has_link_generic() here. */ for (i = PHY_FORCE_TIME; i > 0; i--) { /* * Read the MII Status Register and wait for Link Status bit * to be clear. */ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) goto out; ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) goto out; if (!(mii_status_reg & ~MII_SR_LINK_STATUS)) break; msec_delay_irq(100); } /* Recommended delay time after link has been lost */ msec_delay_irq(1000); /* Now we will re-enable the transmitter on the PHY */ ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019); if (ret_val) goto out; msec_delay_irq(50); ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFF0); if (ret_val) goto out; msec_delay_irq(50); ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFF00); if (ret_val) goto out; msec_delay_irq(50); ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0x0000); if (ret_val) goto out; ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000); if (ret_val) goto out; /* * Read the MII Status Register and wait for Link Status bit * to be set. */ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_TIME, 100000, &link); if (ret_val) goto out; out: return ret_val; } /** * e1000_phy_hw_reset_82543 - PHY hardware reset * @hw: pointer to the HW structure * * Sets the PHY_RESET_DIR bit in the extended device control register * to put the PHY into a reset and waits for completion. Once the reset * has been accomplished, clear the PHY_RESET_DIR bit to take the PHY out * of reset. **/ static s32 e1000_phy_hw_reset_82543(struct e1000_hw *hw) { u32 ctrl_ext; s32 ret_val; DEBUGFUNC("e1000_phy_hw_reset_82543"); /* * Read the Extended Device Control Register, assert the PHY_RESET_DIR * bit to put the PHY into reset... */ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_SDP4_DIR; ctrl_ext &= ~E1000_CTRL_EXT_SDP4_DATA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); E1000_WRITE_FLUSH(hw); msec_delay(10); /* ...then take it out of reset. */ ctrl_ext |= E1000_CTRL_EXT_SDP4_DATA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); E1000_WRITE_FLUSH(hw); usec_delay(150); if (!(hw->phy.ops.get_cfg_done)) return E1000_SUCCESS; ret_val = hw->phy.ops.get_cfg_done(hw); return ret_val; } /** * e1000_reset_hw_82543 - Reset hardware * @hw: pointer to the HW structure * * This resets the hardware into a known state. **/ static s32 e1000_reset_hw_82543(struct e1000_hw *hw) { - u32 ctrl; + u32 ctrl, icr; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_reset_hw_82543"); DEBUGOUT("Masking off all interrupts\n"); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_WRITE_REG(hw, E1000_RCTL, 0); E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); E1000_WRITE_FLUSH(hw); e1000_set_tbi_sbp_82543(hw, FALSE); /* * Delay to allow any outstanding PCI transactions to complete before * resetting the device */ msec_delay(10); ctrl = E1000_READ_REG(hw, E1000_CTRL); DEBUGOUT("Issuing a global reset to 82543/82544 MAC\n"); if (hw->mac.type == e1000_82543) { E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); } else { /* * The 82544 can't ACK the 64-bit write when issuing the * reset, so use IO-mapping as a workaround. */ E1000_WRITE_REG_IO(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); } /* * After MAC reset, force reload of NVM to restore power-on * settings to device. */ hw->nvm.ops.reload(hw); msec_delay(2); /* Masking off and clearing any pending interrupts */ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); - E1000_READ_REG(hw, E1000_ICR); + icr = E1000_READ_REG(hw, E1000_ICR); return ret_val; } /** * e1000_init_hw_82543 - Initialize hardware * @hw: pointer to the HW structure * * This inits the hardware readying it for operation. **/ static s32 e1000_init_hw_82543(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; struct e1000_dev_spec_82543 *dev_spec = &hw->dev_spec._82543; u32 ctrl; s32 ret_val; u16 i; DEBUGFUNC("e1000_init_hw_82543"); /* Disabling VLAN filtering */ E1000_WRITE_REG(hw, E1000_VET, 0); mac->ops.clear_vfta(hw); /* Setup the receive address. */ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); /* Zero out the Multicast HASH table */ DEBUGOUT("Zeroing the MTA\n"); for (i = 0; i < mac->mta_reg_count; i++) { E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); E1000_WRITE_FLUSH(hw); } /* * Set the PCI priority bit correctly in the CTRL register. This * determines if the adapter gives priority to receives, or if it * gives equal priority to transmits and receives. */ if (hw->mac.type == e1000_82543 && dev_spec->dma_fairness) { ctrl = E1000_READ_REG(hw, E1000_CTRL); E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_PRIOR); } e1000_pcix_mmrbc_workaround_generic(hw); /* Setup link and flow control */ ret_val = mac->ops.setup_link(hw); /* * Clear all of the statistics registers (clear on read). It is * important that we do this after we have tried to establish link * because the symbol error count will increment wildly if there * is no link. */ e1000_clear_hw_cntrs_82543(hw); return ret_val; } /** * e1000_setup_link_82543 - Setup flow control and link settings * @hw: pointer to the HW structure * * Read the EEPROM to determine the initial polarity value and write the * extended device control register with the information before calling * the generic setup link function, which does the following: * Determines which flow control settings to use, then configures flow * control. Calls the appropriate media-specific link configuration * function. Assuming the adapter has a valid link partner, a valid link * should be established. Assumes the hardware has previously been reset * and the transmitter and receiver are not enabled. **/ static s32 e1000_setup_link_82543(struct e1000_hw *hw) { u32 ctrl_ext; s32 ret_val; u16 data; DEBUGFUNC("e1000_setup_link_82543"); /* * Take the 4 bits from NVM word 0xF that determine the initial * polarity value for the SW controlled pins, and setup the * Extended Device Control reg with that info. * This is needed because one of the SW controlled pins is used for * signal detection. So this should be done before phy setup. */ if (hw->mac.type == e1000_82543) { ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG, 1, &data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); ret_val = -E1000_ERR_NVM; goto out; } ctrl_ext = ((data & NVM_WORD0F_SWPDIO_EXT_MASK) << NVM_SWDPIO_EXT_SHIFT); E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); } ret_val = e1000_setup_link_generic(hw); out: return ret_val; } /** * e1000_setup_copper_link_82543 - Configure copper link settings * @hw: pointer to the HW structure * * Configures the link for auto-neg or forced speed and duplex. Then we check * for link, once link is established calls to configure collision distance * and flow control are called. **/ static s32 e1000_setup_copper_link_82543(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; bool link; DEBUGFUNC("e1000_setup_copper_link_82543"); ctrl = E1000_READ_REG(hw, E1000_CTRL) | E1000_CTRL_SLU; /* * With 82543, we need to force speed and duplex on the MAC * equal to what the PHY speed and duplex configuration is. * In addition, we need to perform a hardware reset on the * PHY to take it out of reset. */ if (hw->mac.type == e1000_82543) { ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ret_val = hw->phy.ops.reset(hw); if (ret_val) goto out; } else { ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); } /* Set MDI/MDI-X, Polarity Reversal, and downshift settings */ ret_val = e1000_copper_link_setup_m88(hw); if (ret_val) goto out; if (hw->mac.autoneg) { /* * Setup autoneg and flow control advertisement and perform * autonegotiation. */ ret_val = e1000_copper_link_autoneg(hw); if (ret_val) goto out; } else { /* * PHY will be set to 10H, 10F, 100H or 100F * depending on user settings. */ DEBUGOUT("Forcing Speed and Duplex\n"); ret_val = e1000_phy_force_speed_duplex_82543(hw); if (ret_val) { DEBUGOUT("Error Forcing Speed and Duplex\n"); goto out; } } /* * Check link status. Wait up to 100 microseconds for link to become * valid. */ ret_val = e1000_phy_has_link_generic(hw, COPPER_LINK_UP_LIMIT, 10, &link); if (ret_val) goto out; if (link) { DEBUGOUT("Valid link established!!!\n"); /* Config the MAC and PHY after link is up */ if (hw->mac.type == e1000_82544) { hw->mac.ops.config_collision_dist(hw); } else { ret_val = e1000_config_mac_to_phy_82543(hw); if (ret_val) goto out; } ret_val = e1000_config_fc_after_link_up_generic(hw); } else { DEBUGOUT("Unable to establish link!!!\n"); } out: return ret_val; } /** * e1000_setup_fiber_link_82543 - Setup link for fiber * @hw: pointer to the HW structure * * Configures collision distance and flow control for fiber links. Upon * successful setup, poll for link. **/ static s32 e1000_setup_fiber_link_82543(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; DEBUGFUNC("e1000_setup_fiber_link_82543"); ctrl = E1000_READ_REG(hw, E1000_CTRL); /* Take the link out of reset */ ctrl &= ~E1000_CTRL_LRST; hw->mac.ops.config_collision_dist(hw); ret_val = e1000_commit_fc_settings_generic(hw); if (ret_val) goto out; DEBUGOUT("Auto-negotiation enabled\n"); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); E1000_WRITE_FLUSH(hw); msec_delay(1); /* * For these adapters, the SW definable pin 1 is cleared when the * optics detect a signal. If we have a signal, then poll for a * "Link-Up" indication. */ if (!(E1000_READ_REG(hw, E1000_CTRL) & E1000_CTRL_SWDPIN1)) ret_val = e1000_poll_fiber_serdes_link_generic(hw); else DEBUGOUT("No signal detected\n"); out: return ret_val; } /** * e1000_check_for_copper_link_82543 - Check for link (Copper) * @hw: pointer to the HW structure * * Checks the phy for link, if link exists, do the following: * - check for downshift * - do polarity workaround (if necessary) * - configure collision distance * - configure flow control after link up * - configure tbi compatibility **/ static s32 e1000_check_for_copper_link_82543(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 icr, rctl; s32 ret_val; u16 speed, duplex; bool link; DEBUGFUNC("e1000_check_for_copper_link_82543"); if (!mac->get_link_status) { ret_val = E1000_SUCCESS; goto out; } ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); if (ret_val) goto out; if (!link) goto out; /* No link detected */ mac->get_link_status = FALSE; e1000_check_downshift_generic(hw); /* * If we are forcing speed/duplex, then we can return since * we have already determined whether we have link or not. */ if (!mac->autoneg) { /* * If speed and duplex are forced to 10H or 10F, then we will * implement the polarity reversal workaround. We disable * interrupts first, and upon returning, place the devices * interrupt state to its previous value except for the link * status change interrupt which will happened due to the * execution of this workaround. */ if (mac->forced_speed_duplex & E1000_ALL_10_SPEED) { E1000_WRITE_REG(hw, E1000_IMC, 0xFFFFFFFF); ret_val = e1000_polarity_reversal_workaround_82543(hw); icr = E1000_READ_REG(hw, E1000_ICR); E1000_WRITE_REG(hw, E1000_ICS, (icr & ~E1000_ICS_LSC)); E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK); } ret_val = -E1000_ERR_CONFIG; goto out; } /* * We have a M88E1000 PHY and Auto-Neg is enabled. If we * have Si on board that is 82544 or newer, Auto * Speed Detection takes care of MAC speed/duplex * configuration. So we only need to configure Collision * Distance in the MAC. Otherwise, we need to force * speed/duplex on the MAC to the current PHY speed/duplex * settings. */ if (mac->type == e1000_82544) hw->mac.ops.config_collision_dist(hw); else { ret_val = e1000_config_mac_to_phy_82543(hw); if (ret_val) { DEBUGOUT("Error configuring MAC to PHY settings\n"); goto out; } } /* * Configure Flow Control now that Auto-Neg has completed. * First, we need to restore the desired flow control * settings because we may have had to re-autoneg with a * different link partner. */ ret_val = e1000_config_fc_after_link_up_generic(hw); if (ret_val) DEBUGOUT("Error configuring flow control\n"); /* * At this point we know that we are on copper and we have * auto-negotiated link. These are conditions for checking the link * partner capability register. We use the link speed to determine if * TBI compatibility needs to be turned on or off. If the link is not * at gigabit speed, then TBI compatibility is not needed. If we are * at gigabit speed, we turn on TBI compatibility. */ if (e1000_tbi_compatibility_enabled_82543(hw)) { ret_val = mac->ops.get_link_up_info(hw, &speed, &duplex); if (ret_val) { DEBUGOUT("Error getting link speed and duplex\n"); return ret_val; } if (speed != SPEED_1000) { /* * If link speed is not set to gigabit speed, * we do not need to enable TBI compatibility. */ if (e1000_tbi_sbp_enabled_82543(hw)) { /* * If we previously were in the mode, * turn it off. */ e1000_set_tbi_sbp_82543(hw, FALSE); rctl = E1000_READ_REG(hw, E1000_RCTL); rctl &= ~E1000_RCTL_SBP; E1000_WRITE_REG(hw, E1000_RCTL, rctl); } } else { /* * If TBI compatibility is was previously off, * turn it on. For compatibility with a TBI link * partner, we will store bad packets. Some * frames have an additional byte on the end and * will look like CRC errors to to the hardware. */ if (!e1000_tbi_sbp_enabled_82543(hw)) { e1000_set_tbi_sbp_82543(hw, TRUE); rctl = E1000_READ_REG(hw, E1000_RCTL); rctl |= E1000_RCTL_SBP; E1000_WRITE_REG(hw, E1000_RCTL, rctl); } } } out: return ret_val; } /** * e1000_check_for_fiber_link_82543 - Check for link (Fiber) * @hw: pointer to the HW structure * * Checks for link up on the hardware. If link is not up and we have * a signal, then we need to force link up. **/ static s32 e1000_check_for_fiber_link_82543(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 rxcw, ctrl, status; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_check_for_fiber_link_82543"); ctrl = E1000_READ_REG(hw, E1000_CTRL); status = E1000_READ_REG(hw, E1000_STATUS); rxcw = E1000_READ_REG(hw, E1000_RXCW); /* * If we don't have link (auto-negotiation failed or link partner * cannot auto-negotiate), the cable is plugged in (we have signal), * and our link partner is not trying to auto-negotiate with us (we * are receiving idles or data), we need to force link up. We also * need to give auto-negotiation time to complete, in case the cable * was just plugged in. The autoneg_failed flag does this. */ /* (ctrl & E1000_CTRL_SWDPIN1) == 0 == have signal */ if ((!(ctrl & E1000_CTRL_SWDPIN1)) && (!(status & E1000_STATUS_LU)) && (!(rxcw & E1000_RXCW_C))) { if (!mac->autoneg_failed) { mac->autoneg_failed = TRUE; ret_val = 0; goto out; } DEBUGOUT("NOT RXing /C/, disable AutoNeg and force link.\n"); /* Disable auto-negotiation in the TXCW register */ E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); /* Force link-up and also force full-duplex. */ ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); /* Configure Flow Control after forcing link up. */ ret_val = e1000_config_fc_after_link_up_generic(hw); if (ret_val) { DEBUGOUT("Error configuring flow control\n"); goto out; } } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { /* * If we are forcing link and we are receiving /C/ ordered * sets, re-enable auto-negotiation in the TXCW register * and disable forced link in the Device Control register * in an attempt to auto-negotiate with our link partner. */ DEBUGOUT("RXing /C/, enable AutoNeg and stop forcing link.\n"); E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); mac->serdes_has_link = TRUE; } out: return ret_val; } /** * e1000_config_mac_to_phy_82543 - Configure MAC to PHY settings * @hw: pointer to the HW structure * * For the 82543 silicon, we need to set the MAC to match the settings * of the PHY, even if the PHY is auto-negotiating. **/ static s32 e1000_config_mac_to_phy_82543(struct e1000_hw *hw) { u32 ctrl; s32 ret_val = E1000_SUCCESS; u16 phy_data; DEBUGFUNC("e1000_config_mac_to_phy_82543"); if (!(hw->phy.ops.read_reg)) goto out; /* Set the bits to force speed and duplex */ ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ctrl &= ~(E1000_CTRL_SPD_SEL | E1000_CTRL_ILOS); /* * Set up duplex in the Device Control and Transmit Control * registers depending on negotiated values. */ ret_val = hw->phy.ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); if (ret_val) goto out; ctrl &= ~E1000_CTRL_FD; if (phy_data & M88E1000_PSSR_DPLX) ctrl |= E1000_CTRL_FD; hw->mac.ops.config_collision_dist(hw); /* * Set up speed in the Device Control register depending on * negotiated values. */ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) ctrl |= E1000_CTRL_SPD_1000; else if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_100MBS) ctrl |= E1000_CTRL_SPD_100; E1000_WRITE_REG(hw, E1000_CTRL, ctrl); out: return ret_val; } /** * e1000_write_vfta_82543 - Write value to VLAN filter table * @hw: pointer to the HW structure * @offset: the 32-bit offset in which to write the value to. * @value: the 32-bit value to write at location offset. * * This writes a 32-bit value to a 32-bit offset in the VLAN filter * table. **/ static void e1000_write_vfta_82543(struct e1000_hw *hw, u32 offset, u32 value) { u32 temp; DEBUGFUNC("e1000_write_vfta_82543"); if ((hw->mac.type == e1000_82544) && (offset & 1)) { temp = E1000_READ_REG_ARRAY(hw, E1000_VFTA, offset - 1); E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); E1000_WRITE_FLUSH(hw); E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset - 1, temp); E1000_WRITE_FLUSH(hw); } else { e1000_write_vfta_generic(hw, offset, value); } } /** * e1000_led_on_82543 - Turn on SW controllable LED * @hw: pointer to the HW structure * * Turns the SW defined LED on. **/ static s32 e1000_led_on_82543(struct e1000_hw *hw) { u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); DEBUGFUNC("e1000_led_on_82543"); if (hw->mac.type == e1000_82544 && hw->phy.media_type == e1000_media_type_copper) { /* Clear SW-definable Pin 0 to turn on the LED */ ctrl &= ~E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; } else { /* Fiber 82544 and all 82543 use this method */ ctrl |= E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; } E1000_WRITE_REG(hw, E1000_CTRL, ctrl); return E1000_SUCCESS; } /** * e1000_led_off_82543 - Turn off SW controllable LED * @hw: pointer to the HW structure * * Turns the SW defined LED off. **/ static s32 e1000_led_off_82543(struct e1000_hw *hw) { u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); DEBUGFUNC("e1000_led_off_82543"); if (hw->mac.type == e1000_82544 && hw->phy.media_type == e1000_media_type_copper) { /* Set SW-definable Pin 0 to turn off the LED */ ctrl |= E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; } else { ctrl &= ~E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; } E1000_WRITE_REG(hw, E1000_CTRL, ctrl); return E1000_SUCCESS; } /** * e1000_clear_hw_cntrs_82543 - Clear device specific hardware counters * @hw: pointer to the HW structure * * Clears the hardware counters by reading the counter registers. **/ static void e1000_clear_hw_cntrs_82543(struct e1000_hw *hw) { DEBUGFUNC("e1000_clear_hw_cntrs_82543"); e1000_clear_hw_cntrs_base_generic(hw); E1000_READ_REG(hw, E1000_PRC64); E1000_READ_REG(hw, E1000_PRC127); E1000_READ_REG(hw, E1000_PRC255); E1000_READ_REG(hw, E1000_PRC511); E1000_READ_REG(hw, E1000_PRC1023); E1000_READ_REG(hw, E1000_PRC1522); E1000_READ_REG(hw, E1000_PTC64); E1000_READ_REG(hw, E1000_PTC127); E1000_READ_REG(hw, E1000_PTC255); E1000_READ_REG(hw, E1000_PTC511); E1000_READ_REG(hw, E1000_PTC1023); E1000_READ_REG(hw, E1000_PTC1522); E1000_READ_REG(hw, E1000_ALGNERRC); E1000_READ_REG(hw, E1000_RXERRC); E1000_READ_REG(hw, E1000_TNCRS); E1000_READ_REG(hw, E1000_CEXTERR); E1000_READ_REG(hw, E1000_TSCTC); E1000_READ_REG(hw, E1000_TSCTFC); } /** * e1000_read_mac_addr_82543 - Read device MAC address * @hw: pointer to the HW structure * * Reads the device MAC address from the EEPROM and stores the value. * Since devices with two ports use the same EEPROM, we increment the * last bit in the MAC address for the second port. * **/ s32 e1000_read_mac_addr_82543(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 offset, nvm_data, i; DEBUGFUNC("e1000_read_mac_addr"); for (i = 0; i < ETH_ADDR_LEN; i += 2) { offset = i >> 1; ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); goto out; } hw->mac.perm_addr[i] = (u8)(nvm_data & 0xFF); hw->mac.perm_addr[i+1] = (u8)(nvm_data >> 8); } /* Flip last bit of mac address if we're on second port */ if (hw->bus.func == E1000_FUNC_1) hw->mac.perm_addr[5] ^= 1; for (i = 0; i < ETH_ADDR_LEN; i++) hw->mac.addr[i] = hw->mac.perm_addr[i]; out: return ret_val; } Index: projects/clang-trunk/sys/dev/e1000/e1000_82571.h =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_82571.h (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_82571.h (revision 287502) @@ -1,66 +1,65 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _E1000_82571_H_ #define _E1000_82571_H_ #define ID_LED_RESERVED_F746 0xF746 #define ID_LED_DEFAULT_82573 ((ID_LED_DEF1_DEF2 << 12) | \ (ID_LED_OFF1_ON2 << 8) | \ (ID_LED_DEF1_DEF2 << 4) | \ (ID_LED_DEF1_DEF2)) #define E1000_GCR_L1_ACT_WITHOUT_L0S_RX 0x08000000 #define AN_RETRY_COUNT 5 /* Autoneg Retry Count value */ /* Intr Throttling - RW */ #define E1000_EITR_82574(_n) (0x000E8 + (0x4 * (_n))) #define E1000_EIAC_82574 0x000DC /* Ext. Interrupt Auto Clear - RW */ #define E1000_EIAC_MASK_82574 0x01F00000 -#define E1000_IVAR_INT_ALLOC_VALID 0x8 +#define E1000_NVM_INIT_CTRL2_MNGM 0x6000 /* Manageability Operation Mode mask */ -/* Manageability Operation Mode mask */ -#define E1000_NVM_INIT_CTRL2_MNGM 0x6000 +#define E1000_RXCFGL 0x0B634 /* TimeSync Rx EtherType & Msg Type Reg - RW */ #define E1000_BASE1000T_STATUS 10 #define E1000_IDLE_ERROR_COUNT_MASK 0xFF #define E1000_RECEIVE_ERROR_COUNTER 21 #define E1000_RECEIVE_ERROR_MAX 0xFFFF bool e1000_check_phy_82574(struct e1000_hw *hw); bool e1000_get_laa_state_82571(struct e1000_hw *hw); void e1000_set_laa_state_82571(struct e1000_hw *hw, bool state); #endif Index: projects/clang-trunk/sys/dev/e1000/e1000_82575.c =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_82575.c (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_82575.c (revision 287502) @@ -1,3675 +1,3642 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ /* * 82575EB Gigabit Network Connection * 82575EB Gigabit Backplane Connection * 82575GB Gigabit Network Connection * 82576 Gigabit Network Connection * 82576 Quad Port Gigabit Mezzanine Adapter * 82580 Gigabit Network Connection * I350 Gigabit Network Connection */ #include "e1000_api.h" #include "e1000_i210.h" static s32 e1000_init_phy_params_82575(struct e1000_hw *hw); static s32 e1000_init_mac_params_82575(struct e1000_hw *hw); static s32 e1000_acquire_phy_82575(struct e1000_hw *hw); static void e1000_release_phy_82575(struct e1000_hw *hw); static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw); static void e1000_release_nvm_82575(struct e1000_hw *hw); static s32 e1000_check_for_link_82575(struct e1000_hw *hw); static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw); static s32 e1000_get_cfg_done_82575(struct e1000_hw *hw); static s32 e1000_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed, u16 *duplex); static s32 e1000_phy_hw_reset_sgmii_82575(struct e1000_hw *hw); static s32 e1000_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, u16 *data); static s32 e1000_reset_hw_82575(struct e1000_hw *hw); static s32 e1000_reset_hw_82580(struct e1000_hw *hw); static s32 e1000_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data); static s32 e1000_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data); static s32 e1000_set_d0_lplu_state_82580(struct e1000_hw *hw, bool active); static s32 e1000_set_d3_lplu_state_82580(struct e1000_hw *hw, bool active); static s32 e1000_set_d0_lplu_state_82575(struct e1000_hw *hw, bool active); static s32 e1000_setup_copper_link_82575(struct e1000_hw *hw); static s32 e1000_setup_serdes_link_82575(struct e1000_hw *hw); static s32 e1000_get_media_type_82575(struct e1000_hw *hw); static s32 e1000_set_sfp_media_type_82575(struct e1000_hw *hw); static s32 e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data); static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, u16 data); static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw); static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask); static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, u16 *speed, u16 *duplex); static s32 e1000_get_phy_id_82575(struct e1000_hw *hw); static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask); static bool e1000_sgmii_active_82575(struct e1000_hw *hw); static s32 e1000_reset_init_script_82575(struct e1000_hw *hw); static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw); static void e1000_config_collision_dist_82575(struct e1000_hw *hw); static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw); static void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw); static void e1000_power_up_serdes_link_82575(struct e1000_hw *hw); static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw); static s32 e1000_reset_mdicnfg_82580(struct e1000_hw *hw); static s32 e1000_validate_nvm_checksum_82580(struct e1000_hw *hw); static s32 e1000_update_nvm_checksum_82580(struct e1000_hw *hw); static s32 e1000_update_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset); static s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset); static s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw); static s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw); static void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value); static void e1000_clear_vfta_i350(struct e1000_hw *hw); static void e1000_i2c_start(struct e1000_hw *hw); static void e1000_i2c_stop(struct e1000_hw *hw); static s32 e1000_clock_in_i2c_byte(struct e1000_hw *hw, u8 *data); static s32 e1000_clock_out_i2c_byte(struct e1000_hw *hw, u8 data); static s32 e1000_get_i2c_ack(struct e1000_hw *hw); static s32 e1000_clock_in_i2c_bit(struct e1000_hw *hw, bool *data); static s32 e1000_clock_out_i2c_bit(struct e1000_hw *hw, bool data); static void e1000_raise_i2c_clk(struct e1000_hw *hw, u32 *i2cctl); static void e1000_lower_i2c_clk(struct e1000_hw *hw, u32 *i2cctl); static s32 e1000_set_i2c_data(struct e1000_hw *hw, u32 *i2cctl, bool data); static bool e1000_get_i2c_data(u32 *i2cctl); static const u16 e1000_82580_rxpbs_table[] = { 36, 72, 144, 1, 2, 4, 8, 16, 35, 70, 140 }; #define E1000_82580_RXPBS_TABLE_SIZE \ (sizeof(e1000_82580_rxpbs_table) / \ sizeof(e1000_82580_rxpbs_table[0])) /** * e1000_sgmii_uses_mdio_82575 - Determine if I2C pins are for external MDIO * @hw: pointer to the HW structure * * Called to determine if the I2C pins are being used for I2C or as an * external MDIO interface since the two options are mutually exclusive. **/ static bool e1000_sgmii_uses_mdio_82575(struct e1000_hw *hw) { u32 reg = 0; bool ext_mdio = FALSE; DEBUGFUNC("e1000_sgmii_uses_mdio_82575"); switch (hw->mac.type) { case e1000_82575: case e1000_82576: reg = E1000_READ_REG(hw, E1000_MDIC); ext_mdio = !!(reg & E1000_MDIC_DEST); break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: reg = E1000_READ_REG(hw, E1000_MDICNFG); ext_mdio = !!(reg & E1000_MDICNFG_EXT_MDIO); break; default: break; } return ext_mdio; } /** * e1000_init_phy_params_82575 - Init PHY func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_phy_params_82575(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; u32 ctrl_ext; DEBUGFUNC("e1000_init_phy_params_82575"); phy->ops.read_i2c_byte = e1000_read_i2c_byte_generic; phy->ops.write_i2c_byte = e1000_write_i2c_byte_generic; if (hw->phy.media_type != e1000_media_type_copper) { phy->type = e1000_phy_none; goto out; } phy->ops.power_up = e1000_power_up_phy_copper; phy->ops.power_down = e1000_power_down_phy_copper_82575; phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; phy->reset_delay_us = 100; phy->ops.acquire = e1000_acquire_phy_82575; phy->ops.check_reset_block = e1000_check_reset_block_generic; phy->ops.commit = e1000_phy_sw_reset_generic; phy->ops.get_cfg_done = e1000_get_cfg_done_82575; phy->ops.release = e1000_release_phy_82575; ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); if (e1000_sgmii_active_82575(hw)) { phy->ops.reset = e1000_phy_hw_reset_sgmii_82575; ctrl_ext |= E1000_CTRL_I2C_ENA; } else { phy->ops.reset = e1000_phy_hw_reset_generic; ctrl_ext &= ~E1000_CTRL_I2C_ENA; } E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); e1000_reset_mdicnfg_82580(hw); if (e1000_sgmii_active_82575(hw) && !e1000_sgmii_uses_mdio_82575(hw)) { phy->ops.read_reg = e1000_read_phy_reg_sgmii_82575; phy->ops.write_reg = e1000_write_phy_reg_sgmii_82575; } else { switch (hw->mac.type) { case e1000_82580: case e1000_i350: case e1000_i354: phy->ops.read_reg = e1000_read_phy_reg_82580; phy->ops.write_reg = e1000_write_phy_reg_82580; break; case e1000_i210: case e1000_i211: phy->ops.read_reg = e1000_read_phy_reg_gs40g; phy->ops.write_reg = e1000_write_phy_reg_gs40g; break; default: phy->ops.read_reg = e1000_read_phy_reg_igp; phy->ops.write_reg = e1000_write_phy_reg_igp; } } /* Set phy->phy_addr and phy->id. */ ret_val = e1000_get_phy_id_82575(hw); /* Verify phy id and set remaining function pointers */ switch (phy->id) { case M88E1543_E_PHY_ID: case M88E1512_E_PHY_ID: case I347AT4_E_PHY_ID: case M88E1112_E_PHY_ID: case M88E1340M_E_PHY_ID: case M88E1111_I_PHY_ID: phy->type = e1000_phy_m88; phy->ops.check_polarity = e1000_check_polarity_m88; phy->ops.get_info = e1000_get_phy_info_m88; if (phy->id == I347AT4_E_PHY_ID || phy->id == M88E1112_E_PHY_ID || phy->id == M88E1340M_E_PHY_ID) phy->ops.get_cable_length = e1000_get_cable_length_m88_gen2; else if (phy->id == M88E1543_E_PHY_ID || phy->id == M88E1512_E_PHY_ID) phy->ops.get_cable_length = e1000_get_cable_length_m88_gen2; else phy->ops.get_cable_length = e1000_get_cable_length_m88; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; /* Check if this PHY is confgured for media swap. */ if (phy->id == M88E1112_E_PHY_ID) { u16 data; ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 2); if (ret_val) goto out; ret_val = phy->ops.read_reg(hw, E1000_M88E1112_MAC_CTRL_1, &data); if (ret_val) goto out; data = (data & E1000_M88E1112_MAC_CTRL_1_MODE_MASK) >> E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT; if (data == E1000_M88E1112_AUTO_COPPER_SGMII || data == E1000_M88E1112_AUTO_COPPER_BASEX) hw->mac.ops.check_for_link = e1000_check_for_link_media_swap; } if (phy->id == M88E1512_E_PHY_ID) { ret_val = e1000_initialize_M88E1512_phy(hw); if (ret_val) goto out; } break; case IGP03E1000_E_PHY_ID: case IGP04E1000_E_PHY_ID: phy->type = e1000_phy_igp_3; phy->ops.check_polarity = e1000_check_polarity_igp; phy->ops.get_info = e1000_get_phy_info_igp; phy->ops.get_cable_length = e1000_get_cable_length_igp_2; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_igp; phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82575; phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_generic; break; case I82580_I_PHY_ID: case I350_I_PHY_ID: phy->type = e1000_phy_82580; phy->ops.check_polarity = e1000_check_polarity_82577; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_82577; phy->ops.get_cable_length = e1000_get_cable_length_82577; phy->ops.get_info = e1000_get_phy_info_82577; phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82580; phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82580; break; case I210_I_PHY_ID: phy->type = e1000_phy_i210; phy->ops.check_polarity = e1000_check_polarity_m88; phy->ops.get_info = e1000_get_phy_info_m88; phy->ops.get_cable_length = e1000_get_cable_length_m88_gen2; phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82580; phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82580; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; break; default: ret_val = -E1000_ERR_PHY; goto out; } out: return ret_val; } /** * e1000_init_nvm_params_82575 - Init NVM func ptrs. * @hw: pointer to the HW structure **/ s32 e1000_init_nvm_params_82575(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; u32 eecd = E1000_READ_REG(hw, E1000_EECD); u16 size; DEBUGFUNC("e1000_init_nvm_params_82575"); size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> E1000_EECD_SIZE_EX_SHIFT); /* * Added to a constant, "size" becomes the left-shift value * for setting word_size. */ size += NVM_WORD_SIZE_BASE_SHIFT; /* Just in case size is out of range, cap it to the largest * EEPROM size supported */ if (size > 15) size = 15; nvm->word_size = 1 << size; if (hw->mac.type < e1000_i210) { nvm->opcode_bits = 8; nvm->delay_usec = 1; switch (nvm->override) { case e1000_nvm_override_spi_large: nvm->page_size = 32; nvm->address_bits = 16; break; case e1000_nvm_override_spi_small: nvm->page_size = 8; nvm->address_bits = 8; break; default: nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 16 : 8; break; } if (nvm->word_size == (1 << 15)) nvm->page_size = 128; nvm->type = e1000_nvm_eeprom_spi; } else { nvm->type = e1000_nvm_flash_hw; } /* Function Pointers */ nvm->ops.acquire = e1000_acquire_nvm_82575; nvm->ops.release = e1000_release_nvm_82575; if (nvm->word_size < (1 << 15)) nvm->ops.read = e1000_read_nvm_eerd; else nvm->ops.read = e1000_read_nvm_spi; nvm->ops.write = e1000_write_nvm_spi; nvm->ops.validate = e1000_validate_nvm_checksum_generic; nvm->ops.update = e1000_update_nvm_checksum_generic; nvm->ops.valid_led_default = e1000_valid_led_default_82575; /* override generic family function pointers for specific descendants */ switch (hw->mac.type) { case e1000_82580: nvm->ops.validate = e1000_validate_nvm_checksum_82580; nvm->ops.update = e1000_update_nvm_checksum_82580; break; case e1000_i350: case e1000_i354: nvm->ops.validate = e1000_validate_nvm_checksum_i350; nvm->ops.update = e1000_update_nvm_checksum_i350; break; default: break; } return E1000_SUCCESS; } /** * e1000_init_mac_params_82575 - Init MAC func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_mac_params_82575(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; DEBUGFUNC("e1000_init_mac_params_82575"); /* Derives media type */ e1000_get_media_type_82575(hw); /* Set mta register count */ mac->mta_reg_count = 128; /* Set uta register count */ mac->uta_reg_count = (hw->mac.type == e1000_82575) ? 0 : 128; /* Set rar entry count */ mac->rar_entry_count = E1000_RAR_ENTRIES_82575; if (mac->type == e1000_82576) mac->rar_entry_count = E1000_RAR_ENTRIES_82576; if (mac->type == e1000_82580) mac->rar_entry_count = E1000_RAR_ENTRIES_82580; if (mac->type == e1000_i350 || mac->type == e1000_i354) mac->rar_entry_count = E1000_RAR_ENTRIES_I350; /* Enable EEE default settings for EEE supported devices */ if (mac->type >= e1000_i350) dev_spec->eee_disable = FALSE; /* Allow a single clear of the SW semaphore on I210 and newer */ if (mac->type >= e1000_i210) dev_spec->clear_semaphore_once = TRUE; /* Set if part includes ASF firmware */ mac->asf_firmware_present = TRUE; /* FWSM register */ mac->has_fwsm = TRUE; /* ARC supported; valid only if manageability features are enabled. */ mac->arc_subsystem_valid = !!(E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK); /* Function pointers */ /* bus type/speed/width */ mac->ops.get_bus_info = e1000_get_bus_info_pcie_generic; /* reset */ if (mac->type >= e1000_82580) mac->ops.reset_hw = e1000_reset_hw_82580; else mac->ops.reset_hw = e1000_reset_hw_82575; /* hw initialization */ if ((mac->type == e1000_i210) || (mac->type == e1000_i211)) mac->ops.init_hw = e1000_init_hw_i210; else mac->ops.init_hw = e1000_init_hw_82575; /* link setup */ mac->ops.setup_link = e1000_setup_link_generic; /* physical interface link setup */ mac->ops.setup_physical_interface = (hw->phy.media_type == e1000_media_type_copper) ? e1000_setup_copper_link_82575 : e1000_setup_serdes_link_82575; /* physical interface shutdown */ mac->ops.shutdown_serdes = e1000_shutdown_serdes_link_82575; /* physical interface power up */ mac->ops.power_up_serdes = e1000_power_up_serdes_link_82575; /* check for link */ mac->ops.check_for_link = e1000_check_for_link_82575; /* read mac address */ mac->ops.read_mac_addr = e1000_read_mac_addr_82575; /* configure collision distance */ mac->ops.config_collision_dist = e1000_config_collision_dist_82575; /* multicast address update */ mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic; if (hw->mac.type == e1000_i350 || mac->type == e1000_i354) { /* writing VFTA */ mac->ops.write_vfta = e1000_write_vfta_i350; /* clearing VFTA */ mac->ops.clear_vfta = e1000_clear_vfta_i350; } else { /* writing VFTA */ mac->ops.write_vfta = e1000_write_vfta_generic; /* clearing VFTA */ mac->ops.clear_vfta = e1000_clear_vfta_generic; } if (hw->mac.type >= e1000_82580) mac->ops.validate_mdi_setting = e1000_validate_mdi_setting_crossover_generic; /* ID LED init */ mac->ops.id_led_init = e1000_id_led_init_generic; /* blink LED */ mac->ops.blink_led = e1000_blink_led_generic; /* setup LED */ mac->ops.setup_led = e1000_setup_led_generic; /* cleanup LED */ mac->ops.cleanup_led = e1000_cleanup_led_generic; /* turn on/off LED */ mac->ops.led_on = e1000_led_on_generic; mac->ops.led_off = e1000_led_off_generic; /* clear hardware counters */ mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_82575; /* link info */ mac->ops.get_link_up_info = e1000_get_link_up_info_82575; /* acquire SW_FW sync */ mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_82575; mac->ops.release_swfw_sync = e1000_release_swfw_sync_82575; if (mac->type >= e1000_i210) { mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_i210; mac->ops.release_swfw_sync = e1000_release_swfw_sync_i210; } /* set lan id for port to determine which phy lock to use */ hw->mac.ops.set_lan_id(hw); return E1000_SUCCESS; } /** * e1000_init_function_pointers_82575 - Init func ptrs. * @hw: pointer to the HW structure * * Called to initialize all function pointers and parameters. **/ void e1000_init_function_pointers_82575(struct e1000_hw *hw) { DEBUGFUNC("e1000_init_function_pointers_82575"); hw->mac.ops.init_params = e1000_init_mac_params_82575; hw->nvm.ops.init_params = e1000_init_nvm_params_82575; hw->phy.ops.init_params = e1000_init_phy_params_82575; hw->mbx.ops.init_params = e1000_init_mbx_params_pf; } /** * e1000_acquire_phy_82575 - Acquire rights to access PHY * @hw: pointer to the HW structure * * Acquire access rights to the correct PHY. **/ static s32 e1000_acquire_phy_82575(struct e1000_hw *hw) { u16 mask = E1000_SWFW_PHY0_SM; DEBUGFUNC("e1000_acquire_phy_82575"); if (hw->bus.func == E1000_FUNC_1) mask = E1000_SWFW_PHY1_SM; else if (hw->bus.func == E1000_FUNC_2) mask = E1000_SWFW_PHY2_SM; else if (hw->bus.func == E1000_FUNC_3) mask = E1000_SWFW_PHY3_SM; return hw->mac.ops.acquire_swfw_sync(hw, mask); } /** * e1000_release_phy_82575 - Release rights to access PHY * @hw: pointer to the HW structure * * A wrapper to release access rights to the correct PHY. **/ static void e1000_release_phy_82575(struct e1000_hw *hw) { u16 mask = E1000_SWFW_PHY0_SM; DEBUGFUNC("e1000_release_phy_82575"); if (hw->bus.func == E1000_FUNC_1) mask = E1000_SWFW_PHY1_SM; else if (hw->bus.func == E1000_FUNC_2) mask = E1000_SWFW_PHY2_SM; else if (hw->bus.func == E1000_FUNC_3) mask = E1000_SWFW_PHY3_SM; hw->mac.ops.release_swfw_sync(hw, mask); } /** * e1000_read_phy_reg_sgmii_82575 - Read PHY register using sgmii * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Reads the PHY register at offset using the serial gigabit media independent * interface and stores the retrieved information in data. **/ static s32 e1000_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, u16 *data) { s32 ret_val = -E1000_ERR_PARAM; DEBUGFUNC("e1000_read_phy_reg_sgmii_82575"); if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) { DEBUGOUT1("PHY Address %u is out of range\n", offset); goto out; } ret_val = hw->phy.ops.acquire(hw); if (ret_val) goto out; ret_val = e1000_read_phy_reg_i2c(hw, offset, data); hw->phy.ops.release(hw); out: return ret_val; } /** * e1000_write_phy_reg_sgmii_82575 - Write PHY register using sgmii * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * * Writes the data to PHY register at the offset using the serial gigabit * media independent interface. **/ static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, u16 data) { s32 ret_val = -E1000_ERR_PARAM; DEBUGFUNC("e1000_write_phy_reg_sgmii_82575"); if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) { DEBUGOUT1("PHY Address %d is out of range\n", offset); goto out; } ret_val = hw->phy.ops.acquire(hw); if (ret_val) goto out; ret_val = e1000_write_phy_reg_i2c(hw, offset, data); hw->phy.ops.release(hw); out: return ret_val; } /** * e1000_get_phy_id_82575 - Retrieve PHY addr and id * @hw: pointer to the HW structure * * Retrieves the PHY address and ID for both PHY's which do and do not use * sgmi interface. **/ static s32 e1000_get_phy_id_82575(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; u16 phy_id; u32 ctrl_ext; u32 mdic; DEBUGFUNC("e1000_get_phy_id_82575"); /* some i354 devices need an extra read for phy id */ if (hw->mac.type == e1000_i354) e1000_get_phy_id(hw); /* * For SGMII PHYs, we try the list of possible addresses until * we find one that works. For non-SGMII PHYs * (e.g. integrated copper PHYs), an address of 1 should * work. The result of this function should mean phy->phy_addr * and phy->id are set correctly. */ if (!e1000_sgmii_active_82575(hw)) { phy->addr = 1; ret_val = e1000_get_phy_id(hw); goto out; } if (e1000_sgmii_uses_mdio_82575(hw)) { switch (hw->mac.type) { case e1000_82575: case e1000_82576: mdic = E1000_READ_REG(hw, E1000_MDIC); mdic &= E1000_MDIC_PHY_MASK; phy->addr = mdic >> E1000_MDIC_PHY_SHIFT; break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: mdic = E1000_READ_REG(hw, E1000_MDICNFG); mdic &= E1000_MDICNFG_PHY_MASK; phy->addr = mdic >> E1000_MDICNFG_PHY_SHIFT; break; default: ret_val = -E1000_ERR_PHY; goto out; break; } ret_val = e1000_get_phy_id(hw); goto out; } /* Power on sgmii phy if it is disabled */ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_SDP3_DATA); E1000_WRITE_FLUSH(hw); msec_delay(300); /* * The address field in the I2CCMD register is 3 bits and 0 is invalid. * Therefore, we need to test 1-7 */ for (phy->addr = 1; phy->addr < 8; phy->addr++) { ret_val = e1000_read_phy_reg_sgmii_82575(hw, PHY_ID1, &phy_id); if (ret_val == E1000_SUCCESS) { DEBUGOUT2("Vendor ID 0x%08X read at address %u\n", phy_id, phy->addr); /* * At the time of this writing, The M88 part is * the only supported SGMII PHY product. */ if (phy_id == M88_VENDOR) break; } else { DEBUGOUT1("PHY address %u was unreadable\n", phy->addr); } } /* A valid PHY type couldn't be found. */ if (phy->addr == 8) { phy->addr = 0; ret_val = -E1000_ERR_PHY; } else { ret_val = e1000_get_phy_id(hw); } /* restore previous sfp cage power state */ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); out: return ret_val; } /** * e1000_phy_hw_reset_sgmii_82575 - Performs a PHY reset * @hw: pointer to the HW structure * * Resets the PHY using the serial gigabit media independent interface. **/ static s32 e1000_phy_hw_reset_sgmii_82575(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; struct e1000_phy_info *phy = &hw->phy; DEBUGFUNC("e1000_phy_hw_reset_sgmii_82575"); /* * This isn't a TRUE "hard" reset, but is the only reset * available to us at this time. */ DEBUGOUT("Soft resetting SGMII attached PHY...\n"); if (!(hw->phy.ops.write_reg)) goto out; /* * SFP documentation requires the following to configure the SPF module * to work on SGMII. No further documentation is given. */ ret_val = hw->phy.ops.write_reg(hw, 0x1B, 0x8084); if (ret_val) goto out; ret_val = hw->phy.ops.commit(hw); if (ret_val) goto out; if (phy->id == M88E1512_E_PHY_ID) ret_val = e1000_initialize_M88E1512_phy(hw); out: return ret_val; } /** * e1000_set_d0_lplu_state_82575 - Set Low Power Linkup D0 state * @hw: pointer to the HW structure * @active: TRUE to enable LPLU, FALSE to disable * * Sets the LPLU D0 state according to the active flag. When * activating LPLU this function also disables smart speed * and vice versa. LPLU will not be activated unless the * device autonegotiation advertisement meets standards of * either 10 or 10/100 or 10/100/1000 at all duplexes. * This is a function pointer entry point only called by * PHY setup routines. **/ static s32 e1000_set_d0_lplu_state_82575(struct e1000_hw *hw, bool active) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; u16 data; DEBUGFUNC("e1000_set_d0_lplu_state_82575"); if (!(hw->phy.ops.read_reg)) goto out; ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); if (ret_val) goto out; if (active) { data |= IGP02E1000_PM_D0_LPLU; ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, data); if (ret_val) goto out; /* When LPLU is enabled, we should disable SmartSpeed */ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) goto out; } else { data &= ~IGP02E1000_PM_D0_LPLU; ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, data); /* * LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. */ if (phy->smart_speed == e1000_smart_speed_on) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) goto out; data |= IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) goto out; } else if (phy->smart_speed == e1000_smart_speed_off) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) goto out; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) goto out; } } out: return ret_val; } /** * e1000_set_d0_lplu_state_82580 - Set Low Power Linkup D0 state * @hw: pointer to the HW structure * @active: TRUE to enable LPLU, FALSE to disable * * Sets the LPLU D0 state according to the active flag. When * activating LPLU this function also disables smart speed * and vice versa. LPLU will not be activated unless the * device autonegotiation advertisement meets standards of * either 10 or 10/100 or 10/100/1000 at all duplexes. * This is a function pointer entry point only called by * PHY setup routines. **/ static s32 e1000_set_d0_lplu_state_82580(struct e1000_hw *hw, bool active) { struct e1000_phy_info *phy = &hw->phy; u32 data; DEBUGFUNC("e1000_set_d0_lplu_state_82580"); data = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT); if (active) { data |= E1000_82580_PM_D0_LPLU; /* When LPLU is enabled, we should disable SmartSpeed */ data &= ~E1000_82580_PM_SPD; } else { data &= ~E1000_82580_PM_D0_LPLU; /* * LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. */ if (phy->smart_speed == e1000_smart_speed_on) data |= E1000_82580_PM_SPD; else if (phy->smart_speed == e1000_smart_speed_off) data &= ~E1000_82580_PM_SPD; } E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, data); return E1000_SUCCESS; } /** * e1000_set_d3_lplu_state_82580 - Sets low power link up state for D3 * @hw: pointer to the HW structure * @active: boolean used to enable/disable lplu * * Success returns 0, Failure returns 1 * * The low power link up (lplu) state is set to the power management level D3 * and SmartSpeed is disabled when active is TRUE, else clear lplu for D3 * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU * is used during Dx states where the power conservation is most important. * During driver activity, SmartSpeed should be enabled so performance is * maintained. **/ s32 e1000_set_d3_lplu_state_82580(struct e1000_hw *hw, bool active) { struct e1000_phy_info *phy = &hw->phy; u32 data; DEBUGFUNC("e1000_set_d3_lplu_state_82580"); data = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT); if (!active) { data &= ~E1000_82580_PM_D3_LPLU; /* * LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. */ if (phy->smart_speed == e1000_smart_speed_on) data |= E1000_82580_PM_SPD; else if (phy->smart_speed == e1000_smart_speed_off) data &= ~E1000_82580_PM_SPD; } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { data |= E1000_82580_PM_D3_LPLU; /* When LPLU is enabled, we should disable SmartSpeed */ data &= ~E1000_82580_PM_SPD; } E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, data); return E1000_SUCCESS; } /** * e1000_acquire_nvm_82575 - Request for access to EEPROM * @hw: pointer to the HW structure * * Acquire the necessary semaphores for exclusive access to the EEPROM. * Set the EEPROM access request bit and wait for EEPROM access grant bit. * Return successful if access grant bit set, else clear the request for * EEPROM access and return -E1000_ERR_NVM (-1). **/ static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_acquire_nvm_82575"); ret_val = e1000_acquire_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); if (ret_val) goto out; /* * Check if there is some access * error this access may hook on */ if (hw->mac.type == e1000_i350) { u32 eecd = E1000_READ_REG(hw, E1000_EECD); if (eecd & (E1000_EECD_BLOCKED | E1000_EECD_ABORT | E1000_EECD_TIMEOUT)) { /* Clear all access error flags */ E1000_WRITE_REG(hw, E1000_EECD, eecd | E1000_EECD_ERROR_CLR); DEBUGOUT("Nvm bit banging access error detected and cleared.\n"); } } if (hw->mac.type == e1000_82580) { u32 eecd = E1000_READ_REG(hw, E1000_EECD); if (eecd & E1000_EECD_BLOCKED) { /* Clear access error flag */ E1000_WRITE_REG(hw, E1000_EECD, eecd | E1000_EECD_BLOCKED); DEBUGOUT("Nvm bit banging access error detected and cleared.\n"); } } ret_val = e1000_acquire_nvm_generic(hw); if (ret_val) e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); out: return ret_val; } /** * e1000_release_nvm_82575 - Release exclusive access to EEPROM * @hw: pointer to the HW structure * * Stop any current commands to the EEPROM and clear the EEPROM request bit, * then release the semaphores acquired. **/ static void e1000_release_nvm_82575(struct e1000_hw *hw) { DEBUGFUNC("e1000_release_nvm_82575"); e1000_release_nvm_generic(hw); e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); } /** * e1000_acquire_swfw_sync_82575 - Acquire SW/FW semaphore * @hw: pointer to the HW structure * @mask: specifies which semaphore to acquire * * Acquire the SW/FW semaphore to access the PHY or NVM. The mask * will also specify which port we're acquiring the lock for. **/ static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask) { u32 swfw_sync; u32 swmask = mask; u32 fwmask = mask << 16; s32 ret_val = E1000_SUCCESS; s32 i = 0, timeout = 200; DEBUGFUNC("e1000_acquire_swfw_sync_82575"); while (i < timeout) { if (e1000_get_hw_semaphore_generic(hw)) { ret_val = -E1000_ERR_SWFW_SYNC; goto out; } swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); if (!(swfw_sync & (fwmask | swmask))) break; /* * Firmware currently using resource (fwmask) * or other software thread using resource (swmask) */ e1000_put_hw_semaphore_generic(hw); msec_delay_irq(5); i++; } if (i == timeout) { DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); ret_val = -E1000_ERR_SWFW_SYNC; goto out; } swfw_sync |= swmask; E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); e1000_put_hw_semaphore_generic(hw); out: return ret_val; } /** * e1000_release_swfw_sync_82575 - Release SW/FW semaphore * @hw: pointer to the HW structure * @mask: specifies which semaphore to acquire * * Release the SW/FW semaphore used to access the PHY or NVM. The mask * will also specify which port we're releasing the lock for. **/ static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask) { u32 swfw_sync; DEBUGFUNC("e1000_release_swfw_sync_82575"); while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS) ; /* Empty */ swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); swfw_sync &= ~mask; E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); e1000_put_hw_semaphore_generic(hw); } /** * e1000_get_cfg_done_82575 - Read config done bit * @hw: pointer to the HW structure * * Read the management control register for the config done bit for * completion status. NOTE: silicon which is EEPROM-less will fail trying * to read the config done bit, so an error is *ONLY* logged and returns * E1000_SUCCESS. If we were to return with error, EEPROM-less silicon * would not be able to be reset or change link. **/ static s32 e1000_get_cfg_done_82575(struct e1000_hw *hw) { s32 timeout = PHY_CFG_TIMEOUT; u32 mask = E1000_NVM_CFG_DONE_PORT_0; DEBUGFUNC("e1000_get_cfg_done_82575"); if (hw->bus.func == E1000_FUNC_1) mask = E1000_NVM_CFG_DONE_PORT_1; else if (hw->bus.func == E1000_FUNC_2) mask = E1000_NVM_CFG_DONE_PORT_2; else if (hw->bus.func == E1000_FUNC_3) mask = E1000_NVM_CFG_DONE_PORT_3; while (timeout) { if (E1000_READ_REG(hw, E1000_EEMNGCTL) & mask) break; msec_delay(1); timeout--; } if (!timeout) DEBUGOUT("MNG configuration cycle has not completed.\n"); /* If EEPROM is not marked present, init the PHY manually */ if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES) && (hw->phy.type == e1000_phy_igp_3)) e1000_phy_init_script_igp3(hw); return E1000_SUCCESS; } /** * e1000_get_link_up_info_82575 - Get link speed/duplex info * @hw: pointer to the HW structure * @speed: stores the current speed * @duplex: stores the current duplex * * This is a wrapper function, if using the serial gigabit media independent * interface, use PCS to retrieve the link speed and duplex information. * Otherwise, use the generic function to get the link speed and duplex info. **/ static s32 e1000_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed, u16 *duplex) { s32 ret_val; DEBUGFUNC("e1000_get_link_up_info_82575"); if (hw->phy.media_type != e1000_media_type_copper) ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, speed, duplex); else ret_val = e1000_get_speed_and_duplex_copper_generic(hw, speed, duplex); return ret_val; } /** * e1000_check_for_link_82575 - Check for link * @hw: pointer to the HW structure * * If sgmii is enabled, then use the pcs register to determine link, otherwise * use the generic interface for determining link. **/ static s32 e1000_check_for_link_82575(struct e1000_hw *hw) { s32 ret_val; u16 speed, duplex; DEBUGFUNC("e1000_check_for_link_82575"); if (hw->phy.media_type != e1000_media_type_copper) { ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, &speed, &duplex); /* * Use this flag to determine if link needs to be checked or * not. If we have link clear the flag so that we do not * continue to check for link. */ hw->mac.get_link_status = !hw->mac.serdes_has_link; /* * Configure Flow Control now that Auto-Neg has completed. * First, we need to restore the desired flow control * settings because we may have had to re-autoneg with a * different link partner. */ ret_val = e1000_config_fc_after_link_up_generic(hw); if (ret_val) DEBUGOUT("Error configuring flow control\n"); } else { ret_val = e1000_check_for_copper_link_generic(hw); } return ret_val; } /** * e1000_check_for_link_media_swap - Check which M88E1112 interface linked * @hw: pointer to the HW structure * * Poll the M88E1112 interfaces to see which interface achieved link. */ static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 data; u8 port = 0; DEBUGFUNC("e1000_check_for_link_media_swap"); - /* Check for copper. */ + /* Check the copper medium. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0); if (ret_val) return ret_val; ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data); if (ret_val) return ret_val; if (data & E1000_M88E1112_STATUS_LINK) port = E1000_MEDIA_PORT_COPPER; - /* Check for other. */ + /* Check the other medium. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 1); if (ret_val) return ret_val; ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data); if (ret_val) return ret_val; + /* reset page to 0 */ + ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0); + if (ret_val) + return ret_val; + if (data & E1000_M88E1112_STATUS_LINK) port = E1000_MEDIA_PORT_OTHER; /* Determine if a swap needs to happen. */ if (port && (hw->dev_spec._82575.media_port != port)) { hw->dev_spec._82575.media_port = port; hw->dev_spec._82575.media_changed = TRUE; - } - - if (port == E1000_MEDIA_PORT_COPPER) { - /* reset page to 0 */ - ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0); - if (ret_val) - return ret_val; - e1000_check_for_link_82575(hw); } else { - e1000_check_for_link_82575(hw); - /* reset page to 0 */ - ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0); - if (ret_val) - return ret_val; + ret_val = e1000_check_for_link_82575(hw); } return E1000_SUCCESS; } /** * e1000_power_up_serdes_link_82575 - Power up the serdes link after shutdown * @hw: pointer to the HW structure **/ static void e1000_power_up_serdes_link_82575(struct e1000_hw *hw) { u32 reg; DEBUGFUNC("e1000_power_up_serdes_link_82575"); if ((hw->phy.media_type != e1000_media_type_internal_serdes) && !e1000_sgmii_active_82575(hw)) return; /* Enable PCS to turn on link */ reg = E1000_READ_REG(hw, E1000_PCS_CFG0); reg |= E1000_PCS_CFG_PCS_EN; E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg); /* Power up the laser */ reg = E1000_READ_REG(hw, E1000_CTRL_EXT); reg &= ~E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); /* flush the write to verify completion */ E1000_WRITE_FLUSH(hw); msec_delay(1); } /** * e1000_get_pcs_speed_and_duplex_82575 - Retrieve current speed/duplex * @hw: pointer to the HW structure * @speed: stores the current speed * @duplex: stores the current duplex * * Using the physical coding sub-layer (PCS), retrieve the current speed and * duplex, then store the values in the pointers provided. **/ static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, u16 *speed, u16 *duplex) { struct e1000_mac_info *mac = &hw->mac; u32 pcs; u32 status; DEBUGFUNC("e1000_get_pcs_speed_and_duplex_82575"); /* * Read the PCS Status register for link state. For non-copper mode, * the status register is not accurate. The PCS status register is * used instead. */ pcs = E1000_READ_REG(hw, E1000_PCS_LSTAT); /* * The link up bit determines when link is up on autoneg. */ if (pcs & E1000_PCS_LSTS_LINK_OK) { mac->serdes_has_link = TRUE; /* Detect and store PCS speed */ if (pcs & E1000_PCS_LSTS_SPEED_1000) *speed = SPEED_1000; else if (pcs & E1000_PCS_LSTS_SPEED_100) *speed = SPEED_100; else *speed = SPEED_10; /* Detect and store PCS duplex */ if (pcs & E1000_PCS_LSTS_DUPLEX_FULL) *duplex = FULL_DUPLEX; else *duplex = HALF_DUPLEX; /* Check if it is an I354 2.5Gb backplane connection. */ if (mac->type == e1000_i354) { status = E1000_READ_REG(hw, E1000_STATUS); if ((status & E1000_STATUS_2P5_SKU) && !(status & E1000_STATUS_2P5_SKU_OVER)) { *speed = SPEED_2500; *duplex = FULL_DUPLEX; DEBUGOUT("2500 Mbs, "); DEBUGOUT("Full Duplex\n"); } } } else { mac->serdes_has_link = FALSE; *speed = 0; *duplex = 0; } return E1000_SUCCESS; } /** * e1000_shutdown_serdes_link_82575 - Remove link during power down * @hw: pointer to the HW structure * * In the case of serdes shut down sfp and PCS on driver unload * when management pass thru is not enabled. **/ void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw) { u32 reg; DEBUGFUNC("e1000_shutdown_serdes_link_82575"); if ((hw->phy.media_type != e1000_media_type_internal_serdes) && !e1000_sgmii_active_82575(hw)) return; if (!e1000_enable_mng_pass_thru(hw)) { /* Disable PCS to turn off link */ reg = E1000_READ_REG(hw, E1000_PCS_CFG0); reg &= ~E1000_PCS_CFG_PCS_EN; E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg); /* shutdown the laser */ reg = E1000_READ_REG(hw, E1000_CTRL_EXT); reg |= E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); /* flush the write to verify completion */ E1000_WRITE_FLUSH(hw); msec_delay(1); } return; } /** * e1000_reset_hw_82575 - Reset hardware * @hw: pointer to the HW structure * * This resets the hardware into a known state. **/ static s32 e1000_reset_hw_82575(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; DEBUGFUNC("e1000_reset_hw_82575"); /* * Prevent the PCI-E bus from sticking if there is no TLP connection * on the last TLP read/write transaction when MAC is reset. */ ret_val = e1000_disable_pcie_master_generic(hw); if (ret_val) DEBUGOUT("PCI-E Master disable polling has failed.\n"); /* set the completion timeout for interface */ ret_val = e1000_set_pcie_completion_timeout(hw); if (ret_val) DEBUGOUT("PCI-E Set completion timeout has failed.\n"); DEBUGOUT("Masking off all interrupts\n"); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_WRITE_REG(hw, E1000_RCTL, 0); E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); E1000_WRITE_FLUSH(hw); msec_delay(10); ctrl = E1000_READ_REG(hw, E1000_CTRL); DEBUGOUT("Issuing a global reset to MAC\n"); E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ret_val = e1000_get_auto_rd_done_generic(hw); if (ret_val) { /* * When auto config read does not complete, do not * return with an error. This can happen in situations * where there is no eeprom and prevents getting link. */ DEBUGOUT("Auto Read Done did not complete\n"); } /* If EEPROM is not present, run manual init scripts */ if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES)) e1000_reset_init_script_82575(hw); /* Clear any pending interrupt events. */ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_READ_REG(hw, E1000_ICR); /* Install any alternate MAC address into RAR0 */ ret_val = e1000_check_alt_mac_addr_generic(hw); return ret_val; } /** * e1000_init_hw_82575 - Initialize hardware * @hw: pointer to the HW structure * * This inits the hardware readying it for operation. **/ s32 e1000_init_hw_82575(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; s32 ret_val; u16 i, rar_count = mac->rar_entry_count; DEBUGFUNC("e1000_init_hw_82575"); /* Initialize identification LED */ ret_val = mac->ops.id_led_init(hw); if (ret_val) { DEBUGOUT("Error initializing identification LED\n"); /* This is not fatal and we should not stop init due to this */ } /* Disabling VLAN filtering */ DEBUGOUT("Initializing the IEEE VLAN\n"); mac->ops.clear_vfta(hw); /* Setup the receive address */ e1000_init_rx_addrs_generic(hw, rar_count); /* Zero out the Multicast HASH table */ DEBUGOUT("Zeroing the MTA\n"); for (i = 0; i < mac->mta_reg_count; i++) E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); /* Zero out the Unicast HASH table */ DEBUGOUT("Zeroing the UTA\n"); for (i = 0; i < mac->uta_reg_count; i++) E1000_WRITE_REG_ARRAY(hw, E1000_UTA, i, 0); /* Setup link and flow control */ ret_val = mac->ops.setup_link(hw); /* Set the default MTU size */ hw->dev_spec._82575.mtu = 1500; /* * Clear all of the statistics registers (clear on read). It is * important that we do this after we have tried to establish link * because the symbol error count will increment wildly if there * is no link. */ e1000_clear_hw_cntrs_82575(hw); return ret_val; } /** * e1000_setup_copper_link_82575 - Configure copper link settings * @hw: pointer to the HW structure * * Configures the link for auto-neg or forced speed and duplex. Then we check * for link, once link is established calls to configure collision distance * and flow control are called. **/ static s32 e1000_setup_copper_link_82575(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; u32 phpm_reg; DEBUGFUNC("e1000_setup_copper_link_82575"); ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= E1000_CTRL_SLU; ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); /* Clear Go Link Disconnect bit on supported devices */ switch (hw->mac.type) { case e1000_82580: case e1000_i350: case e1000_i210: case e1000_i211: phpm_reg = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT); phpm_reg &= ~E1000_82580_PM_GO_LINKD; E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, phpm_reg); break; default: break; } ret_val = e1000_setup_serdes_link_82575(hw); if (ret_val) goto out; if (e1000_sgmii_active_82575(hw)) { /* allow time for SFP cage time to power up phy */ msec_delay(300); ret_val = hw->phy.ops.reset(hw); if (ret_val) { DEBUGOUT("Error resetting the PHY.\n"); goto out; } } switch (hw->phy.type) { case e1000_phy_i210: case e1000_phy_m88: switch (hw->phy.id) { case I347AT4_E_PHY_ID: case M88E1112_E_PHY_ID: case M88E1340M_E_PHY_ID: case M88E1543_E_PHY_ID: case M88E1512_E_PHY_ID: case I210_I_PHY_ID: ret_val = e1000_copper_link_setup_m88_gen2(hw); break; default: ret_val = e1000_copper_link_setup_m88(hw); break; } break; case e1000_phy_igp_3: ret_val = e1000_copper_link_setup_igp(hw); break; case e1000_phy_82580: ret_val = e1000_copper_link_setup_82577(hw); break; default: ret_val = -E1000_ERR_PHY; break; } if (ret_val) goto out; ret_val = e1000_setup_copper_link_generic(hw); out: return ret_val; } /** * e1000_setup_serdes_link_82575 - Setup link for serdes * @hw: pointer to the HW structure * * Configure the physical coding sub-layer (PCS) link. The PCS link is * used on copper connections where the serialized gigabit media independent * interface (sgmii), or serdes fiber is being used. Configures the link * for auto-negotiation or forces speed/duplex. **/ static s32 e1000_setup_serdes_link_82575(struct e1000_hw *hw) { u32 ctrl_ext, ctrl_reg, reg, anadv_reg; bool pcs_autoneg; s32 ret_val = E1000_SUCCESS; u16 data; DEBUGFUNC("e1000_setup_serdes_link_82575"); if ((hw->phy.media_type != e1000_media_type_internal_serdes) && !e1000_sgmii_active_82575(hw)) return ret_val; /* * On the 82575, SerDes loopback mode persists until it is * explicitly turned off or a power cycle is performed. A read to * the register does not indicate its status. Therefore, we ensure * loopback mode is disabled during initialization. */ E1000_WRITE_REG(hw, E1000_SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); /* power on the sfp cage if present */ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ctrl_reg = E1000_READ_REG(hw, E1000_CTRL); ctrl_reg |= E1000_CTRL_SLU; /* set both sw defined pins on 82575/82576*/ if (hw->mac.type == e1000_82575 || hw->mac.type == e1000_82576) ctrl_reg |= E1000_CTRL_SWDPIN0 | E1000_CTRL_SWDPIN1; reg = E1000_READ_REG(hw, E1000_PCS_LCTL); /* default pcs_autoneg to the same setting as mac autoneg */ pcs_autoneg = hw->mac.autoneg; switch (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK) { case E1000_CTRL_EXT_LINK_MODE_SGMII: /* sgmii mode lets the phy handle forcing speed/duplex */ pcs_autoneg = TRUE; /* autoneg time out should be disabled for SGMII mode */ reg &= ~(E1000_PCS_LCTL_AN_TIMEOUT); break; case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX: /* disable PCS autoneg and support parallel detect only */ pcs_autoneg = FALSE; /* fall through to default case */ default: if (hw->mac.type == e1000_82575 || hw->mac.type == e1000_82576) { ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } if (data & E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT) pcs_autoneg = FALSE; } /* * non-SGMII modes only supports a speed of 1000/Full for the * link so it is best to just force the MAC and let the pcs * link either autoneg or be forced to 1000/Full */ ctrl_reg |= E1000_CTRL_SPD_1000 | E1000_CTRL_FRCSPD | E1000_CTRL_FD | E1000_CTRL_FRCDPX; /* set speed of 1000/Full if speed/duplex is forced */ reg |= E1000_PCS_LCTL_FSV_1000 | E1000_PCS_LCTL_FDV_FULL; break; } E1000_WRITE_REG(hw, E1000_CTRL, ctrl_reg); /* * New SerDes mode allows for forcing speed or autonegotiating speed * at 1gb. Autoneg should be default set by most drivers. This is the * mode that will be compatible with older link partners and switches. * However, both are supported by the hardware and some drivers/tools. */ reg &= ~(E1000_PCS_LCTL_AN_ENABLE | E1000_PCS_LCTL_FLV_LINK_UP | E1000_PCS_LCTL_FSD | E1000_PCS_LCTL_FORCE_LINK); if (pcs_autoneg) { /* Set PCS register for autoneg */ reg |= E1000_PCS_LCTL_AN_ENABLE | /* Enable Autoneg */ E1000_PCS_LCTL_AN_RESTART; /* Restart autoneg */ /* Disable force flow control for autoneg */ reg &= ~E1000_PCS_LCTL_FORCE_FCTRL; /* Configure flow control advertisement for autoneg */ anadv_reg = E1000_READ_REG(hw, E1000_PCS_ANADV); anadv_reg &= ~(E1000_TXCW_ASM_DIR | E1000_TXCW_PAUSE); switch (hw->fc.requested_mode) { case e1000_fc_full: case e1000_fc_rx_pause: anadv_reg |= E1000_TXCW_ASM_DIR; anadv_reg |= E1000_TXCW_PAUSE; break; case e1000_fc_tx_pause: anadv_reg |= E1000_TXCW_ASM_DIR; break; default: break; } E1000_WRITE_REG(hw, E1000_PCS_ANADV, anadv_reg); DEBUGOUT1("Configuring Autoneg:PCS_LCTL=0x%08X\n", reg); } else { /* Set PCS register for forced link */ reg |= E1000_PCS_LCTL_FSD; /* Force Speed */ /* Force flow control for forced link */ reg |= E1000_PCS_LCTL_FORCE_FCTRL; DEBUGOUT1("Configuring Forced Link:PCS_LCTL=0x%08X\n", reg); } E1000_WRITE_REG(hw, E1000_PCS_LCTL, reg); if (!pcs_autoneg && !e1000_sgmii_active_82575(hw)) e1000_force_mac_fc_generic(hw); return ret_val; } /** * e1000_get_media_type_82575 - derives current media type. * @hw: pointer to the HW structure * * The media type is chosen reflecting few settings. * The following are taken into account: * - link mode set in the current port Init Control Word #3 * - current link mode settings in CSR register * - MDIO vs. I2C PHY control interface chosen * - SFP module media type **/ static s32 e1000_get_media_type_82575(struct e1000_hw *hw) { struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; s32 ret_val = E1000_SUCCESS; u32 ctrl_ext = 0; u32 link_mode = 0; /* Set internal phy as default */ dev_spec->sgmii_active = FALSE; dev_spec->module_plugged = FALSE; /* Get CSR setting */ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); /* extract link mode setting */ link_mode = ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK; switch (link_mode) { case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX: hw->phy.media_type = e1000_media_type_internal_serdes; break; case E1000_CTRL_EXT_LINK_MODE_GMII: hw->phy.media_type = e1000_media_type_copper; break; case E1000_CTRL_EXT_LINK_MODE_SGMII: /* Get phy control interface type set (MDIO vs. I2C)*/ if (e1000_sgmii_uses_mdio_82575(hw)) { hw->phy.media_type = e1000_media_type_copper; dev_spec->sgmii_active = TRUE; break; } /* fall through for I2C based SGMII */ case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES: /* read media type from SFP EEPROM */ ret_val = e1000_set_sfp_media_type_82575(hw); if ((ret_val != E1000_SUCCESS) || (hw->phy.media_type == e1000_media_type_unknown)) { /* * If media type was not identified then return media * type defined by the CTRL_EXT settings. */ hw->phy.media_type = e1000_media_type_internal_serdes; if (link_mode == E1000_CTRL_EXT_LINK_MODE_SGMII) { hw->phy.media_type = e1000_media_type_copper; dev_spec->sgmii_active = TRUE; } break; } /* do not change link mode for 100BaseFX */ if (dev_spec->eth_flags.e100_base_fx) break; /* change current link mode setting */ ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK; if (hw->phy.media_type == e1000_media_type_copper) ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_SGMII; else ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); break; } return ret_val; } /** * e1000_set_sfp_media_type_82575 - derives SFP module media type. * @hw: pointer to the HW structure * * The media type is chosen based on SFP module. * compatibility flags retrieved from SFP ID EEPROM. **/ static s32 e1000_set_sfp_media_type_82575(struct e1000_hw *hw) { s32 ret_val = E1000_ERR_CONFIG; u32 ctrl_ext = 0; struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; struct sfp_e1000_flags *eth_flags = &dev_spec->eth_flags; u8 tranceiver_type = 0; s32 timeout = 3; /* Turn I2C interface ON and power on sfp cage */ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_I2C_ENA); E1000_WRITE_FLUSH(hw); /* Read SFP module data */ while (timeout) { ret_val = e1000_read_sfp_data_byte(hw, E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_IDENTIFIER_OFFSET), &tranceiver_type); if (ret_val == E1000_SUCCESS) break; msec_delay(100); timeout--; } if (ret_val != E1000_SUCCESS) goto out; ret_val = e1000_read_sfp_data_byte(hw, E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_ETH_FLAGS_OFFSET), (u8 *)eth_flags); if (ret_val != E1000_SUCCESS) goto out; /* Check if there is some SFP module plugged and powered */ if ((tranceiver_type == E1000_SFF_IDENTIFIER_SFP) || (tranceiver_type == E1000_SFF_IDENTIFIER_SFF)) { dev_spec->module_plugged = TRUE; if (eth_flags->e1000_base_lx || eth_flags->e1000_base_sx) { hw->phy.media_type = e1000_media_type_internal_serdes; } else if (eth_flags->e100_base_fx) { dev_spec->sgmii_active = TRUE; hw->phy.media_type = e1000_media_type_internal_serdes; } else if (eth_flags->e1000_base_t) { dev_spec->sgmii_active = TRUE; hw->phy.media_type = e1000_media_type_copper; } else { hw->phy.media_type = e1000_media_type_unknown; DEBUGOUT("PHY module has not been recognized\n"); goto out; } } else { hw->phy.media_type = e1000_media_type_unknown; } ret_val = E1000_SUCCESS; out: /* Restore I2C interface setting */ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); return ret_val; } /** * e1000_valid_led_default_82575 - Verify a valid default LED config * @hw: pointer to the HW structure * @data: pointer to the NVM (EEPROM) * * Read the EEPROM for the current default LED configuration. If the * LED configuration is not valid, set to a valid LED configuration. **/ static s32 e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data) { s32 ret_val; DEBUGFUNC("e1000_valid_led_default_82575"); ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); goto out; } if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) { switch (hw->phy.media_type) { case e1000_media_type_internal_serdes: *data = ID_LED_DEFAULT_82575_SERDES; break; case e1000_media_type_copper: default: *data = ID_LED_DEFAULT; break; } } out: return ret_val; } /** * e1000_sgmii_active_82575 - Return sgmii state * @hw: pointer to the HW structure * * 82575 silicon has a serialized gigabit media independent interface (sgmii) * which can be enabled for use in the embedded applications. Simply * return the current state of the sgmii interface. **/ static bool e1000_sgmii_active_82575(struct e1000_hw *hw) { struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; return dev_spec->sgmii_active; } /** * e1000_reset_init_script_82575 - Inits HW defaults after reset * @hw: pointer to the HW structure * * Inits recommended HW defaults after a reset when there is no EEPROM * detected. This is only for the 82575. **/ static s32 e1000_reset_init_script_82575(struct e1000_hw *hw) { DEBUGFUNC("e1000_reset_init_script_82575"); if (hw->mac.type == e1000_82575) { DEBUGOUT("Running reset init script for 82575\n"); /* SerDes configuration via SERDESCTRL */ e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x00, 0x0C); e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x01, 0x78); e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x1B, 0x23); e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x23, 0x15); /* CCM configuration via CCMCTL register */ e1000_write_8bit_ctrl_reg_generic(hw, E1000_CCMCTL, 0x14, 0x00); e1000_write_8bit_ctrl_reg_generic(hw, E1000_CCMCTL, 0x10, 0x00); /* PCIe lanes configuration */ e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x00, 0xEC); e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x61, 0xDF); e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x34, 0x05); e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x2F, 0x81); /* PCIe PLL Configuration */ e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x02, 0x47); e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x14, 0x00); e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x10, 0x00); } return E1000_SUCCESS; } /** * e1000_read_mac_addr_82575 - Read device MAC address * @hw: pointer to the HW structure **/ static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_read_mac_addr_82575"); /* * If there's an alternate MAC address place it in RAR0 * so that it will override the Si installed default perm * address. */ ret_val = e1000_check_alt_mac_addr_generic(hw); if (ret_val) goto out; ret_val = e1000_read_mac_addr_generic(hw); out: return ret_val; } /** * e1000_config_collision_dist_82575 - Configure collision distance * @hw: pointer to the HW structure * * Configures the collision distance to the default value and is used * during link setup. **/ static void e1000_config_collision_dist_82575(struct e1000_hw *hw) { u32 tctl_ext; DEBUGFUNC("e1000_config_collision_dist_82575"); tctl_ext = E1000_READ_REG(hw, E1000_TCTL_EXT); tctl_ext &= ~E1000_TCTL_EXT_COLD; tctl_ext |= E1000_COLLISION_DISTANCE << E1000_TCTL_EXT_COLD_SHIFT; E1000_WRITE_REG(hw, E1000_TCTL_EXT, tctl_ext); E1000_WRITE_FLUSH(hw); } /** * e1000_power_down_phy_copper_82575 - Remove link during PHY power down * @hw: pointer to the HW structure * * In the case of a PHY power down to save power, or to turn off link during a * driver unload, or wake on lan is not enabled, remove the link. **/ static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; if (!(phy->ops.check_reset_block)) return; /* If the management interface is not enabled, then power down */ if (!(e1000_enable_mng_pass_thru(hw) || phy->ops.check_reset_block(hw))) e1000_power_down_phy_copper(hw); return; } /** * e1000_clear_hw_cntrs_82575 - Clear device specific hardware counters * @hw: pointer to the HW structure * * Clears the hardware counters by reading the counter registers. **/ static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw) { DEBUGFUNC("e1000_clear_hw_cntrs_82575"); e1000_clear_hw_cntrs_base_generic(hw); E1000_READ_REG(hw, E1000_PRC64); E1000_READ_REG(hw, E1000_PRC127); E1000_READ_REG(hw, E1000_PRC255); E1000_READ_REG(hw, E1000_PRC511); E1000_READ_REG(hw, E1000_PRC1023); E1000_READ_REG(hw, E1000_PRC1522); E1000_READ_REG(hw, E1000_PTC64); E1000_READ_REG(hw, E1000_PTC127); E1000_READ_REG(hw, E1000_PTC255); E1000_READ_REG(hw, E1000_PTC511); E1000_READ_REG(hw, E1000_PTC1023); E1000_READ_REG(hw, E1000_PTC1522); E1000_READ_REG(hw, E1000_ALGNERRC); E1000_READ_REG(hw, E1000_RXERRC); E1000_READ_REG(hw, E1000_TNCRS); E1000_READ_REG(hw, E1000_CEXTERR); E1000_READ_REG(hw, E1000_TSCTC); E1000_READ_REG(hw, E1000_TSCTFC); E1000_READ_REG(hw, E1000_MGTPRC); E1000_READ_REG(hw, E1000_MGTPDC); E1000_READ_REG(hw, E1000_MGTPTC); E1000_READ_REG(hw, E1000_IAC); E1000_READ_REG(hw, E1000_ICRXOC); E1000_READ_REG(hw, E1000_ICRXPTC); E1000_READ_REG(hw, E1000_ICRXATC); E1000_READ_REG(hw, E1000_ICTXPTC); E1000_READ_REG(hw, E1000_ICTXATC); E1000_READ_REG(hw, E1000_ICTXQEC); E1000_READ_REG(hw, E1000_ICTXQMTC); E1000_READ_REG(hw, E1000_ICRXDMTC); E1000_READ_REG(hw, E1000_CBTMPC); E1000_READ_REG(hw, E1000_HTDPMC); E1000_READ_REG(hw, E1000_CBRMPC); E1000_READ_REG(hw, E1000_RPTHC); E1000_READ_REG(hw, E1000_HGPTC); E1000_READ_REG(hw, E1000_HTCBDPC); E1000_READ_REG(hw, E1000_HGORCL); E1000_READ_REG(hw, E1000_HGORCH); E1000_READ_REG(hw, E1000_HGOTCL); E1000_READ_REG(hw, E1000_HGOTCH); E1000_READ_REG(hw, E1000_LENERRS); /* This register should not be read in copper configurations */ if ((hw->phy.media_type == e1000_media_type_internal_serdes) || e1000_sgmii_active_82575(hw)) E1000_READ_REG(hw, E1000_SCVPC); } /** * e1000_rx_fifo_flush_82575 - Clean rx fifo after Rx enable * @hw: pointer to the HW structure * * After Rx enable, if manageability is enabled then there is likely some * bad data at the start of the fifo and possibly in the DMA fifo. This * function clears the fifos and flushes any packets that came in as rx was * being enabled. **/ void e1000_rx_fifo_flush_82575(struct e1000_hw *hw) { u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled; int i, ms_wait; - DEBUGFUNC("e1000_rx_fifo_flush_82575"); - - /* disable IPv6 options as per hardware errata */ - rfctl = E1000_READ_REG(hw, E1000_RFCTL); - rfctl |= E1000_RFCTL_IPV6_EX_DIS; - E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); - + DEBUGFUNC("e1000_rx_fifo_workaround_82575"); if (hw->mac.type != e1000_82575 || !(E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_RCV_TCO_EN)) return; /* Disable all Rx queues */ for (i = 0; i < 4; i++) { rxdctl[i] = E1000_READ_REG(hw, E1000_RXDCTL(i)); E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl[i] & ~E1000_RXDCTL_QUEUE_ENABLE); } /* Poll all queues to verify they have shut down */ for (ms_wait = 0; ms_wait < 10; ms_wait++) { msec_delay(1); rx_enabled = 0; for (i = 0; i < 4; i++) rx_enabled |= E1000_READ_REG(hw, E1000_RXDCTL(i)); if (!(rx_enabled & E1000_RXDCTL_QUEUE_ENABLE)) break; } if (ms_wait == 10) DEBUGOUT("Queue disable timed out after 10ms\n"); /* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all * incoming packets are rejected. Set enable and wait 2ms so that * any packet that was coming in as RCTL.EN was set is flushed */ + rfctl = E1000_READ_REG(hw, E1000_RFCTL); E1000_WRITE_REG(hw, E1000_RFCTL, rfctl & ~E1000_RFCTL_LEF); rlpml = E1000_READ_REG(hw, E1000_RLPML); E1000_WRITE_REG(hw, E1000_RLPML, 0); rctl = E1000_READ_REG(hw, E1000_RCTL); temp_rctl = rctl & ~(E1000_RCTL_EN | E1000_RCTL_SBP); temp_rctl |= E1000_RCTL_LPE; E1000_WRITE_REG(hw, E1000_RCTL, temp_rctl); E1000_WRITE_REG(hw, E1000_RCTL, temp_rctl | E1000_RCTL_EN); E1000_WRITE_FLUSH(hw); msec_delay(2); /* Enable Rx queues that were previously enabled and restore our * previous state */ for (i = 0; i < 4; i++) E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl[i]); E1000_WRITE_REG(hw, E1000_RCTL, rctl); E1000_WRITE_FLUSH(hw); E1000_WRITE_REG(hw, E1000_RLPML, rlpml); E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); /* Flush receive errors generated by workaround */ E1000_READ_REG(hw, E1000_ROC); E1000_READ_REG(hw, E1000_RNBC); E1000_READ_REG(hw, E1000_MPC); } /** * e1000_set_pcie_completion_timeout - set pci-e completion timeout * @hw: pointer to the HW structure * * The defaults for 82575 and 82576 should be in the range of 50us to 50ms, * however the hardware default for these parts is 500us to 1ms which is less * than the 10ms recommended by the pci-e spec. To address this we need to * increase the value to either 10ms to 200ms for capability version 1 config, * or 16ms to 55ms for version 2. **/ static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw) { u32 gcr = E1000_READ_REG(hw, E1000_GCR); s32 ret_val = E1000_SUCCESS; u16 pcie_devctl2; /* only take action if timeout value is defaulted to 0 */ if (gcr & E1000_GCR_CMPL_TMOUT_MASK) goto out; /* * if capababilities version is type 1 we can write the * timeout of 10ms to 200ms through the GCR register */ if (!(gcr & E1000_GCR_CAP_VER2)) { gcr |= E1000_GCR_CMPL_TMOUT_10ms; goto out; } /* * for version 2 capabilities we need to write the config space * directly in order to set the completion timeout value for * 16ms to 55ms */ ret_val = e1000_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, &pcie_devctl2); if (ret_val) goto out; pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms; ret_val = e1000_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, &pcie_devctl2); out: /* disable completion timeout resend */ gcr &= ~E1000_GCR_CMPL_TMOUT_RESEND; E1000_WRITE_REG(hw, E1000_GCR, gcr); return ret_val; } /** * e1000_vmdq_set_anti_spoofing_pf - enable or disable anti-spoofing * @hw: pointer to the hardware struct * @enable: state to enter, either enabled or disabled * @pf: Physical Function pool - do not set anti-spoofing for the PF * * enables/disables L2 switch anti-spoofing functionality. **/ void e1000_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf) { u32 reg_val, reg_offset; switch (hw->mac.type) { case e1000_82576: reg_offset = E1000_DTXSWC; break; case e1000_i350: case e1000_i354: reg_offset = E1000_TXSWC; break; default: return; } reg_val = E1000_READ_REG(hw, reg_offset); if (enable) { reg_val |= (E1000_DTXSWC_MAC_SPOOF_MASK | E1000_DTXSWC_VLAN_SPOOF_MASK); /* The PF can spoof - it has to in order to * support emulation mode NICs */ reg_val ^= (1 << pf | 1 << (pf + MAX_NUM_VFS)); } else { reg_val &= ~(E1000_DTXSWC_MAC_SPOOF_MASK | E1000_DTXSWC_VLAN_SPOOF_MASK); } E1000_WRITE_REG(hw, reg_offset, reg_val); } /** * e1000_vmdq_set_loopback_pf - enable or disable vmdq loopback * @hw: pointer to the hardware struct * @enable: state to enter, either enabled or disabled * * enables/disables L2 switch loopback functionality. **/ void e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable) { u32 dtxswc; switch (hw->mac.type) { case e1000_82576: dtxswc = E1000_READ_REG(hw, E1000_DTXSWC); if (enable) dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN; else dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN; E1000_WRITE_REG(hw, E1000_DTXSWC, dtxswc); break; case e1000_i350: case e1000_i354: dtxswc = E1000_READ_REG(hw, E1000_TXSWC); if (enable) dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN; else dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN; E1000_WRITE_REG(hw, E1000_TXSWC, dtxswc); break; default: /* Currently no other hardware supports loopback */ break; } } /** * e1000_vmdq_set_replication_pf - enable or disable vmdq replication * @hw: pointer to the hardware struct * @enable: state to enter, either enabled or disabled * * enables/disables replication of packets across multiple pools. **/ void e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable) { u32 vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL); if (enable) vt_ctl |= E1000_VT_CTL_VM_REPL_EN; else vt_ctl &= ~E1000_VT_CTL_VM_REPL_EN; E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl); } /** * e1000_read_phy_reg_82580 - Read 82580 MDI control register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Reads the MDI control register in the PHY at offset and stores the * information read to data. **/ static s32 e1000_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data) { s32 ret_val; DEBUGFUNC("e1000_read_phy_reg_82580"); ret_val = hw->phy.ops.acquire(hw); if (ret_val) goto out; ret_val = e1000_read_phy_reg_mdic(hw, offset, data); hw->phy.ops.release(hw); out: return ret_val; } /** * e1000_write_phy_reg_82580 - Write 82580 MDI control register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write to register at offset * * Writes data to MDI control register in the PHY at offset. **/ static s32 e1000_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data) { s32 ret_val; DEBUGFUNC("e1000_write_phy_reg_82580"); ret_val = hw->phy.ops.acquire(hw); if (ret_val) goto out; ret_val = e1000_write_phy_reg_mdic(hw, offset, data); hw->phy.ops.release(hw); out: return ret_val; } /** * e1000_reset_mdicnfg_82580 - Reset MDICNFG destination and com_mdio bits * @hw: pointer to the HW structure * * This resets the the MDICNFG.Destination and MDICNFG.Com_MDIO bits based on * the values found in the EEPROM. This addresses an issue in which these * bits are not restored from EEPROM after reset. **/ static s32 e1000_reset_mdicnfg_82580(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u32 mdicnfg; u16 nvm_data = 0; DEBUGFUNC("e1000_reset_mdicnfg_82580"); if (hw->mac.type != e1000_82580) goto out; if (!e1000_sgmii_active_82575(hw)) goto out; ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); goto out; } mdicnfg = E1000_READ_REG(hw, E1000_MDICNFG); if (nvm_data & NVM_WORD24_EXT_MDIO) mdicnfg |= E1000_MDICNFG_EXT_MDIO; if (nvm_data & NVM_WORD24_COM_MDIO) mdicnfg |= E1000_MDICNFG_COM_MDIO; E1000_WRITE_REG(hw, E1000_MDICNFG, mdicnfg); out: return ret_val; } /** * e1000_reset_hw_82580 - Reset hardware * @hw: pointer to the HW structure * * This resets function or entire device (all ports, etc.) * to a known state. **/ static s32 e1000_reset_hw_82580(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; /* BH SW mailbox bit in SW_FW_SYNC */ u16 swmbsw_mask = E1000_SW_SYNCH_MB; u32 ctrl; bool global_device_reset = hw->dev_spec._82575.global_device_reset; DEBUGFUNC("e1000_reset_hw_82580"); hw->dev_spec._82575.global_device_reset = FALSE; /* 82580 does not reliably do global_device_reset due to hw errata */ if (hw->mac.type == e1000_82580) global_device_reset = FALSE; /* Get current control state. */ ctrl = E1000_READ_REG(hw, E1000_CTRL); /* * Prevent the PCI-E bus from sticking if there is no TLP connection * on the last TLP read/write transaction when MAC is reset. */ ret_val = e1000_disable_pcie_master_generic(hw); if (ret_val) DEBUGOUT("PCI-E Master disable polling has failed.\n"); DEBUGOUT("Masking off all interrupts\n"); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_WRITE_REG(hw, E1000_RCTL, 0); E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); E1000_WRITE_FLUSH(hw); msec_delay(10); /* Determine whether or not a global dev reset is requested */ if (global_device_reset && hw->mac.ops.acquire_swfw_sync(hw, swmbsw_mask)) global_device_reset = FALSE; if (global_device_reset && !(E1000_READ_REG(hw, E1000_STATUS) & E1000_STAT_DEV_RST_SET)) ctrl |= E1000_CTRL_DEV_RST; else ctrl |= E1000_CTRL_RST; E1000_WRITE_REG(hw, E1000_CTRL, ctrl); switch (hw->device_id) { case E1000_DEV_ID_DH89XXCC_SGMII: break; default: E1000_WRITE_FLUSH(hw); break; } /* Add delay to insure DEV_RST or RST has time to complete */ msec_delay(5); ret_val = e1000_get_auto_rd_done_generic(hw); if (ret_val) { /* * When auto config read does not complete, do not * return with an error. This can happen in situations * where there is no eeprom and prevents getting link. */ DEBUGOUT("Auto Read Done did not complete\n"); } /* clear global device reset status bit */ E1000_WRITE_REG(hw, E1000_STATUS, E1000_STAT_DEV_RST_SET); /* Clear any pending interrupt events. */ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_READ_REG(hw, E1000_ICR); ret_val = e1000_reset_mdicnfg_82580(hw); if (ret_val) DEBUGOUT("Could not reset MDICNFG based on EEPROM\n"); /* Install any alternate MAC address into RAR0 */ ret_val = e1000_check_alt_mac_addr_generic(hw); /* Release semaphore */ if (global_device_reset) hw->mac.ops.release_swfw_sync(hw, swmbsw_mask); return ret_val; } /** * e1000_rxpbs_adjust_82580 - adjust RXPBS value to reflect actual Rx PBA size * @data: data received by reading RXPBS register * * The 82580 uses a table based approach for packet buffer allocation sizes. * This function converts the retrieved value into the correct table value * 0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7 * 0x0 36 72 144 1 2 4 8 16 * 0x8 35 70 140 rsv rsv rsv rsv rsv */ u16 e1000_rxpbs_adjust_82580(u32 data) { u16 ret_val = 0; if (data < E1000_82580_RXPBS_TABLE_SIZE) ret_val = e1000_82580_rxpbs_table[data]; return ret_val; } /** * e1000_validate_nvm_checksum_with_offset - Validate EEPROM * checksum * @hw: pointer to the HW structure * @offset: offset in words of the checksum protected region * * Calculates the EEPROM checksum by reading/adding each word of the EEPROM * and then verifies that the sum of the EEPROM is equal to 0xBABA. **/ s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset) { s32 ret_val = E1000_SUCCESS; u16 checksum = 0; u16 i, nvm_data; DEBUGFUNC("e1000_validate_nvm_checksum_with_offset"); for (i = offset; i < ((NVM_CHECKSUM_REG + offset) + 1); i++) { ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); goto out; } checksum += nvm_data; } if (checksum != (u16) NVM_SUM) { DEBUGOUT("NVM Checksum Invalid\n"); ret_val = -E1000_ERR_NVM; goto out; } out: return ret_val; } /** * e1000_update_nvm_checksum_with_offset - Update EEPROM * checksum * @hw: pointer to the HW structure * @offset: offset in words of the checksum protected region * * Updates the EEPROM checksum by reading/adding each word of the EEPROM * up to the checksum. Then calculates the EEPROM checksum and writes the * value to the EEPROM. **/ s32 e1000_update_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset) { s32 ret_val; u16 checksum = 0; u16 i, nvm_data; DEBUGFUNC("e1000_update_nvm_checksum_with_offset"); for (i = offset; i < (NVM_CHECKSUM_REG + offset); i++) { ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error while updating checksum.\n"); goto out; } checksum += nvm_data; } checksum = (u16) NVM_SUM - checksum; ret_val = hw->nvm.ops.write(hw, (NVM_CHECKSUM_REG + offset), 1, &checksum); if (ret_val) DEBUGOUT("NVM Write Error while updating checksum.\n"); out: return ret_val; } /** * e1000_validate_nvm_checksum_82580 - Validate EEPROM checksum * @hw: pointer to the HW structure * * Calculates the EEPROM section checksum by reading/adding each word of * the EEPROM and then verifies that the sum of the EEPROM is * equal to 0xBABA. **/ static s32 e1000_validate_nvm_checksum_82580(struct e1000_hw *hw) { s32 ret_val; u16 eeprom_regions_count = 1; u16 j, nvm_data; u16 nvm_offset; DEBUGFUNC("e1000_validate_nvm_checksum_82580"); ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); goto out; } if (nvm_data & NVM_COMPATIBILITY_BIT_MASK) { /* if chekcsums compatibility bit is set validate checksums * for all 4 ports. */ eeprom_regions_count = 4; } for (j = 0; j < eeprom_regions_count; j++) { nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); ret_val = e1000_validate_nvm_checksum_with_offset(hw, nvm_offset); if (ret_val != E1000_SUCCESS) goto out; } out: return ret_val; } /** * e1000_update_nvm_checksum_82580 - Update EEPROM checksum * @hw: pointer to the HW structure * * Updates the EEPROM section checksums for all 4 ports by reading/adding * each word of the EEPROM up to the checksum. Then calculates the EEPROM * checksum and writes the value to the EEPROM. **/ static s32 e1000_update_nvm_checksum_82580(struct e1000_hw *hw) { s32 ret_val; u16 j, nvm_data; u16 nvm_offset; DEBUGFUNC("e1000_update_nvm_checksum_82580"); ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error while updating checksum compatibility bit.\n"); goto out; } if (!(nvm_data & NVM_COMPATIBILITY_BIT_MASK)) { /* set compatibility bit to validate checksums appropriately */ nvm_data = nvm_data | NVM_COMPATIBILITY_BIT_MASK; ret_val = hw->nvm.ops.write(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Write Error while updating checksum compatibility bit.\n"); goto out; } } for (j = 0; j < 4; j++) { nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); ret_val = e1000_update_nvm_checksum_with_offset(hw, nvm_offset); if (ret_val) goto out; } out: return ret_val; } /** * e1000_validate_nvm_checksum_i350 - Validate EEPROM checksum * @hw: pointer to the HW structure * * Calculates the EEPROM section checksum by reading/adding each word of * the EEPROM and then verifies that the sum of the EEPROM is * equal to 0xBABA. **/ static s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 j; u16 nvm_offset; DEBUGFUNC("e1000_validate_nvm_checksum_i350"); for (j = 0; j < 4; j++) { nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); ret_val = e1000_validate_nvm_checksum_with_offset(hw, nvm_offset); if (ret_val != E1000_SUCCESS) goto out; } out: return ret_val; } /** * e1000_update_nvm_checksum_i350 - Update EEPROM checksum * @hw: pointer to the HW structure * * Updates the EEPROM section checksums for all 4 ports by reading/adding * each word of the EEPROM up to the checksum. Then calculates the EEPROM * checksum and writes the value to the EEPROM. **/ static s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 j; u16 nvm_offset; DEBUGFUNC("e1000_update_nvm_checksum_i350"); for (j = 0; j < 4; j++) { nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); ret_val = e1000_update_nvm_checksum_with_offset(hw, nvm_offset); if (ret_val != E1000_SUCCESS) goto out; } out: return ret_val; } /** * __e1000_access_emi_reg - Read/write EMI register * @hw: pointer to the HW structure * @addr: EMI address to program * @data: pointer to value to read/write from/to the EMI address * @read: boolean flag to indicate read or write **/ static s32 __e1000_access_emi_reg(struct e1000_hw *hw, u16 address, u16 *data, bool read) { s32 ret_val; DEBUGFUNC("__e1000_access_emi_reg"); ret_val = hw->phy.ops.write_reg(hw, E1000_EMIADD, address); if (ret_val) return ret_val; if (read) ret_val = hw->phy.ops.read_reg(hw, E1000_EMIDATA, data); else ret_val = hw->phy.ops.write_reg(hw, E1000_EMIDATA, *data); return ret_val; } /** * e1000_read_emi_reg - Read Extended Management Interface register * @hw: pointer to the HW structure * @addr: EMI address to program * @data: value to be read from the EMI address **/ s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data) { DEBUGFUNC("e1000_read_emi_reg"); return __e1000_access_emi_reg(hw, addr, data, TRUE); } /** * e1000_initialize_M88E1512_phy - Initialize M88E1512 PHY * @hw: pointer to the HW structure * * Initialize Marverl 1512 to work correctly with Avoton. **/ s32 e1000_initialize_M88E1512_phy(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_initialize_M88E1512_phy"); /* Check if this is correct PHY. */ if (phy->id != M88E1512_E_PHY_ID) goto out; /* Switch to PHY page 0xFF. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x00FF); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0x214B); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2144); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0x0C28); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2146); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0xB233); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x214D); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0xCC0C); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2159); if (ret_val) goto out; /* Switch to PHY page 0xFB. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x00FB); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_3, 0x000D); if (ret_val) goto out; /* Switch to PHY page 0x12. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x12); if (ret_val) goto out; /* Change mode to SGMII-to-Copper */ ret_val = phy->ops.write_reg(hw, E1000_M88E1512_MODE, 0x8001); if (ret_val) goto out; /* Return the PHY to page 0. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0); if (ret_val) goto out; ret_val = phy->ops.commit(hw); if (ret_val) { DEBUGOUT("Error committing the PHY changes\n"); return ret_val; } msec_delay(1000); out: return ret_val; } /** * e1000_set_eee_i350 - Enable/disable EEE support * @hw: pointer to the HW structure - * @adv1g: boolean flag enabling 1G EEE advertisement - * @adv100m: boolean flag enabling 100M EEE advertisement * * Enable/disable EEE based on setting in dev_spec structure. * **/ -s32 e1000_set_eee_i350(struct e1000_hw *hw, bool adv1G, bool adv100M) +s32 e1000_set_eee_i350(struct e1000_hw *hw) { u32 ipcnfg, eeer; DEBUGFUNC("e1000_set_eee_i350"); if ((hw->mac.type < e1000_i350) || (hw->phy.media_type != e1000_media_type_copper)) goto out; ipcnfg = E1000_READ_REG(hw, E1000_IPCNFG); eeer = E1000_READ_REG(hw, E1000_EEER); /* enable or disable per user setting */ if (!(hw->dev_spec._82575.eee_disable)) { u32 eee_su = E1000_READ_REG(hw, E1000_EEE_SU); - if (adv100M) - ipcnfg |= E1000_IPCNFG_EEE_100M_AN; - else - ipcnfg &= ~E1000_IPCNFG_EEE_100M_AN; - - if (adv1G) - ipcnfg |= E1000_IPCNFG_EEE_1G_AN; - else - ipcnfg &= ~E1000_IPCNFG_EEE_1G_AN; - + ipcnfg |= (E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN); eeer |= (E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN | E1000_EEER_LPI_FC); /* This bit should not be set in normal operation. */ if (eee_su & E1000_EEE_SU_LPI_CLK_STP) DEBUGOUT("LPI Clock Stop Bit should not be set!\n"); } else { ipcnfg &= ~(E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN); eeer &= ~(E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN | E1000_EEER_LPI_FC); } E1000_WRITE_REG(hw, E1000_IPCNFG, ipcnfg); E1000_WRITE_REG(hw, E1000_EEER, eeer); E1000_READ_REG(hw, E1000_IPCNFG); E1000_READ_REG(hw, E1000_EEER); out: return E1000_SUCCESS; } /** * e1000_set_eee_i354 - Enable/disable EEE support * @hw: pointer to the HW structure - * @adv1g: boolean flag enabling 1G EEE advertisement - * @adv100m: boolean flag enabling 100M EEE advertisement * * Enable/disable EEE legacy mode based on setting in dev_spec structure. * **/ -s32 e1000_set_eee_i354(struct e1000_hw *hw, bool adv1G, bool adv100M) +s32 e1000_set_eee_i354(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; u16 phy_data; DEBUGFUNC("e1000_set_eee_i354"); if ((hw->phy.media_type != e1000_media_type_copper) || ((phy->id != M88E1543_E_PHY_ID) && (phy->id != M88E1512_E_PHY_ID))) goto out; if (!hw->dev_spec._82575.eee_disable) { /* Switch to PHY page 18. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 18); if (ret_val) goto out; ret_val = phy->ops.read_reg(hw, E1000_M88E1543_EEE_CTRL_1, &phy_data); if (ret_val) goto out; phy_data |= E1000_M88E1543_EEE_CTRL_1_MS; ret_val = phy->ops.write_reg(hw, E1000_M88E1543_EEE_CTRL_1, phy_data); if (ret_val) goto out; /* Return the PHY to page 0. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0); if (ret_val) goto out; /* Turn on EEE advertisement. */ ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, E1000_EEE_ADV_DEV_I354, &phy_data); if (ret_val) goto out; - if (adv100M) - phy_data |= E1000_EEE_ADV_100_SUPPORTED; - else - phy_data &= ~E1000_EEE_ADV_100_SUPPORTED; - - if (adv1G) - phy_data |= E1000_EEE_ADV_1000_SUPPORTED; - else - phy_data &= ~E1000_EEE_ADV_1000_SUPPORTED; - + phy_data |= E1000_EEE_ADV_100_SUPPORTED | + E1000_EEE_ADV_1000_SUPPORTED; ret_val = e1000_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, E1000_EEE_ADV_DEV_I354, phy_data); } else { /* Turn off EEE advertisement. */ ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, E1000_EEE_ADV_DEV_I354, &phy_data); if (ret_val) goto out; phy_data &= ~(E1000_EEE_ADV_100_SUPPORTED | E1000_EEE_ADV_1000_SUPPORTED); ret_val = e1000_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, E1000_EEE_ADV_DEV_I354, phy_data); } out: return ret_val; } /** * e1000_get_eee_status_i354 - Get EEE status * @hw: pointer to the HW structure * @status: EEE status * * Get EEE status by guessing based on whether Tx or Rx LPI indications have * been received. **/ s32 e1000_get_eee_status_i354(struct e1000_hw *hw, bool *status) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; u16 phy_data; DEBUGFUNC("e1000_get_eee_status_i354"); /* Check if EEE is supported on this device. */ if ((hw->phy.media_type != e1000_media_type_copper) || ((phy->id != M88E1543_E_PHY_ID) && (phy->id != M88E1512_E_PHY_ID))) goto out; ret_val = e1000_read_xmdio_reg(hw, E1000_PCS_STATUS_ADDR_I354, E1000_PCS_STATUS_DEV_I354, &phy_data); if (ret_val) goto out; *status = phy_data & (E1000_PCS_STATUS_TX_LPI_RCVD | E1000_PCS_STATUS_RX_LPI_RCVD) ? TRUE : FALSE; out: return ret_val; } /* Due to a hw errata, if the host tries to configure the VFTA register * while performing queries from the BMC or DMA, then the VFTA in some * cases won't be written. */ /** * e1000_clear_vfta_i350 - Clear VLAN filter table * @hw: pointer to the HW structure * * Clears the register array which contains the VLAN filter table by * setting all the values to 0. **/ void e1000_clear_vfta_i350(struct e1000_hw *hw) { u32 offset; int i; DEBUGFUNC("e1000_clear_vfta_350"); for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { for (i = 0; i < 10; i++) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0); E1000_WRITE_FLUSH(hw); } } /** * e1000_write_vfta_i350 - Write value to VLAN filter table * @hw: pointer to the HW structure * @offset: register offset in VLAN filter table * @value: register value written to VLAN filter table * * Writes value at the given offset in the register array which stores * the VLAN filter table. **/ void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value) { int i; DEBUGFUNC("e1000_write_vfta_350"); for (i = 0; i < 10; i++) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); E1000_WRITE_FLUSH(hw); } /** * e1000_set_i2c_bb - Enable I2C bit-bang * @hw: pointer to the HW structure * * Enable I2C bit-bang interface * **/ s32 e1000_set_i2c_bb(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u32 ctrl_ext, i2cparams; DEBUGFUNC("e1000_set_i2c_bb"); ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_I2C_ENA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); E1000_WRITE_FLUSH(hw); i2cparams = E1000_READ_REG(hw, E1000_I2CPARAMS); i2cparams |= E1000_I2CBB_EN; i2cparams |= E1000_I2C_DATA_OE_N; i2cparams |= E1000_I2C_CLK_OE_N; E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cparams); E1000_WRITE_FLUSH(hw); return ret_val; } /** * e1000_read_i2c_byte_generic - Reads 8 bit word over I2C * @hw: pointer to hardware structure * @byte_offset: byte offset to read * @dev_addr: device address * @data: value read * * Performs byte read operation over I2C interface at * a specified device address. **/ s32 e1000_read_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, u8 dev_addr, u8 *data) { s32 status = E1000_SUCCESS; u32 max_retry = 10; u32 retry = 1; u16 swfw_mask = 0; bool nack = TRUE; DEBUGFUNC("e1000_read_i2c_byte_generic"); swfw_mask = E1000_SWFW_PHY0_SM; do { if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS) { status = E1000_ERR_SWFW_SYNC; goto read_byte_out; } e1000_i2c_start(hw); /* Device Address and write indication */ status = e1000_clock_out_i2c_byte(hw, dev_addr); if (status != E1000_SUCCESS) goto fail; status = e1000_get_i2c_ack(hw); if (status != E1000_SUCCESS) goto fail; status = e1000_clock_out_i2c_byte(hw, byte_offset); if (status != E1000_SUCCESS) goto fail; status = e1000_get_i2c_ack(hw); if (status != E1000_SUCCESS) goto fail; e1000_i2c_start(hw); /* Device Address and read indication */ status = e1000_clock_out_i2c_byte(hw, (dev_addr | 0x1)); if (status != E1000_SUCCESS) goto fail; status = e1000_get_i2c_ack(hw); if (status != E1000_SUCCESS) goto fail; status = e1000_clock_in_i2c_byte(hw, data); if (status != E1000_SUCCESS) goto fail; status = e1000_clock_out_i2c_bit(hw, nack); if (status != E1000_SUCCESS) goto fail; e1000_i2c_stop(hw); break; fail: hw->mac.ops.release_swfw_sync(hw, swfw_mask); msec_delay(100); e1000_i2c_bus_clear(hw); retry++; if (retry < max_retry) DEBUGOUT("I2C byte read error - Retrying.\n"); else DEBUGOUT("I2C byte read error.\n"); } while (retry < max_retry); hw->mac.ops.release_swfw_sync(hw, swfw_mask); read_byte_out: return status; } /** * e1000_write_i2c_byte_generic - Writes 8 bit word over I2C * @hw: pointer to hardware structure * @byte_offset: byte offset to write * @dev_addr: device address * @data: value to write * * Performs byte write operation over I2C interface at * a specified device address. **/ s32 e1000_write_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, u8 dev_addr, u8 data) { s32 status = E1000_SUCCESS; u32 max_retry = 1; u32 retry = 0; u16 swfw_mask = 0; DEBUGFUNC("e1000_write_i2c_byte_generic"); swfw_mask = E1000_SWFW_PHY0_SM; if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS) { status = E1000_ERR_SWFW_SYNC; goto write_byte_out; } do { e1000_i2c_start(hw); status = e1000_clock_out_i2c_byte(hw, dev_addr); if (status != E1000_SUCCESS) goto fail; status = e1000_get_i2c_ack(hw); if (status != E1000_SUCCESS) goto fail; status = e1000_clock_out_i2c_byte(hw, byte_offset); if (status != E1000_SUCCESS) goto fail; status = e1000_get_i2c_ack(hw); if (status != E1000_SUCCESS) goto fail; status = e1000_clock_out_i2c_byte(hw, data); if (status != E1000_SUCCESS) goto fail; status = e1000_get_i2c_ack(hw); if (status != E1000_SUCCESS) goto fail; e1000_i2c_stop(hw); break; fail: e1000_i2c_bus_clear(hw); retry++; if (retry < max_retry) DEBUGOUT("I2C byte write error - Retrying.\n"); else DEBUGOUT("I2C byte write error.\n"); } while (retry < max_retry); hw->mac.ops.release_swfw_sync(hw, swfw_mask); write_byte_out: return status; } /** * e1000_i2c_start - Sets I2C start condition * @hw: pointer to hardware structure * * Sets I2C start condition (High -> Low on SDA while SCL is High) **/ static void e1000_i2c_start(struct e1000_hw *hw) { u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); DEBUGFUNC("e1000_i2c_start"); /* Start condition must begin with data and clock high */ e1000_set_i2c_data(hw, &i2cctl, 1); e1000_raise_i2c_clk(hw, &i2cctl); /* Setup time for start condition (4.7us) */ usec_delay(E1000_I2C_T_SU_STA); e1000_set_i2c_data(hw, &i2cctl, 0); /* Hold time for start condition (4us) */ usec_delay(E1000_I2C_T_HD_STA); e1000_lower_i2c_clk(hw, &i2cctl); /* Minimum low period of clock is 4.7 us */ usec_delay(E1000_I2C_T_LOW); } /** * e1000_i2c_stop - Sets I2C stop condition * @hw: pointer to hardware structure * * Sets I2C stop condition (Low -> High on SDA while SCL is High) **/ static void e1000_i2c_stop(struct e1000_hw *hw) { u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); DEBUGFUNC("e1000_i2c_stop"); /* Stop condition must begin with data low and clock high */ e1000_set_i2c_data(hw, &i2cctl, 0); e1000_raise_i2c_clk(hw, &i2cctl); /* Setup time for stop condition (4us) */ usec_delay(E1000_I2C_T_SU_STO); e1000_set_i2c_data(hw, &i2cctl, 1); /* bus free time between stop and start (4.7us)*/ usec_delay(E1000_I2C_T_BUF); } /** * e1000_clock_in_i2c_byte - Clocks in one byte via I2C * @hw: pointer to hardware structure * @data: data byte to clock in * * Clocks in one byte data via I2C data/clock **/ static s32 e1000_clock_in_i2c_byte(struct e1000_hw *hw, u8 *data) { s32 i; bool bit = 0; DEBUGFUNC("e1000_clock_in_i2c_byte"); *data = 0; for (i = 7; i >= 0; i--) { e1000_clock_in_i2c_bit(hw, &bit); *data |= bit << i; } return E1000_SUCCESS; } /** * e1000_clock_out_i2c_byte - Clocks out one byte via I2C * @hw: pointer to hardware structure * @data: data byte clocked out * * Clocks out one byte data via I2C data/clock **/ static s32 e1000_clock_out_i2c_byte(struct e1000_hw *hw, u8 data) { s32 status = E1000_SUCCESS; s32 i; u32 i2cctl; bool bit = 0; DEBUGFUNC("e1000_clock_out_i2c_byte"); for (i = 7; i >= 0; i--) { bit = (data >> i) & 0x1; status = e1000_clock_out_i2c_bit(hw, bit); if (status != E1000_SUCCESS) break; } /* Release SDA line (set high) */ i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); i2cctl |= E1000_I2C_DATA_OE_N; E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl); E1000_WRITE_FLUSH(hw); return status; } /** * e1000_get_i2c_ack - Polls for I2C ACK * @hw: pointer to hardware structure * * Clocks in/out one bit via I2C data/clock **/ static s32 e1000_get_i2c_ack(struct e1000_hw *hw) { s32 status = E1000_SUCCESS; u32 i = 0; u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); u32 timeout = 10; bool ack = TRUE; DEBUGFUNC("e1000_get_i2c_ack"); e1000_raise_i2c_clk(hw, &i2cctl); /* Minimum high period of clock is 4us */ usec_delay(E1000_I2C_T_HIGH); /* Wait until SCL returns high */ for (i = 0; i < timeout; i++) { usec_delay(1); i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); if (i2cctl & E1000_I2C_CLK_IN) break; } if (!(i2cctl & E1000_I2C_CLK_IN)) return E1000_ERR_I2C; ack = e1000_get_i2c_data(&i2cctl); if (ack) { DEBUGOUT("I2C ack was not received.\n"); status = E1000_ERR_I2C; } e1000_lower_i2c_clk(hw, &i2cctl); /* Minimum low period of clock is 4.7 us */ usec_delay(E1000_I2C_T_LOW); return status; } /** * e1000_clock_in_i2c_bit - Clocks in one bit via I2C data/clock * @hw: pointer to hardware structure * @data: read data value * * Clocks in one bit via I2C data/clock **/ static s32 e1000_clock_in_i2c_bit(struct e1000_hw *hw, bool *data) { u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); DEBUGFUNC("e1000_clock_in_i2c_bit"); e1000_raise_i2c_clk(hw, &i2cctl); /* Minimum high period of clock is 4us */ usec_delay(E1000_I2C_T_HIGH); i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); *data = e1000_get_i2c_data(&i2cctl); e1000_lower_i2c_clk(hw, &i2cctl); /* Minimum low period of clock is 4.7 us */ usec_delay(E1000_I2C_T_LOW); return E1000_SUCCESS; } /** * e1000_clock_out_i2c_bit - Clocks in/out one bit via I2C data/clock * @hw: pointer to hardware structure * @data: data value to write * * Clocks out one bit via I2C data/clock **/ static s32 e1000_clock_out_i2c_bit(struct e1000_hw *hw, bool data) { s32 status; u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); DEBUGFUNC("e1000_clock_out_i2c_bit"); status = e1000_set_i2c_data(hw, &i2cctl, data); if (status == E1000_SUCCESS) { e1000_raise_i2c_clk(hw, &i2cctl); /* Minimum high period of clock is 4us */ usec_delay(E1000_I2C_T_HIGH); e1000_lower_i2c_clk(hw, &i2cctl); /* Minimum low period of clock is 4.7 us. * This also takes care of the data hold time. */ usec_delay(E1000_I2C_T_LOW); } else { status = E1000_ERR_I2C; DEBUGOUT1("I2C data was not set to %X\n", data); } return status; } /** * e1000_raise_i2c_clk - Raises the I2C SCL clock * @hw: pointer to hardware structure * @i2cctl: Current value of I2CCTL register * * Raises the I2C clock line '0'->'1' **/ static void e1000_raise_i2c_clk(struct e1000_hw *hw, u32 *i2cctl) { DEBUGFUNC("e1000_raise_i2c_clk"); *i2cctl |= E1000_I2C_CLK_OUT; *i2cctl &= ~E1000_I2C_CLK_OE_N; E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl); E1000_WRITE_FLUSH(hw); /* SCL rise time (1000ns) */ usec_delay(E1000_I2C_T_RISE); } /** * e1000_lower_i2c_clk - Lowers the I2C SCL clock * @hw: pointer to hardware structure * @i2cctl: Current value of I2CCTL register * * Lowers the I2C clock line '1'->'0' **/ static void e1000_lower_i2c_clk(struct e1000_hw *hw, u32 *i2cctl) { DEBUGFUNC("e1000_lower_i2c_clk"); *i2cctl &= ~E1000_I2C_CLK_OUT; *i2cctl &= ~E1000_I2C_CLK_OE_N; E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl); E1000_WRITE_FLUSH(hw); /* SCL fall time (300ns) */ usec_delay(E1000_I2C_T_FALL); } /** * e1000_set_i2c_data - Sets the I2C data bit * @hw: pointer to hardware structure * @i2cctl: Current value of I2CCTL register * @data: I2C data value (0 or 1) to set * * Sets the I2C data bit **/ static s32 e1000_set_i2c_data(struct e1000_hw *hw, u32 *i2cctl, bool data) { s32 status = E1000_SUCCESS; DEBUGFUNC("e1000_set_i2c_data"); if (data) *i2cctl |= E1000_I2C_DATA_OUT; else *i2cctl &= ~E1000_I2C_DATA_OUT; *i2cctl &= ~E1000_I2C_DATA_OE_N; *i2cctl |= E1000_I2C_CLK_OE_N; E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl); E1000_WRITE_FLUSH(hw); /* Data rise/fall (1000ns/300ns) and set-up time (250ns) */ usec_delay(E1000_I2C_T_RISE + E1000_I2C_T_FALL + E1000_I2C_T_SU_DATA); *i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); if (data != e1000_get_i2c_data(i2cctl)) { status = E1000_ERR_I2C; DEBUGOUT1("Error - I2C data was not set to %X.\n", data); } return status; } /** * e1000_get_i2c_data - Reads the I2C SDA data bit * @hw: pointer to hardware structure * @i2cctl: Current value of I2CCTL register * * Returns the I2C data bit value **/ static bool e1000_get_i2c_data(u32 *i2cctl) { bool data; DEBUGFUNC("e1000_get_i2c_data"); if (*i2cctl & E1000_I2C_DATA_IN) data = 1; else data = 0; return data; } /** * e1000_i2c_bus_clear - Clears the I2C bus * @hw: pointer to hardware structure * * Clears the I2C bus by sending nine clock pulses. * Used when data line is stuck low. **/ void e1000_i2c_bus_clear(struct e1000_hw *hw) { u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); u32 i; DEBUGFUNC("e1000_i2c_bus_clear"); e1000_i2c_start(hw); e1000_set_i2c_data(hw, &i2cctl, 1); for (i = 0; i < 9; i++) { e1000_raise_i2c_clk(hw, &i2cctl); /* Min high period of clock is 4us */ usec_delay(E1000_I2C_T_HIGH); e1000_lower_i2c_clk(hw, &i2cctl); /* Min low period of clock is 4.7us*/ usec_delay(E1000_I2C_T_LOW); } e1000_i2c_start(hw); /* Put the i2c bus back to default state */ e1000_i2c_stop(hw); } Index: projects/clang-trunk/sys/dev/e1000/e1000_82575.h =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_82575.h (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_82575.h (revision 287502) @@ -1,521 +1,521 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _E1000_82575_H_ #define _E1000_82575_H_ #define ID_LED_DEFAULT_82575_SERDES ((ID_LED_DEF1_DEF2 << 12) | \ (ID_LED_DEF1_DEF2 << 8) | \ (ID_LED_DEF1_DEF2 << 4) | \ (ID_LED_OFF1_ON2)) /* * Receive Address Register Count * Number of high/low register pairs in the RAR. The RAR (Receive Address * Registers) holds the directed and multicast addresses that we monitor. * These entries are also used for MAC-based filtering. */ /* * For 82576, there are an additional set of RARs that begin at an offset * separate from the first set of RARs. */ #define E1000_RAR_ENTRIES_82575 16 #define E1000_RAR_ENTRIES_82576 24 #define E1000_RAR_ENTRIES_82580 24 #define E1000_RAR_ENTRIES_I350 32 #define E1000_SW_SYNCH_MB 0x00000100 #define E1000_STAT_DEV_RST_SET 0x00100000 #define E1000_CTRL_DEV_RST 0x20000000 #ifdef E1000_BIT_FIELDS struct e1000_adv_data_desc { __le64 buffer_addr; /* Address of the descriptor's data buffer */ union { u32 data; struct { u32 datalen:16; /* Data buffer length */ u32 rsvd:4; u32 dtyp:4; /* Descriptor type */ u32 dcmd:8; /* Descriptor command */ } config; } lower; union { u32 data; struct { u32 status:4; /* Descriptor status */ u32 idx:4; u32 popts:6; /* Packet Options */ u32 paylen:18; /* Payload length */ } options; } upper; }; #define E1000_TXD_DTYP_ADV_C 0x2 /* Advanced Context Descriptor */ #define E1000_TXD_DTYP_ADV_D 0x3 /* Advanced Data Descriptor */ #define E1000_ADV_TXD_CMD_DEXT 0x20 /* Descriptor extension (0 = legacy) */ #define E1000_ADV_TUCMD_IPV4 0x2 /* IP Packet Type: 1=IPv4 */ #define E1000_ADV_TUCMD_IPV6 0x0 /* IP Packet Type: 0=IPv6 */ #define E1000_ADV_TUCMD_L4T_UDP 0x0 /* L4 Packet TYPE of UDP */ #define E1000_ADV_TUCMD_L4T_TCP 0x4 /* L4 Packet TYPE of TCP */ #define E1000_ADV_TUCMD_MKRREQ 0x10 /* Indicates markers are required */ #define E1000_ADV_DCMD_EOP 0x1 /* End of Packet */ #define E1000_ADV_DCMD_IFCS 0x2 /* Insert FCS (Ethernet CRC) */ #define E1000_ADV_DCMD_RS 0x8 /* Report Status */ #define E1000_ADV_DCMD_VLE 0x40 /* Add VLAN tag */ #define E1000_ADV_DCMD_TSE 0x80 /* TCP Seg enable */ /* Extended Device Control */ #define E1000_CTRL_EXT_NSICR 0x00000001 /* Disable Intr Clear all on read */ struct e1000_adv_context_desc { union { u32 ip_config; struct { u32 iplen:9; u32 maclen:7; u32 vlan_tag:16; } fields; } ip_setup; u32 seq_num; union { u64 l4_config; struct { u32 mkrloc:9; u32 tucmd:11; u32 dtyp:4; u32 adv:8; u32 rsvd:4; u32 idx:4; u32 l4len:8; u32 mss:16; } fields; } l4_setup; }; #endif /* SRRCTL bit definitions */ #define E1000_SRRCTL_BSIZEPKT_SHIFT 10 /* Shift _right_ */ #define E1000_SRRCTL_BSIZEHDRSIZE_MASK 0x00000F00 #define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ #define E1000_SRRCTL_DESCTYPE_LEGACY 0x00000000 #define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 #define E1000_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000 #define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 #define E1000_SRRCTL_DESCTYPE_HDR_REPLICATION 0x06000000 #define E1000_SRRCTL_DESCTYPE_HDR_REPLICATION_LARGE_PKT 0x08000000 #define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000 #define E1000_SRRCTL_TIMESTAMP 0x40000000 #define E1000_SRRCTL_DROP_EN 0x80000000 #define E1000_SRRCTL_BSIZEPKT_MASK 0x0000007F #define E1000_SRRCTL_BSIZEHDR_MASK 0x00003F00 #define E1000_TX_HEAD_WB_ENABLE 0x1 #define E1000_TX_SEQNUM_WB_ENABLE 0x2 #define E1000_MRQC_ENABLE_RSS_4Q 0x00000002 #define E1000_MRQC_ENABLE_VMDQ 0x00000003 #define E1000_MRQC_ENABLE_VMDQ_RSS_2Q 0x00000005 #define E1000_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 #define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 #define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000 #define E1000_MRQC_ENABLE_RSS_8Q 0x00000002 #define E1000_VMRCTL_MIRROR_PORT_SHIFT 8 #define E1000_VMRCTL_MIRROR_DSTPORT_MASK (7 << \ E1000_VMRCTL_MIRROR_PORT_SHIFT) #define E1000_VMRCTL_POOL_MIRROR_ENABLE (1 << 0) #define E1000_VMRCTL_UPLINK_MIRROR_ENABLE (1 << 1) #define E1000_VMRCTL_DOWNLINK_MIRROR_ENABLE (1 << 2) #define E1000_EICR_TX_QUEUE ( \ E1000_EICR_TX_QUEUE0 | \ E1000_EICR_TX_QUEUE1 | \ E1000_EICR_TX_QUEUE2 | \ E1000_EICR_TX_QUEUE3) #define E1000_EICR_RX_QUEUE ( \ E1000_EICR_RX_QUEUE0 | \ E1000_EICR_RX_QUEUE1 | \ E1000_EICR_RX_QUEUE2 | \ E1000_EICR_RX_QUEUE3) #define E1000_EIMS_RX_QUEUE E1000_EICR_RX_QUEUE #define E1000_EIMS_TX_QUEUE E1000_EICR_TX_QUEUE #define EIMS_ENABLE_MASK ( \ E1000_EIMS_RX_QUEUE | \ E1000_EIMS_TX_QUEUE | \ E1000_EIMS_TCP_TIMER | \ E1000_EIMS_OTHER) /* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */ #define E1000_IMIR_PORT_IM_EN 0x00010000 /* TCP port enable */ #define E1000_IMIR_PORT_BP 0x00020000 /* TCP port check bypass */ #define E1000_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */ #define E1000_IMIREXT_CTRL_URG 0x00002000 /* Check URG bit in header */ #define E1000_IMIREXT_CTRL_ACK 0x00004000 /* Check ACK bit in header */ #define E1000_IMIREXT_CTRL_PSH 0x00008000 /* Check PSH bit in header */ #define E1000_IMIREXT_CTRL_RST 0x00010000 /* Check RST bit in header */ #define E1000_IMIREXT_CTRL_SYN 0x00020000 /* Check SYN bit in header */ #define E1000_IMIREXT_CTRL_FIN 0x00040000 /* Check FIN bit in header */ #define E1000_IMIREXT_CTRL_BP 0x00080000 /* Bypass check of ctrl bits */ /* Receive Descriptor - Advanced */ union e1000_adv_rx_desc { struct { __le64 pkt_addr; /* Packet buffer address */ __le64 hdr_addr; /* Header buffer address */ } read; struct { struct { union { __le32 data; struct { __le16 pkt_info; /*RSS type, Pkt type*/ /* Split Header, header buffer len */ __le16 hdr_info; } hs_rss; } lo_dword; union { __le32 rss; /* RSS Hash */ struct { __le16 ip_id; /* IP id */ __le16 csum; /* Packet Checksum */ } csum_ip; } hi_dword; } lower; struct { __le32 status_error; /* ext status/error */ __le16 length; /* Packet length */ __le16 vlan; /* VLAN tag */ } upper; } wb; /* writeback */ }; #define E1000_RXDADV_RSSTYPE_MASK 0x0000000F #define E1000_RXDADV_RSSTYPE_SHIFT 12 #define E1000_RXDADV_HDRBUFLEN_MASK 0x7FE0 #define E1000_RXDADV_HDRBUFLEN_SHIFT 5 #define E1000_RXDADV_SPLITHEADER_EN 0x00001000 #define E1000_RXDADV_SPH 0x8000 #define E1000_RXDADV_STAT_TS 0x10000 /* Pkt was time stamped */ #define E1000_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */ #define E1000_RXDADV_ERR_HBO 0x00800000 /* RSS Hash results */ #define E1000_RXDADV_RSSTYPE_NONE 0x00000000 #define E1000_RXDADV_RSSTYPE_IPV4_TCP 0x00000001 #define E1000_RXDADV_RSSTYPE_IPV4 0x00000002 #define E1000_RXDADV_RSSTYPE_IPV6_TCP 0x00000003 #define E1000_RXDADV_RSSTYPE_IPV6_EX 0x00000004 #define E1000_RXDADV_RSSTYPE_IPV6 0x00000005 #define E1000_RXDADV_RSSTYPE_IPV6_TCP_EX 0x00000006 #define E1000_RXDADV_RSSTYPE_IPV4_UDP 0x00000007 #define E1000_RXDADV_RSSTYPE_IPV6_UDP 0x00000008 #define E1000_RXDADV_RSSTYPE_IPV6_UDP_EX 0x00000009 /* RSS Packet Types as indicated in the receive descriptor */ #define E1000_RXDADV_PKTTYPE_ILMASK 0x000000F0 #define E1000_RXDADV_PKTTYPE_TLMASK 0x00000F00 #define E1000_RXDADV_PKTTYPE_NONE 0x00000000 #define E1000_RXDADV_PKTTYPE_IPV4 0x00000010 /* IPV4 hdr present */ #define E1000_RXDADV_PKTTYPE_IPV4_EX 0x00000020 /* IPV4 hdr + extensions */ #define E1000_RXDADV_PKTTYPE_IPV6 0x00000040 /* IPV6 hdr present */ #define E1000_RXDADV_PKTTYPE_IPV6_EX 0x00000080 /* IPV6 hdr + extensions */ #define E1000_RXDADV_PKTTYPE_TCP 0x00000100 /* TCP hdr present */ #define E1000_RXDADV_PKTTYPE_UDP 0x00000200 /* UDP hdr present */ #define E1000_RXDADV_PKTTYPE_SCTP 0x00000400 /* SCTP hdr present */ #define E1000_RXDADV_PKTTYPE_NFS 0x00000800 /* NFS hdr present */ #define E1000_RXDADV_PKTTYPE_IPSEC_ESP 0x00001000 /* IPSec ESP */ #define E1000_RXDADV_PKTTYPE_IPSEC_AH 0x00002000 /* IPSec AH */ #define E1000_RXDADV_PKTTYPE_LINKSEC 0x00004000 /* LinkSec Encap */ #define E1000_RXDADV_PKTTYPE_ETQF 0x00008000 /* PKTTYPE is ETQF index */ #define E1000_RXDADV_PKTTYPE_ETQF_MASK 0x00000070 /* ETQF has 8 indices */ #define E1000_RXDADV_PKTTYPE_ETQF_SHIFT 4 /* Right-shift 4 bits */ /* LinkSec results */ /* Security Processing bit Indication */ #define E1000_RXDADV_LNKSEC_STATUS_SECP 0x00020000 #define E1000_RXDADV_LNKSEC_ERROR_BIT_MASK 0x18000000 #define E1000_RXDADV_LNKSEC_ERROR_NO_SA_MATCH 0x08000000 #define E1000_RXDADV_LNKSEC_ERROR_REPLAY_ERROR 0x10000000 #define E1000_RXDADV_LNKSEC_ERROR_BAD_SIG 0x18000000 #define E1000_RXDADV_IPSEC_STATUS_SECP 0x00020000 #define E1000_RXDADV_IPSEC_ERROR_BIT_MASK 0x18000000 #define E1000_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL 0x08000000 #define E1000_RXDADV_IPSEC_ERROR_INVALID_LENGTH 0x10000000 #define E1000_RXDADV_IPSEC_ERROR_AUTHENTICATION_FAILED 0x18000000 /* Transmit Descriptor - Advanced */ union e1000_adv_tx_desc { struct { __le64 buffer_addr; /* Address of descriptor's data buf */ __le32 cmd_type_len; __le32 olinfo_status; } read; struct { __le64 rsvd; /* Reserved */ __le32 nxtseq_seed; __le32 status; } wb; }; /* Adv Transmit Descriptor Config Masks */ #define E1000_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */ #define E1000_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */ #define E1000_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */ #define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ #define E1000_ADVTXD_DCMD_RS 0x08000000 /* Report Status */ #define E1000_ADVTXD_DCMD_DDTYP_ISCSI 0x10000000 /* DDP hdr type or iSCSI */ #define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */ #define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */ #define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ #define E1000_ADVTXD_MAC_LINKSEC 0x00040000 /* Apply LinkSec on pkt */ #define E1000_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp pkt */ #define E1000_ADVTXD_STAT_SN_CRC 0x00000002 /* NXTSEQ/SEED prsnt in WB */ #define E1000_ADVTXD_IDX_SHIFT 4 /* Adv desc Index shift */ #define E1000_ADVTXD_POPTS_ISCO_1ST 0x00000000 /* 1st TSO of iSCSI PDU */ #define E1000_ADVTXD_POPTS_ISCO_MDL 0x00000800 /* Middle TSO of iSCSI PDU */ #define E1000_ADVTXD_POPTS_ISCO_LAST 0x00001000 /* Last TSO of iSCSI PDU */ /* 1st & Last TSO-full iSCSI PDU*/ #define E1000_ADVTXD_POPTS_ISCO_FULL 0x00001800 #define E1000_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */ #define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ /* Context descriptors */ struct e1000_adv_tx_context_desc { __le32 vlan_macip_lens; __le32 seqnum_seed; __le32 type_tucmd_mlhl; __le32 mss_l4len_idx; }; #define E1000_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ #define E1000_ADVTXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift */ #define E1000_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */ #define E1000_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */ #define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */ #define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ #define E1000_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 Packet TYPE of SCTP */ #define E1000_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */ /* IPSec Encrypt Enable for ESP */ #define E1000_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000 /* Req requires Markers and CRC */ #define E1000_ADVTXD_TUCMD_MKRREQ 0x00002000 #define E1000_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ #define E1000_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ /* Adv ctxt IPSec SA IDX mask */ #define E1000_ADVTXD_IPSEC_SA_INDEX_MASK 0x000000FF /* Adv ctxt IPSec ESP len mask */ #define E1000_ADVTXD_IPSEC_ESP_LEN_MASK 0x000000FF /* Additional Transmit Descriptor Control definitions */ #define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Tx Queue */ #define E1000_TXDCTL_SWFLSH 0x04000000 /* Tx Desc. wbk flushing */ /* Tx Queue Arbitration Priority 0=low, 1=high */ #define E1000_TXDCTL_PRIORITY 0x08000000 /* Additional Receive Descriptor Control definitions */ #define E1000_RXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Rx Queue */ #define E1000_RXDCTL_SWFLSH 0x04000000 /* Rx Desc. wbk flushing */ /* Direct Cache Access (DCA) definitions */ #define E1000_DCA_CTRL_DCA_ENABLE 0x00000000 /* DCA Enable */ #define E1000_DCA_CTRL_DCA_DISABLE 0x00000001 /* DCA Disable */ #define E1000_DCA_CTRL_DCA_MODE_CB1 0x00 /* DCA Mode CB1 */ #define E1000_DCA_CTRL_DCA_MODE_CB2 0x02 /* DCA Mode CB2 */ #define E1000_DCA_RXCTRL_CPUID_MASK 0x0000001F /* Rx CPUID Mask */ #define E1000_DCA_RXCTRL_DESC_DCA_EN (1 << 5) /* DCA Rx Desc enable */ #define E1000_DCA_RXCTRL_HEAD_DCA_EN (1 << 6) /* DCA Rx Desc header ena */ #define E1000_DCA_RXCTRL_DATA_DCA_EN (1 << 7) /* DCA Rx Desc payload ena */ #define E1000_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* DCA Rx Desc Relax Order */ #define E1000_DCA_TXCTRL_CPUID_MASK 0x0000001F /* Tx CPUID Mask */ #define E1000_DCA_TXCTRL_DESC_DCA_EN (1 << 5) /* DCA Tx Desc enable */ #define E1000_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */ #define E1000_DCA_TXCTRL_TX_WB_RO_EN (1 << 11) /* Tx Desc writeback RO bit */ #define E1000_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */ #define E1000_DCA_TXCTRL_CPUID_MASK_82576 0xFF000000 /* Tx CPUID Mask */ #define E1000_DCA_RXCTRL_CPUID_MASK_82576 0xFF000000 /* Rx CPUID Mask */ #define E1000_DCA_TXCTRL_CPUID_SHIFT_82576 24 /* Tx CPUID */ #define E1000_DCA_RXCTRL_CPUID_SHIFT_82576 24 /* Rx CPUID */ /* Additional interrupt register bit definitions */ #define E1000_ICR_LSECPNS 0x00000020 /* PN threshold - server */ #define E1000_IMS_LSECPNS E1000_ICR_LSECPNS /* PN threshold - server */ #define E1000_ICS_LSECPNS E1000_ICR_LSECPNS /* PN threshold - server */ /* ETQF register bit definitions */ #define E1000_ETQF_FILTER_ENABLE (1 << 26) #define E1000_ETQF_IMM_INT (1 << 29) #define E1000_ETQF_1588 (1 << 30) #define E1000_ETQF_QUEUE_ENABLE (1 << 31) /* * ETQF filter list: one static filter per filter consumer. This is * to avoid filter collisions later. Add new filters * here!! * * Current filters: * EAPOL 802.1x (0x888e): Filter 0 */ #define E1000_ETQF_FILTER_EAPOL 0 #define E1000_FTQF_VF_BP 0x00008000 #define E1000_FTQF_1588_TIME_STAMP 0x08000000 #define E1000_FTQF_MASK 0xF0000000 #define E1000_FTQF_MASK_PROTO_BP 0x10000000 #define E1000_FTQF_MASK_SOURCE_ADDR_BP 0x20000000 #define E1000_FTQF_MASK_DEST_ADDR_BP 0x40000000 #define E1000_FTQF_MASK_SOURCE_PORT_BP 0x80000000 #define E1000_NVM_APME_82575 0x0400 #define MAX_NUM_VFS 7 #define E1000_DTXSWC_MAC_SPOOF_MASK 0x000000FF /* Per VF MAC spoof cntrl */ #define E1000_DTXSWC_VLAN_SPOOF_MASK 0x0000FF00 /* Per VF VLAN spoof cntrl */ #define E1000_DTXSWC_LLE_MASK 0x00FF0000 /* Per VF Local LB enables */ #define E1000_DTXSWC_VLAN_SPOOF_SHIFT 8 #define E1000_DTXSWC_LLE_SHIFT 16 #define E1000_DTXSWC_VMDQ_LOOPBACK_EN (1 << 31) /* global VF LB enable */ /* Easy defines for setting default pool, would normally be left a zero */ #define E1000_VT_CTL_DEFAULT_POOL_SHIFT 7 #define E1000_VT_CTL_DEFAULT_POOL_MASK (0x7 << E1000_VT_CTL_DEFAULT_POOL_SHIFT) /* Other useful VMD_CTL register defines */ #define E1000_VT_CTL_IGNORE_MAC (1 << 28) #define E1000_VT_CTL_DISABLE_DEF_POOL (1 << 29) #define E1000_VT_CTL_VM_REPL_EN (1 << 30) /* Per VM Offload register setup */ #define E1000_VMOLR_RLPML_MASK 0x00003FFF /* Long Packet Maximum Length mask */ #define E1000_VMOLR_LPE 0x00010000 /* Accept Long packet */ #define E1000_VMOLR_RSSE 0x00020000 /* Enable RSS */ #define E1000_VMOLR_AUPE 0x01000000 /* Accept untagged packets */ #define E1000_VMOLR_ROMPE 0x02000000 /* Accept overflow multicast */ #define E1000_VMOLR_ROPE 0x04000000 /* Accept overflow unicast */ #define E1000_VMOLR_BAM 0x08000000 /* Accept Broadcast packets */ #define E1000_VMOLR_MPME 0x10000000 /* Multicast promiscuous mode */ #define E1000_VMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */ #define E1000_VMOLR_STRCRC 0x80000000 /* CRC stripping enable */ #define E1000_VMOLR_VPE 0x00800000 /* VLAN promiscuous enable */ #define E1000_VMOLR_UPE 0x20000000 /* Unicast promisuous enable */ #define E1000_DVMOLR_HIDVLAN 0x20000000 /* Vlan hiding enable */ #define E1000_DVMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */ #define E1000_DVMOLR_STRCRC 0x80000000 /* CRC stripping enable */ #define E1000_PBRWAC_WALPB 0x00000007 /* Wrap around event on LAN Rx PB */ #define E1000_PBRWAC_PBE 0x00000008 /* Rx packet buffer empty */ #define E1000_VLVF_ARRAY_SIZE 32 #define E1000_VLVF_VLANID_MASK 0x00000FFF #define E1000_VLVF_POOLSEL_SHIFT 12 #define E1000_VLVF_POOLSEL_MASK (0xFF << E1000_VLVF_POOLSEL_SHIFT) #define E1000_VLVF_LVLAN 0x00100000 #define E1000_VLVF_VLANID_ENABLE 0x80000000 #define E1000_VMVIR_VLANA_DEFAULT 0x40000000 /* Always use default VLAN */ #define E1000_VMVIR_VLANA_NEVER 0x80000000 /* Never insert VLAN tag */ #define E1000_VF_INIT_TIMEOUT 200 /* Number of retries to clear RSTI */ #define E1000_IOVCTL 0x05BBC #define E1000_IOVCTL_REUSE_VFQ 0x00000001 #define E1000_RPLOLR_STRVLAN 0x40000000 #define E1000_RPLOLR_STRCRC 0x80000000 #define E1000_TCTL_EXT_COLD 0x000FFC00 #define E1000_TCTL_EXT_COLD_SHIFT 10 #define E1000_DTXCTL_8023LL 0x0004 #define E1000_DTXCTL_VLAN_ADDED 0x0008 #define E1000_DTXCTL_OOS_ENABLE 0x0010 #define E1000_DTXCTL_MDP_EN 0x0020 #define E1000_DTXCTL_SPOOF_INT 0x0040 #define E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT (1 << 14) #define ALL_QUEUES 0xFFFF /* Rx packet buffer size defines */ #define E1000_RXPBS_SIZE_MASK_82576 0x0000007F void e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable); void e1000_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf); void e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable); s32 e1000_init_nvm_params_82575(struct e1000_hw *hw); s32 e1000_init_hw_82575(struct e1000_hw *hw); enum e1000_promisc_type { e1000_promisc_disabled = 0, /* all promisc modes disabled */ e1000_promisc_unicast = 1, /* unicast promiscuous enabled */ e1000_promisc_multicast = 2, /* multicast promiscuous enabled */ e1000_promisc_enabled = 3, /* both uni and multicast promisc */ e1000_num_promisc_types }; void e1000_vfta_set_vf(struct e1000_hw *, u16, bool); void e1000_rlpml_set_vf(struct e1000_hw *, u16); s32 e1000_promisc_set_vf(struct e1000_hw *, enum e1000_promisc_type type); u16 e1000_rxpbs_adjust_82580(u32 data); s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data); -s32 e1000_set_eee_i350(struct e1000_hw *hw, bool adv1G, bool adv100M); -s32 e1000_set_eee_i354(struct e1000_hw *hw, bool adv1G, bool adv100M); +s32 e1000_set_eee_i350(struct e1000_hw *); +s32 e1000_set_eee_i354(struct e1000_hw *); s32 e1000_get_eee_status_i354(struct e1000_hw *, bool *); s32 e1000_initialize_M88E1512_phy(struct e1000_hw *hw); /* I2C SDA and SCL timing parameters for standard mode */ #define E1000_I2C_T_HD_STA 4 #define E1000_I2C_T_LOW 5 #define E1000_I2C_T_HIGH 4 #define E1000_I2C_T_SU_STA 5 #define E1000_I2C_T_HD_DATA 5 #define E1000_I2C_T_SU_DATA 1 #define E1000_I2C_T_RISE 1 #define E1000_I2C_T_FALL 1 #define E1000_I2C_T_SU_STO 4 #define E1000_I2C_T_BUF 5 s32 e1000_set_i2c_bb(struct e1000_hw *hw); s32 e1000_read_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, u8 dev_addr, u8 *data); s32 e1000_write_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, u8 dev_addr, u8 data); void e1000_i2c_bus_clear(struct e1000_hw *hw); #endif /* _E1000_82575_H_ */ Index: projects/clang-trunk/sys/dev/e1000/e1000_api.c =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_api.c (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_api.c (revision 287502) @@ -1,1371 +1,1364 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "e1000_api.h" /** * e1000_init_mac_params - Initialize MAC function pointers * @hw: pointer to the HW structure * * This function initializes the function pointers for the MAC * set of functions. Called by drivers or by e1000_setup_init_funcs. **/ s32 e1000_init_mac_params(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; if (hw->mac.ops.init_params) { ret_val = hw->mac.ops.init_params(hw); if (ret_val) { DEBUGOUT("MAC Initialization Error\n"); goto out; } } else { DEBUGOUT("mac.init_mac_params was NULL\n"); ret_val = -E1000_ERR_CONFIG; } out: return ret_val; } /** * e1000_init_nvm_params - Initialize NVM function pointers * @hw: pointer to the HW structure * * This function initializes the function pointers for the NVM * set of functions. Called by drivers or by e1000_setup_init_funcs. **/ s32 e1000_init_nvm_params(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; if (hw->nvm.ops.init_params) { ret_val = hw->nvm.ops.init_params(hw); if (ret_val) { DEBUGOUT("NVM Initialization Error\n"); goto out; } } else { DEBUGOUT("nvm.init_nvm_params was NULL\n"); ret_val = -E1000_ERR_CONFIG; } out: return ret_val; } /** * e1000_init_phy_params - Initialize PHY function pointers * @hw: pointer to the HW structure * * This function initializes the function pointers for the PHY * set of functions. Called by drivers or by e1000_setup_init_funcs. **/ s32 e1000_init_phy_params(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; if (hw->phy.ops.init_params) { ret_val = hw->phy.ops.init_params(hw); if (ret_val) { DEBUGOUT("PHY Initialization Error\n"); goto out; } } else { DEBUGOUT("phy.init_phy_params was NULL\n"); ret_val = -E1000_ERR_CONFIG; } out: return ret_val; } /** * e1000_init_mbx_params - Initialize mailbox function pointers * @hw: pointer to the HW structure * * This function initializes the function pointers for the PHY * set of functions. Called by drivers or by e1000_setup_init_funcs. **/ s32 e1000_init_mbx_params(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; if (hw->mbx.ops.init_params) { ret_val = hw->mbx.ops.init_params(hw); if (ret_val) { DEBUGOUT("Mailbox Initialization Error\n"); goto out; } } else { DEBUGOUT("mbx.init_mbx_params was NULL\n"); ret_val = -E1000_ERR_CONFIG; } out: return ret_val; } /** * e1000_set_mac_type - Sets MAC type * @hw: pointer to the HW structure * * This function sets the mac type of the adapter based on the * device ID stored in the hw structure. * MUST BE FIRST FUNCTION CALLED (explicitly or through * e1000_setup_init_funcs()). **/ s32 e1000_set_mac_type(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_set_mac_type"); switch (hw->device_id) { case E1000_DEV_ID_82542: mac->type = e1000_82542; break; case E1000_DEV_ID_82543GC_FIBER: case E1000_DEV_ID_82543GC_COPPER: mac->type = e1000_82543; break; case E1000_DEV_ID_82544EI_COPPER: case E1000_DEV_ID_82544EI_FIBER: case E1000_DEV_ID_82544GC_COPPER: case E1000_DEV_ID_82544GC_LOM: mac->type = e1000_82544; break; case E1000_DEV_ID_82540EM: case E1000_DEV_ID_82540EM_LOM: case E1000_DEV_ID_82540EP: case E1000_DEV_ID_82540EP_LOM: case E1000_DEV_ID_82540EP_LP: mac->type = e1000_82540; break; case E1000_DEV_ID_82545EM_COPPER: case E1000_DEV_ID_82545EM_FIBER: mac->type = e1000_82545; break; case E1000_DEV_ID_82545GM_COPPER: case E1000_DEV_ID_82545GM_FIBER: case E1000_DEV_ID_82545GM_SERDES: mac->type = e1000_82545_rev_3; break; case E1000_DEV_ID_82546EB_COPPER: case E1000_DEV_ID_82546EB_FIBER: case E1000_DEV_ID_82546EB_QUAD_COPPER: mac->type = e1000_82546; break; case E1000_DEV_ID_82546GB_COPPER: case E1000_DEV_ID_82546GB_FIBER: case E1000_DEV_ID_82546GB_SERDES: case E1000_DEV_ID_82546GB_PCIE: case E1000_DEV_ID_82546GB_QUAD_COPPER: case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: mac->type = e1000_82546_rev_3; break; case E1000_DEV_ID_82541EI: case E1000_DEV_ID_82541EI_MOBILE: case E1000_DEV_ID_82541ER_LOM: mac->type = e1000_82541; break; case E1000_DEV_ID_82541ER: case E1000_DEV_ID_82541GI: case E1000_DEV_ID_82541GI_LF: case E1000_DEV_ID_82541GI_MOBILE: mac->type = e1000_82541_rev_2; break; case E1000_DEV_ID_82547EI: case E1000_DEV_ID_82547EI_MOBILE: mac->type = e1000_82547; break; case E1000_DEV_ID_82547GI: mac->type = e1000_82547_rev_2; break; case E1000_DEV_ID_82571EB_COPPER: case E1000_DEV_ID_82571EB_FIBER: case E1000_DEV_ID_82571EB_SERDES: case E1000_DEV_ID_82571EB_SERDES_DUAL: case E1000_DEV_ID_82571EB_SERDES_QUAD: case E1000_DEV_ID_82571EB_QUAD_COPPER: case E1000_DEV_ID_82571PT_QUAD_COPPER: case E1000_DEV_ID_82571EB_QUAD_FIBER: case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: mac->type = e1000_82571; break; case E1000_DEV_ID_82572EI: case E1000_DEV_ID_82572EI_COPPER: case E1000_DEV_ID_82572EI_FIBER: case E1000_DEV_ID_82572EI_SERDES: mac->type = e1000_82572; break; case E1000_DEV_ID_82573E: case E1000_DEV_ID_82573E_IAMT: case E1000_DEV_ID_82573L: mac->type = e1000_82573; break; case E1000_DEV_ID_82574L: case E1000_DEV_ID_82574LA: mac->type = e1000_82574; break; case E1000_DEV_ID_82583V: mac->type = e1000_82583; break; case E1000_DEV_ID_80003ES2LAN_COPPER_DPT: case E1000_DEV_ID_80003ES2LAN_SERDES_DPT: case E1000_DEV_ID_80003ES2LAN_COPPER_SPT: case E1000_DEV_ID_80003ES2LAN_SERDES_SPT: mac->type = e1000_80003es2lan; break; case E1000_DEV_ID_ICH8_IFE: case E1000_DEV_ID_ICH8_IFE_GT: case E1000_DEV_ID_ICH8_IFE_G: case E1000_DEV_ID_ICH8_IGP_M: case E1000_DEV_ID_ICH8_IGP_M_AMT: case E1000_DEV_ID_ICH8_IGP_AMT: case E1000_DEV_ID_ICH8_IGP_C: case E1000_DEV_ID_ICH8_82567V_3: mac->type = e1000_ich8lan; break; case E1000_DEV_ID_ICH9_IFE: case E1000_DEV_ID_ICH9_IFE_GT: case E1000_DEV_ID_ICH9_IFE_G: case E1000_DEV_ID_ICH9_IGP_M: case E1000_DEV_ID_ICH9_IGP_M_AMT: case E1000_DEV_ID_ICH9_IGP_M_V: case E1000_DEV_ID_ICH9_IGP_AMT: case E1000_DEV_ID_ICH9_BM: case E1000_DEV_ID_ICH9_IGP_C: case E1000_DEV_ID_ICH10_R_BM_LM: case E1000_DEV_ID_ICH10_R_BM_LF: case E1000_DEV_ID_ICH10_R_BM_V: mac->type = e1000_ich9lan; break; case E1000_DEV_ID_ICH10_D_BM_LM: case E1000_DEV_ID_ICH10_D_BM_LF: case E1000_DEV_ID_ICH10_D_BM_V: mac->type = e1000_ich10lan; break; case E1000_DEV_ID_PCH_D_HV_DM: case E1000_DEV_ID_PCH_D_HV_DC: case E1000_DEV_ID_PCH_M_HV_LM: case E1000_DEV_ID_PCH_M_HV_LC: mac->type = e1000_pchlan; break; case E1000_DEV_ID_PCH2_LV_LM: case E1000_DEV_ID_PCH2_LV_V: mac->type = e1000_pch2lan; break; case E1000_DEV_ID_PCH_LPT_I217_LM: case E1000_DEV_ID_PCH_LPT_I217_V: case E1000_DEV_ID_PCH_LPTLP_I218_LM: case E1000_DEV_ID_PCH_LPTLP_I218_V: case E1000_DEV_ID_PCH_I218_LM2: case E1000_DEV_ID_PCH_I218_V2: case E1000_DEV_ID_PCH_I218_LM3: case E1000_DEV_ID_PCH_I218_V3: mac->type = e1000_pch_lpt; break; - case E1000_DEV_ID_PCH_SPT_I219_LM: - case E1000_DEV_ID_PCH_SPT_I219_V: - case E1000_DEV_ID_PCH_SPT_I219_LM2: - case E1000_DEV_ID_PCH_SPT_I219_V2: - mac->type = e1000_pch_spt; - break; case E1000_DEV_ID_82575EB_COPPER: case E1000_DEV_ID_82575EB_FIBER_SERDES: case E1000_DEV_ID_82575GB_QUAD_COPPER: mac->type = e1000_82575; break; case E1000_DEV_ID_82576: case E1000_DEV_ID_82576_FIBER: case E1000_DEV_ID_82576_SERDES: case E1000_DEV_ID_82576_QUAD_COPPER: case E1000_DEV_ID_82576_QUAD_COPPER_ET2: case E1000_DEV_ID_82576_NS: case E1000_DEV_ID_82576_NS_SERDES: case E1000_DEV_ID_82576_SERDES_QUAD: mac->type = e1000_82576; break; case E1000_DEV_ID_82580_COPPER: case E1000_DEV_ID_82580_FIBER: case E1000_DEV_ID_82580_SERDES: case E1000_DEV_ID_82580_SGMII: case E1000_DEV_ID_82580_COPPER_DUAL: case E1000_DEV_ID_82580_QUAD_FIBER: case E1000_DEV_ID_DH89XXCC_SGMII: case E1000_DEV_ID_DH89XXCC_SERDES: case E1000_DEV_ID_DH89XXCC_BACKPLANE: case E1000_DEV_ID_DH89XXCC_SFP: mac->type = e1000_82580; break; case E1000_DEV_ID_I350_COPPER: case E1000_DEV_ID_I350_FIBER: case E1000_DEV_ID_I350_SERDES: case E1000_DEV_ID_I350_SGMII: case E1000_DEV_ID_I350_DA4: mac->type = e1000_i350; break; case E1000_DEV_ID_I210_COPPER_FLASHLESS: case E1000_DEV_ID_I210_SERDES_FLASHLESS: case E1000_DEV_ID_I210_COPPER: case E1000_DEV_ID_I210_COPPER_OEM1: case E1000_DEV_ID_I210_COPPER_IT: case E1000_DEV_ID_I210_FIBER: case E1000_DEV_ID_I210_SERDES: case E1000_DEV_ID_I210_SGMII: mac->type = e1000_i210; break; case E1000_DEV_ID_I211_COPPER: mac->type = e1000_i211; break; case E1000_DEV_ID_82576_VF: case E1000_DEV_ID_82576_VF_HV: mac->type = e1000_vfadapt; break; case E1000_DEV_ID_I350_VF: case E1000_DEV_ID_I350_VF_HV: mac->type = e1000_vfadapt_i350; break; case E1000_DEV_ID_I354_BACKPLANE_1GBPS: case E1000_DEV_ID_I354_SGMII: case E1000_DEV_ID_I354_BACKPLANE_2_5GBPS: mac->type = e1000_i354; break; default: /* Should never have loaded on this device */ ret_val = -E1000_ERR_MAC_INIT; break; } return ret_val; } /** * e1000_setup_init_funcs - Initializes function pointers * @hw: pointer to the HW structure * @init_device: TRUE will initialize the rest of the function pointers * getting the device ready for use. FALSE will only set * MAC type and the function pointers for the other init * functions. Passing FALSE will not generate any hardware * reads or writes. * * This function must be called by a driver in order to use the rest * of the 'shared' code files. Called by drivers only. **/ s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device) { s32 ret_val; /* Can't do much good without knowing the MAC type. */ ret_val = e1000_set_mac_type(hw); if (ret_val) { DEBUGOUT("ERROR: MAC type could not be set properly.\n"); goto out; } if (!hw->hw_addr) { DEBUGOUT("ERROR: Registers not mapped\n"); ret_val = -E1000_ERR_CONFIG; goto out; } /* * Init function pointers to generic implementations. We do this first * allowing a driver module to override it afterward. */ e1000_init_mac_ops_generic(hw); e1000_init_phy_ops_generic(hw); e1000_init_nvm_ops_generic(hw); e1000_init_mbx_ops_generic(hw); /* * Set up the init function pointers. These are functions within the * adapter family file that sets up function pointers for the rest of * the functions in that family. */ switch (hw->mac.type) { case e1000_82542: e1000_init_function_pointers_82542(hw); break; case e1000_82543: case e1000_82544: e1000_init_function_pointers_82543(hw); break; case e1000_82540: case e1000_82545: case e1000_82545_rev_3: case e1000_82546: case e1000_82546_rev_3: e1000_init_function_pointers_82540(hw); break; case e1000_82541: case e1000_82541_rev_2: case e1000_82547: case e1000_82547_rev_2: e1000_init_function_pointers_82541(hw); break; case e1000_82571: case e1000_82572: case e1000_82573: case e1000_82574: case e1000_82583: e1000_init_function_pointers_82571(hw); break; case e1000_80003es2lan: e1000_init_function_pointers_80003es2lan(hw); break; case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: - case e1000_pch_spt: e1000_init_function_pointers_ich8lan(hw); break; case e1000_82575: case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: e1000_init_function_pointers_82575(hw); break; case e1000_i210: case e1000_i211: e1000_init_function_pointers_i210(hw); break; case e1000_vfadapt: e1000_init_function_pointers_vf(hw); break; case e1000_vfadapt_i350: e1000_init_function_pointers_vf(hw); break; default: DEBUGOUT("Hardware not supported\n"); ret_val = -E1000_ERR_CONFIG; break; } /* * Initialize the rest of the function pointers. These require some * register reads/writes in some cases. */ if (!(ret_val) && init_device) { ret_val = e1000_init_mac_params(hw); if (ret_val) goto out; ret_val = e1000_init_nvm_params(hw); if (ret_val) goto out; ret_val = e1000_init_phy_params(hw); if (ret_val) goto out; ret_val = e1000_init_mbx_params(hw); if (ret_val) goto out; } out: return ret_val; } /** * e1000_get_bus_info - Obtain bus information for adapter * @hw: pointer to the HW structure * * This will obtain information about the HW bus for which the * adapter is attached and stores it in the hw structure. This is a * function pointer entry point called by drivers. **/ s32 e1000_get_bus_info(struct e1000_hw *hw) { if (hw->mac.ops.get_bus_info) return hw->mac.ops.get_bus_info(hw); return E1000_SUCCESS; } /** * e1000_clear_vfta - Clear VLAN filter table * @hw: pointer to the HW structure * * This clears the VLAN filter table on the adapter. This is a function * pointer entry point called by drivers. **/ void e1000_clear_vfta(struct e1000_hw *hw) { if (hw->mac.ops.clear_vfta) hw->mac.ops.clear_vfta(hw); } /** * e1000_write_vfta - Write value to VLAN filter table * @hw: pointer to the HW structure * @offset: the 32-bit offset in which to write the value to. * @value: the 32-bit value to write at location offset. * * This writes a 32-bit value to a 32-bit offset in the VLAN filter * table. This is a function pointer entry point called by drivers. **/ void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) { if (hw->mac.ops.write_vfta) hw->mac.ops.write_vfta(hw, offset, value); } /** * e1000_update_mc_addr_list - Update Multicast addresses * @hw: pointer to the HW structure * @mc_addr_list: array of multicast addresses to program * @mc_addr_count: number of multicast addresses to program * * Updates the Multicast Table Array. * The caller must have a packed mc_addr_list of multicast addresses. **/ void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list, u32 mc_addr_count) { if (hw->mac.ops.update_mc_addr_list) hw->mac.ops.update_mc_addr_list(hw, mc_addr_list, mc_addr_count); } /** * e1000_force_mac_fc - Force MAC flow control * @hw: pointer to the HW structure * * Force the MAC's flow control settings. Currently no func pointer exists * and all implementations are handled in the generic version of this * function. **/ s32 e1000_force_mac_fc(struct e1000_hw *hw) { return e1000_force_mac_fc_generic(hw); } /** * e1000_check_for_link - Check/Store link connection * @hw: pointer to the HW structure * * This checks the link condition of the adapter and stores the * results in the hw->mac structure. This is a function pointer entry * point called by drivers. **/ s32 e1000_check_for_link(struct e1000_hw *hw) { if (hw->mac.ops.check_for_link) return hw->mac.ops.check_for_link(hw); return -E1000_ERR_CONFIG; } /** * e1000_check_mng_mode - Check management mode * @hw: pointer to the HW structure * * This checks if the adapter has manageability enabled. * This is a function pointer entry point called by drivers. **/ bool e1000_check_mng_mode(struct e1000_hw *hw) { if (hw->mac.ops.check_mng_mode) return hw->mac.ops.check_mng_mode(hw); return FALSE; } /** * e1000_mng_write_dhcp_info - Writes DHCP info to host interface * @hw: pointer to the HW structure * @buffer: pointer to the host interface * @length: size of the buffer * * Writes the DHCP information to the host interface. **/ s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length) { return e1000_mng_write_dhcp_info_generic(hw, buffer, length); } /** * e1000_reset_hw - Reset hardware * @hw: pointer to the HW structure * * This resets the hardware into a known state. This is a function pointer * entry point called by drivers. **/ s32 e1000_reset_hw(struct e1000_hw *hw) { if (hw->mac.ops.reset_hw) return hw->mac.ops.reset_hw(hw); return -E1000_ERR_CONFIG; } /** * e1000_init_hw - Initialize hardware * @hw: pointer to the HW structure * * This inits the hardware readying it for operation. This is a function * pointer entry point called by drivers. **/ s32 e1000_init_hw(struct e1000_hw *hw) { if (hw->mac.ops.init_hw) return hw->mac.ops.init_hw(hw); return -E1000_ERR_CONFIG; } /** * e1000_setup_link - Configures link and flow control * @hw: pointer to the HW structure * * This configures link and flow control settings for the adapter. This * is a function pointer entry point called by drivers. While modules can * also call this, they probably call their own version of this function. **/ s32 e1000_setup_link(struct e1000_hw *hw) { if (hw->mac.ops.setup_link) return hw->mac.ops.setup_link(hw); return -E1000_ERR_CONFIG; } /** * e1000_get_speed_and_duplex - Returns current speed and duplex * @hw: pointer to the HW structure * @speed: pointer to a 16-bit value to store the speed * @duplex: pointer to a 16-bit value to store the duplex. * * This returns the speed and duplex of the adapter in the two 'out' * variables passed in. This is a function pointer entry point called * by drivers. **/ s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex) { if (hw->mac.ops.get_link_up_info) return hw->mac.ops.get_link_up_info(hw, speed, duplex); return -E1000_ERR_CONFIG; } /** * e1000_setup_led - Configures SW controllable LED * @hw: pointer to the HW structure * * This prepares the SW controllable LED for use and saves the current state * of the LED so it can be later restored. This is a function pointer entry * point called by drivers. **/ s32 e1000_setup_led(struct e1000_hw *hw) { if (hw->mac.ops.setup_led) return hw->mac.ops.setup_led(hw); return E1000_SUCCESS; } /** * e1000_cleanup_led - Restores SW controllable LED * @hw: pointer to the HW structure * * This restores the SW controllable LED to the value saved off by * e1000_setup_led. This is a function pointer entry point called by drivers. **/ s32 e1000_cleanup_led(struct e1000_hw *hw) { if (hw->mac.ops.cleanup_led) return hw->mac.ops.cleanup_led(hw); return E1000_SUCCESS; } /** * e1000_blink_led - Blink SW controllable LED * @hw: pointer to the HW structure * * This starts the adapter LED blinking. Request the LED to be setup first * and cleaned up after. This is a function pointer entry point called by * drivers. **/ s32 e1000_blink_led(struct e1000_hw *hw) { if (hw->mac.ops.blink_led) return hw->mac.ops.blink_led(hw); return E1000_SUCCESS; } /** * e1000_id_led_init - store LED configurations in SW * @hw: pointer to the HW structure * * Initializes the LED config in SW. This is a function pointer entry point * called by drivers. **/ s32 e1000_id_led_init(struct e1000_hw *hw) { if (hw->mac.ops.id_led_init) return hw->mac.ops.id_led_init(hw); return E1000_SUCCESS; } /** * e1000_led_on - Turn on SW controllable LED * @hw: pointer to the HW structure * * Turns the SW defined LED on. This is a function pointer entry point * called by drivers. **/ s32 e1000_led_on(struct e1000_hw *hw) { if (hw->mac.ops.led_on) return hw->mac.ops.led_on(hw); return E1000_SUCCESS; } /** * e1000_led_off - Turn off SW controllable LED * @hw: pointer to the HW structure * * Turns the SW defined LED off. This is a function pointer entry point * called by drivers. **/ s32 e1000_led_off(struct e1000_hw *hw) { if (hw->mac.ops.led_off) return hw->mac.ops.led_off(hw); return E1000_SUCCESS; } /** * e1000_reset_adaptive - Reset adaptive IFS * @hw: pointer to the HW structure * * Resets the adaptive IFS. Currently no func pointer exists and all * implementations are handled in the generic version of this function. **/ void e1000_reset_adaptive(struct e1000_hw *hw) { e1000_reset_adaptive_generic(hw); } /** * e1000_update_adaptive - Update adaptive IFS * @hw: pointer to the HW structure * * Updates adapter IFS. Currently no func pointer exists and all * implementations are handled in the generic version of this function. **/ void e1000_update_adaptive(struct e1000_hw *hw) { e1000_update_adaptive_generic(hw); } /** * e1000_disable_pcie_master - Disable PCI-Express master access * @hw: pointer to the HW structure * * Disables PCI-Express master access and verifies there are no pending * requests. Currently no func pointer exists and all implementations are * handled in the generic version of this function. **/ s32 e1000_disable_pcie_master(struct e1000_hw *hw) { return e1000_disable_pcie_master_generic(hw); } /** * e1000_config_collision_dist - Configure collision distance * @hw: pointer to the HW structure * * Configures the collision distance to the default value and is used * during link setup. **/ void e1000_config_collision_dist(struct e1000_hw *hw) { if (hw->mac.ops.config_collision_dist) hw->mac.ops.config_collision_dist(hw); } /** * e1000_rar_set - Sets a receive address register * @hw: pointer to the HW structure * @addr: address to set the RAR to * @index: the RAR to set * * Sets a Receive Address Register (RAR) to the specified address. **/ int e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index) { if (hw->mac.ops.rar_set) return hw->mac.ops.rar_set(hw, addr, index); return E1000_SUCCESS; } /** * e1000_validate_mdi_setting - Ensures valid MDI/MDIX SW state * @hw: pointer to the HW structure * * Ensures that the MDI/MDIX SW state is valid. **/ s32 e1000_validate_mdi_setting(struct e1000_hw *hw) { if (hw->mac.ops.validate_mdi_setting) return hw->mac.ops.validate_mdi_setting(hw); return E1000_SUCCESS; } /** * e1000_hash_mc_addr - Determines address location in multicast table * @hw: pointer to the HW structure * @mc_addr: Multicast address to hash. * * This hashes an address to determine its location in the multicast * table. Currently no func pointer exists and all implementations * are handled in the generic version of this function. **/ u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) { return e1000_hash_mc_addr_generic(hw, mc_addr); } /** * e1000_enable_tx_pkt_filtering - Enable packet filtering on TX * @hw: pointer to the HW structure * * Enables packet filtering on transmit packets if manageability is enabled * and host interface is enabled. * Currently no func pointer exists and all implementations are handled in the * generic version of this function. **/ bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw) { return e1000_enable_tx_pkt_filtering_generic(hw); } /** * e1000_mng_host_if_write - Writes to the manageability host interface * @hw: pointer to the HW structure * @buffer: pointer to the host interface buffer * @length: size of the buffer * @offset: location in the buffer to write to * @sum: sum of the data (not checksum) * * This function writes the buffer content at the offset given on the host if. * It also does alignment considerations to do the writes in most efficient * way. Also fills up the sum of the buffer in *buffer parameter. **/ s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, u16 length, u16 offset, u8 *sum) { return e1000_mng_host_if_write_generic(hw, buffer, length, offset, sum); } /** * e1000_mng_write_cmd_header - Writes manageability command header * @hw: pointer to the HW structure * @hdr: pointer to the host interface command header * * Writes the command header after does the checksum calculation. **/ s32 e1000_mng_write_cmd_header(struct e1000_hw *hw, struct e1000_host_mng_command_header *hdr) { return e1000_mng_write_cmd_header_generic(hw, hdr); } /** * e1000_mng_enable_host_if - Checks host interface is enabled * @hw: pointer to the HW structure * * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND * * This function checks whether the HOST IF is enabled for command operation * and also checks whether the previous command is completed. It busy waits * in case of previous command is not completed. **/ s32 e1000_mng_enable_host_if(struct e1000_hw *hw) { return e1000_mng_enable_host_if_generic(hw); } /** + * e1000_set_obff_timer - Set Optimized Buffer Flush/Fill timer + * @hw: pointer to the HW structure + * @itr: u32 indicating itr value + * + * Set the OBFF timer based on the given interrupt rate. + **/ +s32 e1000_set_obff_timer(struct e1000_hw *hw, u32 itr) +{ + if (hw->mac.ops.set_obff_timer) + return hw->mac.ops.set_obff_timer(hw, itr); + + return E1000_SUCCESS; +} + +/** * e1000_check_reset_block - Verifies PHY can be reset * @hw: pointer to the HW structure * * Checks if the PHY is in a state that can be reset or if manageability * has it tied up. This is a function pointer entry point called by drivers. **/ s32 e1000_check_reset_block(struct e1000_hw *hw) { if (hw->phy.ops.check_reset_block) return hw->phy.ops.check_reset_block(hw); return E1000_SUCCESS; } /** * e1000_read_phy_reg - Reads PHY register * @hw: pointer to the HW structure * @offset: the register to read * @data: the buffer to store the 16-bit read. * * Reads the PHY register and returns the value in data. * This is a function pointer entry point called by drivers. **/ s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data) { if (hw->phy.ops.read_reg) return hw->phy.ops.read_reg(hw, offset, data); return E1000_SUCCESS; } /** * e1000_write_phy_reg - Writes PHY register * @hw: pointer to the HW structure * @offset: the register to write * @data: the value to write. * * Writes the PHY register at offset with the value in data. * This is a function pointer entry point called by drivers. **/ s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data) { if (hw->phy.ops.write_reg) return hw->phy.ops.write_reg(hw, offset, data); return E1000_SUCCESS; } /** * e1000_release_phy - Generic release PHY * @hw: pointer to the HW structure * * Return if silicon family does not require a semaphore when accessing the * PHY. **/ void e1000_release_phy(struct e1000_hw *hw) { if (hw->phy.ops.release) hw->phy.ops.release(hw); } /** * e1000_acquire_phy - Generic acquire PHY * @hw: pointer to the HW structure * * Return success if silicon family does not require a semaphore when * accessing the PHY. **/ s32 e1000_acquire_phy(struct e1000_hw *hw) { if (hw->phy.ops.acquire) return hw->phy.ops.acquire(hw); return E1000_SUCCESS; } /** * e1000_cfg_on_link_up - Configure PHY upon link up * @hw: pointer to the HW structure **/ s32 e1000_cfg_on_link_up(struct e1000_hw *hw) { if (hw->phy.ops.cfg_on_link_up) return hw->phy.ops.cfg_on_link_up(hw); return E1000_SUCCESS; } /** * e1000_read_kmrn_reg - Reads register using Kumeran interface * @hw: pointer to the HW structure * @offset: the register to read * @data: the location to store the 16-bit value read. * * Reads a register out of the Kumeran interface. Currently no func pointer * exists and all implementations are handled in the generic version of * this function. **/ s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data) { return e1000_read_kmrn_reg_generic(hw, offset, data); } /** * e1000_write_kmrn_reg - Writes register using Kumeran interface * @hw: pointer to the HW structure * @offset: the register to write * @data: the value to write. * * Writes a register to the Kumeran interface. Currently no func pointer * exists and all implementations are handled in the generic version of * this function. **/ s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data) { return e1000_write_kmrn_reg_generic(hw, offset, data); } /** * e1000_get_cable_length - Retrieves cable length estimation * @hw: pointer to the HW structure * * This function estimates the cable length and stores them in * hw->phy.min_length and hw->phy.max_length. This is a function pointer * entry point called by drivers. **/ s32 e1000_get_cable_length(struct e1000_hw *hw) { if (hw->phy.ops.get_cable_length) return hw->phy.ops.get_cable_length(hw); return E1000_SUCCESS; } /** * e1000_get_phy_info - Retrieves PHY information from registers * @hw: pointer to the HW structure * * This function gets some information from various PHY registers and * populates hw->phy values with it. This is a function pointer entry * point called by drivers. **/ s32 e1000_get_phy_info(struct e1000_hw *hw) { if (hw->phy.ops.get_info) return hw->phy.ops.get_info(hw); return E1000_SUCCESS; } /** * e1000_phy_hw_reset - Hard PHY reset * @hw: pointer to the HW structure * * Performs a hard PHY reset. This is a function pointer entry point called * by drivers. **/ s32 e1000_phy_hw_reset(struct e1000_hw *hw) { if (hw->phy.ops.reset) return hw->phy.ops.reset(hw); return E1000_SUCCESS; } /** * e1000_phy_commit - Soft PHY reset * @hw: pointer to the HW structure * * Performs a soft PHY reset on those that apply. This is a function pointer * entry point called by drivers. **/ s32 e1000_phy_commit(struct e1000_hw *hw) { if (hw->phy.ops.commit) return hw->phy.ops.commit(hw); return E1000_SUCCESS; } /** * e1000_set_d0_lplu_state - Sets low power link up state for D0 * @hw: pointer to the HW structure * @active: boolean used to enable/disable lplu * * Success returns 0, Failure returns 1 * * The low power link up (lplu) state is set to the power management level D0 * and SmartSpeed is disabled when active is TRUE, else clear lplu for D0 * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU * is used during Dx states where the power conservation is most important. * During driver activity, SmartSpeed should be enabled so performance is * maintained. This is a function pointer entry point called by drivers. **/ s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active) { if (hw->phy.ops.set_d0_lplu_state) return hw->phy.ops.set_d0_lplu_state(hw, active); return E1000_SUCCESS; } /** * e1000_set_d3_lplu_state - Sets low power link up state for D3 * @hw: pointer to the HW structure * @active: boolean used to enable/disable lplu * * Success returns 0, Failure returns 1 * * The low power link up (lplu) state is set to the power management level D3 * and SmartSpeed is disabled when active is TRUE, else clear lplu for D3 * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU * is used during Dx states where the power conservation is most important. * During driver activity, SmartSpeed should be enabled so performance is * maintained. This is a function pointer entry point called by drivers. **/ s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active) { if (hw->phy.ops.set_d3_lplu_state) return hw->phy.ops.set_d3_lplu_state(hw, active); return E1000_SUCCESS; } /** * e1000_read_mac_addr - Reads MAC address * @hw: pointer to the HW structure * * Reads the MAC address out of the adapter and stores it in the HW structure. * Currently no func pointer exists and all implementations are handled in the * generic version of this function. **/ s32 e1000_read_mac_addr(struct e1000_hw *hw) { if (hw->mac.ops.read_mac_addr) return hw->mac.ops.read_mac_addr(hw); return e1000_read_mac_addr_generic(hw); } /** * e1000_read_pba_string - Read device part number string * @hw: pointer to the HW structure * @pba_num: pointer to device part number * @pba_num_size: size of part number buffer * * Reads the product board assembly (PBA) number from the EEPROM and stores * the value in pba_num. * Currently no func pointer exists and all implementations are handled in the * generic version of this function. **/ s32 e1000_read_pba_string(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size) { return e1000_read_pba_string_generic(hw, pba_num, pba_num_size); } /** * e1000_read_pba_length - Read device part number string length * @hw: pointer to the HW structure * @pba_num_size: size of part number buffer * * Reads the product board assembly (PBA) number length from the EEPROM and * stores the value in pba_num. * Currently no func pointer exists and all implementations are handled in the * generic version of this function. **/ s32 e1000_read_pba_length(struct e1000_hw *hw, u32 *pba_num_size) { return e1000_read_pba_length_generic(hw, pba_num_size); -} - -/** - * e1000_read_pba_num - Read device part number - * @hw: pointer to the HW structure - * @pba_num: pointer to device part number - * - * Reads the product board assembly (PBA) number from the EEPROM and stores - * the value in pba_num. - * Currently no func pointer exists and all implementations are handled in the - * generic version of this function. - **/ -s32 e1000_read_pba_num(struct e1000_hw *hw, u32 *pba_num) -{ - return e1000_read_pba_num_generic(hw, pba_num); } /** * e1000_validate_nvm_checksum - Verifies NVM (EEPROM) checksum * @hw: pointer to the HW structure * * Validates the NVM checksum is correct. This is a function pointer entry * point called by drivers. **/ s32 e1000_validate_nvm_checksum(struct e1000_hw *hw) { if (hw->nvm.ops.validate) return hw->nvm.ops.validate(hw); return -E1000_ERR_CONFIG; } /** * e1000_update_nvm_checksum - Updates NVM (EEPROM) checksum * @hw: pointer to the HW structure * * Updates the NVM checksum. Currently no func pointer exists and all * implementations are handled in the generic version of this function. **/ s32 e1000_update_nvm_checksum(struct e1000_hw *hw) { if (hw->nvm.ops.update) return hw->nvm.ops.update(hw); return -E1000_ERR_CONFIG; } /** * e1000_reload_nvm - Reloads EEPROM * @hw: pointer to the HW structure * * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the * extended control register. **/ void e1000_reload_nvm(struct e1000_hw *hw) { if (hw->nvm.ops.reload) hw->nvm.ops.reload(hw); } /** * e1000_read_nvm - Reads NVM (EEPROM) * @hw: pointer to the HW structure * @offset: the word offset to read * @words: number of 16-bit words to read * @data: pointer to the properly sized buffer for the data. * * Reads 16-bit chunks of data from the NVM (EEPROM). This is a function * pointer entry point called by drivers. **/ s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { if (hw->nvm.ops.read) return hw->nvm.ops.read(hw, offset, words, data); return -E1000_ERR_CONFIG; } /** * e1000_write_nvm - Writes to NVM (EEPROM) * @hw: pointer to the HW structure * @offset: the word offset to read * @words: number of 16-bit words to write * @data: pointer to the properly sized buffer for the data. * * Writes 16-bit chunks of data to the NVM (EEPROM). This is a function * pointer entry point called by drivers. **/ s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { if (hw->nvm.ops.write) return hw->nvm.ops.write(hw, offset, words, data); return E1000_SUCCESS; } /** * e1000_write_8bit_ctrl_reg - Writes 8bit Control register * @hw: pointer to the HW structure * @reg: 32bit register offset * @offset: the register to write * @data: the value to write. * * Writes the PHY register at offset with the value in data. * This is a function pointer entry point called by drivers. **/ s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset, u8 data) { return e1000_write_8bit_ctrl_reg_generic(hw, reg, offset, data); } /** * e1000_power_up_phy - Restores link in case of PHY power down * @hw: pointer to the HW structure * * The phy may be powered down to save power, to turn off link when the * driver is unloaded, or wake on lan is not enabled (among others). **/ void e1000_power_up_phy(struct e1000_hw *hw) { if (hw->phy.ops.power_up) hw->phy.ops.power_up(hw); e1000_setup_link(hw); } /** * e1000_power_down_phy - Power down PHY * @hw: pointer to the HW structure * * The phy may be powered down to save power, to turn off link when the * driver is unloaded, or wake on lan is not enabled (among others). **/ void e1000_power_down_phy(struct e1000_hw *hw) { if (hw->phy.ops.power_down) hw->phy.ops.power_down(hw); } /** * e1000_power_up_fiber_serdes_link - Power up serdes link * @hw: pointer to the HW structure * * Power on the optics and PCS. **/ void e1000_power_up_fiber_serdes_link(struct e1000_hw *hw) { if (hw->mac.ops.power_up_serdes) hw->mac.ops.power_up_serdes(hw); } /** * e1000_shutdown_fiber_serdes_link - Remove link during power down * @hw: pointer to the HW structure * * Shutdown the optics and PCS on driver unload. **/ void e1000_shutdown_fiber_serdes_link(struct e1000_hw *hw) { if (hw->mac.ops.shutdown_serdes) hw->mac.ops.shutdown_serdes(hw); } Index: projects/clang-trunk/sys/dev/e1000/e1000_api.h =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_api.h (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_api.h (revision 287502) @@ -1,168 +1,167 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _E1000_API_H_ #define _E1000_API_H_ #include "e1000_hw.h" extern void e1000_init_function_pointers_82542(struct e1000_hw *hw); extern void e1000_init_function_pointers_82543(struct e1000_hw *hw); extern void e1000_init_function_pointers_82540(struct e1000_hw *hw); extern void e1000_init_function_pointers_82571(struct e1000_hw *hw); extern void e1000_init_function_pointers_82541(struct e1000_hw *hw); extern void e1000_init_function_pointers_80003es2lan(struct e1000_hw *hw); extern void e1000_init_function_pointers_ich8lan(struct e1000_hw *hw); extern void e1000_init_function_pointers_82575(struct e1000_hw *hw); extern void e1000_rx_fifo_flush_82575(struct e1000_hw *hw); extern void e1000_init_function_pointers_vf(struct e1000_hw *hw); extern void e1000_power_up_fiber_serdes_link(struct e1000_hw *hw); extern void e1000_shutdown_fiber_serdes_link(struct e1000_hw *hw); extern void e1000_init_function_pointers_i210(struct e1000_hw *hw); s32 e1000_set_obff_timer(struct e1000_hw *hw, u32 itr); s32 e1000_set_mac_type(struct e1000_hw *hw); s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device); s32 e1000_init_mac_params(struct e1000_hw *hw); s32 e1000_init_nvm_params(struct e1000_hw *hw); s32 e1000_init_phy_params(struct e1000_hw *hw); s32 e1000_init_mbx_params(struct e1000_hw *hw); s32 e1000_get_bus_info(struct e1000_hw *hw); void e1000_clear_vfta(struct e1000_hw *hw); void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value); s32 e1000_force_mac_fc(struct e1000_hw *hw); s32 e1000_check_for_link(struct e1000_hw *hw); s32 e1000_reset_hw(struct e1000_hw *hw); s32 e1000_init_hw(struct e1000_hw *hw); s32 e1000_setup_link(struct e1000_hw *hw); s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex); s32 e1000_disable_pcie_master(struct e1000_hw *hw); void e1000_config_collision_dist(struct e1000_hw *hw); int e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index); u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr); void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list, u32 mc_addr_count); s32 e1000_setup_led(struct e1000_hw *hw); s32 e1000_cleanup_led(struct e1000_hw *hw); s32 e1000_check_reset_block(struct e1000_hw *hw); s32 e1000_blink_led(struct e1000_hw *hw); s32 e1000_led_on(struct e1000_hw *hw); s32 e1000_led_off(struct e1000_hw *hw); s32 e1000_id_led_init(struct e1000_hw *hw); void e1000_reset_adaptive(struct e1000_hw *hw); void e1000_update_adaptive(struct e1000_hw *hw); s32 e1000_get_cable_length(struct e1000_hw *hw); s32 e1000_validate_mdi_setting(struct e1000_hw *hw); s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data); s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data); s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset, u8 data); s32 e1000_get_phy_info(struct e1000_hw *hw); void e1000_release_phy(struct e1000_hw *hw); s32 e1000_acquire_phy(struct e1000_hw *hw); s32 e1000_cfg_on_link_up(struct e1000_hw *hw); s32 e1000_phy_hw_reset(struct e1000_hw *hw); s32 e1000_phy_commit(struct e1000_hw *hw); void e1000_power_up_phy(struct e1000_hw *hw); void e1000_power_down_phy(struct e1000_hw *hw); s32 e1000_read_mac_addr(struct e1000_hw *hw); -s32 e1000_read_pba_num(struct e1000_hw *hw, u32 *part_num); s32 e1000_read_pba_string(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size); s32 e1000_read_pba_length(struct e1000_hw *hw, u32 *pba_num_size); void e1000_reload_nvm(struct e1000_hw *hw); s32 e1000_update_nvm_checksum(struct e1000_hw *hw); s32 e1000_validate_nvm_checksum(struct e1000_hw *hw); s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data); s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data); s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active); s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active); bool e1000_check_mng_mode(struct e1000_hw *hw); bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw); s32 e1000_mng_enable_host_if(struct e1000_hw *hw); s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, u16 length, u16 offset, u8 *sum); s32 e1000_mng_write_cmd_header(struct e1000_hw *hw, struct e1000_host_mng_command_header *hdr); s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length); u32 e1000_translate_register_82542(u32 reg); /* * TBI_ACCEPT macro definition: * * This macro requires: * a = a pointer to struct e1000_hw * status = the 8 bit status field of the Rx descriptor with EOP set * errors = the 8 bit error field of the Rx descriptor with EOP set * length = the sum of all the length fields of the Rx descriptors that * make up the current frame * last_byte = the last byte of the frame DMAed by the hardware * min_frame_size = the minimum frame length we want to accept. * max_frame_size = the maximum frame length we want to accept. * * This macro is a conditional that should be used in the interrupt * handler's Rx processing routine when RxErrors have been detected. * * Typical use: * ... * if (TBI_ACCEPT) { * accept_frame = TRUE; * e1000_tbi_adjust_stats(adapter, MacAddress); * frame_length--; * } else { * accept_frame = FALSE; * } * ... */ /* The carrier extension symbol, as received by the NIC. */ #define CARRIER_EXTENSION 0x0F #define TBI_ACCEPT(a, status, errors, length, last_byte, \ min_frame_size, max_frame_size) \ (e1000_tbi_sbp_enabled_82543(a) && \ (((errors) & E1000_RXD_ERR_FRAME_ERR_MASK) == E1000_RXD_ERR_CE) && \ ((last_byte) == CARRIER_EXTENSION) && \ (((status) & E1000_RXD_STAT_VP) ? \ (((length) > ((min_frame_size) - VLAN_TAG_SIZE)) && \ ((length) <= ((max_frame_size) + 1))) : \ (((length) > (min_frame_size)) && \ ((length) <= ((max_frame_size) + VLAN_TAG_SIZE + 1))))) #define E1000_MAX(a, b) ((a) > (b) ? (a) : (b)) #define E1000_DIVIDE_ROUND_UP(a, b) (((a) + (b) - 1) / (b)) /* ceil(a/b) */ #endif /* _E1000_API_H_ */ Index: projects/clang-trunk/sys/dev/e1000/e1000_defines.h =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_defines.h (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_defines.h (revision 287502) @@ -1,1506 +1,1472 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _E1000_DEFINES_H_ #define _E1000_DEFINES_H_ /* Number of Transmit and Receive Descriptors must be a multiple of 8 */ #define REQ_TX_DESCRIPTOR_MULTIPLE 8 #define REQ_RX_DESCRIPTOR_MULTIPLE 8 /* Definitions for power management and wakeup registers */ /* Wake Up Control */ #define E1000_WUC_APME 0x00000001 /* APM Enable */ #define E1000_WUC_PME_EN 0x00000002 /* PME Enable */ #define E1000_WUC_PME_STATUS 0x00000004 /* PME Status */ #define E1000_WUC_APMPME 0x00000008 /* Assert PME on APM Wakeup */ #define E1000_WUC_PHY_WAKE 0x00000100 /* if PHY supports wakeup */ /* Wake Up Filter Control */ #define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ #define E1000_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ #define E1000_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ #define E1000_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ #define E1000_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ #define E1000_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */ #define E1000_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */ #define E1000_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */ /* Wake Up Status */ #define E1000_WUS_LNKC E1000_WUFC_LNKC #define E1000_WUS_MAG E1000_WUFC_MAG #define E1000_WUS_EX E1000_WUFC_EX #define E1000_WUS_MC E1000_WUFC_MC #define E1000_WUS_BC E1000_WUFC_BC /* Extended Device Control */ #define E1000_CTRL_EXT_LPCD 0x00000004 /* LCD Power Cycle Done */ #define E1000_CTRL_EXT_SDP4_DATA 0x00000010 /* SW Definable Pin 4 data */ #define E1000_CTRL_EXT_SDP6_DATA 0x00000040 /* SW Definable Pin 6 data */ #define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* SW Definable Pin 3 data */ /* SDP 4/5 (bits 8,9) are reserved in >= 82575 */ #define E1000_CTRL_EXT_SDP4_DIR 0x00000100 /* Direction of SDP4 0=in 1=out */ #define E1000_CTRL_EXT_SDP6_DIR 0x00000400 /* Direction of SDP6 0=in 1=out */ #define E1000_CTRL_EXT_SDP3_DIR 0x00000800 /* Direction of SDP3 0=in 1=out */ #define E1000_CTRL_EXT_FORCE_SMBUS 0x00000800 /* Force SMBus mode */ #define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */ /* Physical Func Reset Done Indication */ #define E1000_CTRL_EXT_PFRSTD 0x00004000 #define E1000_CTRL_EXT_SDLPE 0X00040000 /* SerDes Low Power Enable */ #define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */ #define E1000_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */ #define E1000_CTRL_EXT_DMA_DYN_CLK_EN 0x00080000 /* DMA Dynamic Clk Gating */ #define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 /* Offset of the link mode field in Ctrl Ext register */ #define E1000_CTRL_EXT_LINK_MODE_OFFSET 22 #define E1000_CTRL_EXT_LINK_MODE_1000BASE_KX 0x00400000 #define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000 #define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES 0x00C00000 #define E1000_CTRL_EXT_LINK_MODE_SGMII 0x00800000 #define E1000_CTRL_EXT_EIAME 0x01000000 #define E1000_CTRL_EXT_IRCA 0x00000001 #define E1000_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */ #define E1000_CTRL_EXT_IAME 0x08000000 /* Int ACK Auto-mask */ #define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */ #define E1000_CTRL_EXT_LSECCK 0x00001000 #define E1000_CTRL_EXT_PHYPDEN 0x00100000 #define E1000_I2CCMD_REG_ADDR_SHIFT 16 #define E1000_I2CCMD_PHY_ADDR_SHIFT 24 #define E1000_I2CCMD_OPCODE_READ 0x08000000 #define E1000_I2CCMD_OPCODE_WRITE 0x00000000 #define E1000_I2CCMD_READY 0x20000000 #define E1000_I2CCMD_ERROR 0x80000000 #define E1000_I2CCMD_SFP_DATA_ADDR(a) (0x0000 + (a)) #define E1000_I2CCMD_SFP_DIAG_ADDR(a) (0x0100 + (a)) #define E1000_MAX_SGMII_PHY_REG_ADDR 255 #define E1000_I2CCMD_PHY_TIMEOUT 200 #define E1000_IVAR_VALID 0x80 #define E1000_GPIE_NSICR 0x00000001 #define E1000_GPIE_MSIX_MODE 0x00000010 #define E1000_GPIE_EIAME 0x40000000 #define E1000_GPIE_PBA 0x80000000 /* Receive Descriptor bit definitions */ #define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ #define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ #define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */ #define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ #define E1000_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */ #define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ #define E1000_RXD_STAT_IPCS 0x40 /* IP xsum calculated */ #define E1000_RXD_STAT_PIF 0x80 /* passed in-exact filter */ #define E1000_RXD_STAT_IPIDV 0x200 /* IP identification valid */ #define E1000_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */ #define E1000_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */ #define E1000_RXD_ERR_CE 0x01 /* CRC Error */ #define E1000_RXD_ERR_SE 0x02 /* Symbol Error */ #define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */ #define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */ #define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */ #define E1000_RXD_ERR_IPE 0x40 /* IP Checksum Error */ #define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */ #define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */ #define E1000_RXDEXT_STATERR_TST 0x00000100 /* Time Stamp taken */ #define E1000_RXDEXT_STATERR_LB 0x00040000 #define E1000_RXDEXT_STATERR_CE 0x01000000 #define E1000_RXDEXT_STATERR_SE 0x02000000 #define E1000_RXDEXT_STATERR_SEQ 0x04000000 #define E1000_RXDEXT_STATERR_CXE 0x10000000 #define E1000_RXDEXT_STATERR_TCPE 0x20000000 #define E1000_RXDEXT_STATERR_IPE 0x40000000 #define E1000_RXDEXT_STATERR_RXE 0x80000000 /* mask to determine if packets should be dropped due to frame errors */ #define E1000_RXD_ERR_FRAME_ERR_MASK ( \ E1000_RXD_ERR_CE | \ E1000_RXD_ERR_SE | \ E1000_RXD_ERR_SEQ | \ E1000_RXD_ERR_CXE | \ E1000_RXD_ERR_RXE) /* Same mask, but for extended and packet split descriptors */ #define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \ E1000_RXDEXT_STATERR_CE | \ E1000_RXDEXT_STATERR_SE | \ E1000_RXDEXT_STATERR_SEQ | \ E1000_RXDEXT_STATERR_CXE | \ E1000_RXDEXT_STATERR_RXE) #define E1000_MRQC_RSS_ENABLE_2Q 0x00000001 #define E1000_MRQC_RSS_FIELD_MASK 0xFFFF0000 #define E1000_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 #define E1000_MRQC_RSS_FIELD_IPV4 0x00020000 #define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX 0x00040000 #define E1000_MRQC_RSS_FIELD_IPV6_EX 0x00080000 #define E1000_MRQC_RSS_FIELD_IPV6 0x00100000 #define E1000_MRQC_RSS_FIELD_IPV6_TCP 0x00200000 #define E1000_RXDPS_HDRSTAT_HDRSP 0x00008000 /* Management Control */ #define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */ #define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */ #define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */ #define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ #define E1000_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */ /* Enable MAC address filtering */ #define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000 /* Enable MNG packets to host memory */ #define E1000_MANC_EN_MNG2HOST 0x00200000 #define E1000_MANC2H_PORT_623 0x00000020 /* Port 0x26f */ #define E1000_MANC2H_PORT_664 0x00000040 /* Port 0x298 */ #define E1000_MDEF_PORT_623 0x00000800 /* Port 0x26f */ #define E1000_MDEF_PORT_664 0x00000400 /* Port 0x298 */ /* Receive Control */ #define E1000_RCTL_RST 0x00000001 /* Software reset */ #define E1000_RCTL_EN 0x00000002 /* enable */ #define E1000_RCTL_SBP 0x00000004 /* store bad packet */ #define E1000_RCTL_UPE 0x00000008 /* unicast promisc enable */ #define E1000_RCTL_MPE 0x00000010 /* multicast promisc enable */ #define E1000_RCTL_LPE 0x00000020 /* long packet enable */ #define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */ #define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ #define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ #define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */ #define E1000_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */ -#define E1000_RCTL_RDMTS_HEX 0x00010000 -#define E1000_RCTL_RDMTS1_HEX E1000_RCTL_RDMTS_HEX #define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ #define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */ #define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ /* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */ #define E1000_RCTL_SZ_2048 0x00000000 /* Rx buffer size 2048 */ #define E1000_RCTL_SZ_1024 0x00010000 /* Rx buffer size 1024 */ #define E1000_RCTL_SZ_512 0x00020000 /* Rx buffer size 512 */ #define E1000_RCTL_SZ_256 0x00030000 /* Rx buffer size 256 */ /* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */ #define E1000_RCTL_SZ_16384 0x00010000 /* Rx buffer size 16384 */ #define E1000_RCTL_SZ_8192 0x00020000 /* Rx buffer size 8192 */ #define E1000_RCTL_SZ_4096 0x00030000 /* Rx buffer size 4096 */ #define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ #define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ #define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */ #define E1000_RCTL_DPF 0x00400000 /* discard pause frames */ #define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ #define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */ #define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ /* Use byte values for the following shift parameters * Usage: * psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) & * E1000_PSRCTL_BSIZE0_MASK) | * ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) & * E1000_PSRCTL_BSIZE1_MASK) | * ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) & * E1000_PSRCTL_BSIZE2_MASK) | * ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) |; * E1000_PSRCTL_BSIZE3_MASK)) * where value0 = [128..16256], default=256 * value1 = [1024..64512], default=4096 * value2 = [0..64512], default=4096 * value3 = [0..64512], default=0 */ #define E1000_PSRCTL_BSIZE0_MASK 0x0000007F #define E1000_PSRCTL_BSIZE1_MASK 0x00003F00 #define E1000_PSRCTL_BSIZE2_MASK 0x003F0000 #define E1000_PSRCTL_BSIZE3_MASK 0x3F000000 #define E1000_PSRCTL_BSIZE0_SHIFT 7 /* Shift _right_ 7 */ #define E1000_PSRCTL_BSIZE1_SHIFT 2 /* Shift _right_ 2 */ #define E1000_PSRCTL_BSIZE2_SHIFT 6 /* Shift _left_ 6 */ #define E1000_PSRCTL_BSIZE3_SHIFT 14 /* Shift _left_ 14 */ /* SWFW_SYNC Definitions */ #define E1000_SWFW_EEP_SM 0x01 #define E1000_SWFW_PHY0_SM 0x02 #define E1000_SWFW_PHY1_SM 0x04 #define E1000_SWFW_CSR_SM 0x08 #define E1000_SWFW_PHY2_SM 0x20 #define E1000_SWFW_PHY3_SM 0x40 #define E1000_SWFW_SW_MNG_SM 0x400 /* Device Control */ #define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */ #define E1000_CTRL_PRIOR 0x00000004 /* Priority on PCI. 0=rx,1=fair */ #define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master reqs */ #define E1000_CTRL_LRST 0x00000008 /* Link reset. 0=normal,1=reset */ #define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */ #define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ #define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */ #define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */ #define E1000_CTRL_SPD_10 0x00000000 /* Force 10Mb */ #define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */ #define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */ #define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */ #define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */ #define E1000_CTRL_LANPHYPC_OVERRIDE 0x00010000 /* SW control of LANPHYPC */ #define E1000_CTRL_LANPHYPC_VALUE 0x00020000 /* SW value of LANPHYPC */ #define E1000_CTRL_MEHE 0x00080000 /* Memory Error Handling Enable */ #define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ #define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ #define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */ #define E1000_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */ #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 /* PHY PM enable */ #define E1000_CTRL_SWDPIN3 0x00200000 /* SWDPIN 3 value */ #define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */ #define E1000_CTRL_SWDPIO2 0x01000000 /* SWDPIN 2 input or output */ #define E1000_CTRL_SWDPIO3 0x02000000 /* SWDPIN 3 input or output */ #define E1000_CTRL_RST 0x04000000 /* Global reset */ #define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ #define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ #define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */ #define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */ #define E1000_CTRL_I2C_ENA 0x02000000 /* I2C enable */ #define E1000_CTRL_MDIO_DIR E1000_CTRL_SWDPIO2 #define E1000_CTRL_MDIO E1000_CTRL_SWDPIN2 #define E1000_CTRL_MDC_DIR E1000_CTRL_SWDPIO3 #define E1000_CTRL_MDC E1000_CTRL_SWDPIN3 #define E1000_CONNSW_ENRGSRC 0x4 #define E1000_CONNSW_PHYSD 0x400 #define E1000_CONNSW_PHY_PDN 0x800 #define E1000_CONNSW_SERDESD 0x200 #define E1000_CONNSW_AUTOSENSE_CONF 0x2 #define E1000_CONNSW_AUTOSENSE_EN 0x1 #define E1000_PCS_CFG_PCS_EN 8 #define E1000_PCS_LCTL_FLV_LINK_UP 1 #define E1000_PCS_LCTL_FSV_10 0 #define E1000_PCS_LCTL_FSV_100 2 #define E1000_PCS_LCTL_FSV_1000 4 #define E1000_PCS_LCTL_FDV_FULL 8 #define E1000_PCS_LCTL_FSD 0x10 #define E1000_PCS_LCTL_FORCE_LINK 0x20 #define E1000_PCS_LCTL_FORCE_FCTRL 0x80 #define E1000_PCS_LCTL_AN_ENABLE 0x10000 #define E1000_PCS_LCTL_AN_RESTART 0x20000 #define E1000_PCS_LCTL_AN_TIMEOUT 0x40000 #define E1000_ENABLE_SERDES_LOOPBACK 0x0410 #define E1000_PCS_LSTS_LINK_OK 1 #define E1000_PCS_LSTS_SPEED_100 2 #define E1000_PCS_LSTS_SPEED_1000 4 #define E1000_PCS_LSTS_DUPLEX_FULL 8 #define E1000_PCS_LSTS_SYNK_OK 0x10 #define E1000_PCS_LSTS_AN_COMPLETE 0x10000 /* Device Status */ #define E1000_STATUS_FD 0x00000001 /* Duplex 0=half 1=full */ #define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ #define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ #define E1000_STATUS_FUNC_SHIFT 2 #define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */ #define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */ #define E1000_STATUS_SPEED_MASK 0x000000C0 #define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */ #define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ #define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ #define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Compltn by NVM */ #define E1000_STATUS_PHYRA 0x00000400 /* PHY Reset Asserted */ #define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Master request status */ #define E1000_STATUS_PCI66 0x00000800 /* In 66Mhz slot */ #define E1000_STATUS_BUS64 0x00001000 /* In 64 bit slot */ #define E1000_STATUS_2P5_SKU 0x00001000 /* Val of 2.5GBE SKU strap */ #define E1000_STATUS_2P5_SKU_OVER 0x00002000 /* Val of 2.5GBE SKU Over */ #define E1000_STATUS_PCIX_MODE 0x00002000 /* PCI-X mode */ #define E1000_STATUS_PCIX_SPEED 0x0000C000 /* PCI-X bus speed */ /* Constants used to interpret the masked PCI-X bus speed. */ #define E1000_STATUS_PCIX_SPEED_66 0x00000000 /* PCI-X bus spd 50-66MHz */ #define E1000_STATUS_PCIX_SPEED_100 0x00004000 /* PCI-X bus spd 66-100MHz */ #define E1000_STATUS_PCIX_SPEED_133 0x00008000 /* PCI-X bus spd 100-133MHz*/ #define SPEED_10 10 #define SPEED_100 100 #define SPEED_1000 1000 #define SPEED_2500 2500 #define HALF_DUPLEX 1 #define FULL_DUPLEX 2 #define PHY_FORCE_TIME 20 #define ADVERTISE_10_HALF 0x0001 #define ADVERTISE_10_FULL 0x0002 #define ADVERTISE_100_HALF 0x0004 #define ADVERTISE_100_FULL 0x0008 #define ADVERTISE_1000_HALF 0x0010 /* Not used, just FYI */ #define ADVERTISE_1000_FULL 0x0020 /* 1000/H is not supported, nor spec-compliant. */ #define E1000_ALL_SPEED_DUPLEX ( \ ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \ ADVERTISE_100_FULL | ADVERTISE_1000_FULL) #define E1000_ALL_NOT_GIG ( \ ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \ ADVERTISE_100_FULL) #define E1000_ALL_100_SPEED (ADVERTISE_100_HALF | ADVERTISE_100_FULL) #define E1000_ALL_10_SPEED (ADVERTISE_10_HALF | ADVERTISE_10_FULL) #define E1000_ALL_HALF_DUPLEX (ADVERTISE_10_HALF | ADVERTISE_100_HALF) #define AUTONEG_ADVERTISE_SPEED_DEFAULT E1000_ALL_SPEED_DUPLEX /* LED Control */ #define E1000_PHY_LED0_MODE_MASK 0x00000007 #define E1000_PHY_LED0_IVRT 0x00000008 #define E1000_PHY_LED0_MASK 0x0000001F #define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F #define E1000_LEDCTL_LED0_MODE_SHIFT 0 #define E1000_LEDCTL_LED0_IVRT 0x00000040 #define E1000_LEDCTL_LED0_BLINK 0x00000080 #define E1000_LEDCTL_MODE_LINK_UP 0x2 #define E1000_LEDCTL_MODE_LED_ON 0xE #define E1000_LEDCTL_MODE_LED_OFF 0xF /* Transmit Descriptor bit definitions */ #define E1000_TXD_DTYP_D 0x00100000 /* Data Descriptor */ #define E1000_TXD_DTYP_C 0x00000000 /* Context Descriptor */ #define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ #define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ #define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */ #define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ #define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */ #define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ #define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */ #define E1000_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */ #define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ #define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */ #define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ #define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */ #define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */ #define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */ #define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */ #define E1000_TXD_CMD_IP 0x02000000 /* IP packet */ #define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ #define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */ #define E1000_TXD_EXTCMD_TSTAMP 0x00000010 /* IEEE1588 Timestamp packet */ /* Transmit Control */ #define E1000_TCTL_EN 0x00000002 /* enable Tx */ #define E1000_TCTL_PSP 0x00000008 /* pad short packets */ #define E1000_TCTL_CT 0x00000ff0 /* collision threshold */ #define E1000_TCTL_COLD 0x003ff000 /* collision distance */ #define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ #define E1000_TCTL_MULR 0x10000000 /* Multiple request support */ /* Transmit Arbitration Count */ #define E1000_TARC0_ENABLE 0x00000400 /* Enable Tx Queue 0 */ /* SerDes Control */ #define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400 #define E1000_SCTL_ENABLE_SERDES_LOOPBACK 0x0410 /* Receive Checksum Control */ #define E1000_RXCSUM_IPOFL 0x00000100 /* IPv4 checksum offload */ #define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */ #define E1000_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */ #define E1000_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */ #define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ /* Header split receive */ #define E1000_RFCTL_NFSW_DIS 0x00000040 #define E1000_RFCTL_NFSR_DIS 0x00000080 #define E1000_RFCTL_ACK_DIS 0x00001000 #define E1000_RFCTL_EXTEN 0x00008000 #define E1000_RFCTL_IPV6_EX_DIS 0x00010000 #define E1000_RFCTL_NEW_IPV6_EXT_DIS 0x00020000 #define E1000_RFCTL_LEF 0x00040000 /* Collision related configuration parameters */ #define E1000_COLLISION_THRESHOLD 15 #define E1000_CT_SHIFT 4 #define E1000_COLLISION_DISTANCE 63 #define E1000_COLD_SHIFT 12 /* Default values for the transmit IPG register */ #define DEFAULT_82542_TIPG_IPGT 10 #define DEFAULT_82543_TIPG_IPGT_FIBER 9 #define DEFAULT_82543_TIPG_IPGT_COPPER 8 #define E1000_TIPG_IPGT_MASK 0x000003FF #define DEFAULT_82542_TIPG_IPGR1 2 #define DEFAULT_82543_TIPG_IPGR1 8 #define E1000_TIPG_IPGR1_SHIFT 10 #define DEFAULT_82542_TIPG_IPGR2 10 #define DEFAULT_82543_TIPG_IPGR2 6 #define DEFAULT_80003ES2LAN_TIPG_IPGR2 7 #define E1000_TIPG_IPGR2_SHIFT 20 /* Ethertype field values */ #define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */ #define ETHERNET_FCS_SIZE 4 #define MAX_JUMBO_FRAME_SIZE 0x3F00 #define E1000_TX_PTR_GAP 0x1F /* Extended Configuration Control and Size */ #define E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP 0x00000020 #define E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE 0x00000001 #define E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE 0x00000008 #define E1000_EXTCNF_CTRL_SWFLAG 0x00000020 #define E1000_EXTCNF_CTRL_GATE_PHY_CFG 0x00000080 #define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK 0x00FF0000 #define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT 16 #define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK 0x0FFF0000 #define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT 16 #define E1000_PHY_CTRL_D0A_LPLU 0x00000002 #define E1000_PHY_CTRL_NOND0A_LPLU 0x00000004 #define E1000_PHY_CTRL_NOND0A_GBE_DISABLE 0x00000008 #define E1000_PHY_CTRL_GBE_DISABLE 0x00000040 #define E1000_KABGTXD_BGSQLBIAS 0x00050000 /* Low Power IDLE Control */ #define E1000_LPIC_LPIET_SHIFT 24 /* Low Power Idle Entry Time */ /* PBA constants */ #define E1000_PBA_8K 0x0008 /* 8KB */ #define E1000_PBA_10K 0x000A /* 10KB */ #define E1000_PBA_12K 0x000C /* 12KB */ #define E1000_PBA_14K 0x000E /* 14KB */ #define E1000_PBA_16K 0x0010 /* 16KB */ #define E1000_PBA_18K 0x0012 #define E1000_PBA_20K 0x0014 #define E1000_PBA_22K 0x0016 #define E1000_PBA_24K 0x0018 #define E1000_PBA_26K 0x001A #define E1000_PBA_30K 0x001E #define E1000_PBA_32K 0x0020 #define E1000_PBA_34K 0x0022 #define E1000_PBA_35K 0x0023 #define E1000_PBA_38K 0x0026 #define E1000_PBA_40K 0x0028 #define E1000_PBA_48K 0x0030 /* 48KB */ #define E1000_PBA_64K 0x0040 /* 64KB */ #define E1000_PBA_RXA_MASK 0xFFFF #define E1000_PBS_16K E1000_PBA_16K /* Uncorrectable/correctable ECC Error counts and enable bits */ #define E1000_PBECCSTS_CORR_ERR_CNT_MASK 0x000000FF #define E1000_PBECCSTS_UNCORR_ERR_CNT_MASK 0x0000FF00 #define E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT 8 #define E1000_PBECCSTS_ECC_ENABLE 0x00010000 #define IFS_MAX 80 #define IFS_MIN 40 #define IFS_RATIO 4 #define IFS_STEP 10 #define MIN_NUM_XMITS 1000 /* SW Semaphore Register */ #define E1000_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */ #define E1000_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */ #define E1000_SWSM_DRV_LOAD 0x00000008 /* Driver Loaded Bit */ #define E1000_SWSM2_LOCK 0x00000002 /* Secondary driver semaphore bit */ /* Interrupt Cause Read */ #define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */ #define E1000_ICR_TXQE 0x00000002 /* Transmit Queue empty */ #define E1000_ICR_LSC 0x00000004 /* Link Status Change */ #define E1000_ICR_RXSEQ 0x00000008 /* Rx sequence error */ #define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */ #define E1000_ICR_RXO 0x00000040 /* Rx overrun */ #define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */ #define E1000_ICR_VMMB 0x00000100 /* VM MB event */ #define E1000_ICR_RXCFG 0x00000400 /* Rx /c/ ordered set */ #define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */ #define E1000_ICR_GPI_EN1 0x00001000 /* GP Int 1 */ #define E1000_ICR_GPI_EN2 0x00002000 /* GP Int 2 */ #define E1000_ICR_GPI_EN3 0x00004000 /* GP Int 3 */ #define E1000_ICR_TXD_LOW 0x00008000 #define E1000_ICR_MNG 0x00040000 /* Manageability event */ #define E1000_ICR_ECCER 0x00400000 /* Uncorrectable ECC Error */ #define E1000_ICR_TS 0x00080000 /* Time Sync Interrupt */ #define E1000_ICR_DRSTA 0x40000000 /* Device Reset Asserted */ /* If this bit asserted, the driver should claim the interrupt */ #define E1000_ICR_INT_ASSERTED 0x80000000 #define E1000_ICR_DOUTSYNC 0x10000000 /* NIC DMA out of sync */ #define E1000_ICR_RXQ0 0x00100000 /* Rx Queue 0 Interrupt */ #define E1000_ICR_RXQ1 0x00200000 /* Rx Queue 1 Interrupt */ #define E1000_ICR_TXQ0 0x00400000 /* Tx Queue 0 Interrupt */ #define E1000_ICR_TXQ1 0x00800000 /* Tx Queue 1 Interrupt */ #define E1000_ICR_OTHER 0x01000000 /* Other Interrupts */ #define E1000_ICR_FER 0x00400000 /* Fatal Error */ #define E1000_ICR_THS 0x00800000 /* ICR.THS: Thermal Sensor Event*/ #define E1000_ICR_MDDET 0x10000000 /* Malicious Driver Detect */ +#define E1000_ITR_MASK 0x000FFFFF /* ITR value bitfield */ +#define E1000_ITR_MULT 256 /* ITR mulitplier in nsec */ + /* PBA ECC Register */ #define E1000_PBA_ECC_COUNTER_MASK 0xFFF00000 /* ECC counter mask */ #define E1000_PBA_ECC_COUNTER_SHIFT 20 /* ECC counter shift value */ #define E1000_PBA_ECC_CORR_EN 0x00000001 /* Enable ECC error correction */ #define E1000_PBA_ECC_STAT_CLR 0x00000002 /* Clear ECC error counter */ #define E1000_PBA_ECC_INT_EN 0x00000004 /* Enable ICR bit 5 on ECC error */ /* Extended Interrupt Cause Read */ #define E1000_EICR_RX_QUEUE0 0x00000001 /* Rx Queue 0 Interrupt */ #define E1000_EICR_RX_QUEUE1 0x00000002 /* Rx Queue 1 Interrupt */ #define E1000_EICR_RX_QUEUE2 0x00000004 /* Rx Queue 2 Interrupt */ #define E1000_EICR_RX_QUEUE3 0x00000008 /* Rx Queue 3 Interrupt */ #define E1000_EICR_TX_QUEUE0 0x00000100 /* Tx Queue 0 Interrupt */ #define E1000_EICR_TX_QUEUE1 0x00000200 /* Tx Queue 1 Interrupt */ #define E1000_EICR_TX_QUEUE2 0x00000400 /* Tx Queue 2 Interrupt */ #define E1000_EICR_TX_QUEUE3 0x00000800 /* Tx Queue 3 Interrupt */ #define E1000_EICR_TCP_TIMER 0x40000000 /* TCP Timer */ #define E1000_EICR_OTHER 0x80000000 /* Interrupt Cause Active */ /* TCP Timer */ #define E1000_TCPTIMER_KS 0x00000100 /* KickStart */ #define E1000_TCPTIMER_COUNT_ENABLE 0x00000200 /* Count Enable */ #define E1000_TCPTIMER_COUNT_FINISH 0x00000400 /* Count finish */ #define E1000_TCPTIMER_LOOP 0x00000800 /* Loop */ /* This defines the bits that are set in the Interrupt Mask * Set/Read Register. Each bit is documented below: * o RXT0 = Receiver Timer Interrupt (ring 0) * o TXDW = Transmit Descriptor Written Back * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) * o RXSEQ = Receive Sequence Error * o LSC = Link Status Change */ #define IMS_ENABLE_MASK ( \ E1000_IMS_RXT0 | \ E1000_IMS_TXDW | \ E1000_IMS_RXDMT0 | \ E1000_IMS_RXSEQ | \ E1000_IMS_LSC) /* Interrupt Mask Set */ #define E1000_IMS_TXDW E1000_ICR_TXDW /* Tx desc written back */ #define E1000_IMS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ #define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ #define E1000_IMS_VMMB E1000_ICR_VMMB /* Mail box activity */ #define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ #define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */ #define E1000_IMS_RXO E1000_ICR_RXO /* Rx overrun */ #define E1000_IMS_RXT0 E1000_ICR_RXT0 /* Rx timer intr */ #define E1000_IMS_TXD_LOW E1000_ICR_TXD_LOW #define E1000_IMS_ECCER E1000_ICR_ECCER /* Uncorrectable ECC Error */ #define E1000_IMS_TS E1000_ICR_TS /* Time Sync Interrupt */ #define E1000_IMS_DRSTA E1000_ICR_DRSTA /* Device Reset Asserted */ #define E1000_IMS_DOUTSYNC E1000_ICR_DOUTSYNC /* NIC DMA out of sync */ #define E1000_IMS_RXQ0 E1000_ICR_RXQ0 /* Rx Queue 0 Interrupt */ #define E1000_IMS_RXQ1 E1000_ICR_RXQ1 /* Rx Queue 1 Interrupt */ #define E1000_IMS_TXQ0 E1000_ICR_TXQ0 /* Tx Queue 0 Interrupt */ #define E1000_IMS_TXQ1 E1000_ICR_TXQ1 /* Tx Queue 1 Interrupt */ #define E1000_IMS_OTHER E1000_ICR_OTHER /* Other Interrupts */ #define E1000_IMS_FER E1000_ICR_FER /* Fatal Error */ #define E1000_IMS_THS E1000_ICR_THS /* ICR.TS: Thermal Sensor Event*/ #define E1000_IMS_MDDET E1000_ICR_MDDET /* Malicious Driver Detect */ /* Extended Interrupt Mask Set */ #define E1000_EIMS_RX_QUEUE0 E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */ #define E1000_EIMS_RX_QUEUE1 E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */ #define E1000_EIMS_RX_QUEUE2 E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */ #define E1000_EIMS_RX_QUEUE3 E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */ #define E1000_EIMS_TX_QUEUE0 E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */ #define E1000_EIMS_TX_QUEUE1 E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */ #define E1000_EIMS_TX_QUEUE2 E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */ #define E1000_EIMS_TX_QUEUE3 E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */ #define E1000_EIMS_TCP_TIMER E1000_EICR_TCP_TIMER /* TCP Timer */ #define E1000_EIMS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */ /* Interrupt Cause Set */ #define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */ #define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ #define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */ /* Extended Interrupt Cause Set */ #define E1000_EICS_RX_QUEUE0 E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */ #define E1000_EICS_RX_QUEUE1 E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */ #define E1000_EICS_RX_QUEUE2 E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */ #define E1000_EICS_RX_QUEUE3 E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */ #define E1000_EICS_TX_QUEUE0 E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */ #define E1000_EICS_TX_QUEUE1 E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */ #define E1000_EICS_TX_QUEUE2 E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */ #define E1000_EICS_TX_QUEUE3 E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */ #define E1000_EICS_TCP_TIMER E1000_EICR_TCP_TIMER /* TCP Timer */ #define E1000_EICS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */ #define E1000_EITR_ITR_INT_MASK 0x0000FFFF /* E1000_EITR_CNT_IGNR is only for 82576 and newer */ #define E1000_EITR_CNT_IGNR 0x80000000 /* Don't reset counters on write */ #define E1000_EITR_INTERVAL 0x00007FFC /* Transmit Descriptor Control */ #define E1000_TXDCTL_PTHRESH 0x0000003F /* TXDCTL Prefetch Threshold */ #define E1000_TXDCTL_HTHRESH 0x00003F00 /* TXDCTL Host Threshold */ #define E1000_TXDCTL_WTHRESH 0x003F0000 /* TXDCTL Writeback Threshold */ #define E1000_TXDCTL_GRAN 0x01000000 /* TXDCTL Granularity */ #define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */ #define E1000_TXDCTL_MAX_TX_DESC_PREFETCH 0x0100001F /* GRAN=1, PTHRESH=31 */ /* Enable the counting of descriptors still to be processed. */ #define E1000_TXDCTL_COUNT_DESC 0x00400000 /* Flow Control Constants */ #define FLOW_CONTROL_ADDRESS_LOW 0x00C28001 #define FLOW_CONTROL_ADDRESS_HIGH 0x00000100 #define FLOW_CONTROL_TYPE 0x8808 /* 802.1q VLAN Packet Size */ #define VLAN_TAG_SIZE 4 /* 802.3ac tag (not DMA'd) */ #define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */ /* Receive Address * Number of high/low register pairs in the RAR. The RAR (Receive Address * Registers) holds the directed and multicast addresses that we monitor. * Technically, we have 16 spots. However, we reserve one of these spots * (RAR[15]) for our directed address used by controllers with * manageability enabled, allowing us room for 15 multicast addresses. */ #define E1000_RAR_ENTRIES 15 #define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */ #define E1000_RAL_MAC_ADDR_LEN 4 #define E1000_RAH_MAC_ADDR_LEN 2 #define E1000_RAH_QUEUE_MASK_82575 0x000C0000 #define E1000_RAH_POOL_1 0x00040000 /* Error Codes */ #define E1000_SUCCESS 0 #define E1000_ERR_NVM 1 #define E1000_ERR_PHY 2 #define E1000_ERR_CONFIG 3 #define E1000_ERR_PARAM 4 #define E1000_ERR_MAC_INIT 5 #define E1000_ERR_PHY_TYPE 6 #define E1000_ERR_RESET 9 #define E1000_ERR_MASTER_REQUESTS_PENDING 10 #define E1000_ERR_HOST_INTERFACE_COMMAND 11 #define E1000_BLK_PHY_RESET 12 #define E1000_ERR_SWFW_SYNC 13 #define E1000_NOT_IMPLEMENTED 14 #define E1000_ERR_MBX 15 #define E1000_ERR_INVALID_ARGUMENT 16 #define E1000_ERR_NO_SPACE 17 #define E1000_ERR_NVM_PBA_SECTION 18 #define E1000_ERR_I2C 19 #define E1000_ERR_INVM_VALUE_NOT_FOUND 20 /* Loop limit on how long we wait for auto-negotiation to complete */ #define FIBER_LINK_UP_LIMIT 50 #define COPPER_LINK_UP_LIMIT 10 #define PHY_AUTO_NEG_LIMIT 45 #define PHY_FORCE_LIMIT 20 /* Number of 100 microseconds we wait for PCI Express master disable */ #define MASTER_DISABLE_TIMEOUT 800 /* Number of milliseconds we wait for PHY configuration done after MAC reset */ #define PHY_CFG_TIMEOUT 100 /* Number of 2 milliseconds we wait for acquiring MDIO ownership. */ #define MDIO_OWNERSHIP_TIMEOUT 10 /* Number of milliseconds for NVM auto read done after MAC reset. */ #define AUTO_READ_DONE_TIMEOUT 10 /* Flow Control */ #define E1000_FCRTH_RTH 0x0000FFF8 /* Mask Bits[15:3] for RTH */ #define E1000_FCRTL_RTL 0x0000FFF8 /* Mask Bits[15:3] for RTL */ #define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */ /* Transmit Configuration Word */ #define E1000_TXCW_FD 0x00000020 /* TXCW full duplex */ #define E1000_TXCW_PAUSE 0x00000080 /* TXCW sym pause request */ #define E1000_TXCW_ASM_DIR 0x00000100 /* TXCW astm pause direction */ #define E1000_TXCW_PAUSE_MASK 0x00000180 /* TXCW pause request mask */ #define E1000_TXCW_ANE 0x80000000 /* Auto-neg enable */ /* Receive Configuration Word */ #define E1000_RXCW_CW 0x0000ffff /* RxConfigWord mask */ #define E1000_RXCW_IV 0x08000000 /* Receive config invalid */ #define E1000_RXCW_C 0x20000000 /* Receive config */ #define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */ #define E1000_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ #define E1000_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */ -/* HH Time Sync */ -#define E1000_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK 0x0000F000 /* max delay */ -#define E1000_TSYNCTXCTL_SYNC_COMP_ERR 0x20000000 /* sync err */ -#define E1000_TSYNCTXCTL_SYNC_COMP 0x40000000 /* sync complete */ -#define E1000_TSYNCTXCTL_START_SYNC 0x80000000 /* initiate sync */ - #define E1000_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */ #define E1000_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */ #define E1000_TSYNCRXCTL_TYPE_L2_V2 0x00 #define E1000_TSYNCRXCTL_TYPE_L4_V1 0x02 #define E1000_TSYNCRXCTL_TYPE_L2_L4_V2 0x04 #define E1000_TSYNCRXCTL_TYPE_ALL 0x08 #define E1000_TSYNCRXCTL_TYPE_EVENT_V2 0x0A #define E1000_TSYNCRXCTL_ENABLED 0x00000010 /* enable Rx timestamping */ #define E1000_TSYNCRXCTL_SYSCFI 0x00000020 /* Sys clock frequency */ #define E1000_RXMTRL_PTP_V1_SYNC_MESSAGE 0x00000000 #define E1000_RXMTRL_PTP_V1_DELAY_REQ_MESSAGE 0x00010000 #define E1000_RXMTRL_PTP_V2_SYNC_MESSAGE 0x00000000 #define E1000_RXMTRL_PTP_V2_DELAY_REQ_MESSAGE 0x01000000 #define E1000_TSYNCRXCFG_PTP_V1_CTRLT_MASK 0x000000FF #define E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE 0x00 #define E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE 0x01 #define E1000_TSYNCRXCFG_PTP_V1_FOLLOWUP_MESSAGE 0x02 #define E1000_TSYNCRXCFG_PTP_V1_DELAY_RESP_MESSAGE 0x03 #define E1000_TSYNCRXCFG_PTP_V1_MANAGEMENT_MESSAGE 0x04 #define E1000_TSYNCRXCFG_PTP_V2_MSGID_MASK 0x00000F00 #define E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE 0x0000 #define E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE 0x0100 #define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_REQ_MESSAGE 0x0200 #define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_RESP_MESSAGE 0x0300 #define E1000_TSYNCRXCFG_PTP_V2_FOLLOWUP_MESSAGE 0x0800 #define E1000_TSYNCRXCFG_PTP_V2_DELAY_RESP_MESSAGE 0x0900 #define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_FOLLOWUP_MESSAGE 0x0A00 #define E1000_TSYNCRXCFG_PTP_V2_ANNOUNCE_MESSAGE 0x0B00 #define E1000_TSYNCRXCFG_PTP_V2_SIGNALLING_MESSAGE 0x0C00 #define E1000_TSYNCRXCFG_PTP_V2_MANAGEMENT_MESSAGE 0x0D00 #define E1000_TIMINCA_16NS_SHIFT 24 #define E1000_TIMINCA_INCPERIOD_SHIFT 24 #define E1000_TIMINCA_INCVALUE_MASK 0x00FFFFFF #define E1000_TSICR_TXTS 0x00000002 #define E1000_TSIM_TXTS 0x00000002 /* TUPLE Filtering Configuration */ #define E1000_TTQF_DISABLE_MASK 0xF0008000 /* TTQF Disable Mask */ #define E1000_TTQF_QUEUE_ENABLE 0x100 /* TTQF Queue Enable Bit */ #define E1000_TTQF_PROTOCOL_MASK 0xFF /* TTQF Protocol Mask */ /* TTQF TCP Bit, shift with E1000_TTQF_PROTOCOL SHIFT */ #define E1000_TTQF_PROTOCOL_TCP 0x0 /* TTQF UDP Bit, shift with E1000_TTQF_PROTOCOL_SHIFT */ #define E1000_TTQF_PROTOCOL_UDP 0x1 /* TTQF SCTP Bit, shift with E1000_TTQF_PROTOCOL_SHIFT */ #define E1000_TTQF_PROTOCOL_SCTP 0x2 #define E1000_TTQF_PROTOCOL_SHIFT 5 /* TTQF Protocol Shift */ #define E1000_TTQF_QUEUE_SHIFT 16 /* TTQF Queue Shfit */ #define E1000_TTQF_RX_QUEUE_MASK 0x70000 /* TTQF Queue Mask */ #define E1000_TTQF_MASK_ENABLE 0x10000000 /* TTQF Mask Enable Bit */ #define E1000_IMIR_CLEAR_MASK 0xF001FFFF /* IMIR Reg Clear Mask */ #define E1000_IMIR_PORT_BYPASS 0x20000 /* IMIR Port Bypass Bit */ #define E1000_IMIR_PRIORITY_SHIFT 29 /* IMIR Priority Shift */ #define E1000_IMIREXT_CLEAR_MASK 0x7FFFF /* IMIREXT Reg Clear Mask */ #define E1000_MDICNFG_EXT_MDIO 0x80000000 /* MDI ext/int destination */ #define E1000_MDICNFG_COM_MDIO 0x40000000 /* MDI shared w/ lan 0 */ #define E1000_MDICNFG_PHY_MASK 0x03E00000 #define E1000_MDICNFG_PHY_SHIFT 21 #define E1000_MEDIA_PORT_COPPER 1 #define E1000_MEDIA_PORT_OTHER 2 #define E1000_M88E1112_AUTO_COPPER_SGMII 0x2 #define E1000_M88E1112_AUTO_COPPER_BASEX 0x3 #define E1000_M88E1112_STATUS_LINK 0x0004 /* Interface Link Bit */ #define E1000_M88E1112_MAC_CTRL_1 0x10 #define E1000_M88E1112_MAC_CTRL_1_MODE_MASK 0x0380 /* Mode Select */ #define E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT 7 #define E1000_M88E1112_PAGE_ADDR 0x16 #define E1000_M88E1112_STATUS 0x01 #define E1000_THSTAT_LOW_EVENT 0x20000000 /* Low thermal threshold */ #define E1000_THSTAT_MID_EVENT 0x00200000 /* Mid thermal threshold */ #define E1000_THSTAT_HIGH_EVENT 0x00002000 /* High thermal threshold */ #define E1000_THSTAT_PWR_DOWN 0x00000001 /* Power Down Event */ #define E1000_THSTAT_LINK_THROTTLE 0x00000002 /* Link Spd Throttle Event */ /* I350 EEE defines */ #define E1000_IPCNFG_EEE_1G_AN 0x00000008 /* IPCNFG EEE Ena 1G AN */ #define E1000_IPCNFG_EEE_100M_AN 0x00000004 /* IPCNFG EEE Ena 100M AN */ #define E1000_EEER_TX_LPI_EN 0x00010000 /* EEER Tx LPI Enable */ #define E1000_EEER_RX_LPI_EN 0x00020000 /* EEER Rx LPI Enable */ #define E1000_EEER_LPI_FC 0x00040000 /* EEER Ena on Flow Cntrl */ /* EEE status */ #define E1000_EEER_EEE_NEG 0x20000000 /* EEE capability nego */ #define E1000_EEER_RX_LPI_STATUS 0x40000000 /* Rx in LPI state */ #define E1000_EEER_TX_LPI_STATUS 0x80000000 /* Tx in LPI state */ #define E1000_EEE_LP_ADV_ADDR_I350 0x040F /* EEE LP Advertisement */ #define E1000_M88E1543_PAGE_ADDR 0x16 /* Page Offset Register */ #define E1000_M88E1543_EEE_CTRL_1 0x0 #define E1000_M88E1543_EEE_CTRL_1_MS 0x0001 /* EEE Master/Slave */ #define E1000_EEE_ADV_DEV_I354 7 #define E1000_EEE_ADV_ADDR_I354 60 #define E1000_EEE_ADV_100_SUPPORTED (1 << 1) /* 100BaseTx EEE Supported */ #define E1000_EEE_ADV_1000_SUPPORTED (1 << 2) /* 1000BaseT EEE Supported */ #define E1000_PCS_STATUS_DEV_I354 3 #define E1000_PCS_STATUS_ADDR_I354 1 #define E1000_PCS_STATUS_RX_LPI_RCVD 0x0400 #define E1000_PCS_STATUS_TX_LPI_RCVD 0x0800 #define E1000_M88E1512_CFG_REG_1 0x0010 #define E1000_M88E1512_CFG_REG_2 0x0011 #define E1000_M88E1512_CFG_REG_3 0x0007 #define E1000_M88E1512_MODE 0x0014 #define E1000_EEE_SU_LPI_CLK_STP 0x00800000 /* EEE LPI Clock Stop */ #define E1000_EEE_LP_ADV_DEV_I210 7 /* EEE LP Adv Device */ #define E1000_EEE_LP_ADV_ADDR_I210 61 /* EEE LP Adv Register */ /* PCI Express Control */ #define E1000_GCR_RXD_NO_SNOOP 0x00000001 #define E1000_GCR_RXDSCW_NO_SNOOP 0x00000002 #define E1000_GCR_RXDSCR_NO_SNOOP 0x00000004 #define E1000_GCR_TXD_NO_SNOOP 0x00000008 #define E1000_GCR_TXDSCW_NO_SNOOP 0x00000010 #define E1000_GCR_TXDSCR_NO_SNOOP 0x00000020 #define E1000_GCR_CMPL_TMOUT_MASK 0x0000F000 #define E1000_GCR_CMPL_TMOUT_10ms 0x00001000 #define E1000_GCR_CMPL_TMOUT_RESEND 0x00010000 #define E1000_GCR_CAP_VER2 0x00040000 #define PCIE_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP | \ E1000_GCR_RXDSCW_NO_SNOOP | \ E1000_GCR_RXDSCR_NO_SNOOP | \ E1000_GCR_TXD_NO_SNOOP | \ E1000_GCR_TXDSCW_NO_SNOOP | \ E1000_GCR_TXDSCR_NO_SNOOP) #define E1000_MMDAC_FUNC_DATA 0x4000 /* Data, no post increment */ /* mPHY address control and data registers */ #define E1000_MPHY_ADDR_CTL 0x0024 /* Address Control Reg */ #define E1000_MPHY_ADDR_CTL_OFFSET_MASK 0xFFFF0000 #define E1000_MPHY_DATA 0x0E10 /* Data Register */ /* AFE CSR Offset for PCS CLK */ #define E1000_MPHY_PCS_CLK_REG_OFFSET 0x0004 /* Override for near end digital loopback. */ #define E1000_MPHY_PCS_CLK_REG_DIGINELBEN 0x10 /* PHY Control Register */ #define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */ #define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */ #define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ #define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ #define MII_CR_ISOLATE 0x0400 /* Isolate PHY from MII */ #define MII_CR_POWER_DOWN 0x0800 /* Power down */ #define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ #define MII_CR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */ #define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ #define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ #define MII_CR_SPEED_1000 0x0040 #define MII_CR_SPEED_100 0x2000 #define MII_CR_SPEED_10 0x0000 /* PHY Status Register */ #define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */ #define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */ #define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ #define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */ #define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */ #define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ #define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */ #define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */ #define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */ #define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */ #define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */ #define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */ #define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */ #define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */ #define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */ /* Autoneg Advertisement Register */ #define NWAY_AR_SELECTOR_FIELD 0x0001 /* indicates IEEE 802.3 CSMA/CD */ #define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */ #define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */ #define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */ #define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */ #define NWAY_AR_100T4_CAPS 0x0200 /* 100T4 Capable */ #define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */ #define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */ #define NWAY_AR_REMOTE_FAULT 0x2000 /* Remote Fault detected */ #define NWAY_AR_NEXT_PAGE 0x8000 /* Next Page ability supported */ /* Link Partner Ability Register (Base Page) */ #define NWAY_LPAR_SELECTOR_FIELD 0x0000 /* LP protocol selector field */ #define NWAY_LPAR_10T_HD_CAPS 0x0020 /* LP 10T Half Dplx Capable */ #define NWAY_LPAR_10T_FD_CAPS 0x0040 /* LP 10T Full Dplx Capable */ #define NWAY_LPAR_100TX_HD_CAPS 0x0080 /* LP 100TX Half Dplx Capable */ #define NWAY_LPAR_100TX_FD_CAPS 0x0100 /* LP 100TX Full Dplx Capable */ #define NWAY_LPAR_100T4_CAPS 0x0200 /* LP is 100T4 Capable */ #define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */ #define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asym Pause Direction bit */ #define NWAY_LPAR_REMOTE_FAULT 0x2000 /* LP detected Remote Fault */ #define NWAY_LPAR_ACKNOWLEDGE 0x4000 /* LP rx'd link code word */ #define NWAY_LPAR_NEXT_PAGE 0x8000 /* Next Page ability supported */ /* Autoneg Expansion Register */ #define NWAY_ER_LP_NWAY_CAPS 0x0001 /* LP has Auto Neg Capability */ #define NWAY_ER_PAGE_RXD 0x0002 /* LP 10T Half Dplx Capable */ #define NWAY_ER_NEXT_PAGE_CAPS 0x0004 /* LP 10T Full Dplx Capable */ #define NWAY_ER_LP_NEXT_PAGE_CAPS 0x0008 /* LP 100TX Half Dplx Capable */ #define NWAY_ER_PAR_DETECT_FAULT 0x0010 /* LP 100TX Full Dplx Capable */ /* 1000BASE-T Control Register */ #define CR_1000T_ASYM_PAUSE 0x0080 /* Advertise asymmetric pause bit */ #define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */ #define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */ /* 1=Repeater/switch device port 0=DTE device */ #define CR_1000T_REPEATER_DTE 0x0400 /* 1=Configure PHY as Master 0=Configure PHY as Slave */ #define CR_1000T_MS_VALUE 0x0800 /* 1=Master/Slave manual config value 0=Automatic Master/Slave config */ #define CR_1000T_MS_ENABLE 0x1000 #define CR_1000T_TEST_MODE_NORMAL 0x0000 /* Normal Operation */ #define CR_1000T_TEST_MODE_1 0x2000 /* Transmit Waveform test */ #define CR_1000T_TEST_MODE_2 0x4000 /* Master Transmit Jitter test */ #define CR_1000T_TEST_MODE_3 0x6000 /* Slave Transmit Jitter test */ #define CR_1000T_TEST_MODE_4 0x8000 /* Transmitter Distortion test */ /* 1000BASE-T Status Register */ #define SR_1000T_IDLE_ERROR_CNT 0x00FF /* Num idle err since last rd */ #define SR_1000T_ASYM_PAUSE_DIR 0x0100 /* LP asym pause direction bit */ #define SR_1000T_LP_HD_CAPS 0x0400 /* LP is 1000T HD capable */ #define SR_1000T_LP_FD_CAPS 0x0800 /* LP is 1000T FD capable */ #define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */ #define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */ #define SR_1000T_MS_CONFIG_RES 0x4000 /* 1=Local Tx Master, 0=Slave */ #define SR_1000T_MS_CONFIG_FAULT 0x8000 /* Master/Slave config fault */ #define SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT 5 /* PHY 1000 MII Register/Bit Definitions */ /* PHY Registers defined by IEEE */ #define PHY_CONTROL 0x00 /* Control Register */ #define PHY_STATUS 0x01 /* Status Register */ #define PHY_ID1 0x02 /* Phy Id Reg (word 1) */ #define PHY_ID2 0x03 /* Phy Id Reg (word 2) */ #define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */ #define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */ #define PHY_AUTONEG_EXP 0x06 /* Autoneg Expansion Reg */ #define PHY_NEXT_PAGE_TX 0x07 /* Next Page Tx */ #define PHY_LP_NEXT_PAGE 0x08 /* Link Partner Next Page */ #define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ #define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ #define PHY_EXT_STATUS 0x0F /* Extended Status Reg */ #define PHY_CONTROL_LB 0x4000 /* PHY Loopback bit */ /* NVM Control */ #define E1000_EECD_SK 0x00000001 /* NVM Clock */ #define E1000_EECD_CS 0x00000002 /* NVM Chip Select */ #define E1000_EECD_DI 0x00000004 /* NVM Data In */ #define E1000_EECD_DO 0x00000008 /* NVM Data Out */ #define E1000_EECD_REQ 0x00000040 /* NVM Access Request */ #define E1000_EECD_GNT 0x00000080 /* NVM Access Grant */ #define E1000_EECD_PRES 0x00000100 /* NVM Present */ #define E1000_EECD_SIZE 0x00000200 /* NVM Size (0=64 word 1=256 word) */ #define E1000_EECD_BLOCKED 0x00008000 /* Bit banging access blocked flag */ #define E1000_EECD_ABORT 0x00010000 /* NVM operation aborted flag */ #define E1000_EECD_TIMEOUT 0x00020000 /* NVM read operation timeout flag */ #define E1000_EECD_ERROR_CLR 0x00040000 /* NVM error status clear bit */ /* NVM Addressing bits based on type 0=small, 1=large */ #define E1000_EECD_ADDR_BITS 0x00000400 #define E1000_EECD_TYPE 0x00002000 /* NVM Type (1-SPI, 0-Microwire) */ +#ifndef E1000_NVM_GRANT_ATTEMPTS #define E1000_NVM_GRANT_ATTEMPTS 1000 /* NVM # attempts to gain grant */ +#endif #define E1000_EECD_AUTO_RD 0x00000200 /* NVM Auto Read done */ #define E1000_EECD_SIZE_EX_MASK 0x00007800 /* NVM Size */ #define E1000_EECD_SIZE_EX_SHIFT 11 #define E1000_EECD_FLUPD 0x00080000 /* Update FLASH */ #define E1000_EECD_AUPDEN 0x00100000 /* Ena Auto FLASH update */ #define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */ #define E1000_EECD_SEC1VAL_VALID_MASK (E1000_EECD_AUTO_RD | E1000_EECD_PRES) #define E1000_EECD_FLUPD_I210 0x00800000 /* Update FLASH */ #define E1000_EECD_FLUDONE_I210 0x04000000 /* Update FLASH done */ #define E1000_EECD_FLASH_DETECTED_I210 0x00080000 /* FLASH detected */ #define E1000_EECD_SEC1VAL_I210 0x02000000 /* Sector One Valid */ #define E1000_FLUDONE_ATTEMPTS 20000 #define E1000_EERD_EEWR_MAX_COUNT 512 /* buffered EEPROM words rw */ #define E1000_I210_FIFO_SEL_RX 0x00 #define E1000_I210_FIFO_SEL_TX_QAV(_i) (0x02 + (_i)) #define E1000_I210_FIFO_SEL_TX_LEGACY E1000_I210_FIFO_SEL_TX_QAV(0) #define E1000_I210_FIFO_SEL_BMC2OS_TX 0x06 #define E1000_I210_FIFO_SEL_BMC2OS_RX 0x01 #define E1000_I210_FLASH_SECTOR_SIZE 0x1000 /* 4KB FLASH sector unit size */ /* Secure FLASH mode requires removing MSb */ #define E1000_I210_FW_PTR_MASK 0x7FFF /* Firmware code revision field word offset*/ #define E1000_I210_FW_VER_OFFSET 328 #define E1000_NVM_RW_REG_DATA 16 /* Offset to data in NVM read/write regs */ #define E1000_NVM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */ #define E1000_NVM_RW_REG_START 1 /* Start operation */ #define E1000_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ #define E1000_NVM_POLL_WRITE 1 /* Flag for polling for write complete */ #define E1000_NVM_POLL_READ 0 /* Flag for polling for read complete */ #define E1000_FLASH_UPDATES 2000 /* NVM Word Offsets */ #define NVM_COMPAT 0x0003 #define NVM_ID_LED_SETTINGS 0x0004 -#define NVM_VERSION 0x0005 #define NVM_SERDES_AMPLITUDE 0x0006 /* SERDES output amplitude */ #define NVM_PHY_CLASS_WORD 0x0007 #define E1000_I210_NVM_FW_MODULE_PTR 0x0010 #define E1000_I350_NVM_FW_MODULE_PTR 0x0051 #define NVM_FUTURE_INIT_WORD1 0x0019 -#define NVM_ETRACK_WORD 0x0042 -#define NVM_ETRACK_HIWORD 0x0043 -#define NVM_COMB_VER_OFF 0x0083 -#define NVM_COMB_VER_PTR 0x003d - -/* NVM version defines */ -#define NVM_MAJOR_MASK 0xF000 -#define NVM_MINOR_MASK 0x0FF0 -#define NVM_IMAGE_ID_MASK 0x000F -#define NVM_COMB_VER_MASK 0x00FF -#define NVM_MAJOR_SHIFT 12 -#define NVM_MINOR_SHIFT 4 -#define NVM_COMB_VER_SHFT 8 -#define NVM_VER_INVALID 0xFFFF -#define NVM_ETRACK_SHIFT 16 -#define NVM_ETRACK_VALID 0x8000 -#define NVM_NEW_DEC_MASK 0x0F00 -#define NVM_HEX_CONV 16 -#define NVM_HEX_TENS 10 - -/* FW version defines */ -/* Offset of "Loader patch ptr" in Firmware Header */ -#define E1000_I350_NVM_FW_LOADER_PATCH_PTR_OFFSET 0x01 -/* Patch generation hour & minutes */ -#define E1000_I350_NVM_FW_VER_WORD1_OFFSET 0x04 -/* Patch generation month & day */ -#define E1000_I350_NVM_FW_VER_WORD2_OFFSET 0x05 -/* Patch generation year */ -#define E1000_I350_NVM_FW_VER_WORD3_OFFSET 0x06 -/* Patch major & minor numbers */ -#define E1000_I350_NVM_FW_VER_WORD4_OFFSET 0x07 - #define NVM_MAC_ADDR 0x0000 #define NVM_SUB_DEV_ID 0x000B #define NVM_SUB_VEN_ID 0x000C #define NVM_DEV_ID 0x000D #define NVM_VEN_ID 0x000E #define NVM_INIT_CTRL_2 0x000F #define NVM_INIT_CTRL_4 0x0013 #define NVM_LED_1_CFG 0x001C #define NVM_LED_0_2_CFG 0x001F #define NVM_COMPAT_VALID_CSUM 0x0001 #define NVM_FUTURE_INIT_WORD1_VALID_CSUM 0x0040 #define NVM_INIT_CONTROL2_REG 0x000F #define NVM_INIT_CONTROL3_PORT_B 0x0014 #define NVM_INIT_3GIO_3 0x001A #define NVM_SWDEF_PINS_CTRL_PORT_0 0x0020 #define NVM_INIT_CONTROL3_PORT_A 0x0024 #define NVM_CFG 0x0012 #define NVM_ALT_MAC_ADDR_PTR 0x0037 #define NVM_CHECKSUM_REG 0x003F #define NVM_COMPATIBILITY_REG_3 0x0003 #define NVM_COMPATIBILITY_BIT_MASK 0x8000 #define E1000_NVM_CFG_DONE_PORT_0 0x040000 /* MNG config cycle done */ #define E1000_NVM_CFG_DONE_PORT_1 0x080000 /* ...for second port */ #define E1000_NVM_CFG_DONE_PORT_2 0x100000 /* ...for third port */ #define E1000_NVM_CFG_DONE_PORT_3 0x200000 /* ...for fourth port */ #define NVM_82580_LAN_FUNC_OFFSET(a) ((a) ? (0x40 + (0x40 * (a))) : 0) /* Mask bits for fields in Word 0x24 of the NVM */ #define NVM_WORD24_COM_MDIO 0x0008 /* MDIO interface shared */ #define NVM_WORD24_EXT_MDIO 0x0004 /* MDIO accesses routed extrnl */ /* Offset of Link Mode bits for 82575/82576 */ #define NVM_WORD24_LNK_MODE_OFFSET 8 /* Offset of Link Mode bits for 82580 up */ #define NVM_WORD24_82580_LNK_MODE_OFFSET 4 /* Mask bits for fields in Word 0x0f of the NVM */ #define NVM_WORD0F_PAUSE_MASK 0x3000 #define NVM_WORD0F_PAUSE 0x1000 #define NVM_WORD0F_ASM_DIR 0x2000 #define NVM_WORD0F_SWPDIO_EXT_MASK 0x00F0 /* Mask bits for fields in Word 0x1a of the NVM */ #define NVM_WORD1A_ASPM_MASK 0x000C /* Mask bits for fields in Word 0x03 of the EEPROM */ #define NVM_COMPAT_LOM 0x0800 /* length of string needed to store PBA number */ #define E1000_PBANUM_LENGTH 11 /* For checksumming, the sum of all words in the NVM should equal 0xBABA. */ #define NVM_SUM 0xBABA /* PBA (printed board assembly) number words */ #define NVM_PBA_OFFSET_0 8 #define NVM_PBA_OFFSET_1 9 #define NVM_PBA_PTR_GUARD 0xFAFA #define NVM_RESERVED_WORD 0xFFFF #define NVM_PHY_CLASS_A 0x8000 #define NVM_SERDES_AMPLITUDE_MASK 0x000F #define NVM_SIZE_MASK 0x1C00 #define NVM_SIZE_SHIFT 10 #define NVM_WORD_SIZE_BASE_SHIFT 6 #define NVM_SWDPIO_EXT_SHIFT 4 /* NVM Commands - Microwire */ #define NVM_READ_OPCODE_MICROWIRE 0x6 /* NVM read opcode */ #define NVM_WRITE_OPCODE_MICROWIRE 0x5 /* NVM write opcode */ #define NVM_ERASE_OPCODE_MICROWIRE 0x7 /* NVM erase opcode */ #define NVM_EWEN_OPCODE_MICROWIRE 0x13 /* NVM erase/write enable */ #define NVM_EWDS_OPCODE_MICROWIRE 0x10 /* NVM erase/write disable */ /* NVM Commands - SPI */ #define NVM_MAX_RETRY_SPI 5000 /* Max wait of 5ms, for RDY signal */ #define NVM_READ_OPCODE_SPI 0x03 /* NVM read opcode */ #define NVM_WRITE_OPCODE_SPI 0x02 /* NVM write opcode */ #define NVM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = address bit-8 */ #define NVM_WREN_OPCODE_SPI 0x06 /* NVM set Write Enable latch */ #define NVM_RDSR_OPCODE_SPI 0x05 /* NVM read Status register */ /* SPI NVM Status Register */ #define NVM_STATUS_RDY_SPI 0x01 /* Word definitions for ID LED Settings */ #define ID_LED_RESERVED_0000 0x0000 #define ID_LED_RESERVED_FFFF 0xFFFF #define ID_LED_DEFAULT ((ID_LED_OFF1_ON2 << 12) | \ (ID_LED_OFF1_OFF2 << 8) | \ (ID_LED_DEF1_DEF2 << 4) | \ (ID_LED_DEF1_DEF2)) #define ID_LED_DEF1_DEF2 0x1 #define ID_LED_DEF1_ON2 0x2 #define ID_LED_DEF1_OFF2 0x3 #define ID_LED_ON1_DEF2 0x4 #define ID_LED_ON1_ON2 0x5 #define ID_LED_ON1_OFF2 0x6 #define ID_LED_OFF1_DEF2 0x7 #define ID_LED_OFF1_ON2 0x8 #define ID_LED_OFF1_OFF2 0x9 #define IGP_ACTIVITY_LED_MASK 0xFFFFF0FF #define IGP_ACTIVITY_LED_ENABLE 0x0300 #define IGP_LED3_MODE 0x07000000 /* PCI/PCI-X/PCI-EX Config space */ #define PCIX_COMMAND_REGISTER 0xE6 #define PCIX_STATUS_REGISTER_LO 0xE8 #define PCIX_STATUS_REGISTER_HI 0xEA #define PCI_HEADER_TYPE_REGISTER 0x0E #define PCIE_LINK_STATUS 0x12 #define PCIE_DEVICE_CONTROL2 0x28 #define PCIX_COMMAND_MMRBC_MASK 0x000C #define PCIX_COMMAND_MMRBC_SHIFT 0x2 #define PCIX_STATUS_HI_MMRBC_MASK 0x0060 #define PCIX_STATUS_HI_MMRBC_SHIFT 0x5 #define PCIX_STATUS_HI_MMRBC_4K 0x3 #define PCIX_STATUS_HI_MMRBC_2K 0x2 #define PCIX_STATUS_LO_FUNC_MASK 0x7 #define PCI_HEADER_TYPE_MULTIFUNC 0x80 #define PCIE_LINK_WIDTH_MASK 0x3F0 #define PCIE_LINK_WIDTH_SHIFT 4 #define PCIE_LINK_SPEED_MASK 0x0F #define PCIE_LINK_SPEED_2500 0x01 #define PCIE_LINK_SPEED_5000 0x02 #define PCIE_DEVICE_CONTROL2_16ms 0x0005 #ifndef ETH_ADDR_LEN #define ETH_ADDR_LEN 6 #endif #define PHY_REVISION_MASK 0xFFFFFFF0 #define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ #define MAX_PHY_MULTI_PAGE_REG 0xF /* Bit definitions for valid PHY IDs. * I = Integrated * E = External */ #define M88E1000_E_PHY_ID 0x01410C50 #define M88E1000_I_PHY_ID 0x01410C30 #define M88E1011_I_PHY_ID 0x01410C20 #define IGP01E1000_I_PHY_ID 0x02A80380 #define M88E1111_I_PHY_ID 0x01410CC0 #define M88E1543_E_PHY_ID 0x01410EA0 #define M88E1512_E_PHY_ID 0x01410DD0 #define M88E1112_E_PHY_ID 0x01410C90 #define I347AT4_E_PHY_ID 0x01410DC0 #define M88E1340M_E_PHY_ID 0x01410DF0 #define GG82563_E_PHY_ID 0x01410CA0 #define IGP03E1000_E_PHY_ID 0x02A80390 #define IFE_E_PHY_ID 0x02A80330 #define IFE_PLUS_E_PHY_ID 0x02A80320 #define IFE_C_E_PHY_ID 0x02A80310 #define BME1000_E_PHY_ID 0x01410CB0 #define BME1000_E_PHY_ID_R2 0x01410CB1 #define I82577_E_PHY_ID 0x01540050 #define I82578_E_PHY_ID 0x004DD040 #define I82579_E_PHY_ID 0x01540090 #define I217_E_PHY_ID 0x015400A0 #define I82580_I_PHY_ID 0x015403A0 #define I350_I_PHY_ID 0x015403B0 #define I210_I_PHY_ID 0x01410C00 #define IGP04E1000_E_PHY_ID 0x02A80391 #define M88_VENDOR 0x0141 /* M88E1000 Specific Registers */ #define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Reg */ #define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Reg */ #define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Cntrl */ #define M88E1000_RX_ERR_CNTR 0x15 /* Receive Error Counter */ #define M88E1000_PHY_EXT_CTRL 0x1A /* PHY extend control register */ #define M88E1000_PHY_PAGE_SELECT 0x1D /* Reg 29 for pg number setting */ #define M88E1000_PHY_GEN_CONTROL 0x1E /* meaning depends on reg 29 */ #define M88E1000_PHY_VCO_REG_BIT8 0x100 /* Bits 8 & 11 are adjusted for */ #define M88E1000_PHY_VCO_REG_BIT11 0x800 /* improved BER performance */ /* M88E1000 PHY Specific Control Register */ #define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reverse enabled */ /* MDI Crossover Mode bits 6:5 Manual MDI configuration */ #define M88E1000_PSCR_MDI_MANUAL_MODE 0x0000 #define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020 /* Manual MDIX configuration */ /* 1000BASE-T: Auto crossover, 100BASE-TX/10BASE-T: MDI Mode */ #define M88E1000_PSCR_AUTO_X_1000T 0x0040 /* Auto crossover enabled all speeds */ #define M88E1000_PSCR_AUTO_X_MODE 0x0060 #define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Tx */ /* M88E1000 PHY Specific Status Register */ #define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */ #define M88E1000_PSSR_DOWNSHIFT 0x0020 /* 1=Downshifted */ #define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */ /* 0 = <50M * 1 = 50-80M * 2 = 80-110M * 3 = 110-140M * 4 = >140M */ #define M88E1000_PSSR_CABLE_LENGTH 0x0380 #define M88E1000_PSSR_LINK 0x0400 /* 1=Link up, 0=Link down */ #define M88E1000_PSSR_SPD_DPLX_RESOLVED 0x0800 /* 1=Speed & Duplex resolved */ #define M88E1000_PSSR_DPLX 0x2000 /* 1=Duplex 0=Half Duplex */ #define M88E1000_PSSR_SPEED 0xC000 /* Speed, bits 14:15 */ #define M88E1000_PSSR_100MBS 0x4000 /* 01=100Mbs */ #define M88E1000_PSSR_1000MBS 0x8000 /* 10=1000Mbs */ #define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7 /* Number of times we will attempt to autonegotiate before downshifting if we * are the master */ #define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00 #define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000 /* Number of times we will attempt to autonegotiate before downshifting if we * are the slave */ #define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300 #define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X 0x0100 #define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */ /* Intel I347AT4 Registers */ #define I347AT4_PCDL 0x10 /* PHY Cable Diagnostics Length */ #define I347AT4_PCDC 0x15 /* PHY Cable Diagnostics Control */ #define I347AT4_PAGE_SELECT 0x16 /* I347AT4 Extended PHY Specific Control Register */ /* Number of times we will attempt to autonegotiate before downshifting if we * are the master */ #define I347AT4_PSCR_DOWNSHIFT_ENABLE 0x0800 #define I347AT4_PSCR_DOWNSHIFT_MASK 0x7000 #define I347AT4_PSCR_DOWNSHIFT_1X 0x0000 #define I347AT4_PSCR_DOWNSHIFT_2X 0x1000 #define I347AT4_PSCR_DOWNSHIFT_3X 0x2000 #define I347AT4_PSCR_DOWNSHIFT_4X 0x3000 #define I347AT4_PSCR_DOWNSHIFT_5X 0x4000 #define I347AT4_PSCR_DOWNSHIFT_6X 0x5000 #define I347AT4_PSCR_DOWNSHIFT_7X 0x6000 #define I347AT4_PSCR_DOWNSHIFT_8X 0x7000 /* I347AT4 PHY Cable Diagnostics Control */ #define I347AT4_PCDC_CABLE_LENGTH_UNIT 0x0400 /* 0=cm 1=meters */ /* M88E1112 only registers */ #define M88E1112_VCT_DSP_DISTANCE 0x001A /* M88EC018 Rev 2 specific DownShift settings */ #define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK 0x0E00 #define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X 0x0800 #define I82578_EPSCR_DOWNSHIFT_ENABLE 0x0020 #define I82578_EPSCR_DOWNSHIFT_COUNTER_MASK 0x001C /* BME1000 PHY Specific Control Register */ #define BME1000_PSCR_ENABLE_DOWNSHIFT 0x0800 /* 1 = enable downshift */ /* Bits... * 15-5: page * 4-0: register offset */ #define GG82563_PAGE_SHIFT 5 #define GG82563_REG(page, reg) \ (((page) << GG82563_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS)) #define GG82563_MIN_ALT_REG 30 /* GG82563 Specific Registers */ #define GG82563_PHY_SPEC_CTRL GG82563_REG(0, 16) /* PHY Spec Cntrl */ #define GG82563_PHY_PAGE_SELECT GG82563_REG(0, 22) /* Page Select */ #define GG82563_PHY_SPEC_CTRL_2 GG82563_REG(0, 26) /* PHY Spec Cntrl2 */ #define GG82563_PHY_PAGE_SELECT_ALT GG82563_REG(0, 29) /* Alt Page Select */ /* MAC Specific Control Register */ #define GG82563_PHY_MAC_SPEC_CTRL GG82563_REG(2, 21) #define GG82563_PHY_DSP_DISTANCE GG82563_REG(5, 26) /* DSP Distance */ /* Page 193 - Port Control Registers */ /* Kumeran Mode Control */ #define GG82563_PHY_KMRN_MODE_CTRL GG82563_REG(193, 16) #define GG82563_PHY_PWR_MGMT_CTRL GG82563_REG(193, 20) /* Pwr Mgt Ctrl */ /* Page 194 - KMRN Registers */ #define GG82563_PHY_INBAND_CTRL GG82563_REG(194, 18) /* Inband Ctrl */ /* MDI Control */ #define E1000_MDIC_REG_MASK 0x001F0000 #define E1000_MDIC_REG_SHIFT 16 #define E1000_MDIC_PHY_MASK 0x03E00000 #define E1000_MDIC_PHY_SHIFT 21 #define E1000_MDIC_OP_WRITE 0x04000000 #define E1000_MDIC_OP_READ 0x08000000 #define E1000_MDIC_READY 0x10000000 #define E1000_MDIC_ERROR 0x40000000 #define E1000_MDIC_DEST 0x80000000 /* SerDes Control */ #define E1000_GEN_CTL_READY 0x80000000 #define E1000_GEN_CTL_ADDRESS_SHIFT 8 #define E1000_GEN_POLL_TIMEOUT 640 /* LinkSec register fields */ #define E1000_LSECTXCAP_SUM_MASK 0x00FF0000 #define E1000_LSECTXCAP_SUM_SHIFT 16 #define E1000_LSECRXCAP_SUM_MASK 0x00FF0000 #define E1000_LSECRXCAP_SUM_SHIFT 16 #define E1000_LSECTXCTRL_EN_MASK 0x00000003 #define E1000_LSECTXCTRL_DISABLE 0x0 #define E1000_LSECTXCTRL_AUTH 0x1 #define E1000_LSECTXCTRL_AUTH_ENCRYPT 0x2 #define E1000_LSECTXCTRL_AISCI 0x00000020 #define E1000_LSECTXCTRL_PNTHRSH_MASK 0xFFFFFF00 #define E1000_LSECTXCTRL_RSV_MASK 0x000000D8 #define E1000_LSECRXCTRL_EN_MASK 0x0000000C #define E1000_LSECRXCTRL_EN_SHIFT 2 #define E1000_LSECRXCTRL_DISABLE 0x0 #define E1000_LSECRXCTRL_CHECK 0x1 #define E1000_LSECRXCTRL_STRICT 0x2 #define E1000_LSECRXCTRL_DROP 0x3 #define E1000_LSECRXCTRL_PLSH 0x00000040 #define E1000_LSECRXCTRL_RP 0x00000080 #define E1000_LSECRXCTRL_RSV_MASK 0xFFFFFF33 /* Tx Rate-Scheduler Config fields */ #define E1000_RTTBCNRC_RS_ENA 0x80000000 #define E1000_RTTBCNRC_RF_DEC_MASK 0x00003FFF #define E1000_RTTBCNRC_RF_INT_SHIFT 14 #define E1000_RTTBCNRC_RF_INT_MASK \ (E1000_RTTBCNRC_RF_DEC_MASK << E1000_RTTBCNRC_RF_INT_SHIFT) /* DMA Coalescing register fields */ /* DMA Coalescing Watchdog Timer */ #define E1000_DMACR_DMACWT_MASK 0x00003FFF /* DMA Coalescing Rx Threshold */ #define E1000_DMACR_DMACTHR_MASK 0x00FF0000 #define E1000_DMACR_DMACTHR_SHIFT 16 /* Lx when no PCIe transactions */ #define E1000_DMACR_DMAC_LX_MASK 0x30000000 #define E1000_DMACR_DMAC_LX_SHIFT 28 #define E1000_DMACR_DMAC_EN 0x80000000 /* Enable DMA Coalescing */ /* DMA Coalescing BMC-to-OS Watchdog Enable */ #define E1000_DMACR_DC_BMC2OSW_EN 0x00008000 /* DMA Coalescing Transmit Threshold */ #define E1000_DMCTXTH_DMCTTHR_MASK 0x00000FFF #define E1000_DMCTLX_TTLX_MASK 0x00000FFF /* Time to LX request */ /* Rx Traffic Rate Threshold */ #define E1000_DMCRTRH_UTRESH_MASK 0x0007FFFF /* Rx packet rate in current window */ #define E1000_DMCRTRH_LRPRCW 0x80000000 /* DMA Coal Rx Traffic Current Count */ #define E1000_DMCCNT_CCOUNT_MASK 0x01FFFFFF /* Flow ctrl Rx Threshold High val */ #define E1000_FCRTC_RTH_COAL_MASK 0x0003FFF0 #define E1000_FCRTC_RTH_COAL_SHIFT 4 /* Lx power decision based on DMA coal */ #define E1000_PCIEMISC_LX_DECISION 0x00000080 #define E1000_RXPBS_CFG_TS_EN 0x80000000 /* Timestamp in Rx buffer */ #define E1000_RXPBS_SIZE_I210_MASK 0x0000003F /* Rx packet buffer size */ #define E1000_TXPB0S_SIZE_I210_MASK 0x0000003F /* Tx packet buffer 0 size */ #define I210_RXPBSIZE_DEFAULT 0x000000A2 /* RXPBSIZE default */ #define I210_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE default */ +#define E1000_DOBFFCTL_OBFFTHR_MASK 0x000000FF /* OBFF threshold */ +#define E1000_DOBFFCTL_EXIT_ACT_MASK 0x01000000 /* Exit active CB */ /* Proxy Filter Control */ #define E1000_PROXYFC_D0 0x00000001 /* Enable offload in D0 */ #define E1000_PROXYFC_EX 0x00000004 /* Directed exact proxy */ #define E1000_PROXYFC_MC 0x00000008 /* Directed MC Proxy */ #define E1000_PROXYFC_BC 0x00000010 /* Broadcast Proxy Enable */ #define E1000_PROXYFC_ARP_DIRECTED 0x00000020 /* Directed ARP Proxy Ena */ #define E1000_PROXYFC_IPV4 0x00000040 /* Directed IPv4 Enable */ #define E1000_PROXYFC_IPV6 0x00000080 /* Directed IPv6 Enable */ #define E1000_PROXYFC_NS 0x00000200 /* IPv6 Neighbor Solicitation */ #define E1000_PROXYFC_ARP 0x00000800 /* ARP Request Proxy Ena */ /* Proxy Status */ #define E1000_PROXYS_CLEAR 0xFFFFFFFF /* Clear */ /* Firmware Status */ #define E1000_FWSTS_FWRI 0x80000000 /* FW Reset Indication */ /* VF Control */ #define E1000_VTCTRL_RST 0x04000000 /* Reset VF */ #define E1000_STATUS_LAN_ID_MASK 0x00000000C /* Mask for Lan ID field */ /* Lan ID bit field offset in status register */ #define E1000_STATUS_LAN_ID_OFFSET 2 #define E1000_VFTA_ENTRIES 128 #define E1000_UNUSEDARG #ifndef ERROR_REPORT #define ERROR_REPORT(fmt) do { } while (0) #endif /* ERROR_REPORT */ #endif /* _E1000_DEFINES_H_ */ Index: projects/clang-trunk/sys/dev/e1000/e1000_hw.h =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_hw.h (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_hw.h (revision 287502) @@ -1,1030 +1,1026 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _E1000_HW_H_ #define _E1000_HW_H_ #include "e1000_osdep.h" #include "e1000_regs.h" #include "e1000_defines.h" struct e1000_hw; #define E1000_DEV_ID_82542 0x1000 #define E1000_DEV_ID_82543GC_FIBER 0x1001 #define E1000_DEV_ID_82543GC_COPPER 0x1004 #define E1000_DEV_ID_82544EI_COPPER 0x1008 #define E1000_DEV_ID_82544EI_FIBER 0x1009 #define E1000_DEV_ID_82544GC_COPPER 0x100C #define E1000_DEV_ID_82544GC_LOM 0x100D #define E1000_DEV_ID_82540EM 0x100E #define E1000_DEV_ID_82540EM_LOM 0x1015 #define E1000_DEV_ID_82540EP_LOM 0x1016 #define E1000_DEV_ID_82540EP 0x1017 #define E1000_DEV_ID_82540EP_LP 0x101E #define E1000_DEV_ID_82545EM_COPPER 0x100F #define E1000_DEV_ID_82545EM_FIBER 0x1011 #define E1000_DEV_ID_82545GM_COPPER 0x1026 #define E1000_DEV_ID_82545GM_FIBER 0x1027 #define E1000_DEV_ID_82545GM_SERDES 0x1028 #define E1000_DEV_ID_82546EB_COPPER 0x1010 #define E1000_DEV_ID_82546EB_FIBER 0x1012 #define E1000_DEV_ID_82546EB_QUAD_COPPER 0x101D #define E1000_DEV_ID_82546GB_COPPER 0x1079 #define E1000_DEV_ID_82546GB_FIBER 0x107A #define E1000_DEV_ID_82546GB_SERDES 0x107B #define E1000_DEV_ID_82546GB_PCIE 0x108A #define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099 #define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5 #define E1000_DEV_ID_82541EI 0x1013 #define E1000_DEV_ID_82541EI_MOBILE 0x1018 #define E1000_DEV_ID_82541ER_LOM 0x1014 #define E1000_DEV_ID_82541ER 0x1078 #define E1000_DEV_ID_82541GI 0x1076 #define E1000_DEV_ID_82541GI_LF 0x107C #define E1000_DEV_ID_82541GI_MOBILE 0x1077 #define E1000_DEV_ID_82547EI 0x1019 #define E1000_DEV_ID_82547EI_MOBILE 0x101A #define E1000_DEV_ID_82547GI 0x1075 #define E1000_DEV_ID_82571EB_COPPER 0x105E #define E1000_DEV_ID_82571EB_FIBER 0x105F #define E1000_DEV_ID_82571EB_SERDES 0x1060 #define E1000_DEV_ID_82571EB_SERDES_DUAL 0x10D9 #define E1000_DEV_ID_82571EB_SERDES_QUAD 0x10DA #define E1000_DEV_ID_82571EB_QUAD_COPPER 0x10A4 #define E1000_DEV_ID_82571PT_QUAD_COPPER 0x10D5 #define E1000_DEV_ID_82571EB_QUAD_FIBER 0x10A5 #define E1000_DEV_ID_82571EB_QUAD_COPPER_LP 0x10BC #define E1000_DEV_ID_82572EI_COPPER 0x107D #define E1000_DEV_ID_82572EI_FIBER 0x107E #define E1000_DEV_ID_82572EI_SERDES 0x107F #define E1000_DEV_ID_82572EI 0x10B9 #define E1000_DEV_ID_82573E 0x108B #define E1000_DEV_ID_82573E_IAMT 0x108C #define E1000_DEV_ID_82573L 0x109A #define E1000_DEV_ID_82574L 0x10D3 #define E1000_DEV_ID_82574LA 0x10F6 #define E1000_DEV_ID_82583V 0x150C #define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 #define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 #define E1000_DEV_ID_80003ES2LAN_COPPER_SPT 0x10BA #define E1000_DEV_ID_80003ES2LAN_SERDES_SPT 0x10BB #define E1000_DEV_ID_ICH8_82567V_3 0x1501 #define E1000_DEV_ID_ICH8_IGP_M_AMT 0x1049 #define E1000_DEV_ID_ICH8_IGP_AMT 0x104A #define E1000_DEV_ID_ICH8_IGP_C 0x104B #define E1000_DEV_ID_ICH8_IFE 0x104C #define E1000_DEV_ID_ICH8_IFE_GT 0x10C4 #define E1000_DEV_ID_ICH8_IFE_G 0x10C5 #define E1000_DEV_ID_ICH8_IGP_M 0x104D #define E1000_DEV_ID_ICH9_IGP_M 0x10BF #define E1000_DEV_ID_ICH9_IGP_M_AMT 0x10F5 #define E1000_DEV_ID_ICH9_IGP_M_V 0x10CB #define E1000_DEV_ID_ICH9_IGP_AMT 0x10BD #define E1000_DEV_ID_ICH9_BM 0x10E5 #define E1000_DEV_ID_ICH9_IGP_C 0x294C #define E1000_DEV_ID_ICH9_IFE 0x10C0 #define E1000_DEV_ID_ICH9_IFE_GT 0x10C3 #define E1000_DEV_ID_ICH9_IFE_G 0x10C2 #define E1000_DEV_ID_ICH10_R_BM_LM 0x10CC #define E1000_DEV_ID_ICH10_R_BM_LF 0x10CD #define E1000_DEV_ID_ICH10_R_BM_V 0x10CE #define E1000_DEV_ID_ICH10_D_BM_LM 0x10DE #define E1000_DEV_ID_ICH10_D_BM_LF 0x10DF #define E1000_DEV_ID_ICH10_D_BM_V 0x1525 #define E1000_DEV_ID_PCH_M_HV_LM 0x10EA #define E1000_DEV_ID_PCH_M_HV_LC 0x10EB #define E1000_DEV_ID_PCH_D_HV_DM 0x10EF #define E1000_DEV_ID_PCH_D_HV_DC 0x10F0 #define E1000_DEV_ID_PCH2_LV_LM 0x1502 #define E1000_DEV_ID_PCH2_LV_V 0x1503 #define E1000_DEV_ID_PCH_LPT_I217_LM 0x153A #define E1000_DEV_ID_PCH_LPT_I217_V 0x153B #define E1000_DEV_ID_PCH_LPTLP_I218_LM 0x155A #define E1000_DEV_ID_PCH_LPTLP_I218_V 0x1559 #define E1000_DEV_ID_PCH_I218_LM2 0x15A0 #define E1000_DEV_ID_PCH_I218_V2 0x15A1 #define E1000_DEV_ID_PCH_I218_LM3 0x15A2 /* Wildcat Point PCH */ #define E1000_DEV_ID_PCH_I218_V3 0x15A3 /* Wildcat Point PCH */ -#define E1000_DEV_ID_PCH_SPT_I219_LM 0x156F /* Sunrise Point PCH */ -#define E1000_DEV_ID_PCH_SPT_I219_V 0x1570 /* Sunrise Point PCH */ -#define E1000_DEV_ID_PCH_SPT_I219_LM2 0x15B7 /* Sunrise Point-H PCH */ -#define E1000_DEV_ID_PCH_SPT_I219_V2 0x15B8 /* Sunrise Point-H PCH */ #define E1000_DEV_ID_82576 0x10C9 #define E1000_DEV_ID_82576_FIBER 0x10E6 #define E1000_DEV_ID_82576_SERDES 0x10E7 #define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 #define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 #define E1000_DEV_ID_82576_NS 0x150A #define E1000_DEV_ID_82576_NS_SERDES 0x1518 #define E1000_DEV_ID_82576_SERDES_QUAD 0x150D #define E1000_DEV_ID_82576_VF 0x10CA #define E1000_DEV_ID_82576_VF_HV 0x152D #define E1000_DEV_ID_I350_VF 0x1520 #define E1000_DEV_ID_I350_VF_HV 0x152F #define E1000_DEV_ID_82575EB_COPPER 0x10A7 #define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 #define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 #define E1000_DEV_ID_82580_COPPER 0x150E #define E1000_DEV_ID_82580_FIBER 0x150F #define E1000_DEV_ID_82580_SERDES 0x1510 #define E1000_DEV_ID_82580_SGMII 0x1511 #define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 #define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 #define E1000_DEV_ID_I350_COPPER 0x1521 #define E1000_DEV_ID_I350_FIBER 0x1522 #define E1000_DEV_ID_I350_SERDES 0x1523 #define E1000_DEV_ID_I350_SGMII 0x1524 #define E1000_DEV_ID_I350_DA4 0x1546 #define E1000_DEV_ID_I210_COPPER 0x1533 #define E1000_DEV_ID_I210_COPPER_OEM1 0x1534 #define E1000_DEV_ID_I210_COPPER_IT 0x1535 #define E1000_DEV_ID_I210_FIBER 0x1536 #define E1000_DEV_ID_I210_SERDES 0x1537 #define E1000_DEV_ID_I210_SGMII 0x1538 #define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B #define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C #define E1000_DEV_ID_I211_COPPER 0x1539 #define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40 #define E1000_DEV_ID_I354_SGMII 0x1F41 #define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45 #define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 #define E1000_DEV_ID_DH89XXCC_SERDES 0x043A #define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C #define E1000_DEV_ID_DH89XXCC_SFP 0x0440 #define E1000_REVISION_0 0 #define E1000_REVISION_1 1 #define E1000_REVISION_2 2 #define E1000_REVISION_3 3 #define E1000_REVISION_4 4 #define E1000_FUNC_0 0 #define E1000_FUNC_1 1 #define E1000_FUNC_2 2 #define E1000_FUNC_3 3 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0 0 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1 3 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN2 6 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN3 9 enum e1000_mac_type { e1000_undefined = 0, e1000_82542, e1000_82543, e1000_82544, e1000_82540, e1000_82545, e1000_82545_rev_3, e1000_82546, e1000_82546_rev_3, e1000_82541, e1000_82541_rev_2, e1000_82547, e1000_82547_rev_2, e1000_82571, e1000_82572, e1000_82573, e1000_82574, e1000_82583, e1000_80003es2lan, e1000_ich8lan, e1000_ich9lan, e1000_ich10lan, e1000_pchlan, e1000_pch2lan, e1000_pch_lpt, - e1000_pch_spt, e1000_82575, e1000_82576, e1000_82580, e1000_i350, e1000_i354, e1000_i210, e1000_i211, e1000_vfadapt, e1000_vfadapt_i350, e1000_num_macs /* List is 1-based, so subtract 1 for TRUE count. */ }; enum e1000_media_type { e1000_media_type_unknown = 0, e1000_media_type_copper = 1, e1000_media_type_fiber = 2, e1000_media_type_internal_serdes = 3, e1000_num_media_types }; enum e1000_nvm_type { e1000_nvm_unknown = 0, e1000_nvm_none, e1000_nvm_eeprom_spi, e1000_nvm_eeprom_microwire, e1000_nvm_flash_hw, e1000_nvm_invm, e1000_nvm_flash_sw }; enum e1000_nvm_override { e1000_nvm_override_none = 0, e1000_nvm_override_spi_small, e1000_nvm_override_spi_large, e1000_nvm_override_microwire_small, e1000_nvm_override_microwire_large }; enum e1000_phy_type { e1000_phy_unknown = 0, e1000_phy_none, e1000_phy_m88, e1000_phy_igp, e1000_phy_igp_2, e1000_phy_gg82563, e1000_phy_igp_3, e1000_phy_ife, e1000_phy_bm, e1000_phy_82578, e1000_phy_82577, e1000_phy_82579, e1000_phy_i217, e1000_phy_82580, e1000_phy_vf, e1000_phy_i210, }; enum e1000_bus_type { e1000_bus_type_unknown = 0, e1000_bus_type_pci, e1000_bus_type_pcix, e1000_bus_type_pci_express, e1000_bus_type_reserved }; enum e1000_bus_speed { e1000_bus_speed_unknown = 0, e1000_bus_speed_33, e1000_bus_speed_66, e1000_bus_speed_100, e1000_bus_speed_120, e1000_bus_speed_133, e1000_bus_speed_2500, e1000_bus_speed_5000, e1000_bus_speed_reserved }; enum e1000_bus_width { e1000_bus_width_unknown = 0, e1000_bus_width_pcie_x1, e1000_bus_width_pcie_x2, e1000_bus_width_pcie_x4 = 4, e1000_bus_width_pcie_x8 = 8, e1000_bus_width_32, e1000_bus_width_64, e1000_bus_width_reserved }; enum e1000_1000t_rx_status { e1000_1000t_rx_status_not_ok = 0, e1000_1000t_rx_status_ok, e1000_1000t_rx_status_undefined = 0xFF }; enum e1000_rev_polarity { e1000_rev_polarity_normal = 0, e1000_rev_polarity_reversed, e1000_rev_polarity_undefined = 0xFF }; enum e1000_fc_mode { e1000_fc_none = 0, e1000_fc_rx_pause, e1000_fc_tx_pause, e1000_fc_full, e1000_fc_default = 0xFF }; enum e1000_ffe_config { e1000_ffe_config_enabled = 0, e1000_ffe_config_active, e1000_ffe_config_blocked }; enum e1000_dsp_config { e1000_dsp_config_disabled = 0, e1000_dsp_config_enabled, e1000_dsp_config_activated, e1000_dsp_config_undefined = 0xFF }; enum e1000_ms_type { e1000_ms_hw_default = 0, e1000_ms_force_master, e1000_ms_force_slave, e1000_ms_auto }; enum e1000_smart_speed { e1000_smart_speed_default = 0, e1000_smart_speed_on, e1000_smart_speed_off }; enum e1000_serdes_link_state { e1000_serdes_link_down = 0, e1000_serdes_link_autoneg_progress, e1000_serdes_link_autoneg_complete, e1000_serdes_link_forced_up }; #define __le16 u16 #define __le32 u32 #define __le64 u64 /* Receive Descriptor */ struct e1000_rx_desc { __le64 buffer_addr; /* Address of the descriptor's data buffer */ __le16 length; /* Length of data DMAed into data buffer */ __le16 csum; /* Packet checksum */ u8 status; /* Descriptor status */ u8 errors; /* Descriptor Errors */ __le16 special; }; /* Receive Descriptor - Extended */ union e1000_rx_desc_extended { struct { __le64 buffer_addr; __le64 reserved; } read; struct { struct { __le32 mrq; /* Multiple Rx Queues */ union { __le32 rss; /* RSS Hash */ struct { __le16 ip_id; /* IP id */ __le16 csum; /* Packet Checksum */ } csum_ip; } hi_dword; } lower; struct { __le32 status_error; /* ext status/error */ __le16 length; __le16 vlan; /* VLAN tag */ } upper; } wb; /* writeback */ }; #define MAX_PS_BUFFERS 4 /* Number of packet split data buffers (not including the header buffer) */ #define PS_PAGE_BUFFERS (MAX_PS_BUFFERS - 1) /* Receive Descriptor - Packet Split */ union e1000_rx_desc_packet_split { struct { /* one buffer for protocol header(s), three data buffers */ __le64 buffer_addr[MAX_PS_BUFFERS]; } read; struct { struct { __le32 mrq; /* Multiple Rx Queues */ union { __le32 rss; /* RSS Hash */ struct { __le16 ip_id; /* IP id */ __le16 csum; /* Packet Checksum */ } csum_ip; } hi_dword; } lower; struct { __le32 status_error; /* ext status/error */ __le16 length0; /* length of buffer 0 */ __le16 vlan; /* VLAN tag */ } middle; struct { __le16 header_status; /* length of buffers 1-3 */ __le16 length[PS_PAGE_BUFFERS]; } upper; __le64 reserved; } wb; /* writeback */ }; /* Transmit Descriptor */ struct e1000_tx_desc { __le64 buffer_addr; /* Address of the descriptor's data buffer */ union { __le32 data; struct { __le16 length; /* Data buffer length */ u8 cso; /* Checksum offset */ u8 cmd; /* Descriptor control */ } flags; } lower; union { __le32 data; struct { u8 status; /* Descriptor status */ u8 css; /* Checksum start */ __le16 special; } fields; } upper; }; /* Offload Context Descriptor */ struct e1000_context_desc { union { __le32 ip_config; struct { u8 ipcss; /* IP checksum start */ u8 ipcso; /* IP checksum offset */ __le16 ipcse; /* IP checksum end */ } ip_fields; } lower_setup; union { __le32 tcp_config; struct { u8 tucss; /* TCP checksum start */ u8 tucso; /* TCP checksum offset */ __le16 tucse; /* TCP checksum end */ } tcp_fields; } upper_setup; __le32 cmd_and_length; union { __le32 data; struct { u8 status; /* Descriptor status */ u8 hdr_len; /* Header length */ __le16 mss; /* Maximum segment size */ } fields; } tcp_seg_setup; }; /* Offload data descriptor */ struct e1000_data_desc { __le64 buffer_addr; /* Address of the descriptor's buffer address */ union { __le32 data; struct { __le16 length; /* Data buffer length */ u8 typ_len_ext; u8 cmd; } flags; } lower; union { __le32 data; struct { u8 status; /* Descriptor status */ u8 popts; /* Packet Options */ __le16 special; } fields; } upper; }; /* Statistics counters collected by the MAC */ struct e1000_hw_stats { u64 crcerrs; u64 algnerrc; u64 symerrs; u64 rxerrc; u64 mpc; u64 scc; u64 ecol; u64 mcc; u64 latecol; u64 colc; u64 dc; u64 tncrs; u64 sec; u64 cexterr; u64 rlec; u64 xonrxc; u64 xontxc; u64 xoffrxc; u64 xofftxc; u64 fcruc; u64 prc64; u64 prc127; u64 prc255; u64 prc511; u64 prc1023; u64 prc1522; u64 gprc; u64 bprc; u64 mprc; u64 gptc; u64 gorc; u64 gotc; u64 rnbc; u64 ruc; u64 rfc; u64 roc; u64 rjc; u64 mgprc; u64 mgpdc; u64 mgptc; u64 tor; u64 tot; u64 tpr; u64 tpt; u64 ptc64; u64 ptc127; u64 ptc255; u64 ptc511; u64 ptc1023; u64 ptc1522; u64 mptc; u64 bptc; u64 tsctc; u64 tsctfc; u64 iac; u64 icrxptc; u64 icrxatc; u64 ictxptc; u64 ictxatc; u64 ictxqec; u64 ictxqmtc; u64 icrxdmtc; u64 icrxoc; u64 cbtmpc; u64 htdpmc; u64 cbrdpc; u64 cbrmpc; u64 rpthc; u64 hgptc; u64 htcbdpc; u64 hgorc; u64 hgotc; u64 lenerrs; u64 scvpc; u64 hrmpc; u64 doosync; u64 o2bgptc; u64 o2bspc; u64 b2ospc; u64 b2ogprc; }; struct e1000_vf_stats { u64 base_gprc; u64 base_gptc; u64 base_gorc; u64 base_gotc; u64 base_mprc; u64 base_gotlbc; u64 base_gptlbc; u64 base_gorlbc; u64 base_gprlbc; u32 last_gprc; u32 last_gptc; u32 last_gorc; u32 last_gotc; u32 last_mprc; u32 last_gotlbc; u32 last_gptlbc; u32 last_gorlbc; u32 last_gprlbc; u64 gprc; u64 gptc; u64 gorc; u64 gotc; u64 mprc; u64 gotlbc; u64 gptlbc; u64 gorlbc; u64 gprlbc; }; struct e1000_phy_stats { u32 idle_errors; u32 receive_errors; }; struct e1000_host_mng_dhcp_cookie { u32 signature; u8 status; u8 reserved0; u16 vlan_id; u32 reserved1; u16 reserved2; u8 reserved3; u8 checksum; }; /* Host Interface "Rev 1" */ struct e1000_host_command_header { u8 command_id; u8 command_length; u8 command_options; u8 checksum; }; #define E1000_HI_MAX_DATA_LENGTH 252 struct e1000_host_command_info { struct e1000_host_command_header command_header; u8 command_data[E1000_HI_MAX_DATA_LENGTH]; }; /* Host Interface "Rev 2" */ struct e1000_host_mng_command_header { u8 command_id; u8 checksum; u16 reserved1; u16 reserved2; u16 command_length; }; #define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8 struct e1000_host_mng_command_info { struct e1000_host_mng_command_header command_header; u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH]; }; #include "e1000_mac.h" #include "e1000_phy.h" #include "e1000_nvm.h" #include "e1000_manage.h" #include "e1000_mbx.h" /* Function pointers for the MAC. */ struct e1000_mac_operations { s32 (*init_params)(struct e1000_hw *); s32 (*id_led_init)(struct e1000_hw *); s32 (*blink_led)(struct e1000_hw *); bool (*check_mng_mode)(struct e1000_hw *); s32 (*check_for_link)(struct e1000_hw *); s32 (*cleanup_led)(struct e1000_hw *); void (*clear_hw_cntrs)(struct e1000_hw *); void (*clear_vfta)(struct e1000_hw *); s32 (*get_bus_info)(struct e1000_hw *); void (*set_lan_id)(struct e1000_hw *); s32 (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *); s32 (*led_on)(struct e1000_hw *); s32 (*led_off)(struct e1000_hw *); void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32); s32 (*reset_hw)(struct e1000_hw *); s32 (*init_hw)(struct e1000_hw *); void (*shutdown_serdes)(struct e1000_hw *); void (*power_up_serdes)(struct e1000_hw *); s32 (*setup_link)(struct e1000_hw *); s32 (*setup_physical_interface)(struct e1000_hw *); s32 (*setup_led)(struct e1000_hw *); void (*write_vfta)(struct e1000_hw *, u32, u32); void (*config_collision_dist)(struct e1000_hw *); int (*rar_set)(struct e1000_hw *, u8*, u32); s32 (*read_mac_addr)(struct e1000_hw *); s32 (*validate_mdi_setting)(struct e1000_hw *); + s32 (*set_obff_timer)(struct e1000_hw *, u32); s32 (*acquire_swfw_sync)(struct e1000_hw *, u16); void (*release_swfw_sync)(struct e1000_hw *, u16); }; /* When to use various PHY register access functions: * * Func Caller * Function Does Does When to use * ~~~~~~~~~~~~ ~~~~~ ~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * X_reg L,P,A n/a for simple PHY reg accesses * X_reg_locked P,A L for multiple accesses of different regs * on different pages * X_reg_page A L,P for multiple accesses of different regs * on the same page * * Where X=[read|write], L=locking, P=sets page, A=register access * */ struct e1000_phy_operations { s32 (*init_params)(struct e1000_hw *); s32 (*acquire)(struct e1000_hw *); s32 (*cfg_on_link_up)(struct e1000_hw *); s32 (*check_polarity)(struct e1000_hw *); s32 (*check_reset_block)(struct e1000_hw *); s32 (*commit)(struct e1000_hw *); s32 (*force_speed_duplex)(struct e1000_hw *); s32 (*get_cfg_done)(struct e1000_hw *hw); s32 (*get_cable_length)(struct e1000_hw *); s32 (*get_info)(struct e1000_hw *); s32 (*set_page)(struct e1000_hw *, u16); s32 (*read_reg)(struct e1000_hw *, u32, u16 *); s32 (*read_reg_locked)(struct e1000_hw *, u32, u16 *); s32 (*read_reg_page)(struct e1000_hw *, u32, u16 *); void (*release)(struct e1000_hw *); s32 (*reset)(struct e1000_hw *); s32 (*set_d0_lplu_state)(struct e1000_hw *, bool); s32 (*set_d3_lplu_state)(struct e1000_hw *, bool); s32 (*write_reg)(struct e1000_hw *, u32, u16); s32 (*write_reg_locked)(struct e1000_hw *, u32, u16); s32 (*write_reg_page)(struct e1000_hw *, u32, u16); void (*power_up)(struct e1000_hw *); void (*power_down)(struct e1000_hw *); s32 (*read_i2c_byte)(struct e1000_hw *, u8, u8, u8 *); s32 (*write_i2c_byte)(struct e1000_hw *, u8, u8, u8); }; /* Function pointers for the NVM. */ struct e1000_nvm_operations { s32 (*init_params)(struct e1000_hw *); s32 (*acquire)(struct e1000_hw *); s32 (*read)(struct e1000_hw *, u16, u16, u16 *); void (*release)(struct e1000_hw *); void (*reload)(struct e1000_hw *); s32 (*update)(struct e1000_hw *); s32 (*valid_led_default)(struct e1000_hw *, u16 *); s32 (*validate)(struct e1000_hw *); s32 (*write)(struct e1000_hw *, u16, u16, u16 *); }; struct e1000_mac_info { struct e1000_mac_operations ops; u8 addr[ETH_ADDR_LEN]; u8 perm_addr[ETH_ADDR_LEN]; enum e1000_mac_type type; u32 collision_delta; u32 ledctl_default; u32 ledctl_mode1; u32 ledctl_mode2; u32 mc_filter_type; u32 tx_packet_delta; u32 txcw; u16 current_ifs_val; u16 ifs_max_val; u16 ifs_min_val; u16 ifs_ratio; u16 ifs_step_size; u16 mta_reg_count; u16 uta_reg_count; /* Maximum size of the MTA register table in all supported adapters */ #define MAX_MTA_REG 128 u32 mta_shadow[MAX_MTA_REG]; u16 rar_entry_count; u8 forced_speed_duplex; bool adaptive_ifs; bool has_fwsm; bool arc_subsystem_valid; bool asf_firmware_present; bool autoneg; bool autoneg_failed; bool get_link_status; bool in_ifs_mode; bool report_tx_early; enum e1000_serdes_link_state serdes_link_state; bool serdes_has_link; bool tx_pkt_filtering; - u32 max_frame_size; + u32 max_frame_size; }; struct e1000_phy_info { struct e1000_phy_operations ops; enum e1000_phy_type type; enum e1000_1000t_rx_status local_rx; enum e1000_1000t_rx_status remote_rx; enum e1000_ms_type ms_type; enum e1000_ms_type original_ms_type; enum e1000_rev_polarity cable_polarity; enum e1000_smart_speed smart_speed; u32 addr; u32 id; u32 reset_delay_us; /* in usec */ u32 revision; enum e1000_media_type media_type; u16 autoneg_advertised; u16 autoneg_mask; u16 cable_length; u16 max_cable_length; u16 min_cable_length; u8 mdix; bool disable_polarity_correction; bool is_mdix; bool polarity_correction; bool speed_downgraded; bool autoneg_wait_to_complete; }; struct e1000_nvm_info { struct e1000_nvm_operations ops; enum e1000_nvm_type type; enum e1000_nvm_override override; u32 flash_bank_size; u32 flash_base_addr; u16 word_size; u16 delay_usec; u16 address_bits; u16 opcode_bits; u16 page_size; }; struct e1000_bus_info { enum e1000_bus_type type; enum e1000_bus_speed speed; enum e1000_bus_width width; u16 func; u16 pci_cmd_word; }; struct e1000_fc_info { u32 high_water; /* Flow control high-water mark */ u32 low_water; /* Flow control low-water mark */ u16 pause_time; /* Flow control pause timer */ u16 refresh_time; /* Flow control refresh timer */ bool send_xon; /* Flow control send XON */ bool strict_ieee; /* Strict IEEE mode */ enum e1000_fc_mode current_mode; /* FC mode in effect */ enum e1000_fc_mode requested_mode; /* FC mode requested by caller */ }; struct e1000_mbx_operations { s32 (*init_params)(struct e1000_hw *hw); s32 (*read)(struct e1000_hw *, u32 *, u16, u16); s32 (*write)(struct e1000_hw *, u32 *, u16, u16); s32 (*read_posted)(struct e1000_hw *, u32 *, u16, u16); s32 (*write_posted)(struct e1000_hw *, u32 *, u16, u16); s32 (*check_for_msg)(struct e1000_hw *, u16); s32 (*check_for_ack)(struct e1000_hw *, u16); s32 (*check_for_rst)(struct e1000_hw *, u16); }; struct e1000_mbx_stats { u32 msgs_tx; u32 msgs_rx; u32 acks; u32 reqs; u32 rsts; }; struct e1000_mbx_info { struct e1000_mbx_operations ops; struct e1000_mbx_stats stats; u32 timeout; u32 usec_delay; u16 size; }; struct e1000_dev_spec_82541 { enum e1000_dsp_config dsp_config; enum e1000_ffe_config ffe_config; u16 spd_default; bool phy_init_script; }; struct e1000_dev_spec_82542 { bool dma_fairness; }; struct e1000_dev_spec_82543 { u32 tbi_compatibility; bool dma_fairness; bool init_phy_disabled; }; struct e1000_dev_spec_82571 { bool laa_is_present; u32 smb_counter; E1000_MUTEX swflag_mutex; }; struct e1000_dev_spec_80003es2lan { bool mdic_wa_enable; }; struct e1000_shadow_ram { u16 value; bool modified; }; #define E1000_SHADOW_RAM_WORDS 2048 /* I218 PHY Ultra Low Power (ULP) states */ enum e1000_ulp_state { e1000_ulp_state_unknown, e1000_ulp_state_off, e1000_ulp_state_on, }; struct e1000_dev_spec_ich8lan { bool kmrn_lock_loss_workaround_enabled; struct e1000_shadow_ram shadow_ram[E1000_SHADOW_RAM_WORDS]; E1000_MUTEX nvm_mutex; E1000_MUTEX swflag_mutex; bool nvm_k1_enabled; bool eee_disable; u16 eee_lp_ability; enum e1000_ulp_state ulp_state; }; struct e1000_dev_spec_82575 { bool sgmii_active; bool global_device_reset; bool eee_disable; bool module_plugged; bool clear_semaphore_once; u32 mtu; struct sfp_e1000_flags eth_flags; u8 media_port; bool media_changed; }; struct e1000_dev_spec_vf { u32 vf_number; u32 v2p_mailbox; }; struct e1000_hw { void *back; u8 *hw_addr; u8 *flash_address; unsigned long io_base; struct e1000_mac_info mac; struct e1000_fc_info fc; struct e1000_phy_info phy; struct e1000_nvm_info nvm; struct e1000_bus_info bus; struct e1000_mbx_info mbx; struct e1000_host_mng_dhcp_cookie mng_cookie; union { struct e1000_dev_spec_82541 _82541; struct e1000_dev_spec_82542 _82542; struct e1000_dev_spec_82543 _82543; struct e1000_dev_spec_82571 _82571; struct e1000_dev_spec_80003es2lan _80003es2lan; struct e1000_dev_spec_ich8lan ich8lan; struct e1000_dev_spec_82575 _82575; struct e1000_dev_spec_vf vf; } dev_spec; u16 device_id; u16 subsystem_vendor_id; u16 subsystem_device_id; u16 vendor_id; u8 revision_id; }; #include "e1000_82541.h" #include "e1000_82543.h" #include "e1000_82571.h" #include "e1000_80003es2lan.h" #include "e1000_ich8lan.h" #include "e1000_82575.h" #include "e1000_i210.h" /* These functions must be implemented by drivers */ void e1000_pci_clear_mwi(struct e1000_hw *hw); void e1000_pci_set_mwi(struct e1000_hw *hw); s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); void e1000_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); void e1000_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); #endif Index: projects/clang-trunk/sys/dev/e1000/e1000_i210.c =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_i210.c (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_i210.c (revision 287502) @@ -1,1003 +1,904 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "e1000_api.h" static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw); static void e1000_release_nvm_i210(struct e1000_hw *hw); static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw); static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); static s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw); static s32 e1000_valid_led_default_i210(struct e1000_hw *hw, u16 *data); /** * e1000_acquire_nvm_i210 - Request for access to EEPROM * @hw: pointer to the HW structure * * Acquire the necessary semaphores for exclusive access to the EEPROM. * Set the EEPROM access request bit and wait for EEPROM access grant bit. * Return successful if access grant bit set, else clear the request for * EEPROM access and return -E1000_ERR_NVM (-1). **/ static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_acquire_nvm_i210"); ret_val = e1000_acquire_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); return ret_val; } /** * e1000_release_nvm_i210 - Release exclusive access to EEPROM * @hw: pointer to the HW structure * * Stop any current commands to the EEPROM and clear the EEPROM request bit, * then release the semaphores acquired. **/ static void e1000_release_nvm_i210(struct e1000_hw *hw) { DEBUGFUNC("e1000_release_nvm_i210"); e1000_release_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); } /** * e1000_acquire_swfw_sync_i210 - Acquire SW/FW semaphore * @hw: pointer to the HW structure * @mask: specifies which semaphore to acquire * * Acquire the SW/FW semaphore to access the PHY or NVM. The mask * will also specify which port we're acquiring the lock for. **/ s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask) { u32 swfw_sync; u32 swmask = mask; u32 fwmask = mask << 16; s32 ret_val = E1000_SUCCESS; s32 i = 0, timeout = 200; /* FIXME: find real value to use here */ DEBUGFUNC("e1000_acquire_swfw_sync_i210"); while (i < timeout) { if (e1000_get_hw_semaphore_i210(hw)) { ret_val = -E1000_ERR_SWFW_SYNC; goto out; } swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); if (!(swfw_sync & (fwmask | swmask))) break; /* * Firmware currently using resource (fwmask) * or other software thread using resource (swmask) */ e1000_put_hw_semaphore_generic(hw); msec_delay_irq(5); i++; } if (i == timeout) { DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); ret_val = -E1000_ERR_SWFW_SYNC; goto out; } swfw_sync |= swmask; E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); e1000_put_hw_semaphore_generic(hw); out: return ret_val; } /** * e1000_release_swfw_sync_i210 - Release SW/FW semaphore * @hw: pointer to the HW structure * @mask: specifies which semaphore to acquire * * Release the SW/FW semaphore used to access the PHY or NVM. The mask * will also specify which port we're releasing the lock for. **/ void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask) { u32 swfw_sync; DEBUGFUNC("e1000_release_swfw_sync_i210"); while (e1000_get_hw_semaphore_i210(hw) != E1000_SUCCESS) ; /* Empty */ swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); swfw_sync &= ~mask; E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); e1000_put_hw_semaphore_generic(hw); } /** * e1000_get_hw_semaphore_i210 - Acquire hardware semaphore * @hw: pointer to the HW structure * * Acquire the HW semaphore to access the PHY or NVM **/ static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw) { u32 swsm; s32 timeout = hw->nvm.word_size + 1; s32 i = 0; DEBUGFUNC("e1000_get_hw_semaphore_i210"); /* Get the SW semaphore */ while (i < timeout) { swsm = E1000_READ_REG(hw, E1000_SWSM); if (!(swsm & E1000_SWSM_SMBI)) break; usec_delay(50); i++; } if (i == timeout) { /* In rare circumstances, the SW semaphore may already be held * unintentionally. Clear the semaphore once before giving up. */ if (hw->dev_spec._82575.clear_semaphore_once) { hw->dev_spec._82575.clear_semaphore_once = FALSE; e1000_put_hw_semaphore_generic(hw); for (i = 0; i < timeout; i++) { swsm = E1000_READ_REG(hw, E1000_SWSM); if (!(swsm & E1000_SWSM_SMBI)) break; usec_delay(50); } } /* If we do not have the semaphore here, we have to give up. */ if (i == timeout) { DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); return -E1000_ERR_NVM; } } /* Get the FW semaphore. */ for (i = 0; i < timeout; i++) { swsm = E1000_READ_REG(hw, E1000_SWSM); E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); /* Semaphore acquired if bit latched */ if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) break; usec_delay(50); } if (i == timeout) { /* Release semaphores */ e1000_put_hw_semaphore_generic(hw); DEBUGOUT("Driver can't access the NVM\n"); return -E1000_ERR_NVM; } return E1000_SUCCESS; } /** * e1000_read_nvm_srrd_i210 - Reads Shadow Ram using EERD register * @hw: pointer to the HW structure * @offset: offset of word in the Shadow Ram to read * @words: number of words to read * @data: word read from the Shadow Ram * * Reads a 16 bit word from the Shadow Ram using the EERD register. * Uses necessary synchronization semaphores. **/ s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { s32 status = E1000_SUCCESS; u16 i, count; DEBUGFUNC("e1000_read_nvm_srrd_i210"); /* We cannot hold synchronization semaphores for too long, * because of forceful takeover procedure. However it is more efficient * to read in bursts than synchronizing access for each word. */ for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) { count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ? E1000_EERD_EEWR_MAX_COUNT : (words - i); if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { status = e1000_read_nvm_eerd(hw, offset, count, data + i); hw->nvm.ops.release(hw); } else { status = E1000_ERR_SWFW_SYNC; } if (status != E1000_SUCCESS) break; } return status; } /** * e1000_write_nvm_srwr_i210 - Write to Shadow RAM using EEWR * @hw: pointer to the HW structure * @offset: offset within the Shadow RAM to be written to * @words: number of words to write * @data: 16 bit word(s) to be written to the Shadow RAM * * Writes data to Shadow RAM at offset using EEWR register. * * If e1000_update_nvm_checksum is not called after this function , the * data will not be committed to FLASH and also Shadow RAM will most likely * contain an invalid checksum. * * If error code is returned, data and Shadow RAM may be inconsistent - buffer * partially written. **/ s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { s32 status = E1000_SUCCESS; u16 i, count; DEBUGFUNC("e1000_write_nvm_srwr_i210"); /* We cannot hold synchronization semaphores for too long, * because of forceful takeover procedure. However it is more efficient * to write in bursts than synchronizing access for each word. */ for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) { count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ? E1000_EERD_EEWR_MAX_COUNT : (words - i); if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { status = e1000_write_nvm_srwr(hw, offset, count, data + i); hw->nvm.ops.release(hw); } else { status = E1000_ERR_SWFW_SYNC; } if (status != E1000_SUCCESS) break; } return status; } /** * e1000_write_nvm_srwr - Write to Shadow Ram using EEWR * @hw: pointer to the HW structure * @offset: offset within the Shadow Ram to be written to * @words: number of words to write * @data: 16 bit word(s) to be written to the Shadow Ram * * Writes data to Shadow Ram at offset using EEWR register. * * If e1000_update_nvm_checksum is not called after this function , the * Shadow Ram will most likely contain an invalid checksum. **/ static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { struct e1000_nvm_info *nvm = &hw->nvm; u32 i, k, eewr = 0; u32 attempts = 100000; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_write_nvm_srwr"); /* * A check for invalid values: offset too large, too many words, * too many words for the offset, and not enough words. */ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || (words == 0)) { DEBUGOUT("nvm parameter(s) out of bounds\n"); ret_val = -E1000_ERR_NVM; goto out; } for (i = 0; i < words; i++) { eewr = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) | (data[i] << E1000_NVM_RW_REG_DATA) | E1000_NVM_RW_REG_START; E1000_WRITE_REG(hw, E1000_SRWR, eewr); for (k = 0; k < attempts; k++) { if (E1000_NVM_RW_REG_DONE & E1000_READ_REG(hw, E1000_SRWR)) { ret_val = E1000_SUCCESS; break; } usec_delay(5); } if (ret_val != E1000_SUCCESS) { DEBUGOUT("Shadow RAM write EEWR timed out\n"); break; } } out: return ret_val; } /** e1000_read_invm_word_i210 - Reads OTP * @hw: pointer to the HW structure * @address: the word address (aka eeprom offset) to read * @data: pointer to the data read * * Reads 16-bit words from the OTP. Return error when the word is not * stored in OTP. **/ static s32 e1000_read_invm_word_i210(struct e1000_hw *hw, u8 address, u16 *data) { s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND; u32 invm_dword; u16 i; u8 record_type, word_address; DEBUGFUNC("e1000_read_invm_word_i210"); for (i = 0; i < E1000_INVM_SIZE; i++) { invm_dword = E1000_READ_REG(hw, E1000_INVM_DATA_REG(i)); /* Get record type */ record_type = INVM_DWORD_TO_RECORD_TYPE(invm_dword); if (record_type == E1000_INVM_UNINITIALIZED_STRUCTURE) break; if (record_type == E1000_INVM_CSR_AUTOLOAD_STRUCTURE) i += E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS; if (record_type == E1000_INVM_RSA_KEY_SHA256_STRUCTURE) i += E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS; if (record_type == E1000_INVM_WORD_AUTOLOAD_STRUCTURE) { word_address = INVM_DWORD_TO_WORD_ADDRESS(invm_dword); if (word_address == address) { *data = INVM_DWORD_TO_WORD_DATA(invm_dword); DEBUGOUT2("Read INVM Word 0x%02x = %x", address, *data); status = E1000_SUCCESS; break; } } } if (status != E1000_SUCCESS) DEBUGOUT1("Requested word 0x%02x not found in OTP\n", address); return status; } /** e1000_read_invm_i210 - Read invm wrapper function for I210/I211 * @hw: pointer to the HW structure * @address: the word address (aka eeprom offset) to read * @data: pointer to the data read * * Wrapper function to return data formerly found in the NVM. **/ static s32 e1000_read_invm_i210(struct e1000_hw *hw, u16 offset, u16 E1000_UNUSEDARG words, u16 *data) { s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_read_invm_i210"); /* Only the MAC addr is required to be present in the iNVM */ switch (offset) { case NVM_MAC_ADDR: ret_val = e1000_read_invm_word_i210(hw, (u8)offset, &data[0]); ret_val |= e1000_read_invm_word_i210(hw, (u8)offset+1, &data[1]); ret_val |= e1000_read_invm_word_i210(hw, (u8)offset+2, &data[2]); if (ret_val != E1000_SUCCESS) DEBUGOUT("MAC Addr not found in iNVM\n"); break; case NVM_INIT_CTRL_2: ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); if (ret_val != E1000_SUCCESS) { *data = NVM_INIT_CTRL_2_DEFAULT_I211; ret_val = E1000_SUCCESS; } break; case NVM_INIT_CTRL_4: ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); if (ret_val != E1000_SUCCESS) { *data = NVM_INIT_CTRL_4_DEFAULT_I211; ret_val = E1000_SUCCESS; } break; case NVM_LED_1_CFG: ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); if (ret_val != E1000_SUCCESS) { *data = NVM_LED_1_CFG_DEFAULT_I211; ret_val = E1000_SUCCESS; } break; case NVM_LED_0_2_CFG: ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); if (ret_val != E1000_SUCCESS) { *data = NVM_LED_0_2_CFG_DEFAULT_I211; ret_val = E1000_SUCCESS; } break; case NVM_ID_LED_SETTINGS: ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); if (ret_val != E1000_SUCCESS) { *data = ID_LED_RESERVED_FFFF; ret_val = E1000_SUCCESS; } break; case NVM_SUB_DEV_ID: *data = hw->subsystem_device_id; break; case NVM_SUB_VEN_ID: *data = hw->subsystem_vendor_id; break; case NVM_DEV_ID: *data = hw->device_id; break; case NVM_VEN_ID: *data = hw->vendor_id; break; default: DEBUGOUT1("NVM word 0x%02x is not mapped.\n", offset); *data = NVM_RESERVED_WORD; break; } return ret_val; } /** - * e1000_read_invm_version - Reads iNVM version and image type - * @hw: pointer to the HW structure - * @invm_ver: version structure for the version read - * - * Reads iNVM version and image type. - **/ -s32 e1000_read_invm_version(struct e1000_hw *hw, - struct e1000_fw_version *invm_ver) -{ - u32 *record = NULL; - u32 *next_record = NULL; - u32 i = 0; - u32 invm_dword = 0; - u32 invm_blocks = E1000_INVM_SIZE - (E1000_INVM_ULT_BYTES_SIZE / - E1000_INVM_RECORD_SIZE_IN_BYTES); - u32 buffer[E1000_INVM_SIZE]; - s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND; - u16 version = 0; - - DEBUGFUNC("e1000_read_invm_version"); - - /* Read iNVM memory */ - for (i = 0; i < E1000_INVM_SIZE; i++) { - invm_dword = E1000_READ_REG(hw, E1000_INVM_DATA_REG(i)); - buffer[i] = invm_dword; - } - - /* Read version number */ - for (i = 1; i < invm_blocks; i++) { - record = &buffer[invm_blocks - i]; - next_record = &buffer[invm_blocks - i + 1]; - - /* Check if we have first version location used */ - if ((i == 1) && ((*record & E1000_INVM_VER_FIELD_ONE) == 0)) { - version = 0; - status = E1000_SUCCESS; - break; - } - /* Check if we have second version location used */ - else if ((i == 1) && - ((*record & E1000_INVM_VER_FIELD_TWO) == 0)) { - version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; - status = E1000_SUCCESS; - break; - } - /* - * Check if we have odd version location - * used and it is the last one used - */ - else if ((((*record & E1000_INVM_VER_FIELD_ONE) == 0) && - ((*record & 0x3) == 0)) || (((*record & 0x3) != 0) && - (i != 1))) { - version = (*next_record & E1000_INVM_VER_FIELD_TWO) - >> 13; - status = E1000_SUCCESS; - break; - } - /* - * Check if we have even version location - * used and it is the last one used - */ - else if (((*record & E1000_INVM_VER_FIELD_TWO) == 0) && - ((*record & 0x3) == 0)) { - version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; - status = E1000_SUCCESS; - break; - } - } - - if (status == E1000_SUCCESS) { - invm_ver->invm_major = (version & E1000_INVM_MAJOR_MASK) - >> E1000_INVM_MAJOR_SHIFT; - invm_ver->invm_minor = version & E1000_INVM_MINOR_MASK; - } - /* Read Image Type */ - for (i = 1; i < invm_blocks; i++) { - record = &buffer[invm_blocks - i]; - next_record = &buffer[invm_blocks - i + 1]; - - /* Check if we have image type in first location used */ - if ((i == 1) && ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) { - invm_ver->invm_img_type = 0; - status = E1000_SUCCESS; - break; - } - /* Check if we have image type in first location used */ - else if ((((*record & 0x3) == 0) && - ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) || - ((((*record & 0x3) != 0) && (i != 1)))) { - invm_ver->invm_img_type = - (*next_record & E1000_INVM_IMGTYPE_FIELD) >> 23; - status = E1000_SUCCESS; - break; - } - } - return status; -} - -/** * e1000_validate_nvm_checksum_i210 - Validate EEPROM checksum * @hw: pointer to the HW structure * * Calculates the EEPROM checksum by reading/adding each word of the EEPROM * and then verifies that the sum of the EEPROM is equal to 0xBABA. **/ s32 e1000_validate_nvm_checksum_i210(struct e1000_hw *hw) { s32 status = E1000_SUCCESS; s32 (*read_op_ptr)(struct e1000_hw *, u16, u16, u16 *); DEBUGFUNC("e1000_validate_nvm_checksum_i210"); if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { /* * Replace the read function with semaphore grabbing with * the one that skips this for a while. * We have semaphore taken already here. */ read_op_ptr = hw->nvm.ops.read; hw->nvm.ops.read = e1000_read_nvm_eerd; status = e1000_validate_nvm_checksum_generic(hw); /* Revert original read operation. */ hw->nvm.ops.read = read_op_ptr; hw->nvm.ops.release(hw); } else { status = E1000_ERR_SWFW_SYNC; } return status; } /** * e1000_update_nvm_checksum_i210 - Update EEPROM checksum * @hw: pointer to the HW structure * * Updates the EEPROM checksum by reading/adding each word of the EEPROM * up to the checksum. Then calculates the EEPROM checksum and writes the * value to the EEPROM. Next commit EEPROM data onto the Flash. **/ s32 e1000_update_nvm_checksum_i210(struct e1000_hw *hw) { s32 ret_val; u16 checksum = 0; u16 i, nvm_data; DEBUGFUNC("e1000_update_nvm_checksum_i210"); /* * Read the first word from the EEPROM. If this times out or fails, do * not continue or we could be in for a very long wait while every * EEPROM read fails */ ret_val = e1000_read_nvm_eerd(hw, 0, 1, &nvm_data); if (ret_val != E1000_SUCCESS) { DEBUGOUT("EEPROM read failed\n"); goto out; } if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { /* * Do not use hw->nvm.ops.write, hw->nvm.ops.read * because we do not want to take the synchronization * semaphores twice here. */ for (i = 0; i < NVM_CHECKSUM_REG; i++) { ret_val = e1000_read_nvm_eerd(hw, i, 1, &nvm_data); if (ret_val) { hw->nvm.ops.release(hw); DEBUGOUT("NVM Read Error while updating checksum.\n"); goto out; } checksum += nvm_data; } checksum = (u16) NVM_SUM - checksum; ret_val = e1000_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1, &checksum); if (ret_val != E1000_SUCCESS) { hw->nvm.ops.release(hw); DEBUGOUT("NVM Write Error while updating checksum.\n"); goto out; } hw->nvm.ops.release(hw); ret_val = e1000_update_flash_i210(hw); } else { ret_val = E1000_ERR_SWFW_SYNC; } out: return ret_val; } /** * e1000_get_flash_presence_i210 - Check if flash device is detected. * @hw: pointer to the HW structure * **/ bool e1000_get_flash_presence_i210(struct e1000_hw *hw) { u32 eec = 0; bool ret_val = FALSE; DEBUGFUNC("e1000_get_flash_presence_i210"); eec = E1000_READ_REG(hw, E1000_EECD); if (eec & E1000_EECD_FLASH_DETECTED_I210) ret_val = TRUE; return ret_val; } /** * e1000_update_flash_i210 - Commit EEPROM to the flash * @hw: pointer to the HW structure * **/ s32 e1000_update_flash_i210(struct e1000_hw *hw) { s32 ret_val; u32 flup; DEBUGFUNC("e1000_update_flash_i210"); ret_val = e1000_pool_flash_update_done_i210(hw); if (ret_val == -E1000_ERR_NVM) { DEBUGOUT("Flash update time out\n"); goto out; } flup = E1000_READ_REG(hw, E1000_EECD) | E1000_EECD_FLUPD_I210; E1000_WRITE_REG(hw, E1000_EECD, flup); ret_val = e1000_pool_flash_update_done_i210(hw); if (ret_val == E1000_SUCCESS) DEBUGOUT("Flash update complete\n"); else DEBUGOUT("Flash update time out\n"); out: return ret_val; } /** * e1000_pool_flash_update_done_i210 - Pool FLUDONE status. * @hw: pointer to the HW structure * **/ s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw) { s32 ret_val = -E1000_ERR_NVM; u32 i, reg; DEBUGFUNC("e1000_pool_flash_update_done_i210"); for (i = 0; i < E1000_FLUDONE_ATTEMPTS; i++) { reg = E1000_READ_REG(hw, E1000_EECD); if (reg & E1000_EECD_FLUDONE_I210) { ret_val = E1000_SUCCESS; break; } usec_delay(5); } return ret_val; } /** * e1000_init_nvm_params_i210 - Initialize i210 NVM function pointers * @hw: pointer to the HW structure * * Initialize the i210/i211 NVM parameters and function pointers. **/ static s32 e1000_init_nvm_params_i210(struct e1000_hw *hw) { s32 ret_val; struct e1000_nvm_info *nvm = &hw->nvm; DEBUGFUNC("e1000_init_nvm_params_i210"); ret_val = e1000_init_nvm_params_82575(hw); nvm->ops.acquire = e1000_acquire_nvm_i210; nvm->ops.release = e1000_release_nvm_i210; nvm->ops.valid_led_default = e1000_valid_led_default_i210; if (e1000_get_flash_presence_i210(hw)) { hw->nvm.type = e1000_nvm_flash_hw; nvm->ops.read = e1000_read_nvm_srrd_i210; nvm->ops.write = e1000_write_nvm_srwr_i210; nvm->ops.validate = e1000_validate_nvm_checksum_i210; nvm->ops.update = e1000_update_nvm_checksum_i210; } else { hw->nvm.type = e1000_nvm_invm; nvm->ops.read = e1000_read_invm_i210; nvm->ops.write = e1000_null_write_nvm; nvm->ops.validate = e1000_null_ops_generic; nvm->ops.update = e1000_null_ops_generic; } return ret_val; } /** * e1000_init_function_pointers_i210 - Init func ptrs. * @hw: pointer to the HW structure * * Called to initialize all function pointers and parameters. **/ void e1000_init_function_pointers_i210(struct e1000_hw *hw) { e1000_init_function_pointers_82575(hw); hw->nvm.ops.init_params = e1000_init_nvm_params_i210; return; } /** * e1000_valid_led_default_i210 - Verify a valid default LED config * @hw: pointer to the HW structure * @data: pointer to the NVM (EEPROM) * * Read the EEPROM for the current default LED configuration. If the * LED configuration is not valid, set to a valid LED configuration. **/ static s32 e1000_valid_led_default_i210(struct e1000_hw *hw, u16 *data) { s32 ret_val; DEBUGFUNC("e1000_valid_led_default_i210"); ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); goto out; } if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) { switch (hw->phy.media_type) { case e1000_media_type_internal_serdes: *data = ID_LED_DEFAULT_I210_SERDES; break; case e1000_media_type_copper: default: *data = ID_LED_DEFAULT_I210; break; } } out: return ret_val; } /** * __e1000_access_xmdio_reg - Read/write XMDIO register * @hw: pointer to the HW structure * @address: XMDIO address to program * @dev_addr: device address to program * @data: pointer to value to read/write from/to the XMDIO address * @read: boolean flag to indicate read or write **/ static s32 __e1000_access_xmdio_reg(struct e1000_hw *hw, u16 address, u8 dev_addr, u16 *data, bool read) { s32 ret_val; DEBUGFUNC("__e1000_access_xmdio_reg"); ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, dev_addr); if (ret_val) return ret_val; ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, address); if (ret_val) return ret_val; ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, E1000_MMDAC_FUNC_DATA | dev_addr); if (ret_val) return ret_val; if (read) ret_val = hw->phy.ops.read_reg(hw, E1000_MMDAAD, data); else ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, *data); if (ret_val) return ret_val; /* Recalibrate the device back to 0 */ ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, 0); if (ret_val) return ret_val; return ret_val; } /** * e1000_read_xmdio_reg - Read XMDIO register * @hw: pointer to the HW structure * @addr: XMDIO address to program * @dev_addr: device address to program * @data: value to be read from the EMI address **/ s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data) { DEBUGFUNC("e1000_read_xmdio_reg"); return __e1000_access_xmdio_reg(hw, addr, dev_addr, data, TRUE); } /** * e1000_write_xmdio_reg - Write XMDIO register * @hw: pointer to the HW structure * @addr: XMDIO address to program * @dev_addr: device address to program * @data: value to be written to the XMDIO address **/ s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data) { DEBUGFUNC("e1000_read_xmdio_reg"); return __e1000_access_xmdio_reg(hw, addr, dev_addr, &data, FALSE); } /** * e1000_pll_workaround_i210 * @hw: pointer to the HW structure * * Works around an errata in the PLL circuit where it occasionally * provides the wrong clock frequency after power up. **/ static s32 e1000_pll_workaround_i210(struct e1000_hw *hw) { s32 ret_val; u32 wuc, mdicnfg, ctrl, ctrl_ext, reg_val; u16 nvm_word, phy_word, pci_word, tmp_nvm; int i; /* Get and set needed register values */ wuc = E1000_READ_REG(hw, E1000_WUC); mdicnfg = E1000_READ_REG(hw, E1000_MDICNFG); reg_val = mdicnfg & ~E1000_MDICNFG_EXT_MDIO; E1000_WRITE_REG(hw, E1000_MDICNFG, reg_val); /* Get data from NVM, or set default */ ret_val = e1000_read_invm_word_i210(hw, E1000_INVM_AUTOLOAD, &nvm_word); if (ret_val != E1000_SUCCESS) nvm_word = E1000_INVM_DEFAULT_AL; tmp_nvm = nvm_word | E1000_INVM_PLL_WO_VAL; for (i = 0; i < E1000_MAX_PLL_TRIES; i++) { /* check current state directly from internal PHY */ e1000_read_phy_reg_gs40g(hw, (E1000_PHY_PLL_FREQ_PAGE | E1000_PHY_PLL_FREQ_REG), &phy_word); if ((phy_word & E1000_PHY_PLL_UNCONF) != E1000_PHY_PLL_UNCONF) { ret_val = E1000_SUCCESS; break; } else { ret_val = -E1000_ERR_PHY; } /* directly reset the internal PHY */ ctrl = E1000_READ_REG(hw, E1000_CTRL); E1000_WRITE_REG(hw, E1000_CTRL, ctrl|E1000_CTRL_PHY_RST); ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ctrl_ext |= (E1000_CTRL_EXT_PHYPDEN | E1000_CTRL_EXT_SDLPE); E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); E1000_WRITE_REG(hw, E1000_WUC, 0); reg_val = (E1000_INVM_AUTOLOAD << 4) | (tmp_nvm << 16); E1000_WRITE_REG(hw, E1000_EEARBC_I210, reg_val); e1000_read_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word); pci_word |= E1000_PCI_PMCSR_D3; e1000_write_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word); msec_delay(1); pci_word &= ~E1000_PCI_PMCSR_D3; e1000_write_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word); reg_val = (E1000_INVM_AUTOLOAD << 4) | (nvm_word << 16); E1000_WRITE_REG(hw, E1000_EEARBC_I210, reg_val); /* restore WUC register */ E1000_WRITE_REG(hw, E1000_WUC, wuc); } /* restore MDICNFG setting */ E1000_WRITE_REG(hw, E1000_MDICNFG, mdicnfg); return ret_val; } /** * e1000_init_hw_i210 - Init hw for I210/I211 * @hw: pointer to the HW structure * * Called to initialize hw for i210 hw family. **/ s32 e1000_init_hw_i210(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_init_hw_i210"); if ((hw->mac.type >= e1000_i210) && !(e1000_get_flash_presence_i210(hw))) { ret_val = e1000_pll_workaround_i210(hw); if (ret_val != E1000_SUCCESS) return ret_val; } ret_val = e1000_init_hw_82575(hw); return ret_val; } Index: projects/clang-trunk/sys/dev/e1000/e1000_i210.h =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_i210.h (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_i210.h (revision 287502) @@ -1,111 +1,109 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _E1000_I210_H_ #define _E1000_I210_H_ bool e1000_get_flash_presence_i210(struct e1000_hw *hw); s32 e1000_update_flash_i210(struct e1000_hw *hw); s32 e1000_update_nvm_checksum_i210(struct e1000_hw *hw); s32 e1000_validate_nvm_checksum_i210(struct e1000_hw *hw); s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); -s32 e1000_read_invm_version(struct e1000_hw *hw, - struct e1000_fw_version *invm_ver); s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask); void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask); s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data); s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data); s32 e1000_init_hw_i210(struct e1000_hw *hw); #define E1000_STM_OPCODE 0xDB00 #define E1000_EEPROM_FLASH_SIZE_WORD 0x11 #define INVM_DWORD_TO_RECORD_TYPE(invm_dword) \ (u8)((invm_dword) & 0x7) #define INVM_DWORD_TO_WORD_ADDRESS(invm_dword) \ (u8)(((invm_dword) & 0x0000FE00) >> 9) #define INVM_DWORD_TO_WORD_DATA(invm_dword) \ (u16)(((invm_dword) & 0xFFFF0000) >> 16) enum E1000_INVM_STRUCTURE_TYPE { E1000_INVM_UNINITIALIZED_STRUCTURE = 0x00, E1000_INVM_WORD_AUTOLOAD_STRUCTURE = 0x01, E1000_INVM_CSR_AUTOLOAD_STRUCTURE = 0x02, E1000_INVM_PHY_REGISTER_AUTOLOAD_STRUCTURE = 0x03, E1000_INVM_RSA_KEY_SHA256_STRUCTURE = 0x04, E1000_INVM_INVALIDATED_STRUCTURE = 0x0F, }; #define E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS 8 #define E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS 1 #define E1000_INVM_ULT_BYTES_SIZE 8 #define E1000_INVM_RECORD_SIZE_IN_BYTES 4 #define E1000_INVM_VER_FIELD_ONE 0x1FF8 #define E1000_INVM_VER_FIELD_TWO 0x7FE000 #define E1000_INVM_IMGTYPE_FIELD 0x1F800000 #define E1000_INVM_MAJOR_MASK 0x3F0 #define E1000_INVM_MINOR_MASK 0xF #define E1000_INVM_MAJOR_SHIFT 4 #define ID_LED_DEFAULT_I210 ((ID_LED_OFF1_ON2 << 8) | \ (ID_LED_DEF1_DEF2 << 4) | \ (ID_LED_OFF1_OFF2)) #define ID_LED_DEFAULT_I210_SERDES ((ID_LED_DEF1_DEF2 << 8) | \ (ID_LED_DEF1_DEF2 << 4) | \ (ID_LED_OFF1_ON2)) /* NVM offset defaults for I211 devices */ #define NVM_INIT_CTRL_2_DEFAULT_I211 0X7243 #define NVM_INIT_CTRL_4_DEFAULT_I211 0x00C1 #define NVM_LED_1_CFG_DEFAULT_I211 0x0184 #define NVM_LED_0_2_CFG_DEFAULT_I211 0x200C /* PLL Defines */ #define E1000_PCI_PMCSR 0x44 #define E1000_PCI_PMCSR_D3 0x03 #define E1000_MAX_PLL_TRIES 5 #define E1000_PHY_PLL_UNCONF 0xFF #define E1000_PHY_PLL_FREQ_PAGE 0xFC0000 #define E1000_PHY_PLL_FREQ_REG 0x000E #define E1000_INVM_DEFAULT_AL 0x202F #define E1000_INVM_AUTOLOAD 0x0A #define E1000_INVM_PLL_WO_VAL 0x0010 #endif Index: projects/clang-trunk/sys/dev/e1000/e1000_ich8lan.c =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_ich8lan.c (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_ich8lan.c (revision 287502) @@ -1,5887 +1,5341 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ /* 82562G 10/100 Network Connection * 82562G-2 10/100 Network Connection * 82562GT 10/100 Network Connection * 82562GT-2 10/100 Network Connection * 82562V 10/100 Network Connection * 82562V-2 10/100 Network Connection * 82566DC-2 Gigabit Network Connection * 82566DC Gigabit Network Connection * 82566DM-2 Gigabit Network Connection * 82566DM Gigabit Network Connection * 82566MC Gigabit Network Connection * 82566MM Gigabit Network Connection * 82567LM Gigabit Network Connection * 82567LF Gigabit Network Connection * 82567V Gigabit Network Connection * 82567LM-2 Gigabit Network Connection * 82567LF-2 Gigabit Network Connection * 82567V-2 Gigabit Network Connection * 82567LF-3 Gigabit Network Connection * 82567LM-3 Gigabit Network Connection * 82567LM-4 Gigabit Network Connection * 82577LM Gigabit Network Connection * 82577LC Gigabit Network Connection * 82578DM Gigabit Network Connection * 82578DC Gigabit Network Connection * 82579LM Gigabit Network Connection * 82579V Gigabit Network Connection * Ethernet Connection I217-LM * Ethernet Connection I217-V * Ethernet Connection I218-V * Ethernet Connection I218-LM * Ethernet Connection (2) I218-LM * Ethernet Connection (2) I218-V * Ethernet Connection (3) I218-LM * Ethernet Connection (3) I218-V */ #include "e1000_api.h" static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw); static void e1000_release_swflag_ich8lan(struct e1000_hw *hw); static s32 e1000_acquire_nvm_ich8lan(struct e1000_hw *hw); static void e1000_release_nvm_ich8lan(struct e1000_hw *hw); static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw); static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw); static int e1000_rar_set_pch2lan(struct e1000_hw *hw, u8 *addr, u32 index); static int e1000_rar_set_pch_lpt(struct e1000_hw *hw, u8 *addr, u32 index); static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw); static void e1000_update_mc_addr_list_pch2lan(struct e1000_hw *hw, u8 *mc_addr_list, u32 mc_addr_count); static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw); static s32 e1000_phy_hw_reset_ich8lan(struct e1000_hw *hw); static s32 e1000_set_lplu_state_pchlan(struct e1000_hw *hw, bool active); static s32 e1000_set_d0_lplu_state_ich8lan(struct e1000_hw *hw, bool active); static s32 e1000_set_d3_lplu_state_ich8lan(struct e1000_hw *hw, bool active); static s32 e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); -static s32 e1000_read_nvm_spt(struct e1000_hw *hw, u16 offset, u16 words, - u16 *data); static s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw); static s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw); -static s32 e1000_update_nvm_checksum_spt(struct e1000_hw *hw); static s32 e1000_valid_led_default_ich8lan(struct e1000_hw *hw, u16 *data); static s32 e1000_id_led_init_pchlan(struct e1000_hw *hw); static s32 e1000_get_bus_info_ich8lan(struct e1000_hw *hw); static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw); static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw); static s32 e1000_setup_link_ich8lan(struct e1000_hw *hw); static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw); static s32 e1000_setup_copper_link_pch_lpt(struct e1000_hw *hw); static s32 e1000_get_link_up_info_ich8lan(struct e1000_hw *hw, u16 *speed, u16 *duplex); static s32 e1000_cleanup_led_ich8lan(struct e1000_hw *hw); static s32 e1000_led_on_ich8lan(struct e1000_hw *hw); static s32 e1000_led_off_ich8lan(struct e1000_hw *hw); static s32 e1000_k1_gig_workaround_hv(struct e1000_hw *hw, bool link); static s32 e1000_setup_led_pchlan(struct e1000_hw *hw); static s32 e1000_cleanup_led_pchlan(struct e1000_hw *hw); static s32 e1000_led_on_pchlan(struct e1000_hw *hw); static s32 e1000_led_off_pchlan(struct e1000_hw *hw); static void e1000_clear_hw_cntrs_ich8lan(struct e1000_hw *hw); static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank); static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw); static s32 e1000_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw); static s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, u8 *data); static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, u8 size, u16 *data); -static s32 e1000_read_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, - u32 *data); -static s32 e1000_read_flash_dword_ich8lan(struct e1000_hw *hw, - u32 offset, u32 *data); -static s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw, - u32 offset, u32 data); -static s32 e1000_retry_write_flash_dword_ich8lan(struct e1000_hw *hw, - u32 offset, u32 dword); static s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw, u32 offset, u16 *data); static s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, u8 byte); static s32 e1000_get_cfg_done_ich8lan(struct e1000_hw *hw); static void e1000_power_down_phy_copper_ich8lan(struct e1000_hw *hw); static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw); static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw); static s32 e1000_k1_workaround_lv(struct e1000_hw *hw); static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate); +static s32 e1000_set_obff_timer_pch_lpt(struct e1000_hw *hw, u32 itr); /* ICH GbE Flash Hardware Sequencing Flash Status Register bit breakdown */ /* Offset 04h HSFSTS */ union ich8_hws_flash_status { struct ich8_hsfsts { u16 flcdone:1; /* bit 0 Flash Cycle Done */ u16 flcerr:1; /* bit 1 Flash Cycle Error */ u16 dael:1; /* bit 2 Direct Access error Log */ u16 berasesz:2; /* bit 4:3 Sector Erase Size */ u16 flcinprog:1; /* bit 5 flash cycle in Progress */ u16 reserved1:2; /* bit 13:6 Reserved */ u16 reserved2:6; /* bit 13:6 Reserved */ u16 fldesvalid:1; /* bit 14 Flash Descriptor Valid */ u16 flockdn:1; /* bit 15 Flash Config Lock-Down */ } hsf_status; u16 regval; }; /* ICH GbE Flash Hardware Sequencing Flash control Register bit breakdown */ /* Offset 06h FLCTL */ union ich8_hws_flash_ctrl { struct ich8_hsflctl { u16 flcgo:1; /* 0 Flash Cycle Go */ u16 flcycle:2; /* 2:1 Flash Cycle */ u16 reserved:5; /* 7:3 Reserved */ u16 fldbcount:2; /* 9:8 Flash Data Byte Count */ u16 flockdn:6; /* 15:10 Reserved */ } hsf_ctrl; u16 regval; }; /* ICH Flash Region Access Permissions */ union ich8_hws_flash_regacc { struct ich8_flracc { u32 grra:8; /* 0:7 GbE region Read Access */ u32 grwa:8; /* 8:15 GbE region Write Access */ u32 gmrag:8; /* 23:16 GbE Master Read Access Grant */ u32 gmwag:8; /* 31:24 GbE Master Write Access Grant */ } hsf_flregacc; u16 regval; }; /** * e1000_phy_is_accessible_pchlan - Check if able to access PHY registers * @hw: pointer to the HW structure * * Test access to the PHY registers by reading the PHY ID registers. If * the PHY ID is already known (e.g. resume path) compare it with known ID, * otherwise assume the read PHY ID is correct if it is valid. * * Assumes the sw/fw/hw semaphore is already acquired. **/ static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw) { u16 phy_reg = 0; u32 phy_id = 0; s32 ret_val = 0; u16 retry_count; u32 mac_reg = 0; for (retry_count = 0; retry_count < 2; retry_count++) { ret_val = hw->phy.ops.read_reg_locked(hw, PHY_ID1, &phy_reg); if (ret_val || (phy_reg == 0xFFFF)) continue; phy_id = (u32)(phy_reg << 16); ret_val = hw->phy.ops.read_reg_locked(hw, PHY_ID2, &phy_reg); if (ret_val || (phy_reg == 0xFFFF)) { phy_id = 0; continue; } phy_id |= (u32)(phy_reg & PHY_REVISION_MASK); break; } if (hw->phy.id) { if (hw->phy.id == phy_id) goto out; } else if (phy_id) { hw->phy.id = phy_id; hw->phy.revision = (u32)(phy_reg & ~PHY_REVISION_MASK); goto out; } /* In case the PHY needs to be in mdio slow mode, * set slow mode and try to get the PHY id again. */ if (hw->mac.type < e1000_pch_lpt) { hw->phy.ops.release(hw); ret_val = e1000_set_mdio_slow_mode_hv(hw); if (!ret_val) ret_val = e1000_get_phy_id(hw); hw->phy.ops.acquire(hw); } if (ret_val) return FALSE; out: - if ((hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt)) { - /* Only unforce SMBus if ME is not active */ - if (!(E1000_READ_REG(hw, E1000_FWSM) & - E1000_ICH_FWSM_FW_VALID)) { - /* Unforce SMBus mode in PHY */ - hw->phy.ops.read_reg_locked(hw, CV_SMB_CTRL, &phy_reg); - phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS; - hw->phy.ops.write_reg_locked(hw, CV_SMB_CTRL, phy_reg); + if (hw->mac.type == e1000_pch_lpt) { + /* Unforce SMBus mode in PHY */ + hw->phy.ops.read_reg_locked(hw, CV_SMB_CTRL, &phy_reg); + phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS; + hw->phy.ops.write_reg_locked(hw, CV_SMB_CTRL, phy_reg); - /* Unforce SMBus mode in MAC */ - mac_reg = E1000_READ_REG(hw, E1000_CTRL_EXT); - mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; - E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg); - } + /* Unforce SMBus mode in MAC */ + mac_reg = E1000_READ_REG(hw, E1000_CTRL_EXT); + mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg); } return TRUE; } /** * e1000_toggle_lanphypc_pch_lpt - toggle the LANPHYPC pin value * @hw: pointer to the HW structure * * Toggling the LANPHYPC pin value fully power-cycles the PHY and is * used to reset the PHY to a quiescent state when necessary. **/ static void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw) { u32 mac_reg; DEBUGFUNC("e1000_toggle_lanphypc_pch_lpt"); /* Set Phy Config Counter to 50msec */ mac_reg = E1000_READ_REG(hw, E1000_FEXTNVM3); mac_reg &= ~E1000_FEXTNVM3_PHY_CFG_COUNTER_MASK; mac_reg |= E1000_FEXTNVM3_PHY_CFG_COUNTER_50MSEC; E1000_WRITE_REG(hw, E1000_FEXTNVM3, mac_reg); /* Toggle LANPHYPC Value bit */ mac_reg = E1000_READ_REG(hw, E1000_CTRL); mac_reg |= E1000_CTRL_LANPHYPC_OVERRIDE; mac_reg &= ~E1000_CTRL_LANPHYPC_VALUE; E1000_WRITE_REG(hw, E1000_CTRL, mac_reg); E1000_WRITE_FLUSH(hw); usec_delay(10); mac_reg &= ~E1000_CTRL_LANPHYPC_OVERRIDE; E1000_WRITE_REG(hw, E1000_CTRL, mac_reg); E1000_WRITE_FLUSH(hw); if (hw->mac.type < e1000_pch_lpt) { msec_delay(50); } else { u16 count = 20; do { msec_delay(5); } while (!(E1000_READ_REG(hw, E1000_CTRL_EXT) & E1000_CTRL_EXT_LPCD) && count--); msec_delay(30); } } /** * e1000_init_phy_workarounds_pchlan - PHY initialization workarounds * @hw: pointer to the HW structure * * Workarounds/flow necessary for PHY initialization during driver load * and resume paths. **/ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) { u32 mac_reg, fwsm = E1000_READ_REG(hw, E1000_FWSM); s32 ret_val; DEBUGFUNC("e1000_init_phy_workarounds_pchlan"); /* Gate automatic PHY configuration by hardware on managed and * non-managed 82579 and newer adapters. */ e1000_gate_hw_phy_config_ich8lan(hw, TRUE); /* It is not possible to be certain of the current state of ULP * so forcibly disable it. */ hw->dev_spec.ich8lan.ulp_state = e1000_ulp_state_unknown; e1000_disable_ulp_lpt_lp(hw, TRUE); ret_val = hw->phy.ops.acquire(hw); if (ret_val) { DEBUGOUT("Failed to initialize PHY flow\n"); goto out; } /* The MAC-PHY interconnect may be in SMBus mode. If the PHY is * inaccessible and resetting the PHY is not blocked, toggle the * LANPHYPC Value bit to force the interconnect to PCIe mode. */ switch (hw->mac.type) { case e1000_pch_lpt: - case e1000_pch_spt: if (e1000_phy_is_accessible_pchlan(hw)) break; /* Before toggling LANPHYPC, see if PHY is accessible by * forcing MAC to SMBus mode first. */ mac_reg = E1000_READ_REG(hw, E1000_CTRL_EXT); mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg); /* Wait 50 milliseconds for MAC to finish any retries * that it might be trying to perform from previous * attempts to acknowledge any phy read requests. */ msec_delay(50); /* fall-through */ case e1000_pch2lan: if (e1000_phy_is_accessible_pchlan(hw)) break; /* fall-through */ case e1000_pchlan: if ((hw->mac.type == e1000_pchlan) && (fwsm & E1000_ICH_FWSM_FW_VALID)) break; if (hw->phy.ops.check_reset_block(hw)) { DEBUGOUT("Required LANPHYPC toggle blocked by ME\n"); ret_val = -E1000_ERR_PHY; break; } /* Toggle LANPHYPC Value bit */ e1000_toggle_lanphypc_pch_lpt(hw); if (hw->mac.type >= e1000_pch_lpt) { if (e1000_phy_is_accessible_pchlan(hw)) break; /* Toggling LANPHYPC brings the PHY out of SMBus mode * so ensure that the MAC is also out of SMBus mode */ mac_reg = E1000_READ_REG(hw, E1000_CTRL_EXT); mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg); if (e1000_phy_is_accessible_pchlan(hw)) break; ret_val = -E1000_ERR_PHY; } break; default: break; } hw->phy.ops.release(hw); if (!ret_val) { /* Check to see if able to reset PHY. Print error if not */ if (hw->phy.ops.check_reset_block(hw)) { ERROR_REPORT("Reset blocked by ME\n"); goto out; } /* Reset the PHY before any access to it. Doing so, ensures * that the PHY is in a known good state before we read/write * PHY registers. The generic reset is sufficient here, * because we haven't determined the PHY type yet. */ ret_val = e1000_phy_hw_reset_generic(hw); if (ret_val) goto out; /* On a successful reset, possibly need to wait for the PHY * to quiesce to an accessible state before returning control * to the calling function. If the PHY does not quiesce, then * return E1000E_BLK_PHY_RESET, as this is the condition that * the PHY is in. */ ret_val = hw->phy.ops.check_reset_block(hw); if (ret_val) ERROR_REPORT("ME blocked access to PHY after reset\n"); } out: /* Ungate automatic PHY configuration on non-managed 82579 */ if ((hw->mac.type == e1000_pch2lan) && !(fwsm & E1000_ICH_FWSM_FW_VALID)) { msec_delay(10); e1000_gate_hw_phy_config_ich8lan(hw, FALSE); } return ret_val; } /** * e1000_init_phy_params_pchlan - Initialize PHY function pointers * @hw: pointer to the HW structure * * Initialize family-specific PHY parameters and function pointers. **/ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; DEBUGFUNC("e1000_init_phy_params_pchlan"); phy->addr = 1; phy->reset_delay_us = 100; phy->ops.acquire = e1000_acquire_swflag_ich8lan; phy->ops.check_reset_block = e1000_check_reset_block_ich8lan; phy->ops.get_cfg_done = e1000_get_cfg_done_ich8lan; phy->ops.set_page = e1000_set_page_igp; phy->ops.read_reg = e1000_read_phy_reg_hv; phy->ops.read_reg_locked = e1000_read_phy_reg_hv_locked; phy->ops.read_reg_page = e1000_read_phy_reg_page_hv; phy->ops.release = e1000_release_swflag_ich8lan; phy->ops.reset = e1000_phy_hw_reset_ich8lan; phy->ops.set_d0_lplu_state = e1000_set_lplu_state_pchlan; phy->ops.set_d3_lplu_state = e1000_set_lplu_state_pchlan; phy->ops.write_reg = e1000_write_phy_reg_hv; phy->ops.write_reg_locked = e1000_write_phy_reg_hv_locked; phy->ops.write_reg_page = e1000_write_phy_reg_page_hv; phy->ops.power_up = e1000_power_up_phy_copper; phy->ops.power_down = e1000_power_down_phy_copper_ich8lan; phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; phy->id = e1000_phy_unknown; ret_val = e1000_init_phy_workarounds_pchlan(hw); if (ret_val) return ret_val; if (phy->id == e1000_phy_unknown) switch (hw->mac.type) { default: ret_val = e1000_get_phy_id(hw); if (ret_val) return ret_val; if ((phy->id != 0) && (phy->id != PHY_REVISION_MASK)) break; /* fall-through */ case e1000_pch2lan: case e1000_pch_lpt: - case e1000_pch_spt: /* In case the PHY needs to be in mdio slow mode, * set slow mode and try to get the PHY id again. */ ret_val = e1000_set_mdio_slow_mode_hv(hw); if (ret_val) return ret_val; ret_val = e1000_get_phy_id(hw); if (ret_val) return ret_val; break; } phy->type = e1000_get_phy_type_from_id(phy->id); switch (phy->type) { case e1000_phy_82577: case e1000_phy_82579: case e1000_phy_i217: phy->ops.check_polarity = e1000_check_polarity_82577; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_82577; phy->ops.get_cable_length = e1000_get_cable_length_82577; phy->ops.get_info = e1000_get_phy_info_82577; phy->ops.commit = e1000_phy_sw_reset_generic; break; case e1000_phy_82578: phy->ops.check_polarity = e1000_check_polarity_m88; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; phy->ops.get_cable_length = e1000_get_cable_length_m88; phy->ops.get_info = e1000_get_phy_info_m88; break; default: ret_val = -E1000_ERR_PHY; break; } return ret_val; } /** * e1000_init_phy_params_ich8lan - Initialize PHY function pointers * @hw: pointer to the HW structure * * Initialize family-specific PHY parameters and function pointers. **/ static s32 e1000_init_phy_params_ich8lan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 i = 0; DEBUGFUNC("e1000_init_phy_params_ich8lan"); phy->addr = 1; phy->reset_delay_us = 100; phy->ops.acquire = e1000_acquire_swflag_ich8lan; phy->ops.check_reset_block = e1000_check_reset_block_ich8lan; phy->ops.get_cable_length = e1000_get_cable_length_igp_2; phy->ops.get_cfg_done = e1000_get_cfg_done_ich8lan; phy->ops.read_reg = e1000_read_phy_reg_igp; phy->ops.release = e1000_release_swflag_ich8lan; phy->ops.reset = e1000_phy_hw_reset_ich8lan; phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_ich8lan; phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_ich8lan; phy->ops.write_reg = e1000_write_phy_reg_igp; phy->ops.power_up = e1000_power_up_phy_copper; phy->ops.power_down = e1000_power_down_phy_copper_ich8lan; /* We may need to do this twice - once for IGP and if that fails, * we'll set BM func pointers and try again */ ret_val = e1000_determine_phy_address(hw); if (ret_val) { phy->ops.write_reg = e1000_write_phy_reg_bm; phy->ops.read_reg = e1000_read_phy_reg_bm; ret_val = e1000_determine_phy_address(hw); if (ret_val) { DEBUGOUT("Cannot determine PHY addr. Erroring out\n"); return ret_val; } } phy->id = 0; while ((e1000_phy_unknown == e1000_get_phy_type_from_id(phy->id)) && (i++ < 100)) { msec_delay(1); ret_val = e1000_get_phy_id(hw); if (ret_val) return ret_val; } /* Verify phy id */ switch (phy->id) { case IGP03E1000_E_PHY_ID: phy->type = e1000_phy_igp_3; phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; phy->ops.read_reg_locked = e1000_read_phy_reg_igp_locked; phy->ops.write_reg_locked = e1000_write_phy_reg_igp_locked; phy->ops.get_info = e1000_get_phy_info_igp; phy->ops.check_polarity = e1000_check_polarity_igp; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_igp; break; case IFE_E_PHY_ID: case IFE_PLUS_E_PHY_ID: case IFE_C_E_PHY_ID: phy->type = e1000_phy_ife; phy->autoneg_mask = E1000_ALL_NOT_GIG; phy->ops.get_info = e1000_get_phy_info_ife; phy->ops.check_polarity = e1000_check_polarity_ife; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_ife; break; case BME1000_E_PHY_ID: phy->type = e1000_phy_bm; phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; phy->ops.read_reg = e1000_read_phy_reg_bm; phy->ops.write_reg = e1000_write_phy_reg_bm; phy->ops.commit = e1000_phy_sw_reset_generic; phy->ops.get_info = e1000_get_phy_info_m88; phy->ops.check_polarity = e1000_check_polarity_m88; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; break; default: return -E1000_ERR_PHY; break; } return E1000_SUCCESS; } /** * e1000_init_nvm_params_ich8lan - Initialize NVM function pointers * @hw: pointer to the HW structure * * Initialize family-specific NVM parameters and function * pointers. **/ static s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u32 gfpreg, sector_base_addr, sector_end_addr; u16 i; - u32 nvm_size; DEBUGFUNC("e1000_init_nvm_params_ich8lan"); /* Can't read flash registers if the register set isn't mapped. */ nvm->type = e1000_nvm_flash_sw; - /* in SPT, gfpreg doesn't exist. NVM size is taken from the - * STRAP register - */ - if (hw->mac.type == e1000_pch_spt) { - nvm->flash_base_addr = 0; - nvm_size = - (((E1000_READ_REG(hw, E1000_STRAP) >> 1) & 0x1F) + 1) - * NVM_SIZE_MULTIPLIER; - nvm->flash_bank_size = nvm_size / 2; - /* Adjust to word count */ - nvm->flash_bank_size /= sizeof(u16); - /* Set the base address for flash register access */ - hw->flash_address = hw->hw_addr + E1000_FLASH_BASE_ADDR; - } else { - if (!hw->flash_address) { - DEBUGOUT("ERROR: Flash registers not mapped\n"); - return -E1000_ERR_CONFIG; - } + if (!hw->flash_address) { + DEBUGOUT("ERROR: Flash registers not mapped\n"); + return -E1000_ERR_CONFIG; + } - gfpreg = E1000_READ_FLASH_REG(hw, ICH_FLASH_GFPREG); + gfpreg = E1000_READ_FLASH_REG(hw, ICH_FLASH_GFPREG); - /* sector_X_addr is a "sector"-aligned address (4096 bytes) - * Add 1 to sector_end_addr since this sector is included in - * the overall size. - */ - sector_base_addr = gfpreg & FLASH_GFPREG_BASE_MASK; - sector_end_addr = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK) + 1; + /* sector_X_addr is a "sector"-aligned address (4096 bytes) + * Add 1 to sector_end_addr since this sector is included in + * the overall size. + */ + sector_base_addr = gfpreg & FLASH_GFPREG_BASE_MASK; + sector_end_addr = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK) + 1; - /* flash_base_addr is byte-aligned */ - nvm->flash_base_addr = sector_base_addr - << FLASH_SECTOR_ADDR_SHIFT; + /* flash_base_addr is byte-aligned */ + nvm->flash_base_addr = sector_base_addr << FLASH_SECTOR_ADDR_SHIFT; - /* find total size of the NVM, then cut in half since the total - * size represents two separate NVM banks. - */ - nvm->flash_bank_size = ((sector_end_addr - sector_base_addr) - << FLASH_SECTOR_ADDR_SHIFT); - nvm->flash_bank_size /= 2; - /* Adjust to word count */ - nvm->flash_bank_size /= sizeof(u16); - } + /* find total size of the NVM, then cut in half since the total + * size represents two separate NVM banks. + */ + nvm->flash_bank_size = ((sector_end_addr - sector_base_addr) + << FLASH_SECTOR_ADDR_SHIFT); + nvm->flash_bank_size /= 2; + /* Adjust to word count */ + nvm->flash_bank_size /= sizeof(u16); nvm->word_size = E1000_SHADOW_RAM_WORDS; /* Clear shadow ram */ for (i = 0; i < nvm->word_size; i++) { dev_spec->shadow_ram[i].modified = FALSE; dev_spec->shadow_ram[i].value = 0xFFFF; } E1000_MUTEX_INIT(&dev_spec->nvm_mutex); E1000_MUTEX_INIT(&dev_spec->swflag_mutex); /* Function Pointers */ nvm->ops.acquire = e1000_acquire_nvm_ich8lan; nvm->ops.release = e1000_release_nvm_ich8lan; - if (hw->mac.type == e1000_pch_spt) { - nvm->ops.read = e1000_read_nvm_spt; - nvm->ops.update = e1000_update_nvm_checksum_spt; - } else { - nvm->ops.read = e1000_read_nvm_ich8lan; - nvm->ops.update = e1000_update_nvm_checksum_ich8lan; - } + nvm->ops.read = e1000_read_nvm_ich8lan; + nvm->ops.update = e1000_update_nvm_checksum_ich8lan; nvm->ops.valid_led_default = e1000_valid_led_default_ich8lan; nvm->ops.validate = e1000_validate_nvm_checksum_ich8lan; nvm->ops.write = e1000_write_nvm_ich8lan; return E1000_SUCCESS; } /** * e1000_init_mac_params_ich8lan - Initialize MAC function pointers * @hw: pointer to the HW structure * * Initialize family-specific MAC parameters and function * pointers. **/ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; +#if defined(QV_RELEASE) || !defined(NO_PCH_LPT_B0_SUPPORT) u16 pci_cfg; +#endif /* QV_RELEASE || !defined(NO_PCH_LPT_B0_SUPPORT) */ DEBUGFUNC("e1000_init_mac_params_ich8lan"); /* Set media type function pointer */ hw->phy.media_type = e1000_media_type_copper; /* Set mta register count */ mac->mta_reg_count = 32; /* Set rar entry count */ mac->rar_entry_count = E1000_ICH_RAR_ENTRIES; if (mac->type == e1000_ich8lan) mac->rar_entry_count--; /* Set if part includes ASF firmware */ mac->asf_firmware_present = TRUE; /* FWSM register */ mac->has_fwsm = TRUE; /* ARC subsystem not supported */ mac->arc_subsystem_valid = FALSE; /* Adaptive IFS supported */ mac->adaptive_ifs = TRUE; /* Function pointers */ /* bus type/speed/width */ mac->ops.get_bus_info = e1000_get_bus_info_ich8lan; /* function id */ mac->ops.set_lan_id = e1000_set_lan_id_single_port; /* reset */ mac->ops.reset_hw = e1000_reset_hw_ich8lan; /* hw initialization */ mac->ops.init_hw = e1000_init_hw_ich8lan; /* link setup */ mac->ops.setup_link = e1000_setup_link_ich8lan; /* physical interface setup */ mac->ops.setup_physical_interface = e1000_setup_copper_link_ich8lan; /* check for link */ mac->ops.check_for_link = e1000_check_for_copper_link_ich8lan; /* link info */ mac->ops.get_link_up_info = e1000_get_link_up_info_ich8lan; /* multicast address update */ mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic; /* clear hardware counters */ mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_ich8lan; /* LED and other operations */ switch (mac->type) { case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: /* check management mode */ mac->ops.check_mng_mode = e1000_check_mng_mode_ich8lan; /* ID LED init */ mac->ops.id_led_init = e1000_id_led_init_generic; /* blink LED */ mac->ops.blink_led = e1000_blink_led_generic; /* setup LED */ mac->ops.setup_led = e1000_setup_led_generic; /* cleanup LED */ mac->ops.cleanup_led = e1000_cleanup_led_ich8lan; /* turn on/off LED */ mac->ops.led_on = e1000_led_on_ich8lan; mac->ops.led_off = e1000_led_off_ich8lan; break; case e1000_pch2lan: mac->rar_entry_count = E1000_PCH2_RAR_ENTRIES; mac->ops.rar_set = e1000_rar_set_pch2lan; /* fall-through */ case e1000_pch_lpt: - case e1000_pch_spt: /* multicast address update for pch2 */ mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_pch2lan; - /* fall-through */ case e1000_pchlan: +#if defined(QV_RELEASE) || !defined(NO_PCH_LPT_B0_SUPPORT) /* save PCH revision_id */ e1000_read_pci_cfg(hw, E1000_PCI_REVISION_ID_REG, &pci_cfg); - /* SPT uses full byte for revision ID, - * as opposed to previous generations - */ - if (hw->mac.type >= e1000_pch_spt) - hw->revision_id = (u8)(pci_cfg &= 0x00FF); - else - hw->revision_id = (u8)(pci_cfg &= 0x000F); + hw->revision_id = (u8)(pci_cfg &= 0x000F); +#endif /* QV_RELEASE || !defined(NO_PCH_LPT_B0_SUPPORT) */ /* check management mode */ mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan; /* ID LED init */ mac->ops.id_led_init = e1000_id_led_init_pchlan; /* setup LED */ mac->ops.setup_led = e1000_setup_led_pchlan; /* cleanup LED */ mac->ops.cleanup_led = e1000_cleanup_led_pchlan; /* turn on/off LED */ mac->ops.led_on = e1000_led_on_pchlan; mac->ops.led_off = e1000_led_off_pchlan; break; default: break; } - if ((mac->type == e1000_pch_lpt) || - (mac->type == e1000_pch_spt)) { + if (mac->type == e1000_pch_lpt) { mac->rar_entry_count = E1000_PCH_LPT_RAR_ENTRIES; mac->ops.rar_set = e1000_rar_set_pch_lpt; mac->ops.setup_physical_interface = e1000_setup_copper_link_pch_lpt; + mac->ops.set_obff_timer = e1000_set_obff_timer_pch_lpt; } /* Enable PCS Lock-loss workaround for ICH8 */ if (mac->type == e1000_ich8lan) e1000_set_kmrn_lock_loss_workaround_ich8lan(hw, TRUE); return E1000_SUCCESS; } /** * __e1000_access_emi_reg_locked - Read/write EMI register * @hw: pointer to the HW structure * @addr: EMI address to program * @data: pointer to value to read/write from/to the EMI address * @read: boolean flag to indicate read or write * * This helper function assumes the SW/FW/HW Semaphore is already acquired. **/ static s32 __e1000_access_emi_reg_locked(struct e1000_hw *hw, u16 address, u16 *data, bool read) { s32 ret_val; DEBUGFUNC("__e1000_access_emi_reg_locked"); ret_val = hw->phy.ops.write_reg_locked(hw, I82579_EMI_ADDR, address); if (ret_val) return ret_val; if (read) ret_val = hw->phy.ops.read_reg_locked(hw, I82579_EMI_DATA, data); else ret_val = hw->phy.ops.write_reg_locked(hw, I82579_EMI_DATA, *data); return ret_val; } /** * e1000_read_emi_reg_locked - Read Extended Management Interface register * @hw: pointer to the HW structure * @addr: EMI address to program * @data: value to be read from the EMI address * * Assumes the SW/FW/HW Semaphore is already acquired. **/ s32 e1000_read_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 *data) { DEBUGFUNC("e1000_read_emi_reg_locked"); return __e1000_access_emi_reg_locked(hw, addr, data, TRUE); } /** * e1000_write_emi_reg_locked - Write Extended Management Interface register * @hw: pointer to the HW structure * @addr: EMI address to program * @data: value to be written to the EMI address * * Assumes the SW/FW/HW Semaphore is already acquired. **/ s32 e1000_write_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 data) { DEBUGFUNC("e1000_read_emi_reg_locked"); return __e1000_access_emi_reg_locked(hw, addr, &data, FALSE); } /** * e1000_set_eee_pchlan - Enable/disable EEE support * @hw: pointer to the HW structure * * Enable/disable EEE based on setting in dev_spec structure, the duplex of * the link and the EEE capabilities of the link partner. The LPI Control * register bits will remain set only if/when link is up. * * EEE LPI must not be asserted earlier than one second after link is up. * On 82579, EEE LPI should not be enabled until such time otherwise there * can be link issues with some switches. Other devices can have EEE LPI * enabled immediately upon link up since they have a timer in hardware which * prevents LPI from being asserted too early. **/ s32 e1000_set_eee_pchlan(struct e1000_hw *hw) { struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; s32 ret_val; u16 lpa, pcs_status, adv, adv_addr, lpi_ctrl, data; DEBUGFUNC("e1000_set_eee_pchlan"); switch (hw->phy.type) { case e1000_phy_82579: lpa = I82579_EEE_LP_ABILITY; pcs_status = I82579_EEE_PCS_STATUS; adv_addr = I82579_EEE_ADVERTISEMENT; break; case e1000_phy_i217: lpa = I217_EEE_LP_ABILITY; pcs_status = I217_EEE_PCS_STATUS; adv_addr = I217_EEE_ADVERTISEMENT; break; default: return E1000_SUCCESS; } ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; ret_val = hw->phy.ops.read_reg_locked(hw, I82579_LPI_CTRL, &lpi_ctrl); if (ret_val) goto release; /* Clear bits that enable EEE in various speeds */ lpi_ctrl &= ~I82579_LPI_CTRL_ENABLE_MASK; /* Enable EEE if not disabled by user */ if (!dev_spec->eee_disable) { /* Save off link partner's EEE ability */ ret_val = e1000_read_emi_reg_locked(hw, lpa, &dev_spec->eee_lp_ability); if (ret_val) goto release; /* Read EEE advertisement */ ret_val = e1000_read_emi_reg_locked(hw, adv_addr, &adv); if (ret_val) goto release; /* Enable EEE only for speeds in which the link partner is * EEE capable and for which we advertise EEE. */ if (adv & dev_spec->eee_lp_ability & I82579_EEE_1000_SUPPORTED) lpi_ctrl |= I82579_LPI_CTRL_1000_ENABLE; if (adv & dev_spec->eee_lp_ability & I82579_EEE_100_SUPPORTED) { hw->phy.ops.read_reg_locked(hw, PHY_LP_ABILITY, &data); if (data & NWAY_LPAR_100TX_FD_CAPS) lpi_ctrl |= I82579_LPI_CTRL_100_ENABLE; else /* EEE is not supported in 100Half, so ignore * partner's EEE in 100 ability if full-duplex * is not advertised. */ dev_spec->eee_lp_ability &= ~I82579_EEE_100_SUPPORTED; } } if (hw->phy.type == e1000_phy_82579) { ret_val = e1000_read_emi_reg_locked(hw, I82579_LPI_PLL_SHUT, &data); if (ret_val) goto release; data &= ~I82579_LPI_100_PLL_SHUT; ret_val = e1000_write_emi_reg_locked(hw, I82579_LPI_PLL_SHUT, data); } /* R/Clr IEEE MMD 3.1 bits 11:10 - Tx/Rx LPI Received */ ret_val = e1000_read_emi_reg_locked(hw, pcs_status, &data); if (ret_val) goto release; ret_val = hw->phy.ops.write_reg_locked(hw, I82579_LPI_CTRL, lpi_ctrl); release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_k1_workaround_lpt_lp - K1 workaround on Lynxpoint-LP * @hw: pointer to the HW structure * @link: link up bool flag * * When K1 is enabled for 1Gbps, the MAC can miss 2 DMA completion indications * preventing further DMA write requests. Workaround the issue by disabling * the de-assertion of the clock request when in 1Gpbs mode. * Also, set appropriate Tx re-transmission timeouts for 10 and 100Half link * speeds in order to avoid Tx hangs. **/ static s32 e1000_k1_workaround_lpt_lp(struct e1000_hw *hw, bool link) { u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6); u32 status = E1000_READ_REG(hw, E1000_STATUS); s32 ret_val = E1000_SUCCESS; u16 reg; if (link && (status & E1000_STATUS_SPEED_1000)) { ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; ret_val = e1000_read_kmrn_reg_locked(hw, E1000_KMRNCTRLSTA_K1_CONFIG, ®); if (ret_val) goto release; ret_val = e1000_write_kmrn_reg_locked(hw, E1000_KMRNCTRLSTA_K1_CONFIG, reg & ~E1000_KMRNCTRLSTA_K1_ENABLE); if (ret_val) goto release; usec_delay(10); E1000_WRITE_REG(hw, E1000_FEXTNVM6, fextnvm6 | E1000_FEXTNVM6_REQ_PLL_CLK); ret_val = e1000_write_kmrn_reg_locked(hw, E1000_KMRNCTRLSTA_K1_CONFIG, reg); release: hw->phy.ops.release(hw); } else { /* clear FEXTNVM6 bit 8 on link down or 10/100 */ fextnvm6 &= ~E1000_FEXTNVM6_REQ_PLL_CLK; - if ((hw->phy.revision > 5) || !link || - ((status & E1000_STATUS_SPEED_100) && - (status & E1000_STATUS_FD))) + if (!link || ((status & E1000_STATUS_SPEED_100) && + (status & E1000_STATUS_FD))) goto update_fextnvm6; ret_val = hw->phy.ops.read_reg(hw, I217_INBAND_CTRL, ®); if (ret_val) return ret_val; /* Clear link status transmit timeout */ reg &= ~I217_INBAND_CTRL_LINK_STAT_TX_TIMEOUT_MASK; if (status & E1000_STATUS_SPEED_100) { /* Set inband Tx timeout to 5x10us for 100Half */ reg |= 5 << I217_INBAND_CTRL_LINK_STAT_TX_TIMEOUT_SHIFT; /* Do not extend the K1 entry latency for 100Half */ fextnvm6 &= ~E1000_FEXTNVM6_ENABLE_K1_ENTRY_CONDITION; } else { /* Set inband Tx timeout to 50x10us for 10Full/Half */ reg |= 50 << I217_INBAND_CTRL_LINK_STAT_TX_TIMEOUT_SHIFT; /* Extend the K1 entry latency for 10 Mbps */ fextnvm6 |= E1000_FEXTNVM6_ENABLE_K1_ENTRY_CONDITION; } ret_val = hw->phy.ops.write_reg(hw, I217_INBAND_CTRL, reg); if (ret_val) return ret_val; update_fextnvm6: E1000_WRITE_REG(hw, E1000_FEXTNVM6, fextnvm6); } return ret_val; } +static u64 e1000_ltr2ns(u16 ltr) +{ + u32 value, scale; + + /* Determine the latency in nsec based on the LTR value & scale */ + value = ltr & E1000_LTRV_VALUE_MASK; + scale = (ltr & E1000_LTRV_SCALE_MASK) >> E1000_LTRV_SCALE_SHIFT; + + return value * (1 << (scale * E1000_LTRV_SCALE_FACTOR)); +} + /** + * e1000_platform_pm_pch_lpt - Set platform power management values + * @hw: pointer to the HW structure + * @link: bool indicating link status + * + * Set the Latency Tolerance Reporting (LTR) values for the "PCIe-like" + * GbE MAC in the Lynx Point PCH based on Rx buffer size and link speed + * when link is up (which must not exceed the maximum latency supported + * by the platform), otherwise specify there is no LTR requirement. + * Unlike TRUE-PCIe devices which set the LTR maximum snoop/no-snoop + * latencies in the LTR Extended Capability Structure in the PCIe Extended + * Capability register set, on this device LTR is set by writing the + * equivalent snoop/no-snoop latencies in the LTRV register in the MAC and + * set the SEND bit to send an Intel On-chip System Fabric sideband (IOSF-SB) + * message to the PMC. + * + * Use the LTR value to calculate the Optimized Buffer Flush/Fill (OBFF) + * high-water mark. + **/ +static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) +{ + u32 reg = link << (E1000_LTRV_REQ_SHIFT + E1000_LTRV_NOSNOOP_SHIFT) | + link << E1000_LTRV_REQ_SHIFT | E1000_LTRV_SEND; + u16 lat_enc = 0; /* latency encoded */ + s32 obff_hwm = 0; + + DEBUGFUNC("e1000_platform_pm_pch_lpt"); + + if (link) { + u16 speed, duplex, scale = 0; + u16 max_snoop, max_nosnoop; + u16 max_ltr_enc; /* max LTR latency encoded */ + s64 lat_ns; + s64 value; + u32 rxa; + + if (!hw->mac.max_frame_size) { + DEBUGOUT("max_frame_size not set.\n"); + return -E1000_ERR_CONFIG; + } + + hw->mac.ops.get_link_up_info(hw, &speed, &duplex); + if (!speed) { + DEBUGOUT("Speed not set.\n"); + return -E1000_ERR_CONFIG; + } + + /* Rx Packet Buffer Allocation size (KB) */ + rxa = E1000_READ_REG(hw, E1000_PBA) & E1000_PBA_RXA_MASK; + + /* Determine the maximum latency tolerated by the device. + * + * Per the PCIe spec, the tolerated latencies are encoded as + * a 3-bit encoded scale (only 0-5 are valid) multiplied by + * a 10-bit value (0-1023) to provide a range from 1 ns to + * 2^25*(2^10-1) ns. The scale is encoded as 0=2^0ns, + * 1=2^5ns, 2=2^10ns,...5=2^25ns. + */ + lat_ns = ((s64)rxa * 1024 - + (2 * (s64)hw->mac.max_frame_size)) * 8 * 1000; + if (lat_ns < 0) + lat_ns = 0; + else + lat_ns /= speed; + value = lat_ns; + + while (value > E1000_LTRV_VALUE_MASK) { + scale++; + value = E1000_DIVIDE_ROUND_UP(value, (1 << 5)); + } + if (scale > E1000_LTRV_SCALE_MAX) { + DEBUGOUT1("Invalid LTR latency scale %d\n", scale); + return -E1000_ERR_CONFIG; + } + lat_enc = (u16)((scale << E1000_LTRV_SCALE_SHIFT) | value); + + /* Determine the maximum latency tolerated by the platform */ + e1000_read_pci_cfg(hw, E1000_PCI_LTR_CAP_LPT, &max_snoop); + e1000_read_pci_cfg(hw, E1000_PCI_LTR_CAP_LPT + 2, &max_nosnoop); + max_ltr_enc = E1000_MAX(max_snoop, max_nosnoop); + + if (lat_enc > max_ltr_enc) { + lat_enc = max_ltr_enc; + lat_ns = e1000_ltr2ns(max_ltr_enc); + } + + if (lat_ns) { + lat_ns *= speed * 1000; + lat_ns /= 8; + lat_ns /= 1000000000; + obff_hwm = (s32)(rxa - lat_ns); + } + if ((obff_hwm < 0) || (obff_hwm > E1000_SVT_OFF_HWM_MASK)) { + DEBUGOUT1("Invalid high water mark %d\n", obff_hwm); + return -E1000_ERR_CONFIG; + } + } + + /* Set Snoop and No-Snoop latencies the same */ + reg |= lat_enc | (lat_enc << E1000_LTRV_NOSNOOP_SHIFT); + E1000_WRITE_REG(hw, E1000_LTRV, reg); + + /* Set OBFF high water mark */ + reg = E1000_READ_REG(hw, E1000_SVT) & ~E1000_SVT_OFF_HWM_MASK; + reg |= obff_hwm; + E1000_WRITE_REG(hw, E1000_SVT, reg); + + /* Enable OBFF */ + reg = E1000_READ_REG(hw, E1000_SVCR); + reg |= E1000_SVCR_OFF_EN; + /* Always unblock interrupts to the CPU even when the system is + * in OBFF mode. This ensures that small round-robin traffic + * (like ping) does not get dropped or experience long latency. + */ + reg |= E1000_SVCR_OFF_MASKINT; + E1000_WRITE_REG(hw, E1000_SVCR, reg); + + return E1000_SUCCESS; +} + +/** + * e1000_set_obff_timer_pch_lpt - Update Optimized Buffer Flush/Fill timer + * @hw: pointer to the HW structure + * @itr: interrupt throttling rate + * + * Configure OBFF with the updated interrupt rate. + **/ +static s32 e1000_set_obff_timer_pch_lpt(struct e1000_hw *hw, u32 itr) +{ + u32 svcr; + s32 timer; + + DEBUGFUNC("e1000_set_obff_timer_pch_lpt"); + + /* Convert ITR value into microseconds for OBFF timer */ + timer = itr & E1000_ITR_MASK; + timer = (timer * E1000_ITR_MULT) / 1000; + + if ((timer < 0) || (timer > E1000_ITR_MASK)) { + DEBUGOUT1("Invalid OBFF timer %d\n", timer); + return -E1000_ERR_CONFIG; + } + + svcr = E1000_READ_REG(hw, E1000_SVCR); + svcr &= ~E1000_SVCR_OFF_TIMER_MASK; + svcr |= timer << E1000_SVCR_OFF_TIMER_SHIFT; + E1000_WRITE_REG(hw, E1000_SVCR, svcr); + + return E1000_SUCCESS; +} + +/** * e1000_enable_ulp_lpt_lp - configure Ultra Low Power mode for LynxPoint-LP * @hw: pointer to the HW structure * @to_sx: boolean indicating a system power state transition to Sx * * When link is down, configure ULP mode to significantly reduce the power * to the PHY. If on a Manageability Engine (ME) enabled system, tell the * ME firmware to start the ULP configuration. If not on an ME enabled * system, configure the ULP mode by software. */ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) { u32 mac_reg; s32 ret_val = E1000_SUCCESS; u16 phy_reg; - u16 oem_reg = 0; if ((hw->mac.type < e1000_pch_lpt) || (hw->device_id == E1000_DEV_ID_PCH_LPT_I217_LM) || (hw->device_id == E1000_DEV_ID_PCH_LPT_I217_V) || (hw->device_id == E1000_DEV_ID_PCH_I218_LM2) || (hw->device_id == E1000_DEV_ID_PCH_I218_V2) || (hw->dev_spec.ich8lan.ulp_state == e1000_ulp_state_on)) return 0; if (E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID) { /* Request ME configure ULP mode in the PHY */ mac_reg = E1000_READ_REG(hw, E1000_H2ME); mac_reg |= E1000_H2ME_ULP | E1000_H2ME_ENFORCE_SETTINGS; E1000_WRITE_REG(hw, E1000_H2ME, mac_reg); goto out; } if (!to_sx) { int i = 0; /* Poll up to 5 seconds for Cable Disconnected indication */ while (!(E1000_READ_REG(hw, E1000_FEXT) & E1000_FEXT_PHY_CABLE_DISCONNECTED)) { /* Bail if link is re-acquired */ if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU) return -E1000_ERR_PHY; if (i++ == 100) break; msec_delay(50); } DEBUGOUT2("CABLE_DISCONNECTED %s set after %dmsec\n", (E1000_READ_REG(hw, E1000_FEXT) & E1000_FEXT_PHY_CABLE_DISCONNECTED) ? "" : "not", i * 50); } ret_val = hw->phy.ops.acquire(hw); if (ret_val) goto out; /* Force SMBus mode in PHY */ ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); if (ret_val) goto release; phy_reg |= CV_SMB_CTRL_FORCE_SMBUS; e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); /* Force SMBus mode in MAC */ mac_reg = E1000_READ_REG(hw, E1000_CTRL_EXT); mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg); - /* Si workaround for ULP entry flow on i127/rev6 h/w. Enable - * LPLU and disable Gig speed when entering ULP - */ - if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6)) { - ret_val = e1000_read_phy_reg_hv_locked(hw, HV_OEM_BITS, - &oem_reg); - if (ret_val) - goto release; - - phy_reg = oem_reg; - phy_reg |= HV_OEM_BITS_LPLU | HV_OEM_BITS_GBE_DIS; - - ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS, - phy_reg); - - if (ret_val) - goto release; - } - /* Set Inband ULP Exit, Reset to SMBus mode and * Disable SMBus Release on PERST# in PHY */ ret_val = e1000_read_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, &phy_reg); if (ret_val) goto release; phy_reg |= (I218_ULP_CONFIG1_RESET_TO_SMBUS | I218_ULP_CONFIG1_DISABLE_SMB_PERST); if (to_sx) { if (E1000_READ_REG(hw, E1000_WUFC) & E1000_WUFC_LNKC) phy_reg |= I218_ULP_CONFIG1_WOL_HOST; - else - phy_reg &= ~I218_ULP_CONFIG1_WOL_HOST; phy_reg |= I218_ULP_CONFIG1_STICKY_ULP; - phy_reg &= ~I218_ULP_CONFIG1_INBAND_EXIT; } else { phy_reg |= I218_ULP_CONFIG1_INBAND_EXIT; - phy_reg &= ~I218_ULP_CONFIG1_STICKY_ULP; - phy_reg &= ~I218_ULP_CONFIG1_WOL_HOST; } e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg); /* Set Disable SMBus Release on PERST# in MAC */ mac_reg = E1000_READ_REG(hw, E1000_FEXTNVM7); mac_reg |= E1000_FEXTNVM7_DISABLE_SMB_PERST; E1000_WRITE_REG(hw, E1000_FEXTNVM7, mac_reg); /* Commit ULP changes in PHY by starting auto ULP configuration */ phy_reg |= I218_ULP_CONFIG1_START; e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg); - - if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6) && - to_sx && (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { - - ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS, - oem_reg); - if (ret_val) - goto release; - } - release: hw->phy.ops.release(hw); out: if (ret_val) DEBUGOUT1("Error in ULP enable flow: %d\n", ret_val); else hw->dev_spec.ich8lan.ulp_state = e1000_ulp_state_on; return ret_val; } /** * e1000_disable_ulp_lpt_lp - unconfigure Ultra Low Power mode for LynxPoint-LP * @hw: pointer to the HW structure * @force: boolean indicating whether or not to force disabling ULP * * Un-configure ULP mode when link is up, the system is transitioned from * Sx or the driver is unloaded. If on a Manageability Engine (ME) enabled * system, poll for an indication from ME that ULP has been un-configured. * If not on an ME enabled system, un-configure the ULP mode by software. * * During nominal operation, this function is called when link is acquired * to disable ULP mode (force=FALSE); otherwise, for example when unloading * the driver or during Sx->S0 transitions, this is called with force=TRUE * to forcibly disable ULP. */ s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) { s32 ret_val = E1000_SUCCESS; u32 mac_reg; u16 phy_reg; int i = 0; if ((hw->mac.type < e1000_pch_lpt) || (hw->device_id == E1000_DEV_ID_PCH_LPT_I217_LM) || (hw->device_id == E1000_DEV_ID_PCH_LPT_I217_V) || (hw->device_id == E1000_DEV_ID_PCH_I218_LM2) || (hw->device_id == E1000_DEV_ID_PCH_I218_V2) || (hw->dev_spec.ich8lan.ulp_state == e1000_ulp_state_off)) return 0; if (E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID) { if (force) { /* Request ME un-configure ULP mode in the PHY */ mac_reg = E1000_READ_REG(hw, E1000_H2ME); mac_reg &= ~E1000_H2ME_ULP; mac_reg |= E1000_H2ME_ENFORCE_SETTINGS; E1000_WRITE_REG(hw, E1000_H2ME, mac_reg); } /* Poll up to 100msec for ME to clear ULP_CFG_DONE */ while (E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_ULP_CFG_DONE) { if (i++ == 10) { ret_val = -E1000_ERR_PHY; goto out; } msec_delay(10); } DEBUGOUT1("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10); if (force) { mac_reg = E1000_READ_REG(hw, E1000_H2ME); mac_reg &= ~E1000_H2ME_ENFORCE_SETTINGS; E1000_WRITE_REG(hw, E1000_H2ME, mac_reg); } else { /* Clear H2ME.ULP after ME ULP configuration */ mac_reg = E1000_READ_REG(hw, E1000_H2ME); mac_reg &= ~E1000_H2ME_ULP; E1000_WRITE_REG(hw, E1000_H2ME, mac_reg); } goto out; } ret_val = hw->phy.ops.acquire(hw); if (ret_val) goto out; if (force) /* Toggle LANPHYPC Value bit */ e1000_toggle_lanphypc_pch_lpt(hw); /* Unforce SMBus mode in PHY */ ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); if (ret_val) { /* The MAC might be in PCIe mode, so temporarily force to * SMBus mode in order to access the PHY. */ mac_reg = E1000_READ_REG(hw, E1000_CTRL_EXT); mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg); msec_delay(50); ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); if (ret_val) goto release; } phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS; e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); /* Unforce SMBus mode in MAC */ mac_reg = E1000_READ_REG(hw, E1000_CTRL_EXT); mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg); /* When ULP mode was previously entered, K1 was disabled by the * hardware. Re-Enable K1 in the PHY when exiting ULP. */ ret_val = e1000_read_phy_reg_hv_locked(hw, HV_PM_CTRL, &phy_reg); if (ret_val) goto release; phy_reg |= HV_PM_CTRL_K1_ENABLE; e1000_write_phy_reg_hv_locked(hw, HV_PM_CTRL, phy_reg); /* Clear ULP enabled configuration */ ret_val = e1000_read_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, &phy_reg); if (ret_val) goto release; phy_reg &= ~(I218_ULP_CONFIG1_IND | I218_ULP_CONFIG1_STICKY_ULP | I218_ULP_CONFIG1_RESET_TO_SMBUS | I218_ULP_CONFIG1_WOL_HOST | I218_ULP_CONFIG1_INBAND_EXIT | I218_ULP_CONFIG1_DISABLE_SMB_PERST); e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg); /* Commit ULP changes by starting auto ULP configuration */ phy_reg |= I218_ULP_CONFIG1_START; e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg); /* Clear Disable SMBus Release on PERST# in MAC */ mac_reg = E1000_READ_REG(hw, E1000_FEXTNVM7); mac_reg &= ~E1000_FEXTNVM7_DISABLE_SMB_PERST; E1000_WRITE_REG(hw, E1000_FEXTNVM7, mac_reg); release: hw->phy.ops.release(hw); if (force) { hw->phy.ops.reset(hw); msec_delay(50); } out: if (ret_val) DEBUGOUT1("Error in ULP disable flow: %d\n", ret_val); else hw->dev_spec.ich8lan.ulp_state = e1000_ulp_state_off; return ret_val; } /** * e1000_check_for_copper_link_ich8lan - Check for link (Copper) * @hw: pointer to the HW structure * * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. **/ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; - s32 ret_val, tipg_reg = 0; - u16 emi_addr, emi_val = 0; + s32 ret_val; bool link; u16 phy_reg; DEBUGFUNC("e1000_check_for_copper_link_ich8lan"); /* We only want to go out to the PHY registers to see if Auto-Neg * has completed and/or if our link status has changed. The * get_link_status flag is set upon receiving a Link Status * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) return E1000_SUCCESS; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex * of the PHY. */ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); if (ret_val) return ret_val; if (hw->mac.type == e1000_pchlan) { ret_val = e1000_k1_gig_workaround_hv(hw, link); if (ret_val) return ret_val; } /* When connected at 10Mbps half-duplex, some parts are excessively * aggressive resulting in many collisions. To avoid this, increase * the IPG and reduce Rx latency in the PHY. */ if (((hw->mac.type == e1000_pch2lan) || - (hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt)) && link) { - u16 speed, duplex; + (hw->mac.type == e1000_pch_lpt)) && link) { + u32 reg; + reg = E1000_READ_REG(hw, E1000_STATUS); + if (!(reg & (E1000_STATUS_FD | E1000_STATUS_SPEED_MASK))) { + u16 emi_addr; - e1000_get_speed_and_duplex_copper_generic(hw, &speed, &duplex); - tipg_reg = E1000_READ_REG(hw, E1000_TIPG); - tipg_reg &= ~E1000_TIPG_IPGT_MASK; + reg = E1000_READ_REG(hw, E1000_TIPG); + reg &= ~E1000_TIPG_IPGT_MASK; + reg |= 0xFF; + E1000_WRITE_REG(hw, E1000_TIPG, reg); - if (duplex == HALF_DUPLEX && speed == SPEED_10) { - tipg_reg |= 0xFF; /* Reduce Rx latency in analog PHY */ - emi_val = 0; - } else if (hw->mac.type == e1000_pch_spt && - duplex == FULL_DUPLEX && speed != SPEED_1000) { - tipg_reg |= 0xC; - emi_val = 1; - } else { - /* Roll back the default values */ - tipg_reg |= 0x08; - emi_val = 1; - } + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; - E1000_WRITE_REG(hw, E1000_TIPG, tipg_reg); + if (hw->mac.type == e1000_pch2lan) + emi_addr = I82579_RX_CONFIG; + else + emi_addr = I217_RX_CONFIG; + ret_val = e1000_write_emi_reg_locked(hw, emi_addr, 0); - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) - return ret_val; + hw->phy.ops.release(hw); - if (hw->mac.type == e1000_pch2lan) - emi_addr = I82579_RX_CONFIG; - else - emi_addr = I217_RX_CONFIG; - ret_val = e1000_write_emi_reg_locked(hw, emi_addr, emi_val); - - hw->phy.ops.release(hw); - - if (ret_val) - return ret_val; - - if (hw->mac.type == e1000_pch_spt) { - u16 data; - u16 ptr_gap; - - if (speed == SPEED_1000) { - ret_val = hw->phy.ops.acquire(hw); - if (ret_val) - return ret_val; - - ret_val = hw->phy.ops.read_reg_locked(hw, - PHY_REG(776, 20), - &data); - if (ret_val) { - hw->phy.ops.release(hw); - return ret_val; - } - - ptr_gap = (data & (0x3FF << 2)) >> 2; - if (ptr_gap < 0x18) { - data &= ~(0x3FF << 2); - data |= (0x18 << 2); - ret_val = - hw->phy.ops.write_reg_locked(hw, - PHY_REG(776, 20), data); - } - hw->phy.ops.release(hw); - if (ret_val) - return ret_val; - } + if (ret_val) + return ret_val; } } - /* I217 Packet Loss issue: - * ensure that FEXTNVM4 Beacon Duration is set correctly - * on power up. - * Set the Beacon Duration for I217 to 8 usec - */ - if ((hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt)) { - u32 mac_reg; - - mac_reg = E1000_READ_REG(hw, E1000_FEXTNVM4); - mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK; - mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC; - E1000_WRITE_REG(hw, E1000_FEXTNVM4, mac_reg); - } - /* Work-around I218 hang issue */ if ((hw->device_id == E1000_DEV_ID_PCH_LPTLP_I218_LM) || (hw->device_id == E1000_DEV_ID_PCH_LPTLP_I218_V) || (hw->device_id == E1000_DEV_ID_PCH_I218_LM3) || (hw->device_id == E1000_DEV_ID_PCH_I218_V3)) { ret_val = e1000_k1_workaround_lpt_lp(hw, link); if (ret_val) return ret_val; } + if (hw->mac.type == e1000_pch_lpt) { + /* Set platform power management values for + * Latency Tolerance Reporting (LTR) + * Optimized Buffer Flush/Fill (OBFF) + */ + ret_val = e1000_platform_pm_pch_lpt(hw, link); + if (ret_val) + return ret_val; + } + /* Clear link partner's EEE ability */ hw->dev_spec.ich8lan.eee_lp_ability = 0; - /* FEXTNVM6 K1-off workaround */ - if (hw->mac.type == e1000_pch_spt) { - u32 pcieanacfg = E1000_READ_REG(hw, E1000_PCIEANACFG); - u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6); - - if (pcieanacfg & E1000_FEXTNVM6_K1_OFF_ENABLE) - fextnvm6 |= E1000_FEXTNVM6_K1_OFF_ENABLE; - else - fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE; - - E1000_WRITE_REG(hw, E1000_FEXTNVM6, fextnvm6); - } - if (!link) return E1000_SUCCESS; /* No link detected */ mac->get_link_status = FALSE; switch (hw->mac.type) { case e1000_pch2lan: ret_val = e1000_k1_workaround_lv(hw); if (ret_val) return ret_val; /* fall-thru */ case e1000_pchlan: if (hw->phy.type == e1000_phy_82578) { ret_val = e1000_link_stall_workaround_hv(hw); if (ret_val) return ret_val; } /* Workaround for PCHx parts in half-duplex: * Set the number of preambles removed from the packet * when it is passed from the PHY to the MAC to prevent * the MAC from misinterpreting the packet type. */ hw->phy.ops.read_reg(hw, HV_KMRN_FIFO_CTRLSTA, &phy_reg); phy_reg &= ~HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK; if ((E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FD) != E1000_STATUS_FD) phy_reg |= (1 << HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT); hw->phy.ops.write_reg(hw, HV_KMRN_FIFO_CTRLSTA, phy_reg); break; default: break; } /* Check if there was DownShift, must be checked * immediately after link-up */ e1000_check_downshift_generic(hw); /* Enable/Disable EEE after link up */ if (hw->phy.type > e1000_phy_82579) { ret_val = e1000_set_eee_pchlan(hw); if (ret_val) return ret_val; } /* If we are forcing speed/duplex, then we simply return since * we have already determined whether we have link or not. */ if (!mac->autoneg) return -E1000_ERR_CONFIG; /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to * configure Collision Distance in the MAC. */ mac->ops.config_collision_dist(hw); /* Configure Flow Control now that Auto-Neg has completed. * First, we need to restore the desired flow control * settings because we may have had to re-autoneg with a * different link partner. */ ret_val = e1000_config_fc_after_link_up_generic(hw); if (ret_val) DEBUGOUT("Error configuring flow control\n"); return ret_val; } /** * e1000_init_function_pointers_ich8lan - Initialize ICH8 function pointers * @hw: pointer to the HW structure * * Initialize family-specific function pointers for PHY, MAC, and NVM. **/ void e1000_init_function_pointers_ich8lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_init_function_pointers_ich8lan"); hw->mac.ops.init_params = e1000_init_mac_params_ich8lan; hw->nvm.ops.init_params = e1000_init_nvm_params_ich8lan; switch (hw->mac.type) { case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: hw->phy.ops.init_params = e1000_init_phy_params_ich8lan; break; case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: - case e1000_pch_spt: hw->phy.ops.init_params = e1000_init_phy_params_pchlan; break; default: break; } } /** * e1000_acquire_nvm_ich8lan - Acquire NVM mutex * @hw: pointer to the HW structure * * Acquires the mutex for performing NVM operations. **/ static s32 e1000_acquire_nvm_ich8lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_acquire_nvm_ich8lan"); E1000_MUTEX_LOCK(&hw->dev_spec.ich8lan.nvm_mutex); return E1000_SUCCESS; } /** * e1000_release_nvm_ich8lan - Release NVM mutex * @hw: pointer to the HW structure * * Releases the mutex used while performing NVM operations. **/ static void e1000_release_nvm_ich8lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_release_nvm_ich8lan"); E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.nvm_mutex); return; } /** * e1000_acquire_swflag_ich8lan - Acquire software control flag * @hw: pointer to the HW structure * * Acquires the software control flag for performing PHY and select * MAC CSR accesses. **/ static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw) { u32 extcnf_ctrl, timeout = PHY_CFG_TIMEOUT; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_acquire_swflag_ich8lan"); E1000_MUTEX_LOCK(&hw->dev_spec.ich8lan.swflag_mutex); while (timeout) { extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); if (!(extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG)) break; msec_delay_irq(1); timeout--; } if (!timeout) { DEBUGOUT("SW has already locked the resource.\n"); ret_val = -E1000_ERR_CONFIG; goto out; } timeout = SW_FLAG_TIMEOUT; extcnf_ctrl |= E1000_EXTCNF_CTRL_SWFLAG; E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); while (timeout) { extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); if (extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG) break; msec_delay_irq(1); timeout--; } if (!timeout) { DEBUGOUT2("Failed to acquire the semaphore, FW or HW has it: FWSM=0x%8.8x EXTCNF_CTRL=0x%8.8x)\n", E1000_READ_REG(hw, E1000_FWSM), extcnf_ctrl); extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG; E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); ret_val = -E1000_ERR_CONFIG; goto out; } out: if (ret_val) E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex); return ret_val; } /** * e1000_release_swflag_ich8lan - Release software control flag * @hw: pointer to the HW structure * * Releases the software control flag for performing PHY and select * MAC CSR accesses. **/ static void e1000_release_swflag_ich8lan(struct e1000_hw *hw) { u32 extcnf_ctrl; DEBUGFUNC("e1000_release_swflag_ich8lan"); extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); if (extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG) { extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG; E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); } else { DEBUGOUT("Semaphore unexpectedly released by sw/fw/hw\n"); } E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex); return; } /** * e1000_check_mng_mode_ich8lan - Checks management mode * @hw: pointer to the HW structure * * This checks if the adapter has any manageability enabled. * This is a function pointer entry point only called by read/write * routines for the PHY and NVM parts. **/ static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw) { u32 fwsm; DEBUGFUNC("e1000_check_mng_mode_ich8lan"); fwsm = E1000_READ_REG(hw, E1000_FWSM); return (fwsm & E1000_ICH_FWSM_FW_VALID) && ((fwsm & E1000_FWSM_MODE_MASK) == (E1000_ICH_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT)); } /** * e1000_check_mng_mode_pchlan - Checks management mode * @hw: pointer to the HW structure * * This checks if the adapter has iAMT enabled. * This is a function pointer entry point only called by read/write * routines for the PHY and NVM parts. **/ static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw) { u32 fwsm; DEBUGFUNC("e1000_check_mng_mode_pchlan"); fwsm = E1000_READ_REG(hw, E1000_FWSM); return (fwsm & E1000_ICH_FWSM_FW_VALID) && (fwsm & (E1000_ICH_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT)); } /** * e1000_rar_set_pch2lan - Set receive address register * @hw: pointer to the HW structure * @addr: pointer to the receive address * @index: receive address array register * * Sets the receive address array register at index to the address passed * in by addr. For 82579, RAR[0] is the base address register that is to * contain the MAC address but RAR[1-6] are reserved for manageability (ME). * Use SHRA[0-3] in place of those reserved for ME. **/ static int e1000_rar_set_pch2lan(struct e1000_hw *hw, u8 *addr, u32 index) { u32 rar_low, rar_high; DEBUGFUNC("e1000_rar_set_pch2lan"); /* HW expects these in little endian so we reverse the byte order * from network order (big endian) to little endian */ rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); /* If MAC address zero, no need to set the AV bit */ if (rar_low || rar_high) rar_high |= E1000_RAH_AV; if (index == 0) { E1000_WRITE_REG(hw, E1000_RAL(index), rar_low); E1000_WRITE_FLUSH(hw); E1000_WRITE_REG(hw, E1000_RAH(index), rar_high); E1000_WRITE_FLUSH(hw); return E1000_SUCCESS; } /* RAR[1-6] are owned by manageability. Skip those and program the * next address into the SHRA register array. */ if (index < (u32) (hw->mac.rar_entry_count)) { s32 ret_val; ret_val = e1000_acquire_swflag_ich8lan(hw); if (ret_val) goto out; E1000_WRITE_REG(hw, E1000_SHRAL(index - 1), rar_low); E1000_WRITE_FLUSH(hw); E1000_WRITE_REG(hw, E1000_SHRAH(index - 1), rar_high); E1000_WRITE_FLUSH(hw); e1000_release_swflag_ich8lan(hw); /* verify the register updates */ if ((E1000_READ_REG(hw, E1000_SHRAL(index - 1)) == rar_low) && (E1000_READ_REG(hw, E1000_SHRAH(index - 1)) == rar_high)) return E1000_SUCCESS; DEBUGOUT2("SHRA[%d] might be locked by ME - FWSM=0x%8.8x\n", (index - 1), E1000_READ_REG(hw, E1000_FWSM)); } out: DEBUGOUT1("Failed to write receive address at index %d\n", index); return -E1000_ERR_CONFIG; } /** * e1000_rar_set_pch_lpt - Set receive address registers * @hw: pointer to the HW structure * @addr: pointer to the receive address * @index: receive address array register * * Sets the receive address register array at index to the address passed * in by addr. For LPT, RAR[0] is the base address register that is to * contain the MAC address. SHRA[0-10] are the shared receive address * registers that are shared between the Host and manageability engine (ME). **/ static int e1000_rar_set_pch_lpt(struct e1000_hw *hw, u8 *addr, u32 index) { u32 rar_low, rar_high; u32 wlock_mac; DEBUGFUNC("e1000_rar_set_pch_lpt"); /* HW expects these in little endian so we reverse the byte order * from network order (big endian) to little endian */ rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); /* If MAC address zero, no need to set the AV bit */ if (rar_low || rar_high) rar_high |= E1000_RAH_AV; if (index == 0) { E1000_WRITE_REG(hw, E1000_RAL(index), rar_low); E1000_WRITE_FLUSH(hw); E1000_WRITE_REG(hw, E1000_RAH(index), rar_high); E1000_WRITE_FLUSH(hw); return E1000_SUCCESS; } /* The manageability engine (ME) can lock certain SHRAR registers that * it is using - those registers are unavailable for use. */ if (index < hw->mac.rar_entry_count) { wlock_mac = E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_WLOCK_MAC_MASK; wlock_mac >>= E1000_FWSM_WLOCK_MAC_SHIFT; /* Check if all SHRAR registers are locked */ if (wlock_mac == 1) goto out; if ((wlock_mac == 0) || (index <= wlock_mac)) { s32 ret_val; ret_val = e1000_acquire_swflag_ich8lan(hw); if (ret_val) goto out; E1000_WRITE_REG(hw, E1000_SHRAL_PCH_LPT(index - 1), rar_low); E1000_WRITE_FLUSH(hw); E1000_WRITE_REG(hw, E1000_SHRAH_PCH_LPT(index - 1), rar_high); E1000_WRITE_FLUSH(hw); e1000_release_swflag_ich8lan(hw); /* verify the register updates */ if ((E1000_READ_REG(hw, E1000_SHRAL_PCH_LPT(index - 1)) == rar_low) && (E1000_READ_REG(hw, E1000_SHRAH_PCH_LPT(index - 1)) == rar_high)) return E1000_SUCCESS; } } out: DEBUGOUT1("Failed to write receive address at index %d\n", index); return -E1000_ERR_CONFIG; } /** * e1000_update_mc_addr_list_pch2lan - Update Multicast addresses * @hw: pointer to the HW structure * @mc_addr_list: array of multicast addresses to program * @mc_addr_count: number of multicast addresses to program * * Updates entire Multicast Table Array of the PCH2 MAC and PHY. * The caller must have a packed mc_addr_list of multicast addresses. **/ static void e1000_update_mc_addr_list_pch2lan(struct e1000_hw *hw, u8 *mc_addr_list, u32 mc_addr_count) { u16 phy_reg = 0; int i; s32 ret_val; DEBUGFUNC("e1000_update_mc_addr_list_pch2lan"); e1000_update_mc_addr_list_generic(hw, mc_addr_list, mc_addr_count); ret_val = hw->phy.ops.acquire(hw); if (ret_val) return; ret_val = e1000_enable_phy_wakeup_reg_access_bm(hw, &phy_reg); if (ret_val) goto release; for (i = 0; i < hw->mac.mta_reg_count; i++) { hw->phy.ops.write_reg_page(hw, BM_MTA(i), (u16)(hw->mac.mta_shadow[i] & 0xFFFF)); hw->phy.ops.write_reg_page(hw, (BM_MTA(i) + 1), (u16)((hw->mac.mta_shadow[i] >> 16) & 0xFFFF)); } e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg); release: hw->phy.ops.release(hw); } /** * e1000_check_reset_block_ich8lan - Check if PHY reset is blocked * @hw: pointer to the HW structure * * Checks if firmware is blocking the reset of the PHY. * This is a function pointer entry point only called by * reset routines. **/ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) { u32 fwsm; bool blocked = FALSE; int i = 0; DEBUGFUNC("e1000_check_reset_block_ich8lan"); do { fwsm = E1000_READ_REG(hw, E1000_FWSM); if (!(fwsm & E1000_ICH_FWSM_RSPCIPHY)) { blocked = TRUE; msec_delay(10); continue; } blocked = FALSE; } while (blocked && (i++ < 10)); return blocked ? E1000_BLK_PHY_RESET : E1000_SUCCESS; } /** * e1000_write_smbus_addr - Write SMBus address to PHY needed during Sx states * @hw: pointer to the HW structure * * Assumes semaphore already acquired. * **/ static s32 e1000_write_smbus_addr(struct e1000_hw *hw) { u16 phy_data; u32 strap = E1000_READ_REG(hw, E1000_STRAP); u32 freq = (strap & E1000_STRAP_SMT_FREQ_MASK) >> E1000_STRAP_SMT_FREQ_SHIFT; s32 ret_val; strap &= E1000_STRAP_SMBUS_ADDRESS_MASK; ret_val = e1000_read_phy_reg_hv_locked(hw, HV_SMB_ADDR, &phy_data); if (ret_val) return ret_val; phy_data &= ~HV_SMB_ADDR_MASK; phy_data |= (strap >> E1000_STRAP_SMBUS_ADDRESS_SHIFT); phy_data |= HV_SMB_ADDR_PEC_EN | HV_SMB_ADDR_VALID; if (hw->phy.type == e1000_phy_i217) { /* Restore SMBus frequency */ if (freq--) { phy_data &= ~HV_SMB_ADDR_FREQ_MASK; phy_data |= (freq & (1 << 0)) << HV_SMB_ADDR_FREQ_LOW_SHIFT; phy_data |= (freq & (1 << 1)) << (HV_SMB_ADDR_FREQ_HIGH_SHIFT - 1); } else { DEBUGOUT("Unsupported SMB frequency in PHY\n"); } } return e1000_write_phy_reg_hv_locked(hw, HV_SMB_ADDR, phy_data); } /** * e1000_sw_lcd_config_ich8lan - SW-based LCD Configuration * @hw: pointer to the HW structure * * SW should configure the LCD from the NVM extended configuration region * as a workaround for certain parts. **/ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; u32 i, data, cnf_size, cnf_base_addr, sw_cfg_mask; s32 ret_val = E1000_SUCCESS; u16 word_addr, reg_data, reg_addr, phy_page = 0; DEBUGFUNC("e1000_sw_lcd_config_ich8lan"); /* Initialize the PHY from the NVM on ICH platforms. This * is needed due to an issue where the NVM configuration is * not properly autoloaded after power transitions. * Therefore, after each PHY reset, we will load the * configuration data out of the NVM manually. */ switch (hw->mac.type) { case e1000_ich8lan: if (phy->type != e1000_phy_igp_3) return ret_val; if ((hw->device_id == E1000_DEV_ID_ICH8_IGP_AMT) || (hw->device_id == E1000_DEV_ID_ICH8_IGP_C)) { sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG; break; } /* Fall-thru */ case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: - case e1000_pch_spt: sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M; break; default: return ret_val; } ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; data = E1000_READ_REG(hw, E1000_FEXTNVM); if (!(data & sw_cfg_mask)) goto release; /* Make sure HW does not configure LCD from PHY * extended configuration before SW configuration */ data = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); if ((hw->mac.type < e1000_pch2lan) && (data & E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE)) goto release; cnf_size = E1000_READ_REG(hw, E1000_EXTCNF_SIZE); cnf_size &= E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK; cnf_size >>= E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT; if (!cnf_size) goto release; cnf_base_addr = data & E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK; cnf_base_addr >>= E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT; if (((hw->mac.type == e1000_pchlan) && !(data & E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE)) || (hw->mac.type > e1000_pchlan)) { /* HW configures the SMBus address and LEDs when the * OEM and LCD Write Enable bits are set in the NVM. * When both NVM bits are cleared, SW will configure * them instead. */ ret_val = e1000_write_smbus_addr(hw); if (ret_val) goto release; data = E1000_READ_REG(hw, E1000_LEDCTL); ret_val = e1000_write_phy_reg_hv_locked(hw, HV_LED_CONFIG, (u16)data); if (ret_val) goto release; } /* Configure LCD from extended configuration region. */ /* cnf_base_addr is in DWORD */ word_addr = (u16)(cnf_base_addr << 1); for (i = 0; i < cnf_size; i++) { ret_val = hw->nvm.ops.read(hw, (word_addr + i * 2), 1, ®_data); if (ret_val) goto release; ret_val = hw->nvm.ops.read(hw, (word_addr + i * 2 + 1), 1, ®_addr); if (ret_val) goto release; /* Save off the PHY page for future writes. */ if (reg_addr == IGP01E1000_PHY_PAGE_SELECT) { phy_page = reg_data; continue; } reg_addr &= PHY_REG_MASK; reg_addr |= phy_page; ret_val = phy->ops.write_reg_locked(hw, (u32)reg_addr, reg_data); if (ret_val) goto release; } release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_k1_gig_workaround_hv - K1 Si workaround * @hw: pointer to the HW structure * @link: link up bool flag * * If K1 is enabled for 1Gbps, the MAC might stall when transitioning * from a lower speed. This workaround disables K1 whenever link is at 1Gig * If link is down, the function will restore the default K1 setting located * in the NVM. **/ static s32 e1000_k1_gig_workaround_hv(struct e1000_hw *hw, bool link) { s32 ret_val = E1000_SUCCESS; u16 status_reg = 0; bool k1_enable = hw->dev_spec.ich8lan.nvm_k1_enabled; DEBUGFUNC("e1000_k1_gig_workaround_hv"); if (hw->mac.type != e1000_pchlan) return E1000_SUCCESS; /* Wrap the whole flow with the sw flag */ ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; /* Disable K1 when link is 1Gbps, otherwise use the NVM setting */ if (link) { if (hw->phy.type == e1000_phy_82578) { ret_val = hw->phy.ops.read_reg_locked(hw, BM_CS_STATUS, &status_reg); if (ret_val) goto release; status_reg &= (BM_CS_STATUS_LINK_UP | BM_CS_STATUS_RESOLVED | BM_CS_STATUS_SPEED_MASK); if (status_reg == (BM_CS_STATUS_LINK_UP | BM_CS_STATUS_RESOLVED | BM_CS_STATUS_SPEED_1000)) k1_enable = FALSE; } if (hw->phy.type == e1000_phy_82577) { ret_val = hw->phy.ops.read_reg_locked(hw, HV_M_STATUS, &status_reg); if (ret_val) goto release; status_reg &= (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE | HV_M_STATUS_SPEED_MASK); if (status_reg == (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE | HV_M_STATUS_SPEED_1000)) k1_enable = FALSE; } /* Link stall fix for link up */ ret_val = hw->phy.ops.write_reg_locked(hw, PHY_REG(770, 19), 0x0100); if (ret_val) goto release; } else { /* Link stall fix for link down */ ret_val = hw->phy.ops.write_reg_locked(hw, PHY_REG(770, 19), 0x4100); if (ret_val) goto release; } ret_val = e1000_configure_k1_ich8lan(hw, k1_enable); release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_configure_k1_ich8lan - Configure K1 power state * @hw: pointer to the HW structure * @enable: K1 state to configure * * Configure the K1 power state based on the provided parameter. * Assumes semaphore already acquired. * * Success returns 0, Failure returns -E1000_ERR_PHY (-2) **/ s32 e1000_configure_k1_ich8lan(struct e1000_hw *hw, bool k1_enable) { s32 ret_val; u32 ctrl_reg = 0; u32 ctrl_ext = 0; u32 reg = 0; u16 kmrn_reg = 0; DEBUGFUNC("e1000_configure_k1_ich8lan"); ret_val = e1000_read_kmrn_reg_locked(hw, E1000_KMRNCTRLSTA_K1_CONFIG, &kmrn_reg); if (ret_val) return ret_val; if (k1_enable) kmrn_reg |= E1000_KMRNCTRLSTA_K1_ENABLE; else kmrn_reg &= ~E1000_KMRNCTRLSTA_K1_ENABLE; ret_val = e1000_write_kmrn_reg_locked(hw, E1000_KMRNCTRLSTA_K1_CONFIG, kmrn_reg); if (ret_val) return ret_val; usec_delay(20); ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ctrl_reg = E1000_READ_REG(hw, E1000_CTRL); reg = ctrl_reg & ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); reg |= E1000_CTRL_FRCSPD; E1000_WRITE_REG(hw, E1000_CTRL, reg); E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_SPD_BYPS); E1000_WRITE_FLUSH(hw); usec_delay(20); E1000_WRITE_REG(hw, E1000_CTRL, ctrl_reg); E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); E1000_WRITE_FLUSH(hw); usec_delay(20); return E1000_SUCCESS; } /** * e1000_oem_bits_config_ich8lan - SW-based LCD Configuration * @hw: pointer to the HW structure * @d0_state: boolean if entering d0 or d3 device state * * SW will configure Gbe Disable and LPLU based on the NVM. The four bits are * collectively called OEM bits. The OEM Write Enable bit and SW Config bit * in NVM determines whether HW should configure LPLU and Gbe Disable. **/ static s32 e1000_oem_bits_config_ich8lan(struct e1000_hw *hw, bool d0_state) { s32 ret_val = 0; u32 mac_reg; u16 oem_reg; DEBUGFUNC("e1000_oem_bits_config_ich8lan"); if (hw->mac.type < e1000_pchlan) return ret_val; ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; if (hw->mac.type == e1000_pchlan) { mac_reg = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); if (mac_reg & E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE) goto release; } mac_reg = E1000_READ_REG(hw, E1000_FEXTNVM); if (!(mac_reg & E1000_FEXTNVM_SW_CONFIG_ICH8M)) goto release; mac_reg = E1000_READ_REG(hw, E1000_PHY_CTRL); ret_val = hw->phy.ops.read_reg_locked(hw, HV_OEM_BITS, &oem_reg); if (ret_val) goto release; oem_reg &= ~(HV_OEM_BITS_GBE_DIS | HV_OEM_BITS_LPLU); if (d0_state) { if (mac_reg & E1000_PHY_CTRL_GBE_DISABLE) oem_reg |= HV_OEM_BITS_GBE_DIS; if (mac_reg & E1000_PHY_CTRL_D0A_LPLU) oem_reg |= HV_OEM_BITS_LPLU; } else { if (mac_reg & (E1000_PHY_CTRL_GBE_DISABLE | E1000_PHY_CTRL_NOND0A_GBE_DISABLE)) oem_reg |= HV_OEM_BITS_GBE_DIS; if (mac_reg & (E1000_PHY_CTRL_D0A_LPLU | E1000_PHY_CTRL_NOND0A_LPLU)) oem_reg |= HV_OEM_BITS_LPLU; } /* Set Restart auto-neg to activate the bits */ if ((d0_state || (hw->mac.type != e1000_pchlan)) && !hw->phy.ops.check_reset_block(hw)) oem_reg |= HV_OEM_BITS_RESTART_AN; ret_val = hw->phy.ops.write_reg_locked(hw, HV_OEM_BITS, oem_reg); release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_set_mdio_slow_mode_hv - Set slow MDIO access mode * @hw: pointer to the HW structure **/ static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw) { s32 ret_val; u16 data; DEBUGFUNC("e1000_set_mdio_slow_mode_hv"); ret_val = hw->phy.ops.read_reg(hw, HV_KMRN_MODE_CTRL, &data); if (ret_val) return ret_val; data |= HV_KMRN_MDIO_SLOW; ret_val = hw->phy.ops.write_reg(hw, HV_KMRN_MODE_CTRL, data); return ret_val; } /** * e1000_hv_phy_workarounds_ich8lan - A series of Phy workarounds to be * done after every PHY reset. **/ static s32 e1000_hv_phy_workarounds_ich8lan(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 phy_data; DEBUGFUNC("e1000_hv_phy_workarounds_ich8lan"); if (hw->mac.type != e1000_pchlan) return E1000_SUCCESS; /* Set MDIO slow mode before any other MDIO access */ if (hw->phy.type == e1000_phy_82577) { ret_val = e1000_set_mdio_slow_mode_hv(hw); if (ret_val) return ret_val; } if (((hw->phy.type == e1000_phy_82577) && ((hw->phy.revision == 1) || (hw->phy.revision == 2))) || ((hw->phy.type == e1000_phy_82578) && (hw->phy.revision == 1))) { /* Disable generation of early preamble */ ret_val = hw->phy.ops.write_reg(hw, PHY_REG(769, 25), 0x4431); if (ret_val) return ret_val; /* Preamble tuning for SSC */ ret_val = hw->phy.ops.write_reg(hw, HV_KMRN_FIFO_CTRLSTA, 0xA204); if (ret_val) return ret_val; } if (hw->phy.type == e1000_phy_82578) { /* Return registers to default by doing a soft reset then * writing 0x3140 to the control register. */ if (hw->phy.revision < 2) { e1000_phy_sw_reset_generic(hw); ret_val = hw->phy.ops.write_reg(hw, PHY_CONTROL, 0x3140); } } /* Select page 0 */ ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; hw->phy.addr = 1; ret_val = e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, 0); hw->phy.ops.release(hw); if (ret_val) return ret_val; /* Configure the K1 Si workaround during phy reset assuming there is * link so that it disables K1 if link is in 1Gbps. */ ret_val = e1000_k1_gig_workaround_hv(hw, TRUE); if (ret_val) return ret_val; /* Workaround for link disconnects on a busy hub in half duplex */ ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; ret_val = hw->phy.ops.read_reg_locked(hw, BM_PORT_GEN_CFG, &phy_data); if (ret_val) goto release; ret_val = hw->phy.ops.write_reg_locked(hw, BM_PORT_GEN_CFG, phy_data & 0x00FF); if (ret_val) goto release; /* set MSE higher to enable link to stay up when noise is high */ ret_val = e1000_write_emi_reg_locked(hw, I82577_MSE_THRESHOLD, 0x0034); release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_copy_rx_addrs_to_phy_ich8lan - Copy Rx addresses from MAC to PHY * @hw: pointer to the HW structure **/ void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw) { u32 mac_reg; u16 i, phy_reg = 0; s32 ret_val; DEBUGFUNC("e1000_copy_rx_addrs_to_phy_ich8lan"); ret_val = hw->phy.ops.acquire(hw); if (ret_val) return; ret_val = e1000_enable_phy_wakeup_reg_access_bm(hw, &phy_reg); if (ret_val) goto release; /* Copy both RAL/H (rar_entry_count) and SHRAL/H to PHY */ for (i = 0; i < (hw->mac.rar_entry_count); i++) { mac_reg = E1000_READ_REG(hw, E1000_RAL(i)); hw->phy.ops.write_reg_page(hw, BM_RAR_L(i), (u16)(mac_reg & 0xFFFF)); hw->phy.ops.write_reg_page(hw, BM_RAR_M(i), (u16)((mac_reg >> 16) & 0xFFFF)); mac_reg = E1000_READ_REG(hw, E1000_RAH(i)); hw->phy.ops.write_reg_page(hw, BM_RAR_H(i), (u16)(mac_reg & 0xFFFF)); hw->phy.ops.write_reg_page(hw, BM_RAR_CTRL(i), (u16)((mac_reg & E1000_RAH_AV) >> 16)); } e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg); release: hw->phy.ops.release(hw); } static u32 e1000_calc_rx_da_crc(u8 mac[]) { u32 poly = 0xEDB88320; /* Polynomial for 802.3 CRC calculation */ u32 i, j, mask, crc; DEBUGFUNC("e1000_calc_rx_da_crc"); crc = 0xffffffff; for (i = 0; i < 6; i++) { crc = crc ^ mac[i]; for (j = 8; j > 0; j--) { mask = (crc & 1) * (-1); crc = (crc >> 1) ^ (poly & mask); } } return ~crc; } /** * e1000_lv_jumbo_workaround_ich8lan - required for jumbo frame operation * with 82579 PHY * @hw: pointer to the HW structure * @enable: flag to enable/disable workaround when enabling/disabling jumbos **/ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) { s32 ret_val = E1000_SUCCESS; u16 phy_reg, data; u32 mac_reg; u16 i; DEBUGFUNC("e1000_lv_jumbo_workaround_ich8lan"); if (hw->mac.type < e1000_pch2lan) return E1000_SUCCESS; /* disable Rx path while enabling/disabling workaround */ hw->phy.ops.read_reg(hw, PHY_REG(769, 20), &phy_reg); ret_val = hw->phy.ops.write_reg(hw, PHY_REG(769, 20), phy_reg | (1 << 14)); if (ret_val) return ret_val; if (enable) { /* Write Rx addresses (rar_entry_count for RAL/H, and * SHRAL/H) and initial CRC values to the MAC */ for (i = 0; i < hw->mac.rar_entry_count; i++) { u8 mac_addr[ETH_ADDR_LEN] = {0}; u32 addr_high, addr_low; addr_high = E1000_READ_REG(hw, E1000_RAH(i)); if (!(addr_high & E1000_RAH_AV)) continue; addr_low = E1000_READ_REG(hw, E1000_RAL(i)); mac_addr[0] = (addr_low & 0xFF); mac_addr[1] = ((addr_low >> 8) & 0xFF); mac_addr[2] = ((addr_low >> 16) & 0xFF); mac_addr[3] = ((addr_low >> 24) & 0xFF); mac_addr[4] = (addr_high & 0xFF); mac_addr[5] = ((addr_high >> 8) & 0xFF); E1000_WRITE_REG(hw, E1000_PCH_RAICC(i), e1000_calc_rx_da_crc(mac_addr)); } /* Write Rx addresses to the PHY */ e1000_copy_rx_addrs_to_phy_ich8lan(hw); /* Enable jumbo frame workaround in the MAC */ mac_reg = E1000_READ_REG(hw, E1000_FFLT_DBG); mac_reg &= ~(1 << 14); mac_reg |= (7 << 15); E1000_WRITE_REG(hw, E1000_FFLT_DBG, mac_reg); mac_reg = E1000_READ_REG(hw, E1000_RCTL); mac_reg |= E1000_RCTL_SECRC; E1000_WRITE_REG(hw, E1000_RCTL, mac_reg); ret_val = e1000_read_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_CTRL_OFFSET, &data); if (ret_val) return ret_val; ret_val = e1000_write_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_CTRL_OFFSET, data | (1 << 0)); if (ret_val) return ret_val; ret_val = e1000_read_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_HD_CTRL, &data); if (ret_val) return ret_val; data &= ~(0xF << 8); data |= (0xB << 8); ret_val = e1000_write_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_HD_CTRL, data); if (ret_val) return ret_val; /* Enable jumbo frame workaround in the PHY */ hw->phy.ops.read_reg(hw, PHY_REG(769, 23), &data); data &= ~(0x7F << 5); data |= (0x37 << 5); ret_val = hw->phy.ops.write_reg(hw, PHY_REG(769, 23), data); if (ret_val) return ret_val; hw->phy.ops.read_reg(hw, PHY_REG(769, 16), &data); data &= ~(1 << 13); ret_val = hw->phy.ops.write_reg(hw, PHY_REG(769, 16), data); if (ret_val) return ret_val; hw->phy.ops.read_reg(hw, PHY_REG(776, 20), &data); data &= ~(0x3FF << 2); data |= (E1000_TX_PTR_GAP << 2); ret_val = hw->phy.ops.write_reg(hw, PHY_REG(776, 20), data); if (ret_val) return ret_val; ret_val = hw->phy.ops.write_reg(hw, PHY_REG(776, 23), 0xF100); if (ret_val) return ret_val; hw->phy.ops.read_reg(hw, HV_PM_CTRL, &data); ret_val = hw->phy.ops.write_reg(hw, HV_PM_CTRL, data | (1 << 10)); if (ret_val) return ret_val; } else { /* Write MAC register values back to h/w defaults */ mac_reg = E1000_READ_REG(hw, E1000_FFLT_DBG); mac_reg &= ~(0xF << 14); E1000_WRITE_REG(hw, E1000_FFLT_DBG, mac_reg); mac_reg = E1000_READ_REG(hw, E1000_RCTL); mac_reg &= ~E1000_RCTL_SECRC; E1000_WRITE_REG(hw, E1000_RCTL, mac_reg); ret_val = e1000_read_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_CTRL_OFFSET, &data); if (ret_val) return ret_val; ret_val = e1000_write_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_CTRL_OFFSET, data & ~(1 << 0)); if (ret_val) return ret_val; ret_val = e1000_read_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_HD_CTRL, &data); if (ret_val) return ret_val; data &= ~(0xF << 8); data |= (0xB << 8); ret_val = e1000_write_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_HD_CTRL, data); if (ret_val) return ret_val; /* Write PHY register values back to h/w defaults */ hw->phy.ops.read_reg(hw, PHY_REG(769, 23), &data); data &= ~(0x7F << 5); ret_val = hw->phy.ops.write_reg(hw, PHY_REG(769, 23), data); if (ret_val) return ret_val; hw->phy.ops.read_reg(hw, PHY_REG(769, 16), &data); data |= (1 << 13); ret_val = hw->phy.ops.write_reg(hw, PHY_REG(769, 16), data); if (ret_val) return ret_val; hw->phy.ops.read_reg(hw, PHY_REG(776, 20), &data); data &= ~(0x3FF << 2); data |= (0x8 << 2); ret_val = hw->phy.ops.write_reg(hw, PHY_REG(776, 20), data); if (ret_val) return ret_val; ret_val = hw->phy.ops.write_reg(hw, PHY_REG(776, 23), 0x7E00); if (ret_val) return ret_val; hw->phy.ops.read_reg(hw, HV_PM_CTRL, &data); ret_val = hw->phy.ops.write_reg(hw, HV_PM_CTRL, data & ~(1 << 10)); if (ret_val) return ret_val; } /* re-enable Rx path after enabling/disabling workaround */ return hw->phy.ops.write_reg(hw, PHY_REG(769, 20), phy_reg & ~(1 << 14)); } /** * e1000_lv_phy_workarounds_ich8lan - A series of Phy workarounds to be * done after every PHY reset. **/ static s32 e1000_lv_phy_workarounds_ich8lan(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_lv_phy_workarounds_ich8lan"); if (hw->mac.type != e1000_pch2lan) return E1000_SUCCESS; /* Set MDIO slow mode before any other MDIO access */ ret_val = e1000_set_mdio_slow_mode_hv(hw); if (ret_val) return ret_val; ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; /* set MSE higher to enable link to stay up when noise is high */ ret_val = e1000_write_emi_reg_locked(hw, I82579_MSE_THRESHOLD, 0x0034); if (ret_val) goto release; /* drop link after 5 times MSE threshold was reached */ ret_val = e1000_write_emi_reg_locked(hw, I82579_MSE_LINK_DOWN, 0x0005); release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_k1_gig_workaround_lv - K1 Si workaround * @hw: pointer to the HW structure * * Workaround to set the K1 beacon duration for 82579 parts in 10Mbps * Disable K1 for 1000 and 100 speeds **/ static s32 e1000_k1_workaround_lv(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 status_reg = 0; DEBUGFUNC("e1000_k1_workaround_lv"); if (hw->mac.type != e1000_pch2lan) return E1000_SUCCESS; /* Set K1 beacon duration based on 10Mbs speed */ ret_val = hw->phy.ops.read_reg(hw, HV_M_STATUS, &status_reg); if (ret_val) return ret_val; if ((status_reg & (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE)) == (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE)) { if (status_reg & (HV_M_STATUS_SPEED_1000 | HV_M_STATUS_SPEED_100)) { u16 pm_phy_reg; /* LV 1G/100 Packet drop issue wa */ ret_val = hw->phy.ops.read_reg(hw, HV_PM_CTRL, &pm_phy_reg); if (ret_val) return ret_val; pm_phy_reg &= ~HV_PM_CTRL_K1_ENABLE; ret_val = hw->phy.ops.write_reg(hw, HV_PM_CTRL, pm_phy_reg); if (ret_val) return ret_val; } else { u32 mac_reg; mac_reg = E1000_READ_REG(hw, E1000_FEXTNVM4); mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK; mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_16USEC; E1000_WRITE_REG(hw, E1000_FEXTNVM4, mac_reg); } } return ret_val; } /** * e1000_gate_hw_phy_config_ich8lan - disable PHY config via hardware * @hw: pointer to the HW structure * @gate: boolean set to TRUE to gate, FALSE to ungate * * Gate/ungate the automatic PHY configuration via hardware; perform * the configuration via software instead. **/ static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate) { u32 extcnf_ctrl; DEBUGFUNC("e1000_gate_hw_phy_config_ich8lan"); if (hw->mac.type < e1000_pch2lan) return; extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); if (gate) extcnf_ctrl |= E1000_EXTCNF_CTRL_GATE_PHY_CFG; else extcnf_ctrl &= ~E1000_EXTCNF_CTRL_GATE_PHY_CFG; E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); } /** * e1000_lan_init_done_ich8lan - Check for PHY config completion * @hw: pointer to the HW structure * * Check the appropriate indication the MAC has finished configuring the * PHY after a software reset. **/ static void e1000_lan_init_done_ich8lan(struct e1000_hw *hw) { u32 data, loop = E1000_ICH8_LAN_INIT_TIMEOUT; DEBUGFUNC("e1000_lan_init_done_ich8lan"); /* Wait for basic configuration completes before proceeding */ do { data = E1000_READ_REG(hw, E1000_STATUS); data &= E1000_STATUS_LAN_INIT_DONE; usec_delay(100); } while ((!data) && --loop); /* If basic configuration is incomplete before the above loop * count reaches 0, loading the configuration from NVM will * leave the PHY in a bad state possibly resulting in no link. */ if (loop == 0) DEBUGOUT("LAN_INIT_DONE not set, increase timeout\n"); /* Clear the Init Done bit for the next init event */ data = E1000_READ_REG(hw, E1000_STATUS); data &= ~E1000_STATUS_LAN_INIT_DONE; E1000_WRITE_REG(hw, E1000_STATUS, data); } /** * e1000_post_phy_reset_ich8lan - Perform steps required after a PHY reset * @hw: pointer to the HW structure **/ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 reg; DEBUGFUNC("e1000_post_phy_reset_ich8lan"); if (hw->phy.ops.check_reset_block(hw)) return E1000_SUCCESS; /* Allow time for h/w to get to quiescent state after reset */ msec_delay(10); /* Perform any necessary post-reset workarounds */ switch (hw->mac.type) { case e1000_pchlan: ret_val = e1000_hv_phy_workarounds_ich8lan(hw); if (ret_val) return ret_val; break; case e1000_pch2lan: ret_val = e1000_lv_phy_workarounds_ich8lan(hw); if (ret_val) return ret_val; break; default: break; } /* Clear the host wakeup bit after lcd reset */ if (hw->mac.type >= e1000_pchlan) { hw->phy.ops.read_reg(hw, BM_PORT_GEN_CFG, ®); reg &= ~BM_WUC_HOST_WU_BIT; hw->phy.ops.write_reg(hw, BM_PORT_GEN_CFG, reg); } /* Configure the LCD with the extended configuration region in NVM */ ret_val = e1000_sw_lcd_config_ich8lan(hw); if (ret_val) return ret_val; /* Configure the LCD with the OEM bits in NVM */ ret_val = e1000_oem_bits_config_ich8lan(hw, TRUE); if (hw->mac.type == e1000_pch2lan) { /* Ungate automatic PHY configuration on non-managed 82579 */ if (!(E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID)) { msec_delay(10); e1000_gate_hw_phy_config_ich8lan(hw, FALSE); } /* Set EEE LPI Update Timer to 200usec */ ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; ret_val = e1000_write_emi_reg_locked(hw, I82579_LPI_UPDATE_TIMER, 0x1387); hw->phy.ops.release(hw); } return ret_val; } /** * e1000_phy_hw_reset_ich8lan - Performs a PHY reset * @hw: pointer to the HW structure * * Resets the PHY * This is a function pointer entry point called by drivers * or other shared routines. **/ static s32 e1000_phy_hw_reset_ich8lan(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_phy_hw_reset_ich8lan"); /* Gate automatic PHY configuration by hardware on non-managed 82579 */ if ((hw->mac.type == e1000_pch2lan) && !(E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID)) e1000_gate_hw_phy_config_ich8lan(hw, TRUE); ret_val = e1000_phy_hw_reset_generic(hw); if (ret_val) return ret_val; return e1000_post_phy_reset_ich8lan(hw); } /** * e1000_set_lplu_state_pchlan - Set Low Power Link Up state * @hw: pointer to the HW structure * @active: TRUE to enable LPLU, FALSE to disable * * Sets the LPLU state according to the active flag. For PCH, if OEM write * bit are disabled in the NVM, writing the LPLU bits in the MAC will not set * the phy speed. This function will manually set the LPLU bit and restart * auto-neg as hw would do. D3 and D0 LPLU will call the same function * since it configures the same bit. **/ static s32 e1000_set_lplu_state_pchlan(struct e1000_hw *hw, bool active) { s32 ret_val; u16 oem_reg; DEBUGFUNC("e1000_set_lplu_state_pchlan"); ret_val = hw->phy.ops.read_reg(hw, HV_OEM_BITS, &oem_reg); if (ret_val) return ret_val; if (active) oem_reg |= HV_OEM_BITS_LPLU; else oem_reg &= ~HV_OEM_BITS_LPLU; if (!hw->phy.ops.check_reset_block(hw)) oem_reg |= HV_OEM_BITS_RESTART_AN; return hw->phy.ops.write_reg(hw, HV_OEM_BITS, oem_reg); } /** * e1000_set_d0_lplu_state_ich8lan - Set Low Power Linkup D0 state * @hw: pointer to the HW structure * @active: TRUE to enable LPLU, FALSE to disable * * Sets the LPLU D0 state according to the active flag. When * activating LPLU this function also disables smart speed * and vice versa. LPLU will not be activated unless the * device autonegotiation advertisement meets standards of * either 10 or 10/100 or 10/100/1000 at all duplexes. * This is a function pointer entry point only called by * PHY setup routines. **/ static s32 e1000_set_d0_lplu_state_ich8lan(struct e1000_hw *hw, bool active) { struct e1000_phy_info *phy = &hw->phy; u32 phy_ctrl; s32 ret_val = E1000_SUCCESS; u16 data; DEBUGFUNC("e1000_set_d0_lplu_state_ich8lan"); if (phy->type == e1000_phy_ife) return E1000_SUCCESS; phy_ctrl = E1000_READ_REG(hw, E1000_PHY_CTRL); if (active) { phy_ctrl |= E1000_PHY_CTRL_D0A_LPLU; E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); if (phy->type != e1000_phy_igp_3) return E1000_SUCCESS; /* Call gig speed drop workaround on LPLU before accessing * any PHY registers */ if (hw->mac.type == e1000_ich8lan) e1000_gig_downshift_workaround_ich8lan(hw); /* When LPLU is enabled, we should disable SmartSpeed */ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) return ret_val; } else { phy_ctrl &= ~E1000_PHY_CTRL_D0A_LPLU; E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); if (phy->type != e1000_phy_igp_3) return E1000_SUCCESS; /* LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. */ if (phy->smart_speed == e1000_smart_speed_on) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data |= IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) return ret_val; } else if (phy->smart_speed == e1000_smart_speed_off) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) return ret_val; } } return E1000_SUCCESS; } /** * e1000_set_d3_lplu_state_ich8lan - Set Low Power Linkup D3 state * @hw: pointer to the HW structure * @active: TRUE to enable LPLU, FALSE to disable * * Sets the LPLU D3 state according to the active flag. When * activating LPLU this function also disables smart speed * and vice versa. LPLU will not be activated unless the * device autonegotiation advertisement meets standards of * either 10 or 10/100 or 10/100/1000 at all duplexes. * This is a function pointer entry point only called by * PHY setup routines. **/ static s32 e1000_set_d3_lplu_state_ich8lan(struct e1000_hw *hw, bool active) { struct e1000_phy_info *phy = &hw->phy; u32 phy_ctrl; s32 ret_val = E1000_SUCCESS; u16 data; DEBUGFUNC("e1000_set_d3_lplu_state_ich8lan"); phy_ctrl = E1000_READ_REG(hw, E1000_PHY_CTRL); if (!active) { phy_ctrl &= ~E1000_PHY_CTRL_NOND0A_LPLU; E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); if (phy->type != e1000_phy_igp_3) return E1000_SUCCESS; /* LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. */ if (phy->smart_speed == e1000_smart_speed_on) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data |= IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) return ret_val; } else if (phy->smart_speed == e1000_smart_speed_off) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) return ret_val; } } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { phy_ctrl |= E1000_PHY_CTRL_NOND0A_LPLU; E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); if (phy->type != e1000_phy_igp_3) return E1000_SUCCESS; /* Call gig speed drop workaround on LPLU before accessing * any PHY registers */ if (hw->mac.type == e1000_ich8lan) e1000_gig_downshift_workaround_ich8lan(hw); /* When LPLU is enabled, we should disable SmartSpeed */ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); } return ret_val; } /** * e1000_valid_nvm_bank_detect_ich8lan - finds out the valid bank 0 or 1 * @hw: pointer to the HW structure * @bank: pointer to the variable that returns the active bank * * Reads signature byte from the NVM using the flash access registers. * Word 0x13 bits 15:14 = 10b indicate a valid signature for that bank. **/ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) { u32 eecd; struct e1000_nvm_info *nvm = &hw->nvm; u32 bank1_offset = nvm->flash_bank_size * sizeof(u16); u32 act_offset = E1000_ICH_NVM_SIG_WORD * 2 + 1; u8 sig_byte = 0; s32 ret_val; DEBUGFUNC("e1000_valid_nvm_bank_detect_ich8lan"); switch (hw->mac.type) { - /* In SPT, read from the CTRL_EXT reg instead of - * accessing the sector valid bits from the nvm - */ - case e1000_pch_spt: - *bank = E1000_READ_REG(hw, E1000_CTRL_EXT) - & E1000_CTRL_EXT_NVMVS; - if ((*bank == 0) || (*bank == 1)) { - DEBUGOUT("ERROR: No valid NVM bank present\n"); - return -E1000_ERR_NVM; - } else { - *bank = *bank - 2; - return E1000_SUCCESS; - } - break; case e1000_ich8lan: case e1000_ich9lan: eecd = E1000_READ_REG(hw, E1000_EECD); if ((eecd & E1000_EECD_SEC1VAL_VALID_MASK) == E1000_EECD_SEC1VAL_VALID_MASK) { if (eecd & E1000_EECD_SEC1VAL) *bank = 1; else *bank = 0; return E1000_SUCCESS; } DEBUGOUT("Unable to determine valid NVM bank via EEC - reading flash signature\n"); /* fall-thru */ default: /* set bank to 0 in case flash read fails */ *bank = 0; /* Check bank 0 */ ret_val = e1000_read_flash_byte_ich8lan(hw, act_offset, &sig_byte); if (ret_val) return ret_val; if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == E1000_ICH_NVM_SIG_VALUE) { *bank = 0; return E1000_SUCCESS; } /* Check bank 1 */ ret_val = e1000_read_flash_byte_ich8lan(hw, act_offset + bank1_offset, &sig_byte); if (ret_val) return ret_val; if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == E1000_ICH_NVM_SIG_VALUE) { *bank = 1; return E1000_SUCCESS; } DEBUGOUT("ERROR: No valid NVM bank present\n"); return -E1000_ERR_NVM; } } /** - * e1000_read_nvm_spt - NVM access for SPT - * @hw: pointer to the HW structure - * @offset: The offset (in bytes) of the word(s) to read. - * @words: Size of data to read in words. - * @data: pointer to the word(s) to read at offset. - * - * Reads a word(s) from the NVM - **/ -static s32 e1000_read_nvm_spt(struct e1000_hw *hw, u16 offset, u16 words, - u16 *data) -{ - struct e1000_nvm_info *nvm = &hw->nvm; - struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; - u32 act_offset; - s32 ret_val = E1000_SUCCESS; - u32 bank = 0; - u32 dword = 0; - u16 offset_to_read; - u16 i; - - DEBUGFUNC("e1000_read_nvm_spt"); - - if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) || - (words == 0)) { - DEBUGOUT("nvm parameter(s) out of bounds\n"); - ret_val = -E1000_ERR_NVM; - goto out; - } - - nvm->ops.acquire(hw); - - ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); - if (ret_val != E1000_SUCCESS) { - DEBUGOUT("Could not detect valid bank, assuming bank 0\n"); - bank = 0; - } - - act_offset = (bank) ? nvm->flash_bank_size : 0; - act_offset += offset; - - ret_val = E1000_SUCCESS; - - for (i = 0; i < words; i += 2) { - if (words - i == 1) { - if (dev_spec->shadow_ram[offset+i].modified) { - data[i] = dev_spec->shadow_ram[offset+i].value; - } else { - offset_to_read = act_offset + i - - ((act_offset + i) % 2); - ret_val = - e1000_read_flash_dword_ich8lan(hw, - offset_to_read, - &dword); - if (ret_val) - break; - if ((act_offset + i) % 2 == 0) - data[i] = (u16)(dword & 0xFFFF); - else - data[i] = (u16)((dword >> 16) & 0xFFFF); - } - } else { - offset_to_read = act_offset + i; - if (!(dev_spec->shadow_ram[offset+i].modified) || - !(dev_spec->shadow_ram[offset+i+1].modified)) { - ret_val = - e1000_read_flash_dword_ich8lan(hw, - offset_to_read, - &dword); - if (ret_val) - break; - } - if (dev_spec->shadow_ram[offset+i].modified) - data[i] = dev_spec->shadow_ram[offset+i].value; - else - data[i] = (u16) (dword & 0xFFFF); - if (dev_spec->shadow_ram[offset+i].modified) - data[i+1] = - dev_spec->shadow_ram[offset+i+1].value; - else - data[i+1] = (u16) (dword >> 16 & 0xFFFF); - } - } - - nvm->ops.release(hw); - -out: - if (ret_val) - DEBUGOUT1("NVM read error: %d\n", ret_val); - - return ret_val; -} - -/** * e1000_read_nvm_ich8lan - Read word(s) from the NVM * @hw: pointer to the HW structure * @offset: The offset (in bytes) of the word(s) to read. * @words: Size of data to read in words * @data: Pointer to the word(s) to read at offset. * * Reads a word(s) from the NVM using the flash access registers. **/ static s32 e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { struct e1000_nvm_info *nvm = &hw->nvm; struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u32 act_offset; s32 ret_val = E1000_SUCCESS; u32 bank = 0; u16 i, word; DEBUGFUNC("e1000_read_nvm_ich8lan"); if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) || (words == 0)) { DEBUGOUT("nvm parameter(s) out of bounds\n"); ret_val = -E1000_ERR_NVM; goto out; } nvm->ops.acquire(hw); ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); if (ret_val != E1000_SUCCESS) { DEBUGOUT("Could not detect valid bank, assuming bank 0\n"); bank = 0; } act_offset = (bank) ? nvm->flash_bank_size : 0; act_offset += offset; ret_val = E1000_SUCCESS; for (i = 0; i < words; i++) { if (dev_spec->shadow_ram[offset+i].modified) { data[i] = dev_spec->shadow_ram[offset+i].value; } else { ret_val = e1000_read_flash_word_ich8lan(hw, act_offset + i, &word); if (ret_val) break; data[i] = word; } } nvm->ops.release(hw); out: if (ret_val) DEBUGOUT1("NVM read error: %d\n", ret_val); return ret_val; } /** * e1000_flash_cycle_init_ich8lan - Initialize flash * @hw: pointer to the HW structure * * This function does initial flash setup so that a new read/write/erase cycle * can be started. **/ static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) { union ich8_hws_flash_status hsfsts; s32 ret_val = -E1000_ERR_NVM; DEBUGFUNC("e1000_flash_cycle_init_ich8lan"); hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); /* Check if the flash descriptor is valid */ if (!hsfsts.hsf_status.fldesvalid) { DEBUGOUT("Flash descriptor invalid. SW Sequencing must be used.\n"); return -E1000_ERR_NVM; } /* Clear FCERR and DAEL in hw status by writing 1 */ hsfsts.hsf_status.flcerr = 1; hsfsts.hsf_status.dael = 1; - if (hw->mac.type == e1000_pch_spt) - E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, - hsfsts.regval & 0xFFFF); - else - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval); + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval); /* Either we should have a hardware SPI cycle in progress * bit to check against, in order to start a new cycle or * FDONE bit should be changed in the hardware so that it * is 1 after hardware reset, which can then be used as an * indication whether a cycle is in progress or has been * completed. */ if (!hsfsts.hsf_status.flcinprog) { /* There is no cycle running at present, * so we can start a cycle. * Begin by setting Flash Cycle Done. */ hsfsts.hsf_status.flcdone = 1; - if (hw->mac.type == e1000_pch_spt) - E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, - hsfsts.regval & 0xFFFF); - else - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, - hsfsts.regval); + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval); ret_val = E1000_SUCCESS; } else { s32 i; /* Otherwise poll for sometime so the current * cycle has a chance to end before giving up. */ for (i = 0; i < ICH_FLASH_READ_COMMAND_TIMEOUT; i++) { hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); if (!hsfsts.hsf_status.flcinprog) { ret_val = E1000_SUCCESS; break; } usec_delay(1); } if (ret_val == E1000_SUCCESS) { /* Successful in waiting for previous cycle to timeout, * now set the Flash Cycle Done. */ hsfsts.hsf_status.flcdone = 1; - if (hw->mac.type == e1000_pch_spt) - E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, - hsfsts.regval & 0xFFFF); - else - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, - hsfsts.regval); + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, + hsfsts.regval); } else { DEBUGOUT("Flash controller busy, cannot get access\n"); } } return ret_val; } /** * e1000_flash_cycle_ich8lan - Starts flash cycle (read/write/erase) * @hw: pointer to the HW structure * @timeout: maximum time to wait for completion * * This function starts a flash cycle and waits for its completion. **/ static s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout) { union ich8_hws_flash_ctrl hsflctl; union ich8_hws_flash_status hsfsts; u32 i = 0; DEBUGFUNC("e1000_flash_cycle_ich8lan"); /* Start a cycle by writing 1 in Flash Cycle Go in Hw Flash Control */ - if (hw->mac.type == e1000_pch_spt) - hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16; - else - hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); + hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); hsflctl.hsf_ctrl.flcgo = 1; - if (hw->mac.type == e1000_pch_spt) - E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, - hsflctl.regval << 16); - else - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); /* wait till FDONE bit is set to 1 */ do { hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); if (hsfsts.hsf_status.flcdone) break; usec_delay(1); } while (i++ < timeout); if (hsfsts.hsf_status.flcdone && !hsfsts.hsf_status.flcerr) return E1000_SUCCESS; return -E1000_ERR_NVM; } /** - * e1000_read_flash_dword_ich8lan - Read dword from flash - * @hw: pointer to the HW structure - * @offset: offset to data location - * @data: pointer to the location for storing the data - * - * Reads the flash dword at offset into data. Offset is converted - * to bytes before read. - **/ -static s32 e1000_read_flash_dword_ich8lan(struct e1000_hw *hw, u32 offset, - u32 *data) -{ - DEBUGFUNC("e1000_read_flash_dword_ich8lan"); - - if (!data) - return -E1000_ERR_NVM; - - /* Must convert word offset into bytes. */ - offset <<= 1; - - return e1000_read_flash_data32_ich8lan(hw, offset, data); -} - -/** * e1000_read_flash_word_ich8lan - Read word from flash * @hw: pointer to the HW structure * @offset: offset to data location * @data: pointer to the location for storing the data * * Reads the flash word at offset into data. Offset is converted * to bytes before read. **/ static s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw, u32 offset, u16 *data) { DEBUGFUNC("e1000_read_flash_word_ich8lan"); if (!data) return -E1000_ERR_NVM; /* Must convert offset into bytes. */ offset <<= 1; return e1000_read_flash_data_ich8lan(hw, offset, 2, data); } /** * e1000_read_flash_byte_ich8lan - Read byte from flash * @hw: pointer to the HW structure * @offset: The offset of the byte to read. * @data: Pointer to a byte to store the value read. * * Reads a single byte from the NVM using the flash access registers. **/ static s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, u8 *data) { s32 ret_val; u16 word = 0; - /* In SPT, only 32 bits access is supported, - * so this function should not be called. - */ - if (hw->mac.type == e1000_pch_spt) - return -E1000_ERR_NVM; - else - ret_val = e1000_read_flash_data_ich8lan(hw, offset, 1, &word); + ret_val = e1000_read_flash_data_ich8lan(hw, offset, 1, &word); if (ret_val) return ret_val; *data = (u8)word; return E1000_SUCCESS; } /** * e1000_read_flash_data_ich8lan - Read byte or word from NVM * @hw: pointer to the HW structure * @offset: The offset (in bytes) of the byte or word to read. * @size: Size of data to read, 1=byte 2=word * @data: Pointer to the word to store the value read. * * Reads a byte or word from the NVM using the flash access registers. **/ static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, u8 size, u16 *data) { union ich8_hws_flash_status hsfsts; union ich8_hws_flash_ctrl hsflctl; u32 flash_linear_addr; u32 flash_data = 0; s32 ret_val = -E1000_ERR_NVM; u8 count = 0; DEBUGFUNC("e1000_read_flash_data_ich8lan"); if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK) return -E1000_ERR_NVM; flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + hw->nvm.flash_base_addr); do { usec_delay(1); /* Steps */ ret_val = e1000_flash_cycle_init_ich8lan(hw); if (ret_val != E1000_SUCCESS) break; hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); /* 0b/1b corresponds to 1 or 2 byte size, respectively. */ hsflctl.hsf_ctrl.fldbcount = size - 1; hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_READ; E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); ret_val = e1000_flash_cycle_ich8lan(hw, ICH_FLASH_READ_COMMAND_TIMEOUT); /* Check if FCERR is set to 1, if set to 1, clear it * and try the whole sequence a few more times, else * read in (shift in) the Flash Data0, the order is * least significant byte first msb to lsb */ if (ret_val == E1000_SUCCESS) { flash_data = E1000_READ_FLASH_REG(hw, ICH_FLASH_FDATA0); if (size == 1) *data = (u8)(flash_data & 0x000000FF); else if (size == 2) *data = (u16)(flash_data & 0x0000FFFF); break; } else { /* If we've gotten here, then things are probably * completely hosed, but if the error condition is * detected, it won't hurt to give it another try... * ICH_FLASH_CYCLE_REPEAT_COUNT times. */ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); if (hsfsts.hsf_status.flcerr) { /* Repeat for some time before giving up. */ continue; } else if (!hsfsts.hsf_status.flcdone) { DEBUGOUT("Timeout error - flash cycle did not complete.\n"); break; } } } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); return ret_val; } /** - * e1000_read_flash_data32_ich8lan - Read dword from NVM - * @hw: pointer to the HW structure - * @offset: The offset (in bytes) of the dword to read. - * @data: Pointer to the dword to store the value read. - * - * Reads a byte or word from the NVM using the flash access registers. - **/ - -static s32 e1000_read_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, - u32 *data) -{ - union ich8_hws_flash_status hsfsts; - union ich8_hws_flash_ctrl hsflctl; - u32 flash_linear_addr; - s32 ret_val = -E1000_ERR_NVM; - u8 count = 0; - - DEBUGFUNC("e1000_read_flash_data_ich8lan"); - - if (offset > ICH_FLASH_LINEAR_ADDR_MASK || - hw->mac.type != e1000_pch_spt) - return -E1000_ERR_NVM; - flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + - hw->nvm.flash_base_addr); - - do { - usec_delay(1); - /* Steps */ - ret_val = e1000_flash_cycle_init_ich8lan(hw); - if (ret_val != E1000_SUCCESS) - break; - /* In SPT, This register is in Lan memory space, not flash. - * Therefore, only 32 bit access is supported - */ - hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16; - - /* 0b/1b corresponds to 1 or 2 byte size, respectively. */ - hsflctl.hsf_ctrl.fldbcount = sizeof(u32) - 1; - hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_READ; - /* In SPT, This register is in Lan memory space, not flash. - * Therefore, only 32 bit access is supported - */ - E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, - (u32)hsflctl.regval << 16); - E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); - - ret_val = e1000_flash_cycle_ich8lan(hw, - ICH_FLASH_READ_COMMAND_TIMEOUT); - - /* Check if FCERR is set to 1, if set to 1, clear it - * and try the whole sequence a few more times, else - * read in (shift in) the Flash Data0, the order is - * least significant byte first msb to lsb - */ - if (ret_val == E1000_SUCCESS) { - *data = E1000_READ_FLASH_REG(hw, ICH_FLASH_FDATA0); - break; - } else { - /* If we've gotten here, then things are probably - * completely hosed, but if the error condition is - * detected, it won't hurt to give it another try... - * ICH_FLASH_CYCLE_REPEAT_COUNT times. - */ - hsfsts.regval = E1000_READ_FLASH_REG16(hw, - ICH_FLASH_HSFSTS); - if (hsfsts.hsf_status.flcerr) { - /* Repeat for some time before giving up. */ - continue; - } else if (!hsfsts.hsf_status.flcdone) { - DEBUGOUT("Timeout error - flash cycle did not complete.\n"); - break; - } - } - } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); - - return ret_val; -} -/** * e1000_write_nvm_ich8lan - Write word(s) to the NVM * @hw: pointer to the HW structure * @offset: The offset (in bytes) of the word(s) to write. * @words: Size of data to write in words * @data: Pointer to the word(s) to write at offset. * * Writes a byte or word to the NVM using the flash access registers. **/ static s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { struct e1000_nvm_info *nvm = &hw->nvm; struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u16 i; DEBUGFUNC("e1000_write_nvm_ich8lan"); if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) || (words == 0)) { DEBUGOUT("nvm parameter(s) out of bounds\n"); return -E1000_ERR_NVM; } nvm->ops.acquire(hw); for (i = 0; i < words; i++) { dev_spec->shadow_ram[offset+i].modified = TRUE; dev_spec->shadow_ram[offset+i].value = data[i]; } nvm->ops.release(hw); return E1000_SUCCESS; } /** - * e1000_update_nvm_checksum_spt - Update the checksum for NVM - * @hw: pointer to the HW structure - * - * The NVM checksum is updated by calling the generic update_nvm_checksum, - * which writes the checksum to the shadow ram. The changes in the shadow - * ram are then committed to the EEPROM by processing each bank at a time - * checking for the modified bit and writing only the pending changes. - * After a successful commit, the shadow ram is cleared and is ready for - * future writes. - **/ -static s32 e1000_update_nvm_checksum_spt(struct e1000_hw *hw) -{ - struct e1000_nvm_info *nvm = &hw->nvm; - struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; - u32 i, act_offset, new_bank_offset, old_bank_offset, bank; - s32 ret_val; - u32 dword = 0; - - DEBUGFUNC("e1000_update_nvm_checksum_spt"); - - ret_val = e1000_update_nvm_checksum_generic(hw); - if (ret_val) - goto out; - - if (nvm->type != e1000_nvm_flash_sw) - goto out; - - nvm->ops.acquire(hw); - - /* We're writing to the opposite bank so if we're on bank 1, - * write to bank 0 etc. We also need to erase the segment that - * is going to be written - */ - ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); - if (ret_val != E1000_SUCCESS) { - DEBUGOUT("Could not detect valid bank, assuming bank 0\n"); - bank = 0; - } - - if (bank == 0) { - new_bank_offset = nvm->flash_bank_size; - old_bank_offset = 0; - ret_val = e1000_erase_flash_bank_ich8lan(hw, 1); - if (ret_val) - goto release; - } else { - old_bank_offset = nvm->flash_bank_size; - new_bank_offset = 0; - ret_val = e1000_erase_flash_bank_ich8lan(hw, 0); - if (ret_val) - goto release; - } - for (i = 0; i < E1000_SHADOW_RAM_WORDS; i += 2) { - /* Determine whether to write the value stored - * in the other NVM bank or a modified value stored - * in the shadow RAM - */ - ret_val = e1000_read_flash_dword_ich8lan(hw, - i + old_bank_offset, - &dword); - - if (dev_spec->shadow_ram[i].modified) { - dword &= 0xffff0000; - dword |= (dev_spec->shadow_ram[i].value & 0xffff); - } - if (dev_spec->shadow_ram[i + 1].modified) { - dword &= 0x0000ffff; - dword |= ((dev_spec->shadow_ram[i + 1].value & 0xffff) - << 16); - } - if (ret_val) - break; - - /* If the word is 0x13, then make sure the signature bits - * (15:14) are 11b until the commit has completed. - * This will allow us to write 10b which indicates the - * signature is valid. We want to do this after the write - * has completed so that we don't mark the segment valid - * while the write is still in progress - */ - if (i == E1000_ICH_NVM_SIG_WORD - 1) - dword |= E1000_ICH_NVM_SIG_MASK << 16; - - /* Convert offset to bytes. */ - act_offset = (i + new_bank_offset) << 1; - - usec_delay(100); - - /* Write the data to the new bank. Offset in words*/ - act_offset = i + new_bank_offset; - ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, - dword); - if (ret_val) - break; - } - - /* Don't bother writing the segment valid bits if sector - * programming failed. - */ - if (ret_val) { - DEBUGOUT("Flash commit failed.\n"); - goto release; - } - - /* Finally validate the new segment by setting bit 15:14 - * to 10b in word 0x13 , this can be done without an - * erase as well since these bits are 11 to start with - * and we need to change bit 14 to 0b - */ - act_offset = new_bank_offset + E1000_ICH_NVM_SIG_WORD; - - /*offset in words but we read dword*/ - --act_offset; - ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, &dword); - - if (ret_val) - goto release; - - dword &= 0xBFFFFFFF; - ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, dword); - - if (ret_val) - goto release; - - /* And invalidate the previously valid segment by setting - * its signature word (0x13) high_byte to 0b. This can be - * done without an erase because flash erase sets all bits - * to 1's. We can write 1's to 0's without an erase - */ - act_offset = (old_bank_offset + E1000_ICH_NVM_SIG_WORD) * 2 + 1; - - /* offset in words but we read dword*/ - act_offset = old_bank_offset + E1000_ICH_NVM_SIG_WORD - 1; - ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, &dword); - - if (ret_val) - goto release; - - dword &= 0x00FFFFFF; - ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, dword); - - if (ret_val) - goto release; - - /* Great! Everything worked, we can now clear the cached entries. */ - for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) { - dev_spec->shadow_ram[i].modified = FALSE; - dev_spec->shadow_ram[i].value = 0xFFFF; - } - -release: - nvm->ops.release(hw); - - /* Reload the EEPROM, or else modifications will not appear - * until after the next adapter reset. - */ - if (!ret_val) { - nvm->ops.reload(hw); - msec_delay(10); - } - -out: - if (ret_val) - DEBUGOUT1("NVM update error: %d\n", ret_val); - - return ret_val; -} - -/** * e1000_update_nvm_checksum_ich8lan - Update the checksum for NVM * @hw: pointer to the HW structure * * The NVM checksum is updated by calling the generic update_nvm_checksum, * which writes the checksum to the shadow ram. The changes in the shadow * ram are then committed to the EEPROM by processing each bank at a time * checking for the modified bit and writing only the pending changes. * After a successful commit, the shadow ram is cleared and is ready for * future writes. **/ static s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u32 i, act_offset, new_bank_offset, old_bank_offset, bank; s32 ret_val; u16 data = 0; DEBUGFUNC("e1000_update_nvm_checksum_ich8lan"); ret_val = e1000_update_nvm_checksum_generic(hw); if (ret_val) goto out; if (nvm->type != e1000_nvm_flash_sw) goto out; nvm->ops.acquire(hw); /* We're writing to the opposite bank so if we're on bank 1, * write to bank 0 etc. We also need to erase the segment that * is going to be written */ ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); if (ret_val != E1000_SUCCESS) { DEBUGOUT("Could not detect valid bank, assuming bank 0\n"); bank = 0; } if (bank == 0) { new_bank_offset = nvm->flash_bank_size; old_bank_offset = 0; ret_val = e1000_erase_flash_bank_ich8lan(hw, 1); if (ret_val) goto release; } else { old_bank_offset = nvm->flash_bank_size; new_bank_offset = 0; ret_val = e1000_erase_flash_bank_ich8lan(hw, 0); if (ret_val) goto release; } for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) { if (dev_spec->shadow_ram[i].modified) { data = dev_spec->shadow_ram[i].value; } else { ret_val = e1000_read_flash_word_ich8lan(hw, i + old_bank_offset, &data); if (ret_val) break; } /* If the word is 0x13, then make sure the signature bits * (15:14) are 11b until the commit has completed. * This will allow us to write 10b which indicates the * signature is valid. We want to do this after the write * has completed so that we don't mark the segment valid * while the write is still in progress */ if (i == E1000_ICH_NVM_SIG_WORD) data |= E1000_ICH_NVM_SIG_MASK; /* Convert offset to bytes. */ act_offset = (i + new_bank_offset) << 1; usec_delay(100); /* Write the bytes to the new bank. */ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, act_offset, (u8)data); if (ret_val) break; usec_delay(100); ret_val = e1000_retry_write_flash_byte_ich8lan(hw, act_offset + 1, (u8)(data >> 8)); if (ret_val) break; } /* Don't bother writing the segment valid bits if sector * programming failed. */ if (ret_val) { DEBUGOUT("Flash commit failed.\n"); goto release; } /* Finally validate the new segment by setting bit 15:14 * to 10b in word 0x13 , this can be done without an * erase as well since these bits are 11 to start with * and we need to change bit 14 to 0b */ act_offset = new_bank_offset + E1000_ICH_NVM_SIG_WORD; ret_val = e1000_read_flash_word_ich8lan(hw, act_offset, &data); if (ret_val) goto release; data &= 0xBFFF; ret_val = e1000_retry_write_flash_byte_ich8lan(hw, act_offset * 2 + 1, (u8)(data >> 8)); if (ret_val) goto release; /* And invalidate the previously valid segment by setting * its signature word (0x13) high_byte to 0b. This can be * done without an erase because flash erase sets all bits * to 1's. We can write 1's to 0's without an erase */ act_offset = (old_bank_offset + E1000_ICH_NVM_SIG_WORD) * 2 + 1; ret_val = e1000_retry_write_flash_byte_ich8lan(hw, act_offset, 0); if (ret_val) goto release; /* Great! Everything worked, we can now clear the cached entries. */ for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) { dev_spec->shadow_ram[i].modified = FALSE; dev_spec->shadow_ram[i].value = 0xFFFF; } release: nvm->ops.release(hw); /* Reload the EEPROM, or else modifications will not appear * until after the next adapter reset. */ if (!ret_val) { nvm->ops.reload(hw); msec_delay(10); } out: if (ret_val) DEBUGOUT1("NVM update error: %d\n", ret_val); return ret_val; } /** * e1000_validate_nvm_checksum_ich8lan - Validate EEPROM checksum * @hw: pointer to the HW structure * * Check to see if checksum needs to be fixed by reading bit 6 in word 0x19. * If the bit is 0, that the EEPROM had been modified, but the checksum was not * calculated, in which case we need to calculate the checksum and set bit 6. **/ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) { s32 ret_val; u16 data; u16 word; u16 valid_csum_mask; DEBUGFUNC("e1000_validate_nvm_checksum_ich8lan"); /* Read NVM and check Invalid Image CSUM bit. If this bit is 0, * the checksum needs to be fixed. This bit is an indication that * the NVM was prepared by OEM software and did not calculate * the checksum...a likely scenario. */ switch (hw->mac.type) { case e1000_pch_lpt: - case e1000_pch_spt: word = NVM_COMPAT; valid_csum_mask = NVM_COMPAT_VALID_CSUM; break; default: word = NVM_FUTURE_INIT_WORD1; valid_csum_mask = NVM_FUTURE_INIT_WORD1_VALID_CSUM; break; } ret_val = hw->nvm.ops.read(hw, word, 1, &data); if (ret_val) return ret_val; if (!(data & valid_csum_mask)) { data |= valid_csum_mask; ret_val = hw->nvm.ops.write(hw, word, 1, &data); if (ret_val) return ret_val; ret_val = hw->nvm.ops.update(hw); if (ret_val) return ret_val; } return e1000_validate_nvm_checksum_generic(hw); } /** * e1000_write_flash_data_ich8lan - Writes bytes to the NVM * @hw: pointer to the HW structure * @offset: The offset (in bytes) of the byte/word to read. * @size: Size of data to read, 1=byte 2=word * @data: The byte(s) to write to the NVM. * * Writes one/two bytes to the NVM using the flash access registers. **/ static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, u8 size, u16 data) { union ich8_hws_flash_status hsfsts; union ich8_hws_flash_ctrl hsflctl; u32 flash_linear_addr; u32 flash_data = 0; s32 ret_val; u8 count = 0; DEBUGFUNC("e1000_write_ich8_data"); - if (hw->mac.type == e1000_pch_spt) { - if (size != 4 || offset > ICH_FLASH_LINEAR_ADDR_MASK) - return -E1000_ERR_NVM; - } else { - if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK) - return -E1000_ERR_NVM; - } + if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK) + return -E1000_ERR_NVM; flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + hw->nvm.flash_base_addr); do { usec_delay(1); /* Steps */ ret_val = e1000_flash_cycle_init_ich8lan(hw); if (ret_val != E1000_SUCCESS) break; - /* In SPT, This register is in Lan memory space, not - * flash. Therefore, only 32 bit access is supported - */ - if (hw->mac.type == e1000_pch_spt) - hsflctl.regval = - E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16; - else - hsflctl.regval = - E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); + hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); /* 0b/1b corresponds to 1 or 2 byte size, respectively. */ hsflctl.hsf_ctrl.fldbcount = size - 1; hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE; - /* In SPT, This register is in Lan memory space, - * not flash. Therefore, only 32 bit access is - * supported - */ - if (hw->mac.type == e1000_pch_spt) - E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, - hsflctl.regval << 16); - else - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, - hsflctl.regval); + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); if (size == 1) flash_data = (u32)data & 0x00FF; else flash_data = (u32)data; E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FDATA0, flash_data); /* check if FCERR is set to 1 , if set to 1, clear it * and try the whole sequence a few more times else done */ ret_val = e1000_flash_cycle_ich8lan(hw, ICH_FLASH_WRITE_COMMAND_TIMEOUT); if (ret_val == E1000_SUCCESS) break; /* If we're here, then things are most likely * completely hosed, but if the error condition * is detected, it won't hurt to give it another * try...ICH_FLASH_CYCLE_REPEAT_COUNT times. */ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); if (hsfsts.hsf_status.flcerr) /* Repeat for some time before giving up. */ continue; if (!hsfsts.hsf_status.flcdone) { DEBUGOUT("Timeout error - flash cycle did not complete.\n"); break; } } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); return ret_val; } -/** -* e1000_write_flash_data32_ich8lan - Writes 4 bytes to the NVM -* @hw: pointer to the HW structure -* @offset: The offset (in bytes) of the dwords to read. -* @data: The 4 bytes to write to the NVM. -* -* Writes one/two/four bytes to the NVM using the flash access registers. -**/ -static s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, - u32 data) -{ - union ich8_hws_flash_status hsfsts; - union ich8_hws_flash_ctrl hsflctl; - u32 flash_linear_addr; - s32 ret_val; - u8 count = 0; - DEBUGFUNC("e1000_write_flash_data32_ich8lan"); - - if (hw->mac.type == e1000_pch_spt) { - if (offset > ICH_FLASH_LINEAR_ADDR_MASK) - return -E1000_ERR_NVM; - } - flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + - hw->nvm.flash_base_addr); - do { - usec_delay(1); - /* Steps */ - ret_val = e1000_flash_cycle_init_ich8lan(hw); - if (ret_val != E1000_SUCCESS) - break; - - /* In SPT, This register is in Lan memory space, not - * flash. Therefore, only 32 bit access is supported - */ - if (hw->mac.type == e1000_pch_spt) - hsflctl.regval = E1000_READ_FLASH_REG(hw, - ICH_FLASH_HSFSTS) - >> 16; - else - hsflctl.regval = E1000_READ_FLASH_REG16(hw, - ICH_FLASH_HSFCTL); - - hsflctl.hsf_ctrl.fldbcount = sizeof(u32) - 1; - hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE; - - /* In SPT, This register is in Lan memory space, - * not flash. Therefore, only 32 bit access is - * supported - */ - if (hw->mac.type == e1000_pch_spt) - E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, - hsflctl.regval << 16); - else - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, - hsflctl.regval); - - E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); - - E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FDATA0, data); - - /* check if FCERR is set to 1 , if set to 1, clear it - * and try the whole sequence a few more times else done - */ - ret_val = e1000_flash_cycle_ich8lan(hw, - ICH_FLASH_WRITE_COMMAND_TIMEOUT); - - if (ret_val == E1000_SUCCESS) - break; - - /* If we're here, then things are most likely - * completely hosed, but if the error condition - * is detected, it won't hurt to give it another - * try...ICH_FLASH_CYCLE_REPEAT_COUNT times. - */ - hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); - - if (hsfsts.hsf_status.flcerr) - /* Repeat for some time before giving up. */ - continue; - if (!hsfsts.hsf_status.flcdone) { - DEBUGOUT("Timeout error - flash cycle did not complete.\n"); - break; - } - } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); - - return ret_val; -} - /** * e1000_write_flash_byte_ich8lan - Write a single byte to NVM * @hw: pointer to the HW structure * @offset: The index of the byte to read. * @data: The byte to write to the NVM. * * Writes a single byte to the NVM using the flash access registers. **/ static s32 e1000_write_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, u8 data) { u16 word = (u16)data; DEBUGFUNC("e1000_write_flash_byte_ich8lan"); return e1000_write_flash_data_ich8lan(hw, offset, 1, word); } /** -* e1000_retry_write_flash_dword_ich8lan - Writes a dword to NVM -* @hw: pointer to the HW structure -* @offset: The offset of the word to write. -* @dword: The dword to write to the NVM. -* -* Writes a single dword to the NVM using the flash access registers. -* Goes through a retry algorithm before giving up. -**/ -static s32 e1000_retry_write_flash_dword_ich8lan(struct e1000_hw *hw, - u32 offset, u32 dword) -{ - s32 ret_val; - u16 program_retries; - - DEBUGFUNC("e1000_retry_write_flash_dword_ich8lan"); - - /* Must convert word offset into bytes. */ - offset <<= 1; - - ret_val = e1000_write_flash_data32_ich8lan(hw, offset, dword); - - if (!ret_val) - return ret_val; - for (program_retries = 0; program_retries < 100; program_retries++) { - DEBUGOUT2("Retrying Byte %8.8X at offset %u\n", dword, offset); - usec_delay(100); - ret_val = e1000_write_flash_data32_ich8lan(hw, offset, dword); - if (ret_val == E1000_SUCCESS) - break; - } - if (program_retries == 100) - return -E1000_ERR_NVM; - - return E1000_SUCCESS; -} - - -/** * e1000_retry_write_flash_byte_ich8lan - Writes a single byte to NVM * @hw: pointer to the HW structure * @offset: The offset of the byte to write. * @byte: The byte to write to the NVM. * * Writes a single byte to the NVM using the flash access registers. * Goes through a retry algorithm before giving up. **/ static s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, u8 byte) { s32 ret_val; u16 program_retries; DEBUGFUNC("e1000_retry_write_flash_byte_ich8lan"); ret_val = e1000_write_flash_byte_ich8lan(hw, offset, byte); if (!ret_val) return ret_val; for (program_retries = 0; program_retries < 100; program_retries++) { DEBUGOUT2("Retrying Byte %2.2X at offset %u\n", byte, offset); usec_delay(100); ret_val = e1000_write_flash_byte_ich8lan(hw, offset, byte); if (ret_val == E1000_SUCCESS) break; } if (program_retries == 100) return -E1000_ERR_NVM; return E1000_SUCCESS; } /** * e1000_erase_flash_bank_ich8lan - Erase a bank (4k) from NVM * @hw: pointer to the HW structure * @bank: 0 for first bank, 1 for second bank, etc. * * Erases the bank specified. Each bank is a 4k block. Banks are 0 based. * bank N is 4096 * N + flash_reg_addr. **/ static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank) { struct e1000_nvm_info *nvm = &hw->nvm; union ich8_hws_flash_status hsfsts; union ich8_hws_flash_ctrl hsflctl; u32 flash_linear_addr; /* bank size is in 16bit words - adjust to bytes */ u32 flash_bank_size = nvm->flash_bank_size * 2; s32 ret_val; s32 count = 0; s32 j, iteration, sector_size; DEBUGFUNC("e1000_erase_flash_bank_ich8lan"); hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); /* Determine HW Sector size: Read BERASE bits of hw flash status * register * 00: The Hw sector is 256 bytes, hence we need to erase 16 * consecutive sectors. The start index for the nth Hw sector * can be calculated as = bank * 4096 + n * 256 * 01: The Hw sector is 4K bytes, hence we need to erase 1 sector. * The start index for the nth Hw sector can be calculated * as = bank * 4096 * 10: The Hw sector is 8K bytes, nth sector = bank * 8192 * (ich9 only, otherwise error condition) * 11: The Hw sector is 64K bytes, nth sector = bank * 65536 */ switch (hsfsts.hsf_status.berasesz) { case 0: /* Hw sector size 256 */ sector_size = ICH_FLASH_SEG_SIZE_256; iteration = flash_bank_size / ICH_FLASH_SEG_SIZE_256; break; case 1: sector_size = ICH_FLASH_SEG_SIZE_4K; iteration = 1; break; case 2: sector_size = ICH_FLASH_SEG_SIZE_8K; iteration = 1; break; case 3: sector_size = ICH_FLASH_SEG_SIZE_64K; iteration = 1; break; default: return -E1000_ERR_NVM; } /* Start with the base address, then add the sector offset. */ flash_linear_addr = hw->nvm.flash_base_addr; flash_linear_addr += (bank) ? flash_bank_size : 0; for (j = 0; j < iteration; j++) { do { u32 timeout = ICH_FLASH_ERASE_COMMAND_TIMEOUT; /* Steps */ ret_val = e1000_flash_cycle_init_ich8lan(hw); if (ret_val) return ret_val; /* Write a value 11 (block Erase) in Flash * Cycle field in hw flash control */ - if (hw->mac.type == e1000_pch_spt) - hsflctl.regval = - E1000_READ_FLASH_REG(hw, - ICH_FLASH_HSFSTS)>>16; - else - hsflctl.regval = - E1000_READ_FLASH_REG16(hw, - ICH_FLASH_HSFCTL); + hsflctl.regval = + E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_ERASE; - if (hw->mac.type == e1000_pch_spt) - E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, - hsflctl.regval << 16); - else - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, - hsflctl.regval); + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, + hsflctl.regval); /* Write the last 24 bits of an index within the * block into Flash Linear address field in Flash * Address. */ flash_linear_addr += (j * sector_size); E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); ret_val = e1000_flash_cycle_ich8lan(hw, timeout); if (ret_val == E1000_SUCCESS) break; /* Check if FCERR is set to 1. If 1, * clear it and try the whole sequence * a few more times else Done */ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); if (hsfsts.hsf_status.flcerr) /* repeat for some time before giving up */ continue; else if (!hsfsts.hsf_status.flcdone) return ret_val; } while (++count < ICH_FLASH_CYCLE_REPEAT_COUNT); } return E1000_SUCCESS; } /** * e1000_valid_led_default_ich8lan - Set the default LED settings * @hw: pointer to the HW structure * @data: Pointer to the LED settings * * Reads the LED default settings from the NVM to data. If the NVM LED * settings is all 0's or F's, set the LED default to a valid LED default * setting. **/ static s32 e1000_valid_led_default_ich8lan(struct e1000_hw *hw, u16 *data) { s32 ret_val; DEBUGFUNC("e1000_valid_led_default_ich8lan"); ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) *data = ID_LED_DEFAULT_ICH8LAN; return E1000_SUCCESS; } /** * e1000_id_led_init_pchlan - store LED configurations * @hw: pointer to the HW structure * * PCH does not control LEDs via the LEDCTL register, rather it uses * the PHY LED configuration register. * * PCH also does not have an "always on" or "always off" mode which * complicates the ID feature. Instead of using the "on" mode to indicate * in ledctl_mode2 the LEDs to use for ID (see e1000_id_led_init_generic()), * use "link_up" mode. The LEDs will still ID on request if there is no * link based on logic in e1000_led_[on|off]_pchlan(). **/ static s32 e1000_id_led_init_pchlan(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; s32 ret_val; const u32 ledctl_on = E1000_LEDCTL_MODE_LINK_UP; const u32 ledctl_off = E1000_LEDCTL_MODE_LINK_UP | E1000_PHY_LED0_IVRT; u16 data, i, temp, shift; DEBUGFUNC("e1000_id_led_init_pchlan"); /* Get default ID LED modes */ ret_val = hw->nvm.ops.valid_led_default(hw, &data); if (ret_val) return ret_val; mac->ledctl_default = E1000_READ_REG(hw, E1000_LEDCTL); mac->ledctl_mode1 = mac->ledctl_default; mac->ledctl_mode2 = mac->ledctl_default; for (i = 0; i < 4; i++) { temp = (data >> (i << 2)) & E1000_LEDCTL_LED0_MODE_MASK; shift = (i * 5); switch (temp) { case ID_LED_ON1_DEF2: case ID_LED_ON1_ON2: case ID_LED_ON1_OFF2: mac->ledctl_mode1 &= ~(E1000_PHY_LED0_MASK << shift); mac->ledctl_mode1 |= (ledctl_on << shift); break; case ID_LED_OFF1_DEF2: case ID_LED_OFF1_ON2: case ID_LED_OFF1_OFF2: mac->ledctl_mode1 &= ~(E1000_PHY_LED0_MASK << shift); mac->ledctl_mode1 |= (ledctl_off << shift); break; default: /* Do nothing */ break; } switch (temp) { case ID_LED_DEF1_ON2: case ID_LED_ON1_ON2: case ID_LED_OFF1_ON2: mac->ledctl_mode2 &= ~(E1000_PHY_LED0_MASK << shift); mac->ledctl_mode2 |= (ledctl_on << shift); break; case ID_LED_DEF1_OFF2: case ID_LED_ON1_OFF2: case ID_LED_OFF1_OFF2: mac->ledctl_mode2 &= ~(E1000_PHY_LED0_MASK << shift); mac->ledctl_mode2 |= (ledctl_off << shift); break; default: /* Do nothing */ break; } } return E1000_SUCCESS; } /** * e1000_get_bus_info_ich8lan - Get/Set the bus type and width * @hw: pointer to the HW structure * * ICH8 use the PCI Express bus, but does not contain a PCI Express Capability * register, so the the bus width is hard coded. **/ static s32 e1000_get_bus_info_ich8lan(struct e1000_hw *hw) { struct e1000_bus_info *bus = &hw->bus; s32 ret_val; DEBUGFUNC("e1000_get_bus_info_ich8lan"); ret_val = e1000_get_bus_info_pcie_generic(hw); /* ICH devices are "PCI Express"-ish. They have * a configuration space, but do not contain * PCI Express Capability registers, so bus width * must be hardcoded. */ if (bus->width == e1000_bus_width_unknown) bus->width = e1000_bus_width_pcie_x1; return ret_val; } /** * e1000_reset_hw_ich8lan - Reset the hardware * @hw: pointer to the HW structure * * Does a full reset of the hardware which includes a reset of the PHY and * MAC. **/ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) { struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u16 kum_cfg; u32 ctrl, reg; s32 ret_val; DEBUGFUNC("e1000_reset_hw_ich8lan"); /* Prevent the PCI-E bus from sticking if there is no TLP connection * on the last TLP read/write transaction when MAC is reset. */ ret_val = e1000_disable_pcie_master_generic(hw); if (ret_val) DEBUGOUT("PCI-E Master disable polling has failed.\n"); DEBUGOUT("Masking off all interrupts\n"); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); /* Disable the Transmit and Receive units. Then delay to allow * any pending transactions to complete before we hit the MAC * with the global reset. */ E1000_WRITE_REG(hw, E1000_RCTL, 0); E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); E1000_WRITE_FLUSH(hw); msec_delay(10); /* Workaround for ICH8 bit corruption issue in FIFO memory */ if (hw->mac.type == e1000_ich8lan) { /* Set Tx and Rx buffer allocation to 8k apiece. */ E1000_WRITE_REG(hw, E1000_PBA, E1000_PBA_8K); /* Set Packet Buffer Size to 16k. */ E1000_WRITE_REG(hw, E1000_PBS, E1000_PBS_16K); } if (hw->mac.type == e1000_pchlan) { /* Save the NVM K1 bit setting*/ ret_val = e1000_read_nvm(hw, E1000_NVM_K1_CONFIG, 1, &kum_cfg); if (ret_val) return ret_val; if (kum_cfg & E1000_NVM_K1_ENABLE) dev_spec->nvm_k1_enabled = TRUE; else dev_spec->nvm_k1_enabled = FALSE; } ctrl = E1000_READ_REG(hw, E1000_CTRL); if (!hw->phy.ops.check_reset_block(hw)) { /* Full-chip reset requires MAC and PHY reset at the same * time to make sure the interface between MAC and the * external PHY is reset. */ ctrl |= E1000_CTRL_PHY_RST; /* Gate automatic PHY configuration by hardware on * non-managed 82579 */ if ((hw->mac.type == e1000_pch2lan) && !(E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID)) e1000_gate_hw_phy_config_ich8lan(hw, TRUE); } ret_val = e1000_acquire_swflag_ich8lan(hw); DEBUGOUT("Issuing a global reset to ich8lan\n"); E1000_WRITE_REG(hw, E1000_CTRL, (ctrl | E1000_CTRL_RST)); /* cannot issue a flush here because it hangs the hardware */ msec_delay(20); /* Set Phy Config Counter to 50msec */ if (hw->mac.type == e1000_pch2lan) { reg = E1000_READ_REG(hw, E1000_FEXTNVM3); reg &= ~E1000_FEXTNVM3_PHY_CFG_COUNTER_MASK; reg |= E1000_FEXTNVM3_PHY_CFG_COUNTER_50MSEC; E1000_WRITE_REG(hw, E1000_FEXTNVM3, reg); } if (!ret_val) E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex); if (ctrl & E1000_CTRL_PHY_RST) { ret_val = hw->phy.ops.get_cfg_done(hw); if (ret_val) return ret_val; ret_val = e1000_post_phy_reset_ich8lan(hw); if (ret_val) return ret_val; } /* For PCH, this write will make sure that any noise * will be detected as a CRC error and be dropped rather than show up * as a bad packet to the DMA engine. */ if (hw->mac.type == e1000_pchlan) E1000_WRITE_REG(hw, E1000_CRC_OFFSET, 0x65656565); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_READ_REG(hw, E1000_ICR); reg = E1000_READ_REG(hw, E1000_KABGTXD); reg |= E1000_KABGTXD_BGSQLBIAS; E1000_WRITE_REG(hw, E1000_KABGTXD, reg); return E1000_SUCCESS; } /** * e1000_init_hw_ich8lan - Initialize the hardware * @hw: pointer to the HW structure * * Prepares the hardware for transmit and receive by doing the following: * - initialize hardware bits * - initialize LED identification * - setup receive address registers * - setup flow control * - setup transmit descriptors * - clear statistics **/ static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 ctrl_ext, txdctl, snoop; s32 ret_val; u16 i; DEBUGFUNC("e1000_init_hw_ich8lan"); e1000_initialize_hw_bits_ich8lan(hw); /* Initialize identification LED */ ret_val = mac->ops.id_led_init(hw); /* An error is not fatal and we should not stop init due to this */ if (ret_val) DEBUGOUT("Error initializing identification LED\n"); /* Setup the receive address. */ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); /* Zero out the Multicast HASH table */ DEBUGOUT("Zeroing the MTA\n"); for (i = 0; i < mac->mta_reg_count; i++) E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); /* The 82578 Rx buffer will stall if wakeup is enabled in host and * the ME. Disable wakeup by clearing the host wakeup bit. * Reset the phy after disabling host wakeup to reset the Rx buffer. */ if (hw->phy.type == e1000_phy_82578) { hw->phy.ops.read_reg(hw, BM_PORT_GEN_CFG, &i); i &= ~BM_WUC_HOST_WU_BIT; hw->phy.ops.write_reg(hw, BM_PORT_GEN_CFG, i); ret_val = e1000_phy_hw_reset_ich8lan(hw); if (ret_val) return ret_val; } /* Setup link and flow control */ ret_val = mac->ops.setup_link(hw); /* Set the transmit descriptor write-back policy for both queues */ txdctl = E1000_READ_REG(hw, E1000_TXDCTL(0)); txdctl = ((txdctl & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB); txdctl = ((txdctl & ~E1000_TXDCTL_PTHRESH) | E1000_TXDCTL_MAX_TX_DESC_PREFETCH); E1000_WRITE_REG(hw, E1000_TXDCTL(0), txdctl); txdctl = E1000_READ_REG(hw, E1000_TXDCTL(1)); txdctl = ((txdctl & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB); txdctl = ((txdctl & ~E1000_TXDCTL_PTHRESH) | E1000_TXDCTL_MAX_TX_DESC_PREFETCH); E1000_WRITE_REG(hw, E1000_TXDCTL(1), txdctl); /* ICH8 has opposite polarity of no_snoop bits. * By default, we should use snoop behavior. */ if (mac->type == e1000_ich8lan) snoop = PCIE_ICH8_SNOOP_ALL; else snoop = (u32) ~(PCIE_NO_SNOOP_ALL); e1000_set_pcie_no_snoop_generic(hw, snoop); ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_RO_DIS; E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); /* Clear all of the statistics registers (clear on read). It is * important that we do this after we have tried to establish link * because the symbol error count will increment wildly if there * is no link. */ e1000_clear_hw_cntrs_ich8lan(hw); return ret_val; } /** * e1000_initialize_hw_bits_ich8lan - Initialize required hardware bits * @hw: pointer to the HW structure * * Sets/Clears required hardware bits necessary for correctly setting up the * hardware for transmit and receive. **/ static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw) { u32 reg; DEBUGFUNC("e1000_initialize_hw_bits_ich8lan"); /* Extended Device Control */ reg = E1000_READ_REG(hw, E1000_CTRL_EXT); reg |= (1 << 22); /* Enable PHY low-power state when MAC is at D3 w/o WoL */ if (hw->mac.type >= e1000_pchlan) reg |= E1000_CTRL_EXT_PHYPDEN; E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); /* Transmit Descriptor Control 0 */ reg = E1000_READ_REG(hw, E1000_TXDCTL(0)); reg |= (1 << 22); E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg); /* Transmit Descriptor Control 1 */ reg = E1000_READ_REG(hw, E1000_TXDCTL(1)); reg |= (1 << 22); E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg); /* Transmit Arbitration Control 0 */ reg = E1000_READ_REG(hw, E1000_TARC(0)); if (hw->mac.type == e1000_ich8lan) reg |= (1 << 28) | (1 << 29); reg |= (1 << 23) | (1 << 24) | (1 << 26) | (1 << 27); E1000_WRITE_REG(hw, E1000_TARC(0), reg); /* Transmit Arbitration Control 1 */ reg = E1000_READ_REG(hw, E1000_TARC(1)); if (E1000_READ_REG(hw, E1000_TCTL) & E1000_TCTL_MULR) reg &= ~(1 << 28); else reg |= (1 << 28); reg |= (1 << 24) | (1 << 26) | (1 << 30); E1000_WRITE_REG(hw, E1000_TARC(1), reg); /* Device Status */ if (hw->mac.type == e1000_ich8lan) { reg = E1000_READ_REG(hw, E1000_STATUS); reg &= ~(1 << 31); E1000_WRITE_REG(hw, E1000_STATUS, reg); } /* work-around descriptor data corruption issue during nfs v2 udp * traffic, just disable the nfs filtering capability */ reg = E1000_READ_REG(hw, E1000_RFCTL); reg |= (E1000_RFCTL_NFSW_DIS | E1000_RFCTL_NFSR_DIS); /* Disable IPv6 extension header parsing because some malformed * IPv6 headers can hang the Rx. */ if (hw->mac.type == e1000_ich8lan) reg |= (E1000_RFCTL_IPV6_EX_DIS | E1000_RFCTL_NEW_IPV6_EXT_DIS); E1000_WRITE_REG(hw, E1000_RFCTL, reg); /* Enable ECC on Lynxpoint */ - if ((hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt)) { + if (hw->mac.type == e1000_pch_lpt) { reg = E1000_READ_REG(hw, E1000_PBECCSTS); reg |= E1000_PBECCSTS_ECC_ENABLE; E1000_WRITE_REG(hw, E1000_PBECCSTS, reg); reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_MEHE; E1000_WRITE_REG(hw, E1000_CTRL, reg); } return; } /** * e1000_setup_link_ich8lan - Setup flow control and link settings * @hw: pointer to the HW structure * * Determines which flow control settings to use, then configures flow * control. Calls the appropriate media-specific link configuration * function. Assuming the adapter has a valid link partner, a valid link * should be established. Assumes the hardware has previously been reset * and the transmitter and receiver are not enabled. **/ static s32 e1000_setup_link_ich8lan(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_setup_link_ich8lan"); if (hw->phy.ops.check_reset_block(hw)) return E1000_SUCCESS; /* ICH parts do not have a word in the NVM to determine * the default flow control setting, so we explicitly * set it to full. */ if (hw->fc.requested_mode == e1000_fc_default) hw->fc.requested_mode = e1000_fc_full; /* Save off the requested flow control mode for use later. Depending * on the link partner's capabilities, we may or may not use this mode. */ hw->fc.current_mode = hw->fc.requested_mode; DEBUGOUT1("After fix-ups FlowControl is now = %x\n", hw->fc.current_mode); /* Continue to configure the copper link. */ ret_val = hw->mac.ops.setup_physical_interface(hw); if (ret_val) return ret_val; E1000_WRITE_REG(hw, E1000_FCTTV, hw->fc.pause_time); if ((hw->phy.type == e1000_phy_82578) || (hw->phy.type == e1000_phy_82579) || (hw->phy.type == e1000_phy_i217) || (hw->phy.type == e1000_phy_82577)) { E1000_WRITE_REG(hw, E1000_FCRTV_PCH, hw->fc.refresh_time); ret_val = hw->phy.ops.write_reg(hw, PHY_REG(BM_PORT_CTRL_PAGE, 27), hw->fc.pause_time); if (ret_val) return ret_val; } return e1000_set_fc_watermarks_generic(hw); } /** * e1000_setup_copper_link_ich8lan - Configure MAC/PHY interface * @hw: pointer to the HW structure * * Configures the kumeran interface to the PHY to wait the appropriate time * when polling the PHY, then call the generic setup_copper_link to finish * configuring the copper link. **/ static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; u16 reg_data; DEBUGFUNC("e1000_setup_copper_link_ich8lan"); ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= E1000_CTRL_SLU; ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); /* Set the mac to wait the maximum time between each iteration * and increase the max iterations when polling the phy; * this fixes erroneous timeouts at 10Mbps. */ ret_val = e1000_write_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_TIMEOUTS, 0xFFFF); if (ret_val) return ret_val; ret_val = e1000_read_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, ®_data); if (ret_val) return ret_val; reg_data |= 0x3F; ret_val = e1000_write_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, reg_data); if (ret_val) return ret_val; switch (hw->phy.type) { case e1000_phy_igp_3: ret_val = e1000_copper_link_setup_igp(hw); if (ret_val) return ret_val; break; case e1000_phy_bm: case e1000_phy_82578: ret_val = e1000_copper_link_setup_m88(hw); if (ret_val) return ret_val; break; case e1000_phy_82577: case e1000_phy_82579: ret_val = e1000_copper_link_setup_82577(hw); if (ret_val) return ret_val; break; case e1000_phy_ife: ret_val = hw->phy.ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, ®_data); if (ret_val) return ret_val; reg_data &= ~IFE_PMC_AUTO_MDIX; switch (hw->phy.mdix) { case 1: reg_data &= ~IFE_PMC_FORCE_MDIX; break; case 2: reg_data |= IFE_PMC_FORCE_MDIX; break; case 0: default: reg_data |= IFE_PMC_AUTO_MDIX; break; } ret_val = hw->phy.ops.write_reg(hw, IFE_PHY_MDIX_CONTROL, reg_data); if (ret_val) return ret_val; break; default: break; } return e1000_setup_copper_link_generic(hw); } /** * e1000_setup_copper_link_pch_lpt - Configure MAC/PHY interface * @hw: pointer to the HW structure * * Calls the PHY specific link setup function and then calls the * generic setup_copper_link to finish configuring the link for * Lynxpoint PCH devices **/ static s32 e1000_setup_copper_link_pch_lpt(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; DEBUGFUNC("e1000_setup_copper_link_pch_lpt"); ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= E1000_CTRL_SLU; ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ret_val = e1000_copper_link_setup_82577(hw); if (ret_val) return ret_val; return e1000_setup_copper_link_generic(hw); } /** * e1000_get_link_up_info_ich8lan - Get current link speed and duplex * @hw: pointer to the HW structure * @speed: pointer to store current link speed * @duplex: pointer to store the current link duplex * * Calls the generic get_speed_and_duplex to retrieve the current link * information and then calls the Kumeran lock loss workaround for links at * gigabit speeds. **/ static s32 e1000_get_link_up_info_ich8lan(struct e1000_hw *hw, u16 *speed, u16 *duplex) { s32 ret_val; DEBUGFUNC("e1000_get_link_up_info_ich8lan"); ret_val = e1000_get_speed_and_duplex_copper_generic(hw, speed, duplex); if (ret_val) return ret_val; if ((hw->mac.type == e1000_ich8lan) && (hw->phy.type == e1000_phy_igp_3) && (*speed == SPEED_1000)) { ret_val = e1000_kmrn_lock_loss_workaround_ich8lan(hw); } return ret_val; } /** * e1000_kmrn_lock_loss_workaround_ich8lan - Kumeran workaround * @hw: pointer to the HW structure * * Work-around for 82566 Kumeran PCS lock loss: * On link status change (i.e. PCI reset, speed change) and link is up and * speed is gigabit- * 0) if workaround is optionally disabled do nothing * 1) wait 1ms for Kumeran link to come up * 2) check Kumeran Diagnostic register PCS lock loss bit * 3) if not set the link is locked (all is good), otherwise... * 4) reset the PHY * 5) repeat up to 10 times * Note: this is only called for IGP3 copper when speed is 1gb. **/ static s32 e1000_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw) { struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u32 phy_ctrl; s32 ret_val; u16 i, data; bool link; DEBUGFUNC("e1000_kmrn_lock_loss_workaround_ich8lan"); if (!dev_spec->kmrn_lock_loss_workaround_enabled) return E1000_SUCCESS; /* Make sure link is up before proceeding. If not just return. * Attempting this while link is negotiating fouled up link * stability */ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); if (!link) return E1000_SUCCESS; for (i = 0; i < 10; i++) { /* read once to clear */ ret_val = hw->phy.ops.read_reg(hw, IGP3_KMRN_DIAG, &data); if (ret_val) return ret_val; /* and again to get new status */ ret_val = hw->phy.ops.read_reg(hw, IGP3_KMRN_DIAG, &data); if (ret_val) return ret_val; /* check for PCS lock */ if (!(data & IGP3_KMRN_DIAG_PCS_LOCK_LOSS)) return E1000_SUCCESS; /* Issue PHY reset */ hw->phy.ops.reset(hw); msec_delay_irq(5); } /* Disable GigE link negotiation */ phy_ctrl = E1000_READ_REG(hw, E1000_PHY_CTRL); phy_ctrl |= (E1000_PHY_CTRL_GBE_DISABLE | E1000_PHY_CTRL_NOND0A_GBE_DISABLE); E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); /* Call gig speed drop workaround on Gig disable before accessing * any PHY registers */ e1000_gig_downshift_workaround_ich8lan(hw); /* unable to acquire PCS lock */ return -E1000_ERR_PHY; } /** * e1000_set_kmrn_lock_loss_workaround_ich8lan - Set Kumeran workaround state * @hw: pointer to the HW structure * @state: boolean value used to set the current Kumeran workaround state * * If ICH8, set the current Kumeran workaround state (enabled - TRUE * /disabled - FALSE). **/ void e1000_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, bool state) { struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; DEBUGFUNC("e1000_set_kmrn_lock_loss_workaround_ich8lan"); if (hw->mac.type != e1000_ich8lan) { DEBUGOUT("Workaround applies to ICH8 only.\n"); return; } dev_spec->kmrn_lock_loss_workaround_enabled = state; return; } /** * e1000_ipg3_phy_powerdown_workaround_ich8lan - Power down workaround on D3 * @hw: pointer to the HW structure * * Workaround for 82566 power-down on D3 entry: * 1) disable gigabit link * 2) write VR power-down enable * 3) read it back * Continue if successful, else issue LCD reset and repeat **/ void e1000_igp3_phy_powerdown_workaround_ich8lan(struct e1000_hw *hw) { u32 reg; u16 data; u8 retry = 0; DEBUGFUNC("e1000_igp3_phy_powerdown_workaround_ich8lan"); if (hw->phy.type != e1000_phy_igp_3) return; /* Try the workaround twice (if needed) */ do { /* Disable link */ reg = E1000_READ_REG(hw, E1000_PHY_CTRL); reg |= (E1000_PHY_CTRL_GBE_DISABLE | E1000_PHY_CTRL_NOND0A_GBE_DISABLE); E1000_WRITE_REG(hw, E1000_PHY_CTRL, reg); /* Call gig speed drop workaround on Gig disable before * accessing any PHY registers */ if (hw->mac.type == e1000_ich8lan) e1000_gig_downshift_workaround_ich8lan(hw); /* Write VR power-down enable */ hw->phy.ops.read_reg(hw, IGP3_VR_CTRL, &data); data &= ~IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK; hw->phy.ops.write_reg(hw, IGP3_VR_CTRL, data | IGP3_VR_CTRL_MODE_SHUTDOWN); /* Read it back and test */ hw->phy.ops.read_reg(hw, IGP3_VR_CTRL, &data); data &= IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK; if ((data == IGP3_VR_CTRL_MODE_SHUTDOWN) || retry) break; /* Issue PHY reset and repeat at most one more time */ reg = E1000_READ_REG(hw, E1000_CTRL); E1000_WRITE_REG(hw, E1000_CTRL, reg | E1000_CTRL_PHY_RST); retry++; } while (retry); } /** * e1000_gig_downshift_workaround_ich8lan - WoL from S5 stops working * @hw: pointer to the HW structure * * Steps to take when dropping from 1Gb/s (eg. link cable removal (LSC), * LPLU, Gig disable, MDIC PHY reset): * 1) Set Kumeran Near-end loopback * 2) Clear Kumeran Near-end loopback * Should only be called for ICH8[m] devices with any 1G Phy. **/ void e1000_gig_downshift_workaround_ich8lan(struct e1000_hw *hw) { s32 ret_val; u16 reg_data; DEBUGFUNC("e1000_gig_downshift_workaround_ich8lan"); if ((hw->mac.type != e1000_ich8lan) || (hw->phy.type == e1000_phy_ife)) return; ret_val = e1000_read_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, ®_data); if (ret_val) return; reg_data |= E1000_KMRNCTRLSTA_DIAG_NELPBK; ret_val = e1000_write_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, reg_data); if (ret_val) return; reg_data &= ~E1000_KMRNCTRLSTA_DIAG_NELPBK; e1000_write_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, reg_data); } /** * e1000_suspend_workarounds_ich8lan - workarounds needed during S0->Sx * @hw: pointer to the HW structure * * During S0 to Sx transition, it is possible the link remains at gig * instead of negotiating to a lower speed. Before going to Sx, set * 'Gig Disable' to force link speed negotiation to a lower speed based on * the LPLU setting in the NVM or custom setting. For PCH and newer parts, * the OEM bits PHY register (LED, GbE disable and LPLU configurations) also * needs to be written. * Parts that support (and are linked to a partner which support) EEE in * 100Mbps should disable LPLU since 100Mbps w/ EEE requires less power * than 10Mbps w/o EEE. **/ void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw) { struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u32 phy_ctrl; s32 ret_val; DEBUGFUNC("e1000_suspend_workarounds_ich8lan"); phy_ctrl = E1000_READ_REG(hw, E1000_PHY_CTRL); phy_ctrl |= E1000_PHY_CTRL_GBE_DISABLE; if (hw->phy.type == e1000_phy_i217) { u16 phy_reg, device_id = hw->device_id; if ((device_id == E1000_DEV_ID_PCH_LPTLP_I218_LM) || (device_id == E1000_DEV_ID_PCH_LPTLP_I218_V) || (device_id == E1000_DEV_ID_PCH_I218_LM3) || - (device_id == E1000_DEV_ID_PCH_I218_V3) || - (hw->mac.type == e1000_pch_spt)) { + (device_id == E1000_DEV_ID_PCH_I218_V3)) { u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6); E1000_WRITE_REG(hw, E1000_FEXTNVM6, fextnvm6 & ~E1000_FEXTNVM6_REQ_PLL_CLK); } ret_val = hw->phy.ops.acquire(hw); if (ret_val) goto out; if (!dev_spec->eee_disable) { u16 eee_advert; ret_val = e1000_read_emi_reg_locked(hw, I217_EEE_ADVERTISEMENT, &eee_advert); if (ret_val) goto release; /* Disable LPLU if both link partners support 100BaseT * EEE and 100Full is advertised on both ends of the * link, and enable Auto Enable LPI since there will * be no driver to enable LPI while in Sx. */ if ((eee_advert & I82579_EEE_100_SUPPORTED) && (dev_spec->eee_lp_ability & I82579_EEE_100_SUPPORTED) && (hw->phy.autoneg_advertised & ADVERTISE_100_FULL)) { phy_ctrl &= ~(E1000_PHY_CTRL_D0A_LPLU | E1000_PHY_CTRL_NOND0A_LPLU); /* Set Auto Enable LPI after link up */ hw->phy.ops.read_reg_locked(hw, I217_LPI_GPIO_CTRL, &phy_reg); phy_reg |= I217_LPI_GPIO_CTRL_AUTO_EN_LPI; hw->phy.ops.write_reg_locked(hw, I217_LPI_GPIO_CTRL, phy_reg); } } /* For i217 Intel Rapid Start Technology support, * when the system is going into Sx and no manageability engine * is present, the driver must configure proxy to reset only on * power good. LPI (Low Power Idle) state must also reset only * on power good, as well as the MTA (Multicast table array). * The SMBus release must also be disabled on LCD reset. */ if (!(E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID)) { /* Enable proxy to reset only on power good. */ hw->phy.ops.read_reg_locked(hw, I217_PROXY_CTRL, &phy_reg); phy_reg |= I217_PROXY_CTRL_AUTO_DISABLE; hw->phy.ops.write_reg_locked(hw, I217_PROXY_CTRL, phy_reg); /* Set bit enable LPI (EEE) to reset only on * power good. */ hw->phy.ops.read_reg_locked(hw, I217_SxCTRL, &phy_reg); phy_reg |= I217_SxCTRL_ENABLE_LPI_RESET; hw->phy.ops.write_reg_locked(hw, I217_SxCTRL, phy_reg); /* Disable the SMB release on LCD reset. */ hw->phy.ops.read_reg_locked(hw, I217_MEMPWR, &phy_reg); phy_reg &= ~I217_MEMPWR_DISABLE_SMB_RELEASE; hw->phy.ops.write_reg_locked(hw, I217_MEMPWR, phy_reg); } /* Enable MTA to reset for Intel Rapid Start Technology * Support */ hw->phy.ops.read_reg_locked(hw, I217_CGFREG, &phy_reg); phy_reg |= I217_CGFREG_ENABLE_MTA_RESET; hw->phy.ops.write_reg_locked(hw, I217_CGFREG, phy_reg); release: hw->phy.ops.release(hw); } out: E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); if (hw->mac.type == e1000_ich8lan) e1000_gig_downshift_workaround_ich8lan(hw); if (hw->mac.type >= e1000_pchlan) { e1000_oem_bits_config_ich8lan(hw, FALSE); /* Reset PHY to activate OEM bits on 82577/8 */ if (hw->mac.type == e1000_pchlan) e1000_phy_hw_reset_generic(hw); ret_val = hw->phy.ops.acquire(hw); if (ret_val) return; e1000_write_smbus_addr(hw); hw->phy.ops.release(hw); } return; } /** * e1000_resume_workarounds_pchlan - workarounds needed during Sx->S0 * @hw: pointer to the HW structure * * During Sx to S0 transitions on non-managed devices or managed devices * on which PHY resets are not blocked, if the PHY registers cannot be * accessed properly by the s/w toggle the LANPHYPC value to power cycle * the PHY. * On i217, setup Intel Rapid Start Technology. **/ -u32 e1000_resume_workarounds_pchlan(struct e1000_hw *hw) +void e1000_resume_workarounds_pchlan(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_resume_workarounds_pchlan"); if (hw->mac.type < e1000_pch2lan) - return E1000_SUCCESS; + return; ret_val = e1000_init_phy_workarounds_pchlan(hw); if (ret_val) { DEBUGOUT1("Failed to init PHY flow ret_val=%d\n", ret_val); - return ret_val; + return; } /* For i217 Intel Rapid Start Technology support when the system * is transitioning from Sx and no manageability engine is present * configure SMBus to restore on reset, disable proxy, and enable * the reset on MTA (Multicast table array). */ if (hw->phy.type == e1000_phy_i217) { u16 phy_reg; ret_val = hw->phy.ops.acquire(hw); if (ret_val) { DEBUGOUT("Failed to setup iRST\n"); - return ret_val; + return; } /* Clear Auto Enable LPI after link up */ hw->phy.ops.read_reg_locked(hw, I217_LPI_GPIO_CTRL, &phy_reg); phy_reg &= ~I217_LPI_GPIO_CTRL_AUTO_EN_LPI; hw->phy.ops.write_reg_locked(hw, I217_LPI_GPIO_CTRL, phy_reg); if (!(E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID)) { /* Restore clear on SMB if no manageability engine * is present */ ret_val = hw->phy.ops.read_reg_locked(hw, I217_MEMPWR, &phy_reg); if (ret_val) goto release; phy_reg |= I217_MEMPWR_DISABLE_SMB_RELEASE; hw->phy.ops.write_reg_locked(hw, I217_MEMPWR, phy_reg); /* Disable Proxy */ hw->phy.ops.write_reg_locked(hw, I217_PROXY_CTRL, 0); } /* Enable reset on MTA */ ret_val = hw->phy.ops.read_reg_locked(hw, I217_CGFREG, &phy_reg); if (ret_val) goto release; phy_reg &= ~I217_CGFREG_ENABLE_MTA_RESET; hw->phy.ops.write_reg_locked(hw, I217_CGFREG, phy_reg); release: if (ret_val) DEBUGOUT1("Error %d in resume workarounds\n", ret_val); hw->phy.ops.release(hw); - return ret_val; } - return E1000_SUCCESS; } /** * e1000_cleanup_led_ich8lan - Restore the default LED operation * @hw: pointer to the HW structure * * Return the LED back to the default configuration. **/ static s32 e1000_cleanup_led_ich8lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_cleanup_led_ich8lan"); if (hw->phy.type == e1000_phy_ife) return hw->phy.ops.write_reg(hw, IFE_PHY_SPECIAL_CONTROL_LED, 0); E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_default); return E1000_SUCCESS; } /** * e1000_led_on_ich8lan - Turn LEDs on * @hw: pointer to the HW structure * * Turn on the LEDs. **/ static s32 e1000_led_on_ich8lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_led_on_ich8lan"); if (hw->phy.type == e1000_phy_ife) return hw->phy.ops.write_reg(hw, IFE_PHY_SPECIAL_CONTROL_LED, (IFE_PSCL_PROBE_MODE | IFE_PSCL_PROBE_LEDS_ON)); E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode2); return E1000_SUCCESS; } /** * e1000_led_off_ich8lan - Turn LEDs off * @hw: pointer to the HW structure * * Turn off the LEDs. **/ static s32 e1000_led_off_ich8lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_led_off_ich8lan"); if (hw->phy.type == e1000_phy_ife) return hw->phy.ops.write_reg(hw, IFE_PHY_SPECIAL_CONTROL_LED, (IFE_PSCL_PROBE_MODE | IFE_PSCL_PROBE_LEDS_OFF)); E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); return E1000_SUCCESS; } /** * e1000_setup_led_pchlan - Configures SW controllable LED * @hw: pointer to the HW structure * * This prepares the SW controllable LED for use. **/ static s32 e1000_setup_led_pchlan(struct e1000_hw *hw) { DEBUGFUNC("e1000_setup_led_pchlan"); return hw->phy.ops.write_reg(hw, HV_LED_CONFIG, (u16)hw->mac.ledctl_mode1); } /** * e1000_cleanup_led_pchlan - Restore the default LED operation * @hw: pointer to the HW structure * * Return the LED back to the default configuration. **/ static s32 e1000_cleanup_led_pchlan(struct e1000_hw *hw) { DEBUGFUNC("e1000_cleanup_led_pchlan"); return hw->phy.ops.write_reg(hw, HV_LED_CONFIG, (u16)hw->mac.ledctl_default); } /** * e1000_led_on_pchlan - Turn LEDs on * @hw: pointer to the HW structure * * Turn on the LEDs. **/ static s32 e1000_led_on_pchlan(struct e1000_hw *hw) { u16 data = (u16)hw->mac.ledctl_mode2; u32 i, led; DEBUGFUNC("e1000_led_on_pchlan"); /* If no link, then turn LED on by setting the invert bit * for each LED that's mode is "link_up" in ledctl_mode2. */ if (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { for (i = 0; i < 3; i++) { led = (data >> (i * 5)) & E1000_PHY_LED0_MASK; if ((led & E1000_PHY_LED0_MODE_MASK) != E1000_LEDCTL_MODE_LINK_UP) continue; if (led & E1000_PHY_LED0_IVRT) data &= ~(E1000_PHY_LED0_IVRT << (i * 5)); else data |= (E1000_PHY_LED0_IVRT << (i * 5)); } } return hw->phy.ops.write_reg(hw, HV_LED_CONFIG, data); } /** * e1000_led_off_pchlan - Turn LEDs off * @hw: pointer to the HW structure * * Turn off the LEDs. **/ static s32 e1000_led_off_pchlan(struct e1000_hw *hw) { u16 data = (u16)hw->mac.ledctl_mode1; u32 i, led; DEBUGFUNC("e1000_led_off_pchlan"); /* If no link, then turn LED off by clearing the invert bit * for each LED that's mode is "link_up" in ledctl_mode1. */ if (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { for (i = 0; i < 3; i++) { led = (data >> (i * 5)) & E1000_PHY_LED0_MASK; if ((led & E1000_PHY_LED0_MODE_MASK) != E1000_LEDCTL_MODE_LINK_UP) continue; if (led & E1000_PHY_LED0_IVRT) data &= ~(E1000_PHY_LED0_IVRT << (i * 5)); else data |= (E1000_PHY_LED0_IVRT << (i * 5)); } } return hw->phy.ops.write_reg(hw, HV_LED_CONFIG, data); } /** * e1000_get_cfg_done_ich8lan - Read config done bit after Full or PHY reset * @hw: pointer to the HW structure * * Read appropriate register for the config done bit for completion status * and configure the PHY through s/w for EEPROM-less parts. * * NOTE: some silicon which is EEPROM-less will fail trying to read the * config done bit, so only an error is logged and continues. If we were * to return with error, EEPROM-less silicon would not be able to be reset * or change link. **/ static s32 e1000_get_cfg_done_ich8lan(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u32 bank = 0; u32 status; DEBUGFUNC("e1000_get_cfg_done_ich8lan"); e1000_get_cfg_done_generic(hw); /* Wait for indication from h/w that it has completed basic config */ if (hw->mac.type >= e1000_ich10lan) { e1000_lan_init_done_ich8lan(hw); } else { ret_val = e1000_get_auto_rd_done_generic(hw); if (ret_val) { /* When auto config read does not complete, do not * return with an error. This can happen in situations * where there is no eeprom and prevents getting link. */ DEBUGOUT("Auto Read Done did not complete\n"); ret_val = E1000_SUCCESS; } } /* Clear PHY Reset Asserted bit */ status = E1000_READ_REG(hw, E1000_STATUS); if (status & E1000_STATUS_PHYRA) E1000_WRITE_REG(hw, E1000_STATUS, status & ~E1000_STATUS_PHYRA); else DEBUGOUT("PHY Reset Asserted not set - needs delay\n"); /* If EEPROM is not marked present, init the IGP 3 PHY manually */ if (hw->mac.type <= e1000_ich9lan) { if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES) && (hw->phy.type == e1000_phy_igp_3)) { e1000_phy_init_script_igp3(hw); } } else { if (e1000_valid_nvm_bank_detect_ich8lan(hw, &bank)) { /* Maybe we should do a basic PHY config */ DEBUGOUT("EEPROM not present\n"); ret_val = -E1000_ERR_CONFIG; } } return ret_val; } /** * e1000_power_down_phy_copper_ich8lan - Remove link during PHY power down * @hw: pointer to the HW structure * * In the case of a PHY power down to save power, or to turn off link during a * driver unload, or wake on lan is not enabled, remove the link. **/ static void e1000_power_down_phy_copper_ich8lan(struct e1000_hw *hw) { /* If the management interface is not enabled, then power down */ if (!(hw->mac.ops.check_mng_mode(hw) || hw->phy.ops.check_reset_block(hw))) e1000_power_down_phy_copper(hw); return; } /** * e1000_clear_hw_cntrs_ich8lan - Clear statistical counters * @hw: pointer to the HW structure * * Clears hardware counters specific to the silicon family and calls * clear_hw_cntrs_generic to clear all general purpose counters. **/ static void e1000_clear_hw_cntrs_ich8lan(struct e1000_hw *hw) { u16 phy_data; s32 ret_val; DEBUGFUNC("e1000_clear_hw_cntrs_ich8lan"); e1000_clear_hw_cntrs_base_generic(hw); E1000_READ_REG(hw, E1000_ALGNERRC); E1000_READ_REG(hw, E1000_RXERRC); E1000_READ_REG(hw, E1000_TNCRS); E1000_READ_REG(hw, E1000_CEXTERR); E1000_READ_REG(hw, E1000_TSCTC); E1000_READ_REG(hw, E1000_TSCTFC); E1000_READ_REG(hw, E1000_MGTPRC); E1000_READ_REG(hw, E1000_MGTPDC); E1000_READ_REG(hw, E1000_MGTPTC); E1000_READ_REG(hw, E1000_IAC); E1000_READ_REG(hw, E1000_ICRXOC); /* Clear PHY statistics registers */ if ((hw->phy.type == e1000_phy_82578) || (hw->phy.type == e1000_phy_82579) || (hw->phy.type == e1000_phy_i217) || (hw->phy.type == e1000_phy_82577)) { ret_val = hw->phy.ops.acquire(hw); if (ret_val) return; ret_val = hw->phy.ops.set_page(hw, HV_STATS_PAGE << IGP_PAGE_SHIFT); if (ret_val) goto release; hw->phy.ops.read_reg_page(hw, HV_SCC_UPPER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_SCC_LOWER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_ECOL_UPPER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_ECOL_LOWER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_MCC_UPPER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_MCC_LOWER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_LATECOL_UPPER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_LATECOL_LOWER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_COLC_UPPER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_COLC_LOWER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_DC_UPPER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_DC_LOWER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_TNCRS_UPPER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_TNCRS_LOWER, &phy_data); release: hw->phy.ops.release(hw); } } Index: projects/clang-trunk/sys/dev/e1000/e1000_ich8lan.h =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_ich8lan.h (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_ich8lan.h (revision 287502) @@ -1,317 +1,322 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _E1000_ICH8LAN_H_ #define _E1000_ICH8LAN_H_ -#define ICH_FLASH_GFPREG 0xE000 -#define ICH_FLASH_HSFSTS 0xE004 -#define ICH_FLASH_HSFCTL 0xE006 -#define ICH_FLASH_FADDR 0xE008 -#define ICH_FLASH_FDATA0 0xE010 +#define ICH_FLASH_GFPREG 0x0000 +#define ICH_FLASH_HSFSTS 0x0004 +#define ICH_FLASH_HSFCTL 0x0006 +#define ICH_FLASH_FADDR 0x0008 +#define ICH_FLASH_FDATA0 0x0010 /* Requires up to 10 seconds when MNG might be accessing part. */ #define ICH_FLASH_READ_COMMAND_TIMEOUT 10000000 #define ICH_FLASH_WRITE_COMMAND_TIMEOUT 10000000 #define ICH_FLASH_ERASE_COMMAND_TIMEOUT 10000000 #define ICH_FLASH_LINEAR_ADDR_MASK 0x00FFFFFF #define ICH_FLASH_CYCLE_REPEAT_COUNT 10 #define ICH_CYCLE_READ 0 #define ICH_CYCLE_WRITE 2 #define ICH_CYCLE_ERASE 3 #define FLASH_GFPREG_BASE_MASK 0x1FFF #define FLASH_SECTOR_ADDR_SHIFT 12 #define ICH_FLASH_SEG_SIZE_256 256 #define ICH_FLASH_SEG_SIZE_4K 4096 #define ICH_FLASH_SEG_SIZE_8K 8192 #define ICH_FLASH_SEG_SIZE_64K 65536 #define E1000_ICH_FWSM_RSPCIPHY 0x00000040 /* Reset PHY on PCI Reset */ /* FW established a valid mode */ #define E1000_ICH_FWSM_FW_VALID 0x00008000 #define E1000_ICH_FWSM_PCIM2PCI 0x01000000 /* ME PCIm-to-PCI active */ #define E1000_ICH_FWSM_PCIM2PCI_COUNT 2000 #define E1000_ICH_MNG_IAMT_MODE 0x2 #define E1000_FWSM_WLOCK_MAC_MASK 0x0380 #define E1000_FWSM_WLOCK_MAC_SHIFT 7 #define E1000_FWSM_ULP_CFG_DONE 0x00000400 /* Low power cfg done */ /* Shared Receive Address Registers */ #define E1000_SHRAL_PCH_LPT(_i) (0x05408 + ((_i) * 8)) #define E1000_SHRAH_PCH_LPT(_i) (0x0540C + ((_i) * 8)) #define E1000_H2ME 0x05B50 /* Host to ME */ #define E1000_H2ME_ULP 0x00000800 /* ULP Indication Bit */ #define E1000_H2ME_ENFORCE_SETTINGS 0x00001000 /* Enforce Settings */ #define ID_LED_DEFAULT_ICH8LAN ((ID_LED_DEF1_DEF2 << 12) | \ (ID_LED_OFF1_OFF2 << 8) | \ (ID_LED_OFF1_ON2 << 4) | \ (ID_LED_DEF1_DEF2)) #define E1000_ICH_NVM_SIG_WORD 0x13 #define E1000_ICH_NVM_SIG_MASK 0xC000 #define E1000_ICH_NVM_VALID_SIG_MASK 0xC0 #define E1000_ICH_NVM_SIG_VALUE 0x80 #define E1000_ICH8_LAN_INIT_TIMEOUT 1500 /* FEXT register bit definition */ #define E1000_FEXT_PHY_CABLE_DISCONNECTED 0x00000004 #define E1000_FEXTNVM_SW_CONFIG 1 #define E1000_FEXTNVM_SW_CONFIG_ICH8M (1 << 27) /* different on ICH8M */ #define E1000_FEXTNVM3_PHY_CFG_COUNTER_MASK 0x0C000000 #define E1000_FEXTNVM3_PHY_CFG_COUNTER_50MSEC 0x08000000 #define E1000_FEXTNVM4_BEACON_DURATION_MASK 0x7 #define E1000_FEXTNVM4_BEACON_DURATION_8USEC 0x7 #define E1000_FEXTNVM4_BEACON_DURATION_16USEC 0x3 #define E1000_FEXTNVM6_REQ_PLL_CLK 0x00000100 #define E1000_FEXTNVM6_ENABLE_K1_ENTRY_CONDITION 0x00000200 -#define E1000_FEXTNVM6_K1_OFF_ENABLE 0x80000000 -/* bit for disabling packet buffer read */ -#define E1000_FEXTNVM7_DISABLE_PB_READ 0x00040000 -#define E1000_FEXTNVM7_SIDE_CLK_UNGATE 0x00000004 + #define E1000_FEXTNVM7_DISABLE_SMB_PERST 0x00000020 -#define E1000_FEXTNVM9_IOSFSB_CLKGATE_DIS 0x00000800 -#define E1000_FEXTNVM9_IOSFSB_CLKREQ_DIS 0x00001000 -#define E1000_FEXTNVM11_DISABLE_PB_READ 0x00000200 -#define E1000_FEXTNVM11_DISABLE_MULR_FIX 0x00002000 -/* bit24: RXDCTL thresholds granularity: 0 - cache lines, 1 - descriptors */ -#define E1000_RXDCTL_THRESH_UNIT_DESC 0x01000000 - -#define NVM_SIZE_MULTIPLIER 4096 /*multiplier for NVMS field*/ -#define E1000_FLASH_BASE_ADDR 0xE000 /*offset of NVM access regs*/ -#define E1000_CTRL_EXT_NVMVS 0x3 /*NVM valid sector */ -#define E1000_SPT_B_STEP_REV 0x10 /*SPT B step Rev ID*/ -#define E1000_TARC0_CB_MULTIQ_2_REQ (1 << 29) -#define E1000_TARC0_CB_MULTIQ_3_REQ (1 << 28 | 1 << 29) #define PCIE_ICH8_SNOOP_ALL PCIE_NO_SNOOP_ALL #define E1000_ICH_RAR_ENTRIES 7 #define E1000_PCH2_RAR_ENTRIES 5 /* RAR[0], SHRA[0-3] */ #define E1000_PCH_LPT_RAR_ENTRIES 12 /* RAR[0], SHRA[0-10] */ #define PHY_PAGE_SHIFT 5 #define PHY_REG(page, reg) (((page) << PHY_PAGE_SHIFT) | \ ((reg) & MAX_PHY_REG_ADDRESS)) #define IGP3_KMRN_DIAG PHY_REG(770, 19) /* KMRN Diagnostic */ #define IGP3_VR_CTRL PHY_REG(776, 18) /* Voltage Regulator Control */ #define IGP3_KMRN_DIAG_PCS_LOCK_LOSS 0x0002 #define IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK 0x0300 #define IGP3_VR_CTRL_MODE_SHUTDOWN 0x0200 /* PHY Wakeup Registers and defines */ #define BM_PORT_GEN_CFG PHY_REG(BM_PORT_CTRL_PAGE, 17) #define BM_RCTL PHY_REG(BM_WUC_PAGE, 0) #define BM_WUC PHY_REG(BM_WUC_PAGE, 1) #define BM_WUFC PHY_REG(BM_WUC_PAGE, 2) #define BM_WUS PHY_REG(BM_WUC_PAGE, 3) #define BM_RAR_L(_i) (BM_PHY_REG(BM_WUC_PAGE, 16 + ((_i) << 2))) #define BM_RAR_M(_i) (BM_PHY_REG(BM_WUC_PAGE, 17 + ((_i) << 2))) #define BM_RAR_H(_i) (BM_PHY_REG(BM_WUC_PAGE, 18 + ((_i) << 2))) #define BM_RAR_CTRL(_i) (BM_PHY_REG(BM_WUC_PAGE, 19 + ((_i) << 2))) #define BM_MTA(_i) (BM_PHY_REG(BM_WUC_PAGE, 128 + ((_i) << 1))) #define BM_RCTL_UPE 0x0001 /* Unicast Promiscuous Mode */ #define BM_RCTL_MPE 0x0002 /* Multicast Promiscuous Mode */ #define BM_RCTL_MO_SHIFT 3 /* Multicast Offset Shift */ #define BM_RCTL_MO_MASK (3 << 3) /* Multicast Offset Mask */ #define BM_RCTL_BAM 0x0020 /* Broadcast Accept Mode */ #define BM_RCTL_PMCF 0x0040 /* Pass MAC Control Frames */ #define BM_RCTL_RFCE 0x0080 /* Rx Flow Control Enable */ #define HV_LED_CONFIG PHY_REG(768, 30) /* LED Configuration */ #define HV_MUX_DATA_CTRL PHY_REG(776, 16) #define HV_MUX_DATA_CTRL_GEN_TO_MAC 0x0400 #define HV_MUX_DATA_CTRL_FORCE_SPEED 0x0004 #define HV_STATS_PAGE 778 /* Half-duplex collision counts */ #define HV_SCC_UPPER PHY_REG(HV_STATS_PAGE, 16) /* Single Collision */ #define HV_SCC_LOWER PHY_REG(HV_STATS_PAGE, 17) #define HV_ECOL_UPPER PHY_REG(HV_STATS_PAGE, 18) /* Excessive Coll. */ #define HV_ECOL_LOWER PHY_REG(HV_STATS_PAGE, 19) #define HV_MCC_UPPER PHY_REG(HV_STATS_PAGE, 20) /* Multiple Collision */ #define HV_MCC_LOWER PHY_REG(HV_STATS_PAGE, 21) #define HV_LATECOL_UPPER PHY_REG(HV_STATS_PAGE, 23) /* Late Collision */ #define HV_LATECOL_LOWER PHY_REG(HV_STATS_PAGE, 24) #define HV_COLC_UPPER PHY_REG(HV_STATS_PAGE, 25) /* Collision */ #define HV_COLC_LOWER PHY_REG(HV_STATS_PAGE, 26) #define HV_DC_UPPER PHY_REG(HV_STATS_PAGE, 27) /* Defer Count */ #define HV_DC_LOWER PHY_REG(HV_STATS_PAGE, 28) #define HV_TNCRS_UPPER PHY_REG(HV_STATS_PAGE, 29) /* Tx with no CRS */ #define HV_TNCRS_LOWER PHY_REG(HV_STATS_PAGE, 30) #define E1000_FCRTV_PCH 0x05F40 /* PCH Flow Control Refresh Timer Value */ #define E1000_NVM_K1_CONFIG 0x1B /* NVM K1 Config Word */ #define E1000_NVM_K1_ENABLE 0x1 /* NVM Enable K1 bit */ -#define K1_ENTRY_LATENCY 0 -#define K1_MIN_TIME 1 /* SMBus Control Phy Register */ #define CV_SMB_CTRL PHY_REG(769, 23) #define CV_SMB_CTRL_FORCE_SMBUS 0x0001 /* I218 Ultra Low Power Configuration 1 Register */ #define I218_ULP_CONFIG1 PHY_REG(779, 16) #define I218_ULP_CONFIG1_START 0x0001 /* Start auto ULP config */ #define I218_ULP_CONFIG1_IND 0x0004 /* Pwr up from ULP indication */ #define I218_ULP_CONFIG1_STICKY_ULP 0x0010 /* Set sticky ULP mode */ #define I218_ULP_CONFIG1_INBAND_EXIT 0x0020 /* Inband on ULP exit */ #define I218_ULP_CONFIG1_WOL_HOST 0x0040 /* WoL Host on ULP exit */ #define I218_ULP_CONFIG1_RESET_TO_SMBUS 0x0100 /* Reset to SMBus mode */ #define I218_ULP_CONFIG1_DISABLE_SMB_PERST 0x1000 /* Disable on PERST# */ /* SMBus Address Phy Register */ #define HV_SMB_ADDR PHY_REG(768, 26) #define HV_SMB_ADDR_MASK 0x007F #define HV_SMB_ADDR_PEC_EN 0x0200 #define HV_SMB_ADDR_VALID 0x0080 #define HV_SMB_ADDR_FREQ_MASK 0x1100 #define HV_SMB_ADDR_FREQ_LOW_SHIFT 8 #define HV_SMB_ADDR_FREQ_HIGH_SHIFT 12 /* Strapping Option Register - RO */ #define E1000_STRAP 0x0000C #define E1000_STRAP_SMBUS_ADDRESS_MASK 0x00FE0000 #define E1000_STRAP_SMBUS_ADDRESS_SHIFT 17 #define E1000_STRAP_SMT_FREQ_MASK 0x00003000 #define E1000_STRAP_SMT_FREQ_SHIFT 12 /* OEM Bits Phy Register */ #define HV_OEM_BITS PHY_REG(768, 25) #define HV_OEM_BITS_LPLU 0x0004 /* Low Power Link Up */ #define HV_OEM_BITS_GBE_DIS 0x0040 /* Gigabit Disable */ #define HV_OEM_BITS_RESTART_AN 0x0400 /* Restart Auto-negotiation */ /* KMRN Mode Control */ #define HV_KMRN_MODE_CTRL PHY_REG(769, 16) #define HV_KMRN_MDIO_SLOW 0x0400 /* KMRN FIFO Control and Status */ #define HV_KMRN_FIFO_CTRLSTA PHY_REG(770, 16) #define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK 0x7000 #define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT 12 /* PHY Power Management Control */ #define HV_PM_CTRL PHY_REG(770, 17) #define HV_PM_CTRL_PLL_STOP_IN_K1_GIGA 0x100 #define HV_PM_CTRL_K1_ENABLE 0x4000 #define SW_FLAG_TIMEOUT 1000 /* SW Semaphore flag timeout in ms */ /* Inband Control */ #define I217_INBAND_CTRL PHY_REG(770, 18) #define I217_INBAND_CTRL_LINK_STAT_TX_TIMEOUT_MASK 0x3F00 #define I217_INBAND_CTRL_LINK_STAT_TX_TIMEOUT_SHIFT 8 /* Low Power Idle GPIO Control */ #define I217_LPI_GPIO_CTRL PHY_REG(772, 18) #define I217_LPI_GPIO_CTRL_AUTO_EN_LPI 0x0800 /* PHY Low Power Idle Control */ #define I82579_LPI_CTRL PHY_REG(772, 20) #define I82579_LPI_CTRL_100_ENABLE 0x2000 #define I82579_LPI_CTRL_1000_ENABLE 0x4000 #define I82579_LPI_CTRL_ENABLE_MASK 0x6000 /* 82579 DFT Control */ #define I82579_DFT_CTRL PHY_REG(769, 20) #define I82579_DFT_CTRL_GATE_PHY_RESET 0x0040 /* Gate PHY Reset on MAC Reset */ /* Extended Management Interface (EMI) Registers */ #define I82579_EMI_ADDR 0x10 #define I82579_EMI_DATA 0x11 #define I82579_LPI_UPDATE_TIMER 0x4805 /* in 40ns units + 40 ns base value */ #define I82579_MSE_THRESHOLD 0x084F /* 82579 Mean Square Error Threshold */ #define I82577_MSE_THRESHOLD 0x0887 /* 82577 Mean Square Error Threshold */ #define I82579_MSE_LINK_DOWN 0x2411 /* MSE count before dropping link */ #define I82579_RX_CONFIG 0x3412 /* Receive configuration */ #define I82579_LPI_PLL_SHUT 0x4412 /* LPI PLL Shut Enable */ #define I82579_EEE_PCS_STATUS 0x182E /* IEEE MMD Register 3.1 >> 8 */ #define I82579_EEE_CAPABILITY 0x0410 /* IEEE MMD Register 3.20 */ #define I82579_EEE_ADVERTISEMENT 0x040E /* IEEE MMD Register 7.60 */ #define I82579_EEE_LP_ABILITY 0x040F /* IEEE MMD Register 7.61 */ #define I82579_EEE_100_SUPPORTED (1 << 1) /* 100BaseTx EEE */ #define I82579_EEE_1000_SUPPORTED (1 << 2) /* 1000BaseTx EEE */ #define I82579_LPI_100_PLL_SHUT (1 << 2) /* 100M LPI PLL Shut Enabled */ #define I217_EEE_PCS_STATUS 0x9401 /* IEEE MMD Register 3.1 */ #define I217_EEE_CAPABILITY 0x8000 /* IEEE MMD Register 3.20 */ #define I217_EEE_ADVERTISEMENT 0x8001 /* IEEE MMD Register 7.60 */ #define I217_EEE_LP_ABILITY 0x8002 /* IEEE MMD Register 7.61 */ #define I217_RX_CONFIG 0xB20C /* Receive configuration */ #define E1000_EEE_RX_LPI_RCVD 0x0400 /* Tx LP idle received */ #define E1000_EEE_TX_LPI_RCVD 0x0800 /* Rx LP idle received */ /* Intel Rapid Start Technology Support */ #define I217_PROXY_CTRL BM_PHY_REG(BM_WUC_PAGE, 70) #define I217_PROXY_CTRL_AUTO_DISABLE 0x0080 #define I217_SxCTRL PHY_REG(BM_PORT_CTRL_PAGE, 28) #define I217_SxCTRL_ENABLE_LPI_RESET 0x1000 #define I217_CGFREG PHY_REG(772, 29) #define I217_CGFREG_ENABLE_MTA_RESET 0x0002 #define I217_MEMPWR PHY_REG(772, 26) #define I217_MEMPWR_DISABLE_SMB_RELEASE 0x0010 /* Receive Address Initial CRC Calculation */ #define E1000_PCH_RAICC(_n) (0x05F50 + ((_n) * 4)) +/* Latency Tolerance Reporting */ +#define E1000_LTRV 0x000F8 +#define E1000_LTRV_VALUE_MASK 0x000003FF +#define E1000_LTRV_SCALE_MAX 5 +#define E1000_LTRV_SCALE_FACTOR 5 +#define E1000_LTRV_SCALE_SHIFT 10 +#define E1000_LTRV_SCALE_MASK 0x00001C00 +#define E1000_LTRV_REQ_SHIFT 15 +#define E1000_LTRV_NOSNOOP_SHIFT 16 +#define E1000_LTRV_SEND (1 << 30) + +/* Proprietary Latency Tolerance Reporting PCI Capability */ +#define E1000_PCI_LTR_CAP_LPT 0xA8 + +/* OBFF Control & Threshold Defines */ +#define E1000_SVCR_OFF_EN 0x00000001 +#define E1000_SVCR_OFF_MASKINT 0x00001000 +#define E1000_SVCR_OFF_TIMER_MASK 0xFFFF0000 +#define E1000_SVCR_OFF_TIMER_SHIFT 16 +#define E1000_SVT_OFF_HWM_MASK 0x0000001F + +#if defined(QV_RELEASE) || !defined(NO_PCH_LPT_B0_SUPPORT) #define E1000_PCI_REVISION_ID_REG 0x08 +#endif /* defined(QV_RELEASE) || !defined(NO_PCH_LPT_B0_SUPPORT) */ void e1000_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, bool state); void e1000_igp3_phy_powerdown_workaround_ich8lan(struct e1000_hw *hw); void e1000_gig_downshift_workaround_ich8lan(struct e1000_hw *hw); void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw); -u32 e1000_resume_workarounds_pchlan(struct e1000_hw *hw); +void e1000_resume_workarounds_pchlan(struct e1000_hw *hw); s32 e1000_configure_k1_ich8lan(struct e1000_hw *hw, bool k1_enable); void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw); s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable); s32 e1000_read_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 *data); s32 e1000_write_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 data); s32 e1000_set_eee_pchlan(struct e1000_hw *hw); s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx); s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force); #endif /* _E1000_ICH8LAN_H_ */ Index: projects/clang-trunk/sys/dev/e1000/e1000_mac.c =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_mac.c (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_mac.c (revision 287502) @@ -1,2241 +1,2253 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "e1000_api.h" static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw); static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw); static void e1000_config_collision_dist_generic(struct e1000_hw *hw); static int e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index); /** * e1000_init_mac_ops_generic - Initialize MAC function pointers * @hw: pointer to the HW structure * * Setups up the function pointers to no-op functions **/ void e1000_init_mac_ops_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; DEBUGFUNC("e1000_init_mac_ops_generic"); /* General Setup */ mac->ops.init_params = e1000_null_ops_generic; mac->ops.init_hw = e1000_null_ops_generic; mac->ops.reset_hw = e1000_null_ops_generic; mac->ops.setup_physical_interface = e1000_null_ops_generic; mac->ops.get_bus_info = e1000_null_ops_generic; mac->ops.set_lan_id = e1000_set_lan_id_multi_port_pcie; mac->ops.read_mac_addr = e1000_read_mac_addr_generic; mac->ops.config_collision_dist = e1000_config_collision_dist_generic; mac->ops.clear_hw_cntrs = e1000_null_mac_generic; /* LED */ mac->ops.cleanup_led = e1000_null_ops_generic; mac->ops.setup_led = e1000_null_ops_generic; mac->ops.blink_led = e1000_null_ops_generic; mac->ops.led_on = e1000_null_ops_generic; mac->ops.led_off = e1000_null_ops_generic; /* LINK */ mac->ops.setup_link = e1000_null_ops_generic; mac->ops.get_link_up_info = e1000_null_link_info; mac->ops.check_for_link = e1000_null_ops_generic; + mac->ops.set_obff_timer = e1000_null_set_obff_timer; /* Management */ mac->ops.check_mng_mode = e1000_null_mng_mode; /* VLAN, MC, etc. */ mac->ops.update_mc_addr_list = e1000_null_update_mc; mac->ops.clear_vfta = e1000_null_mac_generic; mac->ops.write_vfta = e1000_null_write_vfta; mac->ops.rar_set = e1000_rar_set_generic; mac->ops.validate_mdi_setting = e1000_validate_mdi_setting_generic; } /** * e1000_null_ops_generic - No-op function, returns 0 * @hw: pointer to the HW structure **/ s32 e1000_null_ops_generic(struct e1000_hw E1000_UNUSEDARG *hw) { DEBUGFUNC("e1000_null_ops_generic"); return E1000_SUCCESS; } /** * e1000_null_mac_generic - No-op function, return void * @hw: pointer to the HW structure **/ void e1000_null_mac_generic(struct e1000_hw E1000_UNUSEDARG *hw) { DEBUGFUNC("e1000_null_mac_generic"); return; } /** * e1000_null_link_info - No-op function, return 0 * @hw: pointer to the HW structure **/ s32 e1000_null_link_info(struct e1000_hw E1000_UNUSEDARG *hw, u16 E1000_UNUSEDARG *s, u16 E1000_UNUSEDARG *d) { DEBUGFUNC("e1000_null_link_info"); return E1000_SUCCESS; } /** * e1000_null_mng_mode - No-op function, return FALSE * @hw: pointer to the HW structure **/ bool e1000_null_mng_mode(struct e1000_hw E1000_UNUSEDARG *hw) { DEBUGFUNC("e1000_null_mng_mode"); return FALSE; } /** * e1000_null_update_mc - No-op function, return void * @hw: pointer to the HW structure **/ void e1000_null_update_mc(struct e1000_hw E1000_UNUSEDARG *hw, u8 E1000_UNUSEDARG *h, u32 E1000_UNUSEDARG a) { DEBUGFUNC("e1000_null_update_mc"); return; } /** * e1000_null_write_vfta - No-op function, return void * @hw: pointer to the HW structure **/ void e1000_null_write_vfta(struct e1000_hw E1000_UNUSEDARG *hw, u32 E1000_UNUSEDARG a, u32 E1000_UNUSEDARG b) { DEBUGFUNC("e1000_null_write_vfta"); return; } /** * e1000_null_rar_set - No-op function, return 0 * @hw: pointer to the HW structure **/ int e1000_null_rar_set(struct e1000_hw E1000_UNUSEDARG *hw, u8 E1000_UNUSEDARG *h, u32 E1000_UNUSEDARG a) { DEBUGFUNC("e1000_null_rar_set"); + return E1000_SUCCESS; +} + +/** + * e1000_null_set_obff_timer - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_set_obff_timer(struct e1000_hw E1000_UNUSEDARG *hw, + u32 E1000_UNUSEDARG a) +{ + DEBUGFUNC("e1000_null_set_obff_timer"); return E1000_SUCCESS; } /** * e1000_get_bus_info_pci_generic - Get PCI(x) bus information * @hw: pointer to the HW structure * * Determines and stores the system bus information for a particular * network interface. The following bus information is determined and stored: * bus speed, bus width, type (PCI/PCIx), and PCI(-x) function. **/ s32 e1000_get_bus_info_pci_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; struct e1000_bus_info *bus = &hw->bus; u32 status = E1000_READ_REG(hw, E1000_STATUS); s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_get_bus_info_pci_generic"); /* PCI or PCI-X? */ bus->type = (status & E1000_STATUS_PCIX_MODE) ? e1000_bus_type_pcix : e1000_bus_type_pci; /* Bus speed */ if (bus->type == e1000_bus_type_pci) { bus->speed = (status & E1000_STATUS_PCI66) ? e1000_bus_speed_66 : e1000_bus_speed_33; } else { switch (status & E1000_STATUS_PCIX_SPEED) { case E1000_STATUS_PCIX_SPEED_66: bus->speed = e1000_bus_speed_66; break; case E1000_STATUS_PCIX_SPEED_100: bus->speed = e1000_bus_speed_100; break; case E1000_STATUS_PCIX_SPEED_133: bus->speed = e1000_bus_speed_133; break; default: bus->speed = e1000_bus_speed_reserved; break; } } /* Bus width */ bus->width = (status & E1000_STATUS_BUS64) ? e1000_bus_width_64 : e1000_bus_width_32; /* Which PCI(-X) function? */ mac->ops.set_lan_id(hw); return ret_val; } /** * e1000_get_bus_info_pcie_generic - Get PCIe bus information * @hw: pointer to the HW structure * * Determines and stores the system bus information for a particular * network interface. The following bus information is determined and stored: * bus speed, bus width, type (PCIe), and PCIe function. **/ s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; struct e1000_bus_info *bus = &hw->bus; s32 ret_val; u16 pcie_link_status; DEBUGFUNC("e1000_get_bus_info_pcie_generic"); bus->type = e1000_bus_type_pci_express; ret_val = e1000_read_pcie_cap_reg(hw, PCIE_LINK_STATUS, &pcie_link_status); if (ret_val) { bus->width = e1000_bus_width_unknown; bus->speed = e1000_bus_speed_unknown; } else { switch (pcie_link_status & PCIE_LINK_SPEED_MASK) { case PCIE_LINK_SPEED_2500: bus->speed = e1000_bus_speed_2500; break; case PCIE_LINK_SPEED_5000: bus->speed = e1000_bus_speed_5000; break; default: bus->speed = e1000_bus_speed_unknown; break; } bus->width = (enum e1000_bus_width)((pcie_link_status & PCIE_LINK_WIDTH_MASK) >> PCIE_LINK_WIDTH_SHIFT); } mac->ops.set_lan_id(hw); return E1000_SUCCESS; } /** * e1000_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices * * @hw: pointer to the HW structure * * Determines the LAN function id by reading memory-mapped registers * and swaps the port value if requested. **/ static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw) { struct e1000_bus_info *bus = &hw->bus; u32 reg; /* The status register reports the correct function number * for the device regardless of function swap state. */ reg = E1000_READ_REG(hw, E1000_STATUS); bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; } /** * e1000_set_lan_id_multi_port_pci - Set LAN id for PCI multiple port devices * @hw: pointer to the HW structure * * Determines the LAN function id by reading PCI config space. **/ void e1000_set_lan_id_multi_port_pci(struct e1000_hw *hw) { struct e1000_bus_info *bus = &hw->bus; u16 pci_header_type; u32 status; e1000_read_pci_cfg(hw, PCI_HEADER_TYPE_REGISTER, &pci_header_type); if (pci_header_type & PCI_HEADER_TYPE_MULTIFUNC) { status = E1000_READ_REG(hw, E1000_STATUS); bus->func = (status & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; } else { bus->func = 0; } } /** * e1000_set_lan_id_single_port - Set LAN id for a single port device * @hw: pointer to the HW structure * * Sets the LAN function id to zero for a single port device. **/ void e1000_set_lan_id_single_port(struct e1000_hw *hw) { struct e1000_bus_info *bus = &hw->bus; bus->func = 0; } /** * e1000_clear_vfta_generic - Clear VLAN filter table * @hw: pointer to the HW structure * * Clears the register array which contains the VLAN filter table by * setting all the values to 0. **/ void e1000_clear_vfta_generic(struct e1000_hw *hw) { u32 offset; DEBUGFUNC("e1000_clear_vfta_generic"); for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0); E1000_WRITE_FLUSH(hw); } } /** * e1000_write_vfta_generic - Write value to VLAN filter table * @hw: pointer to the HW structure * @offset: register offset in VLAN filter table * @value: register value written to VLAN filter table * * Writes value at the given offset in the register array which stores * the VLAN filter table. **/ void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value) { DEBUGFUNC("e1000_write_vfta_generic"); E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); E1000_WRITE_FLUSH(hw); } /** * e1000_init_rx_addrs_generic - Initialize receive address's * @hw: pointer to the HW structure * @rar_count: receive address registers * * Setup the receive address registers by setting the base receive address * register to the devices MAC address and clearing all the other receive * address registers to 0. **/ void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count) { u32 i; u8 mac_addr[ETH_ADDR_LEN] = {0}; DEBUGFUNC("e1000_init_rx_addrs_generic"); /* Setup the receive address */ DEBUGOUT("Programming MAC Address into RAR[0]\n"); hw->mac.ops.rar_set(hw, hw->mac.addr, 0); /* Zero out the other (rar_entry_count - 1) receive addresses */ DEBUGOUT1("Clearing RAR[1-%u]\n", rar_count-1); for (i = 1; i < rar_count; i++) hw->mac.ops.rar_set(hw, mac_addr, i); } /** * e1000_check_alt_mac_addr_generic - Check for alternate MAC addr * @hw: pointer to the HW structure * * Checks the nvm for an alternate MAC address. An alternate MAC address * can be setup by pre-boot software and must be treated like a permanent * address and must override the actual permanent MAC address. If an * alternate MAC address is found it is programmed into RAR0, replacing * the permanent address that was installed into RAR0 by the Si on reset. * This function will return SUCCESS unless it encounters an error while * reading the EEPROM. **/ s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw) { u32 i; s32 ret_val; u16 offset, nvm_alt_mac_addr_offset, nvm_data; u8 alt_mac_addr[ETH_ADDR_LEN]; DEBUGFUNC("e1000_check_alt_mac_addr_generic"); ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &nvm_data); if (ret_val) return ret_val; /* not supported on older hardware or 82573 */ if ((hw->mac.type < e1000_82571) || (hw->mac.type == e1000_82573)) return E1000_SUCCESS; /* Alternate MAC address is handled by the option ROM for 82580 * and newer. SW support not required. */ if (hw->mac.type >= e1000_82580) return E1000_SUCCESS; ret_val = hw->nvm.ops.read(hw, NVM_ALT_MAC_ADDR_PTR, 1, &nvm_alt_mac_addr_offset); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } if ((nvm_alt_mac_addr_offset == 0xFFFF) || (nvm_alt_mac_addr_offset == 0x0000)) /* There is no Alternate MAC Address */ return E1000_SUCCESS; if (hw->bus.func == E1000_FUNC_1) nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN1; if (hw->bus.func == E1000_FUNC_2) nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN2; if (hw->bus.func == E1000_FUNC_3) nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN3; for (i = 0; i < ETH_ADDR_LEN; i += 2) { offset = nvm_alt_mac_addr_offset + (i >> 1); ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } alt_mac_addr[i] = (u8)(nvm_data & 0xFF); alt_mac_addr[i + 1] = (u8)(nvm_data >> 8); } /* if multicast bit is set, the alternate address will not be used */ if (alt_mac_addr[0] & 0x01) { DEBUGOUT("Ignoring Alternate Mac Address with MC bit set\n"); return E1000_SUCCESS; } /* We have a valid alternate MAC address, and we want to treat it the * same as the normal permanent MAC address stored by the HW into the * RAR. Do this by mapping this address into RAR0. */ hw->mac.ops.rar_set(hw, alt_mac_addr, 0); return E1000_SUCCESS; } /** * e1000_rar_set_generic - Set receive address register * @hw: pointer to the HW structure * @addr: pointer to the receive address * @index: receive address array register * * Sets the receive address array register at index to the address passed * in by addr. **/ static int e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index) { u32 rar_low, rar_high; DEBUGFUNC("e1000_rar_set_generic"); /* HW expects these in little endian so we reverse the byte order * from network order (big endian) to little endian */ rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); /* If MAC address zero, no need to set the AV bit */ if (rar_low || rar_high) rar_high |= E1000_RAH_AV; /* Some bridges will combine consecutive 32-bit writes into * a single burst write, which will malfunction on some parts. * The flushes avoid this. */ E1000_WRITE_REG(hw, E1000_RAL(index), rar_low); E1000_WRITE_FLUSH(hw); E1000_WRITE_REG(hw, E1000_RAH(index), rar_high); E1000_WRITE_FLUSH(hw); return E1000_SUCCESS; } /** * e1000_hash_mc_addr_generic - Generate a multicast hash value * @hw: pointer to the HW structure * @mc_addr: pointer to a multicast address * * Generates a multicast address hash value which is used to determine * the multicast filter table array address and new table value. **/ u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr) { u32 hash_value, hash_mask; u8 bit_shift = 0; DEBUGFUNC("e1000_hash_mc_addr_generic"); /* Register count multiplied by bits per register */ hash_mask = (hw->mac.mta_reg_count * 32) - 1; /* For a mc_filter_type of 0, bit_shift is the number of left-shifts * where 0xFF would still fall within the hash mask. */ while (hash_mask >> bit_shift != 0xFF) bit_shift++; /* The portion of the address that is used for the hash table * is determined by the mc_filter_type setting. * The algorithm is such that there is a total of 8 bits of shifting. * The bit_shift for a mc_filter_type of 0 represents the number of * left-shifts where the MSB of mc_addr[5] would still fall within * the hash_mask. Case 0 does this exactly. Since there are a total * of 8 bits of shifting, then mc_addr[4] will shift right the * remaining number of bits. Thus 8 - bit_shift. The rest of the * cases are a variation of this algorithm...essentially raising the * number of bits to shift mc_addr[5] left, while still keeping the * 8-bit shifting total. * * For example, given the following Destination MAC Address and an * mta register count of 128 (thus a 4096-bit vector and 0xFFF mask), * we can see that the bit_shift for case 0 is 4. These are the hash * values resulting from each mc_filter_type... * [0] [1] [2] [3] [4] [5] * 01 AA 00 12 34 56 * LSB MSB * * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563 * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6 * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163 * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634 */ switch (hw->mac.mc_filter_type) { default: case 0: break; case 1: bit_shift += 1; break; case 2: bit_shift += 2; break; case 3: bit_shift += 4; break; } hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) | (((u16) mc_addr[5]) << bit_shift))); return hash_value; } /** * e1000_update_mc_addr_list_generic - Update Multicast addresses * @hw: pointer to the HW structure * @mc_addr_list: array of multicast addresses to program * @mc_addr_count: number of multicast addresses to program * * Updates entire Multicast Table Array. * The caller must have a packed mc_addr_list of multicast addresses. **/ void e1000_update_mc_addr_list_generic(struct e1000_hw *hw, u8 *mc_addr_list, u32 mc_addr_count) { u32 hash_value, hash_bit, hash_reg; int i; DEBUGFUNC("e1000_update_mc_addr_list_generic"); /* clear mta_shadow */ memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow)); /* update mta_shadow from mc_addr_list */ for (i = 0; (u32) i < mc_addr_count; i++) { hash_value = e1000_hash_mc_addr_generic(hw, mc_addr_list); hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); hash_bit = hash_value & 0x1F; hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit); mc_addr_list += (ETH_ADDR_LEN); } /* replace the entire MTA table */ for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, hw->mac.mta_shadow[i]); E1000_WRITE_FLUSH(hw); } /** * e1000_pcix_mmrbc_workaround_generic - Fix incorrect MMRBC value * @hw: pointer to the HW structure * * In certain situations, a system BIOS may report that the PCIx maximum * memory read byte count (MMRBC) value is higher than than the actual * value. We check the PCIx command register with the current PCIx status * register. **/ void e1000_pcix_mmrbc_workaround_generic(struct e1000_hw *hw) { u16 cmd_mmrbc; u16 pcix_cmd; u16 pcix_stat_hi_word; u16 stat_mmrbc; DEBUGFUNC("e1000_pcix_mmrbc_workaround_generic"); /* Workaround for PCI-X issue when BIOS sets MMRBC incorrectly */ if (hw->bus.type != e1000_bus_type_pcix) return; e1000_read_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd); e1000_read_pci_cfg(hw, PCIX_STATUS_REGISTER_HI, &pcix_stat_hi_word); cmd_mmrbc = (pcix_cmd & PCIX_COMMAND_MMRBC_MASK) >> PCIX_COMMAND_MMRBC_SHIFT; stat_mmrbc = (pcix_stat_hi_word & PCIX_STATUS_HI_MMRBC_MASK) >> PCIX_STATUS_HI_MMRBC_SHIFT; if (stat_mmrbc == PCIX_STATUS_HI_MMRBC_4K) stat_mmrbc = PCIX_STATUS_HI_MMRBC_2K; if (cmd_mmrbc > stat_mmrbc) { pcix_cmd &= ~PCIX_COMMAND_MMRBC_MASK; pcix_cmd |= stat_mmrbc << PCIX_COMMAND_MMRBC_SHIFT; e1000_write_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd); } } /** * e1000_clear_hw_cntrs_base_generic - Clear base hardware counters * @hw: pointer to the HW structure * * Clears the base hardware counters by reading the counter registers. **/ void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw) { DEBUGFUNC("e1000_clear_hw_cntrs_base_generic"); E1000_READ_REG(hw, E1000_CRCERRS); E1000_READ_REG(hw, E1000_SYMERRS); E1000_READ_REG(hw, E1000_MPC); E1000_READ_REG(hw, E1000_SCC); E1000_READ_REG(hw, E1000_ECOL); E1000_READ_REG(hw, E1000_MCC); E1000_READ_REG(hw, E1000_LATECOL); E1000_READ_REG(hw, E1000_COLC); E1000_READ_REG(hw, E1000_DC); E1000_READ_REG(hw, E1000_SEC); E1000_READ_REG(hw, E1000_RLEC); E1000_READ_REG(hw, E1000_XONRXC); E1000_READ_REG(hw, E1000_XONTXC); E1000_READ_REG(hw, E1000_XOFFRXC); E1000_READ_REG(hw, E1000_XOFFTXC); E1000_READ_REG(hw, E1000_FCRUC); E1000_READ_REG(hw, E1000_GPRC); E1000_READ_REG(hw, E1000_BPRC); E1000_READ_REG(hw, E1000_MPRC); E1000_READ_REG(hw, E1000_GPTC); E1000_READ_REG(hw, E1000_GORCL); E1000_READ_REG(hw, E1000_GORCH); E1000_READ_REG(hw, E1000_GOTCL); E1000_READ_REG(hw, E1000_GOTCH); E1000_READ_REG(hw, E1000_RNBC); E1000_READ_REG(hw, E1000_RUC); E1000_READ_REG(hw, E1000_RFC); E1000_READ_REG(hw, E1000_ROC); E1000_READ_REG(hw, E1000_RJC); E1000_READ_REG(hw, E1000_TORL); E1000_READ_REG(hw, E1000_TORH); E1000_READ_REG(hw, E1000_TOTL); E1000_READ_REG(hw, E1000_TOTH); E1000_READ_REG(hw, E1000_TPR); E1000_READ_REG(hw, E1000_TPT); E1000_READ_REG(hw, E1000_MPTC); E1000_READ_REG(hw, E1000_BPTC); } /** * e1000_check_for_copper_link_generic - Check for link (Copper) * @hw: pointer to the HW structure * * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. **/ s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; s32 ret_val; bool link; DEBUGFUNC("e1000_check_for_copper_link"); /* We only want to go out to the PHY registers to see if Auto-Neg * has completed and/or if our link status has changed. The * get_link_status flag is set upon receiving a Link Status * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) return E1000_SUCCESS; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex * of the PHY. */ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); if (ret_val) return ret_val; if (!link) return E1000_SUCCESS; /* No link detected */ mac->get_link_status = FALSE; /* Check if there was DownShift, must be checked * immediately after link-up */ e1000_check_downshift_generic(hw); /* If we are forcing speed/duplex, then we simply return since * we have already determined whether we have link or not. */ if (!mac->autoneg) return -E1000_ERR_CONFIG; /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to * configure Collision Distance in the MAC. */ mac->ops.config_collision_dist(hw); /* Configure Flow Control now that Auto-Neg has completed. * First, we need to restore the desired flow control * settings because we may have had to re-autoneg with a * different link partner. */ ret_val = e1000_config_fc_after_link_up_generic(hw); if (ret_val) DEBUGOUT("Error configuring flow control\n"); return ret_val; } /** * e1000_check_for_fiber_link_generic - Check for link (Fiber) * @hw: pointer to the HW structure * * Checks for link up on the hardware. If link is not up and we have * a signal, then we need to force link up. **/ s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 rxcw; u32 ctrl; u32 status; s32 ret_val; DEBUGFUNC("e1000_check_for_fiber_link_generic"); ctrl = E1000_READ_REG(hw, E1000_CTRL); status = E1000_READ_REG(hw, E1000_STATUS); rxcw = E1000_READ_REG(hw, E1000_RXCW); /* If we don't have link (auto-negotiation failed or link partner * cannot auto-negotiate), the cable is plugged in (we have signal), * and our link partner is not trying to auto-negotiate with us (we * are receiving idles or data), we need to force link up. We also * need to give auto-negotiation time to complete, in case the cable * was just plugged in. The autoneg_failed flag does this. */ /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ if ((ctrl & E1000_CTRL_SWDPIN1) && !(status & E1000_STATUS_LU) && !(rxcw & E1000_RXCW_C)) { if (!mac->autoneg_failed) { mac->autoneg_failed = TRUE; return E1000_SUCCESS; } DEBUGOUT("NOT Rx'ing /C/, disable AutoNeg and force link.\n"); /* Disable auto-negotiation in the TXCW register */ E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); /* Force link-up and also force full-duplex. */ ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); /* Configure Flow Control after forcing link up. */ ret_val = e1000_config_fc_after_link_up_generic(hw); if (ret_val) { DEBUGOUT("Error configuring flow control\n"); return ret_val; } } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { /* If we are forcing link and we are receiving /C/ ordered * sets, re-enable auto-negotiation in the TXCW register * and disable forced link in the Device Control register * in an attempt to auto-negotiate with our link partner. */ DEBUGOUT("Rx'ing /C/, enable AutoNeg and stop forcing link.\n"); E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); mac->serdes_has_link = TRUE; } return E1000_SUCCESS; } /** * e1000_check_for_serdes_link_generic - Check for link (Serdes) * @hw: pointer to the HW structure * * Checks for link up on the hardware. If link is not up and we have * a signal, then we need to force link up. **/ s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 rxcw; u32 ctrl; u32 status; s32 ret_val; DEBUGFUNC("e1000_check_for_serdes_link_generic"); ctrl = E1000_READ_REG(hw, E1000_CTRL); status = E1000_READ_REG(hw, E1000_STATUS); rxcw = E1000_READ_REG(hw, E1000_RXCW); /* If we don't have link (auto-negotiation failed or link partner * cannot auto-negotiate), and our link partner is not trying to * auto-negotiate with us (we are receiving idles or data), * we need to force link up. We also need to give auto-negotiation * time to complete. */ /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ if (!(status & E1000_STATUS_LU) && !(rxcw & E1000_RXCW_C)) { if (!mac->autoneg_failed) { mac->autoneg_failed = TRUE; return E1000_SUCCESS; } DEBUGOUT("NOT Rx'ing /C/, disable AutoNeg and force link.\n"); /* Disable auto-negotiation in the TXCW register */ E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); /* Force link-up and also force full-duplex. */ ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); /* Configure Flow Control after forcing link up. */ ret_val = e1000_config_fc_after_link_up_generic(hw); if (ret_val) { DEBUGOUT("Error configuring flow control\n"); return ret_val; } } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { /* If we are forcing link and we are receiving /C/ ordered * sets, re-enable auto-negotiation in the TXCW register * and disable forced link in the Device Control register * in an attempt to auto-negotiate with our link partner. */ DEBUGOUT("Rx'ing /C/, enable AutoNeg and stop forcing link.\n"); E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); mac->serdes_has_link = TRUE; } else if (!(E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW))) { /* If we force link for non-auto-negotiation switch, check * link status based on MAC synchronization for internal * serdes media type. */ /* SYNCH bit and IV bit are sticky. */ usec_delay(10); rxcw = E1000_READ_REG(hw, E1000_RXCW); if (rxcw & E1000_RXCW_SYNCH) { if (!(rxcw & E1000_RXCW_IV)) { mac->serdes_has_link = TRUE; DEBUGOUT("SERDES: Link up - forced.\n"); } } else { mac->serdes_has_link = FALSE; DEBUGOUT("SERDES: Link down - force failed.\n"); } } if (E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW)) { status = E1000_READ_REG(hw, E1000_STATUS); if (status & E1000_STATUS_LU) { /* SYNCH bit and IV bit are sticky, so reread rxcw. */ usec_delay(10); rxcw = E1000_READ_REG(hw, E1000_RXCW); if (rxcw & E1000_RXCW_SYNCH) { if (!(rxcw & E1000_RXCW_IV)) { mac->serdes_has_link = TRUE; DEBUGOUT("SERDES: Link up - autoneg completed successfully.\n"); } else { mac->serdes_has_link = FALSE; DEBUGOUT("SERDES: Link down - invalid codewords detected in autoneg.\n"); } } else { mac->serdes_has_link = FALSE; DEBUGOUT("SERDES: Link down - no sync.\n"); } } else { mac->serdes_has_link = FALSE; DEBUGOUT("SERDES: Link down - autoneg failed\n"); } } return E1000_SUCCESS; } /** * e1000_set_default_fc_generic - Set flow control default values * @hw: pointer to the HW structure * * Read the EEPROM for the default values for flow control and store the * values. **/ s32 e1000_set_default_fc_generic(struct e1000_hw *hw) { s32 ret_val; u16 nvm_data; u16 nvm_offset = 0; DEBUGFUNC("e1000_set_default_fc_generic"); /* Read and store word 0x0F of the EEPROM. This word contains bits * that determine the hardware's default PAUSE (flow control) mode, * a bit that determines whether the HW defaults to enabling or * disabling auto-negotiation, and the direction of the * SW defined pins. If there is no SW over-ride of the flow * control setting, then the variable hw->fc will * be initialized based on a value in the EEPROM. */ if (hw->mac.type == e1000_i350) { nvm_offset = NVM_82580_LAN_FUNC_OFFSET(hw->bus.func); ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG + nvm_offset, 1, &nvm_data); } else { ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG, 1, &nvm_data); } if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } if (!(nvm_data & NVM_WORD0F_PAUSE_MASK)) hw->fc.requested_mode = e1000_fc_none; else if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == NVM_WORD0F_ASM_DIR) hw->fc.requested_mode = e1000_fc_tx_pause; else hw->fc.requested_mode = e1000_fc_full; return E1000_SUCCESS; } /** * e1000_setup_link_generic - Setup flow control and link settings * @hw: pointer to the HW structure * * Determines which flow control settings to use, then configures flow * control. Calls the appropriate media-specific link configuration * function. Assuming the adapter has a valid link partner, a valid link * should be established. Assumes the hardware has previously been reset * and the transmitter and receiver are not enabled. **/ s32 e1000_setup_link_generic(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_setup_link_generic"); /* In the case of the phy reset being blocked, we already have a link. * We do not need to set it up again. */ if (hw->phy.ops.check_reset_block && hw->phy.ops.check_reset_block(hw)) return E1000_SUCCESS; /* If requested flow control is set to default, set flow control * based on the EEPROM flow control settings. */ if (hw->fc.requested_mode == e1000_fc_default) { ret_val = e1000_set_default_fc_generic(hw); if (ret_val) return ret_val; } /* Save off the requested flow control mode for use later. Depending * on the link partner's capabilities, we may or may not use this mode. */ hw->fc.current_mode = hw->fc.requested_mode; DEBUGOUT1("After fix-ups FlowControl is now = %x\n", hw->fc.current_mode); /* Call the necessary media_type subroutine to configure the link. */ ret_val = hw->mac.ops.setup_physical_interface(hw); if (ret_val) return ret_val; /* Initialize the flow control address, type, and PAUSE timer * registers to their default values. This is done even if flow * control is disabled, because it does not hurt anything to * initialize these registers. */ DEBUGOUT("Initializing the Flow Control address, type and timer regs\n"); E1000_WRITE_REG(hw, E1000_FCT, FLOW_CONTROL_TYPE); E1000_WRITE_REG(hw, E1000_FCAH, FLOW_CONTROL_ADDRESS_HIGH); E1000_WRITE_REG(hw, E1000_FCAL, FLOW_CONTROL_ADDRESS_LOW); E1000_WRITE_REG(hw, E1000_FCTTV, hw->fc.pause_time); return e1000_set_fc_watermarks_generic(hw); } /** * e1000_commit_fc_settings_generic - Configure flow control * @hw: pointer to the HW structure * * Write the flow control settings to the Transmit Config Word Register (TXCW) * base on the flow control settings in e1000_mac_info. **/ s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 txcw; DEBUGFUNC("e1000_commit_fc_settings_generic"); /* Check for a software override of the flow control settings, and * setup the device accordingly. If auto-negotiation is enabled, then * software will have to set the "PAUSE" bits to the correct value in * the Transmit Config Word Register (TXCW) and re-start auto- * negotiation. However, if auto-negotiation is disabled, then * software will have to manually configure the two flow control enable * bits in the CTRL register. * * The possible values of the "fc" parameter are: * 0: Flow control is completely disabled * 1: Rx flow control is enabled (we can receive pause frames, * but not send pause frames). * 2: Tx flow control is enabled (we can send pause frames but we * do not support receiving pause frames). * 3: Both Rx and Tx flow control (symmetric) are enabled. */ switch (hw->fc.current_mode) { case e1000_fc_none: /* Flow control completely disabled by a software over-ride. */ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD); break; case e1000_fc_rx_pause: /* Rx Flow control is enabled and Tx Flow control is disabled * by a software over-ride. Since there really isn't a way to * advertise that we are capable of Rx Pause ONLY, we will * advertise that we support both symmetric and asymmetric Rx * PAUSE. Later, we will disable the adapter's ability to send * PAUSE frames. */ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); break; case e1000_fc_tx_pause: /* Tx Flow control is enabled, and Rx Flow control is disabled, * by a software over-ride. */ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR); break; case e1000_fc_full: /* Flow control (both Rx and Tx) is enabled by a software * over-ride. */ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); break; default: DEBUGOUT("Flow control param set incorrectly\n"); return -E1000_ERR_CONFIG; break; } E1000_WRITE_REG(hw, E1000_TXCW, txcw); mac->txcw = txcw; return E1000_SUCCESS; } /** * e1000_poll_fiber_serdes_link_generic - Poll for link up * @hw: pointer to the HW structure * * Polls for link up by reading the status register, if link fails to come * up with auto-negotiation, then the link is forced if a signal is detected. **/ s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 i, status; s32 ret_val; DEBUGFUNC("e1000_poll_fiber_serdes_link_generic"); /* If we have a signal (the cable is plugged in, or assumed TRUE for * serdes media) then poll for a "Link-Up" indication in the Device * Status Register. Time-out if a link isn't seen in 500 milliseconds * seconds (Auto-negotiation should complete in less than 500 * milliseconds even if the other end is doing it in SW). */ for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) { msec_delay(10); status = E1000_READ_REG(hw, E1000_STATUS); if (status & E1000_STATUS_LU) break; } if (i == FIBER_LINK_UP_LIMIT) { DEBUGOUT("Never got a valid link from auto-neg!!!\n"); mac->autoneg_failed = TRUE; /* AutoNeg failed to achieve a link, so we'll call * mac->check_for_link. This routine will force the * link up if we detect a signal. This will allow us to * communicate with non-autonegotiating link partners. */ ret_val = mac->ops.check_for_link(hw); if (ret_val) { DEBUGOUT("Error while checking for link\n"); return ret_val; } mac->autoneg_failed = FALSE; } else { mac->autoneg_failed = FALSE; DEBUGOUT("Valid Link Found\n"); } return E1000_SUCCESS; } /** * e1000_setup_fiber_serdes_link_generic - Setup link for fiber/serdes * @hw: pointer to the HW structure * * Configures collision distance and flow control for fiber and serdes * links. Upon successful setup, poll for link. **/ s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; DEBUGFUNC("e1000_setup_fiber_serdes_link_generic"); ctrl = E1000_READ_REG(hw, E1000_CTRL); /* Take the link out of reset */ ctrl &= ~E1000_CTRL_LRST; hw->mac.ops.config_collision_dist(hw); ret_val = e1000_commit_fc_settings_generic(hw); if (ret_val) return ret_val; /* Since auto-negotiation is enabled, take the link out of reset (the * link will be in reset, because we previously reset the chip). This * will restart auto-negotiation. If auto-negotiation is successful * then the link-up status bit will be set and the flow control enable * bits (RFCE and TFCE) will be set according to their negotiated value. */ DEBUGOUT("Auto-negotiation enabled\n"); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); E1000_WRITE_FLUSH(hw); msec_delay(1); /* For these adapters, the SW definable pin 1 is set when the optics * detect a signal. If we have a signal, then poll for a "Link-Up" * indication. */ if (hw->phy.media_type == e1000_media_type_internal_serdes || (E1000_READ_REG(hw, E1000_CTRL) & E1000_CTRL_SWDPIN1)) { ret_val = e1000_poll_fiber_serdes_link_generic(hw); } else { DEBUGOUT("No signal detected\n"); } return ret_val; } /** * e1000_config_collision_dist_generic - Configure collision distance * @hw: pointer to the HW structure * * Configures the collision distance to the default value and is used * during link setup. **/ static void e1000_config_collision_dist_generic(struct e1000_hw *hw) { u32 tctl; DEBUGFUNC("e1000_config_collision_dist_generic"); tctl = E1000_READ_REG(hw, E1000_TCTL); tctl &= ~E1000_TCTL_COLD; tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT; E1000_WRITE_REG(hw, E1000_TCTL, tctl); E1000_WRITE_FLUSH(hw); } /** * e1000_set_fc_watermarks_generic - Set flow control high/low watermarks * @hw: pointer to the HW structure * * Sets the flow control high/low threshold (watermark) registers. If * flow control XON frame transmission is enabled, then set XON frame * transmission as well. **/ s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw) { u32 fcrtl = 0, fcrth = 0; DEBUGFUNC("e1000_set_fc_watermarks_generic"); /* Set the flow control receive threshold registers. Normally, * these registers will be set to a default threshold that may be * adjusted later by the driver's runtime code. However, if the * ability to transmit pause frames is not enabled, then these * registers will be set to 0. */ if (hw->fc.current_mode & e1000_fc_tx_pause) { /* We need to set up the Receive Threshold high and low water * marks as well as (optionally) enabling the transmission of * XON frames. */ fcrtl = hw->fc.low_water; if (hw->fc.send_xon) fcrtl |= E1000_FCRTL_XONE; fcrth = hw->fc.high_water; } E1000_WRITE_REG(hw, E1000_FCRTL, fcrtl); E1000_WRITE_REG(hw, E1000_FCRTH, fcrth); return E1000_SUCCESS; } /** * e1000_force_mac_fc_generic - Force the MAC's flow control settings * @hw: pointer to the HW structure * * Force the MAC's flow control settings. Sets the TFCE and RFCE bits in the * device control register to reflect the adapter settings. TFCE and RFCE * need to be explicitly set by software when a copper PHY is used because * autonegotiation is managed by the PHY rather than the MAC. Software must * also configure these bits when link is forced on a fiber connection. **/ s32 e1000_force_mac_fc_generic(struct e1000_hw *hw) { u32 ctrl; DEBUGFUNC("e1000_force_mac_fc_generic"); ctrl = E1000_READ_REG(hw, E1000_CTRL); /* Because we didn't get link via the internal auto-negotiation * mechanism (we either forced link or we got link via PHY * auto-neg), we have to manually enable/disable transmit an * receive flow control. * * The "Case" statement below enables/disable flow control * according to the "hw->fc.current_mode" parameter. * * The possible values of the "fc" parameter are: * 0: Flow control is completely disabled * 1: Rx flow control is enabled (we can receive pause * frames but not send pause frames). * 2: Tx flow control is enabled (we can send pause frames * frames but we do not receive pause frames). * 3: Both Rx and Tx flow control (symmetric) is enabled. * other: No other values should be possible at this point. */ DEBUGOUT1("hw->fc.current_mode = %u\n", hw->fc.current_mode); switch (hw->fc.current_mode) { case e1000_fc_none: ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE)); break; case e1000_fc_rx_pause: ctrl &= (~E1000_CTRL_TFCE); ctrl |= E1000_CTRL_RFCE; break; case e1000_fc_tx_pause: ctrl &= (~E1000_CTRL_RFCE); ctrl |= E1000_CTRL_TFCE; break; case e1000_fc_full: ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE); break; default: DEBUGOUT("Flow control param set incorrectly\n"); return -E1000_ERR_CONFIG; } E1000_WRITE_REG(hw, E1000_CTRL, ctrl); return E1000_SUCCESS; } /** * e1000_config_fc_after_link_up_generic - Configures flow control after link * @hw: pointer to the HW structure * * Checks the status of auto-negotiation after link up to ensure that the * speed and duplex were not forced. If the link needed to be forced, then * flow control needs to be forced also. If auto-negotiation is enabled * and did not fail, then we configure flow control based on our link * partner. **/ s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; s32 ret_val = E1000_SUCCESS; u32 pcs_status_reg, pcs_adv_reg, pcs_lp_ability_reg, pcs_ctrl_reg; u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg; u16 speed, duplex; DEBUGFUNC("e1000_config_fc_after_link_up_generic"); /* Check for the case where we have fiber media and auto-neg failed * so we had to force link. In this case, we need to force the * configuration of the MAC to match the "fc" parameter. */ if (mac->autoneg_failed) { if (hw->phy.media_type == e1000_media_type_fiber || hw->phy.media_type == e1000_media_type_internal_serdes) ret_val = e1000_force_mac_fc_generic(hw); } else { if (hw->phy.media_type == e1000_media_type_copper) ret_val = e1000_force_mac_fc_generic(hw); } if (ret_val) { DEBUGOUT("Error forcing flow control settings\n"); return ret_val; } /* Check for the case where we have copper media and auto-neg is * enabled. In this case, we need to check and see if Auto-Neg * has completed, and if so, how the PHY and link partner has * flow control configured. */ if ((hw->phy.media_type == e1000_media_type_copper) && mac->autoneg) { /* Read the MII Status Register and check to see if AutoNeg * has completed. We read this twice because this reg has * some "sticky" (latched) bits. */ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) return ret_val; ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) return ret_val; if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) { DEBUGOUT("Copper PHY and Auto Neg has not completed.\n"); return ret_val; } /* The AutoNeg process has completed, so we now need to * read both the Auto Negotiation Advertisement * Register (Address 4) and the Auto_Negotiation Base * Page Ability Register (Address 5) to determine how * flow control was negotiated. */ ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV, &mii_nway_adv_reg); if (ret_val) return ret_val; ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY, &mii_nway_lp_ability_reg); if (ret_val) return ret_val; /* Two bits in the Auto Negotiation Advertisement Register * (Address 4) and two bits in the Auto Negotiation Base * Page Ability Register (Address 5) determine flow control * for both the PHY and the link partner. The following * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, * 1999, describes these PAUSE resolution bits and how flow * control is determined based upon these settings. * NOTE: DC = Don't Care * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution *-------|---------|-------|---------|-------------------- * 0 | 0 | DC | DC | e1000_fc_none * 0 | 1 | 0 | DC | e1000_fc_none * 0 | 1 | 1 | 0 | e1000_fc_none * 0 | 1 | 1 | 1 | e1000_fc_tx_pause * 1 | 0 | 0 | DC | e1000_fc_none * 1 | DC | 1 | DC | e1000_fc_full * 1 | 1 | 0 | 0 | e1000_fc_none * 1 | 1 | 0 | 1 | e1000_fc_rx_pause * * Are both PAUSE bits set to 1? If so, this implies * Symmetric Flow Control is enabled at both ends. The * ASM_DIR bits are irrelevant per the spec. * * For Symmetric Flow Control: * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result *-------|---------|-------|---------|-------------------- * 1 | DC | 1 | DC | E1000_fc_full * */ if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { /* Now we need to check if the user selected Rx ONLY * of pause frames. In this case, we had to advertise * FULL flow control because we could not advertise Rx * ONLY. Hence, we must now check to see if we need to * turn OFF the TRANSMISSION of PAUSE frames. */ if (hw->fc.requested_mode == e1000_fc_full) { hw->fc.current_mode = e1000_fc_full; DEBUGOUT("Flow Control = FULL.\n"); } else { hw->fc.current_mode = e1000_fc_rx_pause; DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); } } /* For receiving PAUSE frames ONLY. * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result *-------|---------|-------|---------|-------------------- * 0 | 1 | 1 | 1 | e1000_fc_tx_pause */ else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { hw->fc.current_mode = e1000_fc_tx_pause; DEBUGOUT("Flow Control = Tx PAUSE frames only.\n"); } /* For transmitting PAUSE frames ONLY. * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result *-------|---------|-------|---------|-------------------- * 1 | 1 | 0 | 1 | e1000_fc_rx_pause */ else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { hw->fc.current_mode = e1000_fc_rx_pause; DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); } else { /* Per the IEEE spec, at this point flow control * should be disabled. */ hw->fc.current_mode = e1000_fc_none; DEBUGOUT("Flow Control = NONE.\n"); } /* Now we need to do one last check... If we auto- * negotiated to HALF DUPLEX, flow control should not be * enabled per IEEE 802.3 spec. */ ret_val = mac->ops.get_link_up_info(hw, &speed, &duplex); if (ret_val) { DEBUGOUT("Error getting link speed and duplex\n"); return ret_val; } if (duplex == HALF_DUPLEX) hw->fc.current_mode = e1000_fc_none; /* Now we call a subroutine to actually force the MAC * controller to use the correct flow control settings. */ ret_val = e1000_force_mac_fc_generic(hw); if (ret_val) { DEBUGOUT("Error forcing flow control settings\n"); return ret_val; } } /* Check for the case where we have SerDes media and auto-neg is * enabled. In this case, we need to check and see if Auto-Neg * has completed, and if so, how the PHY and link partner has * flow control configured. */ if ((hw->phy.media_type == e1000_media_type_internal_serdes) && mac->autoneg) { /* Read the PCS_LSTS and check to see if AutoNeg * has completed. */ pcs_status_reg = E1000_READ_REG(hw, E1000_PCS_LSTAT); if (!(pcs_status_reg & E1000_PCS_LSTS_AN_COMPLETE)) { DEBUGOUT("PCS Auto Neg has not completed.\n"); return ret_val; } /* The AutoNeg process has completed, so we now need to * read both the Auto Negotiation Advertisement * Register (PCS_ANADV) and the Auto_Negotiation Base * Page Ability Register (PCS_LPAB) to determine how * flow control was negotiated. */ pcs_adv_reg = E1000_READ_REG(hw, E1000_PCS_ANADV); pcs_lp_ability_reg = E1000_READ_REG(hw, E1000_PCS_LPAB); /* Two bits in the Auto Negotiation Advertisement Register * (PCS_ANADV) and two bits in the Auto Negotiation Base * Page Ability Register (PCS_LPAB) determine flow control * for both the PHY and the link partner. The following * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, * 1999, describes these PAUSE resolution bits and how flow * control is determined based upon these settings. * NOTE: DC = Don't Care * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution *-------|---------|-------|---------|-------------------- * 0 | 0 | DC | DC | e1000_fc_none * 0 | 1 | 0 | DC | e1000_fc_none * 0 | 1 | 1 | 0 | e1000_fc_none * 0 | 1 | 1 | 1 | e1000_fc_tx_pause * 1 | 0 | 0 | DC | e1000_fc_none * 1 | DC | 1 | DC | e1000_fc_full * 1 | 1 | 0 | 0 | e1000_fc_none * 1 | 1 | 0 | 1 | e1000_fc_rx_pause * * Are both PAUSE bits set to 1? If so, this implies * Symmetric Flow Control is enabled at both ends. The * ASM_DIR bits are irrelevant per the spec. * * For Symmetric Flow Control: * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result *-------|---------|-------|---------|-------------------- * 1 | DC | 1 | DC | e1000_fc_full * */ if ((pcs_adv_reg & E1000_TXCW_PAUSE) && (pcs_lp_ability_reg & E1000_TXCW_PAUSE)) { /* Now we need to check if the user selected Rx ONLY * of pause frames. In this case, we had to advertise * FULL flow control because we could not advertise Rx * ONLY. Hence, we must now check to see if we need to * turn OFF the TRANSMISSION of PAUSE frames. */ if (hw->fc.requested_mode == e1000_fc_full) { hw->fc.current_mode = e1000_fc_full; DEBUGOUT("Flow Control = FULL.\n"); } else { hw->fc.current_mode = e1000_fc_rx_pause; DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); } } /* For receiving PAUSE frames ONLY. * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result *-------|---------|-------|---------|-------------------- * 0 | 1 | 1 | 1 | e1000_fc_tx_pause */ else if (!(pcs_adv_reg & E1000_TXCW_PAUSE) && (pcs_adv_reg & E1000_TXCW_ASM_DIR) && (pcs_lp_ability_reg & E1000_TXCW_PAUSE) && (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) { hw->fc.current_mode = e1000_fc_tx_pause; DEBUGOUT("Flow Control = Tx PAUSE frames only.\n"); } /* For transmitting PAUSE frames ONLY. * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result *-------|---------|-------|---------|-------------------- * 1 | 1 | 0 | 1 | e1000_fc_rx_pause */ else if ((pcs_adv_reg & E1000_TXCW_PAUSE) && (pcs_adv_reg & E1000_TXCW_ASM_DIR) && !(pcs_lp_ability_reg & E1000_TXCW_PAUSE) && (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) { hw->fc.current_mode = e1000_fc_rx_pause; DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); } else { /* Per the IEEE spec, at this point flow control * should be disabled. */ hw->fc.current_mode = e1000_fc_none; DEBUGOUT("Flow Control = NONE.\n"); } /* Now we call a subroutine to actually force the MAC * controller to use the correct flow control settings. */ pcs_ctrl_reg = E1000_READ_REG(hw, E1000_PCS_LCTL); pcs_ctrl_reg |= E1000_PCS_LCTL_FORCE_FCTRL; E1000_WRITE_REG(hw, E1000_PCS_LCTL, pcs_ctrl_reg); ret_val = e1000_force_mac_fc_generic(hw); if (ret_val) { DEBUGOUT("Error forcing flow control settings\n"); return ret_val; } } return E1000_SUCCESS; } /** * e1000_get_speed_and_duplex_copper_generic - Retrieve current speed/duplex * @hw: pointer to the HW structure * @speed: stores the current speed * @duplex: stores the current duplex * * Read the status register for the current speed/duplex and store the current * speed and duplex for copper connections. **/ s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, u16 *duplex) { u32 status; DEBUGFUNC("e1000_get_speed_and_duplex_copper_generic"); status = E1000_READ_REG(hw, E1000_STATUS); if (status & E1000_STATUS_SPEED_1000) { *speed = SPEED_1000; DEBUGOUT("1000 Mbs, "); } else if (status & E1000_STATUS_SPEED_100) { *speed = SPEED_100; DEBUGOUT("100 Mbs, "); } else { *speed = SPEED_10; DEBUGOUT("10 Mbs, "); } if (status & E1000_STATUS_FD) { *duplex = FULL_DUPLEX; DEBUGOUT("Full Duplex\n"); } else { *duplex = HALF_DUPLEX; DEBUGOUT("Half Duplex\n"); } return E1000_SUCCESS; } /** * e1000_get_speed_and_duplex_fiber_generic - Retrieve current speed/duplex * @hw: pointer to the HW structure * @speed: stores the current speed * @duplex: stores the current duplex * * Sets the speed and duplex to gigabit full duplex (the only possible option) * for fiber/serdes links. **/ s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw E1000_UNUSEDARG *hw, u16 *speed, u16 *duplex) { DEBUGFUNC("e1000_get_speed_and_duplex_fiber_serdes_generic"); *speed = SPEED_1000; *duplex = FULL_DUPLEX; return E1000_SUCCESS; } /** * e1000_get_hw_semaphore_generic - Acquire hardware semaphore * @hw: pointer to the HW structure * * Acquire the HW semaphore to access the PHY or NVM **/ s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw) { u32 swsm; s32 timeout = hw->nvm.word_size + 1; s32 i = 0; DEBUGFUNC("e1000_get_hw_semaphore_generic"); /* Get the SW semaphore */ while (i < timeout) { swsm = E1000_READ_REG(hw, E1000_SWSM); if (!(swsm & E1000_SWSM_SMBI)) break; usec_delay(50); i++; } if (i == timeout) { DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); return -E1000_ERR_NVM; } /* Get the FW semaphore. */ for (i = 0; i < timeout; i++) { swsm = E1000_READ_REG(hw, E1000_SWSM); E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); /* Semaphore acquired if bit latched */ if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) break; usec_delay(50); } if (i == timeout) { /* Release semaphores */ e1000_put_hw_semaphore_generic(hw); DEBUGOUT("Driver can't access the NVM\n"); return -E1000_ERR_NVM; } return E1000_SUCCESS; } /** * e1000_put_hw_semaphore_generic - Release hardware semaphore * @hw: pointer to the HW structure * * Release hardware semaphore used to access the PHY or NVM **/ void e1000_put_hw_semaphore_generic(struct e1000_hw *hw) { u32 swsm; DEBUGFUNC("e1000_put_hw_semaphore_generic"); swsm = E1000_READ_REG(hw, E1000_SWSM); swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); E1000_WRITE_REG(hw, E1000_SWSM, swsm); } /** * e1000_get_auto_rd_done_generic - Check for auto read completion * @hw: pointer to the HW structure * * Check EEPROM for Auto Read done bit. **/ s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw) { s32 i = 0; DEBUGFUNC("e1000_get_auto_rd_done_generic"); while (i < AUTO_READ_DONE_TIMEOUT) { if (E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_AUTO_RD) break; msec_delay(1); i++; } if (i == AUTO_READ_DONE_TIMEOUT) { DEBUGOUT("Auto read by HW from NVM has not completed.\n"); return -E1000_ERR_RESET; } return E1000_SUCCESS; } /** * e1000_valid_led_default_generic - Verify a valid default LED config * @hw: pointer to the HW structure * @data: pointer to the NVM (EEPROM) * * Read the EEPROM for the current default LED configuration. If the * LED configuration is not valid, set to a valid LED configuration. **/ s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data) { s32 ret_val; DEBUGFUNC("e1000_valid_led_default_generic"); ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) *data = ID_LED_DEFAULT; return E1000_SUCCESS; } /** * e1000_id_led_init_generic - * @hw: pointer to the HW structure * **/ s32 e1000_id_led_init_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; s32 ret_val; const u32 ledctl_mask = 0x000000FF; const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON; const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF; u16 data, i, temp; const u16 led_mask = 0x0F; DEBUGFUNC("e1000_id_led_init_generic"); ret_val = hw->nvm.ops.valid_led_default(hw, &data); if (ret_val) return ret_val; mac->ledctl_default = E1000_READ_REG(hw, E1000_LEDCTL); mac->ledctl_mode1 = mac->ledctl_default; mac->ledctl_mode2 = mac->ledctl_default; for (i = 0; i < 4; i++) { temp = (data >> (i << 2)) & led_mask; switch (temp) { case ID_LED_ON1_DEF2: case ID_LED_ON1_ON2: case ID_LED_ON1_OFF2: mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); mac->ledctl_mode1 |= ledctl_on << (i << 3); break; case ID_LED_OFF1_DEF2: case ID_LED_OFF1_ON2: case ID_LED_OFF1_OFF2: mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); mac->ledctl_mode1 |= ledctl_off << (i << 3); break; default: /* Do nothing */ break; } switch (temp) { case ID_LED_DEF1_ON2: case ID_LED_ON1_ON2: case ID_LED_OFF1_ON2: mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); mac->ledctl_mode2 |= ledctl_on << (i << 3); break; case ID_LED_DEF1_OFF2: case ID_LED_ON1_OFF2: case ID_LED_OFF1_OFF2: mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); mac->ledctl_mode2 |= ledctl_off << (i << 3); break; default: /* Do nothing */ break; } } return E1000_SUCCESS; } /** * e1000_setup_led_generic - Configures SW controllable LED * @hw: pointer to the HW structure * * This prepares the SW controllable LED for use and saves the current state * of the LED so it can be later restored. **/ s32 e1000_setup_led_generic(struct e1000_hw *hw) { u32 ledctl; DEBUGFUNC("e1000_setup_led_generic"); if (hw->mac.ops.setup_led != e1000_setup_led_generic) return -E1000_ERR_CONFIG; if (hw->phy.media_type == e1000_media_type_fiber) { ledctl = E1000_READ_REG(hw, E1000_LEDCTL); hw->mac.ledctl_default = ledctl; /* Turn off LED0 */ ledctl &= ~(E1000_LEDCTL_LED0_IVRT | E1000_LEDCTL_LED0_BLINK | E1000_LEDCTL_LED0_MODE_MASK); ledctl |= (E1000_LEDCTL_MODE_LED_OFF << E1000_LEDCTL_LED0_MODE_SHIFT); E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl); } else if (hw->phy.media_type == e1000_media_type_copper) { E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); } return E1000_SUCCESS; } /** * e1000_cleanup_led_generic - Set LED config to default operation * @hw: pointer to the HW structure * * Remove the current LED configuration and set the LED configuration * to the default value, saved from the EEPROM. **/ s32 e1000_cleanup_led_generic(struct e1000_hw *hw) { DEBUGFUNC("e1000_cleanup_led_generic"); E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_default); return E1000_SUCCESS; } /** * e1000_blink_led_generic - Blink LED * @hw: pointer to the HW structure * * Blink the LEDs which are set to be on. **/ s32 e1000_blink_led_generic(struct e1000_hw *hw) { u32 ledctl_blink = 0; u32 i; DEBUGFUNC("e1000_blink_led_generic"); if (hw->phy.media_type == e1000_media_type_fiber) { /* always blink LED0 for PCI-E fiber */ ledctl_blink = E1000_LEDCTL_LED0_BLINK | (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT); } else { /* Set the blink bit for each LED that's "on" (0x0E) * (or "off" if inverted) in ledctl_mode2. The blink * logic in hardware only works when mode is set to "on" * so it must be changed accordingly when the mode is * "off" and inverted. */ ledctl_blink = hw->mac.ledctl_mode2; for (i = 0; i < 32; i += 8) { u32 mode = (hw->mac.ledctl_mode2 >> i) & E1000_LEDCTL_LED0_MODE_MASK; u32 led_default = hw->mac.ledctl_default >> i; if ((!(led_default & E1000_LEDCTL_LED0_IVRT) && (mode == E1000_LEDCTL_MODE_LED_ON)) || ((led_default & E1000_LEDCTL_LED0_IVRT) && (mode == E1000_LEDCTL_MODE_LED_OFF))) { ledctl_blink &= ~(E1000_LEDCTL_LED0_MODE_MASK << i); ledctl_blink |= (E1000_LEDCTL_LED0_BLINK | E1000_LEDCTL_MODE_LED_ON) << i; } } } E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl_blink); return E1000_SUCCESS; } /** * e1000_led_on_generic - Turn LED on * @hw: pointer to the HW structure * * Turn LED on. **/ s32 e1000_led_on_generic(struct e1000_hw *hw) { u32 ctrl; DEBUGFUNC("e1000_led_on_generic"); switch (hw->phy.media_type) { case e1000_media_type_fiber: ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl &= ~E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; E1000_WRITE_REG(hw, E1000_CTRL, ctrl); break; case e1000_media_type_copper: E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode2); break; default: break; } return E1000_SUCCESS; } /** * e1000_led_off_generic - Turn LED off * @hw: pointer to the HW structure * * Turn LED off. **/ s32 e1000_led_off_generic(struct e1000_hw *hw) { u32 ctrl; DEBUGFUNC("e1000_led_off_generic"); switch (hw->phy.media_type) { case e1000_media_type_fiber: ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; E1000_WRITE_REG(hw, E1000_CTRL, ctrl); break; case e1000_media_type_copper: E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); break; default: break; } return E1000_SUCCESS; } /** * e1000_set_pcie_no_snoop_generic - Set PCI-express capabilities * @hw: pointer to the HW structure * @no_snoop: bitmap of snoop events * * Set the PCI-express register to snoop for events enabled in 'no_snoop'. **/ void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop) { u32 gcr; DEBUGFUNC("e1000_set_pcie_no_snoop_generic"); if (hw->bus.type != e1000_bus_type_pci_express) return; if (no_snoop) { gcr = E1000_READ_REG(hw, E1000_GCR); gcr &= ~(PCIE_NO_SNOOP_ALL); gcr |= no_snoop; E1000_WRITE_REG(hw, E1000_GCR, gcr); } } /** * e1000_disable_pcie_master_generic - Disables PCI-express master access * @hw: pointer to the HW structure * * Returns E1000_SUCCESS if successful, else returns -10 * (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused * the master requests to be disabled. * * Disables PCI-Express master access and verifies there are no pending * requests. **/ s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw) { u32 ctrl; s32 timeout = MASTER_DISABLE_TIMEOUT; DEBUGFUNC("e1000_disable_pcie_master_generic"); if (hw->bus.type != e1000_bus_type_pci_express) return E1000_SUCCESS; ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= E1000_CTRL_GIO_MASTER_DISABLE; E1000_WRITE_REG(hw, E1000_CTRL, ctrl); while (timeout) { if (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_GIO_MASTER_ENABLE) || E1000_REMOVED(hw->hw_addr)) break; usec_delay(100); timeout--; } if (!timeout) { DEBUGOUT("Master requests are pending.\n"); return -E1000_ERR_MASTER_REQUESTS_PENDING; } return E1000_SUCCESS; } /** * e1000_reset_adaptive_generic - Reset Adaptive Interframe Spacing * @hw: pointer to the HW structure * * Reset the Adaptive Interframe Spacing throttle to default values. **/ void e1000_reset_adaptive_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; DEBUGFUNC("e1000_reset_adaptive_generic"); if (!mac->adaptive_ifs) { DEBUGOUT("Not in Adaptive IFS mode!\n"); return; } mac->current_ifs_val = 0; mac->ifs_min_val = IFS_MIN; mac->ifs_max_val = IFS_MAX; mac->ifs_step_size = IFS_STEP; mac->ifs_ratio = IFS_RATIO; mac->in_ifs_mode = FALSE; E1000_WRITE_REG(hw, E1000_AIT, 0); } /** * e1000_update_adaptive_generic - Update Adaptive Interframe Spacing * @hw: pointer to the HW structure * * Update the Adaptive Interframe Spacing Throttle value based on the * time between transmitted packets and time between collisions. **/ void e1000_update_adaptive_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; DEBUGFUNC("e1000_update_adaptive_generic"); if (!mac->adaptive_ifs) { DEBUGOUT("Not in Adaptive IFS mode!\n"); return; } if ((mac->collision_delta * mac->ifs_ratio) > mac->tx_packet_delta) { if (mac->tx_packet_delta > MIN_NUM_XMITS) { mac->in_ifs_mode = TRUE; if (mac->current_ifs_val < mac->ifs_max_val) { if (!mac->current_ifs_val) mac->current_ifs_val = mac->ifs_min_val; else mac->current_ifs_val += mac->ifs_step_size; E1000_WRITE_REG(hw, E1000_AIT, mac->current_ifs_val); } } } else { if (mac->in_ifs_mode && (mac->tx_packet_delta <= MIN_NUM_XMITS)) { mac->current_ifs_val = 0; mac->in_ifs_mode = FALSE; E1000_WRITE_REG(hw, E1000_AIT, 0); } } } /** * e1000_validate_mdi_setting_generic - Verify MDI/MDIx settings * @hw: pointer to the HW structure * * Verify that when not using auto-negotiation that MDI/MDIx is correctly * set, which is forced to MDI mode only. **/ static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw) { DEBUGFUNC("e1000_validate_mdi_setting_generic"); if (!hw->mac.autoneg && (hw->phy.mdix == 0 || hw->phy.mdix == 3)) { DEBUGOUT("Invalid MDI setting detected\n"); hw->phy.mdix = 1; return -E1000_ERR_CONFIG; } return E1000_SUCCESS; } /** * e1000_validate_mdi_setting_crossover_generic - Verify MDI/MDIx settings * @hw: pointer to the HW structure * * Validate the MDI/MDIx setting, allowing for auto-crossover during forced * operation. **/ s32 e1000_validate_mdi_setting_crossover_generic(struct e1000_hw E1000_UNUSEDARG *hw) { DEBUGFUNC("e1000_validate_mdi_setting_crossover_generic"); return E1000_SUCCESS; } /** * e1000_write_8bit_ctrl_reg_generic - Write a 8bit CTRL register * @hw: pointer to the HW structure * @reg: 32bit register offset such as E1000_SCTL * @offset: register offset to write to * @data: data to write at register offset * * Writes an address/data control type register. There are several of these * and they all have the format address << 8 | data and bit 31 is polled for * completion. **/ s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg, u32 offset, u8 data) { u32 i, regvalue = 0; DEBUGFUNC("e1000_write_8bit_ctrl_reg_generic"); /* Set up the address and data */ regvalue = ((u32)data) | (offset << E1000_GEN_CTL_ADDRESS_SHIFT); E1000_WRITE_REG(hw, reg, regvalue); /* Poll the ready bit to see if the MDI read completed */ for (i = 0; i < E1000_GEN_POLL_TIMEOUT; i++) { usec_delay(5); regvalue = E1000_READ_REG(hw, reg); if (regvalue & E1000_GEN_CTL_READY) break; } if (!(regvalue & E1000_GEN_CTL_READY)) { DEBUGOUT1("Reg %08x did not indicate ready\n", reg); return -E1000_ERR_PHY; } return E1000_SUCCESS; } Index: projects/clang-trunk/sys/dev/e1000/e1000_mac.h =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_mac.h (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_mac.h (revision 287502) @@ -1,94 +1,97 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _E1000_MAC_H_ #define _E1000_MAC_H_ void e1000_init_mac_ops_generic(struct e1000_hw *hw); +#ifndef E1000_REMOVED #define E1000_REMOVED(a) (0) +#endif /* E1000_REMOVED */ void e1000_null_mac_generic(struct e1000_hw *hw); s32 e1000_null_ops_generic(struct e1000_hw *hw); s32 e1000_null_link_info(struct e1000_hw *hw, u16 *s, u16 *d); bool e1000_null_mng_mode(struct e1000_hw *hw); void e1000_null_update_mc(struct e1000_hw *hw, u8 *h, u32 a); void e1000_null_write_vfta(struct e1000_hw *hw, u32 a, u32 b); int e1000_null_rar_set(struct e1000_hw *hw, u8 *h, u32 a); +s32 e1000_null_set_obff_timer(struct e1000_hw *hw, u32 a); s32 e1000_blink_led_generic(struct e1000_hw *hw); s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw); s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw); s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw); s32 e1000_cleanup_led_generic(struct e1000_hw *hw); s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw); s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw); s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw); s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw); s32 e1000_force_mac_fc_generic(struct e1000_hw *hw); s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw); s32 e1000_get_bus_info_pci_generic(struct e1000_hw *hw); s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw); void e1000_set_lan_id_single_port(struct e1000_hw *hw); void e1000_set_lan_id_multi_port_pci(struct e1000_hw *hw); s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw); s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, u16 *duplex); s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw *hw, u16 *speed, u16 *duplex); s32 e1000_id_led_init_generic(struct e1000_hw *hw); s32 e1000_led_on_generic(struct e1000_hw *hw); s32 e1000_led_off_generic(struct e1000_hw *hw); void e1000_update_mc_addr_list_generic(struct e1000_hw *hw, u8 *mc_addr_list, u32 mc_addr_count); s32 e1000_set_default_fc_generic(struct e1000_hw *hw); s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw); s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw); s32 e1000_setup_led_generic(struct e1000_hw *hw); s32 e1000_setup_link_generic(struct e1000_hw *hw); s32 e1000_validate_mdi_setting_crossover_generic(struct e1000_hw *hw); s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg, u32 offset, u8 data); u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr); void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw); void e1000_clear_vfta_generic(struct e1000_hw *hw); void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count); void e1000_pcix_mmrbc_workaround_generic(struct e1000_hw *hw); void e1000_put_hw_semaphore_generic(struct e1000_hw *hw); s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw); void e1000_reset_adaptive_generic(struct e1000_hw *hw); void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop); void e1000_update_adaptive_generic(struct e1000_hw *hw); void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value); #endif Index: projects/clang-trunk/sys/dev/e1000/e1000_nvm.c =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_nvm.c (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_nvm.c (revision 287502) @@ -1,1381 +1,1235 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "e1000_api.h" static void e1000_reload_nvm_generic(struct e1000_hw *hw); /** * e1000_init_nvm_ops_generic - Initialize NVM function pointers * @hw: pointer to the HW structure * * Setups up the function pointers to no-op functions **/ void e1000_init_nvm_ops_generic(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; DEBUGFUNC("e1000_init_nvm_ops_generic"); /* Initialize function pointers */ nvm->ops.init_params = e1000_null_ops_generic; nvm->ops.acquire = e1000_null_ops_generic; nvm->ops.read = e1000_null_read_nvm; nvm->ops.release = e1000_null_nvm_generic; nvm->ops.reload = e1000_reload_nvm_generic; nvm->ops.update = e1000_null_ops_generic; nvm->ops.valid_led_default = e1000_null_led_default; nvm->ops.validate = e1000_null_ops_generic; nvm->ops.write = e1000_null_write_nvm; } /** * e1000_null_nvm_read - No-op function, return 0 * @hw: pointer to the HW structure **/ s32 e1000_null_read_nvm(struct e1000_hw E1000_UNUSEDARG *hw, u16 E1000_UNUSEDARG a, u16 E1000_UNUSEDARG b, u16 E1000_UNUSEDARG *c) { DEBUGFUNC("e1000_null_read_nvm"); return E1000_SUCCESS; } /** * e1000_null_nvm_generic - No-op function, return void * @hw: pointer to the HW structure **/ void e1000_null_nvm_generic(struct e1000_hw E1000_UNUSEDARG *hw) { DEBUGFUNC("e1000_null_nvm_generic"); return; } /** * e1000_null_led_default - No-op function, return 0 * @hw: pointer to the HW structure **/ s32 e1000_null_led_default(struct e1000_hw E1000_UNUSEDARG *hw, u16 E1000_UNUSEDARG *data) { DEBUGFUNC("e1000_null_led_default"); return E1000_SUCCESS; } /** * e1000_null_write_nvm - No-op function, return 0 * @hw: pointer to the HW structure **/ s32 e1000_null_write_nvm(struct e1000_hw E1000_UNUSEDARG *hw, u16 E1000_UNUSEDARG a, u16 E1000_UNUSEDARG b, u16 E1000_UNUSEDARG *c) { DEBUGFUNC("e1000_null_write_nvm"); return E1000_SUCCESS; } /** * e1000_raise_eec_clk - Raise EEPROM clock * @hw: pointer to the HW structure * @eecd: pointer to the EEPROM * * Enable/Raise the EEPROM clock bit. **/ static void e1000_raise_eec_clk(struct e1000_hw *hw, u32 *eecd) { *eecd = *eecd | E1000_EECD_SK; E1000_WRITE_REG(hw, E1000_EECD, *eecd); E1000_WRITE_FLUSH(hw); usec_delay(hw->nvm.delay_usec); } /** * e1000_lower_eec_clk - Lower EEPROM clock * @hw: pointer to the HW structure * @eecd: pointer to the EEPROM * * Clear/Lower the EEPROM clock bit. **/ static void e1000_lower_eec_clk(struct e1000_hw *hw, u32 *eecd) { *eecd = *eecd & ~E1000_EECD_SK; E1000_WRITE_REG(hw, E1000_EECD, *eecd); E1000_WRITE_FLUSH(hw); usec_delay(hw->nvm.delay_usec); } /** * e1000_shift_out_eec_bits - Shift data bits our to the EEPROM * @hw: pointer to the HW structure * @data: data to send to the EEPROM * @count: number of bits to shift out * * We need to shift 'count' bits out to the EEPROM. So, the value in the * "data" parameter will be shifted out to the EEPROM one bit at a time. * In order to do this, "data" must be broken down into bits. **/ static void e1000_shift_out_eec_bits(struct e1000_hw *hw, u16 data, u16 count) { struct e1000_nvm_info *nvm = &hw->nvm; u32 eecd = E1000_READ_REG(hw, E1000_EECD); u32 mask; DEBUGFUNC("e1000_shift_out_eec_bits"); mask = 0x01 << (count - 1); if (nvm->type == e1000_nvm_eeprom_microwire) eecd &= ~E1000_EECD_DO; else if (nvm->type == e1000_nvm_eeprom_spi) eecd |= E1000_EECD_DO; do { eecd &= ~E1000_EECD_DI; if (data & mask) eecd |= E1000_EECD_DI; E1000_WRITE_REG(hw, E1000_EECD, eecd); E1000_WRITE_FLUSH(hw); usec_delay(nvm->delay_usec); e1000_raise_eec_clk(hw, &eecd); e1000_lower_eec_clk(hw, &eecd); mask >>= 1; } while (mask); eecd &= ~E1000_EECD_DI; E1000_WRITE_REG(hw, E1000_EECD, eecd); } /** * e1000_shift_in_eec_bits - Shift data bits in from the EEPROM * @hw: pointer to the HW structure * @count: number of bits to shift in * * In order to read a register from the EEPROM, we need to shift 'count' bits * in from the EEPROM. Bits are "shifted in" by raising the clock input to * the EEPROM (setting the SK bit), and then reading the value of the data out * "DO" bit. During this "shifting in" process the data in "DI" bit should * always be clear. **/ static u16 e1000_shift_in_eec_bits(struct e1000_hw *hw, u16 count) { u32 eecd; u32 i; u16 data; DEBUGFUNC("e1000_shift_in_eec_bits"); eecd = E1000_READ_REG(hw, E1000_EECD); eecd &= ~(E1000_EECD_DO | E1000_EECD_DI); data = 0; for (i = 0; i < count; i++) { data <<= 1; e1000_raise_eec_clk(hw, &eecd); eecd = E1000_READ_REG(hw, E1000_EECD); eecd &= ~E1000_EECD_DI; if (eecd & E1000_EECD_DO) data |= 1; e1000_lower_eec_clk(hw, &eecd); } return data; } /** * e1000_poll_eerd_eewr_done - Poll for EEPROM read/write completion * @hw: pointer to the HW structure * @ee_reg: EEPROM flag for polling * * Polls the EEPROM status bit for either read or write completion based * upon the value of 'ee_reg'. **/ s32 e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg) { u32 attempts = 100000; u32 i, reg = 0; DEBUGFUNC("e1000_poll_eerd_eewr_done"); for (i = 0; i < attempts; i++) { if (ee_reg == E1000_NVM_POLL_READ) reg = E1000_READ_REG(hw, E1000_EERD); else reg = E1000_READ_REG(hw, E1000_EEWR); if (reg & E1000_NVM_RW_REG_DONE) return E1000_SUCCESS; usec_delay(5); } return -E1000_ERR_NVM; } /** * e1000_acquire_nvm_generic - Generic request for access to EEPROM * @hw: pointer to the HW structure * * Set the EEPROM access request bit and wait for EEPROM access grant bit. * Return successful if access grant bit set, else clear the request for * EEPROM access and return -E1000_ERR_NVM (-1). **/ s32 e1000_acquire_nvm_generic(struct e1000_hw *hw) { u32 eecd = E1000_READ_REG(hw, E1000_EECD); s32 timeout = E1000_NVM_GRANT_ATTEMPTS; DEBUGFUNC("e1000_acquire_nvm_generic"); E1000_WRITE_REG(hw, E1000_EECD, eecd | E1000_EECD_REQ); eecd = E1000_READ_REG(hw, E1000_EECD); while (timeout) { if (eecd & E1000_EECD_GNT) break; usec_delay(5); eecd = E1000_READ_REG(hw, E1000_EECD); timeout--; } if (!timeout) { eecd &= ~E1000_EECD_REQ; E1000_WRITE_REG(hw, E1000_EECD, eecd); DEBUGOUT("Could not acquire NVM grant\n"); return -E1000_ERR_NVM; } return E1000_SUCCESS; } /** * e1000_standby_nvm - Return EEPROM to standby state * @hw: pointer to the HW structure * * Return the EEPROM to a standby state. **/ static void e1000_standby_nvm(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; u32 eecd = E1000_READ_REG(hw, E1000_EECD); DEBUGFUNC("e1000_standby_nvm"); if (nvm->type == e1000_nvm_eeprom_microwire) { eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); E1000_WRITE_REG(hw, E1000_EECD, eecd); E1000_WRITE_FLUSH(hw); usec_delay(nvm->delay_usec); e1000_raise_eec_clk(hw, &eecd); /* Select EEPROM */ eecd |= E1000_EECD_CS; E1000_WRITE_REG(hw, E1000_EECD, eecd); E1000_WRITE_FLUSH(hw); usec_delay(nvm->delay_usec); e1000_lower_eec_clk(hw, &eecd); } else if (nvm->type == e1000_nvm_eeprom_spi) { /* Toggle CS to flush commands */ eecd |= E1000_EECD_CS; E1000_WRITE_REG(hw, E1000_EECD, eecd); E1000_WRITE_FLUSH(hw); usec_delay(nvm->delay_usec); eecd &= ~E1000_EECD_CS; E1000_WRITE_REG(hw, E1000_EECD, eecd); E1000_WRITE_FLUSH(hw); usec_delay(nvm->delay_usec); } } /** * e1000_stop_nvm - Terminate EEPROM command * @hw: pointer to the HW structure * * Terminates the current command by inverting the EEPROM's chip select pin. **/ void e1000_stop_nvm(struct e1000_hw *hw) { u32 eecd; DEBUGFUNC("e1000_stop_nvm"); eecd = E1000_READ_REG(hw, E1000_EECD); if (hw->nvm.type == e1000_nvm_eeprom_spi) { /* Pull CS high */ eecd |= E1000_EECD_CS; e1000_lower_eec_clk(hw, &eecd); } else if (hw->nvm.type == e1000_nvm_eeprom_microwire) { /* CS on Microwire is active-high */ eecd &= ~(E1000_EECD_CS | E1000_EECD_DI); E1000_WRITE_REG(hw, E1000_EECD, eecd); e1000_raise_eec_clk(hw, &eecd); e1000_lower_eec_clk(hw, &eecd); } } /** * e1000_release_nvm_generic - Release exclusive access to EEPROM * @hw: pointer to the HW structure * * Stop any current commands to the EEPROM and clear the EEPROM request bit. **/ void e1000_release_nvm_generic(struct e1000_hw *hw) { u32 eecd; DEBUGFUNC("e1000_release_nvm_generic"); e1000_stop_nvm(hw); eecd = E1000_READ_REG(hw, E1000_EECD); eecd &= ~E1000_EECD_REQ; E1000_WRITE_REG(hw, E1000_EECD, eecd); } /** * e1000_ready_nvm_eeprom - Prepares EEPROM for read/write * @hw: pointer to the HW structure * * Setups the EEPROM for reading and writing. **/ static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; u32 eecd = E1000_READ_REG(hw, E1000_EECD); u8 spi_stat_reg; DEBUGFUNC("e1000_ready_nvm_eeprom"); if (nvm->type == e1000_nvm_eeprom_microwire) { /* Clear SK and DI */ eecd &= ~(E1000_EECD_DI | E1000_EECD_SK); E1000_WRITE_REG(hw, E1000_EECD, eecd); /* Set CS */ eecd |= E1000_EECD_CS; E1000_WRITE_REG(hw, E1000_EECD, eecd); } else if (nvm->type == e1000_nvm_eeprom_spi) { u16 timeout = NVM_MAX_RETRY_SPI; /* Clear SK and CS */ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); E1000_WRITE_REG(hw, E1000_EECD, eecd); E1000_WRITE_FLUSH(hw); usec_delay(1); /* Read "Status Register" repeatedly until the LSB is cleared. * The EEPROM will signal that the command has been completed * by clearing bit 0 of the internal status register. If it's * not cleared within 'timeout', then error out. */ while (timeout) { e1000_shift_out_eec_bits(hw, NVM_RDSR_OPCODE_SPI, hw->nvm.opcode_bits); spi_stat_reg = (u8)e1000_shift_in_eec_bits(hw, 8); if (!(spi_stat_reg & NVM_STATUS_RDY_SPI)) break; usec_delay(5); e1000_standby_nvm(hw); timeout--; } if (!timeout) { DEBUGOUT("SPI NVM Status error\n"); return -E1000_ERR_NVM; } } return E1000_SUCCESS; } /** * e1000_read_nvm_spi - Read EEPROM's using SPI * @hw: pointer to the HW structure * @offset: offset of word in the EEPROM to read * @words: number of words to read * @data: word read from the EEPROM * * Reads a 16 bit word from the EEPROM. **/ s32 e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { struct e1000_nvm_info *nvm = &hw->nvm; u32 i = 0; s32 ret_val; u16 word_in; u8 read_opcode = NVM_READ_OPCODE_SPI; DEBUGFUNC("e1000_read_nvm_spi"); /* A check for invalid values: offset too large, too many words, * and not enough words. */ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || (words == 0)) { DEBUGOUT("nvm parameter(s) out of bounds\n"); return -E1000_ERR_NVM; } ret_val = nvm->ops.acquire(hw); if (ret_val) return ret_val; ret_val = e1000_ready_nvm_eeprom(hw); if (ret_val) goto release; e1000_standby_nvm(hw); if ((nvm->address_bits == 8) && (offset >= 128)) read_opcode |= NVM_A8_OPCODE_SPI; /* Send the READ command (opcode + addr) */ e1000_shift_out_eec_bits(hw, read_opcode, nvm->opcode_bits); e1000_shift_out_eec_bits(hw, (u16)(offset*2), nvm->address_bits); /* Read the data. SPI NVMs increment the address with each byte * read and will roll over if reading beyond the end. This allows * us to read the whole NVM from any offset */ for (i = 0; i < words; i++) { word_in = e1000_shift_in_eec_bits(hw, 16); data[i] = (word_in >> 8) | (word_in << 8); } release: nvm->ops.release(hw); return ret_val; } /** * e1000_read_nvm_microwire - Reads EEPROM's using microwire * @hw: pointer to the HW structure * @offset: offset of word in the EEPROM to read * @words: number of words to read * @data: word read from the EEPROM * * Reads a 16 bit word from the EEPROM. **/ s32 e1000_read_nvm_microwire(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { struct e1000_nvm_info *nvm = &hw->nvm; u32 i = 0; s32 ret_val; u8 read_opcode = NVM_READ_OPCODE_MICROWIRE; DEBUGFUNC("e1000_read_nvm_microwire"); /* A check for invalid values: offset too large, too many words, * and not enough words. */ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || (words == 0)) { DEBUGOUT("nvm parameter(s) out of bounds\n"); return -E1000_ERR_NVM; } ret_val = nvm->ops.acquire(hw); if (ret_val) return ret_val; ret_val = e1000_ready_nvm_eeprom(hw); if (ret_val) goto release; for (i = 0; i < words; i++) { /* Send the READ command (opcode + addr) */ e1000_shift_out_eec_bits(hw, read_opcode, nvm->opcode_bits); e1000_shift_out_eec_bits(hw, (u16)(offset + i), nvm->address_bits); /* Read the data. For microwire, each word requires the * overhead of setup and tear-down. */ data[i] = e1000_shift_in_eec_bits(hw, 16); e1000_standby_nvm(hw); } release: nvm->ops.release(hw); return ret_val; } /** * e1000_read_nvm_eerd - Reads EEPROM using EERD register * @hw: pointer to the HW structure * @offset: offset of word in the EEPROM to read * @words: number of words to read * @data: word read from the EEPROM * * Reads a 16 bit word from the EEPROM using the EERD register. **/ s32 e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { struct e1000_nvm_info *nvm = &hw->nvm; u32 i, eerd = 0; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_read_nvm_eerd"); /* A check for invalid values: offset too large, too many words, * too many words for the offset, and not enough words. */ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || (words == 0)) { DEBUGOUT("nvm parameter(s) out of bounds\n"); return -E1000_ERR_NVM; } for (i = 0; i < words; i++) { eerd = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) + E1000_NVM_RW_REG_START; E1000_WRITE_REG(hw, E1000_EERD, eerd); ret_val = e1000_poll_eerd_eewr_done(hw, E1000_NVM_POLL_READ); if (ret_val) break; data[i] = (E1000_READ_REG(hw, E1000_EERD) >> E1000_NVM_RW_REG_DATA); } if (ret_val) DEBUGOUT1("NVM read error: %d\n", ret_val); return ret_val; } /** * e1000_write_nvm_spi - Write to EEPROM using SPI * @hw: pointer to the HW structure * @offset: offset within the EEPROM to be written to * @words: number of words to write * @data: 16 bit word(s) to be written to the EEPROM * * Writes data to EEPROM at offset using SPI interface. * * If e1000_update_nvm_checksum is not called after this function , the * EEPROM will most likely contain an invalid checksum. **/ s32 e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { struct e1000_nvm_info *nvm = &hw->nvm; s32 ret_val = -E1000_ERR_NVM; u16 widx = 0; DEBUGFUNC("e1000_write_nvm_spi"); /* A check for invalid values: offset too large, too many words, * and not enough words. */ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || (words == 0)) { DEBUGOUT("nvm parameter(s) out of bounds\n"); return -E1000_ERR_NVM; } while (widx < words) { u8 write_opcode = NVM_WRITE_OPCODE_SPI; ret_val = nvm->ops.acquire(hw); if (ret_val) return ret_val; ret_val = e1000_ready_nvm_eeprom(hw); if (ret_val) { nvm->ops.release(hw); return ret_val; } e1000_standby_nvm(hw); /* Send the WRITE ENABLE command (8 bit opcode) */ e1000_shift_out_eec_bits(hw, NVM_WREN_OPCODE_SPI, nvm->opcode_bits); e1000_standby_nvm(hw); /* Some SPI eeproms use the 8th address bit embedded in the * opcode */ if ((nvm->address_bits == 8) && (offset >= 128)) write_opcode |= NVM_A8_OPCODE_SPI; /* Send the Write command (8-bit opcode + addr) */ e1000_shift_out_eec_bits(hw, write_opcode, nvm->opcode_bits); e1000_shift_out_eec_bits(hw, (u16)((offset + widx) * 2), nvm->address_bits); /* Loop to allow for up to whole page write of eeprom */ while (widx < words) { u16 word_out = data[widx]; word_out = (word_out >> 8) | (word_out << 8); e1000_shift_out_eec_bits(hw, word_out, 16); widx++; if ((((offset + widx) * 2) % nvm->page_size) == 0) { e1000_standby_nvm(hw); break; } } msec_delay(10); nvm->ops.release(hw); } return ret_val; } /** * e1000_write_nvm_microwire - Writes EEPROM using microwire * @hw: pointer to the HW structure * @offset: offset within the EEPROM to be written to * @words: number of words to write * @data: 16 bit word(s) to be written to the EEPROM * * Writes data to EEPROM at offset using microwire interface. * * If e1000_update_nvm_checksum is not called after this function , the * EEPROM will most likely contain an invalid checksum. **/ s32 e1000_write_nvm_microwire(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { struct e1000_nvm_info *nvm = &hw->nvm; s32 ret_val; u32 eecd; u16 words_written = 0; u16 widx = 0; DEBUGFUNC("e1000_write_nvm_microwire"); /* A check for invalid values: offset too large, too many words, * and not enough words. */ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || (words == 0)) { DEBUGOUT("nvm parameter(s) out of bounds\n"); return -E1000_ERR_NVM; } ret_val = nvm->ops.acquire(hw); if (ret_val) return ret_val; ret_val = e1000_ready_nvm_eeprom(hw); if (ret_val) goto release; e1000_shift_out_eec_bits(hw, NVM_EWEN_OPCODE_MICROWIRE, (u16)(nvm->opcode_bits + 2)); e1000_shift_out_eec_bits(hw, 0, (u16)(nvm->address_bits - 2)); e1000_standby_nvm(hw); while (words_written < words) { e1000_shift_out_eec_bits(hw, NVM_WRITE_OPCODE_MICROWIRE, nvm->opcode_bits); e1000_shift_out_eec_bits(hw, (u16)(offset + words_written), nvm->address_bits); e1000_shift_out_eec_bits(hw, data[words_written], 16); e1000_standby_nvm(hw); for (widx = 0; widx < 200; widx++) { eecd = E1000_READ_REG(hw, E1000_EECD); if (eecd & E1000_EECD_DO) break; usec_delay(50); } if (widx == 200) { DEBUGOUT("NVM Write did not complete\n"); ret_val = -E1000_ERR_NVM; goto release; } e1000_standby_nvm(hw); words_written++; } e1000_shift_out_eec_bits(hw, NVM_EWDS_OPCODE_MICROWIRE, (u16)(nvm->opcode_bits + 2)); e1000_shift_out_eec_bits(hw, 0, (u16)(nvm->address_bits - 2)); release: nvm->ops.release(hw); return ret_val; } /** * e1000_read_pba_string_generic - Read device part number * @hw: pointer to the HW structure * @pba_num: pointer to device part number * @pba_num_size: size of part number buffer * * Reads the product board assembly (PBA) number from the EEPROM and stores * the value in pba_num. **/ s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size) { s32 ret_val; u16 nvm_data; u16 pba_ptr; u16 offset; u16 length; DEBUGFUNC("e1000_read_pba_string_generic"); if ((hw->mac.type >= e1000_i210) && !e1000_get_flash_presence_i210(hw)) { DEBUGOUT("Flashless no PBA string\n"); return -E1000_ERR_NVM_PBA_SECTION; } if (pba_num == NULL) { DEBUGOUT("PBA string buffer was null\n"); return -E1000_ERR_INVALID_ARGUMENT; } ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } /* if nvm_data is not ptr guard the PBA must be in legacy format which * means pba_ptr is actually our second data word for the PBA number * and we can decode it into an ascii string */ if (nvm_data != NVM_PBA_PTR_GUARD) { DEBUGOUT("NVM PBA number is not stored as string\n"); /* make sure callers buffer is big enough to store the PBA */ if (pba_num_size < E1000_PBANUM_LENGTH) { DEBUGOUT("PBA string buffer too small\n"); return E1000_ERR_NO_SPACE; } /* extract hex string from data and pba_ptr */ pba_num[0] = (nvm_data >> 12) & 0xF; pba_num[1] = (nvm_data >> 8) & 0xF; pba_num[2] = (nvm_data >> 4) & 0xF; pba_num[3] = nvm_data & 0xF; pba_num[4] = (pba_ptr >> 12) & 0xF; pba_num[5] = (pba_ptr >> 8) & 0xF; pba_num[6] = '-'; pba_num[7] = 0; pba_num[8] = (pba_ptr >> 4) & 0xF; pba_num[9] = pba_ptr & 0xF; /* put a null character on the end of our string */ pba_num[10] = '\0'; /* switch all the data but the '-' to hex char */ for (offset = 0; offset < 10; offset++) { if (pba_num[offset] < 0xA) pba_num[offset] += '0'; else if (pba_num[offset] < 0x10) pba_num[offset] += 'A' - 0xA; } return E1000_SUCCESS; } ret_val = hw->nvm.ops.read(hw, pba_ptr, 1, &length); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } if (length == 0xFFFF || length == 0) { DEBUGOUT("NVM PBA number section invalid length\n"); return -E1000_ERR_NVM_PBA_SECTION; } /* check if pba_num buffer is big enough */ if (pba_num_size < (((u32)length * 2) - 1)) { DEBUGOUT("PBA string buffer too small\n"); return -E1000_ERR_NO_SPACE; } /* trim pba length from start of string */ pba_ptr++; length--; for (offset = 0; offset < length; offset++) { ret_val = hw->nvm.ops.read(hw, pba_ptr + offset, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } pba_num[offset * 2] = (u8)(nvm_data >> 8); pba_num[(offset * 2) + 1] = (u8)(nvm_data & 0xFF); } pba_num[offset * 2] = '\0'; return E1000_SUCCESS; } /** * e1000_read_pba_length_generic - Read device part number length * @hw: pointer to the HW structure * @pba_num_size: size of part number buffer * * Reads the product board assembly (PBA) number length from the EEPROM and * stores the value in pba_num_size. **/ s32 e1000_read_pba_length_generic(struct e1000_hw *hw, u32 *pba_num_size) { s32 ret_val; u16 nvm_data; u16 pba_ptr; u16 length; DEBUGFUNC("e1000_read_pba_length_generic"); if (pba_num_size == NULL) { DEBUGOUT("PBA buffer size was null\n"); return -E1000_ERR_INVALID_ARGUMENT; } ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } /* if data is not ptr guard the PBA must be in legacy format */ if (nvm_data != NVM_PBA_PTR_GUARD) { *pba_num_size = E1000_PBANUM_LENGTH; return E1000_SUCCESS; } ret_val = hw->nvm.ops.read(hw, pba_ptr, 1, &length); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } if (length == 0xFFFF || length == 0) { DEBUGOUT("NVM PBA number section invalid length\n"); return -E1000_ERR_NVM_PBA_SECTION; } /* Convert from length in u16 values to u8 chars, add 1 for NULL, * and subtract 2 because length field is included in length. */ *pba_num_size = ((u32)length * 2) - 1; return E1000_SUCCESS; } -/** - * e1000_read_pba_num_generic - Read device part number - * @hw: pointer to the HW structure - * @pba_num: pointer to device part number - * - * Reads the product board assembly (PBA) number from the EEPROM and stores - * the value in pba_num. - **/ -s32 e1000_read_pba_num_generic(struct e1000_hw *hw, u32 *pba_num) -{ - s32 ret_val; - u16 nvm_data; - DEBUGFUNC("e1000_read_pba_num_generic"); - - ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - return ret_val; - } else if (nvm_data == NVM_PBA_PTR_GUARD) { - DEBUGOUT("NVM Not Supported\n"); - return -E1000_NOT_IMPLEMENTED; - } - *pba_num = (u32)(nvm_data << 16); - - ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &nvm_data); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - return ret_val; - } - *pba_num |= nvm_data; - - return E1000_SUCCESS; -} - - /** * e1000_read_pba_raw * @hw: pointer to the HW structure * @eeprom_buf: optional pointer to EEPROM image * @eeprom_buf_size: size of EEPROM image in words * @max_pba_block_size: PBA block size limit * @pba: pointer to output PBA structure * * Reads PBA from EEPROM image when eeprom_buf is not NULL. * Reads PBA from physical EEPROM device when eeprom_buf is NULL. * **/ s32 e1000_read_pba_raw(struct e1000_hw *hw, u16 *eeprom_buf, u32 eeprom_buf_size, u16 max_pba_block_size, struct e1000_pba *pba) { s32 ret_val; u16 pba_block_size; if (pba == NULL) return -E1000_ERR_PARAM; if (eeprom_buf == NULL) { ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_0, 2, &pba->word[0]); if (ret_val) return ret_val; } else { if (eeprom_buf_size > NVM_PBA_OFFSET_1) { pba->word[0] = eeprom_buf[NVM_PBA_OFFSET_0]; pba->word[1] = eeprom_buf[NVM_PBA_OFFSET_1]; } else { return -E1000_ERR_PARAM; } } if (pba->word[0] == NVM_PBA_PTR_GUARD) { if (pba->pba_block == NULL) return -E1000_ERR_PARAM; ret_val = e1000_get_pba_block_size(hw, eeprom_buf, eeprom_buf_size, &pba_block_size); if (ret_val) return ret_val; if (pba_block_size > max_pba_block_size) return -E1000_ERR_PARAM; if (eeprom_buf == NULL) { ret_val = e1000_read_nvm(hw, pba->word[1], pba_block_size, pba->pba_block); if (ret_val) return ret_val; } else { if (eeprom_buf_size > (u32)(pba->word[1] + pba_block_size)) { memcpy(pba->pba_block, &eeprom_buf[pba->word[1]], pba_block_size * sizeof(u16)); } else { return -E1000_ERR_PARAM; } } } return E1000_SUCCESS; } /** * e1000_write_pba_raw * @hw: pointer to the HW structure * @eeprom_buf: optional pointer to EEPROM image * @eeprom_buf_size: size of EEPROM image in words * @pba: pointer to PBA structure * * Writes PBA to EEPROM image when eeprom_buf is not NULL. * Writes PBA to physical EEPROM device when eeprom_buf is NULL. * **/ s32 e1000_write_pba_raw(struct e1000_hw *hw, u16 *eeprom_buf, u32 eeprom_buf_size, struct e1000_pba *pba) { s32 ret_val; if (pba == NULL) return -E1000_ERR_PARAM; if (eeprom_buf == NULL) { ret_val = e1000_write_nvm(hw, NVM_PBA_OFFSET_0, 2, &pba->word[0]); if (ret_val) return ret_val; } else { if (eeprom_buf_size > NVM_PBA_OFFSET_1) { eeprom_buf[NVM_PBA_OFFSET_0] = pba->word[0]; eeprom_buf[NVM_PBA_OFFSET_1] = pba->word[1]; } else { return -E1000_ERR_PARAM; } } if (pba->word[0] == NVM_PBA_PTR_GUARD) { if (pba->pba_block == NULL) return -E1000_ERR_PARAM; if (eeprom_buf == NULL) { ret_val = e1000_write_nvm(hw, pba->word[1], pba->pba_block[0], pba->pba_block); if (ret_val) return ret_val; } else { if (eeprom_buf_size > (u32)(pba->word[1] + pba->pba_block[0])) { memcpy(&eeprom_buf[pba->word[1]], pba->pba_block, pba->pba_block[0] * sizeof(u16)); } else { return -E1000_ERR_PARAM; } } } return E1000_SUCCESS; } /** * e1000_get_pba_block_size * @hw: pointer to the HW structure * @eeprom_buf: optional pointer to EEPROM image * @eeprom_buf_size: size of EEPROM image in words * @pba_data_size: pointer to output variable * * Returns the size of the PBA block in words. Function operates on EEPROM * image if the eeprom_buf pointer is not NULL otherwise it accesses physical * EEPROM device. * **/ s32 e1000_get_pba_block_size(struct e1000_hw *hw, u16 *eeprom_buf, u32 eeprom_buf_size, u16 *pba_block_size) { s32 ret_val; u16 pba_word[2]; u16 length; DEBUGFUNC("e1000_get_pba_block_size"); if (eeprom_buf == NULL) { ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_0, 2, &pba_word[0]); if (ret_val) return ret_val; } else { if (eeprom_buf_size > NVM_PBA_OFFSET_1) { pba_word[0] = eeprom_buf[NVM_PBA_OFFSET_0]; pba_word[1] = eeprom_buf[NVM_PBA_OFFSET_1]; } else { return -E1000_ERR_PARAM; } } if (pba_word[0] == NVM_PBA_PTR_GUARD) { if (eeprom_buf == NULL) { ret_val = e1000_read_nvm(hw, pba_word[1] + 0, 1, &length); if (ret_val) return ret_val; } else { if (eeprom_buf_size > pba_word[1]) length = eeprom_buf[pba_word[1] + 0]; else return -E1000_ERR_PARAM; } if (length == 0xFFFF || length == 0) return -E1000_ERR_NVM_PBA_SECTION; } else { /* PBA number in legacy format, there is no PBA Block. */ length = 0; } if (pba_block_size != NULL) *pba_block_size = length; return E1000_SUCCESS; } /** * e1000_read_mac_addr_generic - Read device MAC address * @hw: pointer to the HW structure * * Reads the device MAC address from the EEPROM and stores the value. * Since devices with two ports use the same EEPROM, we increment the * last bit in the MAC address for the second port. **/ s32 e1000_read_mac_addr_generic(struct e1000_hw *hw) { u32 rar_high; u32 rar_low; u16 i; rar_high = E1000_READ_REG(hw, E1000_RAH(0)); rar_low = E1000_READ_REG(hw, E1000_RAL(0)); for (i = 0; i < E1000_RAL_MAC_ADDR_LEN; i++) hw->mac.perm_addr[i] = (u8)(rar_low >> (i*8)); for (i = 0; i < E1000_RAH_MAC_ADDR_LEN; i++) hw->mac.perm_addr[i+4] = (u8)(rar_high >> (i*8)); for (i = 0; i < ETH_ADDR_LEN; i++) hw->mac.addr[i] = hw->mac.perm_addr[i]; return E1000_SUCCESS; } /** * e1000_validate_nvm_checksum_generic - Validate EEPROM checksum * @hw: pointer to the HW structure * * Calculates the EEPROM checksum by reading/adding each word of the EEPROM * and then verifies that the sum of the EEPROM is equal to 0xBABA. **/ s32 e1000_validate_nvm_checksum_generic(struct e1000_hw *hw) { s32 ret_val; u16 checksum = 0; u16 i, nvm_data; DEBUGFUNC("e1000_validate_nvm_checksum_generic"); for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) { ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } checksum += nvm_data; } if (checksum != (u16) NVM_SUM) { DEBUGOUT("NVM Checksum Invalid\n"); return -E1000_ERR_NVM; } return E1000_SUCCESS; } /** * e1000_update_nvm_checksum_generic - Update EEPROM checksum * @hw: pointer to the HW structure * * Updates the EEPROM checksum by reading/adding each word of the EEPROM * up to the checksum. Then calculates the EEPROM checksum and writes the * value to the EEPROM. **/ s32 e1000_update_nvm_checksum_generic(struct e1000_hw *hw) { s32 ret_val; u16 checksum = 0; u16 i, nvm_data; DEBUGFUNC("e1000_update_nvm_checksum"); for (i = 0; i < NVM_CHECKSUM_REG; i++) { ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error while updating checksum.\n"); return ret_val; } checksum += nvm_data; } checksum = (u16) NVM_SUM - checksum; ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum); if (ret_val) DEBUGOUT("NVM Write Error while updating checksum.\n"); return ret_val; } /** * e1000_reload_nvm_generic - Reloads EEPROM * @hw: pointer to the HW structure * * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the * extended control register. **/ static void e1000_reload_nvm_generic(struct e1000_hw *hw) { u32 ctrl_ext; DEBUGFUNC("e1000_reload_nvm_generic"); usec_delay(10); ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_EE_RST; E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); E1000_WRITE_FLUSH(hw); -} - -/** - * e1000_get_fw_version - Get firmware version information - * @hw: pointer to the HW structure - * @fw_vers: pointer to output version structure - * - * unsupported/not present features return 0 in version structure - **/ -void e1000_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers) -{ - u16 eeprom_verh, eeprom_verl, etrack_test, fw_version; - u8 q, hval, rem, result; - u16 comb_verh, comb_verl, comb_offset; - - memset(fw_vers, 0, sizeof(struct e1000_fw_version)); - - /* basic eeprom version numbers, bits used vary by part and by tool - * used to create the nvm images */ - /* Check which data format we have */ - switch (hw->mac.type) { - case e1000_i211: - e1000_read_invm_version(hw, fw_vers); - return; - case e1000_82575: - case e1000_82576: - case e1000_82580: - hw->nvm.ops.read(hw, NVM_ETRACK_HIWORD, 1, &etrack_test); - /* Use this format, unless EETRACK ID exists, - * then use alternate format - */ - if ((etrack_test & NVM_MAJOR_MASK) != NVM_ETRACK_VALID) { - hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); - fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) - >> NVM_MAJOR_SHIFT; - fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK) - >> NVM_MINOR_SHIFT; - fw_vers->eep_build = (fw_version & NVM_IMAGE_ID_MASK); - goto etrack_id; - } - break; - case e1000_i210: - if (!(e1000_get_flash_presence_i210(hw))) { - e1000_read_invm_version(hw, fw_vers); - return; - } - /* fall through */ - case e1000_i350: - hw->nvm.ops.read(hw, NVM_ETRACK_HIWORD, 1, &etrack_test); - /* find combo image version */ - hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset); - if ((comb_offset != 0x0) && - (comb_offset != NVM_VER_INVALID)) { - - hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset - + 1), 1, &comb_verh); - hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset), - 1, &comb_verl); - - /* get Option Rom version if it exists and is valid */ - if ((comb_verh && comb_verl) && - ((comb_verh != NVM_VER_INVALID) && - (comb_verl != NVM_VER_INVALID))) { - - fw_vers->or_valid = TRUE; - fw_vers->or_major = - comb_verl >> NVM_COMB_VER_SHFT; - fw_vers->or_build = - (comb_verl << NVM_COMB_VER_SHFT) - | (comb_verh >> NVM_COMB_VER_SHFT); - fw_vers->or_patch = - comb_verh & NVM_COMB_VER_MASK; - } - } - break; - default: - hw->nvm.ops.read(hw, NVM_ETRACK_HIWORD, 1, &etrack_test); - return; - } - hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); - fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) - >> NVM_MAJOR_SHIFT; - - /* check for old style version format in newer images*/ - if ((fw_version & NVM_NEW_DEC_MASK) == 0x0) { - eeprom_verl = (fw_version & NVM_COMB_VER_MASK); - } else { - eeprom_verl = (fw_version & NVM_MINOR_MASK) - >> NVM_MINOR_SHIFT; - } - /* Convert minor value to hex before assigning to output struct - * Val to be converted will not be higher than 99, per tool output - */ - q = eeprom_verl / NVM_HEX_CONV; - hval = q * NVM_HEX_TENS; - rem = eeprom_verl % NVM_HEX_CONV; - result = hval + rem; - fw_vers->eep_minor = result; - -etrack_id: - if ((etrack_test & NVM_MAJOR_MASK) == NVM_ETRACK_VALID) { - hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verl); - hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verh); - fw_vers->etrack_id = (eeprom_verh << NVM_ETRACK_SHIFT) - | eeprom_verl; - } else if ((etrack_test & NVM_ETRACK_VALID) == 0) { - hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verh); - hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verl); - fw_vers->etrack_id = (eeprom_verh << NVM_ETRACK_SHIFT) | - eeprom_verl; - } } Index: projects/clang-trunk/sys/dev/e1000/e1000_nvm.h =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_nvm.h (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_nvm.h (revision 287502) @@ -1,99 +1,82 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _E1000_NVM_H_ #define _E1000_NVM_H_ +#if !defined(NO_READ_PBA_RAW) || !defined(NO_WRITE_PBA_RAW) struct e1000_pba { u16 word[2]; u16 *pba_block; }; +#endif -struct e1000_fw_version { - u32 etrack_id; - u16 eep_major; - u16 eep_minor; - u16 eep_build; - u8 invm_major; - u8 invm_minor; - u8 invm_img_type; - - bool or_valid; - u16 or_major; - u16 or_build; - u16 or_patch; -}; - - void e1000_init_nvm_ops_generic(struct e1000_hw *hw); s32 e1000_null_read_nvm(struct e1000_hw *hw, u16 a, u16 b, u16 *c); void e1000_null_nvm_generic(struct e1000_hw *hw); s32 e1000_null_led_default(struct e1000_hw *hw, u16 *data); s32 e1000_null_write_nvm(struct e1000_hw *hw, u16 a, u16 b, u16 *c); s32 e1000_acquire_nvm_generic(struct e1000_hw *hw); s32 e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg); s32 e1000_read_mac_addr_generic(struct e1000_hw *hw); -s32 e1000_read_pba_num_generic(struct e1000_hw *hw, u32 *pba_num); s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size); s32 e1000_read_pba_length_generic(struct e1000_hw *hw, u32 *pba_num_size); s32 e1000_read_pba_raw(struct e1000_hw *hw, u16 *eeprom_buf, u32 eeprom_buf_size, u16 max_pba_block_size, struct e1000_pba *pba); s32 e1000_write_pba_raw(struct e1000_hw *hw, u16 *eeprom_buf, u32 eeprom_buf_size, struct e1000_pba *pba); s32 e1000_get_pba_block_size(struct e1000_hw *hw, u16 *eeprom_buf, u32 eeprom_buf_size, u16 *pba_block_size); s32 e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); s32 e1000_read_nvm_microwire(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); s32 e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data); s32 e1000_validate_nvm_checksum_generic(struct e1000_hw *hw); s32 e1000_write_nvm_microwire(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); s32 e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); s32 e1000_update_nvm_checksum_generic(struct e1000_hw *hw); void e1000_stop_nvm(struct e1000_hw *hw); void e1000_release_nvm_generic(struct e1000_hw *hw); -void e1000_get_fw_version(struct e1000_hw *hw, - struct e1000_fw_version *fw_vers); #define E1000_STM_OPCODE 0xDB00 #endif Index: projects/clang-trunk/sys/dev/e1000/e1000_osdep.h =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_osdep.h (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_osdep.h (revision 287502) @@ -1,216 +1,223 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _FREEBSD_OS_H_ #define _FREEBSD_OS_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ASSERT(x) if(!(x)) panic("EM: x") #define usec_delay(x) DELAY(x) #define usec_delay_irq(x) DELAY(x) #define msec_delay(x) DELAY(1000*(x)) #define msec_delay_irq(x) DELAY(1000*(x)) #define DEBUGFUNC(F) DEBUGOUT(F); #define DEBUGOUT(S) do {} while (0) #define DEBUGOUT1(S,A) do {} while (0) #define DEBUGOUT2(S,A,B) do {} while (0) #define DEBUGOUT3(S,A,B,C) do {} while (0) #define DEBUGOUT7(S,A,B,C,D,E,F,G) do {} while (0) #define STATIC static #define FALSE 0 #define TRUE 1 +#ifndef __bool_true_false_are_defined +#define false FALSE +#define true TRUE +#endif #define CMD_MEM_WRT_INVALIDATE 0x0010 /* BIT_4 */ #define PCI_COMMAND_REGISTER PCIR_COMMAND /* Mutex used in the shared code */ #define E1000_MUTEX struct mtx #define E1000_MUTEX_INIT(mutex) mtx_init((mutex), #mutex, \ MTX_NETWORK_LOCK, \ MTX_DEF | MTX_DUPOK) #define E1000_MUTEX_DESTROY(mutex) mtx_destroy(mutex) #define E1000_MUTEX_LOCK(mutex) mtx_lock(mutex) #define E1000_MUTEX_TRYLOCK(mutex) mtx_trylock(mutex) #define E1000_MUTEX_UNLOCK(mutex) mtx_unlock(mutex) typedef uint64_t u64; typedef uint32_t u32; typedef uint16_t u16; typedef uint8_t u8; typedef int64_t s64; typedef int32_t s32; typedef int16_t s16; typedef int8_t s8; +#ifndef __bool_true_false_are_defined +typedef boolean_t bool; +#endif #define __le16 u16 #define __le32 u32 #define __le64 u64 #if __FreeBSD_version < 800000 #if defined(__i386__) || defined(__amd64__) #define mb() __asm volatile("mfence" ::: "memory") #define wmb() __asm volatile("sfence" ::: "memory") #define rmb() __asm volatile("lfence" ::: "memory") #else #define mb() #define rmb() #define wmb() #endif #endif /*__FreeBSD_version < 800000 */ #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } #else #define prefetch(x) #endif struct e1000_osdep { bus_space_tag_t mem_bus_space_tag; bus_space_handle_t mem_bus_space_handle; bus_space_tag_t io_bus_space_tag; bus_space_handle_t io_bus_space_handle; bus_space_tag_t flash_bus_space_tag; bus_space_handle_t flash_bus_space_handle; struct device *dev; }; #define E1000_REGISTER(hw, reg) (((hw)->mac.type >= e1000_82543) \ ? reg : e1000_translate_register_82542(reg)) #define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, E1000_STATUS) /* Read from an absolute offset in the adapter's memory space */ #define E1000_READ_OFFSET(hw, offset) \ bus_space_read_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, offset) /* Write to an absolute offset in the adapter's memory space */ #define E1000_WRITE_OFFSET(hw, offset, value) \ bus_space_write_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, offset, value) /* Register READ/WRITE macros */ #define E1000_READ_REG(hw, reg) \ bus_space_read_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ E1000_REGISTER(hw, reg)) #define E1000_WRITE_REG(hw, reg, value) \ bus_space_write_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ E1000_REGISTER(hw, reg), value) #define E1000_READ_REG_ARRAY(hw, reg, index) \ bus_space_read_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ E1000_REGISTER(hw, reg) + ((index)<< 2)) #define E1000_WRITE_REG_ARRAY(hw, reg, index, value) \ bus_space_write_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ E1000_REGISTER(hw, reg) + ((index)<< 2), value) #define E1000_READ_REG_ARRAY_DWORD E1000_READ_REG_ARRAY #define E1000_WRITE_REG_ARRAY_DWORD E1000_WRITE_REG_ARRAY #define E1000_READ_REG_ARRAY_BYTE(hw, reg, index) \ bus_space_read_1(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ E1000_REGISTER(hw, reg) + index) #define E1000_WRITE_REG_ARRAY_BYTE(hw, reg, index, value) \ bus_space_write_1(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ E1000_REGISTER(hw, reg) + index, value) #define E1000_WRITE_REG_ARRAY_WORD(hw, reg, index, value) \ bus_space_write_2(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ E1000_REGISTER(hw, reg) + (index << 1), value) #define E1000_WRITE_REG_IO(hw, reg, value) do {\ bus_space_write_4(((struct e1000_osdep *)(hw)->back)->io_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->io_bus_space_handle, \ (hw)->io_base, reg); \ bus_space_write_4(((struct e1000_osdep *)(hw)->back)->io_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->io_bus_space_handle, \ (hw)->io_base + 4, value); } while (0) #define E1000_READ_FLASH_REG(hw, reg) \ bus_space_read_4(((struct e1000_osdep *)(hw)->back)->flash_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->flash_bus_space_handle, reg) #define E1000_READ_FLASH_REG16(hw, reg) \ bus_space_read_2(((struct e1000_osdep *)(hw)->back)->flash_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->flash_bus_space_handle, reg) #define E1000_WRITE_FLASH_REG(hw, reg, value) \ bus_space_write_4(((struct e1000_osdep *)(hw)->back)->flash_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->flash_bus_space_handle, reg, value) #define E1000_WRITE_FLASH_REG16(hw, reg, value) \ bus_space_write_2(((struct e1000_osdep *)(hw)->back)->flash_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->flash_bus_space_handle, reg, value) #endif /* _FREEBSD_OS_H_ */ Index: projects/clang-trunk/sys/dev/e1000/e1000_phy.c =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_phy.c (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_phy.c (revision 287502) @@ -1,4253 +1,4253 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "e1000_api.h" static s32 e1000_wait_autoneg(struct e1000_hw *hw); static s32 e1000_access_phy_wakeup_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data, bool read, bool page_set); static u32 e1000_get_phy_addr_for_hv_page(u32 page); static s32 e1000_access_phy_debug_regs_hv(struct e1000_hw *hw, u32 offset, u16 *data, bool read); /* Cable length tables */ static const u16 e1000_m88_cable_length_table[] = { 0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED }; #define M88E1000_CABLE_LENGTH_TABLE_SIZE \ (sizeof(e1000_m88_cable_length_table) / \ sizeof(e1000_m88_cable_length_table[0])) static const u16 e1000_igp_2_cable_length_table[] = { 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21, 0, 0, 0, 3, 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41, 6, 10, 14, 18, 22, 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61, 21, 26, 31, 35, 40, 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82, 40, 45, 51, 56, 61, 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104, 60, 66, 72, 77, 82, 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121, 83, 89, 95, 100, 105, 109, 113, 116, 119, 122, 124, 104, 109, 114, 118, 121, 124}; #define IGP02E1000_CABLE_LENGTH_TABLE_SIZE \ (sizeof(e1000_igp_2_cable_length_table) / \ sizeof(e1000_igp_2_cable_length_table[0])) /** * e1000_init_phy_ops_generic - Initialize PHY function pointers * @hw: pointer to the HW structure * * Setups up the function pointers to no-op functions **/ void e1000_init_phy_ops_generic(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; DEBUGFUNC("e1000_init_phy_ops_generic"); /* Initialize function pointers */ phy->ops.init_params = e1000_null_ops_generic; phy->ops.acquire = e1000_null_ops_generic; phy->ops.check_polarity = e1000_null_ops_generic; phy->ops.check_reset_block = e1000_null_ops_generic; phy->ops.commit = e1000_null_ops_generic; phy->ops.force_speed_duplex = e1000_null_ops_generic; phy->ops.get_cfg_done = e1000_null_ops_generic; phy->ops.get_cable_length = e1000_null_ops_generic; phy->ops.get_info = e1000_null_ops_generic; phy->ops.set_page = e1000_null_set_page; phy->ops.read_reg = e1000_null_read_reg; phy->ops.read_reg_locked = e1000_null_read_reg; phy->ops.read_reg_page = e1000_null_read_reg; phy->ops.release = e1000_null_phy_generic; phy->ops.reset = e1000_null_ops_generic; phy->ops.set_d0_lplu_state = e1000_null_lplu_state; phy->ops.set_d3_lplu_state = e1000_null_lplu_state; phy->ops.write_reg = e1000_null_write_reg; phy->ops.write_reg_locked = e1000_null_write_reg; phy->ops.write_reg_page = e1000_null_write_reg; phy->ops.power_up = e1000_null_phy_generic; phy->ops.power_down = e1000_null_phy_generic; phy->ops.read_i2c_byte = e1000_read_i2c_byte_null; phy->ops.write_i2c_byte = e1000_write_i2c_byte_null; phy->ops.cfg_on_link_up = e1000_null_ops_generic; } /** * e1000_null_set_page - No-op function, return 0 * @hw: pointer to the HW structure **/ s32 e1000_null_set_page(struct e1000_hw E1000_UNUSEDARG *hw, u16 E1000_UNUSEDARG data) { DEBUGFUNC("e1000_null_set_page"); return E1000_SUCCESS; } /** * e1000_null_read_reg - No-op function, return 0 * @hw: pointer to the HW structure **/ s32 e1000_null_read_reg(struct e1000_hw E1000_UNUSEDARG *hw, u32 E1000_UNUSEDARG offset, u16 E1000_UNUSEDARG *data) { DEBUGFUNC("e1000_null_read_reg"); return E1000_SUCCESS; } /** * e1000_null_phy_generic - No-op function, return void * @hw: pointer to the HW structure **/ void e1000_null_phy_generic(struct e1000_hw E1000_UNUSEDARG *hw) { DEBUGFUNC("e1000_null_phy_generic"); return; } /** * e1000_null_lplu_state - No-op function, return 0 * @hw: pointer to the HW structure **/ s32 e1000_null_lplu_state(struct e1000_hw E1000_UNUSEDARG *hw, bool E1000_UNUSEDARG active) { DEBUGFUNC("e1000_null_lplu_state"); return E1000_SUCCESS; } /** * e1000_null_write_reg - No-op function, return 0 * @hw: pointer to the HW structure **/ s32 e1000_null_write_reg(struct e1000_hw E1000_UNUSEDARG *hw, u32 E1000_UNUSEDARG offset, u16 E1000_UNUSEDARG data) { DEBUGFUNC("e1000_null_write_reg"); return E1000_SUCCESS; } /** * e1000_read_i2c_byte_null - No-op function, return 0 * @hw: pointer to hardware structure * @byte_offset: byte offset to write * @dev_addr: device address * @data: data value read * **/ s32 e1000_read_i2c_byte_null(struct e1000_hw E1000_UNUSEDARG *hw, u8 E1000_UNUSEDARG byte_offset, u8 E1000_UNUSEDARG dev_addr, u8 E1000_UNUSEDARG *data) { DEBUGFUNC("e1000_read_i2c_byte_null"); return E1000_SUCCESS; } /** * e1000_write_i2c_byte_null - No-op function, return 0 * @hw: pointer to hardware structure * @byte_offset: byte offset to write * @dev_addr: device address * @data: data value to write * **/ s32 e1000_write_i2c_byte_null(struct e1000_hw E1000_UNUSEDARG *hw, u8 E1000_UNUSEDARG byte_offset, u8 E1000_UNUSEDARG dev_addr, u8 E1000_UNUSEDARG data) { DEBUGFUNC("e1000_write_i2c_byte_null"); return E1000_SUCCESS; } /** * e1000_check_reset_block_generic - Check if PHY reset is blocked * @hw: pointer to the HW structure * * Read the PHY management control register and check whether a PHY reset * is blocked. If a reset is not blocked return E1000_SUCCESS, otherwise * return E1000_BLK_PHY_RESET (12). **/ s32 e1000_check_reset_block_generic(struct e1000_hw *hw) { u32 manc; DEBUGFUNC("e1000_check_reset_block"); manc = E1000_READ_REG(hw, E1000_MANC); return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? E1000_BLK_PHY_RESET : E1000_SUCCESS; } /** * e1000_get_phy_id - Retrieve the PHY ID and revision * @hw: pointer to the HW structure * * Reads the PHY registers and stores the PHY ID and possibly the PHY * revision in the hardware structure. **/ s32 e1000_get_phy_id(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; u16 phy_id; u16 retry_count = 0; DEBUGFUNC("e1000_get_phy_id"); if (!phy->ops.read_reg) return E1000_SUCCESS; while (retry_count < 2) { ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id); if (ret_val) return ret_val; phy->id = (u32)(phy_id << 16); usec_delay(20); ret_val = phy->ops.read_reg(hw, PHY_ID2, &phy_id); if (ret_val) return ret_val; phy->id |= (u32)(phy_id & PHY_REVISION_MASK); phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK); if (phy->id != 0 && phy->id != PHY_REVISION_MASK) return E1000_SUCCESS; retry_count++; } return E1000_SUCCESS; } /** * e1000_phy_reset_dsp_generic - Reset PHY DSP * @hw: pointer to the HW structure * * Reset the digital signal processor. **/ s32 e1000_phy_reset_dsp_generic(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_phy_reset_dsp_generic"); if (!hw->phy.ops.write_reg) return E1000_SUCCESS; ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xC1); if (ret_val) return ret_val; return hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0); } /** * e1000_read_phy_reg_mdic - Read MDI control register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Reads the MDI control register in the PHY at offset and stores the * information read to data. **/ s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) { struct e1000_phy_info *phy = &hw->phy; u32 i, mdic = 0; DEBUGFUNC("e1000_read_phy_reg_mdic"); if (offset > MAX_PHY_REG_ADDRESS) { DEBUGOUT1("PHY Address %d is out of range\n", offset); return -E1000_ERR_PARAM; } /* Set up Op-code, Phy Address, and register offset in the MDI * Control register. The MAC will take care of interfacing with the * PHY to retrieve the desired data. */ mdic = ((offset << E1000_MDIC_REG_SHIFT) | (phy->addr << E1000_MDIC_PHY_SHIFT) | (E1000_MDIC_OP_READ)); E1000_WRITE_REG(hw, E1000_MDIC, mdic); /* Poll the ready bit to see if the MDI read completed * Increasing the time out as testing showed failures with * the lower time out */ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { usec_delay_irq(50); mdic = E1000_READ_REG(hw, E1000_MDIC); if (mdic & E1000_MDIC_READY) break; } if (!(mdic & E1000_MDIC_READY)) { DEBUGOUT("MDI Read did not complete\n"); return -E1000_ERR_PHY; } if (mdic & E1000_MDIC_ERROR) { DEBUGOUT("MDI Error\n"); return -E1000_ERR_PHY; } if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { DEBUGOUT2("MDI Read offset error - requested %d, returned %d\n", offset, (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); return -E1000_ERR_PHY; } *data = (u16) mdic; /* Allow some time after each MDIC transaction to avoid * reading duplicate data in the next MDIC transaction. */ if (hw->mac.type == e1000_pch2lan) usec_delay_irq(100); return E1000_SUCCESS; } /** * e1000_write_phy_reg_mdic - Write MDI control register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write to register at offset * * Writes data to MDI control register in the PHY at offset. **/ s32 e1000_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) { struct e1000_phy_info *phy = &hw->phy; u32 i, mdic = 0; DEBUGFUNC("e1000_write_phy_reg_mdic"); if (offset > MAX_PHY_REG_ADDRESS) { DEBUGOUT1("PHY Address %d is out of range\n", offset); return -E1000_ERR_PARAM; } /* Set up Op-code, Phy Address, and register offset in the MDI * Control register. The MAC will take care of interfacing with the * PHY to retrieve the desired data. */ mdic = (((u32)data) | (offset << E1000_MDIC_REG_SHIFT) | (phy->addr << E1000_MDIC_PHY_SHIFT) | (E1000_MDIC_OP_WRITE)); E1000_WRITE_REG(hw, E1000_MDIC, mdic); /* Poll the ready bit to see if the MDI read completed * Increasing the time out as testing showed failures with * the lower time out */ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { usec_delay_irq(50); mdic = E1000_READ_REG(hw, E1000_MDIC); if (mdic & E1000_MDIC_READY) break; } if (!(mdic & E1000_MDIC_READY)) { DEBUGOUT("MDI Write did not complete\n"); return -E1000_ERR_PHY; } if (mdic & E1000_MDIC_ERROR) { DEBUGOUT("MDI Error\n"); return -E1000_ERR_PHY; } if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { DEBUGOUT2("MDI Write offset error - requested %d, returned %d\n", offset, (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); return -E1000_ERR_PHY; } /* Allow some time after each MDIC transaction to avoid * reading duplicate data in the next MDIC transaction. */ if (hw->mac.type == e1000_pch2lan) usec_delay_irq(100); return E1000_SUCCESS; } /** * e1000_read_phy_reg_i2c - Read PHY register using i2c * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Reads the PHY register at offset using the i2c interface and stores the * retrieved information in data. **/ s32 e1000_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data) { struct e1000_phy_info *phy = &hw->phy; u32 i, i2ccmd = 0; DEBUGFUNC("e1000_read_phy_reg_i2c"); /* Set up Op-code, Phy Address, and register address in the I2CCMD * register. The MAC will take care of interfacing with the * PHY to retrieve the desired data. */ i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) | (E1000_I2CCMD_OPCODE_READ)); E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); /* Poll the ready bit to see if the I2C read completed */ for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { usec_delay(50); i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD); if (i2ccmd & E1000_I2CCMD_READY) break; } if (!(i2ccmd & E1000_I2CCMD_READY)) { DEBUGOUT("I2CCMD Read did not complete\n"); return -E1000_ERR_PHY; } if (i2ccmd & E1000_I2CCMD_ERROR) { DEBUGOUT("I2CCMD Error bit set\n"); return -E1000_ERR_PHY; } /* Need to byte-swap the 16-bit value. */ *data = ((i2ccmd >> 8) & 0x00FF) | ((i2ccmd << 8) & 0xFF00); return E1000_SUCCESS; } /** * e1000_write_phy_reg_i2c - Write PHY register using i2c * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * * Writes the data to PHY register at the offset using the i2c interface. **/ s32 e1000_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data) { struct e1000_phy_info *phy = &hw->phy; u32 i, i2ccmd = 0; u16 phy_data_swapped; DEBUGFUNC("e1000_write_phy_reg_i2c"); /* Prevent overwritting SFP I2C EEPROM which is at A0 address.*/ if ((hw->phy.addr == 0) || (hw->phy.addr > 7)) { DEBUGOUT1("PHY I2C Address %d is out of range.\n", hw->phy.addr); return -E1000_ERR_CONFIG; } /* Swap the data bytes for the I2C interface */ phy_data_swapped = ((data >> 8) & 0x00FF) | ((data << 8) & 0xFF00); /* Set up Op-code, Phy Address, and register address in the I2CCMD * register. The MAC will take care of interfacing with the * PHY to retrieve the desired data. */ i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) | E1000_I2CCMD_OPCODE_WRITE | phy_data_swapped); E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); /* Poll the ready bit to see if the I2C read completed */ for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { usec_delay(50); i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD); if (i2ccmd & E1000_I2CCMD_READY) break; } if (!(i2ccmd & E1000_I2CCMD_READY)) { DEBUGOUT("I2CCMD Write did not complete\n"); return -E1000_ERR_PHY; } if (i2ccmd & E1000_I2CCMD_ERROR) { DEBUGOUT("I2CCMD Error bit set\n"); return -E1000_ERR_PHY; } return E1000_SUCCESS; } /** * e1000_read_sfp_data_byte - Reads SFP module data. * @hw: pointer to the HW structure * @offset: byte location offset to be read * @data: read data buffer pointer * * Reads one byte from SFP module data stored * in SFP resided EEPROM memory or SFP diagnostic area. * Function should be called with * E1000_I2CCMD_SFP_DATA_ADDR() for SFP module database access * E1000_I2CCMD_SFP_DIAG_ADDR() for SFP diagnostics parameters * access **/ s32 e1000_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data) { u32 i = 0; u32 i2ccmd = 0; u32 data_local = 0; DEBUGFUNC("e1000_read_sfp_data_byte"); if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) { DEBUGOUT("I2CCMD command address exceeds upper limit\n"); return -E1000_ERR_PHY; } /* Set up Op-code, EEPROM Address,in the I2CCMD * register. The MAC will take care of interfacing with the * EEPROM to retrieve the desired data. */ i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | E1000_I2CCMD_OPCODE_READ); E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); /* Poll the ready bit to see if the I2C read completed */ for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { usec_delay(50); data_local = E1000_READ_REG(hw, E1000_I2CCMD); if (data_local & E1000_I2CCMD_READY) break; } if (!(data_local & E1000_I2CCMD_READY)) { DEBUGOUT("I2CCMD Read did not complete\n"); return -E1000_ERR_PHY; } if (data_local & E1000_I2CCMD_ERROR) { DEBUGOUT("I2CCMD Error bit set\n"); return -E1000_ERR_PHY; } *data = (u8) data_local & 0xFF; return E1000_SUCCESS; } /** * e1000_write_sfp_data_byte - Writes SFP module data. * @hw: pointer to the HW structure * @offset: byte location offset to write to * @data: data to write * * Writes one byte to SFP module data stored * in SFP resided EEPROM memory or SFP diagnostic area. * Function should be called with * E1000_I2CCMD_SFP_DATA_ADDR() for SFP module database access * E1000_I2CCMD_SFP_DIAG_ADDR() for SFP diagnostics parameters * access **/ s32 e1000_write_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 data) { u32 i = 0; u32 i2ccmd = 0; u32 data_local = 0; DEBUGFUNC("e1000_write_sfp_data_byte"); if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) { DEBUGOUT("I2CCMD command address exceeds upper limit\n"); return -E1000_ERR_PHY; } /* The programming interface is 16 bits wide * so we need to read the whole word first * then update appropriate byte lane and write * the updated word back. */ /* Set up Op-code, EEPROM Address,in the I2CCMD * register. The MAC will take care of interfacing * with an EEPROM to write the data given. */ i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | E1000_I2CCMD_OPCODE_READ); /* Set a command to read single word */ E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { usec_delay(50); /* Poll the ready bit to see if lastly * launched I2C operation completed */ i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD); if (i2ccmd & E1000_I2CCMD_READY) { /* Check if this is READ or WRITE phase */ if ((i2ccmd & E1000_I2CCMD_OPCODE_READ) == E1000_I2CCMD_OPCODE_READ) { /* Write the selected byte * lane and update whole word */ data_local = i2ccmd & 0xFF00; data_local |= data; i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | E1000_I2CCMD_OPCODE_WRITE | data_local); E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); } else { break; } } } if (!(i2ccmd & E1000_I2CCMD_READY)) { DEBUGOUT("I2CCMD Write did not complete\n"); return -E1000_ERR_PHY; } if (i2ccmd & E1000_I2CCMD_ERROR) { DEBUGOUT("I2CCMD Error bit set\n"); return -E1000_ERR_PHY; } return E1000_SUCCESS; } /** * e1000_read_phy_reg_m88 - Read m88 PHY register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Acquires semaphore, if necessary, then reads the PHY register at offset * and storing the retrieved information in data. Release any acquired * semaphores before exiting. **/ s32 e1000_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data) { s32 ret_val; DEBUGFUNC("e1000_read_phy_reg_m88"); if (!hw->phy.ops.acquire) return E1000_SUCCESS; ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; ret_val = e1000_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, data); hw->phy.ops.release(hw); return ret_val; } /** * e1000_write_phy_reg_m88 - Write m88 PHY register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * * Acquires semaphore, if necessary, then writes the data to PHY register * at the offset. Release any acquired semaphores before exiting. **/ s32 e1000_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data) { s32 ret_val; DEBUGFUNC("e1000_write_phy_reg_m88"); if (!hw->phy.ops.acquire) return E1000_SUCCESS; ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, data); hw->phy.ops.release(hw); return ret_val; } /** * e1000_set_page_igp - Set page as on IGP-like PHY(s) * @hw: pointer to the HW structure * @page: page to set (shifted left when necessary) * * Sets PHY page required for PHY register access. Assumes semaphore is * already acquired. Note, this function sets phy.addr to 1 so the caller * must set it appropriately (if necessary) after this function returns. **/ s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page) { DEBUGFUNC("e1000_set_page_igp"); DEBUGOUT1("Setting page 0x%x\n", page); hw->phy.addr = 1; return e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, page); } /** * __e1000_read_phy_reg_igp - Read igp PHY register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * @locked: semaphore has already been acquired or not * * Acquires semaphore, if necessary, then reads the PHY register at offset * and stores the retrieved information in data. Release any acquired * semaphores before exiting. **/ static s32 __e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data, bool locked) { s32 ret_val = E1000_SUCCESS; DEBUGFUNC("__e1000_read_phy_reg_igp"); if (!locked) { if (!hw->phy.ops.acquire) return E1000_SUCCESS; ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; } if (offset > MAX_PHY_MULTI_PAGE_REG) ret_val = e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, (u16)offset); if (!ret_val) ret_val = e1000_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, data); if (!locked) hw->phy.ops.release(hw); return ret_val; } /** * e1000_read_phy_reg_igp - Read igp PHY register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Acquires semaphore then reads the PHY register at offset and stores the * retrieved information in data. * Release the acquired semaphore before exiting. **/ s32 e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data) { return __e1000_read_phy_reg_igp(hw, offset, data, FALSE); } /** * e1000_read_phy_reg_igp_locked - Read igp PHY register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Reads the PHY register at offset and stores the retrieved information * in data. Assumes semaphore already acquired. **/ s32 e1000_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data) { return __e1000_read_phy_reg_igp(hw, offset, data, TRUE); } /** * e1000_write_phy_reg_igp - Write igp PHY register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * @locked: semaphore has already been acquired or not * * Acquires semaphore, if necessary, then writes the data to PHY register * at the offset. Release any acquired semaphores before exiting. **/ static s32 __e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data, bool locked) { s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_write_phy_reg_igp"); if (!locked) { if (!hw->phy.ops.acquire) return E1000_SUCCESS; ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; } if (offset > MAX_PHY_MULTI_PAGE_REG) ret_val = e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, (u16)offset); if (!ret_val) ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, data); if (!locked) hw->phy.ops.release(hw); return ret_val; } /** * e1000_write_phy_reg_igp - Write igp PHY register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * * Acquires semaphore then writes the data to PHY register * at the offset. Release any acquired semaphores before exiting. **/ s32 e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data) { return __e1000_write_phy_reg_igp(hw, offset, data, FALSE); } /** * e1000_write_phy_reg_igp_locked - Write igp PHY register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * * Writes the data to PHY register at the offset. * Assumes semaphore already acquired. **/ s32 e1000_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data) { return __e1000_write_phy_reg_igp(hw, offset, data, TRUE); } /** * __e1000_read_kmrn_reg - Read kumeran register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * @locked: semaphore has already been acquired or not * * Acquires semaphore, if necessary. Then reads the PHY register at offset * using the kumeran interface. The information retrieved is stored in data. * Release any acquired semaphores before exiting. **/ static s32 __e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data, bool locked) { u32 kmrnctrlsta; DEBUGFUNC("__e1000_read_kmrn_reg"); if (!locked) { s32 ret_val = E1000_SUCCESS; if (!hw->phy.ops.acquire) return E1000_SUCCESS; ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; } kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta); E1000_WRITE_FLUSH(hw); usec_delay(2); kmrnctrlsta = E1000_READ_REG(hw, E1000_KMRNCTRLSTA); *data = (u16)kmrnctrlsta; if (!locked) hw->phy.ops.release(hw); return E1000_SUCCESS; } /** * e1000_read_kmrn_reg_generic - Read kumeran register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Acquires semaphore then reads the PHY register at offset using the * kumeran interface. The information retrieved is stored in data. * Release the acquired semaphore before exiting. **/ s32 e1000_read_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 *data) { return __e1000_read_kmrn_reg(hw, offset, data, FALSE); } /** * e1000_read_kmrn_reg_locked - Read kumeran register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Reads the PHY register at offset using the kumeran interface. The * information retrieved is stored in data. * Assumes semaphore already acquired. **/ s32 e1000_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data) { return __e1000_read_kmrn_reg(hw, offset, data, TRUE); } /** * __e1000_write_kmrn_reg - Write kumeran register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * @locked: semaphore has already been acquired or not * * Acquires semaphore, if necessary. Then write the data to PHY register * at the offset using the kumeran interface. Release any acquired semaphores * before exiting. **/ static s32 __e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data, bool locked) { u32 kmrnctrlsta; DEBUGFUNC("e1000_write_kmrn_reg_generic"); if (!locked) { s32 ret_val = E1000_SUCCESS; if (!hw->phy.ops.acquire) return E1000_SUCCESS; ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; } kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & E1000_KMRNCTRLSTA_OFFSET) | data; E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta); E1000_WRITE_FLUSH(hw); usec_delay(2); if (!locked) hw->phy.ops.release(hw); return E1000_SUCCESS; } /** * e1000_write_kmrn_reg_generic - Write kumeran register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * * Acquires semaphore then writes the data to the PHY register at the offset * using the kumeran interface. Release the acquired semaphore before exiting. **/ s32 e1000_write_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 data) { return __e1000_write_kmrn_reg(hw, offset, data, FALSE); } /** * e1000_write_kmrn_reg_locked - Write kumeran register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * * Write the data to PHY register at the offset using the kumeran interface. * Assumes semaphore already acquired. **/ s32 e1000_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data) { return __e1000_write_kmrn_reg(hw, offset, data, TRUE); } /** * e1000_set_master_slave_mode - Setup PHY for Master/slave mode * @hw: pointer to the HW structure * * Sets up Master/slave mode **/ static s32 e1000_set_master_slave_mode(struct e1000_hw *hw) { s32 ret_val; u16 phy_data; /* Resolve Master/Slave mode */ ret_val = hw->phy.ops.read_reg(hw, PHY_1000T_CTRL, &phy_data); if (ret_val) return ret_val; /* load defaults for future use */ hw->phy.original_ms_type = (phy_data & CR_1000T_MS_ENABLE) ? ((phy_data & CR_1000T_MS_VALUE) ? e1000_ms_force_master : e1000_ms_force_slave) : e1000_ms_auto; switch (hw->phy.ms_type) { case e1000_ms_force_master: phy_data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE); break; case e1000_ms_force_slave: phy_data |= CR_1000T_MS_ENABLE; phy_data &= ~(CR_1000T_MS_VALUE); break; case e1000_ms_auto: phy_data &= ~CR_1000T_MS_ENABLE; /* fall-through */ default: break; } return hw->phy.ops.write_reg(hw, PHY_1000T_CTRL, phy_data); } /** * e1000_copper_link_setup_82577 - Setup 82577 PHY for copper link * @hw: pointer to the HW structure * * Sets up Carrier-sense on Transmit and downshift values. **/ s32 e1000_copper_link_setup_82577(struct e1000_hw *hw) { s32 ret_val; u16 phy_data; DEBUGFUNC("e1000_copper_link_setup_82577"); if (hw->phy.type == e1000_phy_82580) { ret_val = hw->phy.ops.reset(hw); if (ret_val) { DEBUGOUT("Error resetting the PHY.\n"); return ret_val; } } /* Enable CRS on Tx. This must be set for half-duplex operation. */ ret_val = hw->phy.ops.read_reg(hw, I82577_CFG_REG, &phy_data); if (ret_val) return ret_val; phy_data |= I82577_CFG_ASSERT_CRS_ON_TX; /* Enable downshift */ phy_data |= I82577_CFG_ENABLE_DOWNSHIFT; ret_val = hw->phy.ops.write_reg(hw, I82577_CFG_REG, phy_data); if (ret_val) return ret_val; /* Set MDI/MDIX mode */ ret_val = hw->phy.ops.read_reg(hw, I82577_PHY_CTRL_2, &phy_data); if (ret_val) return ret_val; phy_data &= ~I82577_PHY_CTRL2_MDIX_CFG_MASK; /* Options: * 0 - Auto (default) * 1 - MDI mode * 2 - MDI-X mode */ switch (hw->phy.mdix) { case 1: break; case 2: phy_data |= I82577_PHY_CTRL2_MANUAL_MDIX; break; case 0: default: phy_data |= I82577_PHY_CTRL2_AUTO_MDI_MDIX; break; } ret_val = hw->phy.ops.write_reg(hw, I82577_PHY_CTRL_2, phy_data); if (ret_val) return ret_val; return e1000_set_master_slave_mode(hw); } /** * e1000_copper_link_setup_m88 - Setup m88 PHY's for copper link * @hw: pointer to the HW structure * * Sets up MDI/MDI-X and polarity for m88 PHY's. If necessary, transmit clock * and downshift values are set also. **/ s32 e1000_copper_link_setup_m88(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 phy_data; DEBUGFUNC("e1000_copper_link_setup_m88"); /* Enable CRS on Tx. This must be set for half-duplex operation. */ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; /* For BM PHY this bit is downshift enable */ if (phy->type != e1000_phy_bm) phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; /* Options: * MDI/MDI-X = 0 (default) * 0 - Auto for all speeds * 1 - MDI mode * 2 - MDI-X mode * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) */ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; switch (phy->mdix) { case 1: phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; break; case 2: phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; break; case 3: phy_data |= M88E1000_PSCR_AUTO_X_1000T; break; case 0: default: phy_data |= M88E1000_PSCR_AUTO_X_MODE; break; } /* Options: * disable_polarity_correction = 0 (default) * Automatic Correction for Reversed Cable Polarity * 0 - Disabled * 1 - Enabled */ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; if (phy->disable_polarity_correction) phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; /* Enable downshift on BM (disabled by default) */ if (phy->type == e1000_phy_bm) { /* For 82574/82583, first disable then enable downshift */ if (phy->id == BME1000_E_PHY_ID_R2) { phy_data &= ~BME1000_PSCR_ENABLE_DOWNSHIFT; ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); if (ret_val) return ret_val; /* Commit the changes. */ ret_val = phy->ops.commit(hw); if (ret_val) { DEBUGOUT("Error committing the PHY changes\n"); return ret_val; } } phy_data |= BME1000_PSCR_ENABLE_DOWNSHIFT; } ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); if (ret_val) return ret_val; if ((phy->type == e1000_phy_m88) && (phy->revision < E1000_REVISION_4) && (phy->id != BME1000_E_PHY_ID_R2)) { /* Force TX_CLK in the Extended PHY Specific Control Register * to 25MHz clock. */ ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; phy_data |= M88E1000_EPSCR_TX_CLK_25; if ((phy->revision == E1000_REVISION_2) && (phy->id == M88E1111_I_PHY_ID)) { /* 82573L PHY - set the downshift counter to 5x. */ phy_data &= ~M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK; phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X; } else { /* Configure Master and Slave downshift values */ phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK | M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK); phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X | M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X); } ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); if (ret_val) return ret_val; } if ((phy->type == e1000_phy_bm) && (phy->id == BME1000_E_PHY_ID_R2)) { /* Set PHY page 0, register 29 to 0x0003 */ ret_val = phy->ops.write_reg(hw, 29, 0x0003); if (ret_val) return ret_val; /* Set PHY page 0, register 30 to 0x0000 */ ret_val = phy->ops.write_reg(hw, 30, 0x0000); if (ret_val) return ret_val; } /* Commit the changes. */ ret_val = phy->ops.commit(hw); if (ret_val) { DEBUGOUT("Error committing the PHY changes\n"); return ret_val; } if (phy->type == e1000_phy_82578) { ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; /* 82578 PHY - set the downshift count to 1x. */ phy_data |= I82578_EPSCR_DOWNSHIFT_ENABLE; phy_data &= ~I82578_EPSCR_DOWNSHIFT_COUNTER_MASK; ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); if (ret_val) return ret_val; } return E1000_SUCCESS; } /** * e1000_copper_link_setup_m88_gen2 - Setup m88 PHY's for copper link * @hw: pointer to the HW structure * * Sets up MDI/MDI-X and polarity for i347-AT4, m88e1322 and m88e1112 PHY's. * Also enables and sets the downshift parameters. **/ s32 e1000_copper_link_setup_m88_gen2(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 phy_data; DEBUGFUNC("e1000_copper_link_setup_m88_gen2"); /* Enable CRS on Tx. This must be set for half-duplex operation. */ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; /* Options: * MDI/MDI-X = 0 (default) * 0 - Auto for all speeds * 1 - MDI mode * 2 - MDI-X mode * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) */ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; switch (phy->mdix) { case 1: phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; break; case 2: phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; break; case 3: /* M88E1112 does not support this mode) */ if (phy->id != M88E1112_E_PHY_ID) { phy_data |= M88E1000_PSCR_AUTO_X_1000T; break; } case 0: default: phy_data |= M88E1000_PSCR_AUTO_X_MODE; break; } /* Options: * disable_polarity_correction = 0 (default) * Automatic Correction for Reversed Cable Polarity * 0 - Disabled * 1 - Enabled */ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; if (phy->disable_polarity_correction) phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; /* Enable downshift and setting it to X6 */ if (phy->id == M88E1543_E_PHY_ID) { phy_data &= ~I347AT4_PSCR_DOWNSHIFT_ENABLE; ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); if (ret_val) return ret_val; ret_val = phy->ops.commit(hw); if (ret_val) { DEBUGOUT("Error committing the PHY changes\n"); return ret_val; } } phy_data &= ~I347AT4_PSCR_DOWNSHIFT_MASK; phy_data |= I347AT4_PSCR_DOWNSHIFT_6X; phy_data |= I347AT4_PSCR_DOWNSHIFT_ENABLE; ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); if (ret_val) return ret_val; /* Commit the changes. */ ret_val = phy->ops.commit(hw); if (ret_val) { DEBUGOUT("Error committing the PHY changes\n"); return ret_val; } ret_val = e1000_set_master_slave_mode(hw); if (ret_val) return ret_val; return E1000_SUCCESS; } /** * e1000_copper_link_setup_igp - Setup igp PHY's for copper link * @hw: pointer to the HW structure * * Sets up LPLU, MDI/MDI-X, polarity, Smartspeed and Master/Slave config for * igp PHY's. **/ s32 e1000_copper_link_setup_igp(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 data; DEBUGFUNC("e1000_copper_link_setup_igp"); ret_val = hw->phy.ops.reset(hw); if (ret_val) { DEBUGOUT("Error resetting the PHY.\n"); return ret_val; } /* Wait 100ms for MAC to configure PHY from NVM settings, to avoid * timeout issues when LFS is enabled. */ msec_delay(100); /* The NVM settings will configure LPLU in D3 for * non-IGP1 PHYs. */ if (phy->type == e1000_phy_igp) { /* disable lplu d3 during driver init */ ret_val = hw->phy.ops.set_d3_lplu_state(hw, FALSE); if (ret_val) { DEBUGOUT("Error Disabling LPLU D3\n"); return ret_val; } } /* disable lplu d0 during driver init */ if (hw->phy.ops.set_d0_lplu_state) { ret_val = hw->phy.ops.set_d0_lplu_state(hw, FALSE); if (ret_val) { DEBUGOUT("Error Disabling LPLU D0\n"); return ret_val; } } /* Configure mdi-mdix settings */ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &data); if (ret_val) return ret_val; data &= ~IGP01E1000_PSCR_AUTO_MDIX; switch (phy->mdix) { case 1: data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; break; case 2: data |= IGP01E1000_PSCR_FORCE_MDI_MDIX; break; case 0: default: data |= IGP01E1000_PSCR_AUTO_MDIX; break; } ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, data); if (ret_val) return ret_val; /* set auto-master slave resolution settings */ if (hw->mac.autoneg) { /* when autonegotiation advertisement is only 1000Mbps then we * should disable SmartSpeed and enable Auto MasterSlave * resolution as hardware default. */ if (phy->autoneg_advertised == ADVERTISE_1000_FULL) { /* Disable SmartSpeed */ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) return ret_val; /* Set auto Master/Slave resolution process */ ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, &data); if (ret_val) return ret_val; data &= ~CR_1000T_MS_ENABLE; ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, data); if (ret_val) return ret_val; } ret_val = e1000_set_master_slave_mode(hw); } return ret_val; } /** * e1000_phy_setup_autoneg - Configure PHY for auto-negotiation * @hw: pointer to the HW structure * * Reads the MII auto-neg advertisement register and/or the 1000T control * register and if the PHY is already setup for auto-negotiation, then * return successful. Otherwise, setup advertisement and flow control to * the appropriate values for the wanted auto-negotiation. **/ s32 e1000_phy_setup_autoneg(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 mii_autoneg_adv_reg; u16 mii_1000t_ctrl_reg = 0; DEBUGFUNC("e1000_phy_setup_autoneg"); phy->autoneg_advertised &= phy->autoneg_mask; /* Read the MII Auto-Neg Advertisement Register (Address 4). */ ret_val = phy->ops.read_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg); if (ret_val) return ret_val; if (phy->autoneg_mask & ADVERTISE_1000_FULL) { /* Read the MII 1000Base-T Control Register (Address 9). */ ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, &mii_1000t_ctrl_reg); if (ret_val) return ret_val; } /* Need to parse both autoneg_advertised and fc and set up * the appropriate PHY registers. First we will parse for * autoneg_advertised software override. Since we can advertise * a plethora of combinations, we need to check each bit * individually. */ /* First we clear all the 10/100 mb speed bits in the Auto-Neg * Advertisement Register (Address 4) and the 1000 mb speed bits in * the 1000Base-T Control Register (Address 9). */ mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS | NWAY_AR_100TX_HD_CAPS | NWAY_AR_10T_FD_CAPS | NWAY_AR_10T_HD_CAPS); mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS); DEBUGOUT1("autoneg_advertised %x\n", phy->autoneg_advertised); /* Do we want to advertise 10 Mb Half Duplex? */ if (phy->autoneg_advertised & ADVERTISE_10_HALF) { DEBUGOUT("Advertise 10mb Half duplex\n"); mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS; } /* Do we want to advertise 10 Mb Full Duplex? */ if (phy->autoneg_advertised & ADVERTISE_10_FULL) { DEBUGOUT("Advertise 10mb Full duplex\n"); mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS; } /* Do we want to advertise 100 Mb Half Duplex? */ if (phy->autoneg_advertised & ADVERTISE_100_HALF) { DEBUGOUT("Advertise 100mb Half duplex\n"); mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS; } /* Do we want to advertise 100 Mb Full Duplex? */ if (phy->autoneg_advertised & ADVERTISE_100_FULL) { DEBUGOUT("Advertise 100mb Full duplex\n"); mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS; } /* We do not allow the Phy to advertise 1000 Mb Half Duplex */ if (phy->autoneg_advertised & ADVERTISE_1000_HALF) DEBUGOUT("Advertise 1000mb Half duplex request denied!\n"); /* Do we want to advertise 1000 Mb Full Duplex? */ if (phy->autoneg_advertised & ADVERTISE_1000_FULL) { DEBUGOUT("Advertise 1000mb Full duplex\n"); mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS; } /* Check for a software override of the flow control settings, and * setup the PHY advertisement registers accordingly. If * auto-negotiation is enabled, then software will have to set the * "PAUSE" bits to the correct value in the Auto-Negotiation * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto- * negotiation. * * The possible values of the "fc" parameter are: * 0: Flow control is completely disabled * 1: Rx flow control is enabled (we can receive pause frames * but not send pause frames). * 2: Tx flow control is enabled (we can send pause frames * but we do not support receiving pause frames). * 3: Both Rx and Tx flow control (symmetric) are enabled. * other: No software override. The flow control configuration * in the EEPROM is used. */ switch (hw->fc.current_mode) { case e1000_fc_none: /* Flow control (Rx & Tx) is completely disabled by a * software over-ride. */ mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); break; case e1000_fc_rx_pause: /* Rx Flow control is enabled, and Tx Flow control is * disabled, by a software over-ride. * * Since there really isn't a way to advertise that we are * capable of Rx Pause ONLY, we will advertise that we * support both symmetric and asymmetric Rx PAUSE. Later * (in e1000_config_fc_after_link_up) we will disable the * hw's ability to send PAUSE frames. */ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); break; case e1000_fc_tx_pause: /* Tx Flow control is enabled, and Rx Flow control is * disabled, by a software over-ride. */ mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR; mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE; break; case e1000_fc_full: /* Flow control (both Rx and Tx) is enabled by a software * over-ride. */ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); break; default: DEBUGOUT("Flow control param set incorrectly\n"); return -E1000_ERR_CONFIG; } ret_val = phy->ops.write_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg); if (ret_val) return ret_val; DEBUGOUT1("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg); if (phy->autoneg_mask & ADVERTISE_1000_FULL) ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, mii_1000t_ctrl_reg); return ret_val; } /** * e1000_copper_link_autoneg - Setup/Enable autoneg for copper link * @hw: pointer to the HW structure * * Performs initial bounds checking on autoneg advertisement parameter, then * configure to advertise the full capability. Setup the PHY to autoneg * and restart the negotiation process between the link partner. If * autoneg_wait_to_complete, then wait for autoneg to complete before exiting. **/ s32 e1000_copper_link_autoneg(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 phy_ctrl; DEBUGFUNC("e1000_copper_link_autoneg"); /* Perform some bounds checking on the autoneg advertisement * parameter. */ phy->autoneg_advertised &= phy->autoneg_mask; /* If autoneg_advertised is zero, we assume it was not defaulted * by the calling code so we set to advertise full capability. */ if (!phy->autoneg_advertised) phy->autoneg_advertised = phy->autoneg_mask; DEBUGOUT("Reconfiguring auto-neg advertisement params\n"); ret_val = e1000_phy_setup_autoneg(hw); if (ret_val) { DEBUGOUT("Error Setting up Auto-Negotiation\n"); return ret_val; } DEBUGOUT("Restarting Auto-Neg\n"); /* Restart auto-negotiation by setting the Auto Neg Enable bit and * the Auto Neg Restart bit in the PHY control register. */ ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_ctrl); if (ret_val) return ret_val; phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_ctrl); if (ret_val) return ret_val; /* Does the user want to wait for Auto-Neg to complete here, or * check at a later time (for example, callback routine). */ if (phy->autoneg_wait_to_complete) { ret_val = e1000_wait_autoneg(hw); if (ret_val) { DEBUGOUT("Error while waiting for autoneg to complete\n"); return ret_val; } } hw->mac.get_link_status = TRUE; return ret_val; } /** * e1000_setup_copper_link_generic - Configure copper link settings * @hw: pointer to the HW structure * * Calls the appropriate function to configure the link for auto-neg or forced * speed and duplex. Then we check for link, once link is established calls * to configure collision distance and flow control are called. If link is * not established, we return -E1000_ERR_PHY (-2). **/ s32 e1000_setup_copper_link_generic(struct e1000_hw *hw) { s32 ret_val; bool link; DEBUGFUNC("e1000_setup_copper_link_generic"); if (hw->mac.autoneg) { /* Setup autoneg and flow control advertisement and perform * autonegotiation. */ ret_val = e1000_copper_link_autoneg(hw); if (ret_val) return ret_val; } else { /* PHY will be set to 10H, 10F, 100H or 100F * depending on user settings. */ DEBUGOUT("Forcing Speed and Duplex\n"); ret_val = hw->phy.ops.force_speed_duplex(hw); if (ret_val) { DEBUGOUT("Error Forcing Speed and Duplex\n"); return ret_val; } } /* Check link status. Wait up to 100 microseconds for link to become * valid. */ ret_val = e1000_phy_has_link_generic(hw, COPPER_LINK_UP_LIMIT, 10, &link); if (ret_val) return ret_val; if (link) { DEBUGOUT("Valid link established!!!\n"); hw->mac.ops.config_collision_dist(hw); ret_val = e1000_config_fc_after_link_up_generic(hw); } else { DEBUGOUT("Unable to establish link!!!\n"); } return ret_val; } /** * e1000_phy_force_speed_duplex_igp - Force speed/duplex for igp PHY * @hw: pointer to the HW structure * * Calls the PHY setup function to force speed and duplex. Clears the * auto-crossover to force MDI manually. Waits for link and returns * successful if link up is successful, else -E1000_ERR_PHY (-2). **/ s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 phy_data; bool link; DEBUGFUNC("e1000_phy_force_speed_duplex_igp"); ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); if (ret_val) return ret_val; e1000_phy_force_speed_duplex_setup(hw, &phy_data); ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); if (ret_val) return ret_val; /* Clear Auto-Crossover to force MDI manually. IGP requires MDI * forced whenever speed and duplex are forced. */ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data); if (ret_val) return ret_val; phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX; phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data); if (ret_val) return ret_val; DEBUGOUT1("IGP PSCR: %X\n", phy_data); usec_delay(1); if (phy->autoneg_wait_to_complete) { DEBUGOUT("Waiting for forced speed/duplex link on IGP phy.\n"); ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, 100000, &link); if (ret_val) return ret_val; if (!link) DEBUGOUT("Link taking longer than expected.\n"); /* Try once more */ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, 100000, &link); } return ret_val; } /** * e1000_phy_force_speed_duplex_m88 - Force speed/duplex for m88 PHY * @hw: pointer to the HW structure * * Calls the PHY setup function to force speed and duplex. Clears the * auto-crossover to force MDI manually. Resets the PHY to commit the * changes. If time expires while waiting for link up, we reset the DSP. * After reset, TX_CLK and CRS on Tx must be set. Return successful upon * successful completion, else return corresponding error code. **/ s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 phy_data; bool link; DEBUGFUNC("e1000_phy_force_speed_duplex_m88"); /* I210 and I211 devices support Auto-Crossover in forced operation. */ if (phy->type != e1000_phy_i210) { /* Clear Auto-Crossover to force MDI manually. M88E1000 * requires MDI forced whenever speed and duplex are forced. */ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); if (ret_val) return ret_val; - - DEBUGOUT1("M88E1000 PSCR: %X\n", phy_data); } + + DEBUGOUT1("M88E1000 PSCR: %X\n", phy_data); ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); if (ret_val) return ret_val; e1000_phy_force_speed_duplex_setup(hw, &phy_data); ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); if (ret_val) return ret_val; /* Reset the phy to commit changes. */ ret_val = hw->phy.ops.commit(hw); if (ret_val) return ret_val; if (phy->autoneg_wait_to_complete) { DEBUGOUT("Waiting for forced speed/duplex link on M88 phy.\n"); ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, 100000, &link); if (ret_val) return ret_val; if (!link) { bool reset_dsp = TRUE; switch (hw->phy.id) { case I347AT4_E_PHY_ID: case M88E1340M_E_PHY_ID: case M88E1112_E_PHY_ID: case M88E1543_E_PHY_ID: case M88E1512_E_PHY_ID: case I210_I_PHY_ID: reset_dsp = FALSE; break; default: if (hw->phy.type != e1000_phy_m88) reset_dsp = FALSE; break; } if (!reset_dsp) { DEBUGOUT("Link taking longer than expected.\n"); } else { /* We didn't get link. * Reset the DSP and cross our fingers. */ ret_val = phy->ops.write_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x001d); if (ret_val) return ret_val; ret_val = e1000_phy_reset_dsp_generic(hw); if (ret_val) return ret_val; } } /* Try once more */ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, 100000, &link); if (ret_val) return ret_val; } if (hw->phy.type != e1000_phy_m88) return E1000_SUCCESS; if (hw->phy.id == I347AT4_E_PHY_ID || hw->phy.id == M88E1340M_E_PHY_ID || hw->phy.id == M88E1112_E_PHY_ID) return E1000_SUCCESS; if (hw->phy.id == I210_I_PHY_ID) return E1000_SUCCESS; if ((hw->phy.id == M88E1543_E_PHY_ID) || (hw->phy.id == M88E1512_E_PHY_ID)) return E1000_SUCCESS; ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; /* Resetting the phy means we need to re-force TX_CLK in the * Extended PHY Specific Control Register to 25MHz clock from * the reset value of 2.5MHz. */ phy_data |= M88E1000_EPSCR_TX_CLK_25; ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); if (ret_val) return ret_val; /* In addition, we must re-enable CRS on Tx for both half and full * duplex. */ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); return ret_val; } /** * e1000_phy_force_speed_duplex_ife - Force PHY speed & duplex * @hw: pointer to the HW structure * * Forces the speed and duplex settings of the PHY. * This is a function pointer entry point only called by * PHY setup routines. **/ s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 data; bool link; DEBUGFUNC("e1000_phy_force_speed_duplex_ife"); ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &data); if (ret_val) return ret_val; e1000_phy_force_speed_duplex_setup(hw, &data); ret_val = phy->ops.write_reg(hw, PHY_CONTROL, data); if (ret_val) return ret_val; /* Disable MDI-X support for 10/100 */ ret_val = phy->ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, &data); if (ret_val) return ret_val; data &= ~IFE_PMC_AUTO_MDIX; data &= ~IFE_PMC_FORCE_MDIX; ret_val = phy->ops.write_reg(hw, IFE_PHY_MDIX_CONTROL, data); if (ret_val) return ret_val; DEBUGOUT1("IFE PMC: %X\n", data); usec_delay(1); if (phy->autoneg_wait_to_complete) { DEBUGOUT("Waiting for forced speed/duplex link on IFE phy.\n"); ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, 100000, &link); if (ret_val) return ret_val; if (!link) DEBUGOUT("Link taking longer than expected.\n"); /* Try once more */ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, 100000, &link); if (ret_val) return ret_val; } return E1000_SUCCESS; } /** * e1000_phy_force_speed_duplex_setup - Configure forced PHY speed/duplex * @hw: pointer to the HW structure * @phy_ctrl: pointer to current value of PHY_CONTROL * * Forces speed and duplex on the PHY by doing the following: disable flow * control, force speed/duplex on the MAC, disable auto speed detection, * disable auto-negotiation, configure duplex, configure speed, configure * the collision distance, write configuration to CTRL register. The * caller must write to the PHY_CONTROL register for these settings to * take affect. **/ void e1000_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl) { struct e1000_mac_info *mac = &hw->mac; u32 ctrl; DEBUGFUNC("e1000_phy_force_speed_duplex_setup"); /* Turn off flow control when forcing speed/duplex */ hw->fc.current_mode = e1000_fc_none; /* Force speed/duplex on the mac */ ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ctrl &= ~E1000_CTRL_SPD_SEL; /* Disable Auto Speed Detection */ ctrl &= ~E1000_CTRL_ASDE; /* Disable autoneg on the phy */ *phy_ctrl &= ~MII_CR_AUTO_NEG_EN; /* Forcing Full or Half Duplex? */ if (mac->forced_speed_duplex & E1000_ALL_HALF_DUPLEX) { ctrl &= ~E1000_CTRL_FD; *phy_ctrl &= ~MII_CR_FULL_DUPLEX; DEBUGOUT("Half Duplex\n"); } else { ctrl |= E1000_CTRL_FD; *phy_ctrl |= MII_CR_FULL_DUPLEX; DEBUGOUT("Full Duplex\n"); } /* Forcing 10mb or 100mb? */ if (mac->forced_speed_duplex & E1000_ALL_100_SPEED) { ctrl |= E1000_CTRL_SPD_100; *phy_ctrl |= MII_CR_SPEED_100; *phy_ctrl &= ~MII_CR_SPEED_1000; DEBUGOUT("Forcing 100mb\n"); } else { ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100); DEBUGOUT("Forcing 10mb\n"); } hw->mac.ops.config_collision_dist(hw); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); } /** * e1000_set_d3_lplu_state_generic - Sets low power link up state for D3 * @hw: pointer to the HW structure * @active: boolean used to enable/disable lplu * * Success returns 0, Failure returns 1 * * The low power link up (lplu) state is set to the power management level D3 * and SmartSpeed is disabled when active is TRUE, else clear lplu for D3 * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU * is used during Dx states where the power conservation is most important. * During driver activity, SmartSpeed should be enabled so performance is * maintained. **/ s32 e1000_set_d3_lplu_state_generic(struct e1000_hw *hw, bool active) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 data; DEBUGFUNC("e1000_set_d3_lplu_state_generic"); if (!hw->phy.ops.read_reg) return E1000_SUCCESS; ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); if (ret_val) return ret_val; if (!active) { data &= ~IGP02E1000_PM_D3_LPLU; ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, data); if (ret_val) return ret_val; /* LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. */ if (phy->smart_speed == e1000_smart_speed_on) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data |= IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) return ret_val; } else if (phy->smart_speed == e1000_smart_speed_off) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) return ret_val; } } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { data |= IGP02E1000_PM_D3_LPLU; ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, data); if (ret_val) return ret_val; /* When LPLU is enabled, we should disable SmartSpeed */ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); } return ret_val; } /** * e1000_check_downshift_generic - Checks whether a downshift in speed occurred * @hw: pointer to the HW structure * * Success returns 0, Failure returns 1 * * A downshift is detected by querying the PHY link health. **/ s32 e1000_check_downshift_generic(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 phy_data, offset, mask; DEBUGFUNC("e1000_check_downshift_generic"); switch (phy->type) { case e1000_phy_i210: case e1000_phy_m88: case e1000_phy_gg82563: case e1000_phy_bm: case e1000_phy_82578: offset = M88E1000_PHY_SPEC_STATUS; mask = M88E1000_PSSR_DOWNSHIFT; break; case e1000_phy_igp: case e1000_phy_igp_2: case e1000_phy_igp_3: offset = IGP01E1000_PHY_LINK_HEALTH; mask = IGP01E1000_PLHR_SS_DOWNGRADE; break; default: /* speed downshift not supported */ phy->speed_downgraded = FALSE; return E1000_SUCCESS; } ret_val = phy->ops.read_reg(hw, offset, &phy_data); if (!ret_val) phy->speed_downgraded = !!(phy_data & mask); return ret_val; } /** * e1000_check_polarity_m88 - Checks the polarity. * @hw: pointer to the HW structure * * Success returns 0, Failure returns -E1000_ERR_PHY (-2) * * Polarity is determined based on the PHY specific status register. **/ s32 e1000_check_polarity_m88(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 data; DEBUGFUNC("e1000_check_polarity_m88"); ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &data); if (!ret_val) phy->cable_polarity = ((data & M88E1000_PSSR_REV_POLARITY) ? e1000_rev_polarity_reversed : e1000_rev_polarity_normal); return ret_val; } /** * e1000_check_polarity_igp - Checks the polarity. * @hw: pointer to the HW structure * * Success returns 0, Failure returns -E1000_ERR_PHY (-2) * * Polarity is determined based on the PHY port status register, and the * current speed (since there is no polarity at 100Mbps). **/ s32 e1000_check_polarity_igp(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 data, offset, mask; DEBUGFUNC("e1000_check_polarity_igp"); /* Polarity is determined based on the speed of * our connection. */ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data); if (ret_val) return ret_val; if ((data & IGP01E1000_PSSR_SPEED_MASK) == IGP01E1000_PSSR_SPEED_1000MBPS) { offset = IGP01E1000_PHY_PCS_INIT_REG; mask = IGP01E1000_PHY_POLARITY_MASK; } else { /* This really only applies to 10Mbps since * there is no polarity for 100Mbps (always 0). */ offset = IGP01E1000_PHY_PORT_STATUS; mask = IGP01E1000_PSSR_POLARITY_REVERSED; } ret_val = phy->ops.read_reg(hw, offset, &data); if (!ret_val) phy->cable_polarity = ((data & mask) ? e1000_rev_polarity_reversed : e1000_rev_polarity_normal); return ret_val; } /** * e1000_check_polarity_ife - Check cable polarity for IFE PHY * @hw: pointer to the HW structure * * Polarity is determined on the polarity reversal feature being enabled. **/ s32 e1000_check_polarity_ife(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 phy_data, offset, mask; DEBUGFUNC("e1000_check_polarity_ife"); /* Polarity is determined based on the reversal feature being enabled. */ if (phy->polarity_correction) { offset = IFE_PHY_EXTENDED_STATUS_CONTROL; mask = IFE_PESC_POLARITY_REVERSED; } else { offset = IFE_PHY_SPECIAL_CONTROL; mask = IFE_PSC_FORCE_POLARITY; } ret_val = phy->ops.read_reg(hw, offset, &phy_data); if (!ret_val) phy->cable_polarity = ((phy_data & mask) ? e1000_rev_polarity_reversed : e1000_rev_polarity_normal); return ret_val; } /** * e1000_wait_autoneg - Wait for auto-neg completion * @hw: pointer to the HW structure * * Waits for auto-negotiation to complete or for the auto-negotiation time * limit to expire, which ever happens first. **/ static s32 e1000_wait_autoneg(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 i, phy_status; DEBUGFUNC("e1000_wait_autoneg"); if (!hw->phy.ops.read_reg) return E1000_SUCCESS; /* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */ for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) { ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); if (ret_val) break; ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); if (ret_val) break; if (phy_status & MII_SR_AUTONEG_COMPLETE) break; msec_delay(100); } /* PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation * has completed. */ return ret_val; } /** * e1000_phy_has_link_generic - Polls PHY for link * @hw: pointer to the HW structure * @iterations: number of times to poll for link * @usec_interval: delay between polling attempts * @success: pointer to whether polling was successful or not * * Polls the PHY status register for link, 'iterations' number of times. **/ s32 e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, u32 usec_interval, bool *success) { s32 ret_val = E1000_SUCCESS; u16 i, phy_status; DEBUGFUNC("e1000_phy_has_link_generic"); if (!hw->phy.ops.read_reg) return E1000_SUCCESS; for (i = 0; i < iterations; i++) { /* Some PHYs require the PHY_STATUS register to be read * twice due to the link bit being sticky. No harm doing * it across the board. */ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); if (ret_val) { /* If the first read fails, another entity may have * ownership of the resources, wait and try again to * see if they have relinquished the resources yet. */ if (usec_interval >= 1000) msec_delay(usec_interval/1000); else usec_delay(usec_interval); } ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); if (ret_val) break; if (phy_status & MII_SR_LINK_STATUS) break; if (usec_interval >= 1000) msec_delay(usec_interval/1000); else usec_delay(usec_interval); } *success = (i < iterations); return ret_val; } /** * e1000_get_cable_length_m88 - Determine cable length for m88 PHY * @hw: pointer to the HW structure * * Reads the PHY specific status register to retrieve the cable length * information. The cable length is determined by averaging the minimum and * maximum values to get the "average" cable length. The m88 PHY has four * possible cable length values, which are: * Register Value Cable Length * 0 < 50 meters * 1 50 - 80 meters * 2 80 - 110 meters * 3 110 - 140 meters * 4 > 140 meters **/ s32 e1000_get_cable_length_m88(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 phy_data, index; DEBUGFUNC("e1000_get_cable_length_m88"); ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); if (ret_val) return ret_val; index = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >> M88E1000_PSSR_CABLE_LENGTH_SHIFT); if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) return -E1000_ERR_PHY; phy->min_cable_length = e1000_m88_cable_length_table[index]; phy->max_cable_length = e1000_m88_cable_length_table[index + 1]; phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; return E1000_SUCCESS; } s32 e1000_get_cable_length_m88_gen2(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 phy_data, phy_data2, is_cm; u16 index, default_page; DEBUGFUNC("e1000_get_cable_length_m88_gen2"); switch (hw->phy.id) { case I210_I_PHY_ID: /* Get cable length from PHY Cable Diagnostics Control Reg */ ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) + (I347AT4_PCDL + phy->addr), &phy_data); if (ret_val) return ret_val; /* Check if the unit of cable length is meters or cm */ ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) + I347AT4_PCDC, &phy_data2); if (ret_val) return ret_val; is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT); /* Populate the phy structure with cable length in meters */ phy->min_cable_length = phy_data / (is_cm ? 100 : 1); phy->max_cable_length = phy_data / (is_cm ? 100 : 1); phy->cable_length = phy_data / (is_cm ? 100 : 1); break; case M88E1543_E_PHY_ID: case M88E1512_E_PHY_ID: case M88E1340M_E_PHY_ID: case I347AT4_E_PHY_ID: /* Remember the original page select and set it to 7 */ ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT, &default_page); if (ret_val) return ret_val; ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x07); if (ret_val) return ret_val; /* Get cable length from PHY Cable Diagnostics Control Reg */ ret_val = phy->ops.read_reg(hw, (I347AT4_PCDL + phy->addr), &phy_data); if (ret_val) return ret_val; /* Check if the unit of cable length is meters or cm */ ret_val = phy->ops.read_reg(hw, I347AT4_PCDC, &phy_data2); if (ret_val) return ret_val; is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT); /* Populate the phy structure with cable length in meters */ phy->min_cable_length = phy_data / (is_cm ? 100 : 1); phy->max_cable_length = phy_data / (is_cm ? 100 : 1); phy->cable_length = phy_data / (is_cm ? 100 : 1); /* Reset the page select to its original value */ ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, default_page); if (ret_val) return ret_val; break; case M88E1112_E_PHY_ID: /* Remember the original page select and set it to 5 */ ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT, &default_page); if (ret_val) return ret_val; ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x05); if (ret_val) return ret_val; ret_val = phy->ops.read_reg(hw, M88E1112_VCT_DSP_DISTANCE, &phy_data); if (ret_val) return ret_val; index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> M88E1000_PSSR_CABLE_LENGTH_SHIFT; if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) return -E1000_ERR_PHY; phy->min_cable_length = e1000_m88_cable_length_table[index]; phy->max_cable_length = e1000_m88_cable_length_table[index + 1]; phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; /* Reset the page select to its original value */ ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, default_page); if (ret_val) return ret_val; break; default: return -E1000_ERR_PHY; } return ret_val; } /** * e1000_get_cable_length_igp_2 - Determine cable length for igp2 PHY * @hw: pointer to the HW structure * * The automatic gain control (agc) normalizes the amplitude of the * received signal, adjusting for the attenuation produced by the * cable. By reading the AGC registers, which represent the * combination of coarse and fine gain value, the value can be put * into a lookup table to obtain the approximate cable length * for each channel. **/ s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 phy_data, i, agc_value = 0; u16 cur_agc_index, max_agc_index = 0; u16 min_agc_index = IGP02E1000_CABLE_LENGTH_TABLE_SIZE - 1; static const u16 agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = { IGP02E1000_PHY_AGC_A, IGP02E1000_PHY_AGC_B, IGP02E1000_PHY_AGC_C, IGP02E1000_PHY_AGC_D }; DEBUGFUNC("e1000_get_cable_length_igp_2"); /* Read the AGC registers for all channels */ for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) { ret_val = phy->ops.read_reg(hw, agc_reg_array[i], &phy_data); if (ret_val) return ret_val; /* Getting bits 15:9, which represent the combination of * coarse and fine gain values. The result is a number * that can be put into the lookup table to obtain the * approximate cable length. */ cur_agc_index = ((phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) & IGP02E1000_AGC_LENGTH_MASK); /* Array index bound check. */ if ((cur_agc_index >= IGP02E1000_CABLE_LENGTH_TABLE_SIZE) || (cur_agc_index == 0)) return -E1000_ERR_PHY; /* Remove min & max AGC values from calculation. */ if (e1000_igp_2_cable_length_table[min_agc_index] > e1000_igp_2_cable_length_table[cur_agc_index]) min_agc_index = cur_agc_index; if (e1000_igp_2_cable_length_table[max_agc_index] < e1000_igp_2_cable_length_table[cur_agc_index]) max_agc_index = cur_agc_index; agc_value += e1000_igp_2_cable_length_table[cur_agc_index]; } agc_value -= (e1000_igp_2_cable_length_table[min_agc_index] + e1000_igp_2_cable_length_table[max_agc_index]); agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2); /* Calculate cable length with the error range of +/- 10 meters. */ phy->min_cable_length = (((agc_value - IGP02E1000_AGC_RANGE) > 0) ? (agc_value - IGP02E1000_AGC_RANGE) : 0); phy->max_cable_length = agc_value + IGP02E1000_AGC_RANGE; phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; return E1000_SUCCESS; } /** * e1000_get_phy_info_m88 - Retrieve PHY information * @hw: pointer to the HW structure * * Valid for only copper links. Read the PHY status register (sticky read) * to verify that link is up. Read the PHY special control register to * determine the polarity and 10base-T extended distance. Read the PHY * special status register to determine MDI/MDIx and current speed. If * speed is 1000, then determine cable length, local and remote receiver. **/ s32 e1000_get_phy_info_m88(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 phy_data; bool link; DEBUGFUNC("e1000_get_phy_info_m88"); if (phy->media_type != e1000_media_type_copper) { DEBUGOUT("Phy info is only valid for copper media\n"); return -E1000_ERR_CONFIG; } ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); if (ret_val) return ret_val; if (!link) { DEBUGOUT("Phy info is only valid if link is up\n"); return -E1000_ERR_CONFIG; } ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; phy->polarity_correction = !!(phy_data & M88E1000_PSCR_POLARITY_REVERSAL); ret_val = e1000_check_polarity_m88(hw); if (ret_val) return ret_val; ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); if (ret_val) return ret_val; phy->is_mdix = !!(phy_data & M88E1000_PSSR_MDIX); if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) { ret_val = hw->phy.ops.get_cable_length(hw); if (ret_val) return ret_val; ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &phy_data); if (ret_val) return ret_val; phy->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS) ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; phy->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS) ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; } else { /* Set values to "undefined" */ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; phy->local_rx = e1000_1000t_rx_status_undefined; phy->remote_rx = e1000_1000t_rx_status_undefined; } return ret_val; } /** * e1000_get_phy_info_igp - Retrieve igp PHY information * @hw: pointer to the HW structure * * Read PHY status to determine if link is up. If link is up, then * set/determine 10base-T extended distance and polarity correction. Read * PHY port status to determine MDI/MDIx and speed. Based on the speed, * determine on the cable length, local and remote receiver. **/ s32 e1000_get_phy_info_igp(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 data; bool link; DEBUGFUNC("e1000_get_phy_info_igp"); ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); if (ret_val) return ret_val; if (!link) { DEBUGOUT("Phy info is only valid if link is up\n"); return -E1000_ERR_CONFIG; } phy->polarity_correction = TRUE; ret_val = e1000_check_polarity_igp(hw); if (ret_val) return ret_val; ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data); if (ret_val) return ret_val; phy->is_mdix = !!(data & IGP01E1000_PSSR_MDIX); if ((data & IGP01E1000_PSSR_SPEED_MASK) == IGP01E1000_PSSR_SPEED_1000MBPS) { ret_val = phy->ops.get_cable_length(hw); if (ret_val) return ret_val; ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data); if (ret_val) return ret_val; phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; } else { phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; phy->local_rx = e1000_1000t_rx_status_undefined; phy->remote_rx = e1000_1000t_rx_status_undefined; } return ret_val; } /** * e1000_get_phy_info_ife - Retrieves various IFE PHY states * @hw: pointer to the HW structure * * Populates "phy" structure with various feature states. **/ s32 e1000_get_phy_info_ife(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 data; bool link; DEBUGFUNC("e1000_get_phy_info_ife"); ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); if (ret_val) return ret_val; if (!link) { DEBUGOUT("Phy info is only valid if link is up\n"); return -E1000_ERR_CONFIG; } ret_val = phy->ops.read_reg(hw, IFE_PHY_SPECIAL_CONTROL, &data); if (ret_val) return ret_val; phy->polarity_correction = !(data & IFE_PSC_AUTO_POLARITY_DISABLE); if (phy->polarity_correction) { ret_val = e1000_check_polarity_ife(hw); if (ret_val) return ret_val; } else { /* Polarity is forced */ phy->cable_polarity = ((data & IFE_PSC_FORCE_POLARITY) ? e1000_rev_polarity_reversed : e1000_rev_polarity_normal); } ret_val = phy->ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, &data); if (ret_val) return ret_val; phy->is_mdix = !!(data & IFE_PMC_MDIX_STATUS); /* The following parameters are undefined for 10/100 operation. */ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; phy->local_rx = e1000_1000t_rx_status_undefined; phy->remote_rx = e1000_1000t_rx_status_undefined; return E1000_SUCCESS; } /** * e1000_phy_sw_reset_generic - PHY software reset * @hw: pointer to the HW structure * * Does a software reset of the PHY by reading the PHY control register and * setting/write the control register reset bit to the PHY. **/ s32 e1000_phy_sw_reset_generic(struct e1000_hw *hw) { s32 ret_val; u16 phy_ctrl; DEBUGFUNC("e1000_phy_sw_reset_generic"); if (!hw->phy.ops.read_reg) return E1000_SUCCESS; ret_val = hw->phy.ops.read_reg(hw, PHY_CONTROL, &phy_ctrl); if (ret_val) return ret_val; phy_ctrl |= MII_CR_RESET; ret_val = hw->phy.ops.write_reg(hw, PHY_CONTROL, phy_ctrl); if (ret_val) return ret_val; usec_delay(1); return ret_val; } /** * e1000_phy_hw_reset_generic - PHY hardware reset * @hw: pointer to the HW structure * * Verify the reset block is not blocking us from resetting. Acquire * semaphore (if necessary) and read/set/write the device control reset * bit in the PHY. Wait the appropriate delay time for the device to * reset and release the semaphore (if necessary). **/ s32 e1000_phy_hw_reset_generic(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u32 ctrl; DEBUGFUNC("e1000_phy_hw_reset_generic"); if (phy->ops.check_reset_block) { ret_val = phy->ops.check_reset_block(hw); if (ret_val) return E1000_SUCCESS; } ret_val = phy->ops.acquire(hw); if (ret_val) return ret_val; ctrl = E1000_READ_REG(hw, E1000_CTRL); E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_PHY_RST); E1000_WRITE_FLUSH(hw); usec_delay(phy->reset_delay_us); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); E1000_WRITE_FLUSH(hw); usec_delay(150); phy->ops.release(hw); return phy->ops.get_cfg_done(hw); } /** * e1000_get_cfg_done_generic - Generic configuration done * @hw: pointer to the HW structure * * Generic function to wait 10 milli-seconds for configuration to complete * and return success. **/ s32 e1000_get_cfg_done_generic(struct e1000_hw E1000_UNUSEDARG *hw) { DEBUGFUNC("e1000_get_cfg_done_generic"); msec_delay_irq(10); return E1000_SUCCESS; } /** * e1000_phy_init_script_igp3 - Inits the IGP3 PHY * @hw: pointer to the HW structure * * Initializes a Intel Gigabit PHY3 when an EEPROM is not present. **/ s32 e1000_phy_init_script_igp3(struct e1000_hw *hw) { DEBUGOUT("Running IGP 3 PHY init script\n"); /* PHY init IGP 3 */ /* Enable rise/fall, 10-mode work in class-A */ hw->phy.ops.write_reg(hw, 0x2F5B, 0x9018); /* Remove all caps from Replica path filter */ hw->phy.ops.write_reg(hw, 0x2F52, 0x0000); /* Bias trimming for ADC, AFE and Driver (Default) */ hw->phy.ops.write_reg(hw, 0x2FB1, 0x8B24); /* Increase Hybrid poly bias */ hw->phy.ops.write_reg(hw, 0x2FB2, 0xF8F0); /* Add 4% to Tx amplitude in Gig mode */ hw->phy.ops.write_reg(hw, 0x2010, 0x10B0); /* Disable trimming (TTT) */ hw->phy.ops.write_reg(hw, 0x2011, 0x0000); /* Poly DC correction to 94.6% + 2% for all channels */ hw->phy.ops.write_reg(hw, 0x20DD, 0x249A); /* ABS DC correction to 95.9% */ hw->phy.ops.write_reg(hw, 0x20DE, 0x00D3); /* BG temp curve trim */ hw->phy.ops.write_reg(hw, 0x28B4, 0x04CE); /* Increasing ADC OPAMP stage 1 currents to max */ hw->phy.ops.write_reg(hw, 0x2F70, 0x29E4); /* Force 1000 ( required for enabling PHY regs configuration) */ hw->phy.ops.write_reg(hw, 0x0000, 0x0140); /* Set upd_freq to 6 */ hw->phy.ops.write_reg(hw, 0x1F30, 0x1606); /* Disable NPDFE */ hw->phy.ops.write_reg(hw, 0x1F31, 0xB814); /* Disable adaptive fixed FFE (Default) */ hw->phy.ops.write_reg(hw, 0x1F35, 0x002A); /* Enable FFE hysteresis */ hw->phy.ops.write_reg(hw, 0x1F3E, 0x0067); /* Fixed FFE for short cable lengths */ hw->phy.ops.write_reg(hw, 0x1F54, 0x0065); /* Fixed FFE for medium cable lengths */ hw->phy.ops.write_reg(hw, 0x1F55, 0x002A); /* Fixed FFE for long cable lengths */ hw->phy.ops.write_reg(hw, 0x1F56, 0x002A); /* Enable Adaptive Clip Threshold */ hw->phy.ops.write_reg(hw, 0x1F72, 0x3FB0); /* AHT reset limit to 1 */ hw->phy.ops.write_reg(hw, 0x1F76, 0xC0FF); /* Set AHT master delay to 127 msec */ hw->phy.ops.write_reg(hw, 0x1F77, 0x1DEC); /* Set scan bits for AHT */ hw->phy.ops.write_reg(hw, 0x1F78, 0xF9EF); /* Set AHT Preset bits */ hw->phy.ops.write_reg(hw, 0x1F79, 0x0210); /* Change integ_factor of channel A to 3 */ hw->phy.ops.write_reg(hw, 0x1895, 0x0003); /* Change prop_factor of channels BCD to 8 */ hw->phy.ops.write_reg(hw, 0x1796, 0x0008); /* Change cg_icount + enable integbp for channels BCD */ hw->phy.ops.write_reg(hw, 0x1798, 0xD008); /* Change cg_icount + enable integbp + change prop_factor_master * to 8 for channel A */ hw->phy.ops.write_reg(hw, 0x1898, 0xD918); /* Disable AHT in Slave mode on channel A */ hw->phy.ops.write_reg(hw, 0x187A, 0x0800); /* Enable LPLU and disable AN to 1000 in non-D0a states, * Enable SPD+B2B */ hw->phy.ops.write_reg(hw, 0x0019, 0x008D); /* Enable restart AN on an1000_dis change */ hw->phy.ops.write_reg(hw, 0x001B, 0x2080); /* Enable wh_fifo read clock in 10/100 modes */ hw->phy.ops.write_reg(hw, 0x0014, 0x0045); /* Restart AN, Speed selection is 1000 */ hw->phy.ops.write_reg(hw, 0x0000, 0x1340); return E1000_SUCCESS; } /** * e1000_get_phy_type_from_id - Get PHY type from id * @phy_id: phy_id read from the phy * * Returns the phy type from the id. **/ enum e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id) { enum e1000_phy_type phy_type = e1000_phy_unknown; switch (phy_id) { case M88E1000_I_PHY_ID: case M88E1000_E_PHY_ID: case M88E1111_I_PHY_ID: case M88E1011_I_PHY_ID: case M88E1543_E_PHY_ID: case M88E1512_E_PHY_ID: case I347AT4_E_PHY_ID: case M88E1112_E_PHY_ID: case M88E1340M_E_PHY_ID: phy_type = e1000_phy_m88; break; case IGP01E1000_I_PHY_ID: /* IGP 1 & 2 share this */ phy_type = e1000_phy_igp_2; break; case GG82563_E_PHY_ID: phy_type = e1000_phy_gg82563; break; case IGP03E1000_E_PHY_ID: phy_type = e1000_phy_igp_3; break; case IFE_E_PHY_ID: case IFE_PLUS_E_PHY_ID: case IFE_C_E_PHY_ID: phy_type = e1000_phy_ife; break; case BME1000_E_PHY_ID: case BME1000_E_PHY_ID_R2: phy_type = e1000_phy_bm; break; case I82578_E_PHY_ID: phy_type = e1000_phy_82578; break; case I82577_E_PHY_ID: phy_type = e1000_phy_82577; break; case I82579_E_PHY_ID: phy_type = e1000_phy_82579; break; case I217_E_PHY_ID: phy_type = e1000_phy_i217; break; case I82580_I_PHY_ID: phy_type = e1000_phy_82580; break; case I210_I_PHY_ID: phy_type = e1000_phy_i210; break; default: phy_type = e1000_phy_unknown; break; } return phy_type; } /** * e1000_determine_phy_address - Determines PHY address. * @hw: pointer to the HW structure * * This uses a trial and error method to loop through possible PHY * addresses. It tests each by reading the PHY ID registers and * checking for a match. **/ s32 e1000_determine_phy_address(struct e1000_hw *hw) { u32 phy_addr = 0; u32 i; enum e1000_phy_type phy_type = e1000_phy_unknown; hw->phy.id = phy_type; for (phy_addr = 0; phy_addr < E1000_MAX_PHY_ADDR; phy_addr++) { hw->phy.addr = phy_addr; i = 0; do { e1000_get_phy_id(hw); phy_type = e1000_get_phy_type_from_id(hw->phy.id); /* If phy_type is valid, break - we found our * PHY address */ if (phy_type != e1000_phy_unknown) return E1000_SUCCESS; msec_delay(1); i++; } while (i < 10); } return -E1000_ERR_PHY_TYPE; } /** * e1000_get_phy_addr_for_bm_page - Retrieve PHY page address * @page: page to access * * Returns the phy address for the page requested. **/ static u32 e1000_get_phy_addr_for_bm_page(u32 page, u32 reg) { u32 phy_addr = 2; if ((page >= 768) || (page == 0 && reg == 25) || (reg == 31)) phy_addr = 1; return phy_addr; } /** * e1000_write_phy_reg_bm - Write BM PHY register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * * Acquires semaphore, if necessary, then writes the data to PHY register * at the offset. Release any acquired semaphores before exiting. **/ s32 e1000_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data) { s32 ret_val; u32 page = offset >> IGP_PAGE_SHIFT; DEBUGFUNC("e1000_write_phy_reg_bm"); ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; /* Page 800 works differently than the rest so it has its own func */ if (page == BM_WUC_PAGE) { ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, &data, FALSE, false); goto release; } hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset); if (offset > MAX_PHY_MULTI_PAGE_REG) { u32 page_shift, page_select; /* Page select is register 31 for phy address 1 and 22 for * phy address 2 and 3. Page select is shifted only for * phy address 1. */ if (hw->phy.addr == 1) { page_shift = IGP_PAGE_SHIFT; page_select = IGP01E1000_PHY_PAGE_SELECT; } else { page_shift = 0; page_select = BM_PHY_PAGE_SELECT; } /* Page is shifted left, PHY expects (page x 32) */ ret_val = e1000_write_phy_reg_mdic(hw, page_select, (page << page_shift)); if (ret_val) goto release; } ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, data); release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_read_phy_reg_bm - Read BM PHY register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Acquires semaphore, if necessary, then reads the PHY register at offset * and storing the retrieved information in data. Release any acquired * semaphores before exiting. **/ s32 e1000_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data) { s32 ret_val; u32 page = offset >> IGP_PAGE_SHIFT; DEBUGFUNC("e1000_read_phy_reg_bm"); ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; /* Page 800 works differently than the rest so it has its own func */ if (page == BM_WUC_PAGE) { ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, data, TRUE, FALSE); goto release; } hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset); if (offset > MAX_PHY_MULTI_PAGE_REG) { u32 page_shift, page_select; /* Page select is register 31 for phy address 1 and 22 for * phy address 2 and 3. Page select is shifted only for * phy address 1. */ if (hw->phy.addr == 1) { page_shift = IGP_PAGE_SHIFT; page_select = IGP01E1000_PHY_PAGE_SELECT; } else { page_shift = 0; page_select = BM_PHY_PAGE_SELECT; } /* Page is shifted left, PHY expects (page x 32) */ ret_val = e1000_write_phy_reg_mdic(hw, page_select, (page << page_shift)); if (ret_val) goto release; } ret_val = e1000_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, data); release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_read_phy_reg_bm2 - Read BM PHY register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Acquires semaphore, if necessary, then reads the PHY register at offset * and storing the retrieved information in data. Release any acquired * semaphores before exiting. **/ s32 e1000_read_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 *data) { s32 ret_val; u16 page = (u16)(offset >> IGP_PAGE_SHIFT); DEBUGFUNC("e1000_read_phy_reg_bm2"); ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; /* Page 800 works differently than the rest so it has its own func */ if (page == BM_WUC_PAGE) { ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, data, TRUE, FALSE); goto release; } hw->phy.addr = 1; if (offset > MAX_PHY_MULTI_PAGE_REG) { /* Page is shifted left, PHY expects (page x 32) */ ret_val = e1000_write_phy_reg_mdic(hw, BM_PHY_PAGE_SELECT, page); if (ret_val) goto release; } ret_val = e1000_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, data); release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_write_phy_reg_bm2 - Write BM PHY register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * * Acquires semaphore, if necessary, then writes the data to PHY register * at the offset. Release any acquired semaphores before exiting. **/ s32 e1000_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data) { s32 ret_val; u16 page = (u16)(offset >> IGP_PAGE_SHIFT); DEBUGFUNC("e1000_write_phy_reg_bm2"); ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; /* Page 800 works differently than the rest so it has its own func */ if (page == BM_WUC_PAGE) { ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, &data, FALSE, false); goto release; } hw->phy.addr = 1; if (offset > MAX_PHY_MULTI_PAGE_REG) { /* Page is shifted left, PHY expects (page x 32) */ ret_val = e1000_write_phy_reg_mdic(hw, BM_PHY_PAGE_SELECT, page); if (ret_val) goto release; } ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, data); release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_enable_phy_wakeup_reg_access_bm - enable access to BM wakeup registers * @hw: pointer to the HW structure * @phy_reg: pointer to store original contents of BM_WUC_ENABLE_REG * * Assumes semaphore already acquired and phy_reg points to a valid memory * address to store contents of the BM_WUC_ENABLE_REG register. **/ s32 e1000_enable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg) { s32 ret_val; u16 temp; DEBUGFUNC("e1000_enable_phy_wakeup_reg_access_bm"); if (!phy_reg) return -E1000_ERR_PARAM; /* All page select, port ctrl and wakeup registers use phy address 1 */ hw->phy.addr = 1; /* Select Port Control Registers page */ ret_val = e1000_set_page_igp(hw, (BM_PORT_CTRL_PAGE << IGP_PAGE_SHIFT)); if (ret_val) { DEBUGOUT("Could not set Port Control page\n"); return ret_val; } ret_val = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, phy_reg); if (ret_val) { DEBUGOUT2("Could not read PHY register %d.%d\n", BM_PORT_CTRL_PAGE, BM_WUC_ENABLE_REG); return ret_val; } /* Enable both PHY wakeup mode and Wakeup register page writes. * Prevent a power state change by disabling ME and Host PHY wakeup. */ temp = *phy_reg; temp |= BM_WUC_ENABLE_BIT; temp &= ~(BM_WUC_ME_WU_BIT | BM_WUC_HOST_WU_BIT); ret_val = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, temp); if (ret_val) { DEBUGOUT2("Could not write PHY register %d.%d\n", BM_PORT_CTRL_PAGE, BM_WUC_ENABLE_REG); return ret_val; } /* Select Host Wakeup Registers page - caller now able to write * registers on the Wakeup registers page */ return e1000_set_page_igp(hw, (BM_WUC_PAGE << IGP_PAGE_SHIFT)); } /** * e1000_disable_phy_wakeup_reg_access_bm - disable access to BM wakeup regs * @hw: pointer to the HW structure * @phy_reg: pointer to original contents of BM_WUC_ENABLE_REG * * Restore BM_WUC_ENABLE_REG to its original value. * * Assumes semaphore already acquired and *phy_reg is the contents of the * BM_WUC_ENABLE_REG before register(s) on BM_WUC_PAGE were accessed by * caller. **/ s32 e1000_disable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg) { s32 ret_val; DEBUGFUNC("e1000_disable_phy_wakeup_reg_access_bm"); if (!phy_reg) return -E1000_ERR_PARAM; /* Select Port Control Registers page */ ret_val = e1000_set_page_igp(hw, (BM_PORT_CTRL_PAGE << IGP_PAGE_SHIFT)); if (ret_val) { DEBUGOUT("Could not set Port Control page\n"); return ret_val; } /* Restore 769.17 to its original value */ ret_val = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, *phy_reg); if (ret_val) DEBUGOUT2("Could not restore PHY register %d.%d\n", BM_PORT_CTRL_PAGE, BM_WUC_ENABLE_REG); return ret_val; } /** * e1000_access_phy_wakeup_reg_bm - Read/write BM PHY wakeup register * @hw: pointer to the HW structure * @offset: register offset to be read or written * @data: pointer to the data to read or write * @read: determines if operation is read or write * @page_set: BM_WUC_PAGE already set and access enabled * * Read the PHY register at offset and store the retrieved information in * data, or write data to PHY register at offset. Note the procedure to * access the PHY wakeup registers is different than reading the other PHY * registers. It works as such: * 1) Set 769.17.2 (page 769, register 17, bit 2) = 1 * 2) Set page to 800 for host (801 if we were manageability) * 3) Write the address using the address opcode (0x11) * 4) Read or write the data using the data opcode (0x12) * 5) Restore 769.17.2 to its original value * * Steps 1 and 2 are done by e1000_enable_phy_wakeup_reg_access_bm() and * step 5 is done by e1000_disable_phy_wakeup_reg_access_bm(). * * Assumes semaphore is already acquired. When page_set==TRUE, assumes * the PHY page is set to BM_WUC_PAGE (i.e. a function in the call stack * is responsible for calls to e1000_[enable|disable]_phy_wakeup_reg_bm()). **/ static s32 e1000_access_phy_wakeup_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data, bool read, bool page_set) { s32 ret_val; u16 reg, page; u16 phy_reg = 0; DEBUGFUNC("e1000_access_phy_wakeup_reg_bm"); reg = BM_PHY_REG_NUM(offset); page = BM_PHY_REG_PAGE(offset); /* Gig must be disabled for MDIO accesses to Host Wakeup reg page */ if ((hw->mac.type == e1000_pchlan) && (!(E1000_READ_REG(hw, E1000_PHY_CTRL) & E1000_PHY_CTRL_GBE_DISABLE))) DEBUGOUT1("Attempting to access page %d while gig enabled.\n", page); if (!page_set) { /* Enable access to PHY wakeup registers */ ret_val = e1000_enable_phy_wakeup_reg_access_bm(hw, &phy_reg); if (ret_val) { DEBUGOUT("Could not enable PHY wakeup reg access\n"); return ret_val; } } DEBUGOUT2("Accessing PHY page %d reg 0x%x\n", page, reg); /* Write the Wakeup register page offset value using opcode 0x11 */ ret_val = e1000_write_phy_reg_mdic(hw, BM_WUC_ADDRESS_OPCODE, reg); if (ret_val) { DEBUGOUT1("Could not write address opcode to page %d\n", page); return ret_val; } if (read) { /* Read the Wakeup register page value using opcode 0x12 */ ret_val = e1000_read_phy_reg_mdic(hw, BM_WUC_DATA_OPCODE, data); } else { /* Write the Wakeup register page value using opcode 0x12 */ ret_val = e1000_write_phy_reg_mdic(hw, BM_WUC_DATA_OPCODE, *data); } if (ret_val) { DEBUGOUT2("Could not access PHY reg %d.%d\n", page, reg); return ret_val; } if (!page_set) ret_val = e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg); return ret_val; } /** * e1000_power_up_phy_copper - Restore copper link in case of PHY power down * @hw: pointer to the HW structure * * In the case of a PHY power down to save power, or to turn off link during a * driver unload, or wake on lan is not enabled, restore the link to previous * settings. **/ void e1000_power_up_phy_copper(struct e1000_hw *hw) { u16 mii_reg = 0; /* The PHY will retain its settings across a power down/up cycle */ hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg); mii_reg &= ~MII_CR_POWER_DOWN; hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg); } /** * e1000_power_down_phy_copper - Restore copper link in case of PHY power down * @hw: pointer to the HW structure * * In the case of a PHY power down to save power, or to turn off link during a * driver unload, or wake on lan is not enabled, restore the link to previous * settings. **/ void e1000_power_down_phy_copper(struct e1000_hw *hw) { u16 mii_reg = 0; /* The PHY will retain its settings across a power down/up cycle */ hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg); mii_reg |= MII_CR_POWER_DOWN; hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg); msec_delay(1); } /** * __e1000_read_phy_reg_hv - Read HV PHY register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * @locked: semaphore has already been acquired or not * * Acquires semaphore, if necessary, then reads the PHY register at offset * and stores the retrieved information in data. Release any acquired * semaphore before exiting. **/ static s32 __e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data, bool locked, bool page_set) { s32 ret_val; u16 page = BM_PHY_REG_PAGE(offset); u16 reg = BM_PHY_REG_NUM(offset); u32 phy_addr = hw->phy.addr = e1000_get_phy_addr_for_hv_page(page); DEBUGFUNC("__e1000_read_phy_reg_hv"); if (!locked) { ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; } /* Page 800 works differently than the rest so it has its own func */ if (page == BM_WUC_PAGE) { ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, data, TRUE, page_set); goto out; } if (page > 0 && page < HV_INTC_FC_PAGE_START) { ret_val = e1000_access_phy_debug_regs_hv(hw, offset, data, TRUE); goto out; } if (!page_set) { if (page == HV_INTC_FC_PAGE_START) page = 0; if (reg > MAX_PHY_MULTI_PAGE_REG) { /* Page is shifted left, PHY expects (page x 32) */ ret_val = e1000_set_page_igp(hw, (page << IGP_PAGE_SHIFT)); hw->phy.addr = phy_addr; if (ret_val) goto out; } } DEBUGOUT3("reading PHY page %d (or 0x%x shifted) reg 0x%x\n", page, page << IGP_PAGE_SHIFT, reg); ret_val = e1000_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & reg, data); out: if (!locked) hw->phy.ops.release(hw); return ret_val; } /** * e1000_read_phy_reg_hv - Read HV PHY register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Acquires semaphore then reads the PHY register at offset and stores * the retrieved information in data. Release the acquired semaphore * before exiting. **/ s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data) { return __e1000_read_phy_reg_hv(hw, offset, data, FALSE, false); } /** * e1000_read_phy_reg_hv_locked - Read HV PHY register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Reads the PHY register at offset and stores the retrieved information * in data. Assumes semaphore already acquired. **/ s32 e1000_read_phy_reg_hv_locked(struct e1000_hw *hw, u32 offset, u16 *data) { return __e1000_read_phy_reg_hv(hw, offset, data, TRUE, FALSE); } /** * e1000_read_phy_reg_page_hv - Read HV PHY register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * * Reads the PHY register at offset and stores the retrieved information * in data. Assumes semaphore already acquired and page already set. **/ s32 e1000_read_phy_reg_page_hv(struct e1000_hw *hw, u32 offset, u16 *data) { return __e1000_read_phy_reg_hv(hw, offset, data, TRUE, true); } /** * __e1000_write_phy_reg_hv - Write HV PHY register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * @locked: semaphore has already been acquired or not * * Acquires semaphore, if necessary, then writes the data to PHY register * at the offset. Release any acquired semaphores before exiting. **/ static s32 __e1000_write_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 data, bool locked, bool page_set) { s32 ret_val; u16 page = BM_PHY_REG_PAGE(offset); u16 reg = BM_PHY_REG_NUM(offset); u32 phy_addr = hw->phy.addr = e1000_get_phy_addr_for_hv_page(page); DEBUGFUNC("__e1000_write_phy_reg_hv"); if (!locked) { ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; } /* Page 800 works differently than the rest so it has its own func */ if (page == BM_WUC_PAGE) { ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, &data, FALSE, page_set); goto out; } if (page > 0 && page < HV_INTC_FC_PAGE_START) { ret_val = e1000_access_phy_debug_regs_hv(hw, offset, &data, FALSE); goto out; } if (!page_set) { if (page == HV_INTC_FC_PAGE_START) page = 0; /* Workaround MDIO accesses being disabled after entering IEEE * Power Down (when bit 11 of the PHY Control register is set) */ if ((hw->phy.type == e1000_phy_82578) && (hw->phy.revision >= 1) && (hw->phy.addr == 2) && !(MAX_PHY_REG_ADDRESS & reg) && (data & (1 << 11))) { u16 data2 = 0x7EFF; ret_val = e1000_access_phy_debug_regs_hv(hw, (1 << 6) | 0x3, &data2, FALSE); if (ret_val) goto out; } if (reg > MAX_PHY_MULTI_PAGE_REG) { /* Page is shifted left, PHY expects (page x 32) */ ret_val = e1000_set_page_igp(hw, (page << IGP_PAGE_SHIFT)); hw->phy.addr = phy_addr; if (ret_val) goto out; } } DEBUGOUT3("writing PHY page %d (or 0x%x shifted) reg 0x%x\n", page, page << IGP_PAGE_SHIFT, reg); ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & reg, data); out: if (!locked) hw->phy.ops.release(hw); return ret_val; } /** * e1000_write_phy_reg_hv - Write HV PHY register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * * Acquires semaphore then writes the data to PHY register at the offset. * Release the acquired semaphores before exiting. **/ s32 e1000_write_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 data) { return __e1000_write_phy_reg_hv(hw, offset, data, FALSE, false); } /** * e1000_write_phy_reg_hv_locked - Write HV PHY register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * * Writes the data to PHY register at the offset. Assumes semaphore * already acquired. **/ s32 e1000_write_phy_reg_hv_locked(struct e1000_hw *hw, u32 offset, u16 data) { return __e1000_write_phy_reg_hv(hw, offset, data, TRUE, FALSE); } /** * e1000_write_phy_reg_page_hv - Write HV PHY register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * * Writes the data to PHY register at the offset. Assumes semaphore * already acquired and page already set. **/ s32 e1000_write_phy_reg_page_hv(struct e1000_hw *hw, u32 offset, u16 data) { return __e1000_write_phy_reg_hv(hw, offset, data, TRUE, true); } /** * e1000_get_phy_addr_for_hv_page - Get PHY adrress based on page * @page: page to be accessed **/ static u32 e1000_get_phy_addr_for_hv_page(u32 page) { u32 phy_addr = 2; if (page >= HV_INTC_FC_PAGE_START) phy_addr = 1; return phy_addr; } /** * e1000_access_phy_debug_regs_hv - Read HV PHY vendor specific high registers * @hw: pointer to the HW structure * @offset: register offset to be read or written * @data: pointer to the data to be read or written * @read: determines if operation is read or write * * Reads the PHY register at offset and stores the retreived information * in data. Assumes semaphore already acquired. Note that the procedure * to access these regs uses the address port and data port to read/write. * These accesses done with PHY address 2 and without using pages. **/ static s32 e1000_access_phy_debug_regs_hv(struct e1000_hw *hw, u32 offset, u16 *data, bool read) { s32 ret_val; u32 addr_reg; u32 data_reg; DEBUGFUNC("e1000_access_phy_debug_regs_hv"); /* This takes care of the difference with desktop vs mobile phy */ addr_reg = ((hw->phy.type == e1000_phy_82578) ? I82578_ADDR_REG : I82577_ADDR_REG); data_reg = addr_reg + 1; /* All operations in this function are phy address 2 */ hw->phy.addr = 2; /* masking with 0x3F to remove the page from offset */ ret_val = e1000_write_phy_reg_mdic(hw, addr_reg, (u16)offset & 0x3F); if (ret_val) { DEBUGOUT("Could not write the Address Offset port register\n"); return ret_val; } /* Read or write the data value next */ if (read) ret_val = e1000_read_phy_reg_mdic(hw, data_reg, data); else ret_val = e1000_write_phy_reg_mdic(hw, data_reg, *data); if (ret_val) DEBUGOUT("Could not access the Data port register\n"); return ret_val; } /** * e1000_link_stall_workaround_hv - Si workaround * @hw: pointer to the HW structure * * This function works around a Si bug where the link partner can get * a link up indication before the PHY does. If small packets are sent * by the link partner they can be placed in the packet buffer without * being properly accounted for by the PHY and will stall preventing * further packets from being received. The workaround is to clear the * packet buffer after the PHY detects link up. **/ s32 e1000_link_stall_workaround_hv(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 data; DEBUGFUNC("e1000_link_stall_workaround_hv"); if (hw->phy.type != e1000_phy_82578) return E1000_SUCCESS; /* Do not apply workaround if in PHY loopback bit 14 set */ hw->phy.ops.read_reg(hw, PHY_CONTROL, &data); if (data & PHY_CONTROL_LB) return E1000_SUCCESS; /* check if link is up and at 1Gbps */ ret_val = hw->phy.ops.read_reg(hw, BM_CS_STATUS, &data); if (ret_val) return ret_val; data &= (BM_CS_STATUS_LINK_UP | BM_CS_STATUS_RESOLVED | BM_CS_STATUS_SPEED_MASK); if (data != (BM_CS_STATUS_LINK_UP | BM_CS_STATUS_RESOLVED | BM_CS_STATUS_SPEED_1000)) return E1000_SUCCESS; msec_delay(200); /* flush the packets in the fifo buffer */ ret_val = hw->phy.ops.write_reg(hw, HV_MUX_DATA_CTRL, (HV_MUX_DATA_CTRL_GEN_TO_MAC | HV_MUX_DATA_CTRL_FORCE_SPEED)); if (ret_val) return ret_val; return hw->phy.ops.write_reg(hw, HV_MUX_DATA_CTRL, HV_MUX_DATA_CTRL_GEN_TO_MAC); } /** * e1000_check_polarity_82577 - Checks the polarity. * @hw: pointer to the HW structure * * Success returns 0, Failure returns -E1000_ERR_PHY (-2) * * Polarity is determined based on the PHY specific status register. **/ s32 e1000_check_polarity_82577(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 data; DEBUGFUNC("e1000_check_polarity_82577"); ret_val = phy->ops.read_reg(hw, I82577_PHY_STATUS_2, &data); if (!ret_val) phy->cable_polarity = ((data & I82577_PHY_STATUS2_REV_POLARITY) ? e1000_rev_polarity_reversed : e1000_rev_polarity_normal); return ret_val; } /** * e1000_phy_force_speed_duplex_82577 - Force speed/duplex for I82577 PHY * @hw: pointer to the HW structure * * Calls the PHY setup function to force speed and duplex. **/ s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 phy_data; bool link; DEBUGFUNC("e1000_phy_force_speed_duplex_82577"); ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); if (ret_val) return ret_val; e1000_phy_force_speed_duplex_setup(hw, &phy_data); ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); if (ret_val) return ret_val; usec_delay(1); if (phy->autoneg_wait_to_complete) { DEBUGOUT("Waiting for forced speed/duplex link on 82577 phy\n"); ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, 100000, &link); if (ret_val) return ret_val; if (!link) DEBUGOUT("Link taking longer than expected.\n"); /* Try once more */ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, 100000, &link); } return ret_val; } /** * e1000_get_phy_info_82577 - Retrieve I82577 PHY information * @hw: pointer to the HW structure * * Read PHY status to determine if link is up. If link is up, then * set/determine 10base-T extended distance and polarity correction. Read * PHY port status to determine MDI/MDIx and speed. Based on the speed, * determine on the cable length, local and remote receiver. **/ s32 e1000_get_phy_info_82577(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 data; bool link; DEBUGFUNC("e1000_get_phy_info_82577"); ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); if (ret_val) return ret_val; if (!link) { DEBUGOUT("Phy info is only valid if link is up\n"); return -E1000_ERR_CONFIG; } phy->polarity_correction = TRUE; ret_val = e1000_check_polarity_82577(hw); if (ret_val) return ret_val; ret_val = phy->ops.read_reg(hw, I82577_PHY_STATUS_2, &data); if (ret_val) return ret_val; phy->is_mdix = !!(data & I82577_PHY_STATUS2_MDIX); if ((data & I82577_PHY_STATUS2_SPEED_MASK) == I82577_PHY_STATUS2_SPEED_1000MBPS) { ret_val = hw->phy.ops.get_cable_length(hw); if (ret_val) return ret_val; ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data); if (ret_val) return ret_val; phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok; } else { phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; phy->local_rx = e1000_1000t_rx_status_undefined; phy->remote_rx = e1000_1000t_rx_status_undefined; } return E1000_SUCCESS; } /** * e1000_get_cable_length_82577 - Determine cable length for 82577 PHY * @hw: pointer to the HW structure * * Reads the diagnostic status register and verifies result is valid before * placing it in the phy_cable_length field. **/ s32 e1000_get_cable_length_82577(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 phy_data, length; DEBUGFUNC("e1000_get_cable_length_82577"); ret_val = phy->ops.read_reg(hw, I82577_PHY_DIAG_STATUS, &phy_data); if (ret_val) return ret_val; length = ((phy_data & I82577_DSTATUS_CABLE_LENGTH) >> I82577_DSTATUS_CABLE_LENGTH_SHIFT); if (length == E1000_CABLE_LENGTH_UNDEFINED) return -E1000_ERR_PHY; phy->cable_length = length; return E1000_SUCCESS; } /** * e1000_write_phy_reg_gs40g - Write GS40G PHY register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * * Acquires semaphore, if necessary, then writes the data to PHY register * at the offset. Release any acquired semaphores before exiting. **/ s32 e1000_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data) { s32 ret_val; u16 page = offset >> GS40G_PAGE_SHIFT; DEBUGFUNC("e1000_write_phy_reg_gs40g"); offset = offset & GS40G_OFFSET_MASK; ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; ret_val = e1000_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page); if (ret_val) goto release; ret_val = e1000_write_phy_reg_mdic(hw, offset, data); release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_read_phy_reg_gs40g - Read GS40G PHY register * @hw: pointer to the HW structure * @offset: lower half is register offset to read to * upper half is page to use. * @data: data to read at register offset * * Acquires semaphore, if necessary, then reads the data in the PHY register * at the offset. Release any acquired semaphores before exiting. **/ s32 e1000_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data) { s32 ret_val; u16 page = offset >> GS40G_PAGE_SHIFT; DEBUGFUNC("e1000_read_phy_reg_gs40g"); offset = offset & GS40G_OFFSET_MASK; ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; ret_val = e1000_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page); if (ret_val) goto release; ret_val = e1000_read_phy_reg_mdic(hw, offset, data); release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_read_phy_reg_mphy - Read mPHY control register * @hw: pointer to the HW structure * @address: address to be read * @data: pointer to the read data * * Reads the mPHY control register in the PHY at offset and stores the * information read to data. **/ s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data) { u32 mphy_ctrl = 0; bool locked = FALSE; bool ready; DEBUGFUNC("e1000_read_phy_reg_mphy"); /* Check if mPHY is ready to read/write operations */ ready = e1000_is_mphy_ready(hw); if (!ready) return -E1000_ERR_PHY; /* Check if mPHY access is disabled and enable it if so */ mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL); if (mphy_ctrl & E1000_MPHY_DIS_ACCESS) { locked = TRUE; ready = e1000_is_mphy_ready(hw); if (!ready) return -E1000_ERR_PHY; mphy_ctrl |= E1000_MPHY_ENA_ACCESS; E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl); } /* Set the address that we want to read */ ready = e1000_is_mphy_ready(hw); if (!ready) return -E1000_ERR_PHY; /* We mask address, because we want to use only current lane */ mphy_ctrl = (mphy_ctrl & ~E1000_MPHY_ADDRESS_MASK & ~E1000_MPHY_ADDRESS_FNC_OVERRIDE) | (address & E1000_MPHY_ADDRESS_MASK); E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl); /* Read data from the address */ ready = e1000_is_mphy_ready(hw); if (!ready) return -E1000_ERR_PHY; *data = E1000_READ_REG(hw, E1000_MPHY_DATA); /* Disable access to mPHY if it was originally disabled */ if (locked) ready = e1000_is_mphy_ready(hw); if (!ready) return -E1000_ERR_PHY; E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, E1000_MPHY_DIS_ACCESS); return E1000_SUCCESS; } /** * e1000_write_phy_reg_mphy - Write mPHY control register * @hw: pointer to the HW structure * @address: address to write to * @data: data to write to register at offset * @line_override: used when we want to use different line than default one * * Writes data to mPHY control register. **/ s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data, bool line_override) { u32 mphy_ctrl = 0; bool locked = FALSE; bool ready; DEBUGFUNC("e1000_write_phy_reg_mphy"); /* Check if mPHY is ready to read/write operations */ ready = e1000_is_mphy_ready(hw); if (!ready) return -E1000_ERR_PHY; /* Check if mPHY access is disabled and enable it if so */ mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL); if (mphy_ctrl & E1000_MPHY_DIS_ACCESS) { locked = TRUE; ready = e1000_is_mphy_ready(hw); if (!ready) return -E1000_ERR_PHY; mphy_ctrl |= E1000_MPHY_ENA_ACCESS; E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl); } /* Set the address that we want to read */ ready = e1000_is_mphy_ready(hw); if (!ready) return -E1000_ERR_PHY; /* We mask address, because we want to use only current lane */ if (line_override) mphy_ctrl |= E1000_MPHY_ADDRESS_FNC_OVERRIDE; else mphy_ctrl &= ~E1000_MPHY_ADDRESS_FNC_OVERRIDE; mphy_ctrl = (mphy_ctrl & ~E1000_MPHY_ADDRESS_MASK) | (address & E1000_MPHY_ADDRESS_MASK); E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl); /* Read data from the address */ ready = e1000_is_mphy_ready(hw); if (!ready) return -E1000_ERR_PHY; E1000_WRITE_REG(hw, E1000_MPHY_DATA, data); /* Disable access to mPHY if it was originally disabled */ if (locked) ready = e1000_is_mphy_ready(hw); if (!ready) return -E1000_ERR_PHY; E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, E1000_MPHY_DIS_ACCESS); return E1000_SUCCESS; } /** * e1000_is_mphy_ready - Check if mPHY control register is not busy * @hw: pointer to the HW structure * * Returns mPHY control register status. **/ bool e1000_is_mphy_ready(struct e1000_hw *hw) { u16 retry_count = 0; u32 mphy_ctrl = 0; bool ready = FALSE; while (retry_count < 2) { mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL); if (mphy_ctrl & E1000_MPHY_BUSY) { usec_delay(20); retry_count++; continue; } ready = TRUE; break; } if (!ready) DEBUGOUT("ERROR READING mPHY control register, phy is busy.\n"); return ready; } Index: projects/clang-trunk/sys/dev/e1000/e1000_regs.h =================================================================== --- projects/clang-trunk/sys/dev/e1000/e1000_regs.h (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/e1000_regs.h (revision 287502) @@ -1,694 +1,686 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _E1000_REGS_H_ #define _E1000_REGS_H_ #define E1000_CTRL 0x00000 /* Device Control - RW */ #define E1000_CTRL_DUP 0x00004 /* Device Control Duplicate (Shadow) - RW */ #define E1000_STATUS 0x00008 /* Device Status - RO */ #define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */ #define E1000_EERD 0x00014 /* EEPROM Read - RW */ #define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */ #define E1000_FLA 0x0001C /* Flash Access - RW */ #define E1000_MDIC 0x00020 /* MDI Control - RW */ #define E1000_MDICNFG 0x00E04 /* MDI Config - RW */ #define E1000_REGISTER_SET_SIZE 0x20000 /* CSR Size */ #define E1000_EEPROM_INIT_CTRL_WORD_2 0x0F /* EEPROM Init Ctrl Word 2 */ #define E1000_EEPROM_PCIE_CTRL_WORD_2 0x28 /* EEPROM PCIe Ctrl Word 2 */ #define E1000_BARCTRL 0x5BBC /* BAR ctrl reg */ #define E1000_BARCTRL_FLSIZE 0x0700 /* BAR ctrl Flsize */ #define E1000_BARCTRL_CSRSIZE 0x2000 /* BAR ctrl CSR size */ #define E1000_MPHY_ADDR_CTRL 0x0024 /* GbE MPHY Address Control */ #define E1000_MPHY_DATA 0x0E10 /* GBE MPHY Data */ #define E1000_MPHY_STAT 0x0E0C /* GBE MPHY Statistics */ #define E1000_PPHY_CTRL 0x5b48 /* PCIe PHY Control */ #define E1000_I350_BARCTRL 0x5BFC /* BAR ctrl reg */ #define E1000_I350_DTXMXPKTSZ 0x355C /* Maximum sent packet size reg*/ #define E1000_SCTL 0x00024 /* SerDes Control - RW */ #define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */ #define E1000_FCAH 0x0002C /* Flow Control Address High -RW */ #define E1000_FEXT 0x0002C /* Future Extended - RW */ #define E1000_FEXTNVM 0x00028 /* Future Extended NVM - RW */ #define E1000_FEXTNVM3 0x0003C /* Future Extended NVM 3 - RW */ #define E1000_FEXTNVM4 0x00024 /* Future Extended NVM 4 - RW */ #define E1000_FEXTNVM6 0x00010 /* Future Extended NVM 6 - RW */ #define E1000_FEXTNVM7 0x000E4 /* Future Extended NVM 7 - RW */ -#define E1000_FEXTNVM9 0x5BB4 /* Future Extended NVM 9 - RW */ -#define E1000_FEXTNVM11 0x5BBC /* Future Extended NVM 11 - RW */ -#define E1000_PCIEANACFG 0x00F18 /* PCIE Analog Config */ #define E1000_FCT 0x00030 /* Flow Control Type - RW */ #define E1000_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ #define E1000_VET 0x00038 /* VLAN Ether Type - RW */ #define E1000_ICR 0x000C0 /* Interrupt Cause Read - R/clr */ #define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */ #define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */ #define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */ #define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */ #define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */ #define E1000_IVAR 0x000E4 /* Interrupt Vector Allocation Register - RW */ #define E1000_SVCR 0x000F0 #define E1000_SVT 0x000F4 #define E1000_LPIC 0x000FC /* Low Power IDLE control */ #define E1000_RCTL 0x00100 /* Rx Control - RW */ #define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */ #define E1000_TXCW 0x00178 /* Tx Configuration Word - RW */ #define E1000_RXCW 0x00180 /* Rx Configuration Word - RO */ #define E1000_PBA_ECC 0x01100 /* PBA ECC Register */ #define E1000_EICR 0x01580 /* Ext. Interrupt Cause Read - R/clr */ #define E1000_EITR(_n) (0x01680 + (0x4 * (_n))) #define E1000_EICS 0x01520 /* Ext. Interrupt Cause Set - W0 */ #define E1000_EIMS 0x01524 /* Ext. Interrupt Mask Set/Read - RW */ #define E1000_EIMC 0x01528 /* Ext. Interrupt Mask Clear - WO */ #define E1000_EIAC 0x0152C /* Ext. Interrupt Auto Clear - RW */ #define E1000_EIAM 0x01530 /* Ext. Interrupt Ack Auto Clear Mask - RW */ #define E1000_GPIE 0x01514 /* General Purpose Interrupt Enable - RW */ #define E1000_IVAR0 0x01700 /* Interrupt Vector Allocation (array) - RW */ #define E1000_IVAR_MISC 0x01740 /* IVAR for "other" causes - RW */ #define E1000_TCTL 0x00400 /* Tx Control - RW */ #define E1000_TCTL_EXT 0x00404 /* Extended Tx Control - RW */ #define E1000_TIPG 0x00410 /* Tx Inter-packet gap -RW */ #define E1000_TBT 0x00448 /* Tx Burst Timer - RW */ #define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */ #define E1000_LEDCTL 0x00E00 /* LED Control - RW */ #define E1000_LEDMUX 0x08130 /* LED MUX Control */ #define E1000_EXTCNF_CTRL 0x00F00 /* Extended Configuration Control */ #define E1000_EXTCNF_SIZE 0x00F08 /* Extended Configuration Size */ #define E1000_PHY_CTRL 0x00F10 /* PHY Control Register in CSR */ #define E1000_POEMB E1000_PHY_CTRL /* PHY OEM Bits */ #define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ #define E1000_PBS 0x01008 /* Packet Buffer Size */ #define E1000_PBECCSTS 0x0100C /* Packet Buffer ECC Status - RW */ -#define E1000_IOSFPC 0x00F28 /* TX corrupted data */ -#define E1000_IOSFPC 0x00F28 /* TX corrupted data */ #define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ #define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */ #define E1000_EEARBC_I210 0x12024 /* EEPROM Auto Read Bus Control */ #define E1000_FLASHT 0x01028 /* FLASH Timer Register */ #define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */ #define E1000_FLSWCTL 0x01030 /* FLASH control register */ #define E1000_FLSWDATA 0x01034 /* FLASH data register */ #define E1000_FLSWCNT 0x01038 /* FLASH Access Counter */ #define E1000_FLOP 0x0103C /* FLASH Opcode Register */ #define E1000_I2CCMD 0x01028 /* SFPI2C Command Register - RW */ #define E1000_I2CPARAMS 0x0102C /* SFPI2C Parameters Register - RW */ #define E1000_I2CBB_EN 0x00000100 /* I2C - Bit Bang Enable */ #define E1000_I2C_CLK_OUT 0x00000200 /* I2C- Clock */ #define E1000_I2C_DATA_OUT 0x00000400 /* I2C- Data Out */ #define E1000_I2C_DATA_OE_N 0x00000800 /* I2C- Data Output Enable */ #define E1000_I2C_DATA_IN 0x00001000 /* I2C- Data In */ #define E1000_I2C_CLK_OE_N 0x00002000 /* I2C- Clock Output Enable */ #define E1000_I2C_CLK_IN 0x00004000 /* I2C- Clock In */ #define E1000_I2C_CLK_STRETCH_DIS 0x00008000 /* I2C- Dis Clk Stretching */ #define E1000_WDSTP 0x01040 /* Watchdog Setup - RW */ #define E1000_SWDSTS 0x01044 /* SW Device Status - RW */ #define E1000_FRTIMER 0x01048 /* Free Running Timer - RW */ #define E1000_TCPTIMER 0x0104C /* TCP Timer - RW */ #define E1000_VPDDIAG 0x01060 /* VPD Diagnostic - RO */ #define E1000_ICR_V2 0x01500 /* Intr Cause - new location - RC */ #define E1000_ICS_V2 0x01504 /* Intr Cause Set - new location - WO */ #define E1000_IMS_V2 0x01508 /* Intr Mask Set/Read - new location - RW */ #define E1000_IMC_V2 0x0150C /* Intr Mask Clear - new location - WO */ #define E1000_IAM_V2 0x01510 /* Intr Ack Auto Mask - new location - RW */ #define E1000_ERT 0x02008 /* Early Rx Threshold - RW */ #define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */ #define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */ #define E1000_PSRCTL 0x02170 /* Packet Split Receive Control - RW */ #define E1000_RDFH 0x02410 /* Rx Data FIFO Head - RW */ #define E1000_RDFT 0x02418 /* Rx Data FIFO Tail - RW */ #define E1000_RDFHS 0x02420 /* Rx Data FIFO Head Saved - RW */ #define E1000_RDFTS 0x02428 /* Rx Data FIFO Tail Saved - RW */ #define E1000_RDFPC 0x02430 /* Rx Data FIFO Packet Count - RW */ #define E1000_PBRTH 0x02458 /* PB Rx Arbitration Threshold - RW */ #define E1000_FCRTV 0x02460 /* Flow Control Refresh Timer Value - RW */ /* Split and Replication Rx Control - RW */ #define E1000_RDPUMB 0x025CC /* DMA Rx Descriptor uC Mailbox - RW */ #define E1000_RDPUAD 0x025D0 /* DMA Rx Descriptor uC Addr Command - RW */ #define E1000_RDPUWD 0x025D4 /* DMA Rx Descriptor uC Data Write - RW */ #define E1000_RDPURD 0x025D8 /* DMA Rx Descriptor uC Data Read - RW */ #define E1000_RDPUCTL 0x025DC /* DMA Rx Descriptor uC Control - RW */ #define E1000_PBDIAG 0x02458 /* Packet Buffer Diagnostic - RW */ #define E1000_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ #define E1000_IRPBS 0x02404 /* Same as RXPBS, renamed for newer Si - RW */ #define E1000_PBRWAC 0x024E8 /* Rx packet buffer wrap around counter - RO */ #define E1000_RDTR 0x02820 /* Rx Delay Timer - RW */ #define E1000_RADV 0x0282C /* Rx Interrupt Absolute Delay Timer - RW */ #define E1000_EMIADD 0x10 /* Extended Memory Indirect Address */ #define E1000_EMIDATA 0x11 /* Extended Memory Indirect Data */ #define E1000_SRWR 0x12018 /* Shadow Ram Write Register - RW */ #define E1000_I210_FLMNGCTL 0x12038 #define E1000_I210_FLMNGDATA 0x1203C #define E1000_I210_FLMNGCNT 0x12040 #define E1000_I210_FLSWCTL 0x12048 #define E1000_I210_FLSWDATA 0x1204C #define E1000_I210_FLSWCNT 0x12050 #define E1000_I210_FLA 0x1201C #define E1000_INVM_DATA_REG(_n) (0x12120 + 4*(_n)) #define E1000_INVM_SIZE 64 /* Number of INVM Data Registers */ /* QAV Tx mode control register */ #define E1000_I210_TQAVCTRL 0x3570 /* QAV Tx mode control register bitfields masks */ /* QAV enable */ #define E1000_TQAVCTRL_MODE (1 << 0) /* Fetching arbitration type */ #define E1000_TQAVCTRL_FETCH_ARB (1 << 4) /* Fetching timer enable */ #define E1000_TQAVCTRL_FETCH_TIMER_ENABLE (1 << 5) /* Launch arbitration type */ #define E1000_TQAVCTRL_LAUNCH_ARB (1 << 8) /* Launch timer enable */ #define E1000_TQAVCTRL_LAUNCH_TIMER_ENABLE (1 << 9) /* SP waits for SR enable */ #define E1000_TQAVCTRL_SP_WAIT_SR (1 << 10) /* Fetching timer correction */ #define E1000_TQAVCTRL_FETCH_TIMER_DELTA_OFFSET 16 #define E1000_TQAVCTRL_FETCH_TIMER_DELTA \ (0xFFFF << E1000_TQAVCTRL_FETCH_TIMER_DELTA_OFFSET) /* High credit registers where _n can be 0 or 1. */ #define E1000_I210_TQAVHC(_n) (0x300C + 0x40 * (_n)) /* Queues fetch arbitration priority control register */ #define E1000_I210_TQAVARBCTRL 0x3574 /* Queues priority masks where _n and _p can be 0-3. */ #define E1000_TQAVARBCTRL_QUEUE_PRI(_n, _p) ((_p) << (2 * (_n))) /* QAV Tx mode control registers where _n can be 0 or 1. */ #define E1000_I210_TQAVCC(_n) (0x3004 + 0x40 * (_n)) /* QAV Tx mode control register bitfields masks */ #define E1000_TQAVCC_IDLE_SLOPE 0xFFFF /* Idle slope */ #define E1000_TQAVCC_KEEP_CREDITS (1 << 30) /* Keep credits opt enable */ #define E1000_TQAVCC_QUEUE_MODE (1 << 31) /* SP vs. SR Tx mode */ /* Good transmitted packets counter registers */ #define E1000_PQGPTC(_n) (0x010014 + (0x100 * (_n))) /* Queues packet buffer size masks where _n can be 0-3 and _s 0-63 [kB] */ #define E1000_I210_TXPBS_SIZE(_n, _s) ((_s) << (6 * (_n))) #define E1000_MMDAC 13 /* MMD Access Control */ #define E1000_MMDAAD 14 /* MMD Access Address/Data */ /* Convenience macros * * Note: "_n" is the queue number of the register to be written to. * * Example usage: * E1000_RDBAL_REG(current_rx_queue) */ #define E1000_RDBAL(_n) ((_n) < 4 ? (0x02800 + ((_n) * 0x100)) : \ (0x0C000 + ((_n) * 0x40))) #define E1000_RDBAH(_n) ((_n) < 4 ? (0x02804 + ((_n) * 0x100)) : \ (0x0C004 + ((_n) * 0x40))) #define E1000_RDLEN(_n) ((_n) < 4 ? (0x02808 + ((_n) * 0x100)) : \ (0x0C008 + ((_n) * 0x40))) #define E1000_SRRCTL(_n) ((_n) < 4 ? (0x0280C + ((_n) * 0x100)) : \ (0x0C00C + ((_n) * 0x40))) #define E1000_RDH(_n) ((_n) < 4 ? (0x02810 + ((_n) * 0x100)) : \ (0x0C010 + ((_n) * 0x40))) #define E1000_RXCTL(_n) ((_n) < 4 ? (0x02814 + ((_n) * 0x100)) : \ (0x0C014 + ((_n) * 0x40))) #define E1000_DCA_RXCTRL(_n) E1000_RXCTL(_n) #define E1000_RDT(_n) ((_n) < 4 ? (0x02818 + ((_n) * 0x100)) : \ (0x0C018 + ((_n) * 0x40))) #define E1000_RXDCTL(_n) ((_n) < 4 ? (0x02828 + ((_n) * 0x100)) : \ (0x0C028 + ((_n) * 0x40))) #define E1000_RQDPC(_n) ((_n) < 4 ? (0x02830 + ((_n) * 0x100)) : \ (0x0C030 + ((_n) * 0x40))) #define E1000_TDBAL(_n) ((_n) < 4 ? (0x03800 + ((_n) * 0x100)) : \ (0x0E000 + ((_n) * 0x40))) #define E1000_TDBAH(_n) ((_n) < 4 ? (0x03804 + ((_n) * 0x100)) : \ (0x0E004 + ((_n) * 0x40))) #define E1000_TDLEN(_n) ((_n) < 4 ? (0x03808 + ((_n) * 0x100)) : \ (0x0E008 + ((_n) * 0x40))) #define E1000_TDH(_n) ((_n) < 4 ? (0x03810 + ((_n) * 0x100)) : \ (0x0E010 + ((_n) * 0x40))) #define E1000_TXCTL(_n) ((_n) < 4 ? (0x03814 + ((_n) * 0x100)) : \ (0x0E014 + ((_n) * 0x40))) #define E1000_DCA_TXCTRL(_n) E1000_TXCTL(_n) #define E1000_TDT(_n) ((_n) < 4 ? (0x03818 + ((_n) * 0x100)) : \ (0x0E018 + ((_n) * 0x40))) #define E1000_TXDCTL(_n) ((_n) < 4 ? (0x03828 + ((_n) * 0x100)) : \ (0x0E028 + ((_n) * 0x40))) #define E1000_TDWBAL(_n) ((_n) < 4 ? (0x03838 + ((_n) * 0x100)) : \ (0x0E038 + ((_n) * 0x40))) #define E1000_TDWBAH(_n) ((_n) < 4 ? (0x0383C + ((_n) * 0x100)) : \ (0x0E03C + ((_n) * 0x40))) #define E1000_TARC(_n) (0x03840 + ((_n) * 0x100)) #define E1000_RSRPD 0x02C00 /* Rx Small Packet Detect - RW */ #define E1000_RAID 0x02C08 /* Receive Ack Interrupt Delay - RW */ #define E1000_TXDMAC 0x03000 /* Tx DMA Control - RW */ #define E1000_KABGTXD 0x03004 /* AFE Band Gap Transmit Ref Data */ #define E1000_PSRTYPE(_i) (0x05480 + ((_i) * 4)) #define E1000_RAL(_i) (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \ (0x054E0 + ((_i - 16) * 8))) #define E1000_RAH(_i) (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \ (0x054E4 + ((_i - 16) * 8))) #define E1000_SHRAL(_i) (0x05438 + ((_i) * 8)) #define E1000_SHRAH(_i) (0x0543C + ((_i) * 8)) #define E1000_IP4AT_REG(_i) (0x05840 + ((_i) * 8)) #define E1000_IP6AT_REG(_i) (0x05880 + ((_i) * 4)) #define E1000_WUPM_REG(_i) (0x05A00 + ((_i) * 4)) #define E1000_FFMT_REG(_i) (0x09000 + ((_i) * 8)) #define E1000_FFVT_REG(_i) (0x09800 + ((_i) * 8)) #define E1000_FFLT_REG(_i) (0x05F00 + ((_i) * 8)) #define E1000_PBSLAC 0x03100 /* Pkt Buffer Slave Access Control */ #define E1000_PBSLAD(_n) (0x03110 + (0x4 * (_n))) /* Pkt Buffer DWORD */ #define E1000_TXPBS 0x03404 /* Tx Packet Buffer Size - RW */ /* Same as TXPBS, renamed for newer Si - RW */ #define E1000_ITPBS 0x03404 #define E1000_TDFH 0x03410 /* Tx Data FIFO Head - RW */ #define E1000_TDFT 0x03418 /* Tx Data FIFO Tail - RW */ #define E1000_TDFHS 0x03420 /* Tx Data FIFO Head Saved - RW */ #define E1000_TDFTS 0x03428 /* Tx Data FIFO Tail Saved - RW */ #define E1000_TDFPC 0x03430 /* Tx Data FIFO Packet Count - RW */ #define E1000_TDPUMB 0x0357C /* DMA Tx Desc uC Mail Box - RW */ #define E1000_TDPUAD 0x03580 /* DMA Tx Desc uC Addr Command - RW */ #define E1000_TDPUWD 0x03584 /* DMA Tx Desc uC Data Write - RW */ #define E1000_TDPURD 0x03588 /* DMA Tx Desc uC Data Read - RW */ #define E1000_TDPUCTL 0x0358C /* DMA Tx Desc uC Control - RW */ #define E1000_DTXCTL 0x03590 /* DMA Tx Control - RW */ #define E1000_DTXTCPFLGL 0x0359C /* DMA Tx Control flag low - RW */ #define E1000_DTXTCPFLGH 0x035A0 /* DMA Tx Control flag high - RW */ /* DMA Tx Max Total Allow Size Reqs - RW */ #define E1000_DTXMXSZRQ 0x03540 #define E1000_TIDV 0x03820 /* Tx Interrupt Delay Value - RW */ #define E1000_TADV 0x0382C /* Tx Interrupt Absolute Delay Val - RW */ #define E1000_TSPMT 0x03830 /* TCP Segmentation PAD & Min Threshold - RW */ #define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */ #define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ #define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */ #define E1000_RXERRC 0x0400C /* Receive Error Count - R/clr */ #define E1000_MPC 0x04010 /* Missed Packet Count - R/clr */ #define E1000_SCC 0x04014 /* Single Collision Count - R/clr */ #define E1000_ECOL 0x04018 /* Excessive Collision Count - R/clr */ #define E1000_MCC 0x0401C /* Multiple Collision Count - R/clr */ #define E1000_LATECOL 0x04020 /* Late Collision Count - R/clr */ #define E1000_COLC 0x04028 /* Collision Count - R/clr */ #define E1000_DC 0x04030 /* Defer Count - R/clr */ #define E1000_TNCRS 0x04034 /* Tx-No CRS - R/clr */ #define E1000_SEC 0x04038 /* Sequence Error Count - R/clr */ #define E1000_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */ #define E1000_RLEC 0x04040 /* Receive Length Error Count - R/clr */ #define E1000_XONRXC 0x04048 /* XON Rx Count - R/clr */ #define E1000_XONTXC 0x0404C /* XON Tx Count - R/clr */ #define E1000_XOFFRXC 0x04050 /* XOFF Rx Count - R/clr */ #define E1000_XOFFTXC 0x04054 /* XOFF Tx Count - R/clr */ #define E1000_FCRUC 0x04058 /* Flow Control Rx Unsupported Count- R/clr */ #define E1000_PRC64 0x0405C /* Packets Rx (64 bytes) - R/clr */ #define E1000_PRC127 0x04060 /* Packets Rx (65-127 bytes) - R/clr */ #define E1000_PRC255 0x04064 /* Packets Rx (128-255 bytes) - R/clr */ #define E1000_PRC511 0x04068 /* Packets Rx (255-511 bytes) - R/clr */ #define E1000_PRC1023 0x0406C /* Packets Rx (512-1023 bytes) - R/clr */ #define E1000_PRC1522 0x04070 /* Packets Rx (1024-1522 bytes) - R/clr */ #define E1000_GPRC 0x04074 /* Good Packets Rx Count - R/clr */ #define E1000_BPRC 0x04078 /* Broadcast Packets Rx Count - R/clr */ #define E1000_MPRC 0x0407C /* Multicast Packets Rx Count - R/clr */ #define E1000_GPTC 0x04080 /* Good Packets Tx Count - R/clr */ #define E1000_GORCL 0x04088 /* Good Octets Rx Count Low - R/clr */ #define E1000_GORCH 0x0408C /* Good Octets Rx Count High - R/clr */ #define E1000_GOTCL 0x04090 /* Good Octets Tx Count Low - R/clr */ #define E1000_GOTCH 0x04094 /* Good Octets Tx Count High - R/clr */ #define E1000_RNBC 0x040A0 /* Rx No Buffers Count - R/clr */ #define E1000_RUC 0x040A4 /* Rx Undersize Count - R/clr */ #define E1000_RFC 0x040A8 /* Rx Fragment Count - R/clr */ #define E1000_ROC 0x040AC /* Rx Oversize Count - R/clr */ #define E1000_RJC 0x040B0 /* Rx Jabber Count - R/clr */ #define E1000_MGTPRC 0x040B4 /* Management Packets Rx Count - R/clr */ #define E1000_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */ #define E1000_MGTPTC 0x040BC /* Management Packets Tx Count - R/clr */ #define E1000_TORL 0x040C0 /* Total Octets Rx Low - R/clr */ #define E1000_TORH 0x040C4 /* Total Octets Rx High - R/clr */ #define E1000_TOTL 0x040C8 /* Total Octets Tx Low - R/clr */ #define E1000_TOTH 0x040CC /* Total Octets Tx High - R/clr */ #define E1000_TPR 0x040D0 /* Total Packets Rx - R/clr */ #define E1000_TPT 0x040D4 /* Total Packets Tx - R/clr */ #define E1000_PTC64 0x040D8 /* Packets Tx (64 bytes) - R/clr */ #define E1000_PTC127 0x040DC /* Packets Tx (65-127 bytes) - R/clr */ #define E1000_PTC255 0x040E0 /* Packets Tx (128-255 bytes) - R/clr */ #define E1000_PTC511 0x040E4 /* Packets Tx (256-511 bytes) - R/clr */ #define E1000_PTC1023 0x040E8 /* Packets Tx (512-1023 bytes) - R/clr */ #define E1000_PTC1522 0x040EC /* Packets Tx (1024-1522 Bytes) - R/clr */ #define E1000_MPTC 0x040F0 /* Multicast Packets Tx Count - R/clr */ #define E1000_BPTC 0x040F4 /* Broadcast Packets Tx Count - R/clr */ #define E1000_TSCTC 0x040F8 /* TCP Segmentation Context Tx - R/clr */ #define E1000_TSCTFC 0x040FC /* TCP Segmentation Context Tx Fail - R/clr */ #define E1000_IAC 0x04100 /* Interrupt Assertion Count */ #define E1000_ICRXPTC 0x04104 /* Interrupt Cause Rx Pkt Timer Expire Count */ #define E1000_ICRXATC 0x04108 /* Interrupt Cause Rx Abs Timer Expire Count */ #define E1000_ICTXPTC 0x0410C /* Interrupt Cause Tx Pkt Timer Expire Count */ #define E1000_ICTXATC 0x04110 /* Interrupt Cause Tx Abs Timer Expire Count */ #define E1000_ICTXQEC 0x04118 /* Interrupt Cause Tx Queue Empty Count */ #define E1000_ICTXQMTC 0x0411C /* Interrupt Cause Tx Queue Min Thresh Count */ #define E1000_ICRXDMTC 0x04120 /* Interrupt Cause Rx Desc Min Thresh Count */ #define E1000_ICRXOC 0x04124 /* Interrupt Cause Receiver Overrun Count */ #define E1000_CRC_OFFSET 0x05F50 /* CRC Offset register */ #define E1000_VFGPRC 0x00F10 #define E1000_VFGORC 0x00F18 #define E1000_VFMPRC 0x00F3C #define E1000_VFGPTC 0x00F14 #define E1000_VFGOTC 0x00F34 #define E1000_VFGOTLBC 0x00F50 #define E1000_VFGPTLBC 0x00F44 #define E1000_VFGORLBC 0x00F48 #define E1000_VFGPRLBC 0x00F40 /* Virtualization statistical counters */ #define E1000_PFVFGPRC(_n) (0x010010 + (0x100 * (_n))) #define E1000_PFVFGPTC(_n) (0x010014 + (0x100 * (_n))) #define E1000_PFVFGORC(_n) (0x010018 + (0x100 * (_n))) #define E1000_PFVFGOTC(_n) (0x010034 + (0x100 * (_n))) #define E1000_PFVFMPRC(_n) (0x010038 + (0x100 * (_n))) #define E1000_PFVFGPRLBC(_n) (0x010040 + (0x100 * (_n))) #define E1000_PFVFGPTLBC(_n) (0x010044 + (0x100 * (_n))) #define E1000_PFVFGORLBC(_n) (0x010048 + (0x100 * (_n))) #define E1000_PFVFGOTLBC(_n) (0x010050 + (0x100 * (_n))) /* LinkSec */ #define E1000_LSECTXUT 0x04300 /* Tx Untagged Pkt Cnt */ #define E1000_LSECTXPKTE 0x04304 /* Encrypted Tx Pkts Cnt */ #define E1000_LSECTXPKTP 0x04308 /* Protected Tx Pkt Cnt */ #define E1000_LSECTXOCTE 0x0430C /* Encrypted Tx Octets Cnt */ #define E1000_LSECTXOCTP 0x04310 /* Protected Tx Octets Cnt */ #define E1000_LSECRXUT 0x04314 /* Untagged non-Strict Rx Pkt Cnt */ #define E1000_LSECRXOCTD 0x0431C /* Rx Octets Decrypted Count */ #define E1000_LSECRXOCTV 0x04320 /* Rx Octets Validated */ #define E1000_LSECRXBAD 0x04324 /* Rx Bad Tag */ #define E1000_LSECRXNOSCI 0x04328 /* Rx Packet No SCI Count */ #define E1000_LSECRXUNSCI 0x0432C /* Rx Packet Unknown SCI Count */ #define E1000_LSECRXUNCH 0x04330 /* Rx Unchecked Packets Count */ #define E1000_LSECRXDELAY 0x04340 /* Rx Delayed Packet Count */ #define E1000_LSECRXLATE 0x04350 /* Rx Late Packets Count */ #define E1000_LSECRXOK(_n) (0x04360 + (0x04 * (_n))) /* Rx Pkt OK Cnt */ #define E1000_LSECRXINV(_n) (0x04380 + (0x04 * (_n))) /* Rx Invalid Cnt */ #define E1000_LSECRXNV(_n) (0x043A0 + (0x04 * (_n))) /* Rx Not Valid Cnt */ #define E1000_LSECRXUNSA 0x043C0 /* Rx Unused SA Count */ #define E1000_LSECRXNUSA 0x043D0 /* Rx Not Using SA Count */ #define E1000_LSECTXCAP 0x0B000 /* Tx Capabilities Register - RO */ #define E1000_LSECRXCAP 0x0B300 /* Rx Capabilities Register - RO */ #define E1000_LSECTXCTRL 0x0B004 /* Tx Control - RW */ #define E1000_LSECRXCTRL 0x0B304 /* Rx Control - RW */ #define E1000_LSECTXSCL 0x0B008 /* Tx SCI Low - RW */ #define E1000_LSECTXSCH 0x0B00C /* Tx SCI High - RW */ #define E1000_LSECTXSA 0x0B010 /* Tx SA0 - RW */ #define E1000_LSECTXPN0 0x0B018 /* Tx SA PN 0 - RW */ #define E1000_LSECTXPN1 0x0B01C /* Tx SA PN 1 - RW */ #define E1000_LSECRXSCL 0x0B3D0 /* Rx SCI Low - RW */ #define E1000_LSECRXSCH 0x0B3E0 /* Rx SCI High - RW */ /* LinkSec Tx 128-bit Key 0 - WO */ #define E1000_LSECTXKEY0(_n) (0x0B020 + (0x04 * (_n))) /* LinkSec Tx 128-bit Key 1 - WO */ #define E1000_LSECTXKEY1(_n) (0x0B030 + (0x04 * (_n))) #define E1000_LSECRXSA(_n) (0x0B310 + (0x04 * (_n))) /* Rx SAs - RW */ #define E1000_LSECRXPN(_n) (0x0B330 + (0x04 * (_n))) /* Rx SAs - RW */ /* LinkSec Rx Keys - where _n is the SA no. and _m the 4 dwords of the 128 bit * key - RW. */ #define E1000_LSECRXKEY(_n, _m) (0x0B350 + (0x10 * (_n)) + (0x04 * (_m))) #define E1000_SSVPC 0x041A0 /* Switch Security Violation Pkt Cnt */ #define E1000_IPSCTRL 0xB430 /* IpSec Control Register */ #define E1000_IPSRXCMD 0x0B408 /* IPSec Rx Command Register - RW */ #define E1000_IPSRXIDX 0x0B400 /* IPSec Rx Index - RW */ /* IPSec Rx IPv4/v6 Address - RW */ #define E1000_IPSRXIPADDR(_n) (0x0B420 + (0x04 * (_n))) /* IPSec Rx 128-bit Key - RW */ #define E1000_IPSRXKEY(_n) (0x0B410 + (0x04 * (_n))) #define E1000_IPSRXSALT 0x0B404 /* IPSec Rx Salt - RW */ #define E1000_IPSRXSPI 0x0B40C /* IPSec Rx SPI - RW */ /* IPSec Tx 128-bit Key - RW */ #define E1000_IPSTXKEY(_n) (0x0B460 + (0x04 * (_n))) #define E1000_IPSTXSALT 0x0B454 /* IPSec Tx Salt - RW */ #define E1000_IPSTXIDX 0x0B450 /* IPSec Tx SA IDX - RW */ #define E1000_PCS_CFG0 0x04200 /* PCS Configuration 0 - RW */ #define E1000_PCS_LCTL 0x04208 /* PCS Link Control - RW */ #define E1000_PCS_LSTAT 0x0420C /* PCS Link Status - RO */ #define E1000_CBTMPC 0x0402C /* Circuit Breaker Tx Packet Count */ #define E1000_HTDPMC 0x0403C /* Host Transmit Discarded Packets */ #define E1000_CBRDPC 0x04044 /* Circuit Breaker Rx Dropped Count */ #define E1000_CBRMPC 0x040FC /* Circuit Breaker Rx Packet Count */ #define E1000_RPTHC 0x04104 /* Rx Packets To Host */ #define E1000_HGPTC 0x04118 /* Host Good Packets Tx Count */ #define E1000_HTCBDPC 0x04124 /* Host Tx Circuit Breaker Dropped Count */ #define E1000_HGORCL 0x04128 /* Host Good Octets Received Count Low */ #define E1000_HGORCH 0x0412C /* Host Good Octets Received Count High */ #define E1000_HGOTCL 0x04130 /* Host Good Octets Transmit Count Low */ #define E1000_HGOTCH 0x04134 /* Host Good Octets Transmit Count High */ #define E1000_LENERRS 0x04138 /* Length Errors Count */ #define E1000_SCVPC 0x04228 /* SerDes/SGMII Code Violation Pkt Count */ #define E1000_HRMPC 0x0A018 /* Header Redirection Missed Packet Count */ #define E1000_PCS_ANADV 0x04218 /* AN advertisement - RW */ #define E1000_PCS_LPAB 0x0421C /* Link Partner Ability - RW */ #define E1000_PCS_NPTX 0x04220 /* AN Next Page Transmit - RW */ #define E1000_PCS_LPABNP 0x04224 /* Link Partner Ability Next Pg - RW */ #define E1000_RXCSUM 0x05000 /* Rx Checksum Control - RW */ #define E1000_RLPML 0x05004 /* Rx Long Packet Max Length */ #define E1000_RFCTL 0x05008 /* Receive Filter Control*/ #define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */ #define E1000_RA 0x05400 /* Receive Address - RW Array */ #define E1000_RA2 0x054E0 /* 2nd half of Rx address array - RW Array */ #define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */ #define E1000_VT_CTL 0x0581C /* VMDq Control - RW */ #define E1000_CIAA 0x05B88 /* Config Indirect Access Address - RW */ #define E1000_CIAD 0x05B8C /* Config Indirect Access Data - RW */ #define E1000_VFQA0 0x0B000 /* VLAN Filter Queue Array 0 - RW Array */ #define E1000_VFQA1 0x0B200 /* VLAN Filter Queue Array 1 - RW Array */ #define E1000_WUC 0x05800 /* Wakeup Control - RW */ #define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */ #define E1000_WUS 0x05810 /* Wakeup Status - RO */ #define E1000_MANC 0x05820 /* Management Control - RW */ #define E1000_IPAV 0x05838 /* IP Address Valid - RW */ #define E1000_IP4AT 0x05840 /* IPv4 Address Table - RW Array */ #define E1000_IP6AT 0x05880 /* IPv6 Address Table - RW Array */ #define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */ #define E1000_WUPM 0x05A00 /* Wakeup Packet Memory - RO A */ #define E1000_PBACL 0x05B68 /* MSIx PBA Clear - Read/Write 1's to clear */ #define E1000_FFLT 0x05F00 /* Flexible Filter Length Table - RW Array */ #define E1000_HOST_IF 0x08800 /* Host Interface */ #define E1000_HIBBA 0x8F40 /* Host Interface Buffer Base Address */ /* Flexible Host Filter Table */ #define E1000_FHFT(_n) (0x09000 + ((_n) * 0x100)) /* Ext Flexible Host Filter Table */ #define E1000_FHFT_EXT(_n) (0x09A00 + ((_n) * 0x100)) #define E1000_KMRNCTRLSTA 0x00034 /* MAC-PHY interface - RW */ #define E1000_MANC2H 0x05860 /* Management Control To Host - RW */ /* Management Decision Filters */ #define E1000_MDEF(_n) (0x05890 + (4 * (_n))) #define E1000_SW_FW_SYNC 0x05B5C /* SW-FW Synchronization - RW */ #define E1000_CCMCTL 0x05B48 /* CCM Control Register */ #define E1000_GIOCTL 0x05B44 /* GIO Analog Control Register */ #define E1000_SCCTL 0x05B4C /* PCIc PLL Configuration Register */ #define E1000_GCR 0x05B00 /* PCI-Ex Control */ #define E1000_GCR2 0x05B64 /* PCI-Ex Control #2 */ #define E1000_GSCL_1 0x05B10 /* PCI-Ex Statistic Control #1 */ #define E1000_GSCL_2 0x05B14 /* PCI-Ex Statistic Control #2 */ #define E1000_GSCL_3 0x05B18 /* PCI-Ex Statistic Control #3 */ #define E1000_GSCL_4 0x05B1C /* PCI-Ex Statistic Control #4 */ #define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */ #define E1000_SWSM 0x05B50 /* SW Semaphore */ #define E1000_FWSM 0x05B54 /* FW Semaphore */ /* Driver-only SW semaphore (not used by BOOT agents) */ #define E1000_SWSM2 0x05B58 #define E1000_DCA_ID 0x05B70 /* DCA Requester ID Information - RO */ #define E1000_DCA_CTRL 0x05B74 /* DCA Control - RW */ #define E1000_UFUSE 0x05B78 /* UFUSE - RO */ #define E1000_FFLT_DBG 0x05F04 /* Debug Register */ #define E1000_HICR 0x08F00 /* Host Interface Control */ #define E1000_FWSTS 0x08F0C /* FW Status */ /* RSS registers */ #define E1000_CPUVEC 0x02C10 /* CPU Vector Register - RW */ #define E1000_MRQC 0x05818 /* Multiple Receive Control - RW */ #define E1000_IMIR(_i) (0x05A80 + ((_i) * 4)) /* Immediate Interrupt */ #define E1000_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* Immediate INTR Ext*/ #define E1000_IMIRVP 0x05AC0 /* Immediate INT Rx VLAN Priority -RW */ #define E1000_MSIXBM(_i) (0x01600 + ((_i) * 4)) /* MSI-X Alloc Reg -RW */ #define E1000_RETA(_i) (0x05C00 + ((_i) * 4)) /* Redirection Table - RW */ #define E1000_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* RSS Random Key - RW */ #define E1000_RSSIM 0x05864 /* RSS Interrupt Mask */ #define E1000_RSSIR 0x05868 /* RSS Interrupt Request */ /* VT Registers */ #define E1000_SWPBS 0x03004 /* Switch Packet Buffer Size - RW */ #define E1000_MBVFICR 0x00C80 /* Mailbox VF Cause - RWC */ #define E1000_MBVFIMR 0x00C84 /* Mailbox VF int Mask - RW */ #define E1000_VFLRE 0x00C88 /* VF Register Events - RWC */ #define E1000_VFRE 0x00C8C /* VF Receive Enables */ #define E1000_VFTE 0x00C90 /* VF Transmit Enables */ #define E1000_QDE 0x02408 /* Queue Drop Enable - RW */ #define E1000_DTXSWC 0x03500 /* DMA Tx Switch Control - RW */ #define E1000_WVBR 0x03554 /* VM Wrong Behavior - RWS */ #define E1000_RPLOLR 0x05AF0 /* Replication Offload - RW */ #define E1000_UTA 0x0A000 /* Unicast Table Array - RW */ #define E1000_IOVCTL 0x05BBC /* IOV Control Register */ #define E1000_VMRCTL 0X05D80 /* Virtual Mirror Rule Control */ #define E1000_VMRVLAN 0x05D90 /* Virtual Mirror Rule VLAN */ #define E1000_VMRVM 0x05DA0 /* Virtual Mirror Rule VM */ #define E1000_MDFB 0x03558 /* Malicious Driver free block */ #define E1000_LVMMC 0x03548 /* Last VM Misbehavior cause */ #define E1000_TXSWC 0x05ACC /* Tx Switch Control */ #define E1000_SCCRL 0x05DB0 /* Storm Control Control */ #define E1000_BSCTRH 0x05DB8 /* Broadcast Storm Control Threshold */ #define E1000_MSCTRH 0x05DBC /* Multicast Storm Control Threshold */ /* These act per VF so an array friendly macro is used */ #define E1000_V2PMAILBOX(_n) (0x00C40 + (4 * (_n))) #define E1000_P2VMAILBOX(_n) (0x00C00 + (4 * (_n))) #define E1000_VMBMEM(_n) (0x00800 + (64 * (_n))) #define E1000_VFVMBMEM(_n) (0x00800 + (_n)) #define E1000_VMOLR(_n) (0x05AD0 + (4 * (_n))) /* VLAN Virtual Machine Filter - RW */ #define E1000_VLVF(_n) (0x05D00 + (4 * (_n))) #define E1000_VMVIR(_n) (0x03700 + (4 * (_n))) #define E1000_DVMOLR(_n) (0x0C038 + (0x40 * (_n))) /* DMA VM offload */ #define E1000_VTCTRL(_n) (0x10000 + (0x100 * (_n))) /* VT Control */ #define E1000_TSYNCRXCTL 0x0B620 /* Rx Time Sync Control register - RW */ #define E1000_TSYNCTXCTL 0x0B614 /* Tx Time Sync Control register - RW */ #define E1000_TSYNCRXCFG 0x05F50 /* Time Sync Rx Configuration - RW */ #define E1000_RXSTMPL 0x0B624 /* Rx timestamp Low - RO */ #define E1000_RXSTMPH 0x0B628 /* Rx timestamp High - RO */ #define E1000_RXSATRL 0x0B62C /* Rx timestamp attribute low - RO */ #define E1000_RXSATRH 0x0B630 /* Rx timestamp attribute high - RO */ #define E1000_TXSTMPL 0x0B618 /* Tx timestamp value Low - RO */ #define E1000_TXSTMPH 0x0B61C /* Tx timestamp value High - RO */ #define E1000_SYSTIML 0x0B600 /* System time register Low - RO */ #define E1000_SYSTIMH 0x0B604 /* System time register High - RO */ #define E1000_TIMINCA 0x0B608 /* Increment attributes register - RW */ #define E1000_TIMADJL 0x0B60C /* Time sync time adjustment offset Low - RW */ #define E1000_TIMADJH 0x0B610 /* Time sync time adjustment offset High - RW */ #define E1000_TSAUXC 0x0B640 /* Timesync Auxiliary Control register */ -#define E1000_SYSSTMPL 0x0B648 /* HH Timesync system stamp low register */ -#define E1000_SYSSTMPH 0x0B64C /* HH Timesync system stamp hi register */ -#define E1000_PLTSTMPL 0x0B640 /* HH Timesync platform stamp low register */ -#define E1000_PLTSTMPH 0x0B644 /* HH Timesync platform stamp hi register */ #define E1000_SYSTIMR 0x0B6F8 /* System time register Residue */ #define E1000_TSICR 0x0B66C /* Interrupt Cause Register */ #define E1000_TSIM 0x0B674 /* Interrupt Mask Register */ #define E1000_RXMTRL 0x0B634 /* Time sync Rx EtherType and Msg Type - RW */ #define E1000_RXUDP 0x0B638 /* Time Sync Rx UDP Port - RW */ /* Filtering Registers */ #define E1000_SAQF(_n) (0x05980 + (4 * (_n))) /* Source Address Queue Fltr */ #define E1000_DAQF(_n) (0x059A0 + (4 * (_n))) /* Dest Address Queue Fltr */ #define E1000_SPQF(_n) (0x059C0 + (4 * (_n))) /* Source Port Queue Fltr */ #define E1000_FTQF(_n) (0x059E0 + (4 * (_n))) /* 5-tuple Queue Fltr */ #define E1000_TTQF(_n) (0x059E0 + (4 * (_n))) /* 2-tuple Queue Fltr */ #define E1000_SYNQF(_n) (0x055FC + (4 * (_n))) /* SYN Packet Queue Fltr */ #define E1000_ETQF(_n) (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */ #define E1000_RTTDCS 0x3600 /* Reedtown Tx Desc plane control and status */ #define E1000_RTTPCS 0x3474 /* Reedtown Tx Packet Plane control and status */ #define E1000_RTRPCS 0x2474 /* Rx packet plane control and status */ #define E1000_RTRUP2TC 0x05AC4 /* Rx User Priority to Traffic Class */ #define E1000_RTTUP2TC 0x0418 /* Transmit User Priority to Traffic Class */ /* Tx Desc plane TC Rate-scheduler config */ #define E1000_RTTDTCRC(_n) (0x3610 + ((_n) * 4)) /* Tx Packet plane TC Rate-Scheduler Config */ #define E1000_RTTPTCRC(_n) (0x3480 + ((_n) * 4)) /* Rx Packet plane TC Rate-Scheduler Config */ #define E1000_RTRPTCRC(_n) (0x2480 + ((_n) * 4)) /* Tx Desc Plane TC Rate-Scheduler Status */ #define E1000_RTTDTCRS(_n) (0x3630 + ((_n) * 4)) /* Tx Desc Plane TC Rate-Scheduler MMW */ #define E1000_RTTDTCRM(_n) (0x3650 + ((_n) * 4)) /* Tx Packet plane TC Rate-Scheduler Status */ #define E1000_RTTPTCRS(_n) (0x34A0 + ((_n) * 4)) /* Tx Packet plane TC Rate-scheduler MMW */ #define E1000_RTTPTCRM(_n) (0x34C0 + ((_n) * 4)) /* Rx Packet plane TC Rate-Scheduler Status */ #define E1000_RTRPTCRS(_n) (0x24A0 + ((_n) * 4)) /* Rx Packet plane TC Rate-Scheduler MMW */ #define E1000_RTRPTCRM(_n) (0x24C0 + ((_n) * 4)) /* Tx Desc plane VM Rate-Scheduler MMW*/ #define E1000_RTTDVMRM(_n) (0x3670 + ((_n) * 4)) /* Tx BCN Rate-Scheduler MMW */ #define E1000_RTTBCNRM(_n) (0x3690 + ((_n) * 4)) #define E1000_RTTDQSEL 0x3604 /* Tx Desc Plane Queue Select */ #define E1000_RTTDVMRC 0x3608 /* Tx Desc Plane VM Rate-Scheduler Config */ #define E1000_RTTDVMRS 0x360C /* Tx Desc Plane VM Rate-Scheduler Status */ #define E1000_RTTBCNRC 0x36B0 /* Tx BCN Rate-Scheduler Config */ #define E1000_RTTBCNRS 0x36B4 /* Tx BCN Rate-Scheduler Status */ #define E1000_RTTBCNCR 0xB200 /* Tx BCN Control Register */ #define E1000_RTTBCNTG 0x35A4 /* Tx BCN Tagging */ #define E1000_RTTBCNCP 0xB208 /* Tx BCN Congestion point */ #define E1000_RTRBCNCR 0xB20C /* Rx BCN Control Register */ #define E1000_RTTBCNRD 0x36B8 /* Tx BCN Rate Drift */ #define E1000_PFCTOP 0x1080 /* Priority Flow Control Type and Opcode */ #define E1000_RTTBCNIDX 0xB204 /* Tx BCN Congestion Point */ #define E1000_RTTBCNACH 0x0B214 /* Tx BCN Control High */ #define E1000_RTTBCNACL 0x0B210 /* Tx BCN Control Low */ /* DMA Coalescing registers */ #define E1000_DMACR 0x02508 /* Control Register */ #define E1000_DMCTXTH 0x03550 /* Transmit Threshold */ #define E1000_DMCTLX 0x02514 /* Time to Lx Request */ #define E1000_DMCRTRH 0x05DD0 /* Receive Packet Rate Threshold */ #define E1000_DMCCNT 0x05DD4 /* Current Rx Count */ #define E1000_FCRTC 0x02170 /* Flow Control Rx high watermark */ #define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */ /* PCIe Parity Status Register */ #define E1000_PCIEERRSTS 0x05BA8 #define E1000_PROXYS 0x5F64 /* Proxying Status */ #define E1000_PROXYFC 0x5F60 /* Proxying Filter Control */ /* Thermal sensor configuration and status registers */ #define E1000_THMJT 0x08100 /* Junction Temperature */ #define E1000_THLOWTC 0x08104 /* Low Threshold Control */ #define E1000_THMIDTC 0x08108 /* Mid Threshold Control */ #define E1000_THHIGHTC 0x0810C /* High Threshold Control */ #define E1000_THSTAT 0x08110 /* Thermal Sensor Status */ /* Energy Efficient Ethernet "EEE" registers */ #define E1000_IPCNFG 0x0E38 /* Internal PHY Configuration */ #define E1000_LTRC 0x01A0 /* Latency Tolerance Reporting Control */ #define E1000_EEER 0x0E30 /* Energy Efficient Ethernet "EEE"*/ #define E1000_EEE_SU 0x0E34 /* EEE Setup */ #define E1000_TLPIC 0x4148 /* EEE Tx LPI Count - TLPIC */ #define E1000_RLPIC 0x414C /* EEE Rx LPI Count - RLPIC */ /* OS2BMC Registers */ #define E1000_B2OSPC 0x08FE0 /* BMC2OS packets sent by BMC */ #define E1000_B2OGPRC 0x04158 /* BMC2OS packets received by host */ #define E1000_O2BGPTC 0x08FE4 /* OS2BMC packets received by BMC */ #define E1000_O2BSPC 0x0415C /* OS2BMC packets transmitted by host */ +#define E1000_DOBFFCTL 0x3F24 /* DMA OBFF Control Register */ #endif Index: projects/clang-trunk/sys/dev/e1000/if_em.c =================================================================== --- projects/clang-trunk/sys/dev/e1000/if_em.c (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/if_em.c (revision 287502) @@ -1,6197 +1,6038 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "opt_em.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #ifdef DDB #include #include #endif #if __FreeBSD_version >= 800000 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "e1000_api.h" #include "e1000_82571.h" #include "if_em.h" /********************************************************************* * Set this to one to display debug statistics *********************************************************************/ int em_display_debug_stats = 0; /********************************************************************* * Driver version: *********************************************************************/ -char em_driver_version[] = "7.5.2"; +char em_driver_version[] = "7.4.2"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static em_vendor_info_t em_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection */ { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_LM2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2, - PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0}, /* required last entry */ { 0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings for all supported NICs. *********************************************************************/ static char *em_strings[] = { "Intel(R) PRO/1000 Network Connection" }; /********************************************************************* * Function prototypes *********************************************************************/ static int em_probe(device_t); static int em_attach(device_t); static int em_detach(device_t); static int em_shutdown(device_t); static int em_suspend(device_t); static int em_resume(device_t); #ifdef EM_MULTIQUEUE static int em_mq_start(if_t, struct mbuf *); static int em_mq_start_locked(if_t, struct tx_ring *); static void em_qflush(if_t); #else static void em_start(if_t); static void em_start_locked(if_t, struct tx_ring *); #endif static int em_ioctl(if_t, u_long, caddr_t); static uint64_t em_get_counter(if_t, ift_counter); static void em_init(void *); static void em_init_locked(struct adapter *); static void em_stop(void *); static void em_media_status(if_t, struct ifmediareq *); static int em_media_change(if_t); static void em_identify_hardware(struct adapter *); static int em_allocate_pci_resources(struct adapter *); static int em_allocate_legacy(struct adapter *); static int em_allocate_msix(struct adapter *); static int em_allocate_queues(struct adapter *); static int em_setup_msix(struct adapter *); static void em_free_pci_resources(struct adapter *); static void em_local_timer(void *); static void em_reset(struct adapter *); static int em_setup_interface(device_t, struct adapter *); -static void em_flush_desc_rings(struct adapter *); static void em_setup_transmit_structures(struct adapter *); static void em_initialize_transmit_unit(struct adapter *); static int em_allocate_transmit_buffers(struct tx_ring *); static void em_free_transmit_structures(struct adapter *); static void em_free_transmit_buffers(struct tx_ring *); static int em_setup_receive_structures(struct adapter *); static int em_allocate_receive_buffers(struct rx_ring *); static void em_initialize_receive_unit(struct adapter *); static void em_free_receive_structures(struct adapter *); static void em_free_receive_buffers(struct rx_ring *); static void em_enable_intr(struct adapter *); static void em_disable_intr(struct adapter *); static void em_update_stats_counters(struct adapter *); static void em_add_hw_stats(struct adapter *adapter); static void em_txeof(struct tx_ring *); static bool em_rxeof(struct rx_ring *, int, int *); #ifndef __NO_STRICT_ALIGNMENT static int em_fixup_rx(struct rx_ring *); #endif static void em_receive_checksum(struct e1000_rx_desc *, struct mbuf *); static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int, struct ip *, u32 *, u32 *); static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *, struct tcphdr *, u32 *, u32 *); static void em_set_promisc(struct adapter *); static void em_disable_promisc(struct adapter *); static void em_set_multi(struct adapter *); static void em_update_link_status(struct adapter *); static void em_refresh_mbufs(struct rx_ring *, int); static void em_register_vlan(void *, if_t, u16); static void em_unregister_vlan(void *, if_t, u16); static void em_setup_vlan_hw_support(struct adapter *); static int em_xmit(struct tx_ring *, struct mbuf **); static int em_dma_malloc(struct adapter *, bus_size_t, struct em_dma_alloc *, int); static void em_dma_free(struct adapter *, struct em_dma_alloc *); static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); static void em_print_nvm_info(struct adapter *); static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS); static void em_print_debug_info(struct adapter *); static int em_is_valid_ether_addr(u8 *); static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS); static void em_add_int_delay_sysctl(struct adapter *, const char *, const char *, struct em_int_delay_info *, int, int); /* Management and WOL Support */ static void em_init_manageability(struct adapter *); static void em_release_manageability(struct adapter *); static void em_get_hw_control(struct adapter *); static void em_release_hw_control(struct adapter *); static void em_get_wakeup(device_t); static void em_enable_wakeup(device_t); static int em_enable_phy_wakeup(struct adapter *); static void em_led_func(void *, int); static void em_disable_aspm(struct adapter *); static int em_irq_fast(void *); /* MSIX handlers */ static void em_msix_tx(void *); static void em_msix_rx(void *); static void em_msix_link(void *); static void em_handle_tx(void *context, int pending); static void em_handle_rx(void *context, int pending); static void em_handle_link(void *context, int pending); #ifdef EM_MULTIQUEUE static void em_enable_vectors_82574(struct adapter *); #endif static void em_set_sysctl_value(struct adapter *, const char *, const char *, int *, int); static int em_set_flowcntl(SYSCTL_HANDLER_ARGS); static int em_sysctl_eee(SYSCTL_HANDLER_ARGS); static __inline void em_rx_discard(struct rx_ring *, int); #ifdef DEVICE_POLLING static poll_handler_t em_poll; #endif /* POLLING */ /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t em_methods[] = { /* Device interface */ DEVMETHOD(device_probe, em_probe), DEVMETHOD(device_attach, em_attach), DEVMETHOD(device_detach, em_detach), DEVMETHOD(device_shutdown, em_shutdown), DEVMETHOD(device_suspend, em_suspend), DEVMETHOD(device_resume, em_resume), DEVMETHOD_END }; static driver_t em_driver = { "em", em_methods, sizeof(struct adapter), }; devclass_t em_devclass; DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0); MODULE_DEPEND(em, pci, 1, 1, 1); MODULE_DEPEND(em, ether, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(em, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ /********************************************************************* * Tunable default values. *********************************************************************/ #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) #define M_TSO_LEN 66 #define MAX_INTS_PER_SEC 8000 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) /* Allow common code without TSO */ #ifndef CSUM_TSO #define CSUM_TSO 0 #endif #define TSO_WORKAROUND 4 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters"); static int em_disable_crc_stripping = 0; SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN, &em_disable_crc_stripping, 0, "Disable CRC Stripping"); static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt, 0, "Default transmit interrupt delay in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt, 0, "Default receive interrupt delay in usecs"); static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN, &em_tx_abs_int_delay_dflt, 0, "Default transmit interrupt delay limit in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN, &em_rx_abs_int_delay_dflt, 0, "Default receive interrupt delay limit in usecs"); static int em_rxd = EM_DEFAULT_RXD; static int em_txd = EM_DEFAULT_TXD; SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0, "Number of receive descriptors per queue"); SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0, "Number of transmit descriptors per queue"); static int em_smart_pwr_down = FALSE; SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down, 0, "Set to true to leave smart power down enabled on newer adapters"); /* Controls whether promiscuous also shows bad packets */ static int em_debug_sbp = FALSE; SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0, "Show bad packets in promiscuous mode"); static int em_enable_msix = TRUE; SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0, "Enable MSI-X interrupts"); #ifdef EM_MULTIQUEUE static int em_num_queues = 1; SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0, "82574 only: Number of queues to configure, 0 indicates autoconfigure"); #endif /* ** Global variable to store last used CPU when binding queues ** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a ** queue is bound to a cpu. */ static int em_last_bind_cpu = -1; /* How many packets rxeof tries to clean at a time */ static int em_rx_process_limit = 100; SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &em_rx_process_limit, 0, "Maximum number of received packets to process " "at a time, -1 means unlimited"); /* Energy efficient ethernet - default to OFF */ static int eee_setting = 1; SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0, "Enable Energy Efficient Ethernet"); /* Global used in WOL setup with multiport cards */ static int global_quad_port_a = 0; #ifdef DEV_NETMAP /* see ixgbe.c for details */ #include #endif /* DEV_NETMAP */ /********************************************************************* * Device identification routine * * em_probe determines if the driver should be loaded on * adapter based on PCI vendor/device id of the adapter. * * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int em_probe(device_t dev) { char adapter_name[60]; uint16_t pci_vendor_id = 0; uint16_t pci_device_id = 0; uint16_t pci_subvendor_id = 0; uint16_t pci_subdevice_id = 0; em_vendor_info_t *ent; INIT_DEBUGOUT("em_probe: begin"); pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != EM_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = em_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == PCI_ANY_ID)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == PCI_ANY_ID))) { sprintf(adapter_name, "%s %s", em_strings[ent->index], em_driver_version); device_set_desc_copy(dev, adapter_name); return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int em_attach(device_t dev) { struct adapter *adapter; struct e1000_hw *hw; int error = 0; INIT_DEBUGOUT("em_attach: begin"); if (resource_disabled("em", device_get_unit(dev))) { device_printf(dev, "Disabled by device hint\n"); return (ENXIO); } adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; hw = &adapter->hw; EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_nvm_info, "I", "NVM Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_debug_info, "I", "Debug Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_set_flowcntl, "I", "Flow Control"); callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); /* Determine hardware and mac info */ em_identify_hardware(adapter); /* Setup PCI resources */ if (em_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* ** For ICH8 and family we need to ** map the flash memory, and this ** must happen after the MAC is ** identified */ if ((hw->mac.type == e1000_ich8lan) || (hw->mac.type == e1000_ich9lan) || (hw->mac.type == e1000_ich10lan) || (hw->mac.type == e1000_pchlan) || (hw->mac.type == e1000_pch2lan) || (hw->mac.type == e1000_pch_lpt)) { int rid = EM_BAR_TYPE_FLASH; adapter->flash = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->flash == NULL) { device_printf(dev, "Mapping of Flash failed\n"); error = ENXIO; goto err_pci; } /* This is used in the shared code */ hw->flash_address = (u8 *)adapter->flash; adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash); adapter->osdep.flash_bus_space_handle = rman_get_bushandle(adapter->flash); } /* Do Shared Code initialization */ if (e1000_setup_init_funcs(hw, TRUE)) { device_printf(dev, "Setup of Shared code failed\n"); error = ENXIO; goto err_pci; } /* - ** In the new SPT device flash is not a - ** separate BAR, rather it is also in BAR0, - ** so use the same tag and handle for the - ** FLASH read/write macros in the shared - ** code. - */ - if (hw->mac.type == e1000_pch_spt) { - adapter->osdep.flash_bus_space_tag = - adapter->osdep.mem_bus_space_tag; - adapter->osdep.flash_bus_space_handle = - adapter->osdep.mem_bus_space_handle; - } - - /* * Setup MSI/X or MSI if PCI Express */ adapter->msix = em_setup_msix(adapter); e1000_get_bus_info(hw); /* Set up some sysctls for the tunable interrupt delays */ em_add_int_delay_sysctl(adapter, "rx_int_delay", "receive interrupt delay in usecs", &adapter->rx_int_delay, E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_int_delay", "transmit interrupt delay in usecs", &adapter->tx_int_delay, E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "rx_abs_int_delay", "receive interrupt delay limit in usecs", &adapter->rx_abs_int_delay, E1000_REGISTER(hw, E1000_RADV), em_rx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_abs_int_delay", "transmit interrupt delay limit in usecs", &adapter->tx_abs_int_delay, E1000_REGISTER(hw, E1000_TADV), em_tx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "itr", "interrupt delay limit in usecs/4", &adapter->tx_itr, E1000_REGISTER(hw, E1000_ITR), DEFAULT_ITR); /* Sysctl for limiting the amount of work done in the taskqueue */ em_set_sysctl_value(adapter, "rx_processing_limit", "max number of rx packets to process", &adapter->rx_process_limit, em_rx_process_limit); /* * Validate number of transmit and receive descriptors. It * must not exceed hardware maximum, and must be multiple * of E1000_DBA_ALIGN. */ if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 || (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) { device_printf(dev, "Using %d TX descriptors instead of %d!\n", EM_DEFAULT_TXD, em_txd); adapter->num_tx_desc = EM_DEFAULT_TXD; } else adapter->num_tx_desc = em_txd; if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 || (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) { device_printf(dev, "Using %d RX descriptors instead of %d!\n", EM_DEFAULT_RXD, em_rxd); adapter->num_rx_desc = EM_DEFAULT_RXD; } else adapter->num_rx_desc = em_rxd; hw->mac.autoneg = DO_AUTO_NEG; hw->phy.autoneg_wait_to_complete = FALSE; hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; /* Copper options */ if (hw->phy.media_type == e1000_media_type_copper) { hw->phy.mdix = AUTO_ALL_MODES; hw->phy.disable_polarity_correction = FALSE; hw->phy.ms_type = EM_MASTER_SLAVE; } /* * Set the frame limits assuming * standard ethernet sized frames. */ adapter->hw.mac.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; /* * This controls when hardware reports transmit completion * status. */ hw->mac.report_tx_early = 1; /* ** Get queue/ring memory */ if (em_allocate_queues(adapter)) { error = ENOMEM; goto err_pci; } /* Allocate multicast array memory. */ adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Check SOL/IDER usage */ if (e1000_check_reset_block(hw)) device_printf(dev, "PHY reset is blocked" " due to SOL/IDER session.\n"); /* Sysctl for setting Energy Efficient Ethernet */ hw->dev_spec.ich8lan.eee_disable = eee_setting; SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_eee, "I", "Disable Energy Efficient Ethernet"); /* ** Start from a known state, this is ** important in reading the nvm and ** mac from that. */ e1000_reset_hw(hw); /* Make sure we have a good EEPROM before we read from it */ if (e1000_validate_nvm_checksum(hw) < 0) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state, call it again, ** if it fails a second time its a real issue. */ if (e1000_validate_nvm_checksum(hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } } /* Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_late; } if (!em_is_valid_ether_addr(hw->mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_late; } /* Disable ULP support */ e1000_disable_ulp_lpt_lp(hw, TRUE); /* ** Do interrupt configuration */ if (adapter->msix > 1) /* Do MSIX */ error = em_allocate_msix(adapter); else /* MSI or Legacy */ error = em_allocate_legacy(adapter); if (error) goto err_late; /* * Get Wake-on-Lan and Management info for later use */ em_get_wakeup(dev); /* Setup OS specific network interface */ if (em_setup_interface(dev, adapter) != 0) goto err_late; em_reset(adapter); /* Initialize statistics */ em_update_stats_counters(adapter); hw->mac.get_link_status = 1; em_update_link_status(adapter); /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); em_add_hw_stats(adapter); /* Non-AMT based hardware can now take control from firmware */ if (adapter->has_manage && !adapter->has_amt) em_get_hw_control(adapter); /* Tell the stack that the interface is not active */ if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); adapter->led_dev = led_create(em_led_func, adapter, device_get_nameunit(dev)); #ifdef DEV_NETMAP em_netmap_attach(adapter); #endif /* DEV_NETMAP */ INIT_DEBUGOUT("em_attach: end"); return (0); err_late: em_free_transmit_structures(adapter); em_free_receive_structures(adapter); em_release_hw_control(adapter); if (adapter->ifp != (void *)NULL) if_free(adapter->ifp); err_pci: em_free_pci_resources(adapter); free(adapter->mta, M_DEVBUF); EM_CORE_LOCK_DESTROY(adapter); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int em_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); if_t ifp = adapter->ifp; INIT_DEBUGOUT("em_detach: begin"); /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } #ifdef DEVICE_POLLING if (if_getcapenable(ifp) & IFCAP_POLLING) ether_poll_deregister(ifp); #endif if (adapter->led_dev != NULL) led_destroy(adapter->led_dev); EM_CORE_LOCK(adapter); adapter->in_detach = 1; em_stop(adapter); EM_CORE_UNLOCK(adapter); EM_CORE_LOCK_DESTROY(adapter); e1000_phy_hw_reset(&adapter->hw); em_release_manageability(adapter); em_release_hw_control(adapter); /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); ether_ifdetach(adapter->ifp); callout_drain(&adapter->timer); #ifdef DEV_NETMAP netmap_detach(ifp); #endif /* DEV_NETMAP */ em_free_pci_resources(adapter); bus_generic_detach(dev); if_free(ifp); em_free_transmit_structures(adapter); em_free_receive_structures(adapter); em_release_hw_control(adapter); free(adapter->mta, M_DEVBUF); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int em_shutdown(device_t dev) { return em_suspend(dev); } /* * Suspend/resume device methods. */ static int em_suspend(device_t dev) { struct adapter *adapter = device_get_softc(dev); EM_CORE_LOCK(adapter); em_release_manageability(adapter); em_release_hw_control(adapter); em_enable_wakeup(dev); EM_CORE_UNLOCK(adapter); return bus_generic_suspend(dev); } static int em_resume(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct tx_ring *txr = adapter->tx_rings; if_t ifp = adapter->ifp; EM_CORE_LOCK(adapter); if (adapter->hw.mac.type == e1000_pch2lan) e1000_resume_workarounds_pchlan(&adapter->hw); em_init_locked(adapter); em_init_manageability(adapter); if ((if_getflags(ifp) & IFF_UP) && (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); } } EM_CORE_UNLOCK(adapter); return bus_generic_resume(dev); } #ifndef EM_MULTIQUEUE static void em_start_locked(if_t ifp, struct tx_ring *txr) { struct adapter *adapter = if_getsoftc(ifp); struct mbuf *m_head; EM_TX_LOCK_ASSERT(txr); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; if (!adapter->link_active) return; while (!if_sendq_empty(ifp)) { /* Call cleanup if number of TX descriptors low */ if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD) em_txeof(txr); if (txr->tx_avail < EM_MAX_SCATTER) { if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0); break; } m_head = if_dequeue(ifp); if (m_head == NULL) break; /* * Encapsulation can modify our pointer, and or make it * NULL on failure. In that event, we can't requeue. */ if (em_xmit(txr, &m_head)) { if (m_head == NULL) break; if_sendq_prepend(ifp, m_head); break; } /* Mark the queue as having work */ if (txr->busy == EM_TX_IDLE) txr->busy = EM_TX_BUSY; /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); } return; } static void em_start(if_t ifp) { struct adapter *adapter = if_getsoftc(ifp); struct tx_ring *txr = adapter->tx_rings; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { EM_TX_LOCK(txr); em_start_locked(ifp, txr); EM_TX_UNLOCK(txr); } return; } #else /* EM_MULTIQUEUE */ /********************************************************************* * Multiqueue Transmit routines * * em_mq_start is called by the stack to initiate a transmit. * however, if busy the driver can queue the request rather * than do an immediate send. It is this that is an advantage * in this driver, rather than also having multiple tx queues. **********************************************************************/ /* ** Multiqueue capable stack interface */ static int em_mq_start(if_t ifp, struct mbuf *m) { struct adapter *adapter = if_getsoftc(ifp); struct tx_ring *txr = adapter->tx_rings; unsigned int i, error; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) i = m->m_pkthdr.flowid % adapter->num_queues; else i = curcpu % adapter->num_queues; txr = &adapter->tx_rings[i]; error = drbr_enqueue(ifp, txr->br, m); if (error) return (error); if (EM_TX_TRYLOCK(txr)) { em_mq_start_locked(ifp, txr); EM_TX_UNLOCK(txr); } else taskqueue_enqueue(txr->tq, &txr->tx_task); return (0); } static int em_mq_start_locked(if_t ifp, struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct mbuf *next; int err = 0, enq = 0; EM_TX_LOCK_ASSERT(txr); if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) || adapter->link_active == 0) { return (ENETDOWN); } /* Process the queue */ while ((next = drbr_peek(ifp, txr->br)) != NULL) { if ((err = em_xmit(txr, &next)) != 0) { if (next == NULL) { /* It was freed, move forward */ drbr_advance(ifp, txr->br); } else { /* * Still have one left, it may not be * the same since the transmit function * may have changed it. */ drbr_putback(ifp, txr->br, next); } break; } drbr_advance(ifp, txr->br); enq++; if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len); if (next->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); ETHER_BPF_MTAP(ifp, next); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) break; } /* Mark the queue as having work */ if ((enq > 0) && (txr->busy == EM_TX_IDLE)) txr->busy = EM_TX_BUSY; if (txr->tx_avail < EM_MAX_SCATTER) em_txeof(txr); if (txr->tx_avail < EM_MAX_SCATTER) { if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0); } return (err); } /* ** Flush all ring buffers */ static void em_qflush(if_t ifp) { struct adapter *adapter = if_getsoftc(ifp); struct tx_ring *txr = adapter->tx_rings; struct mbuf *m; for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) m_freem(m); EM_TX_UNLOCK(txr); } if_qflush(ifp); } #endif /* EM_MULTIQUEUE */ /********************************************************************* * Ioctl entry point * * em_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ static int em_ioctl(if_t ifp, u_long command, caddr_t data) { struct adapter *adapter = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = FALSE; int error = 0; if (adapter->in_detach) return (error); switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { if_setflagbits(ifp,IFF_UP,0); if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING)) em_init(adapter); #ifdef INET if (!(if_getflags(ifp) & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else error = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: { int max_frame_size; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); EM_CORE_LOCK(adapter); switch (adapter->hw.mac.type) { case e1000_82571: case e1000_82572: case e1000_ich9lan: case e1000_ich10lan: case e1000_pch2lan: case e1000_pch_lpt: - case e1000_pch_spt: case e1000_82574: case e1000_82583: case e1000_80003es2lan: /* 9K Jumbo Frame size */ max_frame_size = 9234; break; case e1000_pchlan: max_frame_size = 4096; break; /* Adapters that do not support jumbo frames */ case e1000_ich8lan: max_frame_size = ETHER_MAX_LEN; break; default: max_frame_size = MAX_JUMBO_FRAME_SIZE; } if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { EM_CORE_UNLOCK(adapter); error = EINVAL; break; } if_setmtu(ifp, ifr->ifr_mtu); adapter->hw.mac.max_frame_size = if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN; em_init_locked(adapter); EM_CORE_UNLOCK(adapter); break; } case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl rcv'd:\ SIOCSIFFLAGS (Set Interface Flags)"); EM_CORE_LOCK(adapter); if (if_getflags(ifp) & IFF_UP) { if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if ((if_getflags(ifp) ^ adapter->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { em_disable_promisc(adapter); em_set_promisc(adapter); } } else em_init_locked(adapter); } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) em_stop(adapter); adapter->if_flags = if_getflags(ifp); EM_CORE_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { EM_CORE_LOCK(adapter); em_disable_intr(adapter); em_set_multi(adapter); #ifdef DEVICE_POLLING if (!(if_getcapenable(ifp) & IFCAP_POLLING)) #endif em_enable_intr(adapter); EM_CORE_UNLOCK(adapter); } break; case SIOCSIFMEDIA: /* Check SOL/IDER usage */ EM_CORE_LOCK(adapter); if (e1000_check_reset_block(&adapter->hw)) { EM_CORE_UNLOCK(adapter); device_printf(adapter->dev, "Media change is" " blocked due to SOL/IDER session.\n"); break; } EM_CORE_UNLOCK(adapter); /* falls thru */ case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl rcv'd: \ SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: { int mask, reinit; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); reinit = 0; mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(em_poll, ifp); if (error) return (error); EM_CORE_LOCK(adapter); em_disable_intr(adapter); if_setcapenablebit(ifp, IFCAP_POLLING, 0); EM_CORE_UNLOCK(adapter); } else { error = ether_poll_deregister(ifp); /* Enable interrupt even in error case */ EM_CORE_LOCK(adapter); em_enable_intr(adapter); if_setcapenablebit(ifp, 0, IFCAP_POLLING); EM_CORE_UNLOCK(adapter); } } #endif if (mask & IFCAP_HWCSUM) { if_togglecapenable(ifp,IFCAP_HWCSUM); reinit = 1; } if (mask & IFCAP_TSO4) { if_togglecapenable(ifp,IFCAP_TSO4); reinit = 1; } if (mask & IFCAP_VLAN_HWTAGGING) { if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING); reinit = 1; } if (mask & IFCAP_VLAN_HWFILTER) { if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER); reinit = 1; } if (mask & IFCAP_VLAN_HWTSO) { if_togglecapenable(ifp, IFCAP_VLAN_HWTSO); reinit = 1; } if ((mask & IFCAP_WOL) && (if_getcapabilities(ifp) & IFCAP_WOL) != 0) { if (mask & IFCAP_WOL_MCAST) if_togglecapenable(ifp, IFCAP_WOL_MCAST); if (mask & IFCAP_WOL_MAGIC) if_togglecapenable(ifp, IFCAP_WOL_MAGIC); } if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING)) em_init(adapter); if_vlancap(ifp); break; } default: error = ether_ioctl(ifp, command, data); break; } return (error); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void em_init_locked(struct adapter *adapter) { if_t ifp = adapter->ifp; device_t dev = adapter->dev; INIT_DEBUGOUT("em_init: begin"); EM_CORE_LOCK_ASSERT(adapter); em_disable_intr(adapter); callout_stop(&adapter->timer); /* Get the latest mac address, User can use a LAA */ bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* * With the 82571 adapter, RAR[0] may be overwritten * when the other port is reset, we make a duplicate * in RAR[14] for that eventuality, this assures * the interface continues to function. */ if (adapter->hw.mac.type == e1000_82571) { e1000_set_laa_state_82571(&adapter->hw, TRUE); e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, E1000_RAR_ENTRIES - 1); } /* Initialize the hardware */ em_reset(adapter); em_update_link_status(adapter); /* Setup VLAN support, basic and offload if available */ E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); /* Set hardware offload abilities */ if_clearhwassist(ifp); if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0); - /* - ** There have proven to be problems with TSO when not - ** at full gigabit speed, so disable the assist automatically - ** when at lower speeds. -jfv - */ - if (if_getcapenable(ifp) & IFCAP_TSO4) { - if (adapter->link_speed == SPEED_1000) - if_sethwassistbits(ifp, CSUM_TSO, 0); - } + if (if_getcapenable(ifp) & IFCAP_TSO4) + if_sethwassistbits(ifp, CSUM_TSO, 0); /* Configure for OS presence */ em_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ em_setup_transmit_structures(adapter); em_initialize_transmit_unit(adapter); /* Setup Multicast table */ em_set_multi(adapter); /* ** Figure out the desired mbuf ** pool for doing jumbos */ if (adapter->hw.mac.max_frame_size <= 2048) adapter->rx_mbuf_sz = MCLBYTES; else if (adapter->hw.mac.max_frame_size <= 4096) adapter->rx_mbuf_sz = MJUMPAGESIZE; else adapter->rx_mbuf_sz = MJUM9BYTES; /* Prepare receive descriptors and buffers */ if (em_setup_receive_structures(adapter)) { device_printf(dev, "Could not setup receive structures\n"); em_stop(adapter); return; } em_initialize_receive_unit(adapter); /* Use real VLAN Filter support? */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) { if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) /* Use real VLAN Filter support */ em_setup_vlan_hw_support(adapter); else { u32 ctrl; ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= E1000_CTRL_VME; E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); } } /* Don't lose promiscuous settings */ em_set_promisc(adapter); /* Set the interface as ACTIVE */ if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); callout_reset(&adapter->timer, hz, em_local_timer, adapter); e1000_clear_hw_cntrs_base_generic(&adapter->hw); /* MSI/X configuration for 82574 */ if (adapter->hw.mac.type == e1000_82574) { int tmp; tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp); /* Set the IVAR - interrupt vector routing. */ E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); } #ifdef DEVICE_POLLING /* * Only enable interrupts if we are not polling, make sure * they are off otherwise. */ if (if_getcapenable(ifp) & IFCAP_POLLING) em_disable_intr(adapter); else #endif /* DEVICE_POLLING */ em_enable_intr(adapter); /* AMT based hardware can now take control from firmware */ if (adapter->has_manage && adapter->has_amt) em_get_hw_control(adapter); } static void em_init(void *arg) { struct adapter *adapter = arg; EM_CORE_LOCK(adapter); em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } #ifdef DEVICE_POLLING /********************************************************************* * * Legacy polling routine: note this only works with single queue * *********************************************************************/ static int em_poll(if_t ifp, enum poll_cmd cmd, int count) { struct adapter *adapter = if_getsoftc(ifp); struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; u32 reg_icr; int rx_done; EM_CORE_LOCK(adapter); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { EM_CORE_UNLOCK(adapter); return (0); } if (cmd == POLL_AND_CHECK_STATUS) { reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { callout_stop(&adapter->timer); adapter->hw.mac.get_link_status = 1; em_update_link_status(adapter); callout_reset(&adapter->timer, hz, em_local_timer, adapter); } } EM_CORE_UNLOCK(adapter); em_rxeof(rxr, count, &rx_done); EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); return (rx_done); } #endif /* DEVICE_POLLING */ /********************************************************************* * * Fast Legacy/MSI Combined Interrupt Service routine * *********************************************************************/ static int em_irq_fast(void *arg) { struct adapter *adapter = arg; if_t ifp; u32 reg_icr; ifp = adapter->ifp; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Hot eject? */ if (reg_icr == 0xffffffff) return FILTER_STRAY; /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; /* * Starting with the 82571 chip, bit 31 should be used to * determine whether the interrupt belongs to us. */ if (adapter->hw.mac.type >= e1000_82571 && (reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; em_disable_intr(adapter); taskqueue_enqueue(adapter->tq, &adapter->que_task); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; taskqueue_enqueue(taskqueue_fast, &adapter->link_task); } if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; return FILTER_HANDLED; } /* Combined RX/TX handler, used by Legacy and MSI */ static void em_handle_que(void *context, int pending) { struct adapter *adapter = context; if_t ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL); EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); if (more) { taskqueue_enqueue(adapter->tq, &adapter->que_task); return; } } em_enable_intr(adapter); return; } /********************************************************************* * * MSIX Interrupt Service Routines * **********************************************************************/ static void em_msix_tx(void *arg) { struct tx_ring *txr = arg; struct adapter *adapter = txr->adapter; if_t ifp = adapter->ifp; ++txr->tx_irq; EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); EM_TX_UNLOCK(txr); return; } /********************************************************************* * * MSIX RX Interrupt Service routine * **********************************************************************/ static void em_msix_rx(void *arg) { struct rx_ring *rxr = arg; struct adapter *adapter = rxr->adapter; bool more; ++rxr->rx_irq; if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)) return; more = em_rxeof(rxr, adapter->rx_process_limit, NULL); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); else { /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); } return; } /********************************************************************* * * MSIX Link Fast Interrupt Service routine * **********************************************************************/ static void em_msix_link(void *arg) { struct adapter *adapter = arg; u32 reg_icr; ++adapter->link_irq; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; em_handle_link(adapter, 0); } else E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); /* ** Because we must read the ICR for this interrupt ** it may clear other causes using autoclear, for ** this reason we simply create a soft interrupt ** for all these vectors. */ if (reg_icr) { E1000_WRITE_REG(&adapter->hw, E1000_ICS, adapter->ims); } return; } static void em_handle_rx(void *context, int pending) { struct rx_ring *rxr = context; struct adapter *adapter = rxr->adapter; bool more; more = em_rxeof(rxr, adapter->rx_process_limit, NULL); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); else { /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); } } static void em_handle_tx(void *context, int pending) { struct tx_ring *txr = context; struct adapter *adapter = txr->adapter; if_t ifp = adapter->ifp; EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); EM_TX_UNLOCK(txr); } static void em_handle_link(void *context, int pending) { struct adapter *adapter = context; struct tx_ring *txr = adapter->tx_rings; if_t ifp = adapter->ifp; if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) return; EM_CORE_LOCK(adapter); callout_stop(&adapter->timer); em_update_link_status(adapter); callout_reset(&adapter->timer, hz, em_local_timer, adapter); E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); if (adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); } } EM_CORE_UNLOCK(adapter); } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void em_media_status(if_t ifp, struct ifmediareq *ifmr) { struct adapter *adapter = if_getsoftc(ifp); u_char fiber_type = IFM_1000_SX; INIT_DEBUGOUT("em_media_status: begin"); EM_CORE_LOCK(adapter); em_update_link_status(adapter); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { EM_CORE_UNLOCK(adapter); return; } ifmr->ifm_status |= IFM_ACTIVE; if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { ifmr->ifm_active |= fiber_type | IFM_FDX; } else { switch (adapter->link_speed) { case 10: ifmr->ifm_active |= IFM_10_T; break; case 100: ifmr->ifm_active |= IFM_100_TX; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; } if (adapter->link_duplex == FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; } EM_CORE_UNLOCK(adapter); } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. * **********************************************************************/ static int em_media_change(if_t ifp) { struct adapter *adapter = if_getsoftc(ifp); struct ifmedia *ifm = &adapter->media; INIT_DEBUGOUT("em_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); EM_CORE_LOCK(adapter); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; break; case IFM_1000_LX: case IFM_1000_SX: case IFM_1000_T: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case IFM_100_TX: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; break; case IFM_10_T: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; break; default: device_printf(adapter->dev, "Unsupported media type\n"); } em_init_locked(adapter); EM_CORE_UNLOCK(adapter); return (0); } /********************************************************************* * * This routine maps the mbufs to tx descriptors. * * return 0 on success, positive on failure **********************************************************************/ static int em_xmit(struct tx_ring *txr, struct mbuf **m_headp) { struct adapter *adapter = txr->adapter; bus_dma_segment_t segs[EM_MAX_SCATTER]; bus_dmamap_t map; struct em_buffer *tx_buffer, *tx_buffer_mapped; struct e1000_tx_desc *ctxd = NULL; struct mbuf *m_head; struct ether_header *eh; struct ip *ip = NULL; struct tcphdr *tp = NULL; u32 txd_upper = 0, txd_lower = 0; int ip_off, poff; int nsegs, i, j, first, last = 0; int error; bool do_tso, tso_desc, remap = TRUE; m_head = *m_headp; do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO); tso_desc = FALSE; ip_off = poff = 0; /* * Intel recommends entire IP/TCP header length reside in a single * buffer. If multiple descriptors are used to describe the IP and * TCP header, each descriptor should describe one or more * complete headers; descriptors referencing only parts of headers * are not supported. If all layer headers are not coalesced into * a single buffer, each buffer should not cross a 4KB boundary, * or be larger than the maximum read request size. * Controller also requires modifing IP/TCP header to make TSO work * so we firstly get a writable mbuf chain then coalesce ethernet/ * IP/TCP header into a single buffer to meet the requirement of * controller. This also simplifies IP/TCP/UDP checksum offloading * which also has similiar restrictions. */ if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) { if (do_tso || (m_head->m_next != NULL && m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) { if (M_WRITABLE(*m_headp) == 0) { m_head = m_dup(*m_headp, M_NOWAIT); m_freem(*m_headp); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } *m_headp = m_head; } } /* * XXX * Assume IPv4, we don't have TSO/checksum offload support * for IPv6 yet. */ ip_off = sizeof(struct ether_header); if (m_head->m_len < ip_off) { m_head = m_pullup(m_head, ip_off); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } eh = mtod(m_head, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { ip_off = sizeof(struct ether_vlan_header); if (m_head->m_len < ip_off) { m_head = m_pullup(m_head, ip_off); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } } if (m_head->m_len < ip_off + sizeof(struct ip)) { m_head = m_pullup(m_head, ip_off + sizeof(struct ip)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } ip = (struct ip *)(mtod(m_head, char *) + ip_off); poff = ip_off + (ip->ip_hl << 2); if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) { if (m_head->m_len < poff + sizeof(struct tcphdr)) { m_head = m_pullup(m_head, poff + sizeof(struct tcphdr)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } tp = (struct tcphdr *)(mtod(m_head, char *) + poff); /* * TSO workaround: * pull 4 more bytes of data into it. */ if (m_head->m_len < poff + (tp->th_off << 2)) { m_head = m_pullup(m_head, poff + (tp->th_off << 2) + TSO_WORKAROUND); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } ip = (struct ip *)(mtod(m_head, char *) + ip_off); tp = (struct tcphdr *)(mtod(m_head, char *) + poff); if (do_tso) { ip->ip_len = htons(m_head->m_pkthdr.tso_segsz + (ip->ip_hl << 2) + (tp->th_off << 2)); ip->ip_sum = 0; /* * The pseudo TCP checksum does not include TCP * payload length so driver should recompute * the checksum here what hardware expect to * see. This is adherence of Microsoft's Large * Send specification. */ tp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { if (m_head->m_len < poff + sizeof(struct udphdr)) { m_head = m_pullup(m_head, poff + sizeof(struct udphdr)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } ip = (struct ip *)(mtod(m_head, char *) + ip_off); } *m_headp = m_head; } /* * Map the packet for DMA * * Capture the first descriptor index, * this descriptor will have the index * of the EOP which is the only one that * now gets a DONE bit writeback. */ first = txr->next_avail_desc; tx_buffer = &txr->tx_buffers[first]; tx_buffer_mapped = tx_buffer; map = tx_buffer->map; retry: error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); /* * There are two types of errors we can (try) to handle: * - EFBIG means the mbuf chain was too long and bus_dma ran * out of segments. Defragment the mbuf chain and try again. * - ENOMEM means bus_dma could not obtain enough bounce buffers * at this point in time. Defer sending and try again later. * All other errors, in particular EINVAL, are fatal and prevent the * mbuf chain from ever going through. Drop it and report error. */ if (error == EFBIG && remap) { struct mbuf *m; m = m_defrag(*m_headp, M_NOWAIT); if (m == NULL) { adapter->mbuf_alloc_failed++; m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); } *m_headp = m; /* Try it again, but only once */ remap = FALSE; goto retry; } else if (error != 0) { adapter->no_tx_dma_setup++; m_freem(*m_headp); *m_headp = NULL; return (error); } /* * TSO Hardware workaround, if this packet is not * TSO, and is only a single descriptor long, and * it follows a TSO burst, then we need to add a * sentinel descriptor to prevent premature writeback. */ if ((!do_tso) && (txr->tx_tso == TRUE)) { if (nsegs == 1) tso_desc = TRUE; txr->tx_tso = FALSE; } if (nsegs > (txr->tx_avail - EM_MAX_SCATTER)) { txr->no_desc_avail++; bus_dmamap_unload(txr->txtag, map); return (ENOBUFS); } m_head = *m_headp; /* Do hardware assists */ if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { em_tso_setup(txr, m_head, ip_off, ip, tp, &txd_upper, &txd_lower); /* we need to make a final sentinel transmit desc */ tso_desc = TRUE; } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) em_transmit_checksum_setup(txr, m_head, ip_off, ip, &txd_upper, &txd_lower); if (m_head->m_flags & M_VLANTAG) { /* Set the vlan id. */ txd_upper |= htole16(if_getvtag(m_head)) << 16; /* Tell hardware to add tag */ txd_lower |= htole32(E1000_TXD_CMD_VLE); } i = txr->next_avail_desc; /* Set up our transmit descriptors */ for (j = 0; j < nsegs; j++) { bus_size_t seg_len; bus_addr_t seg_addr; tx_buffer = &txr->tx_buffers[i]; ctxd = &txr->tx_base[i]; seg_addr = segs[j].ds_addr; seg_len = segs[j].ds_len; /* ** TSO Workaround: ** If this is the last descriptor, we want to ** split it so we have a small final sentinel */ if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) { seg_len -= TSO_WORKAROUND; ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); if (++i == adapter->num_tx_desc) i = 0; /* Now make the sentinel */ txr->tx_avail--; ctxd = &txr->tx_base[i]; tx_buffer = &txr->tx_buffers[i]; ctxd->buffer_addr = htole64(seg_addr + seg_len); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | TSO_WORKAROUND); ctxd->upper.data = htole32(txd_upper); last = i; if (++i == adapter->num_tx_desc) i = 0; } else { ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); last = i; if (++i == adapter->num_tx_desc) i = 0; } tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; } txr->next_avail_desc = i; txr->tx_avail -= nsegs; tx_buffer->m_head = m_head; /* ** Here we swap the map so the last descriptor, ** which gets the completion interrupt has the ** real map, and the first descriptor gets the ** unused map from this descriptor. */ tx_buffer_mapped->map = tx_buffer->map; tx_buffer->map = map; bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); /* * Last Descriptor of Packet * needs End Of Packet (EOP) * and Report Status (RS) */ ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); /* * Keep track in the first buffer which * descriptor will be written back */ tx_buffer = &txr->tx_buffers[first]; tx_buffer->next_eop = last; /* * Advance the Transmit Descriptor Tail (TDT), this tells the E1000 * that this frame is available to transmit. */ bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); return (0); } static void em_set_promisc(struct adapter *adapter) { if_t ifp = adapter->ifp; u32 reg_rctl; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (if_getflags(ifp) & IFF_PROMISC) { reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); /* Turn this on if you want to see bad packets */ if (em_debug_sbp) reg_rctl |= E1000_RCTL_SBP; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else if (if_getflags(ifp) & IFF_ALLMULTI) { reg_rctl |= E1000_RCTL_MPE; reg_rctl &= ~E1000_RCTL_UPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } } static void em_disable_promisc(struct adapter *adapter) { if_t ifp = adapter->ifp; u32 reg_rctl; int mcnt = 0; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= (~E1000_RCTL_UPE); if (if_getflags(ifp) & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES); /* Don't disable if in MAX groups */ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg_rctl &= (~E1000_RCTL_MPE); reg_rctl &= (~E1000_RCTL_SBP); E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void em_set_multi(struct adapter *adapter) { if_t ifp = adapter->ifp; u32 reg_rctl = 0; u8 *mta; /* Multicast array memory */ int mcnt = 0; IOCTL_DEBUGOUT("em_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_clear_mwi(&adapter->hw); reg_rctl |= E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); } if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES); if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= ~E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_set_mwi(&adapter->hw); } } /********************************************************************* * Timer routine * * This routine checks for link status and updates statistics. * **********************************************************************/ static void em_local_timer(void *arg) { struct adapter *adapter = arg; if_t ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; u32 trigger = 0; EM_CORE_LOCK_ASSERT(adapter); em_update_link_status(adapter); em_update_stats_counters(adapter); /* Reset LAA into RAR[0] on 82571 */ if ((adapter->hw.mac.type == e1000_82571) && e1000_get_laa_state_82571(&adapter->hw)) e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* Mask to use in the irq trigger */ if (adapter->msix_mem) { for (int i = 0; i < adapter->num_queues; i++, rxr++) trigger |= rxr->ims; rxr = adapter->rx_rings; } else trigger = E1000_ICS_RXDMT0; /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. */ for (int i = 0; i < adapter->num_queues; i++, txr++) { if (txr->busy == EM_TX_HUNG) goto hung; if (txr->busy >= EM_TX_MAXTRIES) txr->busy = EM_TX_HUNG; /* Schedule a TX tasklet if needed */ if (txr->tx_avail <= EM_MAX_SCATTER) taskqueue_enqueue(txr->tq, &txr->tx_task); } callout_reset(&adapter->timer, hz, em_local_timer, adapter); #ifndef DEVICE_POLLING /* Trigger an RX interrupt to guarantee mbuf refresh */ E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger); #endif return; hung: /* Looks like we're hung */ device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n", txr->me); em_print_debug_info(adapter); if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); adapter->watchdog_events++; em_init_locked(adapter); } static void em_update_link_status(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; if_t ifp = adapter->ifp; device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; u32 link_check = 0; /* Get the cached link value or read phy for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { - if (hw->mac.type == e1000_pch_spt) - msec_delay(50); /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; if (link_check) /* ESB2 fix */ e1000_cfg_on_link_up(hw); } else link_check = TRUE; break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = adapter->hw.mac.serdes_has_link; break; default: case e1000_media_type_unknown: break; } /* Now check for a transition */ if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(hw, &adapter->link_speed, &adapter->link_duplex); /* Check if we must disable SPEED_MODE bit on PCI-E */ if ((adapter->link_speed != SPEED_1000) && ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572))) { int tarc0; tarc0 = E1000_READ_REG(hw, E1000_TARC(0)); tarc0 &= ~TARC_SPEED_MODE_BIT; E1000_WRITE_REG(hw, E1000_TARC(0), tarc0); } if (bootverbose) device_printf(dev, "Link is up %d Mbps %s\n", adapter->link_speed, ((adapter->link_duplex == FULL_DUPLEX) ? "Full Duplex" : "Half Duplex")); adapter->link_active = 1; adapter->smartspeed = 0; if_setbaudrate(ifp, adapter->link_speed * 1000000); if_link_state_change(ifp, LINK_STATE_UP); } else if (!link_check && (adapter->link_active == 1)) { if_setbaudrate(ifp, 0); adapter->link_speed = 0; adapter->link_duplex = 0; if (bootverbose) device_printf(dev, "Link is Down\n"); adapter->link_active = 0; /* Link down, disable hang detection */ for (int i = 0; i < adapter->num_queues; i++, txr++) txr->busy = EM_TX_IDLE; if_link_state_change(ifp, LINK_STATE_DOWN); } } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * * This routine should always be called with BOTH the CORE * and TX locks. **********************************************************************/ static void em_stop(void *arg) { struct adapter *adapter = arg; if_t ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; EM_CORE_LOCK_ASSERT(adapter); INIT_DEBUGOUT("em_stop: begin"); em_disable_intr(adapter); callout_stop(&adapter->timer); /* Tell the stack that the interface is no longer active */ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); /* Disarm Hang Detection. */ for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); txr->busy = EM_TX_IDLE; EM_TX_UNLOCK(txr); } - /* I219 needs some special flushing to avoid hangs */ - if (adapter->hw.mac.type == e1000_pch_spt) - em_flush_desc_rings(adapter); - e1000_reset_hw(&adapter->hw); E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ static void em_identify_hardware(struct adapter *adapter) { device_t dev = adapter->dev; /* Make sure our PCI config space has the necessary stuff set */ pci_enable_busmaster(dev); adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Do Shared Code Init and Setup */ if (e1000_set_mac_type(&adapter->hw)) { device_printf(dev, "Setup init failure\n"); return; } } static int em_allocate_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; int rid; rid = PCIR_BAR(0); adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->memory == NULL) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->memory); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->memory); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; adapter->hw.back = &adapter->osdep; return (0); } /********************************************************************* * * Setup the Legacy or MSI Interrupt handler * **********************************************************************/ int em_allocate_legacy(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; int error, rid = 0; /* Manually turn off all interrupts */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); if (adapter->msix == 1) /* using MSI */ rid = 1; /* We allocate a single interrupt resource */ adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "interrupt\n"); return (ENXIO); } /* * Allocate a fast interrupt and the associated * deferred processing contexts. */ TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter); adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); /* Use a TX only tasklet for local timer */ TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, taskqueue_thread_enqueue, &txr->tq); taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq", device_get_nameunit(adapter->dev)); TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter); if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET, em_irq_fast, NULL, adapter, &adapter->tag)) != 0) { device_printf(dev, "Failed to register fast interrupt " "handler: %d\n", error); taskqueue_free(adapter->tq); adapter->tq = NULL; return (error); } return (0); } /********************************************************************* * * Setup the MSIX Interrupt handlers * This is not really Multiqueue, rather * its just seperate interrupt vectors * for TX, RX, and Link. * **********************************************************************/ int em_allocate_msix(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; int error, rid, vector = 0; int cpu_id = 0; /* Make sure all interrupts are disabled */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); /* First set up ring resources */ for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) { /* RX ring */ rid = vector + 1; rxr->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (rxr->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "RX MSIX Interrupt %d\n", i); return (ENXIO); } if ((error = bus_setup_intr(dev, rxr->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx, rxr, &rxr->tag)) != 0) { device_printf(dev, "Failed to register RX handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i); #endif rxr->msix = vector; if (em_last_bind_cpu < 0) em_last_bind_cpu = CPU_FIRST(); cpu_id = em_last_bind_cpu; bus_bind_intr(dev, rxr->res, cpu_id); TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr); rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT, taskqueue_thread_enqueue, &rxr->tq); taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)", device_get_nameunit(adapter->dev), cpu_id); /* ** Set the bit to enable interrupt ** in E1000_IMS -- bits 20 and 21 ** are for RX0 and RX1, note this has ** NOTHING to do with the MSIX vector */ rxr->ims = 1 << (20 + i); adapter->ims |= rxr->ims; adapter->ivars |= (8 | rxr->msix) << (i * 4); em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); } for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) { /* TX ring */ rid = vector + 1; txr->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (txr->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "TX MSIX Interrupt %d\n", i); return (ENXIO); } if ((error = bus_setup_intr(dev, txr->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx, txr, &txr->tag)) != 0) { device_printf(dev, "Failed to register TX handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i); #endif txr->msix = vector; if (em_last_bind_cpu < 0) em_last_bind_cpu = CPU_FIRST(); cpu_id = em_last_bind_cpu; bus_bind_intr(dev, txr->res, cpu_id); TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, taskqueue_thread_enqueue, &txr->tq); taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)", device_get_nameunit(adapter->dev), cpu_id); /* ** Set the bit to enable interrupt ** in E1000_IMS -- bits 22 and 23 ** are for TX0 and TX1, note this has ** NOTHING to do with the MSIX vector */ txr->ims = 1 << (22 + i); adapter->ims |= txr->ims; adapter->ivars |= (8 | txr->msix) << (8 + (i * 4)); em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); } /* Link interrupt */ rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!adapter->res) { device_printf(dev,"Unable to allocate " "bus resource: Link interrupt [%d]\n", rid); return (ENXIO); } /* Set the link handler function */ error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_link, adapter, &adapter->tag); if (error) { adapter->res = NULL; device_printf(dev, "Failed to register LINK handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); #endif adapter->linkvec = vector; adapter->ivars |= (8 | vector) << 16; adapter->ivars |= 0x80000000; return (0); } static void em_free_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr; struct rx_ring *rxr; int rid; /* ** Release all the queue interrupt resources: */ for (int i = 0; i < adapter->num_queues; i++) { txr = &adapter->tx_rings[i]; /* an early abort? */ if (txr == NULL) break; rid = txr->msix +1; if (txr->tag != NULL) { bus_teardown_intr(dev, txr->res, txr->tag); txr->tag = NULL; } if (txr->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, txr->res); rxr = &adapter->rx_rings[i]; /* an early abort? */ if (rxr == NULL) break; rid = rxr->msix +1; if (rxr->tag != NULL) { bus_teardown_intr(dev, rxr->res, rxr->tag); rxr->tag = NULL; } if (rxr->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, rxr->res); } if (adapter->linkvec) /* we are doing MSIX */ rid = adapter->linkvec + 1; else (adapter->msix != 0) ? (rid = 1):(rid = 0); if (adapter->tag != NULL) { bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); if (adapter->msix) pci_release_msi(dev); if (adapter->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); if (adapter->memory != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->memory); if (adapter->flash != NULL) bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH, adapter->flash); } /* * Setup MSI or MSI/X */ static int em_setup_msix(struct adapter *adapter) { device_t dev = adapter->dev; int val; /* Nearly always going to use one queue */ adapter->num_queues = 1; /* ** Try using MSI-X for Hartwell adapters */ if ((adapter->hw.mac.type == e1000_82574) && (em_enable_msix == TRUE)) { #ifdef EM_MULTIQUEUE adapter->num_queues = (em_num_queues == 1) ? 1 : 2; if (adapter->num_queues > 1) em_enable_vectors_82574(adapter); #endif /* Map the MSIX BAR */ int rid = PCIR_BAR(EM_MSIX_BAR); adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->msix_mem == NULL) { /* May not be enabled */ device_printf(adapter->dev, "Unable to map MSIX table \n"); goto msi; } val = pci_msix_count(dev); #ifdef EM_MULTIQUEUE /* We need 5 vectors in the multiqueue case */ if (adapter->num_queues > 1 ) { if (val >= 5) val = 5; else { adapter->num_queues = 1; device_printf(adapter->dev, "Insufficient MSIX vectors for >1 queue, " "using single queue...\n"); goto msix_one; } } else { msix_one: #endif if (val >= 3) val = 3; else { device_printf(adapter->dev, "Insufficient MSIX vectors, using MSI\n"); goto msi; } #ifdef EM_MULTIQUEUE } #endif if ((pci_alloc_msix(dev, &val) == 0)) { device_printf(adapter->dev, "Using MSIX interrupts " "with %d vectors\n", val); return (val); } /* ** If MSIX alloc failed or provided us with ** less than needed, free and fall through to MSI */ pci_release_msi(dev); } msi: if (adapter->msix_mem != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); adapter->msix_mem = NULL; } val = 1; if (pci_alloc_msi(dev, &val) == 0) { device_printf(adapter->dev, "Using an MSI interrupt\n"); return (val); } /* Should only happen due to manual configuration */ device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n"); return (0); } -/* -** The 3 following flush routines are used as a workaround in the -** I219 client parts and only for them. -** -** em_flush_tx_ring - remove all descriptors from the tx_ring -** -** We want to clear all pending descriptors from the TX ring. -** zeroing happens when the HW reads the regs. We assign the ring itself as -** the data of the next descriptor. We don't care about the data we are about -** to reset the HW. -*/ -static void -em_flush_tx_ring(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct tx_ring *txr = adapter->tx_rings; - struct e1000_tx_desc *txd; - u32 tctl, txd_lower = E1000_TXD_CMD_IFCS; - u16 size = 512; - - tctl = E1000_READ_REG(hw, E1000_TCTL); - E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN); - - txd = &txr->tx_base[txr->next_avail_desc++]; - if (txr->next_avail_desc == adapter->num_tx_desc) - txr->next_avail_desc = 0; - - /* Just use the ring as a dummy buffer addr */ - txd->buffer_addr = txr->txdma.dma_paddr; - txd->lower.data = htole32(txd_lower | size); - txd->upper.data = 0; - - /* flush descriptors to memory before notifying the HW */ - wmb(); - - E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc); - mb(); - usec_delay(250); -} - -/* -** em_flush_rx_ring - remove all descriptors from the rx_ring -** -** Mark all descriptors in the RX ring as consumed and disable the rx ring -*/ -static void -em_flush_rx_ring(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - u32 rctl, rxdctl; - - rctl = E1000_READ_REG(hw, E1000_RCTL); - E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); - E1000_WRITE_FLUSH(hw); - usec_delay(150); - - rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); - /* zero the lower 14 bits (prefetch and host thresholds) */ - rxdctl &= 0xffffc000; - /* - * update thresholds: prefetch threshold to 31, host threshold to 1 - * and make sure the granularity is "descriptors" and not "cache lines" - */ - rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC); - E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl); - - /* momentarily enable the RX ring for the changes to take effect */ - E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN); - E1000_WRITE_FLUSH(hw); - usec_delay(150); - E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); -} - -/* -** em_flush_desc_rings - remove all descriptors from the descriptor rings -** -** In i219, the descriptor rings must be emptied before resetting the HW -** or before changing the device state to D3 during runtime (runtime PM). -** -** Failure to do this will cause the HW to enter a unit hang state which can -** only be released by PCI reset on the device -** -*/ -static void -em_flush_desc_rings(struct adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - device_t dev = adapter->dev; - u16 hang_state; - u32 fext_nvm11, tdlen; - - /* First, disable MULR fix in FEXTNVM11 */ - fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11); - fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX; - E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11); - - /* do nothing if we're not in faulty state, or if the queue is empty */ - tdlen = E1000_READ_REG(hw, E1000_TDLEN(0)); - hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2); - if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen) - return; - em_flush_tx_ring(adapter); - - /* recheck, maybe the fault is caused by the rx ring */ - hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2); - if (hang_state & FLUSH_DESC_REQUIRED) - em_flush_rx_ring(adapter); -} - - /********************************************************************* * * Initialize the hardware to a configuration * as specified by the adapter structure. * **********************************************************************/ static void em_reset(struct adapter *adapter) { device_t dev = adapter->dev; if_t ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u16 rx_buffer_size; u32 pba; INIT_DEBUGOUT("em_reset: begin"); /* Set up smart power down as default off on newer adapters. */ if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 || hw->mac.type == e1000_82572)) { u16 phy_tmp = 0; /* Speed up time to link by disabling smart power down. */ e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp); phy_tmp &= ~IGP02E1000_PM_SPD; e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp); } /* * Packet Buffer Allocation (PBA) * Writing PBA sets the receive portion of the buffer * the remainder is used for the transmit buffer. */ switch (hw->mac.type) { /* Total Packet Buffer on these is 48K */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */ break; case e1000_82573: /* 82573: Total Packet Buffer is 32K */ pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */ break; case e1000_82574: case e1000_82583: pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */ break; case e1000_ich8lan: pba = E1000_PBA_8K; break; case e1000_ich9lan: case e1000_ich10lan: /* Boost Receive side for jumbo frames */ if (adapter->hw.mac.max_frame_size > 4096) pba = E1000_PBA_14K; else pba = E1000_PBA_10K; break; case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: - case e1000_pch_spt: pba = E1000_PBA_26K; break; default: if (adapter->hw.mac.max_frame_size > 8192) pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ else pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ } E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); /* * These parameters control the automatic generation (Tx) and * response (Rx) to Ethernet PAUSE frames. * - High water mark should allow for at least two frames to be * received after sending an XOFF. * - Low water mark works best when it is very near the high water mark. * This allows the receiver to restart by sending XON when it has * drained a bit. Here we use an arbitary value of 1500 which will * restart after one full frame is pulled from the buffer. There * could be several smaller frames in the buffer and if so they will * not trigger the XON until their total number reduces the buffer * by 1500. * - The pause time is fairly large at 1000 x 512ns = 512 usec. */ rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 ); hw->fc.high_water = rx_buffer_size - roundup2(adapter->hw.mac.max_frame_size, 1024); hw->fc.low_water = hw->fc.high_water - 1500; if (adapter->fc) /* locally set flow control value? */ hw->fc.requested_mode = adapter->fc; else hw->fc.requested_mode = e1000_fc_full; if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; else hw->fc.pause_time = EM_FC_PAUSE_TIME; hw->fc.send_xon = TRUE; /* Device specific overrides/settings */ switch (hw->mac.type) { case e1000_pchlan: /* Workaround: no TX flow ctrl for PCH */ hw->fc.requested_mode = e1000_fc_rx_pause; hw->fc.pause_time = 0xFFFF; /* override */ if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x3500; hw->fc.low_water = 0x1500; } else { hw->fc.high_water = 0x5000; hw->fc.low_water = 0x3000; } hw->fc.refresh_time = 0x1000; break; case e1000_pch2lan: case e1000_pch_lpt: - case e1000_pch_spt: hw->fc.high_water = 0x5C20; hw->fc.low_water = 0x5048; hw->fc.pause_time = 0x0650; hw->fc.refresh_time = 0x0400; /* Jumbos need adjusted PBA */ if (if_getmtu(ifp) > ETHERMTU) E1000_WRITE_REG(hw, E1000_PBA, 12); else E1000_WRITE_REG(hw, E1000_PBA, 26); break; case e1000_ich9lan: case e1000_ich10lan: if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x2800; hw->fc.low_water = hw->fc.high_water - 8; break; } /* else fall thru */ default: if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; break; } - /* I219 needs some special flushing to avoid hangs */ - if (hw->mac.type == e1000_pch_spt) - em_flush_desc_rings(adapter); - /* Issue a global reset */ e1000_reset_hw(hw); E1000_WRITE_REG(hw, E1000_WUC, 0); em_disable_aspm(adapter); /* and a re-init */ if (e1000_init_hw(hw) < 0) { device_printf(dev, "Hardware Initialization Failed\n"); return; } E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); return; } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static int em_setup_interface(device_t dev, struct adapter *adapter) { if_t ifp; INIT_DEBUGOUT("em_setup_interface: begin"); ifp = adapter->ifp = if_gethandle(IFT_ETHER); if (ifp == 0) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setdev(ifp, dev); if_setinitfn(ifp, em_init); if_setsoftc(ifp, adapter); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); if_setioctlfn(ifp, em_ioctl); if_setgetcounterfn(ifp, em_get_counter); /* TSO parameters */ ifp->if_hw_tsomax = IP_MAXPACKET; ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER; ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE; #ifdef EM_MULTIQUEUE /* Multiqueue stack interface */ if_settransmitfn(ifp, em_mq_start); if_setqflushfn(ifp, em_qflush); #else if_setstartfn(ifp, em_start); if_setsendqlen(ifp, adapter->num_tx_desc - 1); if_setsendqready(ifp); #endif ether_ifattach(ifp, adapter->hw.mac.addr); if_setcapabilities(ifp, 0); if_setcapenable(ifp, 0); if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4, 0); /* * Tell the upper layer(s) we * support full VLAN capability */ if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU, 0); if_setcapenable(ifp, if_getcapabilities(ifp)); /* ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the em driver you can ** enable this and get full hardware tag filtering. */ if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0); #ifdef DEVICE_POLLING if_setcapabilitiesbit(ifp, IFCAP_POLLING,0); #endif /* Enable only WOL MAGIC by default */ if (adapter->wol) { if_setcapabilitiesbit(ifp, IFCAP_WOL, 0); if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0); } /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, em_media_change, em_media_status); if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { u_char fiber_type = IFM_1000_SX; /* default type */ ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL); } else { ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /* * Manage DMA'able memory. */ static void em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs[0].ds_addr; } static int em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma, int mapflags) { int error; error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ EM_DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->dma_tag); if (error) { device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n", __func__, error); goto fail_0; } error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); if (error) { device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, error); goto fail_2; } dma->dma_paddr = 0; error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (error || dma->dma_paddr == 0) { device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n", __func__, error); goto fail_3; } return (0); fail_3: bus_dmamap_unload(dma->dma_tag, dma->dma_map); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_tag = NULL; return (error); } static void em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma) { if (dma->dma_tag == NULL) return; if (dma->dma_paddr != 0) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); dma->dma_paddr = 0; } if (dma->dma_vaddr != NULL) { bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); dma->dma_vaddr = NULL; } bus_dma_tag_destroy(dma->dma_tag); dma->dma_tag = NULL; } /********************************************************************* * * Allocate memory for the transmit and receive rings, and then * the descriptors associated with each, called only once at attach. * **********************************************************************/ static int em_allocate_queues(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = NULL; struct rx_ring *rxr = NULL; int rsize, tsize, error = E1000_SUCCESS; int txconf = 0, rxconf = 0; /* Allocate the TX ring struct memory */ if (!(adapter->tx_rings = (struct tx_ring *) malloc(sizeof(struct tx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); error = ENOMEM; goto fail; } /* Now allocate the RX */ if (!(adapter->rx_rings = (struct rx_ring *) malloc(sizeof(struct rx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); error = ENOMEM; goto rx_fail; } tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); /* * Now set up the TX queues, txconf is needed to handle the * possibility that things fail midcourse and we need to * undo memory gracefully */ for (int i = 0; i < adapter->num_queues; i++, txconf++) { /* Set up some basics */ txr = &adapter->tx_rings[i]; txr->adapter = adapter; txr->me = i; /* Initialize the TX lock */ snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate TX Descriptor memory\n"); error = ENOMEM; goto err_tx_desc; } txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr; bzero((void *)txr->tx_base, tsize); if (em_allocate_transmit_buffers(txr)) { device_printf(dev, "Critical Failure setting up transmit buffers\n"); error = ENOMEM; goto err_tx_desc; } #if __FreeBSD_version >= 800000 /* Allocate a buf ring */ txr->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &txr->tx_mtx); #endif } /* * Next the RX queues... */ rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); for (int i = 0; i < adapter->num_queues; i++, rxconf++) { rxr = &adapter->rx_rings[i]; rxr->adapter = adapter; rxr->me = i; /* Initialize the RX lock */ snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); if (em_dma_malloc(adapter, rsize, &rxr->rxdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate RxDescriptor memory\n"); error = ENOMEM; goto err_rx_desc; } rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr; bzero((void *)rxr->rx_base, rsize); /* Allocate receive buffers for the ring*/ if (em_allocate_receive_buffers(rxr)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); error = ENOMEM; goto err_rx_desc; } } return (0); err_rx_desc: for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) em_dma_free(adapter, &rxr->rxdma); err_tx_desc: for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) em_dma_free(adapter, &txr->txdma); free(adapter->rx_rings, M_DEVBUF); rx_fail: #if __FreeBSD_version >= 800000 buf_ring_free(txr->br, M_DEVBUF); #endif free(adapter->tx_rings, M_DEVBUF); fail: return (error); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. This is * called only once at attach, setup is done every reset. * **********************************************************************/ static int em_allocate_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; device_t dev = adapter->dev; struct em_buffer *txbuf; int error, i; /* * Setup DMA descriptor areas. */ if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ EM_TSO_SIZE, /* maxsize */ EM_MAX_SCATTER, /* nsegments */ PAGE_SIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->txtag))) { device_printf(dev,"Unable to allocate TX DMA tag\n"); goto fail; } if (!(txr->tx_buffers = (struct em_buffer *) malloc(sizeof(struct em_buffer) * adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); error = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); if (error != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } return 0; fail: /* We free all, it handles case where we are in the middle */ em_free_transmit_structures(adapter); return (error); } /********************************************************************* * * Initialize a transmit ring. * **********************************************************************/ static void em_setup_transmit_ring(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct em_buffer *txbuf; int i; #ifdef DEV_NETMAP struct netmap_slot *slot; struct netmap_adapter *na = netmap_getna(adapter->ifp); #endif /* DEV_NETMAP */ /* Clear the old descriptor contents */ EM_TX_LOCK(txr); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_TX, txr->me, 0); #endif /* DEV_NETMAP */ bzero((void *)txr->tx_base, (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc); /* Reset indices */ txr->next_avail_desc = 0; txr->next_to_clean = 0; /* Free any existing tx buffers. */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; } #ifdef DEV_NETMAP if (slot) { int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); uint64_t paddr; void *addr; addr = PNMB(na, slot + si, &paddr); txr->tx_base[i].buffer_addr = htole64(paddr); /* reload the map for netmap mode */ netmap_load_map(na, txr->txtag, txbuf->map, addr); } #endif /* DEV_NETMAP */ /* clear the watch index */ txbuf->next_eop = -1; } /* Set number of descriptors available */ txr->tx_avail = adapter->num_tx_desc; txr->busy = EM_TX_IDLE; /* Clear checksum offload context. */ txr->last_hw_offload = 0; txr->last_hw_ipcss = 0; txr->last_hw_ipcso = 0; txr->last_hw_tucss = 0; txr->last_hw_tucso = 0; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); EM_TX_UNLOCK(txr); } /********************************************************************* * * Initialize all transmit rings. * **********************************************************************/ static void em_setup_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) em_setup_transmit_ring(txr); return; } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void em_initialize_transmit_unit(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct e1000_hw *hw = &adapter->hw; u32 tctl, txdctl = 0, tarc, tipg = 0; INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); for (int i = 0; i < adapter->num_queues; i++, txr++) { u64 bus_addr = txr->txdma.dma_paddr; /* Base and Len of TX Ring */ E1000_WRITE_REG(hw, E1000_TDLEN(i), adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(hw, E1000_TDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_TDBAL(i), (u32)bus_addr); /* Init the HEAD/TAIL indices */ E1000_WRITE_REG(hw, E1000_TDT(i), 0); E1000_WRITE_REG(hw, E1000_TDH(i), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)), E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); txr->busy = EM_TX_IDLE; txdctl = 0; /* clear txdctl */ txdctl |= 0x1f; /* PTHRESH */ txdctl |= 1 << 8; /* HTHRESH */ txdctl |= 1 << 16;/* WTHRESH */ txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */ txdctl |= E1000_TXDCTL_GRAN; txdctl |= 1 << 25; /* LWTHRESH */ E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); } /* Set the default values for the Tx Inter Packet Gap timer */ switch (adapter->hw.mac.type) { case e1000_80003es2lan: tipg = DEFAULT_82543_TIPG_IPGR1; tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; default: if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) tipg = DEFAULT_82543_TIPG_IPGT_FIBER; else tipg = DEFAULT_82543_TIPG_IPGT_COPPER; tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; } E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); if(adapter->hw.mac.type >= e1000_82540) E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value); if ((adapter->hw.mac.type == e1000_82571) || (adapter->hw.mac.type == e1000_82572)) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_SPEED_MODE_BIT; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } else if (adapter->hw.mac.type == e1000_80003es2lan) { /* errata: program both queues to unweighted RR */ tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else if (adapter->hw.mac.type == e1000_82574) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_ERRATA_BIT; if ( adapter->num_queues > 1) { tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX); E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } adapter->txd_cmd = E1000_TXD_CMD_IFCS; if (adapter->tx_int_delay.value > 0) adapter->txd_cmd |= E1000_TXD_CMD_IDE; /* Program the Transmit Control Register */ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); if (adapter->hw.mac.type >= e1000_82571) tctl |= E1000_TCTL_MULR; /* This write will effectively turn on the transmit unit. */ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); - if (hw->mac.type == e1000_pch_spt) { - u32 reg; - reg = E1000_READ_REG(hw, E1000_IOSFPC); - reg |= E1000_RCTL_RDMTS_HEX; - E1000_WRITE_REG(hw, E1000_IOSFPC, reg); - reg = E1000_READ_REG(hw, E1000_TARC(0)); - reg |= E1000_TARC0_CB_MULTIQ_3_REQ; - E1000_WRITE_REG(hw, E1000_TARC(0), reg); - } } /********************************************************************* * * Free all transmit rings. * **********************************************************************/ static void em_free_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); em_free_transmit_buffers(txr); em_dma_free(adapter, &txr->txdma); EM_TX_UNLOCK(txr); EM_TX_LOCK_DESTROY(txr); } free(adapter->tx_rings, M_DEVBUF); } /********************************************************************* * * Free transmit ring related data structures. * **********************************************************************/ static void em_free_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct em_buffer *txbuf; INIT_DEBUGOUT("free_transmit_ring: begin"); if (txr->tx_buffers == NULL) return; for (int i = 0; i < adapter->num_tx_desc; i++) { txbuf = &txr->tx_buffers[i]; if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; if (txbuf->map != NULL) { bus_dmamap_destroy(txr->txtag, txbuf->map); txbuf->map = NULL; } } else if (txbuf->map != NULL) { bus_dmamap_unload(txr->txtag, txbuf->map); bus_dmamap_destroy(txr->txtag, txbuf->map); txbuf->map = NULL; } } #if __FreeBSD_version >= 800000 if (txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF); #endif if (txr->tx_buffers != NULL) { free(txr->tx_buffers, M_DEVBUF); txr->tx_buffers = NULL; } if (txr->txtag != NULL) { bus_dma_tag_destroy(txr->txtag); txr->txtag = NULL; } return; } /********************************************************************* * The offload context is protocol specific (TCP/UDP) and thus * only needs to be set when the protocol changes. The occasion * of a context change can be a performance detriment, and * might be better just disabled. The reason arises in the way * in which the controller supports pipelined requests from the * Tx data DMA. Up to four requests can be pipelined, and they may * belong to the same packet or to multiple packets. However all * requests for one packet are issued before a request is issued * for a subsequent packet and if a request for the next packet * requires a context change, that request will be stalled * until the previous request completes. This means setting up * a new context effectively disables pipelined Tx data DMA which * in turn greatly slow down performance to send small sized * frames. **********************************************************************/ static void em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, struct ip *ip, u32 *txd_upper, u32 *txd_lower) { struct adapter *adapter = txr->adapter; struct e1000_context_desc *TXD = NULL; struct em_buffer *tx_buffer; int cur, hdr_len; u32 cmd = 0; u16 offload = 0; u8 ipcso, ipcss, tucso, tucss; ipcss = ipcso = tucss = tucso = 0; hdr_len = ip_off + (ip->ip_hl << 2); cur = txr->next_avail_desc; /* Setup of IP header checksum. */ if (mp->m_pkthdr.csum_flags & CSUM_IP) { *txd_upper |= E1000_TXD_POPTS_IXSM << 8; offload |= CSUM_IP; ipcss = ip_off; ipcso = ip_off + offsetof(struct ip, ip_sum); /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; TXD->lower_setup.ip_fields.ipcss = ipcss; TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len); TXD->lower_setup.ip_fields.ipcso = ipcso; cmd |= E1000_TXD_CMD_IP; } if (mp->m_pkthdr.csum_flags & CSUM_TCP) { *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; offload |= CSUM_TCP; tucss = hdr_len; tucso = hdr_len + offsetof(struct tcphdr, th_sum); /* * Setting up new checksum offload context for every frames * takes a lot of processing time for hardware. This also * reduces performance a lot for small sized frames so avoid * it if driver can use previously configured checksum * offload context. */ if (txr->last_hw_offload == offload) { if (offload & CSUM_IP) { if (txr->last_hw_ipcss == ipcss && txr->last_hw_ipcso == ipcso && txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } else { if (txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } } txr->last_hw_offload = offload; txr->last_hw_tucss = tucss; txr->last_hw_tucso = tucso; /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; TXD->upper_setup.tcp_fields.tucss = hdr_len; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = tucso; cmd |= E1000_TXD_CMD_TCP; } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) { *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; tucss = hdr_len; tucso = hdr_len + offsetof(struct udphdr, uh_sum); /* * Setting up new checksum offload context for every frames * takes a lot of processing time for hardware. This also * reduces performance a lot for small sized frames so avoid * it if driver can use previously configured checksum * offload context. */ if (txr->last_hw_offload == offload) { if (offload & CSUM_IP) { if (txr->last_hw_ipcss == ipcss && txr->last_hw_ipcso == ipcso && txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } else { if (txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } } txr->last_hw_offload = offload; txr->last_hw_tucss = tucss; txr->last_hw_tucso = tucso; /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; TXD->upper_setup.tcp_fields.tucss = tucss; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = tucso; } if (offload & CSUM_IP) { txr->last_hw_ipcss = ipcss; txr->last_hw_ipcso = ipcso; } TXD->tcp_seg_setup.data = htole32(0); TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd); tx_buffer = &txr->tx_buffers[cur]; tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; if (++cur == adapter->num_tx_desc) cur = 0; txr->tx_avail--; txr->next_avail_desc = cur; } /********************************************************************** * * Setup work for hardware segmentation offload (TSO) * **********************************************************************/ static void em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower) { struct adapter *adapter = txr->adapter; struct e1000_context_desc *TXD; struct em_buffer *tx_buffer; int cur, hdr_len; /* * In theory we can use the same TSO context if and only if * frame is the same type(IP/TCP) and the same MSS. However * checking whether a frame has the same IP/TCP structure is * hard thing so just ignore that and always restablish a * new TSO context. */ hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2); *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */ E1000_TXD_DTYP_D | /* Data descr type */ E1000_TXD_CMD_TSE); /* Do TSE on this packet */ /* IP and/or TCP header checksum calculation and insertion. */ *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8; cur = txr->next_avail_desc; tx_buffer = &txr->tx_buffers[cur]; TXD = (struct e1000_context_desc *) &txr->tx_base[cur]; /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place put the checksum. */ TXD->lower_setup.ip_fields.ipcss = ip_off; TXD->lower_setup.ip_fields.ipcse = htole16(ip_off + (ip->ip_hl << 2) - 1); TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum); /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. * Offset of place to put the checksum. */ TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2); TXD->upper_setup.tcp_fields.tucse = 0; TXD->upper_setup.tcp_fields.tucso = ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum); /* * Payload size per packet w/o any headers. * Length of all headers up to payload. */ TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz); TXD->tcp_seg_setup.fields.hdr_len = hdr_len; TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | /* Extended descr */ E1000_TXD_CMD_TSE | /* TSE context */ E1000_TXD_CMD_IP | /* Do IP csum */ E1000_TXD_CMD_TCP | /* Do TCP checksum */ (mp->m_pkthdr.len - (hdr_len))); /* Total len */ tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; if (++cur == adapter->num_tx_desc) cur = 0; txr->tx_avail--; txr->next_avail_desc = cur; txr->tx_tso = TRUE; } /********************************************************************** * * Examine each tx_buffer in the used queue. If the hardware is done * processing the packet then free associated resources. The * tx_buffer is put back on the free queue. * **********************************************************************/ static void em_txeof(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; int first, last, done, processed; struct em_buffer *tx_buffer; struct e1000_tx_desc *tx_desc, *eop_desc; if_t ifp = adapter->ifp; EM_TX_LOCK_ASSERT(txr); #ifdef DEV_NETMAP if (netmap_tx_irq(ifp, txr->me)) return; #endif /* DEV_NETMAP */ /* No work, make sure hang detection is disabled */ if (txr->tx_avail == adapter->num_tx_desc) { txr->busy = EM_TX_IDLE; return; } processed = 0; first = txr->next_to_clean; tx_desc = &txr->tx_base[first]; tx_buffer = &txr->tx_buffers[first]; last = tx_buffer->next_eop; eop_desc = &txr->tx_base[last]; /* * What this does is get the index of the * first descriptor AFTER the EOP of the * first packet, that way we can do the * simple comparison on the inner while loop. */ if (++last == adapter->num_tx_desc) last = 0; done = last; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) { /* We clean the range of the packet */ while (first != done) { tx_desc->upper.data = 0; tx_desc->lower.data = 0; tx_desc->buffer_addr = 0; ++txr->tx_avail; ++processed; if (tx_buffer->m_head) { bus_dmamap_sync(txr->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; } tx_buffer->next_eop = -1; if (++first == adapter->num_tx_desc) first = 0; tx_buffer = &txr->tx_buffers[first]; tx_desc = &txr->tx_base[first]; } if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* See if we can continue to the next packet */ last = tx_buffer->next_eop; if (last != -1) { eop_desc = &txr->tx_base[last]; /* Get new done point */ if (++last == adapter->num_tx_desc) last = 0; done = last; } else break; } bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); txr->next_to_clean = first; /* ** Hang detection: we know there's work outstanding ** or the entry return would have been taken, so no ** descriptor processed here indicates a potential hang. ** The local timer will examine this and do a reset if needed. */ if (processed == 0) { if (txr->busy != EM_TX_HUNG) ++txr->busy; } else /* At least one descriptor was cleaned */ txr->busy = EM_TX_BUSY; /* note this clears HUNG */ /* * If we have a minimum free, clear IFF_DRV_OACTIVE * to tell the stack that it is OK to send packets. * Notice that all writes of OACTIVE happen under the * TX lock which, with a single queue, guarantees * sanity. */ if (txr->tx_avail >= EM_MAX_SCATTER) { if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); } /* Disable hang detection if all clean */ if (txr->tx_avail == adapter->num_tx_desc) txr->busy = EM_TX_IDLE; } /********************************************************************* * * Refresh RX descriptor mbufs from system mbuf buffer pool. * **********************************************************************/ static void em_refresh_mbufs(struct rx_ring *rxr, int limit) { struct adapter *adapter = rxr->adapter; struct mbuf *m; bus_dma_segment_t segs[1]; struct em_buffer *rxbuf; int i, j, error, nsegs; bool cleaned = FALSE; i = j = rxr->next_to_refresh; /* ** Get one descriptor beyond ** our work mark to control ** the loop. */ if (++j == adapter->num_rx_desc) j = 0; while (j != limit) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head == NULL) { m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); /* ** If we have a temporary resource shortage ** that causes a failure, just abort refresh ** for now, we will return to this point when ** reinvoked from em_rxeof. */ if (m == NULL) goto update; } else m = rxbuf->m_head; m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz; m->m_flags |= M_PKTHDR; m->m_data = m->m_ext.ext_buf; /* Use bus_dma machinery to setup the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("Refresh mbufs: hdr dmamap load" " failure - %d\n", error); m_free(m); rxbuf->m_head = NULL; goto update; } rxbuf->m_head = m; bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD); rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr); cleaned = TRUE; i = j; /* Next is precalulated for us */ rxr->next_to_refresh = i; /* Calculate next controlling index */ if (++j == adapter->num_rx_desc) j = 0; } update: /* ** Update the tail pointer only if, ** and as far as we have refreshed. */ if (cleaned) E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->next_to_refresh); return; } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. * **********************************************************************/ static int em_allocate_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; device_t dev = adapter->dev; struct em_buffer *rxbuf; int error; rxr->rx_buffers = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); if (rxr->rx_buffers == NULL) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); return (ENOMEM); } error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &rxr->rxtag); if (error) { device_printf(dev, "%s: bus_dma_tag_create failed %d\n", __func__, error); goto fail; } rxbuf = rxr->rx_buffers; for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) { rxbuf = &rxr->rx_buffers[i]; error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map); if (error) { device_printf(dev, "%s: bus_dmamap_create failed: %d\n", __func__, error); goto fail; } } return (0); fail: em_free_receive_structures(adapter); return (error); } /********************************************************************* * * Initialize a receive ring and its buffers. * **********************************************************************/ static int em_setup_receive_ring(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct em_buffer *rxbuf; bus_dma_segment_t seg[1]; int rsize, nsegs, error = 0; #ifdef DEV_NETMAP struct netmap_slot *slot; struct netmap_adapter *na = netmap_getna(adapter->ifp); #endif /* Clear the ring contents */ EM_RX_LOCK(rxr); rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); bzero((void *)rxr->rx_base, rsize); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_RX, rxr->me, 0); #endif /* ** Free current RX buffer structs and their mbufs */ for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); m_freem(rxbuf->m_head); rxbuf->m_head = NULL; /* mark as freed */ } } /* Now replenish the mbufs */ for (int j = 0; j != adapter->num_rx_desc; ++j) { rxbuf = &rxr->rx_buffers[j]; #ifdef DEV_NETMAP if (slot) { int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j); uint64_t paddr; void *addr; addr = PNMB(na, slot + si, &paddr); netmap_load_map(na, rxr->rxtag, rxbuf->map, addr); /* Update descriptor */ rxr->rx_base[j].buffer_addr = htole64(paddr); continue; } #endif /* DEV_NETMAP */ rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (rxbuf->m_head == NULL) { error = ENOBUFS; goto fail; } rxbuf->m_head->m_len = adapter->rx_mbuf_sz; rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */ rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map, rxbuf->m_head, seg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(rxbuf->m_head); rxbuf->m_head = NULL; goto fail; } bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD); /* Update descriptor */ rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr); } rxr->next_to_check = 0; rxr->next_to_refresh = 0; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); fail: EM_RX_UNLOCK(rxr); return (error); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int em_setup_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; int q; for (q = 0; q < adapter->num_queues; q++, rxr++) if (em_setup_receive_ring(rxr)) goto fail; return (0); fail: /* * Free RX buffers allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'q' failed, so its the terminus. */ for (int i = 0; i < q; ++i) { rxr = &adapter->rx_rings[i]; for (int n = 0; n < adapter->num_rx_desc; n++) { struct em_buffer *rxbuf; rxbuf = &rxr->rx_buffers[n]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); m_freem(rxbuf->m_head); rxbuf->m_head = NULL; } } rxr->next_to_check = 0; rxr->next_to_refresh = 0; } return (ENOBUFS); } /********************************************************************* * * Free all receive rings. * **********************************************************************/ static void em_free_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; for (int i = 0; i < adapter->num_queues; i++, rxr++) { em_free_receive_buffers(rxr); /* Free the ring memory as well */ em_dma_free(adapter, &rxr->rxdma); EM_RX_LOCK_DESTROY(rxr); } free(adapter->rx_rings, M_DEVBUF); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void em_free_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct em_buffer *rxbuf = NULL; INIT_DEBUGOUT("free_receive_buffers: begin"); if (rxr->rx_buffers != NULL) { for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->map != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); bus_dmamap_destroy(rxr->rxtag, rxbuf->map); } if (rxbuf->m_head != NULL) { m_freem(rxbuf->m_head); rxbuf->m_head = NULL; } } free(rxr->rx_buffers, M_DEVBUF); rxr->rx_buffers = NULL; rxr->next_to_check = 0; rxr->next_to_refresh = 0; } if (rxr->rxtag != NULL) { bus_dma_tag_destroy(rxr->rxtag); rxr->rxtag = NULL; } return; } /********************************************************************* * * Enable receive unit. * **********************************************************************/ static void em_initialize_receive_unit(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; if_t ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u64 bus_addr; u32 rctl, rxcsum; INIT_DEBUGOUT("em_initialize_receive_units: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(hw, E1000_RCTL); /* Do not disable if ever enabled on this hardware */ if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583)) E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); E1000_WRITE_REG(&adapter->hw, E1000_RADV, adapter->rx_abs_int_delay.value); E1000_WRITE_REG(&adapter->hw, E1000_RDTR, adapter->rx_int_delay.value); /* * Set the interrupt throttling rate. Value is calculated * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */ E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); /* ** When using MSIX interrupts we need to throttle ** using the EITR register (82574 only) */ if (hw->mac.type == e1000_82574) { u32 rfctl; for (int i = 0; i < 4; i++) E1000_WRITE_REG(hw, E1000_EITR_82574(i), DEFAULT_ITR); /* Disable accelerated acknowledge */ rfctl = E1000_READ_REG(hw, E1000_RFCTL); rfctl |= E1000_RFCTL_ACK_DIS; E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); } rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); if (if_getcapenable(ifp) & IFCAP_RXCSUM) { #ifdef EM_MULTIQUEUE rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL | E1000_RXCSUM_PCSD; #else rxcsum |= E1000_RXCSUM_TUOFL; #endif } else rxcsum &= ~E1000_RXCSUM_TUOFL; E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); #ifdef EM_MULTIQUEUE if (adapter->num_queues > 1) { uint32_t rss_key[10]; uint32_t reta; int i; /* * Configure RSS key */ arc4rand(rss_key, sizeof(rss_key), 0); for (i = 0; i < 10; ++i) E1000_WRITE_REG_ARRAY(hw,E1000_RSSRK(0), i, rss_key[i]); /* * Configure RSS redirect table in following fashion: * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] */ reta = 0; for (i = 0; i < 4; ++i) { uint32_t q; q = (i % adapter->num_queues) << 7; reta |= q << (8 * i); } for (i = 0; i < 32; ++i) E1000_WRITE_REG(hw, E1000_RETA(i), reta); E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | E1000_MRQC_RSS_FIELD_IPV4_TCP | E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX | E1000_MRQC_RSS_FIELD_IPV6_EX | E1000_MRQC_RSS_FIELD_IPV6 | E1000_MRQC_RSS_FIELD_IPV6_TCP); } #endif /* ** XXX TEMPORARY WORKAROUND: on some systems with 82573 ** long latencies are observed, like Lenovo X60. This ** change eliminates the problem, but since having positive ** values in RDTR is a known source of problems on other ** platforms another solution is being sought. */ if (hw->mac.type == e1000_82573) E1000_WRITE_REG(hw, E1000_RDTR, 0x20); for (int i = 0; i < adapter->num_queues; i++, rxr++) { /* Setup the Base and Length of the Rx Descriptor Ring */ u32 rdt = adapter->num_rx_desc - 1; /* default */ bus_addr = rxr->rxdma.dma_paddr; E1000_WRITE_REG(hw, E1000_RDLEN(i), adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); /* Setup the Head and Tail Descriptor Pointers */ E1000_WRITE_REG(hw, E1000_RDH(i), 0); #ifdef DEV_NETMAP /* * an init() while a netmap client is active must * preserve the rx buffers passed to userspace. */ if (if_getcapenable(ifp) & IFCAP_NETMAP) { struct netmap_adapter *na = netmap_getna(adapter->ifp); rdt -= nm_kr_rxspace(&na->rx_rings[i]); } #endif /* DEV_NETMAP */ E1000_WRITE_REG(hw, E1000_RDT(i), rdt); } /* * Set PTHRESH for improved jumbo performance * According to 10.2.5.11 of Intel 82574 Datasheet, * RXDCTL(1) is written whenever RXDCTL(0) is written. * Only write to RXDCTL(1) if there is a need for different * settings. */ if (((adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_pch2lan) || (adapter->hw.mac.type == e1000_ich10lan)) && (if_getmtu(ifp) > ETHERMTU)) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); } else if ((adapter->hw.mac.type == e1000_82574) && (if_getmtu(ifp) > ETHERMTU)) { for (int i = 0; i < adapter->num_queues; i++) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= 0x20; /* PTHRESH */ rxdctl |= 4 << 8; /* HTHRESH */ rxdctl |= 4 << 16;/* WTHRESH */ rxdctl |= 1 << 24; /* Switch to granularity */ E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } } if (adapter->hw.mac.type >= e1000_pch2lan) { if (if_getmtu(ifp) > ETHERMTU) e1000_lv_jumbo_workaround_ich8lan(hw, TRUE); else e1000_lv_jumbo_workaround_ich8lan(hw, FALSE); } /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Strip the CRC */ if (!em_disable_crc_stripping) rctl |= E1000_RCTL_SECRC; /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; rctl &= ~E1000_RCTL_SBP; if (adapter->rx_mbuf_sz == MCLBYTES) rctl |= E1000_RCTL_SZ_2048; else if (adapter->rx_mbuf_sz == MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; if (if_getmtu(ifp) > ETHERMTU) rctl |= E1000_RCTL_LPE; else rctl &= ~E1000_RCTL_LPE; /* Write out the settings */ E1000_WRITE_REG(hw, E1000_RCTL, rctl); return; } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. * * For polling we also now return the number of cleaned packets *********************************************************************/ static bool em_rxeof(struct rx_ring *rxr, int count, int *done) { struct adapter *adapter = rxr->adapter; if_t ifp = adapter->ifp; struct mbuf *mp, *sendmp; u8 status = 0; u16 len; int i, processed, rxdone = 0; bool eop; struct e1000_rx_desc *cur; EM_RX_LOCK(rxr); /* Sync the ring */ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, rxr->me, &processed)) { EM_RX_UNLOCK(rxr); return (FALSE); } #endif /* DEV_NETMAP */ for (i = rxr->next_to_check, processed = 0; count != 0;) { if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) break; cur = &rxr->rx_base[i]; status = cur->status; mp = sendmp = NULL; if ((status & E1000_RXD_STAT_DD) == 0) break; len = le16toh(cur->length); eop = (status & E1000_RXD_STAT_EOP) != 0; if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) || (rxr->discard == TRUE)) { adapter->dropped_pkts++; ++rxr->rx_discarded; if (!eop) /* Catch subsequent segs */ rxr->discard = TRUE; else rxr->discard = FALSE; em_rx_discard(rxr, i); goto next_desc; } bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map); /* Assign correct length to the current fragment */ mp = rxr->rx_buffers[i].m_head; mp->m_len = len; /* Trigger for refresh */ rxr->rx_buffers[i].m_head = NULL; /* First segment? */ if (rxr->fmp == NULL) { mp->m_pkthdr.len = len; rxr->fmp = rxr->lmp = mp; } else { /* Chain mbuf's together */ mp->m_flags &= ~M_PKTHDR; rxr->lmp->m_next = mp; rxr->lmp = mp; rxr->fmp->m_pkthdr.len += len; } if (eop) { --count; sendmp = rxr->fmp; if_setrcvif(sendmp, ifp); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); em_receive_checksum(cur, sendmp); #ifndef __NO_STRICT_ALIGNMENT if (adapter->hw.mac.max_frame_size > (MCLBYTES - ETHER_ALIGN) && em_fixup_rx(rxr) != 0) goto skip; #endif if (status & E1000_RXD_STAT_VP) { if_setvtag(sendmp, le16toh(cur->special)); sendmp->m_flags |= M_VLANTAG; } #ifndef __NO_STRICT_ALIGNMENT skip: #endif rxr->fmp = rxr->lmp = NULL; } next_desc: /* Sync the ring */ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* Zero out the receive descriptors status. */ cur->status = 0; ++rxdone; /* cumulative for POLL */ ++processed; /* Advance our pointers to the next descriptor. */ if (++i == adapter->num_rx_desc) i = 0; /* Send to the stack */ if (sendmp != NULL) { rxr->next_to_check = i; EM_RX_UNLOCK(rxr); if_input(ifp, sendmp); EM_RX_LOCK(rxr); i = rxr->next_to_check; } /* Only refresh mbufs every 8 descriptors */ if (processed == 8) { em_refresh_mbufs(rxr, i); processed = 0; } } /* Catch any remaining refresh work */ if (e1000_rx_unrefreshed(rxr)) em_refresh_mbufs(rxr, i); rxr->next_to_check = i; if (done != NULL) *done = rxdone; EM_RX_UNLOCK(rxr); return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE); } static __inline void em_rx_discard(struct rx_ring *rxr, int i) { struct em_buffer *rbuf; rbuf = &rxr->rx_buffers[i]; bus_dmamap_unload(rxr->rxtag, rbuf->map); /* Free any previous pieces */ if (rxr->fmp != NULL) { rxr->fmp->m_flags |= M_PKTHDR; m_freem(rxr->fmp); rxr->fmp = NULL; rxr->lmp = NULL; } /* ** Free buffer and allow em_refresh_mbufs() ** to clean up and recharge buffer. */ if (rbuf->m_head) { m_free(rbuf->m_head); rbuf->m_head = NULL; } return; } #ifndef __NO_STRICT_ALIGNMENT /* * When jumbo frames are enabled we should realign entire payload on * architecures with strict alignment. This is serious design mistake of 8254x * as it nullifies DMA operations. 8254x just allows RX buffer size to be * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its * payload. On architecures without strict alignment restrictions 8254x still * performs unaligned memory access which would reduce the performance too. * To avoid copying over an entire frame to align, we allocate a new mbuf and * copy ethernet header to the new mbuf. The new mbuf is prepended into the * existing mbuf chain. * * Be aware, best performance of the 8254x is achived only when jumbo frame is * not used at all on architectures with strict alignment. */ static int em_fixup_rx(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct mbuf *m, *n; int error; error = 0; m = rxr->fmp; if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); m->m_data += ETHER_HDR_LEN; } else { MGETHDR(n, M_NOWAIT, MT_DATA); if (n != NULL) { bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; rxr->fmp = n; } else { adapter->dropped_pkts++; m_freem(rxr->fmp); rxr->fmp = NULL; error = ENOMEM; } } return (error); } #endif /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp) { mp->m_pkthdr.csum_flags = 0; /* Ignore Checksum bit is set */ if (rx_desc->status & E1000_RXD_STAT_IXSM) return; if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE)) return; /* IP Checksum Good? */ if (rx_desc->status & E1000_RXD_STAT_IPCS) mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); /* TCP or UDP checksum */ if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); mp->m_pkthdr.csum_data = htons(0xffff); } } /* * This routine is run via an vlan * config EVENT */ static void em_register_vlan(void *arg, if_t ifp, u16 vtag) { struct adapter *adapter = if_getsoftc(ifp); u32 index, bit; if ((void*)adapter != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */ return; EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } /* * This routine is run via an vlan * unconfig EVENT */ static void em_unregister_vlan(void *arg, if_t ifp, u16 vtag) { struct adapter *adapter = if_getsoftc(ifp); u32 index, bit; if (adapter != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } static void em_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 reg; /* ** We get here thru init_locked, meaning ** a soft reset, this has already cleared ** the VFTA and other state, so if there ** have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* ** A soft reset zero's out the VFTA, so ** we need to repopulate it now. */ for (int i = 0; i < EM_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, i, adapter->shadow_vfta[i]); reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); /* Enable the Filter Table */ reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } static void em_enable_intr(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 ims_mask = IMS_ENABLE_MASK; if (hw->mac.type == e1000_82574) { E1000_WRITE_REG(hw, EM_EIAC, adapter->ims); ims_mask |= adapter->ims; } E1000_WRITE_REG(hw, E1000_IMS, ims_mask); } static void em_disable_intr(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; if (hw->mac.type == e1000_82574) E1000_WRITE_REG(hw, EM_EIAC, 0); E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); } /* * Bit of a misnomer, what this really means is * to enable OS management of the system... aka * to disable special hardware management features */ static void em_init_manageability(struct adapter *adapter) { /* A shared code workaround */ #define E1000_82542_MANC2H E1000_MANC2H if (adapter->has_manage) { int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; #define E1000_MNG2HOST_PORT_623 (1 << 5) #define E1000_MNG2HOST_PORT_664 (1 << 6) manc2h |= E1000_MNG2HOST_PORT_623; manc2h |= E1000_MNG2HOST_PORT_664; E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void em_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means * that the driver is loaded. For AMT version type f/w * this means that the network i/f is open. */ static void em_get_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm | E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); return; } /* * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is no longer loaded. For AMT versions of the * f/w this means that the network i/f is closed. */ static void em_release_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (!adapter->has_manage) return; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm & ~E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); return; } static int em_is_valid_ether_addr(u8 *addr) { char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { return (FALSE); } return (TRUE); } /* ** Parse the interface capabilities with regard ** to both system management and wake-on-lan for ** later use. */ static void em_get_wakeup(device_t dev) { struct adapter *adapter = device_get_softc(dev); u16 eeprom_data = 0, device_id, apme_mask; adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); apme_mask = EM_EEPROM_APME; switch (adapter->hw.mac.type) { case e1000_82573: case e1000_82583: adapter->has_amt = TRUE; /* Falls thru */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: if (adapter->hw.bus.func == 1) { e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: case e1000_pchlan: case e1000_pch2lan: apme_mask = E1000_WUC_APME; adapter->has_amt = TRUE; eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC); break; default: e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; } if (eeprom_data & apme_mask) adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); /* * We have the eeprom settings, now apply the special cases * where the eeprom may be wrong or the board won't support * wake on lan on a particular port */ device_id = pci_get_device(dev); switch (device_id) { case E1000_DEV_ID_82571EB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->wol = 0; break; case E1000_DEV_ID_82571EB_QUAD_COPPER: case E1000_DEV_ID_82571EB_QUAD_FIBER: case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) adapter->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; } return; } /* * Enable PCI Wake On Lan capability */ static void em_enable_wakeup(device_t dev) { struct adapter *adapter = device_get_softc(dev); if_t ifp = adapter->ifp; u32 pmc, ctrl, ctrl_ext, rctl; u16 status; if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0)) return; /* Advertise the wakeup capability */ ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); if ((adapter->hw.mac.type == e1000_ich8lan) || (adapter->hw.mac.type == e1000_pchlan) || (adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_ich10lan)) e1000_suspend_workarounds_ich8lan(&adapter->hw); /* Keep the laser running on Fiber adapters */ if (adapter->hw.phy.media_type == e1000_media_type_fiber || adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext); } /* ** Determine type of Wakeup: note that wol ** is set with all bits on by default. */ if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0) adapter->wol &= ~E1000_WUFC_MAG; if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0) adapter->wol &= ~E1000_WUFC_MC; else { rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); } if ((adapter->hw.mac.type == e1000_pchlan) || (adapter->hw.mac.type == e1000_pch2lan)) { if (em_enable_phy_wakeup(adapter)) return; } else { E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); } if (adapter->hw.phy.type == e1000_phy_igp_3) e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); /* Request PME */ status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if (if_getcapenable(ifp) & IFCAP_WOL) status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); return; } /* ** WOL in the newer chipset interfaces (pchlan) ** require thing to be copied into the phy */ static int em_enable_phy_wakeup(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 mreg, ret = 0; u16 preg; /* copy MAC RARs to PHY RARs */ e1000_copy_rx_addrs_to_phy_ich8lan(hw); /* copy MAC MTA to PHY MTA */ for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) { mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); e1000_write_phy_reg(hw, BM_MTA(i) + 1, (u16)((mreg >> 16) & 0xFFFF)); } /* configure PHY Rx Control register */ e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg); mreg = E1000_READ_REG(hw, E1000_RCTL); if (mreg & E1000_RCTL_UPE) preg |= BM_RCTL_UPE; if (mreg & E1000_RCTL_MPE) preg |= BM_RCTL_MPE; preg &= ~(BM_RCTL_MO_MASK); if (mreg & E1000_RCTL_MO_3) preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) << BM_RCTL_MO_SHIFT); if (mreg & E1000_RCTL_BAM) preg |= BM_RCTL_BAM; if (mreg & E1000_RCTL_PMCF) preg |= BM_RCTL_PMCF; mreg = E1000_READ_REG(hw, E1000_CTRL); if (mreg & E1000_CTRL_RFCE) preg |= BM_RCTL_RFCE; e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg); /* enable PHY wakeup in MAC register */ E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN); E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); /* configure and enable PHY wakeup in PHY registers */ e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol); e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); /* activate PHY wakeup */ ret = hw->phy.ops.acquire(hw); if (ret) { printf("Could not acquire PHY\n"); return ret; } e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); if (ret) { printf("Could not read PHY page 769\n"); goto out; } preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); if (ret) printf("Could not set PHY Host Wakeup bit\n"); out: hw->phy.ops.release(hw); return ret; } static void em_led_func(void *arg, int onoff) { struct adapter *adapter = arg; EM_CORE_LOCK(adapter); if (onoff) { e1000_setup_led(&adapter->hw); e1000_led_on(&adapter->hw); } else { e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } EM_CORE_UNLOCK(adapter); } /* ** Disable the L0S and L1 LINK states */ static void em_disable_aspm(struct adapter *adapter) { int base, reg; u16 link_cap,link_ctrl; device_t dev = adapter->dev; switch (adapter->hw.mac.type) { case e1000_82573: case e1000_82574: case e1000_82583: break; default: return; } if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0) return; reg = base + PCIER_LINK_CAP; link_cap = pci_read_config(dev, reg, 2); if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0) return; reg = base + PCIER_LINK_CTL; link_ctrl = pci_read_config(dev, reg, 2); link_ctrl &= ~PCIEM_LINK_CTL_ASPMC; pci_write_config(dev, reg, link_ctrl, 2); return; } /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void em_update_stats_counters(struct adapter *adapter) { if(adapter->hw.phy.media_type == e1000_media_type_copper || (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS); adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC); } adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS); adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC); adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC); adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL); adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC); adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL); adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC); adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC); adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC); adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC); adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC); adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255); adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. */ /* Both registers clear on the read of the high dword */ adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32); adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32); adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC); /* Interrupt Counts */ adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC); adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC); adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC); adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC); adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC); adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC); adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC); adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC); adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC); if (adapter->hw.mac.type >= e1000_82543) { adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, E1000_RXERRC); adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, E1000_TNCRS); adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, E1000_CEXTERR); adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, E1000_TSCTC); adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, E1000_TSCTFC); } } static uint64_t em_get_counter(if_t ifp, ift_counter cnt) { struct adapter *adapter; adapter = if_getsoftc(ifp); switch (cnt) { case IFCOUNTER_COLLISIONS: return (adapter->stats.colc); case IFCOUNTER_IERRORS: return (adapter->dropped_pkts + adapter->stats.rxerrc + adapter->stats.crcerrs + adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc + adapter->stats.mpc + adapter->stats.cexterr); case IFCOUNTER_OERRORS: return (adapter->stats.ecol + adapter->stats.latecol + adapter->watchdog_events); default: return (if_get_counter_default(ifp, cnt)); } } /* Export a single 32-bit register via a read-only sysctl. */ static int em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; u_int val; adapter = oidp->oid_arg1; val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* * Add sysctl variables, one per statistic, to the system. */ static void em_add_hw_stats(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = &adapter->stats; struct sysctl_oid *stat_node, *queue_node, *int_node; struct sysctl_oid_list *stat_list, *queue_list, *int_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSIX IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", CTLFLAG_RD, &adapter->mbuf_alloc_failed, "Std mbuf failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", CTLFLAG_RD, &adapter->mbuf_cluster_failed, "Std mbuf cluster failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &adapter->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL, em_sysctl_reg_handler, "IU", "Device Control Register"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL, em_sysctl_reg_handler, "IU", "Receiver Control Register"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "TX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq", CTLFLAG_RD, &txr->tx_irq, "Queue MSI-X Transmit Interrupts"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txr->no_desc_avail, "Queue No Descriptor Available"); snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "RX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq", CTLFLAG_RD, &rxr->rx_irq, "Queue MSI-X Receive Interrupts"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, &stats->scc, "Single collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, &stats->colc, "Collision Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &adapter->stats.symerrs, "Symbol Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &adapter->stats.sec, "Sequence Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &adapter->stats.dc, "Defer Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &adapter->stats.mpc, "Missed Packets"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &adapter->stats.rnbc, "Receive No Buffers"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &adapter->stats.ruc, "Receive Undersize"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &adapter->stats.rfc, "Fragmented Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &adapter->stats.roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &adapter->stats.rjc, "Recevied Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &adapter->stats.rxerrc, "Receive Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &adapter->stats.crcerrs, "CRC errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &adapter->stats.algnerrc, "Alignment Errors"); /* On 82575 these are collision counts */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &adapter->stats.cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &adapter->stats.xonrxc, "XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &adapter->stats.xontxc, "XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &adapter->stats.xoffrxc, "XOFF Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &adapter->stats.xofftxc, "XOFF Transmitted"); /* Packet Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &adapter->stats.tpr, "Total Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &adapter->stats.gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &adapter->stats.prc64, "64 byte frames received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &adapter->stats.prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &adapter->stats.prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &adapter->stats.prc511, "256-511 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &adapter->stats.prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &adapter->stats.gorc, "Good Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &adapter->stats.gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &adapter->stats.tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &adapter->stats.gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &adapter->stats.bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &adapter->stats.mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &adapter->stats.ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &adapter->stats.ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &adapter->stats.ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &adapter->stats.ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &adapter->stats.ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &adapter->stats.tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &adapter->stats.tsctfc, "TSO Contexts Failed"); /* Interrupt Stats */ int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD, NULL, "Interrupt Statistics"); int_list = SYSCTL_CHILDREN(int_node); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts", CTLFLAG_RD, &adapter->stats.iac, "Interrupt Assertion Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", CTLFLAG_RD, &adapter->stats.icrxptc, "Interrupt Cause Rx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", CTLFLAG_RD, &adapter->stats.icrxatc, "Interrupt Cause Rx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", CTLFLAG_RD, &adapter->stats.ictxptc, "Interrupt Cause Tx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", CTLFLAG_RD, &adapter->stats.ictxatc, "Interrupt Cause Tx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", CTLFLAG_RD, &adapter->stats.ictxqec, "Interrupt Cause Tx Queue Empty Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", CTLFLAG_RD, &adapter->stats.ictxqmtc, "Interrupt Cause Tx Queue Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", CTLFLAG_RD, &adapter->stats.icrxdmtc, "Interrupt Cause Rx Desc Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun", CTLFLAG_RD, &adapter->stats.icrxoc, "Interrupt Cause Receiver Overrun Count"); } /********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words, stuff that matters is in that extent. * **********************************************************************/ static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. */ if (result == 1) em_print_nvm_info(adapter); return (error); } static void em_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* Its a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ",row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { struct em_int_delay_info *info; struct adapter *adapter; u32 regval; int error, usecs, ticks; info = (struct em_int_delay_info *)arg1; usecs = info->value; error = sysctl_handle_int(oidp, &usecs, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); if (info->offset == E1000_ITR) /* units are 256ns here */ ticks *= 4; adapter = info->adapter; EM_CORE_LOCK(adapter); regval = E1000_READ_OFFSET(&adapter->hw, info->offset); regval = (regval & ~0xffff) | (ticks & 0xffff); /* Handle a few special cases. */ switch (info->offset) { case E1000_RDTR: break; case E1000_TIDV: if (ticks == 0) { adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; /* Don't write 0 into the TIDV register. */ regval++; } else adapter->txd_cmd |= E1000_TXD_CMD_IDE; break; } E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); EM_CORE_UNLOCK(adapter); return (0); } static void em_add_int_delay_sysctl(struct adapter *adapter, const char *name, const char *description, struct em_int_delay_info *info, int offset, int value) { info->adapter = adapter; info->offset = offset; info->value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, em_sysctl_int_delay, "I", description); } static void em_set_sysctl_value(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLFLAG_RW, limit, value, description); } /* ** Set flow control using sysctl: ** Flow control values: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ static int em_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error; static int input = 3; /* default is full */ struct adapter *adapter = (struct adapter *) arg1; error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (input == adapter->fc) /* no change? */ return (error); switch (input) { case e1000_fc_rx_pause: case e1000_fc_tx_pause: case e1000_fc_full: case e1000_fc_none: adapter->hw.fc.requested_mode = input; adapter->fc = input; break; default: /* Do nothing */ return (error); } adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; e1000_force_mac_fc(&adapter->hw); return (error); } /* ** Manage Energy Efficient Ethernet: ** Control values: ** 0/1 - enabled/disabled */ static int em_sysctl_eee(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error, value; value = adapter->hw.dev_spec.ich8lan.eee_disable; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); EM_CORE_LOCK(adapter); adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0); em_init_locked(adapter); EM_CORE_UNLOCK(adapter); return (0); } static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { adapter = (struct adapter *)arg1; em_print_debug_info(adapter); } return (error); } /* ** This routine is meant to be fluid, add whatever is ** needed for debugging a problem. -jfv */ static void em_print_debug_info(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) printf("Interface is RUNNING "); else printf("Interface is NOT RUNNING\n"); if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE) printf("and INACTIVE\n"); else printf("and ACTIVE\n"); for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { device_printf(dev, "TX Queue %d ------\n", i); device_printf(dev, "hw tdh = %d, hw tdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_TDH(i)), E1000_READ_REG(&adapter->hw, E1000_TDT(i))); device_printf(dev, "Tx Queue Status = %d\n", txr->busy); device_printf(dev, "TX descriptors avail = %d\n", txr->tx_avail); device_printf(dev, "Tx Descriptors avail failure = %ld\n", txr->no_desc_avail); device_printf(dev, "RX Queue %d ------\n", i); device_printf(dev, "hw rdh = %d, hw rdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_RDH(i)), E1000_READ_REG(&adapter->hw, E1000_RDT(i))); device_printf(dev, "RX discarded packets = %ld\n", rxr->rx_discarded); device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check); device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh); } } #ifdef EM_MULTIQUEUE /* * 82574 only: * Write a new value to the EEPROM increasing the number of MSIX * vectors from 3 to 5, for proper multiqueue support. */ static void em_enable_vectors_82574(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; device_t dev = adapter->dev; u16 edata; e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); printf("Current cap: %#06x\n", edata); if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) { device_printf(dev, "Writing to eeprom: increasing " "reported MSIX vectors from 3 to 5...\n"); edata &= ~(EM_NVM_MSIX_N_MASK); edata |= 4 << EM_NVM_MSIX_N_SHIFT; e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); e1000_update_nvm_checksum(hw); device_printf(dev, "Writing to eeprom: done\n"); } } #endif #ifdef DDB DB_COMMAND(em_reset_dev, em_ddb_reset_dev) { devclass_t dc; int max_em; dc = devclass_find("em"); max_em = devclass_get_maxunit(dc); for (int index = 0; index < (max_em - 1); index++) { device_t dev; dev = devclass_get_device(dc, index); if (device_get_driver(dev) == &em_driver) { struct adapter *adapter = device_get_softc(dev); EM_CORE_LOCK(adapter); em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } } } DB_COMMAND(em_dump_queue, em_ddb_dump_queue) { devclass_t dc; int max_em; dc = devclass_find("em"); max_em = devclass_get_maxunit(dc); for (int index = 0; index < (max_em - 1); index++) { device_t dev; dev = devclass_get_device(dc, index); if (device_get_driver(dev) == &em_driver) em_print_debug_info(device_get_softc(dev)); } } #endif Index: projects/clang-trunk/sys/dev/e1000/if_em.h =================================================================== --- projects/clang-trunk/sys/dev/e1000/if_em.h (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/if_em.h (revision 287502) @@ -1,547 +1,544 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _EM_H_DEFINED_ #define _EM_H_DEFINED_ /* Tunables */ /* * EM_TXD: Maximum number of Transmit Descriptors * Valid Range: 80-256 for 82542 and 82543-based adapters * 80-4096 for others * Default Value: 256 * This value is the number of transmit descriptors allocated by the driver. * Increasing this value allows the driver to queue more transmits. Each * descriptor is 16 bytes. * Since TDLEN should be multiple of 128bytes, the number of transmit * desscriptors should meet the following condition. * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 */ #define EM_MIN_TXD 80 #define EM_MAX_TXD 4096 #ifdef EM_MULTIQUEUE #define EM_DEFAULT_TXD 4096 #else #define EM_DEFAULT_TXD 1024 #endif /* * EM_RXD - Maximum number of receive Descriptors * Valid Range: 80-256 for 82542 and 82543-based adapters * 80-4096 for others * Default Value: 256 * This value is the number of receive descriptors allocated by the driver. * Increasing this value allows the driver to buffer more incoming packets. * Each descriptor is 16 bytes. A receive buffer is also allocated for each * descriptor. The maximum MTU size is 16110. * Since TDLEN should be multiple of 128bytes, the number of transmit * desscriptors should meet the following condition. * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 */ #define EM_MIN_RXD 80 #define EM_MAX_RXD 4096 #ifdef EM_MULTIQUEUE #define EM_DEFAULT_RXD 4096 #else #define EM_DEFAULT_RXD 1024 #endif /* * EM_TIDV - Transmit Interrupt Delay Value * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value delays the generation of transmit interrupts in units of * 1.024 microseconds. Transmit interrupt reduction can improve CPU * efficiency if properly tuned for specific network traffic. If the * system is reporting dropped transmits, this value may be set too high * causing the driver to run out of available transmit descriptors. */ #define EM_TIDV 64 /* * EM_TADV - Transmit Absolute Interrupt Delay Value * (Not valid for 82542/82543/82544) * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value, in units of 1.024 microseconds, limits the delay in which a * transmit interrupt is generated. Useful only if EM_TIDV is non-zero, * this value ensures that an interrupt is generated after the initial * packet is sent on the wire within the set amount of time. Proper tuning, * along with EM_TIDV, may improve traffic throughput in specific * network conditions. */ #define EM_TADV 64 /* * EM_RDTR - Receive Interrupt Delay Timer (Packet Timer) * Valid Range: 0-65535 (0=off) * Default Value: 0 * This value delays the generation of receive interrupts in units of 1.024 * microseconds. Receive interrupt reduction can improve CPU efficiency if * properly tuned for specific network traffic. Increasing this value adds * extra latency to frame reception and can end up decreasing the throughput * of TCP traffic. If the system is reporting dropped receives, this value * may be set too high, causing the driver to run out of available receive * descriptors. * * CAUTION: When setting EM_RDTR to a value other than 0, adapters * may hang (stop transmitting) under certain network conditions. * If this occurs a WATCHDOG message is logged in the system * event log. In addition, the controller is automatically reset, * restoring the network connection. To eliminate the potential * for the hang ensure that EM_RDTR is set to 0. */ #ifdef EM_MULTIQUEUE #define EM_RDTR 64 #else #define EM_RDTR 0 #endif /* * Receive Interrupt Absolute Delay Timer (Not valid for 82542/82543/82544) * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value, in units of 1.024 microseconds, limits the delay in which a * receive interrupt is generated. Useful only if EM_RDTR is non-zero, * this value ensures that an interrupt is generated after the initial * packet is received within the set amount of time. Proper tuning, * along with EM_RDTR, may improve traffic throughput in specific network * conditions. */ #ifdef EM_MULTIQUEUE #define EM_RADV 128 #else #define EM_RADV 64 #endif /* * This parameter controls the max duration of transmit watchdog. */ #define EM_WATCHDOG (10 * hz) /* * This parameter controls when the driver calls the routine to reclaim * transmit descriptors. */ #define EM_TX_CLEANUP_THRESHOLD (adapter->num_tx_desc / 8) /* * This parameter controls whether or not autonegotation is enabled. * 0 - Disable autonegotiation * 1 - Enable autonegotiation */ #define DO_AUTO_NEG 1 /* * This parameter control whether or not the driver will wait for * autonegotiation to complete. * 1 - Wait for autonegotiation to complete * 0 - Don't wait for autonegotiation to complete */ #define WAIT_FOR_AUTO_NEG_DEFAULT 0 /* Tunables -- End */ #define AUTONEG_ADV_DEFAULT (ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ ADVERTISE_1000_FULL) #define AUTO_ALL_MODES 0 /* PHY master/slave setting */ #define EM_MASTER_SLAVE e1000_ms_hw_default /* * Micellaneous constants */ #define EM_VENDOR_ID 0x8086 #define EM_FLASH 0x0014 #define EM_JUMBO_PBA 0x00000028 #define EM_DEFAULT_PBA 0x00000030 #define EM_SMARTSPEED_DOWNSHIFT 3 #define EM_SMARTSPEED_MAX 15 #define EM_MAX_LOOP 10 #define MAX_NUM_MULTICAST_ADDRESSES 128 #define PCI_ANY_ID (~0U) #define ETHER_ALIGN 2 #define EM_FC_PAUSE_TIME 0x0680 #define EM_EEPROM_APME 0x400; #define EM_82544_APME 0x0004; /* * Driver state logic for the detection of a hung state * in hardware. Set TX_HUNG whenever a TX packet is used * (data is sent) and clear it when txeof() is invoked if * any descriptors from the ring are cleaned/reclaimed. * Increment internal counter if no descriptors are cleaned * and compare to TX_MAXTRIES. When counter > TX_MAXTRIES, * reset adapter. */ #define EM_TX_IDLE 0x00000000 #define EM_TX_BUSY 0x00000001 #define EM_TX_HUNG 0x80000000 #define EM_TX_MAXTRIES 10 -#define PCICFG_DESC_RING_STATUS 0xe4 -#define FLUSH_DESC_REQUIRED 0x100 - /* * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will * also optimize cache line size effect. H/W supports up to cache line size 128. */ #define EM_DBA_ALIGN 128 /* * See Intel 82574 Driver Programming Interface Manual, Section 10.2.6.9 */ #define TARC_COMPENSATION_MODE (1 << 7) /* Compensation Mode */ #define TARC_SPEED_MODE_BIT (1 << 21) /* On PCI-E MACs only */ #define TARC_MQ_FIX (1 << 23) | \ (1 << 24) | \ (1 << 25) /* Handle errata in MQ mode */ #define TARC_ERRATA_BIT (1 << 26) /* Note from errata on 82574 */ /* PCI Config defines */ #define EM_BAR_TYPE(v) ((v) & EM_BAR_TYPE_MASK) #define EM_BAR_TYPE_MASK 0x00000001 #define EM_BAR_TYPE_MMEM 0x00000000 #define EM_BAR_TYPE_FLASH 0x0014 #define EM_BAR_MEM_TYPE(v) ((v) & EM_BAR_MEM_TYPE_MASK) #define EM_BAR_MEM_TYPE_MASK 0x00000006 #define EM_BAR_MEM_TYPE_32BIT 0x00000000 #define EM_BAR_MEM_TYPE_64BIT 0x00000004 #define EM_MSIX_BAR 3 /* On 82575 */ /* More backward compatibility */ #if __FreeBSD_version < 900000 #define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD #endif /* Defines for printing debug information */ #define DEBUG_INIT 0 #define DEBUG_IOCTL 0 #define DEBUG_HW 0 #define INIT_DEBUGOUT(S) if (DEBUG_INIT) printf(S "\n") #define INIT_DEBUGOUT1(S, A) if (DEBUG_INIT) printf(S "\n", A) #define INIT_DEBUGOUT2(S, A, B) if (DEBUG_INIT) printf(S "\n", A, B) #define IOCTL_DEBUGOUT(S) if (DEBUG_IOCTL) printf(S "\n") #define IOCTL_DEBUGOUT1(S, A) if (DEBUG_IOCTL) printf(S "\n", A) #define IOCTL_DEBUGOUT2(S, A, B) if (DEBUG_IOCTL) printf(S "\n", A, B) #define HW_DEBUGOUT(S) if (DEBUG_HW) printf(S "\n") #define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A) #define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B) #define EM_MAX_SCATTER 64 #define EM_VFTA_SIZE 128 #define EM_TSO_SIZE (65535 + sizeof(struct ether_vlan_header)) #define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */ #define EM_MSIX_MASK 0x01F00000 /* For 82574 use */ #define EM_MSIX_LINK 0x01000000 /* For 82574 use */ #define ETH_ZLEN 60 #define ETH_ADDR_LEN 6 #define CSUM_OFFLOAD 7 /* Offload bits in mbuf flag */ /* * 82574 has a nonstandard address for EIAC * and since its only used in MSIX, and in * the em driver only 82574 uses MSIX we can * solve it just using this define. */ #define EM_EIAC 0x000DC /* * 82574 only reports 3 MSI-X vectors by default; * defines assisting with making it report 5 are * located here. */ #define EM_NVM_PCIE_CTRL 0x1B #define EM_NVM_MSIX_N_MASK (0x7 << EM_NVM_MSIX_N_SHIFT) #define EM_NVM_MSIX_N_SHIFT 7 /* * Bus dma allocation structure used by * e1000_dma_malloc and e1000_dma_free. */ struct em_dma_alloc { bus_addr_t dma_paddr; caddr_t dma_vaddr; bus_dma_tag_t dma_tag; bus_dmamap_t dma_map; bus_dma_segment_t dma_seg; int dma_nseg; }; struct adapter; struct em_int_delay_info { struct adapter *adapter; /* Back-pointer to the adapter struct */ int offset; /* Register offset to read/write */ int value; /* Current value in usecs */ }; /* * The transmit ring, one per tx queue */ struct tx_ring { struct adapter *adapter; struct mtx tx_mtx; char mtx_name[16]; u32 me; u32 msix; u32 ims; int busy; struct em_dma_alloc txdma; struct e1000_tx_desc *tx_base; struct task tx_task; struct taskqueue *tq; u32 next_avail_desc; u32 next_to_clean; struct em_buffer *tx_buffers; volatile u16 tx_avail; u32 tx_tso; /* last tx was tso */ u16 last_hw_offload; u8 last_hw_ipcso; u8 last_hw_ipcss; u8 last_hw_tucso; u8 last_hw_tucss; #if __FreeBSD_version >= 800000 struct buf_ring *br; #endif /* Interrupt resources */ bus_dma_tag_t txtag; void *tag; struct resource *res; unsigned long tx_irq; unsigned long no_desc_avail; }; /* * The Receive ring, one per rx queue */ struct rx_ring { struct adapter *adapter; u32 me; u32 msix; u32 ims; struct mtx rx_mtx; char mtx_name[16]; u32 payload; struct task rx_task; struct taskqueue *tq; struct e1000_rx_desc *rx_base; struct em_dma_alloc rxdma; u32 next_to_refresh; u32 next_to_check; struct em_buffer *rx_buffers; struct mbuf *fmp; struct mbuf *lmp; /* Interrupt resources */ void *tag; struct resource *res; bus_dma_tag_t rxtag; bool discard; /* Soft stats */ unsigned long rx_irq; unsigned long rx_discarded; unsigned long rx_packets; unsigned long rx_bytes; }; /* Our adapter structure */ struct adapter { if_t ifp; struct e1000_hw hw; /* FreeBSD operating-system-specific structures. */ struct e1000_osdep osdep; struct device *dev; struct cdev *led_dev; struct resource *memory; struct resource *flash; struct resource *msix_mem; struct resource *res; void *tag; u32 linkvec; u32 ivars; struct ifmedia media; struct callout timer; int msix; int if_flags; int max_frame_size; int min_frame_size; struct mtx core_mtx; int em_insert_vlan_header; u32 ims; bool in_detach; /* Task for FAST handling */ struct task link_task; struct task que_task; struct taskqueue *tq; /* private task queue */ eventhandler_tag vlan_attach; eventhandler_tag vlan_detach; u16 num_vlans; u8 num_queues; /* * Transmit rings: * Allocated at run time, an array of rings. */ struct tx_ring *tx_rings; int num_tx_desc; u32 txd_cmd; /* * Receive rings: * Allocated at run time, an array of rings. */ struct rx_ring *rx_rings; int num_rx_desc; u32 rx_process_limit; u32 rx_mbuf_sz; /* Management and WOL features */ u32 wol; bool has_manage; bool has_amt; /* Multicast array memory */ u8 *mta; /* ** Shadow VFTA table, this is needed because ** the real vlan filter table gets cleared during ** a soft reset and the driver needs to be able ** to repopulate it. */ u32 shadow_vfta[EM_VFTA_SIZE]; /* Info about the interface */ u16 link_active; u16 fc; u16 link_speed; u16 link_duplex; u32 smartspeed; struct em_int_delay_info tx_int_delay; struct em_int_delay_info tx_abs_int_delay; struct em_int_delay_info rx_int_delay; struct em_int_delay_info rx_abs_int_delay; struct em_int_delay_info tx_itr; /* Misc stats maintained by the driver */ unsigned long dropped_pkts; unsigned long mbuf_alloc_failed; unsigned long mbuf_cluster_failed; unsigned long no_tx_map_avail; unsigned long no_tx_dma_setup; unsigned long rx_overruns; unsigned long watchdog_events; unsigned long link_irq; struct e1000_hw_stats stats; }; /******************************************************************************** * vendor_info_array * * This array contains the list of Subvendor/Subdevice IDs on which the driver * should load. * ********************************************************************************/ typedef struct _em_vendor_info_t { unsigned int vendor_id; unsigned int device_id; unsigned int subvendor_id; unsigned int subdevice_id; unsigned int index; } em_vendor_info_t; struct em_buffer { int next_eop; /* Index of the desc to watch */ struct mbuf *m_head; bus_dmamap_t map; /* bus_dma map for packet */ }; /* ** Find the number of unrefreshed RX descriptors */ static inline u16 e1000_rx_unrefreshed(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; if (rxr->next_to_check > rxr->next_to_refresh) return (rxr->next_to_check - rxr->next_to_refresh - 1); else return ((adapter->num_rx_desc + rxr->next_to_check) - rxr->next_to_refresh - 1); } #define EM_CORE_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->core_mtx, _name, "EM Core Lock", MTX_DEF) #define EM_TX_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->tx_mtx, _name, "EM TX Lock", MTX_DEF) #define EM_RX_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->rx_mtx, _name, "EM RX Lock", MTX_DEF) #define EM_CORE_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->core_mtx) #define EM_TX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->tx_mtx) #define EM_RX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_mtx) #define EM_CORE_LOCK(_sc) mtx_lock(&(_sc)->core_mtx) #define EM_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_mtx) #define EM_TX_TRYLOCK(_sc) mtx_trylock(&(_sc)->tx_mtx) #define EM_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_mtx) #define EM_CORE_UNLOCK(_sc) mtx_unlock(&(_sc)->core_mtx) #define EM_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_mtx) #define EM_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_mtx) #define EM_CORE_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->core_mtx, MA_OWNED) #define EM_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_mtx, MA_OWNED) #define EM_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_mtx, MA_OWNED) #endif /* _EM_H_DEFINED_ */ Index: projects/clang-trunk/sys/dev/e1000/if_igb.c =================================================================== --- projects/clang-trunk/sys/dev/e1000/if_igb.c (revision 287501) +++ projects/clang-trunk/sys/dev/e1000/if_igb.c (revision 287502) @@ -1,6376 +1,6376 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #include "opt_altq.h" #endif #include "if_igb.h" /********************************************************************* * Driver version: *********************************************************************/ char igb_driver_version[] = "2.5.2"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static igb_vendor_info_t igb_vendor_info_array[] = { {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII, 0, 0, 0}, /* required last entry */ {0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings for all supported NICs. *********************************************************************/ static char *igb_strings[] = { "Intel(R) PRO/1000 Network Connection" }; /********************************************************************* * Function prototypes *********************************************************************/ static int igb_probe(device_t); static int igb_attach(device_t); static int igb_detach(device_t); static int igb_shutdown(device_t); static int igb_suspend(device_t); static int igb_resume(device_t); #ifndef IGB_LEGACY_TX static int igb_mq_start(struct ifnet *, struct mbuf *); static int igb_mq_start_locked(struct ifnet *, struct tx_ring *); static void igb_qflush(struct ifnet *); static void igb_deferred_mq_start(void *, int); #else static void igb_start(struct ifnet *); static void igb_start_locked(struct tx_ring *, struct ifnet *ifp); #endif static int igb_ioctl(struct ifnet *, u_long, caddr_t); static uint64_t igb_get_counter(if_t, ift_counter); static void igb_init(void *); static void igb_init_locked(struct adapter *); static void igb_stop(void *); static void igb_media_status(struct ifnet *, struct ifmediareq *); static int igb_media_change(struct ifnet *); static void igb_identify_hardware(struct adapter *); static int igb_allocate_pci_resources(struct adapter *); static int igb_allocate_msix(struct adapter *); static int igb_allocate_legacy(struct adapter *); static int igb_setup_msix(struct adapter *); static void igb_free_pci_resources(struct adapter *); static void igb_local_timer(void *); static void igb_reset(struct adapter *); static int igb_setup_interface(device_t, struct adapter *); static int igb_allocate_queues(struct adapter *); static void igb_configure_queues(struct adapter *); static int igb_allocate_transmit_buffers(struct tx_ring *); static void igb_setup_transmit_structures(struct adapter *); static void igb_setup_transmit_ring(struct tx_ring *); static void igb_initialize_transmit_units(struct adapter *); static void igb_free_transmit_structures(struct adapter *); static void igb_free_transmit_buffers(struct tx_ring *); static int igb_allocate_receive_buffers(struct rx_ring *); static int igb_setup_receive_structures(struct adapter *); static int igb_setup_receive_ring(struct rx_ring *); static void igb_initialize_receive_units(struct adapter *); static void igb_free_receive_structures(struct adapter *); static void igb_free_receive_buffers(struct rx_ring *); static void igb_free_receive_ring(struct rx_ring *); static void igb_enable_intr(struct adapter *); static void igb_disable_intr(struct adapter *); static void igb_update_stats_counters(struct adapter *); static bool igb_txeof(struct tx_ring *); static __inline void igb_rx_discard(struct rx_ring *, int); static __inline void igb_rx_input(struct rx_ring *, struct ifnet *, struct mbuf *, u32); static bool igb_rxeof(struct igb_queue *, int, int *); static void igb_rx_checksum(u32, struct mbuf *, u32); static int igb_tx_ctx_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *); static int igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *); static void igb_set_promisc(struct adapter *); static void igb_disable_promisc(struct adapter *); static void igb_set_multi(struct adapter *); static void igb_update_link_status(struct adapter *); static void igb_refresh_mbufs(struct rx_ring *, int); static void igb_register_vlan(void *, struct ifnet *, u16); static void igb_unregister_vlan(void *, struct ifnet *, u16); static void igb_setup_vlan_hw_support(struct adapter *); static int igb_xmit(struct tx_ring *, struct mbuf **); static int igb_dma_malloc(struct adapter *, bus_size_t, struct igb_dma_alloc *, int); static void igb_dma_free(struct adapter *, struct igb_dma_alloc *); static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); static void igb_print_nvm_info(struct adapter *); static int igb_is_valid_ether_addr(u8 *); static void igb_add_hw_stats(struct adapter *); static void igb_vf_init_stats(struct adapter *); static void igb_update_vf_stats_counters(struct adapter *); /* Management and WOL Support */ static void igb_init_manageability(struct adapter *); static void igb_release_manageability(struct adapter *); static void igb_get_hw_control(struct adapter *); static void igb_release_hw_control(struct adapter *); static void igb_enable_wakeup(device_t); static void igb_led_func(void *, int); static int igb_irq_fast(void *); static void igb_msix_que(void *); static void igb_msix_link(void *); static void igb_handle_que(void *context, int pending); static void igb_handle_link(void *context, int pending); static void igb_handle_link_locked(struct adapter *); static void igb_set_sysctl_value(struct adapter *, const char *, const char *, int *, int); static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS); static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS); static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS); #ifdef DEVICE_POLLING static poll_handler_t igb_poll; #endif /* POLLING */ /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t igb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, igb_probe), DEVMETHOD(device_attach, igb_attach), DEVMETHOD(device_detach, igb_detach), DEVMETHOD(device_shutdown, igb_shutdown), DEVMETHOD(device_suspend, igb_suspend), DEVMETHOD(device_resume, igb_resume), DEVMETHOD_END }; static driver_t igb_driver = { "igb", igb_methods, sizeof(struct adapter), }; static devclass_t igb_devclass; DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0); MODULE_DEPEND(igb, pci, 1, 1, 1); MODULE_DEPEND(igb, ether, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(igb, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ /********************************************************************* * Tunable default values. *********************************************************************/ static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters"); /* Descriptor defaults */ static int igb_rxd = IGB_DEFAULT_RXD; static int igb_txd = IGB_DEFAULT_TXD; SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0, "Number of receive descriptors per queue"); SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0, "Number of transmit descriptors per queue"); /* ** AIM: Adaptive Interrupt Moderation ** which means that the interrupt rate ** is varied over time based on the ** traffic for that interrupt vector */ static int igb_enable_aim = TRUE; SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0, "Enable adaptive interrupt moderation"); /* * MSIX should be the default for best performance, * but this allows it to be forced off for testing. */ static int igb_enable_msix = 1; SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0, "Enable MSI-X interrupts"); /* ** Tuneable Interrupt rate */ static int igb_max_interrupt_rate = 8000; SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &igb_max_interrupt_rate, 0, "Maximum interrupts per second"); #ifndef IGB_LEGACY_TX /* ** Tuneable number of buffers in the buf-ring (drbr_xxx) */ static int igb_buf_ring_size = IGB_BR_SIZE; SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN, &igb_buf_ring_size, 0, "Size of the bufring"); #endif /* ** Header split causes the packet header to ** be dma'd to a seperate mbuf from the payload. ** this can have memory alignment benefits. But ** another plus is that small packets often fit ** into the header and thus use no cluster. Its ** a very workload dependent type feature. */ static int igb_header_split = FALSE; SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0, "Enable receive mbuf header split"); /* ** This will autoconfigure based on the ** number of CPUs and max supported ** MSIX messages if left at 0. */ static int igb_num_queues = 0; SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0, "Number of queues to configure, 0 indicates autoconfigure"); /* ** Global variable to store last used CPU when binding queues ** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a ** queue is bound to a cpu. */ static int igb_last_bind_cpu = -1; /* How many packets rxeof tries to clean at a time */ static int igb_rx_process_limit = 100; SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &igb_rx_process_limit, 0, "Maximum number of received packets to process at a time, -1 means unlimited"); #ifdef DEV_NETMAP /* see ixgbe.c for details */ #include #endif /* DEV_NETMAP */ /********************************************************************* * Device identification routine * * igb_probe determines if the driver should be loaded on * adapter based on PCI vendor/device id of the adapter. * * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int igb_probe(device_t dev) { char adapter_name[256]; uint16_t pci_vendor_id = 0; uint16_t pci_device_id = 0; uint16_t pci_subvendor_id = 0; uint16_t pci_subdevice_id = 0; igb_vendor_info_t *ent; INIT_DEBUGOUT("igb_probe: begin"); pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != IGB_INTEL_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = igb_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == 0)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == 0))) { sprintf(adapter_name, "%s, Version - %s", igb_strings[ent->index], igb_driver_version); device_set_desc_copy(dev, adapter_name); return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int igb_attach(device_t dev) { struct adapter *adapter; int error = 0; u16 eeprom_data; INIT_DEBUGOUT("igb_attach: begin"); if (resource_disabled("igb", device_get_unit(dev))) { device_printf(dev, "Disabled by device hint\n"); return (ENXIO); } adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); /* SYSCTLs */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_sysctl_nvm_info, "I", "NVM Information"); igb_set_sysctl_value(adapter, "enable_aim", "Interrupt Moderation", &adapter->enable_aim, igb_enable_aim); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_set_flowcntl, "I", "Flow Control"); callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); /* Determine hardware and mac info */ igb_identify_hardware(adapter); /* Setup PCI resources */ if (igb_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* Do Shared Code initialization */ if (e1000_setup_init_funcs(&adapter->hw, TRUE)) { device_printf(dev, "Setup of Shared code failed\n"); error = ENXIO; goto err_pci; } e1000_get_bus_info(&adapter->hw); /* Sysctl for limiting the amount of work done in the taskqueue */ igb_set_sysctl_value(adapter, "rx_processing_limit", "max number of rx packets to process", &adapter->rx_process_limit, igb_rx_process_limit); /* * Validate number of transmit and receive descriptors. It * must not exceed hardware maximum, and must be multiple * of E1000_DBA_ALIGN. */ if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 || (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) { device_printf(dev, "Using %d TX descriptors instead of %d!\n", IGB_DEFAULT_TXD, igb_txd); adapter->num_tx_desc = IGB_DEFAULT_TXD; } else adapter->num_tx_desc = igb_txd; if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 || (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) { device_printf(dev, "Using %d RX descriptors instead of %d!\n", IGB_DEFAULT_RXD, igb_rxd); adapter->num_rx_desc = IGB_DEFAULT_RXD; } else adapter->num_rx_desc = igb_rxd; adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_wait_to_complete = FALSE; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; /* Copper options */ if (adapter->hw.phy.media_type == e1000_media_type_copper) { adapter->hw.phy.mdix = AUTO_ALL_MODES; adapter->hw.phy.disable_polarity_correction = FALSE; adapter->hw.phy.ms_type = IGB_MASTER_SLAVE; } /* * Set the frame limits assuming * standard ethernet sized frames. */ adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; /* ** Allocate and Setup Queues */ if (igb_allocate_queues(adapter)) { error = ENOMEM; goto err_pci; } /* Allocate the appropriate stats memory */ if (adapter->vf_ifp) { adapter->stats = (struct e1000_vf_stats *)malloc(sizeof \ (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO); igb_vf_init_stats(adapter); } else adapter->stats = (struct e1000_hw_stats *)malloc(sizeof \ (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO); if (adapter->stats == NULL) { device_printf(dev, "Can not allocate stats memory\n"); error = ENOMEM; goto err_late; } /* Allocate multicast array memory. */ adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Some adapter-specific advanced features */ if (adapter->hw.mac.type >= e1000_i350) { SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_sysctl_eee, "I", "Disable Energy Efficient Ethernet"); if (adapter->hw.phy.media_type == e1000_media_type_copper) { if (adapter->hw.mac.type == e1000_i354) - e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); + e1000_set_eee_i354(&adapter->hw); else - e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); + e1000_set_eee_i350(&adapter->hw); } } /* ** Start from a known state, this is ** important in reading the nvm and ** mac from that. */ e1000_reset_hw(&adapter->hw); /* Make sure we have a good EEPROM before we read from it */ if (((adapter->hw.mac.type != e1000_i210) && (adapter->hw.mac.type != e1000_i211)) && (e1000_validate_nvm_checksum(&adapter->hw) < 0)) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state, call it again, ** if it fails a second time its a real issue. */ if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } } /* ** Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(&adapter->hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_late; } /* Check its sanity */ if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_late; } /* Setup OS specific network interface */ if (igb_setup_interface(dev, adapter) != 0) goto err_late; /* Now get a good starting state */ igb_reset(adapter); /* Initialize statistics */ igb_update_stats_counters(adapter); adapter->hw.mac.get_link_status = 1; igb_update_link_status(adapter); /* Indicate SOL/IDER usage */ if (e1000_check_reset_block(&adapter->hw)) device_printf(dev, "PHY reset is blocked due to SOL/IDER session.\n"); /* Determine if we have to control management hardware */ adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); /* * Setup Wake-on-Lan */ /* APME bit in EEPROM is mapped to WUC.APME */ eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME; if (eeprom_data) adapter->wol = E1000_WUFC_MAG; /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); igb_add_hw_stats(adapter); /* Tell the stack that the interface is not active */ adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE; adapter->led_dev = led_create(igb_led_func, adapter, device_get_nameunit(dev)); /* ** Configure Interrupts */ if ((adapter->msix > 1) && (igb_enable_msix)) error = igb_allocate_msix(adapter); else /* MSI or Legacy */ error = igb_allocate_legacy(adapter); if (error) goto err_late; #ifdef DEV_NETMAP igb_netmap_attach(adapter); #endif /* DEV_NETMAP */ INIT_DEBUGOUT("igb_attach: end"); return (0); err_late: igb_detach(dev); igb_free_transmit_structures(adapter); igb_free_receive_structures(adapter); igb_release_hw_control(adapter); err_pci: igb_free_pci_resources(adapter); if (adapter->ifp != NULL) if_free(adapter->ifp); free(adapter->mta, M_DEVBUF); IGB_CORE_LOCK_DESTROY(adapter); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int igb_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ifnet *ifp = adapter->ifp; INIT_DEBUGOUT("igb_detach: begin"); /* Make sure VLANS are not using driver */ if (adapter->ifp->if_vlantrunk != NULL) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } ether_ifdetach(adapter->ifp); if (adapter->led_dev != NULL) led_destroy(adapter->led_dev); #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(ifp); #endif IGB_CORE_LOCK(adapter); adapter->in_detach = 1; igb_stop(adapter); IGB_CORE_UNLOCK(adapter); e1000_phy_hw_reset(&adapter->hw); /* Give control back to firmware */ igb_release_manageability(adapter); igb_release_hw_control(adapter); if (adapter->wol) { E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); igb_enable_wakeup(dev); } /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); callout_drain(&adapter->timer); #ifdef DEV_NETMAP netmap_detach(adapter->ifp); #endif /* DEV_NETMAP */ igb_free_pci_resources(adapter); bus_generic_detach(dev); if_free(ifp); igb_free_transmit_structures(adapter); igb_free_receive_structures(adapter); if (adapter->mta != NULL) free(adapter->mta, M_DEVBUF); IGB_CORE_LOCK_DESTROY(adapter); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int igb_shutdown(device_t dev) { return igb_suspend(dev); } /* * Suspend/resume device methods. */ static int igb_suspend(device_t dev) { struct adapter *adapter = device_get_softc(dev); IGB_CORE_LOCK(adapter); igb_stop(adapter); igb_release_manageability(adapter); igb_release_hw_control(adapter); if (adapter->wol) { E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); igb_enable_wakeup(dev); } IGB_CORE_UNLOCK(adapter); return bus_generic_suspend(dev); } static int igb_resume(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct tx_ring *txr = adapter->tx_rings; struct ifnet *ifp = adapter->ifp; IGB_CORE_LOCK(adapter); igb_init_locked(adapter); igb_init_manageability(adapter); if ((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); #ifndef IGB_LEGACY_TX /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); } } IGB_CORE_UNLOCK(adapter); return bus_generic_resume(dev); } #ifdef IGB_LEGACY_TX /********************************************************************* * Transmit entry point * * igb_start is called by the stack to initiate a transmit. * The driver will remain in this routine as long as there are * packets to transmit and transmit resources are available. * In case resources are not available stack is notified and * the packet is requeued. **********************************************************************/ static void igb_start_locked(struct tx_ring *txr, struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct mbuf *m_head; IGB_TX_LOCK_ASSERT(txr); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; if (!adapter->link_active) return; /* Call cleanup if number of TX descriptors low */ if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) igb_txeof(txr); while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { if (txr->tx_avail <= IGB_MAX_SCATTER) { txr->queue_status |= IGB_QUEUE_DEPLETED; break; } IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* * Encapsulation can modify our pointer, and or make it * NULL on failure. In that event, we can't requeue. */ if (igb_xmit(txr, &m_head)) { if (m_head != NULL) IFQ_DRV_PREPEND(&ifp->if_snd, m_head); if (txr->tx_avail <= IGB_MAX_SCATTER) txr->queue_status |= IGB_QUEUE_DEPLETED; break; } /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); /* Set watchdog on */ txr->watchdog_time = ticks; txr->queue_status |= IGB_QUEUE_WORKING; } } /* * Legacy TX driver routine, called from the * stack, always uses tx[0], and spins for it. * Should not be used with multiqueue tx */ static void igb_start(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IGB_TX_LOCK(txr); igb_start_locked(txr, ifp); IGB_TX_UNLOCK(txr); } return; } #else /* ~IGB_LEGACY_TX */ /* ** Multiqueue Transmit Entry: ** quick turnaround to the stack ** */ static int igb_mq_start(struct ifnet *ifp, struct mbuf *m) { struct adapter *adapter = ifp->if_softc; struct igb_queue *que; struct tx_ring *txr; int i, err = 0; #ifdef RSS uint32_t bucket_id; #endif /* Which queue to use */ /* * When doing RSS, map it to the same outbound queue * as the incoming flow would be mapped to. * * If everything is setup correctly, it should be the * same bucket that the current CPU we're on is. */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { #ifdef RSS if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), &bucket_id) == 0) { /* XXX TODO: spit out something if bucket_id > num_queues? */ i = bucket_id % adapter->num_queues; } else { #endif i = m->m_pkthdr.flowid % adapter->num_queues; #ifdef RSS } #endif } else { i = curcpu % adapter->num_queues; } txr = &adapter->tx_rings[i]; que = &adapter->queues[i]; err = drbr_enqueue(ifp, txr->br, m); if (err) return (err); if (IGB_TX_TRYLOCK(txr)) { igb_mq_start_locked(ifp, txr); IGB_TX_UNLOCK(txr); } else taskqueue_enqueue(que->tq, &txr->txq_task); return (0); } static int igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct mbuf *next; int err = 0, enq = 0; IGB_TX_LOCK_ASSERT(txr); if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || adapter->link_active == 0) return (ENETDOWN); /* Process the queue */ while ((next = drbr_peek(ifp, txr->br)) != NULL) { if ((err = igb_xmit(txr, &next)) != 0) { if (next == NULL) { /* It was freed, move forward */ drbr_advance(ifp, txr->br); } else { /* * Still have one left, it may not be * the same since the transmit function * may have changed it. */ drbr_putback(ifp, txr->br, next); } break; } drbr_advance(ifp, txr->br); enq++; if (next->m_flags & M_MCAST && adapter->vf_ifp) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } if (enq > 0) { /* Set the watchdog */ txr->queue_status |= IGB_QUEUE_WORKING; txr->watchdog_time = ticks; } if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) igb_txeof(txr); if (txr->tx_avail <= IGB_MAX_SCATTER) txr->queue_status |= IGB_QUEUE_DEPLETED; return (err); } /* * Called from a taskqueue to drain queued transmit packets. */ static void igb_deferred_mq_start(void *arg, int pending) { struct tx_ring *txr = arg; struct adapter *adapter = txr->adapter; struct ifnet *ifp = adapter->ifp; IGB_TX_LOCK(txr); if (!drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); IGB_TX_UNLOCK(txr); } /* ** Flush all ring buffers */ static void igb_qflush(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; struct mbuf *m; for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) m_freem(m); IGB_TX_UNLOCK(txr); } if_qflush(ifp); } #endif /* ~IGB_LEGACY_TX */ /********************************************************************* * Ioctl entry point * * igb_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ static int igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct adapter *adapter = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = FALSE; int error = 0; if (adapter->in_detach) return (error); switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) igb_init(adapter); #ifdef INET if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else error = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: { int max_frame_size; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); IGB_CORE_LOCK(adapter); max_frame_size = 9234; if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { IGB_CORE_UNLOCK(adapter); error = EINVAL; break; } ifp->if_mtu = ifr->ifr_mtu; adapter->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; igb_init_locked(adapter); IGB_CORE_UNLOCK(adapter); break; } case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl rcv'd:\ SIOCSIFFLAGS (Set Interface Flags)"); IGB_CORE_LOCK(adapter); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ adapter->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { igb_disable_promisc(adapter); igb_set_promisc(adapter); } } else igb_init_locked(adapter); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) igb_stop(adapter); adapter->if_flags = ifp->if_flags; IGB_CORE_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IGB_CORE_LOCK(adapter); igb_disable_intr(adapter); igb_set_multi(adapter); #ifdef DEVICE_POLLING if (!(ifp->if_capenable & IFCAP_POLLING)) #endif igb_enable_intr(adapter); IGB_CORE_UNLOCK(adapter); } break; case SIOCSIFMEDIA: /* Check SOL/IDER usage */ IGB_CORE_LOCK(adapter); if (e1000_check_reset_block(&adapter->hw)) { IGB_CORE_UNLOCK(adapter); device_printf(adapter->dev, "Media change is" " blocked due to SOL/IDER session.\n"); break; } IGB_CORE_UNLOCK(adapter); case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl rcv'd: \ SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: { int mask, reinit; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); reinit = 0; mask = ifr->ifr_reqcap ^ ifp->if_capenable; #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(igb_poll, ifp); if (error) return (error); IGB_CORE_LOCK(adapter); igb_disable_intr(adapter); ifp->if_capenable |= IFCAP_POLLING; IGB_CORE_UNLOCK(adapter); } else { error = ether_poll_deregister(ifp); /* Enable interrupt even in error case */ IGB_CORE_LOCK(adapter); igb_enable_intr(adapter); ifp->if_capenable &= ~IFCAP_POLLING; IGB_CORE_UNLOCK(adapter); } } #endif if (mask & IFCAP_HWCSUM) { ifp->if_capenable ^= IFCAP_HWCSUM; reinit = 1; } if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; reinit = 1; } if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; reinit = 1; } if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; reinit = 1; } if (mask & IFCAP_VLAN_HWFILTER) { ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; reinit = 1; } if (mask & IFCAP_VLAN_HWTSO) { ifp->if_capenable ^= IFCAP_VLAN_HWTSO; reinit = 1; } if (mask & IFCAP_LRO) { ifp->if_capenable ^= IFCAP_LRO; reinit = 1; } if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) igb_init(adapter); VLAN_CAPABILITIES(ifp); break; } default: error = ether_ioctl(ifp, command, data); break; } return (error); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void igb_init_locked(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; INIT_DEBUGOUT("igb_init: begin"); IGB_CORE_LOCK_ASSERT(adapter); igb_disable_intr(adapter); callout_stop(&adapter->timer); /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); igb_reset(adapter); igb_update_link_status(adapter); E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); /* Set hardware offload abilities */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); #if __FreeBSD_version >= 800000 if (adapter->hw.mac.type == e1000_82576) ifp->if_hwassist |= CSUM_SCTP; #endif } if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; /* Configure for OS presence */ igb_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ igb_setup_transmit_structures(adapter); igb_initialize_transmit_units(adapter); /* Setup Multicast table */ igb_set_multi(adapter); /* ** Figure out the desired mbuf pool ** for doing jumbo/packetsplit */ if (adapter->max_frame_size <= 2048) adapter->rx_mbuf_sz = MCLBYTES; else if (adapter->max_frame_size <= 4096) adapter->rx_mbuf_sz = MJUMPAGESIZE; else adapter->rx_mbuf_sz = MJUM9BYTES; /* Prepare receive descriptors and buffers */ if (igb_setup_receive_structures(adapter)) { device_printf(dev, "Could not setup receive structures\n"); return; } igb_initialize_receive_units(adapter); e1000_rx_fifo_flush_82575(&adapter->hw); /* Enable VLAN support */ if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) igb_setup_vlan_hw_support(adapter); /* Don't lose promiscuous settings */ igb_set_promisc(adapter); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&adapter->timer, hz, igb_local_timer, adapter); e1000_clear_hw_cntrs_base_generic(&adapter->hw); if (adapter->msix > 1) /* Set up queue routing */ igb_configure_queues(adapter); /* this clears any pending interrupts */ E1000_READ_REG(&adapter->hw, E1000_ICR); #ifdef DEVICE_POLLING /* * Only enable interrupts if we are not polling, make sure * they are off otherwise. */ if (ifp->if_capenable & IFCAP_POLLING) igb_disable_intr(adapter); else #endif /* DEVICE_POLLING */ { igb_enable_intr(adapter); E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); } /* Set Energy Efficient Ethernet */ if (adapter->hw.phy.media_type == e1000_media_type_copper) { if (adapter->hw.mac.type == e1000_i354) - e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); + e1000_set_eee_i354(&adapter->hw); else - e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); + e1000_set_eee_i350(&adapter->hw); } } static void igb_init(void *arg) { struct adapter *adapter = arg; IGB_CORE_LOCK(adapter); igb_init_locked(adapter); IGB_CORE_UNLOCK(adapter); } static void igb_handle_que(void *context, int pending) { struct igb_queue *que = context; struct adapter *adapter = que->adapter; struct tx_ring *txr = que->txr; struct ifnet *ifp = adapter->ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { bool more; more = igb_rxeof(que, adapter->rx_process_limit, NULL); IGB_TX_LOCK(txr); igb_txeof(txr); #ifndef IGB_LEGACY_TX /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); /* Do we need another? */ if (more) { taskqueue_enqueue(que->tq, &que->que_task); return; } } #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) return; #endif /* Reenable this interrupt */ if (que->eims) E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); else igb_enable_intr(adapter); } /* Deal with link in a sleepable context */ static void igb_handle_link(void *context, int pending) { struct adapter *adapter = context; IGB_CORE_LOCK(adapter); igb_handle_link_locked(adapter); IGB_CORE_UNLOCK(adapter); } static void igb_handle_link_locked(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct ifnet *ifp = adapter->ifp; IGB_CORE_LOCK_ASSERT(adapter); adapter->hw.mac.get_link_status = 1; igb_update_link_status(adapter); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); #ifndef IGB_LEGACY_TX /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); } } } /********************************************************************* * * MSI/Legacy Deferred * Interrupt Service routine * *********************************************************************/ static int igb_irq_fast(void *arg) { struct adapter *adapter = arg; struct igb_queue *que = adapter->queues; u32 reg_icr; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Hot eject? */ if (reg_icr == 0xffffffff) return FILTER_STRAY; /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; /* * Mask interrupts until the taskqueue is finished running. This is * cheap, just assume that it is needed. This also works around the * MSI message reordering errata on certain systems. */ igb_disable_intr(adapter); taskqueue_enqueue(que->tq, &que->que_task); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) taskqueue_enqueue(que->tq, &adapter->link_task); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; return FILTER_HANDLED; } #ifdef DEVICE_POLLING #if __FreeBSD_version >= 800000 #define POLL_RETURN_COUNT(a) (a) static int #else #define POLL_RETURN_COUNT(a) static void #endif igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct adapter *adapter = ifp->if_softc; struct igb_queue *que; struct tx_ring *txr; u32 reg_icr, rx_done = 0; u32 loop = IGB_MAX_LOOP; bool more; IGB_CORE_LOCK(adapter); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { IGB_CORE_UNLOCK(adapter); return POLL_RETURN_COUNT(rx_done); } if (cmd == POLL_AND_CHECK_STATUS) { reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) igb_handle_link_locked(adapter); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; } IGB_CORE_UNLOCK(adapter); for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; txr = que->txr; igb_rxeof(que, count, &rx_done); IGB_TX_LOCK(txr); do { more = igb_txeof(txr); } while (loop-- && more); #ifndef IGB_LEGACY_TX if (!drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); } return POLL_RETURN_COUNT(rx_done); } #endif /* DEVICE_POLLING */ /********************************************************************* * * MSIX Que Interrupt Service routine * **********************************************************************/ static void igb_msix_que(void *arg) { struct igb_queue *que = arg; struct adapter *adapter = que->adapter; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = que->txr; struct rx_ring *rxr = que->rxr; u32 newitr = 0; bool more_rx; /* Ignore spurious interrupts */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims); ++que->irqs; IGB_TX_LOCK(txr); igb_txeof(txr); #ifndef IGB_LEGACY_TX /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL); if (adapter->enable_aim == FALSE) goto no_calc; /* ** Do Adaptive Interrupt Moderation: ** - Write out last calculated setting ** - Calculate based on average size over ** the last interval. */ if (que->eitr_setting) E1000_WRITE_REG(&adapter->hw, E1000_EITR(que->msix), que->eitr_setting); que->eitr_setting = 0; /* Idle, do nothing */ if ((txr->bytes == 0) && (rxr->bytes == 0)) goto no_calc; /* Used half Default if sub-gig */ if (adapter->link_speed != 1000) newitr = IGB_DEFAULT_ITR / 2; else { if ((txr->bytes) && (txr->packets)) newitr = txr->bytes/txr->packets; if ((rxr->bytes) && (rxr->packets)) newitr = max(newitr, (rxr->bytes / rxr->packets)); newitr += 24; /* account for hardware frame, crc */ /* set an upper boundary */ newitr = min(newitr, 3000); /* Be nice to the mid range */ if ((newitr > 300) && (newitr < 1200)) newitr = (newitr / 3); else newitr = (newitr / 2); } newitr &= 0x7FFC; /* Mask invalid bits */ if (adapter->hw.mac.type == e1000_82575) newitr |= newitr << 16; else newitr |= E1000_EITR_CNT_IGNR; /* save for next interrupt */ que->eitr_setting = newitr; /* Reset state */ txr->bytes = 0; txr->packets = 0; rxr->bytes = 0; rxr->packets = 0; no_calc: /* Schedule a clean task if needed*/ if (more_rx) taskqueue_enqueue(que->tq, &que->que_task); else /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); return; } /********************************************************************* * * MSIX Link Interrupt Service routine * **********************************************************************/ static void igb_msix_link(void *arg) { struct adapter *adapter = arg; u32 icr; ++adapter->link_irq; icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (!(icr & E1000_ICR_LSC)) goto spurious; igb_handle_link(adapter, 0); spurious: /* Rearm */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct adapter *adapter = ifp->if_softc; INIT_DEBUGOUT("igb_media_status: begin"); IGB_CORE_LOCK(adapter); igb_update_link_status(adapter); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { IGB_CORE_UNLOCK(adapter); return; } ifmr->ifm_status |= IFM_ACTIVE; switch (adapter->link_speed) { case 10: ifmr->ifm_active |= IFM_10_T; break; case 100: /* ** Support for 100Mb SFP - these are Fiber ** but the media type appears as serdes */ if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) ifmr->ifm_active |= IFM_100_FX; else ifmr->ifm_active |= IFM_100_TX; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; case 2500: ifmr->ifm_active |= IFM_2500_SX; break; } if (adapter->link_duplex == FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; IGB_CORE_UNLOCK(adapter); } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. * **********************************************************************/ static int igb_media_change(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct ifmedia *ifm = &adapter->media; INIT_DEBUGOUT("igb_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); IGB_CORE_LOCK(adapter); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; break; case IFM_1000_LX: case IFM_1000_SX: case IFM_1000_T: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case IFM_100_TX: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; break; case IFM_10_T: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; break; default: device_printf(adapter->dev, "Unsupported media type\n"); } igb_init_locked(adapter); IGB_CORE_UNLOCK(adapter); return (0); } /********************************************************************* * * This routine maps the mbufs to Advanced TX descriptors. * **********************************************************************/ static int igb_xmit(struct tx_ring *txr, struct mbuf **m_headp) { struct adapter *adapter = txr->adapter; u32 olinfo_status = 0, cmd_type_len; int i, j, error, nsegs; int first; bool remap = TRUE; struct mbuf *m_head; bus_dma_segment_t segs[IGB_MAX_SCATTER]; bus_dmamap_t map; struct igb_tx_buf *txbuf; union e1000_adv_tx_desc *txd = NULL; m_head = *m_headp; /* Basic descriptor defines */ cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT); if (m_head->m_flags & M_VLANTAG) cmd_type_len |= E1000_ADVTXD_DCMD_VLE; /* * Important to capture the first descriptor * used because it will contain the index of * the one we tell the hardware to report back */ first = txr->next_avail_desc; txbuf = &txr->tx_buffers[first]; map = txbuf->map; /* * Map the packet for DMA. */ retry: error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(error)) { struct mbuf *m; switch (error) { case EFBIG: /* Try it again? - one try */ if (remap == TRUE) { remap = FALSE; m = m_defrag(*m_headp, M_NOWAIT); if (m == NULL) { adapter->mbuf_defrag_failed++; m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); } *m_headp = m; goto retry; } else return (error); default: txr->no_tx_dma_setup++; m_freem(*m_headp); *m_headp = NULL; return (error); } } /* Make certain there are enough descriptors */ if (nsegs > txr->tx_avail - 2) { txr->no_desc_avail++; bus_dmamap_unload(txr->txtag, map); return (ENOBUFS); } m_head = *m_headp; /* ** Set up the appropriate offload context ** this will consume the first descriptor */ error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); if (__predict_false(error)) { m_freem(*m_headp); *m_headp = NULL; return (error); } /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) olinfo_status |= txr->me << 4; i = txr->next_avail_desc; for (j = 0; j < nsegs; j++) { bus_size_t seglen; bus_addr_t segaddr; txbuf = &txr->tx_buffers[i]; txd = &txr->tx_base[i]; seglen = segs[j].ds_len; segaddr = htole64(segs[j].ds_addr); txd->read.buffer_addr = segaddr; txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS | cmd_type_len | seglen); txd->read.olinfo_status = htole32(olinfo_status); if (++i == txr->num_desc) i = 0; } txd->read.cmd_type_len |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); txr->tx_avail -= nsegs; txr->next_avail_desc = i; txbuf->m_head = m_head; /* ** Here we swap the map so the last descriptor, ** which gets the completion interrupt has the ** real map, and the first descriptor gets the ** unused map from this descriptor. */ txr->tx_buffers[first].map = txbuf->map; txbuf->map = map; bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); /* Set the EOP descriptor that will be marked done */ txbuf = &txr->tx_buffers[first]; txbuf->eop = txd; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * Advance the Transmit Descriptor Tail (Tdt), this tells the * hardware that this frame is available to transmit. */ ++txr->total_packets; E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); return (0); } static void igb_set_promisc(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u32 reg; if (adapter->vf_ifp) { e1000_promisc_set_vf(hw, e1000_promisc_enabled); return; } reg = E1000_READ_REG(hw, E1000_RCTL); if (ifp->if_flags & IFF_PROMISC) { reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE); E1000_WRITE_REG(hw, E1000_RCTL, reg); } else if (ifp->if_flags & IFF_ALLMULTI) { reg |= E1000_RCTL_MPE; reg &= ~E1000_RCTL_UPE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } } static void igb_disable_promisc(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; u32 reg; int mcnt = 0; if (adapter->vf_ifp) { e1000_promisc_set_vf(hw, e1000_promisc_disabled); return; } reg = E1000_READ_REG(hw, E1000_RCTL); reg &= (~E1000_RCTL_UPE); if (ifp->if_flags & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else { struct ifmultiaddr *ifma; #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif } /* Don't disable if in MAX groups */ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg &= (~E1000_RCTL_MPE); E1000_WRITE_REG(hw, E1000_RCTL, reg); } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void igb_set_multi(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct ifmultiaddr *ifma; u32 reg_rctl = 0; u8 *mta; int mcnt = 0; IOCTL_DEBUGOUT("igb_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); } /********************************************************************* * Timer routine: * This routine checks for link status, * updates statistics, and does the watchdog. * **********************************************************************/ static void igb_local_timer(void *arg) { struct adapter *adapter = arg; device_t dev = adapter->dev; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; struct igb_queue *que = adapter->queues; int hung = 0, busy = 0; IGB_CORE_LOCK_ASSERT(adapter); igb_update_link_status(adapter); igb_update_stats_counters(adapter); /* ** Check the TX queues status ** - central locked handling of OACTIVE ** - watchdog only if all queues show hung */ for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { if ((txr->queue_status & IGB_QUEUE_HUNG) && (adapter->pause_frames == 0)) ++hung; if (txr->queue_status & IGB_QUEUE_DEPLETED) ++busy; if ((txr->queue_status & IGB_QUEUE_IDLE) == 0) taskqueue_enqueue(que->tq, &que->que_task); } if (hung == adapter->num_queues) goto timeout; if (busy == adapter->num_queues) ifp->if_drv_flags |= IFF_DRV_OACTIVE; else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) && (busy < adapter->num_queues)) ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; adapter->pause_frames = 0; callout_reset(&adapter->timer, hz, igb_local_timer, adapter); #ifndef DEVICE_POLLING /* Schedule all queue interrupts - deadlock protection */ E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask); #endif return; timeout: device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)), E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me))); device_printf(dev,"TX(%d) desc avail = %d," "Next TX to Clean = %d\n", txr->me, txr->tx_avail, txr->next_to_clean); adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->watchdog_events++; igb_init_locked(adapter); } static void igb_update_link_status(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_fc_info *fc = &hw->fc; struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; u32 link_check, thstat, ctrl; char *flowctl = NULL; link_check = thstat = ctrl = 0; /* Get the cached link value or read for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; } else link_check = TRUE; break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = adapter->hw.mac.serdes_has_link; break; /* VF device is type_unknown */ case e1000_media_type_unknown: e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; /* Fall thru */ default: break; } /* Check for thermal downshift or shutdown */ if (hw->mac.type == e1000_i350) { thstat = E1000_READ_REG(hw, E1000_THSTAT); ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT); } /* Get the flow control for display */ switch (fc->current_mode) { case e1000_fc_rx_pause: flowctl = "RX"; break; case e1000_fc_tx_pause: flowctl = "TX"; break; case e1000_fc_full: flowctl = "Full"; break; case e1000_fc_none: default: flowctl = "None"; break; } /* Now we check if a transition has happened */ if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(&adapter->hw, &adapter->link_speed, &adapter->link_duplex); if (bootverbose) device_printf(dev, "Link is up %d Mbps %s," " Flow Control: %s\n", adapter->link_speed, ((adapter->link_duplex == FULL_DUPLEX) ? "Full Duplex" : "Half Duplex"), flowctl); adapter->link_active = 1; ifp->if_baudrate = adapter->link_speed * 1000000; if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && (thstat & E1000_THSTAT_LINK_THROTTLE)) device_printf(dev, "Link: thermal downshift\n"); /* Delay Link Up for Phy update */ if (((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) && (hw->phy.id == I210_I_PHY_ID)) msec_delay(I210_LINK_DELAY); /* Reset if the media type changed. */ if (hw->dev_spec._82575.media_changed) { hw->dev_spec._82575.media_changed = false; adapter->flags |= IGB_MEDIA_RESET; igb_reset(adapter); } /* This can sleep */ if_link_state_change(ifp, LINK_STATE_UP); } else if (!link_check && (adapter->link_active == 1)) { ifp->if_baudrate = adapter->link_speed = 0; adapter->link_duplex = 0; if (bootverbose) device_printf(dev, "Link is Down\n"); if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && (thstat & E1000_THSTAT_PWR_DOWN)) device_printf(dev, "Link: thermal shutdown\n"); adapter->link_active = 0; /* This can sleep */ if_link_state_change(ifp, LINK_STATE_DOWN); /* Reset queue state */ for (int i = 0; i < adapter->num_queues; i++, txr++) txr->queue_status = IGB_QUEUE_IDLE; } } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * **********************************************************************/ static void igb_stop(void *arg) { struct adapter *adapter = arg; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; IGB_CORE_LOCK_ASSERT(adapter); INIT_DEBUGOUT("igb_stop: begin"); igb_disable_intr(adapter); callout_stop(&adapter->timer); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ifp->if_drv_flags |= IFF_DRV_OACTIVE; /* Disarm watchdog timer. */ for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); txr->queue_status = IGB_QUEUE_IDLE; IGB_TX_UNLOCK(txr); } e1000_reset_hw(&adapter->hw); E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ static void igb_identify_hardware(struct adapter *adapter) { device_t dev = adapter->dev; /* Make sure our PCI config space has the necessary stuff set */ pci_enable_busmaster(dev); adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Set MAC type early for PCI setup */ e1000_set_mac_type(&adapter->hw); /* Are we a VF device? */ if ((adapter->hw.mac.type == e1000_vfadapt) || (adapter->hw.mac.type == e1000_vfadapt_i350)) adapter->vf_ifp = 1; else adapter->vf_ifp = 0; } static int igb_allocate_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; int rid; rid = PCIR_BAR(0); adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->pci_mem == NULL) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->pci_mem); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->pci_mem); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; adapter->num_queues = 1; /* Defaults for Legacy or MSI */ /* This will setup either MSI/X or MSI */ adapter->msix = igb_setup_msix(adapter); adapter->hw.back = &adapter->osdep; return (0); } /********************************************************************* * * Setup the Legacy or MSI Interrupt handler * **********************************************************************/ static int igb_allocate_legacy(struct adapter *adapter) { device_t dev = adapter->dev; struct igb_queue *que = adapter->queues; #ifndef IGB_LEGACY_TX struct tx_ring *txr = adapter->tx_rings; #endif int error, rid = 0; /* Turn off all interrupts */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); /* MSI RID is 1 */ if (adapter->msix == 1) rid = 1; /* We allocate a single interrupt resource */ adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "interrupt\n"); return (ENXIO); } #ifndef IGB_LEGACY_TX TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr); #endif /* * Try allocating a fast interrupt and the associated deferred * processing contexts. */ TASK_INIT(&que->que_task, 0, igb_handle_que, que); /* Make tasklet for deferred link handling */ TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter); que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq", device_get_nameunit(adapter->dev)); if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL, adapter, &adapter->tag)) != 0) { device_printf(dev, "Failed to register fast interrupt " "handler: %d\n", error); taskqueue_free(que->tq); que->tq = NULL; return (error); } return (0); } /********************************************************************* * * Setup the MSIX Queue Interrupt handlers: * **********************************************************************/ static int igb_allocate_msix(struct adapter *adapter) { device_t dev = adapter->dev; struct igb_queue *que = adapter->queues; int error, rid, vector = 0; int cpu_id = 0; #ifdef RSS cpuset_t cpu_mask; #endif /* Be sure to start with all interrupts disabled */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); E1000_WRITE_FLUSH(&adapter->hw); #ifdef RSS /* * If we're doing RSS, the number of queues needs to * match the number of RSS buckets that are configured. * * + If there's more queues than RSS buckets, we'll end * up with queues that get no traffic. * * + If there's more RSS buckets than queues, we'll end * up having multiple RSS buckets map to the same queue, * so there'll be some contention. */ if (adapter->num_queues != rss_getnumbuckets()) { device_printf(dev, "%s: number of queues (%d) != number of RSS buckets (%d)" "; performance will be impacted.\n", __func__, adapter->num_queues, rss_getnumbuckets()); } #endif for (int i = 0; i < adapter->num_queues; i++, vector++, que++) { rid = vector +1; que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (que->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "MSIX Queue Interrupt\n"); return (ENXIO); } error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_que, que, &que->tag); if (error) { que->res = NULL; device_printf(dev, "Failed to register Queue handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, que->res, que->tag, "que %d", i); #endif que->msix = vector; if (adapter->hw.mac.type == e1000_82575) que->eims = E1000_EICR_TX_QUEUE0 << i; else que->eims = 1 << vector; #ifdef RSS /* * The queue ID is used as the RSS layer bucket ID. * We look up the queue ID -> RSS CPU ID and select * that. */ cpu_id = rss_getcpu(i % rss_getnumbuckets()); #else /* * Bind the msix vector, and thus the * rings to the corresponding cpu. * * This just happens to match the default RSS round-robin * bucket -> queue -> CPU allocation. */ if (adapter->num_queues > 1) { if (igb_last_bind_cpu < 0) igb_last_bind_cpu = CPU_FIRST(); cpu_id = igb_last_bind_cpu; } #endif if (adapter->num_queues > 1) { bus_bind_intr(dev, que->res, cpu_id); #ifdef RSS device_printf(dev, "Bound queue %d to RSS bucket %d\n", i, cpu_id); #else device_printf(dev, "Bound queue %d to cpu %d\n", i, cpu_id); #endif } #ifndef IGB_LEGACY_TX TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, que->txr); #endif /* Make tasklet for deferred handling */ TASK_INIT(&que->que_task, 0, igb_handle_que, que); que->tq = taskqueue_create("igb_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); if (adapter->num_queues > 1) { /* * Only pin the taskqueue thread to a CPU if * RSS is in use. * * This again just happens to match the default RSS * round-robin bucket -> queue -> CPU allocation. */ #ifdef RSS CPU_SETOF(cpu_id, &cpu_mask); taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, &cpu_mask, "%s que (bucket %d)", device_get_nameunit(adapter->dev), cpu_id); #else taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que (qid %d)", device_get_nameunit(adapter->dev), cpu_id); #endif } else { taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); } /* Finally update the last bound CPU id */ if (adapter->num_queues > 1) igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu); } /* And Link */ rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "MSIX Link Interrupt\n"); return (ENXIO); } if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_link, adapter, &adapter->tag)) != 0) { device_printf(dev, "Failed to register Link handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); #endif adapter->linkvec = vector; return (0); } static void igb_configure_queues(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct igb_queue *que; u32 tmp, ivar = 0, newitr = 0; /* First turn on RSS capability */ if (adapter->hw.mac.type != e1000_82575) E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | E1000_GPIE_PBA | E1000_GPIE_NSICR); /* Turn on MSIX */ switch (adapter->hw.mac.type) { case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: /* RX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i & 1) { ivar &= 0xFF00FFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 16; } else { ivar &= 0xFFFFFF00; ivar |= que->msix | E1000_IVAR_VALID; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); } /* TX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i & 1) { ivar &= 0x00FFFFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 24; } else { ivar &= 0xFFFF00FF; ivar |= (que->msix | E1000_IVAR_VALID) << 8; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82576: /* RX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i < 8) { ivar &= 0xFFFFFF00; ivar |= que->msix | E1000_IVAR_VALID; } else { ivar &= 0xFF00FFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 16; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= que->eims; } /* TX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i < 8) { ivar &= 0xFFFF00FF; ivar |= (que->msix | E1000_IVAR_VALID) << 8; } else { ivar &= 0x00FFFFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 24; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82575: /* enable MSI-X support*/ tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; /* Auto-Mask interrupts upon ICR read. */ tmp |= E1000_CTRL_EXT_EIAME; tmp |= E1000_CTRL_EXT_IRCA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); /* Queues */ for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; tmp = E1000_EICR_RX_QUEUE0 << i; tmp |= E1000_EICR_TX_QUEUE0 << i; que->eims = tmp; E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), i, que->eims); adapter->que_mask |= que->eims; } /* Link */ E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec), E1000_EIMS_OTHER); adapter->link_mask |= E1000_EIMS_OTHER; default: break; } /* Set the starting interrupt rate */ if (igb_max_interrupt_rate > 0) newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC; if (hw->mac.type == e1000_82575) newitr |= newitr << 16; else newitr |= E1000_EITR_CNT_IGNR; for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr); } return; } static void igb_free_pci_resources(struct adapter *adapter) { struct igb_queue *que = adapter->queues; device_t dev = adapter->dev; int rid; /* ** There is a slight possibility of a failure mode ** in attach that will result in entering this function ** before interrupt resources have been initialized, and ** in that case we do not want to execute the loops below ** We can detect this reliably by the state of the adapter ** res pointer. */ if (adapter->res == NULL) goto mem; /* * First release all the interrupt resources: */ for (int i = 0; i < adapter->num_queues; i++, que++) { rid = que->msix + 1; if (que->tag != NULL) { bus_teardown_intr(dev, que->res, que->tag); que->tag = NULL; } if (que->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); } /* Clean the Legacy or Link interrupt last */ if (adapter->linkvec) /* we are doing MSIX */ rid = adapter->linkvec + 1; else (adapter->msix != 0) ? (rid = 1):(rid = 0); que = adapter->queues; if (adapter->tag != NULL) { taskqueue_drain(que->tq, &adapter->link_task); bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); for (int i = 0; i < adapter->num_queues; i++, que++) { if (que->tq != NULL) { #ifndef IGB_LEGACY_TX taskqueue_drain(que->tq, &que->txr->txq_task); #endif taskqueue_drain(que->tq, &que->que_task); taskqueue_free(que->tq); } } mem: if (adapter->msix) pci_release_msi(dev); if (adapter->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, adapter->memrid, adapter->msix_mem); if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->pci_mem); } /* * Setup Either MSI/X or MSI */ static int igb_setup_msix(struct adapter *adapter) { device_t dev = adapter->dev; int bar, want, queues, msgs, maxqueues; /* tuneable override */ if (igb_enable_msix == 0) goto msi; /* First try MSI/X */ msgs = pci_msix_count(dev); if (msgs == 0) goto msi; /* ** Some new devices, as with ixgbe, now may ** use a different BAR, so we need to keep ** track of which is used. */ adapter->memrid = PCIR_BAR(IGB_MSIX_BAR); bar = pci_read_config(dev, adapter->memrid, 4); if (bar == 0) /* use next bar */ adapter->memrid += 4; adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE); if (adapter->msix_mem == NULL) { /* May not be enabled */ device_printf(adapter->dev, "Unable to map MSIX table \n"); goto msi; } queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus; /* Override via tuneable */ if (igb_num_queues != 0) queues = igb_num_queues; #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif /* Sanity check based on HW */ switch (adapter->hw.mac.type) { case e1000_82575: maxqueues = 4; break; case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: maxqueues = 8; break; case e1000_i210: maxqueues = 4; break; case e1000_i211: maxqueues = 2; break; default: /* VF interfaces */ maxqueues = 1; break; } /* Final clamp on the actual hardware capability */ if (queues > maxqueues) queues = maxqueues; /* ** One vector (RX/TX pair) per queue ** plus an additional for Link interrupt */ want = queues + 1; if (msgs >= want) msgs = want; else { device_printf(adapter->dev, "MSIX Configuration Problem, " "%d vectors configured, but %d queues wanted!\n", msgs, want); goto msi; } if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { device_printf(adapter->dev, "Using MSIX interrupts with %d vectors\n", msgs); adapter->num_queues = queues; return (msgs); } /* ** If MSIX alloc failed or provided us with ** less than needed, free and fall through to MSI */ pci_release_msi(dev); msi: if (adapter->msix_mem != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem); adapter->msix_mem = NULL; } msgs = 1; if (pci_alloc_msi(dev, &msgs) == 0) { device_printf(adapter->dev," Using an MSI interrupt\n"); return (msgs); } device_printf(adapter->dev," Using a Legacy interrupt\n"); return (0); } /********************************************************************* * * Initialize the DMA Coalescing feature * **********************************************************************/ static void igb_init_dmac(struct adapter *adapter, u32 pba) { device_t dev = adapter->dev; struct e1000_hw *hw = &adapter->hw; u32 dmac, reg = ~E1000_DMACR_DMAC_EN; u16 hwm; if (hw->mac.type == e1000_i211) return; if (hw->mac.type > e1000_82580) { if (adapter->dmac == 0) { /* Disabling it */ E1000_WRITE_REG(hw, E1000_DMACR, reg); return; } else device_printf(dev, "DMA Coalescing enabled\n"); /* Set starting threshold */ E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); hwm = 64 * pba - adapter->max_frame_size / 16; if (hwm < 64 * (pba - 6)) hwm = 64 * (pba - 6); reg = E1000_READ_REG(hw, E1000_FCRTC); reg &= ~E1000_FCRTC_RTH_COAL_MASK; reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) & E1000_FCRTC_RTH_COAL_MASK); E1000_WRITE_REG(hw, E1000_FCRTC, reg); dmac = pba - adapter->max_frame_size / 512; if (dmac < pba - 10) dmac = pba - 10; reg = E1000_READ_REG(hw, E1000_DMACR); reg &= ~E1000_DMACR_DMACTHR_MASK; reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT) & E1000_DMACR_DMACTHR_MASK); /* transition to L0x or L1 if available..*/ reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); /* Check if status is 2.5Gb backplane connection * before configuration of watchdog timer, which is * in msec values in 12.8usec intervals * watchdog timer= msec values in 32usec intervals * for non 2.5Gb connection */ if (hw->mac.type == e1000_i354) { int status = E1000_READ_REG(hw, E1000_STATUS); if ((status & E1000_STATUS_2P5_SKU) && (!(status & E1000_STATUS_2P5_SKU_OVER))) reg |= ((adapter->dmac * 5) >> 6); else reg |= (adapter->dmac >> 5); } else { reg |= (adapter->dmac >> 5); } E1000_WRITE_REG(hw, E1000_DMACR, reg); E1000_WRITE_REG(hw, E1000_DMCRTRH, 0); /* Set the interval before transition */ reg = E1000_READ_REG(hw, E1000_DMCTLX); if (hw->mac.type == e1000_i350) reg |= IGB_DMCTLX_DCFLUSH_DIS; /* ** in 2.5Gb connection, TTLX unit is 0.4 usec ** which is 0x4*2 = 0xA. But delay is still 4 usec */ if (hw->mac.type == e1000_i354) { int status = E1000_READ_REG(hw, E1000_STATUS); if ((status & E1000_STATUS_2P5_SKU) && (!(status & E1000_STATUS_2P5_SKU_OVER))) reg |= 0xA; else reg |= 0x4; } else { reg |= 0x4; } E1000_WRITE_REG(hw, E1000_DMCTLX, reg); /* free space in tx packet buffer to wake from DMA coal */ E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE - (2 * adapter->max_frame_size)) >> 6); /* make low power state decision controlled by DMA coal */ reg = E1000_READ_REG(hw, E1000_PCIEMISC); reg &= ~E1000_PCIEMISC_LX_DECISION; E1000_WRITE_REG(hw, E1000_PCIEMISC, reg); } else if (hw->mac.type == e1000_82580) { u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC); E1000_WRITE_REG(hw, E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION); E1000_WRITE_REG(hw, E1000_DMACR, 0); } } /********************************************************************* * * Set up an fresh starting state * **********************************************************************/ static void igb_reset(struct adapter *adapter) { device_t dev = adapter->dev; struct e1000_hw *hw = &adapter->hw; struct e1000_fc_info *fc = &hw->fc; struct ifnet *ifp = adapter->ifp; u32 pba = 0; u16 hwm; INIT_DEBUGOUT("igb_reset: begin"); /* Let the firmware know the OS is in control */ igb_get_hw_control(adapter); /* * Packet Buffer Allocation (PBA) * Writing PBA sets the receive portion of the buffer * the remainder is used for the transmit buffer. */ switch (hw->mac.type) { case e1000_82575: pba = E1000_PBA_32K; break; case e1000_82576: case e1000_vfadapt: pba = E1000_READ_REG(hw, E1000_RXPBS); pba &= E1000_RXPBS_SIZE_MASK_82576; break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_vfadapt_i350: pba = E1000_READ_REG(hw, E1000_RXPBS); pba = e1000_rxpbs_adjust_82580(pba); break; case e1000_i210: case e1000_i211: pba = E1000_PBA_34K; default: break; } /* Special needs in case of Jumbo frames */ if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) { u32 tx_space, min_tx, min_rx; pba = E1000_READ_REG(hw, E1000_PBA); tx_space = pba >> 16; pba &= 0xffff; min_tx = (adapter->max_frame_size + sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2; min_tx = roundup2(min_tx, 1024); min_tx >>= 10; min_rx = adapter->max_frame_size; min_rx = roundup2(min_rx, 1024); min_rx >>= 10; if (tx_space < min_tx && ((min_tx - tx_space) < pba)) { pba = pba - (min_tx - tx_space); /* * if short on rx space, rx wins * and must trump tx adjustment */ if (pba < min_rx) pba = min_rx; } E1000_WRITE_REG(hw, E1000_PBA, pba); } INIT_DEBUGOUT1("igb_init: pba=%dK",pba); /* * These parameters control the automatic generation (Tx) and * response (Rx) to Ethernet PAUSE frames. * - High water mark should allow for at least two frames to be * received after sending an XOFF. * - Low water mark works best when it is very near the high water mark. * This allows the receiver to restart by sending XON when it has * drained a bit. */ hwm = min(((pba << 10) * 9 / 10), ((pba << 10) - 2 * adapter->max_frame_size)); if (hw->mac.type < e1000_82576) { fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */ fc->low_water = fc->high_water - 8; } else { fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */ fc->low_water = fc->high_water - 16; } fc->pause_time = IGB_FC_PAUSE_TIME; fc->send_xon = TRUE; if (adapter->fc) fc->requested_mode = adapter->fc; else fc->requested_mode = e1000_fc_default; /* Issue a global reset */ e1000_reset_hw(hw); E1000_WRITE_REG(hw, E1000_WUC, 0); /* Reset for AutoMediaDetect */ if (adapter->flags & IGB_MEDIA_RESET) { e1000_setup_init_funcs(hw, TRUE); e1000_get_bus_info(hw); adapter->flags &= ~IGB_MEDIA_RESET; } if (e1000_init_hw(hw) < 0) device_printf(dev, "Hardware Initialization Failed\n"); /* Setup DMA Coalescing */ igb_init_dmac(adapter, pba); E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); return; } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static int igb_setup_interface(device_t dev, struct adapter *adapter) { struct ifnet *ifp; INIT_DEBUGOUT("igb_setup_interface: begin"); ifp = adapter->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_init = igb_init; ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = igb_ioctl; ifp->if_get_counter = igb_get_counter; #ifndef IGB_LEGACY_TX ifp->if_transmit = igb_mq_start; ifp->if_qflush = igb_qflush; #else ifp->if_start = igb_start; IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1); ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1; IFQ_SET_READY(&ifp->if_snd); #endif ether_ifattach(ifp, adapter->hw.mac.addr); ifp->if_capabilities = ifp->if_capenable = 0; ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; ifp->if_capabilities |= IFCAP_TSO; ifp->if_capabilities |= IFCAP_JUMBO_MTU; ifp->if_capenable = ifp->if_capabilities; /* Don't enable LRO by default */ ifp->if_capabilities |= IFCAP_LRO; #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif /* * Tell the upper layer(s) we * support full VLAN capability. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; /* ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the igb driver you can ** enable this and get full hardware tag filtering. */ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, igb_media_change, igb_media_status); if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); } else { ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /* * Manage DMA'able memory. */ static void igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs[0].ds_addr; } static int igb_dma_malloc(struct adapter *adapter, bus_size_t size, struct igb_dma_alloc *dma, int mapflags) { int error; error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ IGB_DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->dma_tag); if (error) { device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n", __func__, error); goto fail_0; } error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); if (error) { device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, error); goto fail_2; } dma->dma_paddr = 0; error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (error || dma->dma_paddr == 0) { device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n", __func__, error); goto fail_3; } return (0); fail_3: bus_dmamap_unload(dma->dma_tag, dma->dma_map); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_tag = NULL; return (error); } static void igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma) { if (dma->dma_tag == NULL) return; if (dma->dma_paddr != 0) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); dma->dma_paddr = 0; } if (dma->dma_vaddr != NULL) { bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); dma->dma_vaddr = NULL; } bus_dma_tag_destroy(dma->dma_tag); dma->dma_tag = NULL; } /********************************************************************* * * Allocate memory for the transmit and receive rings, and then * the descriptors associated with each, called only once at attach. * **********************************************************************/ static int igb_allocate_queues(struct adapter *adapter) { device_t dev = adapter->dev; struct igb_queue *que = NULL; struct tx_ring *txr = NULL; struct rx_ring *rxr = NULL; int rsize, tsize, error = E1000_SUCCESS; int txconf = 0, rxconf = 0; /* First allocate the top level queue structs */ if (!(adapter->queues = (struct igb_queue *) malloc(sizeof(struct igb_queue) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate queue memory\n"); error = ENOMEM; goto fail; } /* Next allocate the TX ring struct memory */ if (!(adapter->tx_rings = (struct tx_ring *) malloc(sizeof(struct tx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); error = ENOMEM; goto tx_fail; } /* Now allocate the RX */ if (!(adapter->rx_rings = (struct rx_ring *) malloc(sizeof(struct rx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); error = ENOMEM; goto rx_fail; } tsize = roundup2(adapter->num_tx_desc * sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN); /* * Now set up the TX queues, txconf is needed to handle the * possibility that things fail midcourse and we need to * undo memory gracefully */ for (int i = 0; i < adapter->num_queues; i++, txconf++) { /* Set up some basics */ txr = &adapter->tx_rings[i]; txr->adapter = adapter; txr->me = i; txr->num_desc = adapter->num_tx_desc; /* Initialize the TX lock */ snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); if (igb_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate TX Descriptor memory\n"); error = ENOMEM; goto err_tx_desc; } txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr; bzero((void *)txr->tx_base, tsize); /* Now allocate transmit buffers for the ring */ if (igb_allocate_transmit_buffers(txr)) { device_printf(dev, "Critical Failure setting up transmit buffers\n"); error = ENOMEM; goto err_tx_desc; } #ifndef IGB_LEGACY_TX /* Allocate a buf ring */ txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF, M_WAITOK, &txr->tx_mtx); #endif } /* * Next the RX queues... */ rsize = roundup2(adapter->num_rx_desc * sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN); for (int i = 0; i < adapter->num_queues; i++, rxconf++) { rxr = &adapter->rx_rings[i]; rxr->adapter = adapter; rxr->me = i; /* Initialize the RX lock */ snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); if (igb_dma_malloc(adapter, rsize, &rxr->rxdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate RxDescriptor memory\n"); error = ENOMEM; goto err_rx_desc; } rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr; bzero((void *)rxr->rx_base, rsize); /* Allocate receive buffers for the ring*/ if (igb_allocate_receive_buffers(rxr)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); error = ENOMEM; goto err_rx_desc; } } /* ** Finally set up the queue holding structs */ for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; que->adapter = adapter; que->txr = &adapter->tx_rings[i]; que->rxr = &adapter->rx_rings[i]; } return (0); err_rx_desc: for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) igb_dma_free(adapter, &rxr->rxdma); err_tx_desc: for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) igb_dma_free(adapter, &txr->txdma); free(adapter->rx_rings, M_DEVBUF); rx_fail: #ifndef IGB_LEGACY_TX buf_ring_free(txr->br, M_DEVBUF); #endif free(adapter->tx_rings, M_DEVBUF); tx_fail: free(adapter->queues, M_DEVBUF); fail: return (error); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. This is * called only once at attach, setup is done every reset. * **********************************************************************/ static int igb_allocate_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; device_t dev = adapter->dev; struct igb_tx_buf *txbuf; int error, i; /* * Setup DMA descriptor areas. */ if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ IGB_TSO_SIZE, /* maxsize */ IGB_MAX_SCATTER, /* nsegments */ PAGE_SIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->txtag))) { device_printf(dev,"Unable to allocate TX DMA tag\n"); goto fail; } if (!(txr->tx_buffers = (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) * adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); error = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); if (error != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } return 0; fail: /* We free all, it handles case where we are in the middle */ igb_free_transmit_structures(adapter); return (error); } /********************************************************************* * * Initialize a transmit ring. * **********************************************************************/ static void igb_setup_transmit_ring(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct igb_tx_buf *txbuf; int i; #ifdef DEV_NETMAP struct netmap_adapter *na = NA(adapter->ifp); struct netmap_slot *slot; #endif /* DEV_NETMAP */ /* Clear the old descriptor contents */ IGB_TX_LOCK(txr); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_TX, txr->me, 0); #endif /* DEV_NETMAP */ bzero((void *)txr->tx_base, (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc); /* Reset indices */ txr->next_avail_desc = 0; txr->next_to_clean = 0; /* Free any existing tx buffers. */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; } #ifdef DEV_NETMAP if (slot) { int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); /* no need to set the address */ netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si)); } #endif /* DEV_NETMAP */ /* clear the watch index */ txbuf->eop = NULL; } /* Set number of descriptors available */ txr->tx_avail = adapter->num_tx_desc; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); IGB_TX_UNLOCK(txr); } /********************************************************************* * * Initialize all transmit rings. * **********************************************************************/ static void igb_setup_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) igb_setup_transmit_ring(txr); return; } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void igb_initialize_transmit_units(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct e1000_hw *hw = &adapter->hw; u32 tctl, txdctl; INIT_DEBUGOUT("igb_initialize_transmit_units: begin"); tctl = txdctl = 0; /* Setup the Tx Descriptor Rings */ for (int i = 0; i < adapter->num_queues; i++, txr++) { u64 bus_addr = txr->txdma.dma_paddr; E1000_WRITE_REG(hw, E1000_TDLEN(i), adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(hw, E1000_TDBAH(i), (uint32_t)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr); /* Setup the HW Tx Head and Tail descriptor pointers */ E1000_WRITE_REG(hw, E1000_TDT(i), 0); E1000_WRITE_REG(hw, E1000_TDH(i), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(hw, E1000_TDBAL(i)), E1000_READ_REG(hw, E1000_TDLEN(i))); txr->queue_status = IGB_QUEUE_IDLE; txdctl |= IGB_TX_PTHRESH; txdctl |= IGB_TX_HTHRESH << 8; txdctl |= IGB_TX_WTHRESH << 16; txdctl |= E1000_TXDCTL_QUEUE_ENABLE; E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); } if (adapter->vf_ifp) return; e1000_config_collision_dist(hw); /* Program the Transmit Control Register */ tctl = E1000_READ_REG(hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); /* This write will effectively turn on the transmit unit. */ E1000_WRITE_REG(hw, E1000_TCTL, tctl); } /********************************************************************* * * Free all transmit rings. * **********************************************************************/ static void igb_free_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); igb_free_transmit_buffers(txr); igb_dma_free(adapter, &txr->txdma); IGB_TX_UNLOCK(txr); IGB_TX_LOCK_DESTROY(txr); } free(adapter->tx_rings, M_DEVBUF); } /********************************************************************* * * Free transmit ring related data structures. * **********************************************************************/ static void igb_free_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct igb_tx_buf *tx_buffer; int i; INIT_DEBUGOUT("free_transmit_ring: begin"); if (txr->tx_buffers == NULL) return; tx_buffer = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { if (tx_buffer->m_head != NULL) { bus_dmamap_sync(txr->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; if (tx_buffer->map != NULL) { bus_dmamap_destroy(txr->txtag, tx_buffer->map); tx_buffer->map = NULL; } } else if (tx_buffer->map != NULL) { bus_dmamap_unload(txr->txtag, tx_buffer->map); bus_dmamap_destroy(txr->txtag, tx_buffer->map); tx_buffer->map = NULL; } } #ifndef IGB_LEGACY_TX if (txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF); #endif if (txr->tx_buffers != NULL) { free(txr->tx_buffers, M_DEVBUF); txr->tx_buffers = NULL; } if (txr->txtag != NULL) { bus_dma_tag_destroy(txr->txtag); txr->txtag = NULL; } return; } /********************************************************************** * * Setup work for hardware segmentation offload (TSO) on * adapters using advanced tx descriptors * **********************************************************************/ static int igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len, u32 *olinfo_status) { struct adapter *adapter = txr->adapter; struct e1000_adv_tx_context_desc *TXD; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; u32 mss_l4len_idx = 0, paylen; u16 vtag = 0, eh_type; int ctxd, ehdrlen, ip_hlen, tcp_hlen; struct ether_vlan_header *eh; #ifdef INET6 struct ip6_hdr *ip6; #endif #ifdef INET struct ip *ip; #endif struct tcphdr *th; /* * Determine where frame payload starts. * Jump over vlan headers if already present */ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; eh_type = eh->evl_proto; } else { ehdrlen = ETHER_HDR_LEN; eh_type = eh->evl_encap_proto; } switch (ntohs(eh_type)) { #ifdef INET6 case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); /* XXX-BZ For now we do not pretend to support ext. hdrs. */ if (ip6->ip6_nxt != IPPROTO_TCP) return (ENXIO); ip_hlen = sizeof(struct ip6_hdr); ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; break; #endif #ifdef INET case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + ehdrlen); if (ip->ip_p != IPPROTO_TCP) return (ENXIO); ip->ip_sum = 0; ip_hlen = ip->ip_hl << 2; th = (struct tcphdr *)((caddr_t)ip + ip_hlen); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; /* Tell transmit desc to also do IPv4 checksum. */ *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; break; #endif default: panic("%s: CSUM_TSO but no supported IP version (0x%04x)", __func__, ntohs(eh_type)); break; } ctxd = txr->next_avail_desc; TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; tcp_hlen = th->th_off << 2; /* This is used in the transmit desc in encap */ paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; /* VLAN MACLEN IPLEN */ if (mp->m_flags & M_VLANTAG) { vtag = htole16(mp->m_pkthdr.ether_vtag); vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); } vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= ip_hlen; TXD->vlan_macip_lens = htole32(vlan_macip_lens); /* ADV DTYPE TUCMD */ type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); /* MSS L4LEN IDX */ mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT); mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT); /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) mss_l4len_idx |= txr->me << 4; TXD->mss_l4len_idx = htole32(mss_l4len_idx); TXD->seqnum_seed = htole32(0); if (++ctxd == txr->num_desc) ctxd = 0; txr->tx_avail--; txr->next_avail_desc = ctxd; *cmd_type_len |= E1000_ADVTXD_DCMD_TSE; *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT; ++txr->tso_tx; return (0); } /********************************************************************* * * Advanced Context Descriptor setup for VLAN, CSUM or TSO * **********************************************************************/ static int igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len, u32 *olinfo_status) { struct e1000_adv_tx_context_desc *TXD; struct adapter *adapter = txr->adapter; struct ether_vlan_header *eh; struct ip *ip; struct ip6_hdr *ip6; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0; int ehdrlen, ip_hlen = 0; u16 etype; u8 ipproto = 0; int offload = TRUE; int ctxd = txr->next_avail_desc; u16 vtag = 0; /* First check if TSO is to be used */ if (mp->m_pkthdr.csum_flags & CSUM_TSO) return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status)); if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) offload = FALSE; /* Indicate the whole packet as payload when not doing TSO */ *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT; /* Now ready a context descriptor */ TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; /* ** In advanced descriptors the vlan tag must ** be placed into the context descriptor. Hence ** we need to make one even if not doing offloads. */ if (mp->m_flags & M_VLANTAG) { vtag = htole16(mp->m_pkthdr.ether_vtag); vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); } else if (offload == FALSE) /* ... no offload to do */ return (0); /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. */ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eh->evl_proto); ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eh->evl_encap_proto); ehdrlen = ETHER_HDR_LEN; } /* Set the ether header length */ vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; switch (etype) { case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + ehdrlen); ip_hlen = ip->ip_hl << 2; ipproto = ip->ip_p; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); ip_hlen = sizeof(struct ip6_hdr); /* XXX-BZ this will go badly in case of ext hdrs. */ ipproto = ip6->ip6_nxt; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; break; default: offload = FALSE; break; } vlan_macip_lens |= ip_hlen; type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; switch (ipproto) { case IPPROTO_TCP: if (mp->m_pkthdr.csum_flags & CSUM_TCP) type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; break; case IPPROTO_UDP: if (mp->m_pkthdr.csum_flags & CSUM_UDP) type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; break; #if __FreeBSD_version >= 800000 case IPPROTO_SCTP: if (mp->m_pkthdr.csum_flags & CSUM_SCTP) type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP; break; #endif default: offload = FALSE; break; } if (offload) /* For the TX descriptor setup */ *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) mss_l4len_idx = txr->me << 4; /* Now copy bits into descriptor */ TXD->vlan_macip_lens = htole32(vlan_macip_lens); TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); TXD->seqnum_seed = htole32(0); TXD->mss_l4len_idx = htole32(mss_l4len_idx); /* We've consumed the first desc, adjust counters */ if (++ctxd == txr->num_desc) ctxd = 0; txr->next_avail_desc = ctxd; --txr->tx_avail; return (0); } /********************************************************************** * * Examine each tx_buffer in the used queue. If the hardware is done * processing the packet then free associated resources. The * tx_buffer is put back on the free queue. * * TRUE return means there's work in the ring to clean, FALSE its empty. **********************************************************************/ static bool igb_txeof(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; #ifdef DEV_NETMAP struct ifnet *ifp = adapter->ifp; #endif /* DEV_NETMAP */ u32 work, processed = 0; u16 limit = txr->process_limit; struct igb_tx_buf *buf; union e1000_adv_tx_desc *txd; mtx_assert(&txr->tx_mtx, MA_OWNED); #ifdef DEV_NETMAP if (netmap_tx_irq(ifp, txr->me)) return (FALSE); #endif /* DEV_NETMAP */ if (txr->tx_avail == txr->num_desc) { txr->queue_status = IGB_QUEUE_IDLE; return FALSE; } /* Get work starting point */ work = txr->next_to_clean; buf = &txr->tx_buffers[work]; txd = &txr->tx_base[work]; work -= txr->num_desc; /* The distance to ring end */ bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); do { union e1000_adv_tx_desc *eop = buf->eop; if (eop == NULL) /* No work */ break; if ((eop->wb.status & E1000_TXD_STAT_DD) == 0) break; /* I/O not complete */ if (buf->m_head) { txr->bytes += buf->m_head->m_pkthdr.len; bus_dmamap_sync(txr->txtag, buf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, buf->map); m_freem(buf->m_head); buf->m_head = NULL; } buf->eop = NULL; ++txr->tx_avail; /* We clean the range if multi segment */ while (txd != eop) { ++txd; ++buf; ++work; /* wrap the ring? */ if (__predict_false(!work)) { work -= txr->num_desc; buf = txr->tx_buffers; txd = txr->tx_base; } if (buf->m_head) { txr->bytes += buf->m_head->m_pkthdr.len; bus_dmamap_sync(txr->txtag, buf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, buf->map); m_freem(buf->m_head); buf->m_head = NULL; } ++txr->tx_avail; buf->eop = NULL; } ++txr->packets; ++processed; txr->watchdog_time = ticks; /* Try the next packet */ ++txd; ++buf; ++work; /* reset with a wrap */ if (__predict_false(!work)) { work -= txr->num_desc; buf = txr->tx_buffers; txd = txr->tx_base; } prefetch(txd); } while (__predict_true(--limit)); bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); work += txr->num_desc; txr->next_to_clean = work; /* ** Watchdog calculation, we know there's ** work outstanding or the first return ** would have been taken, so none processed ** for too long indicates a hang. */ if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG)) txr->queue_status |= IGB_QUEUE_HUNG; if (txr->tx_avail >= IGB_QUEUE_THRESHOLD) txr->queue_status &= ~IGB_QUEUE_DEPLETED; if (txr->tx_avail == txr->num_desc) { txr->queue_status = IGB_QUEUE_IDLE; return (FALSE); } return (TRUE); } /********************************************************************* * * Refresh mbuf buffers for RX descriptor rings * - now keeps its own state so discards due to resource * exhaustion are unnecessary, if an mbuf cannot be obtained * it just returns, keeping its placeholder, thus it can simply * be recalled to try again. * **********************************************************************/ static void igb_refresh_mbufs(struct rx_ring *rxr, int limit) { struct adapter *adapter = rxr->adapter; bus_dma_segment_t hseg[1]; bus_dma_segment_t pseg[1]; struct igb_rx_buf *rxbuf; struct mbuf *mh, *mp; int i, j, nsegs, error; bool refreshed = FALSE; i = j = rxr->next_to_refresh; /* ** Get one descriptor beyond ** our work mark to control ** the loop. */ if (++j == adapter->num_rx_desc) j = 0; while (j != limit) { rxbuf = &rxr->rx_buffers[i]; /* No hdr mbuf used with header split off */ if (rxr->hdr_split == FALSE) goto no_split; if (rxbuf->m_head == NULL) { mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) goto update; } else mh = rxbuf->m_head; mh->m_pkthdr.len = mh->m_len = MHLEN; mh->m_len = MHLEN; mh->m_flags |= M_PKTHDR; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->htag, rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("Refresh mbufs: hdr dmamap load" " failure - %d\n", error); m_free(mh); rxbuf->m_head = NULL; goto update; } rxbuf->m_head = mh; bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_PREREAD); rxr->rx_base[i].read.hdr_addr = htole64(hseg[0].ds_addr); no_split: if (rxbuf->m_pack == NULL) { mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (mp == NULL) goto update; } else mp = rxbuf->m_pack; mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("Refresh mbufs: payload dmamap load" " failure - %d\n", error); m_free(mp); rxbuf->m_pack = NULL; goto update; } rxbuf->m_pack = mp; bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD); rxr->rx_base[i].read.pkt_addr = htole64(pseg[0].ds_addr); refreshed = TRUE; /* I feel wefreshed :) */ i = j; /* our next is precalculated */ rxr->next_to_refresh = i; if (++j == adapter->num_rx_desc) j = 0; } update: if (refreshed) /* update tail */ E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->next_to_refresh); return; } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. * **********************************************************************/ static int igb_allocate_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; device_t dev = adapter->dev; struct igb_rx_buf *rxbuf; int i, bsize, error; bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc; if (!(rxr->rx_buffers = (struct igb_rx_buf *) malloc(bsize, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); error = ENOMEM; goto fail; } if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSIZE, /* maxsize */ 1, /* nsegments */ MSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &rxr->htag))) { device_printf(dev, "Unable to create RX DMA tag\n"); goto fail; } if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &rxr->ptag))) { device_printf(dev, "Unable to create RX payload DMA tag\n"); goto fail; } for (i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap); if (error) { device_printf(dev, "Unable to create RX head DMA maps\n"); goto fail; } error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap); if (error) { device_printf(dev, "Unable to create RX packet DMA maps\n"); goto fail; } } return (0); fail: /* Frees all, but can handle partial completion */ igb_free_receive_structures(adapter); return (error); } static void igb_free_receive_ring(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct igb_rx_buf *rxbuf; for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->htag, rxbuf->hmap); rxbuf->m_head->m_flags |= M_PKTHDR; m_freem(rxbuf->m_head); } if (rxbuf->m_pack != NULL) { bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, rxbuf->pmap); rxbuf->m_pack->m_flags |= M_PKTHDR; m_freem(rxbuf->m_pack); } rxbuf->m_head = NULL; rxbuf->m_pack = NULL; } } /********************************************************************* * * Initialize a receive ring and its buffers. * **********************************************************************/ static int igb_setup_receive_ring(struct rx_ring *rxr) { struct adapter *adapter; struct ifnet *ifp; device_t dev; struct igb_rx_buf *rxbuf; bus_dma_segment_t pseg[1], hseg[1]; struct lro_ctrl *lro = &rxr->lro; int rsize, nsegs, error = 0; #ifdef DEV_NETMAP struct netmap_adapter *na = NA(rxr->adapter->ifp); struct netmap_slot *slot; #endif /* DEV_NETMAP */ adapter = rxr->adapter; dev = adapter->dev; ifp = adapter->ifp; /* Clear the ring contents */ IGB_RX_LOCK(rxr); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_RX, rxr->me, 0); #endif /* DEV_NETMAP */ rsize = roundup2(adapter->num_rx_desc * sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN); bzero((void *)rxr->rx_base, rsize); /* ** Free current RX buffer structures and their mbufs */ igb_free_receive_ring(rxr); /* Configure for header split? */ if (igb_header_split) rxr->hdr_split = TRUE; /* Now replenish the ring mbufs */ for (int j = 0; j < adapter->num_rx_desc; ++j) { struct mbuf *mh, *mp; rxbuf = &rxr->rx_buffers[j]; #ifdef DEV_NETMAP if (slot) { /* slot sj is mapped to the j-th NIC-ring entry */ int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); uint64_t paddr; void *addr; addr = PNMB(na, slot + sj, &paddr); netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr); /* Update descriptor */ rxr->rx_base[j].read.pkt_addr = htole64(paddr); continue; } #endif /* DEV_NETMAP */ if (rxr->hdr_split == FALSE) goto skip_head; /* First the header */ rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA); if (rxbuf->m_head == NULL) { error = ENOBUFS; goto fail; } m_adj(rxbuf->m_head, ETHER_ALIGN); mh = rxbuf->m_head; mh->m_len = mh->m_pkthdr.len = MHLEN; mh->m_flags |= M_PKTHDR; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->htag, rxbuf->hmap, rxbuf->m_head, hseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) /* Nothing elegant to do here */ goto fail; bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_PREREAD); /* Update descriptor */ rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr); skip_head: /* Now the payload cluster */ rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (rxbuf->m_pack == NULL) { error = ENOBUFS; goto fail; } mp = rxbuf->m_pack; mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) goto fail; bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD); /* Update descriptor */ rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr); } /* Setup our descriptor indices */ rxr->next_to_check = 0; rxr->next_to_refresh = adapter->num_rx_desc - 1; rxr->lro_enabled = FALSE; rxr->rx_split_packets = 0; rxr->rx_bytes = 0; rxr->fmp = NULL; rxr->lmp = NULL; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* ** Now set up the LRO interface, we ** also only do head split when LRO ** is enabled, since so often they ** are undesireable in similar setups. */ if (ifp->if_capenable & IFCAP_LRO) { error = tcp_lro_init(lro); if (error) { device_printf(dev, "LRO Initialization failed!\n"); goto fail; } INIT_DEBUGOUT("RX LRO Initialized\n"); rxr->lro_enabled = TRUE; lro->ifp = adapter->ifp; } IGB_RX_UNLOCK(rxr); return (0); fail: igb_free_receive_ring(rxr); IGB_RX_UNLOCK(rxr); return (error); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int igb_setup_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; int i; for (i = 0; i < adapter->num_queues; i++, rxr++) if (igb_setup_receive_ring(rxr)) goto fail; return (0); fail: /* * Free RX buffers allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'i' is the endpoint. */ for (int j = 0; j < i; ++j) { rxr = &adapter->rx_rings[j]; IGB_RX_LOCK(rxr); igb_free_receive_ring(rxr); IGB_RX_UNLOCK(rxr); } return (ENOBUFS); } /* * Initialise the RSS mapping for NICs that support multiple transmit/ * receive rings. */ static void igb_initialise_rss_mapping(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; int i; int queue_id; u32 reta; u32 rss_key[10], mrqc, shift = 0; /* XXX? */ if (adapter->hw.mac.type == e1000_82575) shift = 6; /* * The redirection table controls which destination * queue each bucket redirects traffic to. * Each DWORD represents four queues, with the LSB * being the first queue in the DWORD. * * This just allocates buckets to queues using round-robin * allocation. * * NOTE: It Just Happens to line up with the default * RSS allocation method. */ /* Warning FM follows */ reta = 0; for (i = 0; i < 128; i++) { #ifdef RSS queue_id = rss_get_indirection_to_bucket(i); /* * If we have more queues than buckets, we'll * end up mapping buckets to a subset of the * queues. * * If we have more buckets than queues, we'll * end up instead assigning multiple buckets * to queues. * * Both are suboptimal, but we need to handle * the case so we don't go out of bounds * indexing arrays and such. */ queue_id = queue_id % adapter->num_queues; #else queue_id = (i % adapter->num_queues); #endif /* Adjust if required */ queue_id = queue_id << shift; /* * The low 8 bits are for hash value (n+0); * The next 8 bits are for hash value (n+1), etc. */ reta = reta >> 8; reta = reta | ( ((uint32_t) queue_id) << 24); if ((i & 3) == 3) { E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta); reta = 0; } } /* Now fill in hash table */ /* * MRQC: Multiple Receive Queues Command * Set queuing to RSS control, number depends on the device. */ mrqc = E1000_MRQC_ENABLE_RSS_8Q; #ifdef RSS /* XXX ew typecasting */ rss_getkey((uint8_t *) &rss_key); #else arc4rand(&rss_key, sizeof(rss_key), 0); #endif for (i = 0; i < 10; i++) E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key[i]); /* * Configure the RSS fields to hash upon. */ mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV4_TCP); mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 | E1000_MRQC_RSS_FIELD_IPV6_TCP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP | E1000_MRQC_RSS_FIELD_IPV6_UDP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX); E1000_WRITE_REG(hw, E1000_MRQC, mrqc); } /********************************************************************* * * Enable receive unit. * **********************************************************************/ static void igb_initialize_receive_units(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; struct ifnet *ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u32 rctl, rxcsum, psize, srrctl = 0; INIT_DEBUGOUT("igb_initialize_receive_unit: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(hw, E1000_RCTL); E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); /* ** Set up for header split */ if (igb_header_split) { /* Use a standard mbuf for the header */ srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; } else srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; /* ** Set up for jumbo frames */ if (ifp->if_mtu > ETHERMTU) { rctl |= E1000_RCTL_LPE; if (adapter->rx_mbuf_sz == MJUMPAGESIZE) { srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) { srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; } /* Set maximum packet len */ psize = adapter->max_frame_size; /* are we on a vlan? */ if (adapter->ifp->if_vlantrunk != NULL) psize += VLAN_TAG_SIZE; E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); } else { rctl &= ~E1000_RCTL_LPE; srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_2048; } /* * If TX flow control is disabled and there's >1 queue defined, * enable DROP. * * This drops frames rather than hanging the RX MAC for all queues. */ if ((adapter->num_queues > 1) && (adapter->fc == e1000_fc_none || adapter->fc == e1000_fc_rx_pause)) { srrctl |= E1000_SRRCTL_DROP_EN; } /* Setup the Base and Length of the Rx Descriptor Rings */ for (int i = 0; i < adapter->num_queues; i++, rxr++) { u64 bus_addr = rxr->rxdma.dma_paddr; u32 rxdctl; E1000_WRITE_REG(hw, E1000_RDLEN(i), adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (uint32_t)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr); E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); /* Enable this Queue */ rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; rxdctl &= 0xFFF00000; rxdctl |= IGB_RX_PTHRESH; rxdctl |= IGB_RX_HTHRESH << 8; rxdctl |= IGB_RX_WTHRESH << 16; E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } /* ** Setup for RX MultiQueue */ rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); if (adapter->num_queues >1) { /* rss setup */ igb_initialise_rss_mapping(adapter); /* ** NOTE: Receive Full-Packet Checksum Offload ** is mutually exclusive with Multiqueue. However ** this is not the same as TCP/IP checksums which ** still work. */ rxcsum |= E1000_RXCSUM_PCSD; #if __FreeBSD_version >= 800000 /* For SCTP Offload */ if ((hw->mac.type == e1000_82576) && (ifp->if_capenable & IFCAP_RXCSUM)) rxcsum |= E1000_RXCSUM_CRCOFL; #endif } else { /* Non RSS setup */ if (ifp->if_capenable & IFCAP_RXCSUM) { rxcsum |= E1000_RXCSUM_IPPCSE; #if __FreeBSD_version >= 800000 if (adapter->hw.mac.type == e1000_82576) rxcsum |= E1000_RXCSUM_CRCOFL; #endif } else rxcsum &= ~E1000_RXCSUM_TUOFL; } E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Strip CRC bytes. */ rctl |= E1000_RCTL_SECRC; /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; /* Don't store bad packets */ rctl &= ~E1000_RCTL_SBP; /* Enable Receives */ E1000_WRITE_REG(hw, E1000_RCTL, rctl); /* * Setup the HW Rx Head and Tail Descriptor Pointers * - needs to be after enable */ for (int i = 0; i < adapter->num_queues; i++) { rxr = &adapter->rx_rings[i]; E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check); #ifdef DEV_NETMAP /* * an init() while a netmap client is active must * preserve the rx buffers passed to userspace. * In this driver it means we adjust RDT to * something different from next_to_refresh * (which is not used in netmap mode). */ if (ifp->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->rx_rings[i]; int t = rxr->next_to_refresh - nm_kr_rxspace(kring); if (t >= adapter->num_rx_desc) t -= adapter->num_rx_desc; else if (t < 0) t += adapter->num_rx_desc; E1000_WRITE_REG(hw, E1000_RDT(i), t); } else #endif /* DEV_NETMAP */ E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh); } return; } /********************************************************************* * * Free receive rings. * **********************************************************************/ static void igb_free_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; for (int i = 0; i < adapter->num_queues; i++, rxr++) { struct lro_ctrl *lro = &rxr->lro; igb_free_receive_buffers(rxr); tcp_lro_free(lro); igb_dma_free(adapter, &rxr->rxdma); } free(adapter->rx_rings, M_DEVBUF); } /********************************************************************* * * Free receive ring data structures. * **********************************************************************/ static void igb_free_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct igb_rx_buf *rxbuf; int i; INIT_DEBUGOUT("free_receive_structures: begin"); /* Cleanup any existing buffers */ if (rxr->rx_buffers != NULL) { for (i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->htag, rxbuf->hmap); rxbuf->m_head->m_flags |= M_PKTHDR; m_freem(rxbuf->m_head); } if (rxbuf->m_pack != NULL) { bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, rxbuf->pmap); rxbuf->m_pack->m_flags |= M_PKTHDR; m_freem(rxbuf->m_pack); } rxbuf->m_head = NULL; rxbuf->m_pack = NULL; if (rxbuf->hmap != NULL) { bus_dmamap_destroy(rxr->htag, rxbuf->hmap); rxbuf->hmap = NULL; } if (rxbuf->pmap != NULL) { bus_dmamap_destroy(rxr->ptag, rxbuf->pmap); rxbuf->pmap = NULL; } } if (rxr->rx_buffers != NULL) { free(rxr->rx_buffers, M_DEVBUF); rxr->rx_buffers = NULL; } } if (rxr->htag != NULL) { bus_dma_tag_destroy(rxr->htag); rxr->htag = NULL; } if (rxr->ptag != NULL) { bus_dma_tag_destroy(rxr->ptag); rxr->ptag = NULL; } } static __inline void igb_rx_discard(struct rx_ring *rxr, int i) { struct igb_rx_buf *rbuf; rbuf = &rxr->rx_buffers[i]; /* Partially received? Free the chain */ if (rxr->fmp != NULL) { rxr->fmp->m_flags |= M_PKTHDR; m_freem(rxr->fmp); rxr->fmp = NULL; rxr->lmp = NULL; } /* ** With advanced descriptors the writeback ** clobbers the buffer addrs, so its easier ** to just free the existing mbufs and take ** the normal refresh path to get new buffers ** and mapping. */ if (rbuf->m_head) { m_free(rbuf->m_head); rbuf->m_head = NULL; bus_dmamap_unload(rxr->htag, rbuf->hmap); } if (rbuf->m_pack) { m_free(rbuf->m_pack); rbuf->m_pack = NULL; bus_dmamap_unload(rxr->ptag, rbuf->pmap); } return; } static __inline void igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) { /* * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet * should be computed by hardware. Also it should not have VLAN tag in * ethernet header. */ if (rxr->lro_enabled && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) == (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) && (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { /* * Send to the stack if: ** - LRO not enabled, or ** - no LRO resources, or ** - lro enqueue fails */ if (rxr->lro.lro_cnt != 0) if (tcp_lro_rx(&rxr->lro, m, 0) == 0) return; } IGB_RX_UNLOCK(rxr); (*ifp->if_input)(ifp, m); IGB_RX_LOCK(rxr); } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. * * Return TRUE if more to clean, FALSE otherwise *********************************************************************/ static bool igb_rxeof(struct igb_queue *que, int count, int *done) { struct adapter *adapter = que->adapter; struct rx_ring *rxr = que->rxr; struct ifnet *ifp = adapter->ifp; struct lro_ctrl *lro = &rxr->lro; struct lro_entry *queued; int i, processed = 0, rxdone = 0; u32 ptype, staterr = 0; union e1000_adv_rx_desc *cur; IGB_RX_LOCK(rxr); /* Sync the ring. */ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, rxr->me, &processed)) { IGB_RX_UNLOCK(rxr); return (FALSE); } #endif /* DEV_NETMAP */ /* Main clean loop */ for (i = rxr->next_to_check; count != 0;) { struct mbuf *sendmp, *mh, *mp; struct igb_rx_buf *rxbuf; u16 hlen, plen, hdr, vtag, pkt_info; bool eop = FALSE; cur = &rxr->rx_base[i]; staterr = le32toh(cur->wb.upper.status_error); if ((staterr & E1000_RXD_STAT_DD) == 0) break; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; count--; sendmp = mh = mp = NULL; cur->wb.upper.status_error = 0; rxbuf = &rxr->rx_buffers[i]; plen = le16toh(cur->wb.upper.length); ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK; if (((adapter->hw.mac.type == e1000_i350) || (adapter->hw.mac.type == e1000_i354)) && (staterr & E1000_RXDEXT_STATERR_LB)) vtag = be16toh(cur->wb.upper.vlan); else vtag = le16toh(cur->wb.upper.vlan); hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info); pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info); eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP); /* * Free the frame (all segments) if we're at EOP and * it's an error. * * The datasheet states that EOP + status is only valid for * the final segment in a multi-segment frame. */ if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) { adapter->dropped_pkts++; ++rxr->rx_discarded; igb_rx_discard(rxr, i); goto next_desc; } /* ** The way the hardware is configured to ** split, it will ONLY use the header buffer ** when header split is enabled, otherwise we ** get normal behavior, ie, both header and ** payload are DMA'd into the payload buffer. ** ** The fmp test is to catch the case where a ** packet spans multiple descriptors, in that ** case only the first header is valid. */ if (rxr->hdr_split && rxr->fmp == NULL) { bus_dmamap_unload(rxr->htag, rxbuf->hmap); hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT; if (hlen > IGB_HDR_BUF) hlen = IGB_HDR_BUF; mh = rxr->rx_buffers[i].m_head; mh->m_len = hlen; /* clear buf pointer for refresh */ rxbuf->m_head = NULL; /* ** Get the payload length, this ** could be zero if its a small ** packet. */ if (plen > 0) { mp = rxr->rx_buffers[i].m_pack; mp->m_len = plen; mh->m_next = mp; /* clear buf pointer */ rxbuf->m_pack = NULL; rxr->rx_split_packets++; } } else { /* ** Either no header split, or a ** secondary piece of a fragmented ** split packet. */ mh = rxr->rx_buffers[i].m_pack; mh->m_len = plen; /* clear buf info for refresh */ rxbuf->m_pack = NULL; } bus_dmamap_unload(rxr->ptag, rxbuf->pmap); ++processed; /* So we know when to refresh */ /* Initial frame - setup */ if (rxr->fmp == NULL) { mh->m_pkthdr.len = mh->m_len; /* Save the head of the chain */ rxr->fmp = mh; rxr->lmp = mh; if (mp != NULL) { /* Add payload if split */ mh->m_pkthdr.len += mp->m_len; rxr->lmp = mh->m_next; } } else { /* Chain mbuf's together */ rxr->lmp->m_next = mh; rxr->lmp = rxr->lmp->m_next; rxr->fmp->m_pkthdr.len += mh->m_len; } if (eop) { rxr->fmp->m_pkthdr.rcvif = ifp; rxr->rx_packets++; /* capture data for AIM */ rxr->packets++; rxr->bytes += rxr->fmp->m_pkthdr.len; rxr->rx_bytes += rxr->fmp->m_pkthdr.len; if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) igb_rx_checksum(staterr, rxr->fmp, ptype); if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (staterr & E1000_RXD_STAT_VP) != 0) { rxr->fmp->m_pkthdr.ether_vtag = vtag; rxr->fmp->m_flags |= M_VLANTAG; } /* * In case of multiqueue, we have RXCSUM.PCSD bit set * and never cleared. This means we have RSS hash * available to be used. */ if (adapter->num_queues > 1) { rxr->fmp->m_pkthdr.flowid = le32toh(cur->wb.lower.hi_dword.rss); switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) { case E1000_RXDADV_RSSTYPE_IPV4_TCP: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV4); break; case E1000_RXDADV_RSSTYPE_IPV4: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV4); break; case E1000_RXDADV_RSSTYPE_IPV6_TCP: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV6); break; case E1000_RXDADV_RSSTYPE_IPV6_EX: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV6_EX); break; case E1000_RXDADV_RSSTYPE_IPV6: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV6); break; case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV6_EX); break; default: /* XXX fallthrough */ M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE); } } else { #ifndef IGB_LEGACY_TX rxr->fmp->m_pkthdr.flowid = que->msix; M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE); #endif } sendmp = rxr->fmp; /* Make sure to set M_PKTHDR. */ sendmp->m_flags |= M_PKTHDR; rxr->fmp = NULL; rxr->lmp = NULL; } next_desc: bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Advance our pointers to the next descriptor. */ if (++i == adapter->num_rx_desc) i = 0; /* ** Send to the stack or LRO */ if (sendmp != NULL) { rxr->next_to_check = i; igb_rx_input(rxr, ifp, sendmp, ptype); i = rxr->next_to_check; rxdone++; } /* Every 8 descriptors we go to refresh mbufs */ if (processed == 8) { igb_refresh_mbufs(rxr, i); processed = 0; } } /* Catch any remainders */ if (igb_rx_unrefreshed(rxr)) igb_refresh_mbufs(rxr, i); rxr->next_to_check = i; /* * Flush any outstanding LRO work */ while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } if (done != NULL) *done += rxdone; IGB_RX_UNLOCK(rxr); return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype) { u16 status = (u16)staterr; u8 errors = (u8) (staterr >> 24); int sctp; /* Ignore Checksum bit is set */ if (status & E1000_RXD_STAT_IXSM) { mp->m_pkthdr.csum_flags = 0; return; } if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0) sctp = 1; else sctp = 0; if (status & E1000_RXD_STAT_IPCS) { /* Did it pass? */ if (!(errors & E1000_RXD_ERR_IPE)) { /* IP Checksum Good */ mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; } else mp->m_pkthdr.csum_flags = 0; } if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); #if __FreeBSD_version >= 800000 if (sctp) /* reassign */ type = CSUM_SCTP_VALID; #endif /* Did it pass? */ if (!(errors & E1000_RXD_ERR_TCPE)) { mp->m_pkthdr.csum_flags |= type; if (sctp == 0) mp->m_pkthdr.csum_data = htons(0xffff); } } return; } /* * This routine is run via an vlan * config EVENT */ static void igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u32 index, bit; if (ifp->if_softc != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IGB_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; /* Change hw filter setting */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) igb_setup_vlan_hw_support(adapter); IGB_CORE_UNLOCK(adapter); } /* * This routine is run via an vlan * unconfig EVENT */ static void igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u32 index, bit; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IGB_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Change hw filter setting */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) igb_setup_vlan_hw_support(adapter); IGB_CORE_UNLOCK(adapter); } static void igb_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; u32 reg; if (adapter->vf_ifp) { e1000_rlpml_set_vf(hw, adapter->max_frame_size + VLAN_TAG_SIZE); return; } reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); /* Enable the Filter Table */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } /* Update the frame size */ E1000_WRITE_REG(&adapter->hw, E1000_RLPML, adapter->max_frame_size + VLAN_TAG_SIZE); /* Don't bother with table if no vlans */ if ((adapter->num_vlans == 0) || ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)) return; /* ** A soft reset zero's out the VFTA, so ** we need to repopulate it now. */ for (int i = 0; i < IGB_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) { if (adapter->vf_ifp) e1000_vfta_set_vf(hw, adapter->shadow_vfta[i], TRUE); else e1000_write_vfta(hw, i, adapter->shadow_vfta[i]); } } static void igb_enable_intr(struct adapter *adapter) { /* With RSS set up what to auto clear */ if (adapter->msix_mem) { u32 mask = (adapter->que_mask | adapter->link_mask); E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask); E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask); E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask); E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); } else { E1000_WRITE_REG(&adapter->hw, E1000_IMS, IMS_ENABLE_MASK); } E1000_WRITE_FLUSH(&adapter->hw); return; } static void igb_disable_intr(struct adapter *adapter) { if (adapter->msix_mem) { E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0); E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0); } E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); E1000_WRITE_FLUSH(&adapter->hw); return; } /* * Bit of a misnomer, what this really means is * to enable OS management of the system... aka * to disable special hardware management features */ static void igb_init_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; manc2h |= 1 << 5; /* Mng Port 623 */ manc2h |= 1 << 6; /* Mng Port 664 */ E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void igb_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is loaded. * */ static void igb_get_hw_control(struct adapter *adapter) { u32 ctrl_ext; if (adapter->vf_ifp) return; /* Let firmware know the driver has taken over */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); } /* * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that the * driver is no longer loaded. * */ static void igb_release_hw_control(struct adapter *adapter) { u32 ctrl_ext; if (adapter->vf_ifp) return; /* Let firmware taken over control of h/w */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); } static int igb_is_valid_ether_addr(uint8_t *addr) { char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { return (FALSE); } return (TRUE); } /* * Enable PCI Wake On Lan capability */ static void igb_enable_wakeup(device_t dev) { u16 cap, status; u8 id; /* First find the capabilities pointer*/ cap = pci_read_config(dev, PCIR_CAP_PTR, 2); /* Read the PM Capabilities */ id = pci_read_config(dev, cap, 1); if (id != PCIY_PMG) /* Something wrong */ return; /* OK, we have the power capabilities, so now get the status register */ cap += PCIR_POWER_STATUS; status = pci_read_config(dev, cap, 2); status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(dev, cap, status, 2); return; } static void igb_led_func(void *arg, int onoff) { struct adapter *adapter = arg; IGB_CORE_LOCK(adapter); if (onoff) { e1000_setup_led(&adapter->hw); e1000_led_on(&adapter->hw); } else { e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } IGB_CORE_UNLOCK(adapter); } static uint64_t igb_get_vf_counter(if_t ifp, ift_counter cnt) { struct adapter *adapter; struct e1000_vf_stats *stats; #ifndef IGB_LEGACY_TX struct tx_ring *txr; uint64_t rv; #endif adapter = if_getsoftc(ifp); stats = (struct e1000_vf_stats *)adapter->stats; switch (cnt) { case IFCOUNTER_IPACKETS: return (stats->gprc); case IFCOUNTER_OPACKETS: return (stats->gptc); case IFCOUNTER_IBYTES: return (stats->gorc); case IFCOUNTER_OBYTES: return (stats->gotc); case IFCOUNTER_IMCASTS: return (stats->mprc); case IFCOUNTER_IERRORS: return (adapter->dropped_pkts); case IFCOUNTER_OERRORS: return (adapter->watchdog_events); #ifndef IGB_LEGACY_TX case IFCOUNTER_OQDROPS: rv = 0; txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) rv += txr->br->br_drops; return (rv); #endif default: return (if_get_counter_default(ifp, cnt)); } } static uint64_t igb_get_counter(if_t ifp, ift_counter cnt) { struct adapter *adapter; struct e1000_hw_stats *stats; #ifndef IGB_LEGACY_TX struct tx_ring *txr; uint64_t rv; #endif adapter = if_getsoftc(ifp); if (adapter->vf_ifp) return (igb_get_vf_counter(ifp, cnt)); stats = (struct e1000_hw_stats *)adapter->stats; switch (cnt) { case IFCOUNTER_IPACKETS: return (stats->gprc); case IFCOUNTER_OPACKETS: return (stats->gptc); case IFCOUNTER_IBYTES: return (stats->gorc); case IFCOUNTER_OBYTES: return (stats->gotc); case IFCOUNTER_IMCASTS: return (stats->mprc); case IFCOUNTER_OMCASTS: return (stats->mptc); case IFCOUNTER_IERRORS: return (adapter->dropped_pkts + stats->rxerrc + stats->crcerrs + stats->algnerrc + stats->ruc + stats->roc + stats->cexterr); case IFCOUNTER_OERRORS: return (stats->ecol + stats->latecol + adapter->watchdog_events); case IFCOUNTER_COLLISIONS: return (stats->colc); case IFCOUNTER_IQDROPS: return (stats->mpc); #ifndef IGB_LEGACY_TX case IFCOUNTER_OQDROPS: rv = 0; txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) rv += txr->br->br_drops; return (rv); #endif default: return (if_get_counter_default(ifp, cnt)); } } /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void igb_update_stats_counters(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_hw_stats *stats; /* ** The virtual function adapter has only a ** small controlled set of stats, do only ** those and return. */ if (adapter->vf_ifp) { igb_update_vf_stats_counters(adapter); return; } stats = (struct e1000_hw_stats *)adapter->stats; if (adapter->hw.phy.media_type == e1000_media_type_copper || (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { stats->symerrs += E1000_READ_REG(hw,E1000_SYMERRS); stats->sec += E1000_READ_REG(hw, E1000_SEC); } stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); stats->mpc += E1000_READ_REG(hw, E1000_MPC); stats->scc += E1000_READ_REG(hw, E1000_SCC); stats->ecol += E1000_READ_REG(hw, E1000_ECOL); stats->mcc += E1000_READ_REG(hw, E1000_MCC); stats->latecol += E1000_READ_REG(hw, E1000_LATECOL); stats->colc += E1000_READ_REG(hw, E1000_COLC); stats->dc += E1000_READ_REG(hw, E1000_DC); stats->rlec += E1000_READ_REG(hw, E1000_RLEC); stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC); stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC); /* ** For watchdog management we need to know if we have been ** paused during the last interval, so capture that here. */ adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); stats->xoffrxc += adapter->pause_frames; stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC); stats->prc64 += E1000_READ_REG(hw, E1000_PRC64); stats->prc127 += E1000_READ_REG(hw, E1000_PRC127); stats->prc255 += E1000_READ_REG(hw, E1000_PRC255); stats->prc511 += E1000_READ_REG(hw, E1000_PRC511); stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023); stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522); stats->gprc += E1000_READ_REG(hw, E1000_GPRC); stats->bprc += E1000_READ_REG(hw, E1000_BPRC); stats->mprc += E1000_READ_REG(hw, E1000_MPRC); stats->gptc += E1000_READ_REG(hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. */ /* Both registers clear on the read of the high dword */ stats->gorc += E1000_READ_REG(hw, E1000_GORCL) + ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32); stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) + ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32); stats->rnbc += E1000_READ_REG(hw, E1000_RNBC); stats->ruc += E1000_READ_REG(hw, E1000_RUC); stats->rfc += E1000_READ_REG(hw, E1000_RFC); stats->roc += E1000_READ_REG(hw, E1000_ROC); stats->rjc += E1000_READ_REG(hw, E1000_RJC); stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC); stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC); stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC); stats->tor += E1000_READ_REG(hw, E1000_TORL) + ((u64)E1000_READ_REG(hw, E1000_TORH) << 32); stats->tot += E1000_READ_REG(hw, E1000_TOTL) + ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32); stats->tpr += E1000_READ_REG(hw, E1000_TPR); stats->tpt += E1000_READ_REG(hw, E1000_TPT); stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64); stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127); stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255); stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511); stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); stats->mptc += E1000_READ_REG(hw, E1000_MPTC); stats->bptc += E1000_READ_REG(hw, E1000_BPTC); /* Interrupt Counts */ stats->iac += E1000_READ_REG(hw, E1000_IAC); stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); /* Host to Card Statistics */ stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC); stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC); stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC); stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC); stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC); stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC); stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC); stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) + ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32)); stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) + ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32)); stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS); stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC); stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC); stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC); stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS); stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR); stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC); stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); /* Driver specific counters */ adapter->device_control = E1000_READ_REG(hw, E1000_CTRL); adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL); adapter->int_mask = E1000_READ_REG(hw, E1000_IMS); adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS); adapter->packet_buf_alloc_tx = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16); adapter->packet_buf_alloc_rx = (E1000_READ_REG(hw, E1000_PBA) & 0xffff); } /********************************************************************** * * Initialize the VF board statistics counters. * **********************************************************************/ static void igb_vf_init_stats(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_vf_stats *stats; stats = (struct e1000_vf_stats *)adapter->stats; if (stats == NULL) return; stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC); stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC); stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC); stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC); stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC); } /********************************************************************** * * Update the VF board statistics counters. * **********************************************************************/ static void igb_update_vf_stats_counters(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_vf_stats *stats; if (adapter->link_speed == 0) return; stats = (struct e1000_vf_stats *)adapter->stats; UPDATE_VF_REG(E1000_VFGPRC, stats->last_gprc, stats->gprc); UPDATE_VF_REG(E1000_VFGORC, stats->last_gorc, stats->gorc); UPDATE_VF_REG(E1000_VFGPTC, stats->last_gptc, stats->gptc); UPDATE_VF_REG(E1000_VFGOTC, stats->last_gotc, stats->gotc); UPDATE_VF_REG(E1000_VFMPRC, stats->last_mprc, stats->mprc); } /* Export a single 32-bit register via a read-only sysctl. */ static int igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; u_int val; adapter = oidp->oid_arg1; val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* ** Tuneable interrupt rate handler */ static int igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) { struct igb_queue *que = ((struct igb_queue *)oidp->oid_arg1); int error; u32 reg, usec, rate; reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix)); usec = ((reg & 0x7FFC) >> 2); if (usec > 0) rate = 1000000 / usec; else rate = 0; error = sysctl_handle_int(oidp, &rate, 0, req); if (error || !req->newptr) return error; return 0; } /* * Add sysctl variables, one per statistic, to the system. */ static void igb_add_hw_stats(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = adapter->stats; struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node; struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSIX IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &adapter->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", CTLFLAG_RD, &adapter->device_control, "Device Control Register"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", CTLFLAG_RD, &adapter->rx_control, "Receiver Control Register"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", CTLFLAG_RD, &adapter->int_mask, "Interrupt Mask"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", CTLFLAG_RD, &adapter->eint_mask, "Extended Interrupt Mask"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", CTLFLAG_RD, &adapter->packet_buf_alloc_tx, "Transmit Buffer Packet Allocation"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", CTLFLAG_RD, &adapter->packet_buf_alloc_rx, "Receive Buffer Packet Allocation"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) { struct lro_ctrl *lro = &rxr->lro; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i], sizeof(&adapter->queues[i]), igb_sysctl_interrupt_rate_handler, "IU", "Interrupt Rate"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), igb_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me), igb_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txr->no_desc_avail, "Queue Descriptors Unavailable"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &txr->total_packets, "Queue Packets Transmitted"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), igb_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me), igb_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &rxr->rx_packets, "Queue Packets Received"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &rxr->rx_bytes, "Queue Bytes Received"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued", CTLFLAG_RD, &lro->lro_queued, 0, "LRO Queued"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed", CTLFLAG_RD, &lro->lro_flushed, 0, "LRO Flushed"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "MAC Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); /* ** VF adapter has a very limited set of stats ** since its not managing the metal, so to speak. */ if (adapter->vf_ifp) { SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &stats->gprc, "Good Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &stats->gptc, "Good Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &stats->gorc, "Good Octets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &stats->gotc, "Good Octets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &stats->mprc, "Multicast Packets Received"); return; } SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, &stats->scc, "Single collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, &stats->colc, "Collision Count"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &stats->symerrs, "Symbol Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &stats->sec, "Sequence Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &stats->dc, "Defer Count"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &stats->mpc, "Missed Packets"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors", CTLFLAG_RD, &stats->rlec, "Receive Length Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &stats->rnbc, "Receive No Buffers"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &stats->ruc, "Receive Undersize"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &stats->rfc, "Fragmented Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &stats->roc, "Oversized Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &stats->rjc, "Recevied Jabber"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &stats->rxerrc, "Receive Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &stats->crcerrs, "CRC errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &stats->algnerrc, "Alignment Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs", CTLFLAG_RD, &stats->tncrs, "Transmit with No CRS"); /* On 82575 these are collision counts */ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &stats->cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &stats->xonrxc, "XON Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &stats->xontxc, "XON Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &stats->xoffrxc, "XOFF Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &stats->xofftxc, "XOFF Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd", CTLFLAG_RD, &stats->fcruc, "Unsupported Flow Control Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd", CTLFLAG_RD, &stats->mgprc, "Management Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop", CTLFLAG_RD, &stats->mgpdc, "Management Packets Dropped"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd", CTLFLAG_RD, &stats->mgptc, "Management Packets Transmitted"); /* Packet Reception Stats */ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &stats->tpr, "Total Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &stats->gprc, "Good Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &stats->bprc, "Broadcast Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &stats->mprc, "Multicast Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &stats->prc64, "64 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &stats->prc127, "65-127 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &stats->prc255, "128-255 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &stats->prc511, "256-511 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &stats->prc1023, "512-1023 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &stats->prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &stats->gorc, "Good Octets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd", CTLFLAG_RD, &stats->tor, "Total Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &stats->gotc, "Good Octets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd", CTLFLAG_RD, &stats->tot, "Total Octets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &stats->tpt, "Total Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &stats->gptc, "Good Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &stats->bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &stats->mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &stats->ptc64, "64 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &stats->ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &stats->ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &stats->ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &stats->ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &stats->ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &stats->tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &stats->tsctfc, "TSO Contexts Failed"); /* Interrupt Stats */ int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD, NULL, "Interrupt Statistics"); int_list = SYSCTL_CHILDREN(int_node); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts", CTLFLAG_RD, &stats->iac, "Interrupt Assertion Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", CTLFLAG_RD, &stats->icrxptc, "Interrupt Cause Rx Pkt Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", CTLFLAG_RD, &stats->icrxatc, "Interrupt Cause Rx Abs Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", CTLFLAG_RD, &stats->ictxptc, "Interrupt Cause Tx Pkt Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", CTLFLAG_RD, &stats->ictxatc, "Interrupt Cause Tx Abs Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", CTLFLAG_RD, &stats->ictxqec, "Interrupt Cause Tx Queue Empty Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", CTLFLAG_RD, &stats->ictxqmtc, "Interrupt Cause Tx Queue Min Thresh Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", CTLFLAG_RD, &stats->icrxdmtc, "Interrupt Cause Rx Desc Min Thresh Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun", CTLFLAG_RD, &stats->icrxoc, "Interrupt Cause Receiver Overrun Count"); /* Host to Card Stats */ host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", CTLFLAG_RD, NULL, "Host to Card Statistics"); host_list = SYSCTL_CHILDREN(host_node); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt", CTLFLAG_RD, &stats->cbtmpc, "Circuit Breaker Tx Packet Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard", CTLFLAG_RD, &stats->htdpmc, "Host Transmit Discarded Packets"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt", CTLFLAG_RD, &stats->rpthc, "Rx Packets To Host"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts", CTLFLAG_RD, &stats->cbrmpc, "Circuit Breaker Rx Packet Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop", CTLFLAG_RD, &stats->cbrdpc, "Circuit Breaker Rx Dropped Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt", CTLFLAG_RD, &stats->hgptc, "Host Good Packets Tx Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop", CTLFLAG_RD, &stats->htcbdpc, "Host Tx Circuit Breaker Dropped Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes", CTLFLAG_RD, &stats->hgorc, "Host Good Octets Received Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes", CTLFLAG_RD, &stats->hgotc, "Host Good Octets Transmit Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors", CTLFLAG_RD, &stats->lenerrs, "Length Errors"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt", CTLFLAG_RD, &stats->scvpc, "SerDes/SGMII Code Violation Pkt Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed", CTLFLAG_RD, &stats->hrmpc, "Header Redirection Missed Packet Count"); } /********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words, stuff that matters is in that extent. * **********************************************************************/ static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. */ if (result == 1) { adapter = (struct adapter *)arg1; igb_print_nvm_info(adapter); } return (error); } static void igb_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* Its a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ",row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } static void igb_set_sysctl_value(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLFLAG_RW, limit, value, description); } /* ** Set flow control using sysctl: ** Flow control values: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error; static int input = 3; /* default is full */ struct adapter *adapter = (struct adapter *) arg1; error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); switch (input) { case e1000_fc_rx_pause: case e1000_fc_tx_pause: case e1000_fc_full: case e1000_fc_none: adapter->hw.fc.requested_mode = input; adapter->fc = input; break; default: /* Do nothing */ return (error); } adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; e1000_force_mac_fc(&adapter->hw); /* XXX TODO: update DROP_EN on each RX queue if appropriate */ return (error); } /* ** Manage DMA Coalesce: ** Control values: ** 0/1 - off/on ** Legal timer values are: ** 250,500,1000-10000 in thousands */ static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error; error = sysctl_handle_int(oidp, &adapter->dmac, 0, req); if ((error) || (req->newptr == NULL)) return (error); switch (adapter->dmac) { case 0: /* Disabling */ break; case 1: /* Just enable and use default */ adapter->dmac = 1000; break; case 250: case 500: case 1000: case 2000: case 3000: case 4000: case 5000: case 6000: case 7000: case 8000: case 9000: case 10000: /* Legal values - allow */ break; default: /* Do nothing, illegal value */ adapter->dmac = 0; return (EINVAL); } /* Reinit the interface */ igb_init(adapter); return (error); } /* ** Manage Energy Efficient Ethernet: ** Control values: ** 0/1 - enabled/disabled */ static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error, value; value = adapter->hw.dev_spec._82575.eee_disable; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); IGB_CORE_LOCK(adapter); adapter->hw.dev_spec._82575.eee_disable = (value != 0); igb_init_locked(adapter); IGB_CORE_UNLOCK(adapter); return (0); } Index: projects/clang-trunk/sys/kern/vfs_cache.c =================================================================== --- projects/clang-trunk/sys/kern/vfs_cache.c (revision 287501) +++ projects/clang-trunk/sys/kern/vfs_cache.c (revision 287502) @@ -1,1481 +1,1525 @@ /*- * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Poul-Henning Kamp of the FreeBSD Project. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include SDT_PROVIDER_DECLARE(vfs); SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct vnode *", "char *", "struct vnode *"); SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, "struct vnode *", "char *"); SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, "struct vnode *"); SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, "struct vnode *", "char *", "struct vnode *"); SDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, "struct vnode *"); SDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, "int", "struct vnode *", "char *"); SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, "struct vnode *", "char *", "struct vnode *"); SDT_PROBE_DEFINE2(vfs, namecache, lookup, hit__negative, "struct vnode *", "char *"); SDT_PROBE_DEFINE2(vfs, namecache, lookup, miss, "struct vnode *", "char *"); SDT_PROBE_DEFINE1(vfs, namecache, purge, done, "struct vnode *"); SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done, "struct vnode *"); SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, "struct mount *"); SDT_PROBE_DEFINE3(vfs, namecache, zap, done, "struct vnode *", "char *", "struct vnode *"); SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, "struct vnode *", "char *"); /* * This structure describes the elements in the cache of recent * names looked up by namei. */ struct namecache { LIST_ENTRY(namecache) nc_hash; /* hash chain */ LIST_ENTRY(namecache) nc_src; /* source vnode list */ TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ struct vnode *nc_dvp; /* vnode of parent of name */ struct vnode *nc_vp; /* vnode the name refers to */ u_char nc_flag; /* flag bits */ u_char nc_nlen; /* length of name */ char nc_name[0]; /* segment name + nul */ }; /* * struct namecache_ts repeats struct namecache layout up to the * nc_nlen member. * struct namecache_ts is used in place of struct namecache when time(s) need * to be stored. The nc_dotdottime field is used when a cache entry is mapping * both a non-dotdot directory name plus dotdot for the directory's * parent. */ struct namecache_ts { LIST_ENTRY(namecache) nc_hash; /* hash chain */ LIST_ENTRY(namecache) nc_src; /* source vnode list */ TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ struct vnode *nc_dvp; /* vnode of parent of name */ struct vnode *nc_vp; /* vnode the name refers to */ u_char nc_flag; /* flag bits */ u_char nc_nlen; /* length of name */ struct timespec nc_time; /* timespec provided by fs */ struct timespec nc_dotdottime; /* dotdot timespec provided by fs */ int nc_ticks; /* ticks value when entry was added */ char nc_name[0]; /* segment name + nul */ }; /* * Flags in namecache.nc_flag */ #define NCF_WHITE 0x01 #define NCF_ISDOTDOT 0x02 #define NCF_TS 0x04 #define NCF_DTS 0x08 /* * Name caching works as follows: * * Names found by directory scans are retained in a cache * for future reference. It is managed LRU, so frequently * used names will hang around. Cache is indexed by hash value * obtained from (vp, name) where vp refers to the directory * containing name. * * If it is a "negative" entry, (i.e. for a name that is known NOT to * exist) the vnode pointer will be NULL. * * Upon reaching the last segment of a path, if the reference * is for DELETE, or NOCACHE is set (rewrite), and the * name is located in the cache, it will be dropped. */ /* * Structures associated with name cacheing. */ #define NCHHASH(hash) \ (&nchashtbl[(hash) & nchash]) static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ static TAILQ_HEAD(, namecache) ncneg; /* Hash Table */ static u_long nchash; /* size of hash table */ SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "Size of namecache hash table"); static u_long ncnegfactor = 16; /* ratio of negative entries */ SYSCTL_ULONG(_vfs, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "Ratio of negative namecache entries"); static u_long numneg; /* number of negative entries allocated */ SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "Number of negative entries in namecache"); static u_long numcache; /* number of cache entries allocated */ SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "Number of namecache entries"); static u_long numcachehv; /* number of cache entries with vnodes held */ SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "Number of namecache entries with vnodes held"); static u_int ncsizefactor = 2; SYSCTL_UINT(_vfs, OID_AUTO, ncsizefactor, CTLFLAG_RW, &ncsizefactor, 0, "Size factor for namecache"); struct nchstats nchstats; /* cache effectiveness statistics */ static struct rwlock cache_lock; RW_SYSINIT(vfscache, &cache_lock, "Name Cache"); #define CACHE_UPGRADE_LOCK() rw_try_upgrade(&cache_lock) #define CACHE_RLOCK() rw_rlock(&cache_lock) #define CACHE_RUNLOCK() rw_runlock(&cache_lock) #define CACHE_WLOCK() rw_wlock(&cache_lock) #define CACHE_WUNLOCK() rw_wunlock(&cache_lock) /* * UMA zones for the VFS cache. * * The small cache is used for entries with short names, which are the * most common. The large cache is used for entries which are too big to * fit in the small cache. */ static uma_zone_t cache_zone_small; static uma_zone_t cache_zone_small_ts; static uma_zone_t cache_zone_large; static uma_zone_t cache_zone_large_ts; #define CACHE_PATH_CUTOFF 35 static struct namecache * cache_alloc(int len, int ts) { if (len > CACHE_PATH_CUTOFF) { if (ts) return (uma_zalloc(cache_zone_large_ts, M_WAITOK)); else return (uma_zalloc(cache_zone_large, M_WAITOK)); } if (ts) return (uma_zalloc(cache_zone_small_ts, M_WAITOK)); else return (uma_zalloc(cache_zone_small, M_WAITOK)); } static void cache_free(struct namecache *ncp) { int ts; if (ncp == NULL) return; ts = ncp->nc_flag & NCF_TS; if (ncp->nc_nlen <= CACHE_PATH_CUTOFF) { if (ts) uma_zfree(cache_zone_small_ts, ncp); else uma_zfree(cache_zone_small, ncp); } else if (ts) uma_zfree(cache_zone_large_ts, ncp); else uma_zfree(cache_zone_large, ncp); } static char * nc_get_name(struct namecache *ncp) { struct namecache_ts *ncp_ts; if ((ncp->nc_flag & NCF_TS) == 0) return (ncp->nc_name); ncp_ts = (struct namecache_ts *)ncp; return (ncp_ts->nc_name); } static void cache_out_ts(struct namecache *ncp, struct timespec *tsp, int *ticksp) { KASSERT((ncp->nc_flag & NCF_TS) != 0 || (tsp == NULL && ticksp == NULL), ("No NCF_TS")); if (tsp != NULL) *tsp = ((struct namecache_ts *)ncp)->nc_time; if (ticksp != NULL) *ticksp = ((struct namecache_ts *)ncp)->nc_ticks; } static int doingcache = 1; /* 1 => enable the cache */ SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "VFS namecache enabled"); /* Export size information to userland */ SYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, sizeof(struct namecache), "sizeof(struct namecache)"); /* * The new name cache statistics */ static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics"); #define STATNODE(mode, name, var, descr) \ SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, descr); STATNODE(CTLFLAG_RD, numneg, &numneg, "Number of negative cache entries"); STATNODE(CTLFLAG_RD, numcache, &numcache, "Number of cache entries"); static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls, "Number of cache lookups"); static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits, "Number of '.' hits"); static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits, "Number of '..' hits"); static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks, "Number of checks in lookup"); static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss, "Number of cache misses"); static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap, "Number of cache misses we do not want to cache"); static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps, "Number of cache hits (positive) we do not want to cache"); static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits, "Number of cache hits (positive)"); static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps, "Number of cache hits (negative) we do not want to cache"); static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits, "Number of cache hits (negative)"); static u_long numupgrades; STATNODE(CTLFLAG_RD, numupgrades, &numupgrades, "Number of updates of the cache after lookup (write lock + retry)"); SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD | CTLFLAG_MPSAFE, &nchstats, sizeof(nchstats), "LU", "VFS cache effectiveness statistics"); static void cache_zap(struct namecache *ncp); static int vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen); static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, char *buf, char **retbuf, u_int buflen); static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); #ifdef DIAGNOSTIC /* * Grab an atomic snapshot of the name cache hash chain lengths */ static SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats"); static int sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS) { struct nchashhead *ncpp; struct namecache *ncp; int i, error, n_nchash, *cntbuf; +retry: n_nchash = nchash + 1; /* nchash is max index, not count */ if (req->oldptr == NULL) return SYSCTL_OUT(req, 0, n_nchash * sizeof(int)); cntbuf = malloc(n_nchash * sizeof(int), M_TEMP, M_ZERO | M_WAITOK); CACHE_RLOCK(); + if (n_nchash != nchash + 1) { + CACHE_RUNLOCK(); + free(cntbuf, M_TEMP); + goto retry; + } /* Scan hash tables counting entries */ for (ncpp = nchashtbl, i = 0; i < n_nchash; ncpp++, i++) LIST_FOREACH(ncp, ncpp, nc_hash) cntbuf[i]++; CACHE_RUNLOCK(); for (error = 0, i = 0; i < n_nchash; i++) if ((error = SYSCTL_OUT(req, &cntbuf[i], sizeof(int))) != 0) break; free(cntbuf, M_TEMP); return (error); } SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD| CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths"); static int sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS) { int error; struct nchashhead *ncpp; struct namecache *ncp; int n_nchash; int count, maxlength, used, pct; if (!req->oldptr) return SYSCTL_OUT(req, 0, 4 * sizeof(int)); CACHE_RLOCK(); n_nchash = nchash + 1; /* nchash is max index, not count */ used = 0; maxlength = 0; /* Scan hash tables for applicable entries */ for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { count = 0; LIST_FOREACH(ncp, ncpp, nc_hash) { count++; } if (count) used++; if (maxlength < count) maxlength = count; } n_nchash = nchash + 1; CACHE_RUNLOCK(); pct = (used * 100) / (n_nchash / 100); error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash)); if (error) return (error); error = SYSCTL_OUT(req, &used, sizeof(used)); if (error) return (error); error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength)); if (error) return (error); error = SYSCTL_OUT(req, &pct, sizeof(pct)); if (error) return (error); return (0); } SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD| CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_nchash, "I", "nchash statistics (number of total/used buckets, maximum chain length, usage percentage)"); #endif /* * cache_zap(): * * Removes a namecache entry from cache, whether it contains an actual * pointer to a vnode or if it is just a negative cache entry. */ static void cache_zap(ncp) struct namecache *ncp; { struct vnode *vp; rw_assert(&cache_lock, RA_WLOCKED); CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp); #ifdef KDTRACE_HOOKS if (ncp->nc_vp != NULL) { SDT_PROBE(vfs, namecache, zap, done, ncp->nc_dvp, nc_get_name(ncp), ncp->nc_vp, 0, 0); } else { SDT_PROBE(vfs, namecache, zap_negative, done, ncp->nc_dvp, nc_get_name(ncp), 0, 0, 0); } #endif vp = NULL; LIST_REMOVE(ncp, nc_hash); if (ncp->nc_flag & NCF_ISDOTDOT) { if (ncp == ncp->nc_dvp->v_cache_dd) ncp->nc_dvp->v_cache_dd = NULL; } else { LIST_REMOVE(ncp, nc_src); if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) { vp = ncp->nc_dvp; numcachehv--; } } if (ncp->nc_vp) { TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); if (ncp == ncp->nc_vp->v_cache_dd) ncp->nc_vp->v_cache_dd = NULL; } else { TAILQ_REMOVE(&ncneg, ncp, nc_dst); numneg--; } numcache--; cache_free(ncp); if (vp) vdrop(vp); } /* * Lookup an entry in the cache * * Lookup is called with dvp pointing to the directory to search, * cnp pointing to the name of the entry being sought. If the lookup * succeeds, the vnode is returned in *vpp, and a status of -1 is * returned. If the lookup determines that the name does not exist * (negative cacheing), a status of ENOENT is returned. If the lookup * fails, a status of zero is returned. If the directory vnode is * recycled out from under us due to a forced unmount, a status of * ENOENT is returned. * * vpp is locked and ref'd on return. If we're looking up DOTDOT, dvp is * unlocked. If we're looking up . an extra ref is taken, but the lock is * not recursively acquired. */ int cache_lookup(dvp, vpp, cnp, tsp, ticksp) struct vnode *dvp; struct vnode **vpp; struct componentname *cnp; struct timespec *tsp; int *ticksp; { struct namecache *ncp; uint32_t hash; int error, ltype, wlocked; if (!doingcache) { cnp->cn_flags &= ~MAKEENTRY; return (0); } retry: CACHE_RLOCK(); wlocked = 0; numcalls++; error = 0; retry_wlocked: if (cnp->cn_nameptr[0] == '.') { if (cnp->cn_namelen == 1) { *vpp = dvp; CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .", dvp, cnp->cn_nameptr); dothits++; SDT_PROBE(vfs, namecache, lookup, hit, dvp, ".", *vpp, 0, 0); if (tsp != NULL) timespecclear(tsp); if (ticksp != NULL) *ticksp = ticks; goto success; } if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { dotdothits++; if (dvp->v_cache_dd == NULL) { SDT_PROBE(vfs, namecache, lookup, miss, dvp, "..", NULL, 0, 0); goto unlock; } if ((cnp->cn_flags & MAKEENTRY) == 0) { if (!wlocked && !CACHE_UPGRADE_LOCK()) goto wlock; if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT) cache_zap(dvp->v_cache_dd); dvp->v_cache_dd = NULL; CACHE_WUNLOCK(); return (0); } ncp = dvp->v_cache_dd; if (ncp->nc_flag & NCF_ISDOTDOT) *vpp = ncp->nc_vp; else *vpp = ncp->nc_dvp; /* Return failure if negative entry was found. */ if (*vpp == NULL) goto negative_success; CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..", dvp, cnp->cn_nameptr, *vpp); SDT_PROBE(vfs, namecache, lookup, hit, dvp, "..", *vpp, 0, 0); cache_out_ts(ncp, tsp, ticksp); if ((ncp->nc_flag & (NCF_ISDOTDOT | NCF_DTS)) == NCF_DTS && tsp != NULL) *tsp = ((struct namecache_ts *)ncp)-> nc_dotdottime; goto success; } } hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT); hash = fnv_32_buf(&dvp, sizeof(dvp), hash); LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { numchecks++; if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && !bcmp(nc_get_name(ncp), cnp->cn_nameptr, ncp->nc_nlen)) break; } /* We failed to find an entry */ if (ncp == NULL) { SDT_PROBE(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr, NULL, 0, 0); if ((cnp->cn_flags & MAKEENTRY) == 0) { nummisszap++; } else { nummiss++; } nchstats.ncs_miss++; goto unlock; } /* We don't want to have an entry, so dump it */ if ((cnp->cn_flags & MAKEENTRY) == 0) { numposzaps++; nchstats.ncs_badhits++; if (!wlocked && !CACHE_UPGRADE_LOCK()) goto wlock; cache_zap(ncp); CACHE_WUNLOCK(); return (0); } /* We found a "positive" match, return the vnode */ if (ncp->nc_vp) { numposhits++; nchstats.ncs_goodhits++; *vpp = ncp->nc_vp; CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p", dvp, cnp->cn_nameptr, *vpp, ncp); SDT_PROBE(vfs, namecache, lookup, hit, dvp, nc_get_name(ncp), *vpp, 0, 0); cache_out_ts(ncp, tsp, ticksp); goto success; } negative_success: /* We found a negative match, and want to create it, so purge */ if (cnp->cn_nameiop == CREATE) { numnegzaps++; nchstats.ncs_badhits++; if (!wlocked && !CACHE_UPGRADE_LOCK()) goto wlock; cache_zap(ncp); CACHE_WUNLOCK(); return (0); } if (!wlocked && !CACHE_UPGRADE_LOCK()) goto wlock; numneghits++; /* * We found a "negative" match, so we shift it to the end of * the "negative" cache entries queue to satisfy LRU. Also, * check to see if the entry is a whiteout; indicate this to * the componentname, if so. */ TAILQ_REMOVE(&ncneg, ncp, nc_dst); TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); nchstats.ncs_neghits++; if (ncp->nc_flag & NCF_WHITE) cnp->cn_flags |= ISWHITEOUT; SDT_PROBE(vfs, namecache, lookup, hit__negative, dvp, nc_get_name(ncp), 0, 0, 0); cache_out_ts(ncp, tsp, ticksp); CACHE_WUNLOCK(); return (ENOENT); wlock: /* * We need to update the cache after our lookup, so upgrade to * a write lock and retry the operation. */ CACHE_RUNLOCK(); CACHE_WLOCK(); numupgrades++; wlocked = 1; goto retry_wlocked; success: /* * On success we return a locked and ref'd vnode as per the lookup * protocol. */ if (dvp == *vpp) { /* lookup on "." */ VREF(*vpp); if (wlocked) CACHE_WUNLOCK(); else CACHE_RUNLOCK(); /* * When we lookup "." we still can be asked to lock it * differently... */ ltype = cnp->cn_lkflags & LK_TYPE_MASK; if (ltype != VOP_ISLOCKED(*vpp)) { if (ltype == LK_EXCLUSIVE) { vn_lock(*vpp, LK_UPGRADE | LK_RETRY); if ((*vpp)->v_iflag & VI_DOOMED) { /* forced unmount */ vrele(*vpp); *vpp = NULL; return (ENOENT); } } else vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY); } return (-1); } ltype = 0; /* silence gcc warning */ if (cnp->cn_flags & ISDOTDOT) { ltype = VOP_ISLOCKED(dvp); VOP_UNLOCK(dvp, 0); } vhold(*vpp); if (wlocked) CACHE_WUNLOCK(); else CACHE_RUNLOCK(); error = vget(*vpp, cnp->cn_lkflags | LK_VNHELD, cnp->cn_thread); if (cnp->cn_flags & ISDOTDOT) { vn_lock(dvp, ltype | LK_RETRY); if (dvp->v_iflag & VI_DOOMED) { if (error == 0) vput(*vpp); *vpp = NULL; return (ENOENT); } } if (error) { *vpp = NULL; goto retry; } if ((cnp->cn_flags & ISLASTCN) && (cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) { ASSERT_VOP_ELOCKED(*vpp, "cache_lookup"); } return (-1); unlock: if (wlocked) CACHE_WUNLOCK(); else CACHE_RUNLOCK(); return (0); } /* * Add an entry to the cache. */ void cache_enter_time(dvp, vp, cnp, tsp, dtsp) struct vnode *dvp; struct vnode *vp; struct componentname *cnp; struct timespec *tsp; struct timespec *dtsp; { struct namecache *ncp, *n2; struct namecache_ts *n3; struct nchashhead *ncpp; uint32_t hash; int flag; int hold; int zap; int len; CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr); VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp, ("cache_enter: Adding a doomed vnode")); VNASSERT(dvp == NULL || (dvp->v_iflag & VI_DOOMED) == 0, dvp, ("cache_enter: Doomed vnode used as src")); if (!doingcache) return; /* * Avoid blowout in namecache entries. */ if (numcache >= desiredvnodes * ncsizefactor) return; flag = 0; if (cnp->cn_nameptr[0] == '.') { if (cnp->cn_namelen == 1) return; if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { CACHE_WLOCK(); /* * If dotdot entry already exists, just retarget it * to new parent vnode, otherwise continue with new * namecache entry allocation. */ if ((ncp = dvp->v_cache_dd) != NULL && ncp->nc_flag & NCF_ISDOTDOT) { KASSERT(ncp->nc_dvp == dvp, ("wrong isdotdot parent")); if (ncp->nc_vp != NULL) { TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); } else { TAILQ_REMOVE(&ncneg, ncp, nc_dst); numneg--; } if (vp != NULL) { TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); } else { TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); numneg++; } ncp->nc_vp = vp; CACHE_WUNLOCK(); return; } dvp->v_cache_dd = NULL; SDT_PROBE(vfs, namecache, enter, done, dvp, "..", vp, 0, 0); CACHE_WUNLOCK(); flag = NCF_ISDOTDOT; } } hold = 0; zap = 0; /* * Calculate the hash key and setup as much of the new * namecache entry as possible before acquiring the lock. */ ncp = cache_alloc(cnp->cn_namelen, tsp != NULL); ncp->nc_vp = vp; ncp->nc_dvp = dvp; ncp->nc_flag = flag; if (tsp != NULL) { n3 = (struct namecache_ts *)ncp; n3->nc_time = *tsp; n3->nc_ticks = ticks; n3->nc_flag |= NCF_TS; if (dtsp != NULL) { n3->nc_dotdottime = *dtsp; n3->nc_flag |= NCF_DTS; } } len = ncp->nc_nlen = cnp->cn_namelen; hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT); strlcpy(nc_get_name(ncp), cnp->cn_nameptr, len + 1); hash = fnv_32_buf(&dvp, sizeof(dvp), hash); CACHE_WLOCK(); /* * See if this vnode or negative entry is already in the cache * with this name. This can happen with concurrent lookups of * the same path name. */ ncpp = NCHHASH(hash); LIST_FOREACH(n2, ncpp, nc_hash) { if (n2->nc_dvp == dvp && n2->nc_nlen == cnp->cn_namelen && !bcmp(nc_get_name(n2), cnp->cn_nameptr, n2->nc_nlen)) { if (tsp != NULL) { KASSERT((n2->nc_flag & NCF_TS) != 0, ("no NCF_TS")); n3 = (struct namecache_ts *)n2; n3->nc_time = ((struct namecache_ts *)ncp)->nc_time; n3->nc_ticks = ((struct namecache_ts *)ncp)->nc_ticks; if (dtsp != NULL) { n3->nc_dotdottime = ((struct namecache_ts *)ncp)-> nc_dotdottime; n3->nc_flag |= NCF_DTS; } } CACHE_WUNLOCK(); cache_free(ncp); return; } } if (flag == NCF_ISDOTDOT) { /* * See if we are trying to add .. entry, but some other lookup * has populated v_cache_dd pointer already. */ if (dvp->v_cache_dd != NULL) { CACHE_WUNLOCK(); cache_free(ncp); return; } KASSERT(vp == NULL || vp->v_type == VDIR, ("wrong vnode type %p", vp)); dvp->v_cache_dd = ncp; } numcache++; if (!vp) { numneg++; if (cnp->cn_flags & ISWHITEOUT) ncp->nc_flag |= NCF_WHITE; } else if (vp->v_type == VDIR) { if (flag != NCF_ISDOTDOT) { /* * For this case, the cache entry maps both the * directory name in it and the name ".." for the * directory's parent. */ if ((n2 = vp->v_cache_dd) != NULL && (n2->nc_flag & NCF_ISDOTDOT) != 0) cache_zap(n2); vp->v_cache_dd = ncp; } } else { vp->v_cache_dd = NULL; } /* * Insert the new namecache entry into the appropriate chain * within the cache entries table. */ LIST_INSERT_HEAD(ncpp, ncp, nc_hash); if (flag != NCF_ISDOTDOT) { if (LIST_EMPTY(&dvp->v_cache_src)) { hold = 1; numcachehv++; } LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); } /* * If the entry is "negative", we place it into the * "negative" cache queue, otherwise, we place it into the * destination vnode's cache entries queue. */ if (vp) { TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); SDT_PROBE(vfs, namecache, enter, done, dvp, nc_get_name(ncp), vp, 0, 0); } else { TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); SDT_PROBE(vfs, namecache, enter_negative, done, dvp, nc_get_name(ncp), 0, 0, 0); } if (numneg * ncnegfactor > numcache) { ncp = TAILQ_FIRST(&ncneg); KASSERT(ncp->nc_vp == NULL, ("ncp %p vp %p on ncneg", ncp, ncp->nc_vp)); zap = 1; } if (hold) vhold(dvp); if (zap) cache_zap(ncp); CACHE_WUNLOCK(); } /* * Name cache initialization, from vfs_init() when we are booting */ static void nchinit(void *dummy __unused) { TAILQ_INIT(&ncneg); cache_zone_small = uma_zcreate("S VFS Cache", sizeof(struct namecache) + CACHE_PATH_CUTOFF + 1, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); cache_zone_small_ts = uma_zcreate("STS VFS Cache", sizeof(struct namecache_ts) + CACHE_PATH_CUTOFF + 1, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); cache_zone_large = uma_zcreate("L VFS Cache", sizeof(struct namecache) + NAME_MAX + 1, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); cache_zone_large_ts = uma_zcreate("LTS VFS Cache", sizeof(struct namecache_ts) + NAME_MAX + 1, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash); } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL); +void +cache_changesize(int newmaxvnodes) +{ + struct nchashhead *new_nchashtbl, *old_nchashtbl; + u_long new_nchash, old_nchash; + struct namecache *ncp; + uint32_t hash; + int i; + + new_nchashtbl = hashinit(newmaxvnodes * 2, M_VFSCACHE, &new_nchash); + /* If same hash table size, nothing to do */ + if (nchash == new_nchash) { + free(new_nchashtbl, M_VFSCACHE); + return; + } + /* + * Move everything from the old hash table to the new table. + * None of the namecache entries in the table can be removed + * because to do so, they have to be removed from the hash table. + */ + CACHE_WLOCK(); + old_nchashtbl = nchashtbl; + old_nchash = nchash; + nchashtbl = new_nchashtbl; + nchash = new_nchash; + for (i = 0; i <= old_nchash; i++) { + while ((ncp = LIST_FIRST(&old_nchashtbl[i])) != NULL) { + hash = fnv_32_buf(nc_get_name(ncp), ncp->nc_nlen, + FNV1_32_INIT); + hash = fnv_32_buf(&ncp->nc_dvp, sizeof(ncp->nc_dvp), + hash); + LIST_REMOVE(ncp, nc_hash); + LIST_INSERT_HEAD(NCHHASH(hash), ncp, nc_hash); + } + } + CACHE_WUNLOCK(); + free(old_nchashtbl, M_VFSCACHE); +} /* * Invalidate all entries to a particular vnode. */ void cache_purge(vp) struct vnode *vp; { CTR1(KTR_VFS, "cache_purge(%p)", vp); SDT_PROBE(vfs, namecache, purge, done, vp, 0, 0, 0, 0); CACHE_WLOCK(); while (!LIST_EMPTY(&vp->v_cache_src)) cache_zap(LIST_FIRST(&vp->v_cache_src)); while (!TAILQ_EMPTY(&vp->v_cache_dst)) cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); if (vp->v_cache_dd != NULL) { KASSERT(vp->v_cache_dd->nc_flag & NCF_ISDOTDOT, ("lost dotdot link")); cache_zap(vp->v_cache_dd); } KASSERT(vp->v_cache_dd == NULL, ("incomplete purge")); CACHE_WUNLOCK(); } /* * Invalidate all negative entries for a particular directory vnode. */ void cache_purge_negative(vp) struct vnode *vp; { struct namecache *cp, *ncp; CTR1(KTR_VFS, "cache_purge_negative(%p)", vp); SDT_PROBE(vfs, namecache, purge_negative, done, vp, 0, 0, 0, 0); CACHE_WLOCK(); LIST_FOREACH_SAFE(cp, &vp->v_cache_src, nc_src, ncp) { if (cp->nc_vp == NULL) cache_zap(cp); } CACHE_WUNLOCK(); } /* * Flush all entries referencing a particular filesystem. */ void cache_purgevfs(mp) struct mount *mp; { struct nchashhead *ncpp; struct namecache *ncp, *nnp; /* Scan hash tables for applicable entries */ SDT_PROBE(vfs, namecache, purgevfs, done, mp, 0, 0, 0, 0); CACHE_WLOCK(); for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { LIST_FOREACH_SAFE(ncp, ncpp, nc_hash, nnp) { if (ncp->nc_dvp->v_mount == mp) cache_zap(ncp); } } CACHE_WUNLOCK(); } /* * Perform canonical checks and cache lookup and pass on to filesystem * through the vop_cachedlookup only if needed. */ int vfs_cache_lookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { struct vnode *dvp; int error; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct ucred *cred = cnp->cn_cred; int flags = cnp->cn_flags; struct thread *td = cnp->cn_thread; *vpp = NULL; dvp = ap->a_dvp; if (dvp->v_type != VDIR) return (ENOTDIR); if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); error = VOP_ACCESS(dvp, VEXEC, cred, td); if (error) return (error); error = cache_lookup(dvp, vpp, cnp, NULL, NULL); if (error == 0) return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); if (error == -1) return (0); return (error); } /* * XXX All of these sysctls would probably be more productive dead. */ static int disablecwd; SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, "Disable the getcwd syscall"); /* Implementation of the getcwd syscall. */ int sys___getcwd(td, uap) struct thread *td; struct __getcwd_args *uap; { return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen, MAXPATHLEN)); } int kern___getcwd(struct thread *td, char *buf, enum uio_seg bufseg, u_int buflen, u_int path_max) { char *bp, *tmpbuf; struct filedesc *fdp; struct vnode *cdir, *rdir; int error; if (disablecwd) return (ENODEV); if (buflen < 2) return (EINVAL); if (buflen > path_max) buflen = path_max; tmpbuf = malloc(buflen, M_TEMP, M_WAITOK); fdp = td->td_proc->p_fd; FILEDESC_SLOCK(fdp); cdir = fdp->fd_cdir; VREF(cdir); rdir = fdp->fd_rdir; VREF(rdir); FILEDESC_SUNLOCK(fdp); error = vn_fullpath1(td, cdir, rdir, tmpbuf, &bp, buflen); vrele(rdir); vrele(cdir); if (!error) { if (bufseg == UIO_SYSSPACE) bcopy(bp, buf, strlen(bp) + 1); else error = copyout(bp, buf, strlen(bp) + 1); #ifdef KTRACE if (KTRPOINT(curthread, KTR_NAMEI)) ktrnamei(bp); #endif } free(tmpbuf, M_TEMP); return (error); } /* * Thus begins the fullpath magic. */ #undef STATNODE #define STATNODE(name, descr) \ static u_int name; \ SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, descr) static int disablefullpath; SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0, "Disable the vn_fullpath function"); /* These count for kern___getcwd(), too. */ STATNODE(numfullpathcalls, "Number of fullpath search calls"); STATNODE(numfullpathfail1, "Number of fullpath search errors (ENOTDIR)"); STATNODE(numfullpathfail2, "Number of fullpath search errors (VOP_VPTOCNP failures)"); STATNODE(numfullpathfail4, "Number of fullpath search errors (ENOMEM)"); STATNODE(numfullpathfound, "Number of successful fullpath calls"); /* * Retrieve the full filesystem path that correspond to a vnode from the name * cache (if available) */ int vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) { char *buf; struct filedesc *fdp; struct vnode *rdir; int error; if (disablefullpath) return (ENODEV); if (vn == NULL) return (EINVAL); buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); fdp = td->td_proc->p_fd; FILEDESC_SLOCK(fdp); rdir = fdp->fd_rdir; VREF(rdir); FILEDESC_SUNLOCK(fdp); error = vn_fullpath1(td, vn, rdir, buf, retbuf, MAXPATHLEN); vrele(rdir); if (!error) *freebuf = buf; else free(buf, M_TEMP); return (error); } /* * This function is similar to vn_fullpath, but it attempts to lookup the * pathname relative to the global root mount point. This is required for the * auditing sub-system, as audited pathnames must be absolute, relative to the * global root mount point. */ int vn_fullpath_global(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) { char *buf; int error; if (disablefullpath) return (ENODEV); if (vn == NULL) return (EINVAL); buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, MAXPATHLEN); if (!error) *freebuf = buf; else free(buf, M_TEMP); return (error); } int vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen) { int error; CACHE_RLOCK(); error = vn_vptocnp_locked(vp, cred, buf, buflen); if (error == 0) CACHE_RUNLOCK(); return (error); } static int vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen) { struct vnode *dvp; struct namecache *ncp; int error; TAILQ_FOREACH(ncp, &((*vp)->v_cache_dst), nc_dst) { if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) break; } if (ncp != NULL) { if (*buflen < ncp->nc_nlen) { CACHE_RUNLOCK(); vrele(*vp); numfullpathfail4++; error = ENOMEM; SDT_PROBE(vfs, namecache, fullpath, return, error, vp, NULL, 0, 0); return (error); } *buflen -= ncp->nc_nlen; memcpy(buf + *buflen, nc_get_name(ncp), ncp->nc_nlen); SDT_PROBE(vfs, namecache, fullpath, hit, ncp->nc_dvp, nc_get_name(ncp), vp, 0, 0); dvp = *vp; *vp = ncp->nc_dvp; vref(*vp); CACHE_RUNLOCK(); vrele(dvp); CACHE_RLOCK(); return (0); } SDT_PROBE(vfs, namecache, fullpath, miss, vp, 0, 0, 0, 0); CACHE_RUNLOCK(); vn_lock(*vp, LK_SHARED | LK_RETRY); error = VOP_VPTOCNP(*vp, &dvp, cred, buf, buflen); vput(*vp); if (error) { numfullpathfail2++; SDT_PROBE(vfs, namecache, fullpath, return, error, vp, NULL, 0, 0); return (error); } *vp = dvp; CACHE_RLOCK(); if (dvp->v_iflag & VI_DOOMED) { /* forced unmount */ CACHE_RUNLOCK(); vrele(dvp); error = ENOENT; SDT_PROBE(vfs, namecache, fullpath, return, error, vp, NULL, 0, 0); return (error); } /* * *vp has its use count incremented still. */ return (0); } /* * The magic behind kern___getcwd() and vn_fullpath(). */ static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, char *buf, char **retbuf, u_int buflen) { int error, slash_prefixed; #ifdef KDTRACE_HOOKS struct vnode *startvp = vp; #endif struct vnode *vp1; buflen--; buf[buflen] = '\0'; error = 0; slash_prefixed = 0; SDT_PROBE(vfs, namecache, fullpath, entry, vp, 0, 0, 0, 0); numfullpathcalls++; vref(vp); CACHE_RLOCK(); if (vp->v_type != VDIR) { error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen); if (error) return (error); if (buflen == 0) { CACHE_RUNLOCK(); vrele(vp); return (ENOMEM); } buf[--buflen] = '/'; slash_prefixed = 1; } while (vp != rdir && vp != rootvnode) { if (vp->v_vflag & VV_ROOT) { if (vp->v_iflag & VI_DOOMED) { /* forced unmount */ CACHE_RUNLOCK(); vrele(vp); error = ENOENT; SDT_PROBE(vfs, namecache, fullpath, return, error, vp, NULL, 0, 0); break; } vp1 = vp->v_mount->mnt_vnodecovered; vref(vp1); CACHE_RUNLOCK(); vrele(vp); vp = vp1; CACHE_RLOCK(); continue; } if (vp->v_type != VDIR) { CACHE_RUNLOCK(); vrele(vp); numfullpathfail1++; error = ENOTDIR; SDT_PROBE(vfs, namecache, fullpath, return, error, vp, NULL, 0, 0); break; } error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen); if (error) break; if (buflen == 0) { CACHE_RUNLOCK(); vrele(vp); error = ENOMEM; SDT_PROBE(vfs, namecache, fullpath, return, error, startvp, NULL, 0, 0); break; } buf[--buflen] = '/'; slash_prefixed = 1; } if (error) return (error); if (!slash_prefixed) { if (buflen == 0) { CACHE_RUNLOCK(); vrele(vp); numfullpathfail4++; SDT_PROBE(vfs, namecache, fullpath, return, ENOMEM, startvp, NULL, 0, 0); return (ENOMEM); } buf[--buflen] = '/'; } numfullpathfound++; CACHE_RUNLOCK(); vrele(vp); SDT_PROBE(vfs, namecache, fullpath, return, 0, startvp, buf + buflen, 0, 0); *retbuf = buf + buflen; return (0); } struct vnode * vn_dir_dd_ino(struct vnode *vp) { struct namecache *ncp; struct vnode *ddvp; ASSERT_VOP_LOCKED(vp, "vn_dir_dd_ino"); CACHE_RLOCK(); TAILQ_FOREACH(ncp, &(vp->v_cache_dst), nc_dst) { if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) continue; ddvp = ncp->nc_dvp; vhold(ddvp); CACHE_RUNLOCK(); if (vget(ddvp, LK_SHARED | LK_NOWAIT | LK_VNHELD, curthread)) return (NULL); return (ddvp); } CACHE_RUNLOCK(); return (NULL); } int vn_commname(struct vnode *vp, char *buf, u_int buflen) { struct namecache *ncp; int l; CACHE_RLOCK(); TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst) if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) break; if (ncp == NULL) { CACHE_RUNLOCK(); return (ENOENT); } l = min(ncp->nc_nlen, buflen - 1); memcpy(buf, nc_get_name(ncp), l); CACHE_RUNLOCK(); buf[l] = '\0'; return (0); } /* ABI compat shims for old kernel modules. */ #undef cache_enter void cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp); void cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) { cache_enter_time(dvp, vp, cnp, NULL, NULL); } /* * This function updates path string to vnode's full global path * and checks the size of the new path string against the pathlen argument. * * Requires a locked, referenced vnode. * Vnode is re-locked on success or ENODEV, otherwise unlocked. * * If sysctl debug.disablefullpath is set, ENODEV is returned, * vnode is left locked and path remain untouched. * * If vp is a directory, the call to vn_fullpath_global() always succeeds * because it falls back to the ".." lookup if the namecache lookup fails. */ int vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path, u_int pathlen) { struct nameidata nd; struct vnode *vp1; char *rpath, *fbuf; int error; ASSERT_VOP_ELOCKED(vp, __func__); /* Return ENODEV if sysctl debug.disablefullpath==1 */ if (disablefullpath) return (ENODEV); /* Construct global filesystem path from vp. */ VOP_UNLOCK(vp, 0); error = vn_fullpath_global(td, vp, &rpath, &fbuf); if (error != 0) { vrele(vp); return (error); } if (strlen(rpath) >= pathlen) { vrele(vp); error = ENAMETOOLONG; goto out; } /* * Re-lookup the vnode by path to detect a possible rename. * As a side effect, the vnode is relocked. * If vnode was renamed, return ENOENT. */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE, path, td); error = namei(&nd); if (error != 0) { vrele(vp); goto out; } NDFREE(&nd, NDF_ONLY_PNBUF); vp1 = nd.ni_vp; vrele(vp); if (vp1 == vp) strcpy(path, rpath); else { vput(vp1); error = ENOENT; } out: free(fbuf, M_TEMP); return (error); } Index: projects/clang-trunk/sys/kern/vfs_hash.c =================================================================== --- projects/clang-trunk/sys/kern/vfs_hash.c (revision 287501) +++ projects/clang-trunk/sys/kern/vfs_hash.c (revision 287502) @@ -1,163 +1,200 @@ /*- * Copyright (c) 2005 Poul-Henning Kamp * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include static MALLOC_DEFINE(M_VFS_HASH, "vfs_hash", "VFS hash table"); static LIST_HEAD(vfs_hash_head, vnode) *vfs_hash_tbl; static LIST_HEAD(,vnode) vfs_hash_side; static u_long vfs_hash_mask; static struct rwlock vfs_hash_lock; static void vfs_hashinit(void *dummy __unused) { vfs_hash_tbl = hashinit(desiredvnodes, M_VFS_HASH, &vfs_hash_mask); rw_init(&vfs_hash_lock, "vfs hash"); LIST_INIT(&vfs_hash_side); } /* Must be SI_ORDER_SECOND so desiredvnodes is available */ SYSINIT(vfs_hash, SI_SUB_VFS, SI_ORDER_SECOND, vfs_hashinit, NULL); u_int vfs_hash_index(struct vnode *vp) { return (vp->v_hash + vp->v_mount->mnt_hashseed); } static struct vfs_hash_head * vfs_hash_bucket(const struct mount *mp, u_int hash) { return (&vfs_hash_tbl[(hash + mp->mnt_hashseed) & vfs_hash_mask]); } int vfs_hash_get(const struct mount *mp, u_int hash, int flags, struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg) { struct vnode *vp; int error; while (1) { rw_rlock(&vfs_hash_lock); LIST_FOREACH(vp, vfs_hash_bucket(mp, hash), v_hashlist) { if (vp->v_hash != hash) continue; if (vp->v_mount != mp) continue; if (fn != NULL && fn(vp, arg)) continue; vhold(vp); rw_runlock(&vfs_hash_lock); error = vget(vp, flags | LK_VNHELD, td); if (error == ENOENT && (flags & LK_NOWAIT) == 0) break; if (error) return (error); *vpp = vp; return (0); } if (vp == NULL) { rw_runlock(&vfs_hash_lock); *vpp = NULL; return (0); } } } void vfs_hash_remove(struct vnode *vp) { rw_wlock(&vfs_hash_lock); LIST_REMOVE(vp, v_hashlist); rw_wunlock(&vfs_hash_lock); } int vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg) { struct vnode *vp2; int error; *vpp = NULL; while (1) { rw_wlock(&vfs_hash_lock); LIST_FOREACH(vp2, vfs_hash_bucket(vp->v_mount, hash), v_hashlist) { if (vp2->v_hash != hash) continue; if (vp2->v_mount != vp->v_mount) continue; if (fn != NULL && fn(vp2, arg)) continue; vhold(vp2); rw_wunlock(&vfs_hash_lock); error = vget(vp2, flags | LK_VNHELD, td); if (error == ENOENT && (flags & LK_NOWAIT) == 0) break; rw_wlock(&vfs_hash_lock); LIST_INSERT_HEAD(&vfs_hash_side, vp, v_hashlist); rw_wunlock(&vfs_hash_lock); vput(vp); if (!error) *vpp = vp2; return (error); } if (vp2 == NULL) break; } vp->v_hash = hash; LIST_INSERT_HEAD(vfs_hash_bucket(vp->v_mount, hash), vp, v_hashlist); rw_wunlock(&vfs_hash_lock); return (0); } void vfs_hash_rehash(struct vnode *vp, u_int hash) { rw_wlock(&vfs_hash_lock); LIST_REMOVE(vp, v_hashlist); LIST_INSERT_HEAD(vfs_hash_bucket(vp->v_mount, hash), vp, v_hashlist); vp->v_hash = hash; rw_wunlock(&vfs_hash_lock); } + +void +vfs_hash_changesize(int newmaxvnodes) +{ + struct vfs_hash_head *vfs_hash_newtbl, *vfs_hash_oldtbl; + u_long vfs_hash_newmask, vfs_hash_oldmask; + struct vnode *vp; + int i; + + vfs_hash_newtbl = hashinit(newmaxvnodes, M_VFS_HASH, + &vfs_hash_newmask); + /* If same hash table size, nothing to do */ + if (vfs_hash_mask == vfs_hash_newmask) { + free(vfs_hash_newtbl, M_VFS_HASH); + return; + } + /* + * Move everything from the old hash table to the new table. + * None of the vnodes in the table can be recycled because to + * do so, they have to be removed from the hash table. + */ + rw_wlock(&vfs_hash_lock); + vfs_hash_oldtbl = vfs_hash_tbl; + vfs_hash_oldmask = vfs_hash_mask; + vfs_hash_tbl = vfs_hash_newtbl; + vfs_hash_mask = vfs_hash_newmask; + for (i = 0; i <= vfs_hash_oldmask; i++) { + while ((vp = LIST_FIRST(&vfs_hash_oldtbl[i])) != NULL) { + LIST_REMOVE(vp, v_hashlist); + LIST_INSERT_HEAD( + vfs_hash_bucket(vp->v_mount, vp->v_hash), + vp, v_hashlist); + } + } + rw_wunlock(&vfs_hash_lock); + free(vfs_hash_oldtbl, M_VFS_HASH); +} Index: projects/clang-trunk/sys/kern/vfs_subr.c =================================================================== --- projects/clang-trunk/sys/kern/vfs_subr.c (revision 287501) +++ projects/clang-trunk/sys/kern/vfs_subr.c (revision 287502) @@ -1,4955 +1,4972 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 */ /* * External virtual filesystem routines */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif static void delmntque(struct vnode *vp); static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo); static void syncer_shutdown(void *arg, int howto); static int vtryrecycle(struct vnode *vp); static void v_init_counters(struct vnode *); static void v_incr_usecount(struct vnode *); static void v_incr_devcount(struct vnode *); static void v_decr_devcount(struct vnode *); static void vnlru_free(int); static void vgonel(struct vnode *); static void vfs_knllock(void *arg); static void vfs_knlunlock(void *arg); static void vfs_knl_assert_locked(void *arg); static void vfs_knl_assert_unlocked(void *arg); static void destroy_vpollinfo(struct vpollinfo *vi); /* * Number of vnodes in existence. Increased whenever getnewvnode() * allocates a new vnode, decreased in vdropl() for VI_DOOMED vnode. */ static unsigned long numvnodes; SYSCTL_ULONG(_vfs, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "Number of vnodes in existence"); static u_long vnodes_created; SYSCTL_ULONG(_vfs, OID_AUTO, vnodes_created, CTLFLAG_RD, &vnodes_created, 0, "Number of vnodes created by getnewvnode"); /* * Conversion tables for conversion from vnode types to inode formats * and back. */ enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; int vttoif_tab[10] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, S_IFMT }; /* * List of vnodes that are ready for recycling. */ static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* * Free vnode target. Free vnodes may simply be files which have been stat'd * but not read. This is somewhat common, and a small cache of such files * should be kept to avoid recreation costs. */ static u_long wantfreevnodes; SYSCTL_ULONG(_vfs, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); /* Number of vnodes in the free list. */ static u_long freevnodes; SYSCTL_ULONG(_vfs, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "Number of vnodes in the free list"); static int vlru_allow_cache_src; SYSCTL_INT(_vfs, OID_AUTO, vlru_allow_cache_src, CTLFLAG_RW, &vlru_allow_cache_src, 0, "Allow vlru to reclaim source vnode"); static u_long recycles_count; SYSCTL_ULONG(_vfs, OID_AUTO, recycles, CTLFLAG_RD, &recycles_count, 0, "Number of vnodes recycled to avoid exceding kern.maxvnodes"); /* * Various variables used for debugging the new implementation of * reassignbuf(). * XXX these are probably of (very) limited utility now. */ static int reassignbufcalls; SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, "Number of calls to reassignbuf"); static u_long free_owe_inact; SYSCTL_ULONG(_vfs, OID_AUTO, free_owe_inact, CTLFLAG_RD, &free_owe_inact, 0, "Number of times free vnodes kept on active list due to VFS " "owing inactivation"); /* * Cache for the mount type id assigned to NFS. This is used for * special checks in nfs/nfs_nqlease.c and vm/vnode_pager.c. */ int nfs_mount_type = -1; /* To keep more than one thread at a time from running vfs_getnewfsid */ static struct mtx mntid_mtx; /* * Lock for any access to the following: * vnode_free_list * numvnodes * freevnodes */ static struct mtx vnode_free_list_mtx; /* Publicly exported FS */ struct nfs_public nfs_pub; static uma_zone_t buf_trie_zone; /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */ static uma_zone_t vnode_zone; static uma_zone_t vnodepoll_zone; /* * The workitem queue. * * It is useful to delay writes of file data and filesystem metadata * for tens of seconds so that quickly created and deleted files need * not waste disk bandwidth being created and removed. To realize this, * we append vnodes to a "workitem" queue. When running with a soft * updates implementation, most pending metadata dependencies should * not wait for more than a few seconds. Thus, mounted on block devices * are delayed only about a half the time that file data is delayed. * Similarly, directory updates are more critical, so are only delayed * about a third the time that file data is delayed. Thus, there are * SYNCER_MAXDELAY queues that are processed round-robin at a rate of * one each second (driven off the filesystem syncer process). The * syncer_delayno variable indicates the next queue that is to be processed. * Items that need to be processed soon are placed in this queue: * * syncer_workitem_pending[syncer_delayno] * * A delay of fifteen seconds is done by placing the request fifteen * entries later in the queue: * * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] * */ static int syncer_delayno; static long syncer_mask; LIST_HEAD(synclist, bufobj); static struct synclist *syncer_workitem_pending; /* * The sync_mtx protects: * bo->bo_synclist * sync_vnode_count * syncer_delayno * syncer_state * syncer_workitem_pending * syncer_worklist_len * rushjob */ static struct mtx sync_mtx; static struct cv sync_wakeup; #define SYNCER_MAXDELAY 32 static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ static int syncdelay = 30; /* max time to delay syncing data */ static int filedelay = 30; /* time to delay syncing files */ SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "Time to delay syncing files (in seconds)"); static int dirdelay = 29; /* time to delay syncing directories */ SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "Time to delay syncing directories (in seconds)"); static int metadelay = 28; /* time to delay syncing metadata */ SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "Time to delay syncing metadata (in seconds)"); static int rushjob; /* number of slots to run ASAP */ static int stat_rush_requests; /* number of times I/O speeded up */ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "Number of times I/O speeded up (rush requests)"); /* * When shutting down the syncer, run it at four times normal speed. */ #define SYNCER_SHUTDOWN_SPEEDUP 4 static int sync_vnode_count; static int syncer_worklist_len; static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY } syncer_state; /* * Number of vnodes we want to exist at any one time. This is mostly used * to size hash tables in vnode-related code. It is normally not used in * getnewvnode(), as wantfreevnodes is normally nonzero.) * * XXX desiredvnodes is historical cruft and should not exist. */ int desiredvnodes; -SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, - &desiredvnodes, 0, "Maximum number of vnodes"); + +static int +sysctl_update_desiredvnodes(SYSCTL_HANDLER_ARGS) +{ + int error, old_desiredvnodes; + + old_desiredvnodes = desiredvnodes; + if ((error = sysctl_handle_int(oidp, arg1, arg2, req)) != 0) + return (error); + if (old_desiredvnodes != desiredvnodes) { + vfs_hash_changesize(desiredvnodes); + cache_changesize(desiredvnodes); + } + return (0); +} + +SYSCTL_PROC(_kern, KERN_MAXVNODES, maxvnodes, + CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, &desiredvnodes, 0, + sysctl_update_desiredvnodes, "I", "Maximum number of vnodes"); SYSCTL_ULONG(_kern, OID_AUTO, minvnodes, CTLFLAG_RW, &wantfreevnodes, 0, "Minimum number of vnodes (legacy)"); static int vnlru_nowhere; SYSCTL_INT(_debug, OID_AUTO, vnlru_nowhere, CTLFLAG_RW, &vnlru_nowhere, 0, "Number of times the vnlru process ran without success"); /* Shift count for (uintptr_t)vp to initialize vp->v_hash. */ static int vnsz2log; /* * Support for the bufobj clean & dirty pctrie. */ static void * buf_trie_alloc(struct pctrie *ptree) { return uma_zalloc(buf_trie_zone, M_NOWAIT); } static void buf_trie_free(struct pctrie *ptree, void *node) { uma_zfree(buf_trie_zone, node); } PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free); /* * Initialize the vnode management data structures. * * Reevaluate the following cap on the number of vnodes after the physical * memory size exceeds 512GB. In the limit, as the physical memory size * grows, the ratio of physical pages to vnodes approaches sixteen to one. */ #ifndef MAXVNODES_MAX #define MAXVNODES_MAX (512 * (1024 * 1024 * 1024 / (int)PAGE_SIZE / 16)) #endif static void vntblinit(void *dummy __unused) { u_int i; int physvnodes, virtvnodes; /* * Desiredvnodes is a function of the physical memory size and the * kernel's heap size. Generally speaking, it scales with the * physical memory size. The ratio of desiredvnodes to physical pages * is one to four until desiredvnodes exceeds 98,304. Thereafter, the * marginal ratio of desiredvnodes to physical pages is one to * sixteen. However, desiredvnodes is limited by the kernel's heap * size. The memory required by desiredvnodes vnodes and vm objects * may not exceed one seventh of the kernel's heap size. */ physvnodes = maxproc + vm_cnt.v_page_count / 16 + 3 * min(98304 * 4, vm_cnt.v_page_count) / 16; virtvnodes = vm_kmem_size / (7 * (sizeof(struct vm_object) + sizeof(struct vnode))); desiredvnodes = min(physvnodes, virtvnodes); if (desiredvnodes > MAXVNODES_MAX) { if (bootverbose) printf("Reducing kern.maxvnodes %d -> %d\n", desiredvnodes, MAXVNODES_MAX); desiredvnodes = MAXVNODES_MAX; } wantfreevnodes = desiredvnodes / 4; mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF); TAILQ_INIT(&vnode_free_list); mtx_init(&vnode_free_list_mtx, "vnode_free_list", NULL, MTX_DEF); vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* * Preallocate enough nodes to support one-per buf so that * we can not fail an insert. reassignbuf() callers can not * tolerate the insertion failure. */ buf_trie_zone = uma_zcreate("BUF TRIE", pctrie_node_size(), NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM); uma_prealloc(buf_trie_zone, nbuf); /* * Initialize the filesystem syncer. */ syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, &syncer_mask); syncer_maxdelay = syncer_mask + 1; mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF); cv_init(&sync_wakeup, "syncer"); for (i = 1; i <= sizeof(struct vnode); i <<= 1) vnsz2log++; vnsz2log--; } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL); /* * Mark a mount point as busy. Used to synchronize access and to delay * unmounting. Eventually, mountlist_mtx is not released on failure. * * vfs_busy() is a custom lock, it can block the caller. * vfs_busy() only sleeps if the unmount is active on the mount point. * For a mountpoint mp, vfs_busy-enforced lock is before lock of any * vnode belonging to mp. * * Lookup uses vfs_busy() to traverse mount points. * root fs var fs * / vnode lock A / vnode lock (/var) D * /var vnode lock B /log vnode lock(/var/log) E * vfs_busy lock C vfs_busy lock F * * Within each file system, the lock order is C->A->B and F->D->E. * * When traversing across mounts, the system follows that lock order: * * C->A->B * | * +->F->D->E * * The lookup() process for namei("/var") illustrates the process: * VOP_LOOKUP() obtains B while A is held * vfs_busy() obtains a shared lock on F while A and B are held * vput() releases lock on B * vput() releases lock on A * VFS_ROOT() obtains lock on D while shared lock on F is held * vfs_unbusy() releases shared lock on F * vn_lock() obtains lock on deadfs vnode vp_crossmp instead of A. * Attempt to lock A (instead of vp_crossmp) while D is held would * violate the global order, causing deadlocks. * * dounmount() locks B while F is drained. */ int vfs_busy(struct mount *mp, int flags) { MPASS((flags & ~MBF_MASK) == 0); CTR3(KTR_VFS, "%s: mp %p with flags %d", __func__, mp, flags); MNT_ILOCK(mp); MNT_REF(mp); /* * If mount point is currenly being unmounted, sleep until the * mount point fate is decided. If thread doing the unmounting fails, * it will clear MNTK_UNMOUNT flag before waking us up, indicating * that this mount point has survived the unmount attempt and vfs_busy * should retry. Otherwise the unmounter thread will set MNTK_REFEXPIRE * flag in addition to MNTK_UNMOUNT, indicating that mount point is * about to be really destroyed. vfs_busy needs to release its * reference on the mount point in this case and return with ENOENT, * telling the caller that mount mount it tried to busy is no longer * valid. */ while (mp->mnt_kern_flag & MNTK_UNMOUNT) { if (flags & MBF_NOWAIT || mp->mnt_kern_flag & MNTK_REFEXPIRE) { MNT_REL(mp); MNT_IUNLOCK(mp); CTR1(KTR_VFS, "%s: failed busying before sleeping", __func__); return (ENOENT); } if (flags & MBF_MNTLSTLOCK) mtx_unlock(&mountlist_mtx); mp->mnt_kern_flag |= MNTK_MWAIT; msleep(mp, MNT_MTX(mp), PVFS | PDROP, "vfs_busy", 0); if (flags & MBF_MNTLSTLOCK) mtx_lock(&mountlist_mtx); MNT_ILOCK(mp); } if (flags & MBF_MNTLSTLOCK) mtx_unlock(&mountlist_mtx); mp->mnt_lockref++; MNT_IUNLOCK(mp); return (0); } /* * Free a busy filesystem. */ void vfs_unbusy(struct mount *mp) { CTR2(KTR_VFS, "%s: mp %p", __func__, mp); MNT_ILOCK(mp); MNT_REL(mp); KASSERT(mp->mnt_lockref > 0, ("negative mnt_lockref")); mp->mnt_lockref--; if (mp->mnt_lockref == 0 && (mp->mnt_kern_flag & MNTK_DRAINING) != 0) { MPASS(mp->mnt_kern_flag & MNTK_UNMOUNT); CTR1(KTR_VFS, "%s: waking up waiters", __func__); mp->mnt_kern_flag &= ~MNTK_DRAINING; wakeup(&mp->mnt_lockref); } MNT_IUNLOCK(mp); } /* * Lookup a mount point by filesystem identifier. */ struct mount * vfs_getvfs(fsid_t *fsid) { struct mount *mp; CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid); mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { vfs_ref(mp); mtx_unlock(&mountlist_mtx); return (mp); } } mtx_unlock(&mountlist_mtx); CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid); return ((struct mount *) 0); } /* * Lookup a mount point by filesystem identifier, busying it before * returning. * * To avoid congestion on mountlist_mtx, implement simple direct-mapped * cache for popular filesystem identifiers. The cache is lockess, using * the fact that struct mount's are never freed. In worst case we may * get pointer to unmounted or even different filesystem, so we have to * check what we got, and go slow way if so. */ struct mount * vfs_busyfs(fsid_t *fsid) { #define FSID_CACHE_SIZE 256 typedef struct mount * volatile vmp_t; static vmp_t cache[FSID_CACHE_SIZE]; struct mount *mp; int error; uint32_t hash; CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid); hash = fsid->val[0] ^ fsid->val[1]; hash = (hash >> 16 ^ hash) & (FSID_CACHE_SIZE - 1); mp = cache[hash]; if (mp == NULL || mp->mnt_stat.f_fsid.val[0] != fsid->val[0] || mp->mnt_stat.f_fsid.val[1] != fsid->val[1]) goto slow; if (vfs_busy(mp, 0) != 0) { cache[hash] = NULL; goto slow; } if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) return (mp); else vfs_unbusy(mp); slow: mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { error = vfs_busy(mp, MBF_MNTLSTLOCK); if (error) { cache[hash] = NULL; mtx_unlock(&mountlist_mtx); return (NULL); } cache[hash] = mp; return (mp); } } CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid); mtx_unlock(&mountlist_mtx); return ((struct mount *) 0); } /* * Check if a user can access privileged mount options. */ int vfs_suser(struct mount *mp, struct thread *td) { int error; /* * If the thread is jailed, but this is not a jail-friendly file * system, deny immediately. */ if (!(mp->mnt_vfc->vfc_flags & VFCF_JAIL) && jailed(td->td_ucred)) return (EPERM); /* * If the file system was mounted outside the jail of the calling * thread, deny immediately. */ if (prison_check(td->td_ucred, mp->mnt_cred) != 0) return (EPERM); /* * If file system supports delegated administration, we don't check * for the PRIV_VFS_MOUNT_OWNER privilege - it will be better verified * by the file system itself. * If this is not the user that did original mount, we check for * the PRIV_VFS_MOUNT_OWNER privilege. */ if (!(mp->mnt_vfc->vfc_flags & VFCF_DELEGADMIN) && mp->mnt_cred->cr_uid != td->td_ucred->cr_uid) { if ((error = priv_check(td, PRIV_VFS_MOUNT_OWNER)) != 0) return (error); } return (0); } /* * Get a new unique fsid. Try to make its val[0] unique, since this value * will be used to create fake device numbers for stat(). Also try (but * not so hard) make its val[0] unique mod 2^16, since some emulators only * support 16-bit device numbers. We end up with unique val[0]'s for the * first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls. * * Keep in mind that several mounts may be running in parallel. Starting * the search one past where the previous search terminated is both a * micro-optimization and a defense against returning the same fsid to * different mounts. */ void vfs_getnewfsid(struct mount *mp) { static uint16_t mntid_base; struct mount *nmp; fsid_t tfsid; int mtype; CTR2(KTR_VFS, "%s: mp %p", __func__, mp); mtx_lock(&mntid_mtx); mtype = mp->mnt_vfc->vfc_typenum; tfsid.val[1] = mtype; mtype = (mtype & 0xFF) << 24; for (;;) { tfsid.val[0] = makedev(255, mtype | ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF)); mntid_base++; if ((nmp = vfs_getvfs(&tfsid)) == NULL) break; vfs_rel(nmp); } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; mp->mnt_stat.f_fsid.val[1] = tfsid.val[1]; mtx_unlock(&mntid_mtx); } /* * Knob to control the precision of file timestamps: * * 0 = seconds only; nanoseconds zeroed. * 1 = seconds and nanoseconds, accurate within 1/HZ. * 2 = seconds and nanoseconds, truncated to microseconds. * >=3 = seconds and nanoseconds, maximum precision. */ enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC }; static int timestamp_precision = TSP_USEC; SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW, ×tamp_precision, 0, "File timestamp precision (0: seconds, " "1: sec + ns accurate to 1/HZ, 2: sec + ns truncated to ms, " "3+: sec + ns (max. precision))"); /* * Get a current timestamp. */ void vfs_timestamp(struct timespec *tsp) { struct timeval tv; switch (timestamp_precision) { case TSP_SEC: tsp->tv_sec = time_second; tsp->tv_nsec = 0; break; case TSP_HZ: getnanotime(tsp); break; case TSP_USEC: microtime(&tv); TIMEVAL_TO_TIMESPEC(&tv, tsp); break; case TSP_NSEC: default: nanotime(tsp); break; } } /* * Set vnode attributes to VNOVAL */ void vattr_null(struct vattr *vap) { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = VNOVAL; vap->va_nlink = VNOVAL; vap->va_uid = VNOVAL; vap->va_gid = VNOVAL; vap->va_fsid = VNOVAL; vap->va_fileid = VNOVAL; vap->va_blocksize = VNOVAL; vap->va_rdev = VNOVAL; vap->va_atime.tv_sec = VNOVAL; vap->va_atime.tv_nsec = VNOVAL; vap->va_mtime.tv_sec = VNOVAL; vap->va_mtime.tv_nsec = VNOVAL; vap->va_ctime.tv_sec = VNOVAL; vap->va_ctime.tv_nsec = VNOVAL; vap->va_birthtime.tv_sec = VNOVAL; vap->va_birthtime.tv_nsec = VNOVAL; vap->va_flags = VNOVAL; vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * This routine is called when we have too many vnodes. It attempts * to free vnodes and will potentially free vnodes that still * have VM backing store (VM backing store is typically the cause * of a vnode blowout so we want to do this). Therefore, this operation * is not considered cheap. * * A number of conditions may prevent a vnode from being reclaimed. * the buffer cache may have references on the vnode, a directory * vnode may still have references due to the namei cache representing * underlying files, or the vnode may be in active use. It is not * desireable to reuse such vnodes. These conditions may cause the * number of vnodes to reach some minimum value regardless of what * you set kern.maxvnodes to. Do not set kern.maxvnodes too low. */ static int vlrureclaim(struct mount *mp) { struct vnode *vp; int done; int trigger; int usevnodes; int count; /* * Calculate the trigger point, don't allow user * screwups to blow us up. This prevents us from * recycling vnodes with lots of resident pages. We * aren't trying to free memory, we are trying to * free vnodes. */ usevnodes = desiredvnodes; if (usevnodes <= 0) usevnodes = 1; trigger = vm_cnt.v_page_count * 2 / usevnodes; done = 0; vn_start_write(NULL, &mp, V_WAIT); MNT_ILOCK(mp); count = mp->mnt_nvnodelistsize / 10 + 1; while (count != 0) { vp = TAILQ_FIRST(&mp->mnt_nvnodelist); while (vp != NULL && vp->v_type == VMARKER) vp = TAILQ_NEXT(vp, v_nmntvnodes); if (vp == NULL) break; TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes); TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); --count; if (!VI_TRYLOCK(vp)) goto next_iter; /* * If it's been deconstructed already, it's still * referenced, or it exceeds the trigger, skip it. */ if (vp->v_usecount || (!vlru_allow_cache_src && !LIST_EMPTY(&(vp)->v_cache_src)) || (vp->v_iflag & VI_DOOMED) != 0 || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VI_UNLOCK(vp); goto next_iter; } MNT_IUNLOCK(mp); vholdl(vp); if (VOP_LOCK(vp, LK_INTERLOCK|LK_EXCLUSIVE|LK_NOWAIT)) { vdrop(vp); goto next_iter_mntunlocked; } VI_LOCK(vp); /* * v_usecount may have been bumped after VOP_LOCK() dropped * the vnode interlock and before it was locked again. * * It is not necessary to recheck VI_DOOMED because it can * only be set by another thread that holds both the vnode * lock and vnode interlock. If another thread has the * vnode lock before we get to VOP_LOCK() and obtains the * vnode interlock after VOP_LOCK() drops the vnode * interlock, the other thread will be unable to drop the * vnode lock before our VOP_LOCK() call fails. */ if (vp->v_usecount || (!vlru_allow_cache_src && !LIST_EMPTY(&(vp)->v_cache_src)) || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VOP_UNLOCK(vp, LK_INTERLOCK); vdrop(vp); goto next_iter_mntunlocked; } KASSERT((vp->v_iflag & VI_DOOMED) == 0, ("VI_DOOMED unexpectedly detected in vlrureclaim()")); atomic_add_long(&recycles_count, 1); vgonel(vp); VOP_UNLOCK(vp, 0); vdropl(vp); done++; next_iter_mntunlocked: if (!should_yield()) goto relock_mnt; goto yield; next_iter: if (!should_yield()) continue; MNT_IUNLOCK(mp); yield: kern_yield(PRI_USER); relock_mnt: MNT_ILOCK(mp); } MNT_IUNLOCK(mp); vn_finished_write(mp); return done; } /* * Attempt to keep the free list at wantfreevnodes length. */ static void vnlru_free(int count) { struct vnode *vp; mtx_assert(&vnode_free_list_mtx, MA_OWNED); for (; count > 0; count--) { vp = TAILQ_FIRST(&vnode_free_list); /* * The list can be modified while the free_list_mtx * has been dropped and vp could be NULL here. */ if (!vp) break; VNASSERT(vp->v_op != NULL, vp, ("vnlru_free: vnode already reclaimed.")); KASSERT((vp->v_iflag & VI_FREE) != 0, ("Removing vnode not on freelist")); KASSERT((vp->v_iflag & VI_ACTIVE) == 0, ("Mangling active vnode")); TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist); /* * Don't recycle if we can't get the interlock. */ if (!VI_TRYLOCK(vp)) { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist); continue; } VNASSERT((vp->v_iflag & VI_FREE) != 0 && vp->v_holdcnt == 0, vp, ("vp inconsistent on freelist")); /* * The clear of VI_FREE prevents activation of the * vnode. There is no sense in putting the vnode on * the mount point active list, only to remove it * later during recycling. Inline the relevant part * of vholdl(), to avoid triggering assertions or * activating. */ freevnodes--; vp->v_iflag &= ~VI_FREE; refcount_acquire(&vp->v_holdcnt); mtx_unlock(&vnode_free_list_mtx); VI_UNLOCK(vp); vtryrecycle(vp); /* * If the recycled succeeded this vdrop will actually free * the vnode. If not it will simply place it back on * the free list. */ vdrop(vp); mtx_lock(&vnode_free_list_mtx); } } /* * Attempt to recycle vnodes in a context that is always safe to block. * Calling vlrurecycle() from the bowels of filesystem code has some * interesting deadlock problems. */ static struct proc *vnlruproc; static int vnlruproc_sig; static void vnlru_proc(void) { struct mount *mp, *nmp; int done; struct proc *p = vnlruproc; EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, p, SHUTDOWN_PRI_FIRST); for (;;) { kproc_suspend_check(p); mtx_lock(&vnode_free_list_mtx); if (freevnodes > wantfreevnodes) vnlru_free(freevnodes - wantfreevnodes); if (numvnodes <= desiredvnodes * 9 / 10) { vnlruproc_sig = 0; wakeup(&vnlruproc_sig); msleep(vnlruproc, &vnode_free_list_mtx, PVFS|PDROP, "vlruwt", hz); continue; } mtx_unlock(&vnode_free_list_mtx); done = 0; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } done += vlrureclaim(mp); mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp); } mtx_unlock(&mountlist_mtx); if (done == 0) { #if 0 /* These messages are temporary debugging aids */ if (vnlru_nowhere < 5) printf("vnlru process getting nowhere..\n"); else if (vnlru_nowhere == 5) printf("vnlru process messages stopped.\n"); #endif vnlru_nowhere++; tsleep(vnlruproc, PPAUSE, "vlrup", hz * 3); } else kern_yield(PRI_USER); } } static struct kproc_desc vnlru_kp = { "vnlru", vnlru_proc, &vnlruproc }; SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &vnlru_kp); /* * Routines having to do with the management of the vnode table. */ /* * Try to recycle a freed vnode. We abort if anyone picks up a reference * before we actually vgone(). This function must be called with the vnode * held to prevent the vnode from being returned to the free list midway * through vgone(). */ static int vtryrecycle(struct vnode *vp) { struct mount *vnmp; CTR2(KTR_VFS, "%s: vp %p", __func__, vp); VNASSERT(vp->v_holdcnt, vp, ("vtryrecycle: Recycling vp %p without a reference.", vp)); /* * This vnode may found and locked via some other list, if so we * can't recycle it yet. */ if (VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { CTR2(KTR_VFS, "%s: impossible to recycle, vp %p lock is already held", __func__, vp); return (EWOULDBLOCK); } /* * Don't recycle if its filesystem is being suspended. */ if (vn_start_write(vp, &vnmp, V_NOWAIT) != 0) { VOP_UNLOCK(vp, 0); CTR2(KTR_VFS, "%s: impossible to recycle, cannot start the write for %p", __func__, vp); return (EBUSY); } /* * If we got this far, we need to acquire the interlock and see if * anyone picked up this vnode from another list. If not, we will * mark it with DOOMED via vgonel() so that anyone who does find it * will skip over it. */ VI_LOCK(vp); if (vp->v_usecount) { VOP_UNLOCK(vp, LK_INTERLOCK); vn_finished_write(vnmp); CTR2(KTR_VFS, "%s: impossible to recycle, %p is already referenced", __func__, vp); return (EBUSY); } if ((vp->v_iflag & VI_DOOMED) == 0) { atomic_add_long(&recycles_count, 1); vgonel(vp); } VOP_UNLOCK(vp, LK_INTERLOCK); vn_finished_write(vnmp); return (0); } /* * Wait for available vnodes. */ static int getnewvnode_wait(int suspended) { mtx_assert(&vnode_free_list_mtx, MA_OWNED); if (numvnodes > desiredvnodes) { if (suspended) { /* * File system is beeing suspended, we cannot risk a * deadlock here, so allocate new vnode anyway. */ if (freevnodes > wantfreevnodes) vnlru_free(freevnodes - wantfreevnodes); return (0); } if (vnlruproc_sig == 0) { vnlruproc_sig = 1; /* avoid unnecessary wakeups */ wakeup(vnlruproc); } msleep(&vnlruproc_sig, &vnode_free_list_mtx, PVFS, "vlruwk", hz); } return (numvnodes > desiredvnodes ? ENFILE : 0); } void getnewvnode_reserve(u_int count) { struct thread *td; td = curthread; /* First try to be quick and racy. */ if (atomic_fetchadd_long(&numvnodes, count) + count <= desiredvnodes) { td->td_vp_reserv += count; return; } else atomic_subtract_long(&numvnodes, count); mtx_lock(&vnode_free_list_mtx); while (count > 0) { if (getnewvnode_wait(0) == 0) { count--; td->td_vp_reserv++; atomic_add_long(&numvnodes, 1); } } mtx_unlock(&vnode_free_list_mtx); } void getnewvnode_drop_reserve(void) { struct thread *td; td = curthread; atomic_subtract_long(&numvnodes, td->td_vp_reserv); td->td_vp_reserv = 0; } /* * Return the next vnode from the free list. */ int getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops, struct vnode **vpp) { struct vnode *vp; struct bufobj *bo; struct thread *td; int error; CTR3(KTR_VFS, "%s: mp %p with tag %s", __func__, mp, tag); vp = NULL; td = curthread; if (td->td_vp_reserv > 0) { td->td_vp_reserv -= 1; goto alloc; } mtx_lock(&vnode_free_list_mtx); /* * Lend our context to reclaim vnodes if they've exceeded the max. */ if (freevnodes > wantfreevnodes) vnlru_free(1); error = getnewvnode_wait(mp != NULL && (mp->mnt_kern_flag & MNTK_SUSPEND)); #if 0 /* XXX Not all VFS_VGET/ffs_vget callers check returns. */ if (error != 0) { mtx_unlock(&vnode_free_list_mtx); return (error); } #endif atomic_add_long(&numvnodes, 1); mtx_unlock(&vnode_free_list_mtx); alloc: atomic_add_long(&vnodes_created, 1); vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK|M_ZERO); /* * Setup locks. */ vp->v_vnlock = &vp->v_lock; mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF); /* * By default, don't allow shared locks unless filesystems * opt-in. */ lockinit(vp->v_vnlock, PVFS, tag, VLKTIMEOUT, LK_NOSHARE | LK_IS_VNODE); /* * Initialize bufobj. */ bo = &vp->v_bufobj; bo->__bo_vnode = vp; rw_init(BO_LOCKPTR(bo), "bufobj interlock"); bo->bo_ops = &buf_ops_bio; bo->bo_private = vp; TAILQ_INIT(&bo->bo_clean.bv_hd); TAILQ_INIT(&bo->bo_dirty.bv_hd); /* * Initialize namecache. */ LIST_INIT(&vp->v_cache_src); TAILQ_INIT(&vp->v_cache_dst); /* * Finalize various vnode identity bits. */ vp->v_type = VNON; vp->v_tag = tag; vp->v_op = vops; v_init_counters(vp); vp->v_data = NULL; #ifdef MAC mac_vnode_init(vp); if (mp != NULL && (mp->mnt_flag & MNT_MULTILABEL) == 0) mac_vnode_associate_singlelabel(mp, vp); else if (mp == NULL && vops != &dead_vnodeops) printf("NULL mp in getnewvnode()\n"); #endif if (mp != NULL) { bo->bo_bsize = mp->mnt_stat.f_iosize; if ((mp->mnt_kern_flag & MNTK_NOKNOTE) != 0) vp->v_vflag |= VV_NOKNOTE; } rangelock_init(&vp->v_rl); /* * For the filesystems which do not use vfs_hash_insert(), * still initialize v_hash to have vfs_hash_index() useful. * E.g., nullfs uses vfs_hash_index() on the lower vnode for * its own hashing. */ vp->v_hash = (uintptr_t)vp >> vnsz2log; *vpp = vp; return (0); } /* * Delete from old mount point vnode list, if on one. */ static void delmntque(struct vnode *vp) { struct mount *mp; int active; mp = vp->v_mount; if (mp == NULL) return; MNT_ILOCK(mp); VI_LOCK(vp); KASSERT(mp->mnt_activevnodelistsize <= mp->mnt_nvnodelistsize, ("Active vnode list size %d > Vnode list size %d", mp->mnt_activevnodelistsize, mp->mnt_nvnodelistsize)); active = vp->v_iflag & VI_ACTIVE; vp->v_iflag &= ~VI_ACTIVE; if (active) { mtx_lock(&vnode_free_list_mtx); TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist); mp->mnt_activevnodelistsize--; mtx_unlock(&vnode_free_list_mtx); } vp->v_mount = NULL; VI_UNLOCK(vp); VNASSERT(mp->mnt_nvnodelistsize > 0, vp, ("bad mount point vnode list size")); TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes); mp->mnt_nvnodelistsize--; MNT_REL(mp); MNT_IUNLOCK(mp); } static void insmntque_stddtr(struct vnode *vp, void *dtr_arg) { vp->v_data = NULL; vp->v_op = &dead_vnodeops; vgone(vp); vput(vp); } /* * Insert into list of vnodes for the new mount point, if available. */ int insmntque1(struct vnode *vp, struct mount *mp, void (*dtr)(struct vnode *, void *), void *dtr_arg) { KASSERT(vp->v_mount == NULL, ("insmntque: vnode already on per mount vnode list")); VNASSERT(mp != NULL, vp, ("Don't call insmntque(foo, NULL)")); ASSERT_VOP_ELOCKED(vp, "insmntque: non-locked vp"); /* * We acquire the vnode interlock early to ensure that the * vnode cannot be recycled by another process releasing a * holdcnt on it before we get it on both the vnode list * and the active vnode list. The mount mutex protects only * manipulation of the vnode list and the vnode freelist * mutex protects only manipulation of the active vnode list. * Hence the need to hold the vnode interlock throughout. */ MNT_ILOCK(mp); VI_LOCK(vp); if (((mp->mnt_kern_flag & MNTK_NOINSMNTQ) != 0 && ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0 || mp->mnt_nvnodelistsize == 0)) && (vp->v_vflag & VV_FORCEINSMQ) == 0) { VI_UNLOCK(vp); MNT_IUNLOCK(mp); if (dtr != NULL) dtr(vp, dtr_arg); return (EBUSY); } vp->v_mount = mp; MNT_REF(mp); TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); VNASSERT(mp->mnt_nvnodelistsize >= 0, vp, ("neg mount point vnode list size")); mp->mnt_nvnodelistsize++; KASSERT((vp->v_iflag & VI_ACTIVE) == 0, ("Activating already active vnode")); vp->v_iflag |= VI_ACTIVE; mtx_lock(&vnode_free_list_mtx); TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist); mp->mnt_activevnodelistsize++; mtx_unlock(&vnode_free_list_mtx); VI_UNLOCK(vp); MNT_IUNLOCK(mp); return (0); } int insmntque(struct vnode *vp, struct mount *mp) { return (insmntque1(vp, mp, insmntque_stddtr, NULL)); } /* * Flush out and invalidate all buffers associated with a bufobj * Called with the underlying object locked. */ int bufobj_invalbuf(struct bufobj *bo, int flags, int slpflag, int slptimeo) { int error; BO_LOCK(bo); if (flags & V_SAVE) { error = bufobj_wwait(bo, slpflag, slptimeo); if (error) { BO_UNLOCK(bo); return (error); } if (bo->bo_dirty.bv_cnt > 0) { BO_UNLOCK(bo); if ((error = BO_SYNC(bo, MNT_WAIT)) != 0) return (error); /* * XXX We could save a lock/unlock if this was only * enabled under INVARIANTS */ BO_LOCK(bo); if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) panic("vinvalbuf: dirty bufs"); } } /* * If you alter this loop please notice that interlock is dropped and * reacquired in flushbuflist. Special care is needed to ensure that * no race conditions occur from this. */ do { error = flushbuflist(&bo->bo_clean, flags, bo, slpflag, slptimeo); if (error == 0 && !(flags & V_CLEANONLY)) error = flushbuflist(&bo->bo_dirty, flags, bo, slpflag, slptimeo); if (error != 0 && error != EAGAIN) { BO_UNLOCK(bo); return (error); } } while (error != 0); /* * Wait for I/O to complete. XXX needs cleaning up. The vnode can * have write I/O in-progress but if there is a VM object then the * VM object can also have read-I/O in-progress. */ do { bufobj_wwait(bo, 0, 0); BO_UNLOCK(bo); if (bo->bo_object != NULL) { VM_OBJECT_WLOCK(bo->bo_object); vm_object_pip_wait(bo->bo_object, "bovlbx"); VM_OBJECT_WUNLOCK(bo->bo_object); } BO_LOCK(bo); } while (bo->bo_numoutput > 0); BO_UNLOCK(bo); /* * Destroy the copy in the VM cache, too. */ if (bo->bo_object != NULL && (flags & (V_ALT | V_NORMAL | V_CLEANONLY)) == 0) { VM_OBJECT_WLOCK(bo->bo_object); vm_object_page_remove(bo->bo_object, 0, 0, (flags & V_SAVE) ? OBJPR_CLEANONLY : 0); VM_OBJECT_WUNLOCK(bo->bo_object); } #ifdef INVARIANTS BO_LOCK(bo); if ((flags & (V_ALT | V_NORMAL | V_CLEANONLY)) == 0 && (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0)) panic("vinvalbuf: flush failed"); BO_UNLOCK(bo); #endif return (0); } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ int vinvalbuf(struct vnode *vp, int flags, int slpflag, int slptimeo) { CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags); ASSERT_VOP_LOCKED(vp, "vinvalbuf"); if (vp->v_object != NULL && vp->v_object->handle != vp) return (0); return (bufobj_invalbuf(&vp->v_bufobj, flags, slpflag, slptimeo)); } /* * Flush out buffers on the specified list. * */ static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo) { struct buf *bp, *nbp; int retval, error; daddr_t lblkno; b_xflags_t xflags; ASSERT_BO_WLOCKED(bo); retval = 0; TAILQ_FOREACH_SAFE(bp, &bufv->bv_hd, b_bobufs, nbp) { if (((flags & V_NORMAL) && (bp->b_xflags & BX_ALTDATA)) || ((flags & V_ALT) && (bp->b_xflags & BX_ALTDATA) == 0)) { continue; } lblkno = 0; xflags = 0; if (nbp != NULL) { lblkno = nbp->b_lblkno; xflags = nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN); } retval = EAGAIN; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo), "flushbuf", slpflag, slptimeo); if (error) { BO_LOCK(bo); return (error != ENOLCK ? error : EAGAIN); } KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); if (bp->b_bufobj != bo) { /* XXX: necessary ? */ BUF_UNLOCK(bp); BO_LOCK(bo); return (EAGAIN); } /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. */ if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && (flags & V_SAVE)) { bremfree(bp); bp->b_flags |= B_ASYNC; bwrite(bp); BO_LOCK(bo); return (EAGAIN); /* XXX: why not loop ? */ } bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); BO_LOCK(bo); if (nbp != NULL && (nbp->b_bufobj != bo || nbp->b_lblkno != lblkno || (nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) != xflags)) break; /* nbp invalid */ } return (retval); } /* * Truncate a file's buffer and pages to a specified length. This * is in lieu of the old vinvalbuf mechanism, which performed unneeded * sync activity. */ int vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length, int blksize) { struct buf *bp, *nbp; int anyfreed; int trunclbn; struct bufobj *bo; CTR5(KTR_VFS, "%s: vp %p with cred %p and block %d:%ju", __func__, vp, cred, blksize, (uintmax_t)length); /* * Round up to the *next* lbn. */ trunclbn = (length + blksize - 1) / blksize; ASSERT_VOP_LOCKED(vp, "vtruncbuf"); restart: bo = &vp->v_bufobj; BO_LOCK(bo); anyfreed = 1; for (;anyfreed;) { anyfreed = 0; TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < trunclbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) goto restart; bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; BO_LOCK(bo); if (nbp != NULL && (((nbp->b_xflags & BX_VNCLEAN) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI))) { BO_UNLOCK(bo); goto restart; } } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < trunclbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) goto restart; bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; BO_LOCK(bo); if (nbp != NULL && (((nbp->b_xflags & BX_VNDIRTY) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI) == 0)) { BO_UNLOCK(bo); goto restart; } } } if (length > 0) { restartsync: TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno > 0) continue; /* * Since we hold the vnode lock this should only * fail if we're racing with the buf daemon. */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { goto restart; } VNASSERT((bp->b_flags & B_DELWRI), vp, ("buf(%p) on dirty queue without DELWRI", bp)); bremfree(bp); bawrite(bp); BO_LOCK(bo); goto restartsync; } } bufobj_wwait(bo, 0, 0); BO_UNLOCK(bo); vnode_pager_setsize(vp, length); return (0); } static void buf_vlist_remove(struct buf *bp) { struct bufv *bv; KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); ASSERT_BO_WLOCKED(bp->b_bufobj); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) != (BX_VNDIRTY|BX_VNCLEAN), ("buf_vlist_remove: Buf %p is on two lists", bp)); if (bp->b_xflags & BX_VNDIRTY) bv = &bp->b_bufobj->bo_dirty; else bv = &bp->b_bufobj->bo_clean; BUF_PCTRIE_REMOVE(&bv->bv_root, bp->b_lblkno); TAILQ_REMOVE(&bv->bv_hd, bp, b_bobufs); bv->bv_cnt--; bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN); } /* * Add the buffer to the sorted clean or dirty block list. * * NOTE: xflags is passed as a constant, optimizing this inline function! */ static void buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags) { struct bufv *bv; struct buf *n; int error; ASSERT_BO_WLOCKED(bo); KASSERT((xflags & BX_VNDIRTY) == 0 || (bo->bo_flag & BO_DEAD) == 0, ("dead bo %p", bo)); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags)); bp->b_xflags |= xflags; if (xflags & BX_VNDIRTY) bv = &bo->bo_dirty; else bv = &bo->bo_clean; /* * Keep the list ordered. Optimize empty list insertion. Assume * we tend to grow at the tail so lookup_le should usually be cheaper * than _ge. */ if (bv->bv_cnt == 0 || bp->b_lblkno > TAILQ_LAST(&bv->bv_hd, buflists)->b_lblkno) TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs); else if ((n = BUF_PCTRIE_LOOKUP_LE(&bv->bv_root, bp->b_lblkno)) == NULL) TAILQ_INSERT_HEAD(&bv->bv_hd, bp, b_bobufs); else TAILQ_INSERT_AFTER(&bv->bv_hd, n, bp, b_bobufs); error = BUF_PCTRIE_INSERT(&bv->bv_root, bp); if (error) panic("buf_vlist_add: Preallocated nodes insufficient."); bv->bv_cnt++; } /* * Look up a buffer using the buffer tries. */ struct buf * gbincore(struct bufobj *bo, daddr_t lblkno) { struct buf *bp; ASSERT_BO_LOCKED(bo); bp = BUF_PCTRIE_LOOKUP(&bo->bo_clean.bv_root, lblkno); if (bp != NULL) return (bp); return BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno); } /* * Associate a buffer with a vnode. */ void bgetvp(struct vnode *vp, struct buf *bp) { struct bufobj *bo; bo = &vp->v_bufobj; ASSERT_BO_WLOCKED(bo); VNASSERT(bp->b_vp == NULL, bp->b_vp, ("bgetvp: not free")); CTR3(KTR_BUF, "bgetvp(%p) vp %p flags %X", bp, vp, bp->b_flags); VNASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, vp, ("bgetvp: bp already attached! %p", bp)); vhold(vp); bp->b_vp = vp; bp->b_bufobj = bo; /* * Insert onto list for new vnode. */ buf_vlist_add(bp, bo, BX_VNCLEAN); } /* * Disassociate a buffer from a vnode. */ void brelvp(struct buf *bp) { struct bufobj *bo; struct vnode *vp; CTR3(KTR_BUF, "brelvp(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); /* * Delete from old vnode list, if on one. */ vp = bp->b_vp; /* XXX */ bo = bp->b_bufobj; BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("brelvp: Buffer %p not on queue.", bp); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { bo->bo_flag &= ~BO_ONWORKLST; mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); } bp->b_vp = NULL; bp->b_bufobj = NULL; BO_UNLOCK(bo); vdrop(vp); } /* * Add an item to the syncer work queue. */ static void vn_syncer_add_to_worklist(struct bufobj *bo, int delay) { int slot; ASSERT_BO_WLOCKED(bo); mtx_lock(&sync_mtx); if (bo->bo_flag & BO_ONWORKLST) LIST_REMOVE(bo, bo_synclist); else { bo->bo_flag |= BO_ONWORKLST; syncer_worklist_len++; } if (delay > syncer_maxdelay - 2) delay = syncer_maxdelay - 2; slot = (syncer_delayno + delay) & syncer_mask; LIST_INSERT_HEAD(&syncer_workitem_pending[slot], bo, bo_synclist); mtx_unlock(&sync_mtx); } static int sysctl_vfs_worklist_len(SYSCTL_HANDLER_ARGS) { int error, len; mtx_lock(&sync_mtx); len = syncer_worklist_len - sync_vnode_count; mtx_unlock(&sync_mtx); error = SYSCTL_OUT(req, &len, sizeof(len)); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, worklist_len, CTLTYPE_INT | CTLFLAG_RD, NULL, 0, sysctl_vfs_worklist_len, "I", "Syncer thread worklist length"); static struct proc *updateproc; static void sched_sync(void); static struct kproc_desc up_kp = { "syncer", sched_sync, &updateproc }; SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp); static int sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td) { struct vnode *vp; struct mount *mp; *bo = LIST_FIRST(slp); if (*bo == NULL) return (0); vp = (*bo)->__bo_vnode; /* XXX */ if (VOP_ISLOCKED(vp) != 0 || VI_TRYLOCK(vp) == 0) return (1); /* * We use vhold in case the vnode does not * successfully sync. vhold prevents the vnode from * going away when we unlock the sync_mtx so that * we can acquire the vnode interlock. */ vholdl(vp); mtx_unlock(&sync_mtx); VI_UNLOCK(vp); if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { vdrop(vp); mtx_lock(&sync_mtx); return (*bo == LIST_FIRST(slp)); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); (void) VOP_FSYNC(vp, MNT_LAZY, td); VOP_UNLOCK(vp, 0); vn_finished_write(mp); BO_LOCK(*bo); if (((*bo)->bo_flag & BO_ONWORKLST) != 0) { /* * Put us back on the worklist. The worklist * routine will remove us from our current * position and then add us back in at a later * position. */ vn_syncer_add_to_worklist(*bo, syncdelay); } BO_UNLOCK(*bo); vdrop(vp); mtx_lock(&sync_mtx); return (0); } static int first_printf = 1; /* * System filesystem synchronizer daemon. */ static void sched_sync(void) { struct synclist *next, *slp; struct bufobj *bo; long starttime; struct thread *td = curthread; int last_work_seen; int net_worklist_len; int syncer_final_iter; int error; last_work_seen = 0; syncer_final_iter = 0; syncer_state = SYNCER_RUNNING; starttime = time_uptime; td->td_pflags |= TDP_NORUNNINGBUF; EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc, SHUTDOWN_PRI_LAST); mtx_lock(&sync_mtx); for (;;) { if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter == 0) { mtx_unlock(&sync_mtx); kproc_suspend_check(td->td_proc); mtx_lock(&sync_mtx); } net_worklist_len = syncer_worklist_len - sync_vnode_count; if (syncer_state != SYNCER_RUNNING && starttime != time_uptime) { if (first_printf) { printf("\nSyncing disks, vnodes remaining..."); first_printf = 0; } printf("%d ", net_worklist_len); } starttime = time_uptime; /* * Push files whose dirty time has expired. Be careful * of interrupt race on slp queue. * * Skip over empty worklist slots when shutting down. */ do { slp = &syncer_workitem_pending[syncer_delayno]; syncer_delayno += 1; if (syncer_delayno == syncer_maxdelay) syncer_delayno = 0; next = &syncer_workitem_pending[syncer_delayno]; /* * If the worklist has wrapped since the * it was emptied of all but syncer vnodes, * switch to the FINAL_DELAY state and run * for one more second. */ if (syncer_state == SYNCER_SHUTTING_DOWN && net_worklist_len == 0 && last_work_seen == syncer_delayno) { syncer_state = SYNCER_FINAL_DELAY; syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP; } } while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) && syncer_worklist_len > 0); /* * Keep track of the last time there was anything * on the worklist other than syncer vnodes. * Return to the SHUTTING_DOWN state if any * new work appears. */ if (net_worklist_len > 0 || syncer_state == SYNCER_RUNNING) last_work_seen = syncer_delayno; if (net_worklist_len > 0 && syncer_state == SYNCER_FINAL_DELAY) syncer_state = SYNCER_SHUTTING_DOWN; while (!LIST_EMPTY(slp)) { error = sync_vnode(slp, &bo, td); if (error == 1) { LIST_REMOVE(bo, bo_synclist); LIST_INSERT_HEAD(next, bo, bo_synclist); continue; } if (first_printf == 0) { /* * Drop the sync mutex, because some watchdog * drivers need to sleep while patting */ mtx_unlock(&sync_mtx); wdog_kern_pat(WD_LASTVAL); mtx_lock(&sync_mtx); } } if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0) syncer_final_iter--; /* * The variable rushjob allows the kernel to speed up the * processing of the filesystem syncer process. A rushjob * value of N tells the filesystem syncer to process the next * N seconds worth of work on its queue ASAP. Currently rushjob * is used by the soft update code to speed up the filesystem * syncer process when the incore state is getting so far * ahead of the disk that the kernel memory pool is being * threatened with exhaustion. */ if (rushjob > 0) { rushjob -= 1; continue; } /* * Just sleep for a short period of time between * iterations when shutting down to allow some I/O * to happen. * * If it has taken us less than a second to process the * current work, then wait. Otherwise start right over * again. We can still lose time if any single round * takes more than two seconds, but it does not really * matter as we are just trying to generally pace the * filesystem activity. */ if (syncer_state != SYNCER_RUNNING || time_uptime == starttime) { thread_lock(td); sched_prio(td, PPAUSE); thread_unlock(td); } if (syncer_state != SYNCER_RUNNING) cv_timedwait(&sync_wakeup, &sync_mtx, hz / SYNCER_SHUTDOWN_SPEEDUP); else if (time_uptime == starttime) cv_timedwait(&sync_wakeup, &sync_mtx, hz); } } /* * Request the syncer daemon to speed up its work. * We never push it to speed up more than half of its * normal turn time, otherwise it could take over the cpu. */ int speedup_syncer(void) { int ret = 0; mtx_lock(&sync_mtx); if (rushjob < syncdelay / 2) { rushjob += 1; stat_rush_requests += 1; ret = 1; } mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); return (ret); } /* * Tell the syncer to speed up its work and run though its work * list several times, then tell it to shut down. */ static void syncer_shutdown(void *arg, int howto) { if (howto & RB_NOSYNC) return; mtx_lock(&sync_mtx); syncer_state = SYNCER_SHUTTING_DOWN; rushjob = 0; mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); kproc_shutdown(arg, howto); } void syncer_suspend(void) { syncer_shutdown(updateproc, 0); } void syncer_resume(void) { mtx_lock(&sync_mtx); first_printf = 1; syncer_state = SYNCER_RUNNING; mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); kproc_resume(updateproc); } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. */ void reassignbuf(struct buf *bp) { struct vnode *vp; struct bufobj *bo; int delay; #ifdef INVARIANTS struct bufv *bv; #endif vp = bp->b_vp; bo = bp->b_bufobj; ++reassignbufcalls; CTR3(KTR_BUF, "reassignbuf(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); /* * B_PAGING flagged buffers cannot be reassigned because their vp * is not fully linked in. */ if (bp->b_flags & B_PAGING) panic("cannot reassign paging buffer"); /* * Delete from old vnode list, if on one. */ BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("reassignbuf: Buffer %p not on queue.", bp); /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { if ((bo->bo_flag & BO_ONWORKLST) == 0) { switch (vp->v_type) { case VDIR: delay = dirdelay; break; case VCHR: delay = metadelay; break; default: delay = filedelay; } vn_syncer_add_to_worklist(bo, delay); } buf_vlist_add(bp, bo, BX_VNDIRTY); } else { buf_vlist_add(bp, bo, BX_VNCLEAN); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); bo->bo_flag &= ~BO_ONWORKLST; } } #ifdef INVARIANTS bv = &bo->bo_clean; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bv = &bo->bo_dirty; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); #endif BO_UNLOCK(bo); } /* * A temporary hack until refcount_* APIs are sorted out. */ static __inline int vfs_refcount_acquire_if_not_zero(volatile u_int *count) { u_int old; for (;;) { old = *count; if (old == 0) return (0); if (atomic_cmpset_int(count, old, old + 1)) return (1); } } static __inline int vfs_refcount_release_if_not_last(volatile u_int *count) { u_int old; for (;;) { old = *count; if (old == 1) return (0); if (atomic_cmpset_int(count, old, old - 1)) return (1); } } static void v_init_counters(struct vnode *vp) { VNASSERT(vp->v_type == VNON && vp->v_data == NULL && vp->v_iflag == 0, vp, ("%s called for an initialized vnode", __FUNCTION__)); ASSERT_VI_UNLOCKED(vp, __FUNCTION__); refcount_init(&vp->v_holdcnt, 1); refcount_init(&vp->v_usecount, 1); } /* * Increment the use and hold counts on the vnode, taking care to reference * the driver's usecount if this is a chardev. The _vhold() will remove * the vnode from the free list if it is presently free. */ static void v_incr_usecount(struct vnode *vp) { ASSERT_VI_UNLOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (vp->v_type == VCHR) { VI_LOCK(vp); _vhold(vp, true); if (vp->v_iflag & VI_OWEINACT) { VNASSERT(vp->v_usecount == 0, vp, ("vnode with usecount and VI_OWEINACT set")); vp->v_iflag &= ~VI_OWEINACT; } refcount_acquire(&vp->v_usecount); v_incr_devcount(vp); VI_UNLOCK(vp); return; } _vhold(vp, false); if (vfs_refcount_acquire_if_not_zero(&vp->v_usecount)) { VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, ("vnode with usecount and VI_OWEINACT set")); } else { VI_LOCK(vp); if (vp->v_iflag & VI_OWEINACT) vp->v_iflag &= ~VI_OWEINACT; refcount_acquire(&vp->v_usecount); VI_UNLOCK(vp); } } /* * Increment si_usecount of the associated device, if any. */ static void v_incr_devcount(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __FUNCTION__); if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount++; dev_unlock(); } } /* * Decrement si_usecount of the associated device, if any. */ static void v_decr_devcount(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __FUNCTION__); if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount--; dev_unlock(); } } /* * Grab a particular vnode from the free list, increment its * reference count and lock it. VI_DOOMED is set if the vnode * is being destroyed. Only callers who specify LK_RETRY will * see doomed vnodes. If inactive processing was delayed in * vput try to do it here. * * Notes on lockless counter manipulation: * _vhold, vputx and other routines make various decisions based * on either holdcnt or usecount being 0. As long as either contuner * is not transitioning 0->1 nor 1->0, the manipulation can be done * with atomic operations. Otherwise the interlock is taken. */ int vget(struct vnode *vp, int flags, struct thread *td) { int error, oweinact; VNASSERT((flags & LK_TYPE_MASK) != 0, vp, ("vget: invalid lock operation")); if ((flags & LK_INTERLOCK) != 0) ASSERT_VI_LOCKED(vp, __func__); else ASSERT_VI_UNLOCKED(vp, __func__); if ((flags & LK_VNHELD) != 0) VNASSERT((vp->v_holdcnt > 0), vp, ("vget: LK_VNHELD passed but vnode not held")); CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags); if ((flags & LK_VNHELD) == 0) _vhold(vp, (flags & LK_INTERLOCK) != 0); if ((error = vn_lock(vp, flags)) != 0) { vdrop(vp); CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__, vp); return (error); } if (vp->v_iflag & VI_DOOMED && (flags & LK_RETRY) == 0) panic("vget: vn_lock failed to return ENOENT\n"); /* * We don't guarantee that any particular close will * trigger inactive processing so just make a best effort * here at preventing a reference to a removed file. If * we don't succeed no harm is done. * * Upgrade our holdcnt to a usecount. */ if (vp->v_type != VCHR && vfs_refcount_acquire_if_not_zero(&vp->v_usecount)) { VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, ("vnode with usecount and VI_OWEINACT set")); } else { VI_LOCK(vp); if ((vp->v_iflag & VI_OWEINACT) == 0) { oweinact = 0; } else { oweinact = 1; vp->v_iflag &= ~VI_OWEINACT; } refcount_acquire(&vp->v_usecount); v_incr_devcount(vp); if (oweinact && VOP_ISLOCKED(vp) == LK_EXCLUSIVE && (flags & LK_NOWAIT) == 0) vinactive(vp, td); VI_UNLOCK(vp); } return (0); } /* * Increase the reference count of a vnode. */ void vref(struct vnode *vp) { CTR2(KTR_VFS, "%s: vp %p", __func__, vp); v_incr_usecount(vp); } /* * Return reference count of a vnode. * * The results of this call are only guaranteed when some mechanism is used to * stop other processes from gaining references to the vnode. This may be the * case if the caller holds the only reference. This is also useful when stale * data is acceptable as race conditions may be accounted for by some other * means. */ int vrefcnt(struct vnode *vp) { return (vp->v_usecount); } #define VPUTX_VRELE 1 #define VPUTX_VPUT 2 #define VPUTX_VUNREF 3 /* * Decrement the use and hold counts for a vnode. * * See an explanation near vget() as to why atomic operation is safe. */ static void vputx(struct vnode *vp, int func) { int error; KASSERT(vp != NULL, ("vputx: null vp")); if (func == VPUTX_VUNREF) ASSERT_VOP_LOCKED(vp, "vunref"); else if (func == VPUTX_VPUT) ASSERT_VOP_LOCKED(vp, "vput"); else KASSERT(func == VPUTX_VRELE, ("vputx: wrong func")); ASSERT_VI_UNLOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (vp->v_type != VCHR && vfs_refcount_release_if_not_last(&vp->v_usecount)) { if (func == VPUTX_VPUT) VOP_UNLOCK(vp, 0); vdrop(vp); return; } VI_LOCK(vp); /* * We want to hold the vnode until the inactive finishes to * prevent vgone() races. We drop the use count here and the * hold count below when we're done. */ if (!refcount_release(&vp->v_usecount) || (vp->v_iflag & VI_DOINGINACT)) { if (func == VPUTX_VPUT) VOP_UNLOCK(vp, 0); v_decr_devcount(vp); vdropl(vp); return; } v_decr_devcount(vp); error = 0; if (vp->v_usecount != 0) { vprint("vputx: usecount not zero", vp); panic("vputx: usecount not zero"); } CTR2(KTR_VFS, "%s: return vnode %p to the freelist", __func__, vp); /* * We must call VOP_INACTIVE with the node locked. Mark * as VI_DOINGINACT to avoid recursion. */ vp->v_iflag |= VI_OWEINACT; switch (func) { case VPUTX_VRELE: error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK); VI_LOCK(vp); break; case VPUTX_VPUT: if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { error = VOP_LOCK(vp, LK_UPGRADE | LK_INTERLOCK | LK_NOWAIT); VI_LOCK(vp); } break; case VPUTX_VUNREF: if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { error = VOP_LOCK(vp, LK_TRYUPGRADE | LK_INTERLOCK); VI_LOCK(vp); } break; } VNASSERT(vp->v_usecount == 0 || (vp->v_iflag & VI_OWEINACT) == 0, vp, ("vnode with usecount and VI_OWEINACT set")); if (error == 0) { if (vp->v_iflag & VI_OWEINACT) vinactive(vp, curthread); if (func != VPUTX_VUNREF) VOP_UNLOCK(vp, 0); } vdropl(vp); } /* * Vnode put/release. * If count drops to zero, call inactive routine and return to freelist. */ void vrele(struct vnode *vp) { vputx(vp, VPUTX_VRELE); } /* * Release an already locked vnode. This give the same effects as * unlock+vrele(), but takes less time and avoids releasing and * re-aquiring the lock (as vrele() acquires the lock internally.) */ void vput(struct vnode *vp) { vputx(vp, VPUTX_VPUT); } /* * Release an exclusively locked vnode. Do not unlock the vnode lock. */ void vunref(struct vnode *vp) { vputx(vp, VPUTX_VUNREF); } /* * Increase the hold count and activate if this is the first reference. */ void _vhold(struct vnode *vp, bool locked) { struct mount *mp; if (locked) ASSERT_VI_LOCKED(vp, __func__); else ASSERT_VI_UNLOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (!locked && vfs_refcount_acquire_if_not_zero(&vp->v_holdcnt)) { VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("_vhold: vnode with holdcnt is free")); return; } if (!locked) VI_LOCK(vp); if ((vp->v_iflag & VI_FREE) == 0) { refcount_acquire(&vp->v_holdcnt); if (!locked) VI_UNLOCK(vp); return; } VNASSERT(vp->v_holdcnt == 0, vp, ("%s: wrong hold count", __func__)); VNASSERT(vp->v_op != NULL, vp, ("%s: vnode already reclaimed.", __func__)); /* * Remove a vnode from the free list, mark it as in use, * and put it on the active list. */ mtx_lock(&vnode_free_list_mtx); TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist); freevnodes--; vp->v_iflag &= ~(VI_FREE|VI_AGE); KASSERT((vp->v_iflag & VI_ACTIVE) == 0, ("Activating already active vnode")); vp->v_iflag |= VI_ACTIVE; mp = vp->v_mount; TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist); mp->mnt_activevnodelistsize++; mtx_unlock(&vnode_free_list_mtx); refcount_acquire(&vp->v_holdcnt); if (!locked) VI_UNLOCK(vp); } /* * Drop the hold count of the vnode. If this is the last reference to * the vnode we place it on the free list unless it has been vgone'd * (marked VI_DOOMED) in which case we will free it. */ void _vdrop(struct vnode *vp, bool locked) { struct bufobj *bo; struct mount *mp; int active; if (locked) ASSERT_VI_LOCKED(vp, __func__); else ASSERT_VI_UNLOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if ((int)vp->v_holdcnt <= 0) panic("vdrop: holdcnt %d", vp->v_holdcnt); if (vfs_refcount_release_if_not_last(&vp->v_holdcnt)) { if (locked) VI_UNLOCK(vp); return; } if (!locked) VI_LOCK(vp); if (refcount_release(&vp->v_holdcnt) == 0) { VI_UNLOCK(vp); return; } if ((vp->v_iflag & VI_DOOMED) == 0) { /* * Mark a vnode as free: remove it from its active list * and put it up for recycling on the freelist. */ VNASSERT(vp->v_op != NULL, vp, ("vdropl: vnode already reclaimed.")); VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("vnode already free")); VNASSERT(vp->v_holdcnt == 0, vp, ("vdropl: freeing when we shouldn't")); active = vp->v_iflag & VI_ACTIVE; if ((vp->v_iflag & VI_OWEINACT) == 0) { vp->v_iflag &= ~VI_ACTIVE; mp = vp->v_mount; mtx_lock(&vnode_free_list_mtx); if (active) { TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist); mp->mnt_activevnodelistsize--; } if (vp->v_iflag & VI_AGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_actfreelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist); } freevnodes++; vp->v_iflag &= ~VI_AGE; vp->v_iflag |= VI_FREE; mtx_unlock(&vnode_free_list_mtx); } else { atomic_add_long(&free_owe_inact, 1); } VI_UNLOCK(vp); return; } /* * The vnode has been marked for destruction, so free it. */ CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp); atomic_subtract_long(&numvnodes, 1); bo = &vp->v_bufobj; VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("cleaned vnode still on the free list.")); VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count")); VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count")); VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count")); VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's")); VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0")); VNASSERT(pctrie_is_empty(&bo->bo_clean.bv_root), vp, ("clean blk trie not empty")); VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0")); VNASSERT(pctrie_is_empty(&bo->bo_dirty.bv_root), vp, ("dirty blk trie not empty")); VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst")); VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src")); VNASSERT(vp->v_cache_dd == NULL, vp, ("vp has namecache for ..")); VI_UNLOCK(vp); #ifdef MAC mac_vnode_destroy(vp); #endif if (vp->v_pollinfo != NULL) destroy_vpollinfo(vp->v_pollinfo); #ifdef INVARIANTS /* XXX Elsewhere we detect an already freed vnode via NULL v_op. */ vp->v_op = NULL; #endif rangelock_destroy(&vp->v_rl); lockdestroy(vp->v_vnlock); mtx_destroy(&vp->v_interlock); rw_destroy(BO_LOCKPTR(bo)); uma_zfree(vnode_zone, vp); } /* * Call VOP_INACTIVE on the vnode and manage the DOINGINACT and OWEINACT * flags. DOINGINACT prevents us from recursing in calls to vinactive. * OWEINACT tracks whether a vnode missed a call to inactive due to a * failed lock upgrade. */ void vinactive(struct vnode *vp, struct thread *td) { struct vm_object *obj; ASSERT_VOP_ELOCKED(vp, "vinactive"); ASSERT_VI_LOCKED(vp, "vinactive"); VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp, ("vinactive: recursed on VI_DOINGINACT")); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); vp->v_iflag |= VI_DOINGINACT; vp->v_iflag &= ~VI_OWEINACT; VI_UNLOCK(vp); /* * Before moving off the active list, we must be sure that any * modified pages are on the vnode's dirty list since these will * no longer be checked once the vnode is on the inactive list. * Because the vnode vm object keeps a hold reference on the vnode * if there is at least one resident non-cached page, the vnode * cannot leave the active list without the page cleanup done. */ obj = vp->v_object; if (obj != NULL && (obj->flags & OBJ_MIGHTBEDIRTY) != 0) { VM_OBJECT_WLOCK(obj); vm_object_page_clean(obj, 0, 0, OBJPC_NOSYNC); VM_OBJECT_WUNLOCK(obj); } VOP_INACTIVE(vp, td); VI_LOCK(vp); VNASSERT(vp->v_iflag & VI_DOINGINACT, vp, ("vinactive: lost VI_DOINGINACT")); vp->v_iflag &= ~VI_DOINGINACT; } /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If FORCECLOSE is not specified, there should not be any active ones, * return error if any are found (nb: this is a user error, not a * system error). If FORCECLOSE is specified, detach any active vnodes * that are found. * * If WRITECLOSE is set, only flush out regular file vnodes open for * writing. * * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped. * * `rootrefs' specifies the base reference count for the root vnode * of this filesystem. The root vnode is considered busy if its * v_usecount exceeds this value. On a successful return, vflush(, td) * will call vrele() on the root vnode exactly rootrefs times. * If the SKIPSYSTEM or WRITECLOSE flags are specified, rootrefs must * be zero. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "Print out busy vnodes"); #endif int vflush(struct mount *mp, int rootrefs, int flags, struct thread *td) { struct vnode *vp, *mvp, *rootvp = NULL; struct vattr vattr; int busy = 0, error; CTR4(KTR_VFS, "%s: mp %p with rootrefs %d and flags %d", __func__, mp, rootrefs, flags); if (rootrefs > 0) { KASSERT((flags & (SKIPSYSTEM | WRITECLOSE)) == 0, ("vflush: bad args")); /* * Get the filesystem root vnode. We can vput() it * immediately, since with rootrefs > 0, it won't go away. */ if ((error = VFS_ROOT(mp, LK_EXCLUSIVE, &rootvp)) != 0) { CTR2(KTR_VFS, "%s: vfs_root lookup failed with %d", __func__, error); return (error); } vput(rootvp); } loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { vholdl(vp); error = vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE); if (error) { vdrop(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } /* * Skip over a vnodes marked VV_SYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) { VOP_UNLOCK(vp, 0); vdrop(vp); continue; } /* * If WRITECLOSE is set, flush out unlinked but still open * files (even if open only for reading) and regular file * vnodes open for writing. */ if (flags & WRITECLOSE) { if (vp->v_object != NULL) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_WUNLOCK(vp->v_object); } error = VOP_FSYNC(vp, MNT_WAIT, td); if (error != 0) { VOP_UNLOCK(vp, 0); vdrop(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (error); } error = VOP_GETATTR(vp, &vattr, td->td_ucred); VI_LOCK(vp); if ((vp->v_type == VNON || (error == 0 && vattr.va_nlink > 0)) && (vp->v_writecount == 0 || vp->v_type != VREG)) { VOP_UNLOCK(vp, 0); vdropl(vp); continue; } } else VI_LOCK(vp); /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. * * If FORCECLOSE is set, forcibly close the vnode. */ if (vp->v_usecount == 0 || (flags & FORCECLOSE)) { vgonel(vp); } else { busy++; #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif } VOP_UNLOCK(vp, 0); vdropl(vp); } if (rootrefs > 0 && (flags & FORCECLOSE) == 0) { /* * If just the root vnode is busy, and if its refcount * is equal to `rootrefs', then go ahead and kill it. */ VI_LOCK(rootvp); KASSERT(busy > 0, ("vflush: not busy")); VNASSERT(rootvp->v_usecount >= rootrefs, rootvp, ("vflush: usecount %d < rootrefs %d", rootvp->v_usecount, rootrefs)); if (busy == 1 && rootvp->v_usecount == rootrefs) { VOP_LOCK(rootvp, LK_EXCLUSIVE|LK_INTERLOCK); vgone(rootvp); VOP_UNLOCK(rootvp, 0); busy = 0; } else VI_UNLOCK(rootvp); } if (busy) { CTR2(KTR_VFS, "%s: failing as %d vnodes are busy", __func__, busy); return (EBUSY); } for (; rootrefs > 0; rootrefs--) vrele(rootvp); return (0); } /* * Recycle an unused vnode to the front of the free list. */ int vrecycle(struct vnode *vp) { int recycled; ASSERT_VOP_ELOCKED(vp, "vrecycle"); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); recycled = 0; VI_LOCK(vp); if (vp->v_usecount == 0) { recycled = 1; vgonel(vp); } VI_UNLOCK(vp); return (recycled); } /* * Eliminate all activity associated with a vnode * in preparation for reuse. */ void vgone(struct vnode *vp) { VI_LOCK(vp); vgonel(vp); VI_UNLOCK(vp); } static void notify_lowervp_vfs_dummy(struct mount *mp __unused, struct vnode *lowervp __unused) { } /* * Notify upper mounts about reclaimed or unlinked vnode. */ void vfs_notify_upper(struct vnode *vp, int event) { static struct vfsops vgonel_vfsops = { .vfs_reclaim_lowervp = notify_lowervp_vfs_dummy, .vfs_unlink_lowervp = notify_lowervp_vfs_dummy, }; struct mount *mp, *ump, *mmp; mp = vp->v_mount; if (mp == NULL) return; MNT_ILOCK(mp); if (TAILQ_EMPTY(&mp->mnt_uppers)) goto unlock; MNT_IUNLOCK(mp); mmp = malloc(sizeof(struct mount), M_TEMP, M_WAITOK | M_ZERO); mmp->mnt_op = &vgonel_vfsops; mmp->mnt_kern_flag |= MNTK_MARKER; MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_VGONE_UPPER; for (ump = TAILQ_FIRST(&mp->mnt_uppers); ump != NULL;) { if ((ump->mnt_kern_flag & MNTK_MARKER) != 0) { ump = TAILQ_NEXT(ump, mnt_upper_link); continue; } TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link); MNT_IUNLOCK(mp); switch (event) { case VFS_NOTIFY_UPPER_RECLAIM: VFS_RECLAIM_LOWERVP(ump, vp); break; case VFS_NOTIFY_UPPER_UNLINK: VFS_UNLINK_LOWERVP(ump, vp); break; default: KASSERT(0, ("invalid event %d", event)); break; } MNT_ILOCK(mp); ump = TAILQ_NEXT(mmp, mnt_upper_link); TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link); } free(mmp, M_TEMP); mp->mnt_kern_flag &= ~MNTK_VGONE_UPPER; if ((mp->mnt_kern_flag & MNTK_VGONE_WAITER) != 0) { mp->mnt_kern_flag &= ~MNTK_VGONE_WAITER; wakeup(&mp->mnt_uppers); } unlock: MNT_IUNLOCK(mp); } /* * vgone, with the vp interlock held. */ static void vgonel(struct vnode *vp) { struct thread *td; int oweinact; int active; struct mount *mp; ASSERT_VOP_ELOCKED(vp, "vgonel"); ASSERT_VI_LOCKED(vp, "vgonel"); VNASSERT(vp->v_holdcnt, vp, ("vgonel: vp %p has no reference.", vp)); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); td = curthread; /* * Don't vgonel if we're already doomed. */ if (vp->v_iflag & VI_DOOMED) return; vp->v_iflag |= VI_DOOMED; /* * Check to see if the vnode is in use. If so, we have to call * VOP_CLOSE() and VOP_INACTIVE(). */ active = vp->v_usecount; oweinact = (vp->v_iflag & VI_OWEINACT); VI_UNLOCK(vp); vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM); /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. */ if (active) VOP_CLOSE(vp, FNONBLOCK, NOCRED, td); if (oweinact || active) { VI_LOCK(vp); if ((vp->v_iflag & VI_DOINGINACT) == 0) vinactive(vp, td); VI_UNLOCK(vp); } if (vp->v_type == VSOCK) vfs_unp_reclaim(vp); /* * Clean out any buffers associated with the vnode. * If the flush fails, just toss the buffers. */ mp = NULL; if (!TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd)) (void) vn_start_secondary_write(vp, &mp, V_WAIT); if (vinvalbuf(vp, V_SAVE, 0, 0) != 0) { while (vinvalbuf(vp, 0, 0, 0) != 0) ; } BO_LOCK(&vp->v_bufobj); KASSERT(TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd) && vp->v_bufobj.bo_dirty.bv_cnt == 0 && TAILQ_EMPTY(&vp->v_bufobj.bo_clean.bv_hd) && vp->v_bufobj.bo_clean.bv_cnt == 0, ("vp %p bufobj not invalidated", vp)); vp->v_bufobj.bo_flag |= BO_DEAD; BO_UNLOCK(&vp->v_bufobj); /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, td)) panic("vgone: cannot reclaim"); if (mp != NULL) vn_finished_secondary_write(mp); VNASSERT(vp->v_object == NULL, vp, ("vop_reclaim left v_object vp=%p, tag=%s", vp, vp->v_tag)); /* * Clear the advisory locks and wake up waiting threads. */ (void)VOP_ADVLOCKPURGE(vp); /* * Delete from old mount point vnode list. */ delmntque(vp); cache_purge(vp); /* * Done with purge, reset to the standard lock and invalidate * the vnode. */ VI_LOCK(vp); vp->v_vnlock = &vp->v_lock; vp->v_op = &dead_vnodeops; vp->v_tag = "none"; vp->v_type = VBAD; } /* * Calculate the total number of references to a special device. */ int vcount(struct vnode *vp) { int count; dev_lock(); count = vp->v_rdev->si_usecount; dev_unlock(); return (count); } /* * Same as above, but using the struct cdev *as argument */ int count_dev(struct cdev *dev) { int count; dev_lock(); count = dev->si_usecount; dev_unlock(); return(count); } /* * Print out a description of a vnode. */ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD", "VMARKER"}; void vn_printf(struct vnode *vp, const char *fmt, ...) { va_list ap; char buf[256], buf2[16]; u_long flags; va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("%p: ", (void *)vp); printf("tag %s, type %s\n", vp->v_tag, typename[vp->v_type]); printf(" usecount %d, writecount %d, refcount %d mountedhere %p\n", vp->v_usecount, vp->v_writecount, vp->v_holdcnt, vp->v_mountedhere); buf[0] = '\0'; buf[1] = '\0'; if (vp->v_vflag & VV_ROOT) strlcat(buf, "|VV_ROOT", sizeof(buf)); if (vp->v_vflag & VV_ISTTY) strlcat(buf, "|VV_ISTTY", sizeof(buf)); if (vp->v_vflag & VV_NOSYNC) strlcat(buf, "|VV_NOSYNC", sizeof(buf)); if (vp->v_vflag & VV_ETERNALDEV) strlcat(buf, "|VV_ETERNALDEV", sizeof(buf)); if (vp->v_vflag & VV_CACHEDLABEL) strlcat(buf, "|VV_CACHEDLABEL", sizeof(buf)); if (vp->v_vflag & VV_TEXT) strlcat(buf, "|VV_TEXT", sizeof(buf)); if (vp->v_vflag & VV_COPYONWRITE) strlcat(buf, "|VV_COPYONWRITE", sizeof(buf)); if (vp->v_vflag & VV_SYSTEM) strlcat(buf, "|VV_SYSTEM", sizeof(buf)); if (vp->v_vflag & VV_PROCDEP) strlcat(buf, "|VV_PROCDEP", sizeof(buf)); if (vp->v_vflag & VV_NOKNOTE) strlcat(buf, "|VV_NOKNOTE", sizeof(buf)); if (vp->v_vflag & VV_DELETED) strlcat(buf, "|VV_DELETED", sizeof(buf)); if (vp->v_vflag & VV_MD) strlcat(buf, "|VV_MD", sizeof(buf)); if (vp->v_vflag & VV_FORCEINSMQ) strlcat(buf, "|VV_FORCEINSMQ", sizeof(buf)); flags = vp->v_vflag & ~(VV_ROOT | VV_ISTTY | VV_NOSYNC | VV_ETERNALDEV | VV_CACHEDLABEL | VV_TEXT | VV_COPYONWRITE | VV_SYSTEM | VV_PROCDEP | VV_NOKNOTE | VV_DELETED | VV_MD | VV_FORCEINSMQ); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VV(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } if (vp->v_iflag & VI_MOUNT) strlcat(buf, "|VI_MOUNT", sizeof(buf)); if (vp->v_iflag & VI_AGE) strlcat(buf, "|VI_AGE", sizeof(buf)); if (vp->v_iflag & VI_DOOMED) strlcat(buf, "|VI_DOOMED", sizeof(buf)); if (vp->v_iflag & VI_FREE) strlcat(buf, "|VI_FREE", sizeof(buf)); if (vp->v_iflag & VI_ACTIVE) strlcat(buf, "|VI_ACTIVE", sizeof(buf)); if (vp->v_iflag & VI_DOINGINACT) strlcat(buf, "|VI_DOINGINACT", sizeof(buf)); if (vp->v_iflag & VI_OWEINACT) strlcat(buf, "|VI_OWEINACT", sizeof(buf)); flags = vp->v_iflag & ~(VI_MOUNT | VI_AGE | VI_DOOMED | VI_FREE | VI_ACTIVE | VI_DOINGINACT | VI_OWEINACT); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VI(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } printf(" flags (%s)\n", buf + 1); if (mtx_owned(VI_MTX(vp))) printf(" VI_LOCKed"); if (vp->v_object != NULL) printf(" v_object %p ref %d pages %d " "cleanbuf %d dirtybuf %d\n", vp->v_object, vp->v_object->ref_count, vp->v_object->resident_page_count, vp->v_bufobj.bo_dirty.bv_cnt, vp->v_bufobj.bo_clean.bv_cnt); printf(" "); lockmgr_printinfo(vp->v_vnlock); if (vp->v_data != NULL) VOP_PRINT(vp); } #ifdef DDB /* * List all of the locked vnodes in the system. * Called when debugging the kernel. */ DB_SHOW_COMMAND(lockedvnods, lockedvnodes) { struct mount *mp; struct vnode *vp; /* * Note: because this is DDB, we can't obey the locking semantics * for these structures, which means we could catch an inconsistent * state and dereference a nasty pointer. Not much to be done * about that. */ db_printf("Locked vnodes\n"); TAILQ_FOREACH(mp, &mountlist, mnt_list) { TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && VOP_ISLOCKED(vp)) vprint("", vp); } } } /* * Show details about the given vnode. */ DB_SHOW_COMMAND(vnode, db_show_vnode) { struct vnode *vp; if (!have_addr) return; vp = (struct vnode *)addr; vn_printf(vp, "vnode "); } /* * Show details about the given mount point. */ DB_SHOW_COMMAND(mount, db_show_mount) { struct mount *mp; struct vfsopt *opt; struct statfs *sp; struct vnode *vp; char buf[512]; uint64_t mflags; u_int flags; if (!have_addr) { /* No address given, print short info about all mount points. */ TAILQ_FOREACH(mp, &mountlist, mnt_list) { db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename); if (db_pager_quit) break; } db_printf("\nMore info: show mount \n"); return; } mp = (struct mount *)addr; db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename); buf[0] = '\0'; mflags = mp->mnt_flag; #define MNT_FLAG(flag) do { \ if (mflags & (flag)) { \ if (buf[0] != '\0') \ strlcat(buf, ", ", sizeof(buf)); \ strlcat(buf, (#flag) + 4, sizeof(buf)); \ mflags &= ~(flag); \ } \ } while (0) MNT_FLAG(MNT_RDONLY); MNT_FLAG(MNT_SYNCHRONOUS); MNT_FLAG(MNT_NOEXEC); MNT_FLAG(MNT_NOSUID); MNT_FLAG(MNT_NFS4ACLS); MNT_FLAG(MNT_UNION); MNT_FLAG(MNT_ASYNC); MNT_FLAG(MNT_SUIDDIR); MNT_FLAG(MNT_SOFTDEP); MNT_FLAG(MNT_NOSYMFOLLOW); MNT_FLAG(MNT_GJOURNAL); MNT_FLAG(MNT_MULTILABEL); MNT_FLAG(MNT_ACLS); MNT_FLAG(MNT_NOATIME); MNT_FLAG(MNT_NOCLUSTERR); MNT_FLAG(MNT_NOCLUSTERW); MNT_FLAG(MNT_SUJ); MNT_FLAG(MNT_EXRDONLY); MNT_FLAG(MNT_EXPORTED); MNT_FLAG(MNT_DEFEXPORTED); MNT_FLAG(MNT_EXPORTANON); MNT_FLAG(MNT_EXKERB); MNT_FLAG(MNT_EXPUBLIC); MNT_FLAG(MNT_LOCAL); MNT_FLAG(MNT_QUOTA); MNT_FLAG(MNT_ROOTFS); MNT_FLAG(MNT_USER); MNT_FLAG(MNT_IGNORE); MNT_FLAG(MNT_UPDATE); MNT_FLAG(MNT_DELEXPORT); MNT_FLAG(MNT_RELOAD); MNT_FLAG(MNT_FORCE); MNT_FLAG(MNT_SNAPSHOT); MNT_FLAG(MNT_BYFSID); #undef MNT_FLAG if (mflags != 0) { if (buf[0] != '\0') strlcat(buf, ", ", sizeof(buf)); snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "0x%016jx", mflags); } db_printf(" mnt_flag = %s\n", buf); buf[0] = '\0'; flags = mp->mnt_kern_flag; #define MNT_KERN_FLAG(flag) do { \ if (flags & (flag)) { \ if (buf[0] != '\0') \ strlcat(buf, ", ", sizeof(buf)); \ strlcat(buf, (#flag) + 5, sizeof(buf)); \ flags &= ~(flag); \ } \ } while (0) MNT_KERN_FLAG(MNTK_UNMOUNTF); MNT_KERN_FLAG(MNTK_ASYNC); MNT_KERN_FLAG(MNTK_SOFTDEP); MNT_KERN_FLAG(MNTK_NOINSMNTQ); MNT_KERN_FLAG(MNTK_DRAINING); MNT_KERN_FLAG(MNTK_REFEXPIRE); MNT_KERN_FLAG(MNTK_EXTENDED_SHARED); MNT_KERN_FLAG(MNTK_SHARED_WRITES); MNT_KERN_FLAG(MNTK_NO_IOPF); MNT_KERN_FLAG(MNTK_VGONE_UPPER); MNT_KERN_FLAG(MNTK_VGONE_WAITER); MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT); MNT_KERN_FLAG(MNTK_MARKER); MNT_KERN_FLAG(MNTK_USES_BCACHE); MNT_KERN_FLAG(MNTK_NOASYNC); MNT_KERN_FLAG(MNTK_UNMOUNT); MNT_KERN_FLAG(MNTK_MWAIT); MNT_KERN_FLAG(MNTK_SUSPEND); MNT_KERN_FLAG(MNTK_SUSPEND2); MNT_KERN_FLAG(MNTK_SUSPENDED); MNT_KERN_FLAG(MNTK_LOOKUP_SHARED); MNT_KERN_FLAG(MNTK_NOKNOTE); #undef MNT_KERN_FLAG if (flags != 0) { if (buf[0] != '\0') strlcat(buf, ", ", sizeof(buf)); snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "0x%08x", flags); } db_printf(" mnt_kern_flag = %s\n", buf); db_printf(" mnt_opt = "); opt = TAILQ_FIRST(mp->mnt_opt); if (opt != NULL) { db_printf("%s", opt->name); opt = TAILQ_NEXT(opt, link); while (opt != NULL) { db_printf(", %s", opt->name); opt = TAILQ_NEXT(opt, link); } } db_printf("\n"); sp = &mp->mnt_stat; db_printf(" mnt_stat = { version=%u type=%u flags=0x%016jx " "bsize=%ju iosize=%ju blocks=%ju bfree=%ju bavail=%jd files=%ju " "ffree=%jd syncwrites=%ju asyncwrites=%ju syncreads=%ju " "asyncreads=%ju namemax=%u owner=%u fsid=[%d, %d] }\n", (u_int)sp->f_version, (u_int)sp->f_type, (uintmax_t)sp->f_flags, (uintmax_t)sp->f_bsize, (uintmax_t)sp->f_iosize, (uintmax_t)sp->f_blocks, (uintmax_t)sp->f_bfree, (intmax_t)sp->f_bavail, (uintmax_t)sp->f_files, (intmax_t)sp->f_ffree, (uintmax_t)sp->f_syncwrites, (uintmax_t)sp->f_asyncwrites, (uintmax_t)sp->f_syncreads, (uintmax_t)sp->f_asyncreads, (u_int)sp->f_namemax, (u_int)sp->f_owner, (int)sp->f_fsid.val[0], (int)sp->f_fsid.val[1]); db_printf(" mnt_cred = { uid=%u ruid=%u", (u_int)mp->mnt_cred->cr_uid, (u_int)mp->mnt_cred->cr_ruid); if (jailed(mp->mnt_cred)) db_printf(", jail=%d", mp->mnt_cred->cr_prison->pr_id); db_printf(" }\n"); db_printf(" mnt_ref = %d\n", mp->mnt_ref); db_printf(" mnt_gen = %d\n", mp->mnt_gen); db_printf(" mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize); db_printf(" mnt_activevnodelistsize = %d\n", mp->mnt_activevnodelistsize); db_printf(" mnt_writeopcount = %d\n", mp->mnt_writeopcount); db_printf(" mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen); db_printf(" mnt_iosize_max = %d\n", mp->mnt_iosize_max); db_printf(" mnt_hashseed = %u\n", mp->mnt_hashseed); db_printf(" mnt_lockref = %d\n", mp->mnt_lockref); db_printf(" mnt_secondary_writes = %d\n", mp->mnt_secondary_writes); db_printf(" mnt_secondary_accwrites = %d\n", mp->mnt_secondary_accwrites); db_printf(" mnt_gjprovider = %s\n", mp->mnt_gjprovider != NULL ? mp->mnt_gjprovider : "NULL"); db_printf("\n\nList of active vnodes\n"); TAILQ_FOREACH(vp, &mp->mnt_activevnodelist, v_actfreelist) { if (vp->v_type != VMARKER) { vn_printf(vp, "vnode "); if (db_pager_quit) break; } } db_printf("\n\nList of inactive vnodes\n"); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && (vp->v_iflag & VI_ACTIVE) == 0) { vn_printf(vp, "vnode "); if (db_pager_quit) break; } } } #endif /* DDB */ /* * Fill in a struct xvfsconf based on a struct vfsconf. */ static int vfsconf2x(struct sysctl_req *req, struct vfsconf *vfsp) { struct xvfsconf xvfsp; bzero(&xvfsp, sizeof(xvfsp)); strcpy(xvfsp.vfc_name, vfsp->vfc_name); xvfsp.vfc_typenum = vfsp->vfc_typenum; xvfsp.vfc_refcount = vfsp->vfc_refcount; xvfsp.vfc_flags = vfsp->vfc_flags; /* * These are unused in userland, we keep them * to not break binary compatibility. */ xvfsp.vfc_vfsops = NULL; xvfsp.vfc_next = NULL; return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp))); } #ifdef COMPAT_FREEBSD32 struct xvfsconf32 { uint32_t vfc_vfsops; char vfc_name[MFSNAMELEN]; int32_t vfc_typenum; int32_t vfc_refcount; int32_t vfc_flags; uint32_t vfc_next; }; static int vfsconf2x32(struct sysctl_req *req, struct vfsconf *vfsp) { struct xvfsconf32 xvfsp; strcpy(xvfsp.vfc_name, vfsp->vfc_name); xvfsp.vfc_typenum = vfsp->vfc_typenum; xvfsp.vfc_refcount = vfsp->vfc_refcount; xvfsp.vfc_flags = vfsp->vfc_flags; xvfsp.vfc_vfsops = 0; xvfsp.vfc_next = 0; return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp))); } #endif /* * Top level filesystem related information gathering. */ static int sysctl_vfs_conflist(SYSCTL_HANDLER_ARGS) { struct vfsconf *vfsp; int error; error = 0; vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { #ifdef COMPAT_FREEBSD32 if (req->flags & SCTL_MASK32) error = vfsconf2x32(req, vfsp); else #endif error = vfsconf2x(req, vfsp); if (error) break; } vfsconf_sunlock(); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, conflist, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_vfs_conflist, "S,xvfsconf", "List of all configured filesystems"); #ifndef BURN_BRIDGES static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS); static int vfs_sysctl(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1 - 1; /* XXX */ u_int namelen = arg2 + 1; /* XXX */ struct vfsconf *vfsp; log(LOG_WARNING, "userland calling deprecated sysctl, " "please rebuild world\n"); #if 1 || defined(COMPAT_PRELITE2) /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */ if (namelen == 1) return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); #endif switch (name[1]) { case VFS_MAXTYPENUM: if (namelen != 2) return (ENOTDIR); return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); case VFS_CONF: if (namelen != 3) return (ENOTDIR); /* overloaded */ vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { if (vfsp->vfc_typenum == name[2]) break; } vfsconf_sunlock(); if (vfsp == NULL) return (EOPNOTSUPP); #ifdef COMPAT_FREEBSD32 if (req->flags & SCTL_MASK32) return (vfsconf2x32(req, vfsp)); else #endif return (vfsconf2x(req, vfsp)); } return (EOPNOTSUPP); } static SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, vfs_sysctl, "Generic filesystem"); #if 1 || defined(COMPAT_PRELITE2) static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS) { int error; struct vfsconf *vfsp; struct ovfsconf ovfs; vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { bzero(&ovfs, sizeof(ovfs)); ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ strcpy(ovfs.vfc_name, vfsp->vfc_name); ovfs.vfc_index = vfsp->vfc_typenum; ovfs.vfc_refcount = vfsp->vfc_refcount; ovfs.vfc_flags = vfsp->vfc_flags; error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); if (error != 0) { vfsconf_sunlock(); return (error); } } vfsconf_sunlock(); return (0); } #endif /* 1 || COMPAT_PRELITE2 */ #endif /* !BURN_BRIDGES */ #define KINFO_VNODESLOP 10 #ifdef notyet /* * Dump vnode list (via sysctl). */ /* ARGSUSED */ static int sysctl_vnode(SYSCTL_HANDLER_ARGS) { struct xvnode *xvn; struct mount *mp; struct vnode *vp; int error, len, n; /* * Stale numvnodes access is not fatal here. */ req->lock = 0; len = (numvnodes + KINFO_VNODESLOP) * sizeof *xvn; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, len)); error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); xvn = malloc(len, M_TEMP, M_ZERO | M_WAITOK); n = 0; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) continue; MNT_ILOCK(mp); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (n == len) break; vref(vp); xvn[n].xv_size = sizeof *xvn; xvn[n].xv_vnode = vp; xvn[n].xv_id = 0; /* XXX compat */ #define XV_COPY(field) xvn[n].xv_##field = vp->v_##field XV_COPY(usecount); XV_COPY(writecount); XV_COPY(holdcnt); XV_COPY(mount); XV_COPY(numoutput); XV_COPY(type); #undef XV_COPY xvn[n].xv_flag = vp->v_vflag; switch (vp->v_type) { case VREG: case VDIR: case VLNK: break; case VBLK: case VCHR: if (vp->v_rdev == NULL) { vrele(vp); continue; } xvn[n].xv_dev = dev2udev(vp->v_rdev); break; case VSOCK: xvn[n].xv_socket = vp->v_socket; break; case VFIFO: xvn[n].xv_fifo = vp->v_fifoinfo; break; case VNON: case VBAD: default: /* shouldn't happen? */ vrele(vp); continue; } vrele(vp); ++n; } MNT_IUNLOCK(mp); mtx_lock(&mountlist_mtx); vfs_unbusy(mp); if (n == len) break; } mtx_unlock(&mountlist_mtx); error = SYSCTL_OUT(req, xvn, n * sizeof *xvn); free(xvn, M_TEMP); return (error); } SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_vnode, "S,xvnode", ""); #endif static void unmount_or_warn(struct mount *mp) { int error; error = dounmount(mp, MNT_FORCE, curthread); if (error != 0) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } } /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. */ void vfs_unmountall(void) { struct mount *mp, *tmp; CTR1(KTR_VFS, "%s: unmounting all filesystems", __func__); /* * Since this only runs when rebooting, it is not interlocked. */ TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, tmp) { vfs_ref(mp); /* * Forcibly unmounting "/dev" before "/" would prevent clean * unmount of the latter. */ if (mp == rootdevmp) continue; unmount_or_warn(mp); } if (rootdevmp != NULL) unmount_or_warn(rootdevmp); } /* * perform msync on all vnodes under a mount point * the mount point must be locked. */ void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *mvp; struct vm_object *obj; CTR2(KTR_VFS, "%s: mp %p", __func__, mp); MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) { obj = vp->v_object; if (obj != NULL && (obj->flags & OBJ_MIGHTBEDIRTY) != 0 && (flags == MNT_WAIT || VOP_ISLOCKED(vp) == 0)) { if (!vget(vp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, curthread)) { if (vp->v_vflag & VV_NOSYNC) { /* unlinked */ vput(vp); continue; } obj = vp->v_object; if (obj != NULL) { VM_OBJECT_WLOCK(obj); vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC); VM_OBJECT_WUNLOCK(obj); } vput(vp); } } else VI_UNLOCK(vp); } } static void destroy_vpollinfo_free(struct vpollinfo *vi) { knlist_destroy(&vi->vpi_selinfo.si_note); mtx_destroy(&vi->vpi_lock); uma_zfree(vnodepoll_zone, vi); } static void destroy_vpollinfo(struct vpollinfo *vi) { knlist_clear(&vi->vpi_selinfo.si_note, 1); seldrain(&vi->vpi_selinfo); destroy_vpollinfo_free(vi); } /* * Initalize per-vnode helper structure to hold poll-related state. */ void v_addpollinfo(struct vnode *vp) { struct vpollinfo *vi; if (vp->v_pollinfo != NULL) return; vi = uma_zalloc(vnodepoll_zone, M_WAITOK | M_ZERO); mtx_init(&vi->vpi_lock, "vnode pollinfo", NULL, MTX_DEF); knlist_init(&vi->vpi_selinfo.si_note, vp, vfs_knllock, vfs_knlunlock, vfs_knl_assert_locked, vfs_knl_assert_unlocked); VI_LOCK(vp); if (vp->v_pollinfo != NULL) { VI_UNLOCK(vp); destroy_vpollinfo_free(vi); return; } vp->v_pollinfo = vi; VI_UNLOCK(vp); } /* * Record a process's interest in events which might happen to * a vnode. Because poll uses the historic select-style interface * internally, this routine serves as both the ``check for any * pending events'' and the ``record my interest in future events'' * functions. (These are done together, while the lock is held, * to avoid race conditions.) */ int vn_pollrecord(struct vnode *vp, struct thread *td, int events) { v_addpollinfo(vp); mtx_lock(&vp->v_pollinfo->vpi_lock); if (vp->v_pollinfo->vpi_revents & events) { /* * This leaves events we are not interested * in available for the other process which * which presumably had requested them * (otherwise they would never have been * recorded). */ events &= vp->v_pollinfo->vpi_revents; vp->v_pollinfo->vpi_revents &= ~events; mtx_unlock(&vp->v_pollinfo->vpi_lock); return (events); } vp->v_pollinfo->vpi_events |= events; selrecord(td, &vp->v_pollinfo->vpi_selinfo); mtx_unlock(&vp->v_pollinfo->vpi_lock); return (0); } /* * Routine to create and manage a filesystem syncer vnode. */ #define sync_close ((int (*)(struct vop_close_args *))nullop) static int sync_fsync(struct vop_fsync_args *); static int sync_inactive(struct vop_inactive_args *); static int sync_reclaim(struct vop_reclaim_args *); static struct vop_vector sync_vnodeops = { .vop_bypass = VOP_EOPNOTSUPP, .vop_close = sync_close, /* close */ .vop_fsync = sync_fsync, /* fsync */ .vop_inactive = sync_inactive, /* inactive */ .vop_reclaim = sync_reclaim, /* reclaim */ .vop_lock1 = vop_stdlock, /* lock */ .vop_unlock = vop_stdunlock, /* unlock */ .vop_islocked = vop_stdislocked, /* islocked */ }; /* * Create a new filesystem syncer vnode for the specified mount point. */ void vfs_allocate_syncvnode(struct mount *mp) { struct vnode *vp; struct bufobj *bo; static long start, incr, next; int error; /* Allocate a new vnode */ error = getnewvnode("syncer", mp, &sync_vnodeops, &vp); if (error != 0) panic("vfs_allocate_syncvnode: getnewvnode() failed"); vp->v_type = VNON; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vp->v_vflag |= VV_FORCEINSMQ; error = insmntque(vp, mp); if (error != 0) panic("vfs_allocate_syncvnode: insmntque() failed"); vp->v_vflag &= ~VV_FORCEINSMQ; VOP_UNLOCK(vp, 0); /* * Place the vnode onto the syncer worklist. We attempt to * scatter them about on the list so that they will go off * at evenly distributed times even if all the filesystems * are mounted at once. */ next += incr; if (next == 0 || next > syncer_maxdelay) { start /= 2; incr /= 2; if (start == 0) { start = syncer_maxdelay / 2; incr = syncer_maxdelay; } next = start; } bo = &vp->v_bufobj; BO_LOCK(bo); vn_syncer_add_to_worklist(bo, syncdelay > 0 ? next % syncdelay : 0); /* XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx. */ mtx_lock(&sync_mtx); sync_vnode_count++; if (mp->mnt_syncer == NULL) { mp->mnt_syncer = vp; vp = NULL; } mtx_unlock(&sync_mtx); BO_UNLOCK(bo); if (vp != NULL) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vgone(vp); vput(vp); } } void vfs_deallocate_syncvnode(struct mount *mp) { struct vnode *vp; mtx_lock(&sync_mtx); vp = mp->mnt_syncer; if (vp != NULL) mp->mnt_syncer = NULL; mtx_unlock(&sync_mtx); if (vp != NULL) vrele(vp); } /* * Do a lazy sync of the filesystem. */ static int sync_fsync(struct vop_fsync_args *ap) { struct vnode *syncvp = ap->a_vp; struct mount *mp = syncvp->v_mount; int error, save; struct bufobj *bo; /* * We only need to do something if this is a lazy evaluation. */ if (ap->a_waitfor != MNT_LAZY) return (0); /* * Move ourselves to the back of the sync list. */ bo = &syncvp->v_bufobj; BO_LOCK(bo); vn_syncer_add_to_worklist(bo, syncdelay); BO_UNLOCK(bo); /* * Walk the list of vnodes pushing all that are dirty and * not already on the sync list. */ if (vfs_busy(mp, MBF_NOWAIT) != 0) return (0); if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) { vfs_unbusy(mp); return (0); } save = curthread_pflags_set(TDP_SYNCIO); vfs_msync(mp, MNT_NOWAIT); error = VFS_SYNC(mp, MNT_LAZY); curthread_pflags_restore(save); vn_finished_write(mp); vfs_unbusy(mp); return (error); } /* * The syncer vnode is no referenced. */ static int sync_inactive(struct vop_inactive_args *ap) { vgone(ap->a_vp); return (0); } /* * The syncer vnode is no longer needed and is being decommissioned. * * Modifications to the worklist must be protected by sync_mtx. */ static int sync_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; struct bufobj *bo; bo = &vp->v_bufobj; BO_LOCK(bo); mtx_lock(&sync_mtx); if (vp->v_mount->mnt_syncer == vp) vp->v_mount->mnt_syncer = NULL; if (bo->bo_flag & BO_ONWORKLST) { LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; sync_vnode_count--; bo->bo_flag &= ~BO_ONWORKLST; } mtx_unlock(&sync_mtx); BO_UNLOCK(bo); return (0); } /* * Check if vnode represents a disk device */ int vn_isdisk(struct vnode *vp, int *errp) { int error; if (vp->v_type != VCHR) { error = ENOTBLK; goto out; } error = 0; dev_lock(); if (vp->v_rdev == NULL) error = ENXIO; else if (vp->v_rdev->si_devsw == NULL) error = ENXIO; else if (!(vp->v_rdev->si_devsw->d_flags & D_DISK)) error = ENOTBLK; dev_unlock(); out: if (errp != NULL) *errp = error; return (error == 0); } /* * Common filesystem object access control check routine. Accepts a * vnode's type, "mode", uid and gid, requested access mode, credentials, * and optional call-by-reference privused argument allowing vaccess() * to indicate to the caller whether privilege was used to satisfy the * request (obsoleted). Returns 0 on success, or an errno on failure. */ int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid, accmode_t accmode, struct ucred *cred, int *privused) { accmode_t dac_granted; accmode_t priv_granted; KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0, ("invalid bit in accmode")); KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE), ("VAPPEND without VWRITE")); /* * Look for a normal, non-privileged way to access the file/directory * as requested. If it exists, go with that. */ if (privused != NULL) *privused = 0; dac_granted = 0; /* Check the owner. */ if (cred->cr_uid == file_uid) { dac_granted |= VADMIN; if (file_mode & S_IXUSR) dac_granted |= VEXEC; if (file_mode & S_IRUSR) dac_granted |= VREAD; if (file_mode & S_IWUSR) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); goto privcheck; } /* Otherwise, check the groups (first match) */ if (groupmember(file_gid, cred)) { if (file_mode & S_IXGRP) dac_granted |= VEXEC; if (file_mode & S_IRGRP) dac_granted |= VREAD; if (file_mode & S_IWGRP) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); goto privcheck; } /* Otherwise, check everyone else. */ if (file_mode & S_IXOTH) dac_granted |= VEXEC; if (file_mode & S_IROTH) dac_granted |= VREAD; if (file_mode & S_IWOTH) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); privcheck: /* * Build a privilege mask to determine if the set of privileges * satisfies the requirements when combined with the granted mask * from above. For each privilege, if the privilege is required, * bitwise or the request type onto the priv_granted mask. */ priv_granted = 0; if (type == VDIR) { /* * For directories, use PRIV_VFS_LOOKUP to satisfy VEXEC * requests, instead of PRIV_VFS_EXEC. */ if ((accmode & VEXEC) && ((dac_granted & VEXEC) == 0) && !priv_check_cred(cred, PRIV_VFS_LOOKUP, 0)) priv_granted |= VEXEC; } else { /* * Ensure that at least one execute bit is on. Otherwise, * a privileged user will always succeed, and we don't want * this to happen unless the file really is executable. */ if ((accmode & VEXEC) && ((dac_granted & VEXEC) == 0) && (file_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0 && !priv_check_cred(cred, PRIV_VFS_EXEC, 0)) priv_granted |= VEXEC; } if ((accmode & VREAD) && ((dac_granted & VREAD) == 0) && !priv_check_cred(cred, PRIV_VFS_READ, 0)) priv_granted |= VREAD; if ((accmode & VWRITE) && ((dac_granted & VWRITE) == 0) && !priv_check_cred(cred, PRIV_VFS_WRITE, 0)) priv_granted |= (VWRITE | VAPPEND); if ((accmode & VADMIN) && ((dac_granted & VADMIN) == 0) && !priv_check_cred(cred, PRIV_VFS_ADMIN, 0)) priv_granted |= VADMIN; if ((accmode & (priv_granted | dac_granted)) == accmode) { /* XXX audit: privilege used */ if (privused != NULL) *privused = 1; return (0); } return ((accmode & VADMIN) ? EPERM : EACCES); } /* * Credential check based on process requesting service, and per-attribute * permissions. */ int extattr_check_cred(struct vnode *vp, int attrnamespace, struct ucred *cred, struct thread *td, accmode_t accmode) { /* * Kernel-invoked always succeeds. */ if (cred == NOCRED) return (0); /* * Do not allow privileged processes in jail to directly manipulate * system attributes. */ switch (attrnamespace) { case EXTATTR_NAMESPACE_SYSTEM: /* Potentially should be: return (EPERM); */ return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM, 0)); case EXTATTR_NAMESPACE_USER: return (VOP_ACCESS(vp, accmode, cred, td)); default: return (EPERM); } } #ifdef DEBUG_VFS_LOCKS /* * This only exists to supress warnings from unlocked specfs accesses. It is * no longer ok to have an unlocked VFS. */ #define IGNORE_LOCK(vp) (panicstr != NULL || (vp) == NULL || \ (vp)->v_type == VCHR || (vp)->v_type == VBAD) int vfs_badlock_ddb = 1; /* Drop into debugger on violation. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_ddb, CTLFLAG_RW, &vfs_badlock_ddb, 0, "Drop into debugger on lock violation"); int vfs_badlock_mutex = 1; /* Check for interlock across VOPs. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_mutex, CTLFLAG_RW, &vfs_badlock_mutex, 0, "Check for interlock across VOPs"); int vfs_badlock_print = 1; /* Print lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_print, CTLFLAG_RW, &vfs_badlock_print, 0, "Print lock violations"); #ifdef KDB int vfs_badlock_backtrace = 1; /* Print backtrace at lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_backtrace, CTLFLAG_RW, &vfs_badlock_backtrace, 0, "Print backtrace at lock violations"); #endif static void vfs_badlock(const char *msg, const char *str, struct vnode *vp) { #ifdef KDB if (vfs_badlock_backtrace) kdb_backtrace(); #endif if (vfs_badlock_print) printf("%s: %p %s\n", str, (void *)vp, msg); if (vfs_badlock_ddb) kdb_enter(KDB_WHY_VFSLOCK, "lock violation"); } void assert_vi_locked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && !mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is not locked but should be", str, vp); } void assert_vi_unlocked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is locked but should not be", str, vp); } void assert_vop_locked(struct vnode *vp, const char *str) { int locked; if (!IGNORE_LOCK(vp)) { locked = VOP_ISLOCKED(vp); if (locked == 0 || locked == LK_EXCLOTHER) vfs_badlock("is not locked but should be", str, vp); } } void assert_vop_unlocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) vfs_badlock("is locked but should not be", str, vp); } void assert_vop_elocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) != LK_EXCLUSIVE) vfs_badlock("is not exclusive locked but should be", str, vp); } #if 0 void assert_vop_elocked_other(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) != LK_EXCLOTHER) vfs_badlock("is not exclusive locked by another thread", str, vp); } void assert_vop_slocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) != LK_SHARED) vfs_badlock("is not locked shared but should be", str, vp); } #endif /* 0 */ #endif /* DEBUG_VFS_LOCKS */ void vop_rename_fail(struct vop_rename_args *ap) { if (ap->a_tvp != NULL) vput(ap->a_tvp); if (ap->a_tdvp == ap->a_tvp) vrele(ap->a_tdvp); else vput(ap->a_tdvp); vrele(ap->a_fdvp); vrele(ap->a_fvp); } void vop_rename_pre(void *ap) { struct vop_rename_args *a = ap; #ifdef DEBUG_VFS_LOCKS if (a->a_tvp) ASSERT_VI_UNLOCKED(a->a_tvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_tdvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fdvp, "VOP_RENAME"); /* Check the source (from). */ if (a->a_tdvp->v_vnlock != a->a_fdvp->v_vnlock && (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fdvp->v_vnlock)) ASSERT_VOP_UNLOCKED(a->a_fdvp, "vop_rename: fdvp locked"); if (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fvp->v_vnlock) ASSERT_VOP_UNLOCKED(a->a_fvp, "vop_rename: fvp locked"); /* Check the target. */ if (a->a_tvp) ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked"); ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked"); #endif if (a->a_tdvp != a->a_fdvp) vhold(a->a_fdvp); if (a->a_tvp != a->a_fvp) vhold(a->a_fvp); vhold(a->a_tdvp); if (a->a_tvp) vhold(a->a_tvp); } void vop_strategy_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_strategy_args *a; struct buf *bp; a = ap; bp = a->a_bp; /* * Cluster ops lock their component buffers but not the IO container. */ if ((bp->b_flags & B_CLUSTER) != 0) return; if (panicstr == NULL && !BUF_ISLOCKED(bp)) { if (vfs_badlock_print) printf( "VOP_STRATEGY: bp is not locked but should be\n"); if (vfs_badlock_ddb) kdb_enter(KDB_WHY_VFSLOCK, "lock violation"); } #endif } void vop_lock_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_lock1_args *a = ap; if ((a->a_flags & LK_INTERLOCK) == 0) ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); else ASSERT_VI_LOCKED(a->a_vp, "VOP_LOCK"); #endif } void vop_lock_post(void *ap, int rc) { #ifdef DEBUG_VFS_LOCKS struct vop_lock1_args *a = ap; ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); if (rc == 0 && (a->a_flags & LK_EXCLOTHER) == 0) ASSERT_VOP_LOCKED(a->a_vp, "VOP_LOCK"); #endif } void vop_unlock_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_unlock_args *a = ap; if (a->a_flags & LK_INTERLOCK) ASSERT_VI_LOCKED(a->a_vp, "VOP_UNLOCK"); ASSERT_VOP_LOCKED(a->a_vp, "VOP_UNLOCK"); #endif } void vop_unlock_post(void *ap, int rc) { #ifdef DEBUG_VFS_LOCKS struct vop_unlock_args *a = ap; if (a->a_flags & LK_INTERLOCK) ASSERT_VI_UNLOCKED(a->a_vp, "VOP_UNLOCK"); #endif } void vop_create_post(void *ap, int rc) { struct vop_create_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_deleteextattr_post(void *ap, int rc) { struct vop_deleteextattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_link_post(void *ap, int rc) { struct vop_link_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_LINK); VFS_KNOTE_LOCKED(a->a_tdvp, NOTE_WRITE); } } void vop_mkdir_post(void *ap, int rc) { struct vop_mkdir_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); } void vop_mknod_post(void *ap, int rc) { struct vop_mknod_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_remove_post(void *ap, int rc) { struct vop_remove_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_rename_post(void *ap, int rc) { struct vop_rename_args *a = ap; if (!rc) { VFS_KNOTE_UNLOCKED(a->a_fdvp, NOTE_WRITE); VFS_KNOTE_UNLOCKED(a->a_tdvp, NOTE_WRITE); VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME); if (a->a_tvp) VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE); } if (a->a_tdvp != a->a_fdvp) vdrop(a->a_fdvp); if (a->a_tvp != a->a_fvp) vdrop(a->a_fvp); vdrop(a->a_tdvp); if (a->a_tvp) vdrop(a->a_tvp); } void vop_rmdir_post(void *ap, int rc) { struct vop_rmdir_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_setattr_post(void *ap, int rc) { struct vop_setattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_setextattr_post(void *ap, int rc) { struct vop_setextattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_symlink_post(void *ap, int rc) { struct vop_symlink_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } static struct knlist fs_knlist; static void vfs_event_init(void *arg) { knlist_init_mtx(&fs_knlist, NULL); } /* XXX - correct order? */ SYSINIT(vfs_knlist, SI_SUB_VFS, SI_ORDER_ANY, vfs_event_init, NULL); void vfs_event_signal(fsid_t *fsid, uint32_t event, intptr_t data __unused) { KNOTE_UNLOCKED(&fs_knlist, event); } static int filt_fsattach(struct knote *kn); static void filt_fsdetach(struct knote *kn); static int filt_fsevent(struct knote *kn, long hint); struct filterops fs_filtops = { .f_isfd = 0, .f_attach = filt_fsattach, .f_detach = filt_fsdetach, .f_event = filt_fsevent }; static int filt_fsattach(struct knote *kn) { kn->kn_flags |= EV_CLEAR; knlist_add(&fs_knlist, kn, 0); return (0); } static void filt_fsdetach(struct knote *kn) { knlist_remove(&fs_knlist, kn, 0); } static int filt_fsevent(struct knote *kn, long hint) { kn->kn_fflags |= hint; return (kn->kn_fflags != 0); } static int sysctl_vfs_ctl(SYSCTL_HANDLER_ARGS) { struct vfsidctl vc; int error; struct mount *mp; error = SYSCTL_IN(req, &vc, sizeof(vc)); if (error) return (error); if (vc.vc_vers != VFS_CTL_VERS1) return (EINVAL); mp = vfs_getvfs(&vc.vc_fsid); if (mp == NULL) return (ENOENT); /* ensure that a specific sysctl goes to the right filesystem. */ if (strcmp(vc.vc_fstypename, "*") != 0 && strcmp(vc.vc_fstypename, mp->mnt_vfc->vfc_name) != 0) { vfs_rel(mp); return (EINVAL); } VCTLTOREQ(&vc, req); error = VFS_SYSCTL(mp, vc.vc_op, req); vfs_rel(mp); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, ctl, CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0, sysctl_vfs_ctl, "", "Sysctl by fsid"); /* * Function to initialize a va_filerev field sensibly. * XXX: Wouldn't a random number make a lot more sense ?? */ u_quad_t init_va_filerev(void) { struct bintime bt; getbinuptime(&bt); return (((u_quad_t)bt.sec << 32LL) | (bt.frac >> 32LL)); } static int filt_vfsread(struct knote *kn, long hint); static int filt_vfswrite(struct knote *kn, long hint); static int filt_vfsvnode(struct knote *kn, long hint); static void filt_vfsdetach(struct knote *kn); static struct filterops vfsread_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsread }; static struct filterops vfswrite_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfswrite }; static struct filterops vfsvnode_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsvnode }; static void vfs_knllock(void *arg) { struct vnode *vp = arg; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } static void vfs_knlunlock(void *arg) { struct vnode *vp = arg; VOP_UNLOCK(vp, 0); } static void vfs_knl_assert_locked(void *arg) { #ifdef DEBUG_VFS_LOCKS struct vnode *vp = arg; ASSERT_VOP_LOCKED(vp, "vfs_knl_assert_locked"); #endif } static void vfs_knl_assert_unlocked(void *arg) { #ifdef DEBUG_VFS_LOCKS struct vnode *vp = arg; ASSERT_VOP_UNLOCKED(vp, "vfs_knl_assert_unlocked"); #endif } int vfs_kqfilter(struct vop_kqfilter_args *ap) { struct vnode *vp = ap->a_vp; struct knote *kn = ap->a_kn; struct knlist *knl; switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &vfsread_filtops; break; case EVFILT_WRITE: kn->kn_fop = &vfswrite_filtops; break; case EVFILT_VNODE: kn->kn_fop = &vfsvnode_filtops; break; default: return (EINVAL); } kn->kn_hook = (caddr_t)vp; v_addpollinfo(vp); if (vp->v_pollinfo == NULL) return (ENOMEM); knl = &vp->v_pollinfo->vpi_selinfo.si_note; vhold(vp); knlist_add(knl, kn, 0); return (0); } /* * Detach knote from vnode */ static void filt_vfsdetach(struct knote *kn) { struct vnode *vp = (struct vnode *)kn->kn_hook; KASSERT(vp->v_pollinfo != NULL, ("Missing v_pollinfo")); knlist_remove(&vp->v_pollinfo->vpi_selinfo.si_note, kn, 0); vdrop(vp); } /*ARGSUSED*/ static int filt_vfsread(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; struct vattr va; int res; /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. */ if (hint == NOTE_REVOKE) { VI_LOCK(vp); kn->kn_flags |= (EV_EOF | EV_ONESHOT); VI_UNLOCK(vp); return (1); } if (VOP_GETATTR(vp, &va, curthread->td_ucred)) return (0); VI_LOCK(vp); kn->kn_data = va.va_size - kn->kn_fp->f_offset; res = (kn->kn_sfflags & NOTE_FILE_POLL) != 0 || kn->kn_data != 0; VI_UNLOCK(vp); return (res); } /*ARGSUSED*/ static int filt_vfswrite(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; VI_LOCK(vp); /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. */ if (hint == NOTE_REVOKE) kn->kn_flags |= (EV_EOF | EV_ONESHOT); kn->kn_data = 0; VI_UNLOCK(vp); return (1); } static int filt_vfsvnode(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; int res; VI_LOCK(vp); if (kn->kn_sfflags & hint) kn->kn_fflags |= hint; if (hint == NOTE_REVOKE) { kn->kn_flags |= EV_EOF; VI_UNLOCK(vp); return (1); } res = (kn->kn_fflags != 0); VI_UNLOCK(vp); return (res); } int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off) { int error; if (dp->d_reclen > ap->a_uio->uio_resid) return (ENAMETOOLONG); error = uiomove(dp, dp->d_reclen, ap->a_uio); if (error) { if (ap->a_ncookies != NULL) { if (ap->a_cookies != NULL) free(ap->a_cookies, M_TEMP); ap->a_cookies = NULL; *ap->a_ncookies = 0; } return (error); } if (ap->a_ncookies == NULL) return (0); KASSERT(ap->a_cookies, ("NULL ap->a_cookies value with non-NULL ap->a_ncookies!")); *ap->a_cookies = realloc(*ap->a_cookies, (*ap->a_ncookies + 1) * sizeof(u_long), M_TEMP, M_WAITOK | M_ZERO); (*ap->a_cookies)[*ap->a_ncookies] = off; return (0); } /* * Mark for update the access time of the file if the filesystem * supports VOP_MARKATIME. This functionality is used by execve and * mmap, so we want to avoid the I/O implied by directly setting * va_atime for the sake of efficiency. */ void vfs_mark_atime(struct vnode *vp, struct ucred *cred) { struct mount *mp; mp = vp->v_mount; ASSERT_VOP_LOCKED(vp, "vfs_mark_atime"); if (mp != NULL && (mp->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) (void)VOP_MARKATIME(vp); } /* * The purpose of this routine is to remove granularity from accmode_t, * reducing it into standard unix access bits - VEXEC, VREAD, VWRITE, * VADMIN and VAPPEND. * * If it returns 0, the caller is supposed to continue with the usual * access checks using 'accmode' as modified by this routine. If it * returns nonzero value, the caller is supposed to return that value * as errno. * * Note that after this routine runs, accmode may be zero. */ int vfs_unixify_accmode(accmode_t *accmode) { /* * There is no way to specify explicit "deny" rule using * file mode or POSIX.1e ACLs. */ if (*accmode & VEXPLICIT_DENY) { *accmode = 0; return (0); } /* * None of these can be translated into usual access bits. * Also, the common case for NFSv4 ACLs is to not contain * either of these bits. Caller should check for VWRITE * on the containing directory instead. */ if (*accmode & (VDELETE_CHILD | VDELETE)) return (EPERM); if (*accmode & VADMIN_PERMS) { *accmode &= ~VADMIN_PERMS; *accmode |= VADMIN; } /* * There is no way to deny VREAD_ATTRIBUTES, VREAD_ACL * or VSYNCHRONIZE using file mode or POSIX.1e ACL. */ *accmode &= ~(VSTAT_PERMS | VSYNCHRONIZE); return (0); } /* * These are helper functions for filesystems to traverse all * their vnodes. See MNT_VNODE_FOREACH_ALL() in sys/mount.h. * * This interface replaces MNT_VNODE_FOREACH. */ MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker"); struct vnode * __mnt_vnode_next_all(struct vnode **mvp, struct mount *mp) { struct vnode *vp; if (should_yield()) kern_yield(PRI_USER); MNT_ILOCK(mp); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); vp = TAILQ_NEXT(*mvp, v_nmntvnodes); while (vp != NULL && (vp->v_type == VMARKER || (vp->v_iflag & VI_DOOMED) != 0)) vp = TAILQ_NEXT(vp, v_nmntvnodes); /* Check if we are done */ if (vp == NULL) { __mnt_vnode_markerfree_all(mvp, mp); /* MNT_IUNLOCK(mp); -- done in above function */ mtx_assert(MNT_MTX(mp), MA_NOTOWNED); return (NULL); } TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); VI_LOCK(vp); MNT_IUNLOCK(mp); return (vp); } struct vnode * __mnt_vnode_first_all(struct vnode **mvp, struct mount *mp) { struct vnode *vp; *mvp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO); MNT_ILOCK(mp); MNT_REF(mp); (*mvp)->v_type = VMARKER; vp = TAILQ_FIRST(&mp->mnt_nvnodelist); while (vp != NULL && (vp->v_type == VMARKER || (vp->v_iflag & VI_DOOMED) != 0)) vp = TAILQ_NEXT(vp, v_nmntvnodes); /* Check if we are done */ if (vp == NULL) { MNT_REL(mp); MNT_IUNLOCK(mp); free(*mvp, M_VNODE_MARKER); *mvp = NULL; return (NULL); } (*mvp)->v_mount = mp; TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); VI_LOCK(vp); MNT_IUNLOCK(mp); return (vp); } void __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp) { if (*mvp == NULL) { MNT_IUNLOCK(mp); return; } mtx_assert(MNT_MTX(mp), MA_OWNED); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); MNT_REL(mp); MNT_IUNLOCK(mp); free(*mvp, M_VNODE_MARKER); *mvp = NULL; } /* * These are helper functions for filesystems to traverse their * active vnodes. See MNT_VNODE_FOREACH_ACTIVE() in sys/mount.h */ static void mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp) { KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); MNT_ILOCK(mp); MNT_REL(mp); MNT_IUNLOCK(mp); free(*mvp, M_VNODE_MARKER); *mvp = NULL; } static struct vnode * mnt_vnode_next_active(struct vnode **mvp, struct mount *mp) { struct vnode *vp, *nvp; mtx_assert(&vnode_free_list_mtx, MA_OWNED); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); restart: vp = TAILQ_NEXT(*mvp, v_actfreelist); TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist); while (vp != NULL) { if (vp->v_type == VMARKER) { vp = TAILQ_NEXT(vp, v_actfreelist); continue; } if (!VI_TRYLOCK(vp)) { if (mp_ncpus == 1 || should_yield()) { TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist); mtx_unlock(&vnode_free_list_mtx); pause("vnacti", 1); mtx_lock(&vnode_free_list_mtx); goto restart; } continue; } KASSERT(vp->v_type != VMARKER, ("locked marker %p", vp)); KASSERT(vp->v_mount == mp || vp->v_mount == NULL, ("alien vnode on the active list %p %p", vp, mp)); if (vp->v_mount == mp && (vp->v_iflag & VI_DOOMED) == 0) break; nvp = TAILQ_NEXT(vp, v_actfreelist); VI_UNLOCK(vp); vp = nvp; } /* Check if we are done */ if (vp == NULL) { mtx_unlock(&vnode_free_list_mtx); mnt_vnode_markerfree_active(mvp, mp); return (NULL); } TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist); mtx_unlock(&vnode_free_list_mtx); ASSERT_VI_LOCKED(vp, "active iter"); KASSERT((vp->v_iflag & VI_ACTIVE) != 0, ("Non-active vp %p", vp)); return (vp); } struct vnode * __mnt_vnode_next_active(struct vnode **mvp, struct mount *mp) { if (should_yield()) kern_yield(PRI_USER); mtx_lock(&vnode_free_list_mtx); return (mnt_vnode_next_active(mvp, mp)); } struct vnode * __mnt_vnode_first_active(struct vnode **mvp, struct mount *mp) { struct vnode *vp; *mvp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO); MNT_ILOCK(mp); MNT_REF(mp); MNT_IUNLOCK(mp); (*mvp)->v_type = VMARKER; (*mvp)->v_mount = mp; mtx_lock(&vnode_free_list_mtx); vp = TAILQ_FIRST(&mp->mnt_activevnodelist); if (vp == NULL) { mtx_unlock(&vnode_free_list_mtx); mnt_vnode_markerfree_active(mvp, mp); return (NULL); } TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist); return (mnt_vnode_next_active(mvp, mp)); } void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp) { if (*mvp == NULL) return; mtx_lock(&vnode_free_list_mtx); TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist); mtx_unlock(&vnode_free_list_mtx); mnt_vnode_markerfree_active(mvp, mp); } Index: projects/clang-trunk/sys/sys/vnode.h =================================================================== --- projects/clang-trunk/sys/sys/vnode.h (revision 287501) +++ projects/clang-trunk/sys/sys/vnode.h (revision 287502) @@ -1,871 +1,873 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vnode.h 8.7 (Berkeley) 2/4/94 * $FreeBSD$ */ #ifndef _SYS_VNODE_H_ #define _SYS_VNODE_H_ #include #include #include #include #include #include #include #include #include #include /* * The vnode is the focus of all file activity in UNIX. There is a * unique vnode allocated for each active file, each current directory, * each mounted-on file, text file, and the root. */ /* * Vnode types. VNON means no type. */ enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD, VMARKER }; /* * Each underlying filesystem allocates its own private area and hangs * it from v_data. If non-null, this area is freed in getnewvnode(). */ struct namecache; struct vpollinfo { struct mtx vpi_lock; /* lock to protect below */ struct selinfo vpi_selinfo; /* identity of poller(s) */ short vpi_events; /* what they are looking for */ short vpi_revents; /* what has happened */ }; /* * Reading or writing any of these items requires holding the appropriate lock. * * Lock reference: * c - namecache mutex * f - freelist mutex * i - interlock * I - updated with atomics, 0->1 and 1->0 transitions with interlock held * m - mount point interlock * p - pollinfo lock * u - Only a reference to the vnode is needed to read. * v - vnode lock * * Vnodes may be found on many lists. The general way to deal with operating * on a vnode that is on a list is: * 1) Lock the list and find the vnode. * 2) Lock interlock so that the vnode does not go away. * 3) Unlock the list to avoid lock order reversals. * 4) vget with LK_INTERLOCK and check for ENOENT, or * 5) Check for DOOMED if the vnode lock is not required. * 6) Perform your operation, then vput(). */ #if defined(_KERNEL) || defined(_KVM_VNODE) struct vnode { /* * Fields which define the identity of the vnode. These fields are * owned by the filesystem (XXX: and vgone() ?) */ const char *v_tag; /* u type of underlying data */ struct vop_vector *v_op; /* u vnode operations vector */ void *v_data; /* u private data for fs */ /* * Filesystem instance stuff */ struct mount *v_mount; /* u ptr to vfs we are in */ TAILQ_ENTRY(vnode) v_nmntvnodes; /* m vnodes for mount point */ /* * Type specific fields, only one applies to any given vnode. * See #defines below for renaming to v_* namespace. */ union { struct mount *vu_mount; /* v ptr to mountpoint (VDIR) */ struct socket *vu_socket; /* v unix domain net (VSOCK) */ struct cdev *vu_cdev; /* v device (VCHR, VBLK) */ struct fifoinfo *vu_fifoinfo; /* v fifo (VFIFO) */ } v_un; /* * vfs_hash: (mount + inode) -> vnode hash. The hash value * itself is grouped with other int fields, to avoid padding. */ LIST_ENTRY(vnode) v_hashlist; /* * VFS_namecache stuff */ LIST_HEAD(, namecache) v_cache_src; /* c Cache entries from us */ TAILQ_HEAD(, namecache) v_cache_dst; /* c Cache entries to us */ struct namecache *v_cache_dd; /* c Cache entry for .. vnode */ /* * Locking */ struct lock v_lock; /* u (if fs don't have one) */ struct mtx v_interlock; /* lock for "i" things */ struct lock *v_vnlock; /* u pointer to vnode lock */ /* * The machinery of being a vnode */ TAILQ_ENTRY(vnode) v_actfreelist; /* f vnode active/free lists */ struct bufobj v_bufobj; /* * Buffer cache object */ /* * Hooks for various subsystems and features. */ struct vpollinfo *v_pollinfo; /* i Poll events, p for *v_pi */ struct label *v_label; /* MAC label for vnode */ struct lockf *v_lockf; /* Byte-level advisory lock list */ struct rangelock v_rl; /* Byte-range lock */ /* * clustering stuff */ daddr_t v_cstart; /* v start block of cluster */ daddr_t v_lasta; /* v last allocation */ daddr_t v_lastw; /* v last write */ int v_clen; /* v length of cur. cluster */ u_int v_holdcnt; /* I prevents recycling. */ u_int v_usecount; /* I ref count of users */ u_int v_iflag; /* i vnode flags (see below) */ u_int v_vflag; /* v vnode flags */ int v_writecount; /* v ref count of writers */ u_int v_hash; enum vtype v_type; /* u vnode type */ }; #endif /* defined(_KERNEL) || defined(_KVM_VNODE) */ #define v_mountedhere v_un.vu_mount #define v_socket v_un.vu_socket #define v_rdev v_un.vu_cdev #define v_fifoinfo v_un.vu_fifoinfo /* XXX: These are temporary to avoid a source sweep at this time */ #define v_object v_bufobj.bo_object /* * Userland version of struct vnode, for sysctl. */ struct xvnode { size_t xv_size; /* sizeof(struct xvnode) */ void *xv_vnode; /* address of real vnode */ u_long xv_flag; /* vnode vflags */ int xv_usecount; /* reference count of users */ int xv_writecount; /* reference count of writers */ int xv_holdcnt; /* page & buffer references */ u_long xv_id; /* capability identifier */ void *xv_mount; /* address of parent mount */ long xv_numoutput; /* num of writes in progress */ enum vtype xv_type; /* vnode type */ union { void *xvu_socket; /* socket, if VSOCK */ void *xvu_fifo; /* fifo, if VFIFO */ dev_t xvu_rdev; /* maj/min, if VBLK/VCHR */ struct { dev_t xvu_dev; /* device, if VDIR/VREG/VLNK */ ino_t xvu_ino; /* id, if VDIR/VREG/VLNK */ } xv_uns; } xv_un; }; #define xv_socket xv_un.xvu_socket #define xv_fifo xv_un.xvu_fifo #define xv_rdev xv_un.xvu_rdev #define xv_dev xv_un.xv_uns.xvu_dev #define xv_ino xv_un.xv_uns.xvu_ino /* We don't need to lock the knlist */ #define VN_KNLIST_EMPTY(vp) ((vp)->v_pollinfo == NULL || \ KNLIST_EMPTY(&(vp)->v_pollinfo->vpi_selinfo.si_note)) #define VN_KNOTE(vp, b, a) \ do { \ if (!VN_KNLIST_EMPTY(vp)) \ KNOTE(&vp->v_pollinfo->vpi_selinfo.si_note, (b), \ (a) | KNF_NOKQLOCK); \ } while (0) #define VN_KNOTE_LOCKED(vp, b) VN_KNOTE(vp, b, KNF_LISTLOCKED) #define VN_KNOTE_UNLOCKED(vp, b) VN_KNOTE(vp, b, 0) /* * Vnode flags. * VI flags are protected by interlock and live in v_iflag * VV flags are protected by the vnode lock and live in v_vflag * * VI_DOOMED is doubly protected by the interlock and vnode lock. Both * are required for writing but the status may be checked with either. */ #define VI_MOUNT 0x0020 /* Mount in progress */ #define VI_AGE 0x0040 /* Insert vnode at head of free list */ #define VI_DOOMED 0x0080 /* This vnode is being recycled */ #define VI_FREE 0x0100 /* This vnode is on the freelist */ #define VI_ACTIVE 0x0200 /* This vnode is on the active list */ #define VI_DOINGINACT 0x0800 /* VOP_INACTIVE is in progress */ #define VI_OWEINACT 0x1000 /* Need to call inactive */ #define VV_ROOT 0x0001 /* root of its filesystem */ #define VV_ISTTY 0x0002 /* vnode represents a tty */ #define VV_NOSYNC 0x0004 /* unlinked, stop syncing */ #define VV_ETERNALDEV 0x0008 /* device that is never destroyed */ #define VV_CACHEDLABEL 0x0010 /* Vnode has valid cached MAC label */ #define VV_TEXT 0x0020 /* vnode is a pure text prototype */ #define VV_COPYONWRITE 0x0040 /* vnode is doing copy-on-write */ #define VV_SYSTEM 0x0080 /* vnode being used by kernel */ #define VV_PROCDEP 0x0100 /* vnode is process dependent */ #define VV_NOKNOTE 0x0200 /* don't activate knotes on this vnode */ #define VV_DELETED 0x0400 /* should be removed */ #define VV_MD 0x0800 /* vnode backs the md device */ #define VV_FORCEINSMQ 0x1000 /* force the insmntque to succeed */ /* * Vnode attributes. A field value of VNOVAL represents a field whose value * is unavailable (getattr) or which is not to be changed (setattr). */ struct vattr { enum vtype va_type; /* vnode type (for create) */ u_short va_mode; /* files access mode and type */ short va_nlink; /* number of references to file */ uid_t va_uid; /* owner user id */ gid_t va_gid; /* owner group id */ dev_t va_fsid; /* filesystem id */ long va_fileid; /* file id */ u_quad_t va_size; /* file size in bytes */ long va_blocksize; /* blocksize preferred for i/o */ struct timespec va_atime; /* time of last access */ struct timespec va_mtime; /* time of last modification */ struct timespec va_ctime; /* time file changed */ struct timespec va_birthtime; /* time file created */ u_long va_gen; /* generation number of file */ u_long va_flags; /* flags defined for file */ dev_t va_rdev; /* device the special file represents */ u_quad_t va_bytes; /* bytes of disk space held by file */ u_quad_t va_filerev; /* file modification number */ u_int va_vaflags; /* operations flags, see below */ long va_spare; /* remain quad aligned */ }; /* * Flags for va_vaflags. */ #define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */ #define VA_EXCLUSIVE 0x02 /* exclusive create request */ /* * Flags for ioflag. (high 16 bits used to ask for read-ahead and * help with write clustering) * NB: IO_NDELAY and IO_DIRECT are linked to fcntl.h */ #define IO_UNIT 0x0001 /* do I/O as atomic unit */ #define IO_APPEND 0x0002 /* append write to end */ #define IO_NDELAY 0x0004 /* FNDELAY flag set in file table */ #define IO_NODELOCKED 0x0008 /* underlying node already locked */ #define IO_ASYNC 0x0010 /* bawrite rather then bdwrite */ #define IO_VMIO 0x0020 /* data already in VMIO space */ #define IO_INVAL 0x0040 /* invalidate after I/O */ #define IO_SYNC 0x0080 /* do I/O synchronously */ #define IO_DIRECT 0x0100 /* attempt to bypass buffer cache */ #define IO_EXT 0x0400 /* operate on external attributes */ #define IO_NORMAL 0x0800 /* operate on regular data */ #define IO_NOMACCHECK 0x1000 /* MAC checks unnecessary */ #define IO_BUFLOCKED 0x2000 /* ffs flag; indir buf is locked */ #define IO_RANGELOCKED 0x4000 /* range locked */ #define IO_SEQMAX 0x7F /* seq heuristic max value */ #define IO_SEQSHIFT 16 /* seq heuristic in upper 16 bits */ /* * Flags for accmode_t. */ #define VEXEC 000000000100 /* execute/search permission */ #define VWRITE 000000000200 /* write permission */ #define VREAD 000000000400 /* read permission */ #define VADMIN 000000010000 /* being the file owner */ #define VAPPEND 000000040000 /* permission to write/append */ /* * VEXPLICIT_DENY makes VOP_ACCESSX(9) return EPERM or EACCES only * if permission was denied explicitly, by a "deny" rule in NFSv4 ACL, * and 0 otherwise. This never happens with ordinary unix access rights * or POSIX.1e ACLs. Obviously, VEXPLICIT_DENY must be OR-ed with * some other V* constant. */ #define VEXPLICIT_DENY 000000100000 #define VREAD_NAMED_ATTRS 000000200000 /* not used */ #define VWRITE_NAMED_ATTRS 000000400000 /* not used */ #define VDELETE_CHILD 000001000000 #define VREAD_ATTRIBUTES 000002000000 /* permission to stat(2) */ #define VWRITE_ATTRIBUTES 000004000000 /* change {m,c,a}time */ #define VDELETE 000010000000 #define VREAD_ACL 000020000000 /* read ACL and file mode */ #define VWRITE_ACL 000040000000 /* change ACL and/or file mode */ #define VWRITE_OWNER 000100000000 /* change file owner */ #define VSYNCHRONIZE 000200000000 /* not used */ #define VCREAT 000400000000 /* creating new file */ #define VVERIFY 001000000000 /* verification required */ /* * Permissions that were traditionally granted only to the file owner. */ #define VADMIN_PERMS (VADMIN | VWRITE_ATTRIBUTES | VWRITE_ACL | \ VWRITE_OWNER) /* * Permissions that were traditionally granted to everyone. */ #define VSTAT_PERMS (VREAD_ATTRIBUTES | VREAD_ACL) /* * Permissions that allow to change the state of the file in any way. */ #define VMODIFY_PERMS (VWRITE | VAPPEND | VADMIN_PERMS | VDELETE_CHILD | \ VDELETE) /* * Token indicating no attribute value yet assigned. */ #define VNOVAL (-1) /* * LK_TIMELOCK timeout for vnode locks (used mainly by the pageout daemon) */ #define VLKTIMEOUT (hz / 20 + 1) #ifdef _KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_VNODE); #endif /* * Convert between vnode types and inode formats (since POSIX.1 * defines mode word of stat structure in terms of inode formats). */ extern enum vtype iftovt_tab[]; extern int vttoif_tab[]; #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) #define VTTOIF(indx) (vttoif_tab[(int)(indx)]) #define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) /* * Flags to various vnode functions. */ #define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */ #define FORCECLOSE 0x0002 /* vflush: force file closure */ #define WRITECLOSE 0x0004 /* vflush: only close writable files */ #define EARLYFLUSH 0x0008 /* vflush: early call for ffs_flushfiles */ #define V_SAVE 0x0001 /* vinvalbuf: sync file first */ #define V_ALT 0x0002 /* vinvalbuf: invalidate only alternate bufs */ #define V_NORMAL 0x0004 /* vinvalbuf: invalidate only regular bufs */ #define V_CLEANONLY 0x0008 /* vinvalbuf: invalidate only clean bufs */ #define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */ #define V_WAIT 0x0001 /* vn_start_write: sleep for suspend */ #define V_NOWAIT 0x0002 /* vn_start_write: don't sleep for suspend */ #define V_XSLEEP 0x0004 /* vn_start_write: just return after sleep */ #define V_MNTREF 0x0010 /* vn_start_write: mp is already ref-ed */ #define VR_START_WRITE 0x0001 /* vfs_write_resume: start write atomically */ #define VR_NO_SUSPCLR 0x0002 /* vfs_write_resume: do not clear suspension */ #define VS_SKIP_UNMOUNT 0x0001 /* vfs_write_suspend: fail if the filesystem is being unmounted */ #define VREF(vp) vref(vp) #ifdef DIAGNOSTIC #define VATTR_NULL(vap) vattr_null(vap) #else #define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */ #endif /* DIAGNOSTIC */ #define NULLVP ((struct vnode *)NULL) /* * Global vnode data. */ extern struct vnode *rootvnode; /* root (i.e. "/") vnode */ extern struct mount *rootdevmp; /* "/dev" mount */ extern int async_io_version; /* 0 or POSIX version of AIO i'face */ extern int desiredvnodes; /* number of vnodes desired */ extern struct uma_zone *namei_zone; extern struct vattr va_null; /* predefined null vattr structure */ #define VI_LOCK(vp) mtx_lock(&(vp)->v_interlock) #define VI_LOCK_FLAGS(vp, flags) mtx_lock_flags(&(vp)->v_interlock, (flags)) #define VI_TRYLOCK(vp) mtx_trylock(&(vp)->v_interlock) #define VI_UNLOCK(vp) mtx_unlock(&(vp)->v_interlock) #define VI_MTX(vp) (&(vp)->v_interlock) #define VN_LOCK_AREC(vp) lockallowrecurse((vp)->v_vnlock) #define VN_LOCK_ASHARE(vp) lockallowshare((vp)->v_vnlock) #define VN_LOCK_DSHARE(vp) lockdisableshare((vp)->v_vnlock) #endif /* _KERNEL */ /* * Mods for extensibility. */ /* * Flags for vdesc_flags: */ #define VDESC_MAX_VPS 16 /* Low order 16 flag bits are reserved for willrele flags for vp arguments. */ #define VDESC_VP0_WILLRELE 0x0001 #define VDESC_VP1_WILLRELE 0x0002 #define VDESC_VP2_WILLRELE 0x0004 #define VDESC_VP3_WILLRELE 0x0008 #define VDESC_NOMAP_VPP 0x0100 #define VDESC_VPP_WILLRELE 0x0200 /* * A generic structure. * This can be used by bypass routines to identify generic arguments. */ struct vop_generic_args { struct vnodeop_desc *a_desc; /* other random data follows, presumably */ }; typedef int vop_bypass_t(struct vop_generic_args *); /* * VDESC_NO_OFFSET is used to identify the end of the offset list * and in places where no such field exists. */ #define VDESC_NO_OFFSET -1 /* * This structure describes the vnode operation taking place. */ struct vnodeop_desc { char *vdesc_name; /* a readable name for debugging */ int vdesc_flags; /* VDESC_* flags */ vop_bypass_t *vdesc_call; /* Function to call */ /* * These ops are used by bypass routines to map and locate arguments. * Creds and procs are not needed in bypass routines, but sometimes * they are useful to (for example) transport layers. * Nameidata is useful because it has a cred in it. */ int *vdesc_vp_offsets; /* list ended by VDESC_NO_OFFSET */ int vdesc_vpp_offset; /* return vpp location */ int vdesc_cred_offset; /* cred location, if any */ int vdesc_thread_offset; /* thread location, if any */ int vdesc_componentname_offset; /* if any */ }; #ifdef _KERNEL /* * A list of all the operation descs. */ extern struct vnodeop_desc *vnodeop_descs[]; #define VOPARG_OFFSETOF(s_type, field) __offsetof(s_type, field) #define VOPARG_OFFSETTO(s_type, s_offset, struct_p) \ ((s_type)(((char*)(struct_p)) + (s_offset))) #ifdef DEBUG_VFS_LOCKS /* * Support code to aid in debugging VFS locking problems. Not totally * reliable since if the thread sleeps between changing the lock * state and checking it with the assert, some other thread could * change the state. They are good enough for debugging a single * filesystem using a single-threaded test. Note that the unreliability is * limited to false negatives; efforts were made to ensure that false * positives cannot occur. */ void assert_vi_locked(struct vnode *vp, const char *str); void assert_vi_unlocked(struct vnode *vp, const char *str); void assert_vop_elocked(struct vnode *vp, const char *str); #if 0 void assert_vop_elocked_other(struct vnode *vp, const char *str); #endif void assert_vop_locked(struct vnode *vp, const char *str); #if 0 voi0 assert_vop_slocked(struct vnode *vp, const char *str); #endif void assert_vop_unlocked(struct vnode *vp, const char *str); #define ASSERT_VI_LOCKED(vp, str) assert_vi_locked((vp), (str)) #define ASSERT_VI_UNLOCKED(vp, str) assert_vi_unlocked((vp), (str)) #define ASSERT_VOP_ELOCKED(vp, str) assert_vop_elocked((vp), (str)) #if 0 #define ASSERT_VOP_ELOCKED_OTHER(vp, str) assert_vop_locked_other((vp), (str)) #endif #define ASSERT_VOP_LOCKED(vp, str) assert_vop_locked((vp), (str)) #if 0 #define ASSERT_VOP_SLOCKED(vp, str) assert_vop_slocked((vp), (str)) #endif #define ASSERT_VOP_UNLOCKED(vp, str) assert_vop_unlocked((vp), (str)) #else /* !DEBUG_VFS_LOCKS */ #define ASSERT_VI_LOCKED(vp, str) ((void)0) #define ASSERT_VI_UNLOCKED(vp, str) ((void)0) #define ASSERT_VOP_ELOCKED(vp, str) ((void)0) #if 0 #define ASSERT_VOP_ELOCKED_OTHER(vp, str) #endif #define ASSERT_VOP_LOCKED(vp, str) ((void)0) #if 0 #define ASSERT_VOP_SLOCKED(vp, str) #endif #define ASSERT_VOP_UNLOCKED(vp, str) ((void)0) #endif /* DEBUG_VFS_LOCKS */ /* * This call works for vnodes in the kernel. */ #define VCALL(c) ((c)->a_desc->vdesc_call(c)) #define DOINGASYNC(vp) \ (((vp)->v_mount->mnt_kern_flag & MNTK_ASYNC) != 0 && \ ((curthread->td_pflags & TDP_SYNCIO) == 0)) /* * VMIO support inline */ extern int vmiodirenable; static __inline int vn_canvmio(struct vnode *vp) { if (vp && (vp->v_type == VREG || (vmiodirenable && vp->v_type == VDIR))) return(TRUE); return(FALSE); } /* * Finally, include the default set of vnode operations. */ typedef void vop_getpages_iodone_t(void *, vm_page_t *, int, int); #include "vnode_if.h" /* vn_open_flags */ #define VN_OPEN_NOAUDIT 0x00000001 #define VN_OPEN_NOCAPCHECK 0x00000002 #define VN_OPEN_NAMECACHE 0x00000004 /* * Public vnode manipulation functions. */ struct componentname; struct file; struct mount; struct nameidata; struct ostat; struct thread; struct proc; struct stat; struct nstat; struct ucred; struct uio; struct vattr; struct vnode; typedef int (*vn_get_ino_t)(struct mount *, void *, int, struct vnode **); /* cache_* may belong in namei.h. */ +void cache_changesize(int newhashsize); #define cache_enter(dvp, vp, cnp) \ cache_enter_time(dvp, vp, cnp, NULL, NULL) void cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, struct timespec *tsp, struct timespec *dtsp); int cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct timespec *tsp, int *ticksp); void cache_purge(struct vnode *vp); void cache_purge_negative(struct vnode *vp); void cache_purgevfs(struct mount *mp); int change_dir(struct vnode *vp, struct thread *td); void cvtstat(struct stat *st, struct ostat *ost); void cvtnstat(struct stat *sb, struct nstat *nsb); int getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops, struct vnode **vpp); void getnewvnode_reserve(u_int count); void getnewvnode_drop_reserve(void); int insmntque1(struct vnode *vp, struct mount *mp, void (*dtr)(struct vnode *, void *), void *dtr_arg); int insmntque(struct vnode *vp, struct mount *mp); u_quad_t init_va_filerev(void); int speedup_syncer(void); int vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen); #define textvp_fullpath(p, rb, rfb) \ vn_fullpath(FIRST_THREAD_IN_PROC(p), (p)->p_textvp, rb, rfb) int vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf); int vn_fullpath_global(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf); struct vnode * vn_dir_dd_ino(struct vnode *vp); int vn_commname(struct vnode *vn, char *buf, u_int buflen); int vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path, u_int pathlen); int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid, accmode_t accmode, struct ucred *cred, int *privused); int vaccess_acl_nfs4(enum vtype type, uid_t file_uid, gid_t file_gid, struct acl *aclp, accmode_t accmode, struct ucred *cred, int *privused); int vaccess_acl_posix1e(enum vtype type, uid_t file_uid, gid_t file_gid, struct acl *acl, accmode_t accmode, struct ucred *cred, int *privused); void vattr_null(struct vattr *vap); int vcount(struct vnode *vp); #define vdrop(vp) _vdrop((vp), 0) #define vdropl(vp) _vdrop((vp), 1) void _vdrop(struct vnode *, bool); int vflush(struct mount *mp, int rootrefs, int flags, struct thread *td); int vget(struct vnode *vp, int lockflag, struct thread *td); void vgone(struct vnode *vp); #define vhold(vp) _vhold((vp), 0) #define vholdl(vp) _vhold((vp), 1) void _vhold(struct vnode *, bool); void vinactive(struct vnode *, struct thread *); int vinvalbuf(struct vnode *vp, int save, int slpflag, int slptimeo); int vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length, int blksize); void vunref(struct vnode *); void vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3); #define vprint(label, vp) vn_printf((vp), "%s\n", (label)) int vrecycle(struct vnode *vp); int vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred); int vn_close(struct vnode *vp, int flags, struct ucred *file_cred, struct thread *td); void vn_finished_write(struct mount *mp); void vn_finished_secondary_write(struct mount *mp); int vn_isdisk(struct vnode *vp, int *errp); int _vn_lock(struct vnode *vp, int flags, char *file, int line); #define vn_lock(vp, flags) _vn_lock(vp, flags, __FILE__, __LINE__) int vn_open(struct nameidata *ndp, int *flagp, int cmode, struct file *fp); int vn_open_cred(struct nameidata *ndp, int *flagp, int cmode, u_int vn_open_flags, struct ucred *cred, struct file *fp); int vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred, struct thread *td, struct file *fp); void vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end); int vn_pollrecord(struct vnode *vp, struct thread *p, int events); int vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base, int len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *active_cred, struct ucred *file_cred, ssize_t *aresid, struct thread *td); int vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, void *base, size_t len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *active_cred, struct ucred *file_cred, size_t *aresid, struct thread *td); int vn_rlimit_fsize(const struct vnode *vn, const struct uio *uio, struct thread *td); int vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred, struct ucred *file_cred, struct thread *td); int vn_start_write(struct vnode *vp, struct mount **mpp, int flags); int vn_start_secondary_write(struct vnode *vp, struct mount **mpp, int flags); int vn_writechk(struct vnode *vp); int vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, int *buflen, char *buf, struct thread *td); int vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, int buflen, char *buf, struct thread *td); int vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, struct thread *td); int vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags, struct vnode **rvp); int vn_vget_ino_gen(struct vnode *vp, vn_get_ino_t alloc, void *alloc_arg, int lkflags, struct vnode **rvp); int vn_utimes_perm(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td); int vn_io_fault_uiomove(char *data, int xfersize, struct uio *uio); int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize, struct uio *uio); #define vn_rangelock_unlock(vp, cookie) \ rangelock_unlock(&(vp)->v_rl, (cookie), VI_MTX(vp)) #define vn_rangelock_unlock_range(vp, cookie, start, end) \ rangelock_unlock_range(&(vp)->v_rl, (cookie), (start), (end), \ VI_MTX(vp)) #define vn_rangelock_rlock(vp, start, end) \ rangelock_rlock(&(vp)->v_rl, (start), (end), VI_MTX(vp)) #define vn_rangelock_wlock(vp, start, end) \ rangelock_wlock(&(vp)->v_rl, (start), (end), VI_MTX(vp)) int vfs_cache_lookup(struct vop_lookup_args *ap); void vfs_timestamp(struct timespec *); void vfs_write_resume(struct mount *mp, int flags); int vfs_write_suspend(struct mount *mp, int flags); int vfs_write_suspend_umnt(struct mount *mp); int vop_stdbmap(struct vop_bmap_args *); int vop_stdfsync(struct vop_fsync_args *); int vop_stdgetwritemount(struct vop_getwritemount_args *); int vop_stdgetpages(struct vop_getpages_args *); int vop_stdinactive(struct vop_inactive_args *); int vop_stdislocked(struct vop_islocked_args *); int vop_stdkqfilter(struct vop_kqfilter_args *); int vop_stdlock(struct vop_lock1_args *); int vop_stdputpages(struct vop_putpages_args *); int vop_stdunlock(struct vop_unlock_args *); int vop_nopoll(struct vop_poll_args *); int vop_stdaccess(struct vop_access_args *ap); int vop_stdaccessx(struct vop_accessx_args *ap); int vop_stdadvise(struct vop_advise_args *ap); int vop_stdadvlock(struct vop_advlock_args *ap); int vop_stdadvlockasync(struct vop_advlockasync_args *ap); int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap); int vop_stdallocate(struct vop_allocate_args *ap); int vop_stdpathconf(struct vop_pathconf_args *); int vop_stdpoll(struct vop_poll_args *); int vop_stdvptocnp(struct vop_vptocnp_args *ap); int vop_stdvptofh(struct vop_vptofh_args *ap); int vop_stdunp_bind(struct vop_unp_bind_args *ap); int vop_stdunp_connect(struct vop_unp_connect_args *ap); int vop_stdunp_detach(struct vop_unp_detach_args *ap); int vop_eopnotsupp(struct vop_generic_args *ap); int vop_ebadf(struct vop_generic_args *ap); int vop_einval(struct vop_generic_args *ap); int vop_enoent(struct vop_generic_args *ap); int vop_enotty(struct vop_generic_args *ap); int vop_null(struct vop_generic_args *ap); int vop_panic(struct vop_generic_args *ap); int dead_poll(struct vop_poll_args *ap); int dead_read(struct vop_read_args *ap); int dead_write(struct vop_write_args *ap); /* These are called from within the actual VOPS. */ void vop_create_post(void *a, int rc); void vop_deleteextattr_post(void *a, int rc); void vop_link_post(void *a, int rc); void vop_lock_pre(void *a); void vop_lock_post(void *a, int rc); void vop_lookup_post(void *a, int rc); void vop_lookup_pre(void *a); void vop_mkdir_post(void *a, int rc); void vop_mknod_post(void *a, int rc); void vop_remove_post(void *a, int rc); void vop_rename_post(void *a, int rc); void vop_rename_pre(void *a); void vop_rmdir_post(void *a, int rc); void vop_setattr_post(void *a, int rc); void vop_setextattr_post(void *a, int rc); void vop_strategy_pre(void *a); void vop_symlink_post(void *a, int rc); void vop_unlock_post(void *a, int rc); void vop_unlock_pre(void *a); void vop_rename_fail(struct vop_rename_args *ap); #define VOP_WRITE_PRE(ap) \ struct vattr va; \ int error, osize, ooffset, noffset; \ \ osize = ooffset = noffset = 0; \ if (!VN_KNLIST_EMPTY((ap)->a_vp)) { \ error = VOP_GETATTR((ap)->a_vp, &va, (ap)->a_cred); \ if (error) \ return (error); \ ooffset = (ap)->a_uio->uio_offset; \ osize = va.va_size; \ } #define VOP_WRITE_POST(ap, ret) \ noffset = (ap)->a_uio->uio_offset; \ if (noffset > ooffset && !VN_KNLIST_EMPTY((ap)->a_vp)) { \ VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE \ | (noffset > osize ? NOTE_EXTEND : 0)); \ } #define VOP_LOCK(vp, flags) VOP_LOCK1(vp, flags, __FILE__, __LINE__) void vput(struct vnode *vp); void vrele(struct vnode *vp); void vref(struct vnode *vp); int vrefcnt(struct vnode *vp); void v_addpollinfo(struct vnode *vp); int vnode_create_vobject(struct vnode *vp, off_t size, struct thread *td); void vnode_destroy_vobject(struct vnode *vp); extern struct vop_vector fifo_specops; extern struct vop_vector dead_vnodeops; extern struct vop_vector default_vnodeops; #define VOP_PANIC ((void*)(uintptr_t)vop_panic) #define VOP_NULL ((void*)(uintptr_t)vop_null) #define VOP_EBADF ((void*)(uintptr_t)vop_ebadf) #define VOP_ENOTTY ((void*)(uintptr_t)vop_enotty) #define VOP_EINVAL ((void*)(uintptr_t)vop_einval) #define VOP_ENOENT ((void*)(uintptr_t)vop_enoent) #define VOP_EOPNOTSUPP ((void*)(uintptr_t)vop_eopnotsupp) /* fifo_vnops.c */ int fifo_printinfo(struct vnode *); /* vfs_hash.c */ typedef int vfs_hash_cmp_t(struct vnode *vp, void *arg); +void vfs_hash_changesize(int newhashsize); int vfs_hash_get(const struct mount *mp, u_int hash, int flags, struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg); u_int vfs_hash_index(struct vnode *vp); int vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg); void vfs_hash_rehash(struct vnode *vp, u_int hash); void vfs_hash_remove(struct vnode *vp); int vfs_kqfilter(struct vop_kqfilter_args *); void vfs_mark_atime(struct vnode *vp, struct ucred *cred); struct dirent; int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off); int vfs_unixify_accmode(accmode_t *accmode); void vfs_unp_reclaim(struct vnode *vp); int setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode); int setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, gid_t gid); int vn_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td); int vn_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td); #endif /* _KERNEL */ #endif /* !_SYS_VNODE_H_ */ Index: projects/clang-trunk/sys =================================================================== --- projects/clang-trunk/sys (revision 287501) +++ projects/clang-trunk/sys (revision 287502) Property changes on: projects/clang-trunk/sys ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys:r287490-287501 Index: projects/clang-trunk/usr.bin/procstat/procstat_kstack.c =================================================================== --- projects/clang-trunk/usr.bin/procstat/procstat_kstack.c (revision 287501) +++ projects/clang-trunk/usr.bin/procstat/procstat_kstack.c (revision 287502) @@ -1,246 +1,246 @@ /*- * Copyright (c) 2007 Robert N. M. Watson * Copyright (c) 2015 Allan Jude * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include "procstat.h" /* * Walk the stack trace provided by the kernel and reduce it to what we * actually want to print. This involves stripping true instruction pointers, * frame numbers, and carriage returns as generated by stack(9). If -kk is * specified, print the function and offset, otherwise just the function. */ enum trace_state { TS_FRAMENUM, TS_PC, TS_AT, TS_FUNC, TS_OFF }; static enum trace_state kstack_nextstate(enum trace_state ts) { switch (ts) { case TS_FRAMENUM: return (TS_PC); case TS_PC: return (TS_AT); case TS_AT: return (TS_FUNC); case TS_FUNC: return (TS_OFF); case TS_OFF: return TS_FRAMENUM; default: errx(-1, "kstack_nextstate"); } } static void kstack_cleanup(const char *old, char *new, int kflag) { enum trace_state old_ts, ts; const char *cp_old; char *cp_new; ts = TS_FRAMENUM; for (cp_old = old, cp_new = new; *cp_old != '\0'; cp_old++) { switch (*cp_old) { case ' ': case '\n': case '+': old_ts = ts; ts = kstack_nextstate(old_ts); if (old_ts == TS_OFF) { *cp_new = ' '; cp_new++; } if (kflag > 1 && old_ts == TS_FUNC) { *cp_new = '+'; cp_new++; } continue; } if (ts == TS_FUNC || (kflag > 1 && ts == TS_OFF)) { *cp_new = *cp_old; cp_new++; } } *cp_new = '\0'; } static void kstack_cleanup_encoded(const char *old, char *new, int kflag) { enum trace_state old_ts, ts; const char *cp_old; char *cp_new, *cp_loop, *cp_tofree, *cp_line; ts = TS_FRAMENUM; if (kflag == 1) { for (cp_old = old, cp_new = new; *cp_old != '\0'; cp_old++) { switch (*cp_old) { case '\n': *cp_new = *cp_old; cp_new++; case ' ': case '+': old_ts = ts; ts = kstack_nextstate(old_ts); continue; } if (ts == TS_FUNC) { *cp_new = *cp_old; cp_new++; } } + *cp_new = '\0'; cp_tofree = cp_loop = strdup(new); } else cp_tofree = cp_loop = strdup(old); while ((cp_line = strsep(&cp_loop, "\n")) != NULL) { if (strlen(cp_line) != 0 && *cp_line != 127) xo_emit("{le:token/%s}", cp_line); } - *cp_new = '\0'; free(cp_tofree); } /* * Sort threads by tid. */ static int kinfo_kstack_compare(const void *a, const void *b) { return ((const struct kinfo_kstack *)a)->kkst_tid - ((const struct kinfo_kstack *)b)->kkst_tid; } static void kinfo_kstack_sort(struct kinfo_kstack *kkstp, int count) { qsort(kkstp, count, sizeof(*kkstp), kinfo_kstack_compare); } void procstat_kstack(struct procstat *procstat, struct kinfo_proc *kipp, int kflag) { struct kinfo_kstack *kkstp, *kkstp_free; struct kinfo_proc *kip, *kip_free; char trace[KKST_MAXLEN], encoded_trace[KKST_MAXLEN]; unsigned int i, j; unsigned int kip_count, kstk_count; if (!hflag) xo_emit("{T:/%5s %6s %-16s %-16s %-29s}\n", "PID", "TID", "COMM", "TDNAME", "KSTACK"); kkstp = kkstp_free = procstat_getkstack(procstat, kipp, &kstk_count); if (kkstp == NULL) return; /* * We need to re-query for thread information, so don't use *kipp. */ kip = kip_free = procstat_getprocs(procstat, KERN_PROC_PID | KERN_PROC_INC_THREAD, kipp->ki_pid, &kip_count); if (kip == NULL) { procstat_freekstack(procstat, kkstp_free); return; } kinfo_kstack_sort(kkstp, kstk_count); for (i = 0; i < kstk_count; i++) { kkstp = &kkstp_free[i]; /* * Look up the specific thread using its tid so we can * display the per-thread command line. */ kipp = NULL; for (j = 0; j < kip_count; j++) { kipp = &kip_free[j]; if (kkstp->kkst_tid == kipp->ki_tid) break; } if (kipp == NULL) continue; xo_emit("{k:process_id/%5d/%d} ", kipp->ki_pid); xo_emit("{:thread_id/%6d/%d} ", kkstp->kkst_tid); xo_emit("{:command/%-16s/%s} ", kipp->ki_comm); xo_emit("{:thread_name/%-16s/%s} ", (strlen(kipp->ki_tdname) && (strcmp(kipp->ki_comm, kipp->ki_tdname) != 0)) ? kipp->ki_tdname : "-"); switch (kkstp->kkst_state) { case KKST_STATE_RUNNING: xo_emit("{:state/%-29s/%s}\n", ""); continue; case KKST_STATE_SWAPPED: xo_emit("{:state/%-29s/%s}\n", ""); continue; case KKST_STATE_STACKOK: break; default: xo_emit("{:state/%-29s/%s}\n", ""); continue; } /* * The kernel generates a trace with carriage returns between * entries, but for a more compact view, we convert carriage * returns to spaces. */ kstack_cleanup(kkstp->kkst_trace, trace, kflag); xo_open_list("trace"); kstack_cleanup_encoded(kkstp->kkst_trace, encoded_trace, kflag); xo_close_list("trace"); xo_emit("{d:trace/%-29s}\n", trace); } procstat_freekstack(procstat, kkstp_free); procstat_freeprocs(procstat, kip_free); } Index: projects/clang-trunk/usr.bin/procstat =================================================================== --- projects/clang-trunk/usr.bin/procstat (revision 287501) +++ projects/clang-trunk/usr.bin/procstat (revision 287502) Property changes on: projects/clang-trunk/usr.bin/procstat ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/usr.bin/procstat:r287490-287501 Index: projects/clang-trunk/usr.sbin/ctladm/ctladm.8 =================================================================== --- projects/clang-trunk/usr.sbin/ctladm/ctladm.8 (revision 287501) +++ projects/clang-trunk/usr.sbin/ctladm/ctladm.8 (revision 287502) @@ -1,1117 +1,1124 @@ .\" .\" Copyright (c) 2003 Silicon Graphics International Corp. .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions, and the following disclaimer, .\" without modification. .\" 2. Redistributions in binary form must reproduce at minimum a disclaimer .\" substantially similar to the "NO WARRANTY" disclaimer below .\" ("Disclaimer") and any redistribution must be conditioned upon .\" including a substantially similar Disclaimer requirement for further .\" binary redistribution. .\" .\" NO WARRANTY .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS .\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT .\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR .\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT .\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, .\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING .\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGES. .\" .\" ctladm utility man page. .\" .\" Author: Ken Merry .\" .\" $Id: //depot/users/kenm/FreeBSD-test2/usr.sbin/ctladm/ctladm.8#3 $ .\" $FreeBSD$ .\" -.Dd June 3, 2015 +.Dd September 6, 2015 .Dt CTLADM 8 .Os .Sh NAME .Nm ctladm .Nd CAM Target Layer control utility .Sh SYNOPSIS .Nm .Aq Ar command .Op target:lun .Op generic args .Op command args .Nm .Ic tur .Aq target:lun .Op general options .Nm .Ic inquiry .Aq target:lun .Op general options .Nm .Ic reqsense .Aq target:lun .Op general options .Nm .Ic reportluns .Aq target:lun .Op general options .Nm .Ic read .Aq target:lun .Op general options .Aq Fl l Ar lba .Aq Fl d Ar datalen .Aq Fl f Ar file|- .Aq Fl b Ar blocksize_bytes .Op Fl c Ar cdbsize .Op Fl N .Nm .Ic write .Aq target:lun .Op general options .Aq Fl l Ar lba .Aq Fl d Ar datalen .Aq Fl f Ar file|- .Aq Fl b Ar blocksize_bytes .Op Fl c Ar cdbsize .Op Fl N .Nm .Ic readcap .Aq target:lun .Op general options .Op Fl c Ar cdbsize .Nm .Ic modesense .Aq target:lun .Aq Fl m Ar page | Fl l .Op Fl P Ar pc .Op Fl d .Op Fl S Ar subpage .Op Fl c Ar size .Nm .Ic start .Aq target:lun .Op general options .Op Fl i .Op Fl o .Nm .Ic stop .Aq target:lun .Op general options .Op Fl i .Op Fl o .Nm .Ic synccache .Aq target:lun .Op general options .Op Fl l Ar lba .Op Fl b Ar blockcount .Op Fl r .Op Fl i .Op Fl c Ar cdbsize .Nm .Ic shutdown .Op general options .Nm .Ic startup .Op general options .Nm .Ic lunlist .Nm .Ic delay .Aq target:lun .Aq Fl l Ar datamove|done .Aq Fl t Ar secs .Op Fl T Ar oneshot|cont .Nm .Ic realsync Aq on|off|query .Nm .Ic setsync interval .Aq target:lun .Aq Fl i Ar interval .Nm .Ic getsync .Aq target:lun .Nm .Ic inject .Aq Fl i Ar action .Aq Fl p Ar pattern .Op Fl r Ar lba,len .Op Fl s Ar len fmt Op Ar args .Op Fl c .Op Fl d Ar delete_id .Nm .Ic create .Aq Fl b Ar backend .Op Fl B Ar blocksize .Op Fl d Ar device_id .Op Fl l Ar lun_id .Op Fl o Ar name=value .Op Fl s Ar size_bytes .Op Fl S Ar serial_num .Op Fl t Ar device_type .Nm .Ic remove .Aq Fl b Ar backend .Aq Fl l Ar lun_id .Op Fl o Ar name=value .Nm .Ic modify .Aq Fl b Ar backend .Aq Fl l Ar lun_id +.Op Fl o Ar name=value .Aq Fl s Ar size_bytes .Nm .Ic devlist .Op Fl b Ar backend .Op Fl v .Op Fl x .Nm .Ic port .Op Fl l .Op Fl o Ar on|off .Op Fl w Ar wwpn .Op Fl W Ar wwnn .Op Fl p Ar targ_port .Op Fl t Ar fe_type .Op Fl q .Op Fl x .Nm .Ic portlist .Op Fl f Ar frontend .Op Fl i .Op Fl l .Op Fl p Ar targ_port .Op Fl q .Op Fl v .Op Fl x .Nm .Ic lunmap .Aq Fl p Ar targ_port .Op Fl l Ar pLUN .Op Fl L Ar cLUN .Nm .Ic dumpooa .Nm .Ic dumpstructs .Nm .Ic islist .Op Fl v .Op Fl x .Nm .Ic islogout .Aq Fl a | Fl c Ar connection-id | Fl i Ar name | Fl p Ar portal .Nm .Ic isterminate .Aq Fl a | Fl c Ar connection-id | Fl i Ar name | Fl p Ar portal .Nm .Ic help .Sh DESCRIPTION The .Nm utility is designed to provide a way to access and control the CAM Target Layer (CTL). It provides a way to send .Tn SCSI commands to the CTL layer, and also provides some meta-commands that utilize .Tn SCSI commands. (For instance, the .Ic lunlist command is implemented using the .Tn SCSI REPORT LUNS and INQUIRY commands.) .Pp The .Nm utility has a number of primary functions, many of which require a device identifier. The device identifier takes the following form: .Bl -tag -width 14n .It target:lun Specify the target (almost always 0) and LUN number to operate on. .El Many of the primary functions of the .Nm utility take the following optional arguments: .Bl -tag -width 10n .It Fl C Ar retries Specify the number of times to retry a command in the event of failure. .It Fl D Ar device Specify the device to open. This allows opening a device other than the default device, .Pa /dev/cam/ctl , to be opened for sending commands. .It Fl I Ar id Specify the initiator number to use. By default, .Nm will use 7 as the initiator number. .El .Pp Primary commands: .Bl -tag -width 11n .It Ic tur Send the .Tn SCSI TEST UNIT READY command to the device and report whether or not it is ready. .It Ic inquiry Send the .Tn SCSI INQUIRY command to the device and display some of the returned inquiry data. .It Ic reqsense Send the .Tn SCSI REQUEST SENSE command to the device and display the returned sense information. .It Ic reportluns Send the .Tn SCSI REPORT LUNS command to the device and display supported LUNs. .It Ic read Send a .Tn SCSI READ command to the device, and write the requested data to a file or stdout. .Bl -tag -width 12n .It Fl l Ar lba Specify the starting Logical Block Address for the READ. This can be specified in decimal, octal (starting with 0), hexadecimal (starting with 0x) or any other base supported by .Xr strtoull 3 . .It Fl d Ar datalen Specify the length, in 512 byte blocks, of the READ request. .It Fl f Ar file Specify the destination for the data read by the READ command. Either a filename or .Sq - for stdout may be specified. .It Fl c Ar cdbsize Specify the minimum .Tn SCSI CDB (Command Data Block) size to be used for the READ request. Allowable values are 6, 10, 12 and 16. Depending upon the LBA and amount of data requested, a larger CDB size may be used to satisfy the request. (e.g., for LBAs above 0xffffffff, READ(16) must be used to satisfy the request.) .It Fl b Ar blocksize Specify the blocksize of the underlying .Tn SCSI device, so the transfer length can be calculated accurately. The blocksize can be obtained via the .Tn SCSI READ CAPACITY command. .It Fl N Do not copy data to .Nm from the kernel when doing a read, just execute the command without copying data. This is to be used for performance testing. .El .It Ic write Read data from a file or stdin, and write the data to the device using the .Tn SCSI WRITE command. .Bl -tag -width 12n .It Fl l Ar lba Specify the starting Logical Block Address for the WRITE. This can be specified in decimal, octal (starting with 0), hexadecimal (starting with 0x) or any other base supported by .Xr strtoull 3 . .It Fl d Ar atalen Specify the length, in 512 byte blocks, of the WRITE request. .It Fl f Ar file Specify the source for the data to be written by the WRITE command. Either a filename or .Sq - for stdin may be specified. .It Fl c Ar cdbsize Specify the minimum .Tn SCSI CDB (Command Data Block) size to be used for the READ request. Allowable values are 6, 10, 12 and 16. Depending upon the LBA and amount of data requested, a larger CDB size may be used to satisfy the request. (e.g., for LBAs above 0xffffffff, READ(16) must be used to satisfy the request.) .It Fl b Ar blocksize Specify the blocksize of the underlying .Tn SCSI device, so the transfer length can be calculated accurately. The blocksize can be obtained via the .Tn SCSI READ CAPACITY command. .It Fl N Do not copy data to .Nm to the kernel when doing a write, just execute the command without copying data. This is to be used for performance testing. .El .It Ic readcap Send the .Tn SCSI READ CAPACITY command to the device and display the device size and device block size. By default, READ CAPACITY(10) is used. If the device returns a maximum LBA of 0xffffffff, however, .Nm will automatically issue a READ CAPACITY(16), which is implemented as a service action of the SERVICE ACTION IN(16) opcode. The user can specify the minimum CDB size with the .Fl c argument. Valid values for the .Fl c option are 10 and 16. If a 10 byte CDB is specified, the request will be automatically reissued with a 16 byte CDB if the maximum LBA returned is 0xffffffff. .It Ic modesense Send a .Tn SCSI MODE SENSE command to the device, and display the requested mode page(s) or page list. .Bl -tag -width 10n .It Fl m Ar page Specify the mode page to display. This option and the .Fl l option are mutually exclusive. One of the two must be specified, though. Mode page numbers may be specified in decimal or hexadecimal. .It Fl l Request that the list of mode pages supported by the device be returned. This option and the .Fl m option are mutually exclusive. One of the two must be specified, though. .It Fl P Ar pc Specify the mode page control value. Possible values are: .Bl -tag -width 2n -compact .It 0 Current values. .It 1 Changeable value bitmask. .It 2 Default values. .It 3 Saved values. .El .It Fl d Disable block descriptors when sending the mode sense request. .It Fl S Ar subpage Specify the subpage used with the mode sense request. .It Fl c Ar cdbsize Specify the CDB size used for the mode sense request. Supported values are 6 and 10. .El .It Ic start Send the .Tn SCSI START STOP UNIT command to the specified LUN with the start bit set. .Bl -tag -width 4n .It Fl i Set the immediate bit in the CDB. Note that CTL does not support the immediate bit, so this is primarily useful for making sure that CTL returns the proper error. .It Fl o Set the Copan proprietary on/offline bit in the CDB. When this flag is used, the LUN will be marked online again (see the description of the .Ic shutdown and .Ic startup commands). When this flag is used with a start command, the LUN will NOT be spun up. You need to use a start command without the .Fl o flag to spin up the disks in the LUN. .El .It Ic stop Send the .Tn SCSI START STOP UNIT command to the specified LUN with the start bit cleared. We use an ordered tag to stop the LUN, so we can guarantee that all pending I/O executes before it is stopped. (CTL guarantees this anyway, but .Nm sends an ordered tag for completeness.) .Bl -tag -width 4n .It Fl i Set the immediate bit in the CDB. Note that CTL does not support the immediate bit, so this is primarily useful for making sure that CTL returns the proper error. .It Fl o Set the Copan proprietary on/offline bit in the CDB. When this flag is used, the LUN will be spun down and taken offline ("Logical unit not ready, manual intervention required"). See the description of the .Ic shutdown and .Ic startup options. .El .It Ic synccache Send the .Tn SCSI SYNCHRONIZE CACHE command to the device. By default, SYNCHRONIZE CACHE(10) is used. If the specified starting LBA is greater than 0xffffffff or the length is greater than 0xffff, though, SYNCHRONIZE CACHE(16) will be used. The 16 byte command will also be used if the user specifies a 16 byte CDB with the .Fl c argument. .Bl -tag -width 14n .It Fl l Ar lba Specify the starting LBA of the cache region to synchronize. This option is a no-op for CTL. If you send a SYNCHRONIZE CACHE command, it will sync the cache for the entire LUN. .It Fl b Ar blockcount Specify the length of the cache region to synchronize. This option is a no-op for CTL. If you send a SYNCHRONIZE CACHE command, it will sync the cache for the entire LUN. .It Fl r Specify relative addressing for the starting LBA. CTL does not support relative addressing, since it only works for linked commands, and CTL does not support linked commands. .It Fl i Tell the target to return status immediately after issuing the SYNCHRONIZE CACHE command rather than waiting for the cache to finish syncing. CTL does not support this bit. .It Fl c Ar cdbsize Specify the minimum CDB size. Valid values are 10 and 16 bytes. .El .It Ic shutdown Issue a .Tn SCSI START STOP UNIT command with the start bit cleared and the on/offline bit set to all direct access LUNs. This will spin down all direct access LUNs, and mark them offline ("Logical unit not ready, manual intervention required"). Once marked offline, the state can only be cleared by sending a START STOP UNIT command with the start bit set and the on/offline bit set. The .Nm commands .Ic startup and .Ic start will accomplish this. Note that the on/offline bit is a non-standard Copan extension to the .Tn SCSI START STOP UNIT command, so merely sending a normal start command from an initiator will not clear the condition. (This is by design.) .It Ic startup Issue a .Tn SCSI START STOP UNIT command with the start bit set and the on/offline bit set to all direct access LUNs. This will mark all direct access LUNs "online" again. It will not cause any LUNs to start up. A separate start command without the on/offline bit set is necessary for that. .It Ic lunlist List all LUNs registered with CTL. Because this command uses the ioctl port, it will only work when the FETDs (Front End Target Drivers) are enabled. This command is the equivalent of doing a REPORT LUNS on one LUN and then an INQUIRY on each LUN in the system. .It Ic delay Delay commands at the given location. There are two places where commands may be delayed currently: before data is transferred .Pq Dq datamove and just prior to sending status to the host .Pq Dq done . One of the two must be supplied as an argument to the .Fl l option. The .Fl t option must also be specified. .Bl -tag -width 12n .It Fl l Ar delayloc Delay command(s) at the specified location. This can either be at the data movement stage (datamove) or prior to command completion (done). .It Fl t Ar delaytime Delay command(s) for the specified number of seconds. This must be specified. If set to 0, it will clear out any previously set delay for this particular location (datamove or done). .It Fl T Ar delaytype Specify the delay type. By default, the .Ic delay option will delay the next command sent to the given LUN. With the .Fl T Ar cont option, every command will be delayed by the specified period of time. With the .Fl T Ar oneshot the next command sent to the given LUN will be delayed and all subsequent commands will be completed normally. This is the default. .El .It Ic realsync Query and control CTL's SYNCHRONIZE CACHE behavior. The .Sq query argument will show whether SYNCHRONIZE CACHE commands are being sent to the backend or not. The default is to send SYNCHRONIZE CACHE commands to the backend. The .Sq on argument will cause all SYNCHRONIZE CACHE commands sent to all LUNs to be sent to the backend. The .Sq off argument will cause all SYNCHRONIZE CACHE commands sent to all LUNs to be immediately returned to the initiator with successful status. .It Ic setsync For a given lun, only actually service every Nth SYNCHRONIZE CACHE command that is sent. This can be used for debugging the optimal time period for sending SYNCHRONIZE cache commands. An interval of 0 means that the cache will be flushed for this LUN every time a SYNCHRONIZE CACHE command is received. .Pp You must specify the target and LUN you want to modify. .It Ic getsync Get the interval at which we actually service the SYNCHRONIZE CACHE command, as set by the .Ic setsync command above. The reported number means that we will actually flush the cache on every Nth SYNCHRONIZE CACHE command. A value of 0 means that we will flush the cache every time. .Pp You must specify the target and LUN you want to query. .It Ic inject Inject the specified type of error for the LUN specified, when a command that matches the given pattern is seen. The sense data returned is in either fixed or descriptor format, depending upon the status of the D_SENSE bit in the control mode page (page 0xa) for the LUN. .Pp Errors are only injected for commands that have not already failed for other reasons. By default, only the first command matching the pattern specified is returned with the supplied error. .Pp If the .Fl c flag is specified, all commands matching the pattern will be returned with the specified error until the error injection command is deleted with .Fl d flag. .Bl -tag -width 17n .It Fl i Ar action Specify the error to return: .Bl -tag -width 10n .It aborted Return the next matching command on the specified LUN with the sense key ABORTED COMMAND (0x0b), and the ASC/ASCQ 0x45,0x00 ("Select or reselect failure"). .It mediumerr Return the next matching command on the specified LUN with the sense key MEDIUM ERROR (0x03) and the ASC/ASCQ 0x11,0x00 ("Unrecovered read error") for reads, or ASC/ASCQ 0x0c,0x02 ("Write error - auto reallocation failed") for write errors. .It ua Return the next matching command on the specified LUN with the sense key UNIT ATTENTION (0x06) and the ASC/ASCQ 0x29,0x00 ("POWER ON, RESET, OR BUS DEVICE RESET OCCURRED"). .It custom Return the next matching command on the specified LUN with the supplied sense data. The .Fl s argument must be specified. .El .It Fl p Ar pattern Specify which commands should be returned with the given error. .Bl -tag -width 10n .It read The error should apply to READ(6), READ(10), READ(12), READ(16), etc. .It write The error should apply to WRITE(6), WRITE(10), WRITE(12), WRITE(16), WRITE AND VERIFY(10), etc. .It rw The error should apply to both read and write type commands. .It readcap The error should apply to READ CAPACITY(10) and READ CAPACITY(16) commands. .It tur The error should apply to TEST UNIT READY commands. .It any The error should apply to any command. .El .It Fl r Ar lba,len Specify the starting lba and length of the range of LBAs which should trigger an error. This option is only applies when read and/or write patterns are specified. If used with other command types, the error will never be triggered. .It Fl s Ar len fmt Op Ar args Specify the sense data that is to be returned for custom actions. If the format is .Sq - , len bytes of sense data will be read from standard input and written to the sense buffer. If len is longer than 252 bytes (the maximum allowable .Tn SCSI sense data length), it will be truncated to that length. The sense data format is described in .Xr cam_cdparse 3 . .It Fl c The error injection should be persistent, instead of happening once. Persistent errors must be deleted with the .Fl d argument. .It Fl d Ar delete_id Delete the specified error injection serial number. The serial number is returned when the error is injected. .El .It Ic port Perform one of several CTL frontend port operations. Either get a list of frontend ports .Pq Fl l , turn one or more frontends on or off .Pq Fl o Ar on|off , or set the World Wide Node Name .Pq Fl w Ar wwnn or World Wide Port Name .Pq Fl W Ar wwpn for a given port. One of .Fl l , .Fl o , or .Fl w or .Fl W must be specified. The WWNN and WWPN may both be specified at the same time, but cannot be combined with enabling/disabling or listing ports. .Bl -tag -width 12n .It Fl l List all CTL frontend ports or a specific port type or number. .It Fl o Ar on|off Turn the specified CTL frontend ports off or on. If no port number or port type is specified, all ports are turned on or off. .It Fl p Ar targ_port Specify the frontend port number. The port numbers can be found in the frontend port list. .It Fl q Omit the header in the port list output. .It Fl t Ar fe_type Specify the frontend type. Currently defined port types are .Dq fc (Fibre Channel), .Dq scsi (Parallel SCSI), .Dq ioctl (CTL ioctl interface), and .Dq internal (CTL CAM SIM). .It Fl w Ar wwnn Set the World Wide Node Name for the given port. The .Fl n argument must be specified, since this is only possible to implement on a single port. As a general rule, the WWNN should be the same across all ports on the system. .It Fl W Ar wwpn Set the World Wide Port Name for the given port. The .Fl n argument must be specified, since this is only possible to implement on a single port. As a general rule, the WWPN must be different for every port in the system. .It Fl x Output the port list in XML format. .El .It Ic portlist List CTL frontend ports. .Bl -tag -width 12n .It Fl f Ar frontend Specify the frontend type. .It Fl i Report target and connected initiators addresses. .It Fl l Report LUN mapping. .It Fl p Ar targ_port Specify the frontend port number. .It Fl q Omit the header in the port list output. .It Fl v Enable verbose output (report all port options). .It Fl x Output the port list in XML format. .El .It Ic lunmap Change LUN mapping for specified port. If both .Ar pLUN and .Ar cLUN are specified -- LUN will be mapped. If .Ar pLUN is specified, but .Ar cLUN is not -- LUN will be unmapped. If neither .Ar pLUN nor .Ar cLUN are specified -- LUN mapping will be disabled, exposing all CTL LUNs. .Bl -tag -width 12n .It Fl p Ar targ_port Specify the frontend port number. .It Fl l Ar pLUN LUN number visible by specified port. .It Fl L Ar cLUN CTL LUN number. .El .It Ic dumpooa Dump the OOA (Order Of Arrival) queue for each LUN registered with CTL. .It Ic dumpstructs Dump the CTL structures to the console. .It Ic create Create a new LUN. The backend must be specified, and depending upon the backend requested, some of the other options may be required. If the LUN is created successfully, the LUN configuration will be displayed. If LUN creation fails, a message will be displayed describing the failure. .Bl -tag -width 14n .It Fl b Ar backend The .Fl b flag is required. This specifies the name backend to use when creating the LUN. Examples are .Dq ramdisk and .Dq block . .It Fl B Ar blocksize Specify the blocksize of the backend in bytes. .It Fl d Ar device_id Specify the LUN-associated string to use in the .Tn SCSI INQUIRY VPD page 0x83 data. .It Fl l Ar lun_id Request that a particular LUN number be assigned. If the requested LUN number is not available, the request will fail. .It Fl o Ar name=value Specify a backend-specific name/value pair. Multiple .Fl o arguments may be specified. Refer to the backend documentation for arguments that may be used. .It Fl s Ar size_bytes Specify the size of the LUN in bytes. Some backends may allow setting the size (e.g. the ramdisk backend) and for others the size may be implicit (e.g. the block backend). .It Fl S Ar serial_num Specify the serial number to be used in the .Tn SCSI INQUIRY VPD page 0x80 data. .It Fl t Ar device_type Specify the numeric SCSI device type to use when creating the LUN. For example, the Direct Access type is 0. If this flag is not used, the type of LUN created is backend-specific. Not all LUN types are supported. Currently CTL only supports Direct Access (type 0) and Processor (type 3) LUNs. The backend requested may or may not support all of the LUN types that CTL supports. .El .It Ic remove Remove a LUN. The backend must be specified, and the LUN number must also be specified. Backend-specific options may also be specified with the .Fl o flag. .Bl -tag -width 14n .It Fl b Ar backend Specify the backend that owns the LUN to be removed. Examples are .Dq ramdisk and .Dq block . .It Fl l Ar lun_id Specify the LUN number to remove. .It Fl o Ar name=value Specify a backend-specific name/value pair. Multiple .Fl o arguments may be specified. Refer to the backend documentation for arguments that may be used. .El .It Ic modify Modify a LUN size. The backend, the LUN number, and the size must be specified. .Bl -tag -width 14n .It Fl b Ar backend Specify the backend that owns the LUN to be removed. Examples are .Dq ramdisk and .Dq block . .It Fl l Ar lun_id Specify the LUN number to remove. +.It Fl o Ar name=value +Specify a backend-specific name/value pair. +Multiple +.Fl o +arguments may be specified. +Refer to the backend documentation for arguments that may be used. .It Fl s Ar size_bytes Specify the size of the LUN in bytes. For the .Dq block backend, an .Dq auto keyword may be passed instead; this will make CTL use the size of backing file or device. .El .It Ic devlist Get a list of all configured LUNs. This also includes the LUN size and blocksize, serial number and device ID. .Bl -tag -width 11n .It Fl b Ar backend Specify the backend. This restricts the LUN list to the named backend. Examples are .Dq ramdisk and .Dq block . .It Fl v Be verbose. This will also display any backend-specific LUN attributes in addition to the standard per-LUN information. .It Fl x Dump the raw XML. The LUN list information from the kernel comes in XML format, and this option allows the display of the raw XML data. This option and the .Fl v and .Fl b options are mutually exclusive. If you specify .Fl x , the entire LUN database is displayed in XML format. .El .It Ic islist Get a list of currently running iSCSI sessions. This includes initiator and target names and the unique connection IDs. .Bl -tag -width 11n .It Fl v Verbose mode. .It Fl x Dump the raw XML. The sessions list information from the kernel comes in XML format, and this option allows the display of the raw XML data. .El .It Ic islogout Ask the initiator to log out iSCSI sessions matching criteria. .Bl -tag -width 11n .It Fl a Log out all sessions. .It Fl c Specify connection ID. .It Fl i Specify initiator name. .It Fl p Specify initiator portal (hostname or IP address). .El .It Ic isterminate Forcibly terminate iSCSI sessions matching criteria. .Bl -tag -width 11n .It Fl a Terminate all sessions. .It Fl c Specify connection ID. .It Fl i Specify initiator name. .It Fl p Specify initiator portal (hostname or IP address). .El .It Ic help Display .Nm usage information. .El .Sh OPTIONS Number of additional configuration options may be specified for LUNs. Some options are global, others are backend-specific. .Pp Global options: .Bl -tag -width 12n .It Va vendor Specifies LUN vendor string up to 8 chars. .It Va product Specifies LUN product string up to 16 chars. .It Va revision Specifies LUN revision string up to 4 chars. .It Va scsiname Specifies LUN SCSI name string. .It Va eui Specifies LUN EUI-64 identifier. .It Va naa Specifies LUN NAA identifier. Either EUI or NAA identifier should be set to UNIQUE value to allow EXTENDED COPY command access the LUN. Non-unique LUN identifiers may lead to data corruption. .It Va insecure_tpc Setting to "on" allows EXTENDED COPY command sent to this LUN access other LUNs on this host, not accessible otherwise. This allows to offload copying between different iSCSI targets residing on the same host in trusted environments. .It Va readcache Set to "off", disables read caching for the LUN, if supported by the backend. .It Va readonly Set to "on", blocks all media write operations to the LUN, reporting it as write protected. .It Va reordering Set to "unrestricted", allows target to process commands with SIMPLE task attribute in arbitrary order. Any data integrity exposures related to command sequence order shall be explicitly handled by the application client through the selection of appropriate commands and task attributes. The default value is "restricted". It improves data integrity, but may introduce some additional delays. .It Va serseq Set to "on" to serialize conseсutive reads/writes. Set to "read" to serialize conseсutive reads. Set to "off" to allow them be issued in parallel. Parallel issue of consecutive operations may confuse logic of the backing file system, hurting performance; but it may improve performance of backing stores without prefetch/write-back. .It Va pblocksize .It Va pblockoffset Specify physical block size and offset of the device. .It Va ublocksize .It Va ublockoffset Specify UNMAP block size and offset of the device. .It Va rpm .It Va rpm Specifies medium rotation rate of the device: 0 -- not reported, 1 -- non-rotating (SSD), >1024 -- value in revolutions per minute. .It Va formfactor Specifies nominal form factor of the device: 0 -- not reported, 1 -- 5.25", 2 -- 3.5", 3 -- 2.5", 4 -- 1.8", 5 -- less then 1.8". .It Va unmap Set to "on", enables UNMAP support for the LUN, if supported by the backend. .It Va avail-threshold .It Va used-threshold .It Va pool-avail-threshold .It Va pool-used-threshold Set per-LUN/-pool thin provisioning soft thresholds. LUN will establish UNIT ATTENTION condition if its or pool available space get below configured avail values, or its or pool used space get above configured used values. Pool thresholds are working only for ZVOL-backed LUNs. .It Va writecache Set to "off", disables write caching for the LUN, if supported by the backend. .El .Pp Options specific for block backend: .Bl -tag -width 12n .It Va file Specifies file or device name to use for backing store. .It Va num_threads Specifies number of backend threads to use for this LUN. .El .Sh EXAMPLES .Dl ctladm tur 0:1 .Pp Send a .Tn SCSI TEST UNIT READY command to LUN 1. .Pp .Dl ctladm modesense 0:1 -l .Pp Display the list of mode pages supported by LUN 1. .Pp .Dl ctladm modesense 0:0 -m 10 -P 3 -d -c 10 .Pp Display the saved version of the Control mode page (page 10) on LUN 0. Disable fetching block descriptors, and use a 10 byte MODE SENSE command instead of the default 6 byte command. .Bd -literal ctladm read 0:2 -l 0 -d 1 -b 512 -f - > foo .Ed .Pp Read the first 512 byte block from LUN 2 and dump it to the file .Pa foo . .Bd -literal ctladm write 0:3 -l 0xff432140 -d 20 -b 512 -f /tmp/bar .Ed .Pp Read 10240 bytes from the file .Pa /tmp/bar and write it to target 0, LUN 3. starting at LBA 0xff432140. .Pp .Dl ctladm create -b ramdisk -s 10485760000000000 .Pp Create a LUN with the .Dq fake ramdisk as a backing store. The LUN will claim to have a size of approximately 10 terabytes. .Pp .Dl ctladm create -b block -o file=src/usr.sbin/ctladm/ctladm.8 .Pp Create a LUN using the block backend, and specify the file .Pa src/usr.sbin/ctladm/ctladm.8 as the backing store. The size of the LUN will be derived from the size of the file. .Pp .Dl ctladm create -b block -o file=src/usr.sbin/ctladm/ctladm.8 -S MYSERIAL321 -d MYDEVID123 .Pp Create a LUN using the block backend, specify the file .Pa src/usr.sbin/ctladm/ctladm.8 as the backing store, and specify the .Tn SCSI VPD page 0x80 and 0x83 serial number .Fl ( S ) and device ID .Fl ( d ) . .Pp .Dl ctladm remove -b block -l 12 .Pp Remove LUN 12, which is handled by the block backend, from the system. .Pp .Dl ctladm devlist .Pp List configured LUNs in the system, along with their backend and serial number. This works when the Front End Target Drivers are enabled or disabled. .Pp .Dl ctladm lunlist .Pp List all LUNs in the system, along with their inquiry data and device type. This only works when the FETDs are enabled, since the commands go through the ioctl port. .Pp .Dl ctladm inject 0:6 -i mediumerr -p read -r 0,512 -c .Pp Inject a medium error on LUN 6 for every read that covers the first 512 blocks of the LUN. .Bd -literal -offset indent ctladm inject 0:6 -i custom -p tur -s 18 "f0 0 02 s12 04 02" .Ed .Pp Inject a custom error on LUN 6 for the next TEST UNIT READY command only. This will result in a sense key of NOT READY (0x02), and an ASC/ASCQ of 0x04,0x02 ("Logical unit not ready, initializing command required"). .Sh SEE ALSO .Xr cam 3 , .Xr cam_cdbparse 3 , .Xr cam 4 , .Xr ctl 4 , .Xr xpt 4 , .Xr camcontrol 8 , .Xr ctld 8 , .Xr ctlstat 8 .Sh HISTORY The .Nm utility was originally written during the Winter/Spring of 2003 as an interface to CTL. .Sh AUTHORS .An Ken Merry Aq Mt ken@FreeBSD.org Index: projects/clang-trunk/usr.sbin/ctladm/ctladm.c =================================================================== --- projects/clang-trunk/usr.sbin/ctladm/ctladm.c (revision 287501) +++ projects/clang-trunk/usr.sbin/ctladm/ctladm.c (revision 287502) @@ -1,4741 +1,4818 @@ /*- * Copyright (c) 2003, 2004 Silicon Graphics International Corp. * Copyright (c) 1997-2007 Kenneth D. Merry * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Edward Tomasz Napierala * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * $Id: //depot/users/kenm/FreeBSD-test2/usr.sbin/ctladm/ctladm.c#4 $ */ /* * CAM Target Layer exercise program. * * Author: Ken Merry */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ctladm.h" #ifdef min #undef min #endif #define min(x,y) (x < y) ? x : y typedef enum { CTLADM_CMD_TUR, CTLADM_CMD_INQUIRY, CTLADM_CMD_REQ_SENSE, CTLADM_CMD_ARRAYLIST, CTLADM_CMD_REPORT_LUNS, CTLADM_CMD_HELP, CTLADM_CMD_DEVLIST, CTLADM_CMD_ADDDEV, CTLADM_CMD_RM, CTLADM_CMD_CREATE, CTLADM_CMD_READ, CTLADM_CMD_WRITE, CTLADM_CMD_PORT, CTLADM_CMD_PORTLIST, CTLADM_CMD_READCAPACITY, CTLADM_CMD_MODESENSE, CTLADM_CMD_DUMPOOA, CTLADM_CMD_DUMPSTRUCTS, CTLADM_CMD_START, CTLADM_CMD_STOP, CTLADM_CMD_SYNC_CACHE, CTLADM_CMD_SHUTDOWN, CTLADM_CMD_STARTUP, CTLADM_CMD_LUNLIST, CTLADM_CMD_DELAY, CTLADM_CMD_REALSYNC, CTLADM_CMD_SETSYNC, CTLADM_CMD_GETSYNC, CTLADM_CMD_ERR_INJECT, CTLADM_CMD_PRES_IN, CTLADM_CMD_PRES_OUT, CTLADM_CMD_INQ_VPD_DEVID, CTLADM_CMD_RTPG, CTLADM_CMD_MODIFY, CTLADM_CMD_ISLIST, CTLADM_CMD_ISLOGOUT, CTLADM_CMD_ISTERMINATE, CTLADM_CMD_LUNMAP } ctladm_cmdfunction; typedef enum { CTLADM_ARG_NONE = 0x0000000, CTLADM_ARG_AUTOSENSE = 0x0000001, CTLADM_ARG_DEVICE = 0x0000002, CTLADM_ARG_ARRAYSIZE = 0x0000004, CTLADM_ARG_BACKEND = 0x0000008, CTLADM_ARG_CDBSIZE = 0x0000010, CTLADM_ARG_DATALEN = 0x0000020, CTLADM_ARG_FILENAME = 0x0000040, CTLADM_ARG_LBA = 0x0000080, CTLADM_ARG_PC = 0x0000100, CTLADM_ARG_PAGE_CODE = 0x0000200, CTLADM_ARG_PAGE_LIST = 0x0000400, CTLADM_ARG_SUBPAGE = 0x0000800, CTLADM_ARG_PAGELIST = 0x0001000, CTLADM_ARG_DBD = 0x0002000, CTLADM_ARG_TARG_LUN = 0x0004000, CTLADM_ARG_BLOCKSIZE = 0x0008000, CTLADM_ARG_IMMED = 0x0010000, CTLADM_ARG_RELADR = 0x0020000, CTLADM_ARG_RETRIES = 0x0040000, CTLADM_ARG_ONOFFLINE = 0x0080000, CTLADM_ARG_ONESHOT = 0x0100000, CTLADM_ARG_TIMEOUT = 0x0200000, CTLADM_ARG_INITIATOR = 0x0400000, CTLADM_ARG_NOCOPY = 0x0800000, CTLADM_ARG_NEED_TL = 0x1000000 } ctladm_cmdargs; struct ctladm_opts { const char *optname; uint32_t cmdnum; ctladm_cmdargs argnum; const char *subopt; }; typedef enum { CC_OR_NOT_FOUND, CC_OR_AMBIGUOUS, CC_OR_FOUND } ctladm_optret; static const char rw_opts[] = "Nb:c:d:f:l:"; static const char startstop_opts[] = "io"; static struct ctladm_opts option_table[] = { {"adddev", CTLADM_CMD_ADDDEV, CTLADM_ARG_NONE, NULL}, {"create", CTLADM_CMD_CREATE, CTLADM_ARG_NONE, "b:B:d:l:o:s:S:t:"}, {"delay", CTLADM_CMD_DELAY, CTLADM_ARG_NEED_TL, "T:l:t:"}, {"devid", CTLADM_CMD_INQ_VPD_DEVID, CTLADM_ARG_NEED_TL, NULL}, {"devlist", CTLADM_CMD_DEVLIST, CTLADM_ARG_NONE, "b:vx"}, {"dumpooa", CTLADM_CMD_DUMPOOA, CTLADM_ARG_NONE, NULL}, {"dumpstructs", CTLADM_CMD_DUMPSTRUCTS, CTLADM_ARG_NONE, NULL}, {"getsync", CTLADM_CMD_GETSYNC, CTLADM_ARG_NEED_TL, NULL}, {"help", CTLADM_CMD_HELP, CTLADM_ARG_NONE, NULL}, {"inject", CTLADM_CMD_ERR_INJECT, CTLADM_ARG_NEED_TL, "cd:i:p:r:s:"}, {"inquiry", CTLADM_CMD_INQUIRY, CTLADM_ARG_NEED_TL, NULL}, {"islist", CTLADM_CMD_ISLIST, CTLADM_ARG_NONE, "vx"}, {"islogout", CTLADM_CMD_ISLOGOUT, CTLADM_ARG_NONE, "ac:i:p:"}, {"isterminate", CTLADM_CMD_ISTERMINATE, CTLADM_ARG_NONE, "ac:i:p:"}, {"lunlist", CTLADM_CMD_LUNLIST, CTLADM_ARG_NONE, NULL}, {"lunmap", CTLADM_CMD_LUNMAP, CTLADM_ARG_NONE, "p:l:L:"}, {"modesense", CTLADM_CMD_MODESENSE, CTLADM_ARG_NEED_TL, "P:S:dlm:c:"}, - {"modify", CTLADM_CMD_MODIFY, CTLADM_ARG_NONE, "b:l:s:"}, + {"modify", CTLADM_CMD_MODIFY, CTLADM_ARG_NONE, "b:l:o:s:"}, {"port", CTLADM_CMD_PORT, CTLADM_ARG_NONE, "lo:p:qt:w:W:x"}, {"portlist", CTLADM_CMD_PORTLIST, CTLADM_ARG_NONE, "f:ilp:qvx"}, {"prin", CTLADM_CMD_PRES_IN, CTLADM_ARG_NEED_TL, "a:"}, {"prout", CTLADM_CMD_PRES_OUT, CTLADM_ARG_NEED_TL, "a:k:r:s:"}, {"read", CTLADM_CMD_READ, CTLADM_ARG_NEED_TL, rw_opts}, {"readcapacity", CTLADM_CMD_READCAPACITY, CTLADM_ARG_NEED_TL, "c:"}, {"realsync", CTLADM_CMD_REALSYNC, CTLADM_ARG_NONE, NULL}, {"remove", CTLADM_CMD_RM, CTLADM_ARG_NONE, "b:l:o:"}, {"reportluns", CTLADM_CMD_REPORT_LUNS, CTLADM_ARG_NEED_TL, NULL}, {"reqsense", CTLADM_CMD_REQ_SENSE, CTLADM_ARG_NEED_TL, NULL}, {"rtpg", CTLADM_CMD_RTPG, CTLADM_ARG_NEED_TL, NULL}, {"setsync", CTLADM_CMD_SETSYNC, CTLADM_ARG_NEED_TL, "i:"}, {"shutdown", CTLADM_CMD_SHUTDOWN, CTLADM_ARG_NONE, NULL}, {"start", CTLADM_CMD_START, CTLADM_ARG_NEED_TL, startstop_opts}, {"startup", CTLADM_CMD_STARTUP, CTLADM_ARG_NONE, NULL}, {"stop", CTLADM_CMD_STOP, CTLADM_ARG_NEED_TL, startstop_opts}, {"synccache", CTLADM_CMD_SYNC_CACHE, CTLADM_ARG_NEED_TL, "b:c:il:r"}, {"tur", CTLADM_CMD_TUR, CTLADM_ARG_NEED_TL, NULL}, {"write", CTLADM_CMD_WRITE, CTLADM_ARG_NEED_TL, rw_opts}, {"-?", CTLADM_CMD_HELP, CTLADM_ARG_NONE, NULL}, {"-h", CTLADM_CMD_HELP, CTLADM_ARG_NONE, NULL}, {NULL, 0, 0, NULL} }; ctladm_optret getoption(struct ctladm_opts *table, char *arg, uint32_t *cmdnum, ctladm_cmdargs *argnum, const char **subopt); static int cctl_parse_tl(char *str, int *target, int *lun); static int cctl_dump_ooa(int fd, int argc, char **argv); static int cctl_port_dump(int fd, int quiet, int xml, int32_t fe_num, ctl_port_type port_type); static int cctl_port(int fd, int argc, char **argv, char *combinedopt); static int cctl_do_io(int fd, int retries, union ctl_io *io, const char *func); static int cctl_delay(int fd, int target, int lun, int argc, char **argv, char *combinedopt); static int cctl_lunlist(int fd); static int cctl_startup_shutdown(int fd, int target, int lun, int iid, ctladm_cmdfunction command); static int cctl_sync_cache(int fd, int target, int lun, int iid, int retries, int argc, char **argv, char *combinedopt); static int cctl_start_stop(int fd, int target, int lun, int iid, int retries, int start, int argc, char **argv, char *combinedopt); static int cctl_mode_sense(int fd, int target, int lun, int iid, int retries, int argc, char **argv, char *combinedopt); static int cctl_read_capacity(int fd, int target, int lun, int iid, int retries, int argc, char **argv, char *combinedopt); static int cctl_read_write(int fd, int target, int lun, int iid, int retries, int argc, char **argv, char *combinedopt, ctladm_cmdfunction command); static int cctl_get_luns(int fd, int target, int lun, int iid, int retries, struct scsi_report_luns_data **lun_data, uint32_t *num_luns); static int cctl_report_luns(int fd, int target, int lun, int iid, int retries); static int cctl_tur(int fd, int target, int lun, int iid, int retries); static int cctl_get_inquiry(int fd, int target, int lun, int iid, int retries, char *path_str, int path_len, struct scsi_inquiry_data *inq_data); static int cctl_inquiry(int fd, int target, int lun, int iid, int retries); static int cctl_req_sense(int fd, int target, int lun, int iid, int retries); static int cctl_persistent_reserve_in(int fd, int target, int lun, int initiator, int argc, char **argv, char *combinedopt, int retry_count); static int cctl_persistent_reserve_out(int fd, int target, int lun, int initiator, int argc, char **argv, char *combinedopt, int retry_count); static int cctl_create_lun(int fd, int argc, char **argv, char *combinedopt); static int cctl_inquiry_vpd_devid(int fd, int target, int lun, int initiator); static int cctl_report_target_port_group(int fd, int target, int lun, int initiator); static int cctl_modify_lun(int fd, int argc, char **argv, char *combinedopt); ctladm_optret getoption(struct ctladm_opts *table, char *arg, uint32_t *cmdnum, ctladm_cmdargs *argnum, const char **subopt) { struct ctladm_opts *opts; int num_matches = 0; for (opts = table; (opts != NULL) && (opts->optname != NULL); opts++) { if (strncmp(opts->optname, arg, strlen(arg)) == 0) { *cmdnum = opts->cmdnum; *argnum = opts->argnum; *subopt = opts->subopt; if (strcmp(opts->optname, arg) == 0) return (CC_OR_FOUND); if (++num_matches > 1) return(CC_OR_AMBIGUOUS); } } if (num_matches > 0) return(CC_OR_FOUND); else return(CC_OR_NOT_FOUND); } static int cctl_parse_tl(char *str, int *target, int *lun) { char *tmpstr; int retval; retval = 0; while (isspace(*str) && (*str != '\0')) str++; tmpstr = (char *)strtok(str, ":"); if ((tmpstr != NULL) && (*tmpstr != '\0')) { *target = strtol(tmpstr, NULL, 0); tmpstr = (char *)strtok(NULL, ":"); if ((tmpstr != NULL) && (*tmpstr != '\0')) { *lun = strtol(tmpstr, NULL, 0); } else retval = -1; } else retval = -1; return (retval); } static int cctl_dump_ooa(int fd, int argc, char **argv) { struct ctl_ooa ooa; long double cmd_latency; int num_entries, len; int target = -1, lun = -1; int retval; unsigned int i; num_entries = 104; if ((argc > 2) && (isdigit(argv[2][0]))) { retval = cctl_parse_tl(argv[2], &target, &lun); if (retval != 0) warnx("invalid target:lun argument %s", argv[2]); } retry: len = num_entries * sizeof(struct ctl_ooa_entry); bzero(&ooa, sizeof(ooa)); ooa.entries = malloc(len); if (ooa.entries == NULL) { warn("%s: error mallocing %d bytes", __func__, len); return (1); } if (argc > 2) { ooa.lun_num = lun; } else ooa.flags |= CTL_OOA_FLAG_ALL_LUNS; ooa.alloc_len = len; ooa.alloc_num = num_entries; if (ioctl(fd, CTL_GET_OOA, &ooa) == -1) { warn("%s: CTL_GET_OOA ioctl failed", __func__); retval = 1; goto bailout; } if (ooa.status == CTL_OOA_NEED_MORE_SPACE) { num_entries = num_entries * 2; free(ooa.entries); ooa.entries = NULL; goto retry; } if (ooa.status != CTL_OOA_OK) { warnx("%s: CTL_GET_OOA ioctl returned error %d", __func__, ooa.status); retval = 1; goto bailout; } fprintf(stdout, "Dumping OOA queues\n"); for (i = 0; i < ooa.fill_num; i++) { struct ctl_ooa_entry *entry; char cdb_str[(SCSI_MAX_CDBLEN * 3) +1]; struct bintime delta_bt; struct timespec ts; entry = &ooa.entries[i]; delta_bt = ooa.cur_bt; bintime_sub(&delta_bt, &entry->start_bt); bintime2timespec(&delta_bt, &ts); cmd_latency = ts.tv_sec * 1000; if (ts.tv_nsec > 0) cmd_latency += ts.tv_nsec / 1000000; fprintf(stdout, "LUN %jd tag 0x%04x%s%s%s%s%s: %s. CDB: %s " "(%0.0Lf ms)\n", (intmax_t)entry->lun_num, entry->tag_num, (entry->cmd_flags & CTL_OOACMD_FLAG_BLOCKED) ? " BLOCKED" : "", (entry->cmd_flags & CTL_OOACMD_FLAG_DMA) ? " DMA" : "", (entry->cmd_flags & CTL_OOACMD_FLAG_DMA_QUEUED) ? " DMAQUEUED" : "", (entry->cmd_flags & CTL_OOACMD_FLAG_ABORT) ? " ABORT" : "", (entry->cmd_flags & CTL_OOACMD_FLAG_RTR) ? " RTR" :"", scsi_op_desc(entry->cdb[0], NULL), scsi_cdb_string(entry->cdb, cdb_str, sizeof(cdb_str)), cmd_latency); } fprintf(stdout, "OOA queues dump done\n"); #if 0 if (ioctl(fd, CTL_DUMP_OOA) == -1) { warn("%s: CTL_DUMP_OOA ioctl failed", __func__); return (1); } #endif bailout: free(ooa.entries); return (0); } static int cctl_dump_structs(int fd, ctladm_cmdargs cmdargs __unused) { if (ioctl(fd, CTL_DUMP_STRUCTS) == -1) { warn(__func__); return (1); } return (0); } static int cctl_port_dump(int fd, int quiet, int xml, int32_t targ_port, ctl_port_type port_type) { struct ctl_port_list port_list; struct ctl_port_entry *entries; struct sbuf *sb = NULL; int num_entries; int did_print = 0; unsigned int i; num_entries = 16; retry: entries = malloc(sizeof(*entries) * num_entries); bzero(&port_list, sizeof(port_list)); port_list.entries = entries; port_list.alloc_num = num_entries; port_list.alloc_len = num_entries * sizeof(*entries); if (ioctl(fd, CTL_GET_PORT_LIST, &port_list) != 0) { warn("%s: CTL_GET_PORT_LIST ioctl failed", __func__); return (1); } if (port_list.status == CTL_PORT_LIST_NEED_MORE_SPACE) { printf("%s: allocated %d, need %d, retrying\n", __func__, num_entries, port_list.fill_num + port_list.dropped_num); free(entries); num_entries = port_list.fill_num + port_list.dropped_num; goto retry; } if ((quiet == 0) && (xml == 0)) printf("Port Online Type Name pp vp %-18s %-18s\n", "WWNN", "WWPN"); if (xml != 0) { sb = sbuf_new_auto(); sbuf_printf(sb, "\n"); } for (i = 0; i < port_list.fill_num; i++) { struct ctl_port_entry *entry; const char *type; entry = &entries[i]; switch (entry->port_type) { case CTL_PORT_FC: type = "FC"; break; case CTL_PORT_SCSI: type = "SCSI"; break; case CTL_PORT_IOCTL: type = "IOCTL"; break; case CTL_PORT_INTERNAL: type = "INTERNAL"; break; case CTL_PORT_ISC: type = "ISC"; break; case CTL_PORT_ISCSI: type = "ISCSI"; break; case CTL_PORT_SAS: type = "SAS"; break; default: type = "UNKNOWN"; break; } /* * If the user specified a frontend number or a particular * frontend type, only print out that particular frontend * or frontend type. */ if ((targ_port != -1) && (targ_port != entry->targ_port)) continue; else if ((port_type != CTL_PORT_NONE) && ((port_type & entry->port_type) == 0)) continue; did_print = 1; #if 0 printf("Num: %ju Type: %s (%#x) Name: %s Physical Port: %d " "Virtual Port: %d\n", (uintmax_t)entry->fe_num, type, entry->port_type, entry->fe_name, entry->physical_port, entry->virtual_port); printf("WWNN %#jx WWPN %#jx Online: %s\n", (uintmax_t)entry->wwnn, (uintmax_t)entry->wwpn, (entry->online) ? "YES" : "NO" ); #endif if (xml == 0) { printf("%-4d %-6s %-8s %-12s %-2d %-2d %#-18jx " "%#-18jx\n", entry->targ_port, (entry->online) ? "YES" : "NO", type, entry->port_name, entry->physical_port, entry->virtual_port, (uintmax_t)entry->wwnn, (uintmax_t)entry->wwpn); } else { sbuf_printf(sb, "\n", entry->targ_port); sbuf_printf(sb, "%s\n", (entry->online) ? "YES" : "NO"); sbuf_printf(sb, "%s\n", type); sbuf_printf(sb, "%s\n", entry->port_name); sbuf_printf(sb, "%d\n", entry->physical_port); sbuf_printf(sb, "%d\n", entry->virtual_port); sbuf_printf(sb, "%#jx\n", (uintmax_t)entry->wwnn); sbuf_printf(sb, "%#jx\n", (uintmax_t)entry->wwpn); sbuf_printf(sb, "\n"); } } if (xml != 0) { sbuf_printf(sb, "\n"); if (sbuf_finish(sb) != 0) err(1, "%s: sbuf_finish", __func__); printf("%s", sbuf_data(sb)); sbuf_delete(sb); } /* * Give some indication that we didn't find the frontend or * frontend type requested by the user. We could print something * out, but it would probably be better to hide that behind a * verbose flag. */ if ((did_print == 0) && ((targ_port != -1) || (port_type != CTL_PORT_NONE))) return (1); else return (0); } typedef enum { CCTL_PORT_MODE_NONE, CCTL_PORT_MODE_LIST, CCTL_PORT_MODE_SET, CCTL_PORT_MODE_ON, CCTL_PORT_MODE_OFF } cctl_port_mode; static struct ctladm_opts cctl_fe_table[] = { {"fc", CTL_PORT_FC, CTLADM_ARG_NONE, NULL}, {"scsi", CTL_PORT_SCSI, CTLADM_ARG_NONE, NULL}, {"internal", CTL_PORT_INTERNAL, CTLADM_ARG_NONE, NULL}, {"iscsi", CTL_PORT_ISCSI, CTLADM_ARG_NONE, NULL}, {"sas", CTL_PORT_SAS, CTLADM_ARG_NONE, NULL}, {"all", CTL_PORT_ALL, CTLADM_ARG_NONE, NULL}, {NULL, 0, 0, NULL} }; static int cctl_port(int fd, int argc, char **argv, char *combinedopt) { int c; int32_t targ_port = -1; int retval = 0; int wwnn_set = 0, wwpn_set = 0; uint64_t wwnn = 0, wwpn = 0; cctl_port_mode port_mode = CCTL_PORT_MODE_NONE; struct ctl_port_entry entry; ctl_port_type port_type = CTL_PORT_NONE; int quiet = 0, xml = 0; while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'l': if (port_mode != CCTL_PORT_MODE_NONE) goto bailout_badarg; port_mode = CCTL_PORT_MODE_LIST; break; case 'o': if (port_mode != CCTL_PORT_MODE_NONE) goto bailout_badarg; if (strcasecmp(optarg, "on") == 0) port_mode = CCTL_PORT_MODE_ON; else if (strcasecmp(optarg, "off") == 0) port_mode = CCTL_PORT_MODE_OFF; else { warnx("Invalid -o argument %s, \"on\" or " "\"off\" are the only valid args", optarg); retval = 1; goto bailout; } break; case 'p': targ_port = strtol(optarg, NULL, 0); break; case 'q': quiet = 1; break; case 't': { ctladm_optret optret; ctladm_cmdargs argnum; const char *subopt; ctl_port_type tmp_port_type; optret = getoption(cctl_fe_table, optarg, &tmp_port_type, &argnum, &subopt); if (optret == CC_OR_AMBIGUOUS) { warnx("%s: ambiguous frontend type %s", __func__, optarg); retval = 1; goto bailout; } else if (optret == CC_OR_NOT_FOUND) { warnx("%s: invalid frontend type %s", __func__, optarg); retval = 1; goto bailout; } port_type |= tmp_port_type; break; } case 'w': if ((port_mode != CCTL_PORT_MODE_NONE) && (port_mode != CCTL_PORT_MODE_SET)) goto bailout_badarg; port_mode = CCTL_PORT_MODE_SET; wwnn = strtoull(optarg, NULL, 0); wwnn_set = 1; break; case 'W': if ((port_mode != CCTL_PORT_MODE_NONE) && (port_mode != CCTL_PORT_MODE_SET)) goto bailout_badarg; port_mode = CCTL_PORT_MODE_SET; wwpn = strtoull(optarg, NULL, 0); wwpn_set = 1; break; case 'x': xml = 1; break; } } /* * The user can specify either one or more frontend types (-t), or * a specific frontend, but not both. * * If the user didn't specify a frontend type or number, set it to * all. This is primarily needed for the enable/disable ioctls. * This will be a no-op for the listing code. For the set ioctl, * we'll throw an error, since that only works on one port at a time. */ if ((port_type != CTL_PORT_NONE) && (targ_port != -1)) { warnx("%s: can only specify one of -t or -n", __func__); retval = 1; goto bailout; } else if ((targ_port == -1) && (port_type == CTL_PORT_NONE)) port_type = CTL_PORT_ALL; bzero(&entry, sizeof(entry)); /* * These are needed for all but list/dump mode. */ entry.port_type = port_type; entry.targ_port = targ_port; switch (port_mode) { case CCTL_PORT_MODE_LIST: cctl_port_dump(fd, quiet, xml, targ_port, port_type); break; case CCTL_PORT_MODE_SET: if (targ_port == -1) { warnx("%s: -w and -W require -n", __func__); retval = 1; goto bailout; } if (wwnn_set) { entry.flags |= CTL_PORT_WWNN_VALID; entry.wwnn = wwnn; } if (wwpn_set) { entry.flags |= CTL_PORT_WWPN_VALID; entry.wwpn = wwpn; } if (ioctl(fd, CTL_SET_PORT_WWNS, &entry) == -1) { warn("%s: CTL_SET_PORT_WWNS ioctl failed", __func__); retval = 1; goto bailout; } break; case CCTL_PORT_MODE_ON: if (ioctl(fd, CTL_ENABLE_PORT, &entry) == -1) { warn("%s: CTL_ENABLE_PORT ioctl failed", __func__); retval = 1; goto bailout; } fprintf(stdout, "Front End Ports enabled\n"); break; case CCTL_PORT_MODE_OFF: if (ioctl(fd, CTL_DISABLE_PORT, &entry) == -1) { warn("%s: CTL_DISABLE_PORT ioctl failed", __func__); retval = 1; goto bailout; } fprintf(stdout, "Front End Ports disabled\n"); break; default: warnx("%s: one of -l, -o or -w/-W must be specified", __func__); retval = 1; goto bailout; break; } bailout: return (retval); bailout_badarg: warnx("%s: only one of -l, -o or -w/-W may be specified", __func__); return (1); } static int cctl_do_io(int fd, int retries, union ctl_io *io, const char *func) { do { if (ioctl(fd, CTL_IO, io) == -1) { warn("%s: error sending CTL_IO ioctl", func); return (-1); } } while (((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS) && (retries-- > 0)); return (0); } static int cctl_delay(int fd, int target, int lun, int argc, char **argv, char *combinedopt) { struct ctl_io_delay_info delay_info; char *delayloc = NULL; char *delaytype = NULL; int delaytime = -1; int retval; int c; retval = 0; memset(&delay_info, 0, sizeof(delay_info)); while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'T': delaytype = strdup(optarg); break; case 'l': delayloc = strdup(optarg); break; case 't': delaytime = strtoul(optarg, NULL, 0); break; } } if (delaytime == -1) { warnx("%s: you must specify the delaytime with -t", __func__); retval = 1; goto bailout; } if (strcasecmp(delayloc, "datamove") == 0) delay_info.delay_loc = CTL_DELAY_LOC_DATAMOVE; else if (strcasecmp(delayloc, "done") == 0) delay_info.delay_loc = CTL_DELAY_LOC_DONE; else { warnx("%s: invalid delay location %s", __func__, delayloc); retval = 1; goto bailout; } if ((delaytype == NULL) || (strcmp(delaytype, "oneshot") == 0)) delay_info.delay_type = CTL_DELAY_TYPE_ONESHOT; else if (strcmp(delaytype, "cont") == 0) delay_info.delay_type = CTL_DELAY_TYPE_CONT; else { warnx("%s: invalid delay type %s", __func__, delaytype); retval = 1; goto bailout; } delay_info.target_id = target; delay_info.lun_id = lun; delay_info.delay_secs = delaytime; if (ioctl(fd, CTL_DELAY_IO, &delay_info) == -1) { warn("%s: CTL_DELAY_IO ioctl failed", __func__); retval = 1; goto bailout; } switch (delay_info.status) { case CTL_DELAY_STATUS_NONE: warnx("%s: no delay status??", __func__); retval = 1; break; case CTL_DELAY_STATUS_OK: break; case CTL_DELAY_STATUS_INVALID_LUN: warnx("%s: invalid lun %d", __func__, lun); retval = 1; break; case CTL_DELAY_STATUS_INVALID_TYPE: warnx("%s: invalid delay type %d", __func__, delay_info.delay_type); retval = 1; break; case CTL_DELAY_STATUS_INVALID_LOC: warnx("%s: delay location %s not implemented?", __func__, delayloc); retval = 1; break; case CTL_DELAY_STATUS_NOT_IMPLEMENTED: warnx("%s: delay not implemented in the kernel", __func__); warnx("%s: recompile with the CTL_IO_DELAY flag set", __func__); retval = 1; break; default: warnx("%s: unknown delay return status %d", __func__, delay_info.status); retval = 1; break; } bailout: /* delayloc should never be NULL, but just in case...*/ if (delayloc != NULL) free(delayloc); return (retval); } static int cctl_realsync(int fd, int argc, char **argv) { int syncstate; int retval; char *syncarg; retval = 0; if (argc != 3) { warnx("%s %s takes exactly one argument", argv[0], argv[1]); retval = 1; goto bailout; } syncarg = argv[2]; if (strncasecmp(syncarg, "query", min(strlen(syncarg), strlen("query"))) == 0) { if (ioctl(fd, CTL_REALSYNC_GET, &syncstate) == -1) { warn("%s: CTL_REALSYNC_GET ioctl failed", __func__); retval = 1; goto bailout; } fprintf(stdout, "SYNCHRONIZE CACHE support is: "); switch (syncstate) { case 0: fprintf(stdout, "OFF\n"); break; case 1: fprintf(stdout, "ON\n"); break; default: fprintf(stdout, "unknown (%d)\n", syncstate); break; } goto bailout; } else if (strcasecmp(syncarg, "on") == 0) { syncstate = 1; } else if (strcasecmp(syncarg, "off") == 0) { syncstate = 0; } else { warnx("%s: invalid realsync argument %s", __func__, syncarg); retval = 1; goto bailout; } if (ioctl(fd, CTL_REALSYNC_SET, &syncstate) == -1) { warn("%s: CTL_REALSYNC_SET ioctl failed", __func__); retval = 1; goto bailout; } bailout: return (retval); } static int cctl_getsetsync(int fd, int target, int lun, ctladm_cmdfunction command, int argc, char **argv, char *combinedopt) { struct ctl_sync_info sync_info; uint32_t ioctl_cmd; int sync_interval = -1; int retval; int c; retval = 0; memset(&sync_info, 0, sizeof(sync_info)); sync_info.target_id = target; sync_info.lun_id = lun; while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'i': sync_interval = strtoul(optarg, NULL, 0); break; default: break; } } if (command == CTLADM_CMD_SETSYNC) { if (sync_interval == -1) { warnx("%s: you must specify the sync interval with -i", __func__); retval = 1; goto bailout; } sync_info.sync_interval = sync_interval; ioctl_cmd = CTL_SETSYNC; } else { ioctl_cmd = CTL_GETSYNC; } if (ioctl(fd, ioctl_cmd, &sync_info) == -1) { warn("%s: CTL_%sSYNC ioctl failed", __func__, (command == CTLADM_CMD_SETSYNC) ? "SET" : "GET"); retval = 1; goto bailout; } switch (sync_info.status) { case CTL_GS_SYNC_OK: if (command == CTLADM_CMD_GETSYNC) { fprintf(stdout, "%d:%d: sync interval: %d\n", target, lun, sync_info.sync_interval); } break; case CTL_GS_SYNC_NO_LUN: warnx("%s: unknown target:LUN %d:%d", __func__, target, lun); retval = 1; break; case CTL_GS_SYNC_NONE: default: warnx("%s: unknown CTL_%sSYNC status %d", __func__, (command == CTLADM_CMD_SETSYNC) ? "SET" : "GET", sync_info.status); retval = 1; break; } bailout: return (retval); } static struct ctladm_opts cctl_err_types[] = { {"aborted", CTL_LUN_INJ_ABORTED, CTLADM_ARG_NONE, NULL}, {"mediumerr", CTL_LUN_INJ_MEDIUM_ERR, CTLADM_ARG_NONE, NULL}, {"ua", CTL_LUN_INJ_UA, CTLADM_ARG_NONE, NULL}, {"custom", CTL_LUN_INJ_CUSTOM, CTLADM_ARG_NONE, NULL}, {NULL, 0, 0, NULL} }; static struct ctladm_opts cctl_err_patterns[] = { {"read", CTL_LUN_PAT_READ, CTLADM_ARG_NONE, NULL}, {"write", CTL_LUN_PAT_WRITE, CTLADM_ARG_NONE, NULL}, {"rw", CTL_LUN_PAT_READWRITE, CTLADM_ARG_NONE, NULL}, {"readwrite", CTL_LUN_PAT_READWRITE, CTLADM_ARG_NONE, NULL}, {"readcap", CTL_LUN_PAT_READCAP, CTLADM_ARG_NONE, NULL}, {"tur", CTL_LUN_PAT_TUR, CTLADM_ARG_NONE, NULL}, {"any", CTL_LUN_PAT_ANY, CTLADM_ARG_NONE, NULL}, #if 0 {"cmd", CTL_LUN_PAT_CMD, CTLADM_ARG_NONE, NULL}, #endif {NULL, 0, 0, NULL} }; static int cctl_error_inject(int fd, uint32_t target, uint32_t lun, int argc, char **argv, char *combinedopt) { int retval = 0; struct ctl_error_desc err_desc; uint64_t lba = 0; uint32_t len = 0; uint64_t delete_id = 0; int delete_id_set = 0; int continuous = 0; int sense_len = 0; int fd_sense = 0; int c; bzero(&err_desc, sizeof(err_desc)); err_desc.target_id = target; err_desc.lun_id = lun; while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'c': continuous = 1; break; case 'd': delete_id = strtoull(optarg, NULL, 0); delete_id_set = 1; break; case 'i': case 'p': { ctladm_optret optret; ctladm_cmdargs argnum; const char *subopt; if (c == 'i') { ctl_lun_error err_type; if (err_desc.lun_error != CTL_LUN_INJ_NONE) { warnx("%s: can't specify multiple -i " "arguments", __func__); retval = 1; goto bailout; } optret = getoption(cctl_err_types, optarg, &err_type, &argnum, &subopt); err_desc.lun_error = err_type; } else { ctl_lun_error_pattern pattern; optret = getoption(cctl_err_patterns, optarg, &pattern, &argnum, &subopt); err_desc.error_pattern |= pattern; } if (optret == CC_OR_AMBIGUOUS) { warnx("%s: ambiguous argument %s", __func__, optarg); retval = 1; goto bailout; } else if (optret == CC_OR_NOT_FOUND) { warnx("%s: argument %s not found", __func__, optarg); retval = 1; goto bailout; } break; } case 'r': { char *tmpstr, *tmpstr2; tmpstr = strdup(optarg); if (tmpstr == NULL) { warn("%s: error duplicating string %s", __func__, optarg); retval = 1; goto bailout; } tmpstr2 = strsep(&tmpstr, ","); if (tmpstr2 == NULL) { warnx("%s: invalid -r argument %s", __func__, optarg); retval = 1; free(tmpstr); goto bailout; } lba = strtoull(tmpstr2, NULL, 0); tmpstr2 = strsep(&tmpstr, ","); if (tmpstr2 == NULL) { warnx("%s: no len argument for -r lba,len, got" " %s", __func__, optarg); retval = 1; free(tmpstr); goto bailout; } len = strtoul(tmpstr2, NULL, 0); free(tmpstr); break; } case 's': { struct get_hook hook; char *sensestr; sense_len = strtol(optarg, NULL, 0); if (sense_len <= 0) { warnx("invalid number of sense bytes %d", sense_len); retval = 1; goto bailout; } sense_len = MIN(sense_len, SSD_FULL_SIZE); hook.argc = argc - optind; hook.argv = argv + optind; hook.got = 0; sensestr = cget(&hook, NULL); if ((sensestr != NULL) && (sensestr[0] == '-')) { fd_sense = 1; } else { buff_encode_visit( (uint8_t *)&err_desc.custom_sense, sense_len, sensestr, iget, &hook); } optind += hook.got; break; } default: break; } } if (delete_id_set != 0) { err_desc.serial = delete_id; if (ioctl(fd, CTL_ERROR_INJECT_DELETE, &err_desc) == -1) { warn("%s: error issuing CTL_ERROR_INJECT_DELETE ioctl", __func__); retval = 1; } goto bailout; } if (err_desc.lun_error == CTL_LUN_INJ_NONE) { warnx("%s: error injection command (-i) needed", __func__); retval = 1; goto bailout; } else if ((err_desc.lun_error == CTL_LUN_INJ_CUSTOM) && (sense_len == 0)) { warnx("%s: custom error requires -s", __func__); retval = 1; goto bailout; } if (continuous != 0) err_desc.lun_error |= CTL_LUN_INJ_CONTINUOUS; /* * If fd_sense is set, we need to read the sense data the user * wants returned from stdin. */ if (fd_sense == 1) { ssize_t amt_read; int amt_to_read = sense_len; u_int8_t *buf_ptr = (uint8_t *)&err_desc.custom_sense; for (amt_read = 0; amt_to_read > 0; amt_read = read(STDIN_FILENO, buf_ptr, amt_to_read)) { if (amt_read == -1) { warn("error reading sense data from stdin"); retval = 1; goto bailout; } amt_to_read -= amt_read; buf_ptr += amt_read; } } if (err_desc.error_pattern == CTL_LUN_PAT_NONE) { warnx("%s: command pattern (-p) needed", __func__); retval = 1; goto bailout; } if (len != 0) { err_desc.error_pattern |= CTL_LUN_PAT_RANGE; /* * We could check here to see whether it's a read/write * command, but that will be pointless once we allow * custom patterns. At that point, the user could specify * a READ(6) CDB type, and we wouldn't have an easy way here * to verify whether range checking is possible there. The * user will just figure it out when his error never gets * executed. */ #if 0 if ((err_desc.pattern & CTL_LUN_PAT_READWRITE) == 0) { warnx("%s: need read and/or write pattern if range " "is specified", __func__); retval = 1; goto bailout; } #endif err_desc.lba_range.lba = lba; err_desc.lba_range.len = len; } if (ioctl(fd, CTL_ERROR_INJECT, &err_desc) == -1) { warn("%s: error issuing CTL_ERROR_INJECT ioctl", __func__); retval = 1; } else { printf("Error injection succeeded, serial number is %ju\n", (uintmax_t)err_desc.serial); } bailout: return (retval); } static int cctl_lunlist(int fd) { struct scsi_report_luns_data *lun_data; struct scsi_inquiry_data *inq_data; uint32_t num_luns; int target; int initid; unsigned int i; int retval; retval = 0; inq_data = NULL; target = 6; initid = 7; /* * XXX KDM assuming LUN 0 is fine, but we may need to change this * if we ever acquire the ability to have multiple targets. */ if ((retval = cctl_get_luns(fd, target, /*lun*/ 0, initid, /*retries*/ 2, &lun_data, &num_luns)) != 0) goto bailout; inq_data = malloc(sizeof(*inq_data)); if (inq_data == NULL) { warn("%s: couldn't allocate memory for inquiry data\n", __func__); retval = 1; goto bailout; } for (i = 0; i < num_luns; i++) { char scsi_path[40]; int lun_val; switch (lun_data->luns[i].lundata[0] & RPL_LUNDATA_ATYP_MASK) { case RPL_LUNDATA_ATYP_PERIPH: lun_val = lun_data->luns[i].lundata[1]; break; case RPL_LUNDATA_ATYP_FLAT: lun_val = (lun_data->luns[i].lundata[0] & RPL_LUNDATA_FLAT_LUN_MASK) | (lun_data->luns[i].lundata[1] << RPL_LUNDATA_FLAT_LUN_BITS); break; case RPL_LUNDATA_ATYP_LUN: case RPL_LUNDATA_ATYP_EXTLUN: default: fprintf(stdout, "Unsupported LUN format %d\n", lun_data->luns[i].lundata[0] & RPL_LUNDATA_ATYP_MASK); lun_val = -1; break; } if (lun_val == -1) continue; if ((retval = cctl_get_inquiry(fd, target, lun_val, initid, /*retries*/ 2, scsi_path, sizeof(scsi_path), inq_data)) != 0) { goto bailout; } printf("%s", scsi_path); scsi_print_inquiry(inq_data); } bailout: if (lun_data != NULL) free(lun_data); if (inq_data != NULL) free(inq_data); return (retval); } static int cctl_startup_shutdown(int fd, int target, int lun, int iid, ctladm_cmdfunction command) { union ctl_io *io; struct ctl_id id; struct scsi_report_luns_data *lun_data; struct scsi_inquiry_data *inq_data; uint32_t num_luns; unsigned int i; int retval; retval = 0; inq_data = NULL; /* * - report luns * - step through each lun, do an inquiry * - check OOA queue on direct access luns * - send stop with offline bit to each direct access device with a * clear OOA queue * - if we get a reservation conflict, reset the LUN to clear it * and reissue the stop with the offline bit set */ id.id = iid; io = ctl_scsi_alloc_io(id); if (io == NULL) { warnx("%s: can't allocate memory", __func__); return (1); } if ((retval = cctl_get_luns(fd, target, lun, iid, /*retries*/ 2, &lun_data, &num_luns)) != 0) goto bailout; inq_data = malloc(sizeof(*inq_data)); if (inq_data == NULL) { warn("%s: couldn't allocate memory for inquiry data\n", __func__); retval = 1; goto bailout; } for (i = 0; i < num_luns; i++) { char scsi_path[40]; int lun_val; /* * XXX KDM figure out a way to share this code with * cctl_lunlist()? */ switch (lun_data->luns[i].lundata[0] & RPL_LUNDATA_ATYP_MASK) { case RPL_LUNDATA_ATYP_PERIPH: lun_val = lun_data->luns[i].lundata[1]; break; case RPL_LUNDATA_ATYP_FLAT: lun_val = (lun_data->luns[i].lundata[0] & RPL_LUNDATA_FLAT_LUN_MASK) | (lun_data->luns[i].lundata[1] << RPL_LUNDATA_FLAT_LUN_BITS); break; case RPL_LUNDATA_ATYP_LUN: case RPL_LUNDATA_ATYP_EXTLUN: default: fprintf(stdout, "Unsupported LUN format %d\n", lun_data->luns[i].lundata[0] & RPL_LUNDATA_ATYP_MASK); lun_val = -1; break; } if (lun_val == -1) continue; if ((retval = cctl_get_inquiry(fd, target, lun_val, iid, /*retries*/ 2, scsi_path, sizeof(scsi_path), inq_data)) != 0) { goto bailout; } printf("%s", scsi_path); scsi_print_inquiry(inq_data); /* * We only want to shutdown direct access devices. */ if (SID_TYPE(inq_data) != T_DIRECT) { printf("%s LUN is not direct access, skipped\n", scsi_path); continue; } if (command == CTLADM_CMD_SHUTDOWN) { struct ctl_ooa_info ooa_info; ooa_info.target_id = target; ooa_info.lun_id = lun_val; if (ioctl(fd, CTL_CHECK_OOA, &ooa_info) == -1) { printf("%s CTL_CHECK_OOA ioctl failed\n", scsi_path); continue; } if (ooa_info.status != CTL_OOA_SUCCESS) { printf("%s CTL_CHECK_OOA returned status %d\n", scsi_path, ooa_info.status); continue; } if (ooa_info.num_entries != 0) { printf("%s %d entr%s in the OOA queue, " "skipping shutdown\n", scsi_path, ooa_info.num_entries, (ooa_info.num_entries > 1)?"ies" : "y" ); continue; } } ctl_scsi_start_stop(/*io*/ io, /*start*/(command == CTLADM_CMD_STARTUP) ? 1 : 0, /*load_eject*/ 0, /*immediate*/ 0, /*power_conditions*/ SSS_PC_START_VALID, /*onoffline*/ 1, /*ctl_tag_type*/ (command == CTLADM_CMD_STARTUP) ? CTL_TAG_SIMPLE :CTL_TAG_ORDERED, /*control*/ 0); io->io_hdr.nexus.targ_target.id = target; io->io_hdr.nexus.targ_lun = lun_val; io->io_hdr.nexus.initid = id; if (cctl_do_io(fd, /*retries*/ 3, io, __func__) != 0) { retval = 1; goto bailout; } if ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS) ctl_io_error_print(io, inq_data, stderr); else { printf("%s LUN is now %s\n", scsi_path, (command == CTLADM_CMD_STARTUP) ? "online" : "offline"); } } bailout: if (lun_data != NULL) free(lun_data); if (inq_data != NULL) free(inq_data); if (io != NULL) ctl_scsi_free_io(io); return (retval); } static int cctl_sync_cache(int fd, int target, int lun, int iid, int retries, int argc, char **argv, char *combinedopt) { union ctl_io *io; struct ctl_id id; int cdb_size = -1; int retval; uint64_t our_lba = 0; uint32_t our_block_count = 0; int reladr = 0, immed = 0; int c; id.id = iid; retval = 0; io = ctl_scsi_alloc_io(id); if (io == NULL) { warnx("%s: can't allocate memory", __func__); return (1); } while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'b': our_block_count = strtoul(optarg, NULL, 0); break; case 'c': cdb_size = strtol(optarg, NULL, 0); break; case 'i': immed = 1; break; case 'l': our_lba = strtoull(optarg, NULL, 0); break; case 'r': reladr = 1; break; default: break; } } if (cdb_size != -1) { switch (cdb_size) { case 10: case 16: break; default: warnx("%s: invalid cdbsize %d, valid sizes are 10 " "and 16", __func__, cdb_size); retval = 1; goto bailout; break; /* NOTREACHED */ } } else cdb_size = 10; ctl_scsi_sync_cache(/*io*/ io, /*immed*/ immed, /*reladr*/ reladr, /*minimum_cdb_size*/ cdb_size, /*starting_lba*/ our_lba, /*block_count*/ our_block_count, /*tag_type*/ CTL_TAG_SIMPLE, /*control*/ 0); io->io_hdr.nexus.targ_target.id = target; io->io_hdr.nexus.targ_lun = lun; io->io_hdr.nexus.initid = id; if (cctl_do_io(fd, retries, io, __func__) != 0) { retval = 1; goto bailout; } if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) { fprintf(stdout, "Cache synchronized successfully\n"); } else ctl_io_error_print(io, NULL, stderr); bailout: ctl_scsi_free_io(io); return (retval); } static int cctl_start_stop(int fd, int target, int lun, int iid, int retries, int start, int argc, char **argv, char *combinedopt) { union ctl_io *io; struct ctl_id id; char scsi_path[40]; int immed = 0, onoffline = 0; int retval, c; id.id = iid; retval = 0; io = ctl_scsi_alloc_io(id); if (io == NULL) { warnx("%s: can't allocate memory", __func__); return (1); } while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'i': immed = 1; break; case 'o': onoffline = 1; break; default: break; } } /* * Use an ordered tag for the stop command, to guarantee that any * pending I/O will finish before the stop command executes. This * would normally be the case anyway, since CTL will basically * treat the start/stop command as an ordered command with respect * to any other command except an INQUIRY. (See ctl_ser_table.c.) */ ctl_scsi_start_stop(/*io*/ io, /*start*/ start, /*load_eject*/ 0, /*immediate*/ immed, /*power_conditions*/ SSS_PC_START_VALID, /*onoffline*/ onoffline, /*ctl_tag_type*/ start ? CTL_TAG_SIMPLE : CTL_TAG_ORDERED, /*control*/ 0); io->io_hdr.nexus.targ_target.id = target; io->io_hdr.nexus.targ_lun = lun; io->io_hdr.nexus.initid = id; if (cctl_do_io(fd, retries, io, __func__) != 0) { retval = 1; goto bailout; } ctl_scsi_path_string(io, scsi_path, sizeof(scsi_path)); if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) { fprintf(stdout, "%s LUN %s successfully\n", scsi_path, (start) ? "started" : "stopped"); } else ctl_io_error_print(io, NULL, stderr); bailout: ctl_scsi_free_io(io); return (retval); } static int cctl_mode_sense(int fd, int target, int lun, int iid, int retries, int argc, char **argv, char *combinedopt) { union ctl_io *io; struct ctl_id id; uint32_t datalen; uint8_t *dataptr; int pc = -1, cdbsize, retval, dbd = 0, subpage = -1; int list = 0; int page_code = -1; int c; id.id = iid; cdbsize = 0; retval = 0; dataptr = NULL; io = ctl_scsi_alloc_io(id); if (io == NULL) { warn("%s: can't allocate memory", __func__); return (1); } while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'P': pc = strtoul(optarg, NULL, 0); break; case 'S': subpage = strtoul(optarg, NULL, 0); break; case 'd': dbd = 1; break; case 'l': list = 1; break; case 'm': page_code = strtoul(optarg, NULL, 0); break; case 'c': cdbsize = strtol(optarg, NULL, 0); break; default: break; } } if (((list == 0) && (page_code == -1)) || ((list != 0) && (page_code != -1))) { warnx("%s: you must specify either a page code (-m) or -l", __func__); retval = 1; goto bailout; } if ((page_code != -1) && ((page_code > SMS_ALL_PAGES_PAGE) || (page_code < 0))) { warnx("%s: page code %d is out of range", __func__, page_code); retval = 1; goto bailout; } if (list == 1) { page_code = SMS_ALL_PAGES_PAGE; if (pc != -1) { warnx("%s: arg -P makes no sense with -l", __func__); retval = 1; goto bailout; } if (subpage != -1) { warnx("%s: arg -S makes no sense with -l", __func__); retval = 1; goto bailout; } } if (pc == -1) pc = SMS_PAGE_CTRL_CURRENT; else { if ((pc > 3) || (pc < 0)) { warnx("%s: page control value %d is out of range: 0-3", __func__, pc); retval = 1; goto bailout; } } if ((subpage != -1) && ((subpage > 255) || (subpage < 0))) { warnx("%s: subpage code %d is out of range: 0-255", __func__, subpage); retval = 1; goto bailout; } if (cdbsize != 0) { switch (cdbsize) { case 6: case 10: break; default: warnx("%s: invalid cdbsize %d, valid sizes are 6 " "and 10", __func__, cdbsize); retval = 1; goto bailout; break; } } else cdbsize = 6; if (subpage == -1) subpage = 0; if (cdbsize == 6) datalen = 255; else datalen = 65535; dataptr = (uint8_t *)malloc(datalen); if (dataptr == NULL) { warn("%s: can't allocate %d bytes", __func__, datalen); retval = 1; goto bailout; } memset(dataptr, 0, datalen); ctl_scsi_mode_sense(io, /*data_ptr*/ dataptr, /*data_len*/ datalen, /*dbd*/ dbd, /*llbaa*/ 0, /*page_code*/ page_code, /*pc*/ pc << 6, /*subpage*/ subpage, /*minimum_cdb_size*/ cdbsize, /*tag_type*/ CTL_TAG_SIMPLE, /*control*/ 0); io->io_hdr.nexus.targ_target.id = target; io->io_hdr.nexus.targ_lun = lun; io->io_hdr.nexus.initid = id; if (cctl_do_io(fd, retries, io, __func__) != 0) { retval = 1; goto bailout; } if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) { int pages_len, used_len; uint32_t returned_len; uint8_t *ndataptr; if (io->scsiio.cdb[0] == MODE_SENSE_6) { struct scsi_mode_hdr_6 *hdr6; int bdlen; hdr6 = (struct scsi_mode_hdr_6 *)dataptr; returned_len = hdr6->datalen + 1; bdlen = hdr6->block_descr_len; ndataptr = (uint8_t *)((uint8_t *)&hdr6[1] + bdlen); } else { struct scsi_mode_hdr_10 *hdr10; int bdlen; hdr10 = (struct scsi_mode_hdr_10 *)dataptr; returned_len = scsi_2btoul(hdr10->datalen) + 2; bdlen = scsi_2btoul(hdr10->block_descr_len); ndataptr = (uint8_t *)((uint8_t *)&hdr10[1] + bdlen); } /* just in case they can give us more than we allocated for */ returned_len = min(returned_len, datalen); pages_len = returned_len - (ndataptr - dataptr); #if 0 fprintf(stdout, "returned_len = %d, pages_len = %d\n", returned_len, pages_len); #endif if (list == 1) { fprintf(stdout, "Supported mode pages:\n"); for (used_len = 0; used_len < pages_len;) { struct scsi_mode_page_header *header; header = (struct scsi_mode_page_header *) &ndataptr[used_len]; fprintf(stdout, "%d\n", header->page_code); used_len += header->page_length + 2; } } else { for (used_len = 0; used_len < pages_len; used_len++) { fprintf(stdout, "0x%x ", ndataptr[used_len]); if (((used_len+1) % 16) == 0) fprintf(stdout, "\n"); } fprintf(stdout, "\n"); } } else ctl_io_error_print(io, NULL, stderr); bailout: ctl_scsi_free_io(io); if (dataptr != NULL) free(dataptr); return (retval); } static int cctl_read_capacity(int fd, int target, int lun, int iid, int retries, int argc, char **argv, char *combinedopt) { union ctl_io *io; struct ctl_id id; struct scsi_read_capacity_data *data; struct scsi_read_capacity_data_long *longdata; int cdbsize = -1, retval; uint8_t *dataptr; int c; cdbsize = 10; dataptr = NULL; retval = 0; id.id = iid; io = ctl_scsi_alloc_io(id); if (io == NULL) { warn("%s: can't allocate memory\n", __func__); return (1); } while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'c': cdbsize = strtol(optarg, NULL, 0); break; default: break; } } if (cdbsize != -1) { switch (cdbsize) { case 10: case 16: break; default: warnx("%s: invalid cdbsize %d, valid sizes are 10 " "and 16", __func__, cdbsize); retval = 1; goto bailout; break; /* NOTREACHED */ } } else cdbsize = 10; dataptr = (uint8_t *)malloc(sizeof(*longdata)); if (dataptr == NULL) { warn("%s: can't allocate %zd bytes\n", __func__, sizeof(*longdata)); retval = 1; goto bailout; } memset(dataptr, 0, sizeof(*longdata)); retry: switch (cdbsize) { case 10: ctl_scsi_read_capacity(io, /*data_ptr*/ dataptr, /*data_len*/ sizeof(*longdata), /*addr*/ 0, /*reladr*/ 0, /*pmi*/ 0, /*tag_type*/ CTL_TAG_SIMPLE, /*control*/ 0); break; case 16: ctl_scsi_read_capacity_16(io, /*data_ptr*/ dataptr, /*data_len*/ sizeof(*longdata), /*addr*/ 0, /*reladr*/ 0, /*pmi*/ 0, /*tag_type*/ CTL_TAG_SIMPLE, /*control*/ 0); break; } io->io_hdr.nexus.initid = id; io->io_hdr.nexus.targ_target.id = target; io->io_hdr.nexus.targ_lun = lun; if (cctl_do_io(fd, retries, io, __func__) != 0) { retval = 1; goto bailout; } if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) { uint64_t maxlba; uint32_t blocksize; if (cdbsize == 10) { data = (struct scsi_read_capacity_data *)dataptr; maxlba = scsi_4btoul(data->addr); blocksize = scsi_4btoul(data->length); if (maxlba == 0xffffffff) { cdbsize = 16; goto retry; } } else { longdata=(struct scsi_read_capacity_data_long *)dataptr; maxlba = scsi_8btou64(longdata->addr); blocksize = scsi_4btoul(longdata->length); } fprintf(stdout, "Disk Capacity: %ju, Blocksize: %d\n", (uintmax_t)maxlba, blocksize); } else { ctl_io_error_print(io, NULL, stderr); } bailout: ctl_scsi_free_io(io); if (dataptr != NULL) free(dataptr); return (retval); } static int cctl_read_write(int fd, int target, int lun, int iid, int retries, int argc, char **argv, char *combinedopt, ctladm_cmdfunction command) { union ctl_io *io; struct ctl_id id; int file_fd, do_stdio; int cdbsize = -1, databytes; uint8_t *dataptr; char *filename = NULL; int datalen = -1, blocksize = -1; uint64_t lba = 0; int lba_set = 0; int retval; int c; retval = 0; do_stdio = 0; dataptr = NULL; file_fd = -1; id.id = iid; io = ctl_scsi_alloc_io(id); if (io == NULL) { warn("%s: can't allocate memory\n", __func__); return (1); } while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'N': io->io_hdr.flags |= CTL_FLAG_NO_DATAMOVE; break; case 'b': blocksize = strtoul(optarg, NULL, 0); break; case 'c': cdbsize = strtoul(optarg, NULL, 0); break; case 'd': datalen = strtoul(optarg, NULL, 0); break; case 'f': filename = strdup(optarg); break; case 'l': lba = strtoull(optarg, NULL, 0); lba_set = 1; break; default: break; } } if (filename == NULL) { warnx("%s: you must supply a filename using -f", __func__); retval = 1; goto bailout; } if (datalen == -1) { warnx("%s: you must specify the data length with -d", __func__); retval = 1; goto bailout; } if (lba_set == 0) { warnx("%s: you must specify the LBA with -l", __func__); retval = 1; goto bailout; } if (blocksize == -1) { warnx("%s: you must specify the blocksize with -b", __func__); retval = 1; goto bailout; } if (cdbsize != -1) { switch (cdbsize) { case 6: case 10: case 12: case 16: break; default: warnx("%s: invalid cdbsize %d, valid sizes are 6, " "10, 12 or 16", __func__, cdbsize); retval = 1; goto bailout; break; /* NOTREACHED */ } } else cdbsize = 6; databytes = datalen * blocksize; dataptr = (uint8_t *)malloc(databytes); if (dataptr == NULL) { warn("%s: can't allocate %d bytes\n", __func__, databytes); retval = 1; goto bailout; } if (strcmp(filename, "-") == 0) { if (command == CTLADM_CMD_READ) file_fd = STDOUT_FILENO; else file_fd = STDIN_FILENO; do_stdio = 1; } else { file_fd = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); if (file_fd == -1) { warn("%s: can't open file %s", __func__, filename); retval = 1; goto bailout; } } memset(dataptr, 0, databytes); if (command == CTLADM_CMD_WRITE) { int bytes_read; bytes_read = read(file_fd, dataptr, databytes); if (bytes_read == -1) { warn("%s: error reading file %s", __func__, filename); retval = 1; goto bailout; } if (bytes_read != databytes) { warnx("%s: only read %d bytes from file %s", __func__, bytes_read, filename); retval = 1; goto bailout; } } ctl_scsi_read_write(io, /*data_ptr*/ dataptr, /*data_len*/ databytes, /*read_op*/ (command == CTLADM_CMD_READ) ? 1 : 0, /*byte2*/ 0, /*minimum_cdb_size*/ cdbsize, /*lba*/ lba, /*num_blocks*/ datalen, /*tag_type*/ CTL_TAG_SIMPLE, /*control*/ 0); io->io_hdr.nexus.targ_target.id = target; io->io_hdr.nexus.targ_lun = lun; io->io_hdr.nexus.initid = id; if (cctl_do_io(fd, retries, io, __func__) != 0) { retval = 1; goto bailout; } if (((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) && (command == CTLADM_CMD_READ)) { int bytes_written; bytes_written = write(file_fd, dataptr, databytes); if (bytes_written == -1) { warn("%s: can't write to %s", __func__, filename); goto bailout; } } else if ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS) ctl_io_error_print(io, NULL, stderr); bailout: ctl_scsi_free_io(io); if (dataptr != NULL) free(dataptr); if ((do_stdio == 0) && (file_fd != -1)) close(file_fd); return (retval); } static int cctl_get_luns(int fd, int target, int lun, int iid, int retries, struct scsi_report_luns_data **lun_data, uint32_t *num_luns) { union ctl_io *io; struct ctl_id id; uint32_t nluns; int lun_datalen; int retval; retval = 0; id.id = iid; io = ctl_scsi_alloc_io(id); if (io == NULL) { warnx("%s: can't allocate memory", __func__); return (1); } /* * lun_data includes space for 1 lun, allocate space for 4 initially. * If that isn't enough, we'll allocate more. */ nluns = 4; retry: lun_datalen = sizeof(*lun_data) + (nluns * sizeof(struct scsi_report_luns_lundata)); *lun_data = malloc(lun_datalen); if (*lun_data == NULL) { warnx("%s: can't allocate memory", __func__); ctl_scsi_free_io(io); return (1); } ctl_scsi_report_luns(io, /*data_ptr*/ (uint8_t *)*lun_data, /*data_len*/ lun_datalen, /*select_report*/ RPL_REPORT_ALL, /*tag_type*/ CTL_TAG_SIMPLE, /*control*/ 0); io->io_hdr.nexus.initid = id; io->io_hdr.nexus.targ_target.id = target; io->io_hdr.nexus.targ_lun = lun; if (cctl_do_io(fd, retries, io, __func__) != 0) { retval = 1; goto bailout; } if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) { uint32_t returned_len, returned_luns; returned_len = scsi_4btoul((*lun_data)->length); returned_luns = returned_len / 8; if (returned_luns > nluns) { nluns = returned_luns; free(*lun_data); goto retry; } /* These should be the same */ *num_luns = MIN(returned_luns, nluns); } else { ctl_io_error_print(io, NULL, stderr); retval = 1; } bailout: ctl_scsi_free_io(io); return (retval); } static int cctl_report_luns(int fd, int target, int lun, int iid, int retries) { struct scsi_report_luns_data *lun_data; uint32_t num_luns, i; int retval; lun_data = NULL; if ((retval = cctl_get_luns(fd, target, lun, iid, retries, &lun_data, &num_luns)) != 0) goto bailout; fprintf(stdout, "%u LUNs returned\n", num_luns); for (i = 0; i < num_luns; i++) { int lun_val; /* * XXX KDM figure out a way to share this code with * cctl_lunlist()? */ switch (lun_data->luns[i].lundata[0] & RPL_LUNDATA_ATYP_MASK) { case RPL_LUNDATA_ATYP_PERIPH: lun_val = lun_data->luns[i].lundata[1]; break; case RPL_LUNDATA_ATYP_FLAT: lun_val = (lun_data->luns[i].lundata[0] & RPL_LUNDATA_FLAT_LUN_MASK) | (lun_data->luns[i].lundata[1] << RPL_LUNDATA_FLAT_LUN_BITS); break; case RPL_LUNDATA_ATYP_LUN: case RPL_LUNDATA_ATYP_EXTLUN: default: fprintf(stdout, "Unsupported LUN format %d\n", lun_data->luns[i].lundata[0] & RPL_LUNDATA_ATYP_MASK); lun_val = -1; break; } if (lun_val == -1) continue; fprintf(stdout, "%d\n", lun_val); } bailout: if (lun_data != NULL) free(lun_data); return (retval); } static int cctl_tur(int fd, int target, int lun, int iid, int retries) { union ctl_io *io; struct ctl_id id; id.id = iid; io = ctl_scsi_alloc_io(id); if (io == NULL) { fprintf(stderr, "can't allocate memory\n"); return (1); } ctl_scsi_tur(io, /* tag_type */ CTL_TAG_SIMPLE, /* control */ 0); io->io_hdr.nexus.targ_target.id = target; io->io_hdr.nexus.targ_lun = lun; io->io_hdr.nexus.initid = id; if (cctl_do_io(fd, retries, io, __func__) != 0) { ctl_scsi_free_io(io); return (1); } if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) fprintf(stdout, "Unit is ready\n"); else ctl_io_error_print(io, NULL, stderr); return (0); } static int cctl_get_inquiry(int fd, int target, int lun, int iid, int retries, char *path_str, int path_len, struct scsi_inquiry_data *inq_data) { union ctl_io *io; struct ctl_id id; int retval; retval = 0; id.id = iid; io = ctl_scsi_alloc_io(id); if (io == NULL) { warnx("cctl_inquiry: can't allocate memory\n"); return (1); } ctl_scsi_inquiry(/*io*/ io, /*data_ptr*/ (uint8_t *)inq_data, /*data_len*/ sizeof(*inq_data), /*byte2*/ 0, /*page_code*/ 0, /*tag_type*/ CTL_TAG_SIMPLE, /*control*/ 0); io->io_hdr.nexus.targ_target.id = target; io->io_hdr.nexus.targ_lun = lun; io->io_hdr.nexus.initid = id; if (cctl_do_io(fd, retries, io, __func__) != 0) { retval = 1; goto bailout; } if ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS) { retval = 1; ctl_io_error_print(io, NULL, stderr); } else if (path_str != NULL) ctl_scsi_path_string(io, path_str, path_len); bailout: ctl_scsi_free_io(io); return (retval); } static int cctl_inquiry(int fd, int target, int lun, int iid, int retries) { struct scsi_inquiry_data *inq_data; char scsi_path[40]; int retval; retval = 0; inq_data = malloc(sizeof(*inq_data)); if (inq_data == NULL) { warnx("%s: can't allocate inquiry data", __func__); retval = 1; goto bailout; } if ((retval = cctl_get_inquiry(fd, target, lun, iid, retries, scsi_path, sizeof(scsi_path), inq_data)) != 0) goto bailout; printf("%s", scsi_path); scsi_print_inquiry(inq_data); bailout: if (inq_data != NULL) free(inq_data); return (retval); } static int cctl_req_sense(int fd, int target, int lun, int iid, int retries) { union ctl_io *io; struct scsi_sense_data *sense_data; struct ctl_id id; int retval; retval = 0; id.id = iid; io = ctl_scsi_alloc_io(id); if (io == NULL) { warnx("cctl_req_sense: can't allocate memory\n"); return (1); } sense_data = malloc(sizeof(*sense_data)); memset(sense_data, 0, sizeof(*sense_data)); ctl_scsi_request_sense(/*io*/ io, /*data_ptr*/ (uint8_t *)sense_data, /*data_len*/ sizeof(*sense_data), /*byte2*/ 0, /*tag_type*/ CTL_TAG_SIMPLE, /*control*/ 0); io->io_hdr.nexus.targ_target.id = target; io->io_hdr.nexus.targ_lun = lun; io->io_hdr.nexus.initid = id; if (cctl_do_io(fd, retries, io, __func__) != 0) { retval = 1; goto bailout; } if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) { bcopy(sense_data, &io->scsiio.sense_data, sizeof(*sense_data)); io->scsiio.sense_len = sizeof(*sense_data); ctl_scsi_sense_print(&io->scsiio, NULL, stdout); } else ctl_io_error_print(io, NULL, stderr); bailout: ctl_scsi_free_io(io); free(sense_data); return (retval); } static int cctl_report_target_port_group(int fd, int target, int lun, int initiator) { union ctl_io *io; struct ctl_id id; uint32_t datalen; uint8_t *dataptr; int retval; id.id = initiator; dataptr = NULL; retval = 0; io = ctl_scsi_alloc_io(id); if (io == NULL) { warn("%s: can't allocate memory", __func__); return (1); } datalen = 64; dataptr = (uint8_t *)malloc(datalen); if (dataptr == NULL) { warn("%s: can't allocate %d bytes", __func__, datalen); retval = 1; goto bailout; } memset(dataptr, 0, datalen); ctl_scsi_maintenance_in(/*io*/ io, /*data_ptr*/ dataptr, /*data_len*/ datalen, /*action*/ SA_RPRT_TRGT_GRP, /*tag_type*/ CTL_TAG_SIMPLE, /*control*/ 0); io->io_hdr.nexus.targ_target.id = target; io->io_hdr.nexus.targ_lun = lun; io->io_hdr.nexus.initid = id; if (cctl_do_io(fd, 0, io, __func__) != 0) { retval = 1; goto bailout; } if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) { int returned_len, used_len; returned_len = scsi_4btoul(&dataptr[0]) + 4; for (used_len = 0; used_len < returned_len; used_len++) { fprintf(stdout, "0x%02x ", dataptr[used_len]); if (((used_len+1) % 8) == 0) fprintf(stdout, "\n"); } fprintf(stdout, "\n"); } else ctl_io_error_print(io, NULL, stderr); bailout: ctl_scsi_free_io(io); if (dataptr != NULL) free(dataptr); return (retval); } static int cctl_inquiry_vpd_devid(int fd, int target, int lun, int initiator) { union ctl_io *io; struct ctl_id id; uint32_t datalen; uint8_t *dataptr; int retval; id.id = initiator; retval = 0; dataptr = NULL; io = ctl_scsi_alloc_io(id); if (io == NULL) { warn("%s: can't allocate memory", __func__); return (1); } datalen = 256; dataptr = (uint8_t *)malloc(datalen); if (dataptr == NULL) { warn("%s: can't allocate %d bytes", __func__, datalen); retval = 1; goto bailout; } memset(dataptr, 0, datalen); ctl_scsi_inquiry(/*io*/ io, /*data_ptr*/ dataptr, /*data_len*/ datalen, /*byte2*/ SI_EVPD, /*page_code*/ SVPD_DEVICE_ID, /*tag_type*/ CTL_TAG_SIMPLE, /*control*/ 0); io->io_hdr.nexus.targ_target.id = target; io->io_hdr.nexus.targ_lun = lun; io->io_hdr.nexus.initid = id; if (cctl_do_io(fd, 0, io, __func__) != 0) { retval = 1; goto bailout; } if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) { int returned_len, used_len; returned_len = scsi_2btoul(&dataptr[2]) + 4; for (used_len = 0; used_len < returned_len; used_len++) { fprintf(stdout, "0x%02x ", dataptr[used_len]); if (((used_len+1) % 8) == 0) fprintf(stdout, "\n"); } fprintf(stdout, "\n"); } else ctl_io_error_print(io, NULL, stderr); bailout: ctl_scsi_free_io(io); if (dataptr != NULL) free(dataptr); return (retval); } static int cctl_persistent_reserve_in(int fd, int target, int lun, int initiator, int argc, char **argv, char *combinedopt, int retry_count) { union ctl_io *io; struct ctl_id id; uint32_t datalen; uint8_t *dataptr; int action = -1; int retval; int c; id.id = initiator; retval = 0; dataptr = NULL; io = ctl_scsi_alloc_io(id); if (io == NULL) { warn("%s: can't allocate memory", __func__); return (1); } while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'a': action = strtol(optarg, NULL, 0); break; default: break; } } if (action < 0 || action > 2) { warn("action must be specified and in the range: 0-2"); retval = 1; goto bailout; } datalen = 256; dataptr = (uint8_t *)malloc(datalen); if (dataptr == NULL) { warn("%s: can't allocate %d bytes", __func__, datalen); retval = 1; goto bailout; } memset(dataptr, 0, datalen); ctl_scsi_persistent_res_in(io, /*data_ptr*/ dataptr, /*data_len*/ datalen, /*action*/ action, /*tag_type*/ CTL_TAG_SIMPLE, /*control*/ 0); io->io_hdr.nexus.targ_target.id = target; io->io_hdr.nexus.targ_lun = lun; io->io_hdr.nexus.initid = id; if (cctl_do_io(fd, retry_count, io, __func__) != 0) { retval = 1; goto bailout; } if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) { int returned_len, used_len; returned_len = 0; switch (action) { case 0: returned_len = scsi_4btoul(&dataptr[4]) + 8; returned_len = min(returned_len, 256); break; case 1: returned_len = scsi_4btoul(&dataptr[4]) + 8; break; case 2: returned_len = 8; break; default: warnx("%s: invalid action %d", __func__, action); goto bailout; break; /* NOTREACHED */ } for (used_len = 0; used_len < returned_len; used_len++) { fprintf(stdout, "0x%02x ", dataptr[used_len]); if (((used_len+1) % 8) == 0) fprintf(stdout, "\n"); } fprintf(stdout, "\n"); } else ctl_io_error_print(io, NULL, stderr); bailout: ctl_scsi_free_io(io); if (dataptr != NULL) free(dataptr); return (retval); } static int cctl_persistent_reserve_out(int fd, int target, int lun, int initiator, int argc, char **argv, char *combinedopt, int retry_count) { union ctl_io *io; struct ctl_id id; uint32_t datalen; uint64_t key = 0, sa_key = 0; int action = -1, restype = -1; uint8_t *dataptr; int retval; int c; id.id = initiator; retval = 0; dataptr = NULL; io = ctl_scsi_alloc_io(id); if (io == NULL) { warn("%s: can't allocate memory", __func__); return (1); } while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'a': action = strtol(optarg, NULL, 0); break; case 'k': key = strtoull(optarg, NULL, 0); break; case 'r': restype = strtol(optarg, NULL, 0); break; case 's': sa_key = strtoull(optarg, NULL, 0); break; default: break; } } if (action < 0 || action > 5) { warn("action must be specified and in the range: 0-5"); retval = 1; goto bailout; } if (restype < 0 || restype > 5) { if (action != 0 && action != 5 && action != 3) { warn("'restype' must specified and in the range: 0-5"); retval = 1; goto bailout; } } datalen = 24; dataptr = (uint8_t *)malloc(datalen); if (dataptr == NULL) { warn("%s: can't allocate %d bytes", __func__, datalen); retval = 1; goto bailout; } memset(dataptr, 0, datalen); ctl_scsi_persistent_res_out(io, /*data_ptr*/ dataptr, /*data_len*/ datalen, /*action*/ action, /*type*/ restype, /*key*/ key, /*sa key*/ sa_key, /*tag_type*/ CTL_TAG_SIMPLE, /*control*/ 0); io->io_hdr.nexus.targ_target.id = target; io->io_hdr.nexus.targ_lun = lun; io->io_hdr.nexus.initid = id; if (cctl_do_io(fd, retry_count, io, __func__) != 0) { retval = 1; goto bailout; } if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) { char scsi_path[40]; ctl_scsi_path_string(io, scsi_path, sizeof(scsi_path)); fprintf( stdout, "%sPERSISTENT RESERVE OUT executed " "successfully\n", scsi_path); } else ctl_io_error_print(io, NULL, stderr); bailout: ctl_scsi_free_io(io); if (dataptr != NULL) free(dataptr); return (retval); } struct cctl_req_option { char *name; int namelen; char *value; int vallen; STAILQ_ENTRY(cctl_req_option) links; }; static int cctl_create_lun(int fd, int argc, char **argv, char *combinedopt) { struct ctl_lun_req req; int device_type = -1; uint64_t lun_size = 0; uint32_t blocksize = 0, req_lun_id = 0; char *serial_num = NULL; char *device_id = NULL; int lun_size_set = 0, blocksize_set = 0, lun_id_set = 0; char *backend_name = NULL; STAILQ_HEAD(, cctl_req_option) option_list; int num_options = 0; int retval = 0, c; STAILQ_INIT(&option_list); while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'b': backend_name = strdup(optarg); break; case 'B': blocksize = strtoul(optarg, NULL, 0); blocksize_set = 1; break; case 'd': device_id = strdup(optarg); break; case 'l': req_lun_id = strtoul(optarg, NULL, 0); lun_id_set = 1; break; case 'o': { struct cctl_req_option *option; char *tmpstr; char *name, *value; tmpstr = strdup(optarg); name = strsep(&tmpstr, "="); if (name == NULL) { warnx("%s: option -o takes \"name=value\"" "argument", __func__); retval = 1; goto bailout; } value = strsep(&tmpstr, "="); if (value == NULL) { warnx("%s: option -o takes \"name=value\"" "argument", __func__); retval = 1; goto bailout; } option = malloc(sizeof(*option)); if (option == NULL) { warn("%s: error allocating %zd bytes", __func__, sizeof(*option)); retval = 1; goto bailout; } option->name = strdup(name); option->namelen = strlen(name) + 1; option->value = strdup(value); option->vallen = strlen(value) + 1; free(tmpstr); STAILQ_INSERT_TAIL(&option_list, option, links); num_options++; break; } case 's': if (strcasecmp(optarg, "auto") != 0) { retval = expand_number(optarg, &lun_size); if (retval != 0) { warn("%s: invalid -s argument", __func__); retval = 1; goto bailout; } } lun_size_set = 1; break; case 'S': serial_num = strdup(optarg); break; case 't': device_type = strtoul(optarg, NULL, 0); break; default: break; } } if (backend_name == NULL) { warnx("%s: backend name (-b) must be specified", __func__); retval = 1; goto bailout; } bzero(&req, sizeof(req)); strlcpy(req.backend, backend_name, sizeof(req.backend)); req.reqtype = CTL_LUNREQ_CREATE; if (blocksize_set != 0) req.reqdata.create.blocksize_bytes = blocksize; if (lun_size_set != 0) req.reqdata.create.lun_size_bytes = lun_size; if (lun_id_set != 0) { req.reqdata.create.flags |= CTL_LUN_FLAG_ID_REQ; req.reqdata.create.req_lun_id = req_lun_id; } req.reqdata.create.flags |= CTL_LUN_FLAG_DEV_TYPE; if (device_type != -1) req.reqdata.create.device_type = device_type; else req.reqdata.create.device_type = T_DIRECT; if (serial_num != NULL) { strlcpy(req.reqdata.create.serial_num, serial_num, sizeof(req.reqdata.create.serial_num)); req.reqdata.create.flags |= CTL_LUN_FLAG_SERIAL_NUM; } if (device_id != NULL) { strlcpy(req.reqdata.create.device_id, device_id, sizeof(req.reqdata.create.device_id)); req.reqdata.create.flags |= CTL_LUN_FLAG_DEVID; } req.num_be_args = num_options; if (num_options > 0) { struct cctl_req_option *option, *next_option; int i; req.be_args = malloc(num_options * sizeof(*req.be_args)); if (req.be_args == NULL) { warn("%s: error allocating %zd bytes", __func__, num_options * sizeof(*req.be_args)); retval = 1; goto bailout; } for (i = 0, option = STAILQ_FIRST(&option_list); i < num_options; i++, option = next_option) { next_option = STAILQ_NEXT(option, links); req.be_args[i].namelen = option->namelen; req.be_args[i].name = strdup(option->name); req.be_args[i].vallen = option->vallen; req.be_args[i].value = strdup(option->value); /* * XXX KDM do we want a way to specify a writeable * flag of some sort? Do we want a way to specify * binary data? */ req.be_args[i].flags = CTL_BEARG_ASCII | CTL_BEARG_RD; STAILQ_REMOVE(&option_list, option, cctl_req_option, links); free(option->name); free(option->value); free(option); } } if (ioctl(fd, CTL_LUN_REQ, &req) == -1) { warn("%s: error issuing CTL_LUN_REQ ioctl", __func__); retval = 1; goto bailout; } switch (req.status) { case CTL_LUN_ERROR: warnx("LUN creation error: %s", req.error_str); retval = 1; goto bailout; case CTL_LUN_WARNING: warnx("LUN creation warning: %s", req.error_str); break; case CTL_LUN_OK: break; default: warnx("unknown LUN creation status: %d", req.status); retval = 1; goto bailout; } fprintf(stdout, "LUN created successfully\n"); fprintf(stdout, "backend: %s\n", req.backend); fprintf(stdout, "device type: %d\n",req.reqdata.create.device_type); fprintf(stdout, "LUN size: %ju bytes\n", (uintmax_t)req.reqdata.create.lun_size_bytes); fprintf(stdout, "blocksize %u bytes\n", req.reqdata.create.blocksize_bytes); fprintf(stdout, "LUN ID: %d\n", req.reqdata.create.req_lun_id); fprintf(stdout, "Serial Number: %s\n", req.reqdata.create.serial_num); fprintf(stdout, "Device ID; %s\n", req.reqdata.create.device_id); bailout: return (retval); } static int cctl_rm_lun(int fd, int argc, char **argv, char *combinedopt) { struct ctl_lun_req req; uint32_t lun_id = 0; int lun_id_set = 0; char *backend_name = NULL; STAILQ_HEAD(, cctl_req_option) option_list; int num_options = 0; int retval = 0, c; STAILQ_INIT(&option_list); while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'b': backend_name = strdup(optarg); break; case 'l': lun_id = strtoul(optarg, NULL, 0); lun_id_set = 1; break; case 'o': { struct cctl_req_option *option; char *tmpstr; char *name, *value; tmpstr = strdup(optarg); name = strsep(&tmpstr, "="); if (name == NULL) { warnx("%s: option -o takes \"name=value\"" "argument", __func__); retval = 1; goto bailout; } value = strsep(&tmpstr, "="); if (value == NULL) { warnx("%s: option -o takes \"name=value\"" "argument", __func__); retval = 1; goto bailout; } option = malloc(sizeof(*option)); if (option == NULL) { warn("%s: error allocating %zd bytes", __func__, sizeof(*option)); retval = 1; goto bailout; } option->name = strdup(name); option->namelen = strlen(name) + 1; option->value = strdup(value); option->vallen = strlen(value) + 1; free(tmpstr); STAILQ_INSERT_TAIL(&option_list, option, links); num_options++; break; } default: break; } } if (backend_name == NULL) errx(1, "%s: backend name (-b) must be specified", __func__); if (lun_id_set == 0) errx(1, "%s: LUN id (-l) must be specified", __func__); bzero(&req, sizeof(req)); strlcpy(req.backend, backend_name, sizeof(req.backend)); req.reqtype = CTL_LUNREQ_RM; req.reqdata.rm.lun_id = lun_id; req.num_be_args = num_options; if (num_options > 0) { struct cctl_req_option *option, *next_option; int i; req.be_args = malloc(num_options * sizeof(*req.be_args)); if (req.be_args == NULL) { warn("%s: error allocating %zd bytes", __func__, num_options * sizeof(*req.be_args)); retval = 1; goto bailout; } for (i = 0, option = STAILQ_FIRST(&option_list); i < num_options; i++, option = next_option) { next_option = STAILQ_NEXT(option, links); req.be_args[i].namelen = option->namelen; req.be_args[i].name = strdup(option->name); req.be_args[i].vallen = option->vallen; req.be_args[i].value = strdup(option->value); /* * XXX KDM do we want a way to specify a writeable * flag of some sort? Do we want a way to specify * binary data? */ req.be_args[i].flags = CTL_BEARG_ASCII | CTL_BEARG_RD; STAILQ_REMOVE(&option_list, option, cctl_req_option, links); free(option->name); free(option->value); free(option); } } if (ioctl(fd, CTL_LUN_REQ, &req) == -1) { warn("%s: error issuing CTL_LUN_REQ ioctl", __func__); retval = 1; goto bailout; } switch (req.status) { case CTL_LUN_ERROR: warnx("LUN removal error: %s", req.error_str); retval = 1; goto bailout; case CTL_LUN_WARNING: warnx("LUN removal warning: %s", req.error_str); break; case CTL_LUN_OK: break; default: warnx("unknown LUN removal status: %d", req.status); retval = 1; goto bailout; } printf("LUN %d removed successfully\n", lun_id); bailout: return (retval); } static int cctl_modify_lun(int fd, int argc, char **argv, char *combinedopt) { struct ctl_lun_req req; uint64_t lun_size = 0; uint32_t lun_id = 0; int lun_id_set = 0, lun_size_set = 0; char *backend_name = NULL; + STAILQ_HEAD(, cctl_req_option) option_list; + int num_options = 0; int retval = 0, c; + STAILQ_INIT(&option_list); while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'b': backend_name = strdup(optarg); break; case 'l': lun_id = strtoul(optarg, NULL, 0); lun_id_set = 1; break; + case 'o': { + struct cctl_req_option *option; + char *tmpstr; + char *name, *value; + + tmpstr = strdup(optarg); + name = strsep(&tmpstr, "="); + if (name == NULL) { + warnx("%s: option -o takes \"name=value\"" + "argument", __func__); + retval = 1; + goto bailout; + } + value = strsep(&tmpstr, "="); + if (value == NULL) { + warnx("%s: option -o takes \"name=value\"" + "argument", __func__); + retval = 1; + goto bailout; + } + option = malloc(sizeof(*option)); + if (option == NULL) { + warn("%s: error allocating %zd bytes", + __func__, sizeof(*option)); + retval = 1; + goto bailout; + } + option->name = strdup(name); + option->namelen = strlen(name) + 1; + option->value = strdup(value); + option->vallen = strlen(value) + 1; + free(tmpstr); + + STAILQ_INSERT_TAIL(&option_list, option, links); + num_options++; + break; + } case 's': if (strcasecmp(optarg, "auto") != 0) { retval = expand_number(optarg, &lun_size); if (retval != 0) { warn("%s: invalid -s argument", __func__); retval = 1; goto bailout; } } lun_size_set = 1; break; default: break; } } if (backend_name == NULL) errx(1, "%s: backend name (-b) must be specified", __func__); if (lun_id_set == 0) errx(1, "%s: LUN id (-l) must be specified", __func__); - if (lun_size_set == 0) - errx(1, "%s: size (-s) must be specified", __func__); + if (lun_size_set == 0 && num_options == 0) + errx(1, "%s: size (-s) or options (-o) must be specified", + __func__); bzero(&req, sizeof(req)); strlcpy(req.backend, backend_name, sizeof(req.backend)); req.reqtype = CTL_LUNREQ_MODIFY; req.reqdata.modify.lun_id = lun_id; req.reqdata.modify.lun_size_bytes = lun_size; + + req.num_be_args = num_options; + if (num_options > 0) { + struct cctl_req_option *option, *next_option; + int i; + + req.be_args = malloc(num_options * sizeof(*req.be_args)); + if (req.be_args == NULL) { + warn("%s: error allocating %zd bytes", __func__, + num_options * sizeof(*req.be_args)); + retval = 1; + goto bailout; + } + + for (i = 0, option = STAILQ_FIRST(&option_list); + i < num_options; i++, option = next_option) { + next_option = STAILQ_NEXT(option, links); + + req.be_args[i].namelen = option->namelen; + req.be_args[i].name = strdup(option->name); + req.be_args[i].vallen = option->vallen; + req.be_args[i].value = strdup(option->value); + /* + * XXX KDM do we want a way to specify a writeable + * flag of some sort? Do we want a way to specify + * binary data? + */ + req.be_args[i].flags = CTL_BEARG_ASCII | CTL_BEARG_RD; + + STAILQ_REMOVE(&option_list, option, cctl_req_option, + links); + free(option->name); + free(option->value); + free(option); + } + } if (ioctl(fd, CTL_LUN_REQ, &req) == -1) { warn("%s: error issuing CTL_LUN_REQ ioctl", __func__); retval = 1; goto bailout; } switch (req.status) { case CTL_LUN_ERROR: warnx("LUN modification error: %s", req.error_str); retval = 1; goto bailout; case CTL_LUN_WARNING: warnx("LUN modification warning: %s", req.error_str); break; case CTL_LUN_OK: break; default: warnx("unknown LUN modification status: %d", req.status); retval = 1; goto bailout; } printf("LUN %d modified successfully\n", lun_id); bailout: return (retval); } struct cctl_islist_conn { int connection_id; char *initiator; char *initiator_addr; char *initiator_alias; char *target; char *target_alias; char *header_digest; char *data_digest; char *max_data_segment_length;; char *offload;; int immediate_data; int iser; STAILQ_ENTRY(cctl_islist_conn) links; }; struct cctl_islist_data { int num_conns; STAILQ_HEAD(,cctl_islist_conn) conn_list; struct cctl_islist_conn *cur_conn; int level; struct sbuf *cur_sb[32]; }; static void cctl_islist_start_element(void *user_data, const char *name, const char **attr) { int i; struct cctl_islist_data *islist; struct cctl_islist_conn *cur_conn; islist = (struct cctl_islist_data *)user_data; cur_conn = islist->cur_conn; islist->level++; if ((u_int)islist->level >= (sizeof(islist->cur_sb) / sizeof(islist->cur_sb[0]))) errx(1, "%s: too many nesting levels, %zd max", __func__, sizeof(islist->cur_sb) / sizeof(islist->cur_sb[0])); islist->cur_sb[islist->level] = sbuf_new_auto(); if (islist->cur_sb[islist->level] == NULL) err(1, "%s: Unable to allocate sbuf", __func__); if (strcmp(name, "connection") == 0) { if (cur_conn != NULL) errx(1, "%s: improper connection element nesting", __func__); cur_conn = calloc(1, sizeof(*cur_conn)); if (cur_conn == NULL) err(1, "%s: cannot allocate %zd bytes", __func__, sizeof(*cur_conn)); islist->num_conns++; islist->cur_conn = cur_conn; STAILQ_INSERT_TAIL(&islist->conn_list, cur_conn, links); for (i = 0; attr[i] != NULL; i += 2) { if (strcmp(attr[i], "id") == 0) { cur_conn->connection_id = strtoull(attr[i+1], NULL, 0); } else { errx(1, "%s: invalid connection attribute %s = %s", __func__, attr[i], attr[i+1]); } } } } static void cctl_islist_end_element(void *user_data, const char *name) { struct cctl_islist_data *islist; struct cctl_islist_conn *cur_conn; char *str; islist = (struct cctl_islist_data *)user_data; cur_conn = islist->cur_conn; if ((cur_conn == NULL) && (strcmp(name, "ctlislist") != 0)) errx(1, "%s: cur_conn == NULL! (name = %s)", __func__, name); if (islist->cur_sb[islist->level] == NULL) errx(1, "%s: no valid sbuf at level %d (name %s)", __func__, islist->level, name); sbuf_finish(islist->cur_sb[islist->level]); str = strdup(sbuf_data(islist->cur_sb[islist->level])); if (str == NULL) err(1, "%s can't allocate %zd bytes for string", __func__, sbuf_len(islist->cur_sb[islist->level])); sbuf_delete(islist->cur_sb[islist->level]); islist->cur_sb[islist->level] = NULL; islist->level--; if (strcmp(name, "initiator") == 0) { cur_conn->initiator = str; str = NULL; } else if (strcmp(name, "initiator_addr") == 0) { cur_conn->initiator_addr = str; str = NULL; } else if (strcmp(name, "initiator_alias") == 0) { cur_conn->initiator_alias = str; str = NULL; } else if (strcmp(name, "target") == 0) { cur_conn->target = str; str = NULL; } else if (strcmp(name, "target_alias") == 0) { cur_conn->target_alias = str; str = NULL; } else if (strcmp(name, "target_portal_group_tag") == 0) { } else if (strcmp(name, "header_digest") == 0) { cur_conn->header_digest = str; str = NULL; } else if (strcmp(name, "data_digest") == 0) { cur_conn->data_digest = str; str = NULL; } else if (strcmp(name, "max_data_segment_length") == 0) { cur_conn->max_data_segment_length = str; str = NULL; } else if (strcmp(name, "offload") == 0) { cur_conn->offload = str; str = NULL; } else if (strcmp(name, "immediate_data") == 0) { cur_conn->immediate_data = atoi(str); } else if (strcmp(name, "iser") == 0) { cur_conn->iser = atoi(str); } else if (strcmp(name, "connection") == 0) { islist->cur_conn = NULL; } else if (strcmp(name, "ctlislist") == 0) { /* Nothing. */ } else { /* * Unknown element; ignore it for forward compatiblity. */ } free(str); } static void cctl_islist_char_handler(void *user_data, const XML_Char *str, int len) { struct cctl_islist_data *islist; islist = (struct cctl_islist_data *)user_data; sbuf_bcat(islist->cur_sb[islist->level], str, len); } static int cctl_islist(int fd, int argc, char **argv, char *combinedopt) { struct ctl_iscsi req; struct cctl_islist_data islist; struct cctl_islist_conn *conn; XML_Parser parser; char *conn_str; int conn_len; int dump_xml = 0; int c, retval, verbose = 0; retval = 0; conn_len = 4096; bzero(&islist, sizeof(islist)); STAILQ_INIT(&islist.conn_list); while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'v': verbose = 1; break; case 'x': dump_xml = 1; break; default: break; } } retry: conn_str = malloc(conn_len); bzero(&req, sizeof(req)); req.type = CTL_ISCSI_LIST; req.data.list.alloc_len = conn_len; req.data.list.conn_xml = conn_str; if (ioctl(fd, CTL_ISCSI, &req) == -1) { warn("%s: error issuing CTL_ISCSI ioctl", __func__); retval = 1; goto bailout; } if (req.status == CTL_ISCSI_ERROR) { warnx("%s: error returned from CTL_ISCSI ioctl:\n%s", __func__, req.error_str); } else if (req.status == CTL_ISCSI_LIST_NEED_MORE_SPACE) { conn_len = conn_len << 1; goto retry; } if (dump_xml != 0) { printf("%s", conn_str); goto bailout; } parser = XML_ParserCreate(NULL); if (parser == NULL) { warn("%s: Unable to create XML parser", __func__); retval = 1; goto bailout; } XML_SetUserData(parser, &islist); XML_SetElementHandler(parser, cctl_islist_start_element, cctl_islist_end_element); XML_SetCharacterDataHandler(parser, cctl_islist_char_handler); retval = XML_Parse(parser, conn_str, strlen(conn_str), 1); if (retval != 1) { warnx("%s: Unable to parse XML: Error %d", __func__, XML_GetErrorCode(parser)); XML_ParserFree(parser); retval = 1; goto bailout; } retval = 0; XML_ParserFree(parser); if (verbose != 0) { STAILQ_FOREACH(conn, &islist.conn_list, links) { printf("Session ID: %d\n", conn->connection_id); printf("Initiator name: %s\n", conn->initiator); printf("Initiator portal: %s\n", conn->initiator_addr); printf("Initiator alias: %s\n", conn->initiator_alias); printf("Target name: %s\n", conn->target); printf("Target alias: %s\n", conn->target_alias); printf("Header digest: %s\n", conn->header_digest); printf("Data digest: %s\n", conn->data_digest); printf("DataSegmentLen: %s\n", conn->max_data_segment_length); printf("ImmediateData: %s\n", conn->immediate_data ? "Yes" : "No"); printf("iSER (RDMA): %s\n", conn->iser ? "Yes" : "No"); printf("Offload driver: %s\n", conn->offload); printf("\n"); } } else { printf("%4s %-16s %-36s %-36s\n", "ID", "Portal", "Initiator name", "Target name"); STAILQ_FOREACH(conn, &islist.conn_list, links) { printf("%4u %-16s %-36s %-36s\n", conn->connection_id, conn->initiator_addr, conn->initiator, conn->target); } } bailout: free(conn_str); return (retval); } static int cctl_islogout(int fd, int argc, char **argv, char *combinedopt) { struct ctl_iscsi req; int retval = 0, c; int all = 0, connection_id = -1, nargs = 0; char *initiator_name = NULL, *initiator_addr = NULL; while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'a': all = 1; nargs++; break; case 'c': connection_id = strtoul(optarg, NULL, 0); nargs++; break; case 'i': initiator_name = strdup(optarg); if (initiator_name == NULL) err(1, "%s: strdup", __func__); nargs++; break; case 'p': initiator_addr = strdup(optarg); if (initiator_addr == NULL) err(1, "%s: strdup", __func__); nargs++; break; default: break; } } if (nargs == 0) errx(1, "%s: either -a, -c, -i, or -p must be specified", __func__); if (nargs > 1) errx(1, "%s: only one of -a, -c, -i, or -p may be specified", __func__); bzero(&req, sizeof(req)); req.type = CTL_ISCSI_LOGOUT; req.data.logout.connection_id = connection_id; if (initiator_addr != NULL) strlcpy(req.data.logout.initiator_addr, initiator_addr, sizeof(req.data.logout.initiator_addr)); if (initiator_name != NULL) strlcpy(req.data.logout.initiator_name, initiator_name, sizeof(req.data.logout.initiator_name)); if (all != 0) req.data.logout.all = 1; if (ioctl(fd, CTL_ISCSI, &req) == -1) { warn("%s: error issuing CTL_ISCSI ioctl", __func__); retval = 1; goto bailout; } if (req.status != CTL_ISCSI_OK) { warnx("%s: error returned from CTL iSCSI logout request:\n%s", __func__, req.error_str); retval = 1; goto bailout; } printf("iSCSI logout requests submitted\n"); bailout: return (retval); } static int cctl_isterminate(int fd, int argc, char **argv, char *combinedopt) { struct ctl_iscsi req; int retval = 0, c; int all = 0, connection_id = -1, nargs = 0; char *initiator_name = NULL, *initiator_addr = NULL; while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'a': all = 1; nargs++; break; case 'c': connection_id = strtoul(optarg, NULL, 0); nargs++; break; case 'i': initiator_name = strdup(optarg); if (initiator_name == NULL) err(1, "%s: strdup", __func__); nargs++; break; case 'p': initiator_addr = strdup(optarg); if (initiator_addr == NULL) err(1, "%s: strdup", __func__); nargs++; break; default: break; } } if (nargs == 0) errx(1, "%s: either -a, -c, -i, or -p must be specified", __func__); if (nargs > 1) errx(1, "%s: only one of -a, -c, -i, or -p may be specified", __func__); bzero(&req, sizeof(req)); req.type = CTL_ISCSI_TERMINATE; req.data.terminate.connection_id = connection_id; if (initiator_addr != NULL) strlcpy(req.data.terminate.initiator_addr, initiator_addr, sizeof(req.data.terminate.initiator_addr)); if (initiator_name != NULL) strlcpy(req.data.terminate.initiator_name, initiator_name, sizeof(req.data.terminate.initiator_name)); if (all != 0) req.data.terminate.all = 1; if (ioctl(fd, CTL_ISCSI, &req) == -1) { warn("%s: error issuing CTL_ISCSI ioctl", __func__); retval = 1; goto bailout; } if (req.status != CTL_ISCSI_OK) { warnx("%s: error returned from CTL iSCSI connection " "termination request:\n%s", __func__, req.error_str); retval = 1; goto bailout; } printf("iSCSI connections terminated\n"); bailout: return (retval); } /* * Name/value pair used for per-LUN attributes. */ struct cctl_lun_nv { char *name; char *value; STAILQ_ENTRY(cctl_lun_nv) links; }; /* * Backend LUN information. */ struct cctl_lun { uint64_t lun_id; char *backend_type; uint64_t size_blocks; uint32_t blocksize; char *serial_number; char *device_id; STAILQ_HEAD(,cctl_lun_nv) attr_list; STAILQ_ENTRY(cctl_lun) links; }; struct cctl_devlist_data { int num_luns; STAILQ_HEAD(,cctl_lun) lun_list; struct cctl_lun *cur_lun; int level; struct sbuf *cur_sb[32]; }; static void cctl_start_element(void *user_data, const char *name, const char **attr) { int i; struct cctl_devlist_data *devlist; struct cctl_lun *cur_lun; devlist = (struct cctl_devlist_data *)user_data; cur_lun = devlist->cur_lun; devlist->level++; if ((u_int)devlist->level >= (sizeof(devlist->cur_sb) / sizeof(devlist->cur_sb[0]))) errx(1, "%s: too many nesting levels, %zd max", __func__, sizeof(devlist->cur_sb) / sizeof(devlist->cur_sb[0])); devlist->cur_sb[devlist->level] = sbuf_new_auto(); if (devlist->cur_sb[devlist->level] == NULL) err(1, "%s: Unable to allocate sbuf", __func__); if (strcmp(name, "lun") == 0) { if (cur_lun != NULL) errx(1, "%s: improper lun element nesting", __func__); cur_lun = calloc(1, sizeof(*cur_lun)); if (cur_lun == NULL) err(1, "%s: cannot allocate %zd bytes", __func__, sizeof(*cur_lun)); devlist->num_luns++; devlist->cur_lun = cur_lun; STAILQ_INIT(&cur_lun->attr_list); STAILQ_INSERT_TAIL(&devlist->lun_list, cur_lun, links); for (i = 0; attr[i] != NULL; i += 2) { if (strcmp(attr[i], "id") == 0) { cur_lun->lun_id = strtoull(attr[i+1], NULL, 0); } else { errx(1, "%s: invalid LUN attribute %s = %s", __func__, attr[i], attr[i+1]); } } } } static void cctl_end_element(void *user_data, const char *name) { struct cctl_devlist_data *devlist; struct cctl_lun *cur_lun; char *str; devlist = (struct cctl_devlist_data *)user_data; cur_lun = devlist->cur_lun; if ((cur_lun == NULL) && (strcmp(name, "ctllunlist") != 0)) errx(1, "%s: cur_lun == NULL! (name = %s)", __func__, name); if (devlist->cur_sb[devlist->level] == NULL) errx(1, "%s: no valid sbuf at level %d (name %s)", __func__, devlist->level, name); if (sbuf_finish(devlist->cur_sb[devlist->level]) != 0) err(1, "%s: sbuf_finish", __func__); str = strdup(sbuf_data(devlist->cur_sb[devlist->level])); if (str == NULL) err(1, "%s can't allocate %zd bytes for string", __func__, sbuf_len(devlist->cur_sb[devlist->level])); if (strlen(str) == 0) { free(str); str = NULL; } sbuf_delete(devlist->cur_sb[devlist->level]); devlist->cur_sb[devlist->level] = NULL; devlist->level--; if (strcmp(name, "backend_type") == 0) { cur_lun->backend_type = str; str = NULL; } else if (strcmp(name, "size") == 0) { cur_lun->size_blocks = strtoull(str, NULL, 0); } else if (strcmp(name, "blocksize") == 0) { cur_lun->blocksize = strtoul(str, NULL, 0); } else if (strcmp(name, "serial_number") == 0) { cur_lun->serial_number = str; str = NULL; } else if (strcmp(name, "device_id") == 0) { cur_lun->device_id = str; str = NULL; } else if (strcmp(name, "lun") == 0) { devlist->cur_lun = NULL; } else if (strcmp(name, "ctllunlist") == 0) { /* Nothing. */ } else { struct cctl_lun_nv *nv; nv = calloc(1, sizeof(*nv)); if (nv == NULL) err(1, "%s: can't allocate %zd bytes for nv pair", __func__, sizeof(*nv)); nv->name = strdup(name); if (nv->name == NULL) err(1, "%s: can't allocated %zd bytes for string", __func__, strlen(name)); nv->value = str; str = NULL; STAILQ_INSERT_TAIL(&cur_lun->attr_list, nv, links); } free(str); } static void cctl_char_handler(void *user_data, const XML_Char *str, int len) { struct cctl_devlist_data *devlist; devlist = (struct cctl_devlist_data *)user_data; sbuf_bcat(devlist->cur_sb[devlist->level], str, len); } static int cctl_devlist(int fd, int argc, char **argv, char *combinedopt) { struct ctl_lun_list list; struct cctl_devlist_data devlist; struct cctl_lun *lun; XML_Parser parser; char *lun_str; int lun_len; int dump_xml = 0; int retval, c; char *backend = NULL; int verbose = 0; retval = 0; lun_len = 4096; bzero(&devlist, sizeof(devlist)); STAILQ_INIT(&devlist.lun_list); while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'b': backend = strdup(optarg); break; case 'v': verbose++; break; case 'x': dump_xml = 1; break; default: break; } } retry: lun_str = malloc(lun_len); bzero(&list, sizeof(list)); list.alloc_len = lun_len; list.status = CTL_LUN_LIST_NONE; list.lun_xml = lun_str; if (ioctl(fd, CTL_LUN_LIST, &list) == -1) { warn("%s: error issuing CTL_LUN_LIST ioctl", __func__); retval = 1; goto bailout; } if (list.status == CTL_LUN_LIST_ERROR) { warnx("%s: error returned from CTL_LUN_LIST ioctl:\n%s", __func__, list.error_str); } else if (list.status == CTL_LUN_LIST_NEED_MORE_SPACE) { lun_len = lun_len << 1; goto retry; } if (dump_xml != 0) { printf("%s", lun_str); goto bailout; } parser = XML_ParserCreate(NULL); if (parser == NULL) { warn("%s: Unable to create XML parser", __func__); retval = 1; goto bailout; } XML_SetUserData(parser, &devlist); XML_SetElementHandler(parser, cctl_start_element, cctl_end_element); XML_SetCharacterDataHandler(parser, cctl_char_handler); retval = XML_Parse(parser, lun_str, strlen(lun_str), 1); if (retval != 1) { warnx("%s: Unable to parse XML: Error %d", __func__, XML_GetErrorCode(parser)); XML_ParserFree(parser); retval = 1; goto bailout; } retval = 0; XML_ParserFree(parser); printf("LUN Backend %18s %4s %-16s %-16s\n", "Size (Blocks)", "BS", "Serial Number", "Device ID"); STAILQ_FOREACH(lun, &devlist.lun_list, links) { struct cctl_lun_nv *nv; if ((backend != NULL) && (strcmp(lun->backend_type, backend) != 0)) continue; printf("%3ju %-8s %18ju %4u %-16s %-16s\n", (uintmax_t)lun->lun_id, lun->backend_type, (uintmax_t)lun->size_blocks, lun->blocksize, lun->serial_number, lun->device_id); if (verbose == 0) continue; STAILQ_FOREACH(nv, &lun->attr_list, links) { printf(" %s=%s\n", nv->name, nv->value); } } bailout: free(lun_str); return (retval); } /* * Port information. */ struct cctl_port { uint64_t port_id; char *online; char *frontend_type; char *name; int pp, vp; char *target, *port, *lun_map; STAILQ_HEAD(,cctl_lun_nv) init_list; STAILQ_HEAD(,cctl_lun_nv) lun_list; STAILQ_HEAD(,cctl_lun_nv) attr_list; STAILQ_ENTRY(cctl_port) links; }; struct cctl_portlist_data { int num_ports; STAILQ_HEAD(,cctl_port) port_list; struct cctl_port *cur_port; int level; uint64_t cur_id; struct sbuf *cur_sb[32]; }; static void cctl_start_pelement(void *user_data, const char *name, const char **attr) { int i; struct cctl_portlist_data *portlist; struct cctl_port *cur_port; portlist = (struct cctl_portlist_data *)user_data; cur_port = portlist->cur_port; portlist->level++; if ((u_int)portlist->level >= (sizeof(portlist->cur_sb) / sizeof(portlist->cur_sb[0]))) errx(1, "%s: too many nesting levels, %zd max", __func__, sizeof(portlist->cur_sb) / sizeof(portlist->cur_sb[0])); portlist->cur_sb[portlist->level] = sbuf_new_auto(); if (portlist->cur_sb[portlist->level] == NULL) err(1, "%s: Unable to allocate sbuf", __func__); portlist->cur_id = 0; for (i = 0; attr[i] != NULL; i += 2) { if (strcmp(attr[i], "id") == 0) { portlist->cur_id = strtoull(attr[i+1], NULL, 0); break; } } if (strcmp(name, "targ_port") == 0) { if (cur_port != NULL) errx(1, "%s: improper port element nesting", __func__); cur_port = calloc(1, sizeof(*cur_port)); if (cur_port == NULL) err(1, "%s: cannot allocate %zd bytes", __func__, sizeof(*cur_port)); portlist->num_ports++; portlist->cur_port = cur_port; STAILQ_INIT(&cur_port->init_list); STAILQ_INIT(&cur_port->lun_list); STAILQ_INIT(&cur_port->attr_list); cur_port->port_id = portlist->cur_id; STAILQ_INSERT_TAIL(&portlist->port_list, cur_port, links); } } static void cctl_end_pelement(void *user_data, const char *name) { struct cctl_portlist_data *portlist; struct cctl_port *cur_port; char *str; portlist = (struct cctl_portlist_data *)user_data; cur_port = portlist->cur_port; if ((cur_port == NULL) && (strcmp(name, "ctlportlist") != 0)) errx(1, "%s: cur_port == NULL! (name = %s)", __func__, name); if (portlist->cur_sb[portlist->level] == NULL) errx(1, "%s: no valid sbuf at level %d (name %s)", __func__, portlist->level, name); if (sbuf_finish(portlist->cur_sb[portlist->level]) != 0) err(1, "%s: sbuf_finish", __func__); str = strdup(sbuf_data(portlist->cur_sb[portlist->level])); if (str == NULL) err(1, "%s can't allocate %zd bytes for string", __func__, sbuf_len(portlist->cur_sb[portlist->level])); if (strlen(str) == 0) { free(str); str = NULL; } sbuf_delete(portlist->cur_sb[portlist->level]); portlist->cur_sb[portlist->level] = NULL; portlist->level--; if (strcmp(name, "frontend_type") == 0) { cur_port->frontend_type = str; str = NULL; } else if (strcmp(name, "port_name") == 0) { cur_port->name = str; str = NULL; } else if (strcmp(name, "online") == 0) { cur_port->online = str; str = NULL; } else if (strcmp(name, "physical_port") == 0) { cur_port->pp = strtoull(str, NULL, 0); } else if (strcmp(name, "virtual_port") == 0) { cur_port->vp = strtoull(str, NULL, 0); } else if (strcmp(name, "target") == 0) { cur_port->target = str; str = NULL; } else if (strcmp(name, "port") == 0) { cur_port->port = str; str = NULL; } else if (strcmp(name, "lun_map") == 0) { cur_port->lun_map = str; str = NULL; } else if (strcmp(name, "targ_port") == 0) { portlist->cur_port = NULL; } else if (strcmp(name, "ctlportlist") == 0) { /* Nothing. */ } else { struct cctl_lun_nv *nv; nv = calloc(1, sizeof(*nv)); if (nv == NULL) err(1, "%s: can't allocate %zd bytes for nv pair", __func__, sizeof(*nv)); if (strcmp(name, "initiator") == 0 || strcmp(name, "lun") == 0) asprintf(&nv->name, "%ju", portlist->cur_id); else nv->name = strdup(name); if (nv->name == NULL) err(1, "%s: can't allocated %zd bytes for string", __func__, strlen(name)); nv->value = str; str = NULL; if (strcmp(name, "initiator") == 0) STAILQ_INSERT_TAIL(&cur_port->init_list, nv, links); else if (strcmp(name, "lun") == 0) STAILQ_INSERT_TAIL(&cur_port->lun_list, nv, links); else STAILQ_INSERT_TAIL(&cur_port->attr_list, nv, links); } free(str); } static void cctl_char_phandler(void *user_data, const XML_Char *str, int len) { struct cctl_portlist_data *portlist; portlist = (struct cctl_portlist_data *)user_data; sbuf_bcat(portlist->cur_sb[portlist->level], str, len); } static int cctl_portlist(int fd, int argc, char **argv, char *combinedopt) { struct ctl_lun_list list; struct cctl_portlist_data portlist; struct cctl_port *port; XML_Parser parser; char *port_str; int port_len; int dump_xml = 0; int retval, c; char *frontend = NULL; uint64_t portarg = UINT64_MAX; int verbose = 0, init = 0, lun = 0, quiet = 0; retval = 0; port_len = 4096; bzero(&portlist, sizeof(portlist)); STAILQ_INIT(&portlist.port_list); while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'f': frontend = strdup(optarg); break; case 'i': init++; break; case 'l': lun++; break; case 'p': portarg = strtoll(optarg, NULL, 0); break; case 'q': quiet++; break; case 'v': verbose++; break; case 'x': dump_xml = 1; break; default: break; } } retry: port_str = malloc(port_len); bzero(&list, sizeof(list)); list.alloc_len = port_len; list.status = CTL_LUN_LIST_NONE; list.lun_xml = port_str; if (ioctl(fd, CTL_PORT_LIST, &list) == -1) { warn("%s: error issuing CTL_PORT_LIST ioctl", __func__); retval = 1; goto bailout; } if (list.status == CTL_LUN_LIST_ERROR) { warnx("%s: error returned from CTL_PORT_LIST ioctl:\n%s", __func__, list.error_str); } else if (list.status == CTL_LUN_LIST_NEED_MORE_SPACE) { port_len = port_len << 1; goto retry; } if (dump_xml != 0) { printf("%s", port_str); goto bailout; } parser = XML_ParserCreate(NULL); if (parser == NULL) { warn("%s: Unable to create XML parser", __func__); retval = 1; goto bailout; } XML_SetUserData(parser, &portlist); XML_SetElementHandler(parser, cctl_start_pelement, cctl_end_pelement); XML_SetCharacterDataHandler(parser, cctl_char_phandler); retval = XML_Parse(parser, port_str, strlen(port_str), 1); if (retval != 1) { warnx("%s: Unable to parse XML: Error %d", __func__, XML_GetErrorCode(parser)); XML_ParserFree(parser); retval = 1; goto bailout; } retval = 0; XML_ParserFree(parser); if (quiet == 0) printf("Port Online Frontend Name pp vp\n"); STAILQ_FOREACH(port, &portlist.port_list, links) { struct cctl_lun_nv *nv; if ((frontend != NULL) && (strcmp(port->frontend_type, frontend) != 0)) continue; if ((portarg != UINT64_MAX) && (portarg != port->port_id)) continue; printf("%-4ju %-6s %-8s %-8s %-2d %-2d %s\n", (uintmax_t)port->port_id, port->online, port->frontend_type, port->name, port->pp, port->vp, port->port ? port->port : ""); if (init || verbose) { if (port->target) printf(" Target: %s\n", port->target); STAILQ_FOREACH(nv, &port->init_list, links) { printf(" Initiator %s: %s\n", nv->name, nv->value); } } if (lun || verbose) { if (port->lun_map) { STAILQ_FOREACH(nv, &port->lun_list, links) printf(" LUN %s: %s\n", nv->name, nv->value); if (STAILQ_EMPTY(&port->lun_list)) printf(" No LUNs mapped\n"); } else printf(" All LUNs mapped\n"); } if (verbose) { STAILQ_FOREACH(nv, &port->attr_list, links) { printf(" %s=%s\n", nv->name, nv->value); } } } bailout: free(port_str); return (retval); } static int cctl_lunmap(int fd, int argc, char **argv, char *combinedopt) { struct ctl_lun_map lm; int retval = 0, c; retval = 0; lm.port = UINT32_MAX; lm.plun = UINT32_MAX; lm.lun = UINT32_MAX; while ((c = getopt(argc, argv, combinedopt)) != -1) { switch (c) { case 'p': lm.port = strtoll(optarg, NULL, 0); break; case 'l': lm.plun = strtoll(optarg, NULL, 0); break; case 'L': lm.lun = strtoll(optarg, NULL, 0); break; default: break; } } if (ioctl(fd, CTL_LUN_MAP, &lm) == -1) { warn("%s: error issuing CTL_LUN_MAP ioctl", __func__); retval = 1; } return (retval); } void usage(int error) { fprintf(error ? stderr : stdout, "Usage:\n" "Primary commands:\n" " ctladm tur [dev_id][general options]\n" " ctladm inquiry [dev_id][general options]\n" " ctladm devid [dev_id][general options]\n" " ctladm reqsense [dev_id][general options]\n" " ctladm reportluns [dev_id][general options]\n" " ctladm read [dev_id][general options] <-l lba> <-d len>\n" " <-f file|-> <-b blocksize> [-c cdbsize][-N]\n" " ctladm write [dev_id][general options] <-l lba> <-d len>\n" " <-f file|-> <-b blocksize> [-c cdbsize][-N]\n" " ctladm readcap [dev_id][general options] [-c cdbsize]\n" " ctladm modesense [dev_id][general options] <-m page|-l> [-P pc]\n" " [-d] [-S subpage] [-c cdbsize]\n" " ctladm prin [dev_id][general options] <-a action>\n" " ctladm prout [dev_id][general options] <-a action>\n" " <-r restype] [-k key] [-s sa_key]\n" " ctladm rtpg [dev_id][general options]\n" " ctladm start [dev_id][general options] [-i] [-o]\n" " ctladm stop [dev_id][general options] [-i] [-o]\n" " ctladm synccache [dev_id][general options] [-l lba]\n" " [-b blockcount] [-r] [-i] [-c cdbsize]\n" " ctladm create <-b backend> [-B blocksize] [-d device_id]\n" " [-l lun_id] [-o name=value] [-s size_bytes]\n" " [-S serial_num] [-t dev_type]\n" " ctladm remove <-b backend> <-l lun_id> [-o name=value]\n" " ctladm modify <-b backend> <-l lun_id> <-s size_bytes>\n" " ctladm devlist [-b backend] [-v] [-x]\n" " ctladm shutdown\n" " ctladm startup\n" " ctladm lunlist\n" " ctladm lunmap -p targ_port [-l pLUN] [-L cLUN]\n" " ctladm delay [dev_id] <-l datamove|done> [-T oneshot|cont]\n" " [-t secs]\n" " ctladm realsync \n" " ctladm setsync [dev_id] <-i interval>\n" " ctladm getsync [dev_id]\n" " ctladm inject [dev_id] <-i action> <-p pattern> [-r lba,len]\n" " [-s len fmt [args]] [-c] [-d delete_id]\n" " ctladm port <-l | -o | [-w wwnn][-W wwpn]>\n" " [-p targ_port] [-t port_type] [-q] [-x]\n" " ctladm portlist [-f frontend] [-i] [-p targ_port] [-q] [-v] [-x]\n" " ctladm islist [-v | -x]\n" " ctladm islogout <-a | -c connection-id | -i name | -p portal>\n" " ctladm isterminate <-a | -c connection-id | -i name | -p portal>\n" " ctladm dumpooa\n" " ctladm dumpstructs\n" " ctladm help\n" "General Options:\n" "-I intiator_id : defaults to 7, used to change the initiator id\n" "-C retries : specify the number of times to retry this command\n" "-D devicename : specify the device to operate on\n" " : (default is %s)\n" "read/write options:\n" "-l lba : logical block address\n" "-d len : read/write length, in blocks\n" "-f file|- : write/read data to/from file or stdout/stdin\n" "-b blocksize : block size, in bytes\n" "-c cdbsize : specify minimum cdb size: 6, 10, 12 or 16\n" "-N : do not copy data to/from userland\n" "readcapacity options:\n" "-c cdbsize : specify minimum cdb size: 10 or 16\n" "modesense options:\n" "-m page : specify the mode page to view\n" "-l : request a list of supported pages\n" "-P pc : specify the page control value: 0-3 (current,\n" " changeable, default, saved, respectively)\n" "-d : disable block descriptors for mode sense\n" "-S subpage : specify a subpage\n" "-c cdbsize : specify minimum cdb size: 6 or 10\n" "persistent reserve in options:\n" "-a action : specify the action value: 0-2 (read key, read\n" " reservation, read capabilities, respectively)\n" "persistent reserve out options:\n" "-a action : specify the action value: 0-5 (register, reserve,\n" " release, clear, preempt, register and ignore)\n" "-k key : key value\n" "-s sa_key : service action value\n" "-r restype : specify the reservation type: 0-5(wr ex, ex ac,\n" " wr ex ro, ex ac ro, wr ex ar, ex ac ar)\n" "start/stop options:\n" "-i : set the immediate bit (CTL does not support this)\n" "-o : set the on/offline bit\n" "synccache options:\n" "-l lba : set the starting LBA\n" "-b blockcount : set the length to sync in blocks\n" "-r : set the relative addressing bit\n" "-i : set the immediate bit\n" "-c cdbsize : specify minimum cdb size: 10 or 16\n" "create options:\n" "-b backend : backend name (\"block\", \"ramdisk\", etc.)\n" "-B blocksize : LUN blocksize in bytes (some backends)\n" "-d device_id : SCSI VPD page 0x83 ID\n" "-l lun_id : requested LUN number\n" "-o name=value : backend-specific options, multiple allowed\n" "-s size_bytes : LUN size in bytes (some backends)\n" "-S serial_num : SCSI VPD page 0x80 serial number\n" "-t dev_type : SCSI device type (0=disk, 3=processor)\n" "remove options:\n" "-b backend : backend name (\"block\", \"ramdisk\", etc.)\n" "-l lun_id : LUN number to delete\n" "-o name=value : backend-specific options, multiple allowed\n" "devlist options:\n" "-b backend : list devices from specified backend only\n" "-v : be verbose, show backend attributes\n" "-x : dump raw XML\n" "delay options:\n" "-l datamove|done : delay command at datamove or done phase\n" "-T oneshot : delay one command, then resume normal completion\n" "-T cont : delay all commands\n" "-t secs : number of seconds to delay\n" "inject options:\n" "-i error_action : action to perform\n" "-p pattern : command pattern to look for\n" "-r lba,len : LBA range for pattern\n" "-s len fmt [args] : sense data for custom sense action\n" "-c : continuous operation\n" "-d delete_id : error id to delete\n" "port options:\n" "-l : list frontend ports\n" "-o on|off : turn frontend ports on or off\n" "-w wwnn : set WWNN for one frontend\n" "-W wwpn : set WWPN for one frontend\n" "-t port_type : specify fc, scsi, ioctl, internal frontend type\n" "-p targ_port : specify target port number\n" "-q : omit header in list output\n" "-x : output port list in XML format\n" "portlist options:\n" "-f fronetnd : specify frontend type\n" "-i : report target and initiators addresses\n" "-l : report LUN mapping\n" "-p targ_port : specify target port number\n" "-q : omit header in list output\n" "-v : verbose output (report all port options)\n" "-x : output port list in XML format\n" "lunmap options:\n" "-p targ_port : specify target port number\n" "-L pLUN : specify port-visible LUN\n" "-L cLUN : specify CTL LUN\n", CTL_DEFAULT_DEV); } int main(int argc, char **argv) { int c; ctladm_cmdfunction command; ctladm_cmdargs cmdargs; ctladm_optret optreturn; char *device; const char *mainopt = "C:D:I:"; const char *subopt = NULL; char combinedopt[256]; int target, lun; int optstart = 2; int retval, fd; int retries; int initid; int saved_errno; retval = 0; cmdargs = CTLADM_ARG_NONE; command = CTLADM_CMD_HELP; device = NULL; fd = -1; retries = 0; target = 0; lun = 0; initid = 7; if (argc < 2) { usage(1); retval = 1; goto bailout; } /* * Get the base option. */ optreturn = getoption(option_table,argv[1], &command, &cmdargs,&subopt); if (optreturn == CC_OR_AMBIGUOUS) { warnx("ambiguous option %s", argv[1]); usage(0); exit(1); } else if (optreturn == CC_OR_NOT_FOUND) { warnx("option %s not found", argv[1]); usage(0); exit(1); } if (cmdargs & CTLADM_ARG_NEED_TL) { if ((argc < 3) || (!isdigit(argv[2][0]))) { warnx("option %s requires a target:lun argument", argv[1]); usage(0); exit(1); } retval = cctl_parse_tl(argv[2], &target, &lun); if (retval != 0) errx(1, "invalid target:lun argument %s", argv[2]); cmdargs |= CTLADM_ARG_TARG_LUN; optstart++; } /* * Ahh, getopt(3) is a pain. * * This is a gross hack. There really aren't many other good * options (excuse the pun) for parsing options in a situation like * this. getopt is kinda braindead, so you end up having to run * through the options twice, and give each invocation of getopt * the option string for the other invocation. * * You would think that you could just have two groups of options. * The first group would get parsed by the first invocation of * getopt, and the second group would get parsed by the second * invocation of getopt. It doesn't quite work out that way. When * the first invocation of getopt finishes, it leaves optind pointing * to the argument _after_ the first argument in the second group. * So when the second invocation of getopt comes around, it doesn't * recognize the first argument it gets and then bails out. * * A nice alternative would be to have a flag for getopt that says * "just keep parsing arguments even when you encounter an unknown * argument", but there isn't one. So there's no real clean way to * easily parse two sets of arguments without having one invocation * of getopt know about the other. * * Without this hack, the first invocation of getopt would work as * long as the generic arguments are first, but the second invocation * (in the subfunction) would fail in one of two ways. In the case * where you don't set optreset, it would fail because optind may be * pointing to the argument after the one it should be pointing at. * In the case where you do set optreset, and reset optind, it would * fail because getopt would run into the first set of options, which * it doesn't understand. * * All of this would "sort of" work if you could somehow figure out * whether optind had been incremented one option too far. The * mechanics of that, however, are more daunting than just giving * both invocations all of the expect options for either invocation. * * Needless to say, I wouldn't mind if someone invented a better * (non-GPL!) command line parsing interface than getopt. I * wouldn't mind if someone added more knobs to getopt to make it * work better. Who knows, I may talk myself into doing it someday, * if the standards weenies let me. As it is, it just leads to * hackery like this and causes people to avoid it in some cases. * * KDM, September 8th, 1998 */ if (subopt != NULL) sprintf(combinedopt, "%s%s", mainopt, subopt); else sprintf(combinedopt, "%s", mainopt); /* * Start getopt processing at argv[2/3], since we've already * accepted argv[1..2] as the command name, and as a possible * device name. */ optind = optstart; /* * Now we run through the argument list looking for generic * options, and ignoring options that possibly belong to * subfunctions. */ while ((c = getopt(argc, argv, combinedopt))!= -1){ switch (c) { case 'C': cmdargs |= CTLADM_ARG_RETRIES; retries = strtol(optarg, NULL, 0); break; case 'D': device = strdup(optarg); cmdargs |= CTLADM_ARG_DEVICE; break; case 'I': cmdargs |= CTLADM_ARG_INITIATOR; initid = strtol(optarg, NULL, 0); break; default: break; } } if ((cmdargs & CTLADM_ARG_INITIATOR) == 0) initid = 7; optind = optstart; optreset = 1; /* * Default to opening the CTL device for now. */ if (((cmdargs & CTLADM_ARG_DEVICE) == 0) && (command != CTLADM_CMD_HELP)) { device = strdup(CTL_DEFAULT_DEV); cmdargs |= CTLADM_ARG_DEVICE; } if ((cmdargs & CTLADM_ARG_DEVICE) && (command != CTLADM_CMD_HELP)) { fd = open(device, O_RDWR); if (fd == -1 && errno == ENOENT) { saved_errno = errno; retval = kldload("ctl"); if (retval != -1) fd = open(device, O_RDWR); else errno = saved_errno; } if (fd == -1) { fprintf(stderr, "%s: error opening %s: %s\n", argv[0], device, strerror(errno)); retval = 1; goto bailout; } } else if ((command != CTLADM_CMD_HELP) && ((cmdargs & CTLADM_ARG_DEVICE) == 0)) { fprintf(stderr, "%s: you must specify a device with the " "--device argument for this command\n", argv[0]); command = CTLADM_CMD_HELP; retval = 1; } switch (command) { case CTLADM_CMD_TUR: retval = cctl_tur(fd, target, lun, initid, retries); break; case CTLADM_CMD_INQUIRY: retval = cctl_inquiry(fd, target, lun, initid, retries); break; case CTLADM_CMD_REQ_SENSE: retval = cctl_req_sense(fd, target, lun, initid, retries); break; case CTLADM_CMD_REPORT_LUNS: retval = cctl_report_luns(fd, target, lun, initid, retries); break; case CTLADM_CMD_CREATE: retval = cctl_create_lun(fd, argc, argv, combinedopt); break; case CTLADM_CMD_RM: retval = cctl_rm_lun(fd, argc, argv, combinedopt); break; case CTLADM_CMD_DEVLIST: retval = cctl_devlist(fd, argc, argv, combinedopt); break; case CTLADM_CMD_READ: case CTLADM_CMD_WRITE: retval = cctl_read_write(fd, target, lun, initid, retries, argc, argv, combinedopt, command); break; case CTLADM_CMD_PORT: retval = cctl_port(fd, argc, argv, combinedopt); break; case CTLADM_CMD_PORTLIST: retval = cctl_portlist(fd, argc, argv, combinedopt); break; case CTLADM_CMD_LUNMAP: retval = cctl_lunmap(fd, argc, argv, combinedopt); break; case CTLADM_CMD_READCAPACITY: retval = cctl_read_capacity(fd, target, lun, initid, retries, argc, argv, combinedopt); break; case CTLADM_CMD_MODESENSE: retval = cctl_mode_sense(fd, target, lun, initid, retries, argc, argv, combinedopt); break; case CTLADM_CMD_START: case CTLADM_CMD_STOP: retval = cctl_start_stop(fd, target, lun, initid, retries, (command == CTLADM_CMD_START) ? 1 : 0, argc, argv, combinedopt); break; case CTLADM_CMD_SYNC_CACHE: retval = cctl_sync_cache(fd, target, lun, initid, retries, argc, argv, combinedopt); break; case CTLADM_CMD_SHUTDOWN: case CTLADM_CMD_STARTUP: retval = cctl_startup_shutdown(fd, target, lun, initid, command); break; case CTLADM_CMD_LUNLIST: retval = cctl_lunlist(fd); break; case CTLADM_CMD_DELAY: retval = cctl_delay(fd, target, lun, argc, argv, combinedopt); break; case CTLADM_CMD_REALSYNC: retval = cctl_realsync(fd, argc, argv); break; case CTLADM_CMD_SETSYNC: case CTLADM_CMD_GETSYNC: retval = cctl_getsetsync(fd, target, lun, command, argc, argv, combinedopt); break; case CTLADM_CMD_ERR_INJECT: retval = cctl_error_inject(fd, target, lun, argc, argv, combinedopt); break; case CTLADM_CMD_DUMPOOA: retval = cctl_dump_ooa(fd, argc, argv); break; case CTLADM_CMD_DUMPSTRUCTS: retval = cctl_dump_structs(fd, cmdargs); break; case CTLADM_CMD_PRES_IN: retval = cctl_persistent_reserve_in(fd, target, lun, initid, argc, argv, combinedopt, retries); break; case CTLADM_CMD_PRES_OUT: retval = cctl_persistent_reserve_out(fd, target, lun, initid, argc, argv, combinedopt, retries); break; case CTLADM_CMD_INQ_VPD_DEVID: retval = cctl_inquiry_vpd_devid(fd, target, lun, initid); break; case CTLADM_CMD_RTPG: retval = cctl_report_target_port_group(fd, target, lun, initid); break; case CTLADM_CMD_MODIFY: retval = cctl_modify_lun(fd, argc, argv, combinedopt); break; case CTLADM_CMD_ISLIST: retval = cctl_islist(fd, argc, argv, combinedopt); break; case CTLADM_CMD_ISLOGOUT: retval = cctl_islogout(fd, argc, argv, combinedopt); break; case CTLADM_CMD_ISTERMINATE: retval = cctl_isterminate(fd, argc, argv, combinedopt); break; case CTLADM_CMD_HELP: default: usage(retval); break; } bailout: if (fd != -1) close(fd); exit (retval); } /* * vim: ts=8 */ Index: projects/clang-trunk/usr.sbin/ctld/ctld.c =================================================================== --- projects/clang-trunk/usr.sbin/ctld/ctld.c (revision 287501) +++ projects/clang-trunk/usr.sbin/ctld/ctld.c (revision 287502) @@ -1,2600 +1,2596 @@ /*- * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ctld.h" #include "isns.h" bool proxy_mode = false; static volatile bool sighup_received = false; static volatile bool sigterm_received = false; static volatile bool sigalrm_received = false; static int nchildren = 0; static uint16_t last_portal_group_tag = 0; static void usage(void) { fprintf(stderr, "usage: ctld [-d][-f config-file]\n"); exit(1); } char * checked_strdup(const char *s) { char *c; c = strdup(s); if (c == NULL) log_err(1, "strdup"); return (c); } struct conf * conf_new(void) { struct conf *conf; conf = calloc(1, sizeof(*conf)); if (conf == NULL) log_err(1, "calloc"); TAILQ_INIT(&conf->conf_luns); TAILQ_INIT(&conf->conf_targets); TAILQ_INIT(&conf->conf_auth_groups); TAILQ_INIT(&conf->conf_ports); TAILQ_INIT(&conf->conf_portal_groups); TAILQ_INIT(&conf->conf_pports); TAILQ_INIT(&conf->conf_isns); conf->conf_isns_period = 900; conf->conf_isns_timeout = 5; conf->conf_debug = 0; conf->conf_timeout = 60; conf->conf_maxproc = 30; return (conf); } void conf_delete(struct conf *conf) { struct lun *lun, *ltmp; struct target *targ, *tmp; struct auth_group *ag, *cagtmp; struct portal_group *pg, *cpgtmp; struct pport *pp, *pptmp; struct isns *is, *istmp; assert(conf->conf_pidfh == NULL); TAILQ_FOREACH_SAFE(lun, &conf->conf_luns, l_next, ltmp) lun_delete(lun); TAILQ_FOREACH_SAFE(targ, &conf->conf_targets, t_next, tmp) target_delete(targ); TAILQ_FOREACH_SAFE(ag, &conf->conf_auth_groups, ag_next, cagtmp) auth_group_delete(ag); TAILQ_FOREACH_SAFE(pg, &conf->conf_portal_groups, pg_next, cpgtmp) portal_group_delete(pg); TAILQ_FOREACH_SAFE(pp, &conf->conf_pports, pp_next, pptmp) pport_delete(pp); TAILQ_FOREACH_SAFE(is, &conf->conf_isns, i_next, istmp) isns_delete(is); assert(TAILQ_EMPTY(&conf->conf_ports)); free(conf->conf_pidfile_path); free(conf); } static struct auth * auth_new(struct auth_group *ag) { struct auth *auth; auth = calloc(1, sizeof(*auth)); if (auth == NULL) log_err(1, "calloc"); auth->a_auth_group = ag; TAILQ_INSERT_TAIL(&ag->ag_auths, auth, a_next); return (auth); } static void auth_delete(struct auth *auth) { TAILQ_REMOVE(&auth->a_auth_group->ag_auths, auth, a_next); free(auth->a_user); free(auth->a_secret); free(auth->a_mutual_user); free(auth->a_mutual_secret); free(auth); } const struct auth * auth_find(const struct auth_group *ag, const char *user) { const struct auth *auth; TAILQ_FOREACH(auth, &ag->ag_auths, a_next) { if (strcmp(auth->a_user, user) == 0) return (auth); } return (NULL); } static void auth_check_secret_length(struct auth *auth) { size_t len; len = strlen(auth->a_secret); if (len > 16) { if (auth->a_auth_group->ag_name != NULL) log_warnx("secret for user \"%s\", auth-group \"%s\", " "is too long; it should be at most 16 characters " "long", auth->a_user, auth->a_auth_group->ag_name); else log_warnx("secret for user \"%s\", target \"%s\", " "is too long; it should be at most 16 characters " "long", auth->a_user, auth->a_auth_group->ag_target->t_name); } if (len < 12) { if (auth->a_auth_group->ag_name != NULL) log_warnx("secret for user \"%s\", auth-group \"%s\", " "is too short; it should be at least 12 characters " "long", auth->a_user, auth->a_auth_group->ag_name); else log_warnx("secret for user \"%s\", target \"%s\", " "is too short; it should be at least 16 characters " "long", auth->a_user, auth->a_auth_group->ag_target->t_name); } if (auth->a_mutual_secret != NULL) { len = strlen(auth->a_mutual_secret); if (len > 16) { if (auth->a_auth_group->ag_name != NULL) log_warnx("mutual secret for user \"%s\", " "auth-group \"%s\", is too long; it should " "be at most 16 characters long", auth->a_user, auth->a_auth_group->ag_name); else log_warnx("mutual secret for user \"%s\", " "target \"%s\", is too long; it should " "be at most 16 characters long", auth->a_user, auth->a_auth_group->ag_target->t_name); } if (len < 12) { if (auth->a_auth_group->ag_name != NULL) log_warnx("mutual secret for user \"%s\", " "auth-group \"%s\", is too short; it " "should be at least 12 characters long", auth->a_user, auth->a_auth_group->ag_name); else log_warnx("mutual secret for user \"%s\", " "target \"%s\", is too short; it should be " "at least 16 characters long", auth->a_user, auth->a_auth_group->ag_target->t_name); } } } const struct auth * auth_new_chap(struct auth_group *ag, const char *user, const char *secret) { struct auth *auth; if (ag->ag_type == AG_TYPE_UNKNOWN) ag->ag_type = AG_TYPE_CHAP; if (ag->ag_type != AG_TYPE_CHAP) { if (ag->ag_name != NULL) log_warnx("cannot mix \"chap\" authentication with " "other types for auth-group \"%s\"", ag->ag_name); else log_warnx("cannot mix \"chap\" authentication with " "other types for target \"%s\"", ag->ag_target->t_name); return (NULL); } auth = auth_new(ag); auth->a_user = checked_strdup(user); auth->a_secret = checked_strdup(secret); auth_check_secret_length(auth); return (auth); } const struct auth * auth_new_chap_mutual(struct auth_group *ag, const char *user, const char *secret, const char *user2, const char *secret2) { struct auth *auth; if (ag->ag_type == AG_TYPE_UNKNOWN) ag->ag_type = AG_TYPE_CHAP_MUTUAL; if (ag->ag_type != AG_TYPE_CHAP_MUTUAL) { if (ag->ag_name != NULL) log_warnx("cannot mix \"chap-mutual\" authentication " "with other types for auth-group \"%s\"", ag->ag_name); else log_warnx("cannot mix \"chap-mutual\" authentication " "with other types for target \"%s\"", ag->ag_target->t_name); return (NULL); } auth = auth_new(ag); auth->a_user = checked_strdup(user); auth->a_secret = checked_strdup(secret); auth->a_mutual_user = checked_strdup(user2); auth->a_mutual_secret = checked_strdup(secret2); auth_check_secret_length(auth); return (auth); } const struct auth_name * auth_name_new(struct auth_group *ag, const char *name) { struct auth_name *an; an = calloc(1, sizeof(*an)); if (an == NULL) log_err(1, "calloc"); an->an_auth_group = ag; an->an_initator_name = checked_strdup(name); TAILQ_INSERT_TAIL(&ag->ag_names, an, an_next); return (an); } static void auth_name_delete(struct auth_name *an) { TAILQ_REMOVE(&an->an_auth_group->ag_names, an, an_next); free(an->an_initator_name); free(an); } bool auth_name_defined(const struct auth_group *ag) { if (TAILQ_EMPTY(&ag->ag_names)) return (false); return (true); } const struct auth_name * auth_name_find(const struct auth_group *ag, const char *name) { const struct auth_name *auth_name; TAILQ_FOREACH(auth_name, &ag->ag_names, an_next) { if (strcmp(auth_name->an_initator_name, name) == 0) return (auth_name); } return (NULL); } int auth_name_check(const struct auth_group *ag, const char *initiator_name) { if (!auth_name_defined(ag)) return (0); if (auth_name_find(ag, initiator_name) == NULL) return (1); return (0); } const struct auth_portal * auth_portal_new(struct auth_group *ag, const char *portal) { struct auth_portal *ap; char *net, *mask, *str, *tmp; int len, dm, m; ap = calloc(1, sizeof(*ap)); if (ap == NULL) log_err(1, "calloc"); ap->ap_auth_group = ag; ap->ap_initator_portal = checked_strdup(portal); mask = str = checked_strdup(portal); net = strsep(&mask, "/"); if (net[0] == '[') net++; len = strlen(net); if (len == 0) goto error; if (net[len - 1] == ']') net[len - 1] = 0; if (strchr(net, ':') != NULL) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ap->ap_sa; sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; if (inet_pton(AF_INET6, net, &sin6->sin6_addr) <= 0) goto error; dm = 128; } else { struct sockaddr_in *sin = (struct sockaddr_in *)&ap->ap_sa; sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; if (inet_pton(AF_INET, net, &sin->sin_addr) <= 0) goto error; dm = 32; } if (mask != NULL) { m = strtol(mask, &tmp, 0); if (m < 0 || m > dm || tmp[0] != 0) goto error; } else m = dm; ap->ap_mask = m; free(str); TAILQ_INSERT_TAIL(&ag->ag_portals, ap, ap_next); return (ap); error: free(ap); log_errx(1, "Incorrect initiator portal '%s'", portal); return (NULL); } static void auth_portal_delete(struct auth_portal *ap) { TAILQ_REMOVE(&ap->ap_auth_group->ag_portals, ap, ap_next); free(ap->ap_initator_portal); free(ap); } bool auth_portal_defined(const struct auth_group *ag) { if (TAILQ_EMPTY(&ag->ag_portals)) return (false); return (true); } const struct auth_portal * auth_portal_find(const struct auth_group *ag, const struct sockaddr_storage *ss) { const struct auth_portal *ap; const uint8_t *a, *b; int i; uint8_t bmask; TAILQ_FOREACH(ap, &ag->ag_portals, ap_next) { if (ap->ap_sa.ss_family != ss->ss_family) continue; if (ss->ss_family == AF_INET) { a = (const uint8_t *) &((const struct sockaddr_in *)ss)->sin_addr; b = (const uint8_t *) &((const struct sockaddr_in *)&ap->ap_sa)->sin_addr; } else { a = (const uint8_t *) &((const struct sockaddr_in6 *)ss)->sin6_addr; b = (const uint8_t *) &((const struct sockaddr_in6 *)&ap->ap_sa)->sin6_addr; } for (i = 0; i < ap->ap_mask / 8; i++) { if (a[i] != b[i]) goto next; } if (ap->ap_mask % 8) { bmask = 0xff << (8 - (ap->ap_mask % 8)); if ((a[i] & bmask) != (b[i] & bmask)) goto next; } return (ap); next: ; } return (NULL); } int auth_portal_check(const struct auth_group *ag, const struct sockaddr_storage *sa) { if (!auth_portal_defined(ag)) return (0); if (auth_portal_find(ag, sa) == NULL) return (1); return (0); } struct auth_group * auth_group_new(struct conf *conf, const char *name) { struct auth_group *ag; if (name != NULL) { ag = auth_group_find(conf, name); if (ag != NULL) { log_warnx("duplicated auth-group \"%s\"", name); return (NULL); } } ag = calloc(1, sizeof(*ag)); if (ag == NULL) log_err(1, "calloc"); if (name != NULL) ag->ag_name = checked_strdup(name); TAILQ_INIT(&ag->ag_auths); TAILQ_INIT(&ag->ag_names); TAILQ_INIT(&ag->ag_portals); ag->ag_conf = conf; TAILQ_INSERT_TAIL(&conf->conf_auth_groups, ag, ag_next); return (ag); } void auth_group_delete(struct auth_group *ag) { struct auth *auth, *auth_tmp; struct auth_name *auth_name, *auth_name_tmp; struct auth_portal *auth_portal, *auth_portal_tmp; TAILQ_REMOVE(&ag->ag_conf->conf_auth_groups, ag, ag_next); TAILQ_FOREACH_SAFE(auth, &ag->ag_auths, a_next, auth_tmp) auth_delete(auth); TAILQ_FOREACH_SAFE(auth_name, &ag->ag_names, an_next, auth_name_tmp) auth_name_delete(auth_name); TAILQ_FOREACH_SAFE(auth_portal, &ag->ag_portals, ap_next, auth_portal_tmp) auth_portal_delete(auth_portal); free(ag->ag_name); free(ag); } struct auth_group * auth_group_find(const struct conf *conf, const char *name) { struct auth_group *ag; TAILQ_FOREACH(ag, &conf->conf_auth_groups, ag_next) { if (ag->ag_name != NULL && strcmp(ag->ag_name, name) == 0) return (ag); } return (NULL); } int auth_group_set_type(struct auth_group *ag, const char *str) { int type; if (strcmp(str, "none") == 0) { type = AG_TYPE_NO_AUTHENTICATION; } else if (strcmp(str, "deny") == 0) { type = AG_TYPE_DENY; } else if (strcmp(str, "chap") == 0) { type = AG_TYPE_CHAP; } else if (strcmp(str, "chap-mutual") == 0) { type = AG_TYPE_CHAP_MUTUAL; } else { if (ag->ag_name != NULL) log_warnx("invalid auth-type \"%s\" for auth-group " "\"%s\"", str, ag->ag_name); else log_warnx("invalid auth-type \"%s\" for target " "\"%s\"", str, ag->ag_target->t_name); return (1); } if (ag->ag_type != AG_TYPE_UNKNOWN && ag->ag_type != type) { if (ag->ag_name != NULL) { log_warnx("cannot set auth-type to \"%s\" for " "auth-group \"%s\"; already has a different " "type", str, ag->ag_name); } else { log_warnx("cannot set auth-type to \"%s\" for target " "\"%s\"; already has a different type", str, ag->ag_target->t_name); } return (1); } ag->ag_type = type; return (0); } static struct portal * portal_new(struct portal_group *pg) { struct portal *portal; portal = calloc(1, sizeof(*portal)); if (portal == NULL) log_err(1, "calloc"); TAILQ_INIT(&portal->p_targets); portal->p_portal_group = pg; TAILQ_INSERT_TAIL(&pg->pg_portals, portal, p_next); return (portal); } static void portal_delete(struct portal *portal) { TAILQ_REMOVE(&portal->p_portal_group->pg_portals, portal, p_next); if (portal->p_ai != NULL) freeaddrinfo(portal->p_ai); free(portal->p_listen); free(portal); } struct portal_group * portal_group_new(struct conf *conf, const char *name) { struct portal_group *pg; pg = portal_group_find(conf, name); if (pg != NULL) { log_warnx("duplicated portal-group \"%s\"", name); return (NULL); } pg = calloc(1, sizeof(*pg)); if (pg == NULL) log_err(1, "calloc"); pg->pg_name = checked_strdup(name); TAILQ_INIT(&pg->pg_portals); TAILQ_INIT(&pg->pg_ports); pg->pg_conf = conf; pg->pg_tag = 0; /* Assigned later in conf_apply(). */ TAILQ_INSERT_TAIL(&conf->conf_portal_groups, pg, pg_next); return (pg); } void portal_group_delete(struct portal_group *pg) { struct portal *portal, *tmp; struct port *port, *tport; TAILQ_FOREACH_SAFE(port, &pg->pg_ports, p_pgs, tport) port_delete(port); TAILQ_REMOVE(&pg->pg_conf->conf_portal_groups, pg, pg_next); TAILQ_FOREACH_SAFE(portal, &pg->pg_portals, p_next, tmp) portal_delete(portal); free(pg->pg_name); free(pg->pg_offload); free(pg->pg_redirection); free(pg); } struct portal_group * portal_group_find(const struct conf *conf, const char *name) { struct portal_group *pg; TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { if (strcmp(pg->pg_name, name) == 0) return (pg); } return (NULL); } static int parse_addr_port(char *arg, const char *def_port, struct addrinfo **ai) { struct addrinfo hints; char *str, *addr, *ch; const char *port; int error, colons = 0; str = arg = strdup(arg); if (arg[0] == '[') { /* * IPv6 address in square brackets, perhaps with port. */ arg++; addr = strsep(&arg, "]"); if (arg == NULL) return (1); if (arg[0] == '\0') { port = def_port; } else if (arg[0] == ':') { port = arg + 1; } else { free(str); return (1); } } else { /* * Either IPv6 address without brackets - and without * a port - or IPv4 address. Just count the colons. */ for (ch = arg; *ch != '\0'; ch++) { if (*ch == ':') colons++; } if (colons > 1) { addr = arg; port = def_port; } else { addr = strsep(&arg, ":"); if (arg == NULL) port = def_port; else port = arg; } } memset(&hints, 0, sizeof(hints)); hints.ai_family = PF_UNSPEC; hints.ai_socktype = SOCK_STREAM; hints.ai_flags = AI_PASSIVE; error = getaddrinfo(addr, port, &hints, ai); free(str); return ((error != 0) ? 1 : 0); } int portal_group_add_listen(struct portal_group *pg, const char *value, bool iser) { struct portal *portal; portal = portal_new(pg); portal->p_listen = checked_strdup(value); portal->p_iser = iser; if (parse_addr_port(portal->p_listen, "3260", &portal->p_ai)) { log_warnx("invalid listen address %s", portal->p_listen); portal_delete(portal); return (1); } /* * XXX: getaddrinfo(3) may return multiple addresses; we should turn * those into multiple portals. */ return (0); } int isns_new(struct conf *conf, const char *addr) { struct isns *isns; isns = calloc(1, sizeof(*isns)); if (isns == NULL) log_err(1, "calloc"); isns->i_conf = conf; TAILQ_INSERT_TAIL(&conf->conf_isns, isns, i_next); isns->i_addr = checked_strdup(addr); if (parse_addr_port(isns->i_addr, "3205", &isns->i_ai)) { log_warnx("invalid iSNS address %s", isns->i_addr); isns_delete(isns); return (1); } /* * XXX: getaddrinfo(3) may return multiple addresses; we should turn * those into multiple servers. */ return (0); } void isns_delete(struct isns *isns) { TAILQ_REMOVE(&isns->i_conf->conf_isns, isns, i_next); free(isns->i_addr); if (isns->i_ai != NULL) freeaddrinfo(isns->i_ai); free(isns); } static int isns_do_connect(struct isns *isns) { int s; s = socket(isns->i_ai->ai_family, isns->i_ai->ai_socktype, isns->i_ai->ai_protocol); if (s < 0) { log_warn("socket(2) failed for %s", isns->i_addr); return (-1); } if (connect(s, isns->i_ai->ai_addr, isns->i_ai->ai_addrlen)) { log_warn("connect(2) failed for %s", isns->i_addr); close(s); return (-1); } return(s); } static int isns_do_register(struct isns *isns, int s, const char *hostname) { struct conf *conf = isns->i_conf; struct target *target; struct portal *portal; struct portal_group *pg; struct port *port; struct isns_req *req; int res = 0; uint32_t error; req = isns_req_create(ISNS_FUNC_DEVATTRREG, ISNS_FLAG_CLIENT); isns_req_add_str(req, 32, TAILQ_FIRST(&conf->conf_targets)->t_name); isns_req_add_delim(req); isns_req_add_str(req, 1, hostname); isns_req_add_32(req, 2, 2); /* 2 -- iSCSI */ isns_req_add_32(req, 6, conf->conf_isns_period); TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { if (pg->pg_unassigned) continue; TAILQ_FOREACH(portal, &pg->pg_portals, p_next) { isns_req_add_addr(req, 16, portal->p_ai); isns_req_add_port(req, 17, portal->p_ai); } } TAILQ_FOREACH(target, &conf->conf_targets, t_next) { isns_req_add_str(req, 32, target->t_name); isns_req_add_32(req, 33, 1); /* 1 -- Target*/ if (target->t_alias != NULL) isns_req_add_str(req, 34, target->t_alias); TAILQ_FOREACH(port, &target->t_ports, p_ts) { if ((pg = port->p_portal_group) == NULL) continue; isns_req_add_32(req, 51, pg->pg_tag); TAILQ_FOREACH(portal, &pg->pg_portals, p_next) { isns_req_add_addr(req, 49, portal->p_ai); isns_req_add_port(req, 50, portal->p_ai); } } } res = isns_req_send(s, req); if (res < 0) { log_warn("send(2) failed for %s", isns->i_addr); goto quit; } res = isns_req_receive(s, req); if (res < 0) { log_warn("receive(2) failed for %s", isns->i_addr); goto quit; } error = isns_req_get_status(req); if (error != 0) { log_warnx("iSNS register error %d for %s", error, isns->i_addr); res = -1; } quit: isns_req_free(req); return (res); } static int isns_do_check(struct isns *isns, int s, const char *hostname) { struct conf *conf = isns->i_conf; struct isns_req *req; int res = 0; uint32_t error; req = isns_req_create(ISNS_FUNC_DEVATTRQRY, ISNS_FLAG_CLIENT); isns_req_add_str(req, 32, TAILQ_FIRST(&conf->conf_targets)->t_name); isns_req_add_str(req, 1, hostname); isns_req_add_delim(req); isns_req_add(req, 2, 0, NULL); res = isns_req_send(s, req); if (res < 0) { log_warn("send(2) failed for %s", isns->i_addr); goto quit; } res = isns_req_receive(s, req); if (res < 0) { log_warn("receive(2) failed for %s", isns->i_addr); goto quit; } error = isns_req_get_status(req); if (error != 0) { log_warnx("iSNS check error %d for %s", error, isns->i_addr); res = -1; } quit: isns_req_free(req); return (res); } static int isns_do_deregister(struct isns *isns, int s, const char *hostname) { struct conf *conf = isns->i_conf; struct isns_req *req; int res = 0; uint32_t error; req = isns_req_create(ISNS_FUNC_DEVDEREG, ISNS_FLAG_CLIENT); isns_req_add_str(req, 32, TAILQ_FIRST(&conf->conf_targets)->t_name); isns_req_add_delim(req); isns_req_add_str(req, 1, hostname); res = isns_req_send(s, req); if (res < 0) { log_warn("send(2) failed for %s", isns->i_addr); goto quit; } res = isns_req_receive(s, req); if (res < 0) { log_warn("receive(2) failed for %s", isns->i_addr); goto quit; } error = isns_req_get_status(req); if (error != 0) { log_warnx("iSNS deregister error %d for %s", error, isns->i_addr); res = -1; } quit: isns_req_free(req); return (res); } void isns_register(struct isns *isns, struct isns *oldisns) { struct conf *conf = isns->i_conf; int s; char hostname[256]; if (TAILQ_EMPTY(&conf->conf_targets) || TAILQ_EMPTY(&conf->conf_portal_groups)) return; set_timeout(conf->conf_isns_timeout, false); s = isns_do_connect(isns); if (s < 0) { set_timeout(0, false); return; } gethostname(hostname, sizeof(hostname)); if (oldisns == NULL || TAILQ_EMPTY(&oldisns->i_conf->conf_targets)) oldisns = isns; isns_do_deregister(oldisns, s, hostname); isns_do_register(isns, s, hostname); close(s); set_timeout(0, false); } void isns_check(struct isns *isns) { struct conf *conf = isns->i_conf; int s, res; char hostname[256]; if (TAILQ_EMPTY(&conf->conf_targets) || TAILQ_EMPTY(&conf->conf_portal_groups)) return; set_timeout(conf->conf_isns_timeout, false); s = isns_do_connect(isns); if (s < 0) { set_timeout(0, false); return; } gethostname(hostname, sizeof(hostname)); res = isns_do_check(isns, s, hostname); if (res < 0) { isns_do_deregister(isns, s, hostname); isns_do_register(isns, s, hostname); } close(s); set_timeout(0, false); } void isns_deregister(struct isns *isns) { struct conf *conf = isns->i_conf; int s; char hostname[256]; if (TAILQ_EMPTY(&conf->conf_targets) || TAILQ_EMPTY(&conf->conf_portal_groups)) return; set_timeout(conf->conf_isns_timeout, false); s = isns_do_connect(isns); if (s < 0) return; gethostname(hostname, sizeof(hostname)); isns_do_deregister(isns, s, hostname); close(s); set_timeout(0, false); } int portal_group_set_filter(struct portal_group *pg, const char *str) { int filter; if (strcmp(str, "none") == 0) { filter = PG_FILTER_NONE; } else if (strcmp(str, "portal") == 0) { filter = PG_FILTER_PORTAL; } else if (strcmp(str, "portal-name") == 0) { filter = PG_FILTER_PORTAL_NAME; } else if (strcmp(str, "portal-name-auth") == 0) { filter = PG_FILTER_PORTAL_NAME_AUTH; } else { log_warnx("invalid discovery-filter \"%s\" for portal-group " "\"%s\"; valid values are \"none\", \"portal\", " "\"portal-name\", and \"portal-name-auth\"", str, pg->pg_name); return (1); } if (pg->pg_discovery_filter != PG_FILTER_UNKNOWN && pg->pg_discovery_filter != filter) { log_warnx("cannot set discovery-filter to \"%s\" for " "portal-group \"%s\"; already has a different " "value", str, pg->pg_name); return (1); } pg->pg_discovery_filter = filter; return (0); } int portal_group_set_offload(struct portal_group *pg, const char *offload) { if (pg->pg_offload != NULL) { log_warnx("cannot set offload to \"%s\" for " "portal-group \"%s\"; already defined", offload, pg->pg_name); return (1); } pg->pg_offload = checked_strdup(offload); return (0); } int portal_group_set_redirection(struct portal_group *pg, const char *addr) { if (pg->pg_redirection != NULL) { log_warnx("cannot set redirection to \"%s\" for " "portal-group \"%s\"; already defined", addr, pg->pg_name); return (1); } pg->pg_redirection = checked_strdup(addr); return (0); } static bool valid_hex(const char ch) { switch (ch) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case 'a': case 'A': case 'b': case 'B': case 'c': case 'C': case 'd': case 'D': case 'e': case 'E': case 'f': case 'F': return (true); default: return (false); } } bool valid_iscsi_name(const char *name) { int i; if (strlen(name) >= MAX_NAME_LEN) { log_warnx("overlong name for target \"%s\"; max length allowed " "by iSCSI specification is %d characters", name, MAX_NAME_LEN); return (false); } /* * In the cases below, we don't return an error, just in case the admin * was right, and we're wrong. */ if (strncasecmp(name, "iqn.", strlen("iqn.")) == 0) { for (i = strlen("iqn."); name[i] != '\0'; i++) { /* * XXX: We should verify UTF-8 normalisation, as defined * by 3.2.6.2: iSCSI Name Encoding. */ if (isalnum(name[i])) continue; if (name[i] == '-' || name[i] == '.' || name[i] == ':') continue; log_warnx("invalid character \"%c\" in target name " "\"%s\"; allowed characters are letters, digits, " "'-', '.', and ':'", name[i], name); break; } /* * XXX: Check more stuff: valid date and a valid reversed domain. */ } else if (strncasecmp(name, "eui.", strlen("eui.")) == 0) { if (strlen(name) != strlen("eui.") + 16) log_warnx("invalid target name \"%s\"; the \"eui.\" " "should be followed by exactly 16 hexadecimal " "digits", name); for (i = strlen("eui."); name[i] != '\0'; i++) { if (!valid_hex(name[i])) { log_warnx("invalid character \"%c\" in target " "name \"%s\"; allowed characters are 1-9 " "and A-F", name[i], name); break; } } } else if (strncasecmp(name, "naa.", strlen("naa.")) == 0) { if (strlen(name) > strlen("naa.") + 32) log_warnx("invalid target name \"%s\"; the \"naa.\" " "should be followed by at most 32 hexadecimal " "digits", name); for (i = strlen("naa."); name[i] != '\0'; i++) { if (!valid_hex(name[i])) { log_warnx("invalid character \"%c\" in target " "name \"%s\"; allowed characters are 1-9 " "and A-F", name[i], name); break; } } } else { log_warnx("invalid target name \"%s\"; should start with " "either \".iqn\", \"eui.\", or \"naa.\"", name); } return (true); } struct pport * pport_new(struct conf *conf, const char *name, uint32_t ctl_port) { struct pport *pp; pp = calloc(1, sizeof(*pp)); if (pp == NULL) log_err(1, "calloc"); pp->pp_conf = conf; pp->pp_name = checked_strdup(name); pp->pp_ctl_port = ctl_port; TAILQ_INIT(&pp->pp_ports); TAILQ_INSERT_TAIL(&conf->conf_pports, pp, pp_next); return (pp); } struct pport * pport_find(const struct conf *conf, const char *name) { struct pport *pp; TAILQ_FOREACH(pp, &conf->conf_pports, pp_next) { if (strcasecmp(pp->pp_name, name) == 0) return (pp); } return (NULL); } struct pport * pport_copy(struct pport *pp, struct conf *conf) { struct pport *ppnew; ppnew = pport_new(conf, pp->pp_name, pp->pp_ctl_port); return (ppnew); } void pport_delete(struct pport *pp) { struct port *port, *tport; TAILQ_FOREACH_SAFE(port, &pp->pp_ports, p_ts, tport) port_delete(port); TAILQ_REMOVE(&pp->pp_conf->conf_pports, pp, pp_next); free(pp->pp_name); free(pp); } struct port * port_new(struct conf *conf, struct target *target, struct portal_group *pg) { struct port *port; char *name; int ret; ret = asprintf(&name, "%s-%s", pg->pg_name, target->t_name); if (ret <= 0) log_err(1, "asprintf"); if (port_find(conf, name) != NULL) { log_warnx("duplicate port \"%s\"", name); free(name); return (NULL); } port = calloc(1, sizeof(*port)); if (port == NULL) log_err(1, "calloc"); port->p_conf = conf; port->p_name = name; TAILQ_INSERT_TAIL(&conf->conf_ports, port, p_next); TAILQ_INSERT_TAIL(&target->t_ports, port, p_ts); port->p_target = target; TAILQ_INSERT_TAIL(&pg->pg_ports, port, p_pgs); port->p_portal_group = pg; return (port); } struct port * port_new_pp(struct conf *conf, struct target *target, struct pport *pp) { struct port *port; char *name; int ret; ret = asprintf(&name, "%s-%s", pp->pp_name, target->t_name); if (ret <= 0) log_err(1, "asprintf"); if (port_find(conf, name) != NULL) { log_warnx("duplicate port \"%s\"", name); free(name); return (NULL); } port = calloc(1, sizeof(*port)); if (port == NULL) log_err(1, "calloc"); port->p_conf = conf; port->p_name = name; TAILQ_INSERT_TAIL(&conf->conf_ports, port, p_next); TAILQ_INSERT_TAIL(&target->t_ports, port, p_ts); port->p_target = target; TAILQ_INSERT_TAIL(&pp->pp_ports, port, p_pps); port->p_pport = pp; return (port); } struct port * port_find(const struct conf *conf, const char *name) { struct port *port; TAILQ_FOREACH(port, &conf->conf_ports, p_next) { if (strcasecmp(port->p_name, name) == 0) return (port); } return (NULL); } struct port * port_find_in_pg(const struct portal_group *pg, const char *target) { struct port *port; TAILQ_FOREACH(port, &pg->pg_ports, p_pgs) { if (strcasecmp(port->p_target->t_name, target) == 0) return (port); } return (NULL); } void port_delete(struct port *port) { if (port->p_portal_group) TAILQ_REMOVE(&port->p_portal_group->pg_ports, port, p_pgs); if (port->p_pport) TAILQ_REMOVE(&port->p_pport->pp_ports, port, p_pps); if (port->p_target) TAILQ_REMOVE(&port->p_target->t_ports, port, p_ts); TAILQ_REMOVE(&port->p_conf->conf_ports, port, p_next); free(port->p_name); free(port); } struct target * target_new(struct conf *conf, const char *name) { struct target *targ; int i, len; targ = target_find(conf, name); if (targ != NULL) { log_warnx("duplicated target \"%s\"", name); return (NULL); } if (valid_iscsi_name(name) == false) { log_warnx("target name \"%s\" is invalid", name); return (NULL); } targ = calloc(1, sizeof(*targ)); if (targ == NULL) log_err(1, "calloc"); targ->t_name = checked_strdup(name); /* * RFC 3722 requires us to normalize the name to lowercase. */ len = strlen(name); for (i = 0; i < len; i++) targ->t_name[i] = tolower(targ->t_name[i]); targ->t_conf = conf; TAILQ_INIT(&targ->t_ports); TAILQ_INSERT_TAIL(&conf->conf_targets, targ, t_next); return (targ); } void target_delete(struct target *targ) { struct port *port, *tport; TAILQ_FOREACH_SAFE(port, &targ->t_ports, p_ts, tport) port_delete(port); TAILQ_REMOVE(&targ->t_conf->conf_targets, targ, t_next); free(targ->t_name); free(targ->t_redirection); free(targ); } struct target * target_find(struct conf *conf, const char *name) { struct target *targ; TAILQ_FOREACH(targ, &conf->conf_targets, t_next) { if (strcasecmp(targ->t_name, name) == 0) return (targ); } return (NULL); } int target_set_redirection(struct target *target, const char *addr) { if (target->t_redirection != NULL) { log_warnx("cannot set redirection to \"%s\" for " "target \"%s\"; already defined", addr, target->t_name); return (1); } target->t_redirection = checked_strdup(addr); return (0); } struct lun * lun_new(struct conf *conf, const char *name) { struct lun *lun; lun = lun_find(conf, name); if (lun != NULL) { log_warnx("duplicated lun \"%s\"", name); return (NULL); } lun = calloc(1, sizeof(*lun)); if (lun == NULL) log_err(1, "calloc"); lun->l_conf = conf; lun->l_name = checked_strdup(name); TAILQ_INIT(&lun->l_options); TAILQ_INSERT_TAIL(&conf->conf_luns, lun, l_next); return (lun); } void lun_delete(struct lun *lun) { struct target *targ; struct lun_option *lo, *tmp; int i; TAILQ_FOREACH(targ, &lun->l_conf->conf_targets, t_next) { for (i = 0; i < MAX_LUNS; i++) { if (targ->t_luns[i] == lun) targ->t_luns[i] = NULL; } } TAILQ_REMOVE(&lun->l_conf->conf_luns, lun, l_next); TAILQ_FOREACH_SAFE(lo, &lun->l_options, lo_next, tmp) lun_option_delete(lo); free(lun->l_name); free(lun->l_backend); free(lun->l_device_id); free(lun->l_path); free(lun->l_scsiname); free(lun->l_serial); free(lun); } struct lun * lun_find(const struct conf *conf, const char *name) { struct lun *lun; TAILQ_FOREACH(lun, &conf->conf_luns, l_next) { if (strcmp(lun->l_name, name) == 0) return (lun); } return (NULL); } void lun_set_backend(struct lun *lun, const char *value) { free(lun->l_backend); lun->l_backend = checked_strdup(value); } void lun_set_blocksize(struct lun *lun, size_t value) { lun->l_blocksize = value; } void lun_set_device_id(struct lun *lun, const char *value) { free(lun->l_device_id); lun->l_device_id = checked_strdup(value); } void lun_set_path(struct lun *lun, const char *value) { free(lun->l_path); lun->l_path = checked_strdup(value); } void lun_set_scsiname(struct lun *lun, const char *value) { free(lun->l_scsiname); lun->l_scsiname = checked_strdup(value); } void lun_set_serial(struct lun *lun, const char *value) { free(lun->l_serial); lun->l_serial = checked_strdup(value); } void lun_set_size(struct lun *lun, size_t value) { lun->l_size = value; } void lun_set_ctl_lun(struct lun *lun, uint32_t value) { lun->l_ctl_lun = value; } struct lun_option * lun_option_new(struct lun *lun, const char *name, const char *value) { struct lun_option *lo; lo = lun_option_find(lun, name); if (lo != NULL) { log_warnx("duplicated lun option \"%s\" for lun \"%s\"", name, lun->l_name); return (NULL); } lo = calloc(1, sizeof(*lo)); if (lo == NULL) log_err(1, "calloc"); lo->lo_name = checked_strdup(name); lo->lo_value = checked_strdup(value); lo->lo_lun = lun; TAILQ_INSERT_TAIL(&lun->l_options, lo, lo_next); return (lo); } void lun_option_delete(struct lun_option *lo) { TAILQ_REMOVE(&lo->lo_lun->l_options, lo, lo_next); free(lo->lo_name); free(lo->lo_value); free(lo); } struct lun_option * lun_option_find(const struct lun *lun, const char *name) { struct lun_option *lo; TAILQ_FOREACH(lo, &lun->l_options, lo_next) { if (strcmp(lo->lo_name, name) == 0) return (lo); } return (NULL); } void lun_option_set(struct lun_option *lo, const char *value) { free(lo->lo_value); lo->lo_value = checked_strdup(value); } static struct connection * connection_new(struct portal *portal, int fd, const char *host, const struct sockaddr *client_sa) { struct connection *conn; conn = calloc(1, sizeof(*conn)); if (conn == NULL) log_err(1, "calloc"); conn->conn_portal = portal; conn->conn_socket = fd; conn->conn_initiator_addr = checked_strdup(host); memcpy(&conn->conn_initiator_sa, client_sa, client_sa->sa_len); /* * Default values, from RFC 3720, section 12. */ conn->conn_max_data_segment_length = 8192; conn->conn_max_burst_length = 262144; conn->conn_immediate_data = true; return (conn); } #if 0 static void conf_print(struct conf *conf) { struct auth_group *ag; struct auth *auth; struct auth_name *auth_name; struct auth_portal *auth_portal; struct portal_group *pg; struct portal *portal; struct target *targ; struct lun *lun; struct lun_option *lo; TAILQ_FOREACH(ag, &conf->conf_auth_groups, ag_next) { fprintf(stderr, "auth-group %s {\n", ag->ag_name); TAILQ_FOREACH(auth, &ag->ag_auths, a_next) fprintf(stderr, "\t chap-mutual %s %s %s %s\n", auth->a_user, auth->a_secret, auth->a_mutual_user, auth->a_mutual_secret); TAILQ_FOREACH(auth_name, &ag->ag_names, an_next) fprintf(stderr, "\t initiator-name %s\n", auth_name->an_initator_name); TAILQ_FOREACH(auth_portal, &ag->ag_portals, an_next) fprintf(stderr, "\t initiator-portal %s\n", auth_portal->an_initator_portal); fprintf(stderr, "}\n"); } TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { fprintf(stderr, "portal-group %s {\n", pg->pg_name); TAILQ_FOREACH(portal, &pg->pg_portals, p_next) fprintf(stderr, "\t listen %s\n", portal->p_listen); fprintf(stderr, "}\n"); } TAILQ_FOREACH(lun, &conf->conf_luns, l_next) { fprintf(stderr, "\tlun %s {\n", lun->l_name); fprintf(stderr, "\t\tpath %s\n", lun->l_path); TAILQ_FOREACH(lo, &lun->l_options, lo_next) fprintf(stderr, "\t\toption %s %s\n", lo->lo_name, lo->lo_value); fprintf(stderr, "\t}\n"); } TAILQ_FOREACH(targ, &conf->conf_targets, t_next) { fprintf(stderr, "target %s {\n", targ->t_name); if (targ->t_alias != NULL) fprintf(stderr, "\t alias %s\n", targ->t_alias); fprintf(stderr, "}\n"); } } #endif static int conf_verify_lun(struct lun *lun) { const struct lun *lun2; if (lun->l_backend == NULL) lun_set_backend(lun, "block"); if (strcmp(lun->l_backend, "block") == 0) { if (lun->l_path == NULL) { log_warnx("missing path for lun \"%s\"", lun->l_name); return (1); } } else if (strcmp(lun->l_backend, "ramdisk") == 0) { if (lun->l_size == 0) { log_warnx("missing size for ramdisk-backed lun \"%s\"", lun->l_name); return (1); } if (lun->l_path != NULL) { log_warnx("path must not be specified " "for ramdisk-backed lun \"%s\"", lun->l_name); return (1); } } if (lun->l_blocksize == 0) { lun_set_blocksize(lun, DEFAULT_BLOCKSIZE); } else if (lun->l_blocksize < 0) { log_warnx("invalid blocksize for lun \"%s\"; " "must be larger than 0", lun->l_name); return (1); } if (lun->l_size != 0 && lun->l_size % lun->l_blocksize != 0) { log_warnx("invalid size for lun \"%s\"; " "must be multiple of blocksize", lun->l_name); return (1); } TAILQ_FOREACH(lun2, &lun->l_conf->conf_luns, l_next) { if (lun == lun2) continue; if (lun->l_path != NULL && lun2->l_path != NULL && strcmp(lun->l_path, lun2->l_path) == 0) { log_debugx("WARNING: path \"%s\" duplicated " "between lun \"%s\", and " "lun \"%s\"", lun->l_path, lun->l_name, lun2->l_name); } } return (0); } int conf_verify(struct conf *conf) { struct auth_group *ag; struct portal_group *pg; struct port *port; struct target *targ; struct lun *lun; bool found; int error, i; if (conf->conf_pidfile_path == NULL) conf->conf_pidfile_path = checked_strdup(DEFAULT_PIDFILE); TAILQ_FOREACH(lun, &conf->conf_luns, l_next) { error = conf_verify_lun(lun); if (error != 0) return (error); } TAILQ_FOREACH(targ, &conf->conf_targets, t_next) { if (targ->t_auth_group == NULL) { targ->t_auth_group = auth_group_find(conf, "default"); assert(targ->t_auth_group != NULL); } if (TAILQ_EMPTY(&targ->t_ports)) { pg = portal_group_find(conf, "default"); assert(pg != NULL); port_new(conf, targ, pg); } found = false; for (i = 0; i < MAX_LUNS; i++) { if (targ->t_luns[i] != NULL) found = true; } if (!found && targ->t_redirection == NULL) { log_warnx("no LUNs defined for target \"%s\"", targ->t_name); } if (found && targ->t_redirection != NULL) { log_debugx("target \"%s\" contains luns, " " but configured for redirection", targ->t_name); } } TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { assert(pg->pg_name != NULL); if (pg->pg_discovery_auth_group == NULL) { pg->pg_discovery_auth_group = auth_group_find(conf, "default"); assert(pg->pg_discovery_auth_group != NULL); } if (pg->pg_discovery_filter == PG_FILTER_UNKNOWN) pg->pg_discovery_filter = PG_FILTER_NONE; if (!TAILQ_EMPTY(&pg->pg_ports)) { if (pg->pg_redirection != NULL) { log_debugx("portal-group \"%s\" assigned " "to target, but configured " "for redirection", pg->pg_name); } pg->pg_unassigned = false; } else { if (strcmp(pg->pg_name, "default") != 0) log_warnx("portal-group \"%s\" not assigned " "to any target", pg->pg_name); pg->pg_unassigned = true; } } TAILQ_FOREACH(ag, &conf->conf_auth_groups, ag_next) { if (ag->ag_name == NULL) assert(ag->ag_target != NULL); else assert(ag->ag_target == NULL); found = false; TAILQ_FOREACH(targ, &conf->conf_targets, t_next) { if (targ->t_auth_group == ag) { found = true; break; } } TAILQ_FOREACH(port, &conf->conf_ports, p_next) { if (port->p_auth_group == ag) { found = true; break; } } TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { if (pg->pg_discovery_auth_group == ag) { found = true; break; } } if (!found && ag->ag_name != NULL && strcmp(ag->ag_name, "default") != 0 && strcmp(ag->ag_name, "no-authentication") != 0 && strcmp(ag->ag_name, "no-access") != 0) { log_warnx("auth-group \"%s\" not assigned " "to any target", ag->ag_name); } } return (0); } static int conf_apply(struct conf *oldconf, struct conf *newconf) { struct lun *oldlun, *newlun, *tmplun; struct portal_group *oldpg, *newpg; struct portal *oldp, *newp; struct port *oldport, *newport, *tmpport; struct isns *oldns, *newns; pid_t otherpid; int changed, cumulated_error = 0, error, sockbuf; int one = 1; if (oldconf->conf_debug != newconf->conf_debug) { log_debugx("changing debug level to %d", newconf->conf_debug); log_init(newconf->conf_debug); } if (oldconf->conf_pidfh != NULL) { assert(oldconf->conf_pidfile_path != NULL); if (newconf->conf_pidfile_path != NULL && strcmp(oldconf->conf_pidfile_path, newconf->conf_pidfile_path) == 0) { newconf->conf_pidfh = oldconf->conf_pidfh; oldconf->conf_pidfh = NULL; } else { log_debugx("removing pidfile %s", oldconf->conf_pidfile_path); pidfile_remove(oldconf->conf_pidfh); oldconf->conf_pidfh = NULL; } } if (newconf->conf_pidfh == NULL && newconf->conf_pidfile_path != NULL) { log_debugx("opening pidfile %s", newconf->conf_pidfile_path); newconf->conf_pidfh = pidfile_open(newconf->conf_pidfile_path, 0600, &otherpid); if (newconf->conf_pidfh == NULL) { if (errno == EEXIST) log_errx(1, "daemon already running, pid: %jd.", (intmax_t)otherpid); log_err(1, "cannot open or create pidfile \"%s\"", newconf->conf_pidfile_path); } } /* * Go through the new portal groups, assigning tags or preserving old. */ TAILQ_FOREACH(newpg, &newconf->conf_portal_groups, pg_next) { oldpg = portal_group_find(oldconf, newpg->pg_name); if (oldpg != NULL) newpg->pg_tag = oldpg->pg_tag; else newpg->pg_tag = ++last_portal_group_tag; } /* Deregister on removed iSNS servers. */ TAILQ_FOREACH(oldns, &oldconf->conf_isns, i_next) { TAILQ_FOREACH(newns, &newconf->conf_isns, i_next) { if (strcmp(oldns->i_addr, newns->i_addr) == 0) break; } if (newns == NULL) isns_deregister(oldns); } /* * XXX: If target or lun removal fails, we should somehow "move" * the old lun or target into newconf, so that subsequent * conf_apply() would try to remove them again. That would * be somewhat hairy, though, and lun deletion failures don't * really happen, so leave it as it is for now. */ /* * First, remove any ports present in the old configuration * and missing in the new one. */ TAILQ_FOREACH_SAFE(oldport, &oldconf->conf_ports, p_next, tmpport) { newport = port_find(newconf, oldport->p_name); if (newport != NULL) continue; log_debugx("removing port \"%s\"", oldport->p_name); error = kernel_port_remove(oldport); if (error != 0) { log_warnx("failed to remove port %s", oldport->p_name); /* * XXX: Uncomment after fixing the root cause. * * cumulated_error++; */ } } /* * Second, remove any LUNs present in the old configuration * and missing in the new one. */ TAILQ_FOREACH_SAFE(oldlun, &oldconf->conf_luns, l_next, tmplun) { newlun = lun_find(newconf, oldlun->l_name); if (newlun == NULL) { log_debugx("lun \"%s\", CTL lun %d " "not found in new configuration; " "removing", oldlun->l_name, oldlun->l_ctl_lun); error = kernel_lun_remove(oldlun); if (error != 0) { log_warnx("failed to remove lun \"%s\", " "CTL lun %d", oldlun->l_name, oldlun->l_ctl_lun); cumulated_error++; } continue; } /* * Also remove the LUNs changed by more than size. */ changed = 0; assert(oldlun->l_backend != NULL); assert(newlun->l_backend != NULL); if (strcmp(newlun->l_backend, oldlun->l_backend) != 0) { log_debugx("backend for lun \"%s\", " "CTL lun %d changed; removing", oldlun->l_name, oldlun->l_ctl_lun); changed = 1; } if (oldlun->l_blocksize != newlun->l_blocksize) { log_debugx("blocksize for lun \"%s\", " "CTL lun %d changed; removing", oldlun->l_name, oldlun->l_ctl_lun); changed = 1; } if (newlun->l_device_id != NULL && (oldlun->l_device_id == NULL || strcmp(oldlun->l_device_id, newlun->l_device_id) != 0)) { log_debugx("device-id for lun \"%s\", " "CTL lun %d changed; removing", oldlun->l_name, oldlun->l_ctl_lun); changed = 1; } if (newlun->l_path != NULL && (oldlun->l_path == NULL || strcmp(oldlun->l_path, newlun->l_path) != 0)) { log_debugx("path for lun \"%s\", " "CTL lun %d, changed; removing", oldlun->l_name, oldlun->l_ctl_lun); changed = 1; } if (newlun->l_serial != NULL && (oldlun->l_serial == NULL || strcmp(oldlun->l_serial, newlun->l_serial) != 0)) { log_debugx("serial for lun \"%s\", " "CTL lun %d changed; removing", oldlun->l_name, oldlun->l_ctl_lun); changed = 1; } if (changed) { error = kernel_lun_remove(oldlun); if (error != 0) { log_warnx("failed to remove lun \"%s\", " "CTL lun %d", oldlun->l_name, oldlun->l_ctl_lun); cumulated_error++; } lun_delete(oldlun); continue; } lun_set_ctl_lun(newlun, oldlun->l_ctl_lun); } TAILQ_FOREACH_SAFE(newlun, &newconf->conf_luns, l_next, tmplun) { oldlun = lun_find(oldconf, newlun->l_name); if (oldlun != NULL) { - if (newlun->l_size != oldlun->l_size || - newlun->l_size == 0) { - log_debugx("resizing lun \"%s\", CTL lun %d", + log_debugx("modifying lun \"%s\", CTL lun %d", + newlun->l_name, newlun->l_ctl_lun); + error = kernel_lun_modify(newlun); + if (error != 0) { + log_warnx("failed to " + "modify lun \"%s\", CTL lun %d", newlun->l_name, newlun->l_ctl_lun); - error = kernel_lun_resize(newlun); - if (error != 0) { - log_warnx("failed to " - "resize lun \"%s\", CTL lun %d", - newlun->l_name, - newlun->l_ctl_lun); - cumulated_error++; - } + cumulated_error++; } continue; } log_debugx("adding lun \"%s\"", newlun->l_name); error = kernel_lun_add(newlun); if (error != 0) { log_warnx("failed to add lun \"%s\"", newlun->l_name); lun_delete(newlun); cumulated_error++; } } /* * Now add new ports or modify existing ones. */ TAILQ_FOREACH(newport, &newconf->conf_ports, p_next) { oldport = port_find(oldconf, newport->p_name); if (oldport == NULL) { log_debugx("adding port \"%s\"", newport->p_name); error = kernel_port_add(newport); } else { log_debugx("updating port \"%s\"", newport->p_name); newport->p_ctl_port = oldport->p_ctl_port; error = kernel_port_update(newport); } if (error != 0) { log_warnx("failed to %s port %s", (oldport == NULL) ? "add" : "update", newport->p_name); /* * XXX: Uncomment after fixing the root cause. * * cumulated_error++; */ } } /* * Go through the new portals, opening the sockets as neccessary. */ TAILQ_FOREACH(newpg, &newconf->conf_portal_groups, pg_next) { if (newpg->pg_unassigned) { log_debugx("not listening on portal-group \"%s\", " "not assigned to any target", newpg->pg_name); continue; } TAILQ_FOREACH(newp, &newpg->pg_portals, p_next) { /* * Try to find already open portal and reuse * the listening socket. We don't care about * what portal or portal group that was, what * matters is the listening address. */ TAILQ_FOREACH(oldpg, &oldconf->conf_portal_groups, pg_next) { TAILQ_FOREACH(oldp, &oldpg->pg_portals, p_next) { if (strcmp(newp->p_listen, oldp->p_listen) == 0 && oldp->p_socket > 0) { newp->p_socket = oldp->p_socket; oldp->p_socket = 0; break; } } } if (newp->p_socket > 0) { /* * We're done with this portal. */ continue; } #ifdef ICL_KERNEL_PROXY if (proxy_mode) { newpg->pg_conf->conf_portal_id++; newp->p_id = newpg->pg_conf->conf_portal_id; log_debugx("listening on %s, portal-group " "\"%s\", portal id %d, using ICL proxy", newp->p_listen, newpg->pg_name, newp->p_id); kernel_listen(newp->p_ai, newp->p_iser, newp->p_id); continue; } #endif assert(proxy_mode == false); assert(newp->p_iser == false); log_debugx("listening on %s, portal-group \"%s\"", newp->p_listen, newpg->pg_name); newp->p_socket = socket(newp->p_ai->ai_family, newp->p_ai->ai_socktype, newp->p_ai->ai_protocol); if (newp->p_socket < 0) { log_warn("socket(2) failed for %s", newp->p_listen); cumulated_error++; continue; } sockbuf = SOCKBUF_SIZE; if (setsockopt(newp->p_socket, SOL_SOCKET, SO_RCVBUF, &sockbuf, sizeof(sockbuf)) == -1) log_warn("setsockopt(SO_RCVBUF) failed " "for %s", newp->p_listen); sockbuf = SOCKBUF_SIZE; if (setsockopt(newp->p_socket, SOL_SOCKET, SO_SNDBUF, &sockbuf, sizeof(sockbuf)) == -1) log_warn("setsockopt(SO_SNDBUF) failed " "for %s", newp->p_listen); error = setsockopt(newp->p_socket, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); if (error != 0) { log_warn("setsockopt(SO_REUSEADDR) failed " "for %s", newp->p_listen); close(newp->p_socket); newp->p_socket = 0; cumulated_error++; continue; } error = bind(newp->p_socket, newp->p_ai->ai_addr, newp->p_ai->ai_addrlen); if (error != 0) { log_warn("bind(2) failed for %s", newp->p_listen); close(newp->p_socket); newp->p_socket = 0; cumulated_error++; continue; } error = listen(newp->p_socket, -1); if (error != 0) { log_warn("listen(2) failed for %s", newp->p_listen); close(newp->p_socket); newp->p_socket = 0; cumulated_error++; continue; } } } /* * Go through the no longer used sockets, closing them. */ TAILQ_FOREACH(oldpg, &oldconf->conf_portal_groups, pg_next) { TAILQ_FOREACH(oldp, &oldpg->pg_portals, p_next) { if (oldp->p_socket <= 0) continue; log_debugx("closing socket for %s, portal-group \"%s\"", oldp->p_listen, oldpg->pg_name); close(oldp->p_socket); oldp->p_socket = 0; } } /* (Re-)Register on remaining/new iSNS servers. */ TAILQ_FOREACH(newns, &newconf->conf_isns, i_next) { TAILQ_FOREACH(oldns, &oldconf->conf_isns, i_next) { if (strcmp(oldns->i_addr, newns->i_addr) == 0) break; } isns_register(newns, oldns); } /* Schedule iSNS update */ if (!TAILQ_EMPTY(&newconf->conf_isns)) set_timeout((newconf->conf_isns_period + 2) / 3, false); return (cumulated_error); } bool timed_out(void) { return (sigalrm_received); } static void sigalrm_handler_fatal(int dummy __unused) { /* * It would be easiest to just log an error and exit. We can't * do this, though, because log_errx() is not signal safe, since * it calls syslog(3). Instead, set a flag checked by pdu_send() * and pdu_receive(), to call log_errx() there. Should they fail * to notice, we'll exit here one second later. */ if (sigalrm_received) { /* * Oh well. Just give up and quit. */ _exit(2); } sigalrm_received = true; } static void sigalrm_handler(int dummy __unused) { sigalrm_received = true; } void set_timeout(int timeout, int fatal) { struct sigaction sa; struct itimerval itv; int error; if (timeout <= 0) { log_debugx("session timeout disabled"); bzero(&itv, sizeof(itv)); error = setitimer(ITIMER_REAL, &itv, NULL); if (error != 0) log_err(1, "setitimer"); sigalrm_received = false; return; } sigalrm_received = false; bzero(&sa, sizeof(sa)); if (fatal) sa.sa_handler = sigalrm_handler_fatal; else sa.sa_handler = sigalrm_handler; sigfillset(&sa.sa_mask); error = sigaction(SIGALRM, &sa, NULL); if (error != 0) log_err(1, "sigaction"); /* * First SIGALRM will arive after conf_timeout seconds. * If we do nothing, another one will arrive a second later. */ log_debugx("setting session timeout to %d seconds", timeout); bzero(&itv, sizeof(itv)); itv.it_interval.tv_sec = 1; itv.it_value.tv_sec = timeout; error = setitimer(ITIMER_REAL, &itv, NULL); if (error != 0) log_err(1, "setitimer"); } static int wait_for_children(bool block) { pid_t pid; int status; int num = 0; for (;;) { /* * If "block" is true, wait for at least one process. */ if (block && num == 0) pid = wait4(-1, &status, 0, NULL); else pid = wait4(-1, &status, WNOHANG, NULL); if (pid <= 0) break; if (WIFSIGNALED(status)) { log_warnx("child process %d terminated with signal %d", pid, WTERMSIG(status)); } else if (WEXITSTATUS(status) != 0) { log_warnx("child process %d terminated with exit status %d", pid, WEXITSTATUS(status)); } else { log_debugx("child process %d terminated gracefully", pid); } num++; } return (num); } static void handle_connection(struct portal *portal, int fd, const struct sockaddr *client_sa, bool dont_fork) { struct connection *conn; int error; pid_t pid; char host[NI_MAXHOST + 1]; struct conf *conf; conf = portal->p_portal_group->pg_conf; if (dont_fork) { log_debugx("incoming connection; not forking due to -d flag"); } else { nchildren -= wait_for_children(false); assert(nchildren >= 0); while (conf->conf_maxproc > 0 && nchildren >= conf->conf_maxproc) { log_debugx("maxproc limit of %d child processes hit; " "waiting for child process to exit", conf->conf_maxproc); nchildren -= wait_for_children(true); assert(nchildren >= 0); } log_debugx("incoming connection; forking child process #%d", nchildren); nchildren++; pid = fork(); if (pid < 0) log_err(1, "fork"); if (pid > 0) { close(fd); return; } } pidfile_close(conf->conf_pidfh); error = getnameinfo(client_sa, client_sa->sa_len, host, sizeof(host), NULL, 0, NI_NUMERICHOST); if (error != 0) log_errx(1, "getnameinfo: %s", gai_strerror(error)); log_debugx("accepted connection from %s; portal group \"%s\"", host, portal->p_portal_group->pg_name); log_set_peer_addr(host); setproctitle("%s", host); conn = connection_new(portal, fd, host, client_sa); set_timeout(conf->conf_timeout, true); kernel_capsicate(); login(conn); if (conn->conn_session_type == CONN_SESSION_TYPE_NORMAL) { kernel_handoff(conn); log_debugx("connection handed off to the kernel"); } else { assert(conn->conn_session_type == CONN_SESSION_TYPE_DISCOVERY); discovery(conn); } log_debugx("nothing more to do; exiting"); exit(0); } static int fd_add(int fd, fd_set *fdset, int nfds) { /* * Skip sockets which we failed to bind. */ if (fd <= 0) return (nfds); FD_SET(fd, fdset); if (fd > nfds) nfds = fd; return (nfds); } static void main_loop(struct conf *conf, bool dont_fork) { struct portal_group *pg; struct portal *portal; struct sockaddr_storage client_sa; socklen_t client_salen; #ifdef ICL_KERNEL_PROXY int connection_id; int portal_id; #endif fd_set fdset; int error, nfds, client_fd; pidfile_write(conf->conf_pidfh); for (;;) { if (sighup_received || sigterm_received || timed_out()) return; #ifdef ICL_KERNEL_PROXY if (proxy_mode) { client_salen = sizeof(client_sa); kernel_accept(&connection_id, &portal_id, (struct sockaddr *)&client_sa, &client_salen); assert(client_salen >= client_sa.ss_len); log_debugx("incoming connection, id %d, portal id %d", connection_id, portal_id); TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { TAILQ_FOREACH(portal, &pg->pg_portals, p_next) { if (portal->p_id == portal_id) { goto found; } } } log_errx(1, "kernel returned invalid portal_id %d", portal_id); found: handle_connection(portal, connection_id, (struct sockaddr *)&client_sa, dont_fork); } else { #endif assert(proxy_mode == false); FD_ZERO(&fdset); nfds = 0; TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { TAILQ_FOREACH(portal, &pg->pg_portals, p_next) nfds = fd_add(portal->p_socket, &fdset, nfds); } error = select(nfds + 1, &fdset, NULL, NULL, NULL); if (error <= 0) { if (errno == EINTR) return; log_err(1, "select"); } TAILQ_FOREACH(pg, &conf->conf_portal_groups, pg_next) { TAILQ_FOREACH(portal, &pg->pg_portals, p_next) { if (!FD_ISSET(portal->p_socket, &fdset)) continue; client_salen = sizeof(client_sa); client_fd = accept(portal->p_socket, (struct sockaddr *)&client_sa, &client_salen); if (client_fd < 0) { if (errno == ECONNABORTED) continue; log_err(1, "accept"); } assert(client_salen >= client_sa.ss_len); handle_connection(portal, client_fd, (struct sockaddr *)&client_sa, dont_fork); break; } } #ifdef ICL_KERNEL_PROXY } #endif } } static void sighup_handler(int dummy __unused) { sighup_received = true; } static void sigterm_handler(int dummy __unused) { sigterm_received = true; } static void sigchld_handler(int dummy __unused) { /* * The only purpose of this handler is to make SIGCHLD * interrupt the ISCSIDWAIT ioctl(2), so we can call * wait_for_children(). */ } static void register_signals(void) { struct sigaction sa; int error; bzero(&sa, sizeof(sa)); sa.sa_handler = sighup_handler; sigfillset(&sa.sa_mask); error = sigaction(SIGHUP, &sa, NULL); if (error != 0) log_err(1, "sigaction"); sa.sa_handler = sigterm_handler; error = sigaction(SIGTERM, &sa, NULL); if (error != 0) log_err(1, "sigaction"); sa.sa_handler = sigterm_handler; error = sigaction(SIGINT, &sa, NULL); if (error != 0) log_err(1, "sigaction"); sa.sa_handler = sigchld_handler; error = sigaction(SIGCHLD, &sa, NULL); if (error != 0) log_err(1, "sigaction"); } int main(int argc, char **argv) { struct conf *oldconf, *newconf, *tmpconf; struct isns *newns; const char *config_path = DEFAULT_CONFIG_PATH; int debug = 0, ch, error; bool dont_daemonize = false; while ((ch = getopt(argc, argv, "df:R")) != -1) { switch (ch) { case 'd': dont_daemonize = true; debug++; break; case 'f': config_path = optarg; break; case 'R': #ifndef ICL_KERNEL_PROXY log_errx(1, "ctld(8) compiled without ICL_KERNEL_PROXY " "does not support iSER protocol"); #endif proxy_mode = true; break; case '?': default: usage(); } } argc -= optind; if (argc != 0) usage(); log_init(debug); kernel_init(); oldconf = conf_new_from_kernel(); newconf = conf_new_from_file(config_path, oldconf); if (newconf == NULL) log_errx(1, "configuration error; exiting"); if (debug > 0) { oldconf->conf_debug = debug; newconf->conf_debug = debug; } error = conf_apply(oldconf, newconf); if (error != 0) log_errx(1, "failed to apply configuration; exiting"); conf_delete(oldconf); oldconf = NULL; register_signals(); if (dont_daemonize == false) { log_debugx("daemonizing"); if (daemon(0, 0) == -1) { log_warn("cannot daemonize"); pidfile_remove(newconf->conf_pidfh); exit(1); } } /* Schedule iSNS update */ if (!TAILQ_EMPTY(&newconf->conf_isns)) set_timeout((newconf->conf_isns_period + 2) / 3, false); for (;;) { main_loop(newconf, dont_daemonize); if (sighup_received) { sighup_received = false; log_debugx("received SIGHUP, reloading configuration"); tmpconf = conf_new_from_file(config_path, newconf); if (tmpconf == NULL) { log_warnx("configuration error, " "continuing with old configuration"); } else { if (debug > 0) tmpconf->conf_debug = debug; oldconf = newconf; newconf = tmpconf; error = conf_apply(oldconf, newconf); if (error != 0) log_warnx("failed to reload " "configuration"); conf_delete(oldconf); oldconf = NULL; } } else if (sigterm_received) { log_debugx("exiting on signal; " "reloading empty configuration"); log_debugx("removing CTL iSCSI ports " "and terminating all connections"); oldconf = newconf; newconf = conf_new(); if (debug > 0) newconf->conf_debug = debug; error = conf_apply(oldconf, newconf); if (error != 0) log_warnx("failed to apply configuration"); conf_delete(oldconf); oldconf = NULL; log_warnx("exiting on signal"); exit(0); } else { nchildren -= wait_for_children(false); assert(nchildren >= 0); if (timed_out()) { set_timeout(0, false); TAILQ_FOREACH(newns, &newconf->conf_isns, i_next) isns_check(newns); /* Schedule iSNS update */ if (!TAILQ_EMPTY(&newconf->conf_isns)) { set_timeout((newconf->conf_isns_period + 2) / 3, false); } } } } /* NOTREACHED */ } Index: projects/clang-trunk/usr.sbin/ctld/ctld.h =================================================================== --- projects/clang-trunk/usr.sbin/ctld/ctld.h (revision 287501) +++ projects/clang-trunk/usr.sbin/ctld/ctld.h (revision 287502) @@ -1,450 +1,450 @@ /*- * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef CTLD_H #define CTLD_H #include #ifdef ICL_KERNEL_PROXY #include #endif #include #include #include #define DEFAULT_CONFIG_PATH "/etc/ctl.conf" #define DEFAULT_PIDFILE "/var/run/ctld.pid" #define DEFAULT_BLOCKSIZE 512 #define MAX_LUNS 1024 #define MAX_NAME_LEN 223 #define MAX_DATA_SEGMENT_LENGTH (128 * 1024) #define MAX_BURST_LENGTH 16776192 #define SOCKBUF_SIZE 1048576 struct auth { TAILQ_ENTRY(auth) a_next; struct auth_group *a_auth_group; char *a_user; char *a_secret; char *a_mutual_user; char *a_mutual_secret; }; struct auth_name { TAILQ_ENTRY(auth_name) an_next; struct auth_group *an_auth_group; char *an_initator_name; }; struct auth_portal { TAILQ_ENTRY(auth_portal) ap_next; struct auth_group *ap_auth_group; char *ap_initator_portal; struct sockaddr_storage ap_sa; int ap_mask; }; #define AG_TYPE_UNKNOWN 0 #define AG_TYPE_DENY 1 #define AG_TYPE_NO_AUTHENTICATION 2 #define AG_TYPE_CHAP 3 #define AG_TYPE_CHAP_MUTUAL 4 struct auth_group { TAILQ_ENTRY(auth_group) ag_next; struct conf *ag_conf; char *ag_name; struct target *ag_target; int ag_type; TAILQ_HEAD(, auth) ag_auths; TAILQ_HEAD(, auth_name) ag_names; TAILQ_HEAD(, auth_portal) ag_portals; }; struct portal { TAILQ_ENTRY(portal) p_next; struct portal_group *p_portal_group; bool p_iser; char *p_listen; struct addrinfo *p_ai; #ifdef ICL_KERNEL_PROXY int p_id; #endif TAILQ_HEAD(, target) p_targets; int p_socket; }; #define PG_FILTER_UNKNOWN 0 #define PG_FILTER_NONE 1 #define PG_FILTER_PORTAL 2 #define PG_FILTER_PORTAL_NAME 3 #define PG_FILTER_PORTAL_NAME_AUTH 4 struct portal_group { TAILQ_ENTRY(portal_group) pg_next; struct conf *pg_conf; char *pg_name; struct auth_group *pg_discovery_auth_group; int pg_discovery_filter; bool pg_unassigned; TAILQ_HEAD(, portal) pg_portals; TAILQ_HEAD(, port) pg_ports; char *pg_offload; char *pg_redirection; uint16_t pg_tag; }; struct pport { TAILQ_ENTRY(pport) pp_next; TAILQ_HEAD(, port) pp_ports; struct conf *pp_conf; char *pp_name; uint32_t pp_ctl_port; }; struct port { TAILQ_ENTRY(port) p_next; TAILQ_ENTRY(port) p_pgs; TAILQ_ENTRY(port) p_pps; TAILQ_ENTRY(port) p_ts; struct conf *p_conf; char *p_name; struct auth_group *p_auth_group; struct portal_group *p_portal_group; struct pport *p_pport; struct target *p_target; uint32_t p_ctl_port; }; struct lun_option { TAILQ_ENTRY(lun_option) lo_next; struct lun *lo_lun; char *lo_name; char *lo_value; }; struct lun { TAILQ_ENTRY(lun) l_next; struct conf *l_conf; TAILQ_HEAD(, lun_option) l_options; char *l_name; char *l_backend; int l_blocksize; char *l_device_id; char *l_path; char *l_scsiname; char *l_serial; int64_t l_size; int l_ctl_lun; }; struct target { TAILQ_ENTRY(target) t_next; struct conf *t_conf; struct lun *t_luns[MAX_LUNS]; struct auth_group *t_auth_group; TAILQ_HEAD(, port) t_ports; char *t_name; char *t_alias; char *t_redirection; }; struct isns { TAILQ_ENTRY(isns) i_next; struct conf *i_conf; char *i_addr; struct addrinfo *i_ai; }; struct conf { char *conf_pidfile_path; TAILQ_HEAD(, lun) conf_luns; TAILQ_HEAD(, target) conf_targets; TAILQ_HEAD(, auth_group) conf_auth_groups; TAILQ_HEAD(, port) conf_ports; TAILQ_HEAD(, portal_group) conf_portal_groups; TAILQ_HEAD(, pport) conf_pports; TAILQ_HEAD(, isns) conf_isns; int conf_isns_period; int conf_isns_timeout; int conf_debug; int conf_timeout; int conf_maxproc; #ifdef ICL_KERNEL_PROXY int conf_portal_id; #endif struct pidfh *conf_pidfh; bool conf_default_pg_defined; bool conf_default_ag_defined; bool conf_kernel_port_on; }; #define CONN_SESSION_TYPE_NONE 0 #define CONN_SESSION_TYPE_DISCOVERY 1 #define CONN_SESSION_TYPE_NORMAL 2 #define CONN_DIGEST_NONE 0 #define CONN_DIGEST_CRC32C 1 struct connection { struct portal *conn_portal; struct port *conn_port; struct target *conn_target; int conn_socket; int conn_session_type; char *conn_initiator_name; char *conn_initiator_addr; char *conn_initiator_alias; uint8_t conn_initiator_isid[6]; struct sockaddr_storage conn_initiator_sa; uint32_t conn_cmdsn; uint32_t conn_statsn; size_t conn_data_segment_limit; size_t conn_max_data_segment_length; size_t conn_max_burst_length; int conn_immediate_data; int conn_header_digest; int conn_data_digest; const char *conn_user; struct chap *conn_chap; }; struct pdu { struct connection *pdu_connection; struct iscsi_bhs *pdu_bhs; char *pdu_data; size_t pdu_data_len; }; #define KEYS_MAX 1024 struct keys { char *keys_names[KEYS_MAX]; char *keys_values[KEYS_MAX]; char *keys_data; size_t keys_data_len; }; #define CHAP_CHALLENGE_LEN 1024 #define CHAP_DIGEST_LEN 16 /* Equal to MD5 digest size. */ struct chap { unsigned char chap_id; char chap_challenge[CHAP_CHALLENGE_LEN]; char chap_response[CHAP_DIGEST_LEN]; }; struct rchap { char *rchap_secret; unsigned char rchap_id; void *rchap_challenge; size_t rchap_challenge_len; }; struct chap *chap_new(void); char *chap_get_id(const struct chap *chap); char *chap_get_challenge(const struct chap *chap); int chap_receive(struct chap *chap, const char *response); int chap_authenticate(struct chap *chap, const char *secret); void chap_delete(struct chap *chap); struct rchap *rchap_new(const char *secret); int rchap_receive(struct rchap *rchap, const char *id, const char *challenge); char *rchap_get_response(struct rchap *rchap); void rchap_delete(struct rchap *rchap); struct conf *conf_new(void); struct conf *conf_new_from_file(const char *path, struct conf *old); struct conf *conf_new_from_kernel(void); void conf_delete(struct conf *conf); int conf_verify(struct conf *conf); struct auth_group *auth_group_new(struct conf *conf, const char *name); void auth_group_delete(struct auth_group *ag); struct auth_group *auth_group_find(const struct conf *conf, const char *name); int auth_group_set_type(struct auth_group *ag, const char *type); const struct auth *auth_new_chap(struct auth_group *ag, const char *user, const char *secret); const struct auth *auth_new_chap_mutual(struct auth_group *ag, const char *user, const char *secret, const char *user2, const char *secret2); const struct auth *auth_find(const struct auth_group *ag, const char *user); const struct auth_name *auth_name_new(struct auth_group *ag, const char *initiator_name); bool auth_name_defined(const struct auth_group *ag); const struct auth_name *auth_name_find(const struct auth_group *ag, const char *initiator_name); int auth_name_check(const struct auth_group *ag, const char *initiator_name); const struct auth_portal *auth_portal_new(struct auth_group *ag, const char *initiator_portal); bool auth_portal_defined(const struct auth_group *ag); const struct auth_portal *auth_portal_find(const struct auth_group *ag, const struct sockaddr_storage *sa); int auth_portal_check(const struct auth_group *ag, const struct sockaddr_storage *sa); struct portal_group *portal_group_new(struct conf *conf, const char *name); void portal_group_delete(struct portal_group *pg); struct portal_group *portal_group_find(const struct conf *conf, const char *name); int portal_group_add_listen(struct portal_group *pg, const char *listen, bool iser); int portal_group_set_filter(struct portal_group *pg, const char *filter); int portal_group_set_offload(struct portal_group *pg, const char *offload); int portal_group_set_redirection(struct portal_group *pg, const char *addr); int isns_new(struct conf *conf, const char *addr); void isns_delete(struct isns *is); void isns_register(struct isns *isns, struct isns *oldisns); void isns_check(struct isns *isns); void isns_deregister(struct isns *isns); struct pport *pport_new(struct conf *conf, const char *name, uint32_t ctl_port); struct pport *pport_find(const struct conf *conf, const char *name); struct pport *pport_copy(struct pport *pport, struct conf *conf); void pport_delete(struct pport *pport); struct port *port_new(struct conf *conf, struct target *target, struct portal_group *pg); struct port *port_new_pp(struct conf *conf, struct target *target, struct pport *pp); struct port *port_find(const struct conf *conf, const char *name); struct port *port_find_in_pg(const struct portal_group *pg, const char *target); void port_delete(struct port *port); struct target *target_new(struct conf *conf, const char *name); void target_delete(struct target *target); struct target *target_find(struct conf *conf, const char *name); int target_set_redirection(struct target *target, const char *addr); struct lun *lun_new(struct conf *conf, const char *name); void lun_delete(struct lun *lun); struct lun *lun_find(const struct conf *conf, const char *name); void lun_set_backend(struct lun *lun, const char *value); void lun_set_blocksize(struct lun *lun, size_t value); void lun_set_device_id(struct lun *lun, const char *value); void lun_set_path(struct lun *lun, const char *value); void lun_set_scsiname(struct lun *lun, const char *value); void lun_set_serial(struct lun *lun, const char *value); void lun_set_size(struct lun *lun, size_t value); void lun_set_ctl_lun(struct lun *lun, uint32_t value); struct lun_option *lun_option_new(struct lun *lun, const char *name, const char *value); void lun_option_delete(struct lun_option *clo); struct lun_option *lun_option_find(const struct lun *lun, const char *name); void lun_option_set(struct lun_option *clo, const char *value); void kernel_init(void); int kernel_lun_add(struct lun *lun); -int kernel_lun_resize(struct lun *lun); +int kernel_lun_modify(struct lun *lun); int kernel_lun_remove(struct lun *lun); void kernel_handoff(struct connection *conn); void kernel_limits(const char *offload, size_t *max_data_segment_length); int kernel_port_add(struct port *port); int kernel_port_update(struct port *port); int kernel_port_remove(struct port *port); void kernel_capsicate(void); #ifdef ICL_KERNEL_PROXY void kernel_listen(struct addrinfo *ai, bool iser, int portal_id); void kernel_accept(int *connection_id, int *portal_id, struct sockaddr *client_sa, socklen_t *client_salen); void kernel_send(struct pdu *pdu); void kernel_receive(struct pdu *pdu); #endif struct keys *keys_new(void); void keys_delete(struct keys *keys); void keys_load(struct keys *keys, const struct pdu *pdu); void keys_save(struct keys *keys, struct pdu *pdu); const char *keys_find(struct keys *keys, const char *name); void keys_add(struct keys *keys, const char *name, const char *value); void keys_add_int(struct keys *keys, const char *name, int value); struct pdu *pdu_new(struct connection *conn); struct pdu *pdu_new_response(struct pdu *request); void pdu_delete(struct pdu *pdu); void pdu_receive(struct pdu *request); void pdu_send(struct pdu *response); void login(struct connection *conn); void discovery(struct connection *conn); void log_init(int level); void log_set_peer_name(const char *name); void log_set_peer_addr(const char *addr); void log_err(int, const char *, ...) __dead2 __printflike(2, 3); void log_errx(int, const char *, ...) __dead2 __printflike(2, 3); void log_warn(const char *, ...) __printflike(1, 2); void log_warnx(const char *, ...) __printflike(1, 2); void log_debugx(const char *, ...) __printflike(1, 2); char *checked_strdup(const char *); bool valid_iscsi_name(const char *name); void set_timeout(int timeout, int fatal); bool timed_out(void); #endif /* !CTLD_H */ Index: projects/clang-trunk/usr.sbin/ctld/kernel.c =================================================================== --- projects/clang-trunk/usr.sbin/ctld/kernel.c (revision 287501) +++ projects/clang-trunk/usr.sbin/ctld/kernel.c (revision 287502) @@ -1,1219 +1,1244 @@ /*- * Copyright (c) 2003, 2004 Silicon Graphics International Corp. * Copyright (c) 1997-2007 Kenneth D. Merry * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Edward Tomasz Napierala * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ctld.h" #ifdef ICL_KERNEL_PROXY #include #endif extern bool proxy_mode; static int ctl_fd = 0; void kernel_init(void) { int retval, saved_errno; ctl_fd = open(CTL_DEFAULT_DEV, O_RDWR); if (ctl_fd < 0 && errno == ENOENT) { saved_errno = errno; retval = kldload("ctl"); if (retval != -1) ctl_fd = open(CTL_DEFAULT_DEV, O_RDWR); else errno = saved_errno; } if (ctl_fd < 0) log_err(1, "failed to open %s", CTL_DEFAULT_DEV); } /* * Name/value pair used for per-LUN attributes. */ struct cctl_lun_nv { char *name; char *value; STAILQ_ENTRY(cctl_lun_nv) links; }; /* * Backend LUN information. */ struct cctl_lun { uint64_t lun_id; char *backend_type; uint64_t size_blocks; uint32_t blocksize; char *serial_number; char *device_id; char *ctld_name; STAILQ_HEAD(,cctl_lun_nv) attr_list; STAILQ_ENTRY(cctl_lun) links; }; struct cctl_port { uint32_t port_id; char *port_name; int pp; int vp; int cfiscsi_state; char *cfiscsi_target; uint16_t cfiscsi_portal_group_tag; char *ctld_portal_group_name; STAILQ_HEAD(,cctl_lun_nv) attr_list; STAILQ_ENTRY(cctl_port) links; }; struct cctl_devlist_data { int num_luns; STAILQ_HEAD(,cctl_lun) lun_list; struct cctl_lun *cur_lun; int num_ports; STAILQ_HEAD(,cctl_port) port_list; struct cctl_port *cur_port; int level; struct sbuf *cur_sb[32]; }; static void cctl_start_element(void *user_data, const char *name, const char **attr) { int i; struct cctl_devlist_data *devlist; struct cctl_lun *cur_lun; devlist = (struct cctl_devlist_data *)user_data; cur_lun = devlist->cur_lun; devlist->level++; if ((u_int)devlist->level >= (sizeof(devlist->cur_sb) / sizeof(devlist->cur_sb[0]))) log_errx(1, "%s: too many nesting levels, %zd max", __func__, sizeof(devlist->cur_sb) / sizeof(devlist->cur_sb[0])); devlist->cur_sb[devlist->level] = sbuf_new_auto(); if (devlist->cur_sb[devlist->level] == NULL) log_err(1, "%s: unable to allocate sbuf", __func__); if (strcmp(name, "lun") == 0) { if (cur_lun != NULL) log_errx(1, "%s: improper lun element nesting", __func__); cur_lun = calloc(1, sizeof(*cur_lun)); if (cur_lun == NULL) log_err(1, "%s: cannot allocate %zd bytes", __func__, sizeof(*cur_lun)); devlist->num_luns++; devlist->cur_lun = cur_lun; STAILQ_INIT(&cur_lun->attr_list); STAILQ_INSERT_TAIL(&devlist->lun_list, cur_lun, links); for (i = 0; attr[i] != NULL; i += 2) { if (strcmp(attr[i], "id") == 0) { cur_lun->lun_id = strtoull(attr[i+1], NULL, 0); } else { log_errx(1, "%s: invalid LUN attribute %s = %s", __func__, attr[i], attr[i+1]); } } } } static void cctl_end_element(void *user_data, const char *name) { struct cctl_devlist_data *devlist; struct cctl_lun *cur_lun; char *str; devlist = (struct cctl_devlist_data *)user_data; cur_lun = devlist->cur_lun; if ((cur_lun == NULL) && (strcmp(name, "ctllunlist") != 0)) log_errx(1, "%s: cur_lun == NULL! (name = %s)", __func__, name); if (devlist->cur_sb[devlist->level] == NULL) log_errx(1, "%s: no valid sbuf at level %d (name %s)", __func__, devlist->level, name); sbuf_finish(devlist->cur_sb[devlist->level]); str = checked_strdup(sbuf_data(devlist->cur_sb[devlist->level])); if (strlen(str) == 0) { free(str); str = NULL; } sbuf_delete(devlist->cur_sb[devlist->level]); devlist->cur_sb[devlist->level] = NULL; devlist->level--; if (strcmp(name, "backend_type") == 0) { cur_lun->backend_type = str; str = NULL; } else if (strcmp(name, "size") == 0) { cur_lun->size_blocks = strtoull(str, NULL, 0); } else if (strcmp(name, "blocksize") == 0) { cur_lun->blocksize = strtoul(str, NULL, 0); } else if (strcmp(name, "serial_number") == 0) { cur_lun->serial_number = str; str = NULL; } else if (strcmp(name, "device_id") == 0) { cur_lun->device_id = str; str = NULL; } else if (strcmp(name, "ctld_name") == 0) { cur_lun->ctld_name = str; str = NULL; } else if (strcmp(name, "lun") == 0) { devlist->cur_lun = NULL; } else if (strcmp(name, "ctllunlist") == 0) { /* Nothing. */ } else { struct cctl_lun_nv *nv; nv = calloc(1, sizeof(*nv)); if (nv == NULL) log_err(1, "%s: can't allocate %zd bytes for nv pair", __func__, sizeof(*nv)); nv->name = checked_strdup(name); nv->value = str; str = NULL; STAILQ_INSERT_TAIL(&cur_lun->attr_list, nv, links); } free(str); } static void cctl_start_pelement(void *user_data, const char *name, const char **attr) { int i; struct cctl_devlist_data *devlist; struct cctl_port *cur_port; devlist = (struct cctl_devlist_data *)user_data; cur_port = devlist->cur_port; devlist->level++; if ((u_int)devlist->level >= (sizeof(devlist->cur_sb) / sizeof(devlist->cur_sb[0]))) log_errx(1, "%s: too many nesting levels, %zd max", __func__, sizeof(devlist->cur_sb) / sizeof(devlist->cur_sb[0])); devlist->cur_sb[devlist->level] = sbuf_new_auto(); if (devlist->cur_sb[devlist->level] == NULL) log_err(1, "%s: unable to allocate sbuf", __func__); if (strcmp(name, "targ_port") == 0) { if (cur_port != NULL) log_errx(1, "%s: improper port element nesting (%s)", __func__, name); cur_port = calloc(1, sizeof(*cur_port)); if (cur_port == NULL) log_err(1, "%s: cannot allocate %zd bytes", __func__, sizeof(*cur_port)); devlist->num_ports++; devlist->cur_port = cur_port; STAILQ_INIT(&cur_port->attr_list); STAILQ_INSERT_TAIL(&devlist->port_list, cur_port, links); for (i = 0; attr[i] != NULL; i += 2) { if (strcmp(attr[i], "id") == 0) { cur_port->port_id = strtoul(attr[i+1], NULL, 0); } else { log_errx(1, "%s: invalid LUN attribute %s = %s", __func__, attr[i], attr[i+1]); } } } } static void cctl_end_pelement(void *user_data, const char *name) { struct cctl_devlist_data *devlist; struct cctl_port *cur_port; char *str; devlist = (struct cctl_devlist_data *)user_data; cur_port = devlist->cur_port; if ((cur_port == NULL) && (strcmp(name, "ctlportlist") != 0)) log_errx(1, "%s: cur_port == NULL! (name = %s)", __func__, name); if (devlist->cur_sb[devlist->level] == NULL) log_errx(1, "%s: no valid sbuf at level %d (name %s)", __func__, devlist->level, name); sbuf_finish(devlist->cur_sb[devlist->level]); str = checked_strdup(sbuf_data(devlist->cur_sb[devlist->level])); if (strlen(str) == 0) { free(str); str = NULL; } sbuf_delete(devlist->cur_sb[devlist->level]); devlist->cur_sb[devlist->level] = NULL; devlist->level--; if (strcmp(name, "port_name") == 0) { cur_port->port_name = str; str = NULL; } else if (strcmp(name, "physical_port") == 0) { cur_port->pp = strtoul(str, NULL, 0); } else if (strcmp(name, "virtual_port") == 0) { cur_port->vp = strtoul(str, NULL, 0); } else if (strcmp(name, "cfiscsi_target") == 0) { cur_port->cfiscsi_target = str; str = NULL; } else if (strcmp(name, "cfiscsi_state") == 0) { cur_port->cfiscsi_state = strtoul(str, NULL, 0); } else if (strcmp(name, "cfiscsi_portal_group_tag") == 0) { cur_port->cfiscsi_portal_group_tag = strtoul(str, NULL, 0); } else if (strcmp(name, "ctld_portal_group_name") == 0) { cur_port->ctld_portal_group_name = str; str = NULL; } else if (strcmp(name, "targ_port") == 0) { devlist->cur_port = NULL; } else if (strcmp(name, "ctlportlist") == 0) { /* Nothing. */ } else { struct cctl_lun_nv *nv; nv = calloc(1, sizeof(*nv)); if (nv == NULL) log_err(1, "%s: can't allocate %zd bytes for nv pair", __func__, sizeof(*nv)); nv->name = checked_strdup(name); nv->value = str; str = NULL; STAILQ_INSERT_TAIL(&cur_port->attr_list, nv, links); } free(str); } static void cctl_char_handler(void *user_data, const XML_Char *str, int len) { struct cctl_devlist_data *devlist; devlist = (struct cctl_devlist_data *)user_data; sbuf_bcat(devlist->cur_sb[devlist->level], str, len); } struct conf * conf_new_from_kernel(void) { struct conf *conf = NULL; struct target *targ; struct portal_group *pg; struct pport *pp; struct port *cp; struct lun *cl; struct lun_option *lo; struct ctl_lun_list list; struct cctl_devlist_data devlist; struct cctl_lun *lun; struct cctl_port *port; XML_Parser parser; char *str, *name; int len, retval; bzero(&devlist, sizeof(devlist)); STAILQ_INIT(&devlist.lun_list); STAILQ_INIT(&devlist.port_list); log_debugx("obtaining previously configured CTL luns from the kernel"); str = NULL; len = 4096; retry: str = realloc(str, len); if (str == NULL) log_err(1, "realloc"); bzero(&list, sizeof(list)); list.alloc_len = len; list.status = CTL_LUN_LIST_NONE; list.lun_xml = str; if (ioctl(ctl_fd, CTL_LUN_LIST, &list) == -1) { log_warn("error issuing CTL_LUN_LIST ioctl"); free(str); return (NULL); } if (list.status == CTL_LUN_LIST_ERROR) { log_warnx("error returned from CTL_LUN_LIST ioctl: %s", list.error_str); free(str); return (NULL); } if (list.status == CTL_LUN_LIST_NEED_MORE_SPACE) { len = len << 1; goto retry; } parser = XML_ParserCreate(NULL); if (parser == NULL) { log_warnx("unable to create XML parser"); free(str); return (NULL); } XML_SetUserData(parser, &devlist); XML_SetElementHandler(parser, cctl_start_element, cctl_end_element); XML_SetCharacterDataHandler(parser, cctl_char_handler); retval = XML_Parse(parser, str, strlen(str), 1); XML_ParserFree(parser); free(str); if (retval != 1) { log_warnx("XML_Parse failed"); return (NULL); } str = NULL; len = 4096; retry_port: str = realloc(str, len); if (str == NULL) log_err(1, "realloc"); bzero(&list, sizeof(list)); list.alloc_len = len; list.status = CTL_LUN_LIST_NONE; list.lun_xml = str; if (ioctl(ctl_fd, CTL_PORT_LIST, &list) == -1) { log_warn("error issuing CTL_PORT_LIST ioctl"); free(str); return (NULL); } if (list.status == CTL_PORT_LIST_ERROR) { log_warnx("error returned from CTL_PORT_LIST ioctl: %s", list.error_str); free(str); return (NULL); } if (list.status == CTL_LUN_LIST_NEED_MORE_SPACE) { len = len << 1; goto retry_port; } parser = XML_ParserCreate(NULL); if (parser == NULL) { log_warnx("unable to create XML parser"); free(str); return (NULL); } XML_SetUserData(parser, &devlist); XML_SetElementHandler(parser, cctl_start_pelement, cctl_end_pelement); XML_SetCharacterDataHandler(parser, cctl_char_handler); retval = XML_Parse(parser, str, strlen(str), 1); XML_ParserFree(parser); free(str); if (retval != 1) { log_warnx("XML_Parse failed"); return (NULL); } conf = conf_new(); name = NULL; STAILQ_FOREACH(port, &devlist.port_list, links) { if (name) free(name); if (port->pp == 0 && port->vp == 0) name = checked_strdup(port->port_name); else if (port->vp == 0) asprintf(&name, "%s/%d", port->port_name, port->pp); else asprintf(&name, "%s/%d/%d", port->port_name, port->pp, port->vp); if (port->cfiscsi_target == NULL) { log_debugx("CTL port %u \"%s\" wasn't managed by ctld; ", port->port_id, name); pp = pport_find(conf, name); if (pp == NULL) { #if 0 log_debugx("found new kernel port %u \"%s\"", port->port_id, name); #endif pp = pport_new(conf, name, port->port_id); if (pp == NULL) { log_warnx("pport_new failed"); continue; } } continue; } if (port->cfiscsi_state != 1) { log_debugx("CTL port %ju is not active (%d); ignoring", (uintmax_t)port->port_id, port->cfiscsi_state); continue; } targ = target_find(conf, port->cfiscsi_target); if (targ == NULL) { #if 0 log_debugx("found new kernel target %s for CTL port %ld", port->cfiscsi_target, port->port_id); #endif targ = target_new(conf, port->cfiscsi_target); if (targ == NULL) { log_warnx("target_new failed"); continue; } } if (port->ctld_portal_group_name == NULL) continue; pg = portal_group_find(conf, port->ctld_portal_group_name); if (pg == NULL) { #if 0 log_debugx("found new kernel portal group %s for CTL port %ld", port->ctld_portal_group_name, port->port_id); #endif pg = portal_group_new(conf, port->ctld_portal_group_name); if (pg == NULL) { log_warnx("portal_group_new failed"); continue; } } pg->pg_tag = port->cfiscsi_portal_group_tag; cp = port_new(conf, targ, pg); if (cp == NULL) { log_warnx("port_new failed"); continue; } cp->p_ctl_port = port->port_id; } if (name) free(name); STAILQ_FOREACH(lun, &devlist.lun_list, links) { struct cctl_lun_nv *nv; if (lun->ctld_name == NULL) { log_debugx("CTL lun %ju wasn't managed by ctld; " "ignoring", (uintmax_t)lun->lun_id); continue; } cl = lun_find(conf, lun->ctld_name); if (cl != NULL) { log_warnx("found CTL lun %ju \"%s\", " "also backed by CTL lun %d; ignoring", (uintmax_t)lun->lun_id, lun->ctld_name, cl->l_ctl_lun); continue; } log_debugx("found CTL lun %ju \"%s\"", (uintmax_t)lun->lun_id, lun->ctld_name); cl = lun_new(conf, lun->ctld_name); if (cl == NULL) { log_warnx("lun_new failed"); continue; } lun_set_backend(cl, lun->backend_type); lun_set_blocksize(cl, lun->blocksize); lun_set_device_id(cl, lun->device_id); lun_set_serial(cl, lun->serial_number); lun_set_size(cl, lun->size_blocks * cl->l_blocksize); lun_set_ctl_lun(cl, lun->lun_id); STAILQ_FOREACH(nv, &lun->attr_list, links) { if (strcmp(nv->name, "file") == 0 || strcmp(nv->name, "dev") == 0) { lun_set_path(cl, nv->value); continue; } lo = lun_option_new(cl, nv->name, nv->value); if (lo == NULL) log_warnx("unable to add CTL lun option %s " "for CTL lun %ju \"%s\"", nv->name, (uintmax_t) lun->lun_id, cl->l_name); } } return (conf); } static void str_arg(struct ctl_be_arg *arg, const char *name, const char *value) { arg->namelen = strlen(name) + 1; arg->name = __DECONST(char *, name); arg->vallen = strlen(value) + 1; arg->value = __DECONST(char *, value); arg->flags = CTL_BEARG_ASCII | CTL_BEARG_RD; } int kernel_lun_add(struct lun *lun) { struct lun_option *lo; struct ctl_lun_req req; int error, i, num_options; bzero(&req, sizeof(req)); strlcpy(req.backend, lun->l_backend, sizeof(req.backend)); req.reqtype = CTL_LUNREQ_CREATE; req.reqdata.create.blocksize_bytes = lun->l_blocksize; if (lun->l_size != 0) req.reqdata.create.lun_size_bytes = lun->l_size; req.reqdata.create.flags |= CTL_LUN_FLAG_DEV_TYPE; req.reqdata.create.device_type = T_DIRECT; if (lun->l_serial != NULL) { strncpy(req.reqdata.create.serial_num, lun->l_serial, sizeof(req.reqdata.create.serial_num)); req.reqdata.create.flags |= CTL_LUN_FLAG_SERIAL_NUM; } if (lun->l_device_id != NULL) { strncpy(req.reqdata.create.device_id, lun->l_device_id, sizeof(req.reqdata.create.device_id)); req.reqdata.create.flags |= CTL_LUN_FLAG_DEVID; } if (lun->l_path != NULL) { lo = lun_option_find(lun, "file"); if (lo != NULL) { lun_option_set(lo, lun->l_path); } else { lo = lun_option_new(lun, "file", lun->l_path); assert(lo != NULL); } } lo = lun_option_find(lun, "ctld_name"); if (lo != NULL) { lun_option_set(lo, lun->l_name); } else { lo = lun_option_new(lun, "ctld_name", lun->l_name); assert(lo != NULL); } lo = lun_option_find(lun, "scsiname"); if (lo == NULL && lun->l_scsiname != NULL) { lo = lun_option_new(lun, "scsiname", lun->l_scsiname); assert(lo != NULL); } num_options = 0; TAILQ_FOREACH(lo, &lun->l_options, lo_next) num_options++; req.num_be_args = num_options; if (num_options > 0) { req.be_args = malloc(num_options * sizeof(*req.be_args)); if (req.be_args == NULL) { log_warn("error allocating %zd bytes", num_options * sizeof(*req.be_args)); return (1); } i = 0; TAILQ_FOREACH(lo, &lun->l_options, lo_next) { str_arg(&req.be_args[i], lo->lo_name, lo->lo_value); i++; } assert(i == num_options); } error = ioctl(ctl_fd, CTL_LUN_REQ, &req); free(req.be_args); if (error != 0) { log_warn("error issuing CTL_LUN_REQ ioctl"); return (1); } switch (req.status) { case CTL_LUN_ERROR: log_warnx("LUN creation error: %s", req.error_str); return (1); case CTL_LUN_WARNING: log_warnx("LUN creation warning: %s", req.error_str); break; case CTL_LUN_OK: break; default: log_warnx("unknown LUN creation status: %d", req.status); return (1); } lun_set_ctl_lun(lun, req.reqdata.create.req_lun_id); return (0); } int -kernel_lun_resize(struct lun *lun) +kernel_lun_modify(struct lun *lun) { + struct lun_option *lo; struct ctl_lun_req req; + int error, i, num_options; bzero(&req, sizeof(req)); strlcpy(req.backend, lun->l_backend, sizeof(req.backend)); req.reqtype = CTL_LUNREQ_MODIFY; req.reqdata.modify.lun_id = lun->l_ctl_lun; req.reqdata.modify.lun_size_bytes = lun->l_size; - if (ioctl(ctl_fd, CTL_LUN_REQ, &req) == -1) { + num_options = 0; + TAILQ_FOREACH(lo, &lun->l_options, lo_next) + num_options++; + + req.num_be_args = num_options; + if (num_options > 0) { + req.be_args = malloc(num_options * sizeof(*req.be_args)); + if (req.be_args == NULL) { + log_warn("error allocating %zd bytes", + num_options * sizeof(*req.be_args)); + return (1); + } + + i = 0; + TAILQ_FOREACH(lo, &lun->l_options, lo_next) { + str_arg(&req.be_args[i], lo->lo_name, lo->lo_value); + i++; + } + assert(i == num_options); + } + + error = ioctl(ctl_fd, CTL_LUN_REQ, &req); + free(req.be_args); + if (error != 0) { log_warn("error issuing CTL_LUN_REQ ioctl"); return (1); } switch (req.status) { case CTL_LUN_ERROR: log_warnx("LUN modification error: %s", req.error_str); return (1); case CTL_LUN_WARNING: log_warnx("LUN modification warning: %s", req.error_str); break; case CTL_LUN_OK: break; default: log_warnx("unknown LUN modification status: %d", req.status); return (1); } return (0); } int kernel_lun_remove(struct lun *lun) { struct ctl_lun_req req; bzero(&req, sizeof(req)); strlcpy(req.backend, lun->l_backend, sizeof(req.backend)); req.reqtype = CTL_LUNREQ_RM; req.reqdata.rm.lun_id = lun->l_ctl_lun; if (ioctl(ctl_fd, CTL_LUN_REQ, &req) == -1) { log_warn("error issuing CTL_LUN_REQ ioctl"); return (1); } switch (req.status) { case CTL_LUN_ERROR: log_warnx("LUN removal error: %s", req.error_str); return (1); case CTL_LUN_WARNING: log_warnx("LUN removal warning: %s", req.error_str); break; case CTL_LUN_OK: break; default: log_warnx("unknown LUN removal status: %d", req.status); return (1); } return (0); } void kernel_handoff(struct connection *conn) { struct ctl_iscsi req; bzero(&req, sizeof(req)); req.type = CTL_ISCSI_HANDOFF; strlcpy(req.data.handoff.initiator_name, conn->conn_initiator_name, sizeof(req.data.handoff.initiator_name)); strlcpy(req.data.handoff.initiator_addr, conn->conn_initiator_addr, sizeof(req.data.handoff.initiator_addr)); if (conn->conn_initiator_alias != NULL) { strlcpy(req.data.handoff.initiator_alias, conn->conn_initiator_alias, sizeof(req.data.handoff.initiator_alias)); } memcpy(req.data.handoff.initiator_isid, conn->conn_initiator_isid, sizeof(req.data.handoff.initiator_isid)); strlcpy(req.data.handoff.target_name, conn->conn_target->t_name, sizeof(req.data.handoff.target_name)); if (conn->conn_portal->p_portal_group->pg_offload != NULL) { strlcpy(req.data.handoff.offload, conn->conn_portal->p_portal_group->pg_offload, sizeof(req.data.handoff.offload)); } #ifdef ICL_KERNEL_PROXY if (proxy_mode) req.data.handoff.connection_id = conn->conn_socket; else req.data.handoff.socket = conn->conn_socket; #else req.data.handoff.socket = conn->conn_socket; #endif req.data.handoff.portal_group_tag = conn->conn_portal->p_portal_group->pg_tag; if (conn->conn_header_digest == CONN_DIGEST_CRC32C) req.data.handoff.header_digest = CTL_ISCSI_DIGEST_CRC32C; if (conn->conn_data_digest == CONN_DIGEST_CRC32C) req.data.handoff.data_digest = CTL_ISCSI_DIGEST_CRC32C; req.data.handoff.cmdsn = conn->conn_cmdsn; req.data.handoff.statsn = conn->conn_statsn; req.data.handoff.max_recv_data_segment_length = conn->conn_max_data_segment_length; req.data.handoff.max_burst_length = conn->conn_max_burst_length; req.data.handoff.immediate_data = conn->conn_immediate_data; if (ioctl(ctl_fd, CTL_ISCSI, &req) == -1) { log_err(1, "error issuing CTL_ISCSI ioctl; " "dropping connection"); } if (req.status != CTL_ISCSI_OK) { log_errx(1, "error returned from CTL iSCSI handoff request: " "%s; dropping connection", req.error_str); } } void kernel_limits(const char *offload, size_t *max_data_segment_length) { struct ctl_iscsi req; bzero(&req, sizeof(req)); req.type = CTL_ISCSI_LIMITS; if (offload != NULL) { strlcpy(req.data.limits.offload, offload, sizeof(req.data.limits.offload)); } if (ioctl(ctl_fd, CTL_ISCSI, &req) == -1) { log_err(1, "error issuing CTL_ISCSI ioctl; " "dropping connection"); } if (req.status != CTL_ISCSI_OK) { log_errx(1, "error returned from CTL iSCSI limits request: " "%s; dropping connection", req.error_str); } *max_data_segment_length = req.data.limits.data_segment_limit; if (offload != NULL) { log_debugx("MaxRecvDataSegment kernel limit for offload " "\"%s\" is %zd", offload, *max_data_segment_length); } else { log_debugx("MaxRecvDataSegment kernel limit is %zd", *max_data_segment_length); } } int kernel_port_add(struct port *port) { struct ctl_port_entry entry; struct ctl_req req; struct ctl_lun_map lm; struct target *targ = port->p_target; struct portal_group *pg = port->p_portal_group; char tagstr[16]; int error, i, n; /* Create iSCSI port. */ if (port->p_portal_group) { bzero(&req, sizeof(req)); strlcpy(req.driver, "iscsi", sizeof(req.driver)); req.reqtype = CTL_REQ_CREATE; req.num_args = 5; req.args = malloc(req.num_args * sizeof(*req.args)); if (req.args == NULL) log_err(1, "malloc"); n = 0; req.args[n].namelen = sizeof("port_id"); req.args[n].name = __DECONST(char *, "port_id"); req.args[n].vallen = sizeof(port->p_ctl_port); req.args[n].value = &port->p_ctl_port; req.args[n++].flags = CTL_BEARG_WR; str_arg(&req.args[n++], "cfiscsi_target", targ->t_name); snprintf(tagstr, sizeof(tagstr), "%d", pg->pg_tag); str_arg(&req.args[n++], "cfiscsi_portal_group_tag", tagstr); if (targ->t_alias) str_arg(&req.args[n++], "cfiscsi_target_alias", targ->t_alias); str_arg(&req.args[n++], "ctld_portal_group_name", pg->pg_name); req.num_args = n; error = ioctl(ctl_fd, CTL_PORT_REQ, &req); free(req.args); if (error != 0) { log_warn("error issuing CTL_PORT_REQ ioctl"); return (1); } if (req.status == CTL_LUN_ERROR) { log_warnx("error returned from port creation request: %s", req.error_str); return (1); } if (req.status != CTL_LUN_OK) { log_warnx("unknown port creation request status %d", req.status); return (1); } } else if (port->p_pport) { port->p_ctl_port = port->p_pport->pp_ctl_port; if (strncmp(targ->t_name, "naa.", 4) == 0 && strlen(targ->t_name) == 20) { bzero(&entry, sizeof(entry)); entry.port_type = CTL_PORT_NONE; entry.targ_port = port->p_ctl_port; entry.flags |= CTL_PORT_WWNN_VALID; entry.wwnn = strtoull(targ->t_name + 4, NULL, 16); if (ioctl(ctl_fd, CTL_SET_PORT_WWNS, &entry) == -1) log_warn("CTL_SET_PORT_WWNS ioctl failed"); } } /* Explicitly enable mapping to block any access except allowed. */ lm.port = port->p_ctl_port; lm.plun = UINT32_MAX; lm.lun = 0; error = ioctl(ctl_fd, CTL_LUN_MAP, &lm); if (error != 0) log_warn("CTL_LUN_MAP ioctl failed"); /* Map configured LUNs */ for (i = 0; i < MAX_LUNS; i++) { if (targ->t_luns[i] == NULL) continue; lm.port = port->p_ctl_port; lm.plun = i; lm.lun = targ->t_luns[i]->l_ctl_lun; error = ioctl(ctl_fd, CTL_LUN_MAP, &lm); if (error != 0) log_warn("CTL_LUN_MAP ioctl failed"); } /* Enable port */ bzero(&entry, sizeof(entry)); entry.targ_port = port->p_ctl_port; error = ioctl(ctl_fd, CTL_ENABLE_PORT, &entry); if (error != 0) { log_warn("CTL_ENABLE_PORT ioctl failed"); return (-1); } return (0); } int kernel_port_update(struct port *port) { struct ctl_lun_map lm; struct target *targ = port->p_target; int error, i; /* Map configured LUNs and unmap others */ for (i = 0; i < MAX_LUNS; i++) { lm.port = port->p_ctl_port; lm.plun = i; if (targ->t_luns[i] == NULL) lm.lun = UINT32_MAX; else lm.lun = targ->t_luns[i]->l_ctl_lun; error = ioctl(ctl_fd, CTL_LUN_MAP, &lm); if (error != 0) log_warn("CTL_LUN_MAP ioctl failed"); } return (0); } int kernel_port_remove(struct port *port) { struct ctl_port_entry entry; struct ctl_lun_map lm; struct ctl_req req; char tagstr[16]; struct target *targ = port->p_target; struct portal_group *pg = port->p_portal_group; int error; /* Disable port */ bzero(&entry, sizeof(entry)); entry.targ_port = port->p_ctl_port; error = ioctl(ctl_fd, CTL_DISABLE_PORT, &entry); if (error != 0) { log_warn("CTL_DISABLE_PORT ioctl failed"); return (-1); } /* Remove iSCSI port. */ if (port->p_portal_group) { bzero(&req, sizeof(req)); strlcpy(req.driver, "iscsi", sizeof(req.driver)); req.reqtype = CTL_REQ_REMOVE; req.num_args = 2; req.args = malloc(req.num_args * sizeof(*req.args)); if (req.args == NULL) log_err(1, "malloc"); str_arg(&req.args[0], "cfiscsi_target", targ->t_name); snprintf(tagstr, sizeof(tagstr), "%d", pg->pg_tag); str_arg(&req.args[1], "cfiscsi_portal_group_tag", tagstr); error = ioctl(ctl_fd, CTL_PORT_REQ, &req); free(req.args); if (error != 0) { log_warn("error issuing CTL_PORT_REQ ioctl"); return (1); } if (req.status == CTL_LUN_ERROR) { log_warnx("error returned from port removal request: %s", req.error_str); return (1); } if (req.status != CTL_LUN_OK) { log_warnx("unknown port removal request status %d", req.status); return (1); } } else { /* Disable LUN mapping. */ lm.port = port->p_ctl_port; lm.plun = UINT32_MAX; lm.lun = UINT32_MAX; error = ioctl(ctl_fd, CTL_LUN_MAP, &lm); if (error != 0) log_warn("CTL_LUN_MAP ioctl failed"); } return (0); } #ifdef ICL_KERNEL_PROXY void kernel_listen(struct addrinfo *ai, bool iser, int portal_id) { struct ctl_iscsi req; bzero(&req, sizeof(req)); req.type = CTL_ISCSI_LISTEN; req.data.listen.iser = iser; req.data.listen.domain = ai->ai_family; req.data.listen.socktype = ai->ai_socktype; req.data.listen.protocol = ai->ai_protocol; req.data.listen.addr = ai->ai_addr; req.data.listen.addrlen = ai->ai_addrlen; req.data.listen.portal_id = portal_id; if (ioctl(ctl_fd, CTL_ISCSI, &req) == -1) log_err(1, "error issuing CTL_ISCSI ioctl"); if (req.status != CTL_ISCSI_OK) { log_errx(1, "error returned from CTL iSCSI listen: %s", req.error_str); } } void kernel_accept(int *connection_id, int *portal_id, struct sockaddr *client_sa, socklen_t *client_salen) { struct ctl_iscsi req; struct sockaddr_storage ss; bzero(&req, sizeof(req)); req.type = CTL_ISCSI_ACCEPT; req.data.accept.initiator_addr = (struct sockaddr *)&ss; if (ioctl(ctl_fd, CTL_ISCSI, &req) == -1) log_err(1, "error issuing CTL_ISCSI ioctl"); if (req.status != CTL_ISCSI_OK) { log_errx(1, "error returned from CTL iSCSI accept: %s", req.error_str); } *connection_id = req.data.accept.connection_id; *portal_id = req.data.accept.portal_id; *client_salen = req.data.accept.initiator_addrlen; memcpy(client_sa, &ss, *client_salen); } void kernel_send(struct pdu *pdu) { struct ctl_iscsi req; bzero(&req, sizeof(req)); req.type = CTL_ISCSI_SEND; req.data.send.connection_id = pdu->pdu_connection->conn_socket; req.data.send.bhs = pdu->pdu_bhs; req.data.send.data_segment_len = pdu->pdu_data_len; req.data.send.data_segment = pdu->pdu_data; if (ioctl(ctl_fd, CTL_ISCSI, &req) == -1) { log_err(1, "error issuing CTL_ISCSI ioctl; " "dropping connection"); } if (req.status != CTL_ISCSI_OK) { log_errx(1, "error returned from CTL iSCSI send: " "%s; dropping connection", req.error_str); } } void kernel_receive(struct pdu *pdu) { struct ctl_iscsi req; pdu->pdu_data = malloc(MAX_DATA_SEGMENT_LENGTH); if (pdu->pdu_data == NULL) log_err(1, "malloc"); bzero(&req, sizeof(req)); req.type = CTL_ISCSI_RECEIVE; req.data.receive.connection_id = pdu->pdu_connection->conn_socket; req.data.receive.bhs = pdu->pdu_bhs; req.data.receive.data_segment_len = MAX_DATA_SEGMENT_LENGTH; req.data.receive.data_segment = pdu->pdu_data; if (ioctl(ctl_fd, CTL_ISCSI, &req) == -1) { log_err(1, "error issuing CTL_ISCSI ioctl; " "dropping connection"); } if (req.status != CTL_ISCSI_OK) { log_errx(1, "error returned from CTL iSCSI receive: " "%s; dropping connection", req.error_str); } } #endif /* ICL_KERNEL_PROXY */ /* * XXX: I CANT INTO LATIN */ void kernel_capsicate(void) { int error; cap_rights_t rights; const unsigned long cmds[] = { CTL_ISCSI }; cap_rights_init(&rights, CAP_IOCTL); error = cap_rights_limit(ctl_fd, &rights); if (error != 0 && errno != ENOSYS) log_err(1, "cap_rights_limit"); error = cap_ioctls_limit(ctl_fd, cmds, sizeof(cmds) / sizeof(cmds[0])); if (error != 0 && errno != ENOSYS) log_err(1, "cap_ioctls_limit"); error = cap_enter(); if (error != 0 && errno != ENOSYS) log_err(1, "cap_enter"); if (cap_sandboxed()) log_debugx("Capsicum capability mode enabled"); else log_warnx("Capsicum capability mode not supported"); } Index: projects/clang-trunk/usr.sbin/sesutil/sesutil.c =================================================================== --- projects/clang-trunk/usr.sbin/sesutil/sesutil.c (revision 287501) +++ projects/clang-trunk/usr.sbin/sesutil/sesutil.c (revision 287502) @@ -1,225 +1,224 @@ /*- * Copyright (c) 2015 Baptiste Daroussin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int locate(int argc, char **argv); static struct command { const char *name; const char *desc; int (*exec)(int argc, char **argv); } cmds[] = { { "locate", "Change the state of the external LED associated with a" " disk", locate} , }; static const int nbcmds = nitems(cmds); static void do_locate(int fd, unsigned int idx, bool onoff) { encioc_elm_status_t o; o.elm_idx = idx; if (ioctl(fd, ENCIOC_GETELMSTAT, (caddr_t) &o) < 0) { close(fd); err(EXIT_FAILURE, "ENCIOC_GETELMSTAT"); } o.cstat[0] |= 0x80; if (onoff) o.cstat[2] |= 0x02; else o.cstat[2] &= 0xfd; if (ioctl(fd, ENCIOC_SETELMSTAT, (caddr_t) &o) < 0) { close(fd); err(EXIT_FAILURE, "ENCIOC_SETELMSTAT"); } } static bool disk_match(const char *devnames, const char *disk, size_t len) { - const char *devname; + const char *dname; - devname = devnames; - while ((devname = strstr(devname, disk)) != NULL) { - if (devname[len] == '\0' || devname[len] == ',') + dname = devnames; + while ((dname = strstr(dname, disk)) != NULL) { + if (dname[len] == '\0' || dname[len] == ',') return (true); - devname++; + dname++; } return (false); } static int locate(int argc, char **argv) { encioc_elm_devnames_t objdn; encioc_element_t *objp; glob_t g; char *disk; size_t len, i; int fd, nobj, j; bool all = false; bool onoff; if (argc != 2) { errx(EXIT_FAILURE, "usage: %s locate [disk] [on|off]", getprogname()); } disk = argv[0]; if (strcmp(argv[1], "on") == 0) { onoff = true; } else if (strcmp(argv[1], "off") == 0) { onoff = false; } else { errx(EXIT_FAILURE, "usage: %s locate [disk] [on|off]", getprogname()); } if (strcmp(disk, "all") == 0) { all = true; } len = strlen(disk); /* Get the list of ses devices */ if (glob("/dev/ses[0-9]*", 0, NULL, &g) == GLOB_NOMATCH) { globfree(&g); errx(EXIT_FAILURE, "No SES devices found"); } for (i = 0; i < g.gl_pathc; i++) { /* ensure we only got numbers after ses */ if (strspn(g.gl_pathv[i] + 8, "0123456789") != strlen(g.gl_pathv[i] + 8)) continue; if ((fd = open(g.gl_pathv[i], O_RDWR)) < 0) { if (errno == EACCES) err(EXIT_FAILURE, "enable to access SES device"); break; } if (ioctl(fd, ENCIOC_GETNELM, (caddr_t) &nobj) < 0) err(EXIT_FAILURE, "ENCIOC_GETNELM"); objp = calloc(nobj, sizeof(encioc_element_t)); if (objp == NULL) err(EXIT_FAILURE, "calloc()"); if (ioctl(fd, ENCIOC_GETELMMAP, (caddr_t) objp) < 0) err(EXIT_FAILURE, "ENCIOC_GETELMMAP"); for (j = 0; j < nobj; j++) { memset(&objdn, 0, sizeof(objdn)); objdn.elm_idx = objp[j].elm_idx; objdn.elm_names_size = 128; objdn.elm_devnames = calloc(128, sizeof(char)); if (objdn.elm_devnames == NULL) err(EXIT_FAILURE, "calloc()"); if (ioctl(fd, ENCIOC_GETELMDEVNAMES, (caddr_t) &objdn) <0) continue; if (objdn.elm_names_len > 0) { if (all) { do_locate(fd, objdn.elm_idx, onoff); continue; } if (disk_match(objdn.elm_devnames, disk, len)) { do_locate(fd, objdn.elm_idx, onoff); break; } } } close(fd); - i++; } globfree(&g); return (EXIT_SUCCESS); } static void usage(FILE *out) { int i; fprintf(out, "Usage: %s [command] [options]\n", getprogname()); fprintf(out, "Commands supported:\n"); for (i = 0; i < nbcmds; i++) fprintf(out, "\t%-15s%s\n", cmds[i].name, cmds[i].desc); } int main(int argc, char **argv) { int i; struct command *cmd = NULL; if (argc < 2) { warnx("Missing command"); usage(stderr); return (EXIT_FAILURE); } for (i = 0; i < nbcmds; i++) { if (strcmp(argv[1], cmds[i].name) == 0) { cmd = &cmds[i]; break; } } if (cmd == NULL) { warnx("unknown command %s", argv[1]); usage(stderr); return (EXIT_FAILURE); } argc-=2; argv+=2; return (cmd->exec(argc, argv)); } Index: projects/clang-trunk =================================================================== --- projects/clang-trunk (revision 287501) +++ projects/clang-trunk (revision 287502) Property changes on: projects/clang-trunk ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r287490-287501