Index: head/sys/conf/NOTES =================================================================== --- head/sys/conf/NOTES (revision 14092) +++ head/sys/conf/NOTES (revision 14093) @@ -1,854 +1,853 @@ # # LINT -- config file for checking all the sources, tries to pull in # as much of the source tree as it can. # -# $Id: LINT,v 1.237 1996/02/03 11:48:12 phk Exp $ +# $Id: LINT,v 1.238 1996/02/06 20:57:46 wollman Exp $ # # NB: You probably don't want to try running a kernel built from this # file. Instead, you should start from GENERIC, and add options from # this file as required. # # # This directive is mandatory; it defines the architecture to be # configured for; in this case, the 386 family. You must also specify # at least one CPU (the one you intend to run on); deleting the # specification for CPUs you don't need to use may make parts of the # system run faster # machine "i386" cpu "I386_CPU" cpu "I486_CPU" cpu "I586_CPU" # aka Pentium(tm) cpu "I686_CPU" # aka Pentium Pro(tm) # # This is the ``identification'' of the kernel. Usually this should # be the same as the name of your kernel. # ident LINT # # The `maxusers' parameter controls the static sizing of a number of # internal system tables by a complicated formula defined in param.c. # maxusers 10 # # Under some circumstances it is necessary to make the default max # number of processes per user and open files per user more than the # defaults on bootup. (an example is a large news server in which # the uid, news, can sometimes need > 100 simultaneous processes running, # or perhaps a user using lots of windows under X). options CHILD_MAX=128 options OPEN_MAX=128 # # A math emulator is mandatory if you wish to run on hardware which # does not have a floating-point processor. Pick either the original, # bogus (but freely-distributable) math emulator, or a much more # fully-featured but GPL-licensed emulator taken from Linux. 
# options MATH_EMULATE #Support for x87 emulation # Don't enable both of these in a real config. options GPL_MATH_EMULATE #Support for x87 emulation via #new math emulator # # This directive defines a number of things: # - The compiled kernel is to be called `kernel' # - The root filesystem might be on partition wd0a # - Crash dumps will be written to wd0b, if possible. Specifying the # dump device here is not recommended. Use dumpon(8). # config kernel root on wd0 dumps on wd0 ##################################################################### # COMPATIBILITY OPTIONS # # Implement system calls compatible with 4.3BSD and older versions of # FreeBSD. You probably do NOT want to remove this as much current code # still relies on the 4.3 emulation. # options "COMPAT_43" # # Allow user-mode programs to manipulate their local descriptor tables. # This option is required for the WINE Windows(tm) emulator, and is # not used by anything else (that we know of). # options USER_LDT #allow user-level control of i386 ldt # # These three options provide support for System V Interface # Definition-style interprocess communication, in the form of shared # memory, semaphores, and message queues, respectively. # options SYSVSHM options SYSVSEM options SYSVMSG ##################################################################### # DEBUGGING OPTIONS # # Enable the kernel debugger. # options DDB # # KTRACE enables the system-call tracing facility ktrace(2). # options KTRACE #kernel tracing # # The DIAGNOSTIC option is used in a number of source files to enable # extra sanity checking of internal structures. This support is not # enabled by default because of the extra time it would take to check # for these conditions, which can only occur as a result of # programming errors. # options DIAGNOSTIC # # Allow ordinary users to take the console - this is useful for X. 
options UCONSOLE ##################################################################### # NETWORKING OPTIONS # # Protocol families: # Only the INET (Internet) family is officially supported in FreeBSD. # Source code for the NS (Xerox Network Service), ISO (OSI), and # CCITT (X.25) families is provided for amusement value, although we # try to ensure that it actually compiles. # options INET #Internet communications protocols -options NS #Xerox NS communications protocols options IPX #IPX/SPX communications protocols options IPXIP #IPX in IP encapsulation (not available) options IPTUNNEL #IP in IPX encapsulation (not available) options IPXPRINTFS=0 #IPX/SPX Console Debugging Information options IPX_ERRPRINTFS=0 #IPX/SPX Console Debugging Information # These are currently broken and are no longer shipped due to lack # of interest. #options CCITT #X.25 network layer #options ISO #options TPIP #ISO TP class 4 over IP #options TPCONS #ISO TP class 0 over X.25 #options LLC #X.25 link layer for Ethernets #options HDLC #X.25 link layer for serial lines #options EON #ISO CLNP over IP +#options NS #Xerox NS protocols +#options NSIP #XNS over IP # # Network interfaces: # The `loop' pseudo-device is MANDATORY when networking is enabled. # The `ether' pseudo-device provides generic code to handle # Ethernets; it is MANDATORY when an Ethernet device driver is # configured. # The 'fddi' pseudo-device provides generic code to support FDDI. # The `sppp' pseudo-device serves a similar role for certain types # of synchronous PPP links (like `cx', `ar'). # The `sl' pseudo-device implements the Serial Line IP (SLIP) service. # The `ppp' pseudo-device implements the Point-to-Point Protocol. # The `bpfilter' pseudo-device enables the Berkeley Packet Filter. Be # aware of the legal and administrative consequences of enabling this # option. The number of devices determines the maximum number of # simultaneous BPF client programs runnable. 
# The `disc' pseudo-device implements a minimal network interface, # which throws away all packets sent and never receives any. It is # included for testing purposes. # The `tun' pseudo-device implements the User Process PPP (iijppp) # pseudo-device ether #Generic Ethernet pseudo-device fddi #Generic FDDI pseudo-device sppp #Generic Synchronous PPP pseudo-device loop #Network loopback device pseudo-device sl 2 #Serial Line IP pseudo-device ppp 2 #Point-to-point protocol pseudo-device bpfilter 4 #Berkeley packet filter pseudo-device disc #Discard device pseudo-device tun 1 #Tunnel driver(user process ppp) - -options NSIP #XNS over IP # # Internet family options: # # TCP_COMPAT_42 causes the TCP code to emulate certain bugs present in # 4.2BSD. This option should not be used unless you have a 4.2BSD # machine and TCP connections fail. # # MROUTING enables the kernel multicast packet forwarder, which works # with mrouted(8). # # IPFIREWALL enables support for IP firewall construction, in # conjunction with the `ipfw' program. IPFIREWALL_VERBOSE does # the obvious thing. # IPFIREWALL_ORDER_RULES makes the ipfw code sort the rules. You # don't want that, it's only there to be backward compatible. # IPACCT enables IP accounting. # # TCPDEBUG is undocumented. # options "TCP_COMPAT_42" #emulate 4.2BSD TCP bugs options MROUTING # Multicast routing options IPFIREWALL #firewall options IPFIREWALL_VERBOSE #print information about # dropped packets options IPFIREWALL_ORDER_RULES # bogusly sort rules. options IPACCT #ipaccounting options TCPDEBUG ##################################################################### # FILESYSTEM OPTIONS # # Only the root, /usr, and /tmp filesystems need be statically # compiled; everything else will be automatically loaded at mount # time. (Exception: the UFS family---FFS, MFS, and LFS---cannot # currently be demand-loaded.) Some people still prefer to statically # compile other filesystems as well. 
# # NB: The LFS, PORTAL, and UNION filesystems are known to be buggy, # and WILL panic your system if you attempt to do anything with them. # They are included here as an incentive for some enterprising soul to # sit down and fix them. # # Note: 4.4BSD NQNFS lease checking has relatively high cost for # _local_ I/O as well as remote I/O. Don't use it unless you will be # using NQNFS. # # One of these is mandatory: options FFS #Fast filesystem options NFS #Network File System # The rest are optional: options NQNFS #Enable NQNFS lease checking # options NFS_NOSERVER #Disable the NFS-server code. options "CD9660" #ISO 9660 filesystem options FDESC #File descriptor filesystem options KERNFS #Kernel filesystem options LFS #Log filesystem options MFS #Memory File System options MSDOSFS #MS DOS File System options NULLFS #NULL filesystem options PORTAL #Portal filesystem options PROCFS #Process filesystem options UMAPFS #UID map filesystem options UNION #Union filesystem # This DEVFS is experimental but seems to work options DEVFS #devices filesystem # Make space in the kernel for a MFS root filesystem. Define to the number # of kilobytes to reserve for the filesystem. options MFS_ROOT=10 # Allow the MFS_ROOT code to load the MFS image from floppy if it is missing. options MFS_AUTOLOAD # Allow this many swap-devices. options NSWAPDEV=20 # Disk quotas are supported when this option is enabled. If you # change the value of this option, you must do a `make clean' in your # kernel compile directory in order to get a working kernel. # options QUOTA #enable disk quotas ##################################################################### # SCSI DEVICES # SCSI DEVICE CONFIGURATION # The SCSI subsystem consists of the `base' SCSI code, a number of # high-level SCSI device `type' drivers, and the low-level host-adapter # device drivers. The host adapters are listed in the ISA and PCI # device configuration sections below. 
# # Beginning with FreeBSD 2.0.5 you can wire down your SCSI devices so # that a given bus, target, and LUN always come on line as the same # device unit. In earlier versions the unit numbers were assigned # in the order that the devices were probed on the SCSI bus. This # means that if you removed a disk drive, you may have had to rewrite # your /etc/fstab file, and also that you had to be careful when adding # a new disk as it may have been probed earlier and moved your device # configuration around. # This old behavior is maintained as the default behavior. The unit # assignment begins with the first non-wired down unit for a device # type. For example, if you wire a disk as "sd3" then the first # non-wired disk will be assigned sd4. # The syntax for wiring down devices is: # controller scbus0 at ahc0 # Single bus device # controller scbus1 at ahc1 bus 0 # Single bus device # controller scbus3 at ahc2 bus 0 # Twin bus device # controller scbus2 at ahc2 bus 1 # Twin bus device # disk sd0 at scbus0 target 0 unit 0 # disk sd1 at scbus3 target 1 # disk sd2 at scbus2 target 3 # tape st1 at scbus1 target 6 # device cd0 at scbus? # "units" (SCSI logical unit number) that are not specified are # treated as if specified as LUN 0. # All SCSI devices allocate as many units as are required. # The "unknown" device (uk? in pre-2.0.5) is now part of the base SCSI # configuration and doesn't have to be explicitly configured. controller scbus0 #base SCSI code device ch0 #SCSI media changers device sd0 #SCSI disks device st0 #SCSI tapes device cd0 #SCSI CD-ROMs device od0 #SCSI optical disk # The previous devices (ch, sd, st, cd) are recognized by config. # config doesn't (and shouldn't) know about these newer ones, # so we have to specify that they are on a SCSI bus with the "at scbus?" # clause. device worm0 at scbus? # SCSI worm device pt0 at scbus? # SCSI processor type device sctarg0 at scbus? 
# SCSI target # SCSI OPTIONS: # SCSIDEBUG: When defined enables debugging macros # NO_SCSI_SENSE: When defined disables sense descriptions (about 4k) # SCSI_REPORT_GEOMETRY: Always report disk geometry at boot up instead # of only when booting verbosely. options SCSIDEBUG #options NO_SCSI_SENSE options SCSI_REPORT_GEOMETRY ##################################################################### # MISCELLANEOUS DEVICES AND OPTIONS # # Of these, only the `log' device is truly mandatory. The `pty' # device usually turns out to be ``effectively mandatory'', as it is # required for `telnetd', `rlogind', `screen', `emacs', and `xterm', # among others. The `isdn', `ii', `ity', `itel', and `ispy' devices # are all required when ISDN support is used. If you wish to run certain # system utilities which are compressed by default (like /stand/sysinstall) # then `gzip' becomes mandatory too. # pseudo-device pty 16 #Pseudo ttys - can go as high as 64 pseudo-device speaker #Play IBM BASIC-style noises out your speaker pseudo-device log #Kernel syslog interface (/dev/klog) pseudo-device gzip #Exec gzipped a.out's pseudo-device vn #Vnode driver (turns a file into a device) pseudo-device snp 3 #Snoop device - to look at pty/vty/etc.. # These are non-optional for ISDN pseudo-device isdn pseudo-device ii 4 pseudo-device ity 4 pseudo-device itel 2 pseudo-device ispy 1 # These are only for watching for bitrot in old tty code. # broken #pseudo-device tb # These are only for watching for bitrot in old SCSI code. pseudo-device su #scsi user pseudo-device ssc #super scsi ##################################################################### # HARDWARE DEVICE CONFIGURATION # ISA and EISA devices: # Currently there is no separate support for EISA. There should be. # Micro Channel is not supported at all. # # Mandatory ISA devices: isa, sc or vt, npx # controller isa0 # # Options for `isa': # # AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A # interrupt controller. 
This saves about 1.25 usec for each interrupt. # Automatic EOI is documented not to work for the slave with the # original i8259A, but it works for some clones and some integrated # versions. # # BOUNCE_BUFFERS provides support for ISA DMA on machines with more # than 16 megabytes of memory. It doesn't hurt on other machines. # Some broken EISA and VLB hardware may need this, too. # # DUMMY_NOPS disables extra delays for some bus operations. The delays # are mostly for older systems and aren't used consistently. Probably # works OK on most EISA bus machines. # # TUNE_1542 enables the automatic ISA bus speed selection for the # Adaptec 1542 boards. Does not work for all boards, use it with caution. # # BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to # reset the CPU for reboot. This is needed on some systems with broken # keyboard controllers. # #options "AUTO_EOI_2" options BOUNCE_BUFFERS #options DUMMY_NOPS #options "TUNE_1542" #options BROKEN_KEYBOARD_RESET # Enable this and PCVT_FREEBSD for pcvt vt220 compatible console driver device vt0 at isa? port "IO_KBD" tty irq 1 vector pcrint options PCVT_FREEBSD=210 # pcvt running on FreeBSD >= 2.0.5 options XSERVER # include code for XFree86 options FAT_CURSOR # start with block cursor # This PCVT option is for keyboards such as those used on IBM ThinkPad laptops options PCVT_SCANSET=2 # IBM keyboards are non-std # The syscons console driver (sco color console compatible) - default. device sc0 at isa? port "IO_KBD" tty irq 1 vector scintr # # Options for `sc': # # HARDFONTS allows the driver to load an ISO-8859-1 font to replace # the default font in your display adapter's memory. # options HARDFONTS # # MAXCONS is maximum number of virtual consoles, no more than 16 # default value: 12 # options MAXCONS=16 # # This device is mandatory. # # The Numeric Processing eXtension is used to either enable the # coprocessor or enable math emulation. 
If your machine doesn't contain # a math co-processor, you must *also* add the option "MATH_EMULATE". # THIS IS NOT AN OPTIONAL ENTRY, DO NOT REMOVE IT # device npx0 at isa? port "IO_NPX" irq 13 vector npxintr # # Optional ISA and EISA devices: # # # SCSI host adapters: `aha', `aic', `bt', `nca' # # aha: Adaptec 154x # ahc: Adaptec 274x/284x/294x # aic: Adaptec 152x and sound cards using the Adaptec AIC-6360 (slow!) # bt: Most Buslogic controllers # nca: ProAudioSpectrum cards using the NCR 5380 or Trantor T130 # uha: UltraStore 14F and 34F # sea: Seagate ST01/02 8 bit controller (slow!) # wds: Western Digital WD7000 controller (no scatter/gather!). # # Note that the order is important in order for Buslogic cards to be # probed correctly. # controller bt0 at isa? port "IO_BT0" bio irq ? vector bt_isa_intr controller aha0 at isa? port "IO_AHA0" bio irq ? drq 5 vector ahaintr controller uha0 at isa? port "IO_UHA0" bio irq ? drq 5 vector uhaintr controller aic0 at isa? port 0x340 bio irq 11 vector aicintr controller nca0 at isa? port 0x1f88 bio irq 10 vector ncaintr controller nca1 at isa? port 0x1f84 controller nca2 at isa? port 0x1f8c controller nca3 at isa? port 0x1e88 controller nca4 at isa? port 0x350 bio irq 5 vector ncaintr controller sea0 at isa? bio irq 5 iomem 0xdc000 iosiz 0x2000 vector seaintr controller wds0 at isa? port 0x350 bio irq 15 drq 6 vector wdsintr # # ST-506, ESDI, and IDE hard disks: `wdc' and `wd' # # NB: ``Enhanced IDE'' is NOT supported at this time. # # The flags fields are used to enable the multi-sector I/O and # the 32BIT I/O modes. The flags may be used in either the controller # definition or in the individual disk definitions. The controller # definition is supported for the boot configuration stuff. # # Each drive has a 16 bit flags value defined: # The low 8 bits are the maximum value for the multi-sector I/O, # where 0xff defaults to the maximum that the drive can handle. 
# The high bit of the 16 bit flags (0x8000) allows probing for # 32 bit transfers. # # The flags field for the drives can be specified in the controller # specification with the low 16 bits for drive 0, and the high 16 bits # for drive 1. # e.g.: #controller wdc0 at isa? port "IO_WD1" bio irq 14 flags 0x00ff8004 vector wdintr # # specifies that drive 0 will be allowed to probe for 32 bit transfers and # a maximum multi-sector transfer of 4 sectors, and drive 1 will not be # allowed to probe for 32 bit transfers, but will allow multi-sector # transfers up to the maximum that the drive supports. # # controller wdc0 at isa? port "IO_WD1" bio irq 14 vector wdintr disk wd0 at wdc0 drive 0 disk wd1 at wdc0 drive 1 controller wdc1 at isa? port "IO_WD2" bio irq 15 vector wdintr disk wd2 at wdc1 drive 0 disk wd3 at wdc1 drive 1 # # Options for `wdc': # # ATAPI enables the support for ATAPI-compatible IDE devices # options ATAPI #Enable ATAPI support for IDE bus # IDE CD-ROM driver - requires wdc controller and ATAPI option device wcd0 # # Standard floppy disk controllers and floppy tapes: `fdc', `fd', and `ft' # controller fdc0 at isa? port "IO_FD1" bio irq 6 drq 2 vector fdintr # # Activate this line instead of the fdc0 line above if you happen to # have an Insight floppy tape. Probing them proved to be dangerous # for people with floppy disks only, so it's "hidden" behind a flag: #controller fdc0 at isa? port "IO_FD1" bio flags 1 irq 6 drq 2 vector fdintr disk fd0 at fdc0 drive 0 disk fd1 at fdc0 drive 1 tape ft0 at fdc0 drive 2 # # Options for `fd': # # FDSEEKWAIT selects a non-default head-settle time (i.e., the time to # wait after a seek is performed). The default value (1/32 s) is # usually sufficient. The units are inverse seconds, so a value of 16 # here means to wait 1/16th of a second; you should choose a power of # two. # XXX: this seems to be missing! options FDSEEKWAIT=16 # # Other standard PC hardware: `lpt', `mse', `psm', `sio', etc. 
# # lpt: printer port # lpt specials: # port can be specified as ?, this will cause the driver to scan # the BIOS port list; # the irq and vector clauses may be omitted, this # will force the port into polling mode. # mse: Logitech and ATI InPort bus mouse ports # psm: PS/2 mouse port [note: conflicts with sc0/vt0, thus "conflicts" keywd] # sio: serial ports (see sio(4)) device lpt0 at isa? port? tty irq 7 vector lptintr device lpt1 at isa? port "IO_LPT3" tty irq 5 vector lptintr device mse0 at isa? port 0x23c tty irq 5 vector mseintr device psm0 at isa? port "IO_KBD" conflicts tty irq 12 vector psmintr # Options for psm: options PSM_NO_RESET #don't reset mouse hardware (some laptops) device sio0 at isa? port "IO_COM1" tty irq 4 vector siointr # Options for sio: options COMCONSOLE #prefer serial console to video console options COM_ESP #code for Hayes ESP options COM_MULTIPORT #code for some cards with shared IRQs options DSI_SOFT_MODEM #code for DSI Softmodems options BREAK_TO_DEBUGGER #a BREAK on a comconsole goes to #DDB, if available. # # Network interfaces: `cx', `ed', `el', `ep', `ie', `is', `le', `lnc' # # ar: Arnet SYNC/570i hdlc sync 2/4 port V.35/X.21 serial driver (requires sppp) # cx: Cronyx/Sigma multiport sync/async (with Cisco or PPP framing) # ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503 # el: 3Com 3C501 (slow!) # ep: 3Com 3C509 (buggy) # fe: Fujitsu MB86960A/MB86965A Ethernet # fea: DEC DEFEA EISA FDDI adapter # ie: AT&T StarLAN 10 and EN100; 3Com 3C507; unknown NI5210 # le: Digital Equipment EtherWorks 2 and EtherWorks 3 (DEPCA, DE100, # DE101, DE200, DE201, DE202, DE203, DE204, DE205, DE422) # lnc: Lance/PCnet cards (Isolan, Novell NE2100, NE32-VL) # ze: IBM/National Semiconductor PCMCIA ethernet controller. # zp: 3Com PCMCIA Etherlink III (It does not require shared memory for # send/receive operation, but it needs 'iomem' to read/write the # attribute memory) # device ar0 at isa? 
port 0x300 net irq 10 iomem 0xd0000 vector arintr device cx0 at isa? port 0x240 net irq 15 drq 7 vector cxintr device ed0 at isa? port 0x280 net irq 5 iomem 0xd8000 vector edintr device eg0 at isa? port 0x310 net irq 5 vector egintr device el0 at isa? port 0x300 net irq 9 vector elintr device ep0 at isa? port 0x300 net irq 10 vector epintr device fe0 at isa? port 0x240 net irq ? vector feintr device fea0 at isa? net irq ? vector feaintr device ie0 at isa? port 0x360 net irq 7 iomem 0xd0000 vector ieintr device ix0 at isa? port 0x300 net irq 10 iomem 0xd0000 iosiz 32768 vector ixintr device le0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector le_intr device lnc0 at isa? port 0x300 net irq 10 drq 0 vector lncintr device ze0 at isa? port 0x300 net irq 5 iomem 0xd8000 vector zeintr device zp0 at isa? port 0x300 net irq 10 iomem 0xd8000 vector zpintr # # ISDN drivers - `isdn'. # # Uncomment one (and only one) of the following two drivers for the appropriate # ISDN device you have. For more information on what's considered appropriate # for your given set of circumstances, please read # /usr/src/gnu/usr.sbin/isdn/docs/INSTALL. It's a bit sparse at present, but # it's the best we have right now. The snic driver is also disabled at present, # waiting for someone to upgrade the driver to 2.0 (it's in /sys/gnu/scsi/). # device nic0 at isa? port "IO_COM3" iomem 0xe0000 tty irq 9 vector nicintr device nnic0 at isa? 
port 0x150 iomem 0xe0000 tty irq 12 vector nnicintr # # Audio drivers: `snd', `sb', `pas', `gus', `pca' # # snd: Voxware sound support code # sb: SoundBlaster PCM - SoundBlaster, SB Pro, SB16, ProAudioSpectrum # sbxvi: SoundBlaster 16 # sbmidi: SoundBlaster 16 MIDI interface # pas: ProAudioSpectrum PCM and MIDI # gus: Gravis Ultrasound - Ultrasound, Ultrasound 16, Ultrasound MAX # gusxvi: Gravis Ultrasound 16-bit PCM (do not use) # mss: Microsoft Sound System # opl: Yamaha OPL-2 and OPL-3 FM - SB, SB Pro, SB 16, ProAudioSpectrum # uart: stand-alone 6850 UART for MIDI # mpu: Roland MPU-401 stand-alone card # # Beware! The addresses specified below are also hard-coded in # i386/isa/sound/sound_config.h. If you change the values here, you # must also change the values in the include file. # # pca: PCM audio through your PC speaker # # If you don't have a lpt0 device at IRQ 7, you can remove the # ``conflicts'' specification in the appropriate device entries below. # # If you have a GUS-MAX card and want to use the CS4231 codec on the # card the drqs for the gus max must be 8 bit (1, 2, or 3). # # If you would like to use the full duplex option on the gus, then define # flags to be the ``read dma channel''. # # options BROKEN_BUS_CLOCK #PAS-16 isn't working and OPTI chipset # options SYMPHONY_PAS #PAS-16 isn't working and SYMPHONY chipset # options EXCLUDE_SBPRO #PAS-16 # options SBC_IRQ=5 #PAS-16. Must match irq on sb0 line. # PAS16: The order of the pas0/sb0/opl0 is important since the # sb emulation is enabled in the pas-16 attach. # # The i386/isa/sound/sound.doc has more information. # Controls all sound devices controller snd0 device pas0 at isa? port 0x388 irq 10 drq 6 vector pasintr device sb0 at isa? port 0x220 irq 7 conflicts drq 1 vector sbintr device sbxvi0 at isa? drq 5 device sbmidi0 at isa? port 0x330 device gus0 at isa? port 0x220 irq 12 drq 1 vector gusintr #device gus0 at isa? port 0x220 irq 12 drq 1 flags 0x3 vector gusintr device mss0 at isa? 
port 0x530 irq 10 drq 1 vector adintr device opl0 at isa? port 0x388 conflicts device mpu0 at isa? port 0x330 irq 6 drq 0 device uart0 at isa? port 0x330 irq 5 vector "m6850intr" # More undocumented sound devices with bogus configurations for linting. # broken #device sscape0 at isa? port 0x330 irq 6 drq 0 #device trix0 at isa? port 0x330 irq 6 drq 0 vector sscapeintr # Not controlled by `snd' device pca0 at isa? port IO_TIMER1 tty # # Miscellaneous hardware: # # mcd: Mitsumi CD-ROM # scd: Sony CD-ROM # matcd: Matsushita/Panasonic CD-ROM # wt: Wangtek and Archive QIC-02/QIC-36 tape drives # ctx: Cortex-I frame grabber # apm: Laptop Advanced Power Management (experimental) # spigot: The Creative Labs Video Spigot video-acquisition board # meteor: Matrox Meteor video capture board # cy: Cyclades serial driver # dgb: Digiboard PC/Xi and PC/Xe series driver (ALPHA QUALITY!) # gp: National Instruments AT-GPIB and AT-GPIB/TNT board # asc: GI1904-based hand scanners, e.g. the Trust Amiscan Grey # gsc: Genius GS-4500 hand scanner. # joy: joystick # labpc: National Instrument's Lab-PC and Lab-PC+ # rc: RISCom/8 multiport card # tw: TW-523 power line interface for use with X-10 home control products # si: Specialix SI/XIO 4-32 port terminal multiplexor # # Notes on the spigot: # The video spigot is at 0xad6. This port address can not be changed. # The irq values may only be 10, 11, or 15 # I/O memory is an 8kb region. Possible values are: # 0a0000, 0a2000, ..., 0fffff, f00000, f02000, ..., ffffff # The start address must be on an even boundary. # Add the following option if you want to allow non-root users to be able # to access the spigot. This option is not secure because it allows users # direct access to the I/O page. 
# options SPIGOT_UNSECURE # # Notes on the Digiboard driver: # # The following flag values have special meanings: # 0x01 - alternate layout of pins # 0x02 - use the windowed PC/Xe in 64K mode # Notes on the Specialix SI/XIO driver: # **This is NOT a Specialix supported Driver!** # The host card is memory, not IO mapped. # The Rev 1 host cards use a 64K chunk, on a 32K boundary. # The Rev 2 host cards use a 32K chunk, on a 32K boundary. # The cards can use an IRQ of 11, 12 or 15. device mcd0 at isa? port 0x300 bio irq 10 vector mcdintr # for the Sony CDU31/33A CDROM device scd0 at isa? port 0x230 bio # for the SoundBlaster 16 multicd - up to 4 devices controller matcd0 at isa? port 0x230 bio device wt0 at isa? port 0x300 bio irq 5 drq 1 vector wtintr device ctx0 at isa? port 0x230 iomem 0xd0000 device spigot0 at isa? port 0xad6 irq 15 iomem 0xee000 vector spigintr device qcam0 at isa? port "IO_LPT3" tty device apm0 at isa? device gp0 at isa? port 0x2c0 tty device gsc0 at isa? port "IO_GSC1" tty drq 3 device joy0 at isa? port "IO_GAME" device cy0 at isa? tty irq 10 iomem 0xd4000 iosiz 0x2000 vector cyintr device dgb0 at isa? port 0x220 iomem 0xfc0000 iosiz ? tty device labpc0 at isa? port 0x260 tty irq 5 vector labpcintr device rc0 at isa? port 0x220 tty irq 12 vector rcintr # the port and irq for tw0 are fictitious device tw0 at isa? port 0x380 tty irq 11 vector twintr device si0 at isa? iomem 0xd0000 tty irq 12 vector siintr device asc0 at isa? port IO_ASC1 tty drq 3 irq 10 vector ascintr device bqu0 at isa? port 0x150 # # EISA devices: # # The EISA bus device is eisa0. It provides auto-detection and # configuration support for all devices on the EISA bus. # # The `ahb' device provides support for the Adaptec 174X adapter. # # The `ahc' device provides support for the Adaptec 274X and 284X # adapters. The 284X, although a VLB card responds to EISA probes. # controller eisa0 controller ahb0 controller ahc0 # # PCI devices: # # The main PCI bus device is `pci'. 
It provides auto-detection and # configuration support for all devices on the PCI bus, using either # configuration mode defined in the PCI specification. # # The `ahc' device provides support for the Adaptec 29/3940(U)(W) # and motherboard based AIC7870/AIC7880 adapters. # # The `ncr' device provides support for the NCR 53C810 and 53C825 # self-contained SCSI host adapters. # # The `de' device provides support for the Digital Equipment DC21040 # self-contained Ethernet adapter. # # The `vx' device provides support for the 3Com 3C590 and 3C595 # early support # # The `fpa' device provides support for the Digital DEFPA PCI FDDI # adapter. pseudo-device fddi is also needed. # # The PROBE_VERBOSE option enables a long listing of chip set registers # for supported PCI chip sets (currently only Intel Saturn and Mercury). # # The `meteor' device is a PCI video capture board. It can also have the # following options: # options METEOR_ALLOC_PAGES=xxx preallocate kernel pages for data entry # figure (ROWS*COLUMN*BYTES_PER_PIXEL*FRAME+PAGE_SIZE-1)/PAGE_SIZE # options METEOR_DEALLOC_PAGES remove all allocated pages on close(2) # options METEOR_DEALLOC_ABOVE=xxx remove all allocated pages above the # specified amount. If this value is below the allocated amount no action is # taken # controller pci0 controller ahc1 device ncr0 device de0 device fxp0 device vx0 device fpa0 device meteor0 options PROBE_VERBOSE # # PCCARD/PCMCIA # controller crd0 controller pcic0 at crd? # # Laptop/Notebook options: # # See also: # apm under `Miscellaneous hardware' # options PSM_NO_RESET for the `psm' driver # above. # For older notebooks that signal a powerfail condition (external # power supply dropped, or battery state low) by issuing an NMI: options POWERFAIL_NMI # make it beep instead of panicking # More undocumented options for linting. 
options APM_SLOWSTART=1 options COMPAT_LINUX options DEBUG options "EXT2FS" options "IBCS2" options LINUX options "SCSI_2_DEF" options SHOW_BUSYBUFS # List buffers that prevent root unmount Index: head/sys/conf/files =================================================================== --- head/sys/conf/files (revision 14092) +++ head/sys/conf/files (revision 14093) @@ -1,386 +1,386 @@ ddb/db_access.c optional ddb ddb/db_aout.c optional ddb ddb/db_break.c optional ddb ddb/db_command.c optional ddb ddb/db_examine.c optional ddb ddb/db_expr.c optional ddb ddb/db_input.c optional ddb ddb/db_lex.c optional ddb ddb/db_output.c optional ddb ddb/db_print.c optional ddb ddb/db_ps.c optional ddb ddb/db_run.c optional ddb ddb/db_sym.c optional ddb ddb/db_trap.c optional ddb ddb/db_variables.c optional ddb ddb/db_watch.c optional ddb ddb/db_write_cmd.c optional ddb dev/vn/vn.c optional vn gnu/isdn/iitel.c optional itel device-driver gnu/isdn/iitty.c optional ity device-driver gnu/isdn/if_ii.c optional ii device-driver gnu/isdn/isdn.c optional isdn device-driver gnu/isdn/iispy.c optional ispy device-driver isofs/cd9660/cd9660_bmap.c optional cd9660 isofs/cd9660/cd9660_lookup.c optional cd9660 isofs/cd9660/cd9660_node.c optional cd9660 isofs/cd9660/cd9660_rrip.c optional cd9660 isofs/cd9660/cd9660_util.c optional cd9660 isofs/cd9660/cd9660_vfsops.c optional cd9660 isofs/cd9660/cd9660_vnops.c optional cd9660 kdb/kdb_access.c optional kadb kdb/kdb_command.c optional kadb kdb/kdb_ctype.c optional kadb kdb/kdb_expr.c optional kadb kdb/kdb_format.c optional kadb kdb/kdb_input.c optional kadb kdb/kdb_message.c optional kadb kdb/kdb_output.c optional kadb kdb/kdb_pcs.c optional kadb kdb/kdb_print.c optional kadb kdb/kdb_runpcs.c optional kadb kdb/kdb_sym.c optional kadb kdb/kdb_trap.c optional kadb kern/imgact_aout.c standard kern/imgact_gzip.c optional gzip kern/imgact_shell.c standard kern/inflate.c optional gzip kern/init_main.c standard kern/init_sysent.c standard kern/kern_acct.c 
standard kern/kern_clock.c standard kern/kern_conf.c standard kern/kern_descrip.c standard kern/kern_devconf.c standard kern/kern_exec.c standard kern/kern_exit.c standard kern/kern_fork.c standard kern/kern_ktrace.c standard kern/kern_lkm.c standard kern/kern_lockf.c standard kern/kern_malloc.c standard kern/kern_ntptime.c standard kern/kern_physio.c standard kern/kern_proc.c standard kern/kern_prot.c standard kern/kern_resource.c standard kern/kern_sig.c standard kern/kern_subr.c standard kern/kern_synch.c standard kern/kern_sysctl.c standard kern/kern_time.c standard kern/kern_xxx.c standard kern/subr_diskslice.c standard kern/subr_dkbad.c standard kern/subr_log.c standard kern/subr_prf.c standard kern/subr_prof.c standard kern/subr_rlist.c standard kern/subr_xxx.c standard kern/sys_generic.c standard kern/sys_process.c standard kern/sys_socket.c standard kern/sys_pipe.c standard kern/sysv_ipc.c standard kern/sysv_msg.c optional sysvmsg kern/sysv_sem.c optional sysvsem kern/sysv_shm.c optional sysvshm kern/tty.c standard kern/tty_compat.c standard kern/tty_conf.c standard kern/tty_pty.c optional pty kern/tty_snoop.c optional snp kern/tty_subr.c standard kern/tty_tb.c optional tb kern/tty_tty.c standard kern/uipc_domain.c standard kern/uipc_mbuf.c standard kern/uipc_proto.c standard kern/uipc_socket.c standard kern/uipc_socket2.c standard kern/uipc_syscalls.c standard kern/uipc_usrreq.c standard kern/vfs_bio.c standard kern/vfs_cache.c standard kern/vfs_cluster.c standard kern/vfs_conf.c standard kern/vfs_init.c standard kern/vfs_lookup.c standard kern/vfs_subr.c standard kern/vfs_syscalls.c standard kern/vfs_vnops.c standard miscfs/deadfs/dead_vnops.c standard miscfs/fdesc/fdesc_vfsops.c optional fdesc miscfs/fdesc/fdesc_vnops.c optional fdesc miscfs/fifofs/fifo_vnops.c standard miscfs/kernfs/kernfs_vfsops.c optional kernfs miscfs/kernfs/kernfs_vnops.c optional kernfs miscfs/nullfs/null_subr.c optional nullfs miscfs/nullfs/null_vfsops.c optional nullfs 
miscfs/nullfs/null_vnops.c optional nullfs miscfs/portal/portal_vfsops.c optional portal miscfs/portal/portal_vnops.c optional portal miscfs/procfs/procfs_ctl.c optional procfs miscfs/procfs/procfs_fpregs.c standard miscfs/procfs/procfs_mem.c standard miscfs/procfs/procfs_note.c optional procfs miscfs/procfs/procfs_regs.c standard miscfs/procfs/procfs_status.c optional procfs miscfs/procfs/procfs_subr.c optional procfs miscfs/procfs/procfs_vfsops.c optional procfs miscfs/procfs/procfs_vnops.c optional procfs miscfs/specfs/spec_vnops.c standard miscfs/umapfs/umap_subr.c optional umapfs miscfs/umapfs/umap_vfsops.c optional umapfs miscfs/umapfs/umap_vnops.c optional umapfs miscfs/union/union_subr.c optional union miscfs/union/union_vfsops.c optional union miscfs/union/union_vnops.c optional union miscfs/devfs/devfs_vnops.c optional devfs miscfs/devfs/devfs_vfsops.c optional devfs miscfs/devfs/devfs_tree.c optional devfs msdosfs/msdosfs_conv.c optional msdosfs msdosfs/msdosfs_denode.c optional msdosfs msdosfs/msdosfs_fat.c optional msdosfs msdosfs/msdosfs_lookup.c optional msdosfs msdosfs/msdosfs_vfsops.c optional msdosfs msdosfs/msdosfs_vnops.c optional msdosfs net/bpf.c optional bpfilter net/bpf_filter.c optional bpfilter net/bsd_comp.c optional ppp net/if.c standard net/if_disc.c optional disc net/if_ethersubr.c optional ether net/if_fddisubr.c optional fddi net/if_loop.c optional loop net/if_ppp.c optional ppp net/if_sl.c optional sl net/if_spppsubr.c optional sppp net/ppp_tty.c optional ppp net/radix.c standard net/raw_cb.c standard net/raw_usrreq.c standard net/route.c standard net/rtsock.c standard net/slcompress.c optional ppp net/slcompress.c optional sl net/if_tun.c optional tun #netccitt/ccitt_proto.c optional ccitt #netccitt/hd_debug.c optional hdlc #netccitt/hd_input.c optional hdlc #netccitt/hd_output.c optional hdlc #netccitt/hd_subr.c optional hdlc #netccitt/hd_timer.c optional hdlc #netccitt/if_x25subr.c optional ccitt #netccitt/llc_input.c optional 
llc #netccitt/llc_output.c optional llc #netccitt/llc_subr.c optional llc #netccitt/llc_timer.c optional llc #netccitt/pk_acct.c optional ccitt #netccitt/pk_debug.c optional ccitt #netccitt/pk_input.c optional ccitt #netccitt/pk_llcsubr.c optional hdlc #netccitt/pk_llcsubr.c optional llc #netccitt/pk_output.c optional ccitt #netccitt/pk_subr.c optional ccitt #netccitt/pk_timer.c optional ccitt #netccitt/pk_usrreq.c optional ccitt #netimp/if_imp.c optional imp #netimp/if_imphost.c optional imp #netimp/raw_imp.c optional imp netinet/if_ether.c optional ether netinet/igmp.c optional inet netinet/in.c optional inet netinet/in_pcb.c optional inet netinet/in_proto.c optional inet netinet/in_rmx.c optional inet netinet/ip_icmp.c optional inet netinet/ip_input.c optional inet netinet/ip_mroute.c optional inet netinet/ip_output.c optional inet netinet/raw_ip.c optional inet netinet/ip_fwdef.c optional inet netinet/ip_fw.c optional ipfirewall netinet/ip_fw.c optional ipacct netinet/tcp_debug.c optional tcpdebug netinet/tcp_input.c optional inet netinet/tcp_output.c optional inet netinet/tcp_subr.c optional inet netinet/tcp_timer.c optional inet netinet/tcp_usrreq.c optional inet netinet/udp_usrreq.c optional inet netipx/ipx_usrreq.c optional ipx netipx/ipx.c optional ipx netipx/ipx_cksum.c optional ipx netipx/ipx_error.c optional ipx netipx/ipx_input.c optional ipx netipx/ipx_ip.c optional ipx netipx/ipx_outputfl.c optional ipx netipx/ipx_pcb.c optional ipx netipx/ipx_proto.c optional ipx netipx/ipx_tun.c optional ipx netipx/spx_debug.c optional ipx netipx/spx_usrreq.c optional ipx #netiso/clnp_debug.c optional iso #netiso/clnp_er.c optional iso #netiso/clnp_frag.c optional iso #netiso/clnp_input.c optional iso #netiso/clnp_options.c optional iso #netiso/clnp_output.c optional iso #netiso/clnp_raw.c optional iso #netiso/clnp_subr.c optional iso #netiso/clnp_timer.c optional iso #netiso/cltp_usrreq.c optional iso #netiso/esis.c optional iso #netiso/idrp_usrreq.c optional iso 
#netiso/if_eon.c optional eon #netiso/iso.c optional iso #netiso/iso_chksum.c optional iso #netiso/iso_pcb.c optional iso #netiso/iso_proto.c optional iso #netiso/iso_snpac.c optional iso #netiso/tp_astring.c optional iso #netiso/tp_astring.c optional tpip #netiso/tp_cons.c optional iso #netiso/tp_driver.c optional iso #netiso/tp_driver.c optional tpip #netiso/tp_emit.c optional iso #netiso/tp_emit.c optional tpip #netiso/tp_inet.c optional iso #netiso/tp_inet.c optional tpip #netiso/tp_input.c optional iso #netiso/tp_input.c optional tpip #netiso/tp_iso.c optional iso #netiso/tp_meas.c optional iso #netiso/tp_meas.c optional tpip #netiso/tp_output.c optional iso #netiso/tp_output.c optional tpip #netiso/tp_pcb.c optional iso #netiso/tp_pcb.c optional tpip #netiso/tp_subr.c optional iso #netiso/tp_subr.c optional tpip #netiso/tp_subr2.c optional iso #netiso/tp_subr2.c optional tpip #netiso/tp_timer.c optional iso #netiso/tp_timer.c optional tpip #netiso/tp_trace.c optional iso #netiso/tp_trace.c optional tpip #netiso/tp_usrreq.c optional iso #netiso/tp_usrreq.c optional tpip #netiso/tuba_subr.c optional iso tuba #netiso/tuba_table.c optional iso tuba #netiso/tuba_usrreq.c optional iso tuba -netns/idp_usrreq.c optional ns -netns/ns.c optional ns -netns/ns_error.c optional ns -netns/ns_input.c optional ns -netns/ns_ip.c optional ns -netns/ns_output.c optional ns -netns/ns_pcb.c optional ns -netns/ns_proto.c optional ns -netns/spp_debug.c optional ns -netns/spp_usrreq.c optional ns +#netns/idp_usrreq.c optional ns +#netns/ns.c optional ns +#netns/ns_error.c optional ns +#netns/ns_input.c optional ns +#netns/ns_ip.c optional ns +#netns/ns_output.c optional ns +#netns/ns_pcb.c optional ns +#netns/ns_proto.c optional ns +#netns/spp_debug.c optional ns +#netns/spp_usrreq.c optional ns nfs/nfs_bio.c optional nfs nfs/nfs_node.c optional nfs nfs/nfs_nqlease.c optional nfs nfs/nfs_serv.c optional nfs nfs/nfs_socket.c optional nfs nfs/nfs_srvcache.c optional nfs nfs/nfs_subs.c 
optional nfs nfs/nfs_syscalls.c optional nfs nfs/nfs_vfsops.c optional nfs nfs/nfs_vnops.c optional nfs pccard/pccard.c optional crd pccard/pcic.c optional pcic device-driver pci/aic7870.c optional ahc device-driver pci/bt9xx.c optional bt device-driver pci/if_de.c optional de device-driver pci/if_fxp.c optional fxp device-driver pci/if_vx.c optional vx device-driver pci/ncr.c optional ncr device-driver pci/pci.c optional pci device-driver pci/pcisupport.c optional pci pci/if_pdq.c optional fea device-driver pci/pdq.c optional fea device-driver pci/if_pdq.c optional fpa device-driver pci/pdq.c optional fpa device-driver pci/meteor.c optional meteor device-driver scsi/cd.c optional cd scsi/ch.c optional ch scsi/od.c optional od scsi/scsi_base.c optional scbus scsi/scsi_sense.c optional scbus scsi/scsi_ioctl.c optional scbus scsi/scsiconf.c optional scbus scsi/scsi_driver.c optional scbus scsi/uk.c optional scbus scsi/pt.c optional pt scsi/sd.c optional sd scsi/st.c optional st scsi/worm.c optional worm scsi/su.c optional su scsi/ssc.c optional ssc scsi/sctarg.c optional sctarg ufs/ffs/ffs_alloc.c optional ffs ufs/ffs/ffs_alloc.c optional mfs ufs/ffs/ffs_balloc.c optional ffs ufs/ffs/ffs_balloc.c optional mfs ufs/ffs/ffs_inode.c optional ffs ufs/ffs/ffs_inode.c optional mfs ufs/ffs/ffs_subr.c optional ffs ufs/ffs/ffs_subr.c optional mfs ufs/ffs/ffs_tables.c optional ffs ufs/ffs/ffs_tables.c optional mfs ufs/ffs/ffs_vfsops.c optional ffs ufs/ffs/ffs_vfsops.c optional mfs ufs/ffs/ffs_vnops.c optional ffs ufs/ffs/ffs_vnops.c optional mfs ufs/lfs/lfs_alloc.c optional lfs ufs/lfs/lfs_balloc.c optional lfs ufs/lfs/lfs_bio.c optional lfs ufs/lfs/lfs_cksum.c optional lfs ufs/lfs/lfs_debug.c optional lfs ufs/lfs/lfs_inode.c optional lfs ufs/lfs/lfs_segment.c optional lfs ufs/lfs/lfs_subr.c optional lfs ufs/lfs/lfs_syscalls.c optional lfs ufs/lfs/lfs_vfsops.c optional lfs ufs/lfs/lfs_vnops.c optional lfs ufs/mfs/mfs_vfsops.c optional mfs ufs/mfs/mfs_vnops.c optional mfs 
gnu/ext2fs/ext2_balloc.c optional ext2fs gnu/ext2fs/ext2_inode.c optional ext2fs gnu/ext2fs/ext2_subr.c optional ext2fs gnu/ext2fs/ext2_vfsops.c optional ext2fs gnu/ext2fs/ext2_vnops.c optional ext2fs gnu/ext2fs/ext2_inode_cnv.c optional ext2fs gnu/ext2fs/ext2_lookup.c optional ext2fs gnu/ext2fs/ext2_linux_ialloc.c optional ext2fs gnu/ext2fs/ext2_linux_balloc.c optional ext2fs gnu/ext2fs/ext2_alloc.c optional ext2fs ufs/ufs/ufs_bmap.c standard ufs/ufs/ufs_disksubr.c standard ufs/ufs/ufs_ihash.c standard ufs/ufs/ufs_inode.c standard ufs/ufs/ufs_lookup.c standard ufs/ufs/ufs_quota.c standard ufs/ufs/ufs_vfsops.c standard ufs/ufs/ufs_vnops.c standard vm/default_pager.c standard vm/device_pager.c standard vm/kern_lock.c standard vm/swap_pager.c standard vm/vm_fault.c standard vm/vm_glue.c standard vm/vm_init.c standard vm/vm_kern.c standard vm/vm_map.c standard vm/vm_meter.c standard vm/vm_mmap.c standard vm/vm_object.c standard vm/vm_page.c standard vm/vm_pageout.c standard vm/vm_pager.c standard vm/vm_swap.c standard vm/vm_unix.c standard vm/vnode_pager.c standard Index: head/sys/fs/fifofs/fifo_vnops.c =================================================================== --- head/sys/fs/fifofs/fifo_vnops.c (revision 14092) +++ head/sys/fs/fifofs/fifo_vnops.c (revision 14093) @@ -1,545 +1,545 @@ /* * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fifo_vnops.c 8.2 (Berkeley) 1/4/94 - * $Id: fifo_vnops.c,v 1.14 1995/12/11 10:26:34 phk Exp $ + * $Id: fifo_vnops.c,v 1.15 1995/12/14 09:53:03 phk Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * This structure is associated with the FIFO vnode and stores * the state associated with the FIFO. 
*/ struct fifoinfo { struct socket *fi_readsock; struct socket *fi_writesock; long fi_readers; long fi_writers; }; static int fifo_ebadf __P((void)); static int fifo_unlock __P((struct vop_unlock_args *)); static int fifo_lock __P((struct vop_lock_args *)); static int fifo_print __P((struct vop_print_args *)); vop_t **fifo_vnodeop_p; static struct vnodeopv_entry_desc fifo_vnodeop_entries[] = { { &vop_default_desc, (vop_t *)vn_default_error }, { &vop_lookup_desc, (vop_t *)fifo_lookup }, /* lookup */ { &vop_create_desc, (vop_t *)fifo_create }, /* create */ { &vop_mknod_desc, (vop_t *)fifo_mknod }, /* mknod */ { &vop_open_desc, (vop_t *)fifo_open }, /* open */ { &vop_close_desc, (vop_t *)fifo_close }, /* close */ { &vop_access_desc, (vop_t *)fifo_access }, /* access */ { &vop_getattr_desc, (vop_t *)fifo_getattr }, /* getattr */ { &vop_setattr_desc, (vop_t *)fifo_setattr }, /* setattr */ { &vop_read_desc, (vop_t *)fifo_read }, /* read */ { &vop_write_desc, (vop_t *)fifo_write }, /* write */ { &vop_ioctl_desc, (vop_t *)fifo_ioctl }, /* ioctl */ { &vop_select_desc, (vop_t *)fifo_select }, /* select */ { &vop_mmap_desc, (vop_t *)fifo_mmap }, /* mmap */ { &vop_fsync_desc, (vop_t *)fifo_fsync }, /* fsync */ { &vop_seek_desc, (vop_t *)fifo_seek }, /* seek */ { &vop_remove_desc, (vop_t *)fifo_remove }, /* remove */ { &vop_link_desc, (vop_t *)fifo_link }, /* link */ { &vop_rename_desc, (vop_t *)fifo_rename }, /* rename */ { &vop_mkdir_desc, (vop_t *)fifo_mkdir }, /* mkdir */ { &vop_rmdir_desc, (vop_t *)fifo_rmdir }, /* rmdir */ { &vop_symlink_desc, (vop_t *)fifo_symlink }, /* symlink */ { &vop_readdir_desc, (vop_t *)fifo_readdir }, /* readdir */ { &vop_readlink_desc, (vop_t *)fifo_readlink }, /* readlink */ { &vop_abortop_desc, (vop_t *)fifo_abortop }, /* abortop */ { &vop_inactive_desc, (vop_t *)fifo_inactive }, /* inactive */ { &vop_reclaim_desc, (vop_t *)fifo_reclaim }, /* reclaim */ { &vop_lock_desc, (vop_t *)fifo_lock }, /* lock */ { &vop_unlock_desc, (vop_t *)fifo_unlock 
}, /* unlock */ { &vop_bmap_desc, (vop_t *)fifo_bmap }, /* bmap */ { &vop_strategy_desc, (vop_t *)fifo_strategy }, /* strategy */ { &vop_print_desc, (vop_t *)fifo_print }, /* print */ { &vop_islocked_desc, (vop_t *)fifo_islocked }, /* islocked */ { &vop_pathconf_desc, (vop_t *)fifo_pathconf }, /* pathconf */ { &vop_advlock_desc, (vop_t *)fifo_advlock }, /* advlock */ { &vop_blkatoff_desc, (vop_t *)fifo_blkatoff }, /* blkatoff */ { &vop_valloc_desc, (vop_t *)fifo_valloc }, /* valloc */ { &vop_vfree_desc, (vop_t *)fifo_vfree }, /* vfree */ { &vop_truncate_desc, (vop_t *)fifo_truncate }, /* truncate */ { &vop_update_desc, (vop_t *)fifo_update }, /* update */ { &vop_bwrite_desc, (vop_t *)fifo_bwrite }, /* bwrite */ { NULL, NULL } }; static struct vnodeopv_desc fifo_vnodeop_opv_desc = { &fifo_vnodeop_p, fifo_vnodeop_entries }; VNODEOP_SET(fifo_vnodeop_opv_desc); /* * Trivial lookup routine that always fails. */ /* ARGSUSED */ int fifo_lookup(ap) struct vop_lookup_args /* { struct vnode * a_dvp; struct vnode ** a_vpp; struct componentname * a_cnp; } */ *ap; { *ap->a_vpp = NULL; return (ENOTDIR); } /* * Open called to set up a new instance of a fifo or * to find an active instance of a fifo. 
*/ /* ARGSUSED */ int fifo_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct fifoinfo *fip; struct socket *rso, *wso; int error; static char openstr[] = "fifo"; if ((fip = vp->v_fifoinfo) == NULL) { MALLOC(fip, struct fifoinfo *, sizeof(*fip), M_VNODE, M_WAITOK); vp->v_fifoinfo = fip; - error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0); + error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0, ap->a_p); if (error) { free(fip, M_VNODE); vp->v_fifoinfo = NULL; return (error); } fip->fi_readsock = rso; - error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0); + error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0, ap->a_p); if (error) { (void)soclose(rso); free(fip, M_VNODE); vp->v_fifoinfo = NULL; return (error); } fip->fi_writesock = wso; error = unp_connect2(wso, rso); if (error) { (void)soclose(wso); (void)soclose(rso); free(fip, M_VNODE); vp->v_fifoinfo = NULL; return (error); } fip->fi_readers = fip->fi_writers = 0; wso->so_state |= SS_CANTRCVMORE; rso->so_state |= SS_CANTSENDMORE; } error = 0; if ((ap->a_mode & (FREAD|FWRITE)) == (FREAD|FWRITE)) { if (fip->fi_readers == 0) { fip->fi_writesock->so_state &= ~SS_CANTSENDMORE; if (fip->fi_writers > 0) wakeup((caddr_t)&fip->fi_writers); } if (fip->fi_writers == 0) { fip->fi_readsock->so_state &= ~SS_CANTRCVMORE; if (fip->fi_readers > 0) wakeup((caddr_t)&fip->fi_readers); } fip->fi_readers++; fip->fi_writers++; } else if (ap->a_mode & FREAD) { fip->fi_readers++; if (fip->fi_readers == 1) { fip->fi_writesock->so_state &= ~SS_CANTSENDMORE; if (fip->fi_writers > 0) wakeup((caddr_t)&fip->fi_writers); } if (!(ap->a_mode & O_NONBLOCK)) while (fip->fi_writers == 0) { VOP_UNLOCK(vp); error = tsleep((caddr_t)&fip->fi_readers, PCATCH | PSOCK, openstr, 0); VOP_LOCK(vp); if (error) break; } } else { fip->fi_writers++; if (fip->fi_readers == 0 && (ap->a_mode & O_NONBLOCK)) { error = ENXIO; } else { if (fip->fi_writers == 1) { 
fip->fi_readsock->so_state &= ~SS_CANTRCVMORE; if (fip->fi_readers > 0) wakeup((caddr_t)&fip->fi_readers); } while (fip->fi_readers == 0) { VOP_UNLOCK(vp); error = tsleep((caddr_t)&fip->fi_writers, PCATCH | PSOCK, openstr, 0); VOP_LOCK(vp); if (error) break; } } } if (error) VOP_CLOSE(vp, ap->a_mode, ap->a_cred, ap->a_p); return (error); } /* * Vnode op for read */ /* ARGSUSED */ int fifo_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct uio *uio = ap->a_uio; register struct socket *rso = ap->a_vp->v_fifoinfo->fi_readsock; int error, startresid; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("fifo_read mode"); #endif if (uio->uio_resid == 0) return (0); if (ap->a_ioflag & IO_NDELAY) rso->so_state |= SS_NBIO; startresid = uio->uio_resid; VOP_UNLOCK(ap->a_vp); error = soreceive(rso, (struct mbuf **)0, uio, (struct mbuf **)0, (struct mbuf **)0, (int*)0); VOP_LOCK(ap->a_vp); /* * Clear EOF indication after first such return. */ if (uio->uio_resid == startresid) rso->so_state &= ~SS_CANTRCVMORE; if (ap->a_ioflag & IO_NDELAY) rso->so_state &= ~SS_NBIO; return (error); } /* * Vnode op for write */ /* ARGSUSED */ int fifo_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { struct socket *wso = ap->a_vp->v_fifoinfo->fi_writesock; int error; #ifdef DIAGNOSTIC if (ap->a_uio->uio_rw != UIO_WRITE) panic("fifo_write mode"); #endif if (ap->a_ioflag & IO_NDELAY) wso->so_state |= SS_NBIO; VOP_UNLOCK(ap->a_vp); error = sosend(wso, (struct mbuf *)0, ap->a_uio, 0, (struct mbuf *)0, 0); VOP_LOCK(ap->a_vp); if (ap->a_ioflag & IO_NDELAY) wso->so_state &= ~SS_NBIO; return (error); } /* * Device ioctl operation. 
*/ /* ARGSUSED */ int fifo_ioctl(ap) struct vop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct file filetmp; if (ap->a_command == FIONBIO) return (0); if (ap->a_fflag & FREAD) filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock; else filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock; return (soo_ioctl(&filetmp, ap->a_command, ap->a_data, ap->a_p)); } /* ARGSUSED */ int fifo_select(ap) struct vop_select_args /* { struct vnode *a_vp; int a_which; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct file filetmp; if (ap->a_fflags & FREAD) filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock; else filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock; return (soo_select(&filetmp, ap->a_which, ap->a_p)); } /* * This is a noop, simply returning what one has been given. */ int fifo_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { if (ap->a_vpp != NULL) *ap->a_vpp = ap->a_vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * At the moment we do not do any locking. 
*/ /* ARGSUSED */ static int fifo_lock(ap) struct vop_lock_args /* { struct vnode *a_vp; } */ *ap; { return (0); } /* ARGSUSED */ static int fifo_unlock(ap) struct vop_unlock_args /* { struct vnode *a_vp; } */ *ap; { return (0); } /* * Device close routine */ /* ARGSUSED */ int fifo_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct fifoinfo *fip = vp->v_fifoinfo; int error1, error2; if (ap->a_fflag & FWRITE) { fip->fi_writers--; if (fip->fi_writers == 0) socantrcvmore(fip->fi_readsock); } if (ap->a_fflag & FREAD) { fip->fi_readers--; if (fip->fi_readers == 0) socantsendmore(fip->fi_writesock); } if (vp->v_usecount > 1) return (0); error1 = soclose(fip->fi_readsock); error2 = soclose(fip->fi_writesock); FREE(fip, M_VNODE); vp->v_fifoinfo = NULL; if (error1) return (error1); return (error2); } /* * Print out internal contents of a fifo vnode. */ int fifo_printinfo(vp) struct vnode *vp; { register struct fifoinfo *fip = vp->v_fifoinfo; printf(", fifo with %ld readers and %ld writers", fip->fi_readers, fip->fi_writers); return (0); } /* * Print out the contents of a fifo vnode. */ static int fifo_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { printf("tag VT_NON"); fifo_printinfo(ap->a_vp); printf("\n"); return (0); } /* * Return POSIX pathconf information applicable to fifo's. */ int fifo_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = LINK_MAX; return (0); case _PC_PIPE_BUF: *ap->a_retval = PIPE_BUF; return (0); case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * Fifo failed operation */ static int fifo_ebadf() { return (EBADF); } /* * Fifo advisory byte-level locks. 
*/ /* ARGSUSED */ int fifo_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { return (EOPNOTSUPP); } /* * Fifo bad operation */ int fifo_badop() { panic("fifo_badop called"); /* NOTREACHED */ } Index: head/sys/fs/portalfs/portal_vnops.c =================================================================== --- head/sys/fs/portalfs/portal_vnops.c (revision 14092) +++ head/sys/fs/portalfs/portal_vnops.c (revision 14093) @@ -1,726 +1,726 @@ /* * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software donated to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)portal_vnops.c 8.8 (Berkeley) 1/21/94 * - * $Id: portal_vnops.c,v 1.9 1995/12/03 14:54:29 bde Exp $ + * $Id: portal_vnops.c,v 1.10 1995/12/11 09:24:45 phk Exp $ */ /* * Portal Filesystem */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int portal_fileid = PORTAL_ROOTFILEID+1; static int portal_badop __P((void)); static void portal_closefd __P((struct proc *p, int fd)); static int portal_connect __P((struct socket *so, struct socket *so2)); static int portal_enotsupp __P((void)); static int portal_getattr __P((struct vop_getattr_args *ap)); static int portal_inactive __P((struct vop_inactive_args *ap)); static int portal_lookup __P((struct vop_lookup_args *ap)); static int portal_nullop __P((void)); static int portal_open __P((struct vop_open_args *ap)); static int portal_pathconf __P((struct vop_pathconf_args *ap)); static int portal_print __P((struct vop_print_args *ap)); static int portal_readdir __P((struct vop_readdir_args *ap)); static int portal_reclaim __P((struct vop_reclaim_args *ap)); static int portal_setattr __P((struct vop_setattr_args *ap)); static int portal_vfree __P((struct vop_vfree_args *ap)); static void portal_closefd(p, fd) struct proc *p; int fd; { int error; struct close_args ua; int rc; ua.fd = fd; error = close(p, &ua, &rc); /* * We should never get an error, and there isn't anything 
* we could do if we got one, so just print a message. */ if (error) printf("portal_closefd: error = %d\n", error); } /* * vp is the current namei directory * cnp is the name to locate in that directory... */ static int portal_lookup(ap) struct vop_lookup_args /* { struct vnode * a_dvp; struct vnode ** a_vpp; struct componentname * a_cnp; } */ *ap; { char *pname = ap->a_cnp->cn_nameptr; struct portalnode *pt; int error; struct vnode *fvp = 0; char *path; int size; if (ap->a_cnp->cn_namelen == 1 && *pname == '.') { *ap->a_vpp = ap->a_dvp; VREF(ap->a_dvp); /*VOP_LOCK(ap->a_dvp);*/ return (0); } error = getnewvnode(VT_PORTAL, ap->a_dvp->v_mount, portal_vnodeop_p, &fvp); if (error) goto bad; fvp->v_type = VREG; MALLOC(fvp->v_data, void *, sizeof(struct portalnode), M_TEMP, M_WAITOK); pt = VTOPORTAL(fvp); /* * Save all of the remaining pathname and * advance the namei next pointer to the end * of the string. */ for (size = 0, path = pname; *path; path++) size++; ap->a_cnp->cn_consume = size - ap->a_cnp->cn_namelen; pt->pt_arg = malloc(size+1, M_TEMP, M_WAITOK); pt->pt_size = size+1; bcopy(pname, pt->pt_arg, pt->pt_size); pt->pt_fileid = portal_fileid++; *ap->a_vpp = fvp; /*VOP_LOCK(fvp);*/ return (0); bad:; if (fvp) { vrele(fvp); } *ap->a_vpp = NULL; return (error); } static int portal_connect(so, so2) struct socket *so; struct socket *so2; { /* from unp_connect, bypassing the namei stuff... 
*/ struct socket *so3; struct unpcb *unp2; struct unpcb *unp3; if (so2 == 0) return (ECONNREFUSED); if (so->so_type != so2->so_type) return (EPROTOTYPE); if ((so2->so_options & SO_ACCEPTCONN) == 0) return (ECONNREFUSED); if ((so3 = sonewconn(so2, 0)) == 0) return (ECONNREFUSED); unp2 = sotounpcb(so2); unp3 = sotounpcb(so3); if (unp2->unp_addr) unp3->unp_addr = m_copy(unp2->unp_addr, 0, (int)M_COPYALL); so2 = so3; return (unp_connect2(so, so2)); } static int portal_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct socket *so = 0; struct portalnode *pt; struct proc *p = ap->a_p; struct vnode *vp = ap->a_vp; int s; struct uio auio; struct iovec aiov[2]; int res; struct mbuf *cm = 0; struct cmsghdr *cmsg; int newfds; int *ip; int fd; int error; int len; struct portalmount *fmp; struct file *fp; struct portal_cred pcred; /* * Nothing to do when opening the root node. */ if (vp->v_flag & VROOT) return (0); /* * Can't be opened unless the caller is set up * to deal with the side effects. Check for this * by testing whether the p_dupfd has been set. */ if (p->p_dupfd >= 0) return (ENODEV); pt = VTOPORTAL(vp); fmp = VFSTOPORTAL(vp->v_mount); /* * Create a new socket. */ - error = socreate(AF_UNIX, &so, SOCK_STREAM, 0); + error = socreate(AF_UNIX, &so, SOCK_STREAM, 0, ap->a_p); if (error) goto bad; /* * Reserve some buffer space */ res = pt->pt_size + sizeof(pcred) + 512; /* XXX */ error = soreserve(so, res, res); if (error) goto bad; /* * Kick off connection */ error = portal_connect(so, (struct socket *)fmp->pm_server->f_data); if (error) goto bad; /* * Wait for connection to complete */ /* * XXX: Since the mount point is holding a reference on the * underlying server socket, it is not easy to find out whether * the server process is still running. 
To handle this problem * we loop waiting for the new socket to be connected (something * which will only happen if the server is still running) or for * the reference count on the server socket to drop to 1, which * will happen if the server dies. Sleep for 5 second intervals * and keep polling the reference count. XXX. */ s = splnet(); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { if (fmp->pm_server->f_count == 1) { error = ECONNREFUSED; splx(s); goto bad; } (void) tsleep((caddr_t) &so->so_timeo, PSOCK, "portalcon", 5 * hz); } splx(s); if (so->so_error) { error = so->so_error; goto bad; } /* * Set miscellaneous flags */ so->so_rcv.sb_timeo = 0; so->so_snd.sb_timeo = 0; so->so_rcv.sb_flags |= SB_NOINTR; so->so_snd.sb_flags |= SB_NOINTR; pcred.pcr_flag = ap->a_mode; pcred.pcr_uid = ap->a_cred->cr_uid; pcred.pcr_ngroups = ap->a_cred->cr_ngroups; bcopy(ap->a_cred->cr_groups, pcred.pcr_groups, NGROUPS * sizeof(gid_t)); aiov[0].iov_base = (caddr_t) &pcred; aiov[0].iov_len = sizeof(pcred); aiov[1].iov_base = pt->pt_arg; aiov[1].iov_len = pt->pt_size; auio.uio_iov = aiov; auio.uio_iovcnt = 2; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_SYSSPACE; auio.uio_procp = p; auio.uio_offset = 0; auio.uio_resid = aiov[0].iov_len + aiov[1].iov_len; error = sosend(so, (struct mbuf *) 0, &auio, (struct mbuf *) 0, (struct mbuf *) 0, 0); if (error) goto bad; len = auio.uio_resid = sizeof(int); do { struct mbuf *m = 0; int flags = MSG_WAITALL; error = soreceive(so, (struct mbuf **) 0, &auio, &m, &cm, &flags); if (error) goto bad; /* * Grab an error code from the mbuf. */ if (m) { m = m_pullup(m, sizeof(int)); /* Needed? 
*/ if (m) { error = *(mtod(m, int *)); m_freem(m); } else { error = EINVAL; } } else { if (cm == 0) { error = ECONNRESET; /* XXX */ #ifdef notdef break; #endif } } } while (cm == 0 && auio.uio_resid == len && !error); if (cm == 0) goto bad; if (auio.uio_resid) { error = 0; #ifdef notdef error = EMSGSIZE; goto bad; #endif } /* * XXX: Break apart the control message, and retrieve the * received file descriptor. Note that more than one descriptor * may have been received, or that the rights chain may have more * than a single mbuf in it. What to do? */ cmsg = mtod(cm, struct cmsghdr *); newfds = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof (int); if (newfds == 0) { error = ECONNREFUSED; goto bad; } /* * At this point the rights message consists of a control message * header, followed by a data region containing a vector of * integer file descriptors. The fds were allocated by the action * of receiving the control message. */ ip = (int *) (cmsg + 1); fd = *ip++; if (newfds > 1) { /* * Close extra fds. */ int i; printf("portal_open: %d extra fds\n", newfds - 1); for (i = 1; i < newfds; i++) { portal_closefd(p, *ip); ip++; } } /* * Check that the mode the file is being opened for is a subset * of the mode of the existing descriptor. */ fp = p->p_fd->fd_ofiles[fd]; if (((ap->a_mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) { portal_closefd(p, fd); error = EACCES; goto bad; } /* * Save the dup fd in the proc structure then return the * special error code (ENXIO) which causes magic things to * happen in vn_open. The whole concept is, well, hmmm. */ p->p_dupfd = fd; error = ENXIO; bad:; /* * And discard the control message. 
*/ if (cm) { m_freem(cm); } if (so) { soshutdown(so, 2); soclose(so); } return (error); } static int portal_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; bzero(vap, sizeof(*vap)); vattr_null(vap); vap->va_uid = 0; vap->va_gid = 0; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; vap->va_size = DEV_BSIZE; vap->va_blocksize = DEV_BSIZE; microtime((struct timeval *)&vap->va_atime); TIMEVAL_TO_TIMESPEC((struct timeval *)&vap->va_atime, (struct timespec *)&vap->va_atime); vap->va_mtime = vap->va_atime; vap->va_ctime = vap->va_ctime; vap->va_gen = 0; vap->va_flags = 0; vap->va_rdev = 0; /* vap->va_qbytes = 0; */ vap->va_bytes = 0; /* vap->va_qsize = 0; */ if (vp->v_flag & VROOT) { vap->va_type = VDIR; vap->va_mode = S_IRUSR|S_IWUSR|S_IXUSR| S_IRGRP|S_IWGRP|S_IXGRP| S_IROTH|S_IWOTH|S_IXOTH; vap->va_nlink = 2; vap->va_fileid = 2; } else { vap->va_type = VREG; vap->va_mode = S_IRUSR|S_IWUSR| S_IRGRP|S_IWGRP| S_IROTH|S_IWOTH; vap->va_nlink = 1; vap->va_fileid = VTOPORTAL(vp)->pt_fileid; } return (0); } static int portal_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { /* * Can't mess with the root vnode */ if (ap->a_vp->v_flag & VROOT) return (EACCES); return (0); } /* * Fake readdir, just return empty directory. * It is hard to deal with '.' and '..' so don't bother. 
*/ static int portal_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { return (0); } static int portal_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; } */ *ap; { return (0); } static int portal_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; } */ *ap; { struct portalnode *pt = VTOPORTAL(ap->a_vp); if (pt->pt_arg) { free((caddr_t) pt->pt_arg, M_TEMP); pt->pt_arg = 0; } FREE(ap->a_vp->v_data, M_TEMP); ap->a_vp->v_data = 0; return (0); } /* * Return POSIX pathconf information applicable to special devices. */ static int portal_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = LINK_MAX; return (0); case _PC_MAX_CANON: *ap->a_retval = MAX_CANON; return (0); case _PC_MAX_INPUT: *ap->a_retval = MAX_INPUT; return (0); case _PC_PIPE_BUF: *ap->a_retval = PIPE_BUF; return (0); case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; return (0); case _PC_VDISABLE: *ap->a_retval = _POSIX_VDISABLE; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * Print out the contents of a Portal vnode. 
*/ /* ARGSUSED */ static int portal_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { printf("tag VT_PORTAL, portal vnode\n"); return (0); } /*void*/ static int portal_vfree(ap) struct vop_vfree_args /* { struct vnode *a_pvp; ino_t a_ino; int a_mode; } */ *ap; { return (0); } /* * Portal vnode unsupported operation */ static int portal_enotsupp() { return (EOPNOTSUPP); } /* * Portal "should never get here" operation */ static int portal_badop() { panic("portal: bad op"); /* NOTREACHED */ } /* * Portal vnode null operation */ static int portal_nullop() { return (0); } #define portal_create ((int (*) __P((struct vop_create_args *)))portal_enotsupp) #define portal_mknod ((int (*) __P((struct vop_mknod_args *)))portal_enotsupp) #define portal_close ((int (*) __P((struct vop_close_args *)))nullop) #define portal_access ((int (*) __P((struct vop_access_args *)))nullop) #define portal_read ((int (*) __P((struct vop_read_args *)))portal_enotsupp) #define portal_write ((int (*) __P((struct vop_write_args *)))portal_enotsupp) #define portal_ioctl ((int (*) __P((struct vop_ioctl_args *)))portal_enotsupp) #define portal_select ((int (*) __P((struct vop_select_args *)))portal_enotsupp) #define portal_mmap ((int (*) __P((struct vop_mmap_args *)))portal_enotsupp) #define portal_fsync ((int (*) __P((struct vop_fsync_args *)))nullop) #define portal_seek ((int (*) __P((struct vop_seek_args *)))nullop) #define portal_remove ((int (*) __P((struct vop_remove_args *)))portal_enotsupp) #define portal_link ((int (*) __P((struct vop_link_args *)))portal_enotsupp) #define portal_rename ((int (*) __P((struct vop_rename_args *)))portal_enotsupp) #define portal_mkdir ((int (*) __P((struct vop_mkdir_args *)))portal_enotsupp) #define portal_rmdir ((int (*) __P((struct vop_rmdir_args *)))portal_enotsupp) #define portal_symlink \ ((int (*) __P((struct vop_symlink_args *)))portal_enotsupp) #define portal_readlink \ ((int (*) __P((struct vop_readlink_args *)))portal_enotsupp) 
#define portal_abortop ((int (*) __P((struct vop_abortop_args *)))nullop) #define portal_lock ((int (*) __P((struct vop_lock_args *)))nullop) #define portal_unlock ((int (*) __P((struct vop_unlock_args *)))nullop) #define portal_bmap ((int (*) __P((struct vop_bmap_args *)))portal_badop) #define portal_strategy \ ((int (*) __P((struct vop_strategy_args *)))portal_badop) #define portal_islocked ((int (*) __P((struct vop_islocked_args *)))nullop) #define portal_advlock \ ((int (*) __P((struct vop_advlock_args *)))portal_enotsupp) #define portal_blkatoff \ ((int (*) __P((struct vop_blkatoff_args *)))portal_enotsupp) #define portal_valloc ((int(*) __P(( \ struct vnode *pvp, \ int mode, \ struct ucred *cred, \ struct vnode **vpp))) portal_enotsupp) #define portal_truncate \ ((int (*) __P((struct vop_truncate_args *)))portal_enotsupp) #define portal_update ((int (*) __P((struct vop_update_args *)))portal_enotsupp) #define portal_bwrite ((int (*) __P((struct vop_bwrite_args *)))portal_enotsupp) vop_t **portal_vnodeop_p; static struct vnodeopv_entry_desc portal_vnodeop_entries[] = { { &vop_default_desc, (vop_t *)vn_default_error }, { &vop_lookup_desc, (vop_t *)portal_lookup }, /* lookup */ { &vop_create_desc, (vop_t *)portal_create }, /* create */ { &vop_mknod_desc, (vop_t *)portal_mknod }, /* mknod */ { &vop_open_desc, (vop_t *)portal_open }, /* open */ { &vop_close_desc, (vop_t *)portal_close }, /* close */ { &vop_access_desc, (vop_t *)portal_access }, /* access */ { &vop_getattr_desc, (vop_t *)portal_getattr }, /* getattr */ { &vop_setattr_desc, (vop_t *)portal_setattr }, /* setattr */ { &vop_read_desc, (vop_t *)portal_read }, /* read */ { &vop_write_desc, (vop_t *)portal_write }, /* write */ { &vop_ioctl_desc, (vop_t *)portal_ioctl }, /* ioctl */ { &vop_select_desc, (vop_t *)portal_select }, /* select */ { &vop_mmap_desc, (vop_t *)portal_mmap }, /* mmap */ { &vop_fsync_desc, (vop_t *)portal_fsync }, /* fsync */ { &vop_seek_desc, (vop_t *)portal_seek }, /* seek */ { 
&vop_remove_desc, (vop_t *)portal_remove }, /* remove */ { &vop_link_desc, (vop_t *)portal_link }, /* link */ { &vop_rename_desc, (vop_t *)portal_rename }, /* rename */ { &vop_mkdir_desc, (vop_t *)portal_mkdir }, /* mkdir */ { &vop_rmdir_desc, (vop_t *)portal_rmdir }, /* rmdir */ { &vop_symlink_desc, (vop_t *)portal_symlink }, /* symlink */ { &vop_readdir_desc, (vop_t *)portal_readdir }, /* readdir */ { &vop_readlink_desc, (vop_t *)portal_readlink }, /* readlink */ { &vop_abortop_desc, (vop_t *)portal_abortop }, /* abortop */ { &vop_inactive_desc, (vop_t *)portal_inactive }, /* inactive */ { &vop_reclaim_desc, (vop_t *)portal_reclaim }, /* reclaim */ { &vop_lock_desc, (vop_t *)portal_lock }, /* lock */ { &vop_unlock_desc, (vop_t *)portal_unlock }, /* unlock */ { &vop_bmap_desc, (vop_t *)portal_bmap }, /* bmap */ { &vop_strategy_desc, (vop_t *)portal_strategy }, /* strategy */ { &vop_print_desc, (vop_t *)portal_print }, /* print */ { &vop_islocked_desc, (vop_t *)portal_islocked }, /* islocked */ { &vop_pathconf_desc, (vop_t *)portal_pathconf }, /* pathconf */ { &vop_advlock_desc, (vop_t *)portal_advlock }, /* advlock */ { &vop_blkatoff_desc, (vop_t *)portal_blkatoff }, /* blkatoff */ { &vop_valloc_desc, (vop_t *)portal_valloc }, /* valloc */ { &vop_vfree_desc, (vop_t *)portal_vfree }, /* vfree */ { &vop_truncate_desc, (vop_t *)portal_truncate }, /* truncate */ { &vop_update_desc, (vop_t *)portal_update }, /* update */ { &vop_bwrite_desc, (vop_t *)portal_bwrite }, /* bwrite */ { NULL, NULL } }; static struct vnodeopv_desc portal_vnodeop_opv_desc = { &portal_vnodeop_p, portal_vnodeop_entries }; VNODEOP_SET(portal_vnodeop_opv_desc); Index: head/sys/i386/conf/LINT =================================================================== --- head/sys/i386/conf/LINT (revision 14092) +++ head/sys/i386/conf/LINT (revision 14093) @@ -1,854 +1,853 @@ # # LINT -- config file for checking all the sources, tries to pull in # as much of the source tree as it can. 
# -# $Id: LINT,v 1.237 1996/02/03 11:48:12 phk Exp $ +# $Id: LINT,v 1.238 1996/02/06 20:57:46 wollman Exp $ # # NB: You probably don't want to try running a kernel built from this # file. Instead, you should start from GENERIC, and add options from # this file as required. # # # This directive is mandatory; it defines the architecture to be # configured for; in this case, the 386 family. You must also specify # at least one CPU (the one you intend to run on); deleting the # specification for CPUs you don't need to use may make parts of the # system run faster # machine "i386" cpu "I386_CPU" cpu "I486_CPU" cpu "I586_CPU" # aka Pentium(tm) cpu "I686_CPU" # aka Pentium Pro(tm) # # This is the ``identification'' of the kernel. Usually this should # be the same as the name of your kernel. # ident LINT # # The `maxusers' parameter controls the static sizing of a number of # internal system tables by a complicated formula defined in param.c. # maxusers 10 # # Under some circumstances it is necessary to make the default max # number of processes per user and open files per user more than the # defaults on bootup. (an example is a large news server in which # the uid, news, can sometimes need > 100 simultaneous processes running, # or perhaps a user using lots of windows under X). options CHILD_MAX=128 options OPEN_MAX=128 # # A math emulator is mandatory if you wish to run on hardware which # does not have a floating-point processor. Pick either the original, # bogus (but freely-distributable) math emulator, or a much more # fully-featured but GPL-licensed emulator taken from Linux. # options MATH_EMULATE #Support for x87 emulation # Don't enable both of these in a real config. options GPL_MATH_EMULATE #Support for x87 emulation via #new math emulator # # This directive defines a number of things: # - The compiled kernel is to be called `kernel' # - The root filesystem might be on partition wd0a # - Crash dumps will be written to wd0b, if possible. 
Specifying the # dump device here is not recommended. Use dumpon(8). # config kernel root on wd0 dumps on wd0 ##################################################################### # COMPATIBILITY OPTIONS # # Implement system calls compatible with 4.3BSD and older versions of # FreeBSD. You probably do NOT want to remove this as much current code # still relies on the 4.3 emulation. # options "COMPAT_43" # # Allow user-mode programs to manipulate their local descriptor tables. # This option is required for the WINE Windows(tm) emulator, and is # not used by anything else (that we know of). # options USER_LDT #allow user-level control of i386 ldt # # These three options provide support for System V Interface # Definition-style interprocess communication, in the form of shared # memory, semaphores, and message queues, respectively. # options SYSVSHM options SYSVSEM options SYSVMSG ##################################################################### # DEBUGGING OPTIONS # # Enable the kernel debugger. # options DDB # # KTRACE enables the system-call tracing facility ktrace(2). # options KTRACE #kernel tracing # # The DIAGNOSTIC option is used in a number of source files to enable # extra sanity checking of internal structures. This support is not # enabled by default because of the extra time it would take to check # for these conditions, which can only occur as a result of # programming errors. # options DIAGNOSTIC # # Allow ordinary users to take the console - this is useful for X. options UCONSOLE ##################################################################### # NETWORKING OPTIONS # # Protocol families: # Only the INET (Internet) family is officially supported in FreeBSD. # Source code for the NS (Xerox Network Service), ISO (OSI), and # CCITT (X.25) families is provided for amusement value, although we # try to ensure that it actually compiles. 
# options INET #Internet communications protocols -options NS #Xerox NS communications protocols options IPX #IPX/SPX communications protocols options IPXIP #IPX in IP encapsulation (not available) options IPTUNNEL #IP in IPX encapsulation (not available) options IPXPRINTFS=0 #IPX/SPX Console Debugging Information options IPX_ERRPRINTFS=0 #IPX/SPX Console Debugging Information # These are currently broken and are no longer shipped due to lack # of interest. #options CCITT #X.25 network layer #options ISO #options TPIP #ISO TP class 4 over IP #options TPCONS #ISO TP class 0 over X.25 #options LLC #X.25 link layer for Ethernets #options HDLC #X.25 link layer for serial lines #options EON #ISO CLNP over IP +#options NS #Xerox NS protocols +#options NSIP #XNS over IP # # Network interfaces: # The `loop' pseudo-device is MANDATORY when networking is enabled. # The `ether' pseudo-device provides generic code to handle # Ethernets; it is MANDATORY when a Ethernet device driver is # configured. # The 'fddi' pseudo-device provides generic code to support FDDI. # The `sppp' pseudo-device serves a similar role for certain types # of synchronous PPP links (like `cx', `ar'). # The `sl' pseudo-device implements the Serial Line IP (SLIP) service. # The `ppp' pseudo-device implements the Point-to-Point Protocol. # The `bpfilter' pseudo-device enables the Berkeley Packet Filter. Be # aware of the legal and administrative consequences of enabling this # option. The number of devices determines the maximum number of # simultaneous BPF clients programs runnable. # The `disc' pseudo-device implements a minimal network interface, # which throws away all packets sent and never receives any. It is # included for testing purposes. 
# The `tun' pseudo-device implements the User Process PPP (iijppp) # pseudo-device ether #Generic Ethernet pseudo-device fddi #Generic FDDI pseudo-device sppp #Generic Synchronous PPP pseudo-device loop #Network loopback device pseudo-device sl 2 #Serial Line IP pseudo-device ppp 2 #Point-to-point protocol pseudo-device bpfilter 4 #Berkeley packet filter pseudo-device disc #Discard device pseudo-device tun 1 #Tunnel driver(user process ppp) - -options NSIP #XNS over IP # # Internet family options: # # TCP_COMPAT_42 causes the TCP code to emulate certain bugs present in # 4.2BSD. This option should not be used unless you have a 4.2BSD # machine and TCP connections fail. # # MROUTING enables the kernel multicast packet forwarder, which works # with mrouted(8). # # IPFIREWALL enables support for IP firewall construction, in # conjunction with the `ipfw' program. IPFIREWALL_VERBOSE does # the obvious thing. # IPFIREWALL_ORDER_RULES makes the ipfw code sort the rules. You # don't want that, it's only there to be backward compatible. # IPACCT enables IP accounting. # # TCPDEBUG is undocumented. # options "TCP_COMPAT_42" #emulate 4.2BSD TCP bugs options MROUTING # Multicast routing options IPFIREWALL #firewall options IPFIREWALL_VERBOSE #print information about # dropped packets options IPFIREWALL_ORDER_RULES # bogusly sort rules. options IPACCT #ipaccounting options TCPDEBUG ##################################################################### # FILESYSTEM OPTIONS # # Only the root, /usr, and /tmp filesystems need be statically # compiled; everything else will be automatically loaded at mount # time. (Exception: the UFS family---FFS, MFS, and LFS---cannot # currently be demand-loaded.) Some people still prefer to statically # compile other filesystems as well. # # NB: The LFS, PORTAL, and UNION filesystems are known to be buggy, # and WILL panic your system if you attempt to do anything with them. 
# They are included here as an incentive for some enterprising soul to # sit down and fix them. # # Note: 4.4BSD NQNFS lease checking has relatively high cost for # _local_ I/O as well as remote I/O. Don't use it unless you will # be using NQNFS. # # One of these is mandatory: options FFS #Fast filesystem options NFS #Network File System # The rest are optional: options NQNFS #Enable NQNFS lease checking # options NFS_NOSERVER #Disable the NFS-server code. options "CD9660" #ISO 9660 filesystem options FDESC #File descriptor filesystem options KERNFS #Kernel filesystem options LFS #Log filesystem options MFS #Memory File System options MSDOSFS #MS DOS File System options NULLFS #NULL filesystem options PORTAL #Portal filesystem options PROCFS #Process filesystem options UMAPFS #UID map filesystem options UNION #Union filesystem # This DEVFS is experimental but seems to work options DEVFS #devices filesystem # Make space in the kernel for a MFS root filesystem. Define to the number # of kilobytes to reserve for the filesystem. options MFS_ROOT=10 # Allow the MFS_ROOT code to load the MFS image from floppy if it is missing. options MFS_AUTOLOAD # Allow this many swap-devices. options NSWAPDEV=20 # Disk quotas are supported when this option is enabled. If you # change the value of this option, you must do a `make clean' in your # kernel compile directory in order to get a working kernel. # options QUOTA #enable disk quotas ##################################################################### # SCSI DEVICES # SCSI DEVICE CONFIGURATION # The SCSI subsystem consists of the `base' SCSI code, a number of # high-level SCSI device `type' drivers, and the low-level host-adapter # device drivers. The host adapters are listed in the ISA and PCI # device configuration sections below. # # Beginning with FreeBSD 2.0.5 you can wire down your SCSI devices so # that a given bus, target, and LUN always come on line as the same # device unit.
In earlier versions the unit numbers were assigned # in the order that the devices were probed on the SCSI bus. This # means that if you removed a disk drive, you may have had to rewrite # your /etc/fstab file, and also that you had to be careful when adding # a new disk as it may have been probed earlier and moved your device # configuration around. # This old behavior is maintained as the default behavior. The unit # assignment begins with the first non-wired down unit for a device # type. For example, if you wire a disk as "sd3" then the first # non-wired disk will be assigned sd4. # The syntax for wiring down devices is: # controller scbus0 at ahc0 # Single bus device # controller scbus1 at ahc1 bus 0 # Single bus device # controller scbus3 at ahc2 bus 0 # Twin bus device # controller scbus2 at ahc2 bus 1 # Twin bus device # disk sd0 at scbus0 target 0 unit 0 # disk sd1 at scbus3 target 1 # disk sd2 at scbus2 target 3 # tape st1 at scbus1 target 6 # device cd0 at scbus? # "units" (SCSI logical unit number) that are not specified are # treated as if specified as LUN 0. # All SCSI devices allocate as many units as are required. # The "unknown" device (uk? in pre-2.0.5) is now part of the base SCSI # configuration and doesn't have to be explicitly configured. controller scbus0 #base SCSI code device ch0 #SCSI media changers device sd0 #SCSI disks device st0 #SCSI tapes device cd0 #SCSI CD-ROMs device od0 #SCSI optical disk # The previous devices (ch, sd, st, cd) are recognized by config. # config doesn't (and shouldn't) know about these newer ones, # so we have to specify that they are on a SCSI bus with the "at scbus?" # clause. device worm0 at scbus? # SCSI worm device pt0 at scbus? # SCSI processor type device sctarg0 at scbus? 
# SCSI target # SCSI OPTIONS: # SCSIDEBUG: When defined enables debugging macros # NO_SCSI_SENSE: When defined disables sense descriptions (about 4k) # SCSI_REPORT_GEOMETRY: Always report disk geometry at boot up instead # of only when booting verbosely. options SCSIDEBUG #options NO_SCSI_SENSE options SCSI_REPORT_GEOMETRY ##################################################################### # MISCELLANEOUS DEVICES AND OPTIONS # # Of these, only the `log' device is truly mandatory. The `pty' # device usually turns out to be ``effectively mandatory'', as it is # required for `telnetd', `rlogind', `screen', `emacs', and `xterm', # among others. The `isdn', `ii', `ity', `itel', and `ispy' devices # are all required when ISDN support is used. If you wish to run certain # system utilities which are compressed by default (like /stand/sysinstall) # then `gzip' becomes mandatory too. # pseudo-device pty 16 #Pseudo ttys - can go as high as 64 pseudo-device speaker #Play IBM BASIC-style noises out your speaker pseudo-device log #Kernel syslog interface (/dev/klog) pseudo-device gzip #Exec gzipped a.out's pseudo-device vn #Vnode driver (turns a file into a device) pseudo-device snp 3 #Snoop device - to look at pty/vty/etc.. # These are non-optional for ISDN pseudo-device isdn pseudo-device ii 4 pseudo-device ity 4 pseudo-device itel 2 pseudo-device ispy 1 # These are only for watching for bitrot in old tty code. # broken #pseudo-device tb # These are only for watching for bitrot in old SCSI code. pseudo-device su #scsi user pseudo-device ssc #super scsi ##################################################################### # HARDWARE DEVICE CONFIGURATION # ISA and EISA devices: # Currently there is no separate support for EISA. There should be. # Micro Channel is not supported at all. # # Mandatory ISA devices: isa, sc or vt, npx # controller isa0 # # Options for `isa': # # AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A # interrupt controller. 
This saves about 1.25 usec for each interrupt. # Automatic EOI is documented not to work for the slave with the # original i8259A, but it works for some clones and some integrated # versions. # # BOUNCE_BUFFERS provides support for ISA DMA on machines with more # than 16 megabytes of memory. It doesn't hurt on other machines. # Some broken EISA and VLB hardware may need this, too. # # DUMMY_NOPS disables extra delays for some bus operations. The delays # are mostly for older systems and aren't used consistently. Probably # works OK on most EISA bus machines. # # TUNE_1542 enables the automatic ISA bus speed selection for the # Adaptec 1542 boards. Does not work for all boards, use it with caution. # # BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to # reset the CPU for reboot. This is needed on some systems with broken # keyboard controllers. # #options "AUTO_EOI_2" options BOUNCE_BUFFERS #options DUMMY_NOPS #options "TUNE_1542" #options BROKEN_KEYBOARD_RESET # Enable this and PCVT_FREEBSD for pcvt vt220 compatible console driver device vt0 at isa? port "IO_KBD" tty irq 1 vector pcrint options PCVT_FREEBSD=210 # pcvt running on FreeBSD >= 2.0.5 options XSERVER # include code for XFree86 options FAT_CURSOR # start with block cursor # This PCVT option is for keyboards such as those used on IBM ThinkPad laptops options PCVT_SCANSET=2 # IBM keyboards are non-std # The syscons console driver (sco color console compatible) - default. device sc0 at isa? port "IO_KBD" tty irq 1 vector scintr # # Options for `sc': # # HARDFONTS allows the driver to load an ISO-8859-1 font to replace # the default font in your display adapter's memory. # options HARDFONTS # # MAXCONS is maximum number of virtual consoles, no more than 16 # default value: 12 # options MAXCONS=16 # # This device is mandatory. # # The Numeric Processing eXtension is used to either enable the # coprocessor or enable math emulation.
If your machine doesn't contain # a math co-processor, you must *also* add the option "MATH_EMULATE". # THIS IS NOT AN OPTIONAL ENTRY, DO NOT REMOVE IT # device npx0 at isa? port "IO_NPX" irq 13 vector npxintr # # Optional ISA and EISA devices: # # # SCSI host adapters: `aha', `aic', `bt', `nca' # # aha: Adaptec 154x # ahc: Adaptec 274x/284x/294x # aic: Adaptec 152x and sound cards using the Adaptec AIC-6360 (slow!) # bt: Most Buslogic controllers # nca: ProAudioSpectrum cards using the NCR 5380 or Trantor T130 # uha: UltraStore 14F and 34F # sea: Seagate ST01/02 8 bit controller (slow!) # wds: Western Digital WD7000 controller (no scatter/gather!). # # Note that the order is important in order for Buslogic cards to be # probed correctly. # controller bt0 at isa? port "IO_BT0" bio irq ? vector bt_isa_intr controller aha0 at isa? port "IO_AHA0" bio irq ? drq 5 vector ahaintr controller uha0 at isa? port "IO_UHA0" bio irq ? drq 5 vector uhaintr controller aic0 at isa? port 0x340 bio irq 11 vector aicintr controller nca0 at isa? port 0x1f88 bio irq 10 vector ncaintr controller nca1 at isa? port 0x1f84 controller nca2 at isa? port 0x1f8c controller nca3 at isa? port 0x1e88 controller nca4 at isa? port 0x350 bio irq 5 vector ncaintr controller sea0 at isa? bio irq 5 iomem 0xdc000 iosiz 0x2000 vector seaintr controller wds0 at isa? port 0x350 bio irq 15 drq 6 vector wdsintr # # ST-506, ESDI, and IDE hard disks: `wdc' and `wd' # # NB: ``Enhanced IDE'' is NOT supported at this time. # # The flags fields are used to enable the multi-sector I/O and # the 32BIT I/O modes. The flags may be used in either the controller # definition or in the individual disk definitions. The controller # definition is supported for the boot configuration stuff. # # Each drive has a 16 bit flags value defined: # The low 8 bits are the maximum value for the multi-sector I/O, # where 0xff defaults to the maximum that the drive can handle. 
# The high bit of the 16 bit flags (0x8000) allows probing for # 32 bit transfers. # # The flags field for the drives can be specified in the controller # specification with the low 16 bits for drive 0, and the high 16 bits # for drive 1. # e.g.: #controller wdc0 at isa? port "IO_WD1" bio irq 14 flags 0x00ff8004 vector wdintr # # specifies that drive 0 will be allowed to probe for 32 bit transfers and # a maximum multi-sector transfer of 4 sectors, and drive 1 will not be # allowed to probe for 32 bit transfers, but will allow multi-sector # transfers up to the maximum that the drive supports. # # controller wdc0 at isa? port "IO_WD1" bio irq 14 vector wdintr disk wd0 at wdc0 drive 0 disk wd1 at wdc0 drive 1 controller wdc1 at isa? port "IO_WD2" bio irq 15 vector wdintr disk wd2 at wdc1 drive 0 disk wd3 at wdc1 drive 1 # # Options for `wdc': # # ATAPI enables the support for ATAPI-compatible IDE devices # options ATAPI #Enable ATAPI support for IDE bus # IDE CD-ROM driver - requires wdc controller and ATAPI option device wcd0 # # Standard floppy disk controllers and floppy tapes: `fdc', `fd', and `ft' # controller fdc0 at isa? port "IO_FD1" bio irq 6 drq 2 vector fdintr # # Activate this line instead of the fdc0 line above if you happen to # have an Insight floppy tape. Probing them proved to be dangerous # for people with floppy disks only, so it's "hidden" behind a flag: #controller fdc0 at isa? port "IO_FD1" bio flags 1 irq 6 drq 2 vector fdintr disk fd0 at fdc0 drive 0 disk fd1 at fdc0 drive 1 tape ft0 at fdc0 drive 2 # # Options for `fd': # # FDSEEKWAIT selects a non-default head-settle time (i.e., the time to # wait after a seek is performed). The default value (1/32 s) is # usually sufficient. The units are inverse seconds, so a value of 16 # here means to wait 1/16th of a second; you should choose a power of # two. # XXX: this seems to be missing! options FDSEEKWAIT=16 # # Other standard PC hardware: `lpt', `mse', `psm', `sio', etc. 
# # lpt: printer port # lpt specials: # port can be specified as ?, this will cause the driver to scan # the BIOS port list; # the irq and vector clauses may be omitted, this # will force the port into polling mode. # mse: Logitech and ATI InPort bus mouse ports # psm: PS/2 mouse port [note: conflicts with sc0/vt0, thus "conflicts" keywd] # sio: serial ports (see sio(4)) device lpt0 at isa? port? tty irq 7 vector lptintr device lpt1 at isa? port "IO_LPT3" tty irq 5 vector lptintr device mse0 at isa? port 0x23c tty irq 5 vector mseintr device psm0 at isa? port "IO_KBD" conflicts tty irq 12 vector psmintr # Options for psm: options PSM_NO_RESET #don't reset mouse hardware (some laptops) device sio0 at isa? port "IO_COM1" tty irq 4 vector siointr # Options for sio: options COMCONSOLE #prefer serial console to video console options COM_ESP #code for Hayes ESP options COM_MULTIPORT #code for some cards with shared IRQs options DSI_SOFT_MODEM #code for DSI Softmodems options BREAK_TO_DEBUGGER #a BREAK on a comconsole goes to #DDB, if available. # # Network interfaces: `cx', `ed', `el', `ep', `ie', `is', `le', `lnc' # # ar: Arnet SYNC/570i hdlc sync 2/4 port V.35/X.21 serial driver (requires sppp) # cx: Cronyx/Sigma multiport sync/async (with Cisco or PPP framing) # ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503 # el: 3Com 3C501 (slow!) # ep: 3Com 3C509 (buggy) # fe: Fujitsu MB86960A/MB86965A Ethernet # fea: DEC DEFEA EISA FDDI adapter # ie: AT&T StarLAN 10 and EN100; 3Com 3C507; unknown NI5210 # le: Digital Equipment EtherWorks 2 and EtherWorks 3 (DEPCA, DE100, # DE101, DE200, DE201, DE202, DE203, DE204, DE205, DE422) # lnc: Lance/PCnet cards (Isolan, Novell NE2100, NE32-VL) # ze: IBM/National Semiconductor PCMCIA ethernet controller. # zp: 3Com PCMCIA Etherlink III (It does not require shared memory for # send/receive operation, but it needs 'iomem' to read/write the # attribute memory) # device ar0 at isa? 
port 0x300 net irq 10 iomem 0xd0000 vector arintr device cx0 at isa? port 0x240 net irq 15 drq 7 vector cxintr device ed0 at isa? port 0x280 net irq 5 iomem 0xd8000 vector edintr device eg0 at isa? port 0x310 net irq 5 vector egintr device el0 at isa? port 0x300 net irq 9 vector elintr device ep0 at isa? port 0x300 net irq 10 vector epintr device fe0 at isa? port 0x240 net irq ? vector feintr device fea0 at isa? net irq ? vector feaintr device ie0 at isa? port 0x360 net irq 7 iomem 0xd0000 vector ieintr device ix0 at isa? port 0x300 net irq 10 iomem 0xd0000 iosiz 32768 vector ixintr device le0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector le_intr device lnc0 at isa? port 0x300 net irq 10 drq 0 vector lncintr device ze0 at isa? port 0x300 net irq 5 iomem 0xd8000 vector zeintr device zp0 at isa? port 0x300 net irq 10 iomem 0xd8000 vector zpintr # # ISDN drivers - `isdn'. # # Uncomment one (and only one) of the following two drivers for the appropriate # ISDN device you have. For more information on what's considered appropriate # for your given set of circumstances, please read # /usr/src/gnu/usr.sbin/isdn/docs/INSTALL. It's a bit sparse at present, but # it's the best we have right now. The snic driver is also disabled at present, # waiting for someone to upgrade the driver to 2.0 (it's in /sys/gnu/scsi/). # device nic0 at isa? port "IO_COM3" iomem 0xe0000 tty irq 9 vector nicintr device nnic0 at isa? 
port 0x150 iomem 0xe0000 tty irq 12 vector nnicintr # # Audio drivers: `snd', `sb', `pas', `gus', `pca' # # snd: Voxware sound support code # sb: SoundBlaster PCM - SoundBlaster, SB Pro, SB16, ProAudioSpectrum # sbxvi: SoundBlaster 16 # sbmidi: SoundBlaster 16 MIDI interface # pas: ProAudioSpectrum PCM and MIDI # gus: Gravis Ultrasound - Ultrasound, Ultrasound 16, Ultrasound MAX # gusxvi: Gravis Ultrasound 16-bit PCM (do not use) # mss: Microsoft Sound System # opl: Yamaha OPL-2 and OPL-3 FM - SB, SB Pro, SB 16, ProAudioSpectrum # uart: stand-alone 6850 UART for MIDI # mpu: Roland MPU-401 stand-alone card # # Beware! The addresses specified below are also hard-coded in # i386/isa/sound/sound_config.h. If you change the values here, you # must also change the values in the include file. # # pca: PCM audio through your PC speaker # # If you don't have a lpt0 device at IRQ 7, you can remove the # ``conflicts'' specification in the appropriate device entries below. # # If you have a GUS-MAX card and want to use the CS4231 codec on the # card the drqs for the gus max must be 8 bit (1, 2, or 3). # # If you would like to use the full duplex option on the gus, then define # flags to be the ``read dma channel''. # # options BROKEN_BUS_CLOCK #PAS-16 isn't working and OPTI chipset # options SYMPHONY_PAS #PAS-16 isn't working and SYMPHONY chipset # options EXCLUDE_SBPRO #PAS-16 # options SBC_IRQ=5 #PAS-16. Must match irq on sb0 line. # PAS16: The order of the pas0/sb0/opl0 is important since the # sb emulation is enabled in the pas-16 attach. # # The i386/isa/sound/sound.doc has more information. # Controls all sound devices controller snd0 device pas0 at isa? port 0x388 irq 10 drq 6 vector pasintr device sb0 at isa? port 0x220 irq 7 conflicts drq 1 vector sbintr device sbxvi0 at isa? drq 5 device sbmidi0 at isa? port 0x330 device gus0 at isa? port 0x220 irq 12 drq 1 vector gusintr #device gus0 at isa? port 0x220 irq 12 drq 1 flags 0x3 vector gusintr device mss0 at isa? 
port 0x530 irq 10 drq 1 vector adintr device opl0 at isa? port 0x388 conflicts device mpu0 at isa? port 0x330 irq 6 drq 0 device uart0 at isa? port 0x330 irq 5 vector "m6850intr" # More undocumented sound devices with bogus configurations for linting. # broken #device sscape0 at isa? port 0x330 irq 6 drq 0 #device trix0 at isa? port 0x330 irq 6 drq 0 vector sscapeintr # Not controlled by `snd' device pca0 at isa? port IO_TIMER1 tty # # Miscellaneous hardware: # # mcd: Mitsumi CD-ROM # scd: Sony CD-ROM # matcd: Matsushita/Panasonic CD-ROM # wt: Wangtek and Archive QIC-02/QIC-36 tape drives # ctx: Cortex-I frame grabber # apm: Laptop Advanced Power Management (experimental) # spigot: The Creative Labs Video Spigot video-acquisition board # meteor: Matrox Meteor video capture board # cy: Cyclades serial driver # dgb: Digiboard PC/Xi and PC/Xe series driver (ALPHA QUALITY!) # gp: National Instruments AT-GPIB and AT-GPIB/TNT board # asc: GI1904-based hand scanners, e.g. the Trust Amiscan Grey # gsc: Genius GS-4500 hand scanner. # joy: joystick # labpc: National Instrument's Lab-PC and Lab-PC+ # rc: RISCom/8 multiport card # tw: TW-523 power line interface for use with X-10 home control products # si: Specialix SI/XIO 4-32 port terminal multiplexor # # Notes on the spigot: # The video spigot is at 0xad6. This port address can not be changed. # The irq values may only be 10, 11, or 15 # I/O memory is an 8kb region. Possible values are: # 0a0000, 0a2000, ..., 0fffff, f00000, f02000, ..., ffffff # The start address must be on an even boundary. # Add the following option if you want to allow non-root users to be able # to access the spigot. This option is not secure because it allows users # direct access to the I/O page. 
# options SPIGOT_UNSECURE # # Notes on the Digiboard driver: # # The following flag values have special meanings: # 0x01 - alternate layout of pins # 0x02 - use the windowed PC/Xe in 64K mode # Notes on the Specialix SI/XIO driver: # **This is NOT a Specialix supported Driver!** # The host card is memory, not IO mapped. # The Rev 1 host cards use a 64K chunk, on a 32K boundary. # The Rev 2 host cards use a 32K chunk, on a 32K boundary. # The cards can use an IRQ of 11, 12 or 15. device mcd0 at isa? port 0x300 bio irq 10 vector mcdintr # for the Sony CDU31/33A CDROM device scd0 at isa? port 0x230 bio # for the SoundBlaster 16 multicd - up to 4 devices controller matcd0 at isa? port 0x230 bio device wt0 at isa? port 0x300 bio irq 5 drq 1 vector wtintr device ctx0 at isa? port 0x230 iomem 0xd0000 device spigot0 at isa? port 0xad6 irq 15 iomem 0xee000 vector spigintr device qcam0 at isa? port "IO_LPT3" tty device apm0 at isa? device gp0 at isa? port 0x2c0 tty device gsc0 at isa? port "IO_GSC1" tty drq 3 device joy0 at isa? port "IO_GAME" device cy0 at isa? tty irq 10 iomem 0xd4000 iosiz 0x2000 vector cyintr device dgb0 at isa? port 0x220 iomem 0xfc0000 iosiz ? tty device labpc0 at isa? port 0x260 tty irq 5 vector labpcintr device rc0 at isa? port 0x220 tty irq 12 vector rcintr # the port and irq for tw0 are fictitious device tw0 at isa? port 0x380 tty irq 11 vector twintr device si0 at isa? iomem 0xd0000 tty irq 12 vector siintr device asc0 at isa? port IO_ASC1 tty drq 3 irq 10 vector ascintr device bqu0 at isa? port 0x150 # # EISA devices: # # The EISA bus device is eisa0. It provides auto-detection and # configuration support for all devices on the EISA bus. # # The `ahb' device provides support for the Adaptec 174X adapter. # # The `ahc' device provides support for the Adaptec 274X and 284X # adapters. The 284X, although a VLB card responds to EISA probes. # controller eisa0 controller ahb0 controller ahc0 # # PCI devices: # # The main PCI bus device is `pci'. 
It provides auto-detection and # configuration support for all devices on the PCI bus, using either # configuration mode defined in the PCI specification. # # The `ahc' device provides support for the Adaptec 29/3940(U)(W) # and motherboard based AIC7870/AIC7880 adapters. # # The `ncr' device provides support for the NCR 53C810 and 53C825 # self-contained SCSI host adapters. # # The `de' device provides support for the Digital Equipment DC21040 # self-contained Ethernet adapter. # # The `vx' device provides support for the 3Com 3C590 and 3C595 # early support # # The `fpa' device provides support for the Digital DEFPA PCI FDDI # adapter. pseudo-device fddi is also needed. # # The PROBE_VERBOSE option enables a long listing of chip set registers # for supported PCI chip sets (currently only intel Saturn and Mercury). # # The `meteor' device is a PCI video capture board. It can also have the # following options: # options METEOR_ALLOC_PAGES=xxx preallocate kernel pages for data entry # figure (ROWS*COLUMN*BYTES_PER_PIXEL*FRAME+PAGE_SIZE-1)/PAGE_SIZE # options METEOR_DEALLOC_PAGES remove all allocated pages on close(2) # options METEOR_DEALLOC_ABOVE=xxx remove all allocated pages above the # specified amount. If this value is below the allocated amount no action # taken # controller pci0 controller ahc1 device ncr0 device de0 device fxp0 device vx0 device fpa0 device meteor0 options PROBE_VERBOSE # # PCCARD/PCMCIA # controller crd0 controller pcic0 at crd? # # Laptop/Notebook options: # # See also: # apm under `Miscellaneous hardware' # options PSM_NO_RESET for the `psm' driver # above. # For older notebooks that signal a powerfail condition (external # power supply dropped, or battery state low) by issuing an NMI: options POWERFAIL_NMI # make it beep instead of panicking # More undocumented options for linting.
options APM_SLOWSTART=1 options COMPAT_LINUX options DEBUG options "EXT2FS" options "IBCS2" options LINUX options "SCSI_2_DEF" options SHOW_BUSYBUFS # List buffers that prevent root unmount Index: head/sys/i386/conf/NOTES =================================================================== --- head/sys/i386/conf/NOTES (revision 14092) +++ head/sys/i386/conf/NOTES (revision 14093) @@ -1,854 +1,853 @@ # # LINT -- config file for checking all the sources, tries to pull in # as much of the source tree as it can. # -# $Id: LINT,v 1.237 1996/02/03 11:48:12 phk Exp $ +# $Id: LINT,v 1.238 1996/02/06 20:57:46 wollman Exp $ # # NB: You probably don't want to try running a kernel built from this # file. Instead, you should start from GENERIC, and add options from # this file as required. # # # This directive is mandatory; it defines the architecture to be # configured for; in this case, the 386 family. You must also specify # at least one CPU (the one you intend to run on); deleting the # specification for CPUs you don't need to use may make parts of the # system run faster # machine "i386" cpu "I386_CPU" cpu "I486_CPU" cpu "I586_CPU" # aka Pentium(tm) cpu "I686_CPU" # aka Pentium Pro(tm) # # This is the ``identification'' of the kernel. Usually this should # be the same as the name of your kernel. # ident LINT # # The `maxusers' parameter controls the static sizing of a number of # internal system tables by a complicated formula defined in param.c. # maxusers 10 # # Under some circumstances it is necessary to make the default max # number of processes per user and open files per user more than the # defaults on bootup. (an example is a large news server in which # the uid, news, can sometimes need > 100 simultaneous processes running, # or perhaps a user using lots of windows under X). options CHILD_MAX=128 options OPEN_MAX=128 # # A math emulator is mandatory if you wish to run on hardware which # does not have a floating-point processor. 
Pick either the original, # bogus (but freely-distributable) math emulator, or a much more # fully-featured but GPL-licensed emulator taken from Linux. # options MATH_EMULATE #Support for x87 emulation # Don't enable both of these in a real config. options GPL_MATH_EMULATE #Support for x87 emulation via #new math emulator # # This directive defines a number of things: # - The compiled kernel is to be called `kernel' # - The root filesystem might be on partition wd0a # - Crash dumps will be written to wd0b, if possible. Specifying the # dump device here is not recommended. Use dumpon(8). # config kernel root on wd0 dumps on wd0 ##################################################################### # COMPATIBILITY OPTIONS # # Implement system calls compatible with 4.3BSD and older versions of # FreeBSD. You probably do NOT want to remove this as much current code # still relies on the 4.3 emulation. # options "COMPAT_43" # # Allow user-mode programs to manipulate their local descriptor tables. # This option is required for the WINE Windows(tm) emulator, and is # not used by anything else (that we know of). # options USER_LDT #allow user-level control of i386 ldt # # These three options provide support for System V Interface # Definition-style interprocess communication, in the form of shared # memory, semaphores, and message queues, respectively. # options SYSVSHM options SYSVSEM options SYSVMSG ##################################################################### # DEBUGGING OPTIONS # # Enable the kernel debugger. # options DDB # # KTRACE enables the system-call tracing facility ktrace(2). # options KTRACE #kernel tracing # # The DIAGNOSTIC option is used in a number of source files to enable # extra sanity checking of internal structures. This support is not # enabled by default because of the extra time it would take to check # for these conditions, which can only occur as a result of # programming errors. 
# options DIAGNOSTIC # # Allow ordinary users to take the console - this is useful for X. options UCONSOLE ##################################################################### # NETWORKING OPTIONS # # Protocol families: # Only the INET (Internet) family is officially supported in FreeBSD. # Source code for the NS (Xerox Network Service), ISO (OSI), and # CCITT (X.25) families is provided for amusement value, although we # try to ensure that it actually compiles. # options INET #Internet communications protocols -options NS #Xerox NS communications protocols options IPX #IPX/SPX communications protocols options IPXIP #IPX in IP encapsulation (not available) options IPTUNNEL #IP in IPX encapsulation (not available) options IPXPRINTFS=0 #IPX/SPX Console Debugging Information options IPX_ERRPRINTFS=0 #IPX/SPX Console Debugging Information # These are currently broken and are no longer shipped due to lack # of interest. #options CCITT #X.25 network layer #options ISO #options TPIP #ISO TP class 4 over IP #options TPCONS #ISO TP class 0 over X.25 #options LLC #X.25 link layer for Ethernets #options HDLC #X.25 link layer for serial lines #options EON #ISO CLNP over IP +#options NS #Xerox NS protocols +#options NSIP #XNS over IP # # Network interfaces: # The `loop' pseudo-device is MANDATORY when networking is enabled. # The `ether' pseudo-device provides generic code to handle # Ethernets; it is MANDATORY when an Ethernet device driver is # configured. # The 'fddi' pseudo-device provides generic code to support FDDI. # The `sppp' pseudo-device serves a similar role for certain types # of synchronous PPP links (like `cx', `ar'). # The `sl' pseudo-device implements the Serial Line IP (SLIP) service. # The `ppp' pseudo-device implements the Point-to-Point Protocol. # The `bpfilter' pseudo-device enables the Berkeley Packet Filter. Be # aware of the legal and administrative consequences of enabling this # option.
The number of devices determines the maximum number of # simultaneous BPF client programs runnable. # The `disc' pseudo-device implements a minimal network interface, # which throws away all packets sent and never receives any. It is # included for testing purposes. # The `tun' pseudo-device implements the User Process PPP (iijppp) # pseudo-device ether #Generic Ethernet pseudo-device fddi #Generic FDDI pseudo-device sppp #Generic Synchronous PPP pseudo-device loop #Network loopback device pseudo-device sl 2 #Serial Line IP pseudo-device ppp 2 #Point-to-point protocol pseudo-device bpfilter 4 #Berkeley packet filter pseudo-device disc #Discard device pseudo-device tun 1 #Tunnel driver(user process ppp) - -options NSIP #XNS over IP # # Internet family options: # # TCP_COMPAT_42 causes the TCP code to emulate certain bugs present in # 4.2BSD. This option should not be used unless you have a 4.2BSD # machine and TCP connections fail. # # MROUTING enables the kernel multicast packet forwarder, which works # with mrouted(8). # # IPFIREWALL enables support for IP firewall construction, in # conjunction with the `ipfw' program. IPFIREWALL_VERBOSE does # the obvious thing. # IPFIREWALL_ORDER_RULES makes the ipfw code sort the rules. You # don't want that, it's only there to be backward compatible. # IPACCT enables IP accounting. # # TCPDEBUG is undocumented. # options "TCP_COMPAT_42" #emulate 4.2BSD TCP bugs options MROUTING # Multicast routing options IPFIREWALL #firewall options IPFIREWALL_VERBOSE #print information about # dropped packets options IPFIREWALL_ORDER_RULES # bogusly sort rules. options IPACCT #ipaccounting options TCPDEBUG ##################################################################### # FILESYSTEM OPTIONS # # Only the root, /usr, and /tmp filesystems need be statically # compiled; everything else will be automatically loaded at mount # time. (Exception: the UFS family---FFS, MFS, and LFS---cannot # currently be demand-loaded.)
Some people still prefer to statically # compile other filesystems as well. # # NB: The LFS, PORTAL, and UNION filesystems are known to be buggy, # and WILL panic your system if you attempt to do anything with them. # They are included here as an incentive for some enterprising soul to # sit down and fix them. # # Note: 4.4BSD NQNFS lease checking has relatively high cost for # _local_ I/O as well as remote I/O. Don't use it unless you will # be using NQNFS. # # One of these is mandatory: options FFS #Fast filesystem options NFS #Network File System # The rest are optional: options NQNFS #Enable NQNFS lease checking # options NFS_NOSERVER #Disable the NFS-server code. options "CD9660" #ISO 9660 filesystem options FDESC #File descriptor filesystem options KERNFS #Kernel filesystem options LFS #Log filesystem options MFS #Memory File System options MSDOSFS #MS DOS File System options NULLFS #NULL filesystem options PORTAL #Portal filesystem options PROCFS #Process filesystem options UMAPFS #UID map filesystem options UNION #Union filesystem # This DEVFS is experimental but seems to work options DEVFS #devices filesystem # Make space in the kernel for a MFS root filesystem. Define to the number # of kilobytes to reserve for the filesystem. options MFS_ROOT=10 # Allow the MFS_ROOT code to load the MFS image from floppy if it is missing. options MFS_AUTOLOAD # Allow this many swap-devices. options NSWAPDEV=20 # Disk quotas are supported when this option is enabled. If you # change the value of this option, you must do a `make clean' in your # kernel compile directory in order to get a working kernel. # options QUOTA #enable disk quotas ##################################################################### # SCSI DEVICES # SCSI DEVICE CONFIGURATION # The SCSI subsystem consists of the `base' SCSI code, a number of # high-level SCSI device `type' drivers, and the low-level host-adapter # device drivers.
The host adapters are listed in the ISA and PCI # device configuration sections below. # # Beginning with FreeBSD 2.0.5 you can wire down your SCSI devices so # that a given bus, target, and LUN always come on line as the same # device unit. In earlier versions the unit numbers were assigned # in the order that the devices were probed on the SCSI bus. This # means that if you removed a disk drive, you may have had to rewrite # your /etc/fstab file, and also that you had to be careful when adding # a new disk as it may have been probed earlier and moved your device # configuration around. # This old behavior is maintained as the default behavior. The unit # assignment begins with the first non-wired down unit for a device # type. For example, if you wire a disk as "sd3" then the first # non-wired disk will be assigned sd4. # The syntax for wiring down devices is: # controller scbus0 at ahc0 # Single bus device # controller scbus1 at ahc1 bus 0 # Single bus device # controller scbus3 at ahc2 bus 0 # Twin bus device # controller scbus2 at ahc2 bus 1 # Twin bus device # disk sd0 at scbus0 target 0 unit 0 # disk sd1 at scbus3 target 1 # disk sd2 at scbus2 target 3 # tape st1 at scbus1 target 6 # device cd0 at scbus? # "units" (SCSI logical unit number) that are not specified are # treated as if specified as LUN 0. # All SCSI devices allocate as many units as are required. # The "unknown" device (uk? in pre-2.0.5) is now part of the base SCSI # configuration and doesn't have to be explicitly configured. controller scbus0 #base SCSI code device ch0 #SCSI media changers device sd0 #SCSI disks device st0 #SCSI tapes device cd0 #SCSI CD-ROMs device od0 #SCSI optical disk # The previous devices (ch, sd, st, cd) are recognized by config. # config doesn't (and shouldn't) know about these newer ones, # so we have to specify that they are on a SCSI bus with the "at scbus?" # clause. device worm0 at scbus? # SCSI worm device pt0 at scbus? 
# SCSI processor type device sctarg0 at scbus? # SCSI target # SCSI OPTIONS: # SCSIDEBUG: When defined enables debugging macros # NO_SCSI_SENSE: When defined disables sense descriptions (about 4k) # SCSI_REPORT_GEOMETRY: Always report disk geometry at boot up instead # of only when booting verbosely. options SCSIDEBUG #options NO_SCSI_SENSE options SCSI_REPORT_GEOMETRY ##################################################################### # MISCELLANEOUS DEVICES AND OPTIONS # # Of these, only the `log' device is truly mandatory. The `pty' # device usually turns out to be ``effectively mandatory'', as it is # required for `telnetd', `rlogind', `screen', `emacs', and `xterm', # among others. The `isdn', `ii', `ity', `itel', and `ispy' devices # are all required when ISDN support is used. If you wish to run certain # system utilities which are compressed by default (like /stand/sysinstall) # then `gzip' becomes mandatory too. # pseudo-device pty 16 #Pseudo ttys - can go as high as 64 pseudo-device speaker #Play IBM BASIC-style noises out your speaker pseudo-device log #Kernel syslog interface (/dev/klog) pseudo-device gzip #Exec gzipped a.out's pseudo-device vn #Vnode driver (turns a file into a device) pseudo-device snp 3 #Snoop device - to look at pty/vty/etc.. # These are non-optional for ISDN pseudo-device isdn pseudo-device ii 4 pseudo-device ity 4 pseudo-device itel 2 pseudo-device ispy 1 # These are only for watching for bitrot in old tty code. # broken #pseudo-device tb # These are only for watching for bitrot in old SCSI code. pseudo-device su #scsi user pseudo-device ssc #super scsi ##################################################################### # HARDWARE DEVICE CONFIGURATION # ISA and EISA devices: # Currently there is no separate support for EISA. There should be. # Micro Channel is not supported at all. 
# # Mandatory ISA devices: isa, sc or vt, npx # controller isa0 # # Options for `isa': # # AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A # interrupt controller. This saves about 1.25 usec for each interrupt. # Automatic EOI is documented not to work for the slave with the # original i8259A, but it works for some clones and some integrated # versions. # # BOUNCE_BUFFERS provides support for ISA DMA on machines with more # than 16 megabytes of memory. It doesn't hurt on other machines. # Some broken EISA and VLB hardware may need this, too. # # DUMMY_NOPS disables extra delays for some bus operations. The delays # are mostly for older systems and aren't used consistently. Probably # works OK on most EISA bus machines. # # TUNE_1542 enables the automatic ISA bus speed selection for the # Adaptec 1542 boards. Does not work for all boards, use it with caution. # # BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to # reset the CPU for reboot. This is needed on some systems with broken # keyboard controllers. # #options "AUTO_EOI_2" options BOUNCE_BUFFERS #options DUMMY_NOPS #options "TUNE_1542" #options BROKEN_KEYBOARD_RESET # Enable this and PCVT_FREEBSD for pcvt vt220 compatible console driver device vt0 at isa? port "IO_KBD" tty irq 1 vector pcrint options PCVT_FREEBSD=210 # pcvt running on FreeBSD >= 2.0.5 options XSERVER # include code for XFree86 options FAT_CURSOR # start with block cursor # This PCVT option is for keyboards such as those used on IBM ThinkPad laptops options PCVT_SCANSET=2 # IBM keyboards are non-std # The syscons console driver (sco color console compatible) - default. device sc0 at isa? port "IO_KBD" tty irq 1 vector scintr # # Options for `sc': # # HARDFONTS allows the driver to load an ISO-8859-1 font to replace # the default font in your display adapter's memory.
# options HARDFONTS # # MAXCONS is maximum number of virtual consoles, no more than 16 # default value: 12 # options MAXCONS=16 # # This device is mandatory. # # The Numeric Processing eXtension is used to either enable the # coprocessor or enable math emulation. If your machine doesn't contain # a math co-processor, you must *also* add the option "MATH_EMULATE". # THIS IS NOT AN OPTIONAL ENTRY, DO NOT REMOVE IT # device npx0 at isa? port "IO_NPX" irq 13 vector npxintr # # Optional ISA and EISA devices: # # # SCSI host adapters: `aha', `aic', `bt', `nca' # # aha: Adaptec 154x # ahc: Adaptec 274x/284x/294x # aic: Adaptec 152x and sound cards using the Adaptec AIC-6360 (slow!) # bt: Most Buslogic controllers # nca: ProAudioSpectrum cards using the NCR 5380 or Trantor T130 # uha: UltraStore 14F and 34F # sea: Seagate ST01/02 8 bit controller (slow!) # wds: Western Digital WD7000 controller (no scatter/gather!). # # Note that the order is important in order for Buslogic cards to be # probed correctly. # controller bt0 at isa? port "IO_BT0" bio irq ? vector bt_isa_intr controller aha0 at isa? port "IO_AHA0" bio irq ? drq 5 vector ahaintr controller uha0 at isa? port "IO_UHA0" bio irq ? drq 5 vector uhaintr controller aic0 at isa? port 0x340 bio irq 11 vector aicintr controller nca0 at isa? port 0x1f88 bio irq 10 vector ncaintr controller nca1 at isa? port 0x1f84 controller nca2 at isa? port 0x1f8c controller nca3 at isa? port 0x1e88 controller nca4 at isa? port 0x350 bio irq 5 vector ncaintr controller sea0 at isa? bio irq 5 iomem 0xdc000 iosiz 0x2000 vector seaintr controller wds0 at isa? port 0x350 bio irq 15 drq 6 vector wdsintr # # ST-506, ESDI, and IDE hard disks: `wdc' and `wd' # # NB: ``Enhanced IDE'' is NOT supported at this time. # # The flags fields are used to enable the multi-sector I/O and # the 32BIT I/O modes. The flags may be used in either the controller # definition or in the individual disk definitions. 
The controller # definition is supported for the boot configuration stuff. # # Each drive has a 16 bit flags value defined: # The low 8 bits are the maximum value for the multi-sector I/O, # where 0xff defaults to the maximum that the drive can handle. # The high bit of the 16 bit flags (0x8000) allows probing for # 32 bit transfers. # # The flags field for the drives can be specified in the controller # specification with the low 16 bits for drive 0, and the high 16 bits # for drive 1. # e.g.: #controller wdc0 at isa? port "IO_WD1" bio irq 14 flags 0x00ff8004 vector wdintr # # specifies that drive 0 will be allowed to probe for 32 bit transfers and # a maximum multi-sector transfer of 4 sectors, and drive 1 will not be # allowed to probe for 32 bit transfers, but will allow multi-sector # transfers up to the maximum that the drive supports. # # controller wdc0 at isa? port "IO_WD1" bio irq 14 vector wdintr disk wd0 at wdc0 drive 0 disk wd1 at wdc0 drive 1 controller wdc1 at isa? port "IO_WD2" bio irq 15 vector wdintr disk wd2 at wdc1 drive 0 disk wd3 at wdc1 drive 1 # # Options for `wdc': # # ATAPI enables the support for ATAPI-compatible IDE devices # options ATAPI #Enable ATAPI support for IDE bus # IDE CD-ROM driver - requires wdc controller and ATAPI option device wcd0 # # Standard floppy disk controllers and floppy tapes: `fdc', `fd', and `ft' # controller fdc0 at isa? port "IO_FD1" bio irq 6 drq 2 vector fdintr # # Activate this line instead of the fdc0 line above if you happen to # have an Insight floppy tape. Probing them proved to be dangerous # for people with floppy disks only, so it's "hidden" behind a flag: #controller fdc0 at isa? port "IO_FD1" bio flags 1 irq 6 drq 2 vector fdintr disk fd0 at fdc0 drive 0 disk fd1 at fdc0 drive 1 tape ft0 at fdc0 drive 2 # # Options for `fd': # # FDSEEKWAIT selects a non-default head-settle time (i.e., the time to # wait after a seek is performed). The default value (1/32 s) is # usually sufficient. 
The units are inverse seconds, so a value of 16 # here means to wait 1/16th of a second; you should choose a power of # two. # XXX: this seems to be missing! options FDSEEKWAIT=16 # # Other standard PC hardware: `lpt', `mse', `psm', `sio', etc. # # lpt: printer port # lpt specials: # port can be specified as ?, this will cause the driver to scan # the BIOS port list; # the irq and vector clauses may be omitted, this # will force the port into polling mode. # mse: Logitech and ATI InPort bus mouse ports # psm: PS/2 mouse port [note: conflicts with sc0/vt0, thus "conflicts" keywd] # sio: serial ports (see sio(4)) device lpt0 at isa? port? tty irq 7 vector lptintr device lpt1 at isa? port "IO_LPT3" tty irq 5 vector lptintr device mse0 at isa? port 0x23c tty irq 5 vector mseintr device psm0 at isa? port "IO_KBD" conflicts tty irq 12 vector psmintr # Options for psm: options PSM_NO_RESET #don't reset mouse hardware (some laptops) device sio0 at isa? port "IO_COM1" tty irq 4 vector siointr # Options for sio: options COMCONSOLE #prefer serial console to video console options COM_ESP #code for Hayes ESP options COM_MULTIPORT #code for some cards with shared IRQs options DSI_SOFT_MODEM #code for DSI Softmodems options BREAK_TO_DEBUGGER #a BREAK on a comconsole goes to #DDB, if available. # # Network interfaces: `cx', `ed', `el', `ep', `ie', `is', `le', `lnc' # # ar: Arnet SYNC/570i hdlc sync 2/4 port V.35/X.21 serial driver (requires sppp) # cx: Cronyx/Sigma multiport sync/async (with Cisco or PPP framing) # ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503 # el: 3Com 3C501 (slow!) 
# ep: 3Com 3C509 (buggy) # fe: Fujitsu MB86960A/MB86965A Ethernet # fea: DEC DEFEA EISA FDDI adapter # ie: AT&T StarLAN 10 and EN100; 3Com 3C507; unknown NI5210 # le: Digital Equipment EtherWorks 2 and EtherWorks 3 (DEPCA, DE100, # DE101, DE200, DE201, DE202, DE203, DE204, DE205, DE422) # lnc: Lance/PCnet cards (Isolan, Novell NE2100, NE32-VL) # ze: IBM/National Semiconductor PCMCIA ethernet controller. # zp: 3Com PCMCIA Etherlink III (It does not require shared memory for # send/receive operation, but it needs 'iomem' to read/write the # attribute memory) # device ar0 at isa? port 0x300 net irq 10 iomem 0xd0000 vector arintr device cx0 at isa? port 0x240 net irq 15 drq 7 vector cxintr device ed0 at isa? port 0x280 net irq 5 iomem 0xd8000 vector edintr device eg0 at isa? port 0x310 net irq 5 vector egintr device el0 at isa? port 0x300 net irq 9 vector elintr device ep0 at isa? port 0x300 net irq 10 vector epintr device fe0 at isa? port 0x240 net irq ? vector feintr device fea0 at isa? net irq ? vector feaintr device ie0 at isa? port 0x360 net irq 7 iomem 0xd0000 vector ieintr device ix0 at isa? port 0x300 net irq 10 iomem 0xd0000 iosiz 32768 vector ixintr device le0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector le_intr device lnc0 at isa? port 0x300 net irq 10 drq 0 vector lncintr device ze0 at isa? port 0x300 net irq 5 iomem 0xd8000 vector zeintr device zp0 at isa? port 0x300 net irq 10 iomem 0xd8000 vector zpintr # # ISDN drivers - `isdn'. # # Uncomment one (and only one) of the following two drivers for the appropriate # ISDN device you have. For more information on what's considered appropriate # for your given set of circumstances, please read # /usr/src/gnu/usr.sbin/isdn/docs/INSTALL. It's a bit sparse at present, but # it's the best we have right now. The snic driver is also disabled at present, # waiting for someone to upgrade the driver to 2.0 (it's in /sys/gnu/scsi/). # device nic0 at isa? 
port "IO_COM3" iomem 0xe0000 tty irq 9 vector nicintr device nnic0 at isa? port 0x150 iomem 0xe0000 tty irq 12 vector nnicintr # # Audio drivers: `snd', `sb', `pas', `gus', `pca' # # snd: Voxware sound support code # sb: SoundBlaster PCM - SoundBlaster, SB Pro, SB16, ProAudioSpectrum # sbxvi: SoundBlaster 16 # sbmidi: SoundBlaster 16 MIDI interface # pas: ProAudioSpectrum PCM and MIDI # gus: Gravis Ultrasound - Ultrasound, Ultrasound 16, Ultrasound MAX # gusxvi: Gravis Ultrasound 16-bit PCM (do not use) # mss: Microsoft Sound System # opl: Yamaha OPL-2 and OPL-3 FM - SB, SB Pro, SB 16, ProAudioSpectrum # uart: stand-alone 6850 UART for MIDI # mpu: Roland MPU-401 stand-alone card # # Beware! The addresses specified below are also hard-coded in # i386/isa/sound/sound_config.h. If you change the values here, you # must also change the values in the include file. # # pca: PCM audio through your PC speaker # # If you don't have a lpt0 device at IRQ 7, you can remove the # ``conflicts'' specification in the appropriate device entries below. # # If you have a GUS-MAX card and want to use the CS4231 codec on the # card the drqs for the gus max must be 8 bit (1, 2, or 3). # # If you would like to use the full duplex option on the gus, then define # flags to be the ``read dma channel''. # # options BROKEN_BUS_CLOCK #PAS-16 isn't working and OPTI chipset # options SYMPHONY_PAS #PAS-16 isn't working and SYMPHONY chipset # options EXCLUDE_SBPRO #PAS-16 # options SBC_IRQ=5 #PAS-16. Must match irq on sb0 line. # PAS16: The order of the pas0/sb0/opl0 is important since the # sb emulation is enabled in the pas-16 attach. # # The i386/isa/sound/sound.doc has more information. # Controls all sound devices controller snd0 device pas0 at isa? port 0x388 irq 10 drq 6 vector pasintr device sb0 at isa? port 0x220 irq 7 conflicts drq 1 vector sbintr device sbxvi0 at isa? drq 5 device sbmidi0 at isa? port 0x330 device gus0 at isa? port 0x220 irq 12 drq 1 vector gusintr #device gus0 at isa? 
port 0x220 irq 12 drq 1 flags 0x3 vector gusintr device mss0 at isa? port 0x530 irq 10 drq 1 vector adintr device opl0 at isa? port 0x388 conflicts device mpu0 at isa? port 0x330 irq 6 drq 0 device uart0 at isa? port 0x330 irq 5 vector "m6850intr" # More undocumented sound devices with bogus configurations for linting. # broken #device sscape0 at isa? port 0x330 irq 6 drq 0 #device trix0 at isa? port 0x330 irq 6 drq 0 vector sscapeintr # Not controlled by `snd' device pca0 at isa? port IO_TIMER1 tty # # Miscellaneous hardware: # # mcd: Mitsumi CD-ROM # scd: Sony CD-ROM # matcd: Matsushita/Panasonic CD-ROM # wt: Wangtek and Archive QIC-02/QIC-36 tape drives # ctx: Cortex-I frame grabber # apm: Laptop Advanced Power Management (experimental) # spigot: The Creative Labs Video Spigot video-acquisition board # meteor: Matrox Meteor video capture board # cy: Cyclades serial driver # dgb: Digiboard PC/Xi and PC/Xe series driver (ALPHA QUALITY!) # gp: National Instruments AT-GPIB and AT-GPIB/TNT board # asc: GI1904-based hand scanners, e.g. the Trust Amiscan Grey # gsc: Genius GS-4500 hand scanner. # joy: joystick # labpc: National Instrument's Lab-PC and Lab-PC+ # rc: RISCom/8 multiport card # tw: TW-523 power line interface for use with X-10 home control products # si: Specialix SI/XIO 4-32 port terminal multiplexor # # Notes on the spigot: # The video spigot is at 0xad6. This port address can not be changed. # The irq values may only be 10, 11, or 15 # I/O memory is an 8kb region. Possible values are: # 0a0000, 0a2000, ..., 0fffff, f00000, f02000, ..., ffffff # The start address must be on an even boundary. # Add the following option if you want to allow non-root users to be able # to access the spigot. This option is not secure because it allows users # direct access to the I/O page. 
# options SPIGOT_UNSECURE # # Notes on the Digiboard driver: # # The following flag values have special meanings: # 0x01 - alternate layout of pins # 0x02 - use the windowed PC/Xe in 64K mode # Notes on the Specialix SI/XIO driver: # **This is NOT a Specialix supported Driver!** # The host card is memory, not IO mapped. # The Rev 1 host cards use a 64K chunk, on a 32K boundary. # The Rev 2 host cards use a 32K chunk, on a 32K boundary. # The cards can use an IRQ of 11, 12 or 15. device mcd0 at isa? port 0x300 bio irq 10 vector mcdintr # for the Sony CDU31/33A CDROM device scd0 at isa? port 0x230 bio # for the SoundBlaster 16 multicd - up to 4 devices controller matcd0 at isa? port 0x230 bio device wt0 at isa? port 0x300 bio irq 5 drq 1 vector wtintr device ctx0 at isa? port 0x230 iomem 0xd0000 device spigot0 at isa? port 0xad6 irq 15 iomem 0xee000 vector spigintr device qcam0 at isa? port "IO_LPT3" tty device apm0 at isa? device gp0 at isa? port 0x2c0 tty device gsc0 at isa? port "IO_GSC1" tty drq 3 device joy0 at isa? port "IO_GAME" device cy0 at isa? tty irq 10 iomem 0xd4000 iosiz 0x2000 vector cyintr device dgb0 at isa? port 0x220 iomem 0xfc0000 iosiz ? tty device labpc0 at isa? port 0x260 tty irq 5 vector labpcintr device rc0 at isa? port 0x220 tty irq 12 vector rcintr # the port and irq for tw0 are fictitious device tw0 at isa? port 0x380 tty irq 11 vector twintr device si0 at isa? iomem 0xd0000 tty irq 12 vector siintr device asc0 at isa? port IO_ASC1 tty drq 3 irq 10 vector ascintr device bqu0 at isa? port 0x150 # # EISA devices: # # The EISA bus device is eisa0. It provides auto-detection and # configuration support for all devices on the EISA bus. # # The `ahb' device provides support for the Adaptec 174X adapter. # # The `ahc' device provides support for the Adaptec 274X and 284X # adapters. The 284X, although a VLB card responds to EISA probes. # controller eisa0 controller ahb0 controller ahc0 # # PCI devices: # # The main PCI bus device is `pci'. 
It provides auto-detection and # configuration support for all devices on the PCI bus, using either # configuration mode defined in the PCI specification. # # The `ahc' device provides support for the Adaptec 29/3940(U)(W) # and motherboard based AIC7870/AIC7880 adapters. # # The `ncr' device provides support for the NCR 53C810 and 53C825 # self-contained SCSI host adapters. # # The `de' device provides support for the Digital Equipment DC21040 # self-contained Ethernet adapter. # # The `vx' device provides support for the 3Com 3C590 and 3C595 # early support # # The `fpa' device provides support for the Digital DEFPA PCI FDDI # adapter. pseudo-device fddi is also needed. # # The PROBE_VERBOSE option enables a long listing of chip set registers # for supported PCI chip sets (currently only Intel Saturn and Mercury). # # The `meteor' device is a PCI video capture board. It can also have the # following options: # options METEOR_ALLOC_PAGES=xxx preallocate kernel pages for data entry # figure (ROWS*COLUMN*BYTES_PER_PIXEL*FRAME+PAGE_SIZE-1)/PAGE_SIZE # options METEOR_DEALLOC_PAGES remove all allocated pages on close(2) # options METEOR_DEALLOC_ABOVE=xxx remove all allocated pages above the # specified amount. If this value is below the allocated amount no action # is taken # controller pci0 controller ahc1 device ncr0 device de0 device fxp0 device vx0 device fpa0 device meteor0 options PROBE_VERBOSE # # PCCARD/PCMCIA # controller crd0 controller pcic0 at crd? # # Laptop/Notebook options: # # See also: # apm under `Miscellaneous hardware' # options PSM_NO_RESET for the `psm' driver # above. # For older notebooks that signal a powerfail condition (external # power supply dropped, or battery state low) by issuing an NMI: options POWERFAIL_NMI # make it beep instead of panicking # More undocumented options for linting.
options APM_SLOWSTART=1 options COMPAT_LINUX options DEBUG options "EXT2FS" options "IBCS2" options LINUX options "SCSI_2_DEF" options SHOW_BUSYBUFS # List buffers that prevent root unmount Index: head/sys/kern/uipc_socket.c =================================================================== --- head/sys/kern/uipc_socket.c (revision 14092) +++ head/sys/kern/uipc_socket.c (revision 14093) @@ -1,1068 +1,1068 @@ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 * $Id: uipc_socket.c,v 1.13 1995/12/14 22:51:01 bde Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int somaxconn = SOMAXCONN; SYSCTL_INT(_kern, KERN_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 0, ""); /* * Socket operation routines. * These routines are called by the routines in * sys_socket.c or from a system process, and * implement the semantics of socket operations by * switching out to the protocol specific routines. 
*/
/*
 * socreate: create a socket of type "type" in domain "dom" and attach it
 * to its protocol.
 *
 * The protocol switch entry is looked up with pffindproto() when a specific
 * protocol number is given, otherwise with pffindtype().  Returns
 * EPROTONOSUPPORT when no entry (or no pr_usrreq handler) is found and
 * EPROTOTYPE when the entry's type differs from the requested one.  On
 * success the new socket is stored through "aso" and 0 is returned; if the
 * PRU_ATTACH request fails the socket is released and that error returned.
 *
 * In this patch the owning process "p" becomes an explicit argument (the
 * "+" lines) instead of being read from curproc (the "-" lines).
 */
/*ARGSUSED*/
int
-socreate(dom, aso, type, proto)
+socreate(dom, aso, type, proto, p)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
+	struct proc *p;
{
-	struct proc *p = curproc;		/* XXX */
	register struct protosw *prp;
	register struct socket *so;
	register int error;

	/* A non-zero proto selects an exact protocol; otherwise match by type. */
	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	/* Allocate the socket structure and start from an all-zero state. */
	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
	bzero((caddr_t)so, sizeof(*so));
	so->so_type = type;
	/* Sockets created by uid 0 start out with SS_PRIV set. */
	if (p->p_ucred->cr_uid == 0)
		so->so_state = SS_PRIV;
	so->so_proto = prp;
	/* The protocol number is passed to PRU_ATTACH in the "nam" slot. */
	error =
	    (*prp->pr_usrreq)(so, PRU_ATTACH,
		(struct mbuf *)0, (struct mbuf *)proto, (struct mbuf *)0);
	if (error) {
		/* sofree() only releases a socket that is marked SS_NOFDREF. */
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
	*aso = so;
	return (0);
}

/*
 * sobind: hand the name mbuf "nam" to the socket's protocol with a
 * PRU_BIND request, issued at splnet.  Returns the protocol's result.
 */
int
sobind(so, nam)
	struct socket *so;
	struct mbuf *nam;
{
	int s = splnet();
	int error;

	error =
	    (*so->so_proto->pr_usrreq)(so, PRU_BIND,
		(struct mbuf *)0, nam, (struct mbuf *)0);
	splx(s);
	return (error);
}

/*
 * solisten: issue PRU_LISTEN to the protocol and, on success, record the
 * connection queue limit.  A negative backlog, or one larger than the
 * somaxconn variable, is replaced by somaxconn.
 */
int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	int s = splnet(), error;

	error =
	    (*so->so_proto->pr_usrreq)(so, PRU_LISTEN,
		(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
	if (error) {
		splx(s);
		return (error);
	}
	/* SO_ACCEPTCONN is only set while the completed queue is empty. */
	if (so->so_q == 0)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

/*
 * sofree: release a socket's buffers and memory.
 * Does nothing while the socket still has a protocol control block or
 * has not been marked SS_NOFDREF.
 */
void
sofree(so)
	register struct socket *so;
{

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/* Must be found on one of the head's two queues (0 or 1). */
		if (!soqremque(so, 0) && !soqremque(so, 1))
			panic("sofree dq");
		so->so_head = 0;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	FREE(so, M_SOCKET);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
*/
/*
 * Returns the first error seen from the disconnect, the linger sleep or
 * the PRU_DETACH request; 0 if none of them failed.
 */
int
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	/* A listening socket first aborts every connection still queued. */
	if (so->so_options & SO_ACCEPTCONN) {
		while (so->so_q0)
			(void) soabort(so->so_q0);
		while (so->so_q)
			(void) soabort(so->so_q);
	}
	/* No protocol control block: nothing to disconnect or detach. */
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/* Non-blocking sockets do not wait out the linger. */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			/*
			 * Sleep on so_timeo, with so_linger as the tsleep
			 * timeout, until the connection is gone or the
			 * sleep itself returns an error.
			 */
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, "soclos", so->so_linger);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 =
		    (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
			(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
		/* An earlier error takes precedence over the detach result. */
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	/* Mark the file reference gone so that sofree() may release it. */
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splnet...
 *
 * soabort: pass a PRU_ABORT request to the socket's protocol and return
 * its result.
 */
int
soabort(so)
	struct socket *so;
{

	return (
	    (*so->so_proto->pr_usrreq)(so, PRU_ABORT,
		(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
}

/*
 * soaccept: take over a socket that has no file reference yet
 * (SS_NOFDREF must be set, otherwise this panics), clear that flag and
 * issue PRU_ACCEPT with the name mbuf "nam".
 */
int
soaccept(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
	    (struct mbuf *)0, nam, (struct mbuf *)0);
	splx(s);
	return (error);
}

/*
 * soconnect: issue PRU_CONNECT with the name mbuf "nam".
 * Returns EOPNOTSUPP for a socket with SO_ACCEPTCONN set, and EISCONN
 * when an existing connection cannot be replaced (see below).
 */
int
soconnect(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 *
	 * Note that a failure from sodisconnect() is reported as EISCONN,
	 * not as the error sodisconnect() returned.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    (struct mbuf *)0, nam, (struct mbuf *)0);
	splx(s);
	return (error);
}

/*
 * soconnect2: issue PRU_CONNECT2 on so1; so2 is passed to the protocol
 * in the "nam" slot, cast to an mbuf pointer.
 */
int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splnet();
	int error;

	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
	    (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0);
	splx(s);
	return (error);
}

/*
 * sodisconnect: issue PRU_DISCONNECT.
 * Returns ENOTCONN if the socket is not connected and EALREADY if a
 * disconnect is already in progress.
 */
int
sodisconnect(so)
	register struct socket *so;
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
bad:
	splx(s);
	return (error);
}

/* Wait flag handed to sblock(): M_NOWAIT if MSG_DONTWAIT is set, else M_WAITOK. */
#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
*/ int sosend(so, addr, uio, top, control, flags) register struct socket *so; struct mbuf *addr; struct uio *uio; struct mbuf *top; struct mbuf *control; int flags; { struct proc *p = curproc; /* XXX */ struct mbuf **mp; register struct mbuf *m; register long space, len, resid; int clen = 0, error, s, dontroute, mlen; int atomic = sosendallatonce(so) || top; if (uio) resid = uio->uio_resid; else resid = top->m_pkthdr.len; /* * In theory resid should be unsigned. * However, space must be signed, as it might be less than 0 * if we over-committed, and we must use a signed comparison * of space and resid. On the other hand, a negative resid * causes us to loop sending 0-length segments to the protocol. */ if (resid < 0) return (EINVAL); dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); p->p_stats->p_ru.ru_msgsnd++; if (control) clen = control->m_len; #define snderr(errno) { error = errno; splx(s); goto release; } restart: error = sblock(&so->so_snd, SBLOCKWAIT(flags)); if (error) goto out; do { s = splnet(); if (so->so_state & SS_CANTSENDMORE) snderr(EPIPE); if (so->so_error) snderr(so->so_error); if ((so->so_state & SS_ISCONNECTED) == 0) { /* * `sendto' and `sendmsg' is allowed on a connection- * based socket if it supports implied connect. * Return ENOTCONN if not connected and no address is * supplied. */ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { if ((so->so_state & SS_ISCONFIRMING) == 0 && !(resid == 0 && clen != 0)) snderr(ENOTCONN); } else if (addr == 0) snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? 
ENOTCONN : EDESTADDRREQ); } space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; if ((atomic && resid > so->so_snd.sb_hiwat) || clen > so->so_snd.sb_hiwat) snderr(EMSGSIZE); if (space < resid + clen && uio && (atomic || space < so->so_snd.sb_lowat || space < clen)) { if (so->so_state & SS_NBIO) snderr(EWOULDBLOCK); sbunlock(&so->so_snd); error = sbwait(&so->so_snd); splx(s); if (error) goto out; goto restart; } splx(s); mp = ⊤ space -= clen; do { if (uio == NULL) { /* * Data is prepackaged in "top". */ resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; } else do { if (top == 0) { MGETHDR(m, M_WAIT, MT_DATA); mlen = MHLEN; m->m_pkthdr.len = 0; m->m_pkthdr.rcvif = (struct ifnet *)0; } else { MGET(m, M_WAIT, MT_DATA); mlen = MLEN; } if (resid >= MINCLSIZE) { MCLGET(m, M_WAIT); if ((m->m_flags & M_EXT) == 0) goto nopages; mlen = MCLBYTES; len = min(min(mlen, resid), space); } else { nopages: len = min(min(mlen, resid), space); /* * For datagram protocols, leave room * for protocol headers in first mbuf. */ if (atomic && top == 0 && len < mlen) MH_ALIGN(m, len); } space -= len; error = uiomove(mtod(m, caddr_t), (int)len, uio); resid = uio->uio_resid; m->m_len = len; *mp = m; top->m_pkthdr.len += len; if (error) goto release; mp = &m->m_next; if (resid <= 0) { if (flags & MSG_EOR) top->m_flags |= M_EOR; break; } } while (space > 0 && atomic); if (dontroute) so->so_options |= SO_DONTROUTE; s = splnet(); /* XXX */ error = (*so->so_proto->pr_usrreq)(so, (flags & MSG_OOB) ? PRU_SENDOOB : /* * If the user set MSG_EOF, the protocol * understands this flag and nothing left to * send then use PRU_SEND_EOF instead of PRU_SEND. */ ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && (resid <= 0)) ? 
PRU_SEND_EOF : PRU_SEND, top, addr, control); splx(s); if (dontroute) so->so_options &= ~SO_DONTROUTE; clen = 0; control = 0; top = 0; mp = ⊤ if (error) goto release; } while (resid && space > 0); } while (resid); release: sbunlock(&so->so_snd); out: if (top) m_freem(top); if (control) m_freem(control); return (error); } /* * Implement receive operations on a socket. * We depend on the way that records are added to the sockbuf * by sbappend*. In particular, each record (mbufs linked through m_next) * must begin with an address if the protocol so specifies, * followed by an optional mbuf or mbufs containing ancillary data, * and then zero or more mbufs of data. * In order to avoid blocking network interrupts for the entire time here, * we splx() while doing the actual copy to user space. * Although the sockbuf is locked, new data may still be appended, * and thus we must maintain consistency of the sockbuf during that time. * * The caller may receive the data as a single mbuf chain by supplying * an mbuf **mp0 for use in returning the chain. The uio is then used * only for the count in uio_resid. 
*/
/*
 * Arguments, as used below:
 *	paddr    - if non-null, receives the leading address mbuf for
 *	           protocols with PR_ADDR set (cleared to 0 on entry).
 *	uio      - destination for the data; uio_resid bounds the transfer.
 *	mp0      - if non-null, data mbufs are handed back through it
 *	           instead of being copied out with uiomove().
 *	controlp - if non-null, receives control (MT_CONTROL) mbufs.
 *	flagsp   - if non-null, MSG_* flags in (MSG_EOR is ignored); the
 *	           resulting flags are or'ed back in on the normal return.
 */
int
soreceive(so, paddr, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct mbuf **paddr;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/* Out-of-band data is fetched from the protocol, not the sockbuf. */
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB,
		    m, (struct mbuf *)(flags & MSG_PEEK), (struct mbuf *)0);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0);

restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		/* A pending error is reported only once queued data is gone. */
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* OOB data or an end-of-record mark: deliver what is queued. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	/* Step 1: the address mbuf, for protocols that prepend one. */
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	/* Step 2: any control mbufs that follow. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				/* SCM_RIGHTS messages go through the domain's externalize hook. */
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
				   error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	/* Step 3: the data.  moff is the offset within m, offset within the peek. */
	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		/* Never mix OOB and normal data in one receive. */
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		/* Clip the transfer at the OOB mark and at the end of this mbuf. */
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* The whole (remaining) mbuf was consumed. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: advance within it. */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	/* Atomic protocols: whatever is left of the record is truncated. */
	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)flags, (struct mbuf *)0);
	}
	/* Nothing was transferred and nothing ends the receive: try again. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * soshutdown: shut down the receive and/or send side of a socket.
 * "how" is incremented and then tested as a mask of the FREAD and FWRITE
 * bits: the read side is flushed with sorflush(), the write side is
 * passed to the protocol as PRU_SHUTDOWN.  Returns the protocol's result
 * when the write side is involved, otherwise 0.
 */
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return ((*pr->pr_usrreq)(so, PRU_SHUTDOWN,
		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
	return (0);
}

/*
 * sorflush: discard everything in the receive buffer.
 * The sockbuf is copied aside and zeroed at splimp; the copy is then
 * disposed of (through the domain's dom_dispose hook for PR_RIGHTS
 * protocols) and released after the priority level is restored.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/*
 * sosetopt: set a socket option from the mbuf "m0".
 * Options at a level other than SOL_SOCKET are passed straight to the
 * protocol's pr_ctloutput (ENOPROTOOPT if it has none).  SOL_SOCKET
 * options are applied here and then also offered to pr_ctloutput, which
 * takes over the mbuf; otherwise the mbuf is freed before returning.
 */
int
sosetopt(so, level, optname, m0)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... */

		/* Boolean options: the option name doubles as the so_options bit. */
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) *mtod(m, int *)) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			case SO_SNDLOWAT:
				so->so_snd.sb_lowat = *mtod(m, int *);
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat = *mtod(m, int *);
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			/*
			 * The timeout is stored as a tick count in a short.
			 * NOTE(review): only an upper bound on tv_sec is
			 * checked; negative tv_sec or tv_usec and out-of-range
			 * tv_usec values are not rejected here -- confirm
			 * whether callers can pass them.
			 */
			if (tv->tv_sec > SHRT_MAX / hz - hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		case SO_PRIVSTATE:
			/* we don't care what the parameter is... */
			so->so_state &= ~SS_PRIV;
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}

/*
 * sogetopt: fetch a socket option into a newly allocated mbuf returned
 * through "mp".  Options at a level other than SOL_SOCKET are passed to
 * the protocol's pr_ctloutput (ENOPROTOOPT if it has none).  Unknown
 * SOL_SOCKET options free the mbuf and return ENOPROTOOPT.
 */
int
sogetopt(so, level, optname, mp)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		/* Default result size; SO_LINGER and the timeouts override it. */
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
				so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_PRIVSTATE:
			*mtod(m, int *) = so->so_state & SS_PRIV;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			/* Reading the pending error also clears it. */
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			int val = (optname == SO_SNDTIMEO ?
			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			/* Convert the stored tick count back into a timeval. */
			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

/*
 * sohasoutofband: post SIGURG for the socket and wake up selectors on
 * the receive buffer.  A negative so_pgid signals the process group
 * -so_pgid; a positive one signals the process found by pfind().
 */
void
sohasoutofband(so)
	register struct socket *so;
{
	struct proc *p;

	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}
Index: head/sys/kern/uipc_syscalls.c
===================================================================
--- head/sys/kern/uipc_syscalls.c (revision 14092)
+++ head/sys/kern/uipc_syscalls.c (revision 14093)
@@ -1,1275 +1,1275 @@
/*
 * Copyright (c) 1982, 1986, 1989, 1990, 1993
 * The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 * This product includes software developed by the University of
 * California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 - * $Id: uipc_syscalls.c,v 1.12 1996/01/03 21:42:21 wollman Exp $ + * $Id: uipc_syscalls.c,v 1.13 1996/01/28 23:41:40 dyson Exp $ */ #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif extern int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags, int *retsize)); extern int recvit __P((struct proc *p, int s, struct msghdr *mp, caddr_t namelenp, int *retsize)); static int accept1 __P((struct proc *p, struct accept_args *uap, int *retval, int compat)); static int getsockname1 __P((struct proc *p, struct getsockname_args *uap, int *retval, int compat)); static int getpeername1 __P((struct proc *p, struct getpeername_args *uap, int *retval, int compat)); /* * System call interface to the socket abstraction. 
*/ #if defined(COMPAT_43) || defined(COMPAT_SUNOS) #define COMPAT_OLDSOCK #endif extern struct fileops socketops; int socket(p, uap, retval) struct proc *p; register struct socket_args /* { int domain; int type; int protocol; } */ *uap; int *retval; { struct filedesc *fdp = p->p_fd; struct socket *so; struct file *fp; int fd, error; error = falloc(p, &fp, &fd); if (error) return (error); fp->f_flag = FREAD|FWRITE; fp->f_type = DTYPE_SOCKET; fp->f_ops = &socketops; - error = socreate(uap->domain, &so, uap->type, uap->protocol); + error = socreate(uap->domain, &so, uap->type, uap->protocol, p); if (error) { fdp->fd_ofiles[fd] = 0; ffree(fp); } else { fp->f_data = (caddr_t)so; *retval = fd; } return (error); } /* ARGSUSED */ int bind(p, uap, retval) struct proc *p; register struct bind_args /* { int s; caddr_t name; int namelen; } */ *uap; int *retval; { struct file *fp; struct mbuf *nam; int error; error = getsock(p->p_fd, uap->s, &fp); if (error) return (error); error = sockargs(&nam, uap->name, uap->namelen, MT_SONAME); if (error) return (error); error = sobind((struct socket *)fp->f_data, nam); m_freem(nam); return (error); } /* ARGSUSED */ int listen(p, uap, retval) struct proc *p; register struct listen_args /* { int s; int backlog; } */ *uap; int *retval; { struct file *fp; int error; error = getsock(p->p_fd, uap->s, &fp); if (error) return (error); return (solisten((struct socket *)fp->f_data, uap->backlog)); } static int accept1(p, uap, retval, compat) struct proc *p; register struct accept_args /* { int s; caddr_t name; int *anamelen; } */ *uap; int *retval; int compat; { struct file *fp; struct mbuf *nam; int namelen, error, s; register struct socket *so; if (uap->name) { error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen, sizeof (namelen)); if(error) return (error); } error = getsock(p->p_fd, uap->s, &fp); if (error) return (error); s = splnet(); so = (struct socket *)fp->f_data; if ((so->so_options & SO_ACCEPTCONN) == 0) { splx(s); return (EINVAL); 
} if ((so->so_state & SS_NBIO) && so->so_qlen == 0) { splx(s); return (EWOULDBLOCK); } while (so->so_qlen == 0 && so->so_error == 0) { if (so->so_state & SS_CANTRCVMORE) { so->so_error = ECONNABORTED; break; } error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, "accept", 0); if (error) { splx(s); return (error); } } if (so->so_error) { error = so->so_error; so->so_error = 0; splx(s); return (error); } error = falloc(p, &fp, retval); if (error) { splx(s); return (error); } { struct socket *aso = so->so_q; if (soqremque(aso, 1) == 0) panic("accept"); so = aso; } fp->f_type = DTYPE_SOCKET; fp->f_flag = FREAD|FWRITE; fp->f_ops = &socketops; fp->f_data = (caddr_t)so; nam = m_get(M_WAIT, MT_SONAME); (void) soaccept(so, nam); if (uap->name) { #ifdef COMPAT_OLDSOCK if (compat) mtod(nam, struct osockaddr *)->sa_family = mtod(nam, struct sockaddr *)->sa_family; #endif if (namelen > nam->m_len) namelen = nam->m_len; /* SHOULD COPY OUT A CHAIN HERE */ error = copyout(mtod(nam, caddr_t), (caddr_t)uap->name, (u_int)namelen); if (!error) error = copyout((caddr_t)&namelen, (caddr_t)uap->anamelen, sizeof (*uap->anamelen)); } m_freem(nam); splx(s); return (error); } int accept(p, uap, retval) struct proc *p; struct accept_args *uap; int *retval; { return (accept1(p, uap, retval, 0)); } #ifdef COMPAT_OLDSOCK int oaccept(p, uap, retval) struct proc *p; struct accept_args *uap; int *retval; { return (accept1(p, uap, retval, 1)); } #endif /* COMPAT_OLDSOCK */ /* ARGSUSED */ int connect(p, uap, retval) struct proc *p; register struct connect_args /* { int s; caddr_t name; int namelen; } */ *uap; int *retval; { struct file *fp; register struct socket *so; struct mbuf *nam; int error, s; error = getsock(p->p_fd, uap->s, &fp); if (error) return (error); so = (struct socket *)fp->f_data; if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) return (EALREADY); error = sockargs(&nam, uap->name, uap->namelen, MT_SONAME); if (error) return (error); error = soconnect(so, nam); if 
(error) goto bad; if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { m_freem(nam); return (EINPROGRESS); } s = splnet(); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, "connec", 0); if (error) break; } if (error == 0) { error = so->so_error; so->so_error = 0; } splx(s); bad: so->so_state &= ~SS_ISCONNECTING; m_freem(nam); if (error == ERESTART) error = EINTR; return (error); } int socketpair(p, uap, retval) struct proc *p; register struct socketpair_args /* { int domain; int type; int protocol; int *rsv; } */ *uap; int retval[]; { register struct filedesc *fdp = p->p_fd; struct file *fp1, *fp2; struct socket *so1, *so2; int fd, error, sv[2]; - error = socreate(uap->domain, &so1, uap->type, uap->protocol); + error = socreate(uap->domain, &so1, uap->type, uap->protocol, p); if (error) return (error); - error = socreate(uap->domain, &so2, uap->type, uap->protocol); + error = socreate(uap->domain, &so2, uap->type, uap->protocol, p); if (error) goto free1; error = falloc(p, &fp1, &fd); if (error) goto free2; sv[0] = fd; fp1->f_flag = FREAD|FWRITE; fp1->f_type = DTYPE_SOCKET; fp1->f_ops = &socketops; fp1->f_data = (caddr_t)so1; error = falloc(p, &fp2, &fd); if (error) goto free3; fp2->f_flag = FREAD|FWRITE; fp2->f_type = DTYPE_SOCKET; fp2->f_ops = &socketops; fp2->f_data = (caddr_t)so2; sv[1] = fd; error = soconnect2(so1, so2); if (error) goto free4; if (uap->type == SOCK_DGRAM) { /* * Datagram socket connection is asymmetric. */ error = soconnect2(so2, so1); if (error) goto free4; } error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int)); retval[0] = sv[0]; /* XXX ??? */ retval[1] = sv[1]; /* XXX ??? 
*/ return (error); free4: ffree(fp2); fdp->fd_ofiles[sv[1]] = 0; free3: ffree(fp1); fdp->fd_ofiles[sv[0]] = 0; free2: (void)soclose(so2); free1: (void)soclose(so1); return (error); } int sendit(p, s, mp, flags, retsize) register struct proc *p; int s; register struct msghdr *mp; int flags, *retsize; { struct file *fp; struct uio auio; register struct iovec *iov; register int i; struct mbuf *to, *control; int len, error; #ifdef KTRACE struct iovec *ktriov = NULL; #endif error = getsock(p->p_fd, s, &fp); if (error) return (error); auio.uio_iov = mp->msg_iov; auio.uio_iovcnt = mp->msg_iovlen; auio.uio_segflg = UIO_USERSPACE; auio.uio_rw = UIO_WRITE; auio.uio_procp = p; auio.uio_offset = 0; /* XXX */ auio.uio_resid = 0; iov = mp->msg_iov; for (i = 0; i < mp->msg_iovlen; i++, iov++) { if ((auio.uio_resid += iov->iov_len) < 0) return (EINVAL); } if (mp->msg_name) { error = sockargs(&to, mp->msg_name, mp->msg_namelen, MT_SONAME); if (error) return (error); } else to = 0; if (mp->msg_control) { if (mp->msg_controllen < sizeof(struct cmsghdr) #ifdef COMPAT_OLDSOCK && mp->msg_flags != MSG_COMPAT #endif ) { error = EINVAL; goto bad; } error = sockargs(&control, mp->msg_control, mp->msg_controllen, MT_CONTROL); if (error) goto bad; #ifdef COMPAT_OLDSOCK if (mp->msg_flags == MSG_COMPAT) { register struct cmsghdr *cm; M_PREPEND(control, sizeof(*cm), M_WAIT); if (control == 0) { error = ENOBUFS; goto bad; } else { cm = mtod(control, struct cmsghdr *); cm->cmsg_len = control->m_len; cm->cmsg_level = SOL_SOCKET; cm->cmsg_type = SCM_RIGHTS; } } #endif } else control = 0; #ifdef KTRACE if (KTRPOINT(p, KTR_GENIO)) { int iovlen = auio.uio_iovcnt * sizeof (struct iovec); MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); } #endif len = auio.uio_resid; error = sosend((struct socket *)fp->f_data, to, &auio, (struct mbuf *)0, control, flags); if (error) { if (auio.uio_resid != len && (error == ERESTART || error == EINTR || error 
== EWOULDBLOCK)) error = 0; if (error == EPIPE) psignal(p, SIGPIPE); } if (error == 0) *retsize = len - auio.uio_resid; #ifdef KTRACE if (ktriov != NULL) { if (error == 0) ktrgenio(p->p_tracep, s, UIO_WRITE, ktriov, *retsize, error); FREE(ktriov, M_TEMP); } #endif bad: if (to) m_freem(to); return (error); } int sendto(p, uap, retval) struct proc *p; register struct sendto_args /* { int s; caddr_t buf; size_t len; int flags; caddr_t to; int tolen; } */ *uap; int *retval; { struct msghdr msg; struct iovec aiov; msg.msg_name = uap->to; msg.msg_namelen = uap->tolen; msg.msg_iov = &aiov; msg.msg_iovlen = 1; msg.msg_control = 0; #ifdef COMPAT_OLDSOCK msg.msg_flags = 0; #endif aiov.iov_base = uap->buf; aiov.iov_len = uap->len; return (sendit(p, uap->s, &msg, uap->flags, retval)); } #ifdef COMPAT_OLDSOCK int osend(p, uap, retval) struct proc *p; register struct osend_args /* { int s; caddr_t buf; int len; int flags; } */ *uap; int *retval; { struct msghdr msg; struct iovec aiov; msg.msg_name = 0; msg.msg_namelen = 0; msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = uap->buf; aiov.iov_len = uap->len; msg.msg_control = 0; msg.msg_flags = 0; return (sendit(p, uap->s, &msg, uap->flags, retval)); } int osendmsg(p, uap, retval) struct proc *p; register struct osendmsg_args /* { int s; caddr_t msg; int flags; } */ *uap; int *retval; { struct msghdr msg; struct iovec aiov[UIO_SMALLIOV], *iov; int error; error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr)); if (error) return (error); if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) return (EMSGSIZE); MALLOC(iov, struct iovec *, sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, M_WAITOK); } else iov = aiov; error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); if (error) goto done; msg.msg_flags = MSG_COMPAT; msg.msg_iov = iov; error = sendit(p, uap->s, &msg, uap->flags, retval); done: if (iov != aiov) FREE(iov, M_IOV); 
return (error); } #endif int sendmsg(p, uap, retval) struct proc *p; register struct sendmsg_args /* { int s; caddr_t msg; int flags; } */ *uap; int *retval; { struct msghdr msg; struct iovec aiov[UIO_SMALLIOV], *iov; int error; error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg)); if (error) return (error); if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) return (EMSGSIZE); MALLOC(iov, struct iovec *, sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, M_WAITOK); } else iov = aiov; if (msg.msg_iovlen && (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) goto done; msg.msg_iov = iov; #ifdef COMPAT_OLDSOCK msg.msg_flags = 0; #endif error = sendit(p, uap->s, &msg, uap->flags, retval); done: if (iov != aiov) FREE(iov, M_IOV); return (error); } int recvit(p, s, mp, namelenp, retsize) register struct proc *p; int s; register struct msghdr *mp; caddr_t namelenp; int *retsize; { struct file *fp; struct uio auio; register struct iovec *iov; register int i; int len, error; struct mbuf *from = 0, *control = 0; #ifdef KTRACE struct iovec *ktriov = NULL; #endif error = getsock(p->p_fd, s, &fp); if (error) return (error); auio.uio_iov = mp->msg_iov; auio.uio_iovcnt = mp->msg_iovlen; auio.uio_segflg = UIO_USERSPACE; auio.uio_rw = UIO_READ; auio.uio_procp = p; auio.uio_offset = 0; /* XXX */ auio.uio_resid = 0; iov = mp->msg_iov; for (i = 0; i < mp->msg_iovlen; i++, iov++) { if ((auio.uio_resid += iov->iov_len) < 0) return (EINVAL); } #ifdef KTRACE if (KTRPOINT(p, KTR_GENIO)) { int iovlen = auio.uio_iovcnt * sizeof (struct iovec); MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); } #endif len = auio.uio_resid; error = soreceive((struct socket *)fp->f_data, &from, &auio, (struct mbuf **)0, mp->msg_control ? 
&control : (struct mbuf **)0, &mp->msg_flags); if (error) { if (auio.uio_resid != len && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; } #ifdef KTRACE if (ktriov != NULL) { if (error == 0) ktrgenio(p->p_tracep, s, UIO_READ, ktriov, len - auio.uio_resid, error); FREE(ktriov, M_TEMP); } #endif if (error) goto out; *retsize = len - auio.uio_resid; if (mp->msg_name) { len = mp->msg_namelen; if (len <= 0 || from == 0) len = 0; else { #ifdef COMPAT_OLDSOCK if (mp->msg_flags & MSG_COMPAT) mtod(from, struct osockaddr *)->sa_family = mtod(from, struct sockaddr *)->sa_family; #endif if (len > from->m_len) len = from->m_len; /* else if len < from->m_len ??? */ error = copyout(mtod(from, caddr_t), (caddr_t)mp->msg_name, (unsigned)len); if (error) goto out; } mp->msg_namelen = len; if (namelenp && (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) { #ifdef COMPAT_OLDSOCK if (mp->msg_flags & MSG_COMPAT) error = 0; /* old recvfrom didn't check */ else #endif goto out; } } if (mp->msg_control) { #ifdef COMPAT_OLDSOCK /* * We assume that old recvmsg calls won't receive access * rights and other control info, esp. as control info * is always optional and those options didn't exist in 4.3. * If we receive rights, trim the cmsghdr; anything else * is tossed. 
*/ if (control && mp->msg_flags & MSG_COMPAT) { if (mtod(control, struct cmsghdr *)->cmsg_level != SOL_SOCKET || mtod(control, struct cmsghdr *)->cmsg_type != SCM_RIGHTS) { mp->msg_controllen = 0; goto out; } control->m_len -= sizeof (struct cmsghdr); control->m_data += sizeof (struct cmsghdr); } #endif len = mp->msg_controllen; if (len <= 0 || control == 0) len = 0; else { if (len >= control->m_len) len = control->m_len; else mp->msg_flags |= MSG_CTRUNC; error = copyout((caddr_t)mtod(control, caddr_t), (caddr_t)mp->msg_control, (unsigned)len); } mp->msg_controllen = len; } out: if (from) m_freem(from); if (control) m_freem(control); return (error); } int recvfrom(p, uap, retval) struct proc *p; register struct recvfrom_args /* { int s; caddr_t buf; size_t len; int flags; caddr_t from; int *fromlenaddr; } */ *uap; int *retval; { struct msghdr msg; struct iovec aiov; int error; if (uap->fromlenaddr) { error = copyin((caddr_t)uap->fromlenaddr, (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen)); if (error) return (error); } else msg.msg_namelen = 0; msg.msg_name = uap->from; msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = uap->buf; aiov.iov_len = uap->len; msg.msg_control = 0; msg.msg_flags = uap->flags; return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr, retval)); } #ifdef COMPAT_OLDSOCK int orecvfrom(p, uap, retval) struct proc *p; struct recvfrom_args *uap; int *retval; { uap->flags |= MSG_COMPAT; return (recvfrom(p, uap, retval)); } #endif #ifdef COMPAT_OLDSOCK int orecv(p, uap, retval) struct proc *p; register struct orecv_args /* { int s; caddr_t buf; int len; int flags; } */ *uap; int *retval; { struct msghdr msg; struct iovec aiov; msg.msg_name = 0; msg.msg_namelen = 0; msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = uap->buf; aiov.iov_len = uap->len; msg.msg_control = 0; msg.msg_flags = uap->flags; return (recvit(p, uap->s, &msg, (caddr_t)0, retval)); } /* * Old recvmsg. 
This code takes advantage of the fact that the old msghdr * overlays the new one, missing only the flags, and with the (old) access * rights where the control fields are now. */ int orecvmsg(p, uap, retval) struct proc *p; register struct orecvmsg_args /* { int s; struct omsghdr *msg; int flags; } */ *uap; int *retval; { struct msghdr msg; struct iovec aiov[UIO_SMALLIOV], *iov; int error; error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (struct omsghdr)); if (error) return (error); if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) return (EMSGSIZE); MALLOC(iov, struct iovec *, sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, M_WAITOK); } else iov = aiov; msg.msg_flags = uap->flags | MSG_COMPAT; error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); if (error) goto done; msg.msg_iov = iov; error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen, retval); if (msg.msg_controllen && error == 0) error = copyout((caddr_t)&msg.msg_controllen, (caddr_t)&uap->msg->msg_accrightslen, sizeof (int)); done: if (iov != aiov) FREE(iov, M_IOV); return (error); } #endif int recvmsg(p, uap, retval) struct proc *p; register struct recvmsg_args /* { int s; struct msghdr *msg; int flags; } */ *uap; int *retval; { struct msghdr msg; struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; register int error; error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg)); if (error) return (error); if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) return (EMSGSIZE); MALLOC(iov, struct iovec *, sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, M_WAITOK); } else iov = aiov; #ifdef COMPAT_OLDSOCK msg.msg_flags = uap->flags &~ MSG_COMPAT; #else msg.msg_flags = uap->flags; #endif uiov = msg.msg_iov; msg.msg_iov = iov; error = copyin((caddr_t)uiov, (caddr_t)iov, (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); if (error) goto done; error = recvit(p, uap->s, 
&msg, (caddr_t)0, retval); if (!error) { msg.msg_iov = uiov; error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg)); } done: if (iov != aiov) FREE(iov, M_IOV); return (error); } /* ARGSUSED */ int shutdown(p, uap, retval) struct proc *p; register struct shutdown_args /* { int s; int how; } */ *uap; int *retval; { struct file *fp; int error; error = getsock(p->p_fd, uap->s, &fp); if (error) return (error); return (soshutdown((struct socket *)fp->f_data, uap->how)); } /* ARGSUSED */ int setsockopt(p, uap, retval) struct proc *p; register struct setsockopt_args /* { int s; int level; int name; caddr_t val; int valsize; } */ *uap; int *retval; { struct file *fp; struct mbuf *m = NULL; int error; error = getsock(p->p_fd, uap->s, &fp); if (error) return (error); if (uap->valsize > MLEN) return (EINVAL); if (uap->val) { m = m_get(M_WAIT, MT_SOOPTS); if (m == NULL) return (ENOBUFS); error = copyin(uap->val, mtod(m, caddr_t), (u_int)uap->valsize); if (error) { (void) m_free(m); return (error); } m->m_len = uap->valsize; } return (sosetopt((struct socket *)fp->f_data, uap->level, uap->name, m)); } /* ARGSUSED */ int getsockopt(p, uap, retval) struct proc *p; register struct getsockopt_args /* { int s; int level; int name; caddr_t val; int *avalsize; } */ *uap; int *retval; { struct file *fp; struct mbuf *m = NULL; int valsize, error; error = getsock(p->p_fd, uap->s, &fp); if (error) return (error); if (uap->val) { error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize, sizeof (valsize)); if (error) return (error); } else valsize = 0; if ((error = sogetopt((struct socket *)fp->f_data, uap->level, uap->name, &m)) == 0 && uap->val && valsize && m != NULL) { if (valsize > m->m_len) valsize = m->m_len; error = copyout(mtod(m, caddr_t), uap->val, (u_int)valsize); if (error == 0) error = copyout((caddr_t)&valsize, (caddr_t)uap->avalsize, sizeof (valsize)); } if (m != NULL) (void) m_free(m); return (error); } #ifdef OLD_PIPE /* ARGSUSED */ int pipe(p, uap, retval) struct 
proc *p; struct pipe_args /* { int dummy; } */ *uap; int retval[]; { register struct filedesc *fdp = p->p_fd; struct file *rf, *wf; struct socket *rso, *wso; int fd, error; - error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0); + error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0, p); if (error) return (error); - error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0); + error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0, p); if (error) goto free1; error = falloc(p, &rf, &fd); if (error) goto free2; retval[0] = fd; rf->f_flag = FREAD | FWRITE; rf->f_type = DTYPE_SOCKET; rf->f_ops = &socketops; rf->f_data = (caddr_t)rso; error = falloc(p, &wf, &fd); if (error) goto free3; wf->f_flag = FREAD | FWRITE; wf->f_type = DTYPE_SOCKET; wf->f_ops = &socketops; wf->f_data = (caddr_t)wso; retval[1] = fd; error = unp_connect2(wso, rso); if (error) goto free4; return (0); free4: ffree(wf); fdp->fd_ofiles[retval[1]] = 0; free3: ffree(rf); fdp->fd_ofiles[retval[0]] = 0; free2: (void)soclose(wso); free1: (void)soclose(rso); return (error); } #endif /* * Get socket name. 
*/ /* ARGSUSED */ static int getsockname1(p, uap, retval, compat) struct proc *p; register struct getsockname_args /* { int fdes; caddr_t asa; int *alen; } */ *uap; int *retval; int compat; { struct file *fp; register struct socket *so; struct mbuf *m; int len, error; error = getsock(p->p_fd, uap->fdes, &fp); if (error) return (error); error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); if (error) return (error); so = (struct socket *)fp->f_data; m = m_getclr(M_WAIT, MT_SONAME); if (m == NULL) return (ENOBUFS); error = (*so->so_proto->pr_usrreq)(so, PRU_SOCKADDR, 0, m, 0); if (error) goto bad; if (len > m->m_len) len = m->m_len; #ifdef COMPAT_OLDSOCK if (compat) mtod(m, struct osockaddr *)->sa_family = mtod(m, struct sockaddr *)->sa_family; #endif error = copyout(mtod(m, caddr_t), (caddr_t)uap->asa, (u_int)len); if (error == 0) error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len)); bad: m_freem(m); return (error); } int getsockname(p, uap, retval) struct proc *p; struct getsockname_args *uap; int *retval; { return (getsockname1(p, uap, retval, 0)); } #ifdef COMPAT_OLDSOCK int ogetsockname(p, uap, retval) struct proc *p; struct getsockname_args *uap; int *retval; { return (getsockname1(p, uap, retval, 1)); } #endif /* COMPAT_OLDSOCK */ /* * Get name of peer for connected socket. 
*/ /* ARGSUSED */ static int getpeername1(p, uap, retval, compat) struct proc *p; register struct getpeername_args /* { int fdes; caddr_t asa; int *alen; } */ *uap; int *retval; int compat; { struct file *fp; register struct socket *so; struct mbuf *m; int len, error; error = getsock(p->p_fd, uap->fdes, &fp); if (error) return (error); so = (struct socket *)fp->f_data; if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) return (ENOTCONN); error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); if (error) return (error); m = m_getclr(M_WAIT, MT_SONAME); if (m == NULL) return (ENOBUFS); error = (*so->so_proto->pr_usrreq)(so, PRU_PEERADDR, 0, m, 0); if (error) goto bad; if (len > m->m_len) len = m->m_len; #ifdef COMPAT_OLDSOCK if (compat) mtod(m, struct osockaddr *)->sa_family = mtod(m, struct sockaddr *)->sa_family; #endif error = copyout(mtod(m, caddr_t), (caddr_t)uap->asa, (u_int)len); if (error) goto bad; error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len)); bad: m_freem(m); return (error); } int getpeername(p, uap, retval) struct proc *p; struct getpeername_args *uap; int *retval; { return (getpeername1(p, uap, retval, 0)); } #ifdef COMPAT_OLDSOCK int ogetpeername(p, uap, retval) struct proc *p; struct ogetpeername_args *uap; int *retval; { /* XXX uap should have type `getpeername_args *' to begin with. */ return (getpeername1(p, (struct getpeername_args *)uap, retval, 1)); } #endif /* COMPAT_OLDSOCK */ int sockargs(mp, buf, buflen, type) struct mbuf **mp; caddr_t buf; int buflen, type; { register struct sockaddr *sa; register struct mbuf *m; int error; if ((u_int)buflen > MLEN) { #ifdef COMPAT_OLDSOCK if (type == MT_SONAME && (u_int)buflen <= 112) buflen = MLEN; /* unix domain compat. 
hack */ else #endif return (EINVAL); } m = m_get(M_WAIT, type); if (m == NULL) return (ENOBUFS); m->m_len = buflen; error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); if (error) (void) m_free(m); else { *mp = m; if (type == MT_SONAME) { sa = mtod(m, struct sockaddr *); #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN if (sa->sa_family == 0 && sa->sa_len < AF_MAX) sa->sa_family = sa->sa_len; #endif sa->sa_len = buflen; } } return (error); } int getsock(fdp, fdes, fpp) struct filedesc *fdp; int fdes; struct file **fpp; { register struct file *fp; if ((unsigned)fdes >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fdes]) == NULL) return (EBADF); if (fp->f_type != DTYPE_SOCKET) return (ENOTSOCK); *fpp = fp; return (0); } Index: head/sys/miscfs/fifofs/fifo_vnops.c =================================================================== --- head/sys/miscfs/fifofs/fifo_vnops.c (revision 14092) +++ head/sys/miscfs/fifofs/fifo_vnops.c (revision 14093) @@ -1,545 +1,545 @@ /* * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fifo_vnops.c 8.2 (Berkeley) 1/4/94 - * $Id: fifo_vnops.c,v 1.14 1995/12/11 10:26:34 phk Exp $ + * $Id: fifo_vnops.c,v 1.15 1995/12/14 09:53:03 phk Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * This structure is associated with the FIFO vnode and stores * the state associated with the FIFO. 
 */
struct fifoinfo {
	struct socket	*fi_readsock;	/* socket used by fifo_read() */
	struct socket	*fi_writesock;	/* socket used by fifo_write() */
	long		fi_readers;	/* opens with FREAD outstanding */
	long		fi_writers;	/* opens with FWRITE outstanding */
};

static int	fifo_ebadf __P((void));
static int	fifo_unlock __P((struct vop_unlock_args *));
static int	fifo_lock __P((struct vop_lock_args *));
static int	fifo_print __P((struct vop_print_args *));

/*
 * Vnode operations vector for fifos.  fifo_vnodeop_entries pairs each
 * operation descriptor with its fifo handler; operations not listed
 * fall back to the vop_default_desc entry (vn_default_error).  The
 * table ends with a { NULL, NULL } sentinel.
 */
vop_t **fifo_vnodeop_p;
static struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
	{ &vop_default_desc, (vop_t *)vn_default_error },
	{ &vop_lookup_desc, (vop_t *)fifo_lookup },		/* lookup */
	{ &vop_create_desc, (vop_t *)fifo_create },		/* create */
	{ &vop_mknod_desc, (vop_t *)fifo_mknod },		/* mknod */
	{ &vop_open_desc, (vop_t *)fifo_open },			/* open */
	{ &vop_close_desc, (vop_t *)fifo_close },		/* close */
	{ &vop_access_desc, (vop_t *)fifo_access },		/* access */
	{ &vop_getattr_desc, (vop_t *)fifo_getattr },		/* getattr */
	{ &vop_setattr_desc, (vop_t *)fifo_setattr },		/* setattr */
	{ &vop_read_desc, (vop_t *)fifo_read },			/* read */
	{ &vop_write_desc, (vop_t *)fifo_write },		/* write */
	{ &vop_ioctl_desc, (vop_t *)fifo_ioctl },		/* ioctl */
	{ &vop_select_desc, (vop_t *)fifo_select },		/* select */
	{ &vop_mmap_desc, (vop_t *)fifo_mmap },			/* mmap */
	{ &vop_fsync_desc, (vop_t *)fifo_fsync },		/* fsync */
	{ &vop_seek_desc, (vop_t *)fifo_seek },			/* seek */
	{ &vop_remove_desc, (vop_t *)fifo_remove },		/* remove */
	{ &vop_link_desc, (vop_t *)fifo_link },			/* link */
	{ &vop_rename_desc, (vop_t *)fifo_rename },		/* rename */
	{ &vop_mkdir_desc, (vop_t *)fifo_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, (vop_t *)fifo_rmdir },		/* rmdir */
	{ &vop_symlink_desc, (vop_t *)fifo_symlink },		/* symlink */
	{ &vop_readdir_desc, (vop_t *)fifo_readdir },		/* readdir */
	{ &vop_readlink_desc, (vop_t *)fifo_readlink },		/* readlink */
	{ &vop_abortop_desc, (vop_t *)fifo_abortop },		/* abortop */
	{ &vop_inactive_desc, (vop_t *)fifo_inactive },		/* inactive */
	{ &vop_reclaim_desc, (vop_t *)fifo_reclaim },		/* reclaim */
	{ &vop_lock_desc, (vop_t *)fifo_lock },			/* lock */
	{ &vop_unlock_desc, (vop_t *)fifo_unlock },		/* unlock */
	{ &vop_bmap_desc, (vop_t *)fifo_bmap },			/* bmap */
	{ &vop_strategy_desc, (vop_t *)fifo_strategy },		/* strategy */
	{ &vop_print_desc, (vop_t *)fifo_print },		/* print */
	{ &vop_islocked_desc, (vop_t *)fifo_islocked },		/* islocked */
	{ &vop_pathconf_desc, (vop_t *)fifo_pathconf },		/* pathconf */
	{ &vop_advlock_desc, (vop_t *)fifo_advlock },		/* advlock */
	{ &vop_blkatoff_desc, (vop_t *)fifo_blkatoff },		/* blkatoff */
	{ &vop_valloc_desc, (vop_t *)fifo_valloc },		/* valloc */
	{ &vop_vfree_desc, (vop_t *)fifo_vfree },		/* vfree */
	{ &vop_truncate_desc, (vop_t *)fifo_truncate },		/* truncate */
	{ &vop_update_desc, (vop_t *)fifo_update },		/* update */
	{ &vop_bwrite_desc, (vop_t *)fifo_bwrite },		/* bwrite */
	{ NULL, NULL }
};
/* Ties the vector pointer to its entry table. */
static struct vnodeopv_desc fifo_vnodeop_opv_desc =
	{ &fifo_vnodeop_p, fifo_vnodeop_entries };

/* NOTE(review): presumably registers the descriptor with the VFS layer -- confirm against the VNODEOP_SET definition. */
VNODEOP_SET(fifo_vnodeop_opv_desc);

/*
 * Trivial lookup routine that always fails.
 * Clears the result vnode pointer and returns ENOTDIR.
 */
/* ARGSUSED */
int
fifo_lookup(ap)
	struct vop_lookup_args /* {
		struct vnode * a_dvp;
		struct vnode ** a_vpp;
		struct componentname * a_cnp;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * Open called to set up a new instance of a fifo or
 * to find an active instance of a fifo.
 */
/*
 * On the first open the fifoinfo is allocated and two AF_UNIX stream
 * sockets are created and connected (write side to read side).  Each
 * open then bumps the reader and/or writer count; a blocking read-only
 * open sleeps until a writer exists and a blocking write-only open
 * sleeps until a reader exists.  A non-blocking write-only open with
 * no readers fails with ENXIO.  On any error after the counts were
 * bumped, VOP_CLOSE() is called to undo them.
 *
 * Returns 0 or an errno value.
 */
/* ARGSUSED */
int
fifo_open(ap)
	struct vop_open_args /* {
		struct vnode *a_vp;
		int  a_mode;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct fifoinfo *fip;
	struct socket *rso, *wso;
	int error;
	static char openstr[] = "fifo";	/* wait message passed to tsleep() */

	if ((fip = vp->v_fifoinfo) == NULL) {
		/* First open: build the socket pair behind this fifo. */
		MALLOC(fip, struct fifoinfo *, sizeof(*fip), M_VNODE, M_WAITOK);
		vp->v_fifoinfo = fip;
-		error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0);
+		error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0, ap->a_p);
		if (error) {
			free(fip, M_VNODE);
			vp->v_fifoinfo = NULL;
			return (error);
		}
		fip->fi_readsock = rso;
-		error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0);
+		error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0, ap->a_p);
		if (error) {
			(void)soclose(rso);
			free(fip, M_VNODE);
			vp->v_fifoinfo = NULL;
			return (error);
		}
		fip->fi_writesock = wso;
		error = unp_connect2(wso, rso);
		if (error) {
			(void)soclose(wso);
			(void)soclose(rso);
			free(fip, M_VNODE);
			vp->v_fifoinfo = NULL;
			return (error);
		}
		fip->fi_readers = fip->fi_writers = 0;
		/* Data flows one way only: wso never receives, rso never sends. */
		wso->so_state |= SS_CANTRCVMORE;
		rso->so_state |= SS_CANTSENDMORE;
	}
	error = 0;
	if ((ap->a_mode & (FREAD|FWRITE)) == (FREAD|FWRITE)) {
		/* Read/write open: acts as both first reader and first writer; never sleeps. */
		if (fip->fi_readers == 0) {
			fip->fi_writesock->so_state &= ~SS_CANTSENDMORE;
			if (fip->fi_writers > 0)
				wakeup((caddr_t)&fip->fi_writers);
		}
		if (fip->fi_writers == 0) {
			fip->fi_readsock->so_state &= ~SS_CANTRCVMORE;
			if (fip->fi_readers > 0)
				wakeup((caddr_t)&fip->fi_readers);
		}
		fip->fi_readers++;
		fip->fi_writers++;
	}
	else if (ap->a_mode & FREAD) {
		fip->fi_readers++;
		if (fip->fi_readers == 1) {
			/* First reader: allow sends and wake writers sleeping in open. */
			fip->fi_writesock->so_state &= ~SS_CANTSENDMORE;
			if (fip->fi_writers > 0)
				wakeup((caddr_t)&fip->fi_writers);
		}
		if (!(ap->a_mode & O_NONBLOCK))
			while (fip->fi_writers == 0) {
				/* Drop the vnode lock while sleeping for a writer. */
				VOP_UNLOCK(vp);
				error = tsleep((caddr_t)&fip->fi_readers,
				    PCATCH | PSOCK, openstr, 0);
				VOP_LOCK(vp);
				if (error)
					break;
			}
	}
	else {
		fip->fi_writers++;
		if (fip->fi_readers == 0 && (ap->a_mode & O_NONBLOCK)) {
			error = ENXIO;
		} else {
			if (fip->fi_writers == 1) {
				/* First writer: allow receives and wake readers sleeping in open. */
				fip->fi_readsock->so_state &= ~SS_CANTRCVMORE;
				if (fip->fi_readers > 0)
					wakeup((caddr_t)&fip->fi_readers);
			}
			while (fip->fi_readers == 0) {
				/* Drop the vnode lock while sleeping for a reader. */
				VOP_UNLOCK(vp);
				error = tsleep((caddr_t)&fip->fi_writers,
				    PCATCH | PSOCK, openstr, 0);
				VOP_LOCK(vp);
				if (error)
					break;
			}
		}
	}
	/* Undo the count bumps above through the normal close path. */
	if (error)
		VOP_CLOSE(vp, ap->a_mode, ap->a_cred, ap->a_p);
	return (error);
}

/*
 * Vnode op for read
 *
 * Receives from the fifo's read socket into the caller's uio.  With
 * IO_NDELAY set the socket is put in non-blocking mode for the duration
 * of the call.  The vnode is unlocked around soreceive().
 */
/* ARGSUSED */
int
fifo_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register struct uio *uio = ap->a_uio;
	register struct socket *rso = ap->a_vp->v_fifoinfo->fi_readsock;
	int error, startresid;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("fifo_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (ap->a_ioflag & IO_NDELAY)
		rso->so_state |= SS_NBIO;
	startresid = uio->uio_resid;
	VOP_UNLOCK(ap->a_vp);
	error = soreceive(rso, (struct mbuf **)0, uio, (struct mbuf **)0,
	    (struct mbuf **)0, (int*)0);
	VOP_LOCK(ap->a_vp);
	/*
	 * Clear EOF indication after first such return.
	 */
	if (uio->uio_resid == startresid)
		rso->so_state &= ~SS_CANTRCVMORE;
	if (ap->a_ioflag & IO_NDELAY)
		rso->so_state &= ~SS_NBIO;
	return (error);
}

/*
 * Vnode op for write
 *
 * Sends the caller's uio on the fifo's write socket.  With IO_NDELAY
 * set the socket is put in non-blocking mode for the duration of the
 * call.  The vnode is unlocked around sosend().
 */
/* ARGSUSED */
int
fifo_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct socket *wso = ap->a_vp->v_fifoinfo->fi_writesock;
	int error;

#ifdef DIAGNOSTIC
	if (ap->a_uio->uio_rw != UIO_WRITE)
		panic("fifo_write mode");
#endif
	if (ap->a_ioflag & IO_NDELAY)
		wso->so_state |= SS_NBIO;
	VOP_UNLOCK(ap->a_vp);
	error = sosend(wso, (struct mbuf *)0, ap->a_uio, 0, (struct mbuf *)0, 0);
	VOP_LOCK(ap->a_vp);
	if (ap->a_ioflag & IO_NDELAY)
		wso->so_state &= ~SS_NBIO;
	return (error);
}

/*
 * Device ioctl operation.
*/ /* ARGSUSED */ int fifo_ioctl(ap) struct vop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct file filetmp; if (ap->a_command == FIONBIO) return (0); if (ap->a_fflag & FREAD) filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock; else filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock; return (soo_ioctl(&filetmp, ap->a_command, ap->a_data, ap->a_p)); } /* ARGSUSED */ int fifo_select(ap) struct vop_select_args /* { struct vnode *a_vp; int a_which; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct file filetmp; if (ap->a_fflags & FREAD) filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock; else filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock; return (soo_select(&filetmp, ap->a_which, ap->a_p)); } /* * This is a noop, simply returning what one has been given. */ int fifo_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { if (ap->a_vpp != NULL) *ap->a_vpp = ap->a_vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * At the moment we do not do any locking. 
*/
/* Lock vnode op for fifos: does nothing and reports success. */
/* ARGSUSED */
static int
fifo_lock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (0);
}

/* Unlock vnode op for fifos: does nothing and reports success. */
/* ARGSUSED */
static int
fifo_unlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (0);
}

/*
 * Device close routine
 *
 * Drops the reader and/or writer count selected by a_fflag.  When the
 * last writer goes away the read socket is told it can receive no more;
 * when the last reader goes away the write socket is told it can send no
 * more.  While the vnode still has other users (v_usecount > 1) nothing
 * else happens and 0 is returned.  Otherwise both sockets are closed, the
 * fifoinfo is freed and v_fifoinfo is cleared; the result is the read
 * socket's soclose() error if non-zero, else the write socket's.
 */
/* ARGSUSED */
int
fifo_close(ap)
	struct vop_close_args /* {
		struct vnode *a_vp;
		int  a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct fifoinfo *fip = vp->v_fifoinfo;
	int error1, error2;

	if (ap->a_fflag & FWRITE) {
		fip->fi_writers--;
		if (fip->fi_writers == 0)
			socantrcvmore(fip->fi_readsock);
	}
	if (ap->a_fflag & FREAD) {
		fip->fi_readers--;
		if (fip->fi_readers == 0)
			socantsendmore(fip->fi_writesock);
	}
	/* Only the final user of the vnode tears the socket pair down. */
	if (vp->v_usecount > 1)
		return (0);
	/* Both sockets are always closed, even if the first close fails. */
	error1 = soclose(fip->fi_readsock);
	error2 = soclose(fip->fi_writesock);
	FREE(fip, M_VNODE);
	vp->v_fifoinfo = NULL;
	if (error1)
		return (error1);
	return (error2);
}

/*
 * Print out internal contents of a fifo vnode.
 *
 * Emits the reader and writer counts with no trailing newline, so it can
 * be appended to a line another print routine has started.  Returns 0.
 */
int
fifo_printinfo(vp)
	struct vnode *vp;
{
	register struct fifoinfo *fip = vp->v_fifoinfo;

	printf(", fifo with %ld readers and %ld writers",
		fip->fi_readers, fip->fi_writers);
	return (0);
}

/*
 * Print out the contents of a fifo vnode.
 *
 * Writes the tag, the fifo_printinfo() text and a newline.  Returns 0.
 */
static int
fifo_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	printf("tag VT_NON");
	fifo_printinfo(ap->a_vp);
	printf("\n");
	return (0);
}

/*
 * Return POSIX pathconf information applicable to fifo's.
 *
 * Stores the value for _PC_LINK_MAX, _PC_PIPE_BUF or
 * _PC_CHOWN_RESTRICTED in *a_retval and returns 0; any other name
 * returns EINVAL and leaves *a_retval untouched.
 */
int
fifo_pathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		int *a_retval;
	} */ *ap;
{

	switch (ap->a_name) {
	case _PC_LINK_MAX:
		*ap->a_retval = LINK_MAX;
		return (0);
	case _PC_PIPE_BUF:
		*ap->a_retval = PIPE_BUF;
		return (0);
	case _PC_CHOWN_RESTRICTED:
		*ap->a_retval = 1;
		return (0);
	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Fifo failed operation
 *
 * Always returns EBADF.
 */
static int
fifo_ebadf()
{

	return (EBADF);
}

/*
 * Fifo advisory byte-level locks.
*/ /* ARGSUSED */ int fifo_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { return (EOPNOTSUPP); } /* * Fifo bad operation */ int fifo_badop() { panic("fifo_badop called"); /* NOTREACHED */ } Index: head/sys/miscfs/portal/portal_vnops.c =================================================================== --- head/sys/miscfs/portal/portal_vnops.c (revision 14092) +++ head/sys/miscfs/portal/portal_vnops.c (revision 14093) @@ -1,726 +1,726 @@ /* * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software donated to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)portal_vnops.c 8.8 (Berkeley) 1/21/94 * - * $Id: portal_vnops.c,v 1.9 1995/12/03 14:54:29 bde Exp $ + * $Id: portal_vnops.c,v 1.10 1995/12/11 09:24:45 phk Exp $ */ /* * Portal Filesystem */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int portal_fileid = PORTAL_ROOTFILEID+1; static int portal_badop __P((void)); static void portal_closefd __P((struct proc *p, int fd)); static int portal_connect __P((struct socket *so, struct socket *so2)); static int portal_enotsupp __P((void)); static int portal_getattr __P((struct vop_getattr_args *ap)); static int portal_inactive __P((struct vop_inactive_args *ap)); static int portal_lookup __P((struct vop_lookup_args *ap)); static int portal_nullop __P((void)); static int portal_open __P((struct vop_open_args *ap)); static int portal_pathconf __P((struct vop_pathconf_args *ap)); static int portal_print __P((struct vop_print_args *ap)); static int portal_readdir __P((struct vop_readdir_args *ap)); static int portal_reclaim __P((struct vop_reclaim_args *ap)); static int portal_setattr __P((struct vop_setattr_args *ap)); static int portal_vfree __P((struct vop_vfree_args *ap)); static void portal_closefd(p, fd) struct proc *p; int fd; { int error; struct close_args ua; int rc; ua.fd = fd; error = close(p, &ua, &rc); /* * We should never get an error, and there isn't anything 
* we could do if we got one, so just print a message. */ if (error) printf("portal_closefd: error = %d\n", error); } /* * vp is the current namei directory * cnp is the name to locate in that directory... */ static int portal_lookup(ap) struct vop_lookup_args /* { struct vnode * a_dvp; struct vnode ** a_vpp; struct componentname * a_cnp; } */ *ap; { char *pname = ap->a_cnp->cn_nameptr; struct portalnode *pt; int error; struct vnode *fvp = 0; char *path; int size; if (ap->a_cnp->cn_namelen == 1 && *pname == '.') { *ap->a_vpp = ap->a_dvp; VREF(ap->a_dvp); /*VOP_LOCK(ap->a_dvp);*/ return (0); } error = getnewvnode(VT_PORTAL, ap->a_dvp->v_mount, portal_vnodeop_p, &fvp); if (error) goto bad; fvp->v_type = VREG; MALLOC(fvp->v_data, void *, sizeof(struct portalnode), M_TEMP, M_WAITOK); pt = VTOPORTAL(fvp); /* * Save all of the remaining pathname and * advance the namei next pointer to the end * of the string. */ for (size = 0, path = pname; *path; path++) size++; ap->a_cnp->cn_consume = size - ap->a_cnp->cn_namelen; pt->pt_arg = malloc(size+1, M_TEMP, M_WAITOK); pt->pt_size = size+1; bcopy(pname, pt->pt_arg, pt->pt_size); pt->pt_fileid = portal_fileid++; *ap->a_vpp = fvp; /*VOP_LOCK(fvp);*/ return (0); bad:; if (fvp) { vrele(fvp); } *ap->a_vpp = NULL; return (error); } static int portal_connect(so, so2) struct socket *so; struct socket *so2; { /* from unp_connect, bypassing the namei stuff... 
*/ struct socket *so3; struct unpcb *unp2; struct unpcb *unp3; if (so2 == 0) return (ECONNREFUSED); if (so->so_type != so2->so_type) return (EPROTOTYPE); if ((so2->so_options & SO_ACCEPTCONN) == 0) return (ECONNREFUSED); if ((so3 = sonewconn(so2, 0)) == 0) return (ECONNREFUSED); unp2 = sotounpcb(so2); unp3 = sotounpcb(so3); if (unp2->unp_addr) unp3->unp_addr = m_copy(unp2->unp_addr, 0, (int)M_COPYALL); so2 = so3; return (unp_connect2(so, so2)); } static int portal_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct socket *so = 0; struct portalnode *pt; struct proc *p = ap->a_p; struct vnode *vp = ap->a_vp; int s; struct uio auio; struct iovec aiov[2]; int res; struct mbuf *cm = 0; struct cmsghdr *cmsg; int newfds; int *ip; int fd; int error; int len; struct portalmount *fmp; struct file *fp; struct portal_cred pcred; /* * Nothing to do when opening the root node. */ if (vp->v_flag & VROOT) return (0); /* * Can't be opened unless the caller is set up * to deal with the side effects. Check for this * by testing whether the p_dupfd has been set. */ if (p->p_dupfd >= 0) return (ENODEV); pt = VTOPORTAL(vp); fmp = VFSTOPORTAL(vp->v_mount); /* * Create a new socket. */ - error = socreate(AF_UNIX, &so, SOCK_STREAM, 0); + error = socreate(AF_UNIX, &so, SOCK_STREAM, 0, ap->a_p); if (error) goto bad; /* * Reserve some buffer space */ res = pt->pt_size + sizeof(pcred) + 512; /* XXX */ error = soreserve(so, res, res); if (error) goto bad; /* * Kick off connection */ error = portal_connect(so, (struct socket *)fmp->pm_server->f_data); if (error) goto bad; /* * Wait for connection to complete */ /* * XXX: Since the mount point is holding a reference on the * underlying server socket, it is not easy to find out whether * the server process is still running. 
To handle this problem * we loop waiting for the new socket to be connected (something * which will only happen if the server is still running) or for * the reference count on the server socket to drop to 1, which * will happen if the server dies. Sleep for 5 second intervals * and keep polling the reference count. XXX. */ s = splnet(); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { if (fmp->pm_server->f_count == 1) { error = ECONNREFUSED; splx(s); goto bad; } (void) tsleep((caddr_t) &so->so_timeo, PSOCK, "portalcon", 5 * hz); } splx(s); if (so->so_error) { error = so->so_error; goto bad; } /* * Set miscellaneous flags */ so->so_rcv.sb_timeo = 0; so->so_snd.sb_timeo = 0; so->so_rcv.sb_flags |= SB_NOINTR; so->so_snd.sb_flags |= SB_NOINTR; pcred.pcr_flag = ap->a_mode; pcred.pcr_uid = ap->a_cred->cr_uid; pcred.pcr_ngroups = ap->a_cred->cr_ngroups; bcopy(ap->a_cred->cr_groups, pcred.pcr_groups, NGROUPS * sizeof(gid_t)); aiov[0].iov_base = (caddr_t) &pcred; aiov[0].iov_len = sizeof(pcred); aiov[1].iov_base = pt->pt_arg; aiov[1].iov_len = pt->pt_size; auio.uio_iov = aiov; auio.uio_iovcnt = 2; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_SYSSPACE; auio.uio_procp = p; auio.uio_offset = 0; auio.uio_resid = aiov[0].iov_len + aiov[1].iov_len; error = sosend(so, (struct mbuf *) 0, &auio, (struct mbuf *) 0, (struct mbuf *) 0, 0); if (error) goto bad; len = auio.uio_resid = sizeof(int); do { struct mbuf *m = 0; int flags = MSG_WAITALL; error = soreceive(so, (struct mbuf **) 0, &auio, &m, &cm, &flags); if (error) goto bad; /* * Grab an error code from the mbuf. */ if (m) { m = m_pullup(m, sizeof(int)); /* Needed? 
*/ if (m) { error = *(mtod(m, int *)); m_freem(m); } else { error = EINVAL; } } else { if (cm == 0) { error = ECONNRESET; /* XXX */ #ifdef notdef break; #endif } } } while (cm == 0 && auio.uio_resid == len && !error); if (cm == 0) goto bad; if (auio.uio_resid) { error = 0; #ifdef notdef error = EMSGSIZE; goto bad; #endif } /* * XXX: Break apart the control message, and retrieve the * received file descriptor. Note that more than one descriptor * may have been received, or that the rights chain may have more * than a single mbuf in it. What to do? */ cmsg = mtod(cm, struct cmsghdr *); newfds = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof (int); if (newfds == 0) { error = ECONNREFUSED; goto bad; } /* * At this point the rights message consists of a control message * header, followed by a data region containing a vector of * integer file descriptors. The fds were allocated by the action * of receiving the control message. */ ip = (int *) (cmsg + 1); fd = *ip++; if (newfds > 1) { /* * Close extra fds. */ int i; printf("portal_open: %d extra fds\n", newfds - 1); for (i = 1; i < newfds; i++) { portal_closefd(p, *ip); ip++; } } /* * Check that the mode the file is being opened for is a subset * of the mode of the existing descriptor. */ fp = p->p_fd->fd_ofiles[fd]; if (((ap->a_mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) { portal_closefd(p, fd); error = EACCES; goto bad; } /* * Save the dup fd in the proc structure then return the * special error code (ENXIO) which causes magic things to * happen in vn_open. The whole concept is, well, hmmm. */ p->p_dupfd = fd; error = ENXIO; bad:; /* * And discard the control message. 
*/ if (cm) { m_freem(cm); } if (so) { soshutdown(so, 2); soclose(so); } return (error); } static int portal_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; bzero(vap, sizeof(*vap)); vattr_null(vap); vap->va_uid = 0; vap->va_gid = 0; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; vap->va_size = DEV_BSIZE; vap->va_blocksize = DEV_BSIZE; microtime((struct timeval *)&vap->va_atime); TIMEVAL_TO_TIMESPEC((struct timeval *)&vap->va_atime, (struct timespec *)&vap->va_atime); vap->va_mtime = vap->va_atime; vap->va_ctime = vap->va_ctime; vap->va_gen = 0; vap->va_flags = 0; vap->va_rdev = 0; /* vap->va_qbytes = 0; */ vap->va_bytes = 0; /* vap->va_qsize = 0; */ if (vp->v_flag & VROOT) { vap->va_type = VDIR; vap->va_mode = S_IRUSR|S_IWUSR|S_IXUSR| S_IRGRP|S_IWGRP|S_IXGRP| S_IROTH|S_IWOTH|S_IXOTH; vap->va_nlink = 2; vap->va_fileid = 2; } else { vap->va_type = VREG; vap->va_mode = S_IRUSR|S_IWUSR| S_IRGRP|S_IWGRP| S_IROTH|S_IWOTH; vap->va_nlink = 1; vap->va_fileid = VTOPORTAL(vp)->pt_fileid; } return (0); } static int portal_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { /* * Can't mess with the root vnode */ if (ap->a_vp->v_flag & VROOT) return (EACCES); return (0); } /* * Fake readdir, just return empty directory. * It is hard to deal with '.' and '..' so don't bother. 
*/ static int portal_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { return (0); } static int portal_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; } */ *ap; { return (0); } static int portal_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; } */ *ap; { struct portalnode *pt = VTOPORTAL(ap->a_vp); if (pt->pt_arg) { free((caddr_t) pt->pt_arg, M_TEMP); pt->pt_arg = 0; } FREE(ap->a_vp->v_data, M_TEMP); ap->a_vp->v_data = 0; return (0); } /* * Return POSIX pathconf information applicable to special devices. */ static int portal_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = LINK_MAX; return (0); case _PC_MAX_CANON: *ap->a_retval = MAX_CANON; return (0); case _PC_MAX_INPUT: *ap->a_retval = MAX_INPUT; return (0); case _PC_PIPE_BUF: *ap->a_retval = PIPE_BUF; return (0); case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; return (0); case _PC_VDISABLE: *ap->a_retval = _POSIX_VDISABLE; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * Print out the contents of a Portal vnode. 
*/ /* ARGSUSED */ static int portal_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { printf("tag VT_PORTAL, portal vnode\n"); return (0); } /*void*/ static int portal_vfree(ap) struct vop_vfree_args /* { struct vnode *a_pvp; ino_t a_ino; int a_mode; } */ *ap; { return (0); } /* * Portal vnode unsupported operation */ static int portal_enotsupp() { return (EOPNOTSUPP); } /* * Portal "should never get here" operation */ static int portal_badop() { panic("portal: bad op"); /* NOTREACHED */ } /* * Portal vnode null operation */ static int portal_nullop() { return (0); } #define portal_create ((int (*) __P((struct vop_create_args *)))portal_enotsupp) #define portal_mknod ((int (*) __P((struct vop_mknod_args *)))portal_enotsupp) #define portal_close ((int (*) __P((struct vop_close_args *)))nullop) #define portal_access ((int (*) __P((struct vop_access_args *)))nullop) #define portal_read ((int (*) __P((struct vop_read_args *)))portal_enotsupp) #define portal_write ((int (*) __P((struct vop_write_args *)))portal_enotsupp) #define portal_ioctl ((int (*) __P((struct vop_ioctl_args *)))portal_enotsupp) #define portal_select ((int (*) __P((struct vop_select_args *)))portal_enotsupp) #define portal_mmap ((int (*) __P((struct vop_mmap_args *)))portal_enotsupp) #define portal_fsync ((int (*) __P((struct vop_fsync_args *)))nullop) #define portal_seek ((int (*) __P((struct vop_seek_args *)))nullop) #define portal_remove ((int (*) __P((struct vop_remove_args *)))portal_enotsupp) #define portal_link ((int (*) __P((struct vop_link_args *)))portal_enotsupp) #define portal_rename ((int (*) __P((struct vop_rename_args *)))portal_enotsupp) #define portal_mkdir ((int (*) __P((struct vop_mkdir_args *)))portal_enotsupp) #define portal_rmdir ((int (*) __P((struct vop_rmdir_args *)))portal_enotsupp) #define portal_symlink \ ((int (*) __P((struct vop_symlink_args *)))portal_enotsupp) #define portal_readlink \ ((int (*) __P((struct vop_readlink_args *)))portal_enotsupp) 
#define portal_abortop ((int (*) __P((struct vop_abortop_args *)))nullop) #define portal_lock ((int (*) __P((struct vop_lock_args *)))nullop) #define portal_unlock ((int (*) __P((struct vop_unlock_args *)))nullop) #define portal_bmap ((int (*) __P((struct vop_bmap_args *)))portal_badop) #define portal_strategy \ ((int (*) __P((struct vop_strategy_args *)))portal_badop) #define portal_islocked ((int (*) __P((struct vop_islocked_args *)))nullop) #define portal_advlock \ ((int (*) __P((struct vop_advlock_args *)))portal_enotsupp) #define portal_blkatoff \ ((int (*) __P((struct vop_blkatoff_args *)))portal_enotsupp) #define portal_valloc ((int(*) __P(( \ struct vnode *pvp, \ int mode, \ struct ucred *cred, \ struct vnode **vpp))) portal_enotsupp) #define portal_truncate \ ((int (*) __P((struct vop_truncate_args *)))portal_enotsupp) #define portal_update ((int (*) __P((struct vop_update_args *)))portal_enotsupp) #define portal_bwrite ((int (*) __P((struct vop_bwrite_args *)))portal_enotsupp) vop_t **portal_vnodeop_p; static struct vnodeopv_entry_desc portal_vnodeop_entries[] = { { &vop_default_desc, (vop_t *)vn_default_error }, { &vop_lookup_desc, (vop_t *)portal_lookup }, /* lookup */ { &vop_create_desc, (vop_t *)portal_create }, /* create */ { &vop_mknod_desc, (vop_t *)portal_mknod }, /* mknod */ { &vop_open_desc, (vop_t *)portal_open }, /* open */ { &vop_close_desc, (vop_t *)portal_close }, /* close */ { &vop_access_desc, (vop_t *)portal_access }, /* access */ { &vop_getattr_desc, (vop_t *)portal_getattr }, /* getattr */ { &vop_setattr_desc, (vop_t *)portal_setattr }, /* setattr */ { &vop_read_desc, (vop_t *)portal_read }, /* read */ { &vop_write_desc, (vop_t *)portal_write }, /* write */ { &vop_ioctl_desc, (vop_t *)portal_ioctl }, /* ioctl */ { &vop_select_desc, (vop_t *)portal_select }, /* select */ { &vop_mmap_desc, (vop_t *)portal_mmap }, /* mmap */ { &vop_fsync_desc, (vop_t *)portal_fsync }, /* fsync */ { &vop_seek_desc, (vop_t *)portal_seek }, /* seek */ { 
&vop_remove_desc, (vop_t *)portal_remove }, /* remove */ { &vop_link_desc, (vop_t *)portal_link }, /* link */ { &vop_rename_desc, (vop_t *)portal_rename }, /* rename */ { &vop_mkdir_desc, (vop_t *)portal_mkdir }, /* mkdir */ { &vop_rmdir_desc, (vop_t *)portal_rmdir }, /* rmdir */ { &vop_symlink_desc, (vop_t *)portal_symlink }, /* symlink */ { &vop_readdir_desc, (vop_t *)portal_readdir }, /* readdir */ { &vop_readlink_desc, (vop_t *)portal_readlink }, /* readlink */ { &vop_abortop_desc, (vop_t *)portal_abortop }, /* abortop */ { &vop_inactive_desc, (vop_t *)portal_inactive }, /* inactive */ { &vop_reclaim_desc, (vop_t *)portal_reclaim }, /* reclaim */ { &vop_lock_desc, (vop_t *)portal_lock }, /* lock */ { &vop_unlock_desc, (vop_t *)portal_unlock }, /* unlock */ { &vop_bmap_desc, (vop_t *)portal_bmap }, /* bmap */ { &vop_strategy_desc, (vop_t *)portal_strategy }, /* strategy */ { &vop_print_desc, (vop_t *)portal_print }, /* print */ { &vop_islocked_desc, (vop_t *)portal_islocked }, /* islocked */ { &vop_pathconf_desc, (vop_t *)portal_pathconf }, /* pathconf */ { &vop_advlock_desc, (vop_t *)portal_advlock }, /* advlock */ { &vop_blkatoff_desc, (vop_t *)portal_blkatoff }, /* blkatoff */ { &vop_valloc_desc, (vop_t *)portal_valloc }, /* valloc */ { &vop_vfree_desc, (vop_t *)portal_vfree }, /* vfree */ { &vop_truncate_desc, (vop_t *)portal_truncate }, /* truncate */ { &vop_update_desc, (vop_t *)portal_update }, /* update */ { &vop_bwrite_desc, (vop_t *)portal_bwrite }, /* bwrite */ { NULL, NULL } }; static struct vnodeopv_desc portal_vnodeop_opv_desc = { &portal_vnodeop_p, portal_vnodeop_entries }; VNODEOP_SET(portal_vnodeop_opv_desc); Index: head/sys/netipx/spx_debug.c =================================================================== --- head/sys/netipx/spx_debug.c (revision 14092) +++ head/sys/netipx/spx_debug.c (revision 14093) @@ -1,170 +1,170 @@ /* * Copyright (c) 1995, Mike Mitchell * Copyright (c) 1984, 1985, 1986, 1987, 1993 * The Regents of the University of 
California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)spx_debug.c * - * $Id: spx_debug.c,v 1.2 1995/10/31 23:36:43 julian Exp $ + * $Id: spx_debug.c,v 1.3 1995/11/04 09:03:32 julian Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SPXTIMERS #include #include #define SANAMES #include int spxconsdebug = 0; /* * spx debug routines */ void spx_trace(act, ostate, sp, si, req) short act; u_char ostate; struct spxpcb *sp; struct spx *si; int req; { #ifdef INET #ifdef TCPDEBUG u_short seq, ack, len, alo; int flags; struct spx_debug *sd = &spx_debug[spx_debx++]; if (spx_debx == SPX_NDEBUG) spx_debx = 0; sd->sd_time = iptime(); sd->sd_act = act; sd->sd_ostate = ostate; sd->sd_cb = (caddr_t)sp; if (sp) sd->sd_sp = *sp; else bzero((caddr_t)&sd->sd_sp, sizeof (*sp)); if (si) sd->sd_si = *si; else bzero((caddr_t)&sd->sd_si, sizeof (*si)); sd->sd_req = req; if (spxconsdebug == 0) return; if (ostate >= TCP_NSTATES) ostate = 0; if (act >= SA_DROP) act = SA_DROP; if (sp) printf("%x %s:", sp, tcpstates[ostate]); else printf("???????? 
"); - printf("%s ", sanames[act]); + printf("%s ", spxnames[act]); switch (act) { case SA_RESPOND: case SA_INPUT: case SA_OUTPUT: case SA_DROP: if (si == 0) break; seq = si->si_seq; ack = si->si_ack; alo = si->si_alo; len = si->si_len; if (act == SA_OUTPUT) { seq = ntohs(seq); ack = ntohs(ack); alo = ntohs(alo); len = ntohs(len); } #ifndef lint #define p1(f) { printf("%s = %x, ", "f", f); } p1(seq); p1(ack); p1(alo); p1(len); #endif flags = si->si_cc; if (flags) { char *cp = "<"; #ifndef lint #define pf(f) { if (flags & SPX_ ## f) { printf("%s%s", cp, "f"); cp = ","; } } pf(SP); pf(SA); pf(OB); pf(EM); #else cp = cp; #endif printf(">"); } #ifndef lint #define p2(f) { printf("%s = %x, ", "f", si->si_ ## f); } p2(sid);p2(did);p2(dt);p2(pt); #endif ipx_printhost(&si->si_sna); ipx_printhost(&si->si_dna); if (act==SA_RESPOND) { printf("ipx_len = %x, ", ((struct ipx *)si)->ipx_len); } break; case SA_USER: printf("%s", prurequests[req&0xff]); if ((req & 0xff) == PRU_SLOWTIMO) printf("<%s>", spxtimers[req>>8]); break; } if (sp) printf(" -> %s", tcpstates[sp->s_state]); /* print out internal state of sp !?! */ printf("\n"); if (sp == 0) return; #ifndef lint #define p3(f) { printf("%s = %x, ", "f", sp->s_ ## f); } printf("\t"); p3(rack);p3(ralo);p3(smax);p3(flags); printf("\n"); #endif #endif #endif } Index: head/sys/nfs/nfs_socket.c =================================================================== --- head/sys/nfs/nfs_socket.c (revision 14092) +++ head/sys/nfs/nfs_socket.c (revision 14093) @@ -1,2176 +1,2178 @@ /* * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)nfs_socket.c 8.3 (Berkeley) 1/12/94 - * $Id: nfs_socket.c,v 1.13 1995/12/17 21:12:25 phk Exp $ + * $Id: nfs_socket.c,v 1.14 1996/01/13 23:27:52 phk Exp $ */ /* * Socket operations for use by nfs */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TRUE 1 #define FALSE 0 /* * Estimate rto for an nfs rpc sent via. an unreliable datagram. * Use the mean and mean deviation of rtt for the appropriate type of rpc * for the frequent rpcs and a default for the others. * The justification for doing "other" this way is that these rpcs * happen so infrequently that timer est. would probably be stale. * Also, since many of these rpcs are * non-idempotent, a conservative timeout is desired. * getattr, lookup - A+2D * read, write - A+4D * other - nm_timeo */ #define NFS_RTO(n, t) \ ((t) == 0 ? (n)->nm_timeo : \ ((t) < 3 ? \ (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \ ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1))) #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1] #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1] /* * External data, mostly RPC constants in XDR form */ extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr, rpc_auth_kerb; extern u_long nfs_prog, nqnfs_prog; extern time_t nqnfsstarttime; extern struct nfsstats nfsstats; extern int nfsv3_procid[NFS_NPROCS]; extern int nfs_ticks; /* * Defines which timer to use for the procnum. * 0 - default * 1 - getattr * 2 - lookup * 3 - read * 4 - write */ static int proct[NFS_NPROCS] = { 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, }; /* * There is a congestion window for outstanding rpcs maintained per mount * point. 
The cwnd size is adjusted in roughly the way that: * Van Jacobson, Congestion avoidance and Control, In "Proceedings of * SIGCOMM '88". ACM, August 1988. * describes for TCP. The cwnd size is chopped in half on a retransmit timeout * and incremented by 1/cwnd when each rpc reply is received and a full cwnd * of rpcs is in progress. * (The sent count and cwnd are scaled for integer arith.) * Variants of "slow start" were tried and were found to be too much of a * performance hit (ave. rtt 3 times larger), * I suspect due to the large rtt that nfs rpcs have. */ #define NFS_CWNDSCALE 256 #define NFS_MAXCWND (NFS_CWNDSCALE * 32) static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, }; int nfsrtton = 0; struct nfsrtt nfsrtt; static int nfs_msg __P((struct proc *,char *,char *)); static int nfs_rcvlock __P((struct nfsreq *)); static void nfs_rcvunlock __P((int *flagp)); static void nfs_realign __P((struct mbuf *m, int hsiz)); static int nfs_receive __P((struct nfsreq *rep, struct mbuf **aname, struct mbuf **mp)); static int nfs_reconnect __P((struct nfsreq *rep)); static int nfsrv_getstream __P((struct nfssvc_sock *,int)); #ifndef NFS_NOSERVER int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mreqp)) = { nfsrv_null, nfsrv_getattr, nfsrv_setattr, nfsrv_lookup, nfsrv3_access, nfsrv_readlink, nfsrv_read, nfsrv_write, nfsrv_create, nfsrv_mkdir, nfsrv_symlink, nfsrv_mknod, nfsrv_remove, nfsrv_rmdir, nfsrv_rename, nfsrv_link, nfsrv_readdir, nfsrv_readdirplus, nfsrv_statfs, nfsrv_fsinfo, nfsrv_pathconf, nfsrv_commit, nqnfsrv_getlease, nqnfsrv_vacated, nfsrv_noop, nfsrv_noop }; #endif /* NFS_NOSERVER */ /* * Initialize sockets and congestion for a new NFS connection. * We do not free the sockaddr if error. 
*/
/*
 * nfs_connect(nmp, rep)
 *	nmp - mount point whose socket (nm_so) is to be created; nm_nam,
 *	      nm_sotype, nm_soproto and nm_flag are read from it.
 *	rep - request on whose behalf we connect, or NULL; only used to poll
 *	      nfs_sigintr() while waiting for the connection to complete.
 * Returns 0 on success.  On any failure the socket is torn down again with
 * nfs_disconnect() and the error is returned.
 */
int
nfs_connect(nmp, rep)
	register struct nfsmount *nmp;
	struct nfsreq *rep;
{
	register struct socket *so;
	int s, error, rcvreserve, sndreserve;
	struct sockaddr *saddr;
	struct sockaddr_in *sin;
	struct mbuf *m;
	u_short tport;
+	struct proc *p = &proc0;	/* only used for socreate */

	nmp->nm_so = (struct socket *)0;
	saddr = mtod(nmp->nm_nam, struct sockaddr *);
	error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
-		nmp->nm_soproto);
+		nmp->nm_soproto, p);
	if (error)
		goto bad;
	so = nmp->nm_so;
	/*
	 * NOTE(review): SS_PRIV is cleared before the reserved-port
	 * sobind() below -- confirm the bind does not depend on it.
	 */
+	so->so_state &= ~SS_PRIV;	/* don't need it */
	nmp->nm_soflags = so->so_proto->pr_flags;

	/*
	 * Some servers require that the client port be a reserved port number.
	 * Start at IPPORT_RESERVED - 1 and walk downwards for as long as the
	 * bind fails with EADDRINUSE, giving up at IPPORT_RESERVED / 2.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		MGET(m, M_WAIT, MT_SONAME);
		sin = mtod(m, struct sockaddr_in *);
		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = INADDR_ANY;
		tport = IPPORT_RESERVED - 1;
		sin->sin_port = htons(tport);
		while ((error = sobind(so, m)) == EADDRINUSE &&
		       --tport > IPPORT_RESERVED / 2)
			sin->sin_port = htons(tport);
		m_freem(m);
		if (error)
			goto bad;
	}

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a port other than NFS_PORT.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		/* "no connect" makes no sense on a connection-only protocol */
		if (nmp->nm_soflags & PR_CONNREQUIRED) {
			error = ENOTCONN;
			goto bad;
		}
	} else {
		error = soconnect(so, nmp->nm_nam);
		if (error)
			goto bad;

		/*
		 * Wait for the connection to complete. Cribbed from the
		 * connect system call but with the wait timing out so
		 * that interruptible mounts don't hang here for a long time.
		 */
		s = splnet();
		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
			/* wake up every 2 seconds to look for a signal */
			(void) tsleep((caddr_t)&so->so_timeo, PSOCK,
				"nfscon", 2 * hz);
			if ((so->so_state & SS_ISCONNECTING) &&
			    so->so_error == 0 && rep &&
			    (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
				so->so_state &= ~SS_ISCONNECTING;
				splx(s);
				goto bad;
			}
		}
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto bad;
		}
		splx(s);
	}
	/*
	 * Soft and interruptible mounts get a 5 second socket buffer
	 * timeout; all other mounts wait without a timeout.
	 */
	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
		so->so_rcv.sb_timeo = (5 * hz);
		so->so_snd.sb_timeo = (5 * hz);
	} else {
		so->so_rcv.sb_timeo = 0;
		so->so_snd.sb_timeo = 0;
	}
	/*
	 * Size the socket buffers from the mount's read/write sizes:
	 * one packet for datagrams, two for the other socket types (plus
	 * room for a u_long per record on stream sockets).
	 */
	if (nmp->nm_sotype == SOCK_DGRAM) {
		sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
		rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR;
	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
	} else {
		if (nmp->nm_sotype != SOCK_STREAM)
			panic("nfscon sotype");
		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int *) = 1;
			m->m_len = sizeof(int);
			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
		}
		if (so->so_proto->pr_protocol == IPPROTO_TCP) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int *) = 1;
			m->m_len = sizeof(int);
			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
		}
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
				* 2;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
				* 2;
	}
	error = soreserve(so, sndreserve, rcvreserve);
	if (error)
		goto bad;
	so->so_rcv.sb_flags |= SB_NOINTR;
	so->so_snd.sb_flags |= SB_NOINTR;

	/*
	 * Initialize other non-zero congestion variables
	 * NOTE(review): indices 0..4 are written here, while the timer
	 * macros in this file only ever index proct[] - 1, i.e. 0..3 --
	 * confirm the array sizes declared in struct nfsmount.
	 */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
		nmp->nm_srtt[4] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
		nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0;
	nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
	nmp->nm_sent = 0;
	nmp->nm_timeouts = 0;
	return (0);

bad:
	nfs_disconnect(nmp);
	return (error);
}

/*
 * Reconnect routine:
 * Called when a connection is
broken on a reliable protocol. * - clean up the old socket * - nfs_connect() again * - set R_MUSTRESEND for all outstanding requests on mount point * If this fails the mount point is DEAD! * nb: Must be called with the nfs_sndlock() set on the mount point. */ static int nfs_reconnect(rep) register struct nfsreq *rep; { register struct nfsreq *rp; register struct nfsmount *nmp = rep->r_nmp; int error; nfs_disconnect(nmp); while ((error = nfs_connect(nmp, rep))) { if (error == EINTR || error == ERESTART) return (EINTR); (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); } /* * Loop through outstanding request list and fix up all requests * on old socket. */ for (rp = nfs_reqq.tqh_first; rp != 0; rp = rp->r_chain.tqe_next) { if (rp->r_nmp == nmp) rp->r_flags |= R_MUSTRESEND; } return (0); } /* * NFS disconnect. Clean up and unlink. */ void nfs_disconnect(nmp) register struct nfsmount *nmp; { register struct socket *so; if (nmp->nm_so) { so = nmp->nm_so; nmp->nm_so = (struct socket *)0; soshutdown(so, 2); soclose(so); } } /* * This is the nfs send routine. For connection based socket types, it * must be called with an nfs_sndlock() on the socket. * "rep == NULL" indicates that it has been called from a server. * For the client side: * - return EINTR if the RPC is terminated, 0 otherwise * - set R_MUSTRESEND if the send fails for any reason * - do any cleanup required by recoverable socket errors (???) * For the server side: * - return EINTR or ERESTART if interrupted by a signal * - return EPIPE if a connection is lost for connection based sockets (TCP...) * - do any cleanup required by recoverable socket errors (???) 
*/
int
nfs_send(so, nam, top, rep)
	register struct socket *so;
	struct mbuf *nam;
	register struct mbuf *top;
	struct nfsreq *rep;
{
	struct mbuf *sendnam;
	int error, soflags, flags;

	if (rep == NULL) {
		/* Server side: use the socket we were handed. */
		soflags = so->so_proto->pr_flags;
	} else {
		/* Client side: the mount point's socket is authoritative. */
		if (rep->r_flags & R_SOFTTERM) {
			m_freem(top);
			return (EINTR);
		}
		so = rep->r_nmp->nm_so;
		if (so == NULL) {
			/* no socket right now; have the request resent */
			rep->r_flags |= R_MUSTRESEND;
			m_freem(top);
			return (0);
		}
		rep->r_flags &= ~R_MUSTRESEND;
		soflags = rep->r_nmp->nm_soflags;
	}

	/* An explicit destination is only passed on unconnected sockets. */
	sendnam = ((soflags & PR_CONNREQUIRED) ||
	    (so->so_state & SS_ISCONNECTED)) ? (struct mbuf *)0 : nam;
	flags = (so->so_type == SOCK_SEQPACKET) ? MSG_EOR : 0;

	error = sosend(so, sendnam, (struct uio *)0, top,
		(struct mbuf *)0, flags);
	if (error == 0)
		return (0);

	if (rep == NULL) {
		log(LOG_INFO, "nfsd send error %d\n", error);
	} else {
		log(LOG_INFO, "nfs send error %d for server %s\n",error,
		    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
		/*
		 * Client side: a terminated request reports EINTR,
		 * anything else is queued for retransmission.
		 */
		if (rep->r_flags & R_SOFTTERM)
			error = EINTR;
		else
			rep->r_flags |= R_MUSTRESEND;
	}

	/*
	 * Only these errors are reported to the caller; every other
	 * (soft) socket error is swallowed here. (???)
	 */
	switch (error) {
	case EINTR:
	case ERESTART:
	case EWOULDBLOCK:
	case EPIPE:
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

/*
 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
 * Mark and consolidate the data into a new mbuf list.
 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
 *     small mbufs.
 * For SOCK_STREAM we must be very careful to read an entire record once
 * we have read any of it, even if the system call has been interrupted.
*/ static int nfs_receive(rep, aname, mp) register struct nfsreq *rep; struct mbuf **aname; struct mbuf **mp; { register struct socket *so; struct uio auio; struct iovec aio; register struct mbuf *m; struct mbuf *control; u_long len; struct mbuf **getnam; int error, sotype, rcvflg; struct proc *p = curproc; /* XXX */ /* * Set up arguments for soreceive() */ *mp = (struct mbuf *)0; *aname = (struct mbuf *)0; sotype = rep->r_nmp->nm_sotype; /* * For reliable protocols, lock against other senders/receivers * in case a reconnect is necessary. * For SOCK_STREAM, first get the Record Mark to find out how much * more there is to get. * We must lock the socket against other receivers * until we have an entire rpc request/reply. */ if (sotype != SOCK_DGRAM) { error = nfs_sndlock(&rep->r_nmp->nm_flag, rep); if (error) return (error); tryagain: /* * Check for fatal errors and resending request. */ /* * Ugh: If a reconnect attempt just happened, nm_so * would have changed. NULL indicates a failed * attempt that has essentially shut down this * mount point. 
*/ if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { nfs_sndunlock(&rep->r_nmp->nm_flag); return (EINTR); } so = rep->r_nmp->nm_so; if (!so) { error = nfs_reconnect(rep); if (error) { nfs_sndunlock(&rep->r_nmp->nm_flag); return (error); } goto tryagain; } while (rep->r_flags & R_MUSTRESEND) { m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); nfsstats.rpcretries++; error = nfs_send(so, rep->r_nmp->nm_nam, m, rep); if (error) { if (error == EINTR || error == ERESTART || (error = nfs_reconnect(rep))) { nfs_sndunlock(&rep->r_nmp->nm_flag); return (error); } goto tryagain; } } nfs_sndunlock(&rep->r_nmp->nm_flag); if (sotype == SOCK_STREAM) { aio.iov_base = (caddr_t) &len; aio.iov_len = sizeof(u_long); auio.uio_iov = &aio; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_offset = 0; auio.uio_resid = sizeof(u_long); auio.uio_procp = p; do { rcvflg = MSG_WAITALL; error = soreceive(so, (struct mbuf **)0, &auio, (struct mbuf **)0, (struct mbuf **)0, &rcvflg); if (error == EWOULDBLOCK && rep) { if (rep->r_flags & R_SOFTTERM) return (EINTR); } } while (error == EWOULDBLOCK); if (!error && auio.uio_resid > 0) { log(LOG_INFO, "short receive (%d/%d) from nfs server %s\n", sizeof(u_long) - auio.uio_resid, sizeof(u_long), rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); error = EPIPE; } if (error) goto errout; len = ntohl(len) & ~0x80000000; /* * This is SERIOUS! We are out of sync with the sender * and forcing a disconnect/reconnect is all I can do. 
*/ if (len > NFS_MAXPACKET) { log(LOG_ERR, "%s (%d) from nfs server %s\n", "impossible packet length", len, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); error = EFBIG; goto errout; } auio.uio_resid = len; do { rcvflg = MSG_WAITALL; error = soreceive(so, (struct mbuf **)0, &auio, mp, (struct mbuf **)0, &rcvflg); } while (error == EWOULDBLOCK || error == EINTR || error == ERESTART); if (!error && auio.uio_resid > 0) { log(LOG_INFO, "short receive (%d/%d) from nfs server %s\n", len - auio.uio_resid, len, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); error = EPIPE; } } else { /* * NB: Since uio_resid is big, MSG_WAITALL is ignored * and soreceive() will return when it has either a * control msg or a data msg. * We have no use for control msg., but must grab them * and then throw them away so we know what is going * on. */ auio.uio_resid = len = 100000000; /* Anything Big */ auio.uio_procp = p; do { rcvflg = 0; error = soreceive(so, (struct mbuf **)0, &auio, mp, &control, &rcvflg); if (control) m_freem(control); if (error == EWOULDBLOCK && rep) { if (rep->r_flags & R_SOFTTERM) return (EINTR); } } while (error == EWOULDBLOCK || (!error && *mp == NULL && control)); if ((rcvflg & MSG_EOR) == 0) printf("Egad!!\n"); if (!error && *mp == NULL) error = EPIPE; len -= auio.uio_resid; } errout: if (error && error != EINTR && error != ERESTART) { m_freem(*mp); *mp = (struct mbuf *)0; if (error != EPIPE) log(LOG_INFO, "receive error %d from nfs server %s\n", error, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); error = nfs_sndlock(&rep->r_nmp->nm_flag, rep); if (!error) error = nfs_reconnect(rep); if (!error) goto tryagain; } } else { if ((so = rep->r_nmp->nm_so) == NULL) return (EACCES); if (so->so_state & SS_ISCONNECTED) getnam = (struct mbuf **)0; else getnam = aname; auio.uio_resid = len = 1000000; auio.uio_procp = p; do { rcvflg = 0; error = soreceive(so, getnam, &auio, mp, (struct mbuf **)0, &rcvflg); if (error == EWOULDBLOCK && (rep->r_flags & R_SOFTTERM)) return 
(EINTR); } while (error == EWOULDBLOCK); len -= auio.uio_resid; } if (error) { m_freem(*mp); *mp = (struct mbuf *)0; } /* * Search for any mbufs that are not a multiple of 4 bytes long * or with m_data not longword aligned. * These could cause pointer alignment problems, so copy them to * well aligned mbufs. */ nfs_realign(*mp, 5 * NFSX_UNSIGNED); return (error); } /* * Implement receipt of reply on a socket. * We must search through the list of received datagrams matching them * with outstanding requests using the xid, until ours is found. */ /* ARGSUSED */ int nfs_reply(myrep) struct nfsreq *myrep; { register struct nfsreq *rep; register struct nfsmount *nmp = myrep->r_nmp; register long t1; struct mbuf *mrep, *nam, *md; u_long rxid, *tl; caddr_t dpos, cp2; int error; /* * Loop around until we get our own reply */ for (;;) { /* * Lock against other receivers so that I don't get stuck in * sbwait() after someone else has received my reply for me. * Also necessary for connection based protocols to avoid * race conditions during a reconnect. */ error = nfs_rcvlock(myrep); if (error) return (error); /* Already received, bye bye */ if (myrep->r_mrep != NULL) { nfs_rcvunlock(&nmp->nm_flag); return (0); } /* * Get the next Rpc reply off the socket */ error = nfs_receive(myrep, &nam, &mrep); nfs_rcvunlock(&nmp->nm_flag); if (error) { /* * Ignore routing errors on connectionless protocols?? 
*/ if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { nmp->nm_so->so_error = 0; if (myrep->r_flags & R_GETONEREP) return (0); continue; } return (error); } if (nam) m_freem(nam); /* * Get the xid and check that it is an rpc reply */ md = mrep; dpos = mtod(md, caddr_t); nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); rxid = *tl++; if (*tl != rpc_reply) { #ifndef NFS_NOSERVER if (nmp->nm_flag & NFSMNT_NQNFS) { if (nqnfs_callback(nmp, mrep, md, dpos)) nfsstats.rpcinvalid++; } else { nfsstats.rpcinvalid++; m_freem(mrep); } #else nfsstats.rpcinvalid++; m_freem(mrep); #endif nfsmout: if (myrep->r_flags & R_GETONEREP) return (0); continue; } /* * Loop through the request list to match up the reply * Iff no match, just drop the datagram */ for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) { if (rep->r_mrep == NULL && rxid == rep->r_xid) { /* Found it.. */ rep->r_mrep = mrep; rep->r_md = md; rep->r_dpos = dpos; if (nfsrtton) { struct rttl *rt; rt = &nfsrtt.rttl[nfsrtt.pos]; rt->proc = rep->r_procnum; rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]); rt->sent = nmp->nm_sent; rt->cwnd = nmp->nm_cwnd; rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1]; rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1]; rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid; rt->tstamp = time; if (rep->r_flags & R_TIMING) rt->rtt = rep->r_rtt; else rt->rtt = 1000000; nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ; } /* * Update congestion window. * Do the additive increase of * one rpc/rtt. */ if (nmp->nm_cwnd <= nmp->nm_sent) { nmp->nm_cwnd += (NFS_CWNDSCALE * NFS_CWNDSCALE + (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd; if (nmp->nm_cwnd > NFS_MAXCWND) nmp->nm_cwnd = NFS_MAXCWND; } rep->r_flags &= ~R_SENT; nmp->nm_sent -= NFS_CWNDSCALE; /* * Update rtt using a gain of 0.125 on the mean * and a gain of 0.25 on the deviation. */ if (rep->r_flags & R_TIMING) { /* * Since the timer resolution of * NFS_HZ is so course, it can often * result in r_rtt == 0. 
Since * r_rtt == N means that the actual * rtt is between N+dt and N+2-dt ticks, * add 1. */ t1 = rep->r_rtt + 1; t1 -= (NFS_SRTT(rep) >> 3); NFS_SRTT(rep) += t1; if (t1 < 0) t1 = -t1; t1 -= (NFS_SDRTT(rep) >> 2); NFS_SDRTT(rep) += t1; } nmp->nm_timeouts = 0; break; } } /* * If not matched to a request, drop it. * If it's mine, get out. */ if (rep == 0) { nfsstats.rpcunexpected++; m_freem(mrep); } else if (rep == myrep) { if (rep->r_mrep == NULL) panic("nfsreply nil"); return (0); } if (myrep->r_flags & R_GETONEREP) return (0); } } /* * nfs_request - goes something like this * - fill in request struct * - links it into list * - calls nfs_send() for first transmit * - calls nfs_receive() to get reply * - break down rpc header and return with nfs reply pointed to * by mrep or error * nb: always frees up mreq mbuf list */ int nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp) struct vnode *vp; struct mbuf *mrest; int procnum; struct proc *procp; struct ucred *cred; struct mbuf **mrp; struct mbuf **mdp; caddr_t *dposp; { register struct mbuf *m, *mrep; register struct nfsreq *rep; register u_long *tl; register int i; struct nfsmount *nmp; struct mbuf *md, *mheadend; struct nfsnode *np; char nickv[RPCX_NICKVERF]; time_t reqtime, waituntil; caddr_t dpos, cp2; int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type; int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0; int verf_len, verf_type; u_long xid; u_quad_t frev; char *auth_str, *verf_str; NFSKERBKEY_T key; /* save session key */ nmp = VFSTONFS(vp->v_mount); MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); rep->r_nmp = nmp; rep->r_vp = vp; rep->r_procp = procp; rep->r_procnum = procnum; i = 0; m = mrest; while (m) { i += m->m_len; m = m->m_next; } mrest_len = i; /* * Get the RPC header with authorization. 
*/ kerbauth: verf_str = auth_str = (char *)0; if (nmp->nm_flag & NFSMNT_KERB) { verf_str = nickv; verf_len = sizeof (nickv); auth_type = RPCAUTH_KERB4; bzero((caddr_t)key, sizeof (key)); if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str, &auth_len, verf_str, verf_len)) { error = nfs_getauth(nmp, rep, cred, &auth_str, &auth_len, verf_str, &verf_len, key); if (error) { free((caddr_t)rep, M_NFSREQ); m_freem(mrest); return (error); } } } else { auth_type = RPCAUTH_UNIX; if (cred->cr_ngroups < 1) panic("nfsreq nogrps"); auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ? nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) + 5 * NFSX_UNSIGNED; } m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len, auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid); if (auth_str) free(auth_str, M_TEMP); /* * For stream protocols, insert a Sun RPC Record Mark. */ if (nmp->nm_sotype == SOCK_STREAM) { M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); *mtod(m, u_long *) = htonl(0x80000000 | (m->m_pkthdr.len - NFSX_UNSIGNED)); } rep->r_mreq = m; rep->r_xid = xid; tryagain: if (nmp->nm_flag & NFSMNT_SOFT) rep->r_retry = nmp->nm_retry; else rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ rep->r_rtt = rep->r_rexmit = 0; if (proct[procnum] > 0) rep->r_flags = R_TIMING; else rep->r_flags = 0; rep->r_mrep = NULL; /* * Do the client side RPC. */ nfsstats.rpcrequests++; /* * Chain request into list of outstanding requests. Be sure * to put it LAST so timer finds oldest requests first. */ s = splsoftclock(); TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain); /* Get send time for nqnfs */ reqtime = time.tv_sec; /* * If backing off another request or avoiding congestion, don't * send this one now but let timer do it. If not timing a request, * do it now. 
*/ if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || (nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) { splx(s); if (nmp->nm_soflags & PR_CONNREQUIRED) error = nfs_sndlock(&nmp->nm_flag, rep); if (!error) { m = m_copym(m, 0, M_COPYALL, M_WAIT); error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); if (nmp->nm_soflags & PR_CONNREQUIRED) nfs_sndunlock(&nmp->nm_flag); } if (!error && (rep->r_flags & R_MUSTRESEND) == 0) { nmp->nm_sent += NFS_CWNDSCALE; rep->r_flags |= R_SENT; } } else { splx(s); rep->r_rtt = -1; } /* * Wait for the reply from our send or the timer's. */ if (!error || error == EPIPE) error = nfs_reply(rep); /* * RPC done, unlink the request. */ s = splsoftclock(); TAILQ_REMOVE(&nfs_reqq, rep, r_chain); splx(s); /* * Decrement the outstanding request count. */ if (rep->r_flags & R_SENT) { rep->r_flags &= ~R_SENT; /* paranoia */ nmp->nm_sent -= NFS_CWNDSCALE; } /* * If there was a successful reply and a tprintf msg. * tprintf a response. */ if (!error && (rep->r_flags & R_TPRINTFMSG)) nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, "is alive again"); mrep = rep->r_mrep; md = rep->r_md; dpos = rep->r_dpos; if (error) { m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } /* * break down the rpc header and check if ok */ nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); if (*tl++ == rpc_msgdenied) { if (*tl == rpc_mismatch) error = EOPNOTSUPP; else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) { if (!failed_auth) { failed_auth++; mheadend->m_next = (struct mbuf *)0; m_freem(mrep); m_freem(rep->r_mreq); goto kerbauth; } else error = EAUTH; } else error = EACCES; m_freem(mrep); m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } /* * Grab any Kerberos verifier, otherwise just throw it away. 
*/ verf_type = fxdr_unsigned(int, *tl++); i = fxdr_unsigned(int, *tl); if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) { error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep); if (error) goto nfsmout; } else if (i > 0) nfsm_adv(nfsm_rndup(i)); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); /* 0 == ok */ if (*tl == 0) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); if (*tl != 0) { error = fxdr_unsigned(int, *tl); if ((nmp->nm_flag & NFSMNT_NFSV3) && error == NFSERR_TRYLATER) { m_freem(mrep); error = 0; waituntil = time.tv_sec + trylater_delay; while (time.tv_sec < waituntil) (void) tsleep((caddr_t)&lbolt, PSOCK, "nqnfstry", 0); trylater_delay *= nfs_backoff[trylater_cnt]; if (trylater_cnt < 7) trylater_cnt++; goto tryagain; } /* * If the File Handle was stale, invalidate the * lookup cache, just in case. */ if (error == ESTALE) cache_purge(vp); if (nmp->nm_flag & NFSMNT_NFSV3) { *mrp = mrep; *mdp = md; *dposp = dpos; error |= NFSERR_RETERR; } else m_freem(mrep); m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } /* * For nqnfs, get any lease in reply */ if (nmp->nm_flag & NFSMNT_NQNFS) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); if (*tl) { np = VTONFS(vp); nqlflag = fxdr_unsigned(int, *tl); nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED); cachable = fxdr_unsigned(int, *tl++); reqtime += fxdr_unsigned(int, *tl++); if (reqtime > time.tv_sec) { fxdr_hyper(tl, &frev); nqnfs_clientlease(nmp, np, nqlflag, cachable, reqtime, frev); } } } *mrp = mrep; *mdp = md; *dposp = dpos; m_freem(rep->r_mreq); FREE((caddr_t)rep, M_NFSREQ); return (0); } m_freem(mrep); error = EPROTONOSUPPORT; nfsmout: m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } #ifndef NFS_NOSERVER /* * Generate the rpc reply header * siz arg. 
is used to decide if adding a cluster is worthwhile
 *
 *	siz   - expected size of the reply body (RPC_REPLYSIZ is added here).
 *	nd    - request descriptor; nd_retxid, nd_flag, nd_cr, nd_nam2 and
 *	        nd_duration are read.
 *	slp   - server socket, used to look up the Kerberos nickname entry.
 *	err   - status to encode in the reply.
 *	cache, frev - lease information piggybacked for nqnfs replies.
 *	mrq, mbp, bposp - out: head mbuf, current mbuf and build position.
 * Always returns 0.
 */
int
nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
	int siz;
	struct nfsrv_descript *nd;
	struct nfssvc_sock *slp;
	int err;
	int cache;
	u_quad_t *frev;
	struct mbuf **mrq;
	struct mbuf **mbp;
	caddr_t *bposp;
{
	register u_long *tl;
	register struct mbuf *mreq;
	caddr_t bpos;
	struct mbuf *mb, *mb2;

	MGETHDR(mreq, M_WAIT, MT_DATA);
	mb = mreq;
	/*
	 * If this is a big reply, use a cluster else
	 * try and leave leading space for the lower level headers.
	 */
	siz += RPC_REPLYSIZ;
	if (siz >= MINCLSIZE) {
		MCLGET(mreq, M_WAIT);
	} else
		mreq->m_data += max_hdr;
	tl = mtod(mreq, u_long *);
	/* Six words are claimed up front; the branches below fill them. */
	mreq->m_len = 6 * NFSX_UNSIGNED;
	bpos = ((caddr_t)tl) + mreq->m_len;
	*tl++ = txdr_unsigned(nd->nd_retxid);
	*tl++ = rpc_reply;
	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
		*tl++ = rpc_msgdenied;
		if (err & NFSERR_AUTHERR) {
			*tl++ = rpc_autherr;
			*tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
			/* this form needs only five words: give one back */
			mreq->m_len -= NFSX_UNSIGNED;
			bpos -= NFSX_UNSIGNED;
		} else {
			*tl++ = rpc_mismatch;
			*tl++ = txdr_unsigned(RPC_VER2);
			*tl = txdr_unsigned(RPC_VER2);
		}
	} else {
		*tl++ = rpc_msgaccepted;

		/*
		 * For Kerberos authentication, we must send the nickname
		 * verifier back, otherwise just RPCAUTH_NULL.
		 */
		if (nd->nd_flag & ND_KERBFULL) {
		    register struct nfsuid *nuidp;
		    struct timeval ktvin, ktvout;

		    /* Find the cached entry for this uid (and client). */
		    for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first;
			nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
			if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
			    (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
			     &nuidp->nu_haddr, nd->nd_nam2)))
			    break;
		    }
		    if (nuidp) {
			ktvin.tv_sec =
			    txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
			ktvin.tv_usec =
			    txdr_unsigned(nuidp->nu_timestamp.tv_usec);

			/*
			 * Encrypt the timestamp in ecb mode using the
			 * session key.
			 * NOTE(review): the encryption itself is only a
			 * placeholder below, so nothing visible here stores
			 * into ktvout before it is read -- confirm.
			 */
#ifdef NFSKERB
			XXX
#endif

			*tl++ = rpc_auth_kerb;
			*tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
			*tl = ktvout.tv_sec;
			nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
			*tl++ = ktvout.tv_usec;
			*tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
		    } else {
			*tl++ = 0;
			*tl++ = 0;
		    }
		} else {
			*tl++ = 0;
			*tl++ = 0;
		}
		/* tl now points at the accept status word. */
		switch (err) {
		case EPROGUNAVAIL:
			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			*tl = txdr_unsigned(RPC_PROGMISMATCH);
			/* followed by two version words */
			nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
			if (nd->nd_flag & ND_NQNFS) {
				*tl++ = txdr_unsigned(3);
				*tl = txdr_unsigned(3);
			} else {
				*tl++ = txdr_unsigned(2);
				*tl = txdr_unsigned(3);
			}
			break;
		case EPROCUNAVAIL:
			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			*tl = txdr_unsigned(RPC_GARBAGE);
			break;
		default:
			*tl = 0;
			/* append a status word unless the reply is void */
			if (err != NFSERR_RETVOID) {
				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
				if (err)
				    *tl = txdr_unsigned(nfsrv_errmap(nd, err));
				else
				    *tl = 0;
			}
			break;
		};
	}

	/*
	 * For nqnfs, piggyback lease as requested.
	 */
	if ((nd->nd_flag & ND_NQNFS) && err == 0) {
		if (nd->nd_flag & ND_LEASE) {
			nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
			*tl++ = txdr_unsigned(cache);
			*tl++ = txdr_unsigned(nd->nd_duration);
			txdr_hyper(frev, tl);
		} else {
			nfsm_build(tl, u_long *, NFSX_UNSIGNED);
			*tl = 0;
		}
	}
	/* Hand the head, current mbuf and build position to the caller. */
	*mrq = mreq;
	*mbp = mb;
	*bposp = bpos;
	if (err != 0 && err != NFSERR_RETVOID)
		nfsstats.srvrpc_errs++;
	return (0);
}

/*
 * Nfs timer routine
 * Scan the nfsreq list and retransmit any requests that have timed out
 * To avoid retransmission attempts on STREAM sockets (in the future) make
 * sure to set the r_retry field to 0 (implies nm_retry == 0).
*/ void nfs_timer(arg) void *arg; /* never used */ { register struct nfsreq *rep; register struct mbuf *m; register struct socket *so; register struct nfsmount *nmp; register int timeo; register struct nfssvc_sock *slp; static long lasttime = 0; int s, error; u_quad_t cur_usec; s = splnet(); for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) { nmp = rep->r_nmp; if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) continue; if (nfs_sigintr(nmp, rep, rep->r_procp)) { rep->r_flags |= R_SOFTTERM; continue; } if (rep->r_rtt >= 0) { rep->r_rtt++; if (nmp->nm_flag & NFSMNT_DUMBTIMR) timeo = nmp->nm_timeo; else timeo = NFS_RTO(nmp, proct[rep->r_procnum]); if (nmp->nm_timeouts > 0) timeo *= nfs_backoff[nmp->nm_timeouts - 1]; if (rep->r_rtt <= timeo) continue; if (nmp->nm_timeouts < 8) nmp->nm_timeouts++; } /* * Check for server not responding */ if ((rep->r_flags & R_TPRINTFMSG) == 0 && rep->r_rexmit > nmp->nm_deadthresh) { nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, "not responding"); rep->r_flags |= R_TPRINTFMSG; } if (rep->r_rexmit >= rep->r_retry) { /* too many */ nfsstats.rpctimeouts++; rep->r_flags |= R_SOFTTERM; continue; } if (nmp->nm_sotype != SOCK_DGRAM) { if (++rep->r_rexmit > NFS_MAXREXMIT) rep->r_rexmit = NFS_MAXREXMIT; continue; } if ((so = nmp->nm_so) == NULL) continue; /* * If there is enough space and the window allows.. * Resend it * Set r_rtt to -1 in case we fail to send it now. 
*/ rep->r_rtt = -1; if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && ((nmp->nm_flag & NFSMNT_DUMBTIMR) || (rep->r_flags & R_SENT) || nmp->nm_sent < nmp->nm_cwnd) && (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, (struct mbuf *)0, (struct mbuf *)0); else error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, nmp->nm_nam, (struct mbuf *)0); if (error) { if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) so->so_error = 0; } else { /* * Iff first send, start timing * else turn timing off, backoff timer * and divide congestion window by 2. */ if (rep->r_flags & R_SENT) { rep->r_flags &= ~R_TIMING; if (++rep->r_rexmit > NFS_MAXREXMIT) rep->r_rexmit = NFS_MAXREXMIT; nmp->nm_cwnd >>= 1; if (nmp->nm_cwnd < NFS_CWNDSCALE) nmp->nm_cwnd = NFS_CWNDSCALE; nfsstats.rpcretries++; } else { rep->r_flags |= R_SENT; nmp->nm_sent += NFS_CWNDSCALE; } rep->r_rtt = 0; } } } /* * Call the nqnfs server timer once a second to handle leases. */ if (lasttime != time.tv_sec) { lasttime = time.tv_sec; nqnfs_serverd(); } /* * Scan the write gathering queues for writes that need to be * completed now. */ cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec; for (slp = nfssvc_sockhead.tqh_first; slp != 0; slp = slp->ns_chain.tqe_next) { if (slp->ns_tq.lh_first && slp->ns_tq.lh_first->nd_time<=cur_usec) nfsrv_wakenfsd(slp); } splx(s); timeout(nfs_timer, (void *)0, nfs_ticks); } #endif /* NFS_NOSERVER */ /* * Test for a termination condition pending on the process. * This is used for NFSMNT_INT mounts. */ int nfs_sigintr(nmp, rep, p) struct nfsmount *nmp; struct nfsreq *rep; register struct proc *p; { if (rep && (rep->r_flags & R_SOFTTERM)) return (EINTR); if (!(nmp->nm_flag & NFSMNT_INT)) return (0); if (p && p->p_siglist && (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) & NFSINT_SIGMASK)) return (EINTR); return (0); } /* * Lock a socket against others. 
* Necessary for STREAM sockets to ensure you get an entire rpc request/reply * and also to avoid race conditions between the processes with nfs requests * in progress when a reconnect is necessary. */ int nfs_sndlock(flagp, rep) register int *flagp; struct nfsreq *rep; { struct proc *p; int slpflag = 0, slptimeo = 0; if (rep) { p = rep->r_procp; if (rep->r_nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; } else p = (struct proc *)0; while (*flagp & NFSMNT_SNDLOCK) { if (nfs_sigintr(rep->r_nmp, rep, p)) return (EINTR); *flagp |= NFSMNT_WANTSND; (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck", slptimeo); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } *flagp |= NFSMNT_SNDLOCK; return (0); } /* * Unlock the stream socket for others. */ void nfs_sndunlock(flagp) register int *flagp; { if ((*flagp & NFSMNT_SNDLOCK) == 0) panic("nfs sndunlock"); *flagp &= ~NFSMNT_SNDLOCK; if (*flagp & NFSMNT_WANTSND) { *flagp &= ~NFSMNT_WANTSND; wakeup((caddr_t)flagp); } } static int nfs_rcvlock(rep) register struct nfsreq *rep; { register int *flagp = &rep->r_nmp->nm_flag; int slpflag, slptimeo = 0; if (*flagp & NFSMNT_INT) slpflag = PCATCH; else slpflag = 0; while (*flagp & NFSMNT_RCVLOCK) { if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp)) return (EINTR); *flagp |= NFSMNT_WANTRCV; (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk", slptimeo); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } *flagp |= NFSMNT_RCVLOCK; return (0); } /* * Unlock the stream socket for others. */ static void nfs_rcvunlock(flagp) register int *flagp; { if ((*flagp & NFSMNT_RCVLOCK) == 0) panic("nfs rcvunlock"); *flagp &= ~NFSMNT_RCVLOCK; if (*flagp & NFSMNT_WANTRCV) { *flagp &= ~NFSMNT_WANTRCV; wakeup((caddr_t)flagp); } } /* * Check for badly aligned mbuf data areas and * realign data in an mbuf list by copying the data areas up, as required. 
*/
/*
 * nfs_realign(m, hsiz)
 *	m    - head of the mbuf list to check; may be NULL (nothing is done).
 *	hsiz - upper bound on how much is packed into the first rewritten
 *	       mbuf when its own data is no longer than that.
 * The list is scanned for the first mbuf whose length is not a multiple of
 * 4 or whose data pointer is not 4-byte aligned.  From that mbuf onwards
 * all data is copied forward, in place, into the same chain of mbufs, and
 * the function returns.
 */
static void
nfs_realign(m, hsiz)
	register struct mbuf *m;
	int hsiz;
{
	register struct mbuf *m2;
	register int siz, mlen, olen;
	register caddr_t tcp, fcp;
	struct mbuf *mnew;

	while (m) {
	    /*
	     * This never happens for UDP, rarely happens for TCP
	     * but frequently happens for iso transport.
	     */
	    if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) {
		/* olen/fcp: bytes left in, and read cursor of, the source */
		olen = m->m_len;
		fcp = mtod(m, caddr_t);
		if ((int)fcp & 0x3) {
			/* misaligned start: move m_data to an aligned spot */
			m->m_flags &= ~M_PKTHDR;
			if (m->m_flags & M_EXT)
				m->m_data = m->m_ext.ext_buf +
					((m->m_ext.ext_size - olen) & ~0x3);
			else
				m->m_data = m->m_dat;
		}
		m->m_len = 0;
		/* tcp/mnew: write cursor and the mbuf being filled */
		tcp = mtod(m, caddr_t);
		mnew = m;
		m2 = m->m_next;

		/*
		 * If possible, only put the first invariant part
		 * of the RPC header in the first mbuf.
		 */
		mlen = M_TRAILINGSPACE(m);
		if (olen <= hsiz && mlen > hsiz)
			mlen = hsiz;

		/*
		 * Loop through the mbuf list consolidating data.
		 */
		while (m) {
			while (olen > 0) {
				if (mlen == 0) {
					/*
					 * Destination is full: reset the next
					 * mbuf and continue filling that one.
					 * NOTE(review): m2 is dereferenced
					 * without a NULL check -- confirm the
					 * chain cannot run out here.
					 */
					m2->m_flags &= ~M_PKTHDR;
					if (m2->m_flags & M_EXT)
						m2->m_data = m2->m_ext.ext_buf;
					else
						m2->m_data = m2->m_dat;
					m2->m_len = 0;
					mlen = M_TRAILINGSPACE(m2);
					tcp = mtod(m2, caddr_t);
					mnew = m2;
					m2 = m2->m_next;
				}
				siz = min(mlen, olen);
				/* no copy needed when data is already in place */
				if (tcp != fcp)
					bcopy(fcp, tcp, siz);
				mnew->m_len += siz;
				mlen -= siz;
				olen -= siz;
				tcp += siz;
				fcp += siz;
			}
			/* advance to the next source mbuf */
			m = m->m_next;
			if (m) {
				olen = m->m_len;
				fcp = mtod(m, caddr_t);
			}
		}

		/*
		 * Finally, set m_len == 0 for any trailing mbufs that have
		 * been copied out of.
		 */
		while (m2) {
			m2->m_len = 0;
			m2 = m2->m_next;
		}
		return;
	    }
	    m = m->m_next;
	}
}

#ifndef NFS_NOSERVER
/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
 * Essentially do as much as possible non-blocking, else punt and it will
 * be called with M_WAIT from an nfsd.
*/ void nfsrv_rcv(so, arg, waitflag) struct socket *so; caddr_t arg; int waitflag; { register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg; register struct mbuf *m; struct mbuf *mp, *nam; struct uio auio; int flags, error; if ((slp->ns_flag & SLP_VALID) == 0) return; #ifdef notdef /* * Define this to test for nfsds handling this under heavy load. */ if (waitflag == M_DONTWAIT) { slp->ns_flag |= SLP_NEEDQ; goto dorecs; } #endif auio.uio_procp = NULL; if (so->so_type == SOCK_STREAM) { /* * If there are already records on the queue, defer soreceive() * to an nfsd so that there is feedback to the TCP layer that * the nfs servers are heavily loaded. */ if (slp->ns_rec && waitflag == M_DONTWAIT) { slp->ns_flag |= SLP_NEEDQ; goto dorecs; } /* * Do soreceive(). */ auio.uio_resid = 1000000000; flags = MSG_DONTWAIT; error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags); if (error || mp == (struct mbuf *)0) { if (error == EWOULDBLOCK) slp->ns_flag |= SLP_NEEDQ; else slp->ns_flag |= SLP_DISCONN; goto dorecs; } m = mp; if (slp->ns_rawend) { slp->ns_rawend->m_next = m; slp->ns_cc += 1000000000 - auio.uio_resid; } else { slp->ns_raw = m; slp->ns_cc = 1000000000 - auio.uio_resid; } while (m->m_next) m = m->m_next; slp->ns_rawend = m; /* * Now try and parse record(s) out of the raw stream data. 
*/ error = nfsrv_getstream(slp, waitflag); if (error) { if (error == EPERM) slp->ns_flag |= SLP_DISCONN; else slp->ns_flag |= SLP_NEEDQ; } } else { do { auio.uio_resid = 1000000000; flags = MSG_DONTWAIT; error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags); if (mp) { nfs_realign(mp, 10 * NFSX_UNSIGNED); if (nam) { m = nam; m->m_next = mp; } else m = mp; if (slp->ns_recend) slp->ns_recend->m_nextpkt = m; else slp->ns_rec = m; slp->ns_recend = m; m->m_nextpkt = (struct mbuf *)0; } if (error) { if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && error != EWOULDBLOCK) { slp->ns_flag |= SLP_DISCONN; goto dorecs; } } } while (mp); } /* * Now try and process the request records, non-blocking. */ dorecs: if (waitflag == M_DONTWAIT && (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)))) nfsrv_wakenfsd(slp); } /* * Try and extract an RPC request from the mbuf data list received on a * stream socket. The "waitflag" argument indicates whether or not it * can sleep. */ static int nfsrv_getstream(slp, waitflag) register struct nfssvc_sock *slp; int waitflag; { register struct mbuf *m, **mpp; register char *cp1, *cp2; register int len; struct mbuf *om, *m2, *recm = 0; u_long recmark; if (slp->ns_flag & SLP_GETSTREAM) panic("nfs getstream"); slp->ns_flag |= SLP_GETSTREAM; for (;;) { if (slp->ns_reclen == 0) { if (slp->ns_cc < NFSX_UNSIGNED) { slp->ns_flag &= ~SLP_GETSTREAM; return (0); } m = slp->ns_raw; if (m->m_len >= NFSX_UNSIGNED) { bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED); m->m_data += NFSX_UNSIGNED; m->m_len -= NFSX_UNSIGNED; } else { cp1 = (caddr_t)&recmark; cp2 = mtod(m, caddr_t); while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) { while (m->m_len == 0) { m = m->m_next; cp2 = mtod(m, caddr_t); } *cp1++ = *cp2++; m->m_data++; m->m_len--; } } slp->ns_cc -= NFSX_UNSIGNED; recmark = ntohl(recmark); slp->ns_reclen = recmark & ~0x80000000; if (recmark & 0x80000000) slp->ns_flag |= SLP_LASTFRAG; else slp->ns_flag &= ~SLP_LASTFRAG; if 
(slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) { slp->ns_flag &= ~SLP_GETSTREAM; return (EPERM); } } /* * Now get the record part. */ if (slp->ns_cc == slp->ns_reclen) { recm = slp->ns_raw; slp->ns_raw = slp->ns_rawend = (struct mbuf *)0; slp->ns_cc = slp->ns_reclen = 0; } else if (slp->ns_cc > slp->ns_reclen) { len = 0; m = slp->ns_raw; om = (struct mbuf *)0; while (len < slp->ns_reclen) { if ((len + m->m_len) > slp->ns_reclen) { m2 = m_copym(m, 0, slp->ns_reclen - len, waitflag); if (m2) { if (om) { om->m_next = m2; recm = slp->ns_raw; } else recm = m2; m->m_data += slp->ns_reclen - len; m->m_len -= slp->ns_reclen - len; len = slp->ns_reclen; } else { slp->ns_flag &= ~SLP_GETSTREAM; return (EWOULDBLOCK); } } else if ((len + m->m_len) == slp->ns_reclen) { om = m; len += m->m_len; m = m->m_next; recm = slp->ns_raw; om->m_next = (struct mbuf *)0; } else { om = m; len += m->m_len; m = m->m_next; } } slp->ns_raw = m; slp->ns_cc -= len; slp->ns_reclen = 0; } else { slp->ns_flag &= ~SLP_GETSTREAM; return (0); } /* * Accumulate the fragments into a record. */ mpp = &slp->ns_frag; while (*mpp) mpp = &((*mpp)->m_next); *mpp = recm; if (slp->ns_flag & SLP_LASTFRAG) { nfs_realign(slp->ns_frag, 10 * NFSX_UNSIGNED); if (slp->ns_recend) slp->ns_recend->m_nextpkt = slp->ns_frag; else slp->ns_rec = slp->ns_frag; slp->ns_recend = slp->ns_frag; slp->ns_frag = (struct mbuf *)0; } } } /* * Parse an RPC header. 
*/ int nfsrv_dorec(slp, nfsd, ndp) register struct nfssvc_sock *slp; struct nfsd *nfsd; struct nfsrv_descript **ndp; { register struct mbuf *m, *nam; register struct nfsrv_descript *nd; int error; *ndp = NULL; if ((slp->ns_flag & SLP_VALID) == 0 || (m = slp->ns_rec) == (struct mbuf *)0) return (ENOBUFS); slp->ns_rec = m->m_nextpkt; if (slp->ns_rec) m->m_nextpkt = (struct mbuf *)0; else slp->ns_recend = (struct mbuf *)0; if (m->m_type == MT_SONAME) { nam = m; m = m->m_next; nam->m_next = NULL; } else nam = NULL; MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK); nd->nd_md = nd->nd_mrep = m; nd->nd_nam2 = nam; nd->nd_dpos = mtod(m, caddr_t); error = nfs_getreq(nd, nfsd, TRUE); if (error) { m_freem(nam); free((caddr_t)nd, M_NFSRVDESC); return (error); } *ndp = nd; nfsd->nfsd_nd = nd; return (0); } /* * Parse an RPC request * - verify it * - fill in the cred struct. */ int nfs_getreq(nd, nfsd, has_header) register struct nfsrv_descript *nd; struct nfsd *nfsd; int has_header; { register int len, i; register u_long *tl; register long t1; struct uio uio; struct iovec iov; caddr_t dpos, cp2, cp; u_long nfsvers, auth_type; uid_t nickuid; int error = 0, nqnfs = 0, ticklen; struct mbuf *mrep, *md; register struct nfsuid *nuidp; struct timeval tvin, tvout; mrep = nd->nd_mrep; md = nd->nd_md; dpos = nd->nd_dpos; if (has_header) { nfsm_dissect(tl, u_long *, 10 * NFSX_UNSIGNED); nd->nd_retxid = fxdr_unsigned(u_long, *tl++); if (*tl++ != rpc_call) { m_freem(mrep); return (EBADRPC); } } else nfsm_dissect(tl, u_long *, 8 * NFSX_UNSIGNED); nd->nd_repstat = 0; nd->nd_flag = 0; if (*tl++ != rpc_vers) { nd->nd_repstat = ERPCMISMATCH; nd->nd_procnum = NFSPROC_NOOP; return (0); } if (*tl != nfs_prog) { if (*tl == nqnfs_prog) nqnfs++; else { nd->nd_repstat = EPROGUNAVAIL; nd->nd_procnum = NFSPROC_NOOP; return (0); } } tl++; nfsvers = fxdr_unsigned(u_long, *tl++); if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) || (nfsvers != NQNFS_VER3 && 
nqnfs)) { nd->nd_repstat = EPROGMISMATCH; nd->nd_procnum = NFSPROC_NOOP; return (0); } if (nqnfs) nd->nd_flag = (ND_NFSV3 | ND_NQNFS); else if (nfsvers == NFS_VER3) nd->nd_flag = ND_NFSV3; nd->nd_procnum = fxdr_unsigned(u_long, *tl++); if (nd->nd_procnum == NFSPROC_NULL) return (0); if (nd->nd_procnum >= NFS_NPROCS || (!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) || (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) { nd->nd_repstat = EPROCUNAVAIL; nd->nd_procnum = NFSPROC_NOOP; return (0); } if ((nd->nd_flag & ND_NFSV3) == 0) nd->nd_procnum = nfsv3_procid[nd->nd_procnum]; auth_type = *tl++; len = fxdr_unsigned(int, *tl++); if (len < 0 || len > RPCAUTH_MAXSIZ) { m_freem(mrep); return (EBADRPC); } nd->nd_flag &= ~ND_KERBAUTH; /* * Handle auth_unix or auth_kerb. */ if (auth_type == rpc_auth_unix) { len = fxdr_unsigned(int, *++tl); if (len < 0 || len > NFS_MAXNAMLEN) { m_freem(mrep); return (EBADRPC); } nfsm_adv(nfsm_rndup(len)); nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred)); nd->nd_cr.cr_ref = 1; nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++); nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++); len = fxdr_unsigned(int, *tl); if (len < 0 || len > RPCAUTH_UNIXGIDS) { m_freem(mrep); return (EBADRPC); } nfsm_dissect(tl, u_long *, (len + 2) * NFSX_UNSIGNED); for (i = 1; i <= len; i++) if (i < NGROUPS) nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++); else tl++; nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? 
NGROUPS : (len + 1); if (nd->nd_cr.cr_ngroups > 1) nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups); len = fxdr_unsigned(int, *++tl); if (len < 0 || len > RPCAUTH_MAXSIZ) { m_freem(mrep); return (EBADRPC); } if (len > 0) nfsm_adv(nfsm_rndup(len)); } else if (auth_type == rpc_auth_kerb) { switch (fxdr_unsigned(int, *tl++)) { case RPCAKN_FULLNAME: ticklen = fxdr_unsigned(int, *tl); *((u_long *)nfsd->nfsd_authstr) = *tl; uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED; nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED; if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) { m_freem(mrep); return (EBADRPC); } uio.uio_offset = 0; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_segflg = UIO_SYSSPACE; iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4]; iov.iov_len = RPCAUTH_MAXSIZ - 4; nfsm_mtouio(&uio, uio.uio_resid); nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); if (*tl++ != rpc_auth_kerb || fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) { printf("Bad kerb verifier\n"); nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF); nd->nd_procnum = NFSPROC_NOOP; return (0); } nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED); tl = (u_long *)cp; if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) { printf("Not fullname kerb verifier\n"); nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF); nd->nd_procnum = NFSPROC_NOOP; return (0); } cp += NFSX_UNSIGNED; bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED); nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED; nd->nd_flag |= ND_KERBFULL; nfsd->nfsd_flag |= NFSD_NEEDAUTH; break; case RPCAKN_NICKNAME: if (len != 2 * NFSX_UNSIGNED) { printf("Kerb nickname short\n"); nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED); nd->nd_procnum = NFSPROC_NOOP; return (0); } nickuid = fxdr_unsigned(uid_t, *tl); nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); if (*tl++ != rpc_auth_kerb || fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) { printf("Kerb nick verifier bad\n"); nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF); nd->nd_procnum = NFSPROC_NOOP; return (0); } nfsm_dissect(tl, u_long *, 
3 * NFSX_UNSIGNED); tvin.tv_sec = *tl++; tvin.tv_usec = *tl; for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first; nuidp != 0; nuidp = nuidp->nu_hash.le_next) { if (nuidp->nu_cr.cr_uid == nickuid && (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp), &nuidp->nu_haddr, nd->nd_nam2))) break; } if (!nuidp) { nd->nd_repstat = (NFSERR_AUTHERR|AUTH_REJECTCRED); nd->nd_procnum = NFSPROC_NOOP; return (0); } /* * Now, decrypt the timestamp using the session key * and validate it. */ #ifdef NFSKERB XXX #endif tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec); tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec); if (nuidp->nu_expire < time.tv_sec || nuidp->nu_timestamp.tv_sec > tvout.tv_sec || (nuidp->nu_timestamp.tv_sec == tvout.tv_sec && nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) { nuidp->nu_expire = 0; nd->nd_repstat = (NFSERR_AUTHERR|AUTH_REJECTVERF); nd->nd_procnum = NFSPROC_NOOP; return (0); } nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr); nd->nd_flag |= ND_KERBNICK; }; } else { nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED); nd->nd_procnum = NFSPROC_NOOP; return (0); } /* * For nqnfs, get piggybacked lease request. */ if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); nd->nd_flag |= fxdr_unsigned(int, *tl); if (nd->nd_flag & ND_LEASE) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); nd->nd_duration = fxdr_unsigned(int, *tl); } else nd->nd_duration = NQ_MINLEASE; } else nd->nd_duration = NQ_MINLEASE; nd->nd_md = md; nd->nd_dpos = dpos; return (0); nfsmout: return (error); } /* * Search for a sleeping nfsd and wake it up. * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the * running nfsds will go look for the work in the nfssvc_sock list. 
*/ void nfsrv_wakenfsd(slp) struct nfssvc_sock *slp; { register struct nfsd *nd; if ((slp->ns_flag & SLP_VALID) == 0) return; for (nd = nfsd_head.tqh_first; nd != 0; nd = nd->nfsd_chain.tqe_next) { if (nd->nfsd_flag & NFSD_WAITING) { nd->nfsd_flag &= ~NFSD_WAITING; if (nd->nfsd_slp) panic("nfsd wakeup"); slp->ns_sref++; nd->nfsd_slp = slp; wakeup((caddr_t)nd); return; } } slp->ns_flag |= SLP_DOREC; nfsd_head_flag |= NFSD_CHECKSLP; } #endif /* NFS_NOSERVER */ static int nfs_msg(p, server, msg) struct proc *p; char *server, *msg; { tpr_t tpr; if (p) tpr = tprintf_open(p); else tpr = NULL; tprintf(tpr, "nfs server %s: %s\n", server, msg); tprintf_close(tpr); return (0); } Index: head/sys/nfs/nfs_vfsops.c =================================================================== --- head/sys/nfs/nfs_vfsops.c (revision 14092) +++ head/sys/nfs/nfs_vfsops.c (revision 14093) @@ -1,949 +1,949 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_vfsops.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_vfsops.c,v 1.25 1995/12/22 15:57:38 phk Exp $ + * $Id: nfs_vfsops.c,v 1.26 1995/12/28 21:56:49 phk Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern int nfs_mountroot __P((void)); extern int nfs_ticks; struct nfsstats nfsstats; SYSCTL_NODE(_fs, MOUNT_NFS, nfs, CTLFLAG_RW, 0, "NFS filesystem"); SYSCTL_STRUCT(_fs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RD, &nfsstats, nfsstats, ""); static int nfs_iosize __P((struct nfsmount *nmp)); static int mountnfs __P((struct nfs_args *,struct mount *, struct mbuf *,char *,char *,struct vnode **)); static int nfs_mount __P(( struct mount *mp, char *path, caddr_t data, struct nameidata *ndp, struct proc *p)); static int nfs_start __P(( struct mount *mp, int flags, struct proc *p)); static int nfs_unmount __P(( struct 
mount *mp, int mntflags, struct proc *p)); static int nfs_root __P(( struct mount *mp, struct vnode **vpp)); static int nfs_quotactl __P(( struct mount *mp, int cmds, uid_t uid, caddr_t arg, struct proc *p)); static int nfs_statfs __P(( struct mount *mp, struct statfs *sbp, struct proc *p)); static int nfs_sync __P(( struct mount *mp, int waitfor, struct ucred *cred, struct proc *p)); static int nfs_vptofh __P(( struct vnode *vp, struct fid *fhp)); static int nfs_fhtovp __P((struct mount *mp, struct fid *fhp, struct mbuf *nam, struct vnode **vpp, int *exflagsp, struct ucred **credanonp)); static int nfs_vget __P((struct mount *, ino_t, struct vnode **)); /* * nfs vfs operations. */ static struct vfsops nfs_vfsops = { nfs_mount, nfs_start, nfs_unmount, nfs_root, nfs_quotactl, nfs_statfs, nfs_sync, nfs_vget, nfs_fhtovp, nfs_vptofh, nfs_init, }; VFS_SET(nfs_vfsops, nfs, MOUNT_NFS, VFCF_NETWORK); /* * This structure must be filled in by a primary bootstrap or bootstrap * server for a diskless/dataless machine. It is initialized below just * to ensure that it is allocated to initialized data (.data not .bss). */ struct nfs_diskless nfs_diskless = { 0 }; int nfs_diskless_valid = 0; void nfsargs_ntoh __P((struct nfs_args *)); static struct mount *nfs_mountdiskless __P((char *, char *, int, struct sockaddr_in *, struct nfs_args *, register struct vnode **)); static int nfs_iosize(nmp) struct nfsmount* nmp; { int iosize; /* * Calculate the size used for io buffers. Use the larger * of the two sizes to minimise nfs requests but make sure * that it is at least one VM page to avoid wasting buffer * space. 
*/ iosize = max(nmp->nm_rsize, nmp->nm_wsize); if (iosize < NBPG) iosize = NBPG; return iosize; } /* * nfs statfs call */ int nfs_statfs(mp, sbp, p) struct mount *mp; register struct statfs *sbp; struct proc *p; { register struct vnode *vp; register struct nfs_statfs *sfp; register caddr_t cp; register u_long *tl; register long t1, t2; caddr_t bpos, dpos, cp2; struct nfsmount *nmp = VFSTONFS(mp); int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct ucred *cred; struct nfsnode *np; u_quad_t tquad; #ifndef nolint sfp = (struct nfs_statfs *)0; #endif error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np); if (error) return (error); vp = NFSTOV(np); cred = crget(); cred->cr_ngroups = 1; if (v3 && (nmp->nm_flag & NFSMNT_GOTFSINFO) == 0) (void)nfs_fsinfo(nmp, vp, cred, p); nfsstats.rpccnt[NFSPROC_FSSTAT]++; nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_FSSTAT, p, cred); if (v3) nfsm_postop_attr(vp, retattr); if (!error) nfsm_dissect(sfp, struct nfs_statfs *, NFSX_STATFS(v3)); #ifdef __NetBSD__ #ifdef COMPAT_09 sbp->f_type = 2; #else sbp->f_type = 0; #endif #else sbp->f_type = MOUNT_NFS; #endif sbp->f_flags = nmp->nm_flag; sbp->f_iosize = nfs_iosize(nmp); if (v3) { sbp->f_bsize = NFS_FABLKSIZE; fxdr_hyper(&sfp->sf_tbytes, &tquad); sbp->f_blocks = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE)); fxdr_hyper(&sfp->sf_fbytes, &tquad); sbp->f_bfree = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE)); fxdr_hyper(&sfp->sf_abytes, &tquad); sbp->f_bavail = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE)); sbp->f_files = (fxdr_unsigned(long, sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff); sbp->f_ffree = (fxdr_unsigned(long, sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff); } else { sbp->f_bsize = fxdr_unsigned(long, sfp->sf_bsize); sbp->f_blocks = fxdr_unsigned(long, sfp->sf_blocks); sbp->f_bfree = fxdr_unsigned(long, sfp->sf_bfree); sbp->f_bavail = fxdr_unsigned(long, sfp->sf_bavail); sbp->f_files = 0; 
sbp->f_ffree = 0; } if (sbp != &mp->mnt_stat) { bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } nfsm_reqdone; vput(vp); crfree(cred); return (error); } /* * nfs version 3 fsinfo rpc call */ int nfs_fsinfo(nmp, vp, cred, p) register struct nfsmount *nmp; register struct vnode *vp; struct ucred *cred; struct proc *p; { register struct nfsv3_fsinfo *fsp; register caddr_t cp; register long t1, t2; register u_long *tl, pref, max; caddr_t bpos, dpos, cp2; int error = 0, retattr; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsstats.rpccnt[NFSPROC_FSINFO]++; nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1)); nfsm_fhtom(vp, 1); nfsm_request(vp, NFSPROC_FSINFO, p, cred); nfsm_postop_attr(vp, retattr); if (!error) { nfsm_dissect(fsp, struct nfsv3_fsinfo *, NFSX_V3FSINFO); pref = fxdr_unsigned(u_long, fsp->fs_wtpref); if (pref < nmp->nm_wsize) nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); max = fxdr_unsigned(u_long, fsp->fs_wtmax); if (max < nmp->nm_wsize) { nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1); if (nmp->nm_wsize == 0) nmp->nm_wsize = max; } pref = fxdr_unsigned(u_long, fsp->fs_rtpref); if (pref < nmp->nm_rsize) nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); max = fxdr_unsigned(u_long, fsp->fs_rtmax); if (max < nmp->nm_rsize) { nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1); if (nmp->nm_rsize == 0) nmp->nm_rsize = max; } pref = fxdr_unsigned(u_long, fsp->fs_dtpref); if (pref < nmp->nm_readdirsize) nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) & ~(NFS_DIRBLKSIZ - 1); if (max < nmp->nm_readdirsize) { nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1); if (nmp->nm_readdirsize == 0) nmp->nm_readdirsize = max; } nmp->nm_flag |= NFSMNT_GOTFSINFO; } nfsm_reqdone; return (error); } /* * Mount a remote root fs via. nfs. This depends on the info in the * nfs_diskless structure that has been filled in properly by some primary * bootstrap. 
* It goes something like this: * - do enough of "ifconfig" by calling ifioctl() so that the system * can talk to the server * - If nfs_diskless.mygateway is filled in, use that address as * a default gateway. * - hand craft the swap nfs vnode hanging off a fake mount point * if swdevt[0].sw_dev == NODEV * - build the rootfs mount point and call mountnfs() to do the rest. */ int nfs_mountroot() { register struct mount *mp; register struct nfs_diskless *nd = &nfs_diskless; struct socket *so; struct vnode *vp; struct proc *p = curproc; /* XXX */ int error, i; u_long l; char buf[128]; /* * XXX time must be non-zero when we init the interface or else * the arp code will wedge... */ if (time.tv_sec == 0) time.tv_sec = 1; /* * XXX splnet, so networks will receive... */ splnet(); #ifdef notyet /* Set up swap credentials. */ proc0.p_ucred->cr_uid = ntohl(nd->swap_ucred.cr_uid); proc0.p_ucred->cr_gid = ntohl(nd->swap_ucred.cr_gid); if ((proc0.p_ucred->cr_ngroups = ntohs(nd->swap_ucred.cr_ngroups)) > NGROUPS) proc0.p_ucred->cr_ngroups = NGROUPS; for (i = 0; i < proc0.p_ucred->cr_ngroups; i++) proc0.p_ucred->cr_groups[i] = ntohl(nd->swap_ucred.cr_groups[i]); #endif /* * Do enough of ifconfig(8) so that the critical net interface can * talk to the server. */ - error = socreate(nd->myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0); + error = socreate(nd->myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0, p); if (error) panic("nfs_mountroot: socreate(%04x): %d", nd->myif.ifra_addr.sa_family, error); /* * We might not have been told the right interface, so we pass * over the first ten interfaces of the same kind, until we get * one of them configured. 
*/ for (i = strlen(nd->myif.ifra_name) - 1; nd->myif.ifra_name[i] >= '0' && nd->myif.ifra_name[i] <= '9'; nd->myif.ifra_name[i] ++) { error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, p); if(!error) break; } if (error) panic("nfs_mountroot: SIOCAIFADDR: %d", error); soclose(so); /* * If the gateway field is filled in, set it as the default route. */ if (nd->mygateway.sin_len != 0) { struct sockaddr_in mask, sin; bzero((caddr_t)&mask, sizeof(mask)); sin = mask; sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); error = rtrequest(RTM_ADD, (struct sockaddr *)&sin, (struct sockaddr *)&nd->mygateway, (struct sockaddr *)&mask, RTF_UP | RTF_GATEWAY, (struct rtentry **)0); if (error) panic("nfs_mountroot: RTM_ADD: %d", error); } if (nd->swap_nblks) { /* Convert to DEV_BSIZE instead of Kilobyte */ nd->swap_nblks *= 2; /* * Create a fake mount point just for the swap vnode so that the * swap file can be on a different server from the rootfs. */ nd->swap_args.fh = nd->swap_fh; /* * If using nfsv3_diskless, replace NFSX_V2FH with * nd->swap_fhsize. */ nd->swap_args.fhsize = NFSX_V2FH; l = ntohl(nd->swap_saddr.sin_addr.s_addr); sprintf(buf,"%ld.%ld.%ld.%ld:%s", (l >> 24) & 0xff, (l >> 16) & 0xff, (l >> 8) & 0xff, (l >> 0) & 0xff,nd->swap_hostnam); printf("NFS SWAP: %s\n",buf); (void) nfs_mountdiskless(buf, "/swap", 0, &nd->swap_saddr, &nd->swap_args, &vp); VTONFS(vp)->n_size = VTONFS(vp)->n_vattr.va_size = nd->swap_nblks * DEV_BSIZE ; /* * Since the swap file is not the root dir of a file system, * hack it to a regular file. */ vp->v_type = VREG; vp->v_flag = 0; VREF(vp); swaponvp(p, vp, NODEV, nd->swap_nblks); } /* * Create the rootfs mount point. */ nd->root_args.fh = nd->root_fh; /* * If using nfsv3_diskless, replace NFSX_V2FH with nd->root_fhsize. 
*/ nd->root_args.fhsize = NFSX_V2FH; l = ntohl(nd->root_saddr.sin_addr.s_addr); sprintf(buf,"%ld.%ld.%ld.%ld:%s", (l >> 24) & 0xff, (l >> 16) & 0xff, (l >> 8) & 0xff, (l >> 0) & 0xff,nd->root_hostnam); printf("NFS ROOT: %s\n",buf); mp = nfs_mountdiskless(buf, "/", MNT_RDONLY, &nd->root_saddr, &nd->root_args, &vp); if (vfs_lock(mp)) panic("nfs_mountroot: vfs_lock"); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); mp->mnt_flag |= MNT_ROOTFS; mp->mnt_vnodecovered = NULLVP; vfs_unlock(mp); rootvp = vp; /* * This is not really an nfs issue, but it is much easier to * set hostname here and then let the "/etc/rc.xxx" files * mount the right /var based upon its preset value. */ bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN); hostname[MAXHOSTNAMELEN - 1] = '\0'; for (i = 0; i < MAXHOSTNAMELEN; i++) if (hostname[i] == '\0') break; inittodr(ntohl(nd->root_time)); return (0); } /* * Internal version of mount system call for diskless setup. */ static struct mount * nfs_mountdiskless(path, which, mountflag, sin, args, vpp) char *path; char *which; int mountflag; struct sockaddr_in *sin; struct nfs_args *args; register struct vnode **vpp; { register struct mount *mp; register struct mbuf *m; register int error; mp = (struct mount *)malloc((u_long)sizeof(struct mount), M_MOUNT, M_NOWAIT); if (mp == NULL) panic("nfs_mountroot: %s mount malloc", which); bzero((char *)mp, (u_long)sizeof(struct mount)); mp->mnt_op = &nfs_vfsops; mp->mnt_flag = mountflag; MGET(m, MT_SONAME, M_DONTWAIT); if (m == NULL) panic("nfs_mountroot: %s mount mbuf", which); bcopy((caddr_t)sin, mtod(m, caddr_t), sin->sin_len); m->m_len = sin->sin_len; error = mountnfs(args, mp, m, which, path, vpp); if (error) panic("nfs_mountroot: mount %s on %s: %d", path, which, error); return (mp); } /* * VFS Operations. 
* * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the sockargs() call because sockargs() allocates an mbuf and * an error after that means that I have to release the mbuf. */ /* ARGSUSED */ static int nfs_mount(mp, path, data, ndp, p) struct mount *mp; char *path; caddr_t data; struct nameidata *ndp; struct proc *p; { int error; struct nfs_args args; struct mbuf *nam; struct vnode *vp; char pth[MNAMELEN], hst[MNAMELEN]; u_int len; u_char nfh[NFSX_V3FHMAX]; error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args)); if (error) return (error); error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); if (error) return (error); error = copyinstr(path, pth, MNAMELEN-1, &len); if (error) return (error); bzero(&pth[len], MNAMELEN - len); error = copyinstr(args.hostname, hst, MNAMELEN-1, &len); if (error) return (error); bzero(&hst[len], MNAMELEN - len); /* sockargs() call must be after above copyin() calls */ error = sockargs(&nam, (caddr_t)args.addr, args.addrlen, MT_SONAME); if (error) return (error); args.fh = nfh; error = mountnfs(&args, mp, nam, pth, hst, &vp); return (error); } /* * Common code for mount and mountroot */ static int mountnfs(argp, mp, nam, pth, hst, vpp) register struct nfs_args *argp; register struct mount *mp; struct mbuf *nam; char *pth, *hst; struct vnode **vpp; { register struct nfsmount *nmp; struct nfsnode *np; int error, maxio; struct vattr attrs; if (mp->mnt_flag & MNT_UPDATE) { nmp = VFSTONFS(mp); /* update paths, file handles, etc, here XXX */ m_freem(nam); return (0); } else { MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount), M_NFSMNT, M_WAITOK); bzero((caddr_t)nmp, sizeof (struct nfsmount)); TAILQ_INIT(&nmp->nm_uidlruhead); mp->mnt_data = (qaddr_t)nmp; } getnewfsid(mp, MOUNT_NFS); nmp->nm_mountp = mp; nmp->nm_flag = argp->flags; if (nmp->nm_flag & NFSMNT_NQNFS) /* * We have to set mnt_maxsymlink to a non-zero 
value so * that COMPAT_43 routines will know that we are setting * the d_type field in directories (and can zero it for * unsuspecting binaries). */ mp->mnt_maxsymlinklen = 1; nmp->nm_timeo = NFS_TIMEO; nmp->nm_retry = NFS_RETRANS; nmp->nm_wsize = NFS_WSIZE; nmp->nm_rsize = NFS_RSIZE; nmp->nm_readdirsize = NFS_READDIRSIZE; nmp->nm_numgrps = NFS_MAXGRPS; nmp->nm_readahead = NFS_DEFRAHEAD; nmp->nm_leaseterm = NQ_DEFLEASE; nmp->nm_deadthresh = NQ_DEADTHRESH; CIRCLEQ_INIT(&nmp->nm_timerhead); nmp->nm_inprog = NULLVP; nmp->nm_fhsize = argp->fhsize; bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize); #ifdef __NetBSD__ #ifdef COMPAT_09 mp->mnt_stat.f_type = 2; #else mp->mnt_stat.f_type = 0; #endif #else mp->mnt_stat.f_type = MOUNT_NFS; #endif bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); bcopy(pth, mp->mnt_stat.f_mntonname, MNAMELEN); nmp->nm_nam = nam; if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; if (nmp->nm_timeo < NFS_MINTIMEO) nmp->nm_timeo = NFS_MINTIMEO; else if (nmp->nm_timeo > NFS_MAXTIMEO) nmp->nm_timeo = NFS_MAXTIMEO; } if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { nmp->nm_retry = argp->retrans; if (nmp->nm_retry > NFS_MAXREXMIT) nmp->nm_retry = NFS_MAXREXMIT; } if (argp->flags & NFSMNT_NFSV3) { if (argp->sotype == SOCK_DGRAM) maxio = NFS_MAXDGRAMDATA; else maxio = NFS_MAXDATA; } else maxio = NFS_V2MAXDATA; if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { nmp->nm_wsize = argp->wsize; /* Round down to multiple of blocksize */ nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1); if (nmp->nm_wsize <= 0) nmp->nm_wsize = NFS_FABLKSIZE; } if (nmp->nm_wsize > maxio) nmp->nm_wsize = maxio; if (nmp->nm_wsize > MAXBSIZE) nmp->nm_wsize = MAXBSIZE; if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { nmp->nm_rsize = argp->rsize; /* Round down to multiple of blocksize */ nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1); if (nmp->nm_rsize <= 0) nmp->nm_rsize = NFS_FABLKSIZE; } if (nmp->nm_rsize > maxio) 
nmp->nm_rsize = maxio; if (nmp->nm_rsize > MAXBSIZE) nmp->nm_rsize = MAXBSIZE; if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) { nmp->nm_readdirsize = argp->readdirsize; /* Round down to multiple of blocksize */ nmp->nm_readdirsize &= ~(NFS_DIRBLKSIZ - 1); if (nmp->nm_readdirsize < NFS_DIRBLKSIZ) nmp->nm_readdirsize = NFS_DIRBLKSIZ; } if (nmp->nm_readdirsize > maxio) nmp->nm_readdirsize = maxio; if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 && argp->maxgrouplist <= NFS_MAXGRPS) nmp->nm_numgrps = argp->maxgrouplist; if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 && argp->readahead <= NFS_MAXRAHEAD) nmp->nm_readahead = argp->readahead; if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 && argp->leaseterm <= NQ_MAXLEASE) nmp->nm_leaseterm = argp->leaseterm; if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 && argp->deadthresh <= NQ_NEVERDEAD) nmp->nm_deadthresh = argp->deadthresh; /* Set up the sockets and per-host congestion */ nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; /* * For Connection based sockets (TCP,...) defer the connect until * the first request, in case the server is not responding. */ if (nmp->nm_sotype == SOCK_DGRAM && (error = nfs_connect(nmp, (struct nfsreq *)0))) goto bad; /* * This is silly, but it has to be set so that vinifod() works. * We do not want to do an nfs_statfs() here since we can get * stuck on a dead server and we are holding a lock on the mount * point. */ mp->mnt_stat.f_iosize = nfs_iosize(nmp); /* * A reference count is needed on the nfsnode representing the * remote root. If this object is not persistent, then backward * traversals of the mount point (i.e. "..") will not work if * the nfsnode gets flushed out of the cache. Ufs does not have * this problem, because one can identify root inodes by their * number == ROOTINO (2). 
*/ error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np); if (error) goto bad; *vpp = NFSTOV(np); /* * Get file attributes for the mountpoint. This has the side * effect of filling in (*vpp)->v_type with the correct value. */ VOP_GETATTR(*vpp, &attrs, curproc->p_ucred, curproc); /* * Lose the lock but keep the ref. */ VOP_UNLOCK(*vpp); return (0); bad: nfs_disconnect(nmp); free((caddr_t)nmp, M_NFSMNT); m_freem(nam); return (error); } /* * unmount system call */ static int nfs_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { register struct nfsmount *nmp; struct nfsnode *np; struct vnode *vp; int error, flags = 0; if (mntflags & MNT_FORCE) { if (!doforce) return (EINVAL); flags |= FORCECLOSE; } nmp = VFSTONFS(mp); /* * Goes something like this.. * - Check for activity on the root vnode (other than ourselves). * - Call vflush() to clear out vnodes for this file system, * except for the root vnode. * - Decrement reference on the vnode representing remote root. * - Close the socket * - Free up the data structures */ /* * We need to decrement the ref. count on the nfsnode representing * the remote root. See comment in mountnfs(). The VFS unmount() * has done vput on this vnode, otherwise we would get deadlock! */ error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np); if (error) return(error); vp = NFSTOV(np); if (vp->v_usecount > 2) { vput(vp); return (EBUSY); } /* * Must handshake with nqnfs_clientd() if it is active. */ nmp->nm_flag |= NFSMNT_DISMINPROG; while (nmp->nm_inprog != NULLVP) (void) tsleep((caddr_t)&lbolt, PSOCK, "nfsdism", 0); error = vflush(mp, vp, flags); if (error) { vput(vp); nmp->nm_flag &= ~NFSMNT_DISMINPROG; return (error); } /* * We are now committed to the unmount. * For NQNFS, let the server daemon free the nfsmount structure. */ if (nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) nmp->nm_flag |= NFSMNT_DISMNT; /* * There are two reference counts and one lock to get rid of here. 
*/ vput(vp); vrele(vp); vgone(vp); nfs_disconnect(nmp); m_freem(nmp->nm_nam); if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) == 0) free((caddr_t)nmp, M_NFSMNT); return (0); } /* * Return root of a filesystem */ static int nfs_root(mp, vpp) struct mount *mp; struct vnode **vpp; { register struct vnode *vp; struct nfsmount *nmp; struct nfsnode *np; int error; nmp = VFSTONFS(mp); error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np); if (error) return (error); vp = NFSTOV(np); VOP_UNLOCK(vp); if (vp->v_type == VNON) vp->v_type = VDIR; vp->v_flag = VROOT; *vpp = vp; return (0); } extern int syncprt; /* * Flush out the buffer cache */ /* ARGSUSED */ static int nfs_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { register struct vnode *vp; int error, allerror = 0; /* * Force stale buffer cache information to be flushed. */ loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = vp->v_mntvnodes.le_next) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; if (VOP_ISLOCKED(vp) || vp->v_dirtyblkhd.lh_first == NULL) continue; if (vget(vp, 1)) goto loop; error = VOP_FSYNC(vp, cred, waitfor, p); if (error) allerror = error; vput(vp); } return (allerror); } /* * NFS flat namespace lookup. * Currently unsupported. */ /* ARGSUSED */ static int nfs_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { return (EOPNOTSUPP); } /* * At this point, this should never happen */ /* ARGSUSED */ static int nfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) register struct mount *mp; struct fid *fhp; struct mbuf *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { return (EINVAL); } /* * Vnode pointer to File handle, should never happen either */ /* ARGSUSED */ static int nfs_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { return (EINVAL); } /* * Vfs start routine, a no-op. 
*/ /* ARGSUSED */ static int nfs_start(mp, flags, p) struct mount *mp; int flags; struct proc *p; { return (0); } /* * Do operations associated with quotas, not supported */ /* ARGSUSED */ static int nfs_quotactl(mp, cmd, uid, arg, p) struct mount *mp; int cmd; uid_t uid; caddr_t arg; struct proc *p; { return (EOPNOTSUPP); } Index: head/sys/nfsclient/nfs_socket.c =================================================================== --- head/sys/nfsclient/nfs_socket.c (revision 14092) +++ head/sys/nfsclient/nfs_socket.c (revision 14093) @@ -1,2176 +1,2178 @@ /* * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_socket.c 8.3 (Berkeley) 1/12/94 - * $Id: nfs_socket.c,v 1.13 1995/12/17 21:12:25 phk Exp $ + * $Id: nfs_socket.c,v 1.14 1996/01/13 23:27:52 phk Exp $ */ /* * Socket operations for use by nfs */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TRUE 1 #define FALSE 0 /* * Estimate rto for an nfs rpc sent via. an unreliable datagram. * Use the mean and mean deviation of rtt for the appropriate type of rpc * for the frequent rpcs and a default for the others. * The justification for doing "other" this way is that these rpcs * happen so infrequently that timer est. would probably be stale. * Also, since many of these rpcs are * non-idempotent, a conservative timeout is desired. * getattr, lookup - A+2D * read, write - A+4D * other - nm_timeo */ #define NFS_RTO(n, t) \ ((t) == 0 ? (n)->nm_timeo : \ ((t) < 3 ? 
\ (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \ ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1))) #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1] #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1] /* * External data, mostly RPC constants in XDR form */ extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr, rpc_auth_kerb; extern u_long nfs_prog, nqnfs_prog; extern time_t nqnfsstarttime; extern struct nfsstats nfsstats; extern int nfsv3_procid[NFS_NPROCS]; extern int nfs_ticks; /* * Defines which timer to use for the procnum. * 0 - default * 1 - getattr * 2 - lookup * 3 - read * 4 - write */ static int proct[NFS_NPROCS] = { 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, }; /* * There is a congestion window for outstanding rpcs maintained per mount * point. The cwnd size is adjusted in roughly the way that: * Van Jacobson, Congestion avoidance and Control, In "Proceedings of * SIGCOMM '88". ACM, August 1988. * describes for TCP. The cwnd size is chopped in half on a retransmit timeout * and incremented by 1/cwnd when each rpc reply is received and a full cwnd * of rpcs is in progress. * (The sent count and cwnd are scaled for integer arith.) * Variants of "slow start" were tried and were found to be too much of a * performance hit (ave. rtt 3 times larger), * I suspect due to the large rtt that nfs rpcs have. 
*/
/*
 * nm_cwnd and nm_sent are kept scaled by NFS_CWNDSCALE: nfs_request() adds
 * NFS_CWNDSCALE to nm_sent for every request it sends, and nfs_reply()
 * clamps nm_cwnd to NFS_MAXCWND.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)

/*
 * Delay multipliers. nfs_request() multiplies its NFSERR_TRYLATER retry
 * delay by nfs_backoff[trylater_cnt] and caps the index at 7.
 */
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };

/*
 * When nfsrtton is non-zero, nfs_reply() stores one timing sample per
 * matched reply in the circular nfsrtt.rttl[] log.
 */
int nfsrtton = 0;
struct nfsrtt nfsrtt;

/* Prototypes for the file-local helpers. */
static int	nfs_msg __P((struct proc *,char *,char *));
static int	nfs_rcvlock __P((struct nfsreq *));
static void	nfs_rcvunlock __P((int *flagp));
static void	nfs_realign __P((struct mbuf *m, int hsiz));
static int	nfs_receive __P((struct nfsreq *rep, struct mbuf **aname,
				 struct mbuf **mp));
static int	nfs_reconnect __P((struct nfsreq *rep));
static int	nfsrv_getstream __P((struct nfssvc_sock *,int));

#ifndef NFS_NOSERVER
/*
 * Server procedure handlers, NFS_NPROCS entries; omitted when the kernel
 * is built with NFS_NOSERVER.
 * NOTE(review): presumably indexed by NFS procedure number -- confirm
 * against the code that dispatches through this table.
 */
int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd,
				    struct nfssvc_sock *slp,
				    struct proc *procp,
				    struct mbuf **mreqp)) = {
	nfsrv_null,
	nfsrv_getattr,
	nfsrv_setattr,
	nfsrv_lookup,
	nfsrv3_access,
	nfsrv_readlink,
	nfsrv_read,
	nfsrv_write,
	nfsrv_create,
	nfsrv_mkdir,
	nfsrv_symlink,
	nfsrv_mknod,
	nfsrv_remove,
	nfsrv_rmdir,
	nfsrv_rename,
	nfsrv_link,
	nfsrv_readdir,
	nfsrv_readdirplus,
	nfsrv_statfs,
	nfsrv_fsinfo,
	nfsrv_pathconf,
	nfsrv_commit,
	nqnfsrv_getlease,
	nqnfsrv_vacated,
	nfsrv_noop,
	nfsrv_noop
};
#endif /* NFS_NOSERVER */

/*
 * Initialize sockets and congestion for a new NFS connection.
 * We do not free the sockaddr if error.
 *
 * nmp - mount whose socket is created; the new socket is stored in
 *	 nmp->nm_so and its protocol flags in nmp->nm_soflags.
 * rep - request on whose behalf we connect, or NULL. It is only used to
 *	 poll nfs_sigintr() while waiting for the connection to complete.
 *
 * Returns 0 on success. On any failure the socket is torn down again via
 * nfs_disconnect() and the error is returned.
 */
int
nfs_connect(nmp, rep)
	register struct nfsmount *nmp;
	struct nfsreq *rep;
{
	register struct socket *so;
	int s, error, rcvreserve, sndreserve;
	struct sockaddr *saddr;
	struct sockaddr_in *sin;
	struct mbuf *m;
	u_short tport;
	/*
	 * NOTE(review): the lines below that start with '+' or '-' are
	 * unified-diff markers carried in this text (revision 14092 ->
	 * 14093), not C operators.
	 */
+	struct proc *p = &proc0;	/* only used for socreate */

	nmp->nm_so = (struct socket *)0;
	saddr = mtod(nmp->nm_nam, struct sockaddr *);
	error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
-		nmp->nm_soproto);
+		nmp->nm_soproto, p);
	if (error)
		goto bad;
	so = nmp->nm_so;
+	so->so_state &= ~SS_PRIV;	/* don't need it */
	nmp->nm_soflags = so->so_proto->pr_flags;

	/*
	 * Some servers require that the client port be a reserved port number.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		MGET(m, M_WAIT, MT_SONAME);
		sin = mtod(m, struct sockaddr_in *);
		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = INADDR_ANY;
		/*
		 * Start at IPPORT_RESERVED - 1 and walk downwards while the
		 * port is in use, giving up once tport reaches
		 * IPPORT_RESERVED / 2.
		 */
		tport = IPPORT_RESERVED - 1;
		sin->sin_port = htons(tport);
		while ((error = sobind(so, m)) == EADDRINUSE &&
		       --tport > IPPORT_RESERVED / 2)
			sin->sin_port = htons(tport);
		m_freem(m);
		if (error)
			goto bad;
	}

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a port other than NFS_PORT.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_soflags & PR_CONNREQUIRED) {
			error = ENOTCONN;
			goto bad;
		}
	} else {
		error = soconnect(so, nmp->nm_nam);
		if (error)
			goto bad;

		/*
		 * Wait for the connection to complete. Cribbed from the
		 * connect system call but with the wait timing out so
		 * that interruptible mounts don't hang here for a long time.
		 */
		s = splnet();
		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
			(void) tsleep((caddr_t)&so->so_timeo, PSOCK,
				"nfscon", 2 * hz);
			/* Still connecting: let a signal abort the wait. */
			if ((so->so_state & SS_ISCONNECTING) &&
			    so->so_error == 0 && rep &&
			    (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
				so->so_state &= ~SS_ISCONNECTING;
				splx(s);
				goto bad;
			}
		}
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto bad;
		}
		splx(s);
	}
	/* Soft and interruptible mounts get a 5 second socket buffer timeout. */
	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
		so->so_rcv.sb_timeo = (5 * hz);
		so->so_snd.sb_timeo = (5 * hz);
	} else {
		so->so_rcv.sb_timeo = 0;
		so->so_snd.sb_timeo = 0;
	}
	/*
	 * Size the socket buffers from the mount's read/write sizes:
	 * one packet for datagrams, two for the other socket types
	 * (plus room for the record mark on stream sockets).
	 */
	if (nmp->nm_sotype == SOCK_DGRAM) {
		sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
		rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR;
	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
	} else {
		if (nmp->nm_sotype != SOCK_STREAM)
			panic("nfscon sotype");
		/* The return values of sosetopt() are not checked here. */
		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int *) = 1;
			m->m_len = sizeof(int);
			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
		}
		if (so->so_proto->pr_protocol == IPPROTO_TCP) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int *) = 1;
			m->m_len = sizeof(int);
			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
		}
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
				* 2;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
				* 2;
	}
	error = soreserve(so, sndreserve, rcvreserve);
	if (error)
		goto bad;
	so->so_rcv.sb_flags |= SB_NOINTR;
	so->so_snd.sb_flags |= SB_NOINTR;

	/*
	 * Initialize other non-zero congestion variables
	 *
	 * NOTE(review): elements [0]..[4] are written here, but NFS_RTO(),
	 * NFS_SRTT() and NFS_SDRTT() only index [timer - 1] for timers 1-4,
	 * i.e. [0]..[3]. If nm_srtt/nm_sdrtt are declared with four
	 * elements, the [4] stores land past the end of the arrays --
	 * confirm against the struct nfsmount declaration.
	 */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
		nmp->nm_srtt[4] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
		nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0;
	nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
	nmp->nm_sent = 0;
	nmp->nm_timeouts = 0;
	return (0);

bad:
	nfs_disconnect(nmp);
	return (error);
}

/*
 * Reconnect routine:
 * Called when a connection is broken on a reliable protocol.
 * - clean up the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 * nb: Must be called with the nfs_sndlock() set on the mount point.
 *
 * Returns 0 once reconnected, or EINTR when nfs_connect() reports EINTR or
 * ERESTART. Any other connect error is retried indefinitely.
 */
static int
nfs_reconnect(rep)
	register struct nfsreq *rep;
{
	register struct nfsreq *rp;
	register struct nfsmount *nmp = rep->r_nmp;
	int error;

	nfs_disconnect(nmp);
	while ((error = nfs_connect(nmp, rep))) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		/* Sleep on lbolt before trying to connect again. */
		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
	}

	/*
	 * Loop through outstanding request list and fix up all requests
	 * on old socket.
	 */
	for (rp = nfs_reqq.tqh_first; rp != 0; rp = rp->r_chain.tqe_next) {
		if (rp->r_nmp == nmp)
			rp->r_flags |= R_MUSTRESEND;
	}
	return (0);
}

/*
 * NFS disconnect. Clean up and unlink.
*/ void nfs_disconnect(nmp) register struct nfsmount *nmp; { register struct socket *so; if (nmp->nm_so) { so = nmp->nm_so; nmp->nm_so = (struct socket *)0; soshutdown(so, 2); soclose(so); } } /* * This is the nfs send routine. For connection based socket types, it * must be called with an nfs_sndlock() on the socket. * "rep == NULL" indicates that it has been called from a server. * For the client side: * - return EINTR if the RPC is terminated, 0 otherwise * - set R_MUSTRESEND if the send fails for any reason * - do any cleanup required by recoverable socket errors (???) * For the server side: * - return EINTR or ERESTART if interrupted by a signal * - return EPIPE if a connection is lost for connection based sockets (TCP...) * - do any cleanup required by recoverable socket errors (???) */ int nfs_send(so, nam, top, rep) register struct socket *so; struct mbuf *nam; register struct mbuf *top; struct nfsreq *rep; { struct mbuf *sendnam; int error, soflags, flags; if (rep) { if (rep->r_flags & R_SOFTTERM) { m_freem(top); return (EINTR); } if ((so = rep->r_nmp->nm_so) == NULL) { rep->r_flags |= R_MUSTRESEND; m_freem(top); return (0); } rep->r_flags &= ~R_MUSTRESEND; soflags = rep->r_nmp->nm_soflags; } else soflags = so->so_proto->pr_flags; if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) sendnam = (struct mbuf *)0; else sendnam = nam; if (so->so_type == SOCK_SEQPACKET) flags = MSG_EOR; else flags = 0; error = sosend(so, sendnam, (struct uio *)0, top, (struct mbuf *)0, flags); if (error) { if (rep) { log(LOG_INFO, "nfs send error %d for server %s\n",error, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); /* * Deal with errors for the client side. */ if (rep->r_flags & R_SOFTTERM) error = EINTR; else rep->r_flags |= R_MUSTRESEND; } else log(LOG_INFO, "nfsd send error %d\n", error); /* * Handle any recoverable (soft) socket errors here. (???) 
*/ if (error != EINTR && error != ERESTART && error != EWOULDBLOCK && error != EPIPE) error = 0; } return (error); } /* * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all * done by soreceive(), but for SOCK_STREAM we must deal with the Record * Mark and consolidate the data into a new mbuf list. * nb: Sometimes TCP passes the data up to soreceive() in long lists of * small mbufs. * For SOCK_STREAM we must be very careful to read an entire record once * we have read any of it, even if the system call has been interrupted. */ static int nfs_receive(rep, aname, mp) register struct nfsreq *rep; struct mbuf **aname; struct mbuf **mp; { register struct socket *so; struct uio auio; struct iovec aio; register struct mbuf *m; struct mbuf *control; u_long len; struct mbuf **getnam; int error, sotype, rcvflg; struct proc *p = curproc; /* XXX */ /* * Set up arguments for soreceive() */ *mp = (struct mbuf *)0; *aname = (struct mbuf *)0; sotype = rep->r_nmp->nm_sotype; /* * For reliable protocols, lock against other senders/receivers * in case a reconnect is necessary. * For SOCK_STREAM, first get the Record Mark to find out how much * more there is to get. * We must lock the socket against other receivers * until we have an entire rpc request/reply. */ if (sotype != SOCK_DGRAM) { error = nfs_sndlock(&rep->r_nmp->nm_flag, rep); if (error) return (error); tryagain: /* * Check for fatal errors and resending request. */ /* * Ugh: If a reconnect attempt just happened, nm_so * would have changed. NULL indicates a failed * attempt that has essentially shut down this * mount point. 
*/ if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { nfs_sndunlock(&rep->r_nmp->nm_flag); return (EINTR); } so = rep->r_nmp->nm_so; if (!so) { error = nfs_reconnect(rep); if (error) { nfs_sndunlock(&rep->r_nmp->nm_flag); return (error); } goto tryagain; } while (rep->r_flags & R_MUSTRESEND) { m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); nfsstats.rpcretries++; error = nfs_send(so, rep->r_nmp->nm_nam, m, rep); if (error) { if (error == EINTR || error == ERESTART || (error = nfs_reconnect(rep))) { nfs_sndunlock(&rep->r_nmp->nm_flag); return (error); } goto tryagain; } } nfs_sndunlock(&rep->r_nmp->nm_flag); if (sotype == SOCK_STREAM) { aio.iov_base = (caddr_t) &len; aio.iov_len = sizeof(u_long); auio.uio_iov = &aio; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_offset = 0; auio.uio_resid = sizeof(u_long); auio.uio_procp = p; do { rcvflg = MSG_WAITALL; error = soreceive(so, (struct mbuf **)0, &auio, (struct mbuf **)0, (struct mbuf **)0, &rcvflg); if (error == EWOULDBLOCK && rep) { if (rep->r_flags & R_SOFTTERM) return (EINTR); } } while (error == EWOULDBLOCK); if (!error && auio.uio_resid > 0) { log(LOG_INFO, "short receive (%d/%d) from nfs server %s\n", sizeof(u_long) - auio.uio_resid, sizeof(u_long), rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); error = EPIPE; } if (error) goto errout; len = ntohl(len) & ~0x80000000; /* * This is SERIOUS! We are out of sync with the sender * and forcing a disconnect/reconnect is all I can do. 
*/ if (len > NFS_MAXPACKET) { log(LOG_ERR, "%s (%d) from nfs server %s\n", "impossible packet length", len, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); error = EFBIG; goto errout; } auio.uio_resid = len; do { rcvflg = MSG_WAITALL; error = soreceive(so, (struct mbuf **)0, &auio, mp, (struct mbuf **)0, &rcvflg); } while (error == EWOULDBLOCK || error == EINTR || error == ERESTART); if (!error && auio.uio_resid > 0) { log(LOG_INFO, "short receive (%d/%d) from nfs server %s\n", len - auio.uio_resid, len, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); error = EPIPE; } } else { /* * NB: Since uio_resid is big, MSG_WAITALL is ignored * and soreceive() will return when it has either a * control msg or a data msg. * We have no use for control msg., but must grab them * and then throw them away so we know what is going * on. */ auio.uio_resid = len = 100000000; /* Anything Big */ auio.uio_procp = p; do { rcvflg = 0; error = soreceive(so, (struct mbuf **)0, &auio, mp, &control, &rcvflg); if (control) m_freem(control); if (error == EWOULDBLOCK && rep) { if (rep->r_flags & R_SOFTTERM) return (EINTR); } } while (error == EWOULDBLOCK || (!error && *mp == NULL && control)); if ((rcvflg & MSG_EOR) == 0) printf("Egad!!\n"); if (!error && *mp == NULL) error = EPIPE; len -= auio.uio_resid; } errout: if (error && error != EINTR && error != ERESTART) { m_freem(*mp); *mp = (struct mbuf *)0; if (error != EPIPE) log(LOG_INFO, "receive error %d from nfs server %s\n", error, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); error = nfs_sndlock(&rep->r_nmp->nm_flag, rep); if (!error) error = nfs_reconnect(rep); if (!error) goto tryagain; } } else { if ((so = rep->r_nmp->nm_so) == NULL) return (EACCES); if (so->so_state & SS_ISCONNECTED) getnam = (struct mbuf **)0; else getnam = aname; auio.uio_resid = len = 1000000; auio.uio_procp = p; do { rcvflg = 0; error = soreceive(so, getnam, &auio, mp, (struct mbuf **)0, &rcvflg); if (error == EWOULDBLOCK && (rep->r_flags & R_SOFTTERM)) return 
(EINTR); } while (error == EWOULDBLOCK); len -= auio.uio_resid; } if (error) { m_freem(*mp); *mp = (struct mbuf *)0; } /* * Search for any mbufs that are not a multiple of 4 bytes long * or with m_data not longword aligned. * These could cause pointer alignment problems, so copy them to * well aligned mbufs. */ nfs_realign(*mp, 5 * NFSX_UNSIGNED); return (error); } /* * Implement receipt of reply on a socket. * We must search through the list of received datagrams matching them * with outstanding requests using the xid, until ours is found. */ /* ARGSUSED */ int nfs_reply(myrep) struct nfsreq *myrep; { register struct nfsreq *rep; register struct nfsmount *nmp = myrep->r_nmp; register long t1; struct mbuf *mrep, *nam, *md; u_long rxid, *tl; caddr_t dpos, cp2; int error; /* * Loop around until we get our own reply */ for (;;) { /* * Lock against other receivers so that I don't get stuck in * sbwait() after someone else has received my reply for me. * Also necessary for connection based protocols to avoid * race conditions during a reconnect. */ error = nfs_rcvlock(myrep); if (error) return (error); /* Already received, bye bye */ if (myrep->r_mrep != NULL) { nfs_rcvunlock(&nmp->nm_flag); return (0); } /* * Get the next Rpc reply off the socket */ error = nfs_receive(myrep, &nam, &mrep); nfs_rcvunlock(&nmp->nm_flag); if (error) { /* * Ignore routing errors on connectionless protocols?? 
*/ if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { nmp->nm_so->so_error = 0; if (myrep->r_flags & R_GETONEREP) return (0); continue; } return (error); } if (nam) m_freem(nam); /* * Get the xid and check that it is an rpc reply */ md = mrep; dpos = mtod(md, caddr_t); nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); rxid = *tl++; if (*tl != rpc_reply) { #ifndef NFS_NOSERVER if (nmp->nm_flag & NFSMNT_NQNFS) { if (nqnfs_callback(nmp, mrep, md, dpos)) nfsstats.rpcinvalid++; } else { nfsstats.rpcinvalid++; m_freem(mrep); } #else nfsstats.rpcinvalid++; m_freem(mrep); #endif nfsmout: if (myrep->r_flags & R_GETONEREP) return (0); continue; } /* * Loop through the request list to match up the reply * Iff no match, just drop the datagram */ for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) { if (rep->r_mrep == NULL && rxid == rep->r_xid) { /* Found it.. */ rep->r_mrep = mrep; rep->r_md = md; rep->r_dpos = dpos; if (nfsrtton) { struct rttl *rt; rt = &nfsrtt.rttl[nfsrtt.pos]; rt->proc = rep->r_procnum; rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]); rt->sent = nmp->nm_sent; rt->cwnd = nmp->nm_cwnd; rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1]; rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1]; rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid; rt->tstamp = time; if (rep->r_flags & R_TIMING) rt->rtt = rep->r_rtt; else rt->rtt = 1000000; nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ; } /* * Update congestion window. * Do the additive increase of * one rpc/rtt. */ if (nmp->nm_cwnd <= nmp->nm_sent) { nmp->nm_cwnd += (NFS_CWNDSCALE * NFS_CWNDSCALE + (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd; if (nmp->nm_cwnd > NFS_MAXCWND) nmp->nm_cwnd = NFS_MAXCWND; } rep->r_flags &= ~R_SENT; nmp->nm_sent -= NFS_CWNDSCALE; /* * Update rtt using a gain of 0.125 on the mean * and a gain of 0.25 on the deviation. */ if (rep->r_flags & R_TIMING) { /* * Since the timer resolution of * NFS_HZ is so course, it can often * result in r_rtt == 0. 
Since * r_rtt == N means that the actual * rtt is between N+dt and N+2-dt ticks, * add 1. */ t1 = rep->r_rtt + 1; t1 -= (NFS_SRTT(rep) >> 3); NFS_SRTT(rep) += t1; if (t1 < 0) t1 = -t1; t1 -= (NFS_SDRTT(rep) >> 2); NFS_SDRTT(rep) += t1; } nmp->nm_timeouts = 0; break; } } /* * If not matched to a request, drop it. * If it's mine, get out. */ if (rep == 0) { nfsstats.rpcunexpected++; m_freem(mrep); } else if (rep == myrep) { if (rep->r_mrep == NULL) panic("nfsreply nil"); return (0); } if (myrep->r_flags & R_GETONEREP) return (0); } } /* * nfs_request - goes something like this * - fill in request struct * - links it into list * - calls nfs_send() for first transmit * - calls nfs_receive() to get reply * - break down rpc header and return with nfs reply pointed to * by mrep or error * nb: always frees up mreq mbuf list */ int nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp) struct vnode *vp; struct mbuf *mrest; int procnum; struct proc *procp; struct ucred *cred; struct mbuf **mrp; struct mbuf **mdp; caddr_t *dposp; { register struct mbuf *m, *mrep; register struct nfsreq *rep; register u_long *tl; register int i; struct nfsmount *nmp; struct mbuf *md, *mheadend; struct nfsnode *np; char nickv[RPCX_NICKVERF]; time_t reqtime, waituntil; caddr_t dpos, cp2; int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type; int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0; int verf_len, verf_type; u_long xid; u_quad_t frev; char *auth_str, *verf_str; NFSKERBKEY_T key; /* save session key */ nmp = VFSTONFS(vp->v_mount); MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); rep->r_nmp = nmp; rep->r_vp = vp; rep->r_procp = procp; rep->r_procnum = procnum; i = 0; m = mrest; while (m) { i += m->m_len; m = m->m_next; } mrest_len = i; /* * Get the RPC header with authorization. 
*/ kerbauth: verf_str = auth_str = (char *)0; if (nmp->nm_flag & NFSMNT_KERB) { verf_str = nickv; verf_len = sizeof (nickv); auth_type = RPCAUTH_KERB4; bzero((caddr_t)key, sizeof (key)); if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str, &auth_len, verf_str, verf_len)) { error = nfs_getauth(nmp, rep, cred, &auth_str, &auth_len, verf_str, &verf_len, key); if (error) { free((caddr_t)rep, M_NFSREQ); m_freem(mrest); return (error); } } } else { auth_type = RPCAUTH_UNIX; if (cred->cr_ngroups < 1) panic("nfsreq nogrps"); auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ? nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) + 5 * NFSX_UNSIGNED; } m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len, auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid); if (auth_str) free(auth_str, M_TEMP); /* * For stream protocols, insert a Sun RPC Record Mark. */ if (nmp->nm_sotype == SOCK_STREAM) { M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); *mtod(m, u_long *) = htonl(0x80000000 | (m->m_pkthdr.len - NFSX_UNSIGNED)); } rep->r_mreq = m; rep->r_xid = xid; tryagain: if (nmp->nm_flag & NFSMNT_SOFT) rep->r_retry = nmp->nm_retry; else rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ rep->r_rtt = rep->r_rexmit = 0; if (proct[procnum] > 0) rep->r_flags = R_TIMING; else rep->r_flags = 0; rep->r_mrep = NULL; /* * Do the client side RPC. */ nfsstats.rpcrequests++; /* * Chain request into list of outstanding requests. Be sure * to put it LAST so timer finds oldest requests first. */ s = splsoftclock(); TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain); /* Get send time for nqnfs */ reqtime = time.tv_sec; /* * If backing off another request or avoiding congestion, don't * send this one now but let timer do it. If not timing a request, * do it now. 
*/ if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || (nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) { splx(s); if (nmp->nm_soflags & PR_CONNREQUIRED) error = nfs_sndlock(&nmp->nm_flag, rep); if (!error) { m = m_copym(m, 0, M_COPYALL, M_WAIT); error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); if (nmp->nm_soflags & PR_CONNREQUIRED) nfs_sndunlock(&nmp->nm_flag); } if (!error && (rep->r_flags & R_MUSTRESEND) == 0) { nmp->nm_sent += NFS_CWNDSCALE; rep->r_flags |= R_SENT; } } else { splx(s); rep->r_rtt = -1; } /* * Wait for the reply from our send or the timer's. */ if (!error || error == EPIPE) error = nfs_reply(rep); /* * RPC done, unlink the request. */ s = splsoftclock(); TAILQ_REMOVE(&nfs_reqq, rep, r_chain); splx(s); /* * Decrement the outstanding request count. */ if (rep->r_flags & R_SENT) { rep->r_flags &= ~R_SENT; /* paranoia */ nmp->nm_sent -= NFS_CWNDSCALE; } /* * If there was a successful reply and a tprintf msg. * tprintf a response. */ if (!error && (rep->r_flags & R_TPRINTFMSG)) nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, "is alive again"); mrep = rep->r_mrep; md = rep->r_md; dpos = rep->r_dpos; if (error) { m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } /* * break down the rpc header and check if ok */ nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); if (*tl++ == rpc_msgdenied) { if (*tl == rpc_mismatch) error = EOPNOTSUPP; else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) { if (!failed_auth) { failed_auth++; mheadend->m_next = (struct mbuf *)0; m_freem(mrep); m_freem(rep->r_mreq); goto kerbauth; } else error = EAUTH; } else error = EACCES; m_freem(mrep); m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } /* * Grab any Kerberos verifier, otherwise just throw it away. 
*/ verf_type = fxdr_unsigned(int, *tl++); i = fxdr_unsigned(int, *tl); if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) { error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep); if (error) goto nfsmout; } else if (i > 0) nfsm_adv(nfsm_rndup(i)); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); /* 0 == ok */ if (*tl == 0) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); if (*tl != 0) { error = fxdr_unsigned(int, *tl); if ((nmp->nm_flag & NFSMNT_NFSV3) && error == NFSERR_TRYLATER) { m_freem(mrep); error = 0; waituntil = time.tv_sec + trylater_delay; while (time.tv_sec < waituntil) (void) tsleep((caddr_t)&lbolt, PSOCK, "nqnfstry", 0); trylater_delay *= nfs_backoff[trylater_cnt]; if (trylater_cnt < 7) trylater_cnt++; goto tryagain; } /* * If the File Handle was stale, invalidate the * lookup cache, just in case. */ if (error == ESTALE) cache_purge(vp); if (nmp->nm_flag & NFSMNT_NFSV3) { *mrp = mrep; *mdp = md; *dposp = dpos; error |= NFSERR_RETERR; } else m_freem(mrep); m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } /* * For nqnfs, get any lease in reply */ if (nmp->nm_flag & NFSMNT_NQNFS) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); if (*tl) { np = VTONFS(vp); nqlflag = fxdr_unsigned(int, *tl); nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED); cachable = fxdr_unsigned(int, *tl++); reqtime += fxdr_unsigned(int, *tl++); if (reqtime > time.tv_sec) { fxdr_hyper(tl, &frev); nqnfs_clientlease(nmp, np, nqlflag, cachable, reqtime, frev); } } } *mrp = mrep; *mdp = md; *dposp = dpos; m_freem(rep->r_mreq); FREE((caddr_t)rep, M_NFSREQ); return (0); } m_freem(mrep); error = EPROTONOSUPPORT; nfsmout: m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } #ifndef NFS_NOSERVER /* * Generate the rpc reply header * siz arg. 
is used to decide if adding a cluster is worthwhile
 */
/*
 * Outputs: *mrq receives mreq (the header mbuf allocated here), *mbp
 * receives mb and *bposp receives bpos.  The return value is always 0.
 * err selects the kind of reply header that is built; a non-zero err other
 * than NFSERR_RETVOID is also counted in nfsstats.srvrpc_errs.
 */
int
nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
	int siz;
	struct nfsrv_descript *nd;
	struct nfssvc_sock *slp;
	int err;
	int cache;
	u_quad_t *frev;
	struct mbuf **mrq;
	struct mbuf **mbp;
	caddr_t *bposp;
{
	register u_long *tl;
	register struct mbuf *mreq;
	caddr_t bpos;
	struct mbuf *mb, *mb2;

	MGETHDR(mreq, M_WAIT, MT_DATA);
	mb = mreq;
	/*
	 * If this is a big reply, use a cluster else
	 * try and leave leading space for the lower level headers.
	 */
	siz += RPC_REPLYSIZ;
	if (siz >= MINCLSIZE) {
		MCLGET(mreq, M_WAIT);
	} else
		mreq->m_data += max_hdr;
	tl = mtod(mreq, u_long *);
	/* Claim six NFSX_UNSIGNED-sized words; tl fills them in below. */
	mreq->m_len = 6 * NFSX_UNSIGNED;
	bpos = ((caddr_t)tl) + mreq->m_len;
	*tl++ = txdr_unsigned(nd->nd_retxid);
	*tl++ = rpc_reply;
	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
		/* Rejected call: either an auth error or an RPC version mismatch. */
		*tl++ = rpc_msgdenied;
		if (err & NFSERR_AUTHERR) {
			*tl++ = rpc_autherr;
			*tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
			/* Only five words were written on this path; give one back. */
			mreq->m_len -= NFSX_UNSIGNED;
			bpos -= NFSX_UNSIGNED;
		} else {
			*tl++ = rpc_mismatch;
			*tl++ = txdr_unsigned(RPC_VER2);
			*tl = txdr_unsigned(RPC_VER2);
		}
	} else {
		*tl++ = rpc_msgaccepted;

		/*
		 * For Kerberos authentication, we must send the nickname
		 * verifier back, otherwise just RPCAUTH_NULL.
		 */
		if (nd->nd_flag & ND_KERBFULL) {
		    register struct nfsuid *nuidp;
		    struct timeval ktvin, ktvout;

		    /* Find the cached uid entry matching this caller's uid (and address, if known). */
		    for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first;
			nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
			if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
			    (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
			     &nuidp->nu_haddr, nd->nd_nam2)))
			    break;
		    }
		    if (nuidp) {
			ktvin.tv_sec =
			    txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
			ktvin.tv_usec =
			    txdr_unsigned(nuidp->nu_timestamp.tv_usec);

			/*
			 * Encrypt the timestamp in ecb mode using the
			 * session key.
			 */
			/*
			 * NOTE(review): ktvout is read below but nothing visible
			 * in this function assigns it; presumably the elided
			 * NFSKERB code produces it from ktvin -- confirm.
			 */
#ifdef NFSKERB
			XXX
#endif

			*tl++ = rpc_auth_kerb;
			*tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
			*tl = ktvout.tv_sec;
			nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
			*tl++ = ktvout.tv_usec;
			*tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
		    } else {
			/* No cached entry: fall back to a zero flavor and zero length. */
			*tl++ = 0;
			*tl++ = 0;
		    }
		} else {
			*tl++ = 0;
			*tl++ = 0;
		}
		/* Translate err into the accept-status word (plus any trailer). */
		switch (err) {
		case EPROGUNAVAIL:
			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			*tl = txdr_unsigned(RPC_PROGMISMATCH);
			nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
			/* Two version numbers follow: 3,3 for nqnfs, otherwise 2,3. */
			if (nd->nd_flag & ND_NQNFS) {
				*tl++ = txdr_unsigned(3);
				*tl = txdr_unsigned(3);
			} else {
				*tl++ = txdr_unsigned(2);
				*tl = txdr_unsigned(3);
			}
			break;
		case EPROCUNAVAIL:
			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			*tl = txdr_unsigned(RPC_GARBAGE);
			break;
		default:
			*tl = 0;
			/* Unless the procedure returns void, append the mapped NFS status. */
			if (err != NFSERR_RETVOID) {
				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
				if (err)
				    *tl = txdr_unsigned(nfsrv_errmap(nd, err));
				else
				    *tl = 0;
			}
			break;
		};
	}

	/*
	 * For nqnfs, piggyback lease as requested.
	 */
	if ((nd->nd_flag & ND_NQNFS) && err == 0) {
		if (nd->nd_flag & ND_LEASE) {
			nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
			*tl++ = txdr_unsigned(cache);
			*tl++ = txdr_unsigned(nd->nd_duration);
			txdr_hyper(frev, tl);
		} else {
			nfsm_build(tl, u_long *, NFSX_UNSIGNED);
			*tl = 0;
		}
	}
	*mrq = mreq;
	*mbp = mb;
	*bposp = bpos;
	if (err != 0 && err != NFSERR_RETVOID)
		nfsstats.srvrpc_errs++;
	return (0);
}

/*
 * Nfs timer routine
 * Scan the nfsreq list and retranmit any requests that have timed out
 * To avoid retransmission attempts on STREAM sockets (in the future) make
 * sure to set the r_retry field to 0 (implies nm_retry == 0).
*/
void
nfs_timer(arg)
	void *arg;	/* never used */
{
	register struct nfsreq *rep;
	register struct mbuf *m;
	register struct socket *so;
	register struct nfsmount *nmp;
	register int timeo;
	register struct nfssvc_sock *slp;
	static long lasttime = 0;
	int s, error;
	u_quad_t cur_usec;

	/* The whole scan runs at splnet; the saved level is restored before rescheduling. */
	s = splnet();
	for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) {
		nmp = rep->r_nmp;
		/* Skip requests that already have a reply or were soft-terminated. */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
			continue;
		if (nfs_sigintr(nmp, rep, rep->r_procp)) {
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		/*
		 * r_rtt >= 0 means the request is being timed; it is set to -1
		 * further down whenever a (re)send is attempted and back to 0
		 * once that send succeeds.
		 */
		if (rep->r_rtt >= 0) {
			rep->r_rtt++;
			if (nmp->nm_flag & NFSMNT_DUMBTIMR)
				timeo = nmp->nm_timeo;
			else
				timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
			/* Scale by the backoff table entry for the timeouts seen so far. */
			if (nmp->nm_timeouts > 0)
				timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			if (rep->r_rtt <= timeo)
				continue;
			/* nm_timeouts never grows past 8. */
			if (nmp->nm_timeouts < 8)
				nmp->nm_timeouts++;
		}
		/*
		 * Check for server not responding
		 */
		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
		     rep->r_rexmit > nmp->nm_deadthresh) {
			nfs_msg(rep->r_procp,
			    nmp->nm_mountp->mnt_stat.f_mntfromname,
			    "not responding");
			rep->r_flags |= R_TPRINTFMSG;
		}
		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
			nfsstats.rpctimeouts++;
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		/*
		 * Non-datagram sockets are never resent from here; only the
		 * retransmit counter is bumped (capped at NFS_MAXREXMIT).
		 */
		if (nmp->nm_sotype != SOCK_DGRAM) {
			if (++rep->r_rexmit > NFS_MAXREXMIT)
				rep->r_rexmit = NFS_MAXREXMIT;
			continue;
		}
		if ((so = nmp->nm_so) == NULL)
			continue;

		/*
		 * If there is enough space and the window allows..
		 *	Resend it
		 * Set r_rtt to -1 in case we fail to send it now.
		 */
		rep->r_rtt = -1;
		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
		   ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
		    (rep->r_flags & R_SENT) ||
		    nmp->nm_sent < nmp->nm_cwnd) &&
		   (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
			/* With NFSMNT_NOCONN set, nm_nam is passed along with the send. */
			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
			    (struct mbuf *)0, (struct mbuf *)0);
			else
			    error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
			    nmp->nm_nam, (struct mbuf *)0);
			if (error) {
				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
					so->so_error = 0;
			} else {
				/*
				 * Iff first send, start timing
				 * else turn timing off, backoff timer
				 * and divide congestion window by 2.
				 */
				if (rep->r_flags & R_SENT) {
					rep->r_flags &= ~R_TIMING;
					if (++rep->r_rexmit > NFS_MAXREXMIT)
						rep->r_rexmit = NFS_MAXREXMIT;
					nmp->nm_cwnd >>= 1;
					if (nmp->nm_cwnd < NFS_CWNDSCALE)
						nmp->nm_cwnd = NFS_CWNDSCALE;
					nfsstats.rpcretries++;
				} else {
					rep->r_flags |= R_SENT;
					nmp->nm_sent += NFS_CWNDSCALE;
				}
				rep->r_rtt = 0;
			}
		}
	}

	/*
	 * Call the nqnfs server timer once a second to handle leases.
	 */
	if (lasttime != time.tv_sec) {
		lasttime = time.tv_sec;
		nqnfs_serverd();
	}

	/*
	 * Scan the write gathering queues for writes that need to be
	 * completed now.
	 */
	cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec;
	for (slp = nfssvc_sockhead.tqh_first; slp != 0;
	    slp = slp->ns_chain.tqe_next) {
	    /* Only the head of each ns_tq list is compared against the current time. */
	    if (slp->ns_tq.lh_first && slp->ns_tq.lh_first->nd_time<=cur_usec)
		nfsrv_wakenfsd(slp);
	}
	splx(s);
	/* Re-arm: schedule this routine again through timeout() with nfs_ticks. */
	timeout(nfs_timer, (void *)0, nfs_ticks);
}
#endif /* NFS_NOSERVER */

/*
 * Test for a termination condition pending on the process.
 * This is used for NFSMNT_INT mounts.
 *
 * Returns EINTR when rep is non-null and already flagged R_SOFTTERM, or
 * when the mount has NFSMNT_INT set and p has a pending signal that is
 * neither masked nor ignored and is part of NFSINT_SIGMASK.  Returns 0
 * otherwise.  rep and p may each be null; nmp is always dereferenced.
 */
int
nfs_sigintr(nmp, rep, p)
	struct nfsmount *nmp;
	struct nfsreq *rep;
	register struct proc *p;
{

	if (rep && (rep->r_flags & R_SOFTTERM))
		return (EINTR);
	if (!(nmp->nm_flag & NFSMNT_INT))
		return (0);
	if (p && p->p_siglist &&
	    (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) &
	    NFSINT_SIGMASK))
		return (EINTR);
	return (0);
}

/*
 * Lock a socket against others.
* Necessary for STREAM sockets to ensure you get an entire rpc request/reply * and also to avoid race conditions between the processes with nfs requests * in progress when a reconnect is necessary. */ int nfs_sndlock(flagp, rep) register int *flagp; struct nfsreq *rep; { struct proc *p; int slpflag = 0, slptimeo = 0; if (rep) { p = rep->r_procp; if (rep->r_nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; } else p = (struct proc *)0; while (*flagp & NFSMNT_SNDLOCK) { if (nfs_sigintr(rep->r_nmp, rep, p)) return (EINTR); *flagp |= NFSMNT_WANTSND; (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck", slptimeo); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } *flagp |= NFSMNT_SNDLOCK; return (0); } /* * Unlock the stream socket for others. */ void nfs_sndunlock(flagp) register int *flagp; { if ((*flagp & NFSMNT_SNDLOCK) == 0) panic("nfs sndunlock"); *flagp &= ~NFSMNT_SNDLOCK; if (*flagp & NFSMNT_WANTSND) { *flagp &= ~NFSMNT_WANTSND; wakeup((caddr_t)flagp); } } static int nfs_rcvlock(rep) register struct nfsreq *rep; { register int *flagp = &rep->r_nmp->nm_flag; int slpflag, slptimeo = 0; if (*flagp & NFSMNT_INT) slpflag = PCATCH; else slpflag = 0; while (*flagp & NFSMNT_RCVLOCK) { if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp)) return (EINTR); *flagp |= NFSMNT_WANTRCV; (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk", slptimeo); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } *flagp |= NFSMNT_RCVLOCK; return (0); } /* * Unlock the stream socket for others. */ static void nfs_rcvunlock(flagp) register int *flagp; { if ((*flagp & NFSMNT_RCVLOCK) == 0) panic("nfs rcvunlock"); *flagp &= ~NFSMNT_RCVLOCK; if (*flagp & NFSMNT_WANTRCV) { *flagp &= ~NFSMNT_WANTRCV; wakeup((caddr_t)flagp); } } /* * Check for badly aligned mbuf data areas and * realign data in an mbuf list by copying the data areas up, as required. 
*/
/*
 * m is the head of the mbuf chain to inspect.  hsiz caps how many bytes
 * are placed in the first rewritten mbuf when that mbuf originally held
 * no more than hsiz bytes.  The chain is modified in place; nothing is
 * allocated or freed here.
 */
static void
nfs_realign(m, hsiz)
	register struct mbuf *m;
	int hsiz;
{
	register struct mbuf *m2;
	register int siz, mlen, olen;
	register caddr_t tcp, fcp;
	struct mbuf *mnew;

	while (m) {
	    /*
	     * This never happens for UDP, rarely happens for TCP
	     * but frequently happens for iso transport.
	     */
	    /* Misaligned means: length or data address not a multiple of 4. */
	    if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) {
		/* olen/fcp: bytes still to copy out of the current source mbuf. */
		olen = m->m_len;
		fcp = mtod(m, caddr_t);
		/* If the start address itself is off, re-point m_data at an aligned spot. */
		if ((int)fcp & 0x3) {
			m->m_flags &= ~M_PKTHDR;
			if (m->m_flags & M_EXT)
				m->m_data = m->m_ext.ext_buf +
					((m->m_ext.ext_size - olen) & ~0x3);
			else
				m->m_data = m->m_dat;
		}
		m->m_len = 0;
		/* tcp/mnew: write position and the mbuf currently being filled. */
		tcp = mtod(m, caddr_t);
		mnew = m;
		m2 = m->m_next;

		/*
		 * If possible, only put the first invariant part
		 * of the RPC header in the first mbuf.
		 */
		mlen = M_TRAILINGSPACE(m);
		if (olen <= hsiz && mlen > hsiz)
			mlen = hsiz;

		/*
		 * Loop through the mbuf list consolidating data.
		 */
		while (m) {
			while (olen > 0) {
				/* Destination full: reset the next mbuf and continue filling it. */
				if (mlen == 0) {
					m2->m_flags &= ~M_PKTHDR;
					if (m2->m_flags & M_EXT)
						m2->m_data = m2->m_ext.ext_buf;
					else
						m2->m_data = m2->m_dat;
					m2->m_len = 0;
					mlen = M_TRAILINGSPACE(m2);
					tcp = mtod(m2, caddr_t);
					mnew = m2;
					m2 = m2->m_next;
				}
				siz = min(mlen, olen);
				/* Skip the copy when source and destination coincide. */
				if (tcp != fcp)
					bcopy(fcp, tcp, siz);
				mnew->m_len += siz;
				mlen -= siz;
				olen -= siz;
				tcp += siz;
				fcp += siz;
			}
			m = m->m_next;
			if (m) {
				olen = m->m_len;
				fcp = mtod(m, caddr_t);
			}
		}

		/*
		 * Finally, set m_len == 0 for any trailing mbufs that have
		 * been copied out of.
		 */
		while (m2) {
			m2->m_len = 0;
			m2 = m2->m_next;
		}
		/* The rest of the chain was handled above, so we are done. */
		return;
	    }
	    m = m->m_next;
	}
}

#ifndef NFS_NOSERVER

/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
 * Essentially do as much as possible non-blocking, else punt and it will
 * be called with M_WAIT from an nfsd.
*/
void
nfsrv_rcv(so, arg, waitflag)
	struct socket *so;
	caddr_t arg;
	int waitflag;
{
	register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
	register struct mbuf *m;
	struct mbuf *mp, *nam;
	struct uio auio;
	int flags, error;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;
#ifdef notdef
	/*
	 * Define this to test for nfsds handling this under heavy load.
	 */
	if (waitflag == M_DONTWAIT) {
		slp->ns_flag |= SLP_NEEDQ; goto dorecs;
	}
#endif
	auio.uio_procp = NULL;
	if (so->so_type == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec && waitflag == M_DONTWAIT) {
			slp->ns_flag |= SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().
		 */
		/*
		 * The huge resid is just an upper bound; the byte count that
		 * actually arrived is 1000000000 - auio.uio_resid (see below).
		 */
		auio.uio_resid = 1000000000;
		flags = MSG_DONTWAIT;
		error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
		if (error || mp == (struct mbuf *)0) {
			/* EWOULDBLOCK: try again later; anything else (or no data): disconnect. */
			if (error == EWOULDBLOCK)
				slp->ns_flag |= SLP_NEEDQ;
			else
				slp->ns_flag |= SLP_DISCONN;
			goto dorecs;
		}
		/* Append the new data to the raw (unparsed) stream chain. */
		m = mp;
		if (slp->ns_rawend) {
			slp->ns_rawend->m_next = m;
			slp->ns_cc += 1000000000 - auio.uio_resid;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = 1000000000 - auio.uio_resid;
		}
		while (m->m_next)
			m = m->m_next;
		slp->ns_rawend = m;

		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			/* nfsrv_getstream() returns EPERM for an out-of-range record length. */
			if (error == EPERM)
				slp->ns_flag |= SLP_DISCONN;
			else
				slp->ns_flag |= SLP_NEEDQ;
		}
	} else {
		/* Non-stream socket: each soreceive() yields one complete record. */
		do {
			auio.uio_resid = 1000000000;
			flags = MSG_DONTWAIT;
			error = soreceive(so, &nam, &auio, &mp,
						(struct mbuf **)0, &flags);
			if (mp) {
				nfs_realign(mp, 10 * NFSX_UNSIGNED);
				/* When an address mbuf came back, link it in front of the data. */
				if (nam) {
					m = nam;
					m->m_next = mp;
				} else
					m = mp;
				/* Queue the record at the tail of the ns_rec list. */
				if (slp->ns_recend)
					slp->ns_recend->m_nextpkt = m;
				else
					slp->ns_rec = m;
				slp->ns_recend = m;
				m->m_nextpkt = (struct mbuf *)0;
			}
			if (error) {
				if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
					&& error != EWOULDBLOCK) {
					slp->ns_flag |= SLP_DISCONN;
					goto dorecs;
				}
			}
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (waitflag == M_DONTWAIT &&
		(slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
		nfsrv_wakenfsd(slp);
}

/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket. The "waitflag" argument indicates whether or not it
 * can sleep.
 *
 * Returns 0 when no more complete fragments are available, EPERM when a
 * record mark carries a length outside NFS_MINPACKET..NFS_MAXPACKET, and
 * EWOULDBLOCK when m_copym() could not supply a copy.  SLP_GETSTREAM is
 * held in ns_flag for the duration and cleared on every return path.
 */
static int
nfsrv_getstream(slp, waitflag)
	register struct nfssvc_sock *slp;
	int waitflag;
{
	register struct mbuf *m, **mpp;
	register char *cp1, *cp2;
	register int len;
	struct mbuf *om, *m2, *recm = 0;
	u_long recmark;

	/* SLP_GETSTREAM guards against re-entry on the same socket. */
	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
	    /* ns_reclen == 0: the next record mark has not been read yet. */
	    if (slp->ns_reclen == 0) {
		if (slp->ns_cc < NFSX_UNSIGNED) {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}
		m = slp->ns_raw;
		if (m->m_len >= NFSX_UNSIGNED) {
			bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
			m->m_data += NFSX_UNSIGNED;
			m->m_len -= NFSX_UNSIGNED;
		} else {
			/* The mark straddles mbufs: gather it one byte at a time. */
			cp1 = (caddr_t)&recmark;
			cp2 = mtod(m, caddr_t);
			while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
				while (m->m_len == 0) {
					m = m->m_next;
					cp2 = mtod(m, caddr_t);
				}
				*cp1++ = *cp2++;
				m->m_data++;
				m->m_len--;
			}
		}
		slp->ns_cc -= NFSX_UNSIGNED;
		recmark = ntohl(recmark);
		/* Low 31 bits: fragment length.  Top bit: last-fragment flag. */
		slp->ns_reclen = recmark & ~0x80000000;
		if (recmark & 0x80000000)
			slp->ns_flag |= SLP_LASTFRAG;
		else
			slp->ns_flag &= ~SLP_LASTFRAG;
		if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (EPERM);
		}
	    }

	    /*
	     * Now get the record part.
	     */
	    if (slp->ns_cc == slp->ns_reclen) {
		/* The raw chain is exactly one fragment: take all of it. */
		recm = slp->ns_raw;
		slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
		slp->ns_cc = slp->ns_reclen = 0;
	    } else if (slp->ns_cc > slp->ns_reclen) {
		/* More data than one fragment: split the chain at ns_reclen bytes. */
		len = 0;
		m = slp->ns_raw;
		om = (struct mbuf *)0;
		while (len < slp->ns_reclen) {
			if ((len + m->m_len) > slp->ns_reclen) {
				/* Boundary falls inside m: copy the head, trim m in place. */
				m2 = m_copym(m, 0, slp->ns_reclen - len,
					waitflag);
				if (m2) {
					if (om) {
						om->m_next = m2;
						recm = slp->ns_raw;
					} else
						recm = m2;
					m->m_data += slp->ns_reclen - len;
					m->m_len -= slp->ns_reclen - len;
					len = slp->ns_reclen;
				} else {
					slp->ns_flag &= ~SLP_GETSTREAM;
					return (EWOULDBLOCK);
				}
			} else if ((len + m->m_len) == slp->ns_reclen) {
				/* Boundary falls exactly at the end of m: cut the link. */
				om = m;
				len += m->m_len;
				m = m->m_next;
				recm = slp->ns_raw;
				om->m_next = (struct mbuf *)0;
			} else {
				om = m;
				len += m->m_len;
				m = m->m_next;
			}
		}
		slp->ns_raw = m;
		slp->ns_cc -= len;
		slp->ns_reclen = 0;
	    } else {
		/* Fragment not fully received yet; keep ns_reclen for the next call. */
		slp->ns_flag &= ~SLP_GETSTREAM;
		return (0);
	    }

	    /*
	     * Accumulate the fragments into a record.
	     */
	    mpp = &slp->ns_frag;
	    while (*mpp)
		mpp = &((*mpp)->m_next);
	    *mpp = recm;
	    if (slp->ns_flag & SLP_LASTFRAG) {
		/* Record complete: realign it and queue it on ns_rec. */
		nfs_realign(slp->ns_frag, 10 * NFSX_UNSIGNED);
		if (slp->ns_recend)
		    slp->ns_recend->m_nextpkt = slp->ns_frag;
		else
		    slp->ns_rec = slp->ns_frag;
		slp->ns_recend = slp->ns_frag;
		slp->ns_frag = (struct mbuf *)0;
	    }
	}
}

/*
 * Parse an RPC header.
*/
/*
 * Takes the first queued record off slp->ns_rec, wraps it in a freshly
 * allocated nfsrv_descript and runs nfs_getreq() on it.
 * Returns ENOBUFS when the socket is not SLP_VALID or nothing is queued,
 * the nfs_getreq() error on a parse failure (descriptor and address mbuf
 * are freed), or 0 with *ndp and nfsd->nfsd_nd set to the descriptor.
 */
int
nfsrv_dorec(slp, nfsd, ndp)
	register struct nfssvc_sock *slp;
	struct nfsd *nfsd;
	struct nfsrv_descript **ndp;
{
	register struct mbuf *m, *nam;
	register struct nfsrv_descript *nd;
	int error;

	*ndp = NULL;
	if ((slp->ns_flag & SLP_VALID) == 0 ||
	    (m = slp->ns_rec) == (struct mbuf *)0)
		return (ENOBUFS);
	/* Dequeue m; when it was the only record, reset the tail pointer too. */
	slp->ns_rec = m->m_nextpkt;
	if (slp->ns_rec)
		m->m_nextpkt = (struct mbuf *)0;
	else
		slp->ns_recend = (struct mbuf *)0;
	/* A leading MT_SONAME mbuf is the sender address; detach it from the data. */
	if (m->m_type == MT_SONAME) {
		nam = m;
		m = m->m_next;
		nam->m_next = NULL;
	} else
		nam = NULL;
	MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript),
		M_NFSRVDESC, M_WAITOK);
	nd->nd_md = nd->nd_mrep = m;
	nd->nd_nam2 = nam;
	nd->nd_dpos = mtod(m, caddr_t);
	error = nfs_getreq(nd, nfsd, TRUE);
	if (error) {
		/*
		 * Only nam and nd are released here.  NOTE(review): the data
		 * chain is presumably already freed by nfs_getreq() on its
		 * error returns (its visible EBADRPC paths call m_freem) --
		 * confirm for the nfsm_dissect failure path.
		 */
		m_freem(nam);
		free((caddr_t)nd, M_NFSRVDESC);
		return (error);
	}
	*ndp = nd;
	nfsd->nfsd_nd = nd;
	return (0);
}

/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 *
 * has_header says whether the xid and call-direction words are still in
 * front of the RPC version word (10 words are dissected instead of 8).
 * Returns 0 both for a good request and for one that must be answered
 * with an RPC-level rejection; in the latter case nd->nd_repstat holds
 * the reason and nd->nd_procnum is forced to NFSPROC_NOOP.  A non-zero
 * return (EBADRPC, or whatever an nfsm_* macro left in error) means the
 * request was malformed.
 */
int
nfs_getreq(nd, nfsd, has_header)
	register struct nfsrv_descript *nd;
	struct nfsd *nfsd;
	int has_header;
{
	register int len, i;
	register u_long *tl;
	register long t1;
	struct uio uio;
	struct iovec iov;
	caddr_t dpos, cp2, cp;
	u_long nfsvers, auth_type;
	uid_t nickuid;
	int error = 0, nqnfs = 0, ticklen;
	struct mbuf *mrep, *md;
	register struct nfsuid *nuidp;
	struct timeval tvin, tvout;

	/* Local parse cursor; written back to nd only on the final success path. */
	mrep = nd->nd_mrep;
	md = nd->nd_md;
	dpos = nd->nd_dpos;
	if (has_header) {
		nfsm_dissect(tl, u_long *, 10 * NFSX_UNSIGNED);
		nd->nd_retxid = fxdr_unsigned(u_long, *tl++);
		if (*tl++ != rpc_call) {
			m_freem(mrep);
			return (EBADRPC);
		}
	} else
		nfsm_dissect(tl, u_long *, 8 * NFSX_UNSIGNED);
	nd->nd_repstat = 0;
	nd->nd_flag = 0;
	if (*tl++ != rpc_vers) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	/* Program number: plain NFS or NQNFS; anything else is rejected. */
	if (*tl != nfs_prog) {
		if (*tl == nqnfs_prog)
			nqnfs++;
		else {
			nd->nd_repstat = EPROGUNAVAIL;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	}
	tl++;
	nfsvers = fxdr_unsigned(u_long, *tl++);
	if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) ||
		(nfsvers != NQNFS_VER3 && nqnfs)) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (nqnfs)
		nd->nd_flag = (ND_NFSV3 | ND_NQNFS);
	else if (nfsvers == NFS_VER3)
		nd->nd_flag = ND_NFSV3;
	nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
	/* The null procedure needs no credentials; stop parsing here. */
	if (nd->nd_procnum == NFSPROC_NULL)
		return (0);
	if (nd->nd_procnum >= NFS_NPROCS ||
		(!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
		(!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	/* Requests without ND_NFSV3 have their procedure number remapped. */
	if ((nd->nd_flag & ND_NFSV3) == 0)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	auth_type = *tl++;
	len = fxdr_unsigned(int, *tl++);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		m_freem(mrep);
		return (EBADRPC);
	}

	nd->nd_flag &= ~ND_KERBAUTH;
	/*
	 * Handle auth_unix or auth_kerb.
	 */
	if (auth_type == rpc_auth_unix) {
		/* Length of a variable-sized field that is skipped, not stored. */
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > NFS_MAXNAMLEN) {
			m_freem(mrep);
			return (EBADRPC);
		}
		nfsm_adv(nfsm_rndup(len));
		nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
		bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
		nd->nd_cr.cr_ref = 1;
		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
		nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
		len = fxdr_unsigned(int, *tl);
		if (len < 0 || len > RPCAUTH_UNIXGIDS) {
			m_freem(mrep);
			return (EBADRPC);
		}
		nfsm_dissect(tl, u_long *, (len + 2) * NFSX_UNSIGNED);
		/*
		 * Supplementary groups are stored from index 1 upward; entries
		 * that do not fit below NGROUPS are consumed and dropped.
		 */
		for (i = 1; i <= len; i++)
		    if (i < NGROUPS)
			nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
		    else
			tl++;
		nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
		if (nd->nd_cr.cr_ngroups > 1)
		    nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
		/* Length of the trailing field, which is skipped as well. */
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > RPCAUTH_MAXSIZ) {
			m_freem(mrep);
			return (EBADRPC);
		}
		if (len > 0)
			nfsm_adv(nfsm_rndup(len));
	} else if (auth_type == rpc_auth_kerb) {
		switch (fxdr_unsigned(int, *tl++)) {
		case RPCAKN_FULLNAME:
			ticklen = fxdr_unsigned(int, *tl);
			/* The length word is stored first, the data after it at offset 4. */
			*((u_long *)nfsd->nfsd_authstr) = *tl;
			uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
			nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
			if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
				m_freem(mrep);
				return (EBADRPC);
			}
			uio.uio_offset = 0;
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			uio.uio_segflg = UIO_SYSSPACE;
			iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
			iov.iov_len = RPCAUTH_MAXSIZ - 4;
			nfsm_mtouio(&uio, uio.uio_resid);
			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
			if (*tl++ != rpc_auth_kerb ||
				fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
				printf("Bad kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
			tl = (u_long *)cp;
			if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
				printf("Not fullname kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			/* Keep the three words after the kind word as the verifier. */
			cp += NFSX_UNSIGNED;
			bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
			nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
			nd->nd_flag |= ND_KERBFULL;
			nfsd->nfsd_flag |= NFSD_NEEDAUTH;
			break;
		case RPCAKN_NICKNAME:
			if (len != 2 * NFSX_UNSIGNED) {
				printf("Kerb nickname short\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nickuid = fxdr_unsigned(uid_t, *tl);
			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
			if (*tl++ != rpc_auth_kerb ||
				fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
				printf("Kerb nick verifier bad\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
			tvin.tv_sec = *tl++;
			tvin.tv_usec = *tl;

			/* Look up the cached entry for this nickname uid (and address, if known). */
			for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
			    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
				if (nuidp->nu_cr.cr_uid == nickuid &&
				    (!nd->nd_nam2 ||
				     netaddr_match(NU_NETFAM(nuidp),
				      &nuidp->nu_haddr, nd->nd_nam2)))
					break;
			}
			if (!nuidp) {
				nd->nd_repstat =
					(NFSERR_AUTHERR|AUTH_REJECTCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}

			/*
			 * Now, decrypt the timestamp using the session key
			 * and validate it.
			 */
			/*
			 * NOTE(review): tvout is read below but nothing visible
			 * in this function assigns it; presumably the elided
			 * NFSKERB code derives it from tvin -- confirm.
			 */
#ifdef NFSKERB
			XXX
#endif

			tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
			tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
			/* Reject an expired entry or a timestamp older than the stored one. */
			if (nuidp->nu_expire < time.tv_sec ||
			    nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
			    (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
			     nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
				nuidp->nu_expire = 0;
				nd->nd_repstat =
				    (NFSERR_AUTHERR|AUTH_REJECTVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
			nd->nd_flag |= ND_KERBNICK;
		};
	} else {
		/* Any other authentication flavor is refused. */
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}

	/*
	 * For nqnfs, get piggybacked lease request.
	 */
	if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
		nd->nd_flag |= fxdr_unsigned(int, *tl);
		if (nd->nd_flag & ND_LEASE) {
			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
			nd->nd_duration = fxdr_unsigned(int, *tl);
		} else
			nd->nd_duration = NQ_MINLEASE;
	} else
		nd->nd_duration = NQ_MINLEASE;
	/* Publish the parse cursor so the caller continues after the header. */
	nd->nd_md = md;
	nd->nd_dpos = dpos;
	return (0);
nfsmout:
	return (error);
}

/*
 * Search for a sleeping nfsd and wake it up.
 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
 * running nfsds will go look for the work in the nfssvc_sock list.
*/ void nfsrv_wakenfsd(slp) struct nfssvc_sock *slp; { register struct nfsd *nd; if ((slp->ns_flag & SLP_VALID) == 0) return; for (nd = nfsd_head.tqh_first; nd != 0; nd = nd->nfsd_chain.tqe_next) { if (nd->nfsd_flag & NFSD_WAITING) { nd->nfsd_flag &= ~NFSD_WAITING; if (nd->nfsd_slp) panic("nfsd wakeup"); slp->ns_sref++; nd->nfsd_slp = slp; wakeup((caddr_t)nd); return; } } slp->ns_flag |= SLP_DOREC; nfsd_head_flag |= NFSD_CHECKSLP; } #endif /* NFS_NOSERVER */ static int nfs_msg(p, server, msg) struct proc *p; char *server, *msg; { tpr_t tpr; if (p) tpr = tprintf_open(p); else tpr = NULL; tprintf(tpr, "nfs server %s: %s\n", server, msg); tprintf_close(tpr); return (0); } Index: head/sys/nfsclient/nfs_vfsops.c =================================================================== --- head/sys/nfsclient/nfs_vfsops.c (revision 14092) +++ head/sys/nfsclient/nfs_vfsops.c (revision 14093) @@ -1,949 +1,949 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_vfsops.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_vfsops.c,v 1.25 1995/12/22 15:57:38 phk Exp $ + * $Id: nfs_vfsops.c,v 1.26 1995/12/28 21:56:49 phk Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern int nfs_mountroot __P((void)); extern int nfs_ticks; struct nfsstats nfsstats; SYSCTL_NODE(_fs, MOUNT_NFS, nfs, CTLFLAG_RW, 0, "NFS filesystem"); SYSCTL_STRUCT(_fs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RD, &nfsstats, nfsstats, ""); static int nfs_iosize __P((struct nfsmount *nmp)); static int mountnfs __P((struct nfs_args *,struct mount *, struct mbuf *,char *,char *,struct vnode **)); static int nfs_mount __P(( struct mount *mp, char *path, caddr_t data, struct nameidata *ndp, struct proc *p)); static int nfs_start __P(( struct mount *mp, int flags, struct proc *p)); static int nfs_unmount __P(( struct 
mount *mp, int mntflags, struct proc *p)); static int nfs_root __P(( struct mount *mp, struct vnode **vpp)); static int nfs_quotactl __P(( struct mount *mp, int cmds, uid_t uid, caddr_t arg, struct proc *p)); static int nfs_statfs __P(( struct mount *mp, struct statfs *sbp, struct proc *p)); static int nfs_sync __P(( struct mount *mp, int waitfor, struct ucred *cred, struct proc *p)); static int nfs_vptofh __P(( struct vnode *vp, struct fid *fhp)); static int nfs_fhtovp __P((struct mount *mp, struct fid *fhp, struct mbuf *nam, struct vnode **vpp, int *exflagsp, struct ucred **credanonp)); static int nfs_vget __P((struct mount *, ino_t, struct vnode **)); /* * nfs vfs operations. */ static struct vfsops nfs_vfsops = { nfs_mount, nfs_start, nfs_unmount, nfs_root, nfs_quotactl, nfs_statfs, nfs_sync, nfs_vget, nfs_fhtovp, nfs_vptofh, nfs_init, }; VFS_SET(nfs_vfsops, nfs, MOUNT_NFS, VFCF_NETWORK); /* * This structure must be filled in by a primary bootstrap or bootstrap * server for a diskless/dataless machine. It is initialized below just * to ensure that it is allocated to initialized data (.data not .bss). */ struct nfs_diskless nfs_diskless = { 0 }; int nfs_diskless_valid = 0; void nfsargs_ntoh __P((struct nfs_args *)); static struct mount *nfs_mountdiskless __P((char *, char *, int, struct sockaddr_in *, struct nfs_args *, register struct vnode **)); static int nfs_iosize(nmp) struct nfsmount* nmp; { int iosize; /* * Calculate the size used for io buffers. Use the larger * of the two sizes to minimise nfs requests but make sure * that it is at least one VM page to avoid wasting buffer * space. 
*/ iosize = max(nmp->nm_rsize, nmp->nm_wsize); if (iosize < NBPG) iosize = NBPG; return iosize; } /* * nfs statfs call */ int nfs_statfs(mp, sbp, p) struct mount *mp; register struct statfs *sbp; struct proc *p; { register struct vnode *vp; register struct nfs_statfs *sfp; register caddr_t cp; register u_long *tl; register long t1, t2; caddr_t bpos, dpos, cp2; struct nfsmount *nmp = VFSTONFS(mp); int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct ucred *cred; struct nfsnode *np; u_quad_t tquad; #ifndef nolint sfp = (struct nfs_statfs *)0; #endif error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np); if (error) return (error); vp = NFSTOV(np); cred = crget(); cred->cr_ngroups = 1; if (v3 && (nmp->nm_flag & NFSMNT_GOTFSINFO) == 0) (void)nfs_fsinfo(nmp, vp, cred, p); nfsstats.rpccnt[NFSPROC_FSSTAT]++; nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_FSSTAT, p, cred); if (v3) nfsm_postop_attr(vp, retattr); if (!error) nfsm_dissect(sfp, struct nfs_statfs *, NFSX_STATFS(v3)); #ifdef __NetBSD__ #ifdef COMPAT_09 sbp->f_type = 2; #else sbp->f_type = 0; #endif #else sbp->f_type = MOUNT_NFS; #endif sbp->f_flags = nmp->nm_flag; sbp->f_iosize = nfs_iosize(nmp); if (v3) { sbp->f_bsize = NFS_FABLKSIZE; fxdr_hyper(&sfp->sf_tbytes, &tquad); sbp->f_blocks = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE)); fxdr_hyper(&sfp->sf_fbytes, &tquad); sbp->f_bfree = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE)); fxdr_hyper(&sfp->sf_abytes, &tquad); sbp->f_bavail = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE)); sbp->f_files = (fxdr_unsigned(long, sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff); sbp->f_ffree = (fxdr_unsigned(long, sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff); } else { sbp->f_bsize = fxdr_unsigned(long, sfp->sf_bsize); sbp->f_blocks = fxdr_unsigned(long, sfp->sf_blocks); sbp->f_bfree = fxdr_unsigned(long, sfp->sf_bfree); sbp->f_bavail = fxdr_unsigned(long, sfp->sf_bavail); sbp->f_files = 0; 
sbp->f_ffree = 0; } if (sbp != &mp->mnt_stat) { bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } nfsm_reqdone; vput(vp); crfree(cred); return (error); } /* * nfs version 3 fsinfo rpc call */ int nfs_fsinfo(nmp, vp, cred, p) register struct nfsmount *nmp; register struct vnode *vp; struct ucred *cred; struct proc *p; { register struct nfsv3_fsinfo *fsp; register caddr_t cp; register long t1, t2; register u_long *tl, pref, max; caddr_t bpos, dpos, cp2; int error = 0, retattr; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsstats.rpccnt[NFSPROC_FSINFO]++; nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1)); nfsm_fhtom(vp, 1); nfsm_request(vp, NFSPROC_FSINFO, p, cred); nfsm_postop_attr(vp, retattr); if (!error) { nfsm_dissect(fsp, struct nfsv3_fsinfo *, NFSX_V3FSINFO); pref = fxdr_unsigned(u_long, fsp->fs_wtpref); if (pref < nmp->nm_wsize) nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); max = fxdr_unsigned(u_long, fsp->fs_wtmax); if (max < nmp->nm_wsize) { nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1); if (nmp->nm_wsize == 0) nmp->nm_wsize = max; } pref = fxdr_unsigned(u_long, fsp->fs_rtpref); if (pref < nmp->nm_rsize) nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); max = fxdr_unsigned(u_long, fsp->fs_rtmax); if (max < nmp->nm_rsize) { nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1); if (nmp->nm_rsize == 0) nmp->nm_rsize = max; } pref = fxdr_unsigned(u_long, fsp->fs_dtpref); if (pref < nmp->nm_readdirsize) nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) & ~(NFS_DIRBLKSIZ - 1); if (max < nmp->nm_readdirsize) { nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1); if (nmp->nm_readdirsize == 0) nmp->nm_readdirsize = max; } nmp->nm_flag |= NFSMNT_GOTFSINFO; } nfsm_reqdone; return (error); } /* * Mount a remote root fs via. nfs. This depends on the info in the * nfs_diskless structure that has been filled in properly by some primary * bootstrap. 
* It goes something like this: * - do enough of "ifconfig" by calling ifioctl() so that the system * can talk to the server * - If nfs_diskless.mygateway is filled in, use that address as * a default gateway. * - hand craft the swap nfs vnode hanging off a fake mount point * if swdevt[0].sw_dev == NODEV * - build the rootfs mount point and call mountnfs() to do the rest. */ int nfs_mountroot() { register struct mount *mp; register struct nfs_diskless *nd = &nfs_diskless; struct socket *so; struct vnode *vp; struct proc *p = curproc; /* XXX */ int error, i; u_long l; char buf[128]; /* * XXX time must be non-zero when we init the interface or else * the arp code will wedge... */ if (time.tv_sec == 0) time.tv_sec = 1; /* * XXX splnet, so networks will receive... */ splnet(); #ifdef notyet /* Set up swap credentials. */ proc0.p_ucred->cr_uid = ntohl(nd->swap_ucred.cr_uid); proc0.p_ucred->cr_gid = ntohl(nd->swap_ucred.cr_gid); if ((proc0.p_ucred->cr_ngroups = ntohs(nd->swap_ucred.cr_ngroups)) > NGROUPS) proc0.p_ucred->cr_ngroups = NGROUPS; for (i = 0; i < proc0.p_ucred->cr_ngroups; i++) proc0.p_ucred->cr_groups[i] = ntohl(nd->swap_ucred.cr_groups[i]); #endif /* * Do enough of ifconfig(8) so that the critical net interface can * talk to the server. */ - error = socreate(nd->myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0); + error = socreate(nd->myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0, p); if (error) panic("nfs_mountroot: socreate(%04x): %d", nd->myif.ifra_addr.sa_family, error); /* * We might not have been told the right interface, so we pass * over the first ten interfaces of the same kind, until we get * one of them configured. 
*/ for (i = strlen(nd->myif.ifra_name) - 1; nd->myif.ifra_name[i] >= '0' && nd->myif.ifra_name[i] <= '9'; nd->myif.ifra_name[i] ++) { error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, p); if(!error) break; } if (error) panic("nfs_mountroot: SIOCAIFADDR: %d", error); soclose(so); /* * If the gateway field is filled in, set it as the default route. */ if (nd->mygateway.sin_len != 0) { struct sockaddr_in mask, sin; bzero((caddr_t)&mask, sizeof(mask)); sin = mask; sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); error = rtrequest(RTM_ADD, (struct sockaddr *)&sin, (struct sockaddr *)&nd->mygateway, (struct sockaddr *)&mask, RTF_UP | RTF_GATEWAY, (struct rtentry **)0); if (error) panic("nfs_mountroot: RTM_ADD: %d", error); } if (nd->swap_nblks) { /* Convert to DEV_BSIZE instead of Kilobyte */ nd->swap_nblks *= 2; /* * Create a fake mount point just for the swap vnode so that the * swap file can be on a different server from the rootfs. */ nd->swap_args.fh = nd->swap_fh; /* * If using nfsv3_diskless, replace NFSX_V2FH with * nd->swap_fhsize. */ nd->swap_args.fhsize = NFSX_V2FH; l = ntohl(nd->swap_saddr.sin_addr.s_addr); sprintf(buf,"%ld.%ld.%ld.%ld:%s", (l >> 24) & 0xff, (l >> 16) & 0xff, (l >> 8) & 0xff, (l >> 0) & 0xff,nd->swap_hostnam); printf("NFS SWAP: %s\n",buf); (void) nfs_mountdiskless(buf, "/swap", 0, &nd->swap_saddr, &nd->swap_args, &vp); VTONFS(vp)->n_size = VTONFS(vp)->n_vattr.va_size = nd->swap_nblks * DEV_BSIZE ; /* * Since the swap file is not the root dir of a file system, * hack it to a regular file. */ vp->v_type = VREG; vp->v_flag = 0; VREF(vp); swaponvp(p, vp, NODEV, nd->swap_nblks); } /* * Create the rootfs mount point. */ nd->root_args.fh = nd->root_fh; /* * If using nfsv3_diskless, replace NFSX_V2FH with nd->root_fhsize. 
*/ nd->root_args.fhsize = NFSX_V2FH; l = ntohl(nd->root_saddr.sin_addr.s_addr); sprintf(buf,"%ld.%ld.%ld.%ld:%s", (l >> 24) & 0xff, (l >> 16) & 0xff, (l >> 8) & 0xff, (l >> 0) & 0xff,nd->root_hostnam); printf("NFS ROOT: %s\n",buf); mp = nfs_mountdiskless(buf, "/", MNT_RDONLY, &nd->root_saddr, &nd->root_args, &vp); if (vfs_lock(mp)) panic("nfs_mountroot: vfs_lock"); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); mp->mnt_flag |= MNT_ROOTFS; mp->mnt_vnodecovered = NULLVP; vfs_unlock(mp); rootvp = vp; /* * This is not really an nfs issue, but it is much easier to * set hostname here and then let the "/etc/rc.xxx" files * mount the right /var based upon its preset value. */ bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN); hostname[MAXHOSTNAMELEN - 1] = '\0'; for (i = 0; i < MAXHOSTNAMELEN; i++) if (hostname[i] == '\0') break; inittodr(ntohl(nd->root_time)); return (0); } /* * Internal version of mount system call for diskless setup. */ static struct mount * nfs_mountdiskless(path, which, mountflag, sin, args, vpp) char *path; char *which; int mountflag; struct sockaddr_in *sin; struct nfs_args *args; register struct vnode **vpp; { register struct mount *mp; register struct mbuf *m; register int error; mp = (struct mount *)malloc((u_long)sizeof(struct mount), M_MOUNT, M_NOWAIT); if (mp == NULL) panic("nfs_mountroot: %s mount malloc", which); bzero((char *)mp, (u_long)sizeof(struct mount)); mp->mnt_op = &nfs_vfsops; mp->mnt_flag = mountflag; MGET(m, MT_SONAME, M_DONTWAIT); if (m == NULL) panic("nfs_mountroot: %s mount mbuf", which); bcopy((caddr_t)sin, mtod(m, caddr_t), sin->sin_len); m->m_len = sin->sin_len; error = mountnfs(args, mp, m, which, path, vpp); if (error) panic("nfs_mountroot: mount %s on %s: %d", path, which, error); return (mp); } /* * VFS Operations. 
* * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the sockargs() call because sockargs() allocates an mbuf and * an error after that means that I have to release the mbuf. */ /* ARGSUSED */ static int nfs_mount(mp, path, data, ndp, p) struct mount *mp; char *path; caddr_t data; struct nameidata *ndp; struct proc *p; { int error; struct nfs_args args; struct mbuf *nam; struct vnode *vp; char pth[MNAMELEN], hst[MNAMELEN]; u_int len; u_char nfh[NFSX_V3FHMAX]; error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args)); if (error) return (error); error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); if (error) return (error); error = copyinstr(path, pth, MNAMELEN-1, &len); if (error) return (error); bzero(&pth[len], MNAMELEN - len); error = copyinstr(args.hostname, hst, MNAMELEN-1, &len); if (error) return (error); bzero(&hst[len], MNAMELEN - len); /* sockargs() call must be after above copyin() calls */ error = sockargs(&nam, (caddr_t)args.addr, args.addrlen, MT_SONAME); if (error) return (error); args.fh = nfh; error = mountnfs(&args, mp, nam, pth, hst, &vp); return (error); } /* * Common code for mount and mountroot */ static int mountnfs(argp, mp, nam, pth, hst, vpp) register struct nfs_args *argp; register struct mount *mp; struct mbuf *nam; char *pth, *hst; struct vnode **vpp; { register struct nfsmount *nmp; struct nfsnode *np; int error, maxio; struct vattr attrs; if (mp->mnt_flag & MNT_UPDATE) { nmp = VFSTONFS(mp); /* update paths, file handles, etc, here XXX */ m_freem(nam); return (0); } else { MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount), M_NFSMNT, M_WAITOK); bzero((caddr_t)nmp, sizeof (struct nfsmount)); TAILQ_INIT(&nmp->nm_uidlruhead); mp->mnt_data = (qaddr_t)nmp; } getnewfsid(mp, MOUNT_NFS); nmp->nm_mountp = mp; nmp->nm_flag = argp->flags; if (nmp->nm_flag & NFSMNT_NQNFS) /* * We have to set mnt_maxsymlink to a non-zero 
value so * that COMPAT_43 routines will know that we are setting * the d_type field in directories (and can zero it for * unsuspecting binaries). */ mp->mnt_maxsymlinklen = 1; nmp->nm_timeo = NFS_TIMEO; nmp->nm_retry = NFS_RETRANS; nmp->nm_wsize = NFS_WSIZE; nmp->nm_rsize = NFS_RSIZE; nmp->nm_readdirsize = NFS_READDIRSIZE; nmp->nm_numgrps = NFS_MAXGRPS; nmp->nm_readahead = NFS_DEFRAHEAD; nmp->nm_leaseterm = NQ_DEFLEASE; nmp->nm_deadthresh = NQ_DEADTHRESH; CIRCLEQ_INIT(&nmp->nm_timerhead); nmp->nm_inprog = NULLVP; nmp->nm_fhsize = argp->fhsize; bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize); #ifdef __NetBSD__ #ifdef COMPAT_09 mp->mnt_stat.f_type = 2; #else mp->mnt_stat.f_type = 0; #endif #else mp->mnt_stat.f_type = MOUNT_NFS; #endif bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); bcopy(pth, mp->mnt_stat.f_mntonname, MNAMELEN); nmp->nm_nam = nam; if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; if (nmp->nm_timeo < NFS_MINTIMEO) nmp->nm_timeo = NFS_MINTIMEO; else if (nmp->nm_timeo > NFS_MAXTIMEO) nmp->nm_timeo = NFS_MAXTIMEO; } if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { nmp->nm_retry = argp->retrans; if (nmp->nm_retry > NFS_MAXREXMIT) nmp->nm_retry = NFS_MAXREXMIT; } if (argp->flags & NFSMNT_NFSV3) { if (argp->sotype == SOCK_DGRAM) maxio = NFS_MAXDGRAMDATA; else maxio = NFS_MAXDATA; } else maxio = NFS_V2MAXDATA; if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { nmp->nm_wsize = argp->wsize; /* Round down to multiple of blocksize */ nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1); if (nmp->nm_wsize <= 0) nmp->nm_wsize = NFS_FABLKSIZE; } if (nmp->nm_wsize > maxio) nmp->nm_wsize = maxio; if (nmp->nm_wsize > MAXBSIZE) nmp->nm_wsize = MAXBSIZE; if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { nmp->nm_rsize = argp->rsize; /* Round down to multiple of blocksize */ nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1); if (nmp->nm_rsize <= 0) nmp->nm_rsize = NFS_FABLKSIZE; } if (nmp->nm_rsize > maxio) 
nmp->nm_rsize = maxio; if (nmp->nm_rsize > MAXBSIZE) nmp->nm_rsize = MAXBSIZE; if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) { nmp->nm_readdirsize = argp->readdirsize; /* Round down to multiple of blocksize */ nmp->nm_readdirsize &= ~(NFS_DIRBLKSIZ - 1); if (nmp->nm_readdirsize < NFS_DIRBLKSIZ) nmp->nm_readdirsize = NFS_DIRBLKSIZ; } if (nmp->nm_readdirsize > maxio) nmp->nm_readdirsize = maxio; if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 && argp->maxgrouplist <= NFS_MAXGRPS) nmp->nm_numgrps = argp->maxgrouplist; if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 && argp->readahead <= NFS_MAXRAHEAD) nmp->nm_readahead = argp->readahead; if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 && argp->leaseterm <= NQ_MAXLEASE) nmp->nm_leaseterm = argp->leaseterm; if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 && argp->deadthresh <= NQ_NEVERDEAD) nmp->nm_deadthresh = argp->deadthresh; /* Set up the sockets and per-host congestion */ nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; /* * For Connection based sockets (TCP,...) defer the connect until * the first request, in case the server is not responding. */ if (nmp->nm_sotype == SOCK_DGRAM && (error = nfs_connect(nmp, (struct nfsreq *)0))) goto bad; /* * This is silly, but it has to be set so that vinifod() works. * We do not want to do an nfs_statfs() here since we can get * stuck on a dead server and we are holding a lock on the mount * point. */ mp->mnt_stat.f_iosize = nfs_iosize(nmp); /* * A reference count is needed on the nfsnode representing the * remote root. If this object is not persistent, then backward * traversals of the mount point (i.e. "..") will not work if * the nfsnode gets flushed out of the cache. Ufs does not have * this problem, because one can identify root inodes by their * number == ROOTINO (2). 
*/ error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np); if (error) goto bad; *vpp = NFSTOV(np); /* * Get file attributes for the mountpoint. This has the side * effect of filling in (*vpp)->v_type with the correct value. */ VOP_GETATTR(*vpp, &attrs, curproc->p_ucred, curproc); /* * Lose the lock but keep the ref. */ VOP_UNLOCK(*vpp); return (0); bad: nfs_disconnect(nmp); free((caddr_t)nmp, M_NFSMNT); m_freem(nam); return (error); } /* * unmount system call */ static int nfs_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { register struct nfsmount *nmp; struct nfsnode *np; struct vnode *vp; int error, flags = 0; if (mntflags & MNT_FORCE) { if (!doforce) return (EINVAL); flags |= FORCECLOSE; } nmp = VFSTONFS(mp); /* * Goes something like this.. * - Check for activity on the root vnode (other than ourselves). * - Call vflush() to clear out vnodes for this file system, * except for the root vnode. * - Decrement reference on the vnode representing remote root. * - Close the socket * - Free up the data structures */ /* * We need to decrement the ref. count on the nfsnode representing * the remote root. See comment in mountnfs(). The VFS unmount() * has done vput on this vnode, otherwise we would get deadlock! */ error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np); if (error) return(error); vp = NFSTOV(np); if (vp->v_usecount > 2) { vput(vp); return (EBUSY); } /* * Must handshake with nqnfs_clientd() if it is active. */ nmp->nm_flag |= NFSMNT_DISMINPROG; while (nmp->nm_inprog != NULLVP) (void) tsleep((caddr_t)&lbolt, PSOCK, "nfsdism", 0); error = vflush(mp, vp, flags); if (error) { vput(vp); nmp->nm_flag &= ~NFSMNT_DISMINPROG; return (error); } /* * We are now committed to the unmount. * For NQNFS, let the server daemon free the nfsmount structure. */ if (nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) nmp->nm_flag |= NFSMNT_DISMNT; /* * There are two reference counts and one lock to get rid of here. 
*/ vput(vp); vrele(vp); vgone(vp); nfs_disconnect(nmp); m_freem(nmp->nm_nam); if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) == 0) free((caddr_t)nmp, M_NFSMNT); return (0); } /* * Return root of a filesystem */ static int nfs_root(mp, vpp) struct mount *mp; struct vnode **vpp; { register struct vnode *vp; struct nfsmount *nmp; struct nfsnode *np; int error; nmp = VFSTONFS(mp); error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np); if (error) return (error); vp = NFSTOV(np); VOP_UNLOCK(vp); if (vp->v_type == VNON) vp->v_type = VDIR; vp->v_flag = VROOT; *vpp = vp; return (0); } extern int syncprt; /* * Flush out the buffer cache */ /* ARGSUSED */ static int nfs_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { register struct vnode *vp; int error, allerror = 0; /* * Force stale buffer cache information to be flushed. */ loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = vp->v_mntvnodes.le_next) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; if (VOP_ISLOCKED(vp) || vp->v_dirtyblkhd.lh_first == NULL) continue; if (vget(vp, 1)) goto loop; error = VOP_FSYNC(vp, cred, waitfor, p); if (error) allerror = error; vput(vp); } return (allerror); } /* * NFS flat namespace lookup. * Currently unsupported. */ /* ARGSUSED */ static int nfs_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { return (EOPNOTSUPP); } /* * At this point, this should never happen */ /* ARGSUSED */ static int nfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) register struct mount *mp; struct fid *fhp; struct mbuf *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { return (EINVAL); } /* * Vnode pointer to File handle, should never happen either */ /* ARGSUSED */ static int nfs_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { return (EINVAL); } /* * Vfs start routine, a no-op. 
*/ /* ARGSUSED */ static int nfs_start(mp, flags, p) struct mount *mp; int flags; struct proc *p; { return (0); } /* * Do operations associated with quotas, not supported */ /* ARGSUSED */ static int nfs_quotactl(mp, cmd, uid, arg, p) struct mount *mp; int cmd; uid_t uid; caddr_t arg; struct proc *p; { return (EOPNOTSUPP); } Index: head/sys/nfsserver/nfs_srvsock.c =================================================================== --- head/sys/nfsserver/nfs_srvsock.c (revision 14092) +++ head/sys/nfsserver/nfs_srvsock.c (revision 14093) @@ -1,2176 +1,2178 @@ /* * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_socket.c 8.3 (Berkeley) 1/12/94 - * $Id: nfs_socket.c,v 1.13 1995/12/17 21:12:25 phk Exp $ + * $Id: nfs_socket.c,v 1.14 1996/01/13 23:27:52 phk Exp $ */ /* * Socket operations for use by nfs */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TRUE 1 #define FALSE 0 /* * Estimate rto for an nfs rpc sent via. an unreliable datagram. * Use the mean and mean deviation of rtt for the appropriate type of rpc * for the frequent rpcs and a default for the others. * The justification for doing "other" this way is that these rpcs * happen so infrequently that timer est. would probably be stale. * Also, since many of these rpcs are * non-idempotent, a conservative timeout is desired. * getattr, lookup - A+2D * read, write - A+4D * other - nm_timeo */ #define NFS_RTO(n, t) \ ((t) == 0 ? (n)->nm_timeo : \ ((t) < 3 ? 
\ (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \ ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1))) #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1] #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1] /* * External data, mostly RPC constants in XDR form */ extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr, rpc_auth_kerb; extern u_long nfs_prog, nqnfs_prog; extern time_t nqnfsstarttime; extern struct nfsstats nfsstats; extern int nfsv3_procid[NFS_NPROCS]; extern int nfs_ticks; /* * Defines which timer to use for the procnum. * 0 - default * 1 - getattr * 2 - lookup * 3 - read * 4 - write */ static int proct[NFS_NPROCS] = { 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, }; /* * There is a congestion window for outstanding rpcs maintained per mount * point. The cwnd size is adjusted in roughly the way that: * Van Jacobson, Congestion avoidance and Control, In "Proceedings of * SIGCOMM '88". ACM, August 1988. * describes for TCP. The cwnd size is chopped in half on a retransmit timeout * and incremented by 1/cwnd when each rpc reply is received and a full cwnd * of rpcs is in progress. * (The sent count and cwnd are scaled for integer arith.) * Variants of "slow start" were tried and were found to be too much of a * performance hit (ave. rtt 3 times larger), * I suspect due to the large rtt that nfs rpcs have. 
*/ #define NFS_CWNDSCALE 256 #define NFS_MAXCWND (NFS_CWNDSCALE * 32) static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, }; int nfsrtton = 0; struct nfsrtt nfsrtt; static int nfs_msg __P((struct proc *,char *,char *)); static int nfs_rcvlock __P((struct nfsreq *)); static void nfs_rcvunlock __P((int *flagp)); static void nfs_realign __P((struct mbuf *m, int hsiz)); static int nfs_receive __P((struct nfsreq *rep, struct mbuf **aname, struct mbuf **mp)); static int nfs_reconnect __P((struct nfsreq *rep)); static int nfsrv_getstream __P((struct nfssvc_sock *,int)); #ifndef NFS_NOSERVER int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mreqp)) = { nfsrv_null, nfsrv_getattr, nfsrv_setattr, nfsrv_lookup, nfsrv3_access, nfsrv_readlink, nfsrv_read, nfsrv_write, nfsrv_create, nfsrv_mkdir, nfsrv_symlink, nfsrv_mknod, nfsrv_remove, nfsrv_rmdir, nfsrv_rename, nfsrv_link, nfsrv_readdir, nfsrv_readdirplus, nfsrv_statfs, nfsrv_fsinfo, nfsrv_pathconf, nfsrv_commit, nqnfsrv_getlease, nqnfsrv_vacated, nfsrv_noop, nfsrv_noop }; #endif /* NFS_NOSERVER */ /* * Initialize sockets and congestion for a new NFS connection. * We do not free the sockaddr if error. */ int nfs_connect(nmp, rep) register struct nfsmount *nmp; struct nfsreq *rep; { register struct socket *so; int s, error, rcvreserve, sndreserve; struct sockaddr *saddr; struct sockaddr_in *sin; struct mbuf *m; u_short tport; + struct proc *p = &proc0; /* only used for socreate */ nmp->nm_so = (struct socket *)0; saddr = mtod(nmp->nm_nam, struct sockaddr *); error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype, - nmp->nm_soproto); + nmp->nm_soproto, p); if (error) goto bad; so = nmp->nm_so; + so->so_state &= ~SS_PRIV; /* don't need it */ nmp->nm_soflags = so->so_proto->pr_flags; /* * Some servers require that the client port be a reserved port number. 
*/ if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) { MGET(m, M_WAIT, MT_SONAME); sin = mtod(m, struct sockaddr_in *); sin->sin_len = m->m_len = sizeof (struct sockaddr_in); sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_ANY; tport = IPPORT_RESERVED - 1; sin->sin_port = htons(tport); while ((error = sobind(so, m)) == EADDRINUSE && --tport > IPPORT_RESERVED / 2) sin->sin_port = htons(tport); m_freem(m); if (error) goto bad; } /* * Protocols that do not require connections may be optionally left * unconnected for servers that reply from a port other than NFS_PORT. */ if (nmp->nm_flag & NFSMNT_NOCONN) { if (nmp->nm_soflags & PR_CONNREQUIRED) { error = ENOTCONN; goto bad; } } else { error = soconnect(so, nmp->nm_nam); if (error) goto bad; /* * Wait for the connection to complete. Cribbed from the * connect system call but with the wait timing out so * that interruptible mounts don't hang here for a long time. */ s = splnet(); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 2 * hz); if ((so->so_state & SS_ISCONNECTING) && so->so_error == 0 && rep && (error = nfs_sigintr(nmp, rep, rep->r_procp))) { so->so_state &= ~SS_ISCONNECTING; splx(s); goto bad; } } if (so->so_error) { error = so->so_error; so->so_error = 0; splx(s); goto bad; } splx(s); } if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { so->so_rcv.sb_timeo = (5 * hz); so->so_snd.sb_timeo = (5 * hz); } else { so->so_rcv.sb_timeo = 0; so->so_snd.sb_timeo = 0; } if (nmp->nm_sotype == SOCK_DGRAM) { sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR; rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR; } else if (nmp->nm_sotype == SOCK_SEQPACKET) { sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2; rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2; } else { if (nmp->nm_sotype != SOCK_STREAM) panic("nfscon sotype"); if (so->so_proto->pr_flags & PR_CONNREQUIRED) { MGET(m, M_WAIT, MT_SOOPTS); *mtod(m, int *) = 1; m->m_len = sizeof(int); 
sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); } if (so->so_proto->pr_protocol == IPPROTO_TCP) { MGET(m, M_WAIT, MT_SOOPTS); *mtod(m, int *) = 1; m->m_len = sizeof(int); sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); } sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long)) * 2; rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long)) * 2; } error = soreserve(so, sndreserve, rcvreserve); if (error) goto bad; so->so_rcv.sb_flags |= SB_NOINTR; so->so_snd.sb_flags |= SB_NOINTR; /* Initialize other non-zero congestion variables */ nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] = nmp->nm_srtt[4] = (NFS_TIMEO << 3); nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0; nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ nmp->nm_sent = 0; nmp->nm_timeouts = 0; return (0); bad: nfs_disconnect(nmp); return (error); } /* * Reconnect routine: * Called when a connection is broken on a reliable protocol. * - clean up the old socket * - nfs_connect() again * - set R_MUSTRESEND for all outstanding requests on mount point * If this fails the mount point is DEAD! * nb: Must be called with the nfs_sndlock() set on the mount point. */ static int nfs_reconnect(rep) register struct nfsreq *rep; { register struct nfsreq *rp; register struct nfsmount *nmp = rep->r_nmp; int error; nfs_disconnect(nmp); while ((error = nfs_connect(nmp, rep))) { if (error == EINTR || error == ERESTART) return (EINTR); (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); } /* * Loop through outstanding request list and fix up all requests * on old socket. */ for (rp = nfs_reqq.tqh_first; rp != 0; rp = rp->r_chain.tqe_next) { if (rp->r_nmp == nmp) rp->r_flags |= R_MUSTRESEND; } return (0); } /* * NFS disconnect. Clean up and unlink. 
*/ void nfs_disconnect(nmp) register struct nfsmount *nmp; { register struct socket *so; if (nmp->nm_so) { so = nmp->nm_so; nmp->nm_so = (struct socket *)0; soshutdown(so, 2); soclose(so); } } /* * This is the nfs send routine. For connection based socket types, it * must be called with an nfs_sndlock() on the socket. * "rep == NULL" indicates that it has been called from a server. * For the client side: * - return EINTR if the RPC is terminated, 0 otherwise * - set R_MUSTRESEND if the send fails for any reason * - do any cleanup required by recoverable socket errors (???) * For the server side: * - return EINTR or ERESTART if interrupted by a signal * - return EPIPE if a connection is lost for connection based sockets (TCP...) * - do any cleanup required by recoverable socket errors (???) */ int nfs_send(so, nam, top, rep) register struct socket *so; struct mbuf *nam; register struct mbuf *top; struct nfsreq *rep; { struct mbuf *sendnam; int error, soflags, flags; if (rep) { if (rep->r_flags & R_SOFTTERM) { m_freem(top); return (EINTR); } if ((so = rep->r_nmp->nm_so) == NULL) { rep->r_flags |= R_MUSTRESEND; m_freem(top); return (0); } rep->r_flags &= ~R_MUSTRESEND; soflags = rep->r_nmp->nm_soflags; } else soflags = so->so_proto->pr_flags; if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) sendnam = (struct mbuf *)0; else sendnam = nam; if (so->so_type == SOCK_SEQPACKET) flags = MSG_EOR; else flags = 0; error = sosend(so, sendnam, (struct uio *)0, top, (struct mbuf *)0, flags); if (error) { if (rep) { log(LOG_INFO, "nfs send error %d for server %s\n",error, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); /* * Deal with errors for the client side. */ if (rep->r_flags & R_SOFTTERM) error = EINTR; else rep->r_flags |= R_MUSTRESEND; } else log(LOG_INFO, "nfsd send error %d\n", error); /* * Handle any recoverable (soft) socket errors here. (???) 
*/ if (error != EINTR && error != ERESTART && error != EWOULDBLOCK && error != EPIPE) error = 0; } return (error); } /* * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all * done by soreceive(), but for SOCK_STREAM we must deal with the Record * Mark and consolidate the data into a new mbuf list. * nb: Sometimes TCP passes the data up to soreceive() in long lists of * small mbufs. * For SOCK_STREAM we must be very careful to read an entire record once * we have read any of it, even if the system call has been interrupted. */ static int nfs_receive(rep, aname, mp) register struct nfsreq *rep; struct mbuf **aname; struct mbuf **mp; { register struct socket *so; struct uio auio; struct iovec aio; register struct mbuf *m; struct mbuf *control; u_long len; struct mbuf **getnam; int error, sotype, rcvflg; struct proc *p = curproc; /* XXX */ /* * Set up arguments for soreceive() */ *mp = (struct mbuf *)0; *aname = (struct mbuf *)0; sotype = rep->r_nmp->nm_sotype; /* * For reliable protocols, lock against other senders/receivers * in case a reconnect is necessary. * For SOCK_STREAM, first get the Record Mark to find out how much * more there is to get. * We must lock the socket against other receivers * until we have an entire rpc request/reply. */ if (sotype != SOCK_DGRAM) { error = nfs_sndlock(&rep->r_nmp->nm_flag, rep); if (error) return (error); tryagain: /* * Check for fatal errors and resending request. */ /* * Ugh: If a reconnect attempt just happened, nm_so * would have changed. NULL indicates a failed * attempt that has essentially shut down this * mount point. 
*/ if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { nfs_sndunlock(&rep->r_nmp->nm_flag); return (EINTR); } so = rep->r_nmp->nm_so; if (!so) { error = nfs_reconnect(rep); if (error) { nfs_sndunlock(&rep->r_nmp->nm_flag); return (error); } goto tryagain; } while (rep->r_flags & R_MUSTRESEND) { m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); nfsstats.rpcretries++; error = nfs_send(so, rep->r_nmp->nm_nam, m, rep); if (error) { if (error == EINTR || error == ERESTART || (error = nfs_reconnect(rep))) { nfs_sndunlock(&rep->r_nmp->nm_flag); return (error); } goto tryagain; } } nfs_sndunlock(&rep->r_nmp->nm_flag); if (sotype == SOCK_STREAM) { aio.iov_base = (caddr_t) &len; aio.iov_len = sizeof(u_long); auio.uio_iov = &aio; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_offset = 0; auio.uio_resid = sizeof(u_long); auio.uio_procp = p; do { rcvflg = MSG_WAITALL; error = soreceive(so, (struct mbuf **)0, &auio, (struct mbuf **)0, (struct mbuf **)0, &rcvflg); if (error == EWOULDBLOCK && rep) { if (rep->r_flags & R_SOFTTERM) return (EINTR); } } while (error == EWOULDBLOCK); if (!error && auio.uio_resid > 0) { log(LOG_INFO, "short receive (%d/%d) from nfs server %s\n", sizeof(u_long) - auio.uio_resid, sizeof(u_long), rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); error = EPIPE; } if (error) goto errout; len = ntohl(len) & ~0x80000000; /* * This is SERIOUS! We are out of sync with the sender * and forcing a disconnect/reconnect is all I can do. 
*/ if (len > NFS_MAXPACKET) { log(LOG_ERR, "%s (%d) from nfs server %s\n", "impossible packet length", len, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); error = EFBIG; goto errout; } auio.uio_resid = len; do { rcvflg = MSG_WAITALL; error = soreceive(so, (struct mbuf **)0, &auio, mp, (struct mbuf **)0, &rcvflg); } while (error == EWOULDBLOCK || error == EINTR || error == ERESTART); if (!error && auio.uio_resid > 0) { log(LOG_INFO, "short receive (%d/%d) from nfs server %s\n", len - auio.uio_resid, len, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); error = EPIPE; } } else { /* * NB: Since uio_resid is big, MSG_WAITALL is ignored * and soreceive() will return when it has either a * control msg or a data msg. * We have no use for control msg., but must grab them * and then throw them away so we know what is going * on. */ auio.uio_resid = len = 100000000; /* Anything Big */ auio.uio_procp = p; do { rcvflg = 0; error = soreceive(so, (struct mbuf **)0, &auio, mp, &control, &rcvflg); if (control) m_freem(control); if (error == EWOULDBLOCK && rep) { if (rep->r_flags & R_SOFTTERM) return (EINTR); } } while (error == EWOULDBLOCK || (!error && *mp == NULL && control)); if ((rcvflg & MSG_EOR) == 0) printf("Egad!!\n"); if (!error && *mp == NULL) error = EPIPE; len -= auio.uio_resid; } errout: if (error && error != EINTR && error != ERESTART) { m_freem(*mp); *mp = (struct mbuf *)0; if (error != EPIPE) log(LOG_INFO, "receive error %d from nfs server %s\n", error, rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); error = nfs_sndlock(&rep->r_nmp->nm_flag, rep); if (!error) error = nfs_reconnect(rep); if (!error) goto tryagain; } } else { if ((so = rep->r_nmp->nm_so) == NULL) return (EACCES); if (so->so_state & SS_ISCONNECTED) getnam = (struct mbuf **)0; else getnam = aname; auio.uio_resid = len = 1000000; auio.uio_procp = p; do { rcvflg = 0; error = soreceive(so, getnam, &auio, mp, (struct mbuf **)0, &rcvflg); if (error == EWOULDBLOCK && (rep->r_flags & R_SOFTTERM)) return 
(EINTR); } while (error == EWOULDBLOCK); len -= auio.uio_resid; } if (error) { m_freem(*mp); *mp = (struct mbuf *)0; } /* * Search for any mbufs that are not a multiple of 4 bytes long * or with m_data not longword aligned. * These could cause pointer alignment problems, so copy them to * well aligned mbufs. */ nfs_realign(*mp, 5 * NFSX_UNSIGNED); return (error); } /* * Implement receipt of reply on a socket. * We must search through the list of received datagrams matching them * with outstanding requests using the xid, until ours is found. */ /* ARGSUSED */ int nfs_reply(myrep) struct nfsreq *myrep; { register struct nfsreq *rep; register struct nfsmount *nmp = myrep->r_nmp; register long t1; struct mbuf *mrep, *nam, *md; u_long rxid, *tl; caddr_t dpos, cp2; int error; /* * Loop around until we get our own reply */ for (;;) { /* * Lock against other receivers so that I don't get stuck in * sbwait() after someone else has received my reply for me. * Also necessary for connection based protocols to avoid * race conditions during a reconnect. */ error = nfs_rcvlock(myrep); if (error) return (error); /* Already received, bye bye */ if (myrep->r_mrep != NULL) { nfs_rcvunlock(&nmp->nm_flag); return (0); } /* * Get the next Rpc reply off the socket */ error = nfs_receive(myrep, &nam, &mrep); nfs_rcvunlock(&nmp->nm_flag); if (error) { /* * Ignore routing errors on connectionless protocols?? 
*/ if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { nmp->nm_so->so_error = 0; if (myrep->r_flags & R_GETONEREP) return (0); continue; } return (error); } if (nam) m_freem(nam); /* * Get the xid and check that it is an rpc reply */ md = mrep; dpos = mtod(md, caddr_t); nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); rxid = *tl++; if (*tl != rpc_reply) { #ifndef NFS_NOSERVER if (nmp->nm_flag & NFSMNT_NQNFS) { if (nqnfs_callback(nmp, mrep, md, dpos)) nfsstats.rpcinvalid++; } else { nfsstats.rpcinvalid++; m_freem(mrep); } #else nfsstats.rpcinvalid++; m_freem(mrep); #endif nfsmout: if (myrep->r_flags & R_GETONEREP) return (0); continue; } /* * Loop through the request list to match up the reply * Iff no match, just drop the datagram */ for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) { if (rep->r_mrep == NULL && rxid == rep->r_xid) { /* Found it.. */ rep->r_mrep = mrep; rep->r_md = md; rep->r_dpos = dpos; if (nfsrtton) { struct rttl *rt; rt = &nfsrtt.rttl[nfsrtt.pos]; rt->proc = rep->r_procnum; rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]); rt->sent = nmp->nm_sent; rt->cwnd = nmp->nm_cwnd; rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1]; rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1]; rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid; rt->tstamp = time; if (rep->r_flags & R_TIMING) rt->rtt = rep->r_rtt; else rt->rtt = 1000000; nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ; } /* * Update congestion window. * Do the additive increase of * one rpc/rtt. */ if (nmp->nm_cwnd <= nmp->nm_sent) { nmp->nm_cwnd += (NFS_CWNDSCALE * NFS_CWNDSCALE + (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd; if (nmp->nm_cwnd > NFS_MAXCWND) nmp->nm_cwnd = NFS_MAXCWND; } rep->r_flags &= ~R_SENT; nmp->nm_sent -= NFS_CWNDSCALE; /* * Update rtt using a gain of 0.125 on the mean * and a gain of 0.25 on the deviation. */ if (rep->r_flags & R_TIMING) { /* * Since the timer resolution of * NFS_HZ is so course, it can often * result in r_rtt == 0. 
Since * r_rtt == N means that the actual * rtt is between N+dt and N+2-dt ticks, * add 1. */ t1 = rep->r_rtt + 1; t1 -= (NFS_SRTT(rep) >> 3); NFS_SRTT(rep) += t1; if (t1 < 0) t1 = -t1; t1 -= (NFS_SDRTT(rep) >> 2); NFS_SDRTT(rep) += t1; } nmp->nm_timeouts = 0; break; } } /* * If not matched to a request, drop it. * If it's mine, get out. */ if (rep == 0) { nfsstats.rpcunexpected++; m_freem(mrep); } else if (rep == myrep) { if (rep->r_mrep == NULL) panic("nfsreply nil"); return (0); } if (myrep->r_flags & R_GETONEREP) return (0); } } /* * nfs_request - goes something like this * - fill in request struct * - links it into list * - calls nfs_send() for first transmit * - calls nfs_receive() to get reply * - break down rpc header and return with nfs reply pointed to * by mrep or error * nb: always frees up mreq mbuf list */ int nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp) struct vnode *vp; struct mbuf *mrest; int procnum; struct proc *procp; struct ucred *cred; struct mbuf **mrp; struct mbuf **mdp; caddr_t *dposp; { register struct mbuf *m, *mrep; register struct nfsreq *rep; register u_long *tl; register int i; struct nfsmount *nmp; struct mbuf *md, *mheadend; struct nfsnode *np; char nickv[RPCX_NICKVERF]; time_t reqtime, waituntil; caddr_t dpos, cp2; int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type; int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0; int verf_len, verf_type; u_long xid; u_quad_t frev; char *auth_str, *verf_str; NFSKERBKEY_T key; /* save session key */ nmp = VFSTONFS(vp->v_mount); MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); rep->r_nmp = nmp; rep->r_vp = vp; rep->r_procp = procp; rep->r_procnum = procnum; i = 0; m = mrest; while (m) { i += m->m_len; m = m->m_next; } mrest_len = i; /* * Get the RPC header with authorization. 
*/ kerbauth: verf_str = auth_str = (char *)0; if (nmp->nm_flag & NFSMNT_KERB) { verf_str = nickv; verf_len = sizeof (nickv); auth_type = RPCAUTH_KERB4; bzero((caddr_t)key, sizeof (key)); if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str, &auth_len, verf_str, verf_len)) { error = nfs_getauth(nmp, rep, cred, &auth_str, &auth_len, verf_str, &verf_len, key); if (error) { free((caddr_t)rep, M_NFSREQ); m_freem(mrest); return (error); } } } else { auth_type = RPCAUTH_UNIX; if (cred->cr_ngroups < 1) panic("nfsreq nogrps"); auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ? nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) + 5 * NFSX_UNSIGNED; } m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len, auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid); if (auth_str) free(auth_str, M_TEMP); /* * For stream protocols, insert a Sun RPC Record Mark. */ if (nmp->nm_sotype == SOCK_STREAM) { M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); *mtod(m, u_long *) = htonl(0x80000000 | (m->m_pkthdr.len - NFSX_UNSIGNED)); } rep->r_mreq = m; rep->r_xid = xid; tryagain: if (nmp->nm_flag & NFSMNT_SOFT) rep->r_retry = nmp->nm_retry; else rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ rep->r_rtt = rep->r_rexmit = 0; if (proct[procnum] > 0) rep->r_flags = R_TIMING; else rep->r_flags = 0; rep->r_mrep = NULL; /* * Do the client side RPC. */ nfsstats.rpcrequests++; /* * Chain request into list of outstanding requests. Be sure * to put it LAST so timer finds oldest requests first. */ s = splsoftclock(); TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain); /* Get send time for nqnfs */ reqtime = time.tv_sec; /* * If backing off another request or avoiding congestion, don't * send this one now but let timer do it. If not timing a request, * do it now. 
*/ if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || (nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) { splx(s); if (nmp->nm_soflags & PR_CONNREQUIRED) error = nfs_sndlock(&nmp->nm_flag, rep); if (!error) { m = m_copym(m, 0, M_COPYALL, M_WAIT); error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); if (nmp->nm_soflags & PR_CONNREQUIRED) nfs_sndunlock(&nmp->nm_flag); } if (!error && (rep->r_flags & R_MUSTRESEND) == 0) { nmp->nm_sent += NFS_CWNDSCALE; rep->r_flags |= R_SENT; } } else { splx(s); rep->r_rtt = -1; } /* * Wait for the reply from our send or the timer's. */ if (!error || error == EPIPE) error = nfs_reply(rep); /* * RPC done, unlink the request. */ s = splsoftclock(); TAILQ_REMOVE(&nfs_reqq, rep, r_chain); splx(s); /* * Decrement the outstanding request count. */ if (rep->r_flags & R_SENT) { rep->r_flags &= ~R_SENT; /* paranoia */ nmp->nm_sent -= NFS_CWNDSCALE; } /* * If there was a successful reply and a tprintf msg. * tprintf a response. */ if (!error && (rep->r_flags & R_TPRINTFMSG)) nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, "is alive again"); mrep = rep->r_mrep; md = rep->r_md; dpos = rep->r_dpos; if (error) { m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } /* * break down the rpc header and check if ok */ nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); if (*tl++ == rpc_msgdenied) { if (*tl == rpc_mismatch) error = EOPNOTSUPP; else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) { if (!failed_auth) { failed_auth++; mheadend->m_next = (struct mbuf *)0; m_freem(mrep); m_freem(rep->r_mreq); goto kerbauth; } else error = EAUTH; } else error = EACCES; m_freem(mrep); m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } /* * Grab any Kerberos verifier, otherwise just throw it away. 
*/ verf_type = fxdr_unsigned(int, *tl++); i = fxdr_unsigned(int, *tl); if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) { error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep); if (error) goto nfsmout; } else if (i > 0) nfsm_adv(nfsm_rndup(i)); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); /* 0 == ok */ if (*tl == 0) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); if (*tl != 0) { error = fxdr_unsigned(int, *tl); if ((nmp->nm_flag & NFSMNT_NFSV3) && error == NFSERR_TRYLATER) { m_freem(mrep); error = 0; waituntil = time.tv_sec + trylater_delay; while (time.tv_sec < waituntil) (void) tsleep((caddr_t)&lbolt, PSOCK, "nqnfstry", 0); trylater_delay *= nfs_backoff[trylater_cnt]; if (trylater_cnt < 7) trylater_cnt++; goto tryagain; } /* * If the File Handle was stale, invalidate the * lookup cache, just in case. */ if (error == ESTALE) cache_purge(vp); if (nmp->nm_flag & NFSMNT_NFSV3) { *mrp = mrep; *mdp = md; *dposp = dpos; error |= NFSERR_RETERR; } else m_freem(mrep); m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } /* * For nqnfs, get any lease in reply */ if (nmp->nm_flag & NFSMNT_NQNFS) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); if (*tl) { np = VTONFS(vp); nqlflag = fxdr_unsigned(int, *tl); nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED); cachable = fxdr_unsigned(int, *tl++); reqtime += fxdr_unsigned(int, *tl++); if (reqtime > time.tv_sec) { fxdr_hyper(tl, &frev); nqnfs_clientlease(nmp, np, nqlflag, cachable, reqtime, frev); } } } *mrp = mrep; *mdp = md; *dposp = dpos; m_freem(rep->r_mreq); FREE((caddr_t)rep, M_NFSREQ); return (0); } m_freem(mrep); error = EPROTONOSUPPORT; nfsmout: m_freem(rep->r_mreq); free((caddr_t)rep, M_NFSREQ); return (error); } #ifndef NFS_NOSERVER /* * Generate the rpc reply header * siz arg. 
is used to decide if adding a cluster is worthwhile */ int nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp) int siz; struct nfsrv_descript *nd; struct nfssvc_sock *slp; int err; int cache; u_quad_t *frev; struct mbuf **mrq; struct mbuf **mbp; caddr_t *bposp; { register u_long *tl; register struct mbuf *mreq; caddr_t bpos; struct mbuf *mb, *mb2; MGETHDR(mreq, M_WAIT, MT_DATA); mb = mreq; /* * If this is a big reply, use a cluster else * try and leave leading space for the lower level headers. */ siz += RPC_REPLYSIZ; if (siz >= MINCLSIZE) { MCLGET(mreq, M_WAIT); } else mreq->m_data += max_hdr; tl = mtod(mreq, u_long *); mreq->m_len = 6 * NFSX_UNSIGNED; bpos = ((caddr_t)tl) + mreq->m_len; *tl++ = txdr_unsigned(nd->nd_retxid); *tl++ = rpc_reply; if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) { *tl++ = rpc_msgdenied; if (err & NFSERR_AUTHERR) { *tl++ = rpc_autherr; *tl = txdr_unsigned(err & ~NFSERR_AUTHERR); mreq->m_len -= NFSX_UNSIGNED; bpos -= NFSX_UNSIGNED; } else { *tl++ = rpc_mismatch; *tl++ = txdr_unsigned(RPC_VER2); *tl = txdr_unsigned(RPC_VER2); } } else { *tl++ = rpc_msgaccepted; /* * For Kerberos authentication, we must send the nickname * verifier back, otherwise just RPCAUTH_NULL. */ if (nd->nd_flag & ND_KERBFULL) { register struct nfsuid *nuidp; struct timeval ktvin, ktvout; for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first; nuidp != 0; nuidp = nuidp->nu_hash.le_next) { if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid && (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp), &nuidp->nu_haddr, nd->nd_nam2))) break; } if (nuidp) { ktvin.tv_sec = txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1); ktvin.tv_usec = txdr_unsigned(nuidp->nu_timestamp.tv_usec); /* * Encrypt the timestamp in ecb mode using the * session key. 
*/ #ifdef NFSKERB XXX #endif *tl++ = rpc_auth_kerb; *tl++ = txdr_unsigned(3 * NFSX_UNSIGNED); *tl = ktvout.tv_sec; nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = ktvout.tv_usec; *tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid); } else { *tl++ = 0; *tl++ = 0; } } else { *tl++ = 0; *tl++ = 0; } switch (err) { case EPROGUNAVAIL: *tl = txdr_unsigned(RPC_PROGUNAVAIL); break; case EPROGMISMATCH: *tl = txdr_unsigned(RPC_PROGMISMATCH); nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); if (nd->nd_flag & ND_NQNFS) { *tl++ = txdr_unsigned(3); *tl = txdr_unsigned(3); } else { *tl++ = txdr_unsigned(2); *tl = txdr_unsigned(3); } break; case EPROCUNAVAIL: *tl = txdr_unsigned(RPC_PROCUNAVAIL); break; case EBADRPC: *tl = txdr_unsigned(RPC_GARBAGE); break; default: *tl = 0; if (err != NFSERR_RETVOID) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); if (err) *tl = txdr_unsigned(nfsrv_errmap(nd, err)); else *tl = 0; } break; }; } /* * For nqnfs, piggyback lease as requested. */ if ((nd->nd_flag & ND_NQNFS) && err == 0) { if (nd->nd_flag & ND_LEASE) { nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE); *tl++ = txdr_unsigned(cache); *tl++ = txdr_unsigned(nd->nd_duration); txdr_hyper(frev, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = 0; } } *mrq = mreq; *mbp = mb; *bposp = bpos; if (err != 0 && err != NFSERR_RETVOID) nfsstats.srvrpc_errs++; return (0); } /* * Nfs timer routine * Scan the nfsreq list and retranmit any requests that have timed out * To avoid retransmission attempts on STREAM sockets (in the future) make * sure to set the r_retry field to 0 (implies nm_retry == 0). 
*/ void nfs_timer(arg) void *arg; /* never used */ { register struct nfsreq *rep; register struct mbuf *m; register struct socket *so; register struct nfsmount *nmp; register int timeo; register struct nfssvc_sock *slp; static long lasttime = 0; int s, error; u_quad_t cur_usec; s = splnet(); for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) { nmp = rep->r_nmp; if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) continue; if (nfs_sigintr(nmp, rep, rep->r_procp)) { rep->r_flags |= R_SOFTTERM; continue; } if (rep->r_rtt >= 0) { rep->r_rtt++; if (nmp->nm_flag & NFSMNT_DUMBTIMR) timeo = nmp->nm_timeo; else timeo = NFS_RTO(nmp, proct[rep->r_procnum]); if (nmp->nm_timeouts > 0) timeo *= nfs_backoff[nmp->nm_timeouts - 1]; if (rep->r_rtt <= timeo) continue; if (nmp->nm_timeouts < 8) nmp->nm_timeouts++; } /* * Check for server not responding */ if ((rep->r_flags & R_TPRINTFMSG) == 0 && rep->r_rexmit > nmp->nm_deadthresh) { nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, "not responding"); rep->r_flags |= R_TPRINTFMSG; } if (rep->r_rexmit >= rep->r_retry) { /* too many */ nfsstats.rpctimeouts++; rep->r_flags |= R_SOFTTERM; continue; } if (nmp->nm_sotype != SOCK_DGRAM) { if (++rep->r_rexmit > NFS_MAXREXMIT) rep->r_rexmit = NFS_MAXREXMIT; continue; } if ((so = nmp->nm_so) == NULL) continue; /* * If there is enough space and the window allows.. * Resend it * Set r_rtt to -1 in case we fail to send it now. 
*/ rep->r_rtt = -1; if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && ((nmp->nm_flag & NFSMNT_DUMBTIMR) || (rep->r_flags & R_SENT) || nmp->nm_sent < nmp->nm_cwnd) && (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, (struct mbuf *)0, (struct mbuf *)0); else error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, nmp->nm_nam, (struct mbuf *)0); if (error) { if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) so->so_error = 0; } else { /* * Iff first send, start timing * else turn timing off, backoff timer * and divide congestion window by 2. */ if (rep->r_flags & R_SENT) { rep->r_flags &= ~R_TIMING; if (++rep->r_rexmit > NFS_MAXREXMIT) rep->r_rexmit = NFS_MAXREXMIT; nmp->nm_cwnd >>= 1; if (nmp->nm_cwnd < NFS_CWNDSCALE) nmp->nm_cwnd = NFS_CWNDSCALE; nfsstats.rpcretries++; } else { rep->r_flags |= R_SENT; nmp->nm_sent += NFS_CWNDSCALE; } rep->r_rtt = 0; } } } /* * Call the nqnfs server timer once a second to handle leases. */ if (lasttime != time.tv_sec) { lasttime = time.tv_sec; nqnfs_serverd(); } /* * Scan the write gathering queues for writes that need to be * completed now. */ cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec; for (slp = nfssvc_sockhead.tqh_first; slp != 0; slp = slp->ns_chain.tqe_next) { if (slp->ns_tq.lh_first && slp->ns_tq.lh_first->nd_time<=cur_usec) nfsrv_wakenfsd(slp); } splx(s); timeout(nfs_timer, (void *)0, nfs_ticks); } #endif /* NFS_NOSERVER */ /* * Test for a termination condition pending on the process. * This is used for NFSMNT_INT mounts. */ int nfs_sigintr(nmp, rep, p) struct nfsmount *nmp; struct nfsreq *rep; register struct proc *p; { if (rep && (rep->r_flags & R_SOFTTERM)) return (EINTR); if (!(nmp->nm_flag & NFSMNT_INT)) return (0); if (p && p->p_siglist && (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) & NFSINT_SIGMASK)) return (EINTR); return (0); } /* * Lock a socket against others. 
* Necessary for STREAM sockets to ensure you get an entire rpc request/reply * and also to avoid race conditions between the processes with nfs requests * in progress when a reconnect is necessary. */ int nfs_sndlock(flagp, rep) register int *flagp; struct nfsreq *rep; { struct proc *p; int slpflag = 0, slptimeo = 0; if (rep) { p = rep->r_procp; if (rep->r_nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; } else p = (struct proc *)0; while (*flagp & NFSMNT_SNDLOCK) { if (nfs_sigintr(rep->r_nmp, rep, p)) return (EINTR); *flagp |= NFSMNT_WANTSND; (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck", slptimeo); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } *flagp |= NFSMNT_SNDLOCK; return (0); } /* * Unlock the stream socket for others. */ void nfs_sndunlock(flagp) register int *flagp; { if ((*flagp & NFSMNT_SNDLOCK) == 0) panic("nfs sndunlock"); *flagp &= ~NFSMNT_SNDLOCK; if (*flagp & NFSMNT_WANTSND) { *flagp &= ~NFSMNT_WANTSND; wakeup((caddr_t)flagp); } } static int nfs_rcvlock(rep) register struct nfsreq *rep; { register int *flagp = &rep->r_nmp->nm_flag; int slpflag, slptimeo = 0; if (*flagp & NFSMNT_INT) slpflag = PCATCH; else slpflag = 0; while (*flagp & NFSMNT_RCVLOCK) { if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp)) return (EINTR); *flagp |= NFSMNT_WANTRCV; (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk", slptimeo); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } *flagp |= NFSMNT_RCVLOCK; return (0); } /* * Unlock the stream socket for others. */ static void nfs_rcvunlock(flagp) register int *flagp; { if ((*flagp & NFSMNT_RCVLOCK) == 0) panic("nfs rcvunlock"); *flagp &= ~NFSMNT_RCVLOCK; if (*flagp & NFSMNT_WANTRCV) { *flagp &= ~NFSMNT_WANTRCV; wakeup((caddr_t)flagp); } } /* * Check for badly aligned mbuf data areas and * realign data in an mbuf list by copying the data areas up, as required. 
*
 * m is the head of the mbuf chain, which is fixed up in place.  hsiz
 * bounds how much is placed in the first rewritten mbuf: when that mbuf
 * originally held at most hsiz bytes and has more than hsiz bytes of
 * room, only hsiz bytes are packed into it.
 *
 * Mbufs ahead of the first offender are left alone.  From the first mbuf
 * whose length or data pointer is not a multiple of 4, the rest of the
 * chain is compacted towards the front, and the function returns once
 * that single pass is done.
 */
static void
nfs_realign(m, hsiz)
	register struct mbuf *m;
	int hsiz;
{
	register struct mbuf *m2;
	register int siz, mlen, olen;
	register caddr_t tcp, fcp;
	struct mbuf *mnew;

	while (m) {
	    /*
	     * This never happens for UDP, rarely happens for TCP
	     * but frequently happens for iso transport.
	     */
	    if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) {
		/* olen/fcp: bytes left in, and read cursor of, the source mbuf */
		olen = m->m_len;
		fcp = mtod(m, caddr_t);
		if ((int)fcp & 0x3) {
			/* Misaligned start: move m_data to an aligned spot. */
			m->m_flags &= ~M_PKTHDR;
			if (m->m_flags & M_EXT)
				m->m_data = m->m_ext.ext_buf +
					((m->m_ext.ext_size - olen) & ~0x3);
			else
				m->m_data = m->m_dat;
		}
		m->m_len = 0;
		/* tcp/mnew: write cursor and the mbuf currently being filled */
		tcp = mtod(m, caddr_t);
		mnew = m;
		/* m2: next mbuf to become the destination once mnew is full */
		m2 = m->m_next;

		/*
		 * If possible, only put the first invariant part
		 * of the RPC header in the first mbuf.
		 */
		mlen = M_TRAILINGSPACE(m);
		if (olen <= hsiz && mlen > hsiz)
			mlen = hsiz;

		/*
		 * Loop through the mbuf list consolidating data.
		 */
		while (m) {
			while (olen > 0) {
				if (mlen == 0) {
					/*
					 * Destination full: reset the next
					 * mbuf and continue writing there.
					 * NOTE(review): m2 is dereferenced
					 * without a NULL check; presumably
					 * the destination can never outrun
					 * the source -- confirm.
					 */
					m2->m_flags &= ~M_PKTHDR;
					if (m2->m_flags & M_EXT)
						m2->m_data = m2->m_ext.ext_buf;
					else
						m2->m_data = m2->m_dat;
					m2->m_len = 0;
					mlen = M_TRAILINGSPACE(m2);
					tcp = mtod(m2, caddr_t);
					mnew = m2;
					m2 = m2->m_next;
				}
				siz = min(mlen, olen);
				/* Skip the copy when source and target coincide. */
				if (tcp != fcp)
					bcopy(fcp, tcp, siz);
				mnew->m_len += siz;
				mlen -= siz;
				olen -= siz;
				tcp += siz;
				fcp += siz;
			}
			m = m->m_next;
			if (m) {
				olen = m->m_len;
				fcp = mtod(m, caddr_t);
			}
		}

		/*
		 * Finally, set m_len == 0 for any trailing mbufs that have
		 * been copied out of.
		 */
		while (m2) {
			m2->m_len = 0;
			m2 = m2->m_next;
		}
		return;
	    }
	    m = m->m_next;
	}
}

#ifndef NFS_NOSERVER

/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
 * Essentially do as much as possible non-blocking, else punt and it will
 * be called with M_WAIT from an nfsd.
*
 * Stream sockets: whatever soreceive() returns is appended to the raw
 * chain (ns_raw/ns_rawend, byte count in ns_cc) and nfsrv_getstream() is
 * asked to cut complete records out of it.
 * Other sockets: every received packet is realigned and queued as one
 * record on ns_rec/ns_recend, preceded by the sender's name mbuf when
 * soreceive() supplied one.
 * Failures are recorded in slp->ns_flag (SLP_NEEDQ or SLP_DISCONN);
 * nothing is returned to the caller.
 */
void
nfsrv_rcv(so, arg, waitflag)
	struct socket *so;
	caddr_t arg;
	int waitflag;
{
	register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
	register struct mbuf *m;
	struct mbuf *mp, *nam;
	struct uio auio;
	int flags, error;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;
#ifdef notdef
	/*
	 * Define this to test for nfsds handling this under heavy load.
	 */
	if (waitflag == M_DONTWAIT) {
		slp->ns_flag |= SLP_NEEDQ; goto dorecs;
	}
#endif
	auio.uio_procp = NULL;
	if (so->so_type == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec && waitflag == M_DONTWAIT) {
			slp->ns_flag |= SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().
		 * uio_resid is just "a lot"; the number of bytes actually
		 * received is recovered below as 1000000000 - uio_resid.
		 */
		auio.uio_resid = 1000000000;
		flags = MSG_DONTWAIT;
		error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
		if (error || mp == (struct mbuf *)0) {
			/* Nothing yet: retry later.  Anything else: drop. */
			if (error == EWOULDBLOCK)
				slp->ns_flag |= SLP_NEEDQ;
			else
				slp->ns_flag |= SLP_DISCONN;
			goto dorecs;
		}
		m = mp;
		/* Append the new data to the raw stream chain. */
		if (slp->ns_rawend) {
			slp->ns_rawend->m_next = m;
			slp->ns_cc += 1000000000 - auio.uio_resid;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = 1000000000 - auio.uio_resid;
		}
		while (m->m_next)
			m = m->m_next;
		slp->ns_rawend = m;

		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			/* EPERM means an impossible record length was seen. */
			if (error == EPERM)
				slp->ns_flag |= SLP_DISCONN;
			else
				slp->ns_flag |= SLP_NEEDQ;
		}
	} else {
		/* Drain the socket: one loop iteration per packet. */
		do {
			auio.uio_resid = 1000000000;
			flags = MSG_DONTWAIT;
			error = soreceive(so, &nam, &auio, &mp,
						(struct mbuf **)0, &flags);
			if (mp) {
				nfs_realign(mp, 10 * NFSX_UNSIGNED);
				/* Keep the sender's name in front of the data. */
				if (nam) {
					m = nam;
					m->m_next = mp;
				} else
					m = mp;
				if (slp->ns_recend)
					slp->ns_recend->m_nextpkt = m;
				else
					slp->ns_rec = m;
				slp->ns_recend = m;
				m->m_nextpkt = (struct mbuf *)0;
			}
			if (error) {
				if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
					&& error != EWOULDBLOCK) {
					slp->ns_flag |= SLP_DISCONN;
					goto dorecs;
				}
			}
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (waitflag == M_DONTWAIT &&
		(slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
		nfsrv_wakenfsd(slp);
}

/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket. The "waitflag" argument indicates whether or not it
 * can sleep.
 *
 * Each fragment is preceded by a 4 byte record mark in network byte
 * order: the top bit flags the last fragment of a record, the remaining
 * bits give the fragment length.  Fragments are collected on ns_frag and
 * moved to the ns_rec queue once the last one has arrived.
 *
 * Returns 0 when more raw data is needed, EPERM when a fragment length
 * lies outside NFS_MINPACKET..NFS_MAXPACKET, and EWOULDBLOCK when
 * m_copym() cannot supply an mbuf.  SLP_GETSTREAM guards against
 * re-entry and is cleared on every return path.
 */
static int
nfsrv_getstream(slp, waitflag)
	register struct nfssvc_sock *slp;
	int waitflag;
{
	register struct mbuf *m, **mpp;
	register char *cp1, *cp2;
	register int len;
	struct mbuf *om, *m2, *recm = 0;
	u_long recmark;

	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
	    /* ns_reclen == 0: the next thing in the stream is a record mark */
	    if (slp->ns_reclen == 0) {
		if (slp->ns_cc < NFSX_UNSIGNED) {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}
		m = slp->ns_raw;
		if (m->m_len >= NFSX_UNSIGNED) {
			/* Fast path: the mark sits in a single mbuf. */
			bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
			m->m_data += NFSX_UNSIGNED;
			m->m_len -= NFSX_UNSIGNED;
		} else {
			/* Mark straddles mbufs: gather it byte by byte. */
			cp1 = (caddr_t)&recmark;
			cp2 = mtod(m, caddr_t);
			while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
				while (m->m_len == 0) {
					m = m->m_next;
					cp2 = mtod(m, caddr_t);
				}
				*cp1++ = *cp2++;
				m->m_data++;
				m->m_len--;
			}
		}
		slp->ns_cc -= NFSX_UNSIGNED;
		recmark = ntohl(recmark);
		slp->ns_reclen = recmark & ~0x80000000;
		if (recmark & 0x80000000)
			slp->ns_flag |= SLP_LASTFRAG;
		else
			slp->ns_flag &= ~SLP_LASTFRAG;
		if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (EPERM);
		}
	    }

	    /*
	     * Now get the record part.
	     */
	    if (slp->ns_cc == slp->ns_reclen) {
		/* The raw chain is exactly one fragment: take all of it. */
		recm = slp->ns_raw;
		slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
		slp->ns_cc = slp->ns_reclen = 0;
	    } else if (slp->ns_cc > slp->ns_reclen) {
		/* More than one fragment buffered: split the chain. */
		len = 0;
		m = slp->ns_raw;
		om = (struct mbuf *)0;
		while (len < slp->ns_reclen) {
			if ((len + m->m_len) > slp->ns_reclen) {
				/*
				 * The fragment ends inside this mbuf: copy
				 * its head and trim it off the original.
				 */
				m2 = m_copym(m, 0, slp->ns_reclen - len,
					waitflag);
				if (m2) {
					if (om) {
						om->m_next = m2;
						recm = slp->ns_raw;
					} else
						recm = m2;
					m->m_data += slp->ns_reclen - len;
					m->m_len -= slp->ns_reclen - len;
					len = slp->ns_reclen;
				} else {
					slp->ns_flag &= ~SLP_GETSTREAM;
					return (EWOULDBLOCK);
				}
			} else if ((len + m->m_len) == slp->ns_reclen) {
				/* The fragment ends on an mbuf boundary. */
				om = m;
				len += m->m_len;
				m = m->m_next;
				recm = slp->ns_raw;
				om->m_next = (struct mbuf *)0;
			} else {
				om = m;
				len += m->m_len;
				m = m->m_next;
			}
		}
		slp->ns_raw = m;
		slp->ns_cc -= len;
		slp->ns_reclen = 0;
	    } else {
		/* Fragment not complete yet; wait for more data. */
		slp->ns_flag &= ~SLP_GETSTREAM;
		return (0);
	    }

	    /*
	     * Accumulate the fragments into a record.
	     */
	    mpp = &slp->ns_frag;
	    while (*mpp)
		mpp = &((*mpp)->m_next);
	    *mpp = recm;
	    if (slp->ns_flag & SLP_LASTFRAG) {
		/* Record complete: realign it and queue it for an nfsd. */
		nfs_realign(slp->ns_frag, 10 * NFSX_UNSIGNED);
		if (slp->ns_recend)
		    slp->ns_recend->m_nextpkt = slp->ns_frag;
		else
		    slp->ns_rec = slp->ns_frag;
		slp->ns_recend = slp->ns_frag;
		slp->ns_frag = (struct mbuf *)0;
	    }
	}
}

/*
 * Parse an RPC header.
*
 * Takes the first record off slp's ns_rec queue, wraps it in a newly
 * allocated nfsrv_descript and lets nfs_getreq() parse it.
 * On success *ndp and nfsd->nfsd_nd point at the descriptor and 0 is
 * returned.  Returns ENOBUFS when the socket is not valid or no record
 * is queued, otherwise the error from nfs_getreq(); *ndp is NULL in
 * both failure cases.
 */
int
nfsrv_dorec(slp, nfsd, ndp)
	register struct nfssvc_sock *slp;
	struct nfsd *nfsd;
	struct nfsrv_descript **ndp;
{
	register struct mbuf *m, *nam;
	register struct nfsrv_descript *nd;
	int error;

	*ndp = NULL;
	if ((slp->ns_flag & SLP_VALID) == 0 ||
	    (m = slp->ns_rec) == (struct mbuf *)0)
		return (ENOBUFS);
	/* Unlink the record from the head of the queue. */
	slp->ns_rec = m->m_nextpkt;
	if (slp->ns_rec)
		m->m_nextpkt = (struct mbuf *)0;
	else
		slp->ns_recend = (struct mbuf *)0;
	/* A leading MT_SONAME mbuf carries the sender's address. */
	if (m->m_type == MT_SONAME) {
		nam = m;
		m = m->m_next;
		nam->m_next = NULL;
	} else
		nam = NULL;
	MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript),
		M_NFSRVDESC, M_WAITOK);
	nd->nd_md = nd->nd_mrep = m;
	nd->nd_nam2 = nam;
	nd->nd_dpos = mtod(m, caddr_t);
	error = nfs_getreq(nd, nfsd, TRUE);
	if (error) {
		/*
		 * Only nam and nd are released here.
		 * NOTE(review): the request mbufs are presumably released
		 * inside nfs_getreq() on every error path -- confirm.
		 */
		m_freem(nam);
		free((caddr_t)nd, M_NFSRVDESC);
		return (error);
	}
	*ndp = nd;
	nfsd->nfsd_nd = nd;
	return (0);
}

/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 *
 * nd carries the request mbufs and parse position (nd_mrep, nd_md,
 * nd_dpos) and receives the results.  has_header is non-zero when the
 * xid and message type words are still ahead of the parse position.
 *
 * A non-zero return (e.g. EBADRPC) means the request could not be parsed.
 * Protocol level rejections are reported differently: the function
 * returns 0 with nd_repstat set and nd_procnum forced to NFSPROC_NOOP.
 */
int
nfs_getreq(nd, nfsd, has_header)
	register struct nfsrv_descript *nd;
	struct nfsd *nfsd;
	int has_header;
{
	register int len, i;
	register u_long *tl;
	register long t1;
	struct uio uio;
	struct iovec iov;
	caddr_t dpos, cp2, cp;
	u_long nfsvers, auth_type;
	uid_t nickuid;
	int error = 0, nqnfs = 0, ticklen;
	struct mbuf *mrep, *md;
	register struct nfsuid *nuidp;
	struct timeval tvin, tvout;

	mrep = nd->nd_mrep;
	md = nd->nd_md;
	dpos = nd->nd_dpos;
	if (has_header) {
		nfsm_dissect(tl, u_long *, 10 * NFSX_UNSIGNED);
		nd->nd_retxid = fxdr_unsigned(u_long, *tl++);
		if (*tl++ != rpc_call) {
			m_freem(mrep);
			return (EBADRPC);
		}
	} else
		nfsm_dissect(tl, u_long *, 8 * NFSX_UNSIGNED);
	nd->nd_repstat = 0;
	nd->nd_flag = 0;
	if (*tl++ != rpc_vers) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	/* Program number: plain NFS or NQNFS, anything else is refused. */
	if (*tl != nfs_prog) {
		if (*tl == nqnfs_prog)
			nqnfs++;
		else {
			nd->nd_repstat = EPROGUNAVAIL;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	}
	tl++;
	nfsvers = fxdr_unsigned(u_long, *tl++);
	if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) ||
		(nfsvers != NQNFS_VER3 && nqnfs)) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (nqnfs)
		nd->nd_flag = (ND_NFSV3 | ND_NQNFS);
	else if (nfsvers == NFS_VER3)
		nd->nd_flag = ND_NFSV3;
	nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
	/* The null procedure is accepted without looking at credentials. */
	if (nd->nd_procnum == NFSPROC_NULL)
		return (0);
	if (nd->nd_procnum >= NFS_NPROCS ||
		(!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
		(!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	/* Requests without ND_NFSV3 get their procedure number remapped. */
	if ((nd->nd_flag & ND_NFSV3) == 0)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	auth_type = *tl++;
	len = fxdr_unsigned(int, *tl++);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		m_freem(mrep);
		return (EBADRPC);
	}

	nd->nd_flag &= ~ND_KERBAUTH;
	/*
	 * Handle auth_unix or auth_kerb.
	 */
	if (auth_type == rpc_auth_unix) {
		/* Length-prefixed string, skipped without being read. */
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > NFS_MAXNAMLEN) {
			m_freem(mrep);
			return (EBADRPC);
		}
		nfsm_adv(nfsm_rndup(len));
		nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
		bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
		nd->nd_cr.cr_ref = 1;
		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
		nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
		len = fxdr_unsigned(int, *tl);
		if (len < 0 || len > RPCAUTH_UNIXGIDS) {
			m_freem(mrep);
			return (EBADRPC);
		}
		nfsm_dissect(tl, u_long *, (len + 2) * NFSX_UNSIGNED);
		/*
		 * Supplementary groups are stored from index 1 on; entries
		 * that do not fit below NGROUPS are read past and dropped.
		 */
		for (i = 1; i <= len; i++)
		    if (i < NGROUPS)
			nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
		    else
			tl++;
		nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
		if (nd->nd_cr.cr_ngroups > 1)
		    nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
		/* Verifier: only its length is checked, the body is skipped. */
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > RPCAUTH_MAXSIZ) {
			m_freem(mrep);
			return (EBADRPC);
		}
		if (len > 0)
			nfsm_adv(nfsm_rndup(len));
	} else if (auth_type == rpc_auth_kerb) {
		switch (fxdr_unsigned(int, *tl++)) {
		case RPCAKN_FULLNAME:
			/*
			 * Copy the credential body into nfsd_authstr and the
			 * verifier into nfsd_verfstr, then set NFSD_NEEDAUTH.
			 */
			ticklen = fxdr_unsigned(int, *tl);
			*((u_long *)nfsd->nfsd_authstr) = *tl;
			uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
			nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
			if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
				m_freem(mrep);
				return (EBADRPC);
			}
			uio.uio_offset = 0;
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			uio.uio_segflg = UIO_SYSSPACE;
			iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
			iov.iov_len = RPCAUTH_MAXSIZ - 4;
			nfsm_mtouio(&uio, uio.uio_resid);
			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
			if (*tl++ != rpc_auth_kerb ||
				fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
				printf("Bad kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
			tl = (u_long *)cp;
			if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
				printf("Not fullname kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			cp += NFSX_UNSIGNED;
			bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
			nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
			nd->nd_flag |= ND_KERBFULL;
			nfsd->nfsd_flag |= NFSD_NEEDAUTH;
			break;
		case RPCAKN_NICKNAME:
			/*
			 * The credential is only a uid; it is looked up in
			 * the socket's uid hash to recover the full cred.
			 */
			if (len != 2 * NFSX_UNSIGNED) {
				printf("Kerb nickname short\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nickuid = fxdr_unsigned(uid_t, *tl);
			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
			if (*tl++ != rpc_auth_kerb ||
				fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
				printf("Kerb nick verifier bad\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
			tvin.tv_sec = *tl++;
			tvin.tv_usec = *tl;

			for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
			    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
				if (nuidp->nu_cr.cr_uid == nickuid &&
				    (!nd->nd_nam2 ||
				     netaddr_match(NU_NETFAM(nuidp),
				      &nuidp->nu_haddr, nd->nd_nam2)))
					break;
			}
			if (!nuidp) {
				nd->nd_repstat =
					(NFSERR_AUTHERR|AUTH_REJECTCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}

			/*
			 * Now, decrypt the timestamp using the session key
			 * and validate it.
			 * NOTE(review): without NFSKERB nothing visible here
			 * stores into tvout before it is read below -- confirm
			 * whether that configuration is expected to work.
			 */
#ifdef NFSKERB
			XXX
#endif

			tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
			tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
			/* Reject expired entries and non-advancing timestamps. */
			if (nuidp->nu_expire < time.tv_sec ||
			    nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
			    (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
			     nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
				nuidp->nu_expire = 0;
				nd->nd_repstat =
				    (NFSERR_AUTHERR|AUTH_REJECTVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
			nd->nd_flag |= ND_KERBNICK;
		};
	} else {
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}

	/*
	 * For nqnfs, get piggybacked lease request.
	 */
	if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
		nd->nd_flag |= fxdr_unsigned(int, *tl);
		if (nd->nd_flag & ND_LEASE) {
			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
			nd->nd_duration = fxdr_unsigned(int, *tl);
		} else
			nd->nd_duration = NQ_MINLEASE;
	} else
		nd->nd_duration = NQ_MINLEASE;
	/* Hand the advanced parse position back to the caller. */
	nd->nd_md = md;
	nd->nd_dpos = dpos;
	return (0);
nfsmout:
	return (error);
}

/*
 * Search for a sleeping nfsd and wake it up.
 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
 * running nfsds will go look for the work in the nfssvc_sock list.
*/ void nfsrv_wakenfsd(slp) struct nfssvc_sock *slp; { register struct nfsd *nd; if ((slp->ns_flag & SLP_VALID) == 0) return; for (nd = nfsd_head.tqh_first; nd != 0; nd = nd->nfsd_chain.tqe_next) { if (nd->nfsd_flag & NFSD_WAITING) { nd->nfsd_flag &= ~NFSD_WAITING; if (nd->nfsd_slp) panic("nfsd wakeup"); slp->ns_sref++; nd->nfsd_slp = slp; wakeup((caddr_t)nd); return; } } slp->ns_flag |= SLP_DOREC; nfsd_head_flag |= NFSD_CHECKSLP; } #endif /* NFS_NOSERVER */ static int nfs_msg(p, server, msg) struct proc *p; char *server, *msg; { tpr_t tpr; if (p) tpr = tprintf_open(p); else tpr = NULL; tprintf(tpr, "nfs server %s: %s\n", server, msg); tprintf_close(tpr); return (0); } Index: head/sys/sys/socketvar.h =================================================================== --- head/sys/sys/socketvar.h (revision 14092) +++ head/sys/sys/socketvar.h (revision 14093) @@ -1,256 +1,259 @@ /*- * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)socketvar.h 8.1 (Berkeley) 6/2/93 - * $Id: socketvar.h,v 1.9 1995/12/14 22:51:13 bde Exp $ + * $Id: socketvar.h,v 1.10 1996/01/30 23:01:25 mpp Exp $ */ #ifndef _SYS_SOCKETVAR_H_ #define _SYS_SOCKETVAR_H_ #include /* for struct stat */ #include /* for struct filedesc */ #include /* for struct selinfo */ /* * Kernel structure per socket. * Contains send and receive buffer queues, * handle on protocol and pointer to protocol * private data and error information. */ struct socket { short so_type; /* generic type, see socket.h */ short so_options; /* from socket call, see socket.h */ short so_linger; /* time to linger while closing */ short so_state; /* internal state flags SS_*, below */ caddr_t so_pcb; /* protocol control block */ struct protosw *so_proto; /* protocol handle */ /* * Variables for connection queuing. * Socket where accepts occur is so_head in all subsidiary sockets. * If so_head is 0, socket is not related to an accept. * For head socket so_q0 queues partially completed connections, * while so_q is a queue of connections ready to be accepted. * If a connection is aborted and it has so_head set, then * it has to be pulled out of either so_q0 or so_q. 
* We allow connections to queue up based on current queue lengths * and limit on number of queued connections for this socket. */ struct socket *so_head; /* back pointer to accept socket */ struct socket *so_q0; /* queue of partial connections */ struct socket *so_q; /* queue of incoming connections */ short so_q0len; /* partials on so_q0 */ short so_qlen; /* number of connections on so_q */ short so_qlimit; /* max number queued connections */ short so_timeo; /* connection timeout */ u_short so_error; /* error affecting connection */ pid_t so_pgid; /* pgid for signals */ u_long so_oobmark; /* chars to oob mark */ /* * Variables for socket buffering. */ struct sockbuf { u_long sb_cc; /* actual chars in buffer */ u_long sb_hiwat; /* max actual char count */ u_long sb_mbcnt; /* chars of mbufs used */ u_long sb_mbmax; /* max chars of mbufs to use */ long sb_lowat; /* low water mark */ struct mbuf *sb_mb; /* the mbuf chain */ struct selinfo sb_sel; /* process selecting read/write */ short sb_flags; /* flags, see below */ short sb_timeo; /* timeout for read/write */ } so_rcv, so_snd; #define SB_MAX (256*1024) /* default for max chars in sockbuf */ #define SB_LOCK 0x01 /* lock on data queue */ #define SB_WANT 0x02 /* someone is waiting to lock */ #define SB_WAIT 0x04 /* someone is waiting for data/space */ #define SB_SEL 0x08 /* someone is selecting */ #define SB_ASYNC 0x10 /* ASYNC I/O, need signals */ #define SB_NOTIFY (SB_WAIT|SB_SEL|SB_ASYNC) #define SB_NOINTR 0x40 /* operations not interruptible */ caddr_t so_tpcb; /* Wisc. protocol control block XXX */ void (*so_upcall) __P((struct socket *so, caddr_t arg, int waitf)); caddr_t so_upcallarg; /* Arg for above */ }; /* * Socket state bits. 
*/
/* Each SS_* value below is a distinct bit tested against so_state. */
#define SS_NOFDREF 0x001 /* no file table ref any more */
#define SS_ISCONNECTED 0x002 /* socket connected to a peer */
#define SS_ISCONNECTING 0x004 /* in process of connecting to peer */
#define SS_ISDISCONNECTING 0x008 /* in process of disconnecting */
#define SS_CANTSENDMORE 0x010 /* can't send more data to peer */
#define SS_CANTRCVMORE 0x020 /* can't receive more data from peer */
#define SS_RCVATMARK 0x040 /* at mark on input */

#define SS_PRIV 0x080 /* privileged for broadcast, raw... */
#define SS_NBIO 0x100 /* non-blocking ops */
#define SS_ASYNC 0x200 /* async i/o notify */
#define SS_ISCONFIRMING 0x400 /* deciding to accept connection req */

/*
 * Macros for sockets and socket buffering.
 */

/*
 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
 * This is problematical if the fields are unsigned, as the space might
 * still be negative (cc > hiwat or mbcnt > mbmax). Should detect
 * overflow and return 0. Should use "lmin" but it doesn't exist now.
 *
 * The two candidates are the remaining character allowance
 * (sb_hiwat - sb_cc) and the remaining mbuf allowance
 * (sb_mbmax - sb_mbcnt); each is cast to int before being passed to
 * imin(), and the result is cast to long.
 * NOTE(review): imin() is presumably the integer minimum -- confirm.
 */
#define sbspace(sb) \
	((long) imin((int)((sb)->sb_hiwat - (sb)->sb_cc), \
	(int)((sb)->sb_mbmax - (sb)->sb_mbcnt)))

/*
 * do we have to send all at once on a socket?
 * Non-zero when the socket's protocol has PR_ATOMIC set in pr_flags.
 */
#define sosendallatonce(so) \
	((so)->so_proto->pr_flags & PR_ATOMIC)

/*
 * can we read something from so?
 * True when any of the following holds: the receive buffer holds at
 * least sb_lowat characters, SS_CANTRCVMORE is set, so_qlen is non-zero,
 * or so_error is non-zero.
 */
#define soreadable(so) \
	((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \
	((so)->so_state & SS_CANTRCVMORE) || \
	(so)->so_qlen || (so)->so_error)

/* can we write something to so?
*/ #define sowriteable(so) \ ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \ (((so)->so_state&SS_ISCONNECTED) || \ ((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \ ((so)->so_state & SS_CANTSENDMORE) || \ (so)->so_error) /* adjust counters in sb reflecting allocation of m */ #define sballoc(sb, m) { \ (sb)->sb_cc += (m)->m_len; \ (sb)->sb_mbcnt += MSIZE; \ if ((m)->m_flags & M_EXT) \ (sb)->sb_mbcnt += (m)->m_ext.ext_size; \ } /* adjust counters in sb reflecting freeing of m */ #define sbfree(sb, m) { \ (sb)->sb_cc -= (m)->m_len; \ (sb)->sb_mbcnt -= MSIZE; \ if ((m)->m_flags & M_EXT) \ (sb)->sb_mbcnt -= (m)->m_ext.ext_size; \ } /* * Set lock on sockbuf sb; sleep if lock is already held. * Unless SB_NOINTR is set on sockbuf, sleep is interruptible. * Returns error without lock if sleep is interrupted. */ #define sblock(sb, wf) ((sb)->sb_flags & SB_LOCK ? \ (((wf) == M_WAITOK) ? sb_lock(sb) : EWOULDBLOCK) : \ ((sb)->sb_flags |= SB_LOCK), 0) /* release lock on sockbuf sb */ #define sbunlock(sb) { \ (sb)->sb_flags &= ~SB_LOCK; \ if ((sb)->sb_flags & SB_WANT) { \ (sb)->sb_flags &= ~SB_WANT; \ wakeup((caddr_t)&(sb)->sb_flags); \ } \ } #define sorwakeup(so) { sowakeup((so), &(so)->so_rcv); \ if ((so)->so_upcall) \ (*((so)->so_upcall))((so), (so)->so_upcallarg, M_DONTWAIT); \ } #define sowwakeup(so) sowakeup((so), &(so)->so_snd) #ifdef KERNEL extern u_long sb_max; /* to catch callers missing new second argument to sonewconn: */ #define sonewconn(head, connstatus) sonewconn1((head), (connstatus)) struct socket *sonewconn1 __P((struct socket *head, int connstatus)); /* * File operations on sockets. 
*/ int soo_ioctl __P((struct file *fp, int com, caddr_t data, struct proc *p)); int soo_select __P((struct file *fp, int which, struct proc *p)); int soo_stat __P((struct socket *, struct stat *)); /* * From uipc_socket and friends */ void soqinsque __P((struct socket *, struct socket *, int)); void sowakeup __P((struct socket *, struct sockbuf *)); void socantrcvmore __P((struct socket *)); void socantsendmore __P((struct socket *)); void sbrelease __P((struct sockbuf *)); void sbappend __P((struct sockbuf *, struct mbuf *)); void sbappendrecord __P((struct sockbuf *, struct mbuf *)); int sbappendcontrol __P((struct sockbuf *, struct mbuf *, struct mbuf *)); -int sbappendaddr __P((struct sockbuf *, struct sockaddr *, struct mbuf *, struct mbuf *)); +int sbappendaddr __P((struct sockbuf *, struct sockaddr *, + struct mbuf *, struct mbuf *)); void sbdroprecord __P((struct sockbuf *)); void sbcompress __P((struct sockbuf *, struct mbuf *, struct mbuf *)); void sbflush __P((struct sockbuf *)); void sbinsertoob __P((struct sockbuf *, struct mbuf *)); -int sbreserve __P((struct sockbuf *,u_long)); -int soreserve __P((struct socket *,u_long,u_long)); +int sbreserve __P((struct sockbuf *, u_long)); +int soreserve __P((struct socket *, u_long, u_long)); int sb_lock __P((struct sockbuf *)); int sbwait __P((struct sockbuf *)); void sbdrop __P((struct sockbuf *, int)); void sofree __P((struct socket *)); void sorflush __P((struct socket *)); int soqremque __P((struct socket *,int)); int soabort __P((struct socket *)); void soisdisconnected __P((struct socket *)); void soisconnected __P((struct socket *)); void soisconnecting __P((struct socket *)); void soisdisconnecting __P((struct socket *)); void sohasoutofband __P((struct socket *)); int sodisconnect __P((struct socket *)); -int sosend __P((struct socket *,struct mbuf *, struct uio *, struct mbuf *, struct mbuf *, int)); -int socreate __P((int, struct socket **,int,int)); -int getsock __P((struct filedesc *,int,struct 
file **)); -int sockargs __P((struct mbuf **,caddr_t,int,int)); -int sobind __P((struct socket *,struct mbuf *)); -int solisten __P((struct socket *,int)); -int soaccept __P((struct socket *,struct mbuf *)); -int soconnect __P((struct socket *,struct mbuf *)); -int soconnect2 __P((struct socket *,struct socket *)); +int sosend __P((struct socket *, struct mbuf *, struct uio *, + struct mbuf *, struct mbuf *, int)); +int socreate __P((int, struct socket **, int, int, struct proc *)); +int getsock __P((struct filedesc *, int, struct file **)); +int sockargs __P((struct mbuf **, caddr_t, int, int)); +int sobind __P((struct socket *, struct mbuf *)); +int solisten __P((struct socket *, int)); +int soaccept __P((struct socket *, struct mbuf *)); +int soconnect __P((struct socket *, struct mbuf *)); +int soconnect2 __P((struct socket *, struct socket *)); int soclose __P((struct socket *)); -int soshutdown __P((struct socket *,int)); -int soreceive __P((struct socket *,struct mbuf **,struct uio *,struct mbuf **,struct mbuf **,int *)); -int sosetopt __P((struct socket *,int, int, struct mbuf *)); -int sogetopt __P((struct socket *,int, int, struct mbuf **)); +int soshutdown __P((struct socket *, int)); +int soreceive __P((struct socket *, struct mbuf **, struct uio *, + struct mbuf **, struct mbuf **, int *)); +int sosetopt __P((struct socket *, int, int, struct mbuf *)); +int sogetopt __P((struct socket *, int, int, struct mbuf **)); #endif #endif