Index: head/sys/arm/allwinner/a10_fb.c =================================================================== --- head/sys/arm/allwinner/a10_fb.c (revision 338106) +++ head/sys/arm/allwinner/a10_fb.c (revision 338107) @@ -1,662 +1,662 @@ /*- * Copyright (c) 2016 Jared McNeill * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Allwinner A10/A20 Framebuffer */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fb_if.h" #include "hdmi_if.h" #define FB_DEFAULT_W 800 #define FB_DEFAULT_H 600 #define FB_DEFAULT_REF 60 #define FB_BPP 32 #define FB_ALIGN 0x1000 #define HDMI_ENABLE_DELAY 20000 #define DEBE_FREQ 300000000 #define DOT_CLOCK_TO_HZ(c) ((c) * 1000) /* Display backend */ #define DEBE_REG_START 0x800 #define DEBE_REG_END 0x1000 #define DEBE_REG_WIDTH 4 #define DEBE_MODCTL 0x800 #define MODCTL_ITLMOD_EN (1 << 28) #define MODCTL_OUT_SEL_MASK (0x7 << 20) #define MODCTL_OUT_SEL(sel) ((sel) << 20) #define OUT_SEL_LCD 0 #define MODCTL_LAY0_EN (1 << 8) #define MODCTL_START_CTL (1 << 1) #define MODCTL_EN (1 << 0) #define DEBE_DISSIZE 0x808 #define DIS_HEIGHT(h) (((h) - 1) << 16) #define DIS_WIDTH(w) (((w) - 1) << 0) #define DEBE_LAYSIZE0 0x810 #define LAY_HEIGHT(h) (((h) - 1) << 16) #define LAY_WIDTH(w) (((w) - 1) << 0) #define DEBE_LAYCOOR0 0x820 #define LAY_XCOOR(x) ((x) << 16) #define LAY_YCOOR(y) ((y) << 0) #define DEBE_LAYLINEWIDTH0 0x840 #define DEBE_LAYFB_L32ADD0 0x850 #define LAYFB_L32ADD(pa) ((pa) << 3) #define DEBE_LAYFB_H4ADD 0x860 #define LAY0FB_H4ADD(pa) ((pa) >> 29) #define DEBE_REGBUFFCTL 0x870 #define REGBUFFCTL_LOAD (1 << 0) #define DEBE_ATTCTL1 0x8a0 #define ATTCTL1_FBFMT(fmt) ((fmt) << 8) #define FBFMT_XRGB8888 9 #define ATTCTL1_FBPS(ps) ((ps) << 0) #define FBPS_32BPP_ARGB 0 /* Timing controller */ #define TCON_GCTL 0x000 #define GCTL_TCON_EN (1 << 31) #define GCTL_IO_MAP_SEL_TCON1 (1 << 0) #define TCON_GINT1 0x008 #define GINT1_TCON1_LINENO(n) (((n) + 2) << 0) #define TCON0_DCLK 0x044 #define DCLK_EN 0xf0000000 #define TCON1_CTL 0x090 #define TCON1_EN (1 << 31) #define INTERLACE_EN (1 << 20) #define TCON1_SRC_SEL(src) ((src) << 0) #define TCON1_SRC_CH1 0 #define TCON1_SRC_CH2 1 #define TCON1_SRC_BLUE 2 #define 
TCON1_START_DELAY(sd) ((sd) << 4) #define TCON1_BASIC0 0x094 #define TCON1_BASIC1 0x098 #define TCON1_BASIC2 0x09c #define TCON1_BASIC3 0x0a0 #define TCON1_BASIC4 0x0a4 #define TCON1_BASIC5 0x0a8 #define BASIC_X(x) (((x) - 1) << 16) #define BASIC_Y(y) (((y) - 1) << 0) #define BASIC3_HT(ht) (((ht) - 1) << 16) #define BASIC3_HBP(hbp) (((hbp) - 1) << 0) #define BASIC4_VT(vt) ((vt) << 16) #define BASIC4_VBP(vbp) (((vbp) - 1) << 0) #define BASIC5_HSPW(hspw) (((hspw) - 1) << 16) #define BASIC5_VSPW(vspw) (((vspw) - 1) << 0) #define TCON1_IO_POL 0x0f0 #define IO_POL_IO2_INV (1 << 26) #define IO_POL_PHSYNC (1 << 25) #define IO_POL_PVSYNC (1 << 24) #define TCON1_IO_TRI 0x0f4 #define IO0_OUTPUT_TRI_EN (1 << 24) #define IO1_OUTPUT_TRI_EN (1 << 25) #define IO_TRI_MASK 0xffffffff #define START_DELAY(vbl) (MIN(32, (vbl)) - 2) #define VBLANK_LEN(vt, vd, i) ((((vt) << (i)) >> 1) - (vd) - 2) #define VTOTAL(vt) ((vt) * 2) #define DIVIDE(x, y) (((x) + ((y) / 2)) / (y)) struct a10fb_softc { device_t dev; device_t fbdev; struct resource *res[2]; /* Framebuffer */ struct fb_info info; size_t fbsize; bus_addr_t paddr; vm_offset_t vaddr; /* HDMI */ eventhandler_tag hdmi_evh; }; static struct resource_spec a10fb_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, /* DEBE */ { SYS_RES_MEMORY, 1, RF_ACTIVE }, /* TCON */ { -1, 0 } }; #define DEBE_READ(sc, reg) bus_read_4((sc)->res[0], (reg)) #define DEBE_WRITE(sc, reg, val) bus_write_4((sc)->res[0], (reg), (val)) #define TCON_READ(sc, reg) bus_read_4((sc)->res[1], (reg)) #define TCON_WRITE(sc, reg, val) bus_write_4((sc)->res[1], (reg), (val)) static int a10fb_allocfb(struct a10fb_softc *sc) { - sc->vaddr = kmem_alloc_contig(kernel_arena, sc->fbsize, - M_NOWAIT | M_ZERO, 0, ~0, FB_ALIGN, 0, VM_MEMATTR_WRITE_COMBINING); + sc->vaddr = kmem_alloc_contig(sc->fbsize, M_NOWAIT | M_ZERO, 0, ~0, + FB_ALIGN, 0, VM_MEMATTR_WRITE_COMBINING); if (sc->vaddr == 0) { device_printf(sc->dev, "failed to allocate FB memory\n"); return (ENOMEM); } sc->paddr = pmap_kextract(sc->vaddr); return (0); } static void a10fb_freefb(struct a10fb_softc *sc) { kmem_free(kernel_arena, sc->vaddr, sc->fbsize); } static int a10fb_setup_debe(struct a10fb_softc *sc, const struct videomode *mode) { int width, height, interlace, reg; clk_t clk_ahb, clk_dram, clk_debe; hwreset_t rst; uint32_t val; int error; interlace = !!(mode->flags & VID_INTERLACE); width = mode->hdisplay; height = mode->vdisplay << interlace; /* Leave reset */ error = hwreset_get_by_ofw_name(sc->dev, 0, "de_be", &rst); if (error != 0) { device_printf(sc->dev, "cannot find reset 'de_be'\n"); return (error); } error = hwreset_deassert(rst); if (error != 0) { device_printf(sc->dev, "couldn't de-assert reset 'de_be'\n"); return (error); } /* Gating AHB clock for BE */ error = clk_get_by_ofw_name(sc->dev, 0, "ahb_de_be", &clk_ahb); if (error != 0) { device_printf(sc->dev, "cannot find clk 'ahb_de_be'\n"); return (error); } error = clk_enable(clk_ahb); if (error != 0) { device_printf(sc->dev, "cannot enable clk 'ahb_de_be'\n"); return (error); } /* Enable DRAM clock to BE */ error = clk_get_by_ofw_name(sc->dev, 0, "dram_de_be", &clk_dram); if (error != 0) { device_printf(sc->dev, "cannot find clk 'dram_de_be'\n"); return (error); } error = clk_enable(clk_dram); if (error != 0) { device_printf(sc->dev, "cannot enable clk 'dram_de_be'\n"); return (error); } /* Set BE clock to 300MHz and enable */ error = clk_get_by_ofw_name(sc->dev, 0, "de_be", &clk_debe); if (error != 0) { device_printf(sc->dev, "cannot find clk 'de_be'\n"); return (error); } error = 
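/*
 * Illustrative sketch (not part of this commit): the hunk above reflects the
 * kmem_alloc_contig(9) interface change in which the explicit kernel_arena
 * argument was dropped; allocations always come from the kernel arena, so
 * callers now pass only the size, malloc flags, and physical constraints.
 * The function name below is hypothetical.
 */
static vm_offset_t
example_alloc_fb(size_t fbsize)
{
	vm_offset_t va;

	/* Old convention: kmem_alloc_contig(kernel_arena, fbsize, ...). */
	/* New convention, as used by a10fb_allocfb() after this change: */
	va = kmem_alloc_contig(fbsize, M_NOWAIT | M_ZERO,
	    0,			/* low: no lower physical bound */
	    ~0,			/* high: no upper physical bound */
	    FB_ALIGN,		/* physical alignment of the buffer */
	    0,			/* no boundary-crossing restriction */
	    VM_MEMATTR_WRITE_COMBINING);
	return (va);		/* 0 on failure */
}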
clk_set_freq(clk_debe, DEBE_FREQ, CLK_SET_ROUND_DOWN); if (error != 0) { device_printf(sc->dev, "cannot set 'de_be' frequency\n"); return (error); } error = clk_enable(clk_debe); if (error != 0) { device_printf(sc->dev, "cannot enable clk 'de_be'\n"); return (error); } /* Initialize all registers to 0 */ for (reg = DEBE_REG_START; reg < DEBE_REG_END; reg += DEBE_REG_WIDTH) DEBE_WRITE(sc, reg, 0); /* Enable display backend */ DEBE_WRITE(sc, DEBE_MODCTL, MODCTL_EN); /* Set display size */ DEBE_WRITE(sc, DEBE_DISSIZE, DIS_HEIGHT(height) | DIS_WIDTH(width)); /* Set layer 0 size, position, and stride */ DEBE_WRITE(sc, DEBE_LAYSIZE0, LAY_HEIGHT(height) | LAY_WIDTH(width)); DEBE_WRITE(sc, DEBE_LAYCOOR0, LAY_XCOOR(0) | LAY_YCOOR(0)); DEBE_WRITE(sc, DEBE_LAYLINEWIDTH0, width * FB_BPP); /* Point layer 0 to FB memory */ DEBE_WRITE(sc, DEBE_LAYFB_L32ADD0, LAYFB_L32ADD(sc->paddr)); DEBE_WRITE(sc, DEBE_LAYFB_H4ADD, LAY0FB_H4ADD(sc->paddr)); /* Set backend format and pixel sequence */ DEBE_WRITE(sc, DEBE_ATTCTL1, ATTCTL1_FBFMT(FBFMT_XRGB8888) | ATTCTL1_FBPS(FBPS_32BPP_ARGB)); /* Enable layer 0, output to LCD, setup interlace */ val = DEBE_READ(sc, DEBE_MODCTL); val |= MODCTL_LAY0_EN; val &= ~MODCTL_OUT_SEL_MASK; val |= MODCTL_OUT_SEL(OUT_SEL_LCD); if (interlace) val |= MODCTL_ITLMOD_EN; else val &= ~MODCTL_ITLMOD_EN; DEBE_WRITE(sc, DEBE_MODCTL, val); /* Commit settings */ DEBE_WRITE(sc, DEBE_REGBUFFCTL, REGBUFFCTL_LOAD); /* Start DEBE */ val = DEBE_READ(sc, DEBE_MODCTL); val |= MODCTL_START_CTL; DEBE_WRITE(sc, DEBE_MODCTL, val); return (0); } static int a10fb_setup_pll(struct a10fb_softc *sc, uint64_t freq) { clk_t clk_sclk1, clk_sclk2; int error; error = clk_get_by_ofw_name(sc->dev, 0, "lcd_ch1_sclk1", &clk_sclk1); if (error != 0) { device_printf(sc->dev, "cannot find clk 'lcd_ch1_sclk1'\n"); return (error); } error = clk_get_by_ofw_name(sc->dev, 0, "lcd_ch1_sclk2", &clk_sclk2); if (error != 0) { device_printf(sc->dev, "cannot find clk 'lcd_ch1_sclk2'\n"); return (error); } error = clk_set_freq(clk_sclk2, freq, 0); if (error != 0) { device_printf(sc->dev, "cannot set lcd ch1 frequency\n"); return (error); } error = clk_enable(clk_sclk2); if (error != 0) { device_printf(sc->dev, "cannot enable lcd ch1 sclk2\n"); return (error); } error = clk_enable(clk_sclk1); if (error != 0) { device_printf(sc->dev, "cannot enable lcd ch1 sclk1\n"); return (error); } return (0); } static int a10fb_setup_tcon(struct a10fb_softc *sc, const struct videomode *mode) { u_int interlace, hspw, hbp, vspw, vbp, vbl, width, height, start_delay; u_int vtotal, framerate, clk; clk_t clk_ahb; hwreset_t rst; uint32_t val; int error; interlace = !!(mode->flags & VID_INTERLACE); width = mode->hdisplay; height = mode->vdisplay; hspw = mode->hsync_end - mode->hsync_start; hbp = mode->htotal - mode->hsync_start; vspw = mode->vsync_end - mode->vsync_start; vbp = mode->vtotal - mode->vsync_start; vbl = VBLANK_LEN(mode->vtotal, mode->vdisplay, interlace); start_delay = START_DELAY(vbl); /* Leave reset */ error = hwreset_get_by_ofw_name(sc->dev, 0, "lcd", &rst); if (error != 0) { device_printf(sc->dev, "cannot find reset 'lcd'\n"); return (error); } error = hwreset_deassert(rst); if (error != 0) { device_printf(sc->dev, "couldn't de-assert reset 'lcd'\n"); return (error); } /* Gating AHB clock for LCD */ error = clk_get_by_ofw_name(sc->dev, 0, "ahb_lcd", &clk_ahb); if (error != 0) { device_printf(sc->dev, "cannot find clk 'ahb_lcd'\n"); return (error); } error = clk_enable(clk_ahb); if (error != 0) { device_printf(sc->dev, "cannot enable clk 
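/*
 * Illustrative sketch (not part of this commit): the DEBE size and layer
 * macros encode "value minus one" fields packed into 32-bit registers.  For
 * an 800x600 mode, DIS_HEIGHT(600) | DIS_WIDTH(800) is (599 << 16) | 799,
 * which is what a10fb_setup_debe() writes to DEBE_DISSIZE.  The helper name
 * below is hypothetical.
 */
static void
example_debe_size(struct a10fb_softc *sc, u_int width, u_int height)
{
	DEBE_WRITE(sc, DEBE_DISSIZE, DIS_HEIGHT(height) | DIS_WIDTH(width));
	DEBE_WRITE(sc, DEBE_LAYSIZE0, LAY_HEIGHT(height) | LAY_WIDTH(width));
	/* Line width, written exactly as in a10fb_setup_debe(): width * FB_BPP. */
	DEBE_WRITE(sc, DEBE_LAYLINEWIDTH0, width * FB_BPP);
}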
'ahb_lcd'\n"); return (error); } /* Disable TCON and TCON1 */ TCON_WRITE(sc, TCON_GCTL, 0); TCON_WRITE(sc, TCON1_CTL, 0); /* Enable clocks */ TCON_WRITE(sc, TCON0_DCLK, DCLK_EN); /* Disable IO and data output ports */ TCON_WRITE(sc, TCON1_IO_TRI, IO_TRI_MASK); /* Disable TCON and select TCON1 */ TCON_WRITE(sc, TCON_GCTL, GCTL_IO_MAP_SEL_TCON1); /* Source width and height */ TCON_WRITE(sc, TCON1_BASIC0, BASIC_X(width) | BASIC_Y(height)); /* Scaler width and height */ TCON_WRITE(sc, TCON1_BASIC1, BASIC_X(width) | BASIC_Y(height)); /* Output width and height */ TCON_WRITE(sc, TCON1_BASIC2, BASIC_X(width) | BASIC_Y(height)); /* Horizontal total and back porch */ TCON_WRITE(sc, TCON1_BASIC3, BASIC3_HT(mode->htotal) | BASIC3_HBP(hbp)); /* Vertical total and back porch */ vtotal = VTOTAL(mode->vtotal); if (interlace) { framerate = DIVIDE(DIVIDE(DOT_CLOCK_TO_HZ(mode->dot_clock), mode->htotal), mode->vtotal); clk = mode->htotal * (VTOTAL(mode->vtotal) + 1) * framerate; if ((clk / 2) == DOT_CLOCK_TO_HZ(mode->dot_clock)) vtotal += 1; } TCON_WRITE(sc, TCON1_BASIC4, BASIC4_VT(vtotal) | BASIC4_VBP(vbp)); /* Horizontal and vertical sync */ TCON_WRITE(sc, TCON1_BASIC5, BASIC5_HSPW(hspw) | BASIC5_VSPW(vspw)); /* Polarity */ val = IO_POL_IO2_INV; if (mode->flags & VID_PHSYNC) val |= IO_POL_PHSYNC; if (mode->flags & VID_PVSYNC) val |= IO_POL_PVSYNC; TCON_WRITE(sc, TCON1_IO_POL, val); /* Set scan line for TCON1 line trigger */ TCON_WRITE(sc, TCON_GINT1, GINT1_TCON1_LINENO(start_delay)); /* Enable TCON1 */ val = TCON1_EN; if (interlace) val |= INTERLACE_EN; val |= TCON1_START_DELAY(start_delay); val |= TCON1_SRC_SEL(TCON1_SRC_CH1); TCON_WRITE(sc, TCON1_CTL, val); /* Setup PLL */ return (a10fb_setup_pll(sc, DOT_CLOCK_TO_HZ(mode->dot_clock))); } static void a10fb_enable_tcon(struct a10fb_softc *sc, int onoff) { uint32_t val; /* Enable TCON */ val = TCON_READ(sc, TCON_GCTL); if (onoff) val |= GCTL_TCON_EN; else val &= ~GCTL_TCON_EN; TCON_WRITE(sc, TCON_GCTL, val); /* Enable TCON1 IO0/IO1 outputs */ val = TCON_READ(sc, TCON1_IO_TRI); if (onoff) val &= ~(IO0_OUTPUT_TRI_EN | IO1_OUTPUT_TRI_EN); else val |= (IO0_OUTPUT_TRI_EN | IO1_OUTPUT_TRI_EN); TCON_WRITE(sc, TCON1_IO_TRI, val); } static int a10fb_configure(struct a10fb_softc *sc, const struct videomode *mode) { size_t fbsize; int error; fbsize = round_page(mode->hdisplay * mode->vdisplay * (FB_BPP / NBBY)); /* Detach the old FB device */ if (sc->fbdev != NULL) { device_delete_child(sc->dev, sc->fbdev); sc->fbdev = NULL; } /* If the FB size has changed, free the old FB memory */ if (sc->fbsize > 0 && sc->fbsize != fbsize) { a10fb_freefb(sc); sc->vaddr = 0; } /* Allocate the FB if necessary */ sc->fbsize = fbsize; if (sc->vaddr == 0) { error = a10fb_allocfb(sc); if (error != 0) { device_printf(sc->dev, "failed to allocate FB memory\n"); return (ENXIO); } } /* Setup display backend */ error = a10fb_setup_debe(sc, mode); if (error != 0) return (error); /* Setup display timing controller */ error = a10fb_setup_tcon(sc, mode); if (error != 0) return (error); /* Attach framebuffer device */ sc->info.fb_name = device_get_nameunit(sc->dev); sc->info.fb_vbase = (intptr_t)sc->vaddr; sc->info.fb_pbase = sc->paddr; sc->info.fb_size = sc->fbsize; sc->info.fb_bpp = sc->info.fb_depth = FB_BPP; sc->info.fb_stride = mode->hdisplay * (FB_BPP / NBBY); sc->info.fb_width = mode->hdisplay; sc->info.fb_height = mode->vdisplay; sc->fbdev = device_add_child(sc->dev, "fbd", device_get_unit(sc->dev)); if (sc->fbdev == NULL) { device_printf(sc->dev, "failed to add fbd child\n"); return 
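/*
 * Illustrative sketch (not part of this commit): a10fb_setup_tcon() derives
 * the TCON1 timing fields from a struct videomode the usual videomode(9)
 * way: sync pulse width is sync_end - sync_start, the back porch is
 * total - sync_start, and the start delay is clamped through the
 * START_DELAY()/VBLANK_LEN() macros defined above.  The function name is
 * hypothetical.
 */
static void
example_tcon_timings(const struct videomode *mode)
{
	u_int hspw, hbp, vspw, vbp, vbl, start_delay, interlace;

	interlace = !!(mode->flags & VID_INTERLACE);
	hspw = mode->hsync_end - mode->hsync_start;	/* h. sync width */
	hbp = mode->htotal - mode->hsync_start;		/* h. back porch */
	vspw = mode->vsync_end - mode->vsync_start;	/* v. sync width */
	vbp = mode->vtotal - mode->vsync_start;		/* v. back porch */
	vbl = VBLANK_LEN(mode->vtotal, mode->vdisplay, interlace);
	start_delay = START_DELAY(vbl);			/* at most 30 lines */
	(void)hspw; (void)hbp; (void)vspw; (void)vbp; (void)start_delay;
}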
(ENOENT); } error = device_probe_and_attach(sc->fbdev); if (error != 0) { device_printf(sc->dev, "failed to attach fbd device\n"); return (error); } return (0); } static void a10fb_hdmi_event(void *arg, device_t hdmi_dev) { const struct videomode *mode; struct videomode hdmi_mode; struct a10fb_softc *sc; struct edid_info ei; uint8_t *edid; uint32_t edid_len; int error; sc = arg; edid = NULL; edid_len = 0; mode = NULL; error = HDMI_GET_EDID(hdmi_dev, &edid, &edid_len); if (error != 0) { device_printf(sc->dev, "failed to get EDID: %d\n", error); } else { error = edid_parse(edid, &ei); if (error != 0) { device_printf(sc->dev, "failed to parse EDID: %d\n", error); } else { if (bootverbose) edid_print(&ei); mode = ei.edid_preferred_mode; } } /* If the preferred mode could not be determined, use the default */ if (mode == NULL) mode = pick_mode_by_ref(FB_DEFAULT_W, FB_DEFAULT_H, FB_DEFAULT_REF); if (mode == NULL) { device_printf(sc->dev, "failed to find usable video mode\n"); return; } if (bootverbose) device_printf(sc->dev, "using %dx%d\n", mode->hdisplay, mode->vdisplay); /* Disable HDMI */ HDMI_ENABLE(hdmi_dev, 0); /* Disable timing controller */ a10fb_enable_tcon(sc, 0); /* Configure DEBE and TCON */ error = a10fb_configure(sc, mode); if (error != 0) { device_printf(sc->dev, "failed to configure FB: %d\n", error); return; } hdmi_mode = *mode; hdmi_mode.hskew = mode->hsync_end - mode->hsync_start; hdmi_mode.flags |= VID_HSKEW; HDMI_SET_VIDEOMODE(hdmi_dev, &hdmi_mode); /* Enable timing controller */ a10fb_enable_tcon(sc, 1); DELAY(HDMI_ENABLE_DELAY); /* Enable HDMI */ HDMI_ENABLE(hdmi_dev, 1); } static int a10fb_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_is_compatible(dev, "allwinner,sun7i-a20-fb")) return (ENXIO); device_set_desc(dev, "Allwinner Framebuffer"); return (BUS_PROBE_DEFAULT); } static int a10fb_attach(device_t dev) { struct a10fb_softc *sc; sc = device_get_softc(dev); sc->dev = dev; if (bus_alloc_resources(dev, a10fb_spec, sc->res)) { device_printf(dev, "cannot allocate resources for device\n"); return (ENXIO); } sc->hdmi_evh = EVENTHANDLER_REGISTER(hdmi_event, a10fb_hdmi_event, sc, 0); return (0); } static struct fb_info * a10fb_fb_getinfo(device_t dev) { struct a10fb_softc *sc; sc = device_get_softc(dev); return (&sc->info); } static device_method_t a10fb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, a10fb_probe), DEVMETHOD(device_attach, a10fb_attach), /* FB interface */ DEVMETHOD(fb_getinfo, a10fb_fb_getinfo), DEVMETHOD_END }; static driver_t a10fb_driver = { "fb", a10fb_methods, sizeof(struct a10fb_softc), }; static devclass_t a10fb_devclass; DRIVER_MODULE(fb, simplebus, a10fb_driver, a10fb_devclass, 0, 0); Index: head/sys/arm/arm/busdma_machdep-v4.c =================================================================== --- head/sys/arm/arm/busdma_machdep-v4.c (revision 338106) +++ head/sys/arm/arm/busdma_machdep-v4.c (revision 338107) @@ -1,1618 +1,1617 @@ /*- * Copyright (c) 2012 Ian Lepore * Copyright (c) 2004 Olivier Houchard * Copyright (c) 2002 Peter Grehan * Copyright (c) 1997, 1998 Justin T. Gibbs. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. 
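/*
 * Illustrative sketch (not part of this commit): a10fb_hdmi_event() above
 * follows the usual videomode(9)/EDID pattern: fetch the EDID blob from the
 * HDMI driver, parse it, prefer the monitor's preferred mode, and fall back
 * to pick_mode_by_ref() for the 800x600@60 default when parsing fails.  The
 * function name is hypothetical.
 */
static const struct videomode *
example_pick_mode(device_t hdmi_dev)
{
	const struct videomode *mode = NULL;
	struct edid_info ei;
	uint8_t *edid = NULL;
	uint32_t edid_len = 0;

	if (HDMI_GET_EDID(hdmi_dev, &edid, &edid_len) == 0 &&
	    edid_parse(edid, &ei) == 0)
		mode = ei.edid_preferred_mode;
	if (mode == NULL)
		mode = pick_mode_by_ref(FB_DEFAULT_W, FB_DEFAULT_H,
		    FB_DEFAULT_REF);
	return (mode);	/* may still be NULL if no usable mode exists */
}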
The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From i386/busdma_machdep.c,v 1.26 2002/04/19 22:58:09 alfred */ #include __FBSDID("$FreeBSD$"); /* * ARM bus dma support routines. * * XXX Things to investigate / fix some day... * - What is the earliest that this API can be called? Could there be any * fallout from changing the SYSINIT() order from SI_SUB_VM to SI_SUB_KMEM? * - The manpage mentions the BUS_DMA_NOWAIT flag only in the context of the * bus_dmamap_load() function. This code has historically (and still does) * honor it in bus_dmamem_alloc(). If we got rid of that we could lose some * error checking because some resource management calls would become WAITOK * and thus "cannot fail." * - The decisions made by _bus_dma_can_bounce() should be made once, at tag * creation time, and the result stored in the tag. * - It should be possible to take some shortcuts when mapping a buffer we know * came from the uma(9) allocators based on what we know about such buffers * (aligned, contiguous, etc). * - The allocation of bounce pages could probably be cleaned up, then we could * retire arm_remap_nocache(). */ #define _ARM32_BUS_DMA_PRIVATE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_BPAGES 64 #define MAX_DMA_SEGMENTS 4096 #define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3 #define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 struct bounce_zone; struct bus_dma_tag { bus_dma_tag_t parent; bus_size_t alignment; bus_addr_t boundary; bus_addr_t lowaddr; bus_addr_t highaddr; bus_dma_filter_t *filter; void *filterarg; bus_size_t maxsize; u_int nsegments; bus_size_t maxsegsz; int flags; int ref_count; int map_count; bus_dma_lock_t *lockfunc; void *lockfuncarg; struct bounce_zone *bounce_zone; /* * DMA range for this tag. If the page doesn't fall within * one of these ranges, an error is returned. The caller * may then decide what to do with the transfer. If the * range pointer is NULL, it is ignored. 
*/ struct arm32_dma_range *ranges; int _nranges; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ vm_page_t datapage; /* physical page of client data */ vm_offset_t dataoffs; /* page offset of client data */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; struct sync_list { vm_offset_t vaddr; /* kva of client data */ vm_page_t pages; /* starting page of client data */ vm_offset_t dataoffs; /* page offset of client data */ bus_size_t datacount; /* client data count */ }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static uint32_t tags_total; static uint32_t maps_total; static uint32_t maps_dmamem; static uint32_t maps_coherent; static counter_u64_t maploads_total; static counter_u64_t maploads_bounced; static counter_u64_t maploads_coherent; static counter_u64_t maploads_dmamem; static counter_u64_t maploads_mbuf; static counter_u64_t maploads_physmem; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_UINT(_hw_busdma, OID_AUTO, tags_total, CTLFLAG_RD, &tags_total, 0, "Number of active tags"); SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_total, CTLFLAG_RD, &maps_total, 0, "Number of active maps"); SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_dmamem, CTLFLAG_RD, &maps_dmamem, 0, "Number of active maps for bus_dmamem_alloc buffers"); SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_coherent, CTLFLAG_RD, &maps_coherent, 0, "Number of active maps with BUS_DMA_COHERENT flag set"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_total, CTLFLAG_RD, &maploads_total, "Number of load operations performed"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_bounced, CTLFLAG_RD, &maploads_bounced, "Number of load operations that used bounce buffers"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_coherent, CTLFLAG_RD, &maploads_dmamem, "Number of load operations on BUS_DMA_COHERENT memory"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_dmamem, CTLFLAG_RD, &maploads_dmamem, "Number of load operations on bus_dmamem_alloc buffers"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_mbuf, CTLFLAG_RD, &maploads_mbuf, "Number of load operations for mbufs"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_physmem, CTLFLAG_RD, &maploads_physmem, "Number of load operations on physical buffers"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; bus_dmamap_callback_t *callback; void *callback_arg; int flags; #define DMAMAP_COHERENT (1 << 0) #define DMAMAP_DMAMEM_ALLOC (1 << 1) #define DMAMAP_MBUF (1 << 2) #define DMAMAP_CACHE_ALIGNED (1 << 3) STAILQ_ENTRY(bus_dmamap) links; bus_dma_segment_t *segments; int sync_count; struct sync_list slist[]; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static void 
init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); static void bus_dmamap_sync_sl(struct sync_list *sl, bus_dmasync_op_t op, int bufaligned); /* * ---------------------------------------------------------------------------- * Begin block of code useful to transplant to other implementations. */ static busdma_bufalloc_t coherent_allocator; /* Cache of coherent buffers */ static busdma_bufalloc_t standard_allocator; /* Cache of standard buffers */ MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata"); MALLOC_DEFINE(M_BOUNCE, "bounce", "busdma bounce pages"); static void busdma_init(void *dummy) { maploads_total = counter_u64_alloc(M_WAITOK); maploads_bounced = counter_u64_alloc(M_WAITOK); maploads_coherent = counter_u64_alloc(M_WAITOK); maploads_dmamem = counter_u64_alloc(M_WAITOK); maploads_mbuf = counter_u64_alloc(M_WAITOK); maploads_physmem = counter_u64_alloc(M_WAITOK); /* Create a cache of buffers in standard (cacheable) memory. */ standard_allocator = busdma_bufalloc_create("buffer", arm_dcache_align, /* minimum_alignment */ NULL, /* uma_alloc func */ NULL, /* uma_free func */ 0); /* uma_zcreate_flags */ /* * Create a cache of buffers in uncacheable memory, to implement the * BUS_DMA_COHERENT (and potentially BUS_DMA_NOCACHE) flag. */ coherent_allocator = busdma_bufalloc_create("coherent", arm_dcache_align, /* minimum_alignment */ busdma_bufalloc_alloc_uncacheable, busdma_bufalloc_free_uncacheable, 0); /* uma_zcreate_flags */ } /* * This init historically used SI_SUB_VM, but now the init code requires * malloc(9) using M_BUSDMA memory and the pcpu zones for counter(9), which get * set up by SI_SUB_KMEM and SI_ORDER_LAST, so we'll go right after that by * using SI_SUB_KMEM+1. */ SYSINIT(busdma, SI_SUB_KMEM+1, SI_ORDER_FIRST, busdma_init, NULL); /* * End block of code useful to transplant to other implementations. * ---------------------------------------------------------------------------- */ /* * Return true if a match is made. * * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'. * * If paddr is within the bounds of the dma tag then call the filter callback * to check for a match, if there is no filter callback then assume a match. */ static int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) { int retval; retval = 0; do { if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr) || ((paddr & (dmat->alignment - 1)) != 0)) && (dmat->filter == NULL || (*dmat->filter)(dmat->filterarg, paddr) != 0)) retval = 1; dmat = dmat->parent; } while (retval == 0 && dmat != NULL); return (retval); } /* * This routine checks the exclusion zone constraints from a tag against the * physical RAM available on the machine. If a tag specifies an exclusion zone * but there's no RAM in that zone, then we avoid allocating resources to bounce * a request, and we can use any memory allocator (as opposed to needing * kmem_alloc_contig() just because it can allocate pages in an address range). 
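/*
 * Illustrative sketch (not part of this commit): run_filter() above decides,
 * per physical address, whether a page must be bounced: either the address
 * falls inside the tag's exclusion window (lowaddr < paddr <= highaddr) or
 * it violates the tag's alignment, and no filter callback overrides the
 * decision.  The example values in the comment are hypothetical.
 */
static int
example_needs_bounce(bus_dma_tag_t dmat, bus_addr_t paddr)
{
	/*
	 * E.g. a tag for a device that can only address the first 256MB
	 * (lowaddr == 0x0fffffff) forces a buffer at 0x20000000 to bounce,
	 * and a tag with 64-byte alignment forces any misaligned buffer to
	 * bounce.
	 */
	return (run_filter(dmat, paddr));
}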
* * Most tags have BUS_SPACE_MAXADDR or BUS_SPACE_MAXADDR_32BIT (they are the * same value on 32-bit architectures) as their lowaddr constraint, and we can't * possibly have RAM at an address higher than the highest address we can * express, so we take a fast out. */ static __inline int _bus_dma_can_bounce(vm_offset_t lowaddr, vm_offset_t highaddr) { int i; if (lowaddr >= BUS_SPACE_MAXADDR) return (0); for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) { if ((lowaddr >= phys_avail[i] && lowaddr <= phys_avail[i + 1]) || (lowaddr < phys_avail[i] && highaddr > phys_avail[i])) return (1); } return (0); } static __inline struct arm32_dma_range * _bus_dma_inrange(struct arm32_dma_range *ranges, int nranges, bus_addr_t curaddr) { struct arm32_dma_range *dr; int i; for (i = 0, dr = ranges; i < nranges; i++, dr++) { if (curaddr >= dr->dr_sysbase && round_page(curaddr) <= (dr->dr_sysbase + dr->dr_len)) return (dr); } return (NULL); } /* * Convenience function for manipulating driver locks from busdma (during * busdma_swi, for example). Drivers that don't provide their own locks * should specify &Giant to dmat->lockfuncarg. Drivers that use their own * non-mutex locking scheme don't have to use this at all. */ void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) { struct mtx *dmtx; dmtx = (struct mtx *)arg; switch (op) { case BUS_DMA_LOCK: mtx_lock(dmtx); break; case BUS_DMA_UNLOCK: mtx_unlock(dmtx); break; default: panic("Unknown operation 0x%x for busdma_lock_mutex!", op); } } /* * dflt_lock should never get called. It gets put into the dma tag when * lockfunc == NULL, which is only valid if the maps that are associated * with the tag are meant to never be defered. * XXX Should have a way to identify which driver is responsible here. */ static void dflt_lock(void *arg, bus_dma_lock_op_t op) { #ifdef INVARIANTS panic("driver error: busdma dflt_lock called"); #else printf("DRIVER_ERROR: busdma dflt_lock called\n"); #endif } /* * Allocate a device specific dma_tag. */ int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error = 0; /* Return a NULL tag on failure */ *dmat = NULL; newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_BUSDMA, M_NOWAIT); if (newtag == NULL) { CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, 0, error); return (ENOMEM); } newtag->parent = parent; newtag->alignment = alignment ? 
alignment : 1; newtag->boundary = boundary; newtag->lowaddr = trunc_page((vm_offset_t)lowaddr) + (PAGE_SIZE - 1); newtag->highaddr = trunc_page((vm_offset_t)highaddr) + (PAGE_SIZE - 1); newtag->filter = filter; newtag->filterarg = filterarg; newtag->maxsize = maxsize; newtag->nsegments = nsegments; newtag->maxsegsz = maxsegsz; newtag->flags = flags; newtag->ref_count = 1; /* Count ourself */ newtag->map_count = 0; newtag->ranges = bus_dma_get_range(); newtag->_nranges = bus_dma_get_range_nb(); if (lockfunc != NULL) { newtag->lockfunc = lockfunc; newtag->lockfuncarg = lockfuncarg; } else { newtag->lockfunc = dflt_lock; newtag->lockfuncarg = NULL; } /* Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); if (newtag->boundary == 0) newtag->boundary = parent->boundary; else if (parent->boundary != 0) newtag->boundary = MIN(parent->boundary, newtag->boundary); if ((newtag->filter != NULL) || ((parent->flags & BUS_DMA_COULD_BOUNCE) != 0)) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (newtag->filter == NULL) { /* * Short circuit looking at our parent directly * since we have encapsulated all of its information */ newtag->filter = parent->filter; newtag->filterarg = parent->filterarg; newtag->parent = parent->parent; } if (newtag->parent != NULL) atomic_add_int(&parent->ref_count, 1); } if (_bus_dma_can_bounce(newtag->lowaddr, newtag->highaddr) || newtag->alignment > 1) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (((newtag->flags & BUS_DMA_COULD_BOUNCE) != 0) && (flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* Must bounce */ if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_BUSDMA); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; } else newtag->bounce_zone = NULL; if (error != 0) { free(newtag, M_BUSDMA); } else { atomic_add_32(&tags_total, 1); *dmat = newtag; } CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->flags : 0), error); return (error); } int bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain) { return (0); } int bus_dma_tag_destroy(bus_dma_tag_t dmat) { bus_dma_tag_t dmat_copy; int error; error = 0; dmat_copy = dmat; if (dmat != NULL) { if (dmat->map_count != 0) { error = EBUSY; goto out; } while (dmat != NULL) { bus_dma_tag_t parent; parent = dmat->parent; atomic_subtract_int(&dmat->ref_count, 1); if (dmat->ref_count == 0) { atomic_subtract_32(&tags_total, 1); free(dmat, M_BUSDMA); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } out: CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); return (error); } static int allocate_bz_and_pages(bus_dma_tag_t dmat, bus_dmamap_t map) { int error; /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or * an active address boundary. 
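/*
 * Illustrative sketch (not part of this commit): a typical driver-side
 * bus_dma_tag_create(9) call against which the constraint inheritance and
 * BUS_DMA_COULD_BOUNCE handling above operates.  All sizes and limits below
 * are hypothetical.
 */
static int
example_create_tag(device_t dev, bus_dma_tag_t *tagp)
{
	return (bus_dma_tag_create(
	    bus_get_dma_tag(dev),	/* inherit parent restrictions */
	    4, 0,			/* alignment, boundary */
	    BUS_SPACE_MAXADDR_32BIT,	/* lowaddr: may force bouncing */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    PAGE_SIZE,			/* maxsize */
	    1,				/* nsegments */
	    PAGE_SIZE,			/* maxsegsz */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    tagp));
}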
*/ if (dmat->flags & BUS_DMA_COULD_BOUNCE) { /* Must bounce */ struct bounce_zone *bz; int maxpages; if (dmat->bounce_zone == NULL) { if ((error = alloc_bounce_zone(dmat)) != 0) { return (error); } } bz = dmat->bounce_zone; /* Initialize the new map */ STAILQ_INIT(&(map->bpages)); /* * Attempt to add pages to our pool on a per-instance * basis up to a sane limit. */ maxpages = MAX_BPAGES; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { int pages; pages = MAX(atop(dmat->maxsize), 1); pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 1); if (alloc_bounce_pages(dmat, pages) < pages) return (ENOMEM); if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; } bz->map_count++; } return (0); } static bus_dmamap_t allocate_map(bus_dma_tag_t dmat, int mflags) { int mapsize, segsize; bus_dmamap_t map; /* * Allocate the map. The map structure ends with an embedded * variable-sized array of sync_list structures. Following that * we allocate enough extra space to hold the array of bus_dma_segments. */ KASSERT(dmat->nsegments <= MAX_DMA_SEGMENTS, ("cannot allocate %u dma segments (max is %u)", dmat->nsegments, MAX_DMA_SEGMENTS)); segsize = sizeof(struct bus_dma_segment) * dmat->nsegments; mapsize = sizeof(*map) + sizeof(struct sync_list) * dmat->nsegments; map = malloc(mapsize + segsize, M_BUSDMA, mflags | M_ZERO); if (map == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (NULL); } map->segments = (bus_dma_segment_t *)((uintptr_t)map + mapsize); return (map); } /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { bus_dmamap_t map; int error = 0; *mapp = map = allocate_map(dmat, M_NOWAIT); if (map == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } /* * Bouncing might be required if the driver asks for an exclusion * region, a data alignment that is stricter than 1, or DMA that begins * or ends with a partial cacheline. Whether bouncing will actually * happen can't be known until mapping time, but we need to pre-allocate * resources now because we might not be allowed to at mapping time. */ error = allocate_bz_and_pages(dmat, map); if (error != 0) { free(map, M_BUSDMA); *mapp = NULL; return (error); } if (map->flags & DMAMAP_COHERENT) atomic_add_32(&maps_coherent, 1); atomic_add_32(&maps_total, 1); dmat->map_count++; return (0); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; if (map->flags & DMAMAP_COHERENT) atomic_subtract_32(&maps_coherent, 1); atomic_subtract_32(&maps_total, 1); free(map, M_BUSDMA); dmat->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into bus device * space based on the constraints listed in the dma tag. Returns a pointer to * the allocated memory, and a pointer to an associated bus_dmamap. 
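/*
 * Illustrative sketch (not part of this commit): drivers pair
 * bus_dmamap_create(9) with bus_dmamap_destroy(9); the map pre-allocates the
 * bounce resources reserved by allocate_bz_and_pages() above even though
 * bouncing may never happen for a given load.  Names are hypothetical.
 */
static int
example_create_map(bus_dma_tag_t tag, bus_dmamap_t *mapp)
{
	int error;

	error = bus_dmamap_create(tag, 0, mapp);
	if (error != 0)
		return (error);
	/* ... use the map for loads and syncs, then: */
	/* bus_dmamap_destroy(tag, *mapp); */
	return (0);
}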
*/ int bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags, bus_dmamap_t *mapp) { busdma_bufalloc_t ba; struct busdma_bufzone *bufzone; bus_dmamap_t map; vm_memattr_t memattr; int mflags; if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; *mapp = map = allocate_map(dmat, mflags); if (map == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); return (ENOMEM); } map->flags = DMAMAP_DMAMEM_ALLOC; /* Choose a busdma buffer allocator based on memory type flags. */ if (flags & BUS_DMA_COHERENT) { memattr = VM_MEMATTR_UNCACHEABLE; ba = coherent_allocator; map->flags |= DMAMAP_COHERENT; } else { memattr = VM_MEMATTR_DEFAULT; ba = standard_allocator; } /* * Try to find a bufzone in the allocator that holds a cache of buffers * of the right size for this request. If the buffer is too big to be * held in the allocator cache, this returns NULL. */ bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); /* * Allocate the buffer from the uma(9) allocator if... * - It's small enough to be in the allocator (bufzone not NULL). * - The alignment constraint isn't larger than the allocation size * (the allocator aligns buffers to their size boundaries). * - There's no need to handle lowaddr/highaddr exclusion zones. * else allocate non-contiguous pages if... * - The page count that could get allocated doesn't exceed nsegments. * - The alignment constraint isn't larger than a page boundary. * - There are no boundary-crossing constraints. * else allocate a block of contiguous pages because one or more of the * constraints is something that only the contig allocator can fulfill. */ if (bufzone != NULL && dmat->alignment <= bufzone->size && !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) { *vaddr = uma_zalloc(bufzone->umazone, mflags); } else if (dmat->nsegments >= howmany(dmat->maxsize, MIN(dmat->maxsegsz, PAGE_SIZE)) && dmat->alignment <= PAGE_SIZE && (dmat->boundary % PAGE_SIZE) == 0) { *vaddr = (void *)kmem_alloc_attr(dmat->maxsize, mflags, 0, dmat->lowaddr, memattr); } else { - *vaddr = (void *)kmem_alloc_contig(kernel_arena, dmat->maxsize, - mflags, 0, dmat->lowaddr, dmat->alignment, dmat->boundary, - memattr); + *vaddr = (void *)kmem_alloc_contig(dmat->maxsize, mflags, 0, + dmat->lowaddr, dmat->alignment, dmat->boundary, memattr); } if (*vaddr == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); free(map, M_BUSDMA); *mapp = NULL; return (ENOMEM); } if (map->flags & DMAMAP_COHERENT) atomic_add_32(&maps_coherent, 1); atomic_add_32(&maps_dmamem, 1); atomic_add_32(&maps_total, 1); dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, 0); return (0); } /* * Free a piece of memory that was allocated via bus_dmamem_alloc, along with * its associated map. 
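/*
 * Illustrative sketch (not part of this commit): driver-side use of
 * bus_dmamem_alloc(9), which funnels into the allocator selection above
 * (uma bufzone, kmem_alloc_attr(), or kmem_alloc_contig() depending on the
 * tag's constraints).  Names are hypothetical.
 */
static int
example_alloc_ring(bus_dma_tag_t tag, void **ringp, bus_dmamap_t *mapp)
{
	/*
	 * BUS_DMA_COHERENT selects the uncacheable allocator, so the sync
	 * path in this file skips cache maintenance for the resulting map;
	 * BUS_DMA_ZERO clears the buffer.
	 */
	return (bus_dmamem_alloc(tag, ringp,
	    BUS_DMA_COHERENT | BUS_DMA_WAITOK | BUS_DMA_ZERO, mapp));
}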
*/ void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { struct busdma_bufzone *bufzone; busdma_bufalloc_t ba; if (map->flags & DMAMAP_COHERENT) ba = coherent_allocator; else ba = standard_allocator; bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); if (bufzone != NULL && dmat->alignment <= bufzone->size && !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) uma_zfree(bufzone->umazone, vaddr); else kmem_free(kernel_arena, (vm_offset_t)vaddr, dmat->maxsize); dmat->map_count--; if (map->flags & DMAMAP_COHERENT) atomic_subtract_32(&maps_coherent, 1); atomic_subtract_32(&maps_total, 1); atomic_subtract_32(&maps_dmamem, 1); free(map, M_BUSDMA); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); } static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { bus_addr_t curaddr; bus_size_t sgsize; if (map->pagesneeded == 0) { CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->maxsegsz); if (run_filter(dmat, curaddr) != 0) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); map->pagesneeded++; } curaddr += sgsize; buflen -= sgsize; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; bus_addr_t paddr; if (map->pagesneeded == 0) { CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = trunc_page((vm_offset_t)buf); vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { if (__predict_true(pmap == kernel_pmap)) paddr = pmap_kextract(vaddr); else paddr = pmap_extract(pmap, vaddr); if (run_filter(dmat, paddr) != 0) map->pagesneeded++; vaddr += PAGE_SIZE; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->boundary - 1); if (dmat->boundary > 0) { baddr = (curaddr + dmat->boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } if (dmat->ranges) { struct arm32_dma_range *dr; dr = _bus_dma_inrange(dmat->ranges, dmat->_nranges, curaddr); if (dr == NULL) return (0); /* * In a valid DMA range. Translate the physical * memory address to an address in the DMA window. 
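/*
 * Illustrative sketch (not part of this commit): the boundary clipping done
 * by _bus_dmamap_addseg() above.  With a 64KB boundary, a segment starting
 * at 0x1fc00 may extend at most 0x400 bytes, up to the next 64KB line at
 * 0x20000; the remainder starts a new segment.  Values are hypothetical.
 */
static bus_size_t
example_clip_to_boundary(bus_addr_t curaddr, bus_size_t sgsize,
    bus_addr_t boundary)
{
	bus_addr_t baddr;

	if (boundary > 0) {
		baddr = (curaddr + boundary) & ~(boundary - 1);
		if (sgsize > (baddr - curaddr))
			sgsize = (baddr - curaddr);
	}
	return (sgsize);
}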
*/ curaddr = (curaddr - dr->dr_sysbase) + dr->dr_busbase; } seg = *segp; /* * Insert chunk into a segment, coalescing with * the previous segment if possible. */ if (seg >= 0 && curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && (dmat->boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) { segs[seg].ds_len += sgsize; } else { if (++seg >= dmat->nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. */ int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { bus_addr_t curaddr; bus_addr_t sl_end = 0; bus_size_t sgsize; struct sync_list *sl; int error; if (segs == NULL) segs = map->segments; counter_u64_add(maploads_total, 1); counter_u64_add(maploads_physmem, 1); if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { counter_u64_add(maploads_bounced, 1); error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } sl = map->slist + map->sync_count - 1; while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->maxsegsz); if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && run_filter(dmat, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } else { if (map->sync_count > 0) sl_end = VM_PAGE_TO_PHYS(sl->pages) + sl->dataoffs + sl->datacount; if (map->sync_count == 0 || curaddr != sl_end) { if (++map->sync_count > dmat->nsegments) break; sl++; sl->vaddr = 0; sl->datacount = sgsize; sl->pages = PHYS_TO_VM_PAGE(curaddr); sl->dataoffs = curaddr & PAGE_MASK; } else sl->datacount += sgsize; } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ if (buflen != 0) { bus_dmamap_unload(dmat, map); return (EFBIG); /* XXX better return value here? */ } return (0); } int _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags, segs, segp)); } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrance, and the ending segment on exit. 
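/*
 * Illustrative sketch (not part of this commit): the load routines above are
 * normally reached through bus_dmamap_load(9) with a driver callback that
 * receives the resulting segment list.  Names are hypothetical.
 */
static void
example_load_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t *busaddrp = arg;

	if (error == 0 && nseg == 1)
		*busaddrp = segs[0].ds_addr;	/* address handed to the device */
}

static int
example_load(bus_dma_tag_t tag, bus_dmamap_t map, void *buf, bus_size_t len,
    bus_addr_t *busaddrp)
{
	/* BUS_DMA_NOWAIT: fail rather than defer if bounce pages are short. */
	return (bus_dmamap_load(tag, map, buf, len, example_load_cb, busaddrp,
	    BUS_DMA_NOWAIT));
}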
*/ int _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize; bus_addr_t curaddr; bus_addr_t sl_pend = 0; struct sync_list *sl; vm_offset_t kvaddr; vm_offset_t vaddr = (vm_offset_t)buf; vm_offset_t sl_vend = 0; int error = 0; counter_u64_add(maploads_total, 1); if (map->flags & DMAMAP_COHERENT) counter_u64_add(maploads_coherent, 1); if (map->flags & DMAMAP_DMAMEM_ALLOC) counter_u64_add(maploads_dmamem, 1); if (segs == NULL) segs = map->segments; if (flags & BUS_DMA_LOAD_MBUF) { counter_u64_add(maploads_mbuf, 1); map->flags |= DMAMAP_CACHE_ALIGNED; } if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); if (map->pagesneeded != 0) { counter_u64_add(maploads_bounced, 1); error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } CTR3(KTR_BUSDMA, "lowaddr= %d boundary= %d, " "alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); sl = map->slist + map->sync_count - 1; while (buflen > 0) { /* * Get the physical address for this segment. */ if (__predict_true(pmap == kernel_pmap)) { curaddr = pmap_kextract(vaddr); kvaddr = vaddr; } else { curaddr = pmap_extract(pmap, vaddr); map->flags &= ~DMAMAP_COHERENT; kvaddr = 0; } /* * Compute the segment size, and adjust counts. */ sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); if (sgsize > dmat->maxsegsz) sgsize = dmat->maxsegsz; if (buflen < sgsize) sgsize = buflen; if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && run_filter(dmat, curaddr)) { curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, sgsize); } else { if (map->sync_count > 0) { sl_pend = VM_PAGE_TO_PHYS(sl->pages) + sl->dataoffs + sl->datacount; sl_vend = sl->vaddr + sl->datacount; } if (map->sync_count == 0 || (kvaddr != 0 && kvaddr != sl_vend) || (kvaddr == 0 && curaddr != sl_pend)) { if (++map->sync_count > dmat->nsegments) goto cleanup; sl++; sl->vaddr = kvaddr; sl->datacount = sgsize; sl->pages = PHYS_TO_VM_PAGE(curaddr); sl->dataoffs = curaddr & PAGE_MASK; } else sl->datacount += sgsize; } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } cleanup: /* * Did we fit? */ if (buflen != 0) { bus_dmamap_unload(dmat, map); return (EFBIG); /* XXX better return value here? */ } return (0); } void _bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { KASSERT(dmat != NULL, ("dmatag is NULL")); KASSERT(map != NULL, ("dmamap is NULL")); map->mem = *mem; map->callback = callback; map->callback_arg = callback_arg; } bus_dma_segment_t * _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { if (segs == NULL) segs = map->segments; return (segs); } /* * Release the mapping held by map. 
*/ void bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; struct bounce_zone *bz; if ((bz = dmat->bounce_zone) != NULL) { while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } bz = dmat->bounce_zone; bz->free_bpages += map->pagesreserved; bz->reserved_bpages -= map->pagesreserved; map->pagesreserved = 0; map->pagesneeded = 0; } map->sync_count = 0; map->flags &= ~DMAMAP_MBUF; } static void bus_dmamap_sync_buf(vm_offset_t buf, int len, bus_dmasync_op_t op, int bufaligned) { char _tmp_cl[arm_dcache_align], _tmp_clend[arm_dcache_align]; register_t s; int partial; if ((op & BUS_DMASYNC_PREWRITE) && !(op & BUS_DMASYNC_PREREAD)) { cpu_dcache_wb_range(buf, len); cpu_l2cache_wb_range(buf, len); } /* * If the caller promises the buffer is properly aligned to a cache line * (even if the call parms make it look like it isn't) we can avoid * attempting to preserve the non-DMA part of the cache line in the * POSTREAD case, but we MUST still do a writeback in the PREREAD case. * * This covers the case of mbufs, where we know how they're aligned and * know the CPU doesn't touch the header in front of the DMA data area * during the IO, but it may have touched it right before invoking the * sync, so a PREREAD writeback is required. * * It also handles buffers we created in bus_dmamem_alloc(), which are * always aligned and padded to cache line size even if the IO length * isn't a multiple of cache line size. In this case the PREREAD * writeback probably isn't required, but it's harmless. */ partial = (((vm_offset_t)buf) | len) & arm_dcache_align_mask; if (op & BUS_DMASYNC_PREREAD) { if (!(op & BUS_DMASYNC_PREWRITE) && !partial) { cpu_dcache_inv_range(buf, len); cpu_l2cache_inv_range(buf, len); } else { cpu_dcache_wbinv_range(buf, len); cpu_l2cache_wbinv_range(buf, len); } } if (op & BUS_DMASYNC_POSTREAD) { if (partial && !bufaligned) { s = intr_disable(); if (buf & arm_dcache_align_mask) memcpy(_tmp_cl, (void *)(buf & ~arm_dcache_align_mask), buf & arm_dcache_align_mask); if ((buf + len) & arm_dcache_align_mask) memcpy(_tmp_clend, (void *)(buf + len), arm_dcache_align - ((buf + len) & arm_dcache_align_mask)); } cpu_dcache_inv_range(buf, len); cpu_l2cache_inv_range(buf, len); if (partial && !bufaligned) { if (buf & arm_dcache_align_mask) memcpy((void *)(buf & ~arm_dcache_align_mask), _tmp_cl, buf & arm_dcache_align_mask); if ((buf + len) & arm_dcache_align_mask) memcpy((void *)(buf + len), _tmp_clend, arm_dcache_align - ((buf + len) & arm_dcache_align_mask)); intr_restore(s); } } } static void bus_dmamap_sync_sl(struct sync_list *sl, bus_dmasync_op_t op, int bufaligned) { vm_offset_t tempvaddr; vm_page_t curpage; size_t npages; if (sl->vaddr != 0) { bus_dmamap_sync_buf(sl->vaddr, sl->datacount, op, bufaligned); return; } tempvaddr = 0; npages = atop(round_page(sl->dataoffs + sl->datacount)); for (curpage = sl->pages; curpage != sl->pages + npages; ++curpage) { /* * If the page is mapped to some other VA that hasn't * been supplied to busdma, then pmap_quick_enter_page() * will find all duplicate mappings and mark them * uncacheable. * That will also do any necessary wb/inv. Otherwise, * if the page is truly unmapped, then we don't actually * need to do cache maintenance. * XXX: May overwrite DMA'ed data in the POSTREAD * case where the CPU has written to a cacheline not * completely covered by the DMA region. 
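/*
 * Illustrative sketch (not part of this commit): the usual driver sync
 * discipline that the PREREAD/PREWRITE/POSTREAD cache handling above
 * implements.  Names are hypothetical.
 */
static void
example_dma_read(bus_dma_tag_t tag, bus_dmamap_t map)
{
	/* Before telling the device to write into the buffer: */
	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREREAD);
	/* ... start the device-to-memory transfer and wait for completion ... */
	/* After the device is done, before the CPU reads the data: */
	bus_dmamap_sync(tag, map, BUS_DMASYNC_POSTREAD);
	/* Once finished with this buffer: */
	bus_dmamap_unload(tag, map);
}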
*/ KASSERT(VM_PAGE_TO_PHYS(curpage) == VM_PAGE_TO_PHYS(sl->pages) + ptoa(curpage - sl->pages), ("unexpected vm_page_t phys: 0x%08x != 0x%08x", VM_PAGE_TO_PHYS(curpage), VM_PAGE_TO_PHYS(sl->pages) + ptoa(curpage - sl->pages))); tempvaddr = pmap_quick_enter_page(curpage); pmap_quick_remove_page(tempvaddr); } } static void _bus_dmamap_sync_bp(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; vm_offset_t datavaddr, tempvaddr; if ((op & (BUS_DMASYNC_PREWRITE | BUS_DMASYNC_POSTREAD)) == 0) return; STAILQ_FOREACH(bpage, &map->bpages, links) { tempvaddr = 0; datavaddr = bpage->datavaddr; if (op & BUS_DMASYNC_PREWRITE) { if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page(bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)datavaddr, (void *)bpage->vaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); cpu_dcache_wb_range(bpage->vaddr, bpage->datacount); cpu_l2cache_wb_range(bpage->vaddr, bpage->datacount); dmat->bounce_zone->total_bounced++; } if (op & BUS_DMASYNC_POSTREAD) { cpu_dcache_inv_range(bpage->vaddr, bpage->datacount); cpu_l2cache_inv_range(bpage->vaddr, bpage->datacount); if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page(bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)bpage->vaddr, (void *)datavaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); dmat->bounce_zone->total_bounced++; } } } void bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct sync_list *sl, *end; int bufaligned; if (op == BUS_DMASYNC_POSTWRITE) return; if (map->flags & DMAMAP_COHERENT) goto drain; if (STAILQ_FIRST(&map->bpages)) _bus_dmamap_sync_bp(dmat, map, op); CTR3(KTR_BUSDMA, "%s: op %x flags %x", __func__, op, map->flags); bufaligned = (map->flags & DMAMAP_CACHE_ALIGNED); if (map->sync_count) { end = &map->slist[map->sync_count]; for (sl = &map->slist[0]; sl != end; sl++) bus_dmamap_sync_sl(sl, op, bufaligned); } drain: cpu_drain_writebuf(); } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->alignment <= bz->alignment) && (dmat->lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_BUSDMA, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->lowaddr; bz->alignment = MAX(dmat->alignment, PAGE_SIZE); bz->map_count = 0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if 
(bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? */ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests (pages bounced)"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); SYSCTL_ADD_ULONG(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_BUSDMA, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_BOUNCE, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_BUSDMA); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); KASSERT(map != NULL, ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to 
be preserved. */ bpage->vaddr |= addr & PAGE_MASK; bpage->busaddr |= addr & PAGE_MASK; } bpage->datavaddr = vaddr; bpage->datapage = PHYS_TO_VM_PAGE(addr); bpage->dataoffs = addr & PAGE_MASK; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; dmat->lockfunc(dmat->lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); dmat->lockfunc(dmat->lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } Index: head/sys/arm/arm/busdma_machdep-v6.c =================================================================== --- head/sys/arm/arm/busdma_machdep-v6.c (revision 338106) +++ head/sys/arm/arm/busdma_machdep-v6.c (revision 338107) @@ -1,1720 +1,1719 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012-2015 Ian Lepore * Copyright (c) 2010 Mark Tinguely * Copyright (c) 2004 Olivier Houchard * Copyright (c) 2002 Peter Grehan * Copyright (c) 1997, 1998 Justin T. Gibbs. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * From i386/busdma_machdep.c 191438 2009-04-23 20:24:19Z jhb */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define BUSDMA_DCACHE_ALIGN cpuinfo.dcache_line_size #define BUSDMA_DCACHE_MASK cpuinfo.dcache_line_mask #define MAX_BPAGES 64 #define MAX_DMA_SEGMENTS 4096 #define BUS_DMA_EXCL_BOUNCE BUS_DMA_BUS2 #define BUS_DMA_ALIGN_BOUNCE BUS_DMA_BUS3 #define BUS_DMA_COULD_BOUNCE (BUS_DMA_EXCL_BOUNCE | BUS_DMA_ALIGN_BOUNCE) #define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 struct bounce_zone; struct bus_dma_tag { bus_dma_tag_t parent; bus_size_t alignment; bus_addr_t boundary; bus_addr_t lowaddr; bus_addr_t highaddr; bus_dma_filter_t *filter; void *filterarg; bus_size_t maxsize; u_int nsegments; bus_size_t maxsegsz; int flags; int ref_count; int map_count; bus_dma_lock_t *lockfunc; void *lockfuncarg; struct bounce_zone *bounce_zone; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ vm_page_t datapage; /* physical page of client data */ vm_offset_t dataoffs; /* page offset of client data */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; struct sync_list { vm_offset_t vaddr; /* kva of client data */ bus_addr_t paddr; /* physical address */ vm_page_t pages; /* starting page of client data */ bus_size_t datacount; /* client data count */ }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static uint32_t tags_total; static uint32_t maps_total; static uint32_t maps_dmamem; static uint32_t maps_coherent; static counter_u64_t maploads_total; static counter_u64_t maploads_bounced; static counter_u64_t maploads_coherent; static counter_u64_t maploads_dmamem; static counter_u64_t maploads_mbuf; static counter_u64_t maploads_physmem; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_UINT(_hw_busdma, OID_AUTO, tags_total, CTLFLAG_RD, &tags_total, 0, "Number of active tags"); SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_total, CTLFLAG_RD, &maps_total, 0, "Number of active maps"); SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_dmamem, CTLFLAG_RD, &maps_dmamem, 0, "Number of active maps for bus_dmamem_alloc buffers"); SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_coherent, CTLFLAG_RD, &maps_coherent, 0, "Number of active maps with BUS_DMA_COHERENT flag set"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_total, CTLFLAG_RD, &maploads_total, "Number of load operations performed"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_bounced, CTLFLAG_RD, &maploads_bounced, "Number of load operations that used bounce buffers"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_coherent, CTLFLAG_RD, &maploads_dmamem, "Number of load operations on BUS_DMA_COHERENT memory"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_dmamem, CTLFLAG_RD, &maploads_dmamem, "Number of load 
operations on bus_dmamem_alloc buffers"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_mbuf, CTLFLAG_RD, &maploads_mbuf, "Number of load operations for mbufs"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_physmem, CTLFLAG_RD, &maploads_physmem, "Number of load operations on physical buffers"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; bus_dmamap_callback_t *callback; void *callback_arg; int flags; #define DMAMAP_COHERENT (1 << 0) #define DMAMAP_DMAMEM_ALLOC (1 << 1) #define DMAMAP_MBUF (1 << 2) STAILQ_ENTRY(bus_dmamap) links; bus_dma_segment_t *segments; int sync_count; struct sync_list slist[]; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, pmap_t pmap, bus_dmamap_t map, void *buf, bus_size_t buflen, int flags); static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags); static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags); static void dma_preread_safe(vm_offset_t va, vm_paddr_t pa, vm_size_t size); static void dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op); static busdma_bufalloc_t coherent_allocator; /* Cache of coherent buffers */ static busdma_bufalloc_t standard_allocator; /* Cache of standard buffers */ MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata"); MALLOC_DEFINE(M_BOUNCE, "bounce", "busdma bounce pages"); static void busdma_init(void *dummy) { int uma_flags; maploads_total = counter_u64_alloc(M_WAITOK); maploads_bounced = counter_u64_alloc(M_WAITOK); maploads_coherent = counter_u64_alloc(M_WAITOK); maploads_dmamem = counter_u64_alloc(M_WAITOK); maploads_mbuf = counter_u64_alloc(M_WAITOK); maploads_physmem = counter_u64_alloc(M_WAITOK); uma_flags = 0; /* Create a cache of buffers in standard (cacheable) memory. */ standard_allocator = busdma_bufalloc_create("buffer", BUSDMA_DCACHE_ALIGN,/* minimum_alignment */ NULL, /* uma_alloc func */ NULL, /* uma_free func */ uma_flags); /* uma_zcreate_flags */ #ifdef INVARIANTS /* * Force UMA zone to allocate service structures like * slabs using own allocator. uma_debug code performs * atomic ops on uma_slab_t fields and safety of this * operation is not guaranteed for write-back caches */ uma_flags = UMA_ZONE_OFFPAGE; #endif /* * Create a cache of buffers in uncacheable memory, to implement the * BUS_DMA_COHERENT (and potentially BUS_DMA_NOCACHE) flag. 
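As a point of reference, the coherent allocator created above is what backs bus_dmamem_alloc() when a caller passes BUS_DMA_COHERENT on a platform whose normal memory is not already coherent. A minimal, hypothetical driver-side sketch follows; the softc, its field names, and the tag parameters are illustrative assumptions, not part of this change.

/*
 * Hypothetical example: allocate a page-sized coherent descriptor ring.
 */
static int
example_alloc_ring(device_t dev, struct example_softc *sc)
{
	int error;

	error = bus_dma_tag_create(bus_get_dma_tag(dev),
	    sizeof(uint32_t), 0,	/* alignment, boundary */
	    BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    PAGE_SIZE, 1, PAGE_SIZE,	/* maxsize, nsegments, maxsegsz */
	    0, NULL, NULL, &sc->ring_tag);
	if (error != 0)
		return (error);

	/*
	 * BUS_DMA_COHERENT steers the allocation to the uncacheable
	 * bufzone when normal memory is not already coherent, so the
	 * driver can skip explicit cache maintenance on this buffer.
	 */
	error = bus_dmamem_alloc(sc->ring_tag, &sc->ring_vaddr,
	    BUS_DMA_COHERENT | BUS_DMA_ZERO | BUS_DMA_WAITOK, &sc->ring_map);
	return (error);
}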
*/ coherent_allocator = busdma_bufalloc_create("coherent", BUSDMA_DCACHE_ALIGN,/* minimum_alignment */ busdma_bufalloc_alloc_uncacheable, busdma_bufalloc_free_uncacheable, uma_flags); /* uma_zcreate_flags */ } /* * This init historically used SI_SUB_VM, but now the init code requires * malloc(9) using M_BUSDMA memory and the pcpu zones for counter(9), which get * set up by SI_SUB_KMEM and SI_ORDER_LAST, so we'll go right after that by * using SI_SUB_KMEM+1. */ SYSINIT(busdma, SI_SUB_KMEM+1, SI_ORDER_FIRST, busdma_init, NULL); /* * This routine checks the exclusion zone constraints from a tag against the * physical RAM available on the machine. If a tag specifies an exclusion zone * but there's no RAM in that zone, then we avoid allocating resources to bounce * a request, and we can use any memory allocator (as opposed to needing * kmem_alloc_contig() just because it can allocate pages in an address range). * * Most tags have BUS_SPACE_MAXADDR or BUS_SPACE_MAXADDR_32BIT (they are the * same value on 32-bit architectures) as their lowaddr constraint, and we can't * possibly have RAM at an address higher than the highest address we can * express, so we take a fast out. */ static int exclusion_bounce_check(vm_offset_t lowaddr, vm_offset_t highaddr) { int i; if (lowaddr >= BUS_SPACE_MAXADDR) return (0); for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) { if ((lowaddr >= phys_avail[i] && lowaddr < phys_avail[i + 1]) || (lowaddr < phys_avail[i] && highaddr >= phys_avail[i])) return (1); } return (0); } /* * Return true if the tag has an exclusion zone that could lead to bouncing. */ static __inline int exclusion_bounce(bus_dma_tag_t dmat) { return (dmat->flags & BUS_DMA_EXCL_BOUNCE); } /* * Return true if the given address does not fall on the alignment boundary. */ static __inline int alignment_bounce(bus_dma_tag_t dmat, bus_addr_t addr) { return (addr & (dmat->alignment - 1)); } /* * Return true if the DMA should bounce because the start or end does not fall * on a cacheline boundary (which would require a partial cacheline flush). * COHERENT memory doesn't trigger cacheline flushes. Memory allocated by * bus_dmamem_alloc() is always aligned to cacheline boundaries, and there's a * strict rule that such memory cannot be accessed by the CPU while DMA is in * progress (or by multiple DMA engines at once), so that it's always safe to do * full cacheline flushes even if that affects memory outside the range of a * given DMA operation that doesn't involve the full allocated buffer. If we're * mapping an mbuf, that follows the same rules as a buffer we allocated. */ static __inline int cacheline_bounce(bus_dmamap_t map, bus_addr_t addr, bus_size_t size) { if (map->flags & (DMAMAP_DMAMEM_ALLOC | DMAMAP_COHERENT | DMAMAP_MBUF)) return (0); return ((addr | size) & BUSDMA_DCACHE_MASK); } /* * Return true if we might need to bounce the DMA described by addr and size. * * This is used to quick-check whether we need to do the more expensive work of * checking the DMA page-by-page looking for alignment and exclusion bounces. * * Note that the addr argument might be either virtual or physical. It doesn't * matter because we only look at the low-order bits, which are the same in both * address spaces. */ static __inline int might_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t addr, bus_size_t size) { return ((dmat->flags & BUS_DMA_EXCL_BOUNCE) || alignment_bounce(dmat, addr) || cacheline_bounce(map, addr, size)); } /* * Return true if we must bounce the DMA described by paddr and size. 
* * Bouncing can be triggered by DMA that doesn't begin and end on cacheline * boundaries, or doesn't begin on an alignment boundary, or falls within the * exclusion zone of any tag in the ancestry chain. * * For exclusions, walk the chain of tags comparing paddr to the exclusion zone * within each tag. If the tag has a filter function, use it to decide whether * the DMA needs to bounce, otherwise any DMA within the zone bounces. */ static int must_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr, bus_size_t size) { if (cacheline_bounce(map, paddr, size)) return (1); /* * The tag already contains ancestors' alignment restrictions so this * check doesn't need to be inside the loop. */ if (alignment_bounce(dmat, paddr)) return (1); /* * Even though each tag has an exclusion zone that is a superset of its * own and all its ancestors' exclusions, the exclusion zone of each tag * up the chain must be checked within the loop, because the busdma * rules say the filter function is called only when the address lies * within the low-highaddr range of the tag that filterfunc belongs to. */ while (dmat != NULL && exclusion_bounce(dmat)) { if ((paddr >= dmat->lowaddr && paddr <= dmat->highaddr) && (dmat->filter == NULL || dmat->filter(dmat->filterarg, paddr) != 0)) return (1); dmat = dmat->parent; } return (0); } /* * Convenience function for manipulating driver locks from busdma (during * busdma_swi, for example). Drivers that don't provide their own locks * should specify &Giant to dmat->lockfuncarg. Drivers that use their own * non-mutex locking scheme don't have to use this at all. */ void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) { struct mtx *dmtx; dmtx = (struct mtx *)arg; switch (op) { case BUS_DMA_LOCK: mtx_lock(dmtx); break; case BUS_DMA_UNLOCK: mtx_unlock(dmtx); break; default: panic("Unknown operation 0x%x for busdma_lock_mutex!", op); } } /* * dflt_lock should never get called. It gets put into the dma tag when * lockfunc == NULL, which is only valid if the maps that are associated * with the tag are meant to never be defered. * XXX Should have a way to identify which driver is responsible here. */ static void dflt_lock(void *arg, bus_dma_lock_op_t op) { panic("driver error: busdma dflt_lock called"); } /* * Allocate a device specific dma_tag. */ int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error = 0; /* Basic sanity checking. 
*/ KASSERT(boundary == 0 || powerof2(boundary), ("dma tag boundary %lu, must be a power of 2", boundary)); KASSERT(boundary == 0 || boundary >= maxsegsz, ("dma tag boundary %lu is < maxsegsz %lu\n", boundary, maxsegsz)); KASSERT(alignment != 0 && powerof2(alignment), ("dma tag alignment %lu, must be non-zero power of 2", alignment)); KASSERT(maxsegsz != 0, ("dma tag maxsegsz must not be zero")); /* Return a NULL tag on failure */ *dmat = NULL; newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_BUSDMA, M_ZERO | M_NOWAIT); if (newtag == NULL) { CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, 0, error); return (ENOMEM); } newtag->parent = parent; newtag->alignment = alignment; newtag->boundary = boundary; newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); newtag->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1); newtag->filter = filter; newtag->filterarg = filterarg; newtag->maxsize = maxsize; newtag->nsegments = nsegments; newtag->maxsegsz = maxsegsz; newtag->flags = flags; newtag->ref_count = 1; /* Count ourself */ newtag->map_count = 0; if (lockfunc != NULL) { newtag->lockfunc = lockfunc; newtag->lockfuncarg = lockfuncarg; } else { newtag->lockfunc = dflt_lock; newtag->lockfuncarg = NULL; } /* Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); newtag->alignment = MAX(parent->alignment, newtag->alignment); newtag->flags |= parent->flags & BUS_DMA_COULD_BOUNCE; newtag->flags |= parent->flags & BUS_DMA_COHERENT; if (newtag->boundary == 0) newtag->boundary = parent->boundary; else if (parent->boundary != 0) newtag->boundary = MIN(parent->boundary, newtag->boundary); if (newtag->filter == NULL) { /* * Short circuit to looking at our parent directly * since we have encapsulated all of its information */ newtag->filter = parent->filter; newtag->filterarg = parent->filterarg; newtag->parent = parent->parent; } if (newtag->parent != NULL) atomic_add_int(&parent->ref_count, 1); } if (exclusion_bounce_check(newtag->lowaddr, newtag->highaddr)) newtag->flags |= BUS_DMA_EXCL_BOUNCE; if (alignment_bounce(newtag, 1)) newtag->flags |= BUS_DMA_ALIGN_BOUNCE; /* * Any request can auto-bounce due to cacheline alignment, in addition * to any alignment or boundary specifications in the tag, so if the * ALLOCNOW flag is set, there's always work to do. */ if ((flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* * Round size up to a full page, and add one more page because * there can always be one more boundary crossing than the * number of pages in a transfer. */ maxsize = roundup2(maxsize, PAGE_SIZE) + PAGE_SIZE; if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_BUSDMA); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; } else newtag->bounce_zone = NULL; if (error != 0) { free(newtag, M_BUSDMA); } else { atomic_add_32(&tags_total, 1); *dmat = newtag; } CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? 
newtag->flags : 0), error); return (error); } int bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain) { return (0); } int bus_dma_tag_destroy(bus_dma_tag_t dmat) { bus_dma_tag_t dmat_copy; int error; error = 0; dmat_copy = dmat; if (dmat != NULL) { if (dmat->map_count != 0) { error = EBUSY; goto out; } while (dmat != NULL) { bus_dma_tag_t parent; parent = dmat->parent; atomic_subtract_int(&dmat->ref_count, 1); if (dmat->ref_count == 0) { atomic_subtract_32(&tags_total, 1); free(dmat, M_BUSDMA); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } out: CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); return (error); } static int allocate_bz_and_pages(bus_dma_tag_t dmat, bus_dmamap_t mapp) { struct bounce_zone *bz; int maxpages; int error; if (dmat->bounce_zone == NULL) if ((error = alloc_bounce_zone(dmat)) != 0) return (error); bz = dmat->bounce_zone; /* Initialize the new map */ STAILQ_INIT(&(mapp->bpages)); /* * Attempt to add pages to our pool on a per-instance basis up to a sane * limit. Even if the tag isn't flagged as COULD_BOUNCE due to * alignment and boundary constraints, it could still auto-bounce due to * cacheline alignment, which requires at most two bounce pages. */ if (dmat->flags & BUS_DMA_COULD_BOUNCE) maxpages = MAX_BPAGES; else maxpages = 2 * bz->map_count; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { int pages; pages = atop(roundup2(dmat->maxsize, PAGE_SIZE)) + 1; pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 2); if (alloc_bounce_pages(dmat, pages) < pages) return (ENOMEM); if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; } bz->map_count++; return (0); } static bus_dmamap_t allocate_map(bus_dma_tag_t dmat, int mflags) { int mapsize, segsize; bus_dmamap_t map; /* * Allocate the map. The map structure ends with an embedded * variable-sized array of sync_list structures. Following that * we allocate enough extra space to hold the array of bus_dma_segments. */ KASSERT(dmat->nsegments <= MAX_DMA_SEGMENTS, ("cannot allocate %u dma segments (max is %u)", dmat->nsegments, MAX_DMA_SEGMENTS)); segsize = sizeof(struct bus_dma_segment) * dmat->nsegments; mapsize = sizeof(*map) + sizeof(struct sync_list) * dmat->nsegments; map = malloc(mapsize + segsize, M_BUSDMA, mflags | M_ZERO); if (map == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (NULL); } map->segments = (bus_dma_segment_t *)((uintptr_t)map + mapsize); return (map); } /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { bus_dmamap_t map; int error = 0; *mapp = map = allocate_map(dmat, M_NOWAIT); if (map == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } /* * Bouncing might be required if the driver asks for an exclusion * region, a data alignment that is stricter than 1, or DMA that begins * or ends with a partial cacheline. Whether bouncing will actually * happen can't be known until mapping time, but we need to pre-allocate * resources now because we might not be allowed to at mapping time. 
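For context, the bounce resources pre-allocated here are what later let bus_dmamap_load() either complete synchronously or defer its callback through busdma_swi(). A typical caller that opts out of deferral might look like the following hypothetical sketch; the softc fields, callback, and buffer are illustrative assumptions, not part of this change.

/*
 * Hypothetical example: create a map and load a single-segment buffer.
 */
static void
example_load_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	/* Hand the device-visible address of the segment back to the caller. */
	if (error == 0 && nseg == 1)
		*(bus_addr_t *)arg = segs[0].ds_addr;
}

static int
example_map_buffer(struct example_softc *sc, void *buf, bus_size_t len)
{
	int error;

	error = bus_dmamap_create(sc->buf_tag, 0, &sc->buf_map);
	if (error != 0)
		return (error);
	/*
	 * BUS_DMA_NOWAIT makes the load fail with ENOMEM instead of
	 * queueing the map for busdma_swi() when bounce pages run short.
	 */
	error = bus_dmamap_load(sc->buf_tag, sc->buf_map, buf, len,
	    example_load_cb, &sc->buf_busaddr, BUS_DMA_NOWAIT);
	return (error);
}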
*/ error = allocate_bz_and_pages(dmat, map); if (error != 0) { free(map, M_BUSDMA); *mapp = NULL; return (error); } if (map->flags & DMAMAP_COHERENT) atomic_add_32(&maps_coherent, 1); atomic_add_32(&maps_total, 1); dmat->map_count++; return (0); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; if (map->flags & DMAMAP_COHERENT) atomic_subtract_32(&maps_coherent, 1); atomic_subtract_32(&maps_total, 1); free(map, M_BUSDMA); dmat->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into bus device * space based on the constraints listed in the dma tag. Returns a pointer to * the allocated memory, and a pointer to an associated bus_dmamap. */ int bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags, bus_dmamap_t *mapp) { busdma_bufalloc_t ba; struct busdma_bufzone *bufzone; bus_dmamap_t map; vm_memattr_t memattr; int mflags; if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; *mapp = map = allocate_map(dmat, mflags); if (map == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); return (ENOMEM); } map->flags = DMAMAP_DMAMEM_ALLOC; /* For coherent memory, set the map flag that disables sync ops. */ if (flags & BUS_DMA_COHERENT) map->flags |= DMAMAP_COHERENT; /* * Choose a busdma buffer allocator based on memory type flags. * If the tag's COHERENT flag is set, that means normal memory * is already coherent, use the normal allocator. */ if ((flags & BUS_DMA_COHERENT) && ((dmat->flags & BUS_DMA_COHERENT) == 0)) { memattr = VM_MEMATTR_UNCACHEABLE; ba = coherent_allocator; } else { memattr = VM_MEMATTR_DEFAULT; ba = standard_allocator; } /* * Try to find a bufzone in the allocator that holds a cache of buffers * of the right size for this request. If the buffer is too big to be * held in the allocator cache, this returns NULL. */ bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); /* * Allocate the buffer from the uma(9) allocator if... * - It's small enough to be in the allocator (bufzone not NULL). * - The alignment constraint isn't larger than the allocation size * (the allocator aligns buffers to their size boundaries). * - There's no need to handle lowaddr/highaddr exclusion zones. * else allocate non-contiguous pages if... * - The page count that could get allocated doesn't exceed * nsegments also when the maximum segment size is less * than PAGE_SIZE. * - The alignment constraint isn't larger than a page boundary. * - There are no boundary-crossing constraints. * else allocate a block of contiguous pages because one or more of the * constraints is something that only the contig allocator can fulfill. 
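A worked example of the middle case, using illustrative numbers rather than values from this change: with maxsize = 64 KB, maxsegsz >= PAGE_SIZE, and 4 KB pages, howmany(65536, 4096) = 16, so a tag with nsegments >= 16, alignment <= PAGE_SIZE, and a boundary that is zero or a multiple of PAGE_SIZE can be satisfied by kmem_alloc_attr() with discontiguous pages; the same request with nsegments = 1 has to fall through to kmem_alloc_contig().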
*/ if (bufzone != NULL && dmat->alignment <= bufzone->size && !exclusion_bounce(dmat)) { *vaddr = uma_zalloc(bufzone->umazone, mflags); } else if (dmat->nsegments >= howmany(dmat->maxsize, MIN(dmat->maxsegsz, PAGE_SIZE)) && dmat->alignment <= PAGE_SIZE && (dmat->boundary % PAGE_SIZE) == 0) { *vaddr = (void *)kmem_alloc_attr(dmat->maxsize, mflags, 0, dmat->lowaddr, memattr); } else { - *vaddr = (void *)kmem_alloc_contig(kernel_arena, dmat->maxsize, - mflags, 0, dmat->lowaddr, dmat->alignment, dmat->boundary, - memattr); + *vaddr = (void *)kmem_alloc_contig(dmat->maxsize, mflags, 0, + dmat->lowaddr, dmat->alignment, dmat->boundary, memattr); } if (*vaddr == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); free(map, M_BUSDMA); *mapp = NULL; return (ENOMEM); } if (map->flags & DMAMAP_COHERENT) atomic_add_32(&maps_coherent, 1); atomic_add_32(&maps_dmamem, 1); atomic_add_32(&maps_total, 1); dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, 0); return (0); } /* * Free a piece of memory that was allocated via bus_dmamem_alloc, along with * its associated map. */ void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { struct busdma_bufzone *bufzone; busdma_bufalloc_t ba; if ((map->flags & DMAMAP_COHERENT) && ((dmat->flags & BUS_DMA_COHERENT) == 0)) ba = coherent_allocator; else ba = standard_allocator; bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); if (bufzone != NULL && dmat->alignment <= bufzone->size && !exclusion_bounce(dmat)) uma_zfree(bufzone->umazone, vaddr); else kmem_free(kernel_arena, (vm_offset_t)vaddr, dmat->maxsize); dmat->map_count--; if (map->flags & DMAMAP_COHERENT) atomic_subtract_32(&maps_coherent, 1); atomic_subtract_32(&maps_total, 1); atomic_subtract_32(&maps_dmamem, 1); free(map, M_BUSDMA); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); } static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { bus_addr_t curaddr; bus_size_t sgsize; if (map->pagesneeded == 0) { CTR5(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d" " map= %p, pagesneeded= %d", dmat->lowaddr, dmat->boundary, dmat->alignment, map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->maxsegsz); if (must_bounce(dmat, map, curaddr, sgsize) != 0) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); map->pagesneeded++; } curaddr += sgsize; buflen -= sgsize; } CTR1(KTR_BUSDMA, "pagesneeded= %d", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, pmap_t pmap, bus_dmamap_t map, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; bus_addr_t paddr; if (map->pagesneeded == 0) { CTR5(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d" " map= %p, pagesneeded= %d", dmat->lowaddr, dmat->boundary, dmat->alignment, map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = (vm_offset_t)buf; vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { if (__predict_true(pmap == kernel_pmap)) paddr = pmap_kextract(vaddr); else paddr = pmap_extract(pmap, vaddr); if (must_bounce(dmat, map, paddr, min(vendaddr - vaddr, (PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK)))) != 0) { map->pagesneeded++; } vaddr += (PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK)); } CTR1(KTR_BUSDMA, 
"pagesneeded= %d", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { map->pagesneeded = 0; mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->boundary - 1); if (dmat->boundary > 0) { baddr = (curaddr + dmat->boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } /* * Insert chunk into a segment, coalescing with * previous segment if possible. */ seg = *segp; if (seg == -1) { seg = 0; segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && (dmat->boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. */ int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { bus_addr_t curaddr; bus_addr_t sl_end = 0; bus_size_t sgsize; struct sync_list *sl; int error; if (segs == NULL) segs = map->segments; counter_u64_add(maploads_total, 1); counter_u64_add(maploads_physmem, 1); if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { counter_u64_add(maploads_bounced, 1); error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } sl = map->slist + map->sync_count - 1; while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->maxsegsz); if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr, sgsize)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } else if ((dmat->flags & BUS_DMA_COHERENT) == 0) { if (map->sync_count > 0) sl_end = sl->paddr + sl->datacount; if (map->sync_count == 0 || curaddr != sl_end) { if (++map->sync_count > dmat->nsegments) break; sl++; sl->vaddr = 0; sl->paddr = curaddr; sl->datacount = sgsize; sl->pages = PHYS_TO_VM_PAGE(curaddr); KASSERT(sl->pages != NULL, ("%s: page at PA:0x%08lx is not in " "vm_page_array", __func__, curaddr)); } else sl->datacount += sgsize; } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ if (buflen != 0) { bus_dmamap_unload(dmat, map); return (EFBIG); /* XXX better return value here? 
*/ } return (0); } int _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags, segs, segp)); } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrance, and the ending segment on exit. */ int _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize; bus_addr_t curaddr; bus_addr_t sl_pend = 0; vm_offset_t kvaddr, vaddr, sl_vend = 0; struct sync_list *sl; int error; counter_u64_add(maploads_total, 1); if (map->flags & DMAMAP_COHERENT) counter_u64_add(maploads_coherent, 1); if (map->flags & DMAMAP_DMAMEM_ALLOC) counter_u64_add(maploads_dmamem, 1); if (segs == NULL) segs = map->segments; if (flags & BUS_DMA_LOAD_MBUF) { counter_u64_add(maploads_mbuf, 1); map->flags |= DMAMAP_MBUF; } if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) { _bus_dmamap_count_pages(dmat, pmap, map, buf, buflen, flags); if (map->pagesneeded != 0) { counter_u64_add(maploads_bounced, 1); error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } sl = map->slist + map->sync_count - 1; vaddr = (vm_offset_t)buf; while (buflen > 0) { /* * Get the physical address for this segment. */ if (__predict_true(pmap == kernel_pmap)) { curaddr = pmap_kextract(vaddr); kvaddr = vaddr; } else { curaddr = pmap_extract(pmap, vaddr); kvaddr = 0; } /* * Compute the segment size, and adjust counts. */ sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); if (sgsize > dmat->maxsegsz) sgsize = dmat->maxsegsz; if (buflen < sgsize) sgsize = buflen; if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr, sgsize)) { curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, sgsize); } else if ((dmat->flags & BUS_DMA_COHERENT) == 0) { if (map->sync_count > 0) { sl_pend = sl->paddr + sl->datacount; sl_vend = sl->vaddr + sl->datacount; } if (map->sync_count == 0 || (kvaddr != 0 && kvaddr != sl_vend) || (curaddr != sl_pend)) { if (++map->sync_count > dmat->nsegments) goto cleanup; sl++; sl->vaddr = kvaddr; sl->paddr = curaddr; if (kvaddr != 0) { sl->pages = NULL; } else { sl->pages = PHYS_TO_VM_PAGE(curaddr); KASSERT(sl->pages != NULL, ("%s: page at PA:0x%08lx is not " "in vm_page_array", __func__, curaddr)); } sl->datacount = sgsize; } else sl->datacount += sgsize; } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } cleanup: /* * Did we fit? */ if (buflen != 0) { bus_dmamap_unload(dmat, map); return (EFBIG); /* XXX better return value here? */ } return (0); } void _bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { map->mem = *mem; map->dmat = dmat; map->callback = callback; map->callback_arg = callback_arg; } bus_dma_segment_t * _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { if (segs == NULL) segs = map->segments; return (segs); } /* * Release the mapping held by map. 
*/ void bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; struct bounce_zone *bz; if ((bz = dmat->bounce_zone) != NULL) { while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } bz = dmat->bounce_zone; bz->free_bpages += map->pagesreserved; bz->reserved_bpages -= map->pagesreserved; map->pagesreserved = 0; map->pagesneeded = 0; } map->sync_count = 0; map->flags &= ~DMAMAP_MBUF; } static void dma_preread_safe(vm_offset_t va, vm_paddr_t pa, vm_size_t size) { /* * Write back any partial cachelines immediately before and * after the DMA region. We don't need to round the address * down to the nearest cacheline or specify the exact size, * as dcache_wb_poc() will do the rounding for us and works * at cacheline granularity. */ if (va & BUSDMA_DCACHE_MASK) dcache_wb_poc(va, pa, 1); if ((va + size) & BUSDMA_DCACHE_MASK) dcache_wb_poc(va + size, pa + size, 1); dcache_inv_poc_dma(va, pa, size); } static void dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op) { uint32_t len, offset; vm_page_t m; vm_paddr_t pa; vm_offset_t va, tempva; bus_size_t size; offset = sl->paddr & PAGE_MASK; m = sl->pages; size = sl->datacount; pa = sl->paddr; for ( ; size != 0; size -= len, pa += len, offset = 0, ++m) { tempva = 0; if (sl->vaddr == 0) { len = min(PAGE_SIZE - offset, size); tempva = pmap_quick_enter_page(m); va = tempva | offset; KASSERT(pa == (VM_PAGE_TO_PHYS(m) | offset), ("unexpected vm_page_t phys: 0x%08x != 0x%08x", VM_PAGE_TO_PHYS(m) | offset, pa)); } else { len = sl->datacount; va = sl->vaddr; } switch (op) { case BUS_DMASYNC_PREWRITE: case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD: dcache_wb_poc(va, pa, len); break; case BUS_DMASYNC_PREREAD: /* * An mbuf may start in the middle of a cacheline. There * will be no cpu writes to the beginning of that line * (which contains the mbuf header) while dma is in * progress. Handle that case by doing a writeback of * just the first cacheline before invalidating the * overall buffer. Any mbuf in a chain may have this * misalignment. Buffers which are not mbufs bounce if * they are not aligned to a cacheline. */ dma_preread_safe(va, pa, len); break; case BUS_DMASYNC_POSTREAD: case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE: dcache_inv_poc(va, pa, len); break; default: panic("unsupported combination of sync operations: " "0x%08x\n", op); } if (tempva != 0) pmap_quick_remove_page(tempva); } } void bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; struct sync_list *sl, *end; vm_offset_t datavaddr, tempvaddr; if (op == BUS_DMASYNC_POSTWRITE) return; /* * If the buffer was from user space, it is possible that this is not * the same vm map, especially on a POST operation. It's not clear that * dma on userland buffers can work at all right now. To be safe, until * we're able to test direct userland dma, panic on a map mismatch. */ if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " "performing bounce", __func__, dmat, dmat->flags, op); /* * For PREWRITE do a writeback. Clean the caches from the * innermost to the outermost levels. 
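For reference, the bounce writeback and invalidate sequences below pair with the usual driver-side sync discipline. A minimal, hypothetical transmit path follows; the softc fields are illustrative assumptions, not part of this change.

/*
 * Hypothetical example: CPU fills a buffer, then hands it to the device.
 */
static void
example_start_tx(struct example_softc *sc)
{
	/* Flush dirty cachelines so the device sees the CPU's writes. */
	bus_dmamap_sync(sc->buf_tag, sc->buf_map, BUS_DMASYNC_PREWRITE);
	/* ...program the device to start the transfer (omitted)... */
}

static void
example_tx_done(struct example_softc *sc)
{
	/*
	 * POSTWRITE is a no-op in this implementation, but calling it
	 * keeps the driver correct across busdma back ends.
	 */
	bus_dmamap_sync(sc->buf_tag, sc->buf_map, BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(sc->buf_tag, sc->buf_map);
}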
*/ if (op & BUS_DMASYNC_PREWRITE) { while (bpage != NULL) { tempvaddr = 0; datavaddr = bpage->datavaddr; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page( bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)datavaddr, (void *)bpage->vaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); if ((dmat->flags & BUS_DMA_COHERENT) == 0) dcache_wb_poc(bpage->vaddr, bpage->busaddr, bpage->datacount); bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } /* * Do an invalidate for PREREAD unless a writeback was already * done above due to PREWRITE also being set. The reason for a * PREREAD invalidate is to prevent dirty lines currently in the * cache from being evicted during the DMA. If a writeback was * done due to PREWRITE also being set there will be no dirty * lines and the POSTREAD invalidate handles the rest. The * invalidate is done from the innermost to outermost level. If * L2 were done first, a dirty cacheline could be automatically * evicted from L1 before we invalidated it, re-dirtying the L2. */ if ((op & BUS_DMASYNC_PREREAD) && !(op & BUS_DMASYNC_PREWRITE)) { bpage = STAILQ_FIRST(&map->bpages); while (bpage != NULL) { if ((dmat->flags & BUS_DMA_COHERENT) == 0) dcache_inv_poc_dma(bpage->vaddr, bpage->busaddr, bpage->datacount); bpage = STAILQ_NEXT(bpage, links); } } /* * Re-invalidate the caches on a POSTREAD, even though they were * already invalidated at PREREAD time. Aggressive prefetching * due to accesses to other data near the dma buffer could have * brought buffer data into the caches which is now stale. The * caches are invalidated from the outermost to innermost; the * prefetches could be happening right now, and if L1 were * invalidated first, stale L2 data could be prefetched into L1. */ if (op & BUS_DMASYNC_POSTREAD) { while (bpage != NULL) { if ((dmat->flags & BUS_DMA_COHERENT) == 0) dcache_inv_poc(bpage->vaddr, bpage->busaddr, bpage->datacount); tempvaddr = 0; datavaddr = bpage->datavaddr; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page( bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)bpage->vaddr, (void *)datavaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } } /* * For COHERENT memory no cache maintenance is necessary, but ensure all * writes have reached memory for the PREWRITE case. No action is * needed for a PREREAD without PREWRITE also set, because that would * imply that the cpu had written to the COHERENT buffer and expected * the dma device to see that change, and by definition a PREWRITE sync * is required to make that happen. */ if (map->flags & DMAMAP_COHERENT) { if (op & BUS_DMASYNC_PREWRITE) { dsb(); if ((dmat->flags & BUS_DMA_COHERENT) == 0) cpu_l2cache_drain_writebuf(); } return; } /* * Cache maintenance for normal (non-COHERENT non-bounce) buffers. All * the comments about the sequences for flushing cache levels in the * bounce buffer code above apply here as well. In particular, the fact * that the sequence is inner-to-outer for PREREAD invalidation and * outer-to-inner for POSTREAD invalidation is not a mistake. 
*/ if (map->sync_count != 0) { sl = &map->slist[0]; end = &map->slist[map->sync_count]; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " "performing sync", __func__, dmat, dmat->flags, op); for ( ; sl != end; ++sl) dma_dcache_sync(sl, op); } } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->alignment <= bz->alignment) && (dmat->lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_BUSDMA, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->lowaddr; bz->alignment = MAX(dmat->alignment, PAGE_SIZE); bz->map_count = 0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? 
*/ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests (pages bounced)"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); SYSCTL_ADD_ULONG(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_BUSDMA, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_BOUNCE, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_BUSDMA); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); KASSERT(map != NULL, ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved. 
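For example, with an illustrative client physical address of 0x20345678 and 4 KB pages, BUS_DMA_KEEP_PG_OFFSET ORs the page offset 0x678 into both the kva and the bus address of the bounce page, so the bounced data sits at the same offset within its page as the original data; callers whose devices are sensitive to the low address bits rely on this.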
*/ bpage->vaddr |= addr & PAGE_MASK; bpage->busaddr |= addr & PAGE_MASK; } bpage->datavaddr = vaddr; bpage->datapage = PHYS_TO_VM_PAGE(addr); bpage->dataoffs = addr & PAGE_MASK; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; dmat->lockfunc(dmat->lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); dmat->lockfunc(dmat->lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } Index: head/sys/arm/arm/pmap-v6.c =================================================================== --- head/sys/arm/arm/pmap-v6.c (revision 338106) +++ head/sys/arm/arm/pmap-v6.c (revision 338107) @@ -1,6985 +1,6984 @@ /*- * SPDX-License-Identifier: BSD-3-Clause AND BSD-2-Clause-FreeBSD * * Copyright (c) 1991 Regents of the University of California. * Copyright (c) 1994 John S. Dyson * Copyright (c) 1994 David Greenman * Copyright (c) 2005-2010 Alan L. Cox * Copyright (c) 2014-2016 Svatopluk Kraus * Copyright (c) 2014-2016 Michal Meloun * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. 
*/ #include "opt_vm.h" #include "opt_pmap.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #ifndef DIAGNOSTIC #define PMAP_INLINE __inline #else #define PMAP_INLINE #endif #ifdef PMAP_DEBUG static void pmap_zero_page_check(vm_page_t m); void pmap_debug(int level); int pmap_pid_dump(int pid); #define PDEBUG(_lev_,_stat_) \ if (pmap_debug_level >= (_lev_)) \ ((_stat_)) #define dprintf printf int pmap_debug_level = 1; #else /* PMAP_DEBUG */ #define PDEBUG(_lev_,_stat_) /* Nothing */ #define dprintf(x, arg...) #endif /* PMAP_DEBUG */ /* * Level 2 page tables map definion ('max' is excluded). */ #define PT2V_MIN_ADDRESS ((vm_offset_t)PT2MAP) #define PT2V_MAX_ADDRESS ((vm_offset_t)PT2MAP + PT2MAP_SIZE) #define UPT2V_MIN_ADDRESS ((vm_offset_t)PT2MAP) #define UPT2V_MAX_ADDRESS \ ((vm_offset_t)(PT2MAP + (KERNBASE >> PT2MAP_SHIFT))) /* * Promotion to a 1MB (PTE1) page mapping requires that the corresponding * 4KB (PTE2) page mappings have identical settings for the following fields: */ #define PTE2_PROMOTE (PTE2_V | PTE2_A | PTE2_NM | PTE2_S | PTE2_NG | \ PTE2_NX | PTE2_RO | PTE2_U | PTE2_W | \ PTE2_ATTR_MASK) #define PTE1_PROMOTE (PTE1_V | PTE1_A | PTE1_NM | PTE1_S | PTE1_NG | \ PTE1_NX | PTE1_RO | PTE1_U | PTE1_W | \ PTE1_ATTR_MASK) #define ATTR_TO_L1(l2_attr) ((((l2_attr) & L2_TEX0) ? L1_S_TEX0 : 0) | \ (((l2_attr) & L2_C) ? L1_S_C : 0) | \ (((l2_attr) & L2_B) ? L1_S_B : 0) | \ (((l2_attr) & PTE2_A) ? PTE1_A : 0) | \ (((l2_attr) & PTE2_NM) ? PTE1_NM : 0) | \ (((l2_attr) & PTE2_S) ? PTE1_S : 0) | \ (((l2_attr) & PTE2_NG) ? PTE1_NG : 0) | \ (((l2_attr) & PTE2_NX) ? PTE1_NX : 0) | \ (((l2_attr) & PTE2_RO) ? PTE1_RO : 0) | \ (((l2_attr) & PTE2_U) ? PTE1_U : 0) | \ (((l2_attr) & PTE2_W) ? PTE1_W : 0)) #define ATTR_TO_L2(l1_attr) ((((l1_attr) & L1_S_TEX0) ? L2_TEX0 : 0) | \ (((l1_attr) & L1_S_C) ? L2_C : 0) | \ (((l1_attr) & L1_S_B) ? L2_B : 0) | \ (((l1_attr) & PTE1_A) ? PTE2_A : 0) | \ (((l1_attr) & PTE1_NM) ? PTE2_NM : 0) | \ (((l1_attr) & PTE1_S) ? PTE2_S : 0) | \ (((l1_attr) & PTE1_NG) ? PTE2_NG : 0) | \ (((l1_attr) & PTE1_NX) ? PTE2_NX : 0) | \ (((l1_attr) & PTE1_RO) ? PTE2_RO : 0) | \ (((l1_attr) & PTE1_U) ? PTE2_U : 0) | \ (((l1_attr) & PTE1_W) ? PTE2_W : 0)) /* * PTE2 descriptors creation macros. */ #define PTE2_ATTR_DEFAULT vm_memattr_to_pte2(VM_MEMATTR_DEFAULT) #define PTE2_ATTR_PT vm_memattr_to_pte2(pt_memattr) #define PTE2_KPT(pa) PTE2_KERN(pa, PTE2_AP_KRW, PTE2_ATTR_PT) #define PTE2_KPT_NG(pa) PTE2_KERN_NG(pa, PTE2_AP_KRW, PTE2_ATTR_PT) #define PTE2_KRW(pa) PTE2_KERN(pa, PTE2_AP_KRW, PTE2_ATTR_DEFAULT) #define PTE2_KRO(pa) PTE2_KERN(pa, PTE2_AP_KR, PTE2_ATTR_DEFAULT) #define PV_STATS #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif /* * The boot_pt1 is used temporary in very early boot stage as L1 page table. 
* We can init many things with no memory allocation thanks to its static * allocation and this brings two main advantages: * (1) other cores can be started very simply, * (2) various boot loaders can be supported as its arguments can be processed * in virtual address space and can be moved to safe location before * first allocation happened. * Only disadvantage is that boot_pt1 is used only in very early boot stage. * However, the table is uninitialized and so lays in bss. Therefore kernel * image size is not influenced. * * QQQ: In the future, maybe, boot_pt1 can be used for soft reset and * CPU suspend/resume game. */ extern pt1_entry_t boot_pt1[]; vm_paddr_t base_pt1; pt1_entry_t *kern_pt1; pt2_entry_t *kern_pt2tab; pt2_entry_t *PT2MAP; static uint32_t ttb_flags; static vm_memattr_t pt_memattr; ttb_entry_t pmap_kern_ttb; struct pmap kernel_pmap_store; LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps; static struct mtx allpmaps_lock; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ static vm_offset_t kernel_vm_end_new; vm_offset_t kernel_vm_end = KERNBASE + NKPT2PG * NPT2_IN_PG * PTE1_SIZE; vm_offset_t vm_max_kernel_address; vm_paddr_t kernel_l1pa; static struct rwlock __aligned(CACHE_LINE_SIZE) pvh_global_lock; /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; static struct md_page *pv_table; /* XXX: Is it used only the list in md_page? */ static int shpgperproc = PMAP_SHPGPERPROC; struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ int pv_maxchunks; /* How many chunks we have KVA for */ vm_offset_t pv_vafree; /* freelist stored in the PTE */ vm_paddr_t first_managed_pa; #define pa_to_pvh(pa) (&pv_table[pte1_index(pa - first_managed_pa)]) /* * All those kernel PT submaps that BSD is so fond of */ caddr_t _tmppt = 0; /* * Crashdump maps. */ static caddr_t crashdumpmap; static pt2_entry_t *PMAP1 = NULL, *PMAP2; static pt2_entry_t *PADDR1 = NULL, *PADDR2; #ifdef DDB static pt2_entry_t *PMAP3; static pt2_entry_t *PADDR3; static int PMAP3cpu __unused; /* for SMP only */ #endif #ifdef SMP static int PMAP1cpu; static int PMAP1changedcpu; SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, &PMAP1changedcpu, 0, "Number of times pmap_pte2_quick changed CPU with same PMAP1"); #endif static int PMAP1changed; SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, &PMAP1changed, 0, "Number of times pmap_pte2_quick changed PMAP1"); static int PMAP1unchanged; SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, &PMAP1unchanged, 0, "Number of times pmap_pte2_quick didn't change PMAP1"); static struct mtx PMAP2mutex; /* * Internal flags for pmap_enter()'s helper functions. */ #define PMAP_ENTER_NORECLAIM 0x1000000 /* Don't reclaim PV entries. */ #define PMAP_ENTER_NOREPLACE 0x2000000 /* Don't replace mappings. */ static __inline void pt2_wirecount_init(vm_page_t m); static boolean_t pmap_demote_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va); static int pmap_enter_pte1(pmap_t pmap, vm_offset_t va, pt1_entry_t pte1, u_int flags, vm_page_t m); void cache_icache_sync_fresh(vm_offset_t va, vm_paddr_t pa, vm_size_t size); /* * Function to set the debug level of the pmap code. 
*/ #ifdef PMAP_DEBUG void pmap_debug(int level) { pmap_debug_level = level; dprintf("pmap_debug: level=%d\n", pmap_debug_level); } #endif /* PMAP_DEBUG */ /* * This table must corespond with memory attribute configuration in vm.h. * First entry is used for normal system mapping. * * Device memory is always marked as shared. * Normal memory is shared only in SMP . * Not outer shareable bits are not used yet. * Class 6 cannot be used on ARM11. */ #define TEXDEF_TYPE_SHIFT 0 #define TEXDEF_TYPE_MASK 0x3 #define TEXDEF_INNER_SHIFT 2 #define TEXDEF_INNER_MASK 0x3 #define TEXDEF_OUTER_SHIFT 4 #define TEXDEF_OUTER_MASK 0x3 #define TEXDEF_NOS_SHIFT 6 #define TEXDEF_NOS_MASK 0x1 #define TEX(t, i, o, s) \ ((t) << TEXDEF_TYPE_SHIFT) | \ ((i) << TEXDEF_INNER_SHIFT) | \ ((o) << TEXDEF_OUTER_SHIFT | \ ((s) << TEXDEF_NOS_SHIFT)) static uint32_t tex_class[8] = { /* type inner cache outer cache */ TEX(PRRR_MEM, NMRR_WB_WA, NMRR_WB_WA, 0), /* 0 - ATTR_WB_WA */ TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 1 - ATTR_NOCACHE */ TEX(PRRR_DEV, NMRR_NC, NMRR_NC, 0), /* 2 - ATTR_DEVICE */ TEX(PRRR_SO, NMRR_NC, NMRR_NC, 0), /* 3 - ATTR_SO */ TEX(PRRR_MEM, NMRR_WT, NMRR_WT, 0), /* 4 - ATTR_WT */ TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 5 - NOT USED YET */ TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 6 - NOT USED YET */ TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 7 - NOT USED YET */ }; #undef TEX static uint32_t pte2_attr_tab[8] = { PTE2_ATTR_WB_WA, /* 0 - VM_MEMATTR_WB_WA */ PTE2_ATTR_NOCACHE, /* 1 - VM_MEMATTR_NOCACHE */ PTE2_ATTR_DEVICE, /* 2 - VM_MEMATTR_DEVICE */ PTE2_ATTR_SO, /* 3 - VM_MEMATTR_SO */ PTE2_ATTR_WT, /* 4 - VM_MEMATTR_WRITE_THROUGH */ 0, /* 5 - NOT USED YET */ 0, /* 6 - NOT USED YET */ 0 /* 7 - NOT USED YET */ }; CTASSERT(VM_MEMATTR_WB_WA == 0); CTASSERT(VM_MEMATTR_NOCACHE == 1); CTASSERT(VM_MEMATTR_DEVICE == 2); CTASSERT(VM_MEMATTR_SO == 3); CTASSERT(VM_MEMATTR_WRITE_THROUGH == 4); #define VM_MEMATTR_END (VM_MEMATTR_WRITE_THROUGH + 1) boolean_t pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode) { return (mode >= 0 && mode < VM_MEMATTR_END); } static inline uint32_t vm_memattr_to_pte2(vm_memattr_t ma) { KASSERT((u_int)ma < VM_MEMATTR_END, ("%s: bad vm_memattr_t %d", __func__, ma)); return (pte2_attr_tab[(u_int)ma]); } static inline uint32_t vm_page_pte2_attr(vm_page_t m) { return (vm_memattr_to_pte2(m->md.pat_mode)); } /* * Convert TEX definition entry to TTB flags. */ static uint32_t encode_ttb_flags(int idx) { uint32_t inner, outer, nos, reg; inner = (tex_class[idx] >> TEXDEF_INNER_SHIFT) & TEXDEF_INNER_MASK; outer = (tex_class[idx] >> TEXDEF_OUTER_SHIFT) & TEXDEF_OUTER_MASK; nos = (tex_class[idx] >> TEXDEF_NOS_SHIFT) & TEXDEF_NOS_MASK; reg = nos << 5; reg |= outer << 3; if (cpuinfo.coherent_walk) reg |= (inner & 0x1) << 6; reg |= (inner & 0x2) >> 1; #ifdef SMP ARM_SMP_UP( reg |= 1 << 1, ); #endif return reg; } /* * Set TEX remapping registers in current CPU. */ void pmap_set_tex(void) { uint32_t prrr, nmrr; uint32_t type, inner, outer, nos; int i; #ifdef PMAP_PTE_NOCACHE /* XXX fixme */ if (cpuinfo.coherent_walk) { pt_memattr = VM_MEMATTR_WB_WA; ttb_flags = encode_ttb_flags(0); } else { pt_memattr = VM_MEMATTR_NOCACHE; ttb_flags = encode_ttb_flags(1); } #else pt_memattr = VM_MEMATTR_WB_WA; ttb_flags = encode_ttb_flags(0); #endif prrr = 0; nmrr = 0; /* Build remapping register from TEX classes. 
*/ for (i = 0; i < 8; i++) { type = (tex_class[i] >> TEXDEF_TYPE_SHIFT) & TEXDEF_TYPE_MASK; inner = (tex_class[i] >> TEXDEF_INNER_SHIFT) & TEXDEF_INNER_MASK; outer = (tex_class[i] >> TEXDEF_OUTER_SHIFT) & TEXDEF_OUTER_MASK; nos = (tex_class[i] >> TEXDEF_NOS_SHIFT) & TEXDEF_NOS_MASK; prrr |= type << (i * 2); prrr |= nos << (i + 24); nmrr |= inner << (i * 2); nmrr |= outer << (i * 2 + 16); } /* Add shareable bits for device memory. */ prrr |= PRRR_DS0 | PRRR_DS1; /* Add shareable bits for normal memory in SMP case. */ #ifdef SMP ARM_SMP_UP( prrr |= PRRR_NS1, ); #endif cp15_prrr_set(prrr); cp15_nmrr_set(nmrr); /* Caches are disabled, so full TLB flush should be enough. */ tlb_flush_all_local(); } /* * Remap one vm_meattr class to another one. This can be useful as * workaround for SOC errata, e.g. if devices must be accessed using * SO memory class. * * !!! Please note that this function is absolutely last resort thing. * It should not be used under normal circumstances. !!! * * Usage rules: * - it shall be called after pmap_bootstrap_prepare() and before * cpu_mp_start() (thus only on boot CPU). In practice, it's expected * to be called from platform_attach() or platform_late_init(). * * - if remapping doesn't change caching mode, or until uncached class * is remapped to any kind of cached one, then no other restriction exists. * * - if pmap_remap_vm_attr() changes caching mode, but both (original and * remapped) remain cached, then caller is resposible for calling * of dcache_wbinv_poc_all(). * * - remapping of any kind of cached class to uncached is not permitted. */ void pmap_remap_vm_attr(vm_memattr_t old_attr, vm_memattr_t new_attr) { int old_idx, new_idx; /* Map VM memattrs to indexes to tex_class table. */ old_idx = PTE2_ATTR2IDX(pte2_attr_tab[(int)old_attr]); new_idx = PTE2_ATTR2IDX(pte2_attr_tab[(int)new_attr]); /* Replace TEX attribute and apply it. */ tex_class[old_idx] = tex_class[new_idx]; pmap_set_tex(); } /* * KERNBASE must be multiple of NPT2_IN_PG * PTE1_SIZE. In other words, * KERNBASE is mapped by first L2 page table in L2 page table page. It * meets same constrain due to PT2MAP being placed just under KERNBASE. */ CTASSERT((KERNBASE & (NPT2_IN_PG * PTE1_SIZE - 1)) == 0); CTASSERT((KERNBASE - VM_MAXUSER_ADDRESS) >= PT2MAP_SIZE); /* * In crazy dreams, PAGE_SIZE could be a multiple of PTE2_SIZE in general. * For now, anyhow, the following check must be fulfilled. */ CTASSERT(PAGE_SIZE == PTE2_SIZE); /* * We don't want to mess up MI code with all MMU and PMAP definitions, * so some things, which depend on other ones, are defined independently. * Now, it is time to check that we don't screw up something. */ CTASSERT(PDRSHIFT == PTE1_SHIFT); /* * Check L1 and L2 page table entries definitions consistency. */ CTASSERT(NB_IN_PT1 == (sizeof(pt1_entry_t) * NPTE1_IN_PT1)); CTASSERT(NB_IN_PT2 == (sizeof(pt2_entry_t) * NPTE2_IN_PT2)); /* * Check L2 page tables page consistency. */ CTASSERT(PAGE_SIZE == (NPT2_IN_PG * NB_IN_PT2)); CTASSERT((1 << PT2PG_SHIFT) == NPT2_IN_PG); /* * Check PT2TAB consistency. * PT2TAB_ENTRIES is defined as a division of NPTE1_IN_PT1 by NPT2_IN_PG. * This should be done without remainder. */ CTASSERT(NPTE1_IN_PT1 == (PT2TAB_ENTRIES * NPT2_IN_PG)); /* * A PT2MAP magic. * * All level 2 page tables (PT2s) are mapped continuously and accordingly * into PT2MAP address space. As PT2 size is less than PAGE_SIZE, this can * be done only if PAGE_SIZE is a multiple of PT2 size. All PT2s in one page * must be used together, but not necessary at once. 
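To make the usage rules for pmap_remap_vm_attr() concrete, here is a minimal sketch of how a platform might apply the remap as an errata workaround. The helper name and the choice of classes are hypothetical; only pmap_remap_vm_attr() itself comes from this file, and the call is assumed to happen from the platform's late-init hook, after pmap_bootstrap_prepare() and before cpu_mp_start().

/*
 * Hypothetical example: a SoC whose errata require device registers to be
 * accessed as Strongly Ordered memory could remap the DEVICE class to SO.
 */
static void
examplesoc_errata_init(void)
{
	/*
	 * Allowed case: DEVICE and SO are both uncached, so neither the
	 * "cached to uncached" restriction nor the dcache_wbinv_poc_all()
	 * requirement applies.
	 */
	pmap_remap_vm_attr(VM_MEMATTR_DEVICE, VM_MEMATTR_SO);
}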
The first PT2 in a page * must map things on correctly aligned address and the others must follow * in right order. */ #define NB_IN_PT2TAB (PT2TAB_ENTRIES * sizeof(pt2_entry_t)) #define NPT2_IN_PT2TAB (NB_IN_PT2TAB / NB_IN_PT2) #define NPG_IN_PT2TAB (NB_IN_PT2TAB / PAGE_SIZE) /* * Check PT2TAB consistency. * NPT2_IN_PT2TAB is defined as a division of NB_IN_PT2TAB by NB_IN_PT2. * NPG_IN_PT2TAB is defined as a division of NB_IN_PT2TAB by PAGE_SIZE. * The both should be done without remainder. */ CTASSERT(NB_IN_PT2TAB == (NPT2_IN_PT2TAB * NB_IN_PT2)); CTASSERT(NB_IN_PT2TAB == (NPG_IN_PT2TAB * PAGE_SIZE)); /* * The implementation was made general, however, with the assumption * bellow in mind. In case of another value of NPG_IN_PT2TAB, * the code should be once more rechecked. */ CTASSERT(NPG_IN_PT2TAB == 1); /* * Get offset of PT2 in a page * associated with given PT1 index. */ static __inline u_int page_pt2off(u_int pt1_idx) { return ((pt1_idx & PT2PG_MASK) * NB_IN_PT2); } /* * Get physical address of PT2 * associated with given PT2s page and PT1 index. */ static __inline vm_paddr_t page_pt2pa(vm_paddr_t pgpa, u_int pt1_idx) { return (pgpa + page_pt2off(pt1_idx)); } /* * Get first entry of PT2 * associated with given PT2s page and PT1 index. */ static __inline pt2_entry_t * page_pt2(vm_offset_t pgva, u_int pt1_idx) { return ((pt2_entry_t *)(pgva + page_pt2off(pt1_idx))); } /* * Get virtual address of PT2s page (mapped in PT2MAP) * which holds PT2 which holds entry which maps given virtual address. */ static __inline vm_offset_t pt2map_pt2pg(vm_offset_t va) { va &= ~(NPT2_IN_PG * PTE1_SIZE - 1); return ((vm_offset_t)pt2map_entry(va)); } /***************************************************************************** * * THREE pmap initialization milestones exist: * * locore.S * -> fundamental init (including MMU) in ASM * * initarm() * -> fundamental init continues in C * -> first available physical address is known * * pmap_bootstrap_prepare() -> FIRST PMAP MILESTONE (first epoch begins) * -> basic (safe) interface for physical address allocation is made * -> basic (safe) interface for virtual mapping is made * -> limited not SMP coherent work is possible * * -> more fundamental init continues in C * -> locks and some more things are available * -> all fundamental allocations and mappings are done * * pmap_bootstrap() -> SECOND PMAP MILESTONE (second epoch begins) * -> phys_avail[] and virtual_avail is set * -> control is passed to vm subsystem * -> physical and virtual address allocation are off limit * -> low level mapping functions, some SMP coherent, * are available, which cannot be used before vm subsystem * is being inited * * mi_startup() * -> vm subsystem is being inited * * pmap_init() -> THIRD PMAP MILESTONE (third epoch begins) * -> pmap is fully inited * *****************************************************************************/ /***************************************************************************** * * PMAP first stage initialization and utility functions * for pre-bootstrap epoch. 
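A small standalone illustration of the PT2-in-page arithmetic above, assuming the usual ARMv6/v7 short-descriptor values (4 KB pages and 1 KB L2 tables, so NPT2_IN_PG == 4 and PT2PG_MASK == 3). The constants are restated locally so the example compiles outside the kernel; it mirrors page_pt2off() but is not the kernel function.

#include <stdio.h>

#define NB_IN_PT2	1024u		/* 256 entries x 4 bytes */
#define NPT2_IN_PG	4u		/* PAGE_SIZE / NB_IN_PT2 */
#define PT2PG_MASK	(NPT2_IN_PG - 1)

/* Same computation as page_pt2off(): offset of a PT2 within its page. */
static unsigned
example_page_pt2off(unsigned pt1_idx)
{
	return ((pt1_idx & PT2PG_MASK) * NB_IN_PT2);
}

int
main(void)
{
	/*
	 * PT1 indices 0..3 share one PT2s page at offsets 0, 1K, 2K, 3K;
	 * index 4 starts the next page again at offset 0.
	 */
	for (unsigned idx = 0; idx <= 4; idx++)
		printf("pt1_idx %u -> PT2 page offset %u\n",
		    idx, example_page_pt2off(idx));
	return (0);
}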
* * After pmap_bootstrap_prepare() is called, the following functions * can be used: * * (1) strictly only for this stage functions for physical page allocations, * virtual space allocations, and mappings: * * vm_paddr_t pmap_preboot_get_pages(u_int num); * void pmap_preboot_map_pages(vm_paddr_t pa, vm_offset_t va, u_int num); * vm_offset_t pmap_preboot_reserve_pages(u_int num); * vm_offset_t pmap_preboot_get_vpages(u_int num); * void pmap_preboot_map_attr(vm_paddr_t pa, vm_offset_t va, vm_size_t size, * vm_prot_t prot, vm_memattr_t attr); * * (2) for all stages: * * vm_paddr_t pmap_kextract(vm_offset_t va); * * NOTE: This is not SMP coherent stage. * *****************************************************************************/ #define KERNEL_P2V(pa) \ ((vm_offset_t)((pa) - arm_physmem_kernaddr + KERNVIRTADDR)) #define KERNEL_V2P(va) \ ((vm_paddr_t)((va) - KERNVIRTADDR + arm_physmem_kernaddr)) static vm_paddr_t last_paddr; /* * Pre-bootstrap epoch page allocator. */ vm_paddr_t pmap_preboot_get_pages(u_int num) { vm_paddr_t ret; ret = last_paddr; last_paddr += num * PAGE_SIZE; return (ret); } /* * The fundamental initialization of PMAP stuff. * * Some things already happened in locore.S and some things could happen * before pmap_bootstrap_prepare() is called, so let's recall what is done: * 1. Caches are disabled. * 2. We are running on virtual addresses already with 'boot_pt1' * as L1 page table. * 3. So far, all virtual addresses can be converted to physical ones and * vice versa by the following macros: * KERNEL_P2V(pa) .... physical to virtual ones, * KERNEL_V2P(va) .... virtual to physical ones. * * What is done herein: * 1. The 'boot_pt1' is replaced by real kernel L1 page table 'kern_pt1'. * 2. PT2MAP magic is brought to live. * 3. Basic preboot functions for page allocations and mappings can be used. * 4. Everything is prepared for L1 cache enabling. * * Variations: * 1. To use second TTB register, so kernel and users page tables will be * separated. This way process forking - pmap_pinit() - could be faster, * it saves physical pages and KVA per a process, and it's simple change. * However, it will lead, due to hardware matter, to the following: * (a) 2G space for kernel and 2G space for users. * (b) 1G space for kernel in low addresses and 3G for users above it. * A question is: Is the case (b) really an option? Note that case (b) * does save neither physical memory and KVA. */ void pmap_bootstrap_prepare(vm_paddr_t last) { vm_paddr_t pt2pg_pa, pt2tab_pa, pa, size; vm_offset_t pt2pg_va; pt1_entry_t *pte1p; pt2_entry_t *pte2p; u_int i; uint32_t l1_attr; /* * Now, we are going to make real kernel mapping. Note that we are * already running on some mapping made in locore.S and we expect * that it's large enough to ensure nofault access to physical memory * allocated herein before switch. * * As kernel image and everything needed before are and will be mapped * by section mappings, we align last physical address to PTE1_SIZE. */ last_paddr = pte1_roundup(last); /* * Allocate and zero page(s) for kernel L1 page table. * * Note that it's first allocation on space which was PTE1_SIZE * aligned and as such base_pt1 is aligned to NB_IN_PT1 too. */ base_pt1 = pmap_preboot_get_pages(NPG_IN_PT1); kern_pt1 = (pt1_entry_t *)KERNEL_P2V(base_pt1); bzero((void*)kern_pt1, NB_IN_PT1); pte1_sync_range(kern_pt1, NB_IN_PT1); /* Allocate and zero page(s) for kernel PT2TAB. 
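A minimal sketch of how early board code could use the pre-bootstrap interface listed above; the helper name is hypothetical and the calls are only valid between pmap_bootstrap_prepare() and pmap_bootstrap(). pmap_preboot_get_vpages() bundles the three steps below (allocate, map, zero) into one call.

/*
 * Hypothetical early-init helper: grab two physical pages, back them
 * with KVA, and zero them.
 */
static vm_offset_t
example_preboot_setup(void)
{
	vm_paddr_t pa;
	vm_offset_t va;

	/* Physical pages come from a simple bump allocator (no free). */
	pa = pmap_preboot_get_pages(2);

	/* Reserve KVA for them and enter the mappings. */
	va = pmap_preboot_reserve_pages(2);
	pmap_preboot_map_pages(pa, va, 2);

	/* pmap_preboot_map_pages() does not zero, so do it here. */
	bzero((void *)va, 2 * PAGE_SIZE);
	return (va);
}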
*/ pt2tab_pa = pmap_preboot_get_pages(NPG_IN_PT2TAB); kern_pt2tab = (pt2_entry_t *)KERNEL_P2V(pt2tab_pa); bzero(kern_pt2tab, NB_IN_PT2TAB); pte2_sync_range(kern_pt2tab, NB_IN_PT2TAB); /* Allocate and zero page(s) for kernel L2 page tables. */ pt2pg_pa = pmap_preboot_get_pages(NKPT2PG); pt2pg_va = KERNEL_P2V(pt2pg_pa); size = NKPT2PG * PAGE_SIZE; bzero((void*)pt2pg_va, size); pte2_sync_range((pt2_entry_t *)pt2pg_va, size); /* * Add a physical memory segment (vm_phys_seg) corresponding to the * preallocated pages for kernel L2 page tables so that vm_page * structures representing these pages will be created. The vm_page * structures are required for promotion of the corresponding kernel * virtual addresses to section mappings. */ vm_phys_add_seg(pt2tab_pa, pmap_preboot_get_pages(0)); /* * Insert allocated L2 page table pages to PT2TAB and make * link to all PT2s in L1 page table. See how kernel_vm_end * is initialized. * * We play simple and safe. So every KVA will have underlaying * L2 page table, even kernel image mapped by sections. */ pte2p = kern_pt2tab_entry(KERNBASE); for (pa = pt2pg_pa; pa < pt2pg_pa + size; pa += PTE2_SIZE) pt2tab_store(pte2p++, PTE2_KPT(pa)); pte1p = kern_pte1(KERNBASE); for (pa = pt2pg_pa; pa < pt2pg_pa + size; pa += NB_IN_PT2) pte1_store(pte1p++, PTE1_LINK(pa)); /* Make section mappings for kernel. */ l1_attr = ATTR_TO_L1(PTE2_ATTR_DEFAULT); pte1p = kern_pte1(KERNBASE); for (pa = KERNEL_V2P(KERNBASE); pa < last; pa += PTE1_SIZE) pte1_store(pte1p++, PTE1_KERN(pa, PTE1_AP_KRW, l1_attr)); /* * Get free and aligned space for PT2MAP and make L1 page table links * to L2 page tables held in PT2TAB. * * Note that pages holding PT2s are stored in PT2TAB as pt2_entry_t * descriptors and PT2TAB page(s) itself is(are) used as PT2s. Thus * each entry in PT2TAB maps all PT2s in a page. This implies that * virtual address of PT2MAP must be aligned to NPT2_IN_PG * PTE1_SIZE. */ PT2MAP = (pt2_entry_t *)(KERNBASE - PT2MAP_SIZE); pte1p = kern_pte1((vm_offset_t)PT2MAP); for (pa = pt2tab_pa, i = 0; i < NPT2_IN_PT2TAB; i++, pa += NB_IN_PT2) { pte1_store(pte1p++, PTE1_LINK(pa)); } /* * Store PT2TAB in PT2TAB itself, i.e. self reference mapping. * Each pmap will hold own PT2TAB, so the mapping should be not global. */ pte2p = kern_pt2tab_entry((vm_offset_t)PT2MAP); for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) { pt2tab_store(pte2p++, PTE2_KPT_NG(pa)); } /* * Choose correct L2 page table and make mappings for allocations * made herein which replaces temporary locore.S mappings after a while. * Note that PT2MAP cannot be used until we switch to kern_pt1. * * Note, that these allocations started aligned on 1M section and * kernel PT1 was allocated first. Making of mappings must follow * order of physical allocations as we've used KERNEL_P2V() macro * for virtual addresses resolution. */ pte2p = kern_pt2tab_entry((vm_offset_t)kern_pt1); pt2pg_va = KERNEL_P2V(pte2_pa(pte2_load(pte2p))); pte2p = page_pt2(pt2pg_va, pte1_index((vm_offset_t)kern_pt1)); /* Make mapping for kernel L1 page table. */ for (pa = base_pt1, i = 0; i < NPG_IN_PT1; i++, pa += PTE2_SIZE) pte2_store(pte2p++, PTE2_KPT(pa)); /* Make mapping for kernel PT2TAB. */ for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) pte2_store(pte2p++, PTE2_KPT(pa)); /* Finally, switch from 'boot_pt1' to 'kern_pt1'. */ pmap_kern_ttb = base_pt1 | ttb_flags; cpuinfo_reinit_mmu(pmap_kern_ttb); /* * Initialize the first available KVA. As kernel image is mapped by * sections, we are leaving some gap behind. 
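A standalone illustration of what the PT2MAP window built above buys: the PTE2 that maps any virtual address can itself be found by simple pointer arithmetic. The layout assumed here (4 KB pages, a 4 MB PT2MAP of contiguous 1 KB L2 tables just below a KERNBASE of 0xc0000000) is the usual one but is restated locally as an assumption; the real lookup is pt2map_entry().

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define KERNBASE	0xc0000000u
#define PT2MAP_SIZE	(4u * 1024 * 1024)	/* 4096 L2 tables x 1 KB */
#define PT2MAP_BASE	(KERNBASE - PT2MAP_SIZE)

int
main(void)
{
	uint32_t va = 0xc0123000;	/* arbitrary kernel VA */
	uint32_t pte2_va;

	/* One 4-byte PTE2 per virtual page, in virtual-address order. */
	pte2_va = PT2MAP_BASE + (va >> PAGE_SHIFT) * 4;
	printf("PTE2 for 0x%08x lives at 0x%08x\n",
	    (unsigned)va, (unsigned)pte2_va);
	return (0);
}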
*/ virtual_avail = (vm_offset_t)kern_pt2tab + NPG_IN_PT2TAB * PAGE_SIZE; } /* * Setup L2 page table page for given KVA. * Used in pre-bootstrap epoch. * * Note that we have allocated NKPT2PG pages for L2 page tables in advance * and used them for mapping KVA starting from KERNBASE. However, this is not * enough. Vectors and devices need L2 page tables too. Note that they are * even above VM_MAX_KERNEL_ADDRESS. */ static __inline vm_paddr_t pmap_preboot_pt2pg_setup(vm_offset_t va) { pt2_entry_t *pte2p, pte2; vm_paddr_t pt2pg_pa; /* Get associated entry in PT2TAB. */ pte2p = kern_pt2tab_entry(va); /* Just return, if PT2s page exists already. */ pte2 = pt2tab_load(pte2p); if (pte2_is_valid(pte2)) return (pte2_pa(pte2)); KASSERT(va >= VM_MAX_KERNEL_ADDRESS, ("%s: NKPT2PG too small", __func__)); /* * Allocate page for PT2s and insert it to PT2TAB. * In other words, map it into PT2MAP space. */ pt2pg_pa = pmap_preboot_get_pages(1); pt2tab_store(pte2p, PTE2_KPT(pt2pg_pa)); /* Zero all PT2s in allocated page. */ bzero((void*)pt2map_pt2pg(va), PAGE_SIZE); pte2_sync_range((pt2_entry_t *)pt2map_pt2pg(va), PAGE_SIZE); return (pt2pg_pa); } /* * Setup L2 page table for given KVA. * Used in pre-bootstrap epoch. */ static void pmap_preboot_pt2_setup(vm_offset_t va) { pt1_entry_t *pte1p; vm_paddr_t pt2pg_pa, pt2_pa; /* Setup PT2's page. */ pt2pg_pa = pmap_preboot_pt2pg_setup(va); pt2_pa = page_pt2pa(pt2pg_pa, pte1_index(va)); /* Insert PT2 to PT1. */ pte1p = kern_pte1(va); pte1_store(pte1p, PTE1_LINK(pt2_pa)); } /* * Get L2 page entry associated with given KVA. * Used in pre-bootstrap epoch. */ static __inline pt2_entry_t* pmap_preboot_vtopte2(vm_offset_t va) { pt1_entry_t *pte1p; /* Setup PT2 if needed. */ pte1p = kern_pte1(va); if (!pte1_is_valid(pte1_load(pte1p))) /* XXX - sections ?! */ pmap_preboot_pt2_setup(va); return (pt2map_entry(va)); } /* * Pre-bootstrap epoch page(s) mapping(s). */ void pmap_preboot_map_pages(vm_paddr_t pa, vm_offset_t va, u_int num) { u_int i; pt2_entry_t *pte2p; /* Map all the pages. */ for (i = 0; i < num; i++) { pte2p = pmap_preboot_vtopte2(va); pte2_store(pte2p, PTE2_KRW(pa)); va += PAGE_SIZE; pa += PAGE_SIZE; } } /* * Pre-bootstrap epoch virtual space alocator. */ vm_offset_t pmap_preboot_reserve_pages(u_int num) { u_int i; vm_offset_t start, va; pt2_entry_t *pte2p; /* Allocate virtual space. */ start = va = virtual_avail; virtual_avail += num * PAGE_SIZE; /* Zero the mapping. */ for (i = 0; i < num; i++) { pte2p = pmap_preboot_vtopte2(va); pte2_store(pte2p, 0); va += PAGE_SIZE; } return (start); } /* * Pre-bootstrap epoch page(s) allocation and mapping(s). */ vm_offset_t pmap_preboot_get_vpages(u_int num) { vm_paddr_t pa; vm_offset_t va; /* Allocate physical page(s). */ pa = pmap_preboot_get_pages(num); /* Allocate virtual space. */ va = virtual_avail; virtual_avail += num * PAGE_SIZE; /* Map and zero all. */ pmap_preboot_map_pages(pa, va, num); bzero((void *)va, num * PAGE_SIZE); return (va); } /* * Pre-bootstrap epoch page mapping(s) with attributes. */ void pmap_preboot_map_attr(vm_paddr_t pa, vm_offset_t va, vm_size_t size, vm_prot_t prot, vm_memattr_t attr) { u_int num; u_int l1_attr, l1_prot, l2_prot, l2_attr; pt1_entry_t *pte1p; pt2_entry_t *pte2p; l2_prot = prot & VM_PROT_WRITE ? PTE2_AP_KRW : PTE2_AP_KR; l2_prot |= (prot & VM_PROT_EXECUTE) ? PTE2_X : PTE2_NX; l2_attr = vm_memattr_to_pte2(attr); l1_prot = ATTR_TO_L1(l2_prot); l1_attr = ATTR_TO_L1(l2_attr); /* Map all the pages. 
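As a usage sketch for pmap_preboot_map_attr() (declared in the pre-bootstrap interface above): the physical base, size and helper name are made up, and the KVA is assumed to be chosen by the platform. When both addresses are 1 MB aligned and the size allows, the function installs section mappings; otherwise it falls back to 4 KB pages.

/*
 * Hypothetical: map a 1 MB MMIO block, read/write, non-executable,
 * with Device memory attributes, during the pre-bootstrap epoch.
 */
static void
example_map_mmio(vm_offset_t va)
{
	pmap_preboot_map_attr(0x01c00000, va, 1024 * 1024,
	    VM_PROT_READ | VM_PROT_WRITE, VM_MEMATTR_DEVICE);
}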
*/ num = round_page(size); while (num > 0) { if ((((va | pa) & PTE1_OFFSET) == 0) && (num >= PTE1_SIZE)) { pte1p = kern_pte1(va); pte1_store(pte1p, PTE1_KERN(pa, l1_prot, l1_attr)); va += PTE1_SIZE; pa += PTE1_SIZE; num -= PTE1_SIZE; } else { pte2p = pmap_preboot_vtopte2(va); pte2_store(pte2p, PTE2_KERN(pa, l2_prot, l2_attr)); va += PAGE_SIZE; pa += PAGE_SIZE; num -= PAGE_SIZE; } } } /* * Extract from the kernel page table the physical address * that is mapped by the given virtual address "va". */ vm_paddr_t pmap_kextract(vm_offset_t va) { vm_paddr_t pa; pt1_entry_t pte1; pt2_entry_t pte2; pte1 = pte1_load(kern_pte1(va)); if (pte1_is_section(pte1)) { pa = pte1_pa(pte1) | (va & PTE1_OFFSET); } else if (pte1_is_link(pte1)) { /* * We should beware of concurrent promotion that changes * pte1 at this point. However, it's not a problem as PT2 * page is preserved by promotion in PT2TAB. So even if * it happens, using of PT2MAP is still safe. * * QQQ: However, concurrent removing is a problem which * ends in abort on PT2MAP space. Locking must be used * to deal with this. */ pte2 = pte2_load(pt2map_entry(va)); pa = pte2_pa(pte2) | (va & PTE2_OFFSET); } else { panic("%s: va %#x pte1 %#x", __func__, va, pte1); } return (pa); } /* * Extract from the kernel page table the physical address * that is mapped by the given virtual address "va". Also * return L2 page table entry which maps the address. * * This is only intended to be used for panic dumps. */ vm_paddr_t pmap_dump_kextract(vm_offset_t va, pt2_entry_t *pte2p) { vm_paddr_t pa; pt1_entry_t pte1; pt2_entry_t pte2; pte1 = pte1_load(kern_pte1(va)); if (pte1_is_section(pte1)) { pa = pte1_pa(pte1) | (va & PTE1_OFFSET); pte2 = pa | ATTR_TO_L2(pte1) | PTE2_V; } else if (pte1_is_link(pte1)) { pte2 = pte2_load(pt2map_entry(va)); pa = pte2_pa(pte2); } else { pte2 = 0; pa = 0; } if (pte2p != NULL) *pte2p = pte2; return (pa); } /***************************************************************************** * * PMAP second stage initialization and utility functions * for bootstrap epoch. * * After pmap_bootstrap() is called, the following functions for * mappings can be used: * * void pmap_kenter(vm_offset_t va, vm_paddr_t pa); * void pmap_kremove(vm_offset_t va); * vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, * int prot); * * NOTE: This is not SMP coherent stage. And physical page allocation is not * allowed during this stage. * *****************************************************************************/ /* * Initialize kernel PMAP locks and lists, kernel_pmap itself, and * reserve various virtual spaces for temporary mappings. */ void pmap_bootstrap(vm_offset_t firstaddr) { pt2_entry_t *unused __unused; struct pcpu *pc; /* * Initialize the kernel pmap (which is statically allocated). */ PMAP_LOCK_INIT(kernel_pmap); kernel_l1pa = (vm_paddr_t)kern_pt1; /* for libkvm */ kernel_pmap->pm_pt1 = kern_pt1; kernel_pmap->pm_pt2tab = kern_pt2tab; CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); /* * Initialize the global pv list lock. */ rw_init(&pvh_global_lock, "pmap pv global"); LIST_INIT(&allpmaps); /* * Request a spin mutex so that changes to allpmaps cannot be * preempted by smp_rendezvous_cpus(). */ mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); /* * Reserve some special page table entries/VA space for temporary * mapping of pages. 
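A tiny standalone illustration of the address split pmap_kextract() performs for the 1 MB section case above. The constants are restated locally and the example addresses are arbitrary; only the composition pa = pte1_pa | (va & PTE1_OFFSET) is taken from the function.

#include <stdio.h>
#include <stdint.h>

#define PTE1_SHIFT	20			/* 1 MB sections */
#define PTE1_OFFSET	((1u << PTE1_SHIFT) - 1)

int
main(void)
{
	uint32_t va = 0xc0123456;		/* kernel VA, arbitrary */
	uint32_t pte1_pa = 0x80100000;		/* section frame from PTE1 */
	uint32_t pa;

	/* Same composition as the pte1_is_section() branch. */
	pa = pte1_pa | (va & PTE1_OFFSET);
	printf("va 0x%08x -> pa 0x%08x\n", (unsigned)va, (unsigned)pa);
	return (0);
}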
*/ #define SYSMAP(c, p, v, n) do { \ v = (c)pmap_preboot_reserve_pages(n); \ p = pt2map_entry((vm_offset_t)v); \ } while (0) /* * Local CMAP1/CMAP2 are used for zeroing and copying pages. * Local CMAP2 is also used for data cache cleaning. */ pc = get_pcpu(); mtx_init(&pc->pc_cmap_lock, "SYSMAPS", NULL, MTX_DEF); SYSMAP(caddr_t, pc->pc_cmap1_pte2p, pc->pc_cmap1_addr, 1); SYSMAP(caddr_t, pc->pc_cmap2_pte2p, pc->pc_cmap2_addr, 1); SYSMAP(vm_offset_t, pc->pc_qmap_pte2p, pc->pc_qmap_addr, 1); /* * Crashdump maps. */ SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS); /* * _tmppt is used for reading arbitrary physical pages via /dev/mem. */ SYSMAP(caddr_t, unused, _tmppt, 1); /* * PADDR1 and PADDR2 are used by pmap_pte2_quick() and pmap_pte2(), * respectively. PADDR3 is used by pmap_pte2_ddb(). */ SYSMAP(pt2_entry_t *, PMAP1, PADDR1, 1); SYSMAP(pt2_entry_t *, PMAP2, PADDR2, 1); #ifdef DDB SYSMAP(pt2_entry_t *, PMAP3, PADDR3, 1); #endif mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF); /* * Note that in very short time in initarm(), we are going to * initialize phys_avail[] array and no further page allocation * can happen after that until vm subsystem will be initialized. */ kernel_vm_end_new = kernel_vm_end; virtual_end = vm_max_kernel_address; } static void pmap_init_reserved_pages(void) { struct pcpu *pc; vm_offset_t pages; int i; CPU_FOREACH(i) { pc = pcpu_find(i); /* * Skip if the mapping has already been initialized, * i.e. this is the BSP. */ if (pc->pc_cmap1_addr != 0) continue; mtx_init(&pc->pc_cmap_lock, "SYSMAPS", NULL, MTX_DEF); pages = kva_alloc(PAGE_SIZE * 3); if (pages == 0) panic("%s: unable to allocate KVA", __func__); pc->pc_cmap1_pte2p = pt2map_entry(pages); pc->pc_cmap2_pte2p = pt2map_entry(pages + PAGE_SIZE); pc->pc_qmap_pte2p = pt2map_entry(pages + (PAGE_SIZE * 2)); pc->pc_cmap1_addr = (caddr_t)pages; pc->pc_cmap2_addr = (caddr_t)(pages + PAGE_SIZE); pc->pc_qmap_addr = pages + (PAGE_SIZE * 2); } } SYSINIT(rpages_init, SI_SUB_CPU, SI_ORDER_ANY, pmap_init_reserved_pages, NULL); /* * The function can already be use in second initialization stage. * As such, the function DOES NOT call pmap_growkernel() where PT2 * allocation can happen. So if used, be sure that PT2 for given * virtual address is allocated already! * * Add a wired page to the kva. * Note: not SMP coherent. */ static __inline void pmap_kenter_prot_attr(vm_offset_t va, vm_paddr_t pa, uint32_t prot, uint32_t attr) { pt1_entry_t *pte1p; pt2_entry_t *pte2p; pte1p = kern_pte1(va); if (!pte1_is_valid(pte1_load(pte1p))) { /* XXX - sections ?! */ /* * This is a very low level function, so PT2 and particularly * PT2PG associated with given virtual address must be already * allocated. It's a pain mainly during pmap initialization * stage. However, called after pmap initialization with * virtual address not under kernel_vm_end will lead to * the same misery. */ if (!pte2_is_valid(pte2_load(kern_pt2tab_entry(va)))) panic("%s: kernel PT2 not allocated!", __func__); } pte2p = pt2map_entry(va); pte2_store(pte2p, PTE2_KERN(pa, prot, attr)); } PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { pmap_kenter_prot_attr(va, pa, PTE2_AP_KRW, PTE2_ATTR_DEFAULT); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt1_entry_t *pte1p; pt2_entry_t *pte2p; pte1p = kern_pte1(va); if (pte1_is_section(pte1_load(pte1p))) { pte1_clear(pte1p); } else { pte2p = pt2map_entry(va); pte2_clear(pte2p); } } /* * Share new kernel PT2PG with all pmaps. 
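A minimal sketch of the second-stage mapping primitives above. The helper is hypothetical; the KVA is assumed to already have its L2 page table allocated, as pmap_kenter_prot_attr() requires, and since pmap_kenter()/pmap_kremove() are not SMP coherent the caller handles TLB invalidation itself (tlb_flush() is the routine this file uses elsewhere).

/*
 * Hypothetical helper: temporarily map one physical page at a prepared
 * KVA, use it, then tear the mapping down again.
 */
static void
example_peek_page(vm_offset_t va, vm_paddr_t pa)
{
	pmap_kenter(va, pa);		/* enter mapping, not SMP coherent */
	tlb_flush(va);			/* caller is responsible for TLB */

	/* ... access the page through 'va' here ... */

	pmap_kremove(va);
	tlb_flush(va);
}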
* The caller is responsible for maintaining TLB consistency. */ static void pmap_kenter_pt2tab(vm_offset_t va, pt2_entry_t npte2) { pmap_t pmap; pt2_entry_t *pte2p; mtx_lock_spin(&allpmaps_lock); LIST_FOREACH(pmap, &allpmaps, pm_list) { pte2p = pmap_pt2tab_entry(pmap, va); pt2tab_store(pte2p, npte2); } mtx_unlock_spin(&allpmaps_lock); } /* * Share new kernel PTE1 with all pmaps. * The caller is responsible for maintaining TLB consistency. */ static void pmap_kenter_pte1(vm_offset_t va, pt1_entry_t npte1) { pmap_t pmap; pt1_entry_t *pte1p; mtx_lock_spin(&allpmaps_lock); LIST_FOREACH(pmap, &allpmaps, pm_list) { pte1p = pmap_pte1(pmap, va); pte1_store(pte1p, npte1); } mtx_unlock_spin(&allpmaps_lock); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. * * NOTE: Read the comments above pmap_kenter_prot_attr() as * the function is used herein! */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { vm_offset_t va, sva; vm_paddr_t pte1_offset; pt1_entry_t npte1; uint32_t l1prot, l2prot; uint32_t l1attr, l2attr; PDEBUG(1, printf("%s: virt = %#x, start = %#x, end = %#x (size = %#x)," " prot = %d\n", __func__, *virt, start, end, end - start, prot)); l2prot = (prot & VM_PROT_WRITE) ? PTE2_AP_KRW : PTE2_AP_KR; l2prot |= (prot & VM_PROT_EXECUTE) ? PTE2_X : PTE2_NX; l1prot = ATTR_TO_L1(l2prot); l2attr = PTE2_ATTR_DEFAULT; l1attr = ATTR_TO_L1(l2attr); va = *virt; /* * Does the physical address range's size and alignment permit at * least one section mapping to be created? */ pte1_offset = start & PTE1_OFFSET; if ((end - start) - ((PTE1_SIZE - pte1_offset) & PTE1_OFFSET) >= PTE1_SIZE) { /* * Increase the starting virtual address so that its alignment * does not preclude the use of section mappings. */ if ((va & PTE1_OFFSET) < pte1_offset) va = pte1_trunc(va) + pte1_offset; else if ((va & PTE1_OFFSET) > pte1_offset) va = pte1_roundup(va) + pte1_offset; } sva = va; while (start < end) { if ((start & PTE1_OFFSET) == 0 && end - start >= PTE1_SIZE) { KASSERT((va & PTE1_OFFSET) == 0, ("%s: misaligned va %#x", __func__, va)); npte1 = PTE1_KERN(start, l1prot, l1attr); pmap_kenter_pte1(va, npte1); va += PTE1_SIZE; start += PTE1_SIZE; } else { pmap_kenter_prot_attr(va, start, l2prot, l2attr); va += PAGE_SIZE; start += PAGE_SIZE; } } tlb_flush_range(sva, va - sva); *virt = va; return (sva); } /* * Make a temporary mapping for a physical address. * This is only intended to be used for panic dumps. */ void * pmap_kenter_temporary(vm_paddr_t pa, int i) { vm_offset_t va; /* QQQ: 'i' should be less or equal to MAXDUMPPGS. */ va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); tlb_flush_local(va); return ((void *)crashdumpmap); } /************************************* * * TLB & cache maintenance routines. * *************************************/ /* * We inline these within pmap.c for speed. 
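A standalone illustration of the virtual-address adjustment pmap_map() makes so that the 1 MB offset of the VA matches that of the PA, which is what lets it use section mappings for the aligned middle of the range. The size check is omitted here (the range is assumed large enough for at least one full section) and the addresses are arbitrary.

#include <stdio.h>
#include <stdint.h>

#define PTE1_SIZE	(1u << 20)
#define PTE1_OFFSET	(PTE1_SIZE - 1)
#define pte1_trunc(x)	((x) & ~PTE1_OFFSET)
#define pte1_roundup(x)	(((x) + PTE1_OFFSET) & ~PTE1_OFFSET)

int
main(void)
{
	uint32_t start = 0x80080000;		/* PA, arbitrary */
	uint32_t va = 0xd0000000;		/* suggested VA, arbitrary */
	uint32_t pte1_offset = start & PTE1_OFFSET;

	/* Align the VA so its 1 MB offset matches the PA's offset. */
	if ((va & PTE1_OFFSET) < pte1_offset)
		va = pte1_trunc(va) + pte1_offset;
	else if ((va & PTE1_OFFSET) > pte1_offset)
		va = pte1_roundup(va) + pte1_offset;

	printf("va=0x%08x (1MB offset 0x%05x)\n",
	    (unsigned)va, (unsigned)(va & PTE1_OFFSET));
	return (0);
}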
*/ PMAP_INLINE void pmap_tlb_flush(pmap_t pmap, vm_offset_t va) { if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) tlb_flush(va); } PMAP_INLINE void pmap_tlb_flush_range(pmap_t pmap, vm_offset_t sva, vm_size_t size) { if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) tlb_flush_range(sva, size); } /* * Abuse the pte2 nodes for unmapped kva to thread a kva freelist through. * Requirements: * - Must deal with pages in order to ensure that none of the PTE2_* bits * are ever set, PTE2_V in particular. * - Assumes we can write to pte2s without pte2_store() atomic ops. * - Assumes nothing will ever test these addresses for 0 to indicate * no mapping instead of correctly checking PTE2_V. * - Assumes a vm_offset_t will fit in a pte2 (true for arm). * Because PTE2_V is never set, there can be no mappings to invalidate. */ static vm_offset_t pmap_pte2list_alloc(vm_offset_t *head) { pt2_entry_t *pte2p; vm_offset_t va; va = *head; if (va == 0) panic("pmap_ptelist_alloc: exhausted ptelist KVA"); pte2p = pt2map_entry(va); *head = *pte2p; if (*head & PTE2_V) panic("%s: va with PTE2_V set!", __func__); *pte2p = 0; return (va); } static void pmap_pte2list_free(vm_offset_t *head, vm_offset_t va) { pt2_entry_t *pte2p; if (va & PTE2_V) panic("%s: freeing va with PTE2_V set!", __func__); pte2p = pt2map_entry(va); *pte2p = *head; /* virtual! PTE2_V is 0 though */ *head = va; } static void pmap_pte2list_init(vm_offset_t *head, void *base, int npages) { int i; vm_offset_t va; *head = 0; for (i = npages - 1; i >= 0; i--) { va = (vm_offset_t)base + i * PAGE_SIZE; pmap_pte2list_free(head, va); } } /***************************************************************************** * * PMAP third and final stage initialization. * * After pmap_init() is called, PMAP subsystem is fully initialized. 
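The pv-chunk KVA freelist above stores the next free VA directly in the (never valid) PTE2 slot of each unmapped page. Below is a standalone toy version of the same trick, threading a LIFO freelist through an array of otherwise-unused slots; it only illustrates the idea and shares nothing but the shape with pmap_pte2list_alloc()/free()/init().

#include <stdio.h>
#include <stdint.h>

#define NSLOTS	4

static uintptr_t slots[NSLOTS];		/* stands in for the pte2 slots */
static uintptr_t head;			/* stands in for pv_vafree */

/* Like pmap_pte2list_free(): push a "va" (here just an index + 1). */
static void
toy_free(uintptr_t va)
{
	slots[va - 1] = head;
	head = va;
}

/* Like pmap_pte2list_alloc(): pop the most recently freed "va". */
static uintptr_t
toy_alloc(void)
{
	uintptr_t va = head;

	if (va == 0)
		return (0);		/* exhausted */
	head = slots[va - 1];
	slots[va - 1] = 0;
	return (va);
}

int
main(void)
{
	/* Init frees from the top down, like pmap_pte2list_init(). */
	for (uintptr_t va = NSLOTS; va >= 1; va--)
		toy_free(va);
	printf("alloc %ju, %ju\n", (uintmax_t)toy_alloc(),
	    (uintmax_t)toy_alloc());		/* prints: alloc 1, 2 */
	return (0);
}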
* *****************************************************************************/ SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, "Max number of PV entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, "Page share factor per proc"); static u_long nkpt2pg = NKPT2PG; SYSCTL_ULONG(_vm_pmap, OID_AUTO, nkpt2pg, CTLFLAG_RD, &nkpt2pg, 0, "Pre-allocated pages for kernel PT2s"); static int sp_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, sp_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &sp_enabled, 0, "Are large page mappings enabled?"); bool pmap_ps_enabled(pmap_t pmap __unused) { return (sp_enabled != 0); } static SYSCTL_NODE(_vm_pmap, OID_AUTO, pte1, CTLFLAG_RD, 0, "1MB page mapping counters"); static u_long pmap_pte1_demotions; SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, demotions, CTLFLAG_RD, &pmap_pte1_demotions, 0, "1MB page demotions"); static u_long pmap_pte1_mappings; SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, mappings, CTLFLAG_RD, &pmap_pte1_mappings, 0, "1MB page mappings"); static u_long pmap_pte1_p_failures; SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, p_failures, CTLFLAG_RD, &pmap_pte1_p_failures, 0, "1MB page promotion failures"); static u_long pmap_pte1_promotions; SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, promotions, CTLFLAG_RD, &pmap_pte1_promotions, 0, "1MB page promotions"); static u_long pmap_pte1_kern_demotions; SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, kern_demotions, CTLFLAG_RD, &pmap_pte1_kern_demotions, 0, "1MB page kernel demotions"); static u_long pmap_pte1_kern_promotions; SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, kern_promotions, CTLFLAG_RD, &pmap_pte1_kern_promotions, 0, "1MB page kernel promotions"); static __inline ttb_entry_t pmap_ttb_get(pmap_t pmap) { return (vtophys(pmap->pm_pt1) | ttb_flags); } /* * Initialize a vm_page's machine-dependent fields. * * Variations: * 1. Pages for L2 page tables are always not managed. So, pv_list and * pt2_wirecount can share same physical space. However, proper * initialization on a page alloc for page tables and reinitialization * on the page free must be ensured. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); pt2_wirecount_init(m); m->md.pat_mode = VM_MEMATTR_DEFAULT; } /* * Virtualization for faster way how to zero whole page. */ static __inline void pagezero(void *page) { bzero(page, PAGE_SIZE); } /* * Zero L2 page table page. * Use same KVA as in pmap_zero_page(). */ static __inline vm_paddr_t pmap_pt2pg_zero(vm_page_t m) { pt2_entry_t *cmap2_pte2p; vm_paddr_t pa; struct pcpu *pc; pa = VM_PAGE_TO_PHYS(m); /* * XXX: For now, we map whole page even if it's already zero, * to sync it even if the sync is only DSB. */ sched_pin(); pc = get_pcpu(); cmap2_pte2p = pc->pc_cmap2_pte2p; mtx_lock(&pc->pc_cmap_lock); if (pte2_load(cmap2_pte2p) != 0) panic("%s: CMAP2 busy", __func__); pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW, vm_page_pte2_attr(m))); /* Even VM_ALLOC_ZERO request is only advisory. */ if ((m->flags & PG_ZERO) == 0) pagezero(pc->pc_cmap2_addr); pte2_sync_range((pt2_entry_t *)pc->pc_cmap2_addr, PAGE_SIZE); pte2_clear(cmap2_pte2p); tlb_flush((vm_offset_t)pc->pc_cmap2_addr); /* * Unpin the thread before releasing the lock. Otherwise the thread * could be rescheduled while still bound to the current CPU, only * to unpin itself immediately upon resuming execution. 
*/ sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); return (pa); } /* * Init just allocated page as L2 page table(s) holder * and return its physical address. */ static __inline vm_paddr_t pmap_pt2pg_init(pmap_t pmap, vm_offset_t va, vm_page_t m) { vm_paddr_t pa; pt2_entry_t *pte2p; /* Check page attributes. */ if (m->md.pat_mode != pt_memattr) pmap_page_set_memattr(m, pt_memattr); /* Zero page and init wire counts. */ pa = pmap_pt2pg_zero(m); pt2_wirecount_init(m); /* * Map page to PT2MAP address space for given pmap. * Note that PT2MAP space is shared with all pmaps. */ if (pmap == kernel_pmap) pmap_kenter_pt2tab(va, PTE2_KPT(pa)); else { pte2p = pmap_pt2tab_entry(pmap, va); pt2tab_store(pte2p, PTE2_KPT_NG(pa)); } return (pa); } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { vm_size_t s; pt2_entry_t *pte2p, pte2; u_int i, pte1_idx, pv_npg; PDEBUG(1, printf("%s: phys_start = %#x\n", __func__, PHYSADDR)); /* * Initialize the vm page array entries for kernel pmap's * L2 page table pages allocated in advance. */ pte1_idx = pte1_index(KERNBASE - PT2MAP_SIZE); pte2p = kern_pt2tab_entry(KERNBASE - PT2MAP_SIZE); for (i = 0; i < nkpt2pg + NPG_IN_PT2TAB; i++, pte2p++) { vm_paddr_t pa; vm_page_t m; pte2 = pte2_load(pte2p); KASSERT(pte2_is_valid(pte2), ("%s: no valid entry", __func__)); pa = pte2_pa(pte2); m = PHYS_TO_VM_PAGE(pa); KASSERT(m >= vm_page_array && m < &vm_page_array[vm_page_array_size], ("%s: L2 page table page is out of range", __func__)); m->pindex = pte1_idx; m->phys_addr = pa; pte1_idx += NPT2_IN_PG; } /* * Initialize the address space (zone) for the pv entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); pv_entry_max = roundup(pv_entry_max, _NPCPV); pv_entry_high_water = 9 * (pv_entry_max / 10); /* * Are large page mappings enabled? */ TUNABLE_INT_FETCH("vm.pmap.sp_enabled", &sp_enabled); if (sp_enabled) { KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, ("%s: can't assign to pagesizes[1]", __func__)); pagesizes[1] = PTE1_SIZE; } /* * Calculate the size of the pv head table for sections. * Handle the possibility that "vm_phys_segs[...].end" is zero. * Note that the table is only for sections which could be promoted. */ first_managed_pa = pte1_trunc(vm_phys_segs[0].start); pv_npg = (pte1_trunc(vm_phys_segs[vm_phys_nsegs - 1].end - PAGE_SIZE) - first_managed_pa) / PTE1_SIZE + 1; /* * Allocate memory for the pv head table for sections. */ s = (vm_size_t)(pv_npg * sizeof(struct md_page)); s = round_page(s); pv_table = (struct md_page *)kmem_malloc(kernel_arena, s, M_WAITOK | M_ZERO); for (i = 0; i < pv_npg; i++) TAILQ_INIT(&pv_table[i].pv_list); pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc); pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks); if (pv_chunkbase == NULL) panic("%s: not enough kvm for pv chunks", __func__); pmap_pte2list_init(&pv_vafree, pv_chunkbase, pv_maxchunks); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. 
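A back-of-the-envelope illustration of the pv-entry sizing computed in pmap_init() above. The inputs here are assumptions (the PMAP_SHPGPERPROC default of 200 comes from this file, but maxproc and the page count are made-up values), and the tunable fetches and the roundup to the pv-chunk capacity are skipped.

#include <stdio.h>

int
main(void)
{
	unsigned shpgperproc = 200;	/* PMAP_SHPGPERPROC default */
	unsigned maxproc = 1000;	/* assumed */
	unsigned v_page_count = 131072;	/* assumed: 512 MB of 4 KB pages */
	unsigned pv_entry_max, pv_entry_high_water;

	/* As in pmap_init(), before the roundup to _NPCPV. */
	pv_entry_max = shpgperproc * maxproc + v_page_count;
	pv_entry_high_water = 9 * (pv_entry_max / 10);

	printf("pv_entry_max=%u high_water=%u\n",
	    pv_entry_max, pv_entry_high_water);
	return (0);
}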
*/ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { u_int anychanged; pt2_entry_t *epte2p, *pte2p, pte2; vm_page_t m; vm_paddr_t pa; anychanged = 0; pte2p = pt2map_entry(sva); epte2p = pte2p + count; while (pte2p < epte2p) { m = *ma++; pa = VM_PAGE_TO_PHYS(m); pte2 = pte2_load(pte2p); if ((pte2_pa(pte2) != pa) || (pte2_attr(pte2) != vm_page_pte2_attr(m))) { anychanged++; pte2_store(pte2p, PTE2_KERN(pa, PTE2_AP_KRW, vm_page_pte2_attr(m))); } pte2p++; } if (__predict_false(anychanged)) tlb_flush_range(sva, count * PAGE_SIZE); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { pmap_kremove(va); va += PAGE_SIZE; } tlb_flush_range(sva, va - sva); } /* * Are we current address space or kernel? */ static __inline int pmap_is_current(pmap_t pmap) { return (pmap == kernel_pmap || (pmap == vmspace_pmap(curthread->td_proc->p_vmspace))); } /* * If the given pmap is not the current or kernel pmap, the returned * pte2 must be released by passing it to pmap_pte2_release(). */ static pt2_entry_t * pmap_pte2(pmap_t pmap, vm_offset_t va) { pt1_entry_t pte1; vm_paddr_t pt2pg_pa; pte1 = pte1_load(pmap_pte1(pmap, va)); if (pte1_is_section(pte1)) panic("%s: attempt to map PTE1", __func__); if (pte1_is_link(pte1)) { /* Are we current address space or kernel? */ if (pmap_is_current(pmap)) return (pt2map_entry(va)); /* Note that L2 page table size is not equal to PAGE_SIZE. */ pt2pg_pa = trunc_page(pte1_link_pa(pte1)); mtx_lock(&PMAP2mutex); if (pte2_pa(pte2_load(PMAP2)) != pt2pg_pa) { pte2_store(PMAP2, PTE2_KPT(pt2pg_pa)); tlb_flush((vm_offset_t)PADDR2); } return (PADDR2 + (arm32_btop(va) & (NPTE2_IN_PG - 1))); } return (NULL); } /* * Releases a pte2 that was obtained from pmap_pte2(). * Be prepared for the pte2p being NULL. */ static __inline void pmap_pte2_release(pt2_entry_t *pte2p) { if ((pt2_entry_t *)(trunc_page((vm_offset_t)pte2p)) == PADDR2) { mtx_unlock(&PMAP2mutex); } } /* * Super fast pmap_pte2 routine best used when scanning * the pv lists. This eliminates many coarse-grained * invltlb calls. Note that many of the pv list * scans are across different pmaps. It is very wasteful * to do an entire tlb flush for checking a single mapping. * * If the given pmap is not the current pmap, pvh_global_lock * must be held and curthread pinned to a CPU. */ static pt2_entry_t * pmap_pte2_quick(pmap_t pmap, vm_offset_t va) { pt1_entry_t pte1; vm_paddr_t pt2pg_pa; pte1 = pte1_load(pmap_pte1(pmap, va)); if (pte1_is_section(pte1)) panic("%s: attempt to map PTE1", __func__); if (pte1_is_link(pte1)) { /* Are we current address space or kernel? */ if (pmap_is_current(pmap)) return (pt2map_entry(va)); rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT(curthread->td_pinned > 0, ("%s: curthread not pinned", __func__)); /* Note that L2 page table size is not equal to PAGE_SIZE. 
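A short sketch of the qenter/qremove pair defined above, for windowing an array of pages through a previously allocated KVA range. The helper name is hypothetical; the page array and KVA are assumed to come from the caller.

/*
 * Hypothetical helper: map 'count' pages at 'va' for a quick access,
 * then drop the temporary mappings again.
 */
static void
example_window_pages(vm_offset_t va, vm_page_t *ma, int count)
{
	pmap_qenter(va, ma, count);	/* SMP coherent, flushes if changed */

	/* ... copy data in or out through 'va' here ... */

	pmap_qremove(va, count);	/* tears down and flushes the range */
}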
*/ pt2pg_pa = trunc_page(pte1_link_pa(pte1)); if (pte2_pa(pte2_load(PMAP1)) != pt2pg_pa) { pte2_store(PMAP1, PTE2_KPT(pt2pg_pa)); #ifdef SMP PMAP1cpu = PCPU_GET(cpuid); #endif tlb_flush_local((vm_offset_t)PADDR1); PMAP1changed++; } else #ifdef SMP if (PMAP1cpu != PCPU_GET(cpuid)) { PMAP1cpu = PCPU_GET(cpuid); tlb_flush_local((vm_offset_t)PADDR1); PMAP1changedcpu++; } else #endif PMAP1unchanged++; return (PADDR1 + (arm32_btop(va) & (NPTE2_IN_PG - 1))); } return (NULL); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { vm_paddr_t pa; pt1_entry_t pte1; pt2_entry_t *pte2p; PMAP_LOCK(pmap); pte1 = pte1_load(pmap_pte1(pmap, va)); if (pte1_is_section(pte1)) pa = pte1_pa(pte1) | (va & PTE1_OFFSET); else if (pte1_is_link(pte1)) { pte2p = pmap_pte2(pmap, va); pa = pte2_pa(pte2_load(pte2p)) | (va & PTE2_OFFSET); pmap_pte2_release(pte2p); } else pa = 0; PMAP_UNLOCK(pmap); return (pa); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { vm_paddr_t pa, lockpa; pt1_entry_t pte1; pt2_entry_t pte2, *pte2p; vm_page_t m; lockpa = 0; m = NULL; PMAP_LOCK(pmap); retry: pte1 = pte1_load(pmap_pte1(pmap, va)); if (pte1_is_section(pte1)) { if (!(pte1 & PTE1_RO) || !(prot & VM_PROT_WRITE)) { pa = pte1_pa(pte1) | (va & PTE1_OFFSET); if (vm_page_pa_tryrelock(pmap, pa, &lockpa)) goto retry; m = PHYS_TO_VM_PAGE(pa); vm_page_hold(m); } } else if (pte1_is_link(pte1)) { pte2p = pmap_pte2(pmap, va); pte2 = pte2_load(pte2p); pmap_pte2_release(pte2p); if (pte2_is_valid(pte2) && (!(pte2 & PTE2_RO) || !(prot & VM_PROT_WRITE))) { pa = pte2_pa(pte2); if (vm_page_pa_tryrelock(pmap, pa, &lockpa)) goto retry; m = PHYS_TO_VM_PAGE(pa); vm_page_hold(m); } } PA_UNLOCK_COND(lockpa); PMAP_UNLOCK(pmap); return (m); } /* * Grow the number of kernel L2 page table entries, if needed. */ void pmap_growkernel(vm_offset_t addr) { vm_page_t m; vm_paddr_t pt2pg_pa, pt2_pa; pt1_entry_t pte1; pt2_entry_t pte2; PDEBUG(1, printf("%s: addr = %#x\n", __func__, addr)); /* * All the time kernel_vm_end is first KVA for which underlying * L2 page table is either not allocated or linked from L1 page table * (not considering sections). Except for two possible cases: * * (1) in the very beginning as long as pmap_growkernel() was * not called, it could be first unused KVA (which is not * rounded up to PTE1_SIZE), * * (2) when all KVA space is mapped and kernel_map->max_offset * address is not rounded up to PTE1_SIZE. (For example, * it could be 0xFFFFFFFF.) */ kernel_vm_end = pte1_roundup(kernel_vm_end); mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, PTE1_SIZE); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { pte1 = pte1_load(kern_pte1(kernel_vm_end)); if (pte1_is_valid(pte1)) { kernel_vm_end += PTE1_SIZE; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } continue; } /* * kernel_vm_end_new is used in pmap_pinit() when kernel * mappings are entered to new pmap all at once to avoid race * between pmap_kenter_pte1() and kernel_vm_end increase. * The same aplies to pmap_kenter_pt2tab(). 
*/ kernel_vm_end_new = kernel_vm_end + PTE1_SIZE; pte2 = pt2tab_load(kern_pt2tab_entry(kernel_vm_end)); if (!pte2_is_valid(pte2)) { /* * Install new PT2s page into kernel PT2TAB. */ m = vm_page_alloc(NULL, pte1_index(kernel_vm_end) & ~PT2PG_MASK, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (m == NULL) panic("%s: no memory to grow kernel", __func__); /* * QQQ: To link all new L2 page tables from L1 page * table now and so pmap_kenter_pte1() them * at once together with pmap_kenter_pt2tab() * could be nice speed up. However, * pmap_growkernel() does not happen so often... * QQQ: The other TTBR is another option. */ pt2pg_pa = pmap_pt2pg_init(kernel_pmap, kernel_vm_end, m); } else pt2pg_pa = pte2_pa(pte2); pt2_pa = page_pt2pa(pt2pg_pa, pte1_index(kernel_vm_end)); pmap_kenter_pte1(kernel_vm_end, PTE1_LINK(pt2_pa)); kernel_vm_end = kernel_vm_end_new; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } } } static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = vm_max_kernel_address - KERNBASE; return (sysctl_handle_long(oidp, &ksize, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "IU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = vm_max_kernel_address - kernel_vm_end; return (sysctl_handle_long(oidp, &kfree, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "IU", "Amount of KVM free"); /*********************************************** * * Pmap allocation/deallocation routines. * ***********************************************/ /* * Initialize the pmap for the swapper process. */ void pmap_pinit0(pmap_t pmap) { PDEBUG(1, printf("%s: pmap = %p\n", __func__, pmap)); PMAP_LOCK_INIT(pmap); /* * Kernel page table directory and pmap stuff around is already * initialized, we are using it right now and here. So, finish * only PMAP structures initialization for process0 ... * * Since the L1 page table and PT2TAB is shared with the kernel pmap, * which is already included in the list "allpmaps", this pmap does * not need to be inserted into that list. */ pmap->pm_pt1 = kern_pt1; pmap->pm_pt2tab = kern_pt2tab; CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); CPU_SET(0, &pmap->pm_active); } static __inline void pte1_copy_nosync(pt1_entry_t *spte1p, pt1_entry_t *dpte1p, vm_offset_t sva, vm_offset_t eva) { u_int idx, count; idx = pte1_index(sva); count = (pte1_index(eva) - idx + 1) * sizeof(pt1_entry_t); bcopy(spte1p + idx, dpte1p + idx, count); } static __inline void pt2tab_copy_nosync(pt2_entry_t *spte2p, pt2_entry_t *dpte2p, vm_offset_t sva, vm_offset_t eva) { u_int idx, count; idx = pt2tab_index(sva); count = (pt2tab_index(eva) - idx + 1) * sizeof(pt2_entry_t); bcopy(spte2p + idx, dpte2p + idx, count); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ int pmap_pinit(pmap_t pmap) { pt1_entry_t *pte1p; pt2_entry_t *pte2p; vm_paddr_t pa, pt2tab_pa; u_int i; PDEBUG(6, printf("%s: pmap = %p, pm_pt1 = %p\n", __func__, pmap, pmap->pm_pt1)); /* * No need to allocate L2 page table space yet but we do need * a valid L1 page table and PT2TAB table. * * Install shared kernel mappings to these tables. It's a little * tricky as some parts of KVA are reserved for vectors, devices, * and whatever else. These parts are supposed to be above * vm_max_kernel_address. 
Thus two regions should be installed: * * (1) . * * QQQ: The second region should be stable enough to be installed * only once in time when the tables are allocated. * QQQ: Maybe copy of both regions at once could be faster ... * QQQ: Maybe the other TTBR is an option. * * Finally, install own PT2TAB table to these tables. */ if (pmap->pm_pt1 == NULL) { - pmap->pm_pt1 = (pt1_entry_t *)kmem_alloc_contig(kernel_arena, - NB_IN_PT1, M_NOWAIT | M_ZERO, 0, -1UL, NB_IN_PT1, 0, - pt_memattr); + pmap->pm_pt1 = (pt1_entry_t *)kmem_alloc_contig(NB_IN_PT1, + M_NOWAIT | M_ZERO, 0, -1UL, NB_IN_PT1, 0, pt_memattr); if (pmap->pm_pt1 == NULL) return (0); } if (pmap->pm_pt2tab == NULL) { /* * QQQ: (1) PT2TAB must be contiguous. If PT2TAB is one page * only, what should be the only size for 32 bit systems, * then we could allocate it with vm_page_alloc() and all * the stuff needed as other L2 page table pages. * (2) Note that a process PT2TAB is special L2 page table * page. Its mapping in kernel_arena is permanent and can * be used no matter which process is current. Its mapping * in PT2MAP can be used only for current process. */ pmap->pm_pt2tab = (pt2_entry_t *)kmem_alloc_attr(NB_IN_PT2TAB, M_NOWAIT | M_ZERO, 0, -1UL, pt_memattr); if (pmap->pm_pt2tab == NULL) { /* * QQQ: As struct pmap is allocated from UMA with * UMA_ZONE_NOFREE flag, it's important to leave * no allocation in pmap if initialization failed. */ kmem_free(kernel_arena, (vm_offset_t)pmap->pm_pt1, NB_IN_PT1); pmap->pm_pt1 = NULL; return (0); } /* * QQQ: Each L2 page table page vm_page_t has pindex set to * pte1 index of virtual address mapped by this page. * It's not valid for non kernel PT2TABs themselves. * The pindex of these pages can not be altered because * of the way how they are allocated now. However, it * should not be a problem. */ } mtx_lock_spin(&allpmaps_lock); /* * To avoid race with pmap_kenter_pte1() and pmap_kenter_pt2tab(), * kernel_vm_end_new is used here instead of kernel_vm_end. */ pte1_copy_nosync(kern_pt1, pmap->pm_pt1, KERNBASE, kernel_vm_end_new - 1); pte1_copy_nosync(kern_pt1, pmap->pm_pt1, vm_max_kernel_address, 0xFFFFFFFF); pt2tab_copy_nosync(kern_pt2tab, pmap->pm_pt2tab, KERNBASE, kernel_vm_end_new - 1); pt2tab_copy_nosync(kern_pt2tab, pmap->pm_pt2tab, vm_max_kernel_address, 0xFFFFFFFF); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); /* * Store PT2MAP PT2 pages (a.k.a. PT2TAB) in PT2TAB itself. * I.e. self reference mapping. The PT2TAB is private, however mapped * into shared PT2MAP space, so the mapping should be not global. */ pt2tab_pa = vtophys(pmap->pm_pt2tab); pte2p = pmap_pt2tab_entry(pmap, (vm_offset_t)PT2MAP); for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) { pt2tab_store(pte2p++, PTE2_KPT_NG(pa)); } /* Insert PT2MAP PT2s into pmap PT1. */ pte1p = pmap_pte1(pmap, (vm_offset_t)PT2MAP); for (pa = pt2tab_pa, i = 0; i < NPT2_IN_PT2TAB; i++, pa += NB_IN_PT2) { pte1_store(pte1p++, PTE1_LINK(pa)); } /* * Now synchronize new mapping which was made above. */ pte1_sync_range(pmap->pm_pt1, NB_IN_PT1); pte2_sync_range(pmap->pm_pt2tab, NB_IN_PT2TAB); CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); return (1); } #ifdef INVARIANTS static boolean_t pt2tab_user_is_empty(pt2_entry_t *tab) { u_int i, end; end = pt2tab_index(VM_MAXUSER_ADDRESS); for (i = 0; i < end; i++) if (tab[i] != 0) return (FALSE); return (TRUE); } #endif /* * Release any resources held by the given physical map. 
* Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. */ void pmap_release(pmap_t pmap) { #ifdef INVARIANTS vm_offset_t start, end; #endif KASSERT(pmap->pm_stats.resident_count == 0, ("%s: pmap resident count %ld != 0", __func__, pmap->pm_stats.resident_count)); KASSERT(pt2tab_user_is_empty(pmap->pm_pt2tab), ("%s: has allocated user PT2(s)", __func__)); KASSERT(CPU_EMPTY(&pmap->pm_active), ("%s: pmap %p is active on some CPU(s)", __func__, pmap)); mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); #ifdef INVARIANTS start = pte1_index(KERNBASE) * sizeof(pt1_entry_t); end = (pte1_index(0xFFFFFFFF) + 1) * sizeof(pt1_entry_t); bzero((char *)pmap->pm_pt1 + start, end - start); start = pt2tab_index(KERNBASE) * sizeof(pt2_entry_t); end = (pt2tab_index(0xFFFFFFFF) + 1) * sizeof(pt2_entry_t); bzero((char *)pmap->pm_pt2tab + start, end - start); #endif /* * We are leaving PT1 and PT2TAB allocated on released pmap, * so hopefully UMA vmspace_zone will always be inited with * UMA_ZONE_NOFREE flag. */ } /********************************************************* * * L2 table pages and their pages management routines. * *********************************************************/ /* * Virtual interface for L2 page table wire counting. * * Each L2 page table in a page has own counter which counts a number of * valid mappings in a table. Global page counter counts mappings in all * tables in a page plus a single itself mapping in PT2TAB. * * During a promotion we leave the associated L2 page table counter * untouched, so the table (strictly speaking a page which holds it) * is never freed if promoted. * * If a page m->wire_count == 1 then no valid mappings exist in any L2 page * table in the page and the page itself is only mapped in PT2TAB. */ static __inline void pt2_wirecount_init(vm_page_t m) { u_int i; /* * Note: A page m is allocated with VM_ALLOC_WIRED flag and * m->wire_count should be already set correctly. * So, there is no need to set it again herein. */ for (i = 0; i < NPT2_IN_PG; i++) m->md.pt2_wirecount[i] = 0; } static __inline void pt2_wirecount_inc(vm_page_t m, uint32_t pte1_idx) { /* * Note: A just modificated pte2 (i.e. already allocated) * is acquiring one extra reference which must be * explicitly cleared. It influences the KASSERTs herein. * All L2 page tables in a page always belong to the same * pmap, so we allow only one extra reference for the page. 
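A standalone worked example of the wire-counting invariant described above, assuming the usual four L2 tables per page (ARMv6/v7 with 4 KB pages). The page's wire_count starts at 1 for its own PT2TAB mapping, and every valid PTE2 entered through any of its L2 tables adds one to both the per-table counter and the page counter.

#include <stdio.h>

#define NPT2_IN_PG	4	/* L2 tables per page, assumed */

int
main(void)
{
	unsigned pt2_wirecount[NPT2_IN_PG] = { 0 };
	unsigned wire_count = 1;	/* the page's own PT2TAB mapping */

	/* Enter 3 mappings via PT2 #1 and 1 mapping via PT2 #2. */
	pt2_wirecount[1] += 3; wire_count += 3;
	pt2_wirecount[2] += 1; wire_count += 1;

	/* wire_count == 5: the self mapping plus four valid PTE2s. */
	printf("wire_count=%u pt2[1]=%u pt2[2]=%u\n",
	    wire_count, pt2_wirecount[1], pt2_wirecount[2]);

	/* The page may be reclaimed only when wire_count drops back to 1. */
	return (0);
}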
*/ KASSERT(m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] < (NPTE2_IN_PT2 + 1), ("%s: PT2 is overflowing ...", __func__)); KASSERT(m->wire_count <= (NPTE2_IN_PG + 1), ("%s: PT2PG is overflowing ...", __func__)); m->wire_count++; m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]++; } static __inline void pt2_wirecount_dec(vm_page_t m, uint32_t pte1_idx) { KASSERT(m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] != 0, ("%s: PT2 is underflowing ...", __func__)); KASSERT(m->wire_count > 1, ("%s: PT2PG is underflowing ...", __func__)); m->wire_count--; m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]--; } static __inline void pt2_wirecount_set(vm_page_t m, uint32_t pte1_idx, uint16_t count) { KASSERT(count <= NPTE2_IN_PT2, ("%s: invalid count %u", __func__, count)); KASSERT(m->wire_count > m->md.pt2_wirecount[pte1_idx & PT2PG_MASK], ("%s: PT2PG corrupting (%u, %u) ...", __func__, m->wire_count, m->md.pt2_wirecount[pte1_idx & PT2PG_MASK])); m->wire_count -= m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]; m->wire_count += count; m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] = count; KASSERT(m->wire_count <= (NPTE2_IN_PG + 1), ("%s: PT2PG is overflowed (%u) ...", __func__, m->wire_count)); } static __inline uint32_t pt2_wirecount_get(vm_page_t m, uint32_t pte1_idx) { return (m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]); } static __inline boolean_t pt2_is_empty(vm_page_t m, vm_offset_t va) { return (m->md.pt2_wirecount[pte1_index(va) & PT2PG_MASK] == 0); } static __inline boolean_t pt2_is_full(vm_page_t m, vm_offset_t va) { return (m->md.pt2_wirecount[pte1_index(va) & PT2PG_MASK] == NPTE2_IN_PT2); } static __inline boolean_t pt2pg_is_empty(vm_page_t m) { return (m->wire_count == 1); } /* * This routine is called if the L2 page table * is not mapped correctly. */ static vm_page_t _pmap_allocpte2(pmap_t pmap, vm_offset_t va, u_int flags) { uint32_t pte1_idx; pt1_entry_t *pte1p; pt2_entry_t pte2; vm_page_t m; vm_paddr_t pt2pg_pa, pt2_pa; pte1_idx = pte1_index(va); pte1p = pmap->pm_pt1 + pte1_idx; KASSERT(pte1_load(pte1p) == 0, ("%s: pm_pt1[%#x] is not zero: %#x", __func__, pte1_idx, pte1_load(pte1p))); pte2 = pt2tab_load(pmap_pt2tab_entry(pmap, va)); if (!pte2_is_valid(pte2)) { /* * Install new PT2s page into pmap PT2TAB. */ m = vm_page_alloc(NULL, pte1_idx & ~PT2PG_MASK, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (m == NULL) { if ((flags & PMAP_ENTER_NOSLEEP) == 0) { PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); vm_wait(NULL); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, * the L2 page table page may have been allocated. */ return (NULL); } pmap->pm_stats.resident_count++; pt2pg_pa = pmap_pt2pg_init(pmap, va, m); } else { pt2pg_pa = pte2_pa(pte2); m = PHYS_TO_VM_PAGE(pt2pg_pa); } pt2_wirecount_inc(m, pte1_idx); pt2_pa = page_pt2pa(pt2pg_pa, pte1_idx); pte1_store(pte1p, PTE1_LINK(pt2_pa)); return (m); } static vm_page_t pmap_allocpte2(pmap_t pmap, vm_offset_t va, u_int flags) { u_int pte1_idx; pt1_entry_t *pte1p, pte1; vm_page_t m; pte1_idx = pte1_index(va); retry: pte1p = pmap->pm_pt1 + pte1_idx; pte1 = pte1_load(pte1p); /* * This supports switching from a 1MB page to a * normal 4K page. */ if (pte1_is_section(pte1)) { (void)pmap_demote_pte1(pmap, pte1p, va); /* * Reload pte1 after demotion. * * Note: Demotion can even fail as either PT2 is not find for * the virtual address or PT2PG can not be allocated. */ pte1 = pte1_load(pte1p); } /* * If the L2 page table page is mapped, we just increment the * hold count, and activate it. 
*/ if (pte1_is_link(pte1)) { m = PHYS_TO_VM_PAGE(pte1_link_pa(pte1)); pt2_wirecount_inc(m, pte1_idx); } else { /* * Here if the PT2 isn't mapped, or if it has * been deallocated. */ m = _pmap_allocpte2(pmap, va, flags); if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0) goto retry; } return (m); } /* * Schedule the specified unused L2 page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free) { /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ #ifdef PMAP_DEBUG pmap_zero_page_check(m); #endif m->flags |= PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } /* * Unwire L2 page tables page. */ static void pmap_unwire_pt2pg(pmap_t pmap, vm_offset_t va, vm_page_t m) { pt1_entry_t *pte1p, opte1 __unused; pt2_entry_t *pte2p; uint32_t i; KASSERT(pt2pg_is_empty(m), ("%s: pmap %p PT2PG %p wired", __func__, pmap, m)); /* * Unmap all L2 page tables in the page from L1 page table. * * QQQ: Individual L2 page tables (except the last one) can be unmapped * earlier. However, we are doing that this way. */ KASSERT(m->pindex == (pte1_index(va) & ~PT2PG_MASK), ("%s: pmap %p va %#x PT2PG %p bad index", __func__, pmap, va, m)); pte1p = pmap->pm_pt1 + m->pindex; for (i = 0; i < NPT2_IN_PG; i++, pte1p++) { KASSERT(m->md.pt2_wirecount[i] == 0, ("%s: pmap %p PT2 %u (PG %p) wired", __func__, pmap, i, m)); opte1 = pte1_load(pte1p); if (pte1_is_link(opte1)) { pte1_clear(pte1p); /* * Flush intermediate TLB cache. */ pmap_tlb_flush(pmap, (m->pindex + i) << PTE1_SHIFT); } #ifdef INVARIANTS else KASSERT((opte1 == 0) || pte1_is_section(opte1), ("%s: pmap %p va %#x bad pte1 %x at %u", __func__, pmap, va, opte1, i)); #endif } /* * Unmap the page from PT2TAB. */ pte2p = pmap_pt2tab_entry(pmap, va); (void)pt2tab_load_clear(pte2p); pmap_tlb_flush(pmap, pt2map_pt2pg(va)); m->wire_count = 0; pmap->pm_stats.resident_count--; /* * This barrier is so that the ordinary store unmapping * the L2 page table page is globally performed before TLB shoot- * down is begun. */ wmb(); vm_wire_sub(1); } /* * Decrements a L2 page table page's wire count, which is used to record the * number of valid page table entries within the page. If the wire count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. */ static __inline boolean_t pmap_unwire_pt2(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { pt2_wirecount_dec(m, pte1_index(va)); if (pt2pg_is_empty(m)) { /* * QQQ: Wire count is zero, so whole page should be zero and * we can set PG_ZERO flag to it. * Note that when promotion is enabled, it takes some * more efforts. See pmap_unwire_pt2_all() below. */ pmap_unwire_pt2pg(pmap, va, m); pmap_add_delayed_free_list(m, free); return (TRUE); } else return (FALSE); } /* * Drop a L2 page table page's wire count at once, which is used to record * the number of valid L2 page table entries within the page. If the wire * count drops to zero, then the L2 page table page is unmapped. 
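* Unlike pmap_unwire_pt2() above, which drops the count by one, this variant is used when a whole section mapping goes away, so the per-table count is dropped to zero in a single step.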
*/ static __inline void pmap_unwire_pt2_all(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { u_int pte1_idx = pte1_index(va); KASSERT(m->pindex == (pte1_idx & ~PT2PG_MASK), ("%s: PT2 page's pindex is wrong", __func__)); KASSERT(m->wire_count > pt2_wirecount_get(m, pte1_idx), ("%s: bad pt2 wire count %u > %u", __func__, m->wire_count, pt2_wirecount_get(m, pte1_idx))); /* * It's possible that the L2 page table was never used. * This happens when a section was created without promotion. */ if (pt2_is_full(m, va)) { pt2_wirecount_set(m, pte1_idx, 0); /* * QQQ: We clear the L2 page table now, so when the L2 page table page * is going to be freed, we can set its PG_ZERO flag ... * This function is called only on section mappings, so * hopefully it's not too big an overhead. * * XXX: If pmap is current, the existing PT2MAP mapping could be * used for zeroing. */ pmap_zero_page_area(m, page_pt2off(pte1_idx), NB_IN_PT2); } #ifdef INVARIANTS else KASSERT(pt2_is_empty(m, va), ("%s: PT2 is not empty (%u)", __func__, pt2_wirecount_get(m, pte1_idx))); #endif if (pt2pg_is_empty(m)) { pmap_unwire_pt2pg(pmap, va, m); pmap_add_delayed_free_list(m, free); } } /* * After removing an L2 page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static boolean_t pmap_unuse_pt2(pmap_t pmap, vm_offset_t va, struct spglist *free) { pt1_entry_t pte1; vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (FALSE); pte1 = pte1_load(pmap_pte1(pmap, va)); mpte = PHYS_TO_VM_PAGE(pte1_link_pa(pte1)); return (pmap_unwire_pt2(pmap, va, mpte, free)); } /************************************* * * Page management routines. * *************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 11); CTASSERT(_NPCPV == 336); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ #define PC_FREE10 0x0000fffful /* Free values for index 10 */ static const uint32_t pc_freemask[_NPCM] = { PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE10 }; SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif /* * Is the given page managed?
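* A page is managed when it is backed by a vm_page with VPO_UNMANAGED clear; only managed pages get pv entries and have their referenced and modified state tracked by the pmap.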
*/ static __inline bool is_managed(vm_paddr_t pa) { vm_page_t m; m = PHYS_TO_VM_PAGE(pa); if (m == NULL) return (false); return ((m->oflags & VPO_UNMANAGED) == 0); } static __inline bool pte1_is_managed(pt1_entry_t pte1) { return (is_managed(pte1_pa(pte1))); } static __inline bool pte2_is_managed(pt2_entry_t pte2) { return (is_managed(pte2_pa(pte2))); } /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. */ static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap) { struct pch newtail; struct pv_chunk *pc; struct md_page *pvh; pt1_entry_t *pte1p; pmap_t pmap; pt2_entry_t *pte2p, tpte2; pv_entry_t pv; vm_offset_t va; vm_page_t m, m_pc; struct spglist free; uint32_t inuse; int bit, field, freed; PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); pmap = NULL; m_pc = NULL; SLIST_INIT(&free); TAILQ_INIT(&newtail); while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 || SLIST_EMPTY(&free))) { TAILQ_REMOVE(&pv_chunks, pc, pc_lru); if (pmap != pc->pc_pmap) { if (pmap != NULL) { if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } pmap = pc->pc_pmap; /* Avoid deadlock and lock recursion. */ if (pmap > locked_pmap) PMAP_LOCK(pmap); else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { pmap = NULL; TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); continue; } } /* * Destroy every non-wired, 4 KB page mapping in the chunk. */ freed = 0; for (field = 0; field < _NPCM; field++) { for (inuse = ~pc->pc_map[field] & pc_freemask[field]; inuse != 0; inuse &= ~(1UL << bit)) { bit = ffs(inuse) - 1; pv = &pc->pc_pventry[field * 32 + bit]; va = pv->pv_va; pte1p = pmap_pte1(pmap, va); if (pte1_is_section(pte1_load(pte1p))) continue; pte2p = pmap_pte2(pmap, va); tpte2 = pte2_load(pte2p); if ((tpte2 & PTE2_W) == 0) tpte2 = pte2_load_clear(pte2p); pmap_pte2_release(pte2p); if ((tpte2 & PTE2_W) != 0) continue; KASSERT(tpte2 != 0, ("pmap_pv_reclaim: pmap %p va %#x zero pte", pmap, va)); pmap_tlb_flush(pmap, va); m = PHYS_TO_VM_PAGE(pte2_pa(tpte2)); if (pte2_is_dirty(tpte2)) vm_page_dirty(m); if ((tpte2 & PTE2_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) { vm_page_aflag_clear(m, PGA_WRITEABLE); } } pc->pc_map[field] |= 1UL << bit; pmap_unuse_pt2(pmap, va, &free); freed++; } } if (freed == 0) { TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); continue; } /* Every freed mapping is for a 4 KB page. */ pmap->pm_stats.resident_count -= freed; PV_STAT(pv_entry_frees += freed); PV_STAT(pv_entry_spare += freed); pv_entry_count -= freed; TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); for (field = 0; field < _NPCM; field++) if (pc->pc_map[field] != pc_freemask[field]) { TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); /* * One freed pv entry in locked_pmap is * sufficient. */ if (pmap == locked_pmap) goto out; break; } if (field == _NPCM) { PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); /* Entire chunk is free; return it. 
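* The chunk's kernel VA is unmapped and recycled onto pv_vafree, while the backing page is handed back to the caller for reuse.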
*/ m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); pmap_pte2list_free(&pv_vafree, (vm_offset_t)pc); break; } } out: TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); if (pmap != NULL) { if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } if (m_pc == NULL && pv_vafree != 0 && SLIST_EMPTY(&free)) { m_pc = SLIST_FIRST(&free); SLIST_REMOVE_HEAD(&free, plinks.s.ss); /* Recycle a freed page table page. */ m_pc->wire_count = 1; vm_wire_add(1); } vm_page_free_pages_toq(&free, false); return (m_pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; TAILQ_REMOVE(&pv_chunks, pc, pc_lru); PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); vm_page_unwire(m, PQ_NONE); vm_page_free(m); pmap_pte2list_free(&pv_vafree, (vm_offset_t)pc); } /* * Free the pv_entry back to the free list. */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { struct pv_chunk *pc; int idx, field, bit; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 32; bit = idx % 32; pc->pc_map[field] |= 1ul << bit; for (idx = 0; idx < _NPCM; idx++) if (pc->pc_map[idx] != pc_freemask[idx]) { /* * 98% of the time, pc is already at the head of the * list. If it isn't already, move it to the head. */ if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) != pc)) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } /* * Get a new pv_entry, allocating a block from the system * when needed. */ static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try) { static const struct timeval printinterval = { 60, 0 }; static struct timeval lastprint; int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(pv_entry_allocs++); pv_entry_count++; if (pv_entry_count > pv_entry_high_water) if (ratecheck(&lastprint, &printinterval)) printf("Approaching the limit on PV entries, consider " "increasing either the vm.pmap.shpgperproc or the " "vm.pmap.pv_entry_max tunable.\n"); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = ffs(pc->pc_map[field]) - 1; break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 32 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ for (field = 0; field < _NPCM; field++) if (pc->pc_map[field] != 0) { PV_STAT(pv_entry_spare--); return (pv); /* not full, return */ } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare--); return (pv); } } /* * Access to the pte2list "pv_vafree" is synchronized by the pvh * global lock. If "pv_vafree" is currently non-empty, it will * remain non-empty until pmap_pte2list_alloc() completes. 
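* Illustrative arithmetic, derived from the CTASSERTs above: a pv_chunk fills one PAGE_SIZE page and carries _NPCPV == 336 pv entries, so the free bitmap needs _NPCM == 11 32-bit words; the last word covers only 336 - 10 * 32 == 16 entries, which is why pc_freemask ends with PC_FREE10 == 0x0000ffff.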
*/ if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { if (try) { pv_entry_count--; PV_STAT(pc_chunk_tryfail++); return (NULL); } m = pmap_pv_reclaim(pmap); if (m == NULL) goto retry; } PV_STAT(pc_chunk_count++); PV_STAT(pc_chunk_allocs++); pc = (struct pv_chunk *)pmap_pte2list_alloc(&pv_vafree); pmap_qenter((vm_offset_t)pc, &m, 1); pc->pc_pmap = pmap; pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ for (field = 1; field < _NPCM; field++) pc->pc_map[field] = pc_freemask[field]; TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare += _NPCPV - 1); return (pv); } /* * Create a pv entry for page at pa for * (pmap, va). */ static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); break; } } return (pv); } static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } static void pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) { struct md_page *pvh; rw_assert(&pvh_global_lock, RA_WLOCKED); pmap_pvh_free(&m->md, pmap, va); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } static void pmap_pv_demote_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT((pa & PTE1_OFFSET) == 0, ("pmap_pv_demote_pte1: pa is not 1mpage aligned")); /* * Transfer the 1mpage's pv entry for this mapping to the first * page's pv list. */ pvh = pa_to_pvh(pa); va = pte1_trunc(va); pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pte1: pv not found")); m = PHYS_TO_VM_PAGE(pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); /* Instantiate the remaining NPTE2_IN_PT2 - 1 pv entries. */ va_last = va + PTE1_SIZE - PAGE_SIZE; do { m++; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_pv_demote_pte1: page %p is not managed", m)); va += PAGE_SIZE; pmap_insert_entry(pmap, va, m); } while (va < va_last); } #if VM_NRESERVLEVEL > 0 static void pmap_pv_promote_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT((pa & PTE1_OFFSET) == 0, ("pmap_pv_promote_pte1: pa is not 1mpage aligned")); /* * Transfer the first page's pv entry for this mapping to the * 1mpage's pv list. Aside from avoiding the cost of a call * to get_pv_entry(), a transfer avoids the possibility that * get_pv_entry() calls pmap_pv_reclaim() and that pmap_pv_reclaim() * removes one of the mappings that is being promoted. 
*/ m = PHYS_TO_VM_PAGE(pa); va = pte1_trunc(va); pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pte1: pv not found")); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); /* Free the remaining NPTE2_IN_PT2 - 1 pv entries. */ va_last = va + PTE1_SIZE - PAGE_SIZE; do { m++; va += PAGE_SIZE; pmap_pvh_free(&m->md, pmap, va); } while (va < va_last); } #endif /* * Conditionally create a pv entry. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); return (TRUE); } else return (FALSE); } /* * Create the pv entries for each of the pages within a section. */ static bool pmap_pv_insert_pte1(pmap_t pmap, vm_offset_t va, pt1_entry_t pte1, u_int flags) { struct md_page *pvh; pv_entry_t pv; bool noreclaim; rw_assert(&pvh_global_lock, RA_WLOCKED); noreclaim = (flags & PMAP_ENTER_NORECLAIM) != 0; if ((noreclaim && pv_entry_count >= pv_entry_high_water) || (pv = get_pv_entry(pmap, noreclaim)) == NULL) return (false); pv->pv_va = va; pvh = pa_to_pvh(pte1_pa(pte1)); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); return (true); } static inline void pmap_tlb_flush_pte1(pmap_t pmap, vm_offset_t va, pt1_entry_t npte1) { /* Kill all the small mappings or the big one only. */ if (pte1_is_section(npte1)) pmap_tlb_flush_range(pmap, pte1_trunc(va), PTE1_SIZE); else pmap_tlb_flush(pmap, pte1_trunc(va)); } /* * Update kernel pte1 on all pmaps. * * The following function is called only on one cpu with disabled interrupts. * In SMP case, smp_rendezvous_cpus() is used to stop other cpus. This way * nobody can invoke explicit hardware table walk during the update of pte1. * Unsolicited hardware table walk can still happen, invoked by speculative * data or instruction prefetch or even by speculative hardware table walk. * * The break-before-make approach should be implemented here. However, it's * not so easy to do that for kernel mappings as it would be unhappy to unmap * itself unexpectedly but voluntarily. */ static void pmap_update_pte1_kernel(vm_offset_t va, pt1_entry_t npte1) { pmap_t pmap; pt1_entry_t *pte1p; /* * Get current pmap. Interrupts should be disabled here * so PCPU_GET() is done atomically. */ pmap = PCPU_GET(curpmap); if (pmap == NULL) pmap = kernel_pmap; /* * (1) Change pte1 on current pmap. * (2) Flush all obsolete TLB entries on current CPU. * (3) Change pte1 on all pmaps. * (4) Flush all obsolete TLB entries on all CPUs in SMP case. */ pte1p = pmap_pte1(pmap, va); pte1_store(pte1p, npte1); /* Kill all the small mappings or the big one only. */ if (pte1_is_section(npte1)) { pmap_pte1_kern_promotions++; tlb_flush_range_local(pte1_trunc(va), PTE1_SIZE); } else { pmap_pte1_kern_demotions++; tlb_flush_local(pte1_trunc(va)); } /* * In SMP case, this function is called when all cpus are at smp * rendezvous, so there is no need to use 'allpmaps_lock' lock here. * In UP case, the function is called with this lock locked. */ LIST_FOREACH(pmap, &allpmaps, pm_list) { pte1p = pmap_pte1(pmap, va); pte1_store(pte1p, npte1); } #ifdef SMP /* Kill all the small mappings or the big one only. 
*/ if (pte1_is_section(npte1)) tlb_flush_range(pte1_trunc(va), PTE1_SIZE); else tlb_flush(pte1_trunc(va)); #endif } #ifdef SMP struct pte1_action { vm_offset_t va; pt1_entry_t npte1; u_int update; /* CPU that updates the PTE1 */ }; static void pmap_update_pte1_action(void *arg) { struct pte1_action *act = arg; if (act->update == PCPU_GET(cpuid)) pmap_update_pte1_kernel(act->va, act->npte1); } /* * Change pte1 on current pmap. * Note that kernel pte1 must be changed on all pmaps. * * According to the architecture reference manual published by ARM, * the behaviour is UNPREDICTABLE when two or more TLB entries map the same VA. * According to this manual, UNPREDICTABLE behaviours must never happen in * a viable system. In contrast, on x86 processors, it is not specified which * TLB entry mapping the virtual address will be used, but the MMU doesn't * generate a bogus translation the way it does on Cortex-A8 rev 2 (Beaglebone * Black). * * It's a problem when either promotion or demotion is being done. The pte1 * update and appropriate TLB flush must be done atomically in general. */ static void pmap_change_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va, pt1_entry_t npte1) { if (pmap == kernel_pmap) { struct pte1_action act; sched_pin(); act.va = va; act.npte1 = npte1; act.update = PCPU_GET(cpuid); smp_rendezvous_cpus(all_cpus, smp_no_rendezvous_barrier, pmap_update_pte1_action, NULL, &act); sched_unpin(); } else { register_t cspr; /* * Use break-before-make approach for changing userland * mappings. It can cause L1 translation aborts on other * cores in SMP case. So, special treatment is implemented * in pmap_fault(). To reduce the likelihood that another core * will be affected by the broken mapping, disable interrupts * until the mapping change is completed. */ cspr = disable_interrupts(PSR_I | PSR_F); pte1_clear(pte1p); pmap_tlb_flush_pte1(pmap, va, npte1); pte1_store(pte1p, npte1); restore_interrupts(cspr); } } #else static void pmap_change_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va, pt1_entry_t npte1) { if (pmap == kernel_pmap) { mtx_lock_spin(&allpmaps_lock); pmap_update_pte1_kernel(va, npte1); mtx_unlock_spin(&allpmaps_lock); } else { register_t cspr; /* * Use break-before-make approach for changing userland * mappings. It's absolutely safe in UP case when interrupts * are disabled. */ cspr = disable_interrupts(PSR_I | PSR_F); pte1_clear(pte1p); pmap_tlb_flush_pte1(pmap, va, npte1); pte1_store(pte1p, npte1); restore_interrupts(cspr); } } #endif #if VM_NRESERVLEVEL > 0 /* * Tries to promote the NPTE2_IN_PT2, contiguous 4KB page mappings that are * within a single page table page (PT2) to a single 1MB page mapping. * For promotion to occur, two conditions must be met: (1) the 4KB page * mappings must map aligned, contiguous physical memory and (2) the 4KB page * mappings must have identical characteristics. * * Managed (PG_MANAGED) mappings within the kernel address space are not * promoted. The reason is that kernel PTE1s are replicated in each pmap but * pmap_remove_write(), pmap_clear_modify(), and pmap_clear_reference() only * read the PTE1 from the kernel pmap. */ static void pmap_promote_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va) { pt1_entry_t npte1; pt2_entry_t *fpte2p, fpte2, fpte2_fav; pt2_entry_t *pte2p, pte2; vm_offset_t pteva __unused; vm_page_t m __unused; PDEBUG(6, printf("%s(%p): try for va %#x pte1 %#x at %p\n", __func__, pmap, va, pte1_load(pte1p), pte1p)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Examine the first PTE2 in the specified PT2. 
Abort if this PTE2 is * either invalid, unused, or does not map the first 4KB physical page * within a 1MB page. */ fpte2p = pmap_pte2_quick(pmap, pte1_trunc(va)); fpte2 = pte2_load(fpte2p); if ((fpte2 & ((PTE2_FRAME & PTE1_OFFSET) | PTE2_A | PTE2_V)) != (PTE2_A | PTE2_V)) { pmap_pte1_p_failures++; CTR3(KTR_PMAP, "%s: failure(1) for va %#x in pmap %p", __func__, va, pmap); return; } if (pte2_is_managed(fpte2) && pmap == kernel_pmap) { pmap_pte1_p_failures++; CTR3(KTR_PMAP, "%s: failure(2) for va %#x in pmap %p", __func__, va, pmap); return; } if ((fpte2 & (PTE2_NM | PTE2_RO)) == PTE2_NM) { /* * When page is not modified, PTE2_RO can be set without * a TLB invalidation. */ fpte2 |= PTE2_RO; pte2_store(fpte2p, fpte2); } /* * Examine each of the other PTE2s in the specified PT2. Abort if this * PTE2 maps an unexpected 4KB physical page or does not have identical * characteristics to the first PTE2. */ fpte2_fav = (fpte2 & (PTE2_FRAME | PTE2_A | PTE2_V)); fpte2_fav += PTE1_SIZE - PTE2_SIZE; /* examine from the end */ for (pte2p = fpte2p + NPTE2_IN_PT2 - 1; pte2p > fpte2p; pte2p--) { pte2 = pte2_load(pte2p); if ((pte2 & (PTE2_FRAME | PTE2_A | PTE2_V)) != fpte2_fav) { pmap_pte1_p_failures++; CTR3(KTR_PMAP, "%s: failure(3) for va %#x in pmap %p", __func__, va, pmap); return; } if ((pte2 & (PTE2_NM | PTE2_RO)) == PTE2_NM) { /* * When page is not modified, PTE2_RO can be set * without a TLB invalidation. See note above. */ pte2 |= PTE2_RO; pte2_store(pte2p, pte2); pteva = pte1_trunc(va) | (pte2 & PTE1_OFFSET & PTE2_FRAME); CTR3(KTR_PMAP, "%s: protect for va %#x in pmap %p", __func__, pteva, pmap); } if ((pte2 & PTE2_PROMOTE) != (fpte2 & PTE2_PROMOTE)) { pmap_pte1_p_failures++; CTR3(KTR_PMAP, "%s: failure(4) for va %#x in pmap %p", __func__, va, pmap); return; } fpte2_fav -= PTE2_SIZE; } /* * The page table page in its current state will stay in PT2TAB * until the PTE1 mapping the section is demoted by pmap_demote_pte1() * or destroyed by pmap_remove_pte1(). * * Note that L2 page table size is not equal to PAGE_SIZE. */ m = PHYS_TO_VM_PAGE(trunc_page(pte1_link_pa(pte1_load(pte1p)))); KASSERT(m >= vm_page_array && m < &vm_page_array[vm_page_array_size], ("%s: PT2 page is out of range", __func__)); KASSERT(m->pindex == (pte1_index(va) & ~PT2PG_MASK), ("%s: PT2 page's pindex is wrong", __func__)); /* * Get pte1 from pte2 format. */ npte1 = (fpte2 & PTE1_FRAME) | ATTR_TO_L1(fpte2) | PTE1_V; /* * Promote the pv entries. */ if (pte2_is_managed(fpte2)) pmap_pv_promote_pte1(pmap, va, pte1_pa(npte1)); /* * Promote the mappings. */ pmap_change_pte1(pmap, pte1p, va, npte1); pmap_pte1_promotions++; CTR3(KTR_PMAP, "%s: success for va %#x in pmap %p", __func__, va, pmap); PDEBUG(6, printf("%s(%p): success for va %#x pte1 %#x(%#x) at %p\n", __func__, pmap, va, npte1, pte1_load(pte1p), pte1p)); } #endif /* VM_NRESERVLEVEL > 0 */ /* * Zero L2 page table page. */ static __inline void pmap_clear_pt2(pt2_entry_t *fpte2p) { pt2_entry_t *pte2p; for (pte2p = fpte2p; pte2p < fpte2p + NPTE2_IN_PT2; pte2p++) pte2_clear(pte2p); } /* * Removes a 1MB page mapping from the kernel pmap. */ static void pmap_remove_kernel_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va) { vm_page_t m; uint32_t pte1_idx; pt2_entry_t *fpte2p; vm_paddr_t pt2_pa; PMAP_LOCK_ASSERT(pmap, MA_OWNED); m = pmap_pt2_page(pmap, va); if (m == NULL) /* * QQQ: Is this function called only on promoted pte1? * We certainly do section mappings directly * (without promotion) in kernel !!! 
*/ panic("%s: missing pt2 page", __func__); pte1_idx = pte1_index(va); /* * Initialize the L2 page table. */ fpte2p = page_pt2(pt2map_pt2pg(va), pte1_idx); pmap_clear_pt2(fpte2p); /* * Remove the mapping. */ pt2_pa = page_pt2pa(VM_PAGE_TO_PHYS(m), pte1_idx); pmap_kenter_pte1(va, PTE1_LINK(pt2_pa)); /* * QQQ: We do not need to invalidate PT2MAP mapping * as we did not change it. I.e. the L2 page table page * was and still is mapped the same way. */ } /* * Do the things to unmap a section in a process */ static void pmap_remove_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t sva, struct spglist *free) { pt1_entry_t opte1; struct md_page *pvh; vm_offset_t eva, va; vm_page_t m; PDEBUG(6, printf("%s(%p): va %#x pte1 %#x at %p\n", __func__, pmap, sva, pte1_load(pte1p), pte1p)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PTE1_OFFSET) == 0, ("%s: sva is not 1mpage aligned", __func__)); /* * Clear and invalidate the mapping. It should occupy one and only TLB * entry. So, pmap_tlb_flush() called with aligned address should be * sufficient. */ opte1 = pte1_load_clear(pte1p); pmap_tlb_flush(pmap, sva); if (pte1_is_wired(opte1)) pmap->pm_stats.wired_count -= PTE1_SIZE / PAGE_SIZE; pmap->pm_stats.resident_count -= PTE1_SIZE / PAGE_SIZE; if (pte1_is_managed(opte1)) { pvh = pa_to_pvh(pte1_pa(opte1)); pmap_pvh_free(pvh, pmap, sva); eva = sva + PTE1_SIZE; for (va = sva, m = PHYS_TO_VM_PAGE(pte1_pa(opte1)); va < eva; va += PAGE_SIZE, m++) { if (pte1_is_dirty(opte1)) vm_page_dirty(m); if (opte1 & PTE1_A) vm_page_aflag_set(m, PGA_REFERENCED); if (TAILQ_EMPTY(&m->md.pv_list) && TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } if (pmap == kernel_pmap) { /* * L2 page table(s) can't be removed from kernel map as * kernel counts on it (stuff around pmap_growkernel()). */ pmap_remove_kernel_pte1(pmap, pte1p, sva); } else { /* * Get associated L2 page table page. * It's possible that the page was never allocated. */ m = pmap_pt2_page(pmap, sva); if (m != NULL) pmap_unwire_pt2_all(pmap, sva, m, free); } } /* * Fills L2 page table page with mappings to consecutive physical pages. */ static __inline void pmap_fill_pt2(pt2_entry_t *fpte2p, pt2_entry_t npte2) { pt2_entry_t *pte2p; for (pte2p = fpte2p; pte2p < fpte2p + NPTE2_IN_PT2; pte2p++) { pte2_store(pte2p, npte2); npte2 += PTE2_SIZE; } } /* * Tries to demote a 1MB page mapping. If demotion fails, the * 1MB page mapping is invalidated. */ static boolean_t pmap_demote_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va) { pt1_entry_t opte1, npte1; pt2_entry_t *fpte2p, npte2; vm_paddr_t pt2pg_pa, pt2_pa; vm_page_t m; struct spglist free; uint32_t pte1_idx, isnew = 0; PDEBUG(6, printf("%s(%p): try for va %#x pte1 %#x at %p\n", __func__, pmap, va, pte1_load(pte1p), pte1p)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); opte1 = pte1_load(pte1p); KASSERT(pte1_is_section(opte1), ("%s: opte1 not a section", __func__)); if ((opte1 & PTE1_A) == 0 || (m = pmap_pt2_page(pmap, va)) == NULL) { KASSERT(!pte1_is_wired(opte1), ("%s: PT2 page for a wired mapping is missing", __func__)); /* * Invalidate the 1MB page mapping and return * "failure" if the mapping was never accessed or the * allocation of the new page table page fails. 
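* Removing a never accessed mapping loses no information: the mapping is not wired (see the KASSERT above), so a later access will simply fault and re-create it.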
*/ if ((opte1 & PTE1_A) == 0 || (m = vm_page_alloc(NULL, pte1_index(va) & ~PT2PG_MASK, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED)) == NULL) { SLIST_INIT(&free); pmap_remove_pte1(pmap, pte1p, pte1_trunc(va), &free); vm_page_free_pages_toq(&free, false); CTR3(KTR_PMAP, "%s: failure for va %#x in pmap %p", __func__, va, pmap); return (FALSE); } if (va < VM_MAXUSER_ADDRESS) pmap->pm_stats.resident_count++; isnew = 1; /* * We init all L2 page tables in the page even if * we are going to change everything for one L2 page * table in a while. */ pt2pg_pa = pmap_pt2pg_init(pmap, va, m); } else { if (va < VM_MAXUSER_ADDRESS) { if (pt2_is_empty(m, va)) isnew = 1; /* Demoting section w/o promotion. */ #ifdef INVARIANTS else KASSERT(pt2_is_full(m, va), ("%s: bad PT2 wire" " count %u", __func__, pt2_wirecount_get(m, pte1_index(va)))); #endif } } pt2pg_pa = VM_PAGE_TO_PHYS(m); pte1_idx = pte1_index(va); /* * If the pmap is current, then the PT2MAP can provide access to * the page table page (promoted L2 page tables are not unmapped). * Otherwise, temporarily map the L2 page table page (m) into * the kernel's address space at either PADDR1 or PADDR2. * * Note that L2 page table size is not equal to PAGE_SIZE. */ if (pmap_is_current(pmap)) fpte2p = page_pt2(pt2map_pt2pg(va), pte1_idx); else if (curthread->td_pinned > 0 && rw_wowned(&pvh_global_lock)) { if (pte2_pa(pte2_load(PMAP1)) != pt2pg_pa) { pte2_store(PMAP1, PTE2_KPT(pt2pg_pa)); #ifdef SMP PMAP1cpu = PCPU_GET(cpuid); #endif tlb_flush_local((vm_offset_t)PADDR1); PMAP1changed++; } else #ifdef SMP if (PMAP1cpu != PCPU_GET(cpuid)) { PMAP1cpu = PCPU_GET(cpuid); tlb_flush_local((vm_offset_t)PADDR1); PMAP1changedcpu++; } else #endif PMAP1unchanged++; fpte2p = page_pt2((vm_offset_t)PADDR1, pte1_idx); } else { mtx_lock(&PMAP2mutex); if (pte2_pa(pte2_load(PMAP2)) != pt2pg_pa) { pte2_store(PMAP2, PTE2_KPT(pt2pg_pa)); tlb_flush((vm_offset_t)PADDR2); } fpte2p = page_pt2((vm_offset_t)PADDR2, pte1_idx); } pt2_pa = page_pt2pa(pt2pg_pa, pte1_idx); npte1 = PTE1_LINK(pt2_pa); KASSERT((opte1 & PTE1_A) != 0, ("%s: opte1 is missing PTE1_A", __func__)); KASSERT((opte1 & (PTE1_NM | PTE1_RO)) != PTE1_NM, ("%s: opte1 has PTE1_NM", __func__)); /* * Get pte2 from pte1 format. */ npte2 = pte1_pa(opte1) | ATTR_TO_L2(opte1) | PTE2_V; /* * If the L2 page table page is new, initialize it. If the mapping * has changed attributes, update the page table entries. */ if (isnew != 0) { pt2_wirecount_set(m, pte1_idx, NPTE2_IN_PT2); pmap_fill_pt2(fpte2p, npte2); } else if ((pte2_load(fpte2p) & PTE2_PROMOTE) != (npte2 & PTE2_PROMOTE)) pmap_fill_pt2(fpte2p, npte2); KASSERT(pte2_pa(pte2_load(fpte2p)) == pte2_pa(npte2), ("%s: fpte2p and npte2 map different physical addresses", __func__)); if (fpte2p == PADDR2) mtx_unlock(&PMAP2mutex); /* * Demote the mapping. This pmap is locked. The old PTE1 has * PTE1_A set. If the old PTE1 has not PTE1_RO set, it also * has not PTE1_NM set. Thus, there is no danger of a race with * another processor changing the setting of PTE1_A and/or PTE1_NM * between the read above and the store below. */ pmap_change_pte1(pmap, pte1p, va, npte1); /* * Demote the pv entry. This depends on the earlier demotion * of the mapping. Specifically, the (re)creation of a per- * page pv entry might trigger the execution of pmap_pv_reclaim(), * which might reclaim a newly (re)created per-page pv entry * and destroy the associated mapping. In order to destroy * the mapping, the PTE1 must have already changed from mapping * the 1mpage to referencing the page table page. 
*/ if (pte1_is_managed(opte1)) pmap_pv_demote_pte1(pmap, va, pte1_pa(opte1)); pmap_pte1_demotions++; CTR3(KTR_PMAP, "%s: success for va %#x in pmap %p", __func__, va, pmap); PDEBUG(6, printf("%s(%p): success for va %#x pte1 %#x(%#x) at %p\n", __func__, pmap, va, npte1, pte1_load(pte1p), pte1p)); return (TRUE); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind) { pt1_entry_t *pte1p; pt2_entry_t *pte2p; pt2_entry_t npte2, opte2; pv_entry_t pv; vm_paddr_t opa, pa; vm_page_t mpte2, om; int rv; va = trunc_page(va); KASSERT(va <= vm_max_kernel_address, ("%s: toobig", __func__)); KASSERT(va < UPT2V_MIN_ADDRESS || va >= UPT2V_MAX_ADDRESS, ("%s: invalid to pmap_enter page table pages (va: 0x%x)", __func__, va)); KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva || va >= kmi.clean_eva, ("%s: managed mapping within the clean submap", __func__)); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); KASSERT((flags & PMAP_ENTER_RESERVED) == 0, ("%s: flags %u has reserved bits set", __func__, flags)); pa = VM_PAGE_TO_PHYS(m); npte2 = PTE2(pa, PTE2_A, vm_page_pte2_attr(m)); if ((flags & VM_PROT_WRITE) == 0) npte2 |= PTE2_NM; if ((prot & VM_PROT_WRITE) == 0) npte2 |= PTE2_RO; KASSERT((npte2 & (PTE2_NM | PTE2_RO)) != PTE2_RO, ("%s: flags includes VM_PROT_WRITE but prot doesn't", __func__)); if ((prot & VM_PROT_EXECUTE) == 0) npte2 |= PTE2_NX; if ((flags & PMAP_ENTER_WIRED) != 0) npte2 |= PTE2_W; if (va < VM_MAXUSER_ADDRESS) npte2 |= PTE2_U; if (pmap != kernel_pmap) npte2 |= PTE2_NG; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); sched_pin(); if (psind == 1) { /* Assert the required virtual and physical alignment. */ KASSERT((va & PTE1_OFFSET) == 0, ("%s: va unaligned", __func__)); KASSERT(m->psind > 0, ("%s: m->psind < psind", __func__)); rv = pmap_enter_pte1(pmap, va, PTE1_PA(pa) | ATTR_TO_L1(npte2) | PTE1_V, flags, m); goto out; } /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { mpte2 = pmap_allocpte2(pmap, va, flags); if (mpte2 == NULL) { KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0, ("pmap_allocpte2 failed with sleep allowed")); rv = KERN_RESOURCE_SHORTAGE; goto out; } } else mpte2 = NULL; pte1p = pmap_pte1(pmap, va); if (pte1_is_section(pte1_load(pte1p))) panic("%s: attempted on 1MB page", __func__); pte2p = pmap_pte2_quick(pmap, va); if (pte2p == NULL) panic("%s: invalid L1 page table entry va=%#x", __func__, va); om = NULL; opte2 = pte2_load(pte2p); opa = pte2_pa(opte2); /* * Mapping has not changed, must be protection or wiring change. */ if (pte2_is_valid(opte2) && (opa == pa)) { /* * Wiring change, just update stats. We don't worry about * wiring PT2 pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT2 page will be also. 
*/ if (pte2_is_wired(npte2) && !pte2_is_wired(opte2)) pmap->pm_stats.wired_count++; else if (!pte2_is_wired(npte2) && pte2_is_wired(opte2)) pmap->pm_stats.wired_count--; /* * Remove extra pte2 reference */ if (mpte2) pt2_wirecount_dec(mpte2, pte1_index(va)); if ((m->oflags & VPO_UNMANAGED) == 0) om = m; goto validate; } /* * QQQ: We think that changing physical address on writeable mapping * is not safe. Well, maybe on kernel address space with correct * locking, it can make a sense. However, we have no idea why * anyone should do that on user address space. Are we wrong? */ KASSERT((opa == 0) || (opa == pa) || !pte2_is_valid(opte2) || ((opte2 & PTE2_RO) != 0), ("%s: pmap %p va %#x(%#x) opa %#x pa %#x - gotcha %#x %#x!", __func__, pmap, va, opte2, opa, pa, flags, prot)); pv = NULL; /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ if (opa) { if (pte2_is_wired(opte2)) pmap->pm_stats.wired_count--; om = PHYS_TO_VM_PAGE(opa); if (om != NULL && (om->oflags & VPO_UNMANAGED) != 0) om = NULL; if (om != NULL) pv = pmap_pvh_remove(&om->md, pmap, va); /* * Remove extra pte2 reference */ if (mpte2 != NULL) pt2_wirecount_dec(mpte2, va >> PTE1_SHIFT); } else pmap->pm_stats.resident_count++; /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0) { if (pv == NULL) { pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; } TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } else if (pv != NULL) free_pv_entry(pmap, pv); /* * Increment counters */ if (pte2_is_wired(npte2)) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. */ if (prot & VM_PROT_WRITE) { if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(m, PGA_WRITEABLE); } /* * If the mapping or permission bits are different, we need * to update the pte2. * * QQQ: Think again and again what to do * if the mapping is going to be changed! */ if ((opte2 & ~(PTE2_NM | PTE2_A)) != (npte2 & ~(PTE2_NM | PTE2_A))) { /* * Sync icache if exec permission and attribute VM_MEMATTR_WB_WA * is set. Do it now, before the mapping is stored and made * valid for hardware table walk. If done later, there is a race * for other threads of current process in lazy loading case. * Don't do it for kernel memory which is mapped with exec * permission even if the memory isn't going to hold executable * code. The only time when icache sync is needed is after * kernel module is loaded and the relocation info is processed. * And it's done in elf_cpu_load_file(). * * QQQ: (1) Does it exist any better way where * or how to sync icache? * (2) Now, we do it on a page basis. */ if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap && m->md.pat_mode == VM_MEMATTR_WB_WA && (opa != pa || (opte2 & PTE2_NX))) cache_icache_sync_fresh(va, pa, PAGE_SIZE); if (opte2 & PTE2_V) { /* Change mapping with break-before-make approach. */ opte2 = pte2_load_clear(pte2p); pmap_tlb_flush(pmap, va); pte2_store(pte2p, npte2); if (om != NULL) { KASSERT((om->oflags & VPO_UNMANAGED) == 0, ("%s: om %p unmanaged", __func__, om)); if ((opte2 & PTE2_A) != 0) vm_page_aflag_set(om, PGA_REFERENCED); if (pte2_is_dirty(opte2)) vm_page_dirty(om); if (TAILQ_EMPTY(&om->md.pv_list) && ((om->flags & PG_FICTITIOUS) != 0 || TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) vm_page_aflag_clear(om, PGA_WRITEABLE); } } else pte2_store(pte2p, npte2); } #if 0 else { /* * QQQ: In time when both access and not mofified bits are * emulated by software, this should not happen. 
Some * analysis is need, if this really happen. Missing * tlb flush somewhere could be the reason. */ panic("%s: pmap %p va %#x opte2 %x npte2 %x !!", __func__, pmap, va, opte2, npte2); } #endif #if VM_NRESERVLEVEL > 0 /* * If both the L2 page table page and the reservation are fully * populated, then attempt promotion. */ if ((mpte2 == NULL || pt2_is_full(mpte2, va)) && sp_enabled && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) pmap_promote_pte1(pmap, pte1p, va); #endif rv = KERN_SUCCESS; out: sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (rv); } /* * Do the things to unmap a page in a process. */ static int pmap_remove_pte2(pmap_t pmap, pt2_entry_t *pte2p, vm_offset_t va, struct spglist *free) { pt2_entry_t opte2; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Clear and invalidate the mapping. */ opte2 = pte2_load_clear(pte2p); pmap_tlb_flush(pmap, va); KASSERT(pte2_is_valid(opte2), ("%s: pmap %p va %#x not link pte2 %#x", __func__, pmap, va, opte2)); if (opte2 & PTE2_W) pmap->pm_stats.wired_count -= 1; pmap->pm_stats.resident_count -= 1; if (pte2_is_managed(opte2)) { m = PHYS_TO_VM_PAGE(pte2_pa(opte2)); if (pte2_is_dirty(opte2)) vm_page_dirty(m); if (opte2 & PTE2_A) vm_page_aflag_set(m, PGA_REFERENCED); pmap_remove_entry(pmap, m, va); } return (pmap_unuse_pt2(pmap, va, free)); } /* * Remove a single page from a process address space. */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va, struct spglist *free) { pt2_entry_t *pte2p; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT(curthread->td_pinned > 0, ("%s: curthread not pinned", __func__)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((pte2p = pmap_pte2_quick(pmap, va)) == NULL || !pte2_is_valid(pte2_load(pte2p))) return; pmap_remove_pte2(pmap, pte2p, va, free); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t nextva; pt1_entry_t *pte1p, pte1; pt2_entry_t *pte2p, pte2; struct spglist free; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; SLIST_INIT(&free); rw_wlock(&pvh_global_lock); sched_pin(); PMAP_LOCK(pmap); /* * Special handling of removing one page. A very common * operation and easy to short circuit some code. */ if (sva + PAGE_SIZE == eva) { pte1 = pte1_load(pmap_pte1(pmap, sva)); if (pte1_is_link(pte1)) { pmap_remove_page(pmap, sva, &free); goto out; } } for (; sva < eva; sva = nextva) { /* * Calculate address for next L2 page table. */ nextva = pte1_trunc(sva + PTE1_SIZE); if (nextva < sva) nextva = eva; if (pmap->pm_stats.resident_count == 0) break; pte1p = pmap_pte1(pmap, sva); pte1 = pte1_load(pte1p); /* * Weed out invalid mappings. Note: we assume that the L1 page * table is always allocated, and in kernel virtual. */ if (pte1 == 0) continue; if (pte1_is_section(pte1)) { /* * Are we removing the entire large page? If not, * demote the mapping and fall through. */ if (sva + PTE1_SIZE == nextva && eva >= nextva) { pmap_remove_pte1(pmap, pte1p, sva, &free); continue; } else if (!pmap_demote_pte1(pmap, pte1p, sva)) { /* The large page mapping was destroyed. */ continue; } #ifdef INVARIANTS else { /* Update pte1 after demotion. 
*/ pte1 = pte1_load(pte1p); } #endif } KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p" " is not link", __func__, pmap, sva, pte1, pte1p)); /* * Limit our scan to either the end of the va represented * by the current L2 page table page, or to the end of the * range being removed. */ if (nextva > eva) nextva = eva; for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva; pte2p++, sva += PAGE_SIZE) { pte2 = pte2_load(pte2p); if (!pte2_is_valid(pte2)) continue; if (pmap_remove_pte2(pmap, pte2p, sva, &free)) break; } } out: sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); vm_page_free_pages_toq(&free, false); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { struct md_page *pvh; pv_entry_t pv; pmap_t pmap; pt2_entry_t *pte2p, opte2; pt1_entry_t *pte1p; vm_offset_t va; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("%s: page %p is not managed", __func__, m)); SLIST_INIT(&free); rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1p = pmap_pte1(pmap, va); (void)pmap_demote_pte1(pmap, pte1p, va); PMAP_UNLOCK(pmap); } small_mappings: while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap->pm_stats.resident_count--; pte1p = pmap_pte1(pmap, pv->pv_va); KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found " "a 1mpage in page %p's pv list", __func__, m)); pte2p = pmap_pte2_quick(pmap, pv->pv_va); opte2 = pte2_load_clear(pte2p); pmap_tlb_flush(pmap, pv->pv_va); KASSERT(pte2_is_valid(opte2), ("%s: pmap %p va %x zero pte2", __func__, pmap, pv->pv_va)); if (pte2_is_wired(opte2)) pmap->pm_stats.wired_count--; if (opte2 & PTE2_A) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if (pte2_is_dirty(opte2)) vm_page_dirty(m); pmap_unuse_pt2(pmap, pv->pv_va, &free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); sched_unpin(); rw_wunlock(&pvh_global_lock); vm_page_free_pages_toq(&free, false); } /* * Just subroutine for pmap_remove_pages() to reasonably satisfy * good coding style, a.k.a. 80 character line width limit hell. 
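* It tears down one 1MB (section) mapping: the constituent pages are dirtied if the section was modified, the pv entry is removed from the 1mpage's pv list, and the associated L2 page table page, if any, is unwired.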
*/ static __inline void pmap_remove_pte1_quick(pmap_t pmap, pt1_entry_t pte1, pv_entry_t pv, struct spglist *free) { vm_paddr_t pa; vm_page_t m, mt, mpt2pg; struct md_page *pvh; pa = pte1_pa(pte1); m = PHYS_TO_VM_PAGE(pa); KASSERT(m->phys_addr == pa, ("%s: vm_page_t %p addr mismatch %#x %#x", __func__, m, m->phys_addr, pa)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("%s: bad pte1 %#x", __func__, pte1)); if (pte1_is_dirty(pte1)) { for (mt = m; mt < &m[PTE1_SIZE / PAGE_SIZE]; mt++) vm_page_dirty(mt); } pmap->pm_stats.resident_count -= PTE1_SIZE / PAGE_SIZE; pvh = pa_to_pvh(pa); TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[PTE1_SIZE / PAGE_SIZE]; mt++) if (TAILQ_EMPTY(&mt->md.pv_list)) vm_page_aflag_clear(mt, PGA_WRITEABLE); } mpt2pg = pmap_pt2_page(pmap, pv->pv_va); if (mpt2pg != NULL) pmap_unwire_pt2_all(pmap, pv->pv_va, mpt2pg, free); } /* * Just subroutine for pmap_remove_pages() to reasonably satisfy * good coding style, a.k.a. 80 character line width limit hell. */ static __inline void pmap_remove_pte2_quick(pmap_t pmap, pt2_entry_t pte2, pv_entry_t pv, struct spglist *free) { vm_paddr_t pa; vm_page_t m; struct md_page *pvh; pa = pte2_pa(pte2); m = PHYS_TO_VM_PAGE(pa); KASSERT(m->phys_addr == pa, ("%s: vm_page_t %p addr mismatch %#x %#x", __func__, m, m->phys_addr, pa)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("%s: bad pte2 %#x", __func__, pte2)); if (pte2_is_dirty(pte2)) vm_page_dirty(m); pmap->pm_stats.resident_count--; TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(pa); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } pmap_unuse_pt2(pmap, pv->pv_va, free); } /* * Remove all pages from specified address space this aids process * exit speeds. Also, this code is special cased for current process * only, but can have the more generic (and slightly slower) mode enabled. * This is much faster than pmap_remove in the case of running down * an entire address space. */ void pmap_remove_pages(pmap_t pmap) { pt1_entry_t *pte1p, pte1; pt2_entry_t *pte2p, pte2; pv_entry_t pv; struct pv_chunk *pc, *npc; struct spglist free; int field, idx; int32_t bit; uint32_t inuse, bitmask; boolean_t allfree; /* * Assert that the given pmap is only active on the current * CPU. Unfortunately, we cannot block another CPU from * activating the pmap while this function is executing. 
*/ KASSERT(pmap == vmspace_pmap(curthread->td_proc->p_vmspace), ("%s: non-current pmap %p", __func__, pmap)); #if defined(SMP) && defined(INVARIANTS) { cpuset_t other_cpus; sched_pin(); other_cpus = pmap->pm_active; CPU_CLR(PCPU_GET(cpuid), &other_cpus); sched_unpin(); KASSERT(CPU_EMPTY(&other_cpus), ("%s: pmap %p active on other cpus", __func__, pmap)); } #endif SLIST_INIT(&free); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); sched_pin(); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { KASSERT(pc->pc_pmap == pmap, ("%s: wrong pmap %p %p", __func__, pmap, pc->pc_pmap)); allfree = TRUE; for (field = 0; field < _NPCM; field++) { inuse = (~(pc->pc_map[field])) & pc_freemask[field]; while (inuse != 0) { bit = ffs(inuse) - 1; bitmask = 1UL << bit; idx = field * 32 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; /* * Note that we cannot remove wired pages * from a process' mapping at this time */ pte1p = pmap_pte1(pmap, pv->pv_va); pte1 = pte1_load(pte1p); if (pte1_is_section(pte1)) { if (pte1_is_wired(pte1)) { allfree = FALSE; continue; } pte1_clear(pte1p); pmap_remove_pte1_quick(pmap, pte1, pv, &free); } else if (pte1_is_link(pte1)) { pte2p = pt2map_entry(pv->pv_va); pte2 = pte2_load(pte2p); if (!pte2_is_valid(pte2)) { printf("%s: pmap %p va %#x " "pte2 %#x\n", __func__, pmap, pv->pv_va, pte2); panic("bad pte2"); } if (pte2_is_wired(pte2)) { allfree = FALSE; continue; } pte2_clear(pte2p); pmap_remove_pte2_quick(pmap, pte2, pv, &free); } else { printf("%s: pmap %p va %#x pte1 %#x\n", __func__, pmap, pv->pv_va, pte1); panic("bad pte1"); } /* Mark free */ PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; pc->pc_map[field] |= bitmask; } } if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } tlb_flush_all_ng_local(); sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); vm_page_free_pages_toq(&free, false); } /* * This code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No L2 page table pages. * but is *MUCH* faster than pmap_enter... */ static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpt2pg) { pt2_entry_t *pte2p, pte2; vm_paddr_t pa; struct spglist free; uint32_t l2prot; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("%s: managed mapping within the clean submap", __func__)); rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * In the case that a L2 page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { u_int pte1_idx; pt1_entry_t pte1, *pte1p; vm_paddr_t pt2_pa; /* * Get L1 page table things. */ pte1_idx = pte1_index(va); pte1p = pmap_pte1(pmap, va); pte1 = pte1_load(pte1p); if (mpt2pg && (mpt2pg->pindex == (pte1_idx & ~PT2PG_MASK))) { /* * Each of NPT2_IN_PG L2 page tables on the page can * come here. Make sure that associated L1 page table * link is established. * * QQQ: It comes that we don't establish all links to * L2 page tables for newly allocated L2 page * tables page. */ KASSERT(!pte1_is_section(pte1), ("%s: pte1 %#x is section", __func__, pte1)); if (!pte1_is_link(pte1)) { pt2_pa = page_pt2pa(VM_PAGE_TO_PHYS(mpt2pg), pte1_idx); pte1_store(pte1p, PTE1_LINK(pt2_pa)); } pt2_wirecount_inc(mpt2pg, pte1_idx); } else { /* * If the L2 page table page is mapped, we just * increment the hold count, and activate it. 
*/ if (pte1_is_section(pte1)) { return (NULL); } else if (pte1_is_link(pte1)) { mpt2pg = PHYS_TO_VM_PAGE(pte1_link_pa(pte1)); pt2_wirecount_inc(mpt2pg, pte1_idx); } else { mpt2pg = _pmap_allocpte2(pmap, va, PMAP_ENTER_NOSLEEP); if (mpt2pg == NULL) return (NULL); } } } else { mpt2pg = NULL; } /* * This call to pt2map_entry() makes the assumption that we are * entering the page into the current pmap. In order to support * quick entry into any pmap, one would likely use pmap_pte2_quick(). * But that isn't as quick as pt2map_entry(). */ pte2p = pt2map_entry(va); pte2 = pte2_load(pte2p); if (pte2_is_valid(pte2)) { if (mpt2pg != NULL) { /* * Remove extra pte2 reference */ pt2_wirecount_dec(mpt2pg, pte1_index(va)); mpt2pg = NULL; } return (NULL); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m)) { if (mpt2pg != NULL) { SLIST_INIT(&free); if (pmap_unwire_pt2(pmap, va, mpt2pg, &free)) { pmap_tlb_flush(pmap, va); vm_page_free_pages_toq(&free, false); } mpt2pg = NULL; } return (NULL); } /* * Increment counters */ pmap->pm_stats.resident_count++; /* * Now validate mapping with RO protection */ pa = VM_PAGE_TO_PHYS(m); l2prot = PTE2_RO | PTE2_NM; if (va < VM_MAXUSER_ADDRESS) l2prot |= PTE2_U | PTE2_NG; if ((prot & VM_PROT_EXECUTE) == 0) l2prot |= PTE2_NX; else if (m->md.pat_mode == VM_MEMATTR_WB_WA && pmap != kernel_pmap) { /* * Sync icache if exec permission and attribute VM_MEMATTR_WB_WA * is set. QQQ: For more info, see comments in pmap_enter(). */ cache_icache_sync_fresh(va, pa, PAGE_SIZE); } pte2_store(pte2p, PTE2(pa, l2prot, vm_page_pte2_attr(m))); return (mpt2pg); } void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Tries to create a read- and/or execute-only 1 MB page mapping. Returns * true if successful. Returns false if (1) a mapping already exists at the * specified virtual address or (2) a PV entry cannot be allocated without * reclaiming another PV entry. */ static bool pmap_enter_1mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { pt1_entry_t pte1; vm_paddr_t pa; PMAP_LOCK_ASSERT(pmap, MA_OWNED); pa = VM_PAGE_TO_PHYS(m); pte1 = PTE1(pa, PTE1_NM | PTE1_RO, ATTR_TO_L1(vm_page_pte2_attr(m))); if ((prot & VM_PROT_EXECUTE) == 0) pte1 |= PTE1_NX; if (va < VM_MAXUSER_ADDRESS) pte1 |= PTE1_U; if (pmap != kernel_pmap) pte1 |= PTE1_NG; return (pmap_enter_pte1(pmap, va, pte1, PMAP_ENTER_NOSLEEP | PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, m) == KERN_SUCCESS); } /* * Tries to create the specified 1 MB page mapping. Returns KERN_SUCCESS if * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE * otherwise. Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and * a mapping already exists at the specified virtual address. Returns * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NORECLAIM was specified and PV entry * allocation failed. 
*/ static int pmap_enter_pte1(pmap_t pmap, vm_offset_t va, pt1_entry_t pte1, u_int flags, vm_page_t m) { struct spglist free; pt1_entry_t opte1, *pte1p; pt2_entry_t pte2, *pte2p; vm_offset_t cur, end; vm_page_t mt; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT((pte1 & (PTE1_NM | PTE1_RO)) == 0 || (pte1 & (PTE1_NM | PTE1_RO)) == (PTE1_NM | PTE1_RO), ("%s: pte1 has inconsistent NM and RO attributes", __func__)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); pte1p = pmap_pte1(pmap, va); opte1 = pte1_load(pte1p); if (pte1_is_valid(opte1)) { if ((flags & PMAP_ENTER_NOREPLACE) != 0) { CTR3(KTR_PMAP, "%s: failure for va %#lx in pmap %p", __func__, va, pmap); return (KERN_FAILURE); } /* Break the existing mapping(s). */ SLIST_INIT(&free); if (pte1_is_section(opte1)) { /* * If the section resulted from a promotion, then a * reserved PT page could be freed. */ pmap_remove_pte1(pmap, pte1p, va, &free); } else { sched_pin(); end = va + PTE1_SIZE; for (cur = va, pte2p = pmap_pte2_quick(pmap, va); cur != end; cur += PAGE_SIZE, pte2p++) { pte2 = pte2_load(pte2p); if (!pte2_is_valid(pte2)) continue; if (pmap_remove_pte2(pmap, pte2p, cur, &free)) break; } sched_unpin(); } vm_page_free_pages_toq(&free, false); } if ((m->oflags & VPO_UNMANAGED) == 0) { /* * Abort this mapping if its PV entry could not be created. */ if (!pmap_pv_insert_pte1(pmap, va, pte1, flags)) { CTR3(KTR_PMAP, "%s: failure for va %#lx in pmap %p", __func__, va, pmap); return (KERN_RESOURCE_SHORTAGE); } if ((pte1 & PTE1_RO) == 0) { for (mt = m; mt < &m[PTE1_SIZE / PAGE_SIZE]; mt++) vm_page_aflag_set(mt, PGA_WRITEABLE); } } /* * Increment counters. */ if (pte1_is_wired(pte1)) pmap->pm_stats.wired_count += PTE1_SIZE / PAGE_SIZE; pmap->pm_stats.resident_count += PTE1_SIZE / PAGE_SIZE; /* * Sync icache if exec permission and attribute VM_MEMATTR_WB_WA * is set. QQQ: For more info, see comments in pmap_enter(). */ if ((pte1 & PTE1_NX) == 0 && m->md.pat_mode == VM_MEMATTR_WB_WA && pmap != kernel_pmap && (!pte1_is_section(opte1) || pte1_pa(opte1) != VM_PAGE_TO_PHYS(m) || (opte1 & PTE2_NX) != 0)) cache_icache_sync_fresh(va, VM_PAGE_TO_PHYS(m), PTE1_SIZE); /* * Map the section. */ pte1_store(pte1p, pte1); pmap_pte1_mappings++; CTR3(KTR_PMAP, "%s: success for va %#lx in pmap %p", __func__, va, pmap); return (KERN_SUCCESS); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. 
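 * For example (illustrative numbers only): with start = 0x20000000 and
 * m_start at pindex 10, a resident page with pindex 13 is entered at
 * 0x20003000; pindexes whose pages are not resident are skipped and no
 * mapping is created for them.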
*/ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_offset_t va; vm_page_t m, mpt2pg; vm_pindex_t diff, psize; PDEBUG(6, printf("%s: pmap %p start %#x end %#x m %p prot %#x\n", __func__, pmap, start, end, m_start, prot)); VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpt2pg = NULL; m = m_start; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); if ((va & PTE1_OFFSET) == 0 && va + PTE1_SIZE <= end && m->psind == 1 && sp_enabled && pmap_enter_1mpage(pmap, va, m, prot)) m = &m[PTE1_SIZE / PAGE_SIZE - 1]; else mpt2pg = pmap_enter_quick_locked(pmap, va, m, prot, mpt2pg); m = TAILQ_NEXT(m, listq); } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { pt1_entry_t *pte1p; vm_paddr_t pa, pte2_pa; vm_page_t p; vm_memattr_t pat_mode; u_int l1attr, l1prot; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("%s: non-device object", __func__)); if ((addr & PTE1_OFFSET) == 0 && (size & PTE1_OFFSET) == 0) { if (!vm_object_populate(object, pindex, pindex + atop(size))) return; p = vm_page_lookup(object, pindex); KASSERT(p->valid == VM_PAGE_BITS_ALL, ("%s: invalid page %p", __func__, p)); pat_mode = p->md.pat_mode; /* * Abort the mapping if the first page is not physically * aligned to a 1MB page boundary. */ pte2_pa = VM_PAGE_TO_PHYS(p); if (pte2_pa & PTE1_OFFSET) return; /* * Skip the first page. Abort the mapping if the rest of * the pages are not physically contiguous or have differing * memory attributes. */ p = TAILQ_NEXT(p, listq); for (pa = pte2_pa + PAGE_SIZE; pa < pte2_pa + size; pa += PAGE_SIZE) { KASSERT(p->valid == VM_PAGE_BITS_ALL, ("%s: invalid page %p", __func__, p)); if (pa != VM_PAGE_TO_PHYS(p) || pat_mode != p->md.pat_mode) return; p = TAILQ_NEXT(p, listq); } /* * Map using 1MB pages. * * QQQ: Well, we are mapping a section, so same condition must * be hold like during promotion. It looks that only RW mapping * is done here, so readonly mapping must be done elsewhere. */ l1prot = PTE1_U | PTE1_NG | PTE1_RW | PTE1_M | PTE1_A; l1attr = ATTR_TO_L1(vm_memattr_to_pte2(pat_mode)); PMAP_LOCK(pmap); for (pa = pte2_pa; pa < pte2_pa + size; pa += PTE1_SIZE) { pte1p = pmap_pte1(pmap, addr); if (!pte1_is_valid(pte1_load(pte1p))) { pte1_store(pte1p, PTE1(pa, l1prot, l1attr)); pmap->pm_stats.resident_count += PTE1_SIZE / PAGE_SIZE; pmap_pte1_mappings++; } /* Else continue on if the PTE1 is already valid. */ addr += PTE1_SIZE; } PMAP_UNLOCK(pmap); } } /* * Do the things to protect a 1mpage in a process. 
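 * That is, strip write and/or execute permission from an existing 1MB
 * section mapping in place; permissions are only ever removed here,
 * never granted.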
*/ static void pmap_protect_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t sva, vm_prot_t prot) { pt1_entry_t npte1, opte1; vm_offset_t eva, va; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PTE1_OFFSET) == 0, ("%s: sva is not 1mpage aligned", __func__)); opte1 = npte1 = pte1_load(pte1p); if (pte1_is_managed(opte1) && pte1_is_dirty(opte1)) { eva = sva + PTE1_SIZE; for (va = sva, m = PHYS_TO_VM_PAGE(pte1_pa(opte1)); va < eva; va += PAGE_SIZE, m++) vm_page_dirty(m); } if ((prot & VM_PROT_WRITE) == 0) npte1 |= PTE1_RO | PTE1_NM; if ((prot & VM_PROT_EXECUTE) == 0) npte1 |= PTE1_NX; /* * QQQ: Herein, execute permission is never set. * It only can be cleared. So, no icache * syncing is needed. */ if (npte1 != opte1) { pte1_store(pte1p, npte1); pmap_tlb_flush(pmap, sva); } } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { boolean_t pv_lists_locked; vm_offset_t nextva; pt1_entry_t *pte1p, pte1; pt2_entry_t *pte2p, opte2, npte2; KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); if (prot == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) == (VM_PROT_WRITE | VM_PROT_EXECUTE)) return; if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } PMAP_LOCK(pmap); for (; sva < eva; sva = nextva) { /* * Calculate address for next L2 page table. */ nextva = pte1_trunc(sva + PTE1_SIZE); if (nextva < sva) nextva = eva; pte1p = pmap_pte1(pmap, sva); pte1 = pte1_load(pte1p); /* * Weed out invalid mappings. Note: we assume that L1 page * page table is always allocated, and in kernel virtual. */ if (pte1 == 0) continue; if (pte1_is_section(pte1)) { /* * Are we protecting the entire large page? If not, * demote the mapping and fall through. */ if (sva + PTE1_SIZE == nextva && eva >= nextva) { pmap_protect_pte1(pmap, pte1p, sva, prot); continue; } else { if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { PMAP_UNLOCK(pmap); goto resume; } sched_pin(); } if (!pmap_demote_pte1(pmap, pte1p, sva)) { /* * The large page mapping * was destroyed. */ continue; } #ifdef INVARIANTS else { /* Update pte1 after demotion */ pte1 = pte1_load(pte1p); } #endif } } KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p" " is not link", __func__, pmap, sva, pte1, pte1p)); /* * Limit our scan to either the end of the va represented * by the current L2 page table page, or to the end of the * range being protected. */ if (nextva > eva) nextva = eva; for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva; pte2p++, sva += PAGE_SIZE) { vm_page_t m; opte2 = npte2 = pte2_load(pte2p); if (!pte2_is_valid(opte2)) continue; if ((prot & VM_PROT_WRITE) == 0) { if (pte2_is_managed(opte2) && pte2_is_dirty(opte2)) { m = PHYS_TO_VM_PAGE(pte2_pa(opte2)); vm_page_dirty(m); } npte2 |= PTE2_RO | PTE2_NM; } if ((prot & VM_PROT_EXECUTE) == 0) npte2 |= PTE2_NX; /* * QQQ: Herein, execute permission is never set. * It only can be cleared. So, no icache * syncing is needed. */ if (npte2 != opte2) { pte2_store(pte2p, npte2); pmap_tlb_flush(pmap, sva); } } } if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } /* * pmap_pvh_wired_mappings: * * Return the updated number "count" of managed mappings that are wired. 
*/ static int pmap_pvh_wired_mappings(struct md_page *pvh, int count) { pmap_t pmap; pt1_entry_t pte1; pt2_entry_t pte2; pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va)); if (pte1_is_section(pte1)) { if (pte1_is_wired(pte1)) count++; } else { KASSERT(pte1_is_link(pte1), ("%s: pte1 %#x is not link", __func__, pte1)); pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va)); if (pte2_is_wired(pte2)) count++; } PMAP_UNLOCK(pmap); } sched_unpin(); return (count); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { int count; count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); rw_wlock(&pvh_global_lock); count = pmap_pvh_wired_mappings(&m->md, count); if ((m->flags & PG_FICTITIOUS) == 0) { count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), count); } rw_wunlock(&pvh_global_lock); return (count); } /* * Returns TRUE if any of the given mappings were used to modify * physical memory. Otherwise, returns FALSE. Both page and 1mpage * mappings are supported. */ static boolean_t pmap_is_modified_pvh(struct md_page *pvh) { pv_entry_t pv; pt1_entry_t pte1; pt2_entry_t pte2; pmap_t pmap; boolean_t rv; rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va)); if (pte1_is_section(pte1)) { rv = pte1_is_dirty(pte1); } else { KASSERT(pte1_is_link(pte1), ("%s: pte1 %#x is not link", __func__, pte1)); pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va)); rv = pte2_is_dirty(pte2); } PMAP_UNLOCK(pmap); if (rv) break; } sched_unpin(); return (rv); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("%s: page %p is not managed", __func__, m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTE2s can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = pmap_is_modified_pvh(&m->md) || ((m->flags & PG_FICTITIOUS) == 0 && pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); rw_wunlock(&pvh_global_lock); return (rv); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is eligible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pt1_entry_t pte1; pt2_entry_t pte2; boolean_t rv; rv = FALSE; PMAP_LOCK(pmap); pte1 = pte1_load(pmap_pte1(pmap, addr)); if (pte1_is_link(pte1)) { pte2 = pte2_load(pt2map_entry(addr)); rv = !pte2_is_valid(pte2) ; } PMAP_UNLOCK(pmap); return (rv); } /* * Returns TRUE if any of the given mappings were referenced and FALSE * otherwise. Both page and 1mpage mappings are supported. 
*/ static boolean_t pmap_is_referenced_pvh(struct md_page *pvh) { pv_entry_t pv; pt1_entry_t pte1; pt2_entry_t pte2; pmap_t pmap; boolean_t rv; rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va)); if (pte1_is_section(pte1)) { rv = (pte1 & (PTE1_A | PTE1_V)) == (PTE1_A | PTE1_V); } else { pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va)); rv = (pte2 & (PTE2_A | PTE2_V)) == (PTE2_A | PTE2_V); } PMAP_UNLOCK(pmap); if (rv) break; } sched_unpin(); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("%s: page %p is not managed", __func__, m)); rw_wlock(&pvh_global_lock); rv = pmap_is_referenced_pvh(&m->md) || ((m->flags & PG_FICTITIOUS) == 0 && pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); rw_wunlock(&pvh_global_lock); return (rv); } /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * As an optimization, update the page's dirty field if a modified bit is * found while counting reference bits. This opportunistic update can be * performed at low cost and can eliminate the need for some future calls * to pmap_is_modified(). However, since this function stops after * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some * dirty pages. Those dirty pages will only be detected by a future call * to pmap_is_modified(). */ int pmap_ts_referenced(vm_page_t m) { struct md_page *pvh; pv_entry_t pv, pvf; pmap_t pmap; pt1_entry_t *pte1p, opte1; pt2_entry_t *pte2p, opte2; vm_paddr_t pa; int rtval = 0; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("%s: page %p is not managed", __func__, m)); pa = VM_PAGE_TO_PHYS(m); pvh = pa_to_pvh(pa); rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0 || (pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) goto small_mappings; pv = pvf; do { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1p = pmap_pte1(pmap, pv->pv_va); opte1 = pte1_load(pte1p); if (pte1_is_dirty(opte1)) { /* * Although "opte1" is mapping a 1MB page, because * this function is called at a 4KB page granularity, * we only update the 4KB page under test. */ vm_page_dirty(m); } if ((opte1 & PTE1_A) != 0) { /* * Since this reference bit is shared by 256 4KB pages, * it should not be cleared every time it is tested. * Apply a simple "hash" function on the physical page * number, the virtual section number, and the pmap * address to select one 4KB page out of the 256 * on which testing the reference bit will result * in clearing that bit. This function is designed * to avoid the selection of the same 4KB page * for every 1MB page mapping. * * On demotion, a mapping that hasn't been referenced * is simply destroyed. To avoid the possibility of a * subsequent page fault on a demoted wired mapping, * always leave its reference bit set. Moreover, * since the section is wired, the current state of * its reference bit won't affect page replacement. 
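 * Put differently: for a given mapping, the pmap address and the
 * virtual section number are fixed, so the hash below varies only with
 * the physical page number of the 4KB page under test.  Different
 * pages within the same section therefore hash differently, and only a
 * small, stable subset of them ever triggers the clearing of PTE1_A.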
*/ if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PTE1_SHIFT) ^ (uintptr_t)pmap) & (NPTE2_IN_PG - 1)) == 0 && !pte1_is_wired(opte1)) { pte1_clear_bit(pte1p, PTE1_A); pmap_tlb_flush(pmap, pv->pv_va); } rtval++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); } if (rtval >= PMAP_TS_REFERENCED_MAX) goto out; } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); small_mappings: if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1p = pmap_pte1(pmap, pv->pv_va); KASSERT(pte1_is_link(pte1_load(pte1p)), ("%s: not found a link in page %p's pv list", __func__, m)); pte2p = pmap_pte2_quick(pmap, pv->pv_va); opte2 = pte2_load(pte2p); if (pte2_is_dirty(opte2)) vm_page_dirty(m); if ((opte2 & PTE2_A) != 0) { pte2_clear_bit(pte2p, PTE2_A); pmap_tlb_flush(pmap, pv->pv_va); rtval++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && rtval < PMAP_TS_REFERENCED_MAX); out: sched_unpin(); rw_wunlock(&pvh_global_lock); return (rtval); } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware feature, * so there is no need to invalidate any TLB entries. */ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t nextva; pt1_entry_t *pte1p, pte1; pt2_entry_t *pte2p, pte2; boolean_t pv_lists_locked; if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } PMAP_LOCK(pmap); for (; sva < eva; sva = nextva) { nextva = pte1_trunc(sva + PTE1_SIZE); if (nextva < sva) nextva = eva; pte1p = pmap_pte1(pmap, sva); pte1 = pte1_load(pte1p); /* * Weed out invalid mappings. Note: we assume that L1 page * page table is always allocated, and in kernel virtual. */ if (pte1 == 0) continue; if (pte1_is_section(pte1)) { if (!pte1_is_wired(pte1)) panic("%s: pte1 %#x not wired", __func__, pte1); /* * Are we unwiring the entire large page? If not, * demote the mapping and fall through. */ if (sva + PTE1_SIZE == nextva && eva >= nextva) { pte1_clear_bit(pte1p, PTE1_W); pmap->pm_stats.wired_count -= PTE1_SIZE / PAGE_SIZE; continue; } else { if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { PMAP_UNLOCK(pmap); /* Repeat sva. */ goto resume; } sched_pin(); } if (!pmap_demote_pte1(pmap, pte1p, sva)) panic("%s: demotion failed", __func__); #ifdef INVARIANTS else { /* Update pte1 after demotion */ pte1 = pte1_load(pte1p); } #endif } } KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p" " is not link", __func__, pmap, sva, pte1, pte1p)); /* * Limit our scan to either the end of the va represented * by the current L2 page table page, or to the end of the * range being protected. 
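 * (Here that means the range being unwired; the wording is shared with
 * pmap_protect() above.)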
*/ if (nextva > eva) nextva = eva; for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva; pte2p++, sva += PAGE_SIZE) { pte2 = pte2_load(pte2p); if (!pte2_is_valid(pte2)) continue; if (!pte2_is_wired(pte2)) panic("%s: pte2 %#x is missing PTE2_W", __func__, pte2); /* * PTE2_W must be cleared atomically. Although the pmap * lock synchronizes access to PTE2_W, another processor * could be changing PTE2_NM and/or PTE2_A concurrently. */ pte2_clear_bit(pte2p, PTE2_W); pmap->pm_stats.wired_count--; } } if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { struct md_page *pvh; pv_entry_t next_pv, pv; pmap_t pmap; pt1_entry_t *pte1p; pt2_entry_t *pte2p, opte2; vm_offset_t va; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("%s: page %p is not managed", __func__, m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1p = pmap_pte1(pmap, va); if (!(pte1_load(pte1p) & PTE1_RO)) (void)pmap_demote_pte1(pmap, pte1p, va); PMAP_UNLOCK(pmap); } small_mappings: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1p = pmap_pte1(pmap, pv->pv_va); KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found" " a section in page %p's pv list", __func__, m)); pte2p = pmap_pte2_quick(pmap, pv->pv_va); opte2 = pte2_load(pte2p); if (!(opte2 & PTE2_RO)) { pte2_store(pte2p, opte2 | PTE2_RO | PTE2_NM); if (pte2_is_dirty(opte2)) vm_page_dirty(m); pmap_tlb_flush(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); sched_unpin(); rw_wunlock(&pvh_global_lock); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. */ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { pt1_entry_t *pte1p, opte1; pt2_entry_t *pte2p, pte2; vm_offset_t pdnxt; vm_page_t m; boolean_t pv_lists_locked; if (advice != MADV_DONTNEED && advice != MADV_FREE) return; if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pdnxt = pte1_trunc(sva + PTE1_SIZE); if (pdnxt < sva) pdnxt = eva; pte1p = pmap_pte1(pmap, sva); opte1 = pte1_load(pte1p); if (!pte1_is_valid(opte1)) /* XXX */ continue; else if (pte1_is_section(opte1)) { if (!pte1_is_managed(opte1)) continue; if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { PMAP_UNLOCK(pmap); goto resume; } sched_pin(); } if (!pmap_demote_pte1(pmap, pte1p, sva)) { /* * The large page mapping was destroyed. */ continue; } /* * Unless the page mappings are wired, remove the * mapping to a single page so that a subsequent * access may repromote. Since the underlying L2 page * table is fully populated, this removal never * frees a L2 page table page. 
*/ if (!pte1_is_wired(opte1)) { pte2p = pmap_pte2_quick(pmap, sva); KASSERT(pte2_is_valid(pte2_load(pte2p)), ("%s: invalid PTE2", __func__)); pmap_remove_pte2(pmap, pte2p, sva, NULL); } } if (pdnxt > eva) pdnxt = eva; for (pte2p = pmap_pte2_quick(pmap, sva); sva != pdnxt; pte2p++, sva += PAGE_SIZE) { pte2 = pte2_load(pte2p); if (!pte2_is_valid(pte2) || !pte2_is_managed(pte2)) continue; else if (pte2_is_dirty(pte2)) { if (advice == MADV_DONTNEED) { /* * Future calls to pmap_is_modified() * can be avoided by making the page * dirty now. */ m = PHYS_TO_VM_PAGE(pte2_pa(pte2)); vm_page_dirty(m); } pte2_set_bit(pte2p, PTE2_NM); pte2_clear_bit(pte2p, PTE2_A); } else if ((pte2 & PTE2_A) != 0) pte2_clear_bit(pte2p, PTE2_A); else continue; pmap_tlb_flush(pmap, sva); } } if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { struct md_page *pvh; pv_entry_t next_pv, pv; pmap_t pmap; pt1_entry_t *pte1p, opte1; pt2_entry_t *pte2p, opte2; vm_offset_t va; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("%s: page %p is not managed", __func__, m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("%s: page %p is exclusive busy", __func__, m)); /* * If the page is not PGA_WRITEABLE, then no PTE2s can have PTE2_NM * cleared. If the object containing the page is locked and the page * is not exclusive busied, then PGA_WRITEABLE cannot be concurrently * set. */ if ((m->flags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1p = pmap_pte1(pmap, va); opte1 = pte1_load(pte1p); if (!(opte1 & PTE1_RO)) { if (pmap_demote_pte1(pmap, pte1p, va) && !pte1_is_wired(opte1)) { /* * Write protect the mapping to a * single page so that a subsequent * write access may repromote. */ va += VM_PAGE_TO_PHYS(m) - pte1_pa(opte1); pte2p = pmap_pte2_quick(pmap, va); opte2 = pte2_load(pte2p); if ((opte2 & PTE2_V)) { pte2_set_bit(pte2p, PTE2_NM | PTE2_RO); vm_page_dirty(m); pmap_tlb_flush(pmap, va); } } } PMAP_UNLOCK(pmap); } small_mappings: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte1p = pmap_pte1(pmap, pv->pv_va); KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found" " a section in page %p's pv list", __func__, m)); pte2p = pmap_pte2_quick(pmap, pv->pv_va); if (pte2_is_dirty(pte2_load(pte2p))) { pte2_set_bit(pte2p, PTE2_NM); pmap_tlb_flush(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } sched_unpin(); rw_wunlock(&pvh_global_lock); } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { pt2_entry_t *cmap2_pte2p; vm_memattr_t oma; vm_paddr_t pa; struct pcpu *pc; oma = m->md.pat_mode; m->md.pat_mode = ma; CTR5(KTR_PMAP, "%s: page %p - 0x%08X oma: %d, ma: %d", __func__, m, VM_PAGE_TO_PHYS(m), oma, ma); if ((m->flags & PG_FICTITIOUS) != 0) return; #if 0 /* * If "m" is a normal page, flush it from the cache. * * First, try to find an existing mapping of the page by sf * buffer. sf_buf_invalidate_cache() modifies mapping and * flushes the cache. */ if (sf_buf_invalidate_cache(m, oma)) return; #endif /* * If page is not mapped by sf buffer, map the page * transient and do invalidation. 
*/ if (ma != oma) { pa = VM_PAGE_TO_PHYS(m); sched_pin(); pc = get_pcpu(); cmap2_pte2p = pc->pc_cmap2_pte2p; mtx_lock(&pc->pc_cmap_lock); if (pte2_load(cmap2_pte2p) != 0) panic("%s: CMAP2 busy", __func__); pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW, vm_memattr_to_pte2(ma))); dcache_wbinv_poc((vm_offset_t)pc->pc_cmap2_addr, pa, PAGE_SIZE); pte2_clear(cmap2_pte2p); tlb_flush((vm_offset_t)pc->pc_cmap2_addr); sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); } } /* * Miscellaneous support routines follow */ /* * Returns TRUE if the given page is mapped individually or as part of * a 1mpage. Otherwise, returns FALSE. */ boolean_t pmap_page_is_mapped(vm_page_t m) { boolean_t rv; if ((m->oflags & VPO_UNMANAGED) != 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = !TAILQ_EMPTY(&m->md.pv_list) || ((m->flags & PG_FICTITIOUS) == 0 && !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); rw_wunlock(&pvh_global_lock); return (rv); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct md_page *pvh; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("%s: page %p is not managed", __func__, m)); rv = FALSE; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } } rw_wunlock(&pvh_global_lock); return (rv); } /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { pt2_entry_t *cmap2_pte2p; struct pcpu *pc; sched_pin(); pc = get_pcpu(); cmap2_pte2p = pc->pc_cmap2_pte2p; mtx_lock(&pc->pc_cmap_lock); if (pte2_load(cmap2_pte2p) != 0) panic("%s: CMAP2 busy", __func__); pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW, vm_page_pte2_attr(m))); pagezero(pc->pc_cmap2_addr); pte2_clear(cmap2_pte2p); tlb_flush((vm_offset_t)pc->pc_cmap2_addr); sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { pt2_entry_t *cmap2_pte2p; struct pcpu *pc; sched_pin(); pc = get_pcpu(); cmap2_pte2p = pc->pc_cmap2_pte2p; mtx_lock(&pc->pc_cmap_lock); if (pte2_load(cmap2_pte2p) != 0) panic("%s: CMAP2 busy", __func__); pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW, vm_page_pte2_attr(m))); if (off == 0 && size == PAGE_SIZE) pagezero(pc->pc_cmap2_addr); else bzero(pc->pc_cmap2_addr + off, size); pte2_clear(cmap2_pte2p); tlb_flush((vm_offset_t)pc->pc_cmap2_addr); sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. 
*/ void pmap_copy_page(vm_page_t src, vm_page_t dst) { pt2_entry_t *cmap1_pte2p, *cmap2_pte2p; struct pcpu *pc; sched_pin(); pc = get_pcpu(); cmap1_pte2p = pc->pc_cmap1_pte2p; cmap2_pte2p = pc->pc_cmap2_pte2p; mtx_lock(&pc->pc_cmap_lock); if (pte2_load(cmap1_pte2p) != 0) panic("%s: CMAP1 busy", __func__); if (pte2_load(cmap2_pte2p) != 0) panic("%s: CMAP2 busy", __func__); pte2_store(cmap1_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(src), PTE2_AP_KR | PTE2_NM, vm_page_pte2_attr(src))); pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(dst), PTE2_AP_KRW, vm_page_pte2_attr(dst))); bcopy(pc->pc_cmap1_addr, pc->pc_cmap2_addr, PAGE_SIZE); pte2_clear(cmap1_pte2p); tlb_flush((vm_offset_t)pc->pc_cmap1_addr); pte2_clear(cmap2_pte2p); tlb_flush((vm_offset_t)pc->pc_cmap2_addr); sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { pt2_entry_t *cmap1_pte2p, *cmap2_pte2p; vm_page_t a_pg, b_pg; char *a_cp, *b_cp; vm_offset_t a_pg_offset, b_pg_offset; struct pcpu *pc; int cnt; sched_pin(); pc = get_pcpu(); cmap1_pte2p = pc->pc_cmap1_pte2p; cmap2_pte2p = pc->pc_cmap2_pte2p; mtx_lock(&pc->pc_cmap_lock); if (pte2_load(cmap1_pte2p) != 0) panic("pmap_copy_pages: CMAP1 busy"); if (pte2_load(cmap2_pte2p) != 0) panic("pmap_copy_pages: CMAP2 busy"); while (xfersize > 0) { a_pg = ma[a_offset >> PAGE_SHIFT]; a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); b_pg = mb[b_offset >> PAGE_SHIFT]; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); pte2_store(cmap1_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(a_pg), PTE2_AP_KR | PTE2_NM, vm_page_pte2_attr(a_pg))); tlb_flush_local((vm_offset_t)pc->pc_cmap1_addr); pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(b_pg), PTE2_AP_KRW, vm_page_pte2_attr(b_pg))); tlb_flush_local((vm_offset_t)pc->pc_cmap2_addr); a_cp = pc->pc_cmap1_addr + a_pg_offset; b_cp = pc->pc_cmap2_addr + b_pg_offset; bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } pte2_clear(cmap1_pte2p); tlb_flush((vm_offset_t)pc->pc_cmap1_addr); pte2_clear(cmap2_pte2p); tlb_flush((vm_offset_t)pc->pc_cmap2_addr); sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); } vm_offset_t pmap_quick_enter_page(vm_page_t m) { struct pcpu *pc; pt2_entry_t *pte2p; critical_enter(); pc = get_pcpu(); pte2p = pc->pc_qmap_pte2p; KASSERT(pte2_load(pte2p) == 0, ("%s: PTE2 busy", __func__)); pte2_store(pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW, vm_page_pte2_attr(m))); return (pc->pc_qmap_addr); } void pmap_quick_remove_page(vm_offset_t addr) { struct pcpu *pc; pt2_entry_t *pte2p; pc = get_pcpu(); pte2p = pc->pc_qmap_pte2p; KASSERT(addr == pc->pc_qmap_addr, ("%s: invalid address", __func__)); KASSERT(pte2_load(pte2p) != 0, ("%s: PTE2 not in use", __func__)); pte2_clear(pte2p); tlb_flush(pc->pc_qmap_addr); critical_exit(); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. 
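 * This is typically used when a new address space is created at fork
 * time; note that the implementation below bails out early unless
 * dst_addr == src_addr and the source pmap is the current one.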
*/ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { struct spglist free; vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t nextva; if (dst_addr != src_addr) return; if (!pmap_is_current(src_pmap)) return; rw_wlock(&pvh_global_lock); if (dst_pmap < src_pmap) { PMAP_LOCK(dst_pmap); PMAP_LOCK(src_pmap); } else { PMAP_LOCK(src_pmap); PMAP_LOCK(dst_pmap); } sched_pin(); for (addr = src_addr; addr < end_addr; addr = nextva) { pt2_entry_t *src_pte2p, *dst_pte2p; vm_page_t dst_mpt2pg, src_mpt2pg; pt1_entry_t src_pte1; u_int pte1_idx; KASSERT(addr < VM_MAXUSER_ADDRESS, ("%s: invalid to pmap_copy page tables", __func__)); nextva = pte1_trunc(addr + PTE1_SIZE); if (nextva < addr) nextva = end_addr; pte1_idx = pte1_index(addr); src_pte1 = src_pmap->pm_pt1[pte1_idx]; if (pte1_is_section(src_pte1)) { if ((addr & PTE1_OFFSET) != 0 || (addr + PTE1_SIZE) > end_addr) continue; if (dst_pmap->pm_pt1[pte1_idx] == 0 && (!pte1_is_managed(src_pte1) || pmap_pv_insert_pte1(dst_pmap, addr, src_pte1, PMAP_ENTER_NORECLAIM))) { dst_pmap->pm_pt1[pte1_idx] = src_pte1 & ~PTE1_W; dst_pmap->pm_stats.resident_count += PTE1_SIZE / PAGE_SIZE; pmap_pte1_mappings++; } continue; } else if (!pte1_is_link(src_pte1)) continue; src_mpt2pg = PHYS_TO_VM_PAGE(pte1_link_pa(src_pte1)); /* * We leave PT2s to be linked from PT1 even if they are not * referenced until all PT2s in a page are without reference. * * QQQ: It could be changed ... */ #if 0 /* single_pt2_link_is_cleared */ KASSERT(pt2_wirecount_get(src_mpt2pg, pte1_idx) > 0, ("%s: source page table page is unused", __func__)); #else if (pt2_wirecount_get(src_mpt2pg, pte1_idx) == 0) continue; #endif if (nextva > end_addr) nextva = end_addr; src_pte2p = pt2map_entry(addr); while (addr < nextva) { pt2_entry_t temp_pte2; temp_pte2 = pte2_load(src_pte2p); /* * we only virtual copy managed pages */ if (pte2_is_managed(temp_pte2)) { dst_mpt2pg = pmap_allocpte2(dst_pmap, addr, PMAP_ENTER_NOSLEEP); if (dst_mpt2pg == NULL) goto out; dst_pte2p = pmap_pte2_quick(dst_pmap, addr); if (!pte2_is_valid(pte2_load(dst_pte2p)) && pmap_try_insert_pv_entry(dst_pmap, addr, PHYS_TO_VM_PAGE(pte2_pa(temp_pte2)))) { /* * Clear the wired, modified, and * accessed (referenced) bits * during the copy. */ temp_pte2 &= ~(PTE2_W | PTE2_A); temp_pte2 |= PTE2_NM; pte2_store(dst_pte2p, temp_pte2); dst_pmap->pm_stats.resident_count++; } else { SLIST_INIT(&free); if (pmap_unwire_pt2(dst_pmap, addr, dst_mpt2pg, &free)) { pmap_tlb_flush(dst_pmap, addr); vm_page_free_pages_toq(&free, false); } goto out; } if (pt2_wirecount_get(dst_mpt2pg, pte1_idx) >= pt2_wirecount_get(src_mpt2pg, pte1_idx)) break; } addr += PAGE_SIZE; src_pte2p++; } } out: sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(src_pmap); PMAP_UNLOCK(dst_pmap); } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more section mappings. 
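 * For example (illustrative values, assuming no OBJ_COLORED color
 * adjustment): with 1MB sections, an object offset of 0x180000 and a
 * hint address of 0x20010000 for a 4MB mapping, pte1_offset is 0x80000
 * and the hint is moved to 0x20080000, making address and offset
 * congruent modulo 1MB so that whole sections can later back the
 * mapping.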
*/ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { vm_offset_t pte1_offset; if (size < PTE1_SIZE) return; if (object != NULL && (object->flags & OBJ_COLORED) != 0) offset += ptoa(object->pg_color); pte1_offset = offset & PTE1_OFFSET; if (size - ((PTE1_SIZE - pte1_offset) & PTE1_OFFSET) < PTE1_SIZE || (*addr & PTE1_OFFSET) == pte1_offset) return; if ((*addr & PTE1_OFFSET) < pte1_offset) *addr = pte1_trunc(*addr) + pte1_offset; else *addr = pte1_roundup(*addr) + pte1_offset; } void pmap_activate(struct thread *td) { pmap_t pmap, oldpmap; u_int cpuid, ttb; PDEBUG(9, printf("%s: td = %08x\n", __func__, (uint32_t)td)); critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); cpuid = PCPU_GET(cpuid); #if defined(SMP) CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active); CPU_SET_ATOMIC(cpuid, &pmap->pm_active); #else CPU_CLR(cpuid, &oldpmap->pm_active); CPU_SET(cpuid, &pmap->pm_active); #endif ttb = pmap_ttb_get(pmap); /* * pmap_activate is for the current thread on the current cpu */ td->td_pcb->pcb_pagedir = ttb; cp15_ttbr_set(ttb); PCPU_SET(curpmap, pmap); critical_exit(); } /* * Perform the pmap work for mincore. */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pt1_entry_t *pte1p, pte1; pt2_entry_t *pte2p, pte2; vm_paddr_t pa; bool managed; int val; PMAP_LOCK(pmap); retry: pte1p = pmap_pte1(pmap, addr); pte1 = pte1_load(pte1p); if (pte1_is_section(pte1)) { pa = trunc_page(pte1_pa(pte1) | (addr & PTE1_OFFSET)); managed = pte1_is_managed(pte1); val = MINCORE_SUPER | MINCORE_INCORE; if (pte1_is_dirty(pte1)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if (pte1 & PTE1_A) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } else if (pte1_is_link(pte1)) { pte2p = pmap_pte2(pmap, addr); pte2 = pte2_load(pte2p); pmap_pte2_release(pte2p); pa = pte2_pa(pte2); managed = pte2_is_managed(pte2); val = MINCORE_INCORE; if (pte2_is_dirty(pte2)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if (pte2 & PTE2_A) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } else { managed = false; val = 0; } if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } void pmap_kenter_device(vm_offset_t va, vm_size_t size, vm_paddr_t pa) { vm_offset_t sva; uint32_t l2attr; KASSERT((size & PAGE_MASK) == 0, ("%s: device mapping not page-sized", __func__)); sva = va; l2attr = vm_memattr_to_pte2(VM_MEMATTR_DEVICE); while (size != 0) { pmap_kenter_prot_attr(va, pa, PTE2_AP_KRW, l2attr); va += PAGE_SIZE; pa += PAGE_SIZE; size -= PAGE_SIZE; } tlb_flush_range(sva, va - sva); } void pmap_kremove_device(vm_offset_t va, vm_size_t size) { vm_offset_t sva; KASSERT((size & PAGE_MASK) == 0, ("%s: device mapping not page-sized", __func__)); sva = va; while (size != 0) { pmap_kremove(va); va += PAGE_SIZE; size -= PAGE_SIZE; } tlb_flush_range(sva, va - sva); } void pmap_set_pcb_pagedir(pmap_t pmap, struct pcb *pcb) { pcb->pcb_pagedir = pmap_ttb_get(pmap); } /* * Clean L1 data cache range by physical address. * The range must be within a single page. 
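 * Callers are expected to split larger ranges themselves; for instance,
 * cache_icache_sync_fresh() below passes at most
 * min(PAGE_SIZE - offset, size) bytes per call.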
*/ static void pmap_dcache_wb_pou(vm_paddr_t pa, vm_size_t size, uint32_t attr) { pt2_entry_t *cmap2_pte2p; struct pcpu *pc; KASSERT(((pa & PAGE_MASK) + size) <= PAGE_SIZE, ("%s: not on single page", __func__)); sched_pin(); pc = get_pcpu(); cmap2_pte2p = pc->pc_cmap2_pte2p; mtx_lock(&pc->pc_cmap_lock); if (pte2_load(cmap2_pte2p) != 0) panic("%s: CMAP2 busy", __func__); pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW, attr)); dcache_wb_pou((vm_offset_t)pc->pc_cmap2_addr + (pa & PAGE_MASK), size); pte2_clear(cmap2_pte2p); tlb_flush((vm_offset_t)pc->pc_cmap2_addr); sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); } /* * Sync instruction cache range which is not mapped yet. */ void cache_icache_sync_fresh(vm_offset_t va, vm_paddr_t pa, vm_size_t size) { uint32_t len, offset; vm_page_t m; /* Write back d-cache on given address range. */ offset = pa & PAGE_MASK; for ( ; size != 0; size -= len, pa += len, offset = 0) { len = min(PAGE_SIZE - offset, size); m = PHYS_TO_VM_PAGE(pa); KASSERT(m != NULL, ("%s: vm_page_t is null for %#x", __func__, pa)); pmap_dcache_wb_pou(pa, len, vm_page_pte2_attr(m)); } /* * I-cache is VIPT. Only way how to flush all virtual mappings * on given physical address is to invalidate all i-cache. */ icache_inv_all(); } void pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t size) { /* Write back d-cache on given address range. */ if (va >= VM_MIN_KERNEL_ADDRESS) { dcache_wb_pou(va, size); } else { uint32_t len, offset; vm_paddr_t pa; vm_page_t m; offset = va & PAGE_MASK; for ( ; size != 0; size -= len, va += len, offset = 0) { pa = pmap_extract(pmap, va); /* offset is preserved */ len = min(PAGE_SIZE - offset, size); m = PHYS_TO_VM_PAGE(pa); KASSERT(m != NULL, ("%s: vm_page_t is null for %#x", __func__, pa)); pmap_dcache_wb_pou(pa, len, vm_page_pte2_attr(m)); } } /* * I-cache is VIPT. Only way how to flush all virtual mappings * on given physical address is to invalidate all i-cache. */ icache_inv_all(); } /* * The implementation of pmap_fault() uses IN_RANGE2() macro which * depends on the fact that given range size is a power of 2. */ CTASSERT(powerof2(NB_IN_PT1)); CTASSERT(powerof2(PT2MAP_SIZE)); #define IN_RANGE2(addr, start, size) \ ((vm_offset_t)(start) == ((vm_offset_t)(addr) & ~((size) - 1))) /* * Handle access and R/W emulation faults. */ int pmap_fault(pmap_t pmap, vm_offset_t far, uint32_t fsr, int idx, bool usermode) { pt1_entry_t *pte1p, pte1; pt2_entry_t *pte2p, pte2; if (pmap == NULL) pmap = kernel_pmap; /* * In kernel, we should never get abort with FAR which is in range of * pmap->pm_pt1 or PT2MAP address spaces. If it happens, stop here * and print out a useful abort message and even get to the debugger * otherwise it likely ends with never ending loop of aborts. */ if (__predict_false(IN_RANGE2(far, pmap->pm_pt1, NB_IN_PT1))) { /* * All L1 tables should always be mapped and present. * However, we check only current one herein. For user mode, * only permission abort from malicious user is not fatal. * And alignment abort as it may have higher priority. */ if (!usermode || (idx != FAULT_ALIGN && idx != FAULT_PERM_L2)) { CTR4(KTR_PMAP, "%s: pmap %#x pm_pt1 %#x far %#x", __func__, pmap, pmap->pm_pt1, far); panic("%s: pm_pt1 abort", __func__); } return (KERN_INVALID_ADDRESS); } if (__predict_false(IN_RANGE2(far, PT2MAP, PT2MAP_SIZE))) { /* * PT2MAP should be always mapped and present in current * L1 table. However, only existing L2 tables are mapped * in PT2MAP. 
For user mode, only L2 translation abort and * permission abort from malicious user is not fatal. * And alignment abort as it may have higher priority. */ if (!usermode || (idx != FAULT_ALIGN && idx != FAULT_TRAN_L2 && idx != FAULT_PERM_L2)) { CTR4(KTR_PMAP, "%s: pmap %#x PT2MAP %#x far %#x", __func__, pmap, PT2MAP, far); panic("%s: PT2MAP abort", __func__); } return (KERN_INVALID_ADDRESS); } /* * A pmap lock is used below for handling of access and R/W emulation * aborts. They were handled by atomic operations before so some * analysis of new situation is needed to answer the following question: * Is it safe to use the lock even for these aborts? * * There may happen two cases in general: * * (1) Aborts while the pmap lock is locked already - this should not * happen as pmap lock is not recursive. However, under pmap lock only * internal kernel data should be accessed and such data should be * mapped with A bit set and NM bit cleared. If double abort happens, * then a mapping of data which has caused it must be fixed. Further, * all new mappings are always made with A bit set and the bit can be * cleared only on managed mappings. * * (2) Aborts while another lock(s) is/are locked - this already can * happen. However, there is no difference here if it's either access or * R/W emulation abort, or if it's some other abort. */ PMAP_LOCK(pmap); #ifdef INVARIANTS pte1 = pte1_load(pmap_pte1(pmap, far)); if (pte1_is_link(pte1)) { /* * Check in advance that associated L2 page table is mapped into * PT2MAP space. Note that faulty access to not mapped L2 page * table is caught in more general check above where "far" is * checked that it does not lay in PT2MAP space. Note also that * L1 page table and PT2TAB always exist and are mapped. */ pte2 = pt2tab_load(pmap_pt2tab_entry(pmap, far)); if (!pte2_is_valid(pte2)) panic("%s: missing L2 page table (%p, %#x)", __func__, pmap, far); } #endif #ifdef SMP /* * Special treatment is due to break-before-make approach done when * pte1 is updated for userland mapping during section promotion or * demotion. If not caught here, pmap_enter() can find a section * mapping on faulting address. That is not allowed. */ if (idx == FAULT_TRAN_L1 && usermode && cp15_ats1cur_check(far) == 0) { PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } #endif /* * Accesss bits for page and section. Note that the entry * is not in TLB yet, so TLB flush is not necessary. * * QQQ: This is hardware emulation, we do not call userret() * for aborts from user mode. */ if (idx == FAULT_ACCESS_L2) { pte1 = pte1_load(pmap_pte1(pmap, far)); if (pte1_is_link(pte1)) { /* L2 page table should exist and be mapped. */ pte2p = pt2map_entry(far); pte2 = pte2_load(pte2p); if (pte2_is_valid(pte2)) { pte2_store(pte2p, pte2 | PTE2_A); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } } else { /* * We got L2 access fault but PTE1 is not a link. * Probably some race happened, do nothing. */ CTR3(KTR_PMAP, "%s: FAULT_ACCESS_L2 - pmap %#x far %#x", __func__, pmap, far); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } } if (idx == FAULT_ACCESS_L1) { pte1p = pmap_pte1(pmap, far); pte1 = pte1_load(pte1p); if (pte1_is_section(pte1)) { pte1_store(pte1p, pte1 | PTE1_A); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } else { /* * We got L1 access fault but PTE1 is not section * mapping. Probably some race happened, do nothing. */ CTR3(KTR_PMAP, "%s: FAULT_ACCESS_L1 - pmap %#x far %#x", __func__, pmap, far); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } } /* * Handle modify bits for page and section. 
Note that the modify * bit is emulated by software. So PTEx_RO is software read only * bit and PTEx_NM flag is real hardware read only bit. * * QQQ: This is hardware emulation, we do not call userret() * for aborts from user mode. */ if ((fsr & FSR_WNR) && (idx == FAULT_PERM_L2)) { pte1 = pte1_load(pmap_pte1(pmap, far)); if (pte1_is_link(pte1)) { /* L2 page table should exist and be mapped. */ pte2p = pt2map_entry(far); pte2 = pte2_load(pte2p); if (pte2_is_valid(pte2) && !(pte2 & PTE2_RO) && (pte2 & PTE2_NM)) { pte2_store(pte2p, pte2 & ~PTE2_NM); tlb_flush(trunc_page(far)); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } } else { /* * We got L2 permission fault but PTE1 is not a link. * Probably some race happened, do nothing. */ CTR3(KTR_PMAP, "%s: FAULT_PERM_L2 - pmap %#x far %#x", __func__, pmap, far); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } } if ((fsr & FSR_WNR) && (idx == FAULT_PERM_L1)) { pte1p = pmap_pte1(pmap, far); pte1 = pte1_load(pte1p); if (pte1_is_section(pte1)) { if (!(pte1 & PTE1_RO) && (pte1 & PTE1_NM)) { pte1_store(pte1p, pte1 & ~PTE1_NM); tlb_flush(pte1_trunc(far)); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } } else { /* * We got L1 permission fault but PTE1 is not section * mapping. Probably some race happened, do nothing. */ CTR3(KTR_PMAP, "%s: FAULT_PERM_L1 - pmap %#x far %#x", __func__, pmap, far); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } } /* * QQQ: The previous code, mainly fast handling of access and * modify bits aborts, could be moved to ASM. Now we are * starting to deal with not fast aborts. */ PMAP_UNLOCK(pmap); return (KERN_FAILURE); } #if defined(PMAP_DEBUG) /* * Reusing of KVA used in pmap_zero_page function !!! */ static void pmap_zero_page_check(vm_page_t m) { pt2_entry_t *cmap2_pte2p; uint32_t *p, *end; struct pcpu *pc; sched_pin(); pc = get_pcpu(); cmap2_pte2p = pc->pc_cmap2_pte2p; mtx_lock(&pc->pc_cmap_lock); if (pte2_load(cmap2_pte2p) != 0) panic("%s: CMAP2 busy", __func__); pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW, vm_page_pte2_attr(m))); end = (uint32_t*)(pc->pc_cmap2_addr + PAGE_SIZE); for (p = (uint32_t*)pc->pc_cmap2_addr; p < end; p++) if (*p != 0) panic("%s: page %p not zero, va: %p", __func__, m, pc->pc_cmap2_addr); pte2_clear(cmap2_pte2p); tlb_flush((vm_offset_t)pc->pc_cmap2_addr); sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); } int pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte2 = 0; int i, j, index; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { if (p->p_pid != pid || p->p_vmspace == NULL) continue; index = 0; pmap = vmspace_pmap(p->p_vmspace); for (i = 0; i < NPTE1_IN_PT1; i++) { pt1_entry_t pte1; pt2_entry_t *pte2p, pte2; vm_offset_t base, va; vm_paddr_t pa; vm_page_t m; base = i << PTE1_SHIFT; pte1 = pte1_load(&pmap->pm_pt1[i]); if (pte1_is_section(pte1)) { /* * QQQ: Do something here! 
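 * (One possible fill-in would be to print the section's va, pa and
 * dirty/accessed bits, analogously to what the loop below prints for
 * individual 4KB pages.)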
*/ } else if (pte1_is_link(pte1)) { for (j = 0; j < NPTE2_IN_PT2; j++) { va = base + (j << PAGE_SHIFT); if (va >= VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } sx_sunlock(&allproc_lock); return (npte2); } pte2p = pmap_pte2(pmap, va); pte2 = pte2_load(pte2p); pmap_pte2_release(pte2p); if (!pte2_is_valid(pte2)) continue; pa = pte2_pa(pte2); m = PHYS_TO_VM_PAGE(pa); printf("va: 0x%x, pa: 0x%x, h: %d, w:" " %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte2++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } sx_sunlock(&allproc_lock); return (npte2); } #endif #ifdef DDB static pt2_entry_t * pmap_pte2_ddb(pmap_t pmap, vm_offset_t va) { pt1_entry_t pte1; vm_paddr_t pt2pg_pa; pte1 = pte1_load(pmap_pte1(pmap, va)); if (!pte1_is_link(pte1)) return (NULL); if (pmap_is_current(pmap)) return (pt2map_entry(va)); /* Note that L2 page table size is not equal to PAGE_SIZE. */ pt2pg_pa = trunc_page(pte1_link_pa(pte1)); if (pte2_pa(pte2_load(PMAP3)) != pt2pg_pa) { pte2_store(PMAP3, PTE2_KPT(pt2pg_pa)); #ifdef SMP PMAP3cpu = PCPU_GET(cpuid); #endif tlb_flush_local((vm_offset_t)PADDR3); } #ifdef SMP else if (PMAP3cpu != PCPU_GET(cpuid)) { PMAP3cpu = PCPU_GET(cpuid); tlb_flush_local((vm_offset_t)PADDR3); } #endif return (PADDR3 + (arm32_btop(va) & (NPTE2_IN_PG - 1))); } static void dump_pmap(pmap_t pmap) { printf("pmap %p\n", pmap); printf(" pm_pt1: %p\n", pmap->pm_pt1); printf(" pm_pt2tab: %p\n", pmap->pm_pt2tab); printf(" pm_active: 0x%08lX\n", pmap->pm_active.__bits[0]); } DB_SHOW_COMMAND(pmaps, pmap_list_pmaps) { pmap_t pmap; LIST_FOREACH(pmap, &allpmaps, pm_list) { dump_pmap(pmap); } } static int pte2_class(pt2_entry_t pte2) { int cls; cls = (pte2 >> 2) & 0x03; cls |= (pte2 >> 4) & 0x04; return (cls); } static void dump_section(pmap_t pmap, uint32_t pte1_idx) { } static void dump_link(pmap_t pmap, uint32_t pte1_idx, boolean_t invalid_ok) { uint32_t i; vm_offset_t va; pt2_entry_t *pte2p, pte2; vm_page_t m; va = pte1_idx << PTE1_SHIFT; pte2p = pmap_pte2_ddb(pmap, va); for (i = 0; i < NPTE2_IN_PT2; i++, pte2p++, va += PAGE_SIZE) { pte2 = pte2_load(pte2p); if (pte2 == 0) continue; if (!pte2_is_valid(pte2)) { printf(" 0x%08X: 0x%08X", va, pte2); if (!invalid_ok) printf(" - not valid !!!"); printf("\n"); continue; } m = PHYS_TO_VM_PAGE(pte2_pa(pte2)); printf(" 0x%08X: 0x%08X, TEX%d, s:%d, g:%d, m:%p", va , pte2, pte2_class(pte2), !!(pte2 & PTE2_S), !(pte2 & PTE2_NG), m); if (m != NULL) { printf(" v:%d h:%d w:%d f:0x%04X\n", m->valid, m->hold_count, m->wire_count, m->flags); } else { printf("\n"); } } } static __inline boolean_t is_pv_chunk_space(vm_offset_t va) { if ((((vm_offset_t)pv_chunkbase) <= va) && (va < ((vm_offset_t)pv_chunkbase + PAGE_SIZE * pv_maxchunks))) return (TRUE); return (FALSE); } DB_SHOW_COMMAND(pmap, pmap_pmap_print) { /* XXX convert args. */ pmap_t pmap = (pmap_t)addr; pt1_entry_t pte1; pt2_entry_t pte2; vm_offset_t va, eva; vm_page_t m; uint32_t i; boolean_t invalid_ok, dump_link_ok, dump_pv_chunk; if (have_addr) { pmap_t pm; LIST_FOREACH(pm, &allpmaps, pm_list) if (pm == pmap) break; if (pm == NULL) { printf("given pmap %p is not in allpmaps list\n", pmap); return; } } else pmap = PCPU_GET(curpmap); eva = (modif[0] == 'u') ? 
VM_MAXUSER_ADDRESS : 0xFFFFFFFF; dump_pv_chunk = FALSE; /* XXX evaluate from modif[] */ printf("pmap: 0x%08X\n", (uint32_t)pmap); printf("PT2MAP: 0x%08X\n", (uint32_t)PT2MAP); printf("pt2tab: 0x%08X\n", (uint32_t)pmap->pm_pt2tab); for(i = 0; i < NPTE1_IN_PT1; i++) { pte1 = pte1_load(&pmap->pm_pt1[i]); if (pte1 == 0) continue; va = i << PTE1_SHIFT; if (va >= eva) break; if (pte1_is_section(pte1)) { printf("0x%08X: Section 0x%08X, s:%d g:%d\n", va, pte1, !!(pte1 & PTE1_S), !(pte1 & PTE1_NG)); dump_section(pmap, i); } else if (pte1_is_link(pte1)) { dump_link_ok = TRUE; invalid_ok = FALSE; pte2 = pte2_load(pmap_pt2tab_entry(pmap, va)); m = PHYS_TO_VM_PAGE(pte1_link_pa(pte1)); printf("0x%08X: Link 0x%08X, pt2tab: 0x%08X m: %p", va, pte1, pte2, m); if (is_pv_chunk_space(va)) { printf(" - pv_chunk space"); if (dump_pv_chunk) invalid_ok = TRUE; else dump_link_ok = FALSE; } else if (m != NULL) printf(" w:%d w2:%u", m->wire_count, pt2_wirecount_get(m, pte1_index(va))); if (pte2 == 0) printf(" !!! pt2tab entry is ZERO"); else if (pte2_pa(pte1) != pte2_pa(pte2)) printf(" !!! pt2tab entry is DIFFERENT - m: %p", PHYS_TO_VM_PAGE(pte2_pa(pte2))); printf("\n"); if (dump_link_ok) dump_link(pmap, i, invalid_ok); } else printf("0x%08X: Invalid entry 0x%08X\n", va, pte1); } } static void dump_pt2tab(pmap_t pmap) { uint32_t i; pt2_entry_t pte2; vm_offset_t va; vm_paddr_t pa; vm_page_t m; printf("PT2TAB:\n"); for (i = 0; i < PT2TAB_ENTRIES; i++) { pte2 = pte2_load(&pmap->pm_pt2tab[i]); if (!pte2_is_valid(pte2)) continue; va = i << PT2TAB_SHIFT; pa = pte2_pa(pte2); m = PHYS_TO_VM_PAGE(pa); printf(" 0x%08X: 0x%08X, TEX%d, s:%d, m:%p", va, pte2, pte2_class(pte2), !!(pte2 & PTE2_S), m); if (m != NULL) printf(" , h: %d, w: %d, f: 0x%04X pidx: %lld", m->hold_count, m->wire_count, m->flags, m->pindex); printf("\n"); } } DB_SHOW_COMMAND(pmap_pt2tab, pmap_pt2tab_print) { /* XXX convert args. */ pmap_t pmap = (pmap_t)addr; pt1_entry_t pte1; pt2_entry_t pte2; vm_offset_t va; uint32_t i, start; if (have_addr) { printf("supported only on current pmap\n"); return; } pmap = PCPU_GET(curpmap); printf("curpmap: 0x%08X\n", (uint32_t)pmap); printf("PT2MAP: 0x%08X\n", (uint32_t)PT2MAP); printf("pt2tab: 0x%08X\n", (uint32_t)pmap->pm_pt2tab); start = pte1_index((vm_offset_t)PT2MAP); for (i = start; i < (start + NPT2_IN_PT2TAB); i++) { pte1 = pte1_load(&pmap->pm_pt1[i]); if (pte1 == 0) continue; va = i << PTE1_SHIFT; if (pte1_is_section(pte1)) { printf("0x%08X: Section 0x%08X, s:%d\n", va, pte1, !!(pte1 & PTE1_S)); dump_section(pmap, i); } else if (pte1_is_link(pte1)) { pte2 = pte2_load(pmap_pt2tab_entry(pmap, va)); printf("0x%08X: Link 0x%08X, pt2tab: 0x%08X\n", va, pte1, pte2); if (pte2 == 0) printf(" !!! pt2tab entry is ZERO\n"); } else printf("0x%08X: Invalid entry 0x%08X\n", va, pte1); } dump_pt2tab(pmap); } #endif Index: head/sys/arm/freescale/imx/imx6_sdma.c =================================================================== --- head/sys/arm/freescale/imx/imx6_sdma.c (revision 338106) +++ head/sys/arm/freescale/imx/imx6_sdma.c (revision 338107) @@ -1,518 +1,516 @@ /*- * Copyright (c) 2015 Ruslan Bukin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * i.MX6 Smart Direct Memory Access Controller (sDMA) * Chapter 41, i.MX 6Dual/6Quad Applications Processor Reference Manual, * Rev. 1, 04/2013 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_BD (PAGE_SIZE / sizeof(struct sdma_buffer_descriptor)) #define READ4(_sc, _reg) \ bus_space_read_4(_sc->bst, _sc->bsh, _reg) #define WRITE4(_sc, _reg, _val) \ bus_space_write_4(_sc->bst, _sc->bsh, _reg, _val) struct sdma_softc *sdma_sc; static struct resource_spec sdma_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, { SYS_RES_IRQ, 0, RF_ACTIVE }, { -1, 0 } }; static void sdma_intr(void *arg) { struct sdma_buffer_descriptor *bd; struct sdma_channel *channel; struct sdma_conf *conf; struct sdma_softc *sc; int pending; int i; int j; sc = arg; pending = READ4(sc, SDMAARM_INTR); /* Ack intr */ WRITE4(sc, SDMAARM_INTR, pending); for (i = 0; i < SDMA_N_CHANNELS; i++) { if ((pending & (1 << i)) == 0) continue; channel = &sc->channel[i]; conf = channel->conf; if (!conf) continue; for (j = 0; j < conf->num_bd; j++) { bd = &channel->bd[j]; bd->mode.status |= BD_DONE; if (bd->mode.status & BD_RROR) printf("sDMA error\n"); } conf->ih(conf->ih_user, 1); WRITE4(sc, SDMAARM_HSTART, (1 << i)); } } static int sdma_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_is_compatible(dev, "fsl,imx6q-sdma")) return (ENXIO); device_set_desc(dev, "i.MX6 Smart Direct Memory Access Controller"); return (BUS_PROBE_DEFAULT); } int sdma_start(int chn) { struct sdma_softc *sc; sc = sdma_sc; WRITE4(sc, SDMAARM_HSTART, (1 << chn)); return (0); } int sdma_stop(int chn) { struct sdma_softc *sc; sc = sdma_sc; WRITE4(sc, SDMAARM_STOP_STAT, (1 << chn)); return (0); } int sdma_alloc(void) { struct sdma_channel *channel; struct sdma_softc *sc; int found; int chn; int i; sc = sdma_sc; found = 0; /* Channel 0 can't be used */ for (i = 1; i < SDMA_N_CHANNELS; i++) { channel = &sc->channel[i]; if (channel->in_use == 0) { channel->in_use = 1; found = 1; break; } } if (!found) return (-1); chn = i; /* Allocate area for buffer descriptors */ - channel->bd = (void *)kmem_alloc_contig(kernel_arena, - PAGE_SIZE, M_ZERO, 0, ~0, PAGE_SIZE, 0, - VM_MEMATTR_UNCACHEABLE); + channel->bd = (void *)kmem_alloc_contig(PAGE_SIZE, M_ZERO, 0, ~0, + PAGE_SIZE, 0, VM_MEMATTR_UNCACHEABLE); return (chn); } int sdma_free(int chn) { struct sdma_channel *channel; struct sdma_softc *sc; sc = 
sdma_sc; channel = &sc->channel[chn]; channel->in_use = 0; kmem_free(kernel_arena, (vm_offset_t)channel->bd, PAGE_SIZE); return (0); } static int sdma_overrides(struct sdma_softc *sc, int chn, int evt, int host, int dsp) { int reg; /* Ignore sDMA requests */ reg = READ4(sc, SDMAARM_EVTOVR); if (evt) reg |= (1 << chn); else reg &= ~(1 << chn); WRITE4(sc, SDMAARM_EVTOVR, reg); /* Ignore enable bit (HE) */ reg = READ4(sc, SDMAARM_HOSTOVR); if (host) reg |= (1 << chn); else reg &= ~(1 << chn); WRITE4(sc, SDMAARM_HOSTOVR, reg); /* Prevent sDMA channel from starting */ reg = READ4(sc, SDMAARM_DSPOVR); if (!dsp) reg |= (1 << chn); else reg &= ~(1 << chn); WRITE4(sc, SDMAARM_DSPOVR, reg); return (0); } int sdma_configure(int chn, struct sdma_conf *conf) { struct sdma_buffer_descriptor *bd0; struct sdma_buffer_descriptor *bd; struct sdma_context_data *context; struct sdma_channel *channel; struct sdma_softc *sc; #if 0 int timeout; int ret; #endif int i; sc = sdma_sc; channel = &sc->channel[chn]; channel->conf = conf; /* Ensure operation has stopped */ sdma_stop(chn); /* Set priority and enable the channel */ WRITE4(sc, SDMAARM_SDMA_CHNPRI(chn), 1); WRITE4(sc, SDMAARM_CHNENBL(conf->event), (1 << chn)); sdma_overrides(sc, chn, 0, 0, 0); if (conf->num_bd > MAX_BD) { device_printf(sc->dev, "Error: too much buffer" " descriptors requested\n"); return (-1); } for (i = 0; i < conf->num_bd; i++) { bd = &channel->bd[i]; bd->mode.command = conf->command; bd->mode.status = BD_DONE | BD_EXTD | BD_CONT | BD_INTR; if (i == (conf->num_bd - 1)) bd->mode.status |= BD_WRAP; bd->mode.count = conf->period; bd->buffer_addr = conf->saddr + (conf->period * i); bd->ext_buffer_addr = 0; } sc->ccb[chn].base_bd_ptr = vtophys(channel->bd); sc->ccb[chn].current_bd_ptr = vtophys(channel->bd); /* * Load context. 
* * i.MX6 Reference Manual: Appendix A SDMA Scripts * A.3.1.7.1 (mcu_2_app) */ /* * TODO: allow using other scripts */ context = sc->context; memset(context, 0, sizeof(*context)); context->channel_state.pc = sc->fw_scripts->mcu_2_app_addr; /* * Tx FIFO 0 address (r6) * Event_mask (r1) * Event2_mask (r0) * Watermark level (r7) */ if (conf->event > 32) { context->gReg[0] = (1 << (conf->event % 32)); context->gReg[1] = 0; } else { context->gReg[0] = 0; context->gReg[1] = (1 << conf->event); } context->gReg[6] = conf->daddr; context->gReg[7] = conf->word_length; bd0 = sc->bd0; bd0->mode.command = C0_SETDM; bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD; bd0->mode.count = sizeof(*context) / 4; bd0->buffer_addr = sc->context_phys; bd0->ext_buffer_addr = 2048 + (sizeof(*context) / 4) * chn; WRITE4(sc, SDMAARM_HSTART, 1); #if 0 /* Debug purposes */ timeout = 1000; while (!(ret = READ4(sc, SDMAARM_INTR) & 1)) { if (timeout-- <= 0) break; DELAY(10); }; if (!ret) { device_printf(sc->dev, "Failed to load context.\n"); return (-1); } WRITE4(sc, SDMAARM_INTR, ret); device_printf(sc->dev, "Context loaded successfully.\n"); #endif return (0); } static int load_firmware(struct sdma_softc *sc) { const struct sdma_firmware_header *header; const struct firmware *fp; fp = firmware_get("sdma_fw"); if (fp == NULL) { device_printf(sc->dev, "Can't get firmware.\n"); return (-1); } header = fp->data; if (header->magic != FW_HEADER_MAGIC) { device_printf(sc->dev, "Can't use firmware.\n"); return (-1); } sc->fw_header = header; sc->fw_scripts = (const void *)((const char *)header + header->script_addrs_start); return (0); } static int boot_firmware(struct sdma_softc *sc) { struct sdma_buffer_descriptor *bd0; const uint32_t *ram_code; int timeout; int ret; int chn; int sz; int i; ram_code = (const void *)((const char *)sc->fw_header + sc->fw_header->ram_code_start); /* Make sure SDMA has not started yet */ WRITE4(sc, SDMAARM_MC0PTR, 0); sz = SDMA_N_CHANNELS * sizeof(struct sdma_channel_control) + \ sizeof(struct sdma_context_data); - sc->ccb = (void *)kmem_alloc_contig(kernel_arena, - sz, M_ZERO, 0, ~0, PAGE_SIZE, 0, VM_MEMATTR_UNCACHEABLE); + sc->ccb = (void *)kmem_alloc_contig(sz, M_ZERO, 0, ~0, PAGE_SIZE, 0, + VM_MEMATTR_UNCACHEABLE); sc->ccb_phys = vtophys(sc->ccb); sc->context = (void *)((char *)sc->ccb + \ SDMA_N_CHANNELS * sizeof(struct sdma_channel_control)); sc->context_phys = vtophys(sc->context); /* Disable all the channels */ for (i = 0; i < SDMA_N_EVENTS; i++) WRITE4(sc, SDMAARM_CHNENBL(i), 0); /* All channels have priority 0 */ for (i = 0; i < SDMA_N_CHANNELS; i++) WRITE4(sc, SDMAARM_SDMA_CHNPRI(i), 0); /* Channel 0 is used for booting firmware */ chn = 0; - sc->bd0 = (void *)kmem_alloc_contig(kernel_arena, - PAGE_SIZE, M_ZERO, 0, ~0, PAGE_SIZE, 0, - VM_MEMATTR_UNCACHEABLE); + sc->bd0 = (void *)kmem_alloc_contig(PAGE_SIZE, M_ZERO, 0, ~0, PAGE_SIZE, + 0, VM_MEMATTR_UNCACHEABLE); bd0 = sc->bd0; sc->ccb[chn].base_bd_ptr = vtophys(bd0); sc->ccb[chn].current_bd_ptr = vtophys(bd0); WRITE4(sc, SDMAARM_SDMA_CHNPRI(chn), 1); sdma_overrides(sc, chn, 1, 0, 0); /* XXX: not sure what is that */ WRITE4(sc, SDMAARM_CHN0ADDR, 0x4050); WRITE4(sc, SDMAARM_CONFIG, 0); WRITE4(sc, SDMAARM_MC0PTR, sc->ccb_phys); WRITE4(sc, SDMAARM_CONFIG, CONFIG_CSM); WRITE4(sc, SDMAARM_SDMA_CHNPRI(chn), 1); bd0->mode.command = C0_SETPM; bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD; bd0->mode.count = sc->fw_header->ram_code_size / 2; bd0->buffer_addr = vtophys(ram_code); bd0->ext_buffer_addr = 
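/*
 * Descriptive note (a reading of the surrounding code, not taken from
 * the reference manual): the C0_SETDM command on buffer descriptor 0
 * appears to copy sizeof(*context)/4 words from sc->context_phys into
 * SDMA internal data memory, with the extended buffer address selecting
 * the per-channel context slot; hence the
 * "2048 + (sizeof(*context) / 4) * chn" destination computed here.
 */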
sc->fw_scripts->ram_code_start_addr; WRITE4(sc, SDMAARM_HSTART, 1); timeout = 100; while (!(ret = READ4(sc, SDMAARM_INTR) & 1)) { if (timeout-- <= 0) break; DELAY(10); } if (ret == 0) { device_printf(sc->dev, "SDMA failed to boot\n"); return (-1); } WRITE4(sc, SDMAARM_INTR, ret); #if 0 device_printf(sc->dev, "SDMA booted successfully.\n"); #endif /* Debug is disabled */ WRITE4(sc, SDMAARM_ONCE_ENB, 0); return (0); } static int sdma_attach(device_t dev) { struct sdma_softc *sc; int err; sc = device_get_softc(dev); sc->dev = dev; if (bus_alloc_resources(dev, sdma_spec, sc->res)) { device_printf(dev, "could not allocate resources\n"); return (ENXIO); } /* Memory interface */ sc->bst = rman_get_bustag(sc->res[0]); sc->bsh = rman_get_bushandle(sc->res[0]); sdma_sc = sc; /* Setup interrupt handler */ err = bus_setup_intr(dev, sc->res[1], INTR_TYPE_MISC | INTR_MPSAFE, NULL, sdma_intr, sc, &sc->ih); if (err) { device_printf(dev, "Unable to alloc interrupt resource.\n"); return (ENXIO); } if (load_firmware(sc) == -1) return (ENXIO); if (boot_firmware(sc) == -1) return (ENXIO); return (0); }; static device_method_t sdma_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sdma_probe), DEVMETHOD(device_attach, sdma_attach), { 0, 0 } }; static driver_t sdma_driver = { "sdma", sdma_methods, sizeof(struct sdma_softc), }; static devclass_t sdma_devclass; EARLY_DRIVER_MODULE(sdma, simplebus, sdma_driver, sdma_devclass, 0, 0, BUS_PASS_RESOURCE); Index: head/sys/arm/nvidia/drm2/tegra_dc.c =================================================================== --- head/sys/arm/nvidia/drm2/tegra_dc.c (revision 338106) +++ head/sys/arm/nvidia/drm2/tegra_dc.c (revision 338107) @@ -1,1447 +1,1447 @@ /*- * Copyright (c) 2015 Michal Meloun * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "tegra_drm_if.h" #include "tegra_dc_if.h" #define WR4(_sc, _r, _v) bus_write_4((_sc)->mem_res, 4 * (_r), (_v)) #define RD4(_sc, _r) bus_read_4((_sc)->mem_res, 4 * (_r)) #define LOCK(_sc) mtx_lock(&(_sc)->mtx) #define UNLOCK(_sc) mtx_unlock(&(_sc)->mtx) #define SLEEP(_sc, timeout) \ mtx_sleep(sc, &sc->mtx, 0, "tegra_dc_wait", timeout); #define LOCK_INIT(_sc) \ mtx_init(&_sc->mtx, device_get_nameunit(_sc->dev), "tegra_dc", MTX_DEF) #define LOCK_DESTROY(_sc) mtx_destroy(&_sc->mtx) #define ASSERT_LOCKED(_sc) mtx_assert(&_sc->mtx, MA_OWNED) #define ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->mtx, MA_NOTOWNED) #define SYNCPT_VBLANK0 26 #define SYNCPT_VBLANK1 27 #define DC_MAX_PLANES 2 /* Maximum planes */ /* DRM Formats supported by DC */ /* XXXX expand me */ static uint32_t dc_plane_formats[] = { DRM_FORMAT_XBGR8888, DRM_FORMAT_XRGB8888, DRM_FORMAT_RGB565, DRM_FORMAT_UYVY, DRM_FORMAT_YUYV, DRM_FORMAT_YUV420, DRM_FORMAT_YUV422, }; /* Complete description of one window (plane) */ struct dc_window { /* Source (in framebuffer) rectangle, in pixels */ u_int src_x; u_int src_y; u_int src_w; u_int src_h; /* Destination (on display) rectangle, in pixels */ u_int dst_x; u_int dst_y; u_int dst_w; u_int dst_h; /* Parsed pixel format */ u_int bits_per_pixel; bool is_yuv; /* any YUV mode */ bool is_yuv_planar; /* planar YUV mode */ uint32_t color_mode; /* DC_WIN_COLOR_DEPTH */ uint32_t swap; /* DC_WIN_BYTE_SWAP */ uint32_t surface_kind; /* DC_WINBUF_SURFACE_KIND */ uint32_t block_height; /* DC_WINBUF_SURFACE_KIND */ /* Parsed flipping, rotation is not supported for pitched modes */ bool flip_x; /* inverted X-axis */ bool flip_y; /* inverted Y-axis */ bool transpose_xy; /* swap X and Y-axis */ /* Color planes base addresses and strides */ bus_size_t base[3]; uint32_t stride[3]; /* stride[2] isn't used by HW */ }; struct dc_softc { device_t dev; struct resource *mem_res; struct resource *irq_res; void *irq_ih; struct mtx mtx; clk_t clk_parent; clk_t clk_dc; hwreset_t hwreset_dc; int pitch_align; struct tegra_crtc tegra_crtc; struct drm_pending_vblank_event *event; struct drm_gem_object *cursor_gem; }; static struct ofw_compat_data compat_data[] = { {"nvidia,tegra124-dc", 1}, {NULL, 0}, }; /* Convert standard drm pixel format to tegra windows parameters. 
*/ static int dc_parse_drm_format(struct tegra_fb *fb, struct dc_window *win) { struct tegra_bo *bo; uint32_t cm; uint32_t sw; bool is_yuv, is_yuv_planar; int nplanes, i; switch (fb->drm_fb.pixel_format) { case DRM_FORMAT_XBGR8888: sw = BYTE_SWAP(NOSWAP); cm = WIN_COLOR_DEPTH_R8G8B8A8; is_yuv = false; is_yuv_planar = false; break; case DRM_FORMAT_XRGB8888: sw = BYTE_SWAP(NOSWAP); cm = WIN_COLOR_DEPTH_B8G8R8A8; is_yuv = false; is_yuv_planar = false; break; case DRM_FORMAT_RGB565: sw = BYTE_SWAP(NOSWAP); cm = WIN_COLOR_DEPTH_B5G6R5; is_yuv = false; is_yuv_planar = false; break; case DRM_FORMAT_UYVY: sw = BYTE_SWAP(NOSWAP); cm = WIN_COLOR_DEPTH_YCbCr422; is_yuv = true; is_yuv_planar = false; break; case DRM_FORMAT_YUYV: sw = BYTE_SWAP(SWAP2); cm = WIN_COLOR_DEPTH_YCbCr422; is_yuv = true; is_yuv_planar = false; break; case DRM_FORMAT_YUV420: sw = BYTE_SWAP(NOSWAP); cm = WIN_COLOR_DEPTH_YCbCr420P; is_yuv = true; is_yuv_planar = true; break; case DRM_FORMAT_YUV422: sw = BYTE_SWAP(NOSWAP); cm = WIN_COLOR_DEPTH_YCbCr422P; is_yuv = true; is_yuv_planar = true; break; default: /* Unsupported format */ return (-EINVAL); } /* Basic check of arguments. */ switch (fb->rotation) { case 0: case 180: break; case 90: /* Rotation is supported only */ case 270: /* for block linear surfaces */ if (!fb->block_linear) return (-EINVAL); break; default: return (-EINVAL); } /* XXX Add more checks (sizes, scaling...) */ if (win == NULL) return (0); win->surface_kind = fb->block_linear ? SURFACE_KIND_BL_16B2: SURFACE_KIND_PITCH; win->block_height = fb->block_height; switch (fb->rotation) { case 0: /* (0,0,0) */ win->transpose_xy = false; win->flip_x = false; win->flip_y = false; break; case 90: /* (1,0,1) */ win->transpose_xy = true; win->flip_x = false; win->flip_y = true; break; case 180: /* (0,1,1) */ win->transpose_xy = false; win->flip_x = true; win->flip_y = true; break; case 270: /* (1,1,0) */ win->transpose_xy = true; win->flip_x = true; win->flip_y = false; break; } win->flip_x ^= fb->flip_x; win->flip_y ^= fb->flip_y; win->color_mode = cm; win->swap = sw; win->bits_per_pixel = fb->drm_fb.bits_per_pixel; win->is_yuv = is_yuv; win->is_yuv_planar = is_yuv_planar; nplanes = drm_format_num_planes(fb->drm_fb.pixel_format); for (i = 0; i < nplanes; i++) { bo = fb->planes[i]; win->base[i] = bo->pbase + fb->drm_fb.offsets[i]; win->stride[i] = fb->drm_fb.pitches[i]; } return (0); } /* * Scaling functions. * * It's unclear if we want/must program the fractional portion * (aka bias) of init_dda registers, mainly when mirrored axis * modes are used. * For now, we use 1.0 as recommended by TRM. */ static inline uint32_t dc_scaling_init(uint32_t start) { return (1 << 12); } static inline uint32_t dc_scaling_incr(uint32_t src, uint32_t dst, uint32_t maxscale) { uint32_t val; val = (src - 1) << 12 ; /* 4.12 fixed float */ val /= (dst - 1); if (val > (maxscale << 12)) val = maxscale << 12; return val; } /* ------------------------------------------------------------------- * * HW Access. * */ /* * Setup pixel clock. * Minimal frequency is pixel clock, but output is free to select * any higher. 
*/ static int dc_setup_clk(struct dc_softc *sc, struct drm_crtc *crtc, struct drm_display_mode *mode, uint32_t *div) { uint64_t pclk, freq; struct tegra_drm_encoder *output; struct drm_encoder *encoder; long rv; pclk = mode->clock * 1000; /* Find attached encoder */ output = NULL; list_for_each_entry(encoder, &crtc->dev->mode_config.encoder_list, head) { if (encoder->crtc == crtc) { output = container_of(encoder, struct tegra_drm_encoder, encoder); break; } } if (output == NULL) return (-ENODEV); if (output->setup_clock == NULL) panic("Output have not setup_clock function.\n"); rv = output->setup_clock(output, sc->clk_dc, pclk); if (rv != 0) { device_printf(sc->dev, "Cannot setup pixel clock: %llu\n", pclk); return (rv); } rv = clk_get_freq(sc->clk_dc, &freq); *div = (freq * 2 / pclk) - 2; DRM_DEBUG_KMS("frequency: %llu, DC divider: %u\n", freq, *div); return 0; } static void dc_setup_window(struct dc_softc *sc, unsigned int index, struct dc_window *win) { uint32_t h_offset, v_offset, h_size, v_size, bpp; uint32_t h_init_dda, v_init_dda, h_incr_dda, v_incr_dda; uint32_t val; #ifdef DMR_DEBUG_WINDOW printf("%s window: %d\n", __func__, index); printf(" src: x: %d, y: %d, w: %d, h: %d\n", win->src_x, win->src_y, win->src_w, win->src_h); printf(" dst: x: %d, y: %d, w: %d, h: %d\n", win->dst_x, win->dst_y, win->dst_w, win->dst_h); printf(" bpp: %d, color_mode: %d, swap: %d\n", win->bits_per_pixel, win->color_mode, win->swap); #endif if (win->is_yuv) bpp = win->is_yuv_planar ? 1 : 2; else bpp = (win->bits_per_pixel + 7) / 8; if (!win->transpose_xy) { h_size = win->src_w * bpp; v_size = win->src_h; } else { h_size = win->src_h * bpp; v_size = win->src_w; } h_offset = win->src_x * bpp;; v_offset = win->src_y; if (win->flip_x) { h_offset += win->src_w * bpp - 1; } if (win->flip_y) v_offset += win->src_h - 1; /* Adjust offsets for planar yuv modes */ if (win->is_yuv_planar) { h_offset &= ~1; if (win->flip_x ) h_offset |= 1; v_offset &= ~1; if (win->flip_y ) v_offset |= 1; } /* Setup scaling. 
*/ if (!win->transpose_xy) { h_init_dda = dc_scaling_init(win->src_x); v_init_dda = dc_scaling_init(win->src_y); h_incr_dda = dc_scaling_incr(win->src_w, win->dst_w, 4); v_incr_dda = dc_scaling_incr(win->src_h, win->dst_h, 15); } else { h_init_dda = dc_scaling_init(win->src_y); v_init_dda = dc_scaling_init(win->src_x); h_incr_dda = dc_scaling_incr(win->src_h, win->dst_h, 4); v_incr_dda = dc_scaling_incr(win->src_w, win->dst_w, 15); } #ifdef DMR_DEBUG_WINDOW printf("\n"); printf(" bpp: %d, size: h: %d v: %d, offset: h:%d v: %d\n", bpp, h_size, v_size, h_offset, v_offset); printf(" init_dda: h: %d v: %d, incr_dda: h: %d v: %d\n", h_init_dda, v_init_dda, h_incr_dda, v_incr_dda); #endif LOCK(sc); /* Select target window */ val = WINDOW_A_SELECT << index; WR4(sc, DC_CMD_DISPLAY_WINDOW_HEADER, val); /* Sizes */ WR4(sc, DC_WIN_POSITION, WIN_POSITION(win->dst_x, win->dst_y)); WR4(sc, DC_WIN_SIZE, WIN_SIZE(win->dst_w, win->dst_h)); WR4(sc, DC_WIN_PRESCALED_SIZE, WIN_PRESCALED_SIZE(h_size, v_size)); /* DDA */ WR4(sc, DC_WIN_DDA_INCREMENT, WIN_DDA_INCREMENT(h_incr_dda, v_incr_dda)); WR4(sc, DC_WIN_H_INITIAL_DDA, h_init_dda); WR4(sc, DC_WIN_V_INITIAL_DDA, v_init_dda); /* Color planes base addresses and strides */ WR4(sc, DC_WINBUF_START_ADDR, win->base[0]); if (win->is_yuv_planar) { WR4(sc, DC_WINBUF_START_ADDR_U, win->base[1]); WR4(sc, DC_WINBUF_START_ADDR_V, win->base[2]); WR4(sc, DC_WIN_LINE_STRIDE, win->stride[1] << 16 | win->stride[0]); } else { WR4(sc, DC_WIN_LINE_STRIDE, win->stride[0]); } /* Offsets for rotation and axis flip */ WR4(sc, DC_WINBUF_ADDR_H_OFFSET, h_offset); WR4(sc, DC_WINBUF_ADDR_V_OFFSET, v_offset); /* Color format */ WR4(sc, DC_WIN_COLOR_DEPTH, win->color_mode); WR4(sc, DC_WIN_BYTE_SWAP, win->swap); /* Tiling */ val = win->surface_kind; if (win->surface_kind == SURFACE_KIND_BL_16B2) val |= SURFACE_KIND_BLOCK_HEIGHT(win->block_height); WR4(sc, DC_WINBUF_SURFACE_KIND, val); /* Color space coefs for YUV modes */ if (win->is_yuv) { WR4(sc, DC_WINC_CSC_YOF, 0x00f0); WR4(sc, DC_WINC_CSC_KYRGB, 0x012a); WR4(sc, DC_WINC_CSC_KUR, 0x0000); WR4(sc, DC_WINC_CSC_KVR, 0x0198); WR4(sc, DC_WINC_CSC_KUG, 0x039b); WR4(sc, DC_WINC_CSC_KVG, 0x032f); WR4(sc, DC_WINC_CSC_KUB, 0x0204); WR4(sc, DC_WINC_CSC_KVB, 0x0000); } val = WIN_ENABLE; if (win->is_yuv) val |= CSC_ENABLE; else if (win->bits_per_pixel < 24) val |= COLOR_EXPAND; if (win->flip_y) val |= V_DIRECTION; if (win->flip_x) val |= H_DIRECTION; if (win->transpose_xy) val |= SCAN_COLUMN; WR4(sc, DC_WINC_WIN_OPTIONS, val); #ifdef DMR_DEBUG_WINDOW /* Set underflow debug mode -> highlight missing pixels. */ WR4(sc, DC_WINBUF_UFLOW_CTRL, UFLOW_CTR_ENABLE); WR4(sc, DC_WINBUF_UFLOW_DBG_PIXEL, 0xFFFF0000); #endif UNLOCK(sc); } /* ------------------------------------------------------------------- * * Plane functions. 
* */ static int dc_plane_update(struct drm_plane *drm_plane, struct drm_crtc *drm_crtc, struct drm_framebuffer *drm_fb, int crtc_x, int crtc_y, unsigned int crtc_w, unsigned int crtc_h, uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h) { struct tegra_plane *plane; struct tegra_crtc *crtc; struct tegra_fb *fb; struct dc_softc *sc; struct dc_window win; int rv; plane = container_of(drm_plane, struct tegra_plane, drm_plane); fb = container_of(drm_fb, struct tegra_fb, drm_fb); crtc = container_of(drm_crtc, struct tegra_crtc, drm_crtc); sc = device_get_softc(crtc->dev); memset(&win, 0, sizeof(win)); win.src_x = src_x >> 16; win.src_y = src_y >> 16; win.src_w = src_w >> 16; win.src_h = src_h >> 16; win.dst_x = crtc_x; win.dst_y = crtc_y; win.dst_w = crtc_w; win.dst_h = crtc_h; rv = dc_parse_drm_format(fb, &win); if (rv != 0) { DRM_WARNING("unsupported pixel format %d\n", fb->drm_fb.pixel_format); return (rv); } dc_setup_window(sc, plane->index, &win); WR4(sc, DC_CMD_STATE_CONTROL, WIN_A_UPDATE << plane->index); WR4(sc, DC_CMD_STATE_CONTROL, WIN_A_ACT_REQ << plane->index); return (0); } static int dc_plane_disable(struct drm_plane *drm_plane) { struct tegra_plane *plane; struct tegra_crtc *crtc; struct dc_softc *sc; uint32_t val, idx; if (drm_plane->crtc == NULL) return (0); plane = container_of(drm_plane, struct tegra_plane, drm_plane); crtc = container_of(drm_plane->crtc, struct tegra_crtc, drm_crtc); sc = device_get_softc(crtc->dev); idx = plane->index; LOCK(sc); WR4(sc, DC_CMD_DISPLAY_WINDOW_HEADER, WINDOW_A_SELECT << idx); val = RD4(sc, DC_WINC_WIN_OPTIONS); val &= ~WIN_ENABLE; WR4(sc, DC_WINC_WIN_OPTIONS, val); UNLOCK(sc); WR4(sc, DC_CMD_STATE_CONTROL, WIN_A_UPDATE << idx); WR4(sc, DC_CMD_STATE_CONTROL, WIN_A_ACT_REQ << idx); return (0); } static void dc_plane_destroy(struct drm_plane *plane) { dc_plane_disable(plane); drm_plane_cleanup(plane); free(plane, DRM_MEM_KMS); } static const struct drm_plane_funcs dc_plane_funcs = { .update_plane = dc_plane_update, .disable_plane = dc_plane_disable, .destroy = dc_plane_destroy, }; /* ------------------------------------------------------------------- * * CRTC helper functions. 
* */ static void dc_crtc_dpms(struct drm_crtc *crtc, int mode) { /* Empty function */ } static bool dc_crtc_mode_fixup(struct drm_crtc *crtc, const struct drm_display_mode *mode, struct drm_display_mode *adjusted) { return (true); } static int dc_set_base(struct dc_softc *sc, int x, int y, struct tegra_fb *fb) { struct dc_window win; int rv; memset(&win, 0, sizeof(win)); win.src_x = x; win.src_y = y; win.src_w = fb->drm_fb.width; win.src_h = fb->drm_fb.height; win.dst_x = x; win.dst_y = y; win.dst_w = fb->drm_fb.width; win.dst_h = fb->drm_fb.height; rv = dc_parse_drm_format(fb, &win); if (rv != 0) { DRM_WARNING("unsupported pixel format %d\n", fb->drm_fb.pixel_format); return (rv); } dc_setup_window(sc, 0, &win); return (0); } static int dc_crtc_mode_set(struct drm_crtc *drm_crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted, int x, int y, struct drm_framebuffer *old_fb) { struct dc_softc *sc; struct tegra_crtc *crtc; struct tegra_fb *fb; struct dc_window win; uint32_t div, h_ref_to_sync, v_ref_to_sync; int rv; crtc = container_of(drm_crtc, struct tegra_crtc, drm_crtc); sc = device_get_softc(crtc->dev); fb = container_of(drm_crtc->fb, struct tegra_fb, drm_fb); h_ref_to_sync = 1; v_ref_to_sync = 1; /* Setup timing */ rv = dc_setup_clk(sc, drm_crtc, mode, &div); if (rv != 0) { device_printf(sc->dev, "Cannot set pixel clock\n"); return (rv); } /* Timing */ WR4(sc, DC_DISP_DISP_TIMING_OPTIONS, 0); WR4(sc, DC_DISP_REF_TO_SYNC, (v_ref_to_sync << 16) | h_ref_to_sync); WR4(sc, DC_DISP_SYNC_WIDTH, ((mode->vsync_end - mode->vsync_start) << 16) | ((mode->hsync_end - mode->hsync_start) << 0)); WR4(sc, DC_DISP_BACK_PORCH, ((mode->vtotal - mode->vsync_end) << 16) | ((mode->htotal - mode->hsync_end) << 0)); WR4(sc, DC_DISP_FRONT_PORCH, ((mode->vsync_start - mode->vdisplay) << 16) | ((mode->hsync_start - mode->hdisplay) << 0)); WR4(sc, DC_DISP_DISP_ACTIVE, (mode->vdisplay << 16) | mode->hdisplay); WR4(sc, DC_DISP_DISP_INTERFACE_CONTROL, DISP_DATA_FORMAT(DF1P1C)); WR4(sc,DC_DISP_DISP_CLOCK_CONTROL, SHIFT_CLK_DIVIDER(div) | PIXEL_CLK_DIVIDER(PCD1)); memset(&win, 0, sizeof(win)); win.src_x = x; win.src_y = y; win.src_w = mode->hdisplay; win.src_h = mode->vdisplay; win.dst_x = x; win.dst_y = y; win.dst_w = mode->hdisplay; win.dst_h = mode->vdisplay; rv = dc_parse_drm_format(fb, &win); if (rv != 0) { DRM_WARNING("unsupported pixel format %d\n", drm_crtc->fb->pixel_format); return (rv); } dc_setup_window(sc, 0, &win); return (0); } static int dc_crtc_mode_set_base(struct drm_crtc *drm_crtc, int x, int y, struct drm_framebuffer *old_fb) { struct dc_softc *sc; struct tegra_crtc *crtc; struct tegra_fb *fb; int rv; crtc = container_of(drm_crtc, struct tegra_crtc, drm_crtc); fb = container_of(drm_crtc->fb, struct tegra_fb, drm_fb); sc = device_get_softc(crtc->dev); rv = dc_set_base(sc, x, y, fb); /* Commit */ WR4(sc, DC_CMD_STATE_CONTROL, GENERAL_UPDATE | WIN_A_UPDATE); WR4(sc, DC_CMD_STATE_CONTROL, GENERAL_ACT_REQ | WIN_A_ACT_REQ); return (rv); } static void dc_crtc_prepare(struct drm_crtc *drm_crtc) { struct dc_softc *sc; struct tegra_crtc *crtc; uint32_t val; crtc = container_of(drm_crtc, struct tegra_crtc, drm_crtc); sc = device_get_softc(crtc->dev); WR4(sc, DC_CMD_GENERAL_INCR_SYNCPT_CNTRL, SYNCPT_CNTRL_NO_STALL); /* XXX allocate syncpoint from host1x */ WR4(sc, DC_CMD_CONT_SYNCPT_VSYNC, SYNCPT_VSYNC_ENABLE | (sc->tegra_crtc.nvidia_head == 0 ? 
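/*
 * Head 0 signals vblank through the VBLANK0 syncpoint (26) and head 1
 * through VBLANK1 (27); as the XXX above notes, the syncpoint should
 * eventually be allocated from host1x rather than hard-coded.
 */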
SYNCPT_VBLANK0: SYNCPT_VBLANK1)); WR4(sc, DC_CMD_DISPLAY_POWER_CONTROL, PW0_ENABLE | PW1_ENABLE | PW2_ENABLE | PW3_ENABLE | PW4_ENABLE | PM0_ENABLE | PM1_ENABLE); val = RD4(sc, DC_CMD_DISPLAY_COMMAND); val |= DISPLAY_CTRL_MODE(CTRL_MODE_C_DISPLAY); WR4(sc, DC_CMD_DISPLAY_COMMAND, val); WR4(sc, DC_CMD_INT_MASK, WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT | WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT); WR4(sc, DC_CMD_INT_ENABLE, VBLANK_INT | WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT | WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT); } static void dc_crtc_commit(struct drm_crtc *drm_crtc) { struct dc_softc *sc; struct tegra_crtc *crtc; uint32_t val; crtc = container_of(drm_crtc, struct tegra_crtc, drm_crtc); sc = device_get_softc(crtc->dev); WR4(sc, DC_CMD_STATE_CONTROL, GENERAL_UPDATE | WIN_A_UPDATE); val = RD4(sc, DC_CMD_INT_MASK); val |= FRAME_END_INT; WR4(sc, DC_CMD_INT_MASK, val); val = RD4(sc, DC_CMD_INT_ENABLE); val |= FRAME_END_INT; WR4(sc, DC_CMD_INT_ENABLE, val); WR4(sc, DC_CMD_STATE_CONTROL, GENERAL_ACT_REQ | WIN_A_ACT_REQ); } static void dc_crtc_load_lut(struct drm_crtc *crtc) { /* empty function */ } static const struct drm_crtc_helper_funcs dc_crtc_helper_funcs = { .dpms = dc_crtc_dpms, .mode_fixup = dc_crtc_mode_fixup, .mode_set = dc_crtc_mode_set, .mode_set_base = dc_crtc_mode_set_base, .prepare = dc_crtc_prepare, .commit = dc_crtc_commit, .load_lut = dc_crtc_load_lut, }; static int drm_crtc_index(struct drm_crtc *crtc) { int idx; struct drm_crtc *tmp; idx = 0; list_for_each_entry(tmp, &crtc->dev->mode_config.crtc_list, head) { if (tmp == crtc) return (idx); idx++; } panic("Cannot find CRTC"); } /* ------------------------------------------------------------------- * * Exported functions (mainly vsync related). * * XXX revisit this -> convert to bus methods? 
*/ int tegra_dc_get_pipe(struct drm_crtc *drm_crtc) { struct tegra_crtc *crtc; crtc = container_of(drm_crtc, struct tegra_crtc, drm_crtc); return (crtc->nvidia_head); } void tegra_dc_enable_vblank(struct drm_crtc *drm_crtc) { struct dc_softc *sc; struct tegra_crtc *crtc; uint32_t val; crtc = container_of(drm_crtc, struct tegra_crtc, drm_crtc); sc = device_get_softc(crtc->dev); LOCK(sc); val = RD4(sc, DC_CMD_INT_MASK); val |= VBLANK_INT; WR4(sc, DC_CMD_INT_MASK, val); UNLOCK(sc); } void tegra_dc_disable_vblank(struct drm_crtc *drm_crtc) { struct dc_softc *sc; struct tegra_crtc *crtc; uint32_t val; crtc = container_of(drm_crtc, struct tegra_crtc, drm_crtc); sc = device_get_softc(crtc->dev); LOCK(sc); val = RD4(sc, DC_CMD_INT_MASK); val &= ~VBLANK_INT; WR4(sc, DC_CMD_INT_MASK, val); UNLOCK(sc); } static void dc_finish_page_flip(struct dc_softc *sc) { struct drm_crtc *drm_crtc; struct drm_device *drm; struct tegra_fb *fb; struct tegra_bo *bo; uint32_t base; int idx; drm_crtc = &sc->tegra_crtc.drm_crtc; drm = drm_crtc->dev; fb = container_of(drm_crtc->fb, struct tegra_fb, drm_fb); mtx_lock(&drm->event_lock); if (sc->event == NULL) { mtx_unlock(&drm->event_lock); return; } LOCK(sc); /* Read active copy of WINBUF_START_ADDR */ WR4(sc, DC_CMD_DISPLAY_WINDOW_HEADER, WINDOW_A_SELECT); WR4(sc, DC_CMD_STATE_ACCESS, READ_MUX); base = RD4(sc, DC_WINBUF_START_ADDR); WR4(sc, DC_CMD_STATE_ACCESS, 0); UNLOCK(sc); /* Is already active */ bo = tegra_fb_get_plane(fb, 0); if (base == (bo->pbase + fb->drm_fb.offsets[0])) { idx = drm_crtc_index(drm_crtc); drm_send_vblank_event(drm, idx, sc->event); drm_vblank_put(drm, idx); sc->event = NULL; } mtx_unlock(&drm->event_lock); } void tegra_dc_cancel_page_flip(struct drm_crtc *drm_crtc, struct drm_file *file) { struct dc_softc *sc; struct tegra_crtc *crtc; struct drm_device *drm; crtc = container_of(drm_crtc, struct tegra_crtc, drm_crtc); sc = device_get_softc(crtc->dev); drm = drm_crtc->dev; mtx_lock(&drm->event_lock); if ((sc->event != NULL) && (sc->event->base.file_priv == file)) { sc->event->base.destroy(&sc->event->base); drm_vblank_put(drm, drm_crtc_index(drm_crtc)); sc->event = NULL; } mtx_unlock(&drm->event_lock); } /* ------------------------------------------------------------------- * * CRTC functions. 
* */ static int dc_page_flip(struct drm_crtc *drm_crtc, struct drm_framebuffer *drm_fb, struct drm_pending_vblank_event *event) { struct dc_softc *sc; struct tegra_crtc *crtc; struct tegra_fb *fb; struct drm_device *drm; crtc = container_of(drm_crtc, struct tegra_crtc, drm_crtc); sc = device_get_softc(crtc->dev); fb = container_of(drm_crtc->fb, struct tegra_fb, drm_fb); drm = drm_crtc->dev; if (sc->event != NULL) return (-EBUSY); if (event != NULL) { event->pipe = sc->tegra_crtc.nvidia_head; sc->event = event; drm_vblank_get(drm, event->pipe); } dc_set_base(sc, drm_crtc->x, drm_crtc->y, fb); drm_crtc->fb = drm_fb; /* Commit */ WR4(sc, DC_CMD_STATE_CONTROL, GENERAL_UPDATE | WIN_A_UPDATE); return (0); } static int dc_cursor_set(struct drm_crtc *drm_crtc, struct drm_file *file, uint32_t handle, uint32_t width, uint32_t height) { struct dc_softc *sc; struct tegra_crtc *crtc; struct drm_gem_object *gem; struct tegra_bo *bo; int i; uint32_t val, *src, *dst; crtc = container_of(drm_crtc, struct tegra_crtc, drm_crtc); sc = device_get_softc(crtc->dev); if (width != height) return (-EINVAL); switch (width) { case 32: val = CURSOR_SIZE(C32x32); break; case 64: val = CURSOR_SIZE(C64x64); break; case 128: val = CURSOR_SIZE(C128x128); break; case 256: val = CURSOR_SIZE(C256x256); break; default: return (-EINVAL); } bo = NULL; gem = NULL; if (handle != 0) { gem = drm_gem_object_lookup(drm_crtc->dev, file, handle); if (gem == NULL) return (-ENOENT); bo = container_of(gem, struct tegra_bo, gem_obj); } if (sc->cursor_gem != NULL) { drm_gem_object_unreference(sc->cursor_gem); } sc->cursor_gem = gem; if (bo != NULL) { /* * Copy cursor into cache and convert it from ARGB to RGBA. * XXXX - this is broken by design - client can write to BO at * any time. We can dedicate other window for cursor or switch * to sw cursor in worst case. */ src = (uint32_t *)bo->vbase; dst = (uint32_t *)crtc->cursor_vbase; for (i = 0; i < width * height; i++) dst[i] = (src[i] << 8) | (src[i] >> 24); val |= CURSOR_CLIP(CC_DISPLAY); val |= CURSOR_START_ADDR(crtc->cursor_pbase); WR4(sc, DC_DISP_CURSOR_START_ADDR, val); val = RD4(sc, DC_DISP_BLEND_CURSOR_CONTROL); val &= ~CURSOR_DST_BLEND_FACTOR_SELECT(~0); val &= ~CURSOR_SRC_BLEND_FACTOR_SELECT(~0); val |= CURSOR_MODE_SELECT; val |= CURSOR_DST_BLEND_FACTOR_SELECT(DST_NEG_K1_TIMES_SRC); val |= CURSOR_SRC_BLEND_FACTOR_SELECT(SRC_BLEND_K1_TIMES_SRC); val |= CURSOR_ALPHA(~0); WR4(sc, DC_DISP_BLEND_CURSOR_CONTROL, val); val = RD4(sc, DC_DISP_DISP_WIN_OPTIONS); val |= CURSOR_ENABLE; WR4(sc, DC_DISP_DISP_WIN_OPTIONS, val); } else { val = RD4(sc, DC_DISP_DISP_WIN_OPTIONS); val &= ~CURSOR_ENABLE; WR4(sc, DC_DISP_DISP_WIN_OPTIONS, val); } /* XXX This fixes cursor underflow issues, but why ? 
*/ WR4(sc, DC_DISP_CURSOR_UNDERFLOW_CTRL, CURSOR_UFLOW_CYA); WR4(sc, DC_CMD_STATE_CONTROL, GENERAL_UPDATE | CURSOR_UPDATE ); WR4(sc, DC_CMD_STATE_CONTROL, GENERAL_ACT_REQ | CURSOR_ACT_REQ); return (0); } static int dc_cursor_move(struct drm_crtc *drm_crtc, int x, int y) { struct dc_softc *sc; struct tegra_crtc *crtc; crtc = container_of(drm_crtc, struct tegra_crtc, drm_crtc); sc = device_get_softc(crtc->dev); WR4(sc, DC_DISP_CURSOR_POSITION, CURSOR_POSITION(x, y)); WR4(sc, DC_CMD_STATE_CONTROL, CURSOR_UPDATE); WR4(sc, DC_CMD_STATE_CONTROL, CURSOR_ACT_REQ); return (0); } static void dc_destroy(struct drm_crtc *crtc) { drm_crtc_cleanup(crtc); memset(crtc, 0, sizeof(*crtc)); } static const struct drm_crtc_funcs dc_crtc_funcs = { .page_flip = dc_page_flip, .cursor_set = dc_cursor_set, .cursor_move = dc_cursor_move, .set_config = drm_crtc_helper_set_config, .destroy = dc_destroy, }; /* ------------------------------------------------------------------- * * Bus and infrastructure. * */ static int dc_init_planes(struct dc_softc *sc, struct tegra_drm *drm) { int i, rv; struct tegra_plane *plane; rv = 0; for (i = 0; i < DC_MAX_PLANES; i++) { plane = malloc(sizeof(*plane), DRM_MEM_KMS, M_WAITOK | M_ZERO); plane->index = i + 1; rv = drm_plane_init(&drm->drm_dev, &plane->drm_plane, 1 << sc->tegra_crtc.nvidia_head, &dc_plane_funcs, dc_plane_formats, nitems(dc_plane_formats), false); if (rv != 0) { free(plane, DRM_MEM_KMS); return (rv); } } return 0; } static void dc_display_enable(device_t dev, bool enable) { struct dc_softc *sc; uint32_t val; sc = device_get_softc(dev); /* Set display mode */ val = enable ? CTRL_MODE_C_DISPLAY: CTRL_MODE_STOP; WR4(sc, DC_CMD_DISPLAY_COMMAND, DISPLAY_CTRL_MODE(val)); /* and commit it*/ WR4(sc, DC_CMD_STATE_CONTROL, GENERAL_UPDATE); WR4(sc, DC_CMD_STATE_CONTROL, GENERAL_ACT_REQ); } static void dc_hdmi_enable(device_t dev, bool enable) { struct dc_softc *sc; uint32_t val; sc = device_get_softc(dev); val = RD4(sc, DC_DISP_DISP_WIN_OPTIONS); if (enable) val |= HDMI_ENABLE; else val &= ~HDMI_ENABLE; WR4(sc, DC_DISP_DISP_WIN_OPTIONS, val); } static void dc_setup_timing(device_t dev, int h_pulse_start) { struct dc_softc *sc; sc = device_get_softc(dev); /* Setup display timing */ WR4(sc, DC_DISP_DISP_TIMING_OPTIONS, VSYNC_H_POSITION(1)); WR4(sc, DC_DISP_DISP_COLOR_CONTROL, DITHER_CONTROL(DITHER_DISABLE) | BASE_COLOR_SIZE(SIZE_BASE888)); WR4(sc, DC_DISP_DISP_SIGNAL_OPTIONS0, H_PULSE2_ENABLE); WR4(sc, DC_DISP_H_PULSE2_CONTROL, PULSE_CONTROL_QUAL(QUAL_VACTIVE) | PULSE_CONTROL_LAST(LAST_END_A)); WR4(sc, DC_DISP_H_PULSE2_POSITION_A, PULSE_START(h_pulse_start) | PULSE_END(h_pulse_start + 8)); } static void dc_intr(void *arg) { struct dc_softc *sc; uint32_t status; sc = arg; /* Confirm interrupt */ status = RD4(sc, DC_CMD_INT_STATUS); WR4(sc, DC_CMD_INT_STATUS, status); if (status & VBLANK_INT) { drm_handle_vblank(sc->tegra_crtc.drm_crtc.dev, sc->tegra_crtc.nvidia_head); dc_finish_page_flip(sc); } } static int dc_init_client(device_t dev, device_t host1x, struct tegra_drm *drm) { struct dc_softc *sc; int rv; sc = device_get_softc(dev); if (drm->pitch_align < sc->pitch_align) drm->pitch_align = sc->pitch_align; drm_crtc_init(&drm->drm_dev, &sc->tegra_crtc.drm_crtc, &dc_crtc_funcs); drm_mode_crtc_set_gamma_size(&sc->tegra_crtc.drm_crtc, 256); drm_crtc_helper_add(&sc->tegra_crtc.drm_crtc, &dc_crtc_helper_funcs); rv = dc_init_planes(sc, drm); if (rv!= 0){ device_printf(dev, "Cannot init planes\n"); return (rv); } WR4(sc, DC_CMD_INT_TYPE, WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT | 
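/*
 * Window A/B/C underflow and overflow interrupt type and polarity are
 * programmed here, but DC_CMD_INT_ENABLE and DC_CMD_INT_MASK are cleared
 * just below, so nothing fires until dc_crtc_prepare() and
 * dc_crtc_commit() enable the individual sources.
 */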
WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT); WR4(sc, DC_CMD_INT_POLARITY, WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT | WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT); WR4(sc, DC_CMD_INT_ENABLE, 0); WR4(sc, DC_CMD_INT_MASK, 0); rv = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, dc_intr, sc, &sc->irq_ih); if (rv != 0) { device_printf(dev, "Cannot register interrupt handler\n"); return (rv); } /* allocate memory for cursor cache */ - sc->tegra_crtc.cursor_vbase = kmem_alloc_contig(kernel_arena, - 256 * 256 * 4, M_WAITOK | M_ZERO, - 0, -1UL, PAGE_SIZE, 0, VM_MEMATTR_WRITE_COMBINING); + sc->tegra_crtc.cursor_vbase = kmem_alloc_contig(256 * 256 * 4, + M_WAITOK | M_ZERO, 0, -1UL, PAGE_SIZE, 0, + VM_MEMATTR_WRITE_COMBINING); sc->tegra_crtc.cursor_pbase = vtophys(sc->tegra_crtc.cursor_vbase); return (0); } static int dc_exit_client(device_t dev, device_t host1x, struct tegra_drm *drm) { struct dc_softc *sc; sc = device_get_softc(dev); if (sc->irq_ih != NULL) bus_teardown_intr(dev, sc->irq_res, sc->irq_ih); sc->irq_ih = NULL; return (0); } static int get_fdt_resources(struct dc_softc *sc, phandle_t node) { int rv; rv = hwreset_get_by_ofw_name(sc->dev, 0, "dc", &sc->hwreset_dc); if (rv != 0) { device_printf(sc->dev, "Cannot get 'dc' reset\n"); return (rv); } rv = clk_get_by_ofw_name(sc->dev, 0, "parent", &sc->clk_parent); if (rv != 0) { device_printf(sc->dev, "Cannot get 'parent' clock\n"); return (rv); } rv = clk_get_by_ofw_name(sc->dev, 0, "dc", &sc->clk_dc); if (rv != 0) { device_printf(sc->dev, "Cannot get 'dc' clock\n"); return (rv); } rv = OF_getencprop(node, "nvidia,head", &sc->tegra_crtc.nvidia_head, sizeof(sc->tegra_crtc.nvidia_head)); if (rv <= 0) { device_printf(sc->dev, "Cannot get 'nvidia,head' property\n"); return (rv); } return (0); } static int enable_fdt_resources(struct dc_softc *sc) { int id, rv; rv = clk_set_parent_by_clk(sc->clk_dc, sc->clk_parent); if (rv != 0) { device_printf(sc->dev, "Cannot set parent for 'dc' clock\n"); return (rv); } id = (sc->tegra_crtc.nvidia_head == 0) ? 
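/*
 * Head 0 lives in the DIS power partition and head 1 in DISB; the
 * selected partition is powered up together with the dc clock and
 * reset by tegra_powergate_sequence_power_up() below.
 */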
TEGRA_POWERGATE_DIS: TEGRA_POWERGATE_DISB; rv = tegra_powergate_sequence_power_up(id, sc->clk_dc, sc->hwreset_dc); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'DIS' powergate\n"); return (rv); } return (0); } static int dc_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0) return (ENXIO); device_set_desc(dev, "Tegra Display Controller"); return (BUS_PROBE_DEFAULT); } static int dc_attach(device_t dev) { struct dc_softc *sc; phandle_t node; int rid, rv; sc = device_get_softc(dev); sc->dev = dev; sc->tegra_crtc.dev = dev; node = ofw_bus_get_node(sc->dev); LOCK_INIT(sc); rid = 0; sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res == NULL) { device_printf(dev, "Cannot allocate memory resources\n"); goto fail; } rid = 0; sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->irq_res == NULL) { device_printf(dev, "Cannot allocate IRQ resources\n"); goto fail; } rv = get_fdt_resources(sc, node); if (rv != 0) { device_printf(dev, "Cannot parse FDT resources\n"); goto fail; } rv = enable_fdt_resources(sc); if (rv != 0) { device_printf(dev, "Cannot enable FDT resources\n"); goto fail; } /* * Tegra124 * - 64 for RGB modes * - 128 for YUV planar modes * - 256 for block linear modes */ sc->pitch_align = 256; rv = TEGRA_DRM_REGISTER_CLIENT(device_get_parent(sc->dev), sc->dev); if (rv != 0) { device_printf(dev, "Cannot register DRM device\n"); goto fail; } return (bus_generic_attach(dev)); fail: TEGRA_DRM_DEREGISTER_CLIENT(device_get_parent(sc->dev), sc->dev); if (sc->irq_ih != NULL) bus_teardown_intr(dev, sc->irq_res, sc->irq_ih); if (sc->clk_parent != NULL) clk_release(sc->clk_parent); if (sc->clk_dc != NULL) clk_release(sc->clk_dc); if (sc->hwreset_dc != NULL) hwreset_release(sc->hwreset_dc); if (sc->irq_res != NULL) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res); if (sc->mem_res != NULL) bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->mem_res); LOCK_DESTROY(sc); return (ENXIO); } static int dc_detach(device_t dev) { struct dc_softc *sc; sc = device_get_softc(dev); TEGRA_DRM_DEREGISTER_CLIENT(device_get_parent(sc->dev), sc->dev); if (sc->irq_ih != NULL) bus_teardown_intr(dev, sc->irq_res, sc->irq_ih); if (sc->clk_parent != NULL) clk_release(sc->clk_parent); if (sc->clk_dc != NULL) clk_release(sc->clk_dc); if (sc->hwreset_dc != NULL) hwreset_release(sc->hwreset_dc); if (sc->irq_res != NULL) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res); if (sc->mem_res != NULL) bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->mem_res); LOCK_DESTROY(sc); return (bus_generic_detach(dev)); } static device_method_t tegra_dc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, dc_probe), DEVMETHOD(device_attach, dc_attach), DEVMETHOD(device_detach, dc_detach), /* tegra drm interface */ DEVMETHOD(tegra_drm_init_client, dc_init_client), DEVMETHOD(tegra_drm_exit_client, dc_exit_client), /* tegra dc interface */ DEVMETHOD(tegra_dc_display_enable, dc_display_enable), DEVMETHOD(tegra_dc_hdmi_enable, dc_hdmi_enable), DEVMETHOD(tegra_dc_setup_timing, dc_setup_timing), DEVMETHOD_END }; static devclass_t tegra_dc_devclass; DEFINE_CLASS_0(tegra_dc, tegra_dc_driver, tegra_dc_methods, sizeof(struct dc_softc)); DRIVER_MODULE(tegra_dc, host1x, tegra_dc_driver, tegra_dc_devclass, NULL, NULL); Index: head/sys/arm/nvidia/tegra_pcie.c =================================================================== --- head/sys/arm/nvidia/tegra_pcie.c (revision 338106) +++ 
head/sys/arm/nvidia/tegra_pcie.c (revision 338107) @@ -1,1636 +1,1636 @@ /*- * Copyright (c) 2016 Michal Meloun * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Nvidia Integrated PCI/PCI-Express controller driver. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ofw_bus_if.h" #include "msi_if.h" #include "pcib_if.h" #include "pic_if.h" #define AFI_AXI_BAR0_SZ 0x000 #define AFI_AXI_BAR1_SZ 0x004 #define AFI_AXI_BAR2_SZ 0x008 #define AFI_AXI_BAR3_SZ 0x00c #define AFI_AXI_BAR4_SZ 0x010 #define AFI_AXI_BAR5_SZ 0x014 #define AFI_AXI_BAR0_START 0x018 #define AFI_AXI_BAR1_START 0x01c #define AFI_AXI_BAR2_START 0x020 #define AFI_AXI_BAR3_START 0x024 #define AFI_AXI_BAR4_START 0x028 #define AFI_AXI_BAR5_START 0x02c #define AFI_FPCI_BAR0 0x030 #define AFI_FPCI_BAR1 0x034 #define AFI_FPCI_BAR2 0x038 #define AFI_FPCI_BAR3 0x03c #define AFI_FPCI_BAR4 0x040 #define AFI_FPCI_BAR5 0x044 #define AFI_MSI_BAR_SZ 0x060 #define AFI_MSI_FPCI_BAR_ST 0x064 #define AFI_MSI_AXI_BAR_ST 0x068 #define AFI_MSI_VEC(x) (0x06c + 4 * (x)) #define AFI_MSI_EN_VEC(x) (0x08c + 4 * (x)) #define AFI_MSI_INTR_IN_REG 32 #define AFI_MSI_REGS 8 #define AFI_CONFIGURATION 0x0ac #define AFI_CONFIGURATION_EN_FPCI (1 << 0) #define AFI_FPCI_ERROR_MASKS 0x0b0 #define AFI_INTR_MASK 0x0b4 #define AFI_INTR_MASK_MSI_MASK (1 << 8) #define AFI_INTR_MASK_INT_MASK (1 << 0) #define AFI_INTR_CODE 0x0b8 #define AFI_INTR_CODE_MASK 0xf #define AFI_INTR_CODE_INT_CODE_INI_SLVERR 1 #define AFI_INTR_CODE_INT_CODE_INI_DECERR 2 #define AFI_INTR_CODE_INT_CODE_TGT_SLVERR 3 #define AFI_INTR_CODE_INT_CODE_TGT_DECERR 4 #define AFI_INTR_CODE_INT_CODE_TGT_WRERR 5 #define AFI_INTR_CODE_INT_CODE_SM_MSG 6 #define AFI_INTR_CODE_INT_CODE_DFPCI_DECERR 7 #define AFI_INTR_CODE_INT_CODE_AXI_DECERR 8 #define AFI_INTR_CODE_INT_CODE_FPCI_TIMEOUT 9 #define AFI_INTR_CODE_INT_CODE_PE_PRSNT_SENSE 10 #define AFI_INTR_CODE_INT_CODE_PE_CLKREQ_SENSE 11 #define AFI_INTR_CODE_INT_CODE_CLKCLAMP_SENSE 12 #define AFI_INTR_CODE_INT_CODE_RDY4PD_SENSE 13 #define AFI_INTR_CODE_INT_CODE_P2P_ERROR 14 #define AFI_INTR_SIGNATURE 
0x0bc #define AFI_UPPER_FPCI_ADDRESS 0x0c0 #define AFI_SM_INTR_ENABLE 0x0c4 #define AFI_SM_INTR_RP_DEASSERT (1 << 14) #define AFI_SM_INTR_RP_ASSERT (1 << 13) #define AFI_SM_INTR_HOTPLUG (1 << 12) #define AFI_SM_INTR_PME (1 << 11) #define AFI_SM_INTR_FATAL_ERROR (1 << 10) #define AFI_SM_INTR_UNCORR_ERROR (1 << 9) #define AFI_SM_INTR_CORR_ERROR (1 << 8) #define AFI_SM_INTR_INTD_DEASSERT (1 << 7) #define AFI_SM_INTR_INTC_DEASSERT (1 << 6) #define AFI_SM_INTR_INTB_DEASSERT (1 << 5) #define AFI_SM_INTR_INTA_DEASSERT (1 << 4) #define AFI_SM_INTR_INTD_ASSERT (1 << 3) #define AFI_SM_INTR_INTC_ASSERT (1 << 2) #define AFI_SM_INTR_INTB_ASSERT (1 << 1) #define AFI_SM_INTR_INTA_ASSERT (1 << 0) #define AFI_AFI_INTR_ENABLE 0x0c8 #define AFI_AFI_INTR_ENABLE_CODE(code) (1 << (code)) #define AFI_PCIE_CONFIG 0x0f8 #define AFI_PCIE_CONFIG_PCIE_DISABLE(x) (1 << ((x) + 1)) #define AFI_PCIE_CONFIG_PCIE_DISABLE_ALL 0x6 #define AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_MASK (0xf << 20) #define AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_XBAR2_1 (0x0 << 20) #define AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_XBAR4_1 (0x1 << 20) #define AFI_FUSE 0x104 #define AFI_FUSE_PCIE_T0_GEN2_DIS (1 << 2) #define AFI_PEX0_CTRL 0x110 #define AFI_PEX1_CTRL 0x118 #define AFI_PEX2_CTRL 0x128 #define AFI_PEX_CTRL_OVERRIDE_EN (1 << 4) #define AFI_PEX_CTRL_REFCLK_EN (1 << 3) #define AFI_PEX_CTRL_CLKREQ_EN (1 << 1) #define AFI_PEX_CTRL_RST_L (1 << 0) #define AFI_AXI_BAR6_SZ 0x134 #define AFI_AXI_BAR7_SZ 0x138 #define AFI_AXI_BAR8_SZ 0x13c #define AFI_AXI_BAR6_START 0x140 #define AFI_AXI_BAR7_START 0x144 #define AFI_AXI_BAR8_START 0x148 #define AFI_FPCI_BAR6 0x14c #define AFI_FPCI_BAR7 0x150 #define AFI_FPCI_BAR8 0x154 #define AFI_PLLE_CONTROL 0x160 #define AFI_PLLE_CONTROL_BYPASS_PADS2PLLE_CONTROL (1 << 9) #define AFI_PLLE_CONTROL_BYPASS_PCIE2PLLE_CONTROL (1 << 8) #define AFI_PLLE_CONTROL_PADS2PLLE_CONTROL_EN (1 << 1) #define AFI_PLLE_CONTROL_PCIE2PLLE_CONTROL_EN (1 << 0) #define AFI_PEXBIAS_CTRL 0x168 /* FPCI Address space */ #define FPCI_MAP_IO 0xfdfc000000ULL #define FPCI_MAP_TYPE0_CONFIG 0xfdfc000000ULL #define FPCI_MAP_TYPE1_CONFIG 0xfdff000000ULL #define FPCI_MAP_EXT_TYPE0_CONFIG 0xfe00000000ULL #define FPCI_MAP_EXT_TYPE1_CONFIG 0xfe10000000ULL /* Configuration space */ #define RP_VEND_XP 0x00000F00 #define RP_VEND_XP_DL_UP (1 << 30) #define RP_PRIV_MISC 0x00000FE0 #define RP_PRIV_MISC_PRSNT_MAP_EP_PRSNT (0xE << 0) #define RP_PRIV_MISC_PRSNT_MAP_EP_ABSNT (0xF << 0) #define RP_LINK_CONTROL_STATUS 0x00000090 #define RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE 0x20000000 #define RP_LINK_CONTROL_STATUS_LINKSTAT_MASK 0x3fff0000 /* Wait 50 ms (per port) for link. */ #define TEGRA_PCIE_LINKUP_TIMEOUT 50000 #define TEGRA_PCIB_MSI_ENABLE #define DEBUG #ifdef DEBUG #define debugf(fmt, args...) do { printf(fmt,##args); } while (0) #else #define debugf(fmt, args...) 
#endif /* * Configuration space format: * [27:24] extended register * [23:16] bus * [15:11] slot (device) * [10: 8] function * [ 7: 0] register */ #define PCI_CFG_EXT_REG(reg) ((((reg) >> 8) & 0x0f) << 24) #define PCI_CFG_BUS(bus) (((bus) & 0xff) << 16) #define PCI_CFG_DEV(dev) (((dev) & 0x1f) << 11) #define PCI_CFG_FUN(fun) (((fun) & 0x07) << 8) #define PCI_CFG_BASE_REG(reg) ((reg) & 0xff) #define PADS_WR4(_sc, _r, _v) bus_write_4((_sc)-pads_mem_res, (_r), (_v)) #define PADS_RD4(_sc, _r) bus_read_4((_sc)->pads_mem_res, (_r)) #define AFI_WR4(_sc, _r, _v) bus_write_4((_sc)->afi_mem_res, (_r), (_v)) #define AFI_RD4(_sc, _r) bus_read_4((_sc)->afi_mem_res, (_r)) static struct { bus_size_t axi_start; bus_size_t fpci_start; bus_size_t size; } bars[] = { {AFI_AXI_BAR0_START, AFI_FPCI_BAR0, AFI_AXI_BAR0_SZ}, /* BAR 0 */ {AFI_AXI_BAR1_START, AFI_FPCI_BAR1, AFI_AXI_BAR1_SZ}, /* BAR 1 */ {AFI_AXI_BAR2_START, AFI_FPCI_BAR2, AFI_AXI_BAR2_SZ}, /* BAR 2 */ {AFI_AXI_BAR3_START, AFI_FPCI_BAR3, AFI_AXI_BAR3_SZ}, /* BAR 3 */ {AFI_AXI_BAR4_START, AFI_FPCI_BAR4, AFI_AXI_BAR4_SZ}, /* BAR 4 */ {AFI_AXI_BAR5_START, AFI_FPCI_BAR5, AFI_AXI_BAR5_SZ}, /* BAR 5 */ {AFI_AXI_BAR6_START, AFI_FPCI_BAR6, AFI_AXI_BAR6_SZ}, /* BAR 6 */ {AFI_AXI_BAR7_START, AFI_FPCI_BAR7, AFI_AXI_BAR7_SZ}, /* BAR 7 */ {AFI_AXI_BAR8_START, AFI_FPCI_BAR8, AFI_AXI_BAR8_SZ}, /* BAR 8 */ {AFI_MSI_AXI_BAR_ST, AFI_MSI_FPCI_BAR_ST, AFI_MSI_BAR_SZ}, /* MSI 9 */ }; /* Compatible devices. */ static struct ofw_compat_data compat_data[] = { {"nvidia,tegra124-pcie", 1}, {NULL, 0}, }; #define TEGRA_FLAG_MSI_USED 0x0001 struct tegra_pcib_irqsrc { struct intr_irqsrc isrc; u_int irq; u_int flags; }; struct tegra_pcib_port { int enabled; int port_idx; /* chip port index */ int num_lanes; /* number of lanes */ bus_size_t afi_pex_ctrl; /* offset of afi_pex_ctrl */ phy_t phy; /* port phy */ /* Config space properties. 
*/ bus_addr_t rp_base_addr; /* PA of config window */ bus_size_t rp_size; /* size of config window */ bus_space_handle_t cfg_handle; /* handle of config window */ }; #define TEGRA_PCIB_MAX_PORTS 3 #define TEGRA_PCIB_MAX_MSI AFI_MSI_INTR_IN_REG * AFI_MSI_REGS struct tegra_pcib_softc { struct ofw_pci_softc ofw_pci; device_t dev; struct mtx mtx; struct resource *pads_mem_res; struct resource *afi_mem_res; struct resource *cfg_mem_res; struct resource *irq_res; struct resource *msi_irq_res; void *intr_cookie; void *msi_intr_cookie; struct ofw_pci_range mem_range; struct ofw_pci_range pref_mem_range; struct ofw_pci_range io_range; clk_t clk_pex; clk_t clk_afi; clk_t clk_pll_e; clk_t clk_cml; hwreset_t hwreset_pex; hwreset_t hwreset_afi; hwreset_t hwreset_pcie_x; regulator_t supply_avddio_pex; regulator_t supply_dvddio_pex; regulator_t supply_avdd_pex_pll; regulator_t supply_hvdd_pex; regulator_t supply_hvdd_pex_pll_e; regulator_t supply_vddio_pex_ctl; regulator_t supply_avdd_pll_erefe; vm_offset_t msi_page; /* VA of MSI page */ bus_addr_t cfg_base_addr; /* base address of config */ bus_size_t cfg_cur_offs; /* currently mapped window */ bus_space_handle_t cfg_handle; /* handle of config window */ bus_space_tag_t bus_tag; /* tag of config window */ int lanes_cfg; int num_ports; struct tegra_pcib_port *ports[TEGRA_PCIB_MAX_PORTS]; struct tegra_pcib_irqsrc *isrcs; }; static int tegra_pcib_maxslots(device_t dev) { return (16); } static int tegra_pcib_route_interrupt(device_t bus, device_t dev, int pin) { struct tegra_pcib_softc *sc; u_int irq; sc = device_get_softc(bus); irq = intr_map_clone_irq(rman_get_start(sc->irq_res)); device_printf(bus, "route pin %d for device %d.%d to %u\n", pin, pci_get_slot(dev), pci_get_function(dev), irq); return (irq); } static int tegra_pcbib_map_cfg(struct tegra_pcib_softc *sc, u_int bus, u_int slot, u_int func, u_int reg) { bus_size_t offs; int rv; offs = sc->cfg_base_addr; offs |= PCI_CFG_BUS(bus) | PCI_CFG_DEV(slot) | PCI_CFG_FUN(func) | PCI_CFG_EXT_REG(reg); if ((sc->cfg_handle != 0) && (sc->cfg_cur_offs == offs)) return (0); if (sc->cfg_handle != 0) bus_space_unmap(sc->bus_tag, sc->cfg_handle, 0x800); rv = bus_space_map(sc->bus_tag, offs, 0x800, 0, &sc->cfg_handle); if (rv != 0) device_printf(sc->dev, "Cannot map config space\n"); else sc->cfg_cur_offs = offs; return (rv); } static uint32_t tegra_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, int bytes) { struct tegra_pcib_softc *sc; bus_space_handle_t hndl; uint32_t off; uint32_t val; int rv, i; sc = device_get_softc(dev); if (bus == 0) { if (func != 0) return (0xFFFFFFFF); for (i = 0; i < TEGRA_PCIB_MAX_PORTS; i++) { if ((sc->ports[i] != NULL) && (sc->ports[i]->port_idx == slot)) { hndl = sc->ports[i]->cfg_handle; off = reg & 0xFFF; break; } } if (i >= TEGRA_PCIB_MAX_PORTS) return (0xFFFFFFFF); } else { rv = tegra_pcbib_map_cfg(sc, bus, slot, func, reg); if (rv != 0) return (0xFFFFFFFF); hndl = sc->cfg_handle; off = PCI_CFG_BASE_REG(reg); } val = bus_space_read_4(sc->bus_tag, hndl, off & ~3); switch (bytes) { case 4: break; case 2: if (off & 3) val >>= 16; val &= 0xffff; break; case 1: val >>= ((off & 3) << 3); val &= 0xff; break; } return val; } static void tegra_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, uint32_t val, int bytes) { struct tegra_pcib_softc *sc; bus_space_handle_t hndl; uint32_t off; uint32_t val2; int rv, i; sc = device_get_softc(dev); if (bus == 0) { if (func != 0) return; for (i = 0; i < TEGRA_PCIB_MAX_PORTS; i++) { if 
((sc->ports[i] != NULL) && (sc->ports[i]->port_idx == slot)) { hndl = sc->ports[i]->cfg_handle; off = reg & 0xFFF; break; } } if (i >= TEGRA_PCIB_MAX_PORTS) return; } else { rv = tegra_pcbib_map_cfg(sc, bus, slot, func, reg); if (rv != 0) return; hndl = sc->cfg_handle; off = PCI_CFG_BASE_REG(reg); } switch (bytes) { case 4: bus_space_write_4(sc->bus_tag, hndl, off, val); break; case 2: val2 = bus_space_read_4(sc->bus_tag, hndl, off & ~3); val2 &= ~(0xffff << ((off & 3) << 3)); val2 |= ((val & 0xffff) << ((off & 3) << 3)); bus_space_write_4(sc->bus_tag, hndl, off & ~3, val2); break; case 1: val2 = bus_space_read_4(sc->bus_tag, hndl, off & ~3); val2 &= ~(0xff << ((off & 3) << 3)); val2 |= ((val & 0xff) << ((off & 3) << 3)); bus_space_write_4(sc->bus_tag, hndl, off & ~3, val2); break; } } static int tegra_pci_intr(void *arg) { struct tegra_pcib_softc *sc = arg; uint32_t code, signature; code = bus_read_4(sc->afi_mem_res, AFI_INTR_CODE) & AFI_INTR_CODE_MASK; signature = bus_read_4(sc->afi_mem_res, AFI_INTR_SIGNATURE); bus_write_4(sc->afi_mem_res, AFI_INTR_CODE, 0); if (code == AFI_INTR_CODE_INT_CODE_SM_MSG) return(FILTER_STRAY); printf("tegra_pci_intr: code %x sig %x\n", code, signature); return (FILTER_HANDLED); } /* ----------------------------------------------------------------------- * * PCI MSI interface */ static int tegra_pcib_alloc_msi(device_t pci, device_t child, int count, int maxcount, int *irqs) { phandle_t msi_parent; /* XXXX ofw_bus_msimap() don't works for Tegra DT. ofw_bus_msimap(ofw_bus_get_node(pci), pci_get_rid(child), &msi_parent, NULL); */ msi_parent = OF_xref_from_node(ofw_bus_get_node(pci)); return (intr_alloc_msi(pci, child, msi_parent, count, maxcount, irqs)); } static int tegra_pcib_release_msi(device_t pci, device_t child, int count, int *irqs) { phandle_t msi_parent; /* XXXX ofw_bus_msimap() don't works for Tegra DT. ofw_bus_msimap(ofw_bus_get_node(pci), pci_get_rid(child), &msi_parent, NULL); */ msi_parent = OF_xref_from_node(ofw_bus_get_node(pci)); return (intr_release_msi(pci, child, msi_parent, count, irqs)); } static int tegra_pcib_map_msi(device_t pci, device_t child, int irq, uint64_t *addr, uint32_t *data) { phandle_t msi_parent; /* XXXX ofw_bus_msimap() don't works for Tegra DT. ofw_bus_msimap(ofw_bus_get_node(pci), pci_get_rid(child), &msi_parent, NULL); */ msi_parent = OF_xref_from_node(ofw_bus_get_node(pci)); return (intr_map_msi(pci, child, msi_parent, irq, addr, data)); } #ifdef TEGRA_PCIB_MSI_ENABLE /* -------------------------------------------------------------------------- * * Interrupts * */ static inline void tegra_pcib_isrc_mask(struct tegra_pcib_softc *sc, struct tegra_pcib_irqsrc *tgi, uint32_t val) { uint32_t reg; int offs, bit; offs = tgi->irq / AFI_MSI_INTR_IN_REG; bit = 1 << (tgi->irq % AFI_MSI_INTR_IN_REG); if (val != 0) AFI_WR4(sc, AFI_MSI_VEC(offs), bit); reg = AFI_RD4(sc, AFI_MSI_EN_VEC(offs)); if (val != 0) reg |= bit; else reg &= ~bit; AFI_WR4(sc, AFI_MSI_EN_VEC(offs), reg); } static int tegra_pcib_msi_intr(void *arg) { u_int irq, i, bit, reg; struct tegra_pcib_softc *sc; struct trapframe *tf; struct tegra_pcib_irqsrc *tgi; sc = (struct tegra_pcib_softc *)arg; tf = curthread->td_intr_frame; for (i = 0; i < AFI_MSI_REGS; i++) { reg = AFI_RD4(sc, AFI_MSI_VEC(i)); /* Handle one vector. */ while (reg != 0) { bit = ffs(reg) - 1; /* Send EOI */ AFI_WR4(sc, AFI_MSI_VEC(i), 1 << bit); irq = i * AFI_MSI_INTR_IN_REG + bit; tgi = &sc->isrcs[irq]; if (intr_isrc_dispatch(&tgi->isrc, tf) != 0) { /* Disable stray. 
*/ tegra_pcib_isrc_mask(sc, tgi, 0); device_printf(sc->dev, "Stray irq %u disabled\n", irq); } reg = AFI_RD4(sc, AFI_MSI_VEC(i)); } } return (FILTER_HANDLED); } static int tegra_pcib_msi_attach(struct tegra_pcib_softc *sc) { int error; uint32_t irq; const char *name; sc->isrcs = malloc(sizeof(*sc->isrcs) * TEGRA_PCIB_MAX_MSI, M_DEVBUF, M_WAITOK | M_ZERO); name = device_get_nameunit(sc->dev); for (irq = 0; irq < TEGRA_PCIB_MAX_MSI; irq++) { sc->isrcs[irq].irq = irq; error = intr_isrc_register(&sc->isrcs[irq].isrc, sc->dev, 0, "%s,%u", name, irq); if (error != 0) return (error); /* XXX deregister ISRCs */ } if (intr_msi_register(sc->dev, OF_xref_from_node(ofw_bus_get_node(sc->dev))) != 0) return (ENXIO); return (0); } static int tegra_pcib_msi_detach(struct tegra_pcib_softc *sc) { /* * There has not been established any procedure yet * how to detach PIC from living system correctly. */ device_printf(sc->dev, "%s: not implemented yet\n", __func__); return (EBUSY); } static void tegra_pcib_msi_disable_intr(device_t dev, struct intr_irqsrc *isrc) { struct tegra_pcib_softc *sc; struct tegra_pcib_irqsrc *tgi; sc = device_get_softc(dev); tgi = (struct tegra_pcib_irqsrc *)isrc; tegra_pcib_isrc_mask(sc, tgi, 0); } static void tegra_pcib_msi_enable_intr(device_t dev, struct intr_irqsrc *isrc) { struct tegra_pcib_softc *sc; struct tegra_pcib_irqsrc *tgi; sc = device_get_softc(dev); tgi = (struct tegra_pcib_irqsrc *)isrc; tegra_pcib_isrc_mask(sc, tgi, 1); } /* MSI interrupts are edge trigered -> do nothing */ static void tegra_pcib_msi_post_filter(device_t dev, struct intr_irqsrc *isrc) { } static void tegra_pcib_msi_post_ithread(device_t dev, struct intr_irqsrc *isrc) { } static void tegra_pcib_msi_pre_ithread(device_t dev, struct intr_irqsrc *isrc) { } static int tegra_pcib_msi_setup_intr(device_t dev, struct intr_irqsrc *isrc, struct resource *res, struct intr_map_data *data) { struct tegra_pcib_softc *sc; struct tegra_pcib_irqsrc *tgi; sc = device_get_softc(dev); tgi = (struct tegra_pcib_irqsrc *)isrc; if (data == NULL || data->type != INTR_MAP_DATA_MSI) return (ENOTSUP); if (isrc->isrc_handlers == 0) tegra_pcib_msi_enable_intr(dev, isrc); return (0); } static int tegra_pcib_msi_teardown_intr(device_t dev, struct intr_irqsrc *isrc, struct resource *res, struct intr_map_data *data) { struct tegra_pcib_softc *sc; struct tegra_pcib_irqsrc *tgi; sc = device_get_softc(dev); tgi = (struct tegra_pcib_irqsrc *)isrc; if (isrc->isrc_handlers == 0) tegra_pcib_isrc_mask(sc, tgi, 0); return (0); } static int tegra_pcib_msi_alloc_msi(device_t dev, device_t child, int count, int maxcount, device_t *pic, struct intr_irqsrc **srcs) { struct tegra_pcib_softc *sc; int i, irq, end_irq; bool found; KASSERT(powerof2(count), ("%s: bad count", __func__)); KASSERT(powerof2(maxcount), ("%s: bad maxcount", __func__)); sc = device_get_softc(dev); mtx_lock(&sc->mtx); found = false; for (irq = 0; (irq + count - 1) < TEGRA_PCIB_MAX_MSI; irq++) { /* Start on an aligned interrupt */ if ((irq & (maxcount - 1)) != 0) continue; /* Assume we found a valid range until shown otherwise */ found = true; /* Check this range is valid */ for (end_irq = irq; end_irq < irq + count; end_irq++) { /* This is already used */ if ((sc->isrcs[end_irq].flags & TEGRA_FLAG_MSI_USED) == TEGRA_FLAG_MSI_USED) { found = false; break; } } if (found) break; } /* Not enough interrupts were found */ if (!found || irq == (TEGRA_PCIB_MAX_MSI - 1)) { mtx_unlock(&sc->mtx); return (ENXIO); } for (i = 0; i < count; i++) { /* Mark the interrupt as used */ 
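/*
 * The loop above searched the 256 possible vectors (AFI_MSI_REGS * 32)
 * for a run of 'count' free entries starting on a 'maxcount'-aligned
 * boundary; the vector numbers claimed here are what
 * tegra_pcib_msi_map_msi() later hands out as the MSI data value.
 */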
sc->isrcs[irq + i].flags |= TEGRA_FLAG_MSI_USED; } mtx_unlock(&sc->mtx); for (i = 0; i < count; i++) srcs[i] = (struct intr_irqsrc *)&sc->isrcs[irq + i]; *pic = device_get_parent(dev); return (0); } static int tegra_pcib_msi_release_msi(device_t dev, device_t child, int count, struct intr_irqsrc **isrc) { struct tegra_pcib_softc *sc; struct tegra_pcib_irqsrc *ti; int i; sc = device_get_softc(dev); mtx_lock(&sc->mtx); for (i = 0; i < count; i++) { ti = (struct tegra_pcib_irqsrc *)isrc[i]; KASSERT((ti->flags & TEGRA_FLAG_MSI_USED) == TEGRA_FLAG_MSI_USED, ("%s: Trying to release an unused MSI-X interrupt", __func__)); ti->flags &= ~TEGRA_FLAG_MSI_USED; } mtx_unlock(&sc->mtx); return (0); } static int tegra_pcib_msi_map_msi(device_t dev, device_t child, struct intr_irqsrc *isrc, uint64_t *addr, uint32_t *data) { struct tegra_pcib_softc *sc = device_get_softc(dev); struct tegra_pcib_irqsrc *ti = (struct tegra_pcib_irqsrc *)isrc; *addr = vtophys(sc->msi_page); *data = ti->irq; return (0); } #endif /* ------------------------------------------------------------------- */ static bus_size_t tegra_pcib_pex_ctrl(struct tegra_pcib_softc *sc, int port) { if (port >= TEGRA_PCIB_MAX_PORTS) panic("invalid port number: %d\n", port); if (port == 0) return (AFI_PEX0_CTRL); else if (port == 1) return (AFI_PEX1_CTRL); else if (port == 2) return (AFI_PEX2_CTRL); else panic("invalid port number: %d\n", port); } static int tegra_pcib_enable_fdt_resources(struct tegra_pcib_softc *sc) { int rv; rv = hwreset_assert(sc->hwreset_pcie_x); if (rv != 0) { device_printf(sc->dev, "Cannot assert 'pcie_x' reset\n"); return (rv); } rv = hwreset_assert(sc->hwreset_afi); if (rv != 0) { device_printf(sc->dev, "Cannot assert 'afi' reset\n"); return (rv); } rv = hwreset_assert(sc->hwreset_pex); if (rv != 0) { device_printf(sc->dev, "Cannot assert 'pex' reset\n"); return (rv); } tegra_powergate_power_off(TEGRA_POWERGATE_PCX); /* Power supplies. 
*/ rv = regulator_enable(sc->supply_avddio_pex); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'avddio_pex' regulator\n"); return (rv); } rv = regulator_enable(sc->supply_dvddio_pex); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'dvddio_pex' regulator\n"); return (rv); } rv = regulator_enable(sc->supply_avdd_pex_pll); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'avdd-pex-pll' regulator\n"); return (rv); } rv = regulator_enable(sc->supply_hvdd_pex); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'hvdd-pex-supply' regulator\n"); return (rv); } rv = regulator_enable(sc->supply_hvdd_pex_pll_e); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'hvdd-pex-pll-e-supply' regulator\n"); return (rv); } rv = regulator_enable(sc->supply_vddio_pex_ctl); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'vddio-pex-ctl' regulator\n"); return (rv); } rv = regulator_enable(sc->supply_avdd_pll_erefe); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'avdd-pll-erefe-supply' regulator\n"); return (rv); } rv = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_PCX, sc->clk_pex, sc->hwreset_pex); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'PCX' powergate\n"); return (rv); } rv = hwreset_deassert(sc->hwreset_afi); if (rv != 0) { device_printf(sc->dev, "Cannot unreset 'afi' reset\n"); return (rv); } rv = clk_enable(sc->clk_afi); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'afi' clock\n"); return (rv); } rv = clk_enable(sc->clk_cml); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'cml' clock\n"); return (rv); } rv = clk_enable(sc->clk_pll_e); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'pll_e' clock\n"); return (rv); } return (0); } static struct tegra_pcib_port * tegra_pcib_parse_port(struct tegra_pcib_softc *sc, phandle_t node) { struct tegra_pcib_port *port; uint32_t tmp[5]; char tmpstr[6]; int rv; port = malloc(sizeof(struct tegra_pcib_port), M_DEVBUF, M_WAITOK); rv = OF_getprop(node, "status", tmpstr, sizeof(tmpstr)); if (rv <= 0 || strcmp(tmpstr, "okay") == 0 || strcmp(tmpstr, "ok") == 0) port->enabled = 1; else port->enabled = 0; rv = OF_getencprop(node, "assigned-addresses", tmp, sizeof(tmp)); if (rv != sizeof(tmp)) { device_printf(sc->dev, "Cannot parse assigned-address: %d\n", rv); goto fail; } port->rp_base_addr = tmp[2]; port->rp_size = tmp[4]; port->port_idx = OFW_PCI_PHYS_HI_DEVICE(tmp[0]) - 1; if (port->port_idx >= TEGRA_PCIB_MAX_PORTS) { device_printf(sc->dev, "Invalid port index: %d\n", port->port_idx); goto fail; } /* XXX - TODO: * Implement proper function for parsing pci "reg" property: * - it have PCI bus format * - its relative to matching "assigned-addresses" */ rv = OF_getencprop(node, "reg", tmp, sizeof(tmp)); if (rv != sizeof(tmp)) { device_printf(sc->dev, "Cannot parse reg: %d\n", rv); goto fail; } port->rp_base_addr += tmp[2]; rv = OF_getencprop(node, "nvidia,num-lanes", &port->num_lanes, sizeof(port->num_lanes)); if (rv != sizeof(port->num_lanes)) { device_printf(sc->dev, "Cannot parse nvidia,num-lanes: %d\n", rv); goto fail; } if (port->num_lanes > 4) { device_printf(sc->dev, "Invalid nvidia,num-lanes: %d\n", port->num_lanes); goto fail; } port->afi_pex_ctrl = tegra_pcib_pex_ctrl(sc, port->port_idx); sc->lanes_cfg |= port->num_lanes << (4 * port->port_idx); /* Phy. 
*/ rv = phy_get_by_ofw_name(sc->dev, node, "pcie-0", &port->phy); if (rv != 0) { device_printf(sc->dev, "Cannot get 'pcie-0' phy for port %d\n", port->port_idx); goto fail; } return (port); fail: free(port, M_DEVBUF); return (NULL); } static int tegra_pcib_parse_fdt_resources(struct tegra_pcib_softc *sc, phandle_t node) { phandle_t child; struct tegra_pcib_port *port; int rv; /* Power supplies. */ rv = regulator_get_by_ofw_property(sc->dev, 0, "avddio-pex-supply", &sc->supply_avddio_pex); if (rv != 0) { device_printf(sc->dev, "Cannot get 'avddio-pex' regulator\n"); return (ENXIO); } rv = regulator_get_by_ofw_property(sc->dev, 0, "dvddio-pex-supply", &sc->supply_dvddio_pex); if (rv != 0) { device_printf(sc->dev, "Cannot get 'dvddio-pex' regulator\n"); return (ENXIO); } rv = regulator_get_by_ofw_property(sc->dev, 0, "avdd-pex-pll-supply", &sc->supply_avdd_pex_pll); if (rv != 0) { device_printf(sc->dev, "Cannot get 'avdd-pex-pll' regulator\n"); return (ENXIO); } rv = regulator_get_by_ofw_property(sc->dev, 0, "hvdd-pex-supply", &sc->supply_hvdd_pex); if (rv != 0) { device_printf(sc->dev, "Cannot get 'hvdd-pex' regulator\n"); return (ENXIO); } rv = regulator_get_by_ofw_property(sc->dev, 0, "hvdd-pex-pll-e-supply", &sc->supply_hvdd_pex_pll_e); if (rv != 0) { device_printf(sc->dev, "Cannot get 'hvdd-pex-pll-e' regulator\n"); return (ENXIO); } rv = regulator_get_by_ofw_property(sc->dev, 0, "vddio-pex-ctl-supply", &sc->supply_vddio_pex_ctl); if (rv != 0) { device_printf(sc->dev, "Cannot get 'vddio-pex-ctl' regulator\n"); return (ENXIO); } rv = regulator_get_by_ofw_property(sc->dev, 0, "avdd-pll-erefe-supply", &sc->supply_avdd_pll_erefe); if (rv != 0) { device_printf(sc->dev, "Cannot get 'avdd-pll-erefe' regulator\n"); return (ENXIO); } /* Resets. */ rv = hwreset_get_by_ofw_name(sc->dev, 0, "pex", &sc->hwreset_pex); if (rv != 0) { device_printf(sc->dev, "Cannot get 'pex' reset\n"); return (ENXIO); } rv = hwreset_get_by_ofw_name(sc->dev, 0, "afi", &sc->hwreset_afi); if (rv != 0) { device_printf(sc->dev, "Cannot get 'afi' reset\n"); return (ENXIO); } rv = hwreset_get_by_ofw_name(sc->dev, 0, "pcie_x", &sc->hwreset_pcie_x); if (rv != 0) { device_printf(sc->dev, "Cannot get 'pcie_x' reset\n"); return (ENXIO); } /* Clocks. 
*/ rv = clk_get_by_ofw_name(sc->dev, 0, "pex", &sc->clk_pex); if (rv != 0) { device_printf(sc->dev, "Cannot get 'pex' clock\n"); return (ENXIO); } rv = clk_get_by_ofw_name(sc->dev, 0, "afi", &sc->clk_afi); if (rv != 0) { device_printf(sc->dev, "Cannot get 'afi' clock\n"); return (ENXIO); } rv = clk_get_by_ofw_name(sc->dev, 0, "pll_e", &sc->clk_pll_e); if (rv != 0) { device_printf(sc->dev, "Cannot get 'pll_e' clock\n"); return (ENXIO); } rv = clk_get_by_ofw_name(sc->dev, 0, "cml", &sc->clk_cml); if (rv != 0) { device_printf(sc->dev, "Cannot get 'cml' clock\n"); return (ENXIO); } /* Ports */ sc->num_ports = 0; for (child = OF_child(node); child != 0; child = OF_peer(child)) { port = tegra_pcib_parse_port(sc, child); if (port == NULL) { device_printf(sc->dev, "Cannot parse PCIe port node\n"); return (ENXIO); } sc->ports[sc->num_ports++] = port; } return (0); } static int tegra_pcib_decode_ranges(struct tegra_pcib_softc *sc, struct ofw_pci_range *ranges, int nranges) { int i; for (i = 2; i < nranges; i++) { if ((ranges[i].pci_hi & OFW_PCI_PHYS_HI_SPACEMASK) == OFW_PCI_PHYS_HI_SPACE_IO) { if (sc->io_range.size != 0) { device_printf(sc->dev, "Duplicated IO range found in DT\n"); return (ENXIO); } sc->io_range = ranges[i]; } if (((ranges[i].pci_hi & OFW_PCI_PHYS_HI_SPACEMASK) == OFW_PCI_PHYS_HI_SPACE_MEM32)) { if (ranges[i].pci_hi & OFW_PCI_PHYS_HI_PREFETCHABLE) { if (sc->pref_mem_range.size != 0) { device_printf(sc->dev, "Duplicated memory range found " "in DT\n"); return (ENXIO); } sc->pref_mem_range = ranges[i]; } else { if (sc->mem_range.size != 0) { device_printf(sc->dev, "Duplicated memory range found " "in DT\n"); return (ENXIO); } sc->mem_range = ranges[i]; } } } if ((sc->io_range.size == 0) || (sc->mem_range.size == 0) || (sc->pref_mem_range.size == 0)) { device_printf(sc->dev, " Not all required ranges are found in DT\n"); return (ENXIO); } return (0); } /* * Hardware config. */ static int tegra_pcib_wait_for_link(struct tegra_pcib_softc *sc, struct tegra_pcib_port *port) { uint32_t reg; int i; /* Setup link detection. */ reg = tegra_pcib_read_config(sc->dev, 0, port->port_idx, 0, RP_PRIV_MISC, 4); reg &= ~RP_PRIV_MISC_PRSNT_MAP_EP_ABSNT; reg |= RP_PRIV_MISC_PRSNT_MAP_EP_PRSNT; tegra_pcib_write_config(sc->dev, 0, port->port_idx, 0, RP_PRIV_MISC, reg, 4); for (i = TEGRA_PCIE_LINKUP_TIMEOUT; i > 0; i--) { reg = tegra_pcib_read_config(sc->dev, 0, port->port_idx, 0, RP_VEND_XP, 4); if (reg & RP_VEND_XP_DL_UP) break; DELAY(1); } if (i <= 0) return (ETIMEDOUT); for (i = TEGRA_PCIE_LINKUP_TIMEOUT; i > 0; i--) { reg = tegra_pcib_read_config(sc->dev, 0, port->port_idx, 0, RP_LINK_CONTROL_STATUS, 4); if (reg & RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE) break; DELAY(1); } if (i <= 0) return (ETIMEDOUT); return (0); } static void tegra_pcib_port_enable(struct tegra_pcib_softc *sc, int port_num) { struct tegra_pcib_port *port; uint32_t reg; int rv; port = sc->ports[port_num]; /* Put port to reset. */ reg = AFI_RD4(sc, port->afi_pex_ctrl); reg &= ~AFI_PEX_CTRL_RST_L; AFI_WR4(sc, port->afi_pex_ctrl, reg); AFI_RD4(sc, port->afi_pex_ctrl); DELAY(10); /* Enable clocks. */ reg |= AFI_PEX_CTRL_REFCLK_EN; reg |= AFI_PEX_CTRL_CLKREQ_EN; reg |= AFI_PEX_CTRL_OVERRIDE_EN; AFI_WR4(sc, port->afi_pex_ctrl, reg); AFI_RD4(sc, port->afi_pex_ctrl); DELAY(100); /* Release reset. */ reg |= AFI_PEX_CTRL_RST_L; AFI_WR4(sc, port->afi_pex_ctrl, reg); rv = tegra_pcib_wait_for_link(sc, port); if (bootverbose) device_printf(sc->dev, " port %d (%d lane%s): Link is %s\n", port->port_idx, port->num_lanes, port->num_lanes > 1 ? 
"s": "", rv == 0 ? "up": "down"); } static void tegra_pcib_port_disable(struct tegra_pcib_softc *sc, uint32_t port_num) { struct tegra_pcib_port *port; uint32_t reg; port = sc->ports[port_num]; /* Put port to reset. */ reg = AFI_RD4(sc, port->afi_pex_ctrl); reg &= ~AFI_PEX_CTRL_RST_L; AFI_WR4(sc, port->afi_pex_ctrl, reg); AFI_RD4(sc, port->afi_pex_ctrl); DELAY(10); /* Disable clocks. */ reg &= ~AFI_PEX_CTRL_CLKREQ_EN; reg &= ~AFI_PEX_CTRL_REFCLK_EN; AFI_WR4(sc, port->afi_pex_ctrl, reg); if (bootverbose) device_printf(sc->dev, " port %d (%d lane%s): Disabled\n", port->port_idx, port->num_lanes, port->num_lanes > 1 ? "s": ""); } static void tegra_pcib_set_bar(struct tegra_pcib_softc *sc, int bar, uint32_t axi, uint64_t fpci, uint32_t size, int is_memory) { uint32_t fpci_reg; uint32_t axi_reg; uint32_t size_reg; axi_reg = axi & ~0xFFF; size_reg = size >> 12; fpci_reg = (uint32_t)(fpci >> 8) & ~0xF; fpci_reg |= is_memory ? 0x1 : 0x0; AFI_WR4(sc, bars[bar].axi_start, axi_reg); AFI_WR4(sc, bars[bar].size, size_reg); AFI_WR4(sc, bars[bar].fpci_start, fpci_reg); } static int tegra_pcib_enable(struct tegra_pcib_softc *sc) { int rv; int i; uint32_t reg; rv = tegra_pcib_enable_fdt_resources(sc); if (rv != 0) { device_printf(sc->dev, "Cannot enable FDT resources\n"); return (rv); } /* Enable PLLE control. */ reg = AFI_RD4(sc, AFI_PLLE_CONTROL); reg &= ~AFI_PLLE_CONTROL_BYPASS_PADS2PLLE_CONTROL; reg |= AFI_PLLE_CONTROL_PADS2PLLE_CONTROL_EN; AFI_WR4(sc, AFI_PLLE_CONTROL, reg); /* Set bias pad. */ AFI_WR4(sc, AFI_PEXBIAS_CTRL, 0); /* Configure mode and ports. */ reg = AFI_RD4(sc, AFI_PCIE_CONFIG); reg &= ~AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_MASK; if (sc->lanes_cfg == 0x14) { if (bootverbose) device_printf(sc->dev, "Using x1,x4 configuration\n"); reg |= AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_XBAR4_1; } else if (sc->lanes_cfg == 0x12) { if (bootverbose) device_printf(sc->dev, "Using x1,x2 configuration\n"); reg |= AFI_PCIE_CONFIG_SM2TMS0_XBAR_CONFIG_XBAR2_1; } else { device_printf(sc->dev, "Unsupported lanes configuration: 0x%X\n", sc->lanes_cfg); } reg |= AFI_PCIE_CONFIG_PCIE_DISABLE_ALL; for (i = 0; i < TEGRA_PCIB_MAX_PORTS; i++) { if ((sc->ports[i] != NULL)) reg &= ~AFI_PCIE_CONFIG_PCIE_DISABLE(sc->ports[i]->port_idx); } AFI_WR4(sc, AFI_PCIE_CONFIG, reg); /* Enable Gen2 support. */ reg = AFI_RD4(sc, AFI_FUSE); reg &= ~AFI_FUSE_PCIE_T0_GEN2_DIS; AFI_WR4(sc, AFI_FUSE, reg); for (i = 0; i < TEGRA_PCIB_MAX_PORTS; i++) { if (sc->ports[i] != NULL) { rv = phy_enable(sc->ports[i]->phy); if (rv != 0) { device_printf(sc->dev, "Cannot enable phy for port %d\n", sc->ports[i]->port_idx); return (rv); } } } rv = hwreset_deassert(sc->hwreset_pcie_x); if (rv != 0) { device_printf(sc->dev, "Cannot unreset 'pci_x' reset\n"); return (rv); } /* Enable config space. */ reg = AFI_RD4(sc, AFI_CONFIGURATION); reg |= AFI_CONFIGURATION_EN_FPCI; AFI_WR4(sc, AFI_CONFIGURATION, reg); /* Enable AFI errors. 
*/ reg = 0; reg |= AFI_AFI_INTR_ENABLE_CODE(AFI_INTR_CODE_INT_CODE_INI_SLVERR); reg |= AFI_AFI_INTR_ENABLE_CODE(AFI_INTR_CODE_INT_CODE_INI_DECERR); reg |= AFI_AFI_INTR_ENABLE_CODE(AFI_INTR_CODE_INT_CODE_TGT_SLVERR); reg |= AFI_AFI_INTR_ENABLE_CODE(AFI_INTR_CODE_INT_CODE_TGT_DECERR); reg |= AFI_AFI_INTR_ENABLE_CODE(AFI_INTR_CODE_INT_CODE_TGT_WRERR); reg |= AFI_AFI_INTR_ENABLE_CODE(AFI_INTR_CODE_INT_CODE_SM_MSG); reg |= AFI_AFI_INTR_ENABLE_CODE(AFI_INTR_CODE_INT_CODE_DFPCI_DECERR); reg |= AFI_AFI_INTR_ENABLE_CODE(AFI_INTR_CODE_INT_CODE_AXI_DECERR); reg |= AFI_AFI_INTR_ENABLE_CODE(AFI_INTR_CODE_INT_CODE_FPCI_TIMEOUT); reg |= AFI_AFI_INTR_ENABLE_CODE(AFI_INTR_CODE_INT_CODE_PE_PRSNT_SENSE); reg |= AFI_AFI_INTR_ENABLE_CODE(AFI_INTR_CODE_INT_CODE_PE_CLKREQ_SENSE); reg |= AFI_AFI_INTR_ENABLE_CODE(AFI_INTR_CODE_INT_CODE_CLKCLAMP_SENSE); reg |= AFI_AFI_INTR_ENABLE_CODE(AFI_INTR_CODE_INT_CODE_RDY4PD_SENSE); reg |= AFI_AFI_INTR_ENABLE_CODE(AFI_INTR_CODE_INT_CODE_P2P_ERROR); AFI_WR4(sc, AFI_AFI_INTR_ENABLE, reg); AFI_WR4(sc, AFI_SM_INTR_ENABLE, 0xffffffff); /* Enable INT, disable MSI. */ AFI_WR4(sc, AFI_INTR_MASK, AFI_INTR_MASK_INT_MASK); /* Mask all FPCI errors. */ AFI_WR4(sc, AFI_FPCI_ERROR_MASKS, 0); /* Setup AFI translation windows. */ /* BAR 0 - type 1 extended configuration. */ tegra_pcib_set_bar(sc, 0, rman_get_start(sc->cfg_mem_res), FPCI_MAP_EXT_TYPE1_CONFIG, rman_get_size(sc->cfg_mem_res), 0); /* BAR 1 - downstream I/O. */ tegra_pcib_set_bar(sc, 1, sc->io_range.host, FPCI_MAP_IO, sc->io_range.size, 0); /* BAR 2 - downstream prefetchable memory 1:1. */ tegra_pcib_set_bar(sc, 2, sc->pref_mem_range.host, sc->pref_mem_range.host, sc->pref_mem_range.size, 1); /* BAR 3 - downstream not prefetchable memory 1:1 .*/ tegra_pcib_set_bar(sc, 3, sc->mem_range.host, sc->mem_range.host, sc->mem_range.size, 1); /* BAR 3-8 clear. */ tegra_pcib_set_bar(sc, 4, 0, 0, 0, 0); tegra_pcib_set_bar(sc, 5, 0, 0, 0, 0); tegra_pcib_set_bar(sc, 6, 0, 0, 0, 0); tegra_pcib_set_bar(sc, 7, 0, 0, 0, 0); tegra_pcib_set_bar(sc, 8, 0, 0, 0, 0); /* MSI BAR - clear. */ tegra_pcib_set_bar(sc, 9, 0, 0, 0, 0); return(0); } #ifdef TEGRA_PCIB_MSI_ENABLE static int tegra_pcib_attach_msi(device_t dev) { struct tegra_pcib_softc *sc; uint32_t reg; int i, rv; sc = device_get_softc(dev); - sc->msi_page = kmem_alloc_contig(kernel_arena, PAGE_SIZE, M_WAITOK, - 0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); + sc->msi_page = kmem_alloc_contig(PAGE_SIZE, M_WAITOK, 0, + BUS_SPACE_MAXADDR, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); /* MSI BAR */ tegra_pcib_set_bar(sc, 9, vtophys(sc->msi_page), vtophys(sc->msi_page), PAGE_SIZE, 0); /* Disble and clear all interrupts. */ for (i = 0; i < AFI_MSI_REGS; i++) { AFI_WR4(sc, AFI_MSI_EN_VEC(i), 0); AFI_WR4(sc, AFI_MSI_VEC(i), 0xFFFFFFFF); } rv = bus_setup_intr(dev, sc->msi_irq_res, INTR_TYPE_BIO | INTR_MPSAFE, tegra_pcib_msi_intr, NULL, sc, &sc->msi_intr_cookie); if (rv != 0) { device_printf(dev, "cannot setup MSI interrupt handler\n"); rv = ENXIO; goto out; } if (tegra_pcib_msi_attach(sc) != 0) { device_printf(dev, "WARNING: unable to attach PIC\n"); tegra_pcib_msi_detach(sc); goto out; } /* Unmask MSI interrupt. 
*/ reg = AFI_RD4(sc, AFI_INTR_MASK); reg |= AFI_INTR_MASK_MSI_MASK; AFI_WR4(sc, AFI_INTR_MASK, reg); out: return (rv); } #endif static int tegra_pcib_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (ofw_bus_search_compatible(dev, compat_data)->ocd_data != 0) { device_set_desc(dev, "Nvidia Integrated PCI/PCI-E Controller"); return (BUS_PROBE_DEFAULT); } return (ENXIO); } static int tegra_pcib_attach(device_t dev) { struct tegra_pcib_softc *sc; phandle_t node; int rv; int rid; struct tegra_pcib_port *port; int i; sc = device_get_softc(dev); sc->dev = dev; mtx_init(&sc->mtx, "msi_mtx", NULL, MTX_DEF); node = ofw_bus_get_node(dev); rv = tegra_pcib_parse_fdt_resources(sc, node); if (rv != 0) { device_printf(dev, "Cannot get FDT resources\n"); return (rv); } /* Allocate bus_space resources. */ rid = 0; sc->pads_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->pads_mem_res == NULL) { device_printf(dev, "Cannot allocate PADS register\n"); rv = ENXIO; goto out; } /* * XXX - FIXME * tag for config space is not filled when RF_ALLOCATED flag is used. */ sc->bus_tag = rman_get_bustag(sc->pads_mem_res); rid = 1; sc->afi_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->afi_mem_res == NULL) { device_printf(dev, "Cannot allocate AFI register\n"); rv = ENXIO; goto out; } rid = 2; sc->cfg_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ALLOCATED); if (sc->cfg_mem_res == NULL) { device_printf(dev, "Cannot allocate config space memory\n"); rv = ENXIO; goto out; } sc->cfg_base_addr = rman_get_start(sc->cfg_mem_res); /* Map RP slots */ for (i = 0; i < TEGRA_PCIB_MAX_PORTS; i++) { if (sc->ports[i] == NULL) continue; port = sc->ports[i]; rv = bus_space_map(sc->bus_tag, port->rp_base_addr, port->rp_size, 0, &port->cfg_handle); if (rv != 0) { device_printf(sc->dev, "Cannot allocate memory for " "port: %d\n", i); rv = ENXIO; goto out; } } /* * Get PCI interrupt */ rid = 0; sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | RF_SHAREABLE); if (sc->irq_res == NULL) { device_printf(dev, "Cannot allocate IRQ resources\n"); rv = ENXIO; goto out; } rid = 1; sc->msi_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->irq_res == NULL) { device_printf(dev, "Cannot allocate MSI IRQ resources\n"); rv = ENXIO; goto out; } sc->ofw_pci.sc_range_mask = 0x3; rv = ofw_pci_init(dev); if (rv != 0) goto out; rv = tegra_pcib_decode_ranges(sc, sc->ofw_pci.sc_range, sc->ofw_pci.sc_nrange); if (rv != 0) goto out; if (bus_setup_intr(dev, sc->irq_res, INTR_TYPE_BIO | INTR_MPSAFE, tegra_pci_intr, NULL, sc, &sc->intr_cookie)) { device_printf(dev, "cannot setup interrupt handler\n"); rv = ENXIO; goto out; } /* * Enable PCIE device. 
*/ rv = tegra_pcib_enable(sc); if (rv != 0) goto out; for (i = 0; i < TEGRA_PCIB_MAX_PORTS; i++) { if (sc->ports[i] == NULL) continue; if (sc->ports[i]->enabled) tegra_pcib_port_enable(sc, i); else tegra_pcib_port_disable(sc, i); } #ifdef TEGRA_PCIB_MSI_ENABLE rv = tegra_pcib_attach_msi(dev); if (rv != 0) goto out; #endif device_add_child(dev, "pci", -1); return (bus_generic_attach(dev)); out: return (rv); } static device_method_t tegra_pcib_methods[] = { /* Device interface */ DEVMETHOD(device_probe, tegra_pcib_probe), DEVMETHOD(device_attach, tegra_pcib_attach), /* Bus interface */ DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), /* pcib interface */ DEVMETHOD(pcib_maxslots, tegra_pcib_maxslots), DEVMETHOD(pcib_read_config, tegra_pcib_read_config), DEVMETHOD(pcib_write_config, tegra_pcib_write_config), DEVMETHOD(pcib_route_interrupt, tegra_pcib_route_interrupt), DEVMETHOD(pcib_alloc_msi, tegra_pcib_alloc_msi), DEVMETHOD(pcib_release_msi, tegra_pcib_release_msi), DEVMETHOD(pcib_map_msi, tegra_pcib_map_msi), DEVMETHOD(pcib_request_feature, pcib_request_feature_allow), #ifdef TEGRA_PCIB_MSI_ENABLE /* MSI/MSI-X */ DEVMETHOD(msi_alloc_msi, tegra_pcib_msi_alloc_msi), DEVMETHOD(msi_release_msi, tegra_pcib_msi_release_msi), DEVMETHOD(msi_map_msi, tegra_pcib_msi_map_msi), /* Interrupt controller interface */ DEVMETHOD(pic_disable_intr, tegra_pcib_msi_disable_intr), DEVMETHOD(pic_enable_intr, tegra_pcib_msi_enable_intr), DEVMETHOD(pic_setup_intr, tegra_pcib_msi_setup_intr), DEVMETHOD(pic_teardown_intr, tegra_pcib_msi_teardown_intr), DEVMETHOD(pic_post_filter, tegra_pcib_msi_post_filter), DEVMETHOD(pic_post_ithread, tegra_pcib_msi_post_ithread), DEVMETHOD(pic_pre_ithread, tegra_pcib_msi_pre_ithread), #endif /* OFW bus interface */ DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat), DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model), DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name), DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node), DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type), DEVMETHOD_END }; static devclass_t pcib_devclass; DEFINE_CLASS_1(pcib, tegra_pcib_driver, tegra_pcib_methods, sizeof(struct tegra_pcib_softc), ofw_pci_driver); DRIVER_MODULE(pcib, simplebus, tegra_pcib_driver, pcib_devclass, NULL, NULL); Index: head/sys/arm/nvidia/tegra_xhci.c =================================================================== --- head/sys/arm/nvidia/tegra_xhci.c (revision 338106) +++ head/sys/arm/nvidia/tegra_xhci.c (revision 338107) @@ -1,1160 +1,1160 @@ /*- * Copyright (c) 2016 Michal Meloun * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * XHCI driver for Tegra SoCs. */ #include "opt_bus.h" #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "usbdevs.h" /* FPCI address space */ #define T_XUSB_CFG_0 0x000 #define T_XUSB_CFG_1 0x004 #define CFG_1_BUS_MASTER (1 << 2) #define CFG_1_MEMORY_SPACE (1 << 1) #define CFG_1_IO_SPACE (1 << 0) #define T_XUSB_CFG_2 0x008 #define T_XUSB_CFG_3 0x00C #define T_XUSB_CFG_4 0x010 #define CFG_4_BASE_ADDRESS(x) (((x) & 0x1FFFF) << 15) #define T_XUSB_CFG_5 0x014 #define T_XUSB_CFG_ARU_MAILBOX_CMD 0x0E4 #define ARU_MAILBOX_CMD_INT_EN (1U << 31) #define ARU_MAILBOX_CMD_DEST_XHCI (1 << 30) #define ARU_MAILBOX_CMD_DEST_SMI (1 << 29) #define ARU_MAILBOX_CMD_DEST_PME (1 << 28) #define ARU_MAILBOX_CMD_DEST_FALC (1 << 27) #define T_XUSB_CFG_ARU_MAILBOX_DATA_IN 0x0E8 #define ARU_MAILBOX_DATA_IN_DATA(x) (((x) & 0xFFFFFF) << 0) #define ARU_MAILBOX_DATA_IN_TYPE(x) (((x) & 0x0000FF) << 24) #define T_XUSB_CFG_ARU_MAILBOX_DATA_OUT 0x0EC #define ARU_MAILBOX_DATA_OUT_DATA(x) (((x) >> 0) & 0xFFFFFF) #define ARU_MAILBOX_DATA_OUT_TYPE(x) (((x) >> 24) & 0x0000FF) #define T_XUSB_CFG_ARU_MAILBOX_OWNER 0x0F0 #define ARU_MAILBOX_OWNER_SW 2 #define ARU_MAILBOX_OWNER_FW 1 #define ARU_MAILBOX_OWNER_NONE 0 #define XUSB_CFG_ARU_C11_CSBRANGE 0x41C /* ! UNDOCUMENTED ! */ #define ARU_C11_CSBRANGE_PAGE(x) ((x) >> 9) #define ARU_C11_CSBRANGE_ADDR(x) (0x800 + ((x) & 0x1FF)) #define XUSB_CFG_ARU_SMI_INTR 0x428 /* ! UNDOCUMENTED ! */ #define ARU_SMI_INTR_EN (1 << 3) #define ARU_SMI_INTR_FW_HANG (1 << 1) #define XUSB_CFG_ARU_RST 0x42C /* ! UNDOCUMENTED ! 
*/ #define ARU_RST_RESET (1 << 0) #define XUSB_HOST_CONFIGURATION 0x180 #define CONFIGURATION_CLKEN_OVERRIDE (1U<< 31) #define CONFIGURATION_PW_NO_DEVSEL_ERR_CYA (1 << 19) #define CONFIGURATION_INITIATOR_READ_IDLE (1 << 18) #define CONFIGURATION_INITIATOR_WRITE_IDLE (1 << 17) #define CONFIGURATION_WDATA_LEAD_CYA (1 << 15) #define CONFIGURATION_WR_INTRLV_CYA (1 << 14) #define CONFIGURATION_TARGET_READ_IDLE (1 << 11) #define CONFIGURATION_TARGET_WRITE_IDLE (1 << 10) #define CONFIGURATION_MSI_VEC_EMPTY (1 << 9) #define CONFIGURATION_UFPCI_MSIAW (1 << 7) #define CONFIGURATION_UFPCI_PWPASSPW (1 << 6) #define CONFIGURATION_UFPCI_PASSPW (1 << 5) #define CONFIGURATION_UFPCI_PWPASSNPW (1 << 4) #define CONFIGURATION_DFPCI_PWPASSNPW (1 << 3) #define CONFIGURATION_DFPCI_RSPPASSPW (1 << 2) #define CONFIGURATION_DFPCI_PASSPW (1 << 1) #define CONFIGURATION_EN_FPCI (1 << 0) /* IPFS address space */ #define XUSB_HOST_FPCI_ERROR_MASKS 0x184 #define FPCI_ERROR_MASTER_ABORT (1 << 2) #define FPCI_ERRORI_DATA_ERROR (1 << 1) #define FPCI_ERROR_TARGET_ABORT (1 << 0) #define XUSB_HOST_INTR_MASK 0x188 #define INTR_IP_INT_MASK (1 << 16) #define INTR_MSI_MASK (1 << 8) #define INTR_INT_MASK (1 << 0) #define XUSB_HOST_CLKGATE_HYSTERESIS 0x1BC /* CSB Falcon CPU */ #define XUSB_FALCON_CPUCTL 0x100 #define CPUCTL_STOPPED (1 << 5) #define CPUCTL_HALTED (1 << 4) #define CPUCTL_HRESET (1 << 3) #define CPUCTL_SRESET (1 << 2) #define CPUCTL_STARTCPU (1 << 1) #define CPUCTL_IINVAL (1 << 0) #define XUSB_FALCON_BOOTVEC 0x104 #define XUSB_FALCON_DMACTL 0x10C #define XUSB_FALCON_IMFILLRNG1 0x154 #define IMFILLRNG1_TAG_HI(x) (((x) & 0xFFF) << 16) #define IMFILLRNG1_TAG_LO(x) (((x) & 0xFFF) << 0) #define XUSB_FALCON_IMFILLCTL 0x158 /* CSB mempool */ #define XUSB_CSB_MEMPOOL_APMAP 0x10181C #define APMAP_BOOTPATH (1U << 31) #define XUSB_CSB_MEMPOOL_ILOAD_ATTR 0x101A00 #define XUSB_CSB_MEMPOOL_ILOAD_BASE_LO 0x101A04 #define XUSB_CSB_MEMPOOL_ILOAD_BASE_HI 0x101A08 #define XUSB_CSB_MEMPOOL_L2IMEMOP_SIZE 0x101A10 #define L2IMEMOP_SIZE_OFFSET(x) (((x) & 0x3FF) << 8) #define L2IMEMOP_SIZE_SIZE(x) (((x) & 0x0FF) << 24) #define XUSB_CSB_MEMPOOL_L2IMEMOP_TRIG 0x101A14 #define L2IMEMOP_INVALIDATE_ALL (0x40 << 24) #define L2IMEMOP_LOAD_LOCKED_RESULT (0x11 << 24) #define XUSB_CSB_MEMPOOL_L2IMEMOP_RESULT 0x101A18 #define L2IMEMOP_RESULT_VLD (1U << 31) #define XUSB_CSB_IMEM_BLOCK_SIZE 256 #define TEGRA_XHCI_SS_HIGH_SPEED 120000000 #define TEGRA_XHCI_SS_LOW_SPEED 12000000 /* MBOX commands. */ #define MBOX_CMD_MSG_ENABLED 1 #define MBOX_CMD_INC_FALC_CLOCK 2 #define MBOX_CMD_DEC_FALC_CLOCK 3 #define MBOX_CMD_INC_SSPI_CLOCK 4 #define MBOX_CMD_DEC_SSPI_CLOCK 5 #define MBOX_CMD_SET_BW 6 #define MBOX_CMD_SET_SS_PWR_GATING 7 #define MBOX_CMD_SET_SS_PWR_UNGATING 8 #define MBOX_CMD_SAVE_DFE_CTLE_CTX 9 #define MBOX_CMD_AIRPLANE_MODE_ENABLED 10 #define MBOX_CMD_AIRPLANE_MODE_DISABLED 11 #define MBOX_CMD_START_HSIC_IDLE 12 #define MBOX_CMD_STOP_HSIC_IDLE 13 #define MBOX_CMD_DBC_WAKE_STACK 14 #define MBOX_CMD_HSIC_PRETEND_CONNECT 15 #define MBOX_CMD_RESET_SSPI 16 #define MBOX_CMD_DISABLE_SS_LFPS_DETECTION 17 #define MBOX_CMD_ENABLE_SS_LFPS_DETECTION 18 /* MBOX responses. 
*/ #define MBOX_CMD_ACK (0x80 + 0) #define MBOX_CMD_NAK (0x80 + 1) #define IPFS_WR4(_sc, _r, _v) bus_write_4((_sc)->mem_res_ipfs, (_r), (_v)) #define IPFS_RD4(_sc, _r) bus_read_4((_sc)->mem_res_ipfs, (_r)) #define FPCI_WR4(_sc, _r, _v) bus_write_4((_sc)->mem_res_fpci, (_r), (_v)) #define FPCI_RD4(_sc, _r) bus_read_4((_sc)->mem_res_fpci, (_r)) #define LOCK(_sc) mtx_lock(&(_sc)->mtx) #define UNLOCK(_sc) mtx_unlock(&(_sc)->mtx) #define SLEEP(_sc, timeout) \ mtx_sleep(sc, &sc->mtx, 0, "tegra_xhci", timeout); #define LOCK_INIT(_sc) \ mtx_init(&_sc->mtx, device_get_nameunit(_sc->dev), "tegra_xhci", MTX_DEF) #define LOCK_DESTROY(_sc) mtx_destroy(&_sc->mtx) #define ASSERT_LOCKED(_sc) mtx_assert(&_sc->mtx, MA_OWNED) #define ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->mtx, MA_NOTOWNED) struct tegra_xusb_fw_hdr { uint32_t boot_loadaddr_in_imem; uint32_t boot_codedfi_offset; uint32_t boot_codetag; uint32_t boot_codesize; uint32_t phys_memaddr; uint16_t reqphys_memsize; uint16_t alloc_phys_memsize; uint32_t rodata_img_offset; uint32_t rodata_section_start; uint32_t rodata_section_end; uint32_t main_fnaddr; uint32_t fwimg_cksum; uint32_t fwimg_created_time; uint32_t imem_resident_start; uint32_t imem_resident_end; uint32_t idirect_start; uint32_t idirect_end; uint32_t l2_imem_start; uint32_t l2_imem_end; uint32_t version_id; uint8_t init_ddirect; uint8_t reserved[3]; uint32_t phys_addr_log_buffer; uint32_t total_log_entries; uint32_t dequeue_ptr; uint32_t dummy[2]; uint32_t fwimg_len; uint8_t magic[8]; uint32_t ss_low_power_entry_timeout; uint8_t num_hsic_port; uint8_t ss_portmap; uint8_t build; uint8_t padding[137]; /* Pad to 256 bytes */ }; /* Compatible devices. */ static struct ofw_compat_data compat_data[] = { {"nvidia,tegra124-xusb", 1}, {NULL, 0} }; struct tegra_xhci_softc { struct xhci_softc xhci_softc; device_t dev; struct mtx mtx; struct resource *mem_res_fpci; struct resource *mem_res_ipfs; struct resource *irq_res_mbox; void *irq_hdl_mbox; clk_t clk_xusb_host; clk_t clk_xusb_gate; clk_t clk_xusb_falcon_src; clk_t clk_xusb_ss; clk_t clk_xusb_hs_src; clk_t clk_xusb_fs_src; hwreset_t hwreset_xusb_host; hwreset_t hwreset_xusb_ss; regulator_t supply_avddio_pex; regulator_t supply_dvddio_pex; regulator_t supply_avdd_usb; regulator_t supply_avdd_pll_utmip; regulator_t supply_avdd_pll_erefe; regulator_t supply_avdd_usb_ss_pll; regulator_t supply_hvdd_usb_ss; regulator_t supply_hvdd_usb_ss_pll_e; phy_t phy_usb2_0; phy_t phy_usb2_1; phy_t phy_usb2_2; phy_t phy_usb3_0; struct intr_config_hook irq_hook; bool xhci_inited; char *fw_name; vm_offset_t fw_vaddr; vm_size_t fw_size; }; static uint32_t CSB_RD4(struct tegra_xhci_softc *sc, uint32_t addr) { FPCI_WR4(sc, XUSB_CFG_ARU_C11_CSBRANGE, ARU_C11_CSBRANGE_PAGE(addr)); return (FPCI_RD4(sc, ARU_C11_CSBRANGE_ADDR(addr))); } static void CSB_WR4(struct tegra_xhci_softc *sc, uint32_t addr, uint32_t val) { FPCI_WR4(sc, XUSB_CFG_ARU_C11_CSBRANGE, ARU_C11_CSBRANGE_PAGE(addr)); FPCI_WR4(sc, ARU_C11_CSBRANGE_ADDR(addr), val); } static int get_fdt_resources(struct tegra_xhci_softc *sc, phandle_t node) { int rv; rv = regulator_get_by_ofw_property(sc->dev, 0, "avddio-pex-supply", &sc->supply_avddio_pex); if (rv != 0) { device_printf(sc->dev, "Cannot get 'avddio-pex' regulator\n"); return (ENXIO); } rv = regulator_get_by_ofw_property(sc->dev, 0, "dvddio-pex-supply", &sc->supply_dvddio_pex); if (rv != 0) { device_printf(sc->dev, "Cannot get 'dvddio-pex' regulator\n"); return (ENXIO); } rv = regulator_get_by_ofw_property(sc->dev, 0, "avdd-usb-supply", &sc->supply_avdd_usb); if 
(rv != 0) { device_printf(sc->dev, "Cannot get 'avdd-usb' regulator\n"); return (ENXIO); } rv = regulator_get_by_ofw_property(sc->dev, 0, "avdd-pll-utmip-supply", &sc->supply_avdd_pll_utmip); if (rv != 0) { device_printf(sc->dev, "Cannot get 'avdd-pll-utmip' regulator\n"); return (ENXIO); } rv = regulator_get_by_ofw_property(sc->dev, 0, "avdd-pll-erefe-supply", &sc->supply_avdd_pll_erefe); if (rv != 0) { device_printf(sc->dev, "Cannot get 'avdd-pll-erefe' regulator\n"); return (ENXIO); } rv = regulator_get_by_ofw_property(sc->dev, 0, "avdd-usb-ss-pll-supply", &sc->supply_avdd_usb_ss_pll); if (rv != 0) { device_printf(sc->dev, "Cannot get 'avdd-usb-ss-pll' regulator\n"); return (ENXIO); } rv = regulator_get_by_ofw_property(sc->dev, 0, "hvdd-usb-ss-supply", &sc->supply_hvdd_usb_ss); if (rv != 0) { device_printf(sc->dev, "Cannot get 'hvdd-usb-ss' regulator\n"); return (ENXIO); } rv = regulator_get_by_ofw_property(sc->dev, 0, "hvdd-usb-ss-pll-e-supply", &sc->supply_hvdd_usb_ss_pll_e); if (rv != 0) { device_printf(sc->dev, "Cannot get 'hvdd-usb-ss-pll-e' regulator\n"); return (ENXIO); } rv = hwreset_get_by_ofw_name(sc->dev, 0, "xusb_host", &sc->hwreset_xusb_host); if (rv != 0) { device_printf(sc->dev, "Cannot get 'xusb_host' reset\n"); return (ENXIO); } rv = hwreset_get_by_ofw_name(sc->dev, 0, "xusb_ss", &sc->hwreset_xusb_ss); if (rv != 0) { device_printf(sc->dev, "Cannot get 'xusb_ss' reset\n"); return (ENXIO); } rv = phy_get_by_ofw_name(sc->dev, 0, "usb2-0", &sc->phy_usb2_0); if (rv != 0) { device_printf(sc->dev, "Cannot get 'usb2-0' phy\n"); return (ENXIO); } rv = phy_get_by_ofw_name(sc->dev, 0, "usb2-1", &sc->phy_usb2_1); if (rv != 0) { device_printf(sc->dev, "Cannot get 'usb2-1' phy\n"); return (ENXIO); } rv = phy_get_by_ofw_name(sc->dev, 0, "usb2-2", &sc->phy_usb2_2); if (rv != 0) { device_printf(sc->dev, "Cannot get 'usb2-2' phy\n"); return (ENXIO); } rv = phy_get_by_ofw_name(sc->dev, 0, "usb3-0", &sc->phy_usb3_0); if (rv != 0) { device_printf(sc->dev, "Cannot get 'usb3-0' phy\n"); return (ENXIO); } rv = clk_get_by_ofw_name(sc->dev, 0, "xusb_host", &sc->clk_xusb_host); if (rv != 0) { device_printf(sc->dev, "Cannot get 'xusb_host' clock\n"); return (ENXIO); } rv = clk_get_by_ofw_name(sc->dev, 0, "xusb_falcon_src", &sc->clk_xusb_falcon_src); if (rv != 0) { device_printf(sc->dev, "Cannot get 'xusb_falcon_src' clock\n"); return (ENXIO); } rv = clk_get_by_ofw_name(sc->dev, 0, "xusb_ss", &sc->clk_xusb_ss); if (rv != 0) { device_printf(sc->dev, "Cannot get 'xusb_ss' clock\n"); return (ENXIO); } rv = clk_get_by_ofw_name(sc->dev, 0, "xusb_hs_src", &sc->clk_xusb_hs_src); if (rv != 0) { device_printf(sc->dev, "Cannot get 'xusb_hs_src' clock\n"); return (ENXIO); } rv = clk_get_by_ofw_name(sc->dev, 0, "xusb_fs_src", &sc->clk_xusb_fs_src); if (rv != 0) { device_printf(sc->dev, "Cannot get 'xusb_fs_src' clock\n"); return (ENXIO); } rv = clk_get_by_ofw_index_prop(sc->dev, 0, "freebsd,clock-xusb-gate", 0, &sc->clk_xusb_gate); if (rv != 0) { device_printf(sc->dev, "Cannot get 'xusb_gate' clock\n"); return (ENXIO); } return (0); } static int enable_fdt_resources(struct tegra_xhci_softc *sc) { int rv; rv = hwreset_assert(sc->hwreset_xusb_host); if (rv != 0) { device_printf(sc->dev, "Cannot reset 'xusb_host' reset\n"); return (rv); } rv = hwreset_assert(sc->hwreset_xusb_ss); if (rv != 0) { device_printf(sc->dev, "Cannot reset 'xusb_ss' reset\n"); return (rv); } rv = regulator_enable(sc->supply_avddio_pex); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'avddio_pex' regulator\n"); return (rv); } rv = 
regulator_enable(sc->supply_dvddio_pex); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'dvddio_pex' regulator\n"); return (rv); } rv = regulator_enable(sc->supply_avdd_usb); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'avdd_usb' regulator\n"); return (rv); } rv = regulator_enable(sc->supply_avdd_pll_utmip); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'avdd_pll_utmip-5v' regulator\n"); return (rv); } rv = regulator_enable(sc->supply_avdd_pll_erefe); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'avdd_pll_erefe' regulator\n"); return (rv); } rv = regulator_enable(sc->supply_avdd_usb_ss_pll); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'avdd_usb_ss_pll' regulator\n"); return (rv); } rv = regulator_enable(sc->supply_hvdd_usb_ss); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'hvdd_usb_ss' regulator\n"); return (rv); } rv = regulator_enable(sc->supply_hvdd_usb_ss_pll_e); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'hvdd_usb_ss_pll_e' regulator\n"); return (rv); } /* Power off XUSB host and XUSB SS domains. */ rv = tegra_powergate_power_off(TEGRA_POWERGATE_XUSBA); if (rv != 0) { device_printf(sc->dev, "Cannot powerdown 'xusba' domain\n"); return (rv); } rv = tegra_powergate_power_off(TEGRA_POWERGATE_XUSBC); if (rv != 0) { device_printf(sc->dev, "Cannot powerdown 'xusbc' domain\n"); return (rv); } /* Setup XUSB ss_src clock first */ clk_set_freq(sc->clk_xusb_ss, TEGRA_XHCI_SS_HIGH_SPEED, 0); if (rv != 0) return (rv); /* The XUSB gate clock must be enabled before XUSBA can be powered. */ rv = clk_enable(sc->clk_xusb_gate); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'xusb_gate' clock\n"); return (rv); } /* Power on XUSB host and XUSB SS domains. */ rv = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_XUSBC, sc->clk_xusb_host, sc->hwreset_xusb_host); if (rv != 0) { device_printf(sc->dev, "Cannot powerup 'xusbc' domain\n"); return (rv); } rv = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_XUSBA, sc->clk_xusb_ss, sc->hwreset_xusb_ss); if (rv != 0) { device_printf(sc->dev, "Cannot powerup 'xusba' domain\n"); return (rv); } /* Enable rest of clocks */ rv = clk_enable(sc->clk_xusb_falcon_src); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'xusb_falcon_src' clock\n"); return (rv); } rv = clk_enable(sc->clk_xusb_fs_src); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'xusb_fs_src' clock\n"); return (rv); } rv = clk_enable(sc->clk_xusb_hs_src); if (rv != 0) { device_printf(sc->dev, "Cannot enable 'xusb_hs_src' clock\n"); return (rv); } rv = phy_enable(sc->phy_usb2_0); if (rv != 0) { device_printf(sc->dev, "Cannot enable USB2_0 phy\n"); return (rv); } rv = phy_enable(sc->phy_usb2_1); if (rv != 0) { device_printf(sc->dev, "Cannot enable USB2_1 phy\n"); return (rv); } rv = phy_enable(sc->phy_usb2_2); if (rv != 0) { device_printf(sc->dev, "Cannot enable USB2_2 phy\n"); return (rv); } rv = phy_enable(sc->phy_usb3_0); if (rv != 0) { device_printf(sc->dev, "Cannot enable USB3_0 phy\n"); return (rv); } return (0); } /* Respond by ACK/NAK back to FW */ static void mbox_send_ack(struct tegra_xhci_softc *sc, uint32_t cmd, uint32_t data) { uint32_t reg; reg = ARU_MAILBOX_DATA_IN_TYPE(cmd) | ARU_MAILBOX_DATA_IN_DATA(data); FPCI_WR4(sc, T_XUSB_CFG_ARU_MAILBOX_DATA_IN, reg); reg = FPCI_RD4(sc, T_XUSB_CFG_ARU_MAILBOX_CMD); reg |= ARU_MAILBOX_CMD_DEST_FALC | ARU_MAILBOX_CMD_INT_EN; FPCI_WR4(sc, T_XUSB_CFG_ARU_MAILBOX_CMD, reg); } /* Sent command to FW */ static int mbox_send_cmd(struct tegra_xhci_softc *sc, uint32_t cmd, uint32_t data) { 
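	/*
	 * Editor's note, illustrative only and not part of the original
	 * commit: the body of mbox_send_cmd() below implements a simple
	 * ownership handshake with the Falcon firmware mailbox:
	 *
	 *   1. read T_XUSB_CFG_ARU_MAILBOX_OWNER and require
	 *      ARU_MAILBOX_OWNER_NONE,
	 *   2. claim the mailbox by writing ARU_MAILBOX_OWNER_SW and reading
	 *      it back,
	 *   3. write ARU_MAILBOX_DATA_IN_TYPE(cmd) | ARU_MAILBOX_DATA_IN_DATA(data)
	 *      into T_XUSB_CFG_ARU_MAILBOX_DATA_IN,
	 *   4. kick the firmware by setting ARU_MAILBOX_CMD_DEST_FALC and
	 *      ARU_MAILBOX_CMD_INT_EN in T_XUSB_CFG_ARU_MAILBOX_CMD,
	 *   5. poll the owner register until the firmware releases it, or
	 *      fail with ETIMEDOUT after roughly 25 ms.
	 *
	 * A typical caller, as in tegra_xhci_attach() further down, is:
	 *
	 *	rv = mbox_send_cmd(sc, MBOX_CMD_MSG_ENABLED, 0);
	 *	if (rv != 0)
	 *		device_printf(sc->dev, "Could not enable XUSB firmware\n");
	 *
	 * The register and flag names come from the definitions earlier in
	 * this file; the step-by-step summary is the editor's reading of the
	 * code, not NVIDIA documentation.
	 */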
uint32_t reg; int i; reg = FPCI_RD4(sc, T_XUSB_CFG_ARU_MAILBOX_OWNER); if (reg != ARU_MAILBOX_OWNER_NONE) { device_printf(sc->dev, "CPU mailbox is busy: 0x%08X\n", reg); return (EBUSY); } /* XXX Is this right? Retry loop? Wait before send? */ FPCI_WR4(sc, T_XUSB_CFG_ARU_MAILBOX_OWNER, ARU_MAILBOX_OWNER_SW); reg = FPCI_RD4(sc, T_XUSB_CFG_ARU_MAILBOX_OWNER); if (reg != ARU_MAILBOX_OWNER_SW) { device_printf(sc->dev, "Cannot acquire CPU mailbox: 0x%08X\n", reg); return (EBUSY); } reg = ARU_MAILBOX_DATA_IN_TYPE(cmd) | ARU_MAILBOX_DATA_IN_DATA(data); FPCI_WR4(sc, T_XUSB_CFG_ARU_MAILBOX_DATA_IN, reg); reg = FPCI_RD4(sc, T_XUSB_CFG_ARU_MAILBOX_CMD); reg |= ARU_MAILBOX_CMD_DEST_FALC | ARU_MAILBOX_CMD_INT_EN; FPCI_WR4(sc, T_XUSB_CFG_ARU_MAILBOX_CMD, reg); for (i = 250; i > 0; i--) { reg = FPCI_RD4(sc, T_XUSB_CFG_ARU_MAILBOX_OWNER); if (reg == ARU_MAILBOX_OWNER_NONE) break; DELAY(100); } if (i <= 0) { device_printf(sc->dev, "Command response timeout: 0x%08X\n", reg); return (ETIMEDOUT); } return(0); } static void process_msg(struct tegra_xhci_softc *sc, uint32_t req_cmd, uint32_t req_data, uint32_t *resp_cmd, uint32_t *resp_data) { uint64_t freq; int rv; /* In most cases, data are echoed back. */ *resp_data = req_data; switch (req_cmd) { case MBOX_CMD_INC_FALC_CLOCK: case MBOX_CMD_DEC_FALC_CLOCK: rv = clk_set_freq(sc->clk_xusb_falcon_src, req_data * 1000ULL, 0); if (rv == 0) { rv = clk_get_freq(sc->clk_xusb_falcon_src, &freq); *resp_data = (uint32_t)(freq / 1000); } *resp_cmd = rv == 0 ? MBOX_CMD_ACK: MBOX_CMD_NAK; break; case MBOX_CMD_INC_SSPI_CLOCK: case MBOX_CMD_DEC_SSPI_CLOCK: rv = clk_set_freq(sc->clk_xusb_ss, req_data * 1000ULL, 0); if (rv == 0) { rv = clk_get_freq(sc->clk_xusb_ss, &freq); *resp_data = (uint32_t)(freq / 1000); } *resp_cmd = rv == 0 ? MBOX_CMD_ACK: MBOX_CMD_NAK; break; case MBOX_CMD_SET_BW: /* No respense is expected. */ *resp_cmd = 0; break; case MBOX_CMD_SET_SS_PWR_GATING: case MBOX_CMD_SET_SS_PWR_UNGATING: *resp_cmd = MBOX_CMD_NAK; break; case MBOX_CMD_SAVE_DFE_CTLE_CTX: /* Not implemented yet. */ *resp_cmd = MBOX_CMD_ACK; break; case MBOX_CMD_START_HSIC_IDLE: case MBOX_CMD_STOP_HSIC_IDLE: /* Not implemented yet. */ *resp_cmd = MBOX_CMD_NAK; break; case MBOX_CMD_DISABLE_SS_LFPS_DETECTION: case MBOX_CMD_ENABLE_SS_LFPS_DETECTION: /* Not implemented yet. */ *resp_cmd = MBOX_CMD_NAK; break; case MBOX_CMD_AIRPLANE_MODE_ENABLED: case MBOX_CMD_AIRPLANE_MODE_DISABLED: case MBOX_CMD_DBC_WAKE_STACK: case MBOX_CMD_HSIC_PRETEND_CONNECT: case MBOX_CMD_RESET_SSPI: device_printf(sc->dev, "Received unused/unexpected command: %u\n", req_cmd); *resp_cmd = 0; break; default: device_printf(sc->dev, "Received unknown command: %u\n", req_cmd); } } static void intr_mbox(void *arg) { struct tegra_xhci_softc *sc; uint32_t reg, msg, resp_cmd, resp_data; sc = (struct tegra_xhci_softc *)arg; /* Clear interrupt first */ reg = FPCI_RD4(sc, XUSB_CFG_ARU_SMI_INTR); FPCI_WR4(sc, XUSB_CFG_ARU_SMI_INTR, reg); if (reg & ARU_SMI_INTR_FW_HANG) { device_printf(sc->dev, "XUSB CPU firmware hang!!! 
CPUCTL: 0x%08X\n", CSB_RD4(sc, XUSB_FALCON_CPUCTL)); } msg = FPCI_RD4(sc, T_XUSB_CFG_ARU_MAILBOX_DATA_OUT); resp_cmd = 0; process_msg(sc, ARU_MAILBOX_DATA_OUT_TYPE(msg), ARU_MAILBOX_DATA_OUT_DATA(msg), &resp_cmd, &resp_data); if (resp_cmd != 0) mbox_send_ack(sc, resp_cmd, resp_data); else FPCI_WR4(sc, T_XUSB_CFG_ARU_MAILBOX_OWNER, ARU_MAILBOX_OWNER_NONE); reg = FPCI_RD4(sc, T_XUSB_CFG_ARU_MAILBOX_CMD); reg &= ~ARU_MAILBOX_CMD_DEST_SMI; FPCI_WR4(sc, T_XUSB_CFG_ARU_MAILBOX_CMD, reg); } static int load_fw(struct tegra_xhci_softc *sc) { const struct firmware *fw; const struct tegra_xusb_fw_hdr *fw_hdr; vm_paddr_t fw_paddr, fw_base; vm_offset_t fw_vaddr; vm_size_t fw_size; uint32_t code_tags, code_size; struct clocktime fw_clock; struct timespec fw_timespec; int i; /* Reset ARU */ FPCI_WR4(sc, XUSB_CFG_ARU_RST, ARU_RST_RESET); DELAY(3000); /* Check if FALCON already runs */ if (CSB_RD4(sc, XUSB_CSB_MEMPOOL_ILOAD_BASE_LO) != 0) { device_printf(sc->dev, "XUSB CPU is already loaded, CPUCTL: 0x%08X\n", CSB_RD4(sc, XUSB_FALCON_CPUCTL)); return (0); } fw = firmware_get(sc->fw_name); if (fw == NULL) { device_printf(sc->dev, "Cannot read xusb firmware\n"); return (ENOENT); } /* Allocate uncached memory and copy firmware into. */ fw_hdr = (const struct tegra_xusb_fw_hdr *)fw->data; fw_size = fw_hdr->fwimg_len; - fw_vaddr = kmem_alloc_contig(kernel_arena, fw_size, - M_WAITOK, 0, -1UL, PAGE_SIZE, 0, VM_MEMATTR_UNCACHEABLE); + fw_vaddr = kmem_alloc_contig(fw_size, M_WAITOK, 0, -1UL, PAGE_SIZE, 0, + VM_MEMATTR_UNCACHEABLE); fw_paddr = vtophys(fw_vaddr); fw_hdr = (const struct tegra_xusb_fw_hdr *)fw_vaddr; memcpy((void *)fw_vaddr, fw->data, fw_size); firmware_put(fw, FIRMWARE_UNLOAD); sc->fw_vaddr = fw_vaddr; sc->fw_size = fw_size; /* Setup firmware physical address and size. */ fw_base = fw_paddr + sizeof(*fw_hdr); CSB_WR4(sc, XUSB_CSB_MEMPOOL_ILOAD_ATTR, fw_size); CSB_WR4(sc, XUSB_CSB_MEMPOOL_ILOAD_BASE_LO, fw_base & 0xFFFFFFFF); CSB_WR4(sc, XUSB_CSB_MEMPOOL_ILOAD_BASE_HI, (uint64_t)fw_base >> 32); CSB_WR4(sc, XUSB_CSB_MEMPOOL_APMAP, APMAP_BOOTPATH); /* Invalidate full L2IMEM context. */ CSB_WR4(sc, XUSB_CSB_MEMPOOL_L2IMEMOP_TRIG, L2IMEMOP_INVALIDATE_ALL); /* Program load of L2IMEM by boot code. */ code_tags = howmany(fw_hdr->boot_codetag, XUSB_CSB_IMEM_BLOCK_SIZE); code_size = howmany(fw_hdr->boot_codesize, XUSB_CSB_IMEM_BLOCK_SIZE); CSB_WR4(sc, XUSB_CSB_MEMPOOL_L2IMEMOP_SIZE, L2IMEMOP_SIZE_OFFSET(code_tags) | L2IMEMOP_SIZE_SIZE(code_size)); /* Execute L2IMEM boot code fetch. 
*/ CSB_WR4(sc, XUSB_CSB_MEMPOOL_L2IMEMOP_TRIG, L2IMEMOP_LOAD_LOCKED_RESULT); /* Program FALCON auto-fill range and block count */ CSB_WR4(sc, XUSB_FALCON_IMFILLCTL, code_size); CSB_WR4(sc, XUSB_FALCON_IMFILLRNG1, IMFILLRNG1_TAG_LO(code_tags) | IMFILLRNG1_TAG_HI(code_tags + code_size)); CSB_WR4(sc, XUSB_FALCON_DMACTL, 0); /* Wait for CPU */ for (i = 500; i > 0; i--) { if (CSB_RD4(sc, XUSB_CSB_MEMPOOL_L2IMEMOP_RESULT) & L2IMEMOP_RESULT_VLD) break; DELAY(100); } if (i <= 0) { device_printf(sc->dev, "Timedout while wating for DMA, " "state: 0x%08X\n", CSB_RD4(sc, XUSB_CSB_MEMPOOL_L2IMEMOP_RESULT)); return (ETIMEDOUT); } /* Boot FALCON cpu */ CSB_WR4(sc, XUSB_FALCON_BOOTVEC, fw_hdr->boot_codetag); CSB_WR4(sc, XUSB_FALCON_CPUCTL, CPUCTL_STARTCPU); /* Wait for CPU */ for (i = 50; i > 0; i--) { if (CSB_RD4(sc, XUSB_FALCON_CPUCTL) == CPUCTL_STOPPED) break; DELAY(100); } if (i <= 0) { device_printf(sc->dev, "Timedout while wating for FALCON cpu, " "state: 0x%08X\n", CSB_RD4(sc, XUSB_FALCON_CPUCTL)); return (ETIMEDOUT); } fw_timespec.tv_sec = fw_hdr->fwimg_created_time; fw_timespec.tv_nsec = 0; clock_ts_to_ct(&fw_timespec, &fw_clock); device_printf(sc->dev, " Falcon firmware version: %02X.%02X.%04X," " (%d/%d/%d %d:%02d:%02d UTC)\n", (fw_hdr->version_id >> 24) & 0xFF,(fw_hdr->version_id >> 15) & 0xFF, fw_hdr->version_id & 0xFFFF, fw_clock.day, fw_clock.mon, fw_clock.year, fw_clock.hour, fw_clock.min, fw_clock.sec); return (0); } static int init_hw(struct tegra_xhci_softc *sc) { int rv; uint32_t reg; rman_res_t base_addr; base_addr = rman_get_start(sc->xhci_softc.sc_io_res); /* Enable FPCI access */ reg = IPFS_RD4(sc, XUSB_HOST_CONFIGURATION); reg |= CONFIGURATION_EN_FPCI; IPFS_WR4(sc, XUSB_HOST_CONFIGURATION, reg); IPFS_RD4(sc, XUSB_HOST_CONFIGURATION); /* Program bar for XHCI base address */ reg = FPCI_RD4(sc, T_XUSB_CFG_4); reg &= ~CFG_4_BASE_ADDRESS(~0); reg |= CFG_4_BASE_ADDRESS((uint32_t)base_addr >> 15); FPCI_WR4(sc, T_XUSB_CFG_4, reg); FPCI_WR4(sc, T_XUSB_CFG_5, (uint32_t)((uint64_t)(base_addr) >> 32)); /* Enable bus master */ reg = FPCI_RD4(sc, T_XUSB_CFG_1); reg |= CFG_1_IO_SPACE; reg |= CFG_1_MEMORY_SPACE; reg |= CFG_1_BUS_MASTER; FPCI_WR4(sc, T_XUSB_CFG_1, reg); /* Enable Interrupts */ reg = IPFS_RD4(sc, XUSB_HOST_INTR_MASK); reg |= INTR_IP_INT_MASK; IPFS_WR4(sc, XUSB_HOST_INTR_MASK, reg); /* Set hysteresis */ IPFS_WR4(sc, XUSB_HOST_CLKGATE_HYSTERESIS, 128); rv = load_fw(sc); if (rv != 0) return rv; return (0); } static int tegra_xhci_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (ofw_bus_search_compatible(dev, compat_data)->ocd_data != 0) { device_set_desc(dev, "Nvidia Tegra XHCI controller"); return (BUS_PROBE_DEFAULT); } return (ENXIO); } static int tegra_xhci_detach(device_t dev) { struct tegra_xhci_softc *sc; struct xhci_softc *xsc; sc = device_get_softc(dev); xsc = &sc->xhci_softc; /* during module unload there are lots of children leftover */ device_delete_children(dev); if (sc->xhci_inited) { usb_callout_drain(&xsc->sc_callout); xhci_halt_controller(xsc); } if (xsc->sc_irq_res && xsc->sc_intr_hdl) { bus_teardown_intr(dev, xsc->sc_irq_res, xsc->sc_intr_hdl); xsc->sc_intr_hdl = NULL; } if (xsc->sc_irq_res) { bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(xsc->sc_irq_res), xsc->sc_irq_res); xsc->sc_irq_res = NULL; } if (xsc->sc_io_res != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(xsc->sc_io_res), xsc->sc_io_res); xsc->sc_io_res = NULL; } if (sc->xhci_inited) xhci_uninit(xsc); if (sc->irq_hdl_mbox != NULL) bus_teardown_intr(dev, 
sc->irq_res_mbox, sc->irq_hdl_mbox); if (sc->fw_vaddr != 0) kmem_free(kernel_arena, sc->fw_vaddr, sc->fw_size); LOCK_DESTROY(sc); return (0); } static int tegra_xhci_attach(device_t dev) { struct tegra_xhci_softc *sc; struct xhci_softc *xsc; int rv, rid; phandle_t node; sc = device_get_softc(dev); sc->dev = dev; sc->fw_name = "tegra124_xusb_fw"; node = ofw_bus_get_node(dev); xsc = &sc->xhci_softc; LOCK_INIT(sc); rv = get_fdt_resources(sc, node); if (rv != 0) { rv = ENXIO; goto error; } rv = enable_fdt_resources(sc); if (rv != 0) { rv = ENXIO; goto error; } /* Allocate resources. */ rid = 0; xsc->sc_io_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (xsc->sc_io_res == NULL) { device_printf(dev, "Could not allocate HCD memory resources\n"); rv = ENXIO; goto error; } rid = 1; sc->mem_res_fpci = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res_fpci == NULL) { device_printf(dev, "Could not allocate FPCI memory resources\n"); rv = ENXIO; goto error; } rid = 2; sc->mem_res_ipfs = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res_ipfs == NULL) { device_printf(dev, "Could not allocate IPFS memory resources\n"); rv = ENXIO; goto error; } rid = 0; xsc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (xsc->sc_irq_res == NULL) { device_printf(dev, "Could not allocate HCD IRQ resources\n"); rv = ENXIO; goto error; } rid = 1; sc->irq_res_mbox = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->irq_res_mbox == NULL) { device_printf(dev, "Could not allocate MBOX IRQ resources\n"); rv = ENXIO; goto error; } rv = init_hw(sc); if (rv != 0) { device_printf(dev, "Could not initialize XUSB hardware\n"); goto error; } /* Wakeup and enable firmaware */ rv = mbox_send_cmd(sc, MBOX_CMD_MSG_ENABLED, 0); if (rv != 0) { device_printf(sc->dev, "Could not enable XUSB firmware\n"); goto error; } /* Fill data for XHCI driver. */ xsc->sc_bus.parent = dev; xsc->sc_bus.devices = xsc->sc_devices; xsc->sc_bus.devices_max = XHCI_MAX_DEVICES; xsc->sc_io_tag = rman_get_bustag(xsc->sc_io_res); xsc->sc_io_hdl = rman_get_bushandle(xsc->sc_io_res); xsc->sc_io_size = rman_get_size(xsc->sc_io_res); strlcpy(xsc->sc_vendor, "Nvidia", sizeof(xsc->sc_vendor)); /* Add USB bus device. */ xsc->sc_bus.bdev = device_add_child(sc->dev, "usbus", -1); if (xsc->sc_bus.bdev == NULL) { device_printf(sc->dev, "Could not add USB device\n"); rv = ENXIO; goto error; } device_set_ivars(xsc->sc_bus.bdev, &xsc->sc_bus); device_set_desc(xsc->sc_bus.bdev, "Nvidia USB 3.0 controller"); rv = xhci_init(xsc, sc->dev, 1); if (rv != 0) { device_printf(sc->dev, "USB init failed: %d\n", rv); goto error; } sc->xhci_inited = true; rv = xhci_start_controller(xsc); if (rv != 0) { device_printf(sc->dev, "Could not start XHCI controller: %d\n", rv); goto error; } rv = bus_setup_intr(dev, sc->irq_res_mbox, INTR_TYPE_MISC | INTR_MPSAFE, NULL, intr_mbox, sc, &sc->irq_hdl_mbox); if (rv != 0) { device_printf(dev, "Could not setup error IRQ: %d\n",rv); xsc->sc_intr_hdl = NULL; goto error; } rv = bus_setup_intr(dev, xsc->sc_irq_res, INTR_TYPE_BIO | INTR_MPSAFE, NULL, (driver_intr_t *)xhci_interrupt, xsc, &xsc->sc_intr_hdl); if (rv != 0) { device_printf(dev, "Could not setup error IRQ: %d\n",rv); xsc->sc_intr_hdl = NULL; goto error; } /* Probe the bus. 
*/ rv = device_probe_and_attach(xsc->sc_bus.bdev); if (rv != 0) { device_printf(sc->dev, "Could not initialize USB: %d\n", rv); goto error; } return (0); error: panic("XXXXX"); tegra_xhci_detach(dev); return (rv); } static device_method_t xhci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, tegra_xhci_probe), DEVMETHOD(device_attach, tegra_xhci_attach), DEVMETHOD(device_detach, tegra_xhci_detach), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), DEVMETHOD(device_shutdown, bus_generic_shutdown), /* Bus interface */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD_END }; static devclass_t xhci_devclass; static DEFINE_CLASS_0(xhci, xhci_driver, xhci_methods, sizeof(struct tegra_xhci_softc)); DRIVER_MODULE(tegra_xhci, simplebus, xhci_driver, xhci_devclass, NULL, NULL); MODULE_DEPEND(tegra_xhci, usb, 1, 1, 1); Index: head/sys/arm/samsung/exynos/exynos5_fimd.c =================================================================== --- head/sys/arm/samsung/exynos/exynos5_fimd.c (revision 338106) +++ head/sys/arm/samsung/exynos/exynos5_fimd.c (revision 338107) @@ -1,412 +1,412 @@ /*- * Copyright (c) 2014 Ruslan Bukin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ /* * Samsung Exynos 5 Display Controller * Chapter 15, Exynos 5 Dual User's Manual Public Rev 1.00 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "gpio_if.h" #include #include #include #include "fb_if.h" #define FIMDBYPASS_DISP1 (1 << 15) #define VIDCON0 (0x0) #define VIDCON0_ENVID (1 << 1) #define VIDCON0_ENVID_F (1 << 0) #define CLKVAL_F 0xb #define CLKVAL_F_OFFSET 6 #define WINCON0 0x0020 #define WINCON1 0x0024 #define WINCON2 0x0028 #define WINCON3 0x002C #define WINCON4 0x0030 #define ENLOCAL_F (1 << 22) #define BPPMODE_F_RGB_16BIT_565 0x5 #define BPPMODE_F_OFFSET 2 #define ENWIN_F_ENABLE (1 << 0) #define HALF_WORD_SWAP_EN (1 << 16) #define SHADOWCON 0x0034 #define CHANNEL0_EN (1 << 0) #define VIDOSD0A 0x0040 #define VIDOSD0B 0x0044 #define VIDOSD0C 0x0048 #define VIDW00ADD0B0 0x00A0 #define VIDW00ADD0B1 0x00A4 #define VIDW00ADD0B2 0x20A0 #define VIDW00ADD1B0 0x00D0 #define VIDW00ADD1B1 0x00D4 #define VIDW00ADD1B2 0x20D0 #define VIDW00ADD2 0x0100 #define VIDW01ADD2 0x0104 #define VIDW02ADD2 0x0108 #define VIDW03ADD2 0x010C #define VIDW04ADD2 0x0110 #define VIDCON1 (0x04) #define VIDTCON0 0x0010 #define VIDTCON1 0x0014 #define VIDTCON2 0x0018 #define VIDTCON3 0x001C #define VIDINTCON0 0x0130 #define VIDINTCON1 0x0134 #define VSYNC_PULSE_WIDTH_VAL 0x3 #define VSYNC_PULSE_WIDTH_OFFSET 0 #define V_FRONT_PORCH_VAL 0x3 #define V_FRONT_PORCH_OFFSET 8 #define V_BACK_PORCH_VAL 0x3 #define V_BACK_PORCH_OFFSET 16 #define HSYNC_PULSE_WIDTH_VAL 0x3 #define HSYNC_PULSE_WIDTH_OFFSET 0 #define H_FRONT_PORCH_VAL 0x3 #define H_FRONT_PORCH_OFFSET 8 #define H_BACK_PORCH_VAL 0x3 #define H_BACK_PORCH_OFFSET 16 #define HOZVAL_OFFSET 0 #define LINEVAL_OFFSET 11 #define OSD_RIGHTBOTX_F_OFFSET 11 #define OSD_RIGHTBOTY_F_OFFSET 0 #define DPCLKCON 0x27c #define DPCLKCON_EN (1 << 1) #define DREAD4(_sc, _reg) \ bus_space_read_4(_sc->bst_disp, _sc->bsh_disp, _reg) #define DWRITE4(_sc, _reg, _val) \ bus_space_write_4(_sc->bst_disp, _sc->bsh_disp, _reg, _val) struct panel_info { uint32_t width; uint32_t height; uint32_t h_back_porch; uint32_t h_pulse_width; uint32_t h_front_porch; uint32_t v_back_porch; uint32_t v_pulse_width; uint32_t v_front_porch; uint32_t clk_div; uint32_t backlight_pin; uint32_t fixvclk; uint32_t ivclk; uint32_t clkval_f; }; struct fimd_softc { struct resource *res[3]; bus_space_tag_t bst; bus_space_handle_t bsh; bus_space_tag_t bst_disp; bus_space_handle_t bsh_disp; bus_space_tag_t bst_sysreg; bus_space_handle_t bsh_sysreg; void *ih; device_t dev; device_t sc_fbd; /* fbd child */ struct fb_info sc_info; struct panel_info *panel; }; static struct resource_spec fimd_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, /* Timer registers */ { SYS_RES_MEMORY, 1, RF_ACTIVE }, /* FIMD */ { SYS_RES_MEMORY, 2, RF_ACTIVE }, /* DISP */ { -1, 0 } }; static int fimd_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_is_compatible(dev, "exynos,fimd")) return (ENXIO); device_set_desc(dev, "Samsung Exynos 5 Display Controller"); return (BUS_PROBE_DEFAULT); } static int get_panel_info(struct fimd_softc *sc, struct panel_info *panel) { phandle_t node; pcell_t dts_value[3]; int len; if ((node = ofw_bus_get_node(sc->dev)) == -1) return (ENXIO); /* panel size */ if ((len = OF_getproplen(node, "panel-size")) <= 0) return (ENXIO); OF_getencprop(node, "panel-size", dts_value, len); 
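	/*
	 * Editor's note, illustrative only and not part of the original
	 * commit: each panel-* property is fetched with the same two-step
	 * pattern, OF_getproplen() to check that the property exists and
	 * then OF_getencprop() to decode its cells into dts_value[].  A
	 * board DTS would carry something like the following (the values
	 * are hypothetical, not taken from any real board file):
	 *
	 *	panel-size = <1366 768>;
	 *	panel-hsync = <80 32 48>;
	 *	panel-vsync = <14 5 3>;
	 *	panel-clk-div = <17>;
	 *	panel-backlight-pin = <25>;
	 *
	 * and the assignments below copy those cells into struct panel_info.
	 */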
panel->width = dts_value[0]; panel->height = dts_value[1]; /* hsync */ if ((len = OF_getproplen(node, "panel-hsync")) <= 0) return (ENXIO); OF_getencprop(node, "panel-hsync", dts_value, len); panel->h_back_porch = dts_value[0]; panel->h_pulse_width = dts_value[1]; panel->h_front_porch = dts_value[2]; /* vsync */ if ((len = OF_getproplen(node, "panel-vsync")) <= 0) return (ENXIO); OF_getencprop(node, "panel-vsync", dts_value, len); panel->v_back_porch = dts_value[0]; panel->v_pulse_width = dts_value[1]; panel->v_front_porch = dts_value[2]; /* clk divider */ if ((len = OF_getproplen(node, "panel-clk-div")) <= 0) return (ENXIO); OF_getencprop(node, "panel-clk-div", dts_value, len); panel->clk_div = dts_value[0]; /* backlight pin */ if ((len = OF_getproplen(node, "panel-backlight-pin")) <= 0) return (ENXIO); OF_getencprop(node, "panel-backlight-pin", dts_value, len); panel->backlight_pin = dts_value[0]; return (0); } static int fimd_init(struct fimd_softc *sc) { struct panel_info *panel; int reg; panel = sc->panel; /* fb_init */ reg = panel->ivclk | panel->fixvclk; DWRITE4(sc,VIDCON1,reg); reg = (VIDCON0_ENVID | VIDCON0_ENVID_F); reg |= (panel->clkval_f << CLKVAL_F_OFFSET); WRITE4(sc,VIDCON0,reg); reg = (panel->v_pulse_width << VSYNC_PULSE_WIDTH_OFFSET); reg |= (panel->v_front_porch << V_FRONT_PORCH_OFFSET); reg |= (panel->v_back_porch << V_BACK_PORCH_OFFSET); DWRITE4(sc,VIDTCON0,reg); reg = (panel->h_pulse_width << HSYNC_PULSE_WIDTH_OFFSET); reg |= (panel->h_front_porch << H_FRONT_PORCH_OFFSET); reg |= (panel->h_back_porch << H_BACK_PORCH_OFFSET); DWRITE4(sc,VIDTCON1,reg); reg = ((panel->width - 1) << HOZVAL_OFFSET); reg |= ((panel->height - 1) << LINEVAL_OFFSET); DWRITE4(sc,VIDTCON2,reg); reg = sc->sc_info.fb_pbase; WRITE4(sc, VIDW00ADD0B0, reg); reg += (sc->sc_info.fb_stride * (sc->sc_info.fb_height + 1)); WRITE4(sc, VIDW00ADD1B0, reg); WRITE4(sc, VIDW00ADD2, sc->sc_info.fb_stride); reg = ((panel->width - 1) << OSD_RIGHTBOTX_F_OFFSET); reg |= ((panel->height - 1) << OSD_RIGHTBOTY_F_OFFSET); WRITE4(sc,VIDOSD0B,reg); reg = panel->width * panel->height; WRITE4(sc,VIDOSD0C,reg); reg = READ4(sc, SHADOWCON); reg |= CHANNEL0_EN; reg &= ~(1 << 5); /* disable local path for channel0 */ WRITE4(sc,SHADOWCON,reg); reg = BPPMODE_F_RGB_16BIT_565 << BPPMODE_F_OFFSET; reg |= ENWIN_F_ENABLE | HALF_WORD_SWAP_EN; /* Note: swap=0 when ENLOCAL==1 */ reg &= ~ENLOCAL_F; /* use DMA */ WRITE4(sc,WINCON0,reg); /* Enable DisplayPort Clk */ WRITE4(sc, DPCLKCON, DPCLKCON_EN); return (0); } static int fimd_attach(device_t dev) { struct panel_info panel; struct fimd_softc *sc; device_t gpio_dev; int reg; sc = device_get_softc(dev); sc->dev = dev; if (bus_alloc_resources(dev, fimd_spec, sc->res)) { device_printf(dev, "could not allocate resources\n"); return (ENXIO); } /* Memory interface */ sc->bst = rman_get_bustag(sc->res[0]); sc->bsh = rman_get_bushandle(sc->res[0]); sc->bst_disp = rman_get_bustag(sc->res[1]); sc->bsh_disp = rman_get_bushandle(sc->res[1]); sc->bst_sysreg = rman_get_bustag(sc->res[2]); sc->bsh_sysreg = rman_get_bushandle(sc->res[2]); if (get_panel_info(sc, &panel)) { device_printf(dev, "Can't get panel info\n"); return (ENXIO); } panel.fixvclk = 0; panel.ivclk = 0; panel.clkval_f = 2; sc->panel = &panel; /* Get the GPIO device, we need this to give power to USB */ gpio_dev = devclass_get_device(devclass_find("gpio"), 0); if (gpio_dev == NULL) { /* TODO */ } reg = bus_space_read_4(sc->bst_sysreg, sc->bsh_sysreg, 0x214); reg |= FIMDBYPASS_DISP1; bus_space_write_4(sc->bst_sysreg, sc->bsh_sysreg, 0x214, 
reg); sc->sc_info.fb_width = panel.width; sc->sc_info.fb_height = panel.height; sc->sc_info.fb_stride = sc->sc_info.fb_width * 2; sc->sc_info.fb_bpp = sc->sc_info.fb_depth = 16; sc->sc_info.fb_size = sc->sc_info.fb_height * sc->sc_info.fb_stride; - sc->sc_info.fb_vbase = (intptr_t)kmem_alloc_contig(kernel_arena, - sc->sc_info.fb_size, M_ZERO, 0, ~0, PAGE_SIZE, 0, VM_MEMATTR_UNCACHEABLE); + sc->sc_info.fb_vbase = (intptr_t)kmem_alloc_contig(sc->sc_info.fb_size, + M_ZERO, 0, ~0, PAGE_SIZE, 0, VM_MEMATTR_UNCACHEABLE); sc->sc_info.fb_pbase = (intptr_t)vtophys(sc->sc_info.fb_vbase); #if 0 printf("%dx%d [%d]\n", sc->sc_info.fb_width, sc->sc_info.fb_height, sc->sc_info.fb_stride); printf("pbase == 0x%08x\n", sc->sc_info.fb_pbase); #endif memset((int8_t *)sc->sc_info.fb_vbase, 0x0, sc->sc_info.fb_size); fimd_init(sc); sc->sc_info.fb_name = device_get_nameunit(dev); /* Ask newbus to attach framebuffer device to me. */ sc->sc_fbd = device_add_child(dev, "fbd", device_get_unit(dev)); if (sc->sc_fbd == NULL) device_printf(dev, "Can't attach fbd device\n"); if (device_probe_and_attach(sc->sc_fbd) != 0) { device_printf(sc->dev, "Failed to attach fbd device\n"); } return (0); } static struct fb_info * fimd_fb_getinfo(device_t dev) { struct fimd_softc *sc = device_get_softc(dev); return (&sc->sc_info); } static device_method_t fimd_methods[] = { DEVMETHOD(device_probe, fimd_probe), DEVMETHOD(device_attach, fimd_attach), /* Framebuffer service methods */ DEVMETHOD(fb_getinfo, fimd_fb_getinfo), { 0, 0 } }; static driver_t fimd_driver = { "fb", fimd_methods, sizeof(struct fimd_softc), }; static devclass_t fimd_devclass; DRIVER_MODULE(fb, simplebus, fimd_driver, fimd_devclass, 0, 0); Index: head/sys/arm64/arm64/busdma_bounce.c =================================================================== --- head/sys/arm64/arm64/busdma_bounce.c (revision 338106) +++ head/sys/arm64/arm64/busdma_bounce.c (revision 338107) @@ -1,1332 +1,1331 @@ /*- * Copyright (c) 1997, 1998 Justin T. Gibbs. * Copyright (c) 2015-2016 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Andrew Turner * under sponsorship of the FreeBSD Foundation. * * Portions of this software were developed by Semihalf * under sponsorship of the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_BPAGES 4096 enum { BF_COULD_BOUNCE = 0x01, BF_MIN_ALLOC_COMP = 0x02, BF_KMEM_ALLOC = 0x04, BF_COHERENT = 0x10, }; struct bounce_zone; struct bus_dma_tag { struct bus_dma_tag_common common; int map_count; int bounce_flags; bus_dma_segment_t *segments; struct bounce_zone *bounce_zone; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ vm_page_t datapage; /* physical page of client data */ vm_offset_t dataoffs; /* page offset of client data */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); struct sync_list { vm_offset_t vaddr; /* kva of client data */ bus_addr_t paddr; /* physical address */ vm_page_t pages; /* starting page of client data */ bus_size_t datacount; /* client data count */ }; struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; bus_dmamap_callback_t *callback; void *callback_arg; STAILQ_ENTRY(bus_dmamap) links; u_int flags; #define DMAMAP_COULD_BOUNCE (1 << 0) #define DMAMAP_FROM_DMAMEM (1 << 1) int sync_count; struct sync_list slist[]; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags); static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags); static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags); /* * Allocate a device specific dma_tag. 
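 * The new tag inherits BF_COHERENT from its parent and is marked
 * BF_COULD_BOUNCE when its address window or alignment may force bouncing.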
*/ static int bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error; *dmat = NULL; error = common_bus_dma_tag_create(parent != NULL ? &parent->common : NULL, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, sizeof (struct bus_dma_tag), (void **)&newtag); if (error != 0) return (error); newtag->common.impl = &bus_dma_bounce_impl; newtag->map_count = 0; newtag->segments = NULL; if ((flags & BUS_DMA_COHERENT) != 0) newtag->bounce_flags |= BF_COHERENT; if (parent != NULL) { if ((newtag->common.filter != NULL || (parent->bounce_flags & BF_COULD_BOUNCE) != 0)) newtag->bounce_flags |= BF_COULD_BOUNCE; /* Copy some flags from the parent */ newtag->bounce_flags |= parent->bounce_flags & BF_COHERENT; } if (newtag->common.lowaddr < ptoa((vm_paddr_t)Maxmem) || newtag->common.alignment > 1) newtag->bounce_flags |= BF_COULD_BOUNCE; if (((newtag->bounce_flags & BF_COULD_BOUNCE) != 0) && (flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* Must bounce */ if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_DEVBUF); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->bounce_flags |= BF_MIN_ALLOC_COMP; } else error = 0; if (error != 0) free(newtag, M_DEVBUF); else *dmat = newtag; CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->common.flags : 0), error); return (error); } static int bounce_bus_dma_tag_destroy(bus_dma_tag_t dmat) { bus_dma_tag_t dmat_copy, parent; int error; error = 0; dmat_copy = dmat; if (dmat != NULL) { if (dmat->map_count != 0) { error = EBUSY; goto out; } while (dmat != NULL) { parent = (bus_dma_tag_t)dmat->common.parent; atomic_subtract_int(&dmat->common.ref_count, 1); if (dmat->common.ref_count == 0) { if (dmat->segments != NULL) free(dmat->segments, M_DEVBUF); free(dmat, M_DEVBUF); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } out: CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); return (error); } static bus_dmamap_t alloc_dmamap(bus_dma_tag_t dmat, int flags) { u_long mapsize; bus_dmamap_t map; mapsize = sizeof(*map); mapsize += sizeof(struct sync_list) * dmat->common.nsegments; map = malloc(mapsize, M_DEVBUF, flags | M_ZERO); if (map == NULL) return (NULL); /* Initialize the new map */ STAILQ_INIT(&map->bpages); return (map); } /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. 
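 * For tags that may bounce, the bounce zone is allocated here and pages are
 * added to the pool up to a per-instance limit.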
*/ static int bounce_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { struct bounce_zone *bz; int error, maxpages, pages; error = 0; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->common.nsegments, M_DEVBUF, M_NOWAIT); if (dmat->segments == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } } *mapp = alloc_dmamap(dmat, M_NOWAIT); if (*mapp == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or * an active address boundary. */ if (dmat->bounce_flags & BF_COULD_BOUNCE) { /* Must bounce */ if (dmat->bounce_zone == NULL) { if ((error = alloc_bounce_zone(dmat)) != 0) { free(*mapp, M_DEVBUF); return (error); } } bz = dmat->bounce_zone; (*mapp)->flags = DMAMAP_COULD_BOUNCE; /* * Attempt to add pages to our pool on a per-instance * basis up to a sane limit. */ if (dmat->common.alignment > 1) maxpages = MAX_BPAGES; else maxpages = MIN(MAX_BPAGES, Maxmem - atop(dmat->common.lowaddr)); if ((dmat->bounce_flags & BF_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { pages = MAX(atop(dmat->common.maxsize), 1); pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 1); if (alloc_bounce_pages(dmat, pages) < pages) error = ENOMEM; if ((dmat->bounce_flags & BF_MIN_ALLOC_COMP) == 0) { if (error == 0) { dmat->bounce_flags |= BF_MIN_ALLOC_COMP; } } else error = 0; } bz->map_count++; } if (error == 0) dmat->map_count++; else free(*mapp, M_DEVBUF); CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, error); return (error); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ static int bounce_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { /* Check we are destroying the correct map type */ if ((map->flags & DMAMAP_FROM_DMAMEM) != 0) panic("bounce_bus_dmamap_destroy: Invalid map freed\n"); if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) { KASSERT((map->flags & DMAMAP_COULD_BOUNCE) != 0, ("%s: Bounce zone when cannot bounce", __func__)); dmat->bounce_zone->map_count--; } free(map, M_DEVBUF); dmat->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into * bus device space based on the constraints lited in the dma tag. * A dmamap to for use with dmamap_load is also allocated. */ static int bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { /* * XXX ARM64TODO: * This bus_dma implementation requires IO-Coherent architecutre. * If IO-Coherency is not guaranteed, the BUS_DMA_COHERENT flag has * to be implented using non-cacheable memory. 
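 * BUS_DMA_NOCACHE, or BUS_DMA_COHERENT on a tag without BF_COHERENT, selects
 * VM_MEMATTR_UNCACHEABLE below.  As of this change kmem_alloc_contig() no
 * longer takes an arena argument; a minimal sketch of the new call, with
 * assumed constraints, is:
 *   va = kmem_alloc_contig(size, M_WAITOK, 0, BUS_SPACE_MAXADDR, PAGE_SIZE,
 *       0, VM_MEMATTR_DEFAULT);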
*/ vm_memattr_t attr; int mflags; if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->common.nsegments, M_DEVBUF, mflags); if (dmat->segments == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); return (ENOMEM); } } if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; if (flags & BUS_DMA_NOCACHE) attr = VM_MEMATTR_UNCACHEABLE; else if ((flags & BUS_DMA_COHERENT) != 0 && (dmat->bounce_flags & BF_COHERENT) == 0) /* * If we have a non-coherent tag, and are trying to allocate * a coherent block of memory it needs to be uncached. */ attr = VM_MEMATTR_UNCACHEABLE; else attr = VM_MEMATTR_DEFAULT; /* * Create the map, but don't set the could bounce flag as * this allocation should never bounce; */ *mapp = alloc_dmamap(dmat, mflags); if (*mapp == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); return (ENOMEM); } (*mapp)->flags = DMAMAP_FROM_DMAMEM; /* * Allocate the buffer from the malloc(9) allocator if... * - It's small enough to fit into a single power of two sized bucket. * - The alignment is less than or equal to the maximum size * - The low address requirement is fulfilled. * else allocate non-contiguous pages if... * - The page count that could get allocated doesn't exceed * nsegments also when the maximum segment size is less * than PAGE_SIZE. * - The alignment constraint isn't larger than a page boundary. * - There are no boundary-crossing constraints. * else allocate a block of contiguous pages because one or more of the * constraints is something that only the contig allocator can fulfill. * * NOTE: The (dmat->common.alignment <= dmat->maxsize) check * below is just a quick hack. The exact alignment guarantees * of malloc(9) need to be nailed down, and the code below * should be rewritten to take that into account. * * In the meantime warn the user if malloc gets it wrong. */ if ((dmat->common.maxsize <= PAGE_SIZE) && (dmat->common.alignment <= dmat->common.maxsize) && dmat->common.lowaddr >= ptoa((vm_paddr_t)Maxmem) && attr == VM_MEMATTR_DEFAULT) { *vaddr = malloc(dmat->common.maxsize, M_DEVBUF, mflags); } else if (dmat->common.nsegments >= howmany(dmat->common.maxsize, MIN(dmat->common.maxsegsz, PAGE_SIZE)) && dmat->common.alignment <= PAGE_SIZE && (dmat->common.boundary % PAGE_SIZE) == 0) { /* Page-based multi-segment allocations allowed */ *vaddr = (void *)kmem_alloc_attr(dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr, attr); dmat->bounce_flags |= BF_KMEM_ALLOC; } else { - *vaddr = (void *)kmem_alloc_contig(kernel_arena, - dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr, - dmat->common.alignment != 0 ? dmat->common.alignment : 1ul, - dmat->common.boundary, attr); + *vaddr = (void *)kmem_alloc_contig(dmat->common.maxsize, mflags, + 0ul, dmat->common.lowaddr, dmat->common.alignment != 0 ? 
+ dmat->common.alignment : 1ul, dmat->common.boundary, attr); dmat->bounce_flags |= BF_KMEM_ALLOC; } if (*vaddr == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); free(*mapp, M_DEVBUF); return (ENOMEM); } else if (vtophys(*vaddr) & (dmat->common.alignment - 1)) { printf("bus_dmamem_alloc failed to align memory properly.\n"); } dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, 0); return (0); } /* * Free a piece of memory and it's allociated dmamap, that was allocated * via bus_dmamem_alloc. Make the same choice for free/contigfree. */ static void bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { /* * Check the map came from bounce_bus_dmamem_alloc, so the map * should be NULL and the BF_KMEM_ALLOC flag cleared if malloc() * was used and set if kmem_alloc_contig() was used. */ if ((map->flags & DMAMAP_FROM_DMAMEM) == 0) panic("bus_dmamem_free: Invalid map freed\n"); if ((dmat->bounce_flags & BF_KMEM_ALLOC) == 0) free(vaddr, M_DEVBUF); else kmem_free(kernel_arena, (vm_offset_t)vaddr, dmat->common.maxsize); free(map, M_DEVBUF); dmat->map_count--; CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->bounce_flags); } static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { bus_addr_t curaddr; bus_size_t sgsize; if ((map->flags & DMAMAP_COULD_BOUNCE) != 0 && map->pagesneeded == 0) { /* * Count the number of bounce pages * needed in order to complete this transfer */ curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->common.maxsegsz); if (bus_dma_run_filter(&dmat->common, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); map->pagesneeded++; } curaddr += sgsize; buflen -= sgsize; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; bus_addr_t paddr; bus_size_t sg_len; if ((map->flags & DMAMAP_COULD_BOUNCE) != 0 && map->pagesneeded == 0) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->common.lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->common.boundary, dmat->common.alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = (vm_offset_t)buf; vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); if (pmap == kernel_pmap) paddr = pmap_kextract(vaddr); else paddr = pmap_extract(pmap, vaddr); if (bus_dma_run_filter(&dmat->common, paddr) != 0) { sg_len = roundup2(sg_len, dmat->common.alignment); map->pagesneeded++; } vaddr += sg_len; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. 
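 * The range is clipped so it never crosses the boundary constraint and is
 * coalesced with the previous segment when the addresses are contiguous.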
*/ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->common.boundary - 1); if (dmat->common.boundary > 0) { baddr = (curaddr + dmat->common.boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } /* * Insert chunk into a segment, coalescing with * previous segment if possible. */ seg = *segp; if (seg == -1) { seg = 0; segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz && (dmat->common.boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->common.nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. */ static int bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { struct sync_list *sl; bus_size_t sgsize; bus_addr_t curaddr, sl_end; int error; if (segs == NULL) segs = dmat->segments; if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } sl = map->slist + map->sync_count - 1; sl_end = 0; while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->common.maxsegsz); if (((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && bus_dma_run_filter(&dmat->common, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } else if ((dmat->bounce_flags & BF_COHERENT) == 0) { if (map->sync_count > 0) sl_end = sl->paddr + sl->datacount; if (map->sync_count == 0 || curaddr != sl_end) { if (++map->sync_count > dmat->common.nsegments) break; sl++; sl->vaddr = 0; sl->paddr = curaddr; sl->datacount = sgsize; sl->pages = PHYS_TO_VM_PAGE(curaddr); KASSERT(sl->pages != NULL, ("%s: page at PA:0x%08lx is not in " "vm_page_array", __func__, curaddr)); } else sl->datacount += sgsize; } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrace, and the ending segment on exit. */ static int bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { struct sync_list *sl; bus_size_t sgsize, max_sgsize; bus_addr_t curaddr, sl_pend; vm_offset_t kvaddr, vaddr, sl_vend; int error; if (segs == NULL) segs = dmat->segments; if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } sl = map->slist + map->sync_count - 1; vaddr = (vm_offset_t)buf; sl_pend = 0; sl_vend = 0; while (buflen > 0) { /* * Get the physical address for this segment. 
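 * Kernel buffers are translated with pmap_kextract(); user buffers use
 * pmap_extract() and have no kernel virtual address (kvaddr == 0).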
*/ if (pmap == kernel_pmap) { curaddr = pmap_kextract(vaddr); kvaddr = vaddr; } else { curaddr = pmap_extract(pmap, vaddr); kvaddr = 0; } /* * Compute the segment size, and adjust counts. */ max_sgsize = MIN(buflen, dmat->common.maxsegsz); sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); if (((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && bus_dma_run_filter(&dmat->common, curaddr)) { sgsize = roundup2(sgsize, dmat->common.alignment); sgsize = MIN(sgsize, max_sgsize); curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, sgsize); } else if ((dmat->bounce_flags & BF_COHERENT) == 0) { sgsize = MIN(sgsize, max_sgsize); if (map->sync_count > 0) { sl_pend = sl->paddr + sl->datacount; sl_vend = sl->vaddr + sl->datacount; } if (map->sync_count == 0 || (kvaddr != 0 && kvaddr != sl_vend) || (curaddr != sl_pend)) { if (++map->sync_count > dmat->common.nsegments) goto cleanup; sl++; sl->vaddr = kvaddr; sl->paddr = curaddr; if (kvaddr != 0) { sl->pages = NULL; } else { sl->pages = PHYS_TO_VM_PAGE(curaddr); KASSERT(sl->pages != NULL, ("%s: page at PA:0x%08lx is not " "in vm_page_array", __func__, curaddr)); } sl->datacount = sgsize; } else sl->datacount += sgsize; } else { sgsize = MIN(sgsize, max_sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } cleanup: /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } static void bounce_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { if ((map->flags & DMAMAP_COULD_BOUNCE) == 0) return; map->mem = *mem; map->dmat = dmat; map->callback = callback; map->callback_arg = callback_arg; } static bus_dma_segment_t * bounce_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { if (segs == NULL) segs = dmat->segments; return (segs); } /* * Release the mapping held by map. */ static void bounce_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } map->sync_count = 0; } static void dma_preread_safe(vm_offset_t va, vm_size_t size) { /* * Write back any partial cachelines immediately before and * after the DMA region. */ if (va & (dcache_line_size - 1)) cpu_dcache_wb_range(va, 1); if ((va + size) & (dcache_line_size - 1)) cpu_dcache_wb_range(va + size, 1); cpu_dcache_inv_range(va, size); } static void dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op) { uint32_t len, offset; vm_page_t m; vm_paddr_t pa; vm_offset_t va, tempva; bus_size_t size; offset = sl->paddr & PAGE_MASK; m = sl->pages; size = sl->datacount; pa = sl->paddr; for ( ; size != 0; size -= len, pa += len, offset = 0, ++m) { tempva = 0; if (sl->vaddr == 0) { len = min(PAGE_SIZE - offset, size); tempva = pmap_quick_enter_page(m); va = tempva | offset; KASSERT(pa == (VM_PAGE_TO_PHYS(m) | offset), ("unexpected vm_page_t phys: 0x%16lx != 0x%16lx", VM_PAGE_TO_PHYS(m) | offset, pa)); } else { len = sl->datacount; va = sl->vaddr; } switch (op) { case BUS_DMASYNC_PREWRITE: case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD: cpu_dcache_wb_range(va, len); break; case BUS_DMASYNC_PREREAD: /* * An mbuf may start in the middle of a cacheline. There * will be no cpu writes to the beginning of that line * (which contains the mbuf header) while dma is in * progress. 
Handle that case by doing a writeback of * just the first cacheline before invalidating the * overall buffer. Any mbuf in a chain may have this * misalignment. Buffers which are not mbufs bounce if * they are not aligned to a cacheline. */ dma_preread_safe(va, len); break; case BUS_DMASYNC_POSTREAD: case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE: cpu_dcache_inv_range(va, len); break; default: panic("unsupported combination of sync operations: " "0x%08x\n", op); } if (tempva != 0) pmap_quick_remove_page(tempva); } } static void bounce_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; struct sync_list *sl, *end; vm_offset_t datavaddr, tempvaddr; if (op == BUS_DMASYNC_POSTWRITE) return; if ((op & BUS_DMASYNC_POSTREAD) != 0) { /* * Wait for any DMA operations to complete before the bcopy. */ dsb(sy); } if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " "performing bounce", __func__, dmat, dmat->common.flags, op); if ((op & BUS_DMASYNC_PREWRITE) != 0) { while (bpage != NULL) { tempvaddr = 0; datavaddr = bpage->datavaddr; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page( bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)datavaddr, (void *)bpage->vaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); if ((dmat->bounce_flags & BF_COHERENT) == 0) cpu_dcache_wb_range(bpage->vaddr, bpage->datacount); bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } else if ((op & BUS_DMASYNC_PREREAD) != 0) { while (bpage != NULL) { if ((dmat->bounce_flags & BF_COHERENT) == 0) cpu_dcache_wbinv_range(bpage->vaddr, bpage->datacount); bpage = STAILQ_NEXT(bpage, links); } } if ((op & BUS_DMASYNC_POSTREAD) != 0) { while (bpage != NULL) { if ((dmat->bounce_flags & BF_COHERENT) == 0) cpu_dcache_inv_range(bpage->vaddr, bpage->datacount); tempvaddr = 0; datavaddr = bpage->datavaddr; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page( bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)bpage->vaddr, (void *)datavaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } } /* * Cache maintenance for normal (non-COHERENT non-bounce) buffers. */ if (map->sync_count != 0) { sl = &map->slist[0]; end = &map->slist[map->sync_count]; CTR3(KTR_BUSDMA, "%s: tag %p op 0x%x " "performing sync", __func__, dmat, op); for ( ; sl != end; ++sl) dma_dcache_sync(sl, op); } if ((op & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) != 0) { /* * Wait for the bcopy to complete before any DMA operations. 
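 * dsb(sy) is a full-system data synchronization barrier.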
*/ dsb(sy); } } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->common.alignment <= bz->alignment) && (dmat->common.lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->common.lowaddr; bz->alignment = MAX(dmat->common.alignment, PAGE_SIZE); bz->map_count = 0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? */ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_DEVBUF); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); mtx_lock(&bounce_lock); 
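/* Publish the new bounce page and update the counters under the lock. */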
STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); KASSERT((map->flags & DMAMAP_COULD_BOUNCE) != 0, ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved. */ bpage->vaddr |= addr & PAGE_MASK; bpage->busaddr |= addr & PAGE_MASK; } bpage->datavaddr = vaddr; bpage->datapage = PHYS_TO_VM_PAGE(addr); bpage->dataoffs = addr & PAGE_MASK; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. 
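 * Masking off the low bits restores the page-aligned addresses the bounce
 * page was created with.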
*/ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; (dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); (dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } struct bus_dma_impl bus_dma_bounce_impl = { .tag_create = bounce_bus_dma_tag_create, .tag_destroy = bounce_bus_dma_tag_destroy, .map_create = bounce_bus_dmamap_create, .map_destroy = bounce_bus_dmamap_destroy, .mem_alloc = bounce_bus_dmamem_alloc, .mem_free = bounce_bus_dmamem_free, .load_phys = bounce_bus_dmamap_load_phys, .load_buffer = bounce_bus_dmamap_load_buffer, .load_ma = bus_dmamap_load_ma_triv, .map_waitok = bounce_bus_dmamap_waitok, .map_complete = bounce_bus_dmamap_complete, .map_unload = bounce_bus_dmamap_unload, .map_sync = bounce_bus_dmamap_sync }; Index: head/sys/compat/linuxkpi/common/include/linux/dma-mapping.h =================================================================== --- head/sys/compat/linuxkpi/common/include/linux/dma-mapping.h (revision 338106) +++ head/sys/compat/linuxkpi/common/include/linux/dma-mapping.h (revision 338107) @@ -1,282 +1,282 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _LINUX_DMA_MAPPING_H_ #define _LINUX_DMA_MAPPING_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include enum dma_data_direction { DMA_BIDIRECTIONAL = 0, DMA_TO_DEVICE = 1, DMA_FROM_DEVICE = 2, DMA_NONE = 3, }; struct dma_map_ops { void* (*alloc_coherent)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp); void (*free_coherent)(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); dma_addr_t (*map_page)(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs); void (*unmap_page)(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs); int (*map_sg)(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, struct dma_attrs *attrs); void (*unmap_sg)(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, struct dma_attrs *attrs); void (*sync_single_for_cpu)(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir); void (*sync_single_for_device)(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir); void (*sync_single_range_for_cpu)(struct device *dev, dma_addr_t dma_handle, unsigned long offset, size_t size, enum dma_data_direction dir); void (*sync_single_range_for_device)(struct device *dev, dma_addr_t dma_handle, unsigned long offset, size_t size, enum dma_data_direction dir); void (*sync_sg_for_cpu)(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir); void (*sync_sg_for_device)(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir); int (*mapping_error)(struct device *dev, dma_addr_t dma_addr); int (*dma_supported)(struct device *dev, u64 mask); int is_phys; }; #define DMA_BIT_MASK(n) ((2ULL << ((n) - 1)) - 1ULL) static inline int dma_supported(struct device *dev, u64 mask) { /* XXX busdma takes care of this elsewhere. */ return (1); } static inline int dma_set_mask(struct device *dev, u64 dma_mask) { if (!dev->dma_mask || !dma_supported(dev, dma_mask)) return -EIO; *dev->dma_mask = dma_mask; return (0); } static inline int dma_set_coherent_mask(struct device *dev, u64 mask) { if (!dma_supported(dev, mask)) return -EIO; /* XXX Currently we don't support a separate coherent mask. */ return 0; } static inline void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { vm_paddr_t high; size_t align; void *mem; if (dev != NULL && dev->dma_mask) high = *dev->dma_mask; else if (flag & GFP_DMA32) high = BUS_SPACE_MAXADDR_32BIT; else high = BUS_SPACE_MAXADDR; align = PAGE_SIZE << get_order(size); - mem = (void *)kmem_alloc_contig(kmem_arena, size, flag, 0, high, align, - 0, VM_MEMATTR_DEFAULT); + mem = (void *)kmem_alloc_contig(size, flag, 0, high, align, 0, + VM_MEMATTR_DEFAULT); if (mem) *dma_handle = vtophys(mem); else *dma_handle = 0; return (mem); } static inline void * dma_zalloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { return (dma_alloc_coherent(dev, size, dma_handle, flag | __GFP_ZERO)); } static inline void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle) { kmem_free(kmem_arena, (vm_offset_t)cpu_addr, size); } /* XXX This only works with no iommu. 
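 * The map helpers below simply return the physical address; the unmap and
 * sync helpers are no-ops.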
*/ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { return vtophys(ptr); } static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { } static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, struct dma_attrs *attrs) { struct scatterlist *sg; int i; for_each_sg(sgl, sg, nents, i) sg_dma_address(sg) = sg_phys(sg); return (nents); } static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, struct dma_attrs *attrs) { } static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction) { return VM_PAGE_TO_PHYS(page) + offset; } static inline void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, enum dma_data_direction direction) { } static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) { } static inline void dma_sync_single(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { dma_sync_single_for_cpu(dev, addr, size, dir); } static inline void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) { } static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction) { } static inline void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction) { } static inline void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, unsigned long offset, size_t size, int direction) { } static inline void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, unsigned long offset, size_t size, int direction) { } static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return (0); } static inline unsigned int dma_set_max_seg_size(struct device *dev, unsigned int size) { return (0); } #define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, NULL) #define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, NULL) #define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, NULL) #define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, NULL) #define DEFINE_DMA_UNMAP_ADDR(name) dma_addr_t name #define DEFINE_DMA_UNMAP_LEN(name) __u32 name #define dma_unmap_addr(p, name) ((p)->name) #define dma_unmap_addr_set(p, name, v) (((p)->name) = (v)) #define dma_unmap_len(p, name) ((p)->name) #define dma_unmap_len_set(p, name, v) (((p)->name) = (v)) extern int uma_align_cache; #define dma_get_cache_alignment() uma_align_cache #endif /* _LINUX_DMA_MAPPING_H_ */ Index: head/sys/compat/linuxkpi/common/src/linux_page.c =================================================================== --- head/sys/compat/linuxkpi/common/src/linux_page.c (revision 338106) +++ head/sys/compat/linuxkpi/common/src/linux_page.c (revision 338107) @@ -1,388 +1,387 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2016 Matthew Macy (mmacy@mattmacy.io) * Copyright (c) 2017 Mellanox Technologies, Ltd. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void * linux_page_address(struct page *page) { if (page->object != kmem_object && page->object != kernel_object) { return (PMAP_HAS_DMAP ? ((void *)(uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(page))) : NULL); } return ((void *)(uintptr_t)(VM_MIN_KERNEL_ADDRESS + IDX_TO_OFF(page->pindex))); } vm_page_t linux_alloc_pages(gfp_t flags, unsigned int order) { vm_page_t page; if (PMAP_HAS_DMAP) { unsigned long npages = 1UL << order; int req = (flags & M_ZERO) ? (VM_ALLOC_ZERO | VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL) : (VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL); if (order == 0 && (flags & GFP_DMA32) == 0) { page = vm_page_alloc(NULL, 0, req); if (page == NULL) return (NULL); } else { vm_paddr_t pmax = (flags & GFP_DMA32) ? 
BUS_SPACE_MAXADDR_32BIT : BUS_SPACE_MAXADDR; retry: page = vm_page_alloc_contig(NULL, 0, req, npages, 0, pmax, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); if (page == NULL) { if (flags & M_WAITOK) { if (!vm_page_reclaim_contig(req, npages, 0, pmax, PAGE_SIZE, 0)) { vm_wait(NULL); } flags &= ~M_WAITOK; goto retry; } return (NULL); } } if (flags & M_ZERO) { unsigned long x; for (x = 0; x != npages; x++) { vm_page_t pgo = page + x; if ((pgo->flags & PG_ZERO) == 0) pmap_zero_page(pgo); } } } else { vm_offset_t vaddr; vaddr = linux_alloc_kmem(flags, order); if (vaddr == 0) return (NULL); page = PHYS_TO_VM_PAGE(vtophys((void *)vaddr)); KASSERT(vaddr == (vm_offset_t)page_address(page), ("Page address mismatch")); } return (page); } void linux_free_pages(vm_page_t page, unsigned int order) { if (PMAP_HAS_DMAP) { unsigned long npages = 1UL << order; unsigned long x; for (x = 0; x != npages; x++) { vm_page_t pgo = page + x; vm_page_lock(pgo); vm_page_free(pgo); vm_page_unlock(pgo); } } else { vm_offset_t vaddr; vaddr = (vm_offset_t)page_address(page); linux_free_kmem(vaddr, order); } } vm_offset_t linux_alloc_kmem(gfp_t flags, unsigned int order) { size_t size = ((size_t)PAGE_SIZE) << order; vm_offset_t addr; if ((flags & GFP_DMA32) == 0) { addr = kmem_malloc(kmem_arena, size, flags & GFP_NATIVE_MASK); } else { - addr = kmem_alloc_contig(kmem_arena, size, - flags & GFP_NATIVE_MASK, 0, BUS_SPACE_MAXADDR_32BIT, - PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); + addr = kmem_alloc_contig(size, flags & GFP_NATIVE_MASK, 0, + BUS_SPACE_MAXADDR_32BIT, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); } return (addr); } void linux_free_kmem(vm_offset_t addr, unsigned int order) { size_t size = ((size_t)PAGE_SIZE) << order; kmem_free(kmem_arena, addr, size); } static int linux_get_user_pages_internal(vm_map_t map, unsigned long start, int nr_pages, int write, struct page **pages) { vm_prot_t prot; size_t len; int count; int i; prot = write ? (VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ; len = ((size_t)nr_pages) << PAGE_SHIFT; count = vm_fault_quick_hold_pages(map, start, len, prot, pages, nr_pages); if (count == -1) return (-EFAULT); for (i = 0; i != nr_pages; i++) { struct page *pg = pages[i]; vm_page_lock(pg); vm_page_wire(pg); vm_page_unhold(pg); vm_page_unlock(pg); } return (nr_pages); } int __get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { vm_map_t map; vm_page_t *mp; vm_offset_t va; vm_offset_t end; vm_prot_t prot; int count; if (nr_pages == 0 || in_interrupt()) return (0); MPASS(pages != NULL); va = start; map = &curthread->td_proc->p_vmspace->vm_map; end = start + (((size_t)nr_pages) << PAGE_SHIFT); if (start < vm_map_min(map) || end > vm_map_max(map)) return (-EINVAL); prot = write ? (VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ; for (count = 0, mp = pages, va = start; va < end; mp++, va += PAGE_SIZE, count++) { *mp = pmap_extract_and_hold(map->pmap, va, prot); if (*mp == NULL) break; vm_page_lock(*mp); vm_page_wire(*mp); vm_page_unhold(*mp); vm_page_unlock(*mp); if ((prot & VM_PROT_WRITE) != 0 && (*mp)->dirty != VM_PAGE_BITS_ALL) { /* * Explicitly dirty the physical page. Otherwise, the * caller's changes may go unnoticed because they are * performed through an unmanaged mapping or by a DMA * operation. * * The object lock is not held here. * See vm_page_clear_dirty_mask(). 
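 * vm_page_dirty() records the modification so the page is not later
 * treated as clean.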
*/ vm_page_dirty(*mp); } } return (count); } long get_user_pages_remote(struct task_struct *task, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int gup_flags, struct page **pages, struct vm_area_struct **vmas) { vm_map_t map; map = &task->task_thread->td_proc->p_vmspace->vm_map; return (linux_get_user_pages_internal(map, start, nr_pages, !!(gup_flags & FOLL_WRITE), pages)); } long get_user_pages(unsigned long start, unsigned long nr_pages, int gup_flags, struct page **pages, struct vm_area_struct **vmas) { vm_map_t map; map = &curthread->td_proc->p_vmspace->vm_map; return (linux_get_user_pages_internal(map, start, nr_pages, !!(gup_flags & FOLL_WRITE), pages)); } int is_vmalloc_addr(const void *addr) { return (vtoslab((vm_offset_t)addr & ~UMA_SLAB_MASK) != NULL); } struct page * linux_shmem_read_mapping_page_gfp(vm_object_t obj, int pindex, gfp_t gfp) { vm_page_t page; int rv; if ((gfp & GFP_NOWAIT) != 0) panic("GFP_NOWAIT is unimplemented"); VM_OBJECT_WLOCK(obj); page = vm_page_grab(obj, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED); if (page->valid != VM_PAGE_BITS_ALL) { vm_page_xbusy(page); if (vm_pager_has_page(obj, pindex, NULL, NULL)) { rv = vm_pager_get_pages(obj, &page, 1, NULL, NULL); if (rv != VM_PAGER_OK) { vm_page_lock(page); vm_page_unwire(page, PQ_NONE); vm_page_free(page); vm_page_unlock(page); VM_OBJECT_WUNLOCK(obj); return (ERR_PTR(-EINVAL)); } MPASS(page->valid == VM_PAGE_BITS_ALL); } else { pmap_zero_page(page); page->valid = VM_PAGE_BITS_ALL; page->dirty = 0; } vm_page_xunbusy(page); } VM_OBJECT_WUNLOCK(obj); return (page); } struct linux_file * linux_shmem_file_setup(const char *name, loff_t size, unsigned long flags) { struct fileobj { struct linux_file file __aligned(sizeof(void *)); struct vnode vnode __aligned(sizeof(void *)); }; struct fileobj *fileobj; struct linux_file *filp; struct vnode *vp; int error; fileobj = kzalloc(sizeof(*fileobj), GFP_KERNEL); if (fileobj == NULL) { error = -ENOMEM; goto err_0; } filp = &fileobj->file; vp = &fileobj->vnode; filp->f_count = 1; filp->f_vnode = vp; filp->f_shmem = vm_pager_allocate(OBJT_DEFAULT, NULL, size, VM_PROT_READ | VM_PROT_WRITE, 0, curthread->td_ucred); if (filp->f_shmem == NULL) { error = -ENOMEM; goto err_1; } return (filp); err_1: kfree(filp); err_0: return (ERR_PTR(error)); } static vm_ooffset_t linux_invalidate_mapping_pages_sub(vm_object_t obj, vm_pindex_t start, vm_pindex_t end, int flags) { int start_count, end_count; VM_OBJECT_WLOCK(obj); start_count = obj->resident_page_count; vm_object_page_remove(obj, start, end, flags); end_count = obj->resident_page_count; VM_OBJECT_WUNLOCK(obj); return (start_count - end_count); } unsigned long linux_invalidate_mapping_pages(vm_object_t obj, pgoff_t start, pgoff_t end) { return (linux_invalidate_mapping_pages_sub(obj, start, end, OBJPR_CLEANONLY)); } void linux_shmem_truncate_range(vm_object_t obj, loff_t lstart, loff_t lend) { vm_pindex_t start = OFF_TO_IDX(lstart + PAGE_SIZE - 1); vm_pindex_t end = OFF_TO_IDX(lend + 1); (void) linux_invalidate_mapping_pages_sub(obj, start, end, 0); } Index: head/sys/compat/ndis/subr_ntoskrnl.c =================================================================== --- head/sys/compat/ndis/subr_ntoskrnl.c (revision 338106) +++ head/sys/compat/ndis/subr_ntoskrnl.c (revision 338107) @@ -1,4456 +1,4456 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2003 * Bill Paul . All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NTOSKRNL_DEBUG_TIMERS static int sysctl_show_timers(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_debug, OID_AUTO, ntoskrnl_timers, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, sysctl_show_timers, "I", "Show ntoskrnl timer stats"); #endif struct kdpc_queue { list_entry kq_disp; struct thread *kq_td; int kq_cpu; int kq_exit; int kq_running; kspin_lock kq_lock; nt_kevent kq_proc; nt_kevent kq_done; }; typedef struct kdpc_queue kdpc_queue; struct wb_ext { struct cv we_cv; struct thread *we_td; }; typedef struct wb_ext wb_ext; #define NTOSKRNL_TIMEOUTS 256 #ifdef NTOSKRNL_DEBUG_TIMERS static uint64_t ntoskrnl_timer_fires; static uint64_t ntoskrnl_timer_sets; static uint64_t ntoskrnl_timer_reloads; static uint64_t ntoskrnl_timer_cancels; #endif struct callout_entry { struct callout ce_callout; list_entry ce_list; }; typedef struct callout_entry callout_entry; static struct list_entry ntoskrnl_calllist; static struct mtx ntoskrnl_calllock; struct kuser_shared_data kuser_shared_data; static struct list_entry ntoskrnl_intlist; static kspin_lock ntoskrnl_intlock; static uint8_t RtlEqualUnicodeString(unicode_string *, unicode_string *, uint8_t); static void RtlCopyString(ansi_string *, const ansi_string *); static void RtlCopyUnicodeString(unicode_string *, unicode_string *); static irp *IoBuildSynchronousFsdRequest(uint32_t, device_object *, void *, uint32_t, uint64_t *, nt_kevent *, io_status_block *); static irp *IoBuildAsynchronousFsdRequest(uint32_t, device_object *, void *, uint32_t, uint64_t *, io_status_block *); static irp 
*IoBuildDeviceIoControlRequest(uint32_t, device_object *, void *, uint32_t, void *, uint32_t, uint8_t, nt_kevent *, io_status_block *); static irp *IoAllocateIrp(uint8_t, uint8_t); static void IoReuseIrp(irp *, uint32_t); static void IoFreeIrp(irp *); static void IoInitializeIrp(irp *, uint16_t, uint8_t); static irp *IoMakeAssociatedIrp(irp *, uint8_t); static uint32_t KeWaitForMultipleObjects(uint32_t, nt_dispatch_header **, uint32_t, uint32_t, uint32_t, uint8_t, int64_t *, wait_block *); static void ntoskrnl_waittest(nt_dispatch_header *, uint32_t); static void ntoskrnl_satisfy_wait(nt_dispatch_header *, struct thread *); static void ntoskrnl_satisfy_multiple_waits(wait_block *); static int ntoskrnl_is_signalled(nt_dispatch_header *, struct thread *); static void ntoskrnl_insert_timer(ktimer *, int); static void ntoskrnl_remove_timer(ktimer *); #ifdef NTOSKRNL_DEBUG_TIMERS static void ntoskrnl_show_timers(void); #endif static void ntoskrnl_timercall(void *); static void ntoskrnl_dpc_thread(void *); static void ntoskrnl_destroy_dpc_threads(void); static void ntoskrnl_destroy_workitem_threads(void); static void ntoskrnl_workitem_thread(void *); static void ntoskrnl_workitem(device_object *, void *); static void ntoskrnl_unicode_to_ascii(uint16_t *, char *, int); static void ntoskrnl_ascii_to_unicode(char *, uint16_t *, int); static uint8_t ntoskrnl_insert_dpc(list_entry *, kdpc *); static void WRITE_REGISTER_USHORT(uint16_t *, uint16_t); static uint16_t READ_REGISTER_USHORT(uint16_t *); static void WRITE_REGISTER_ULONG(uint32_t *, uint32_t); static uint32_t READ_REGISTER_ULONG(uint32_t *); static void WRITE_REGISTER_UCHAR(uint8_t *, uint8_t); static uint8_t READ_REGISTER_UCHAR(uint8_t *); static int64_t _allmul(int64_t, int64_t); static int64_t _alldiv(int64_t, int64_t); static int64_t _allrem(int64_t, int64_t); static int64_t _allshr(int64_t, uint8_t); static int64_t _allshl(int64_t, uint8_t); static uint64_t _aullmul(uint64_t, uint64_t); static uint64_t _aulldiv(uint64_t, uint64_t); static uint64_t _aullrem(uint64_t, uint64_t); static uint64_t _aullshr(uint64_t, uint8_t); static uint64_t _aullshl(uint64_t, uint8_t); static slist_entry *ntoskrnl_pushsl(slist_header *, slist_entry *); static void InitializeSListHead(slist_header *); static slist_entry *ntoskrnl_popsl(slist_header *); static void ExFreePoolWithTag(void *, uint32_t); static void ExInitializePagedLookasideList(paged_lookaside_list *, lookaside_alloc_func *, lookaside_free_func *, uint32_t, size_t, uint32_t, uint16_t); static void ExDeletePagedLookasideList(paged_lookaside_list *); static void ExInitializeNPagedLookasideList(npaged_lookaside_list *, lookaside_alloc_func *, lookaside_free_func *, uint32_t, size_t, uint32_t, uint16_t); static void ExDeleteNPagedLookasideList(npaged_lookaside_list *); static slist_entry *ExInterlockedPushEntrySList(slist_header *, slist_entry *, kspin_lock *); static slist_entry *ExInterlockedPopEntrySList(slist_header *, kspin_lock *); static uint32_t InterlockedIncrement(volatile uint32_t *); static uint32_t InterlockedDecrement(volatile uint32_t *); static void ExInterlockedAddLargeStatistic(uint64_t *, uint32_t); static void *MmAllocateContiguousMemory(uint32_t, uint64_t); static void *MmAllocateContiguousMemorySpecifyCache(uint32_t, uint64_t, uint64_t, uint64_t, enum nt_caching_type); static void MmFreeContiguousMemory(void *); static void MmFreeContiguousMemorySpecifyCache(void *, uint32_t, enum nt_caching_type); static uint32_t MmSizeOfMdl(void *, size_t); static void *MmMapLockedPages(mdl 
*, uint8_t); static void *MmMapLockedPagesSpecifyCache(mdl *, uint8_t, uint32_t, void *, uint32_t, uint32_t); static void MmUnmapLockedPages(void *, mdl *); static device_t ntoskrnl_finddev(device_t, uint64_t, struct resource **); static void RtlZeroMemory(void *, size_t); static void RtlSecureZeroMemory(void *, size_t); static void RtlFillMemory(void *, size_t, uint8_t); static void RtlMoveMemory(void *, const void *, size_t); static ndis_status RtlCharToInteger(const char *, uint32_t, uint32_t *); static void RtlCopyMemory(void *, const void *, size_t); static size_t RtlCompareMemory(const void *, const void *, size_t); static ndis_status RtlUnicodeStringToInteger(unicode_string *, uint32_t, uint32_t *); static int atoi (const char *); static long atol (const char *); static int rand(void); static void srand(unsigned int); static void KeQuerySystemTime(uint64_t *); static uint32_t KeTickCount(void); static uint8_t IoIsWdmVersionAvailable(uint8_t, uint8_t); static int32_t IoOpenDeviceRegistryKey(struct device_object *, uint32_t, uint32_t, void **); static void ntoskrnl_thrfunc(void *); static ndis_status PsCreateSystemThread(ndis_handle *, uint32_t, void *, ndis_handle, void *, void *, void *); static ndis_status PsTerminateSystemThread(ndis_status); static ndis_status IoGetDeviceObjectPointer(unicode_string *, uint32_t, void *, device_object *); static ndis_status IoGetDeviceProperty(device_object *, uint32_t, uint32_t, void *, uint32_t *); static void KeInitializeMutex(kmutant *, uint32_t); static uint32_t KeReleaseMutex(kmutant *, uint8_t); static uint32_t KeReadStateMutex(kmutant *); static ndis_status ObReferenceObjectByHandle(ndis_handle, uint32_t, void *, uint8_t, void **, void **); static void ObfDereferenceObject(void *); static uint32_t ZwClose(ndis_handle); static uint32_t WmiQueryTraceInformation(uint32_t, void *, uint32_t, uint32_t, void *); static uint32_t WmiTraceMessage(uint64_t, uint32_t, void *, uint16_t, ...); static uint32_t IoWMIRegistrationControl(device_object *, uint32_t); static void *ntoskrnl_memset(void *, int, size_t); static void *ntoskrnl_memmove(void *, void *, size_t); static void *ntoskrnl_memchr(void *, unsigned char, size_t); static char *ntoskrnl_strstr(char *, char *); static char *ntoskrnl_strncat(char *, char *, size_t); static int ntoskrnl_toupper(int); static int ntoskrnl_tolower(int); static funcptr ntoskrnl_findwrap(funcptr); static uint32_t DbgPrint(char *, ...); static void DbgBreakPoint(void); static void KeBugCheckEx(uint32_t, u_long, u_long, u_long, u_long); static int32_t KeDelayExecutionThread(uint8_t, uint8_t, int64_t *); static int32_t KeSetPriorityThread(struct thread *, int32_t); static void dummy(void); static struct mtx ntoskrnl_dispatchlock; static struct mtx ntoskrnl_interlock; static kspin_lock ntoskrnl_cancellock; static int ntoskrnl_kth = 0; static struct nt_objref_head ntoskrnl_reflist; static uma_zone_t mdl_zone; static uma_zone_t iw_zone; static struct kdpc_queue *kq_queues; static struct kdpc_queue *wq_queues; static int wq_idx = 0; int ntoskrnl_libinit() { image_patch_table *patch; int error; struct proc *p; kdpc_queue *kq; callout_entry *e; int i; mtx_init(&ntoskrnl_dispatchlock, "ntoskrnl dispatch lock", MTX_NDIS_LOCK, MTX_DEF|MTX_RECURSE); mtx_init(&ntoskrnl_interlock, MTX_NTOSKRNL_SPIN_LOCK, NULL, MTX_SPIN); KeInitializeSpinLock(&ntoskrnl_cancellock); KeInitializeSpinLock(&ntoskrnl_intlock); TAILQ_INIT(&ntoskrnl_reflist); InitializeListHead(&ntoskrnl_calllist); InitializeListHead(&ntoskrnl_intlist); 
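	/*
	 * The remainder of ntoskrnl_libinit() initializes the callout lock,
	 * allocates the DPC and workitem queue arrays from NonPagedPool,
	 * launches one kernel thread per queue with kproc_create(), wraps
	 * every entry in ntoskrnl_functbl, preallocates NTOSKRNL_TIMEOUTS
	 * callout entries and creates the MDL and workitem UMA zones.
	 */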
mtx_init(&ntoskrnl_calllock, MTX_NTOSKRNL_SPIN_LOCK, NULL, MTX_SPIN); kq_queues = ExAllocatePoolWithTag(NonPagedPool, #ifdef NTOSKRNL_MULTIPLE_DPCS sizeof(kdpc_queue) * mp_ncpus, 0); #else sizeof(kdpc_queue), 0); #endif if (kq_queues == NULL) return (ENOMEM); wq_queues = ExAllocatePoolWithTag(NonPagedPool, sizeof(kdpc_queue) * WORKITEM_THREADS, 0); if (wq_queues == NULL) return (ENOMEM); #ifdef NTOSKRNL_MULTIPLE_DPCS bzero((char *)kq_queues, sizeof(kdpc_queue) * mp_ncpus); #else bzero((char *)kq_queues, sizeof(kdpc_queue)); #endif bzero((char *)wq_queues, sizeof(kdpc_queue) * WORKITEM_THREADS); /* * Launch the DPC threads. */ #ifdef NTOSKRNL_MULTIPLE_DPCS for (i = 0; i < mp_ncpus; i++) { #else for (i = 0; i < 1; i++) { #endif kq = kq_queues + i; kq->kq_cpu = i; error = kproc_create(ntoskrnl_dpc_thread, kq, &p, RFHIGHPID, NDIS_KSTACK_PAGES, "Windows DPC %d", i); if (error) panic("failed to launch DPC thread"); } /* * Launch the workitem threads. */ for (i = 0; i < WORKITEM_THREADS; i++) { kq = wq_queues + i; error = kproc_create(ntoskrnl_workitem_thread, kq, &p, RFHIGHPID, NDIS_KSTACK_PAGES, "Windows Workitem %d", i); if (error) panic("failed to launch workitem thread"); } patch = ntoskrnl_functbl; while (patch->ipt_func != NULL) { windrv_wrap((funcptr)patch->ipt_func, (funcptr *)&patch->ipt_wrap, patch->ipt_argcnt, patch->ipt_ftype); patch++; } for (i = 0; i < NTOSKRNL_TIMEOUTS; i++) { e = ExAllocatePoolWithTag(NonPagedPool, sizeof(callout_entry), 0); if (e == NULL) panic("failed to allocate timeouts"); mtx_lock_spin(&ntoskrnl_calllock); InsertHeadList((&ntoskrnl_calllist), (&e->ce_list)); mtx_unlock_spin(&ntoskrnl_calllock); } /* * MDLs are supposed to be variable size (they describe * buffers containing some number of pages, but we don't * know ahead of time how many pages that will be). But * always allocating them off the heap is very slow. As * a compromise, we create an MDL UMA zone big enough to * handle any buffer requiring up to 16 pages, and we * use those for any MDLs for buffers of 16 pages or less * in size. For buffers larger than that (which we assume * will be few and far between, we allocate the MDLs off * the heap. */ mdl_zone = uma_zcreate("Windows MDL", MDL_ZONE_SIZE, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); iw_zone = uma_zcreate("Windows WorkItem", sizeof(io_workitem), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); return (0); } int ntoskrnl_libfini() { image_patch_table *patch; callout_entry *e; list_entry *l; patch = ntoskrnl_functbl; while (patch->ipt_func != NULL) { windrv_unwrap(patch->ipt_wrap); patch++; } /* Stop the workitem queues. */ ntoskrnl_destroy_workitem_threads(); /* Stop the DPC queues. */ ntoskrnl_destroy_dpc_threads(); ExFreePool(kq_queues); ExFreePool(wq_queues); uma_zdestroy(mdl_zone); uma_zdestroy(iw_zone); mtx_lock_spin(&ntoskrnl_calllock); while(!IsListEmpty(&ntoskrnl_calllist)) { l = RemoveHeadList(&ntoskrnl_calllist); e = CONTAINING_RECORD(l, callout_entry, ce_list); mtx_unlock_spin(&ntoskrnl_calllock); ExFreePool(e); mtx_lock_spin(&ntoskrnl_calllock); } mtx_unlock_spin(&ntoskrnl_calllock); mtx_destroy(&ntoskrnl_dispatchlock); mtx_destroy(&ntoskrnl_interlock); mtx_destroy(&ntoskrnl_calllock); return (0); } /* * We need to be able to reference this externally from the wrapper; * GCC only generates a local implementation of memset. 
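 * In practice this means the import patch table needs a real exported
 * symbol to hand to Windows binaries that call memset(); ntoskrnl_memset()
 * below is that symbol and simply forwards to the kernel's memset().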
*/ static void * ntoskrnl_memset(buf, ch, size) void *buf; int ch; size_t size; { return (memset(buf, ch, size)); } static void * ntoskrnl_memmove(dst, src, size) void *src; void *dst; size_t size; { bcopy(src, dst, size); return (dst); } static void * ntoskrnl_memchr(void *buf, unsigned char ch, size_t len) { if (len != 0) { unsigned char *p = buf; do { if (*p++ == ch) return (p - 1); } while (--len != 0); } return (NULL); } static char * ntoskrnl_strstr(s, find) char *s, *find; { char c, sc; size_t len; if ((c = *find++) != 0) { len = strlen(find); do { do { if ((sc = *s++) == 0) return (NULL); } while (sc != c); } while (strncmp(s, find, len) != 0); s--; } return ((char *)s); } /* Taken from libc */ static char * ntoskrnl_strncat(dst, src, n) char *dst; char *src; size_t n; { if (n != 0) { char *d = dst; const char *s = src; while (*d != 0) d++; do { if ((*d = *s++) == 0) break; d++; } while (--n != 0); *d = 0; } return (dst); } static int ntoskrnl_toupper(c) int c; { return (toupper(c)); } static int ntoskrnl_tolower(c) int c; { return (tolower(c)); } static uint8_t RtlEqualUnicodeString(unicode_string *str1, unicode_string *str2, uint8_t caseinsensitive) { int i; if (str1->us_len != str2->us_len) return (FALSE); for (i = 0; i < str1->us_len; i++) { if (caseinsensitive == TRUE) { if (toupper((char)(str1->us_buf[i] & 0xFF)) != toupper((char)(str2->us_buf[i] & 0xFF))) return (FALSE); } else { if (str1->us_buf[i] != str2->us_buf[i]) return (FALSE); } } return (TRUE); } static void RtlCopyString(dst, src) ansi_string *dst; const ansi_string *src; { if (src != NULL && src->as_buf != NULL && dst->as_buf != NULL) { dst->as_len = min(src->as_len, dst->as_maxlen); memcpy(dst->as_buf, src->as_buf, dst->as_len); if (dst->as_len < dst->as_maxlen) dst->as_buf[dst->as_len] = 0; } else dst->as_len = 0; } static void RtlCopyUnicodeString(dest, src) unicode_string *dest; unicode_string *src; { if (dest->us_maxlen >= src->us_len) dest->us_len = src->us_len; else dest->us_len = dest->us_maxlen; memcpy(dest->us_buf, src->us_buf, dest->us_len); } static void ntoskrnl_ascii_to_unicode(ascii, unicode, len) char *ascii; uint16_t *unicode; int len; { int i; uint16_t *ustr; ustr = unicode; for (i = 0; i < len; i++) { *ustr = (uint16_t)ascii[i]; ustr++; } } static void ntoskrnl_unicode_to_ascii(unicode, ascii, len) uint16_t *unicode; char *ascii; int len; { int i; uint8_t *astr; astr = ascii; for (i = 0; i < len / 2; i++) { *astr = (uint8_t)unicode[i]; astr++; } } uint32_t RtlUnicodeStringToAnsiString(ansi_string *dest, unicode_string *src, uint8_t allocate) { if (dest == NULL || src == NULL) return (STATUS_INVALID_PARAMETER); dest->as_len = src->us_len / 2; if (dest->as_maxlen < dest->as_len) dest->as_len = dest->as_maxlen; if (allocate == TRUE) { dest->as_buf = ExAllocatePoolWithTag(NonPagedPool, (src->us_len / 2) + 1, 0); if (dest->as_buf == NULL) return (STATUS_INSUFFICIENT_RESOURCES); dest->as_len = dest->as_maxlen = src->us_len / 2; } else { dest->as_len = src->us_len / 2; /* XXX */ if (dest->as_maxlen < dest->as_len) dest->as_len = dest->as_maxlen; } ntoskrnl_unicode_to_ascii(src->us_buf, dest->as_buf, dest->as_len * 2); return (STATUS_SUCCESS); } uint32_t RtlAnsiStringToUnicodeString(unicode_string *dest, ansi_string *src, uint8_t allocate) { if (dest == NULL || src == NULL) return (STATUS_INVALID_PARAMETER); if (allocate == TRUE) { dest->us_buf = ExAllocatePoolWithTag(NonPagedPool, src->as_len * 2, 0); if (dest->us_buf == NULL) return (STATUS_INSUFFICIENT_RESOURCES); dest->us_len = dest->us_maxlen = 
strlen(src->as_buf) * 2; } else { dest->us_len = src->as_len * 2; /* XXX */ if (dest->us_maxlen < dest->us_len) dest->us_len = dest->us_maxlen; } ntoskrnl_ascii_to_unicode(src->as_buf, dest->us_buf, dest->us_len / 2); return (STATUS_SUCCESS); } void * ExAllocatePoolWithTag(pooltype, len, tag) uint32_t pooltype; size_t len; uint32_t tag; { void *buf; buf = malloc(len, M_DEVBUF, M_NOWAIT|M_ZERO); if (buf == NULL) return (NULL); return (buf); } static void ExFreePoolWithTag(buf, tag) void *buf; uint32_t tag; { ExFreePool(buf); } void ExFreePool(buf) void *buf; { free(buf, M_DEVBUF); } uint32_t IoAllocateDriverObjectExtension(drv, clid, extlen, ext) driver_object *drv; void *clid; uint32_t extlen; void **ext; { custom_extension *ce; ce = ExAllocatePoolWithTag(NonPagedPool, sizeof(custom_extension) + extlen, 0); if (ce == NULL) return (STATUS_INSUFFICIENT_RESOURCES); ce->ce_clid = clid; InsertTailList((&drv->dro_driverext->dre_usrext), (&ce->ce_list)); *ext = (void *)(ce + 1); return (STATUS_SUCCESS); } void * IoGetDriverObjectExtension(drv, clid) driver_object *drv; void *clid; { list_entry *e; custom_extension *ce; /* * Sanity check. Our dummy bus drivers don't have * any driver extensions. */ if (drv->dro_driverext == NULL) return (NULL); e = drv->dro_driverext->dre_usrext.nle_flink; while (e != &drv->dro_driverext->dre_usrext) { ce = (custom_extension *)e; if (ce->ce_clid == clid) return ((void *)(ce + 1)); e = e->nle_flink; } return (NULL); } uint32_t IoCreateDevice(driver_object *drv, uint32_t devextlen, unicode_string *devname, uint32_t devtype, uint32_t devchars, uint8_t exclusive, device_object **newdev) { device_object *dev; dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device_object), 0); if (dev == NULL) return (STATUS_INSUFFICIENT_RESOURCES); dev->do_type = devtype; dev->do_drvobj = drv; dev->do_currirp = NULL; dev->do_flags = 0; if (devextlen) { dev->do_devext = ExAllocatePoolWithTag(NonPagedPool, devextlen, 0); if (dev->do_devext == NULL) { ExFreePool(dev); return (STATUS_INSUFFICIENT_RESOURCES); } bzero(dev->do_devext, devextlen); } else dev->do_devext = NULL; dev->do_size = sizeof(device_object) + devextlen; dev->do_refcnt = 1; dev->do_attacheddev = NULL; dev->do_nextdev = NULL; dev->do_devtype = devtype; dev->do_stacksize = 1; dev->do_alignreq = 1; dev->do_characteristics = devchars; dev->do_iotimer = NULL; KeInitializeEvent(&dev->do_devlock, EVENT_TYPE_SYNC, TRUE); /* * Vpd is used for disk/tape devices, * but we don't support those. (Yet.) */ dev->do_vpb = NULL; dev->do_devobj_ext = ExAllocatePoolWithTag(NonPagedPool, sizeof(devobj_extension), 0); if (dev->do_devobj_ext == NULL) { if (dev->do_devext != NULL) ExFreePool(dev->do_devext); ExFreePool(dev); return (STATUS_INSUFFICIENT_RESOURCES); } dev->do_devobj_ext->dve_type = 0; dev->do_devobj_ext->dve_size = sizeof(devobj_extension); dev->do_devobj_ext->dve_devobj = dev; /* * Attach this device to the driver object's list * of devices. Note: this is not the same as attaching * the device to the device stack. The driver's AddDevice * routine must explicitly call IoAddDeviceToDeviceStack() * to do that. 
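 * (The stacking routine implemented further down is
 * IoAttachDeviceToDeviceStack(), which puts the new device_object on
 * top of the existing stack and bumps do_stacksize accordingly.)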
*/ if (drv->dro_devobj == NULL) { drv->dro_devobj = dev; dev->do_nextdev = NULL; } else { dev->do_nextdev = drv->dro_devobj; drv->dro_devobj = dev; } *newdev = dev; return (STATUS_SUCCESS); } void IoDeleteDevice(dev) device_object *dev; { device_object *prev; if (dev == NULL) return; if (dev->do_devobj_ext != NULL) ExFreePool(dev->do_devobj_ext); if (dev->do_devext != NULL) ExFreePool(dev->do_devext); /* Unlink the device from the driver's device list. */ prev = dev->do_drvobj->dro_devobj; if (prev == dev) dev->do_drvobj->dro_devobj = dev->do_nextdev; else { while (prev->do_nextdev != dev) prev = prev->do_nextdev; prev->do_nextdev = dev->do_nextdev; } ExFreePool(dev); } device_object * IoGetAttachedDevice(dev) device_object *dev; { device_object *d; if (dev == NULL) return (NULL); d = dev; while (d->do_attacheddev != NULL) d = d->do_attacheddev; return (d); } static irp * IoBuildSynchronousFsdRequest(func, dobj, buf, len, off, event, status) uint32_t func; device_object *dobj; void *buf; uint32_t len; uint64_t *off; nt_kevent *event; io_status_block *status; { irp *ip; ip = IoBuildAsynchronousFsdRequest(func, dobj, buf, len, off, status); if (ip == NULL) return (NULL); ip->irp_usrevent = event; return (ip); } static irp * IoBuildAsynchronousFsdRequest(func, dobj, buf, len, off, status) uint32_t func; device_object *dobj; void *buf; uint32_t len; uint64_t *off; io_status_block *status; { irp *ip; io_stack_location *sl; ip = IoAllocateIrp(dobj->do_stacksize, TRUE); if (ip == NULL) return (NULL); ip->irp_usriostat = status; ip->irp_tail.irp_overlay.irp_thread = NULL; sl = IoGetNextIrpStackLocation(ip); sl->isl_major = func; sl->isl_minor = 0; sl->isl_flags = 0; sl->isl_ctl = 0; sl->isl_devobj = dobj; sl->isl_fileobj = NULL; sl->isl_completionfunc = NULL; ip->irp_userbuf = buf; if (dobj->do_flags & DO_BUFFERED_IO) { ip->irp_assoc.irp_sysbuf = ExAllocatePoolWithTag(NonPagedPool, len, 0); if (ip->irp_assoc.irp_sysbuf == NULL) { IoFreeIrp(ip); return (NULL); } bcopy(buf, ip->irp_assoc.irp_sysbuf, len); } if (dobj->do_flags & DO_DIRECT_IO) { ip->irp_mdl = IoAllocateMdl(buf, len, FALSE, FALSE, ip); if (ip->irp_mdl == NULL) { if (ip->irp_assoc.irp_sysbuf != NULL) ExFreePool(ip->irp_assoc.irp_sysbuf); IoFreeIrp(ip); return (NULL); } ip->irp_userbuf = NULL; ip->irp_assoc.irp_sysbuf = NULL; } if (func == IRP_MJ_READ) { sl->isl_parameters.isl_read.isl_len = len; if (off != NULL) sl->isl_parameters.isl_read.isl_byteoff = *off; else sl->isl_parameters.isl_read.isl_byteoff = 0; } if (func == IRP_MJ_WRITE) { sl->isl_parameters.isl_write.isl_len = len; if (off != NULL) sl->isl_parameters.isl_write.isl_byteoff = *off; else sl->isl_parameters.isl_write.isl_byteoff = 0; } return (ip); } static irp * IoBuildDeviceIoControlRequest(uint32_t iocode, device_object *dobj, void *ibuf, uint32_t ilen, void *obuf, uint32_t olen, uint8_t isinternal, nt_kevent *event, io_status_block *status) { irp *ip; io_stack_location *sl; uint32_t buflen; ip = IoAllocateIrp(dobj->do_stacksize, TRUE); if (ip == NULL) return (NULL); ip->irp_usrevent = event; ip->irp_usriostat = status; ip->irp_tail.irp_overlay.irp_thread = NULL; sl = IoGetNextIrpStackLocation(ip); sl->isl_major = isinternal == TRUE ? 
IRP_MJ_INTERNAL_DEVICE_CONTROL : IRP_MJ_DEVICE_CONTROL; sl->isl_minor = 0; sl->isl_flags = 0; sl->isl_ctl = 0; sl->isl_devobj = dobj; sl->isl_fileobj = NULL; sl->isl_completionfunc = NULL; sl->isl_parameters.isl_ioctl.isl_iocode = iocode; sl->isl_parameters.isl_ioctl.isl_ibuflen = ilen; sl->isl_parameters.isl_ioctl.isl_obuflen = olen; switch(IO_METHOD(iocode)) { case METHOD_BUFFERED: if (ilen > olen) buflen = ilen; else buflen = olen; if (buflen) { ip->irp_assoc.irp_sysbuf = ExAllocatePoolWithTag(NonPagedPool, buflen, 0); if (ip->irp_assoc.irp_sysbuf == NULL) { IoFreeIrp(ip); return (NULL); } } if (ilen && ibuf != NULL) { bcopy(ibuf, ip->irp_assoc.irp_sysbuf, ilen); bzero((char *)ip->irp_assoc.irp_sysbuf + ilen, buflen - ilen); } else bzero(ip->irp_assoc.irp_sysbuf, ilen); ip->irp_userbuf = obuf; break; case METHOD_IN_DIRECT: case METHOD_OUT_DIRECT: if (ilen && ibuf != NULL) { ip->irp_assoc.irp_sysbuf = ExAllocatePoolWithTag(NonPagedPool, ilen, 0); if (ip->irp_assoc.irp_sysbuf == NULL) { IoFreeIrp(ip); return (NULL); } bcopy(ibuf, ip->irp_assoc.irp_sysbuf, ilen); } if (olen && obuf != NULL) { ip->irp_mdl = IoAllocateMdl(obuf, olen, FALSE, FALSE, ip); /* * Normally we would MmProbeAndLockPages() * here, but we don't have to in our * imlementation. */ } break; case METHOD_NEITHER: ip->irp_userbuf = obuf; sl->isl_parameters.isl_ioctl.isl_type3ibuf = ibuf; break; default: break; } /* * Ideally, we should associate this IRP with the calling * thread here. */ return (ip); } static irp * IoAllocateIrp(uint8_t stsize, uint8_t chargequota) { irp *i; i = ExAllocatePoolWithTag(NonPagedPool, IoSizeOfIrp(stsize), 0); if (i == NULL) return (NULL); IoInitializeIrp(i, IoSizeOfIrp(stsize), stsize); return (i); } static irp * IoMakeAssociatedIrp(irp *ip, uint8_t stsize) { irp *associrp; associrp = IoAllocateIrp(stsize, FALSE); if (associrp == NULL) return (NULL); mtx_lock(&ntoskrnl_dispatchlock); associrp->irp_flags |= IRP_ASSOCIATED_IRP; associrp->irp_tail.irp_overlay.irp_thread = ip->irp_tail.irp_overlay.irp_thread; associrp->irp_assoc.irp_master = ip; mtx_unlock(&ntoskrnl_dispatchlock); return (associrp); } static void IoFreeIrp(ip) irp *ip; { ExFreePool(ip); } static void IoInitializeIrp(irp *io, uint16_t psize, uint8_t ssize) { bzero((char *)io, IoSizeOfIrp(ssize)); io->irp_size = psize; io->irp_stackcnt = ssize; io->irp_currentstackloc = ssize; InitializeListHead(&io->irp_thlist); io->irp_tail.irp_overlay.irp_csl = (io_stack_location *)(io + 1) + ssize; } static void IoReuseIrp(ip, status) irp *ip; uint32_t status; { uint8_t allocflags; allocflags = ip->irp_allocflags; IoInitializeIrp(ip, ip->irp_size, ip->irp_stackcnt); ip->irp_iostat.isb_status = status; ip->irp_allocflags = allocflags; } void IoAcquireCancelSpinLock(uint8_t *irql) { KeAcquireSpinLock(&ntoskrnl_cancellock, irql); } void IoReleaseCancelSpinLock(uint8_t irql) { KeReleaseSpinLock(&ntoskrnl_cancellock, irql); } uint8_t IoCancelIrp(irp *ip) { cancel_func cfunc; uint8_t cancelirql; IoAcquireCancelSpinLock(&cancelirql); cfunc = IoSetCancelRoutine(ip, NULL); ip->irp_cancel = TRUE; if (cfunc == NULL) { IoReleaseCancelSpinLock(cancelirql); return (FALSE); } ip->irp_cancelirql = cancelirql; MSCALL2(cfunc, IoGetCurrentIrpStackLocation(ip)->isl_devobj, ip); return (uint8_t)IoSetCancelValue(ip, TRUE); } uint32_t IofCallDriver(dobj, ip) device_object *dobj; irp *ip; { driver_object *drvobj; io_stack_location *sl; uint32_t status; driver_dispatch disp; drvobj = dobj->do_drvobj; if (ip->irp_currentstackloc <= 0) panic("IoCallDriver(): out of stack 
locations"); IoSetNextIrpStackLocation(ip); sl = IoGetCurrentIrpStackLocation(ip); sl->isl_devobj = dobj; disp = drvobj->dro_dispatch[sl->isl_major]; status = MSCALL2(disp, dobj, ip); return (status); } void IofCompleteRequest(irp *ip, uint8_t prioboost) { uint32_t status; device_object *dobj; io_stack_location *sl; completion_func cf; KASSERT(ip->irp_iostat.isb_status != STATUS_PENDING, ("incorrect IRP(%p) status (STATUS_PENDING)", ip)); sl = IoGetCurrentIrpStackLocation(ip); IoSkipCurrentIrpStackLocation(ip); do { if (sl->isl_ctl & SL_PENDING_RETURNED) ip->irp_pendingreturned = TRUE; if (ip->irp_currentstackloc != (ip->irp_stackcnt + 1)) dobj = IoGetCurrentIrpStackLocation(ip)->isl_devobj; else dobj = NULL; if (sl->isl_completionfunc != NULL && ((ip->irp_iostat.isb_status == STATUS_SUCCESS && sl->isl_ctl & SL_INVOKE_ON_SUCCESS) || (ip->irp_iostat.isb_status != STATUS_SUCCESS && sl->isl_ctl & SL_INVOKE_ON_ERROR) || (ip->irp_cancel == TRUE && sl->isl_ctl & SL_INVOKE_ON_CANCEL))) { cf = sl->isl_completionfunc; status = MSCALL3(cf, dobj, ip, sl->isl_completionctx); if (status == STATUS_MORE_PROCESSING_REQUIRED) return; } else { if ((ip->irp_currentstackloc <= ip->irp_stackcnt) && (ip->irp_pendingreturned == TRUE)) IoMarkIrpPending(ip); } /* move to the next. */ IoSkipCurrentIrpStackLocation(ip); sl++; } while (ip->irp_currentstackloc <= (ip->irp_stackcnt + 1)); if (ip->irp_usriostat != NULL) *ip->irp_usriostat = ip->irp_iostat; if (ip->irp_usrevent != NULL) KeSetEvent(ip->irp_usrevent, prioboost, FALSE); /* Handle any associated IRPs. */ if (ip->irp_flags & IRP_ASSOCIATED_IRP) { uint32_t masterirpcnt; irp *masterirp; mdl *m; masterirp = ip->irp_assoc.irp_master; masterirpcnt = InterlockedDecrement(&masterirp->irp_assoc.irp_irpcnt); while ((m = ip->irp_mdl) != NULL) { ip->irp_mdl = m->mdl_next; IoFreeMdl(m); } IoFreeIrp(ip); if (masterirpcnt == 0) IoCompleteRequest(masterirp, IO_NO_INCREMENT); return; } /* With any luck, these conditions will never arise. */ if (ip->irp_flags & IRP_PAGING_IO) { if (ip->irp_mdl != NULL) IoFreeMdl(ip->irp_mdl); IoFreeIrp(ip); } } void ntoskrnl_intr(arg) void *arg; { kinterrupt *iobj; uint8_t irql; uint8_t claimed; list_entry *l; KeAcquireSpinLock(&ntoskrnl_intlock, &irql); l = ntoskrnl_intlist.nle_flink; while (l != &ntoskrnl_intlist) { iobj = CONTAINING_RECORD(l, kinterrupt, ki_list); claimed = MSCALL2(iobj->ki_svcfunc, iobj, iobj->ki_svcctx); if (claimed == TRUE) break; l = l->nle_flink; } KeReleaseSpinLock(&ntoskrnl_intlock, irql); } uint8_t KeAcquireInterruptSpinLock(iobj) kinterrupt *iobj; { uint8_t irql; KeAcquireSpinLock(&ntoskrnl_intlock, &irql); return (irql); } void KeReleaseInterruptSpinLock(kinterrupt *iobj, uint8_t irql) { KeReleaseSpinLock(&ntoskrnl_intlock, irql); } uint8_t KeSynchronizeExecution(iobj, syncfunc, syncctx) kinterrupt *iobj; void *syncfunc; void *syncctx; { uint8_t irql; KeAcquireSpinLock(&ntoskrnl_intlock, &irql); MSCALL1(syncfunc, syncctx); KeReleaseSpinLock(&ntoskrnl_intlock, irql); return (TRUE); } /* * IoConnectInterrupt() is passed only the interrupt vector and * irql that a device wants to use, but no device-specific tag * of any kind. This conflicts rather badly with FreeBSD's * bus_setup_intr(), which needs the device_t for the device * requesting interrupt delivery. In order to bypass this * inconsistency, we implement a second level of interrupt * dispatching on top of bus_setup_intr(). 
All devices use * ntoskrnl_intr() as their ISR, and any device requesting * interrupts will be registered with ntoskrnl_intr()'s interrupt * dispatch list. When an interrupt arrives, we walk the list * and invoke all the registered ISRs. This effectively makes all * interrupts shared, but it's the only way to duplicate the * semantics of IoConnectInterrupt() and IoDisconnectInterrupt() properly. */ uint32_t IoConnectInterrupt(kinterrupt **iobj, void *svcfunc, void *svcctx, kspin_lock *lock, uint32_t vector, uint8_t irql, uint8_t syncirql, uint8_t imode, uint8_t shared, uint32_t affinity, uint8_t savefloat) { uint8_t curirql; *iobj = ExAllocatePoolWithTag(NonPagedPool, sizeof(kinterrupt), 0); if (*iobj == NULL) return (STATUS_INSUFFICIENT_RESOURCES); (*iobj)->ki_svcfunc = svcfunc; (*iobj)->ki_svcctx = svcctx; if (lock == NULL) { KeInitializeSpinLock(&(*iobj)->ki_lock_priv); (*iobj)->ki_lock = &(*iobj)->ki_lock_priv; } else (*iobj)->ki_lock = lock; KeAcquireSpinLock(&ntoskrnl_intlock, &curirql); InsertHeadList((&ntoskrnl_intlist), (&(*iobj)->ki_list)); KeReleaseSpinLock(&ntoskrnl_intlock, curirql); return (STATUS_SUCCESS); } void IoDisconnectInterrupt(iobj) kinterrupt *iobj; { uint8_t irql; if (iobj == NULL) return; KeAcquireSpinLock(&ntoskrnl_intlock, &irql); RemoveEntryList((&iobj->ki_list)); KeReleaseSpinLock(&ntoskrnl_intlock, irql); ExFreePool(iobj); } device_object * IoAttachDeviceToDeviceStack(src, dst) device_object *src; device_object *dst; { device_object *attached; mtx_lock(&ntoskrnl_dispatchlock); attached = IoGetAttachedDevice(dst); attached->do_attacheddev = src; src->do_attacheddev = NULL; src->do_stacksize = attached->do_stacksize + 1; mtx_unlock(&ntoskrnl_dispatchlock); return (attached); } void IoDetachDevice(topdev) device_object *topdev; { device_object *tail; mtx_lock(&ntoskrnl_dispatchlock); /* First, break the chain. */ tail = topdev->do_attacheddev; if (tail == NULL) { mtx_unlock(&ntoskrnl_dispatchlock); return; } topdev->do_attacheddev = tail->do_attacheddev; topdev->do_refcnt--; /* Now reduce the stacksize count for the takm_il objects. */ tail = topdev->do_attacheddev; while (tail != NULL) { tail->do_stacksize--; tail = tail->do_attacheddev; } mtx_unlock(&ntoskrnl_dispatchlock); } /* * For the most part, an object is considered signalled if * dh_sigstate == TRUE. The exception is for mutant objects * (mutexes), where the logic works like this: * * - If the thread already owns the object and sigstate is * less than or equal to 0, then the object is considered * signalled (recursive acquisition). * - If dh_sigstate == 1, the object is also considered * signalled. */ static int ntoskrnl_is_signalled(obj, td) nt_dispatch_header *obj; struct thread *td; { kmutant *km; if (obj->dh_type == DISP_TYPE_MUTANT) { km = (kmutant *)obj; if ((obj->dh_sigstate <= 0 && km->km_ownerthread == td) || obj->dh_sigstate == 1) return (TRUE); return (FALSE); } if (obj->dh_sigstate > 0) return (TRUE); return (FALSE); } static void ntoskrnl_satisfy_wait(obj, td) nt_dispatch_header *obj; struct thread *td; { kmutant *km; switch (obj->dh_type) { case DISP_TYPE_MUTANT: km = (struct kmutant *)obj; obj->dh_sigstate--; /* * If sigstate reaches 0, the mutex is now * non-signalled (the new thread owns it). */ if (obj->dh_sigstate == 0) { km->km_ownerthread = td; if (km->km_abandoned == TRUE) km->km_abandoned = FALSE; } break; /* Synchronization objects get reset to unsignalled. 
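 * (A synchronization event or timer wakes a single waiter and is
 * cleared back to the non-signalled state here; a notification event
 * stays signalled and wakes every waiter, as the long comment ahead of
 * KeWaitForSingleObject() explains.)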
*/ case DISP_TYPE_SYNCHRONIZATION_EVENT: case DISP_TYPE_SYNCHRONIZATION_TIMER: obj->dh_sigstate = 0; break; case DISP_TYPE_SEMAPHORE: obj->dh_sigstate--; break; default: break; } } static void ntoskrnl_satisfy_multiple_waits(wb) wait_block *wb; { wait_block *cur; struct thread *td; cur = wb; td = wb->wb_kthread; do { ntoskrnl_satisfy_wait(wb->wb_object, td); cur->wb_awakened = TRUE; cur = cur->wb_next; } while (cur != wb); } /* Always called with dispatcher lock held. */ static void ntoskrnl_waittest(obj, increment) nt_dispatch_header *obj; uint32_t increment; { wait_block *w, *next; list_entry *e; struct thread *td; wb_ext *we; int satisfied; /* * Once an object has been signalled, we walk its list of * wait blocks. If a wait block can be awakened, then satisfy * waits as necessary and wake the thread. * * The rules work like this: * * If a wait block is marked as WAITTYPE_ANY, then * we can satisfy the wait conditions on the current * object and wake the thread right away. Satisfying * the wait also has the effect of breaking us out * of the search loop. * * If the object is marked as WAITTYLE_ALL, then the * wait block will be part of a circularly linked * list of wait blocks belonging to a waiting thread * that's sleeping in KeWaitForMultipleObjects(). In * order to wake the thread, all the objects in the * wait list must be in the signalled state. If they * are, we then satisfy all of them and wake the * thread. * */ e = obj->dh_waitlisthead.nle_flink; while (e != &obj->dh_waitlisthead && obj->dh_sigstate > 0) { w = CONTAINING_RECORD(e, wait_block, wb_waitlist); we = w->wb_ext; td = we->we_td; satisfied = FALSE; if (w->wb_waittype == WAITTYPE_ANY) { /* * Thread can be awakened if * any wait is satisfied. */ ntoskrnl_satisfy_wait(obj, td); satisfied = TRUE; w->wb_awakened = TRUE; } else { /* * Thread can only be woken up * if all waits are satisfied. * If the thread is waiting on multiple * objects, they should all be linked * through the wb_next pointers in the * wait blocks. */ satisfied = TRUE; next = w->wb_next; while (next != w) { if (ntoskrnl_is_signalled(obj, td) == FALSE) { satisfied = FALSE; break; } next = next->wb_next; } ntoskrnl_satisfy_multiple_waits(w); } if (satisfied == TRUE) cv_broadcastpri(&we->we_cv, (w->wb_oldpri - (increment * 4)) > PRI_MIN_KERN ? w->wb_oldpri - (increment * 4) : PRI_MIN_KERN); e = e->nle_flink; } } /* * Return the number of 100 nanosecond intervals since * January 1, 1601. (?!?!) */ void ntoskrnl_time(tval) uint64_t *tval; { struct timespec ts; nanotime(&ts); *tval = (uint64_t)ts.tv_nsec / 100 + (uint64_t)ts.tv_sec * 10000000 + 11644473600 * 10000000; /* 100ns ticks from 1601 to 1970 */ } static void KeQuerySystemTime(current_time) uint64_t *current_time; { ntoskrnl_time(current_time); } static uint32_t KeTickCount(void) { struct timeval tv; getmicrouptime(&tv); return tvtohz(&tv); } /* * KeWaitForSingleObject() is a tricky beast, because it can be used * with several different object types: semaphores, timers, events, * mutexes and threads. Semaphores don't appear very often, but the * other object types are quite common. KeWaitForSingleObject() is * what's normally used to acquire a mutex, and it can be used to * wait for a thread termination. * * The Windows NDIS API is implemented in terms of Windows kernel * primitives, and some of the object manipulation is duplicated in * NDIS. For example, NDIS has timers and events, which are actually * Windows kevents and ktimers. 
Now, you're supposed to only use the * NDIS variants of these objects within the confines of the NDIS API, * but there are some naughty developers out there who will use * KeWaitForSingleObject() on NDIS timer and event objects, so we * have to support that as well. Conseqently, our NDIS timer and event * code has to be closely tied into our ntoskrnl timer and event code, * just as it is in Windows. * * KeWaitForSingleObject() may do different things for different kinds * of objects: * * - For events, we check if the event has been signalled. If the * event is already in the signalled state, we just return immediately, * otherwise we wait for it to be set to the signalled state by someone * else calling KeSetEvent(). Events can be either synchronization or * notification events. * * - For timers, if the timer has already fired and the timer is in * the signalled state, we just return, otherwise we wait on the * timer. Unlike an event, timers get signalled automatically when * they expire rather than someone having to trip them manually. * Timers initialized with KeInitializeTimer() are always notification * events: KeInitializeTimerEx() lets you initialize a timer as * either a notification or synchronization event. * * - For mutexes, we try to acquire the mutex and if we can't, we wait * on the mutex until it's available and then grab it. When a mutex is * released, it enters the signalled state, which wakes up one of the * threads waiting to acquire it. Mutexes are always synchronization * events. * * - For threads, the only thing we do is wait until the thread object * enters a signalled state, which occurs when the thread terminates. * Threads are always notification events. * * A notification event wakes up all threads waiting on an object. A * synchronization event wakes up just one. Also, a synchronization event * is auto-clearing, which means we automatically set the event back to * the non-signalled state once the wakeup is done. */ uint32_t KeWaitForSingleObject(void *arg, uint32_t reason, uint32_t mode, uint8_t alertable, int64_t *duetime) { wait_block w; struct thread *td = curthread; struct timeval tv; int error = 0; uint64_t curtime; wb_ext we; nt_dispatch_header *obj; obj = arg; if (obj == NULL) return (STATUS_INVALID_PARAMETER); mtx_lock(&ntoskrnl_dispatchlock); cv_init(&we.we_cv, "KeWFS"); we.we_td = td; /* * Check to see if this object is already signalled, * and just return without waiting if it is. */ if (ntoskrnl_is_signalled(obj, td) == TRUE) { /* Sanity check the signal state value. */ if (obj->dh_sigstate != INT32_MIN) { ntoskrnl_satisfy_wait(obj, curthread); mtx_unlock(&ntoskrnl_dispatchlock); return (STATUS_SUCCESS); } else { /* * There's a limit to how many times we can * recursively acquire a mutant. If we hit * the limit, something is very wrong. */ if (obj->dh_type == DISP_TYPE_MUTANT) { mtx_unlock(&ntoskrnl_dispatchlock); panic("mutant limit exceeded"); } } } bzero((char *)&w, sizeof(wait_block)); w.wb_object = obj; w.wb_ext = &we; w.wb_waittype = WAITTYPE_ANY; w.wb_next = &w; w.wb_waitkey = 0; w.wb_awakened = FALSE; w.wb_oldpri = td->td_priority; InsertTailList((&obj->dh_waitlisthead), (&w.wb_waitlist)); /* * The timeout value is specified in 100 nanosecond units * and can be a positive or negative number. If it's positive, * then the duetime is absolute, and we need to convert it * to an absolute offset relative to now in order to use it. * If it's negative, then the duetime is relative and we * just have to convert the units. 
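 * For example, a relative duetime of -10000000 (one second in 100 ns
 * units) yields tv_sec = 1 and tv_usec = 0 below, while an absolute
 * duetime is first reduced by the current 1601-epoch time obtained from
 * ntoskrnl_time() before being split into seconds and microseconds.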
*/ if (duetime != NULL) { if (*duetime < 0) { tv.tv_sec = - (*duetime) / 10000000; tv.tv_usec = (- (*duetime) / 10) - (tv.tv_sec * 1000000); } else { ntoskrnl_time(&curtime); if (*duetime < curtime) tv.tv_sec = tv.tv_usec = 0; else { tv.tv_sec = ((*duetime) - curtime) / 10000000; tv.tv_usec = ((*duetime) - curtime) / 10 - (tv.tv_sec * 1000000); } } } if (duetime == NULL) cv_wait(&we.we_cv, &ntoskrnl_dispatchlock); else error = cv_timedwait(&we.we_cv, &ntoskrnl_dispatchlock, tvtohz(&tv)); RemoveEntryList(&w.wb_waitlist); cv_destroy(&we.we_cv); /* We timed out. Leave the object alone and return status. */ if (error == EWOULDBLOCK) { mtx_unlock(&ntoskrnl_dispatchlock); return (STATUS_TIMEOUT); } mtx_unlock(&ntoskrnl_dispatchlock); return (STATUS_SUCCESS); /* return (KeWaitForMultipleObjects(1, &obj, WAITTYPE_ALL, reason, mode, alertable, duetime, &w)); */ } static uint32_t KeWaitForMultipleObjects(uint32_t cnt, nt_dispatch_header *obj[], uint32_t wtype, uint32_t reason, uint32_t mode, uint8_t alertable, int64_t *duetime, wait_block *wb_array) { struct thread *td = curthread; wait_block *whead, *w; wait_block _wb_array[MAX_WAIT_OBJECTS]; nt_dispatch_header *cur; struct timeval tv; int i, wcnt = 0, error = 0; uint64_t curtime; struct timespec t1, t2; uint32_t status = STATUS_SUCCESS; wb_ext we; if (cnt > MAX_WAIT_OBJECTS) return (STATUS_INVALID_PARAMETER); if (cnt > THREAD_WAIT_OBJECTS && wb_array == NULL) return (STATUS_INVALID_PARAMETER); mtx_lock(&ntoskrnl_dispatchlock); cv_init(&we.we_cv, "KeWFM"); we.we_td = td; if (wb_array == NULL) whead = _wb_array; else whead = wb_array; bzero((char *)whead, sizeof(wait_block) * cnt); /* First pass: see if we can satisfy any waits immediately. */ wcnt = 0; w = whead; for (i = 0; i < cnt; i++) { InsertTailList((&obj[i]->dh_waitlisthead), (&w->wb_waitlist)); w->wb_ext = &we; w->wb_object = obj[i]; w->wb_waittype = wtype; w->wb_waitkey = i; w->wb_awakened = FALSE; w->wb_oldpri = td->td_priority; w->wb_next = w + 1; w++; wcnt++; if (ntoskrnl_is_signalled(obj[i], td)) { /* * There's a limit to how many times * we can recursively acquire a mutant. * If we hit the limit, something * is very wrong. */ if (obj[i]->dh_sigstate == INT32_MIN && obj[i]->dh_type == DISP_TYPE_MUTANT) { mtx_unlock(&ntoskrnl_dispatchlock); panic("mutant limit exceeded"); } /* * If this is a WAITTYPE_ANY wait, then * satisfy the waited object and exit * right now. */ if (wtype == WAITTYPE_ANY) { ntoskrnl_satisfy_wait(obj[i], td); status = STATUS_WAIT_0 + i; goto wait_done; } else { w--; wcnt--; w->wb_object = NULL; RemoveEntryList(&w->wb_waitlist); } } } /* * If this is a WAITTYPE_ALL wait and all objects are * already signalled, satisfy the waits and exit now. */ if (wtype == WAITTYPE_ALL && wcnt == 0) { for (i = 0; i < cnt; i++) ntoskrnl_satisfy_wait(obj[i], td); status = STATUS_SUCCESS; goto wait_done; } /* * Create a circular waitblock list. The waitcount * must always be non-zero when we get here. */ (w - 1)->wb_next = whead; /* Wait on any objects that aren't yet signalled. */ /* Calculate timeout, if any. 
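 * (Same conversion as in KeWaitForSingleObject() above: a negative
 * duetime is a relative count of 100 ns units, a positive one is an
 * absolute 1601-epoch time that is first reduced by the current time.)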
*/ if (duetime != NULL) { if (*duetime < 0) { tv.tv_sec = - (*duetime) / 10000000; tv.tv_usec = (- (*duetime) / 10) - (tv.tv_sec * 1000000); } else { ntoskrnl_time(&curtime); if (*duetime < curtime) tv.tv_sec = tv.tv_usec = 0; else { tv.tv_sec = ((*duetime) - curtime) / 10000000; tv.tv_usec = ((*duetime) - curtime) / 10 - (tv.tv_sec * 1000000); } } } while (wcnt) { nanotime(&t1); if (duetime == NULL) cv_wait(&we.we_cv, &ntoskrnl_dispatchlock); else error = cv_timedwait(&we.we_cv, &ntoskrnl_dispatchlock, tvtohz(&tv)); /* Wait with timeout expired. */ if (error) { status = STATUS_TIMEOUT; goto wait_done; } nanotime(&t2); /* See what's been signalled. */ w = whead; do { cur = w->wb_object; if (ntoskrnl_is_signalled(cur, td) == TRUE || w->wb_awakened == TRUE) { /* Sanity check the signal state value. */ if (cur->dh_sigstate == INT32_MIN && cur->dh_type == DISP_TYPE_MUTANT) { mtx_unlock(&ntoskrnl_dispatchlock); panic("mutant limit exceeded"); } wcnt--; if (wtype == WAITTYPE_ANY) { status = w->wb_waitkey & STATUS_WAIT_0; goto wait_done; } } w = w->wb_next; } while (w != whead); /* * If all objects have been signalled, or if this * is a WAITTYPE_ANY wait and we were woke up by * someone, we can bail. */ if (wcnt == 0) { status = STATUS_SUCCESS; goto wait_done; } /* * If this is WAITTYPE_ALL wait, and there's still * objects that haven't been signalled, deduct the * time that's elapsed so far from the timeout and * wait again (or continue waiting indefinitely if * there's no timeout). */ if (duetime != NULL) { tv.tv_sec -= (t2.tv_sec - t1.tv_sec); tv.tv_usec -= (t2.tv_nsec - t1.tv_nsec) / 1000; } } wait_done: cv_destroy(&we.we_cv); for (i = 0; i < cnt; i++) { if (whead[i].wb_object != NULL) RemoveEntryList(&whead[i].wb_waitlist); } mtx_unlock(&ntoskrnl_dispatchlock); return (status); } static void WRITE_REGISTER_USHORT(uint16_t *reg, uint16_t val) { bus_space_write_2(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val); } static uint16_t READ_REGISTER_USHORT(reg) uint16_t *reg; { return (bus_space_read_2(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg)); } static void WRITE_REGISTER_ULONG(reg, val) uint32_t *reg; uint32_t val; { bus_space_write_4(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val); } static uint32_t READ_REGISTER_ULONG(reg) uint32_t *reg; { return (bus_space_read_4(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg)); } static uint8_t READ_REGISTER_UCHAR(uint8_t *reg) { return (bus_space_read_1(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg)); } static void WRITE_REGISTER_UCHAR(uint8_t *reg, uint8_t val) { bus_space_write_1(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val); } static int64_t _allmul(a, b) int64_t a; int64_t b; { return (a * b); } static int64_t _alldiv(a, b) int64_t a; int64_t b; { return (a / b); } static int64_t _allrem(a, b) int64_t a; int64_t b; { return (a % b); } static uint64_t _aullmul(a, b) uint64_t a; uint64_t b; { return (a * b); } static uint64_t _aulldiv(a, b) uint64_t a; uint64_t b; { return (a / b); } static uint64_t _aullrem(a, b) uint64_t a; uint64_t b; { return (a % b); } static int64_t _allshl(int64_t a, uint8_t b) { return (a << b); } static uint64_t _aullshl(uint64_t a, uint8_t b) { return (a << b); } static int64_t _allshr(int64_t a, uint8_t b) { return (a >> b); } static uint64_t _aullshr(uint64_t a, uint8_t b) { return (a >> b); } static slist_entry * ntoskrnl_pushsl(head, entry) slist_header *head; slist_entry *entry; { slist_entry *oldhead; oldhead = head->slh_list.slh_next; entry->sl_next = head->slh_list.slh_next; head->slh_list.slh_next = entry; head->slh_list.slh_depth++; 
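	/*
	 * The sequence counter below is advanced on every push;
	 * ntoskrnl_popsl() advances it on every pop as well.
	 */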
head->slh_list.slh_seq++; return (oldhead); } static void InitializeSListHead(head) slist_header *head; { memset(head, 0, sizeof(*head)); } static slist_entry * ntoskrnl_popsl(head) slist_header *head; { slist_entry *first; first = head->slh_list.slh_next; if (first != NULL) { head->slh_list.slh_next = first->sl_next; head->slh_list.slh_depth--; head->slh_list.slh_seq++; } return (first); } /* * We need this to make lookaside lists work for amd64. * We pass a pointer to ExAllocatePoolWithTag() the lookaside * list structure. For amd64 to work right, this has to be a * pointer to the wrapped version of the routine, not the * original. Letting the Windows driver invoke the original * function directly will result in a convention calling * mismatch and a pretty crash. On x86, this effectively * becomes a no-op since ipt_func and ipt_wrap are the same. */ static funcptr ntoskrnl_findwrap(func) funcptr func; { image_patch_table *patch; patch = ntoskrnl_functbl; while (patch->ipt_func != NULL) { if ((funcptr)patch->ipt_func == func) return ((funcptr)patch->ipt_wrap); patch++; } return (NULL); } static void ExInitializePagedLookasideList(paged_lookaside_list *lookaside, lookaside_alloc_func *allocfunc, lookaside_free_func *freefunc, uint32_t flags, size_t size, uint32_t tag, uint16_t depth) { bzero((char *)lookaside, sizeof(paged_lookaside_list)); if (size < sizeof(slist_entry)) lookaside->nll_l.gl_size = sizeof(slist_entry); else lookaside->nll_l.gl_size = size; lookaside->nll_l.gl_tag = tag; if (allocfunc == NULL) lookaside->nll_l.gl_allocfunc = ntoskrnl_findwrap((funcptr)ExAllocatePoolWithTag); else lookaside->nll_l.gl_allocfunc = allocfunc; if (freefunc == NULL) lookaside->nll_l.gl_freefunc = ntoskrnl_findwrap((funcptr)ExFreePool); else lookaside->nll_l.gl_freefunc = freefunc; #ifdef __i386__ KeInitializeSpinLock(&lookaside->nll_obsoletelock); #endif lookaside->nll_l.gl_type = NonPagedPool; lookaside->nll_l.gl_depth = depth; lookaside->nll_l.gl_maxdepth = LOOKASIDE_DEPTH; } static void ExDeletePagedLookasideList(lookaside) paged_lookaside_list *lookaside; { void *buf; void (*freefunc)(void *); freefunc = lookaside->nll_l.gl_freefunc; while((buf = ntoskrnl_popsl(&lookaside->nll_l.gl_listhead)) != NULL) MSCALL1(freefunc, buf); } static void ExInitializeNPagedLookasideList(npaged_lookaside_list *lookaside, lookaside_alloc_func *allocfunc, lookaside_free_func *freefunc, uint32_t flags, size_t size, uint32_t tag, uint16_t depth) { bzero((char *)lookaside, sizeof(npaged_lookaside_list)); if (size < sizeof(slist_entry)) lookaside->nll_l.gl_size = sizeof(slist_entry); else lookaside->nll_l.gl_size = size; lookaside->nll_l.gl_tag = tag; if (allocfunc == NULL) lookaside->nll_l.gl_allocfunc = ntoskrnl_findwrap((funcptr)ExAllocatePoolWithTag); else lookaside->nll_l.gl_allocfunc = allocfunc; if (freefunc == NULL) lookaside->nll_l.gl_freefunc = ntoskrnl_findwrap((funcptr)ExFreePool); else lookaside->nll_l.gl_freefunc = freefunc; #ifdef __i386__ KeInitializeSpinLock(&lookaside->nll_obsoletelock); #endif lookaside->nll_l.gl_type = NonPagedPool; lookaside->nll_l.gl_depth = depth; lookaside->nll_l.gl_maxdepth = LOOKASIDE_DEPTH; } static void ExDeleteNPagedLookasideList(lookaside) npaged_lookaside_list *lookaside; { void *buf; void (*freefunc)(void *); freefunc = lookaside->nll_l.gl_freefunc; while((buf = ntoskrnl_popsl(&lookaside->nll_l.gl_listhead)) != NULL) MSCALL1(freefunc, buf); } slist_entry * InterlockedPushEntrySList(head, entry) slist_header *head; slist_entry *entry; { slist_entry *oldhead; 
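	/*
	 * SList pushes and pops are serialized with the global
	 * ntoskrnl_interlock spin mutex; the ExInterlocked*EntrySList()
	 * variants below just forward here and ignore their kspin_lock
	 * argument.
	 */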
mtx_lock_spin(&ntoskrnl_interlock); oldhead = ntoskrnl_pushsl(head, entry); mtx_unlock_spin(&ntoskrnl_interlock); return (oldhead); } slist_entry * InterlockedPopEntrySList(head) slist_header *head; { slist_entry *first; mtx_lock_spin(&ntoskrnl_interlock); first = ntoskrnl_popsl(head); mtx_unlock_spin(&ntoskrnl_interlock); return (first); } static slist_entry * ExInterlockedPushEntrySList(head, entry, lock) slist_header *head; slist_entry *entry; kspin_lock *lock; { return (InterlockedPushEntrySList(head, entry)); } static slist_entry * ExInterlockedPopEntrySList(head, lock) slist_header *head; kspin_lock *lock; { return (InterlockedPopEntrySList(head)); } uint16_t ExQueryDepthSList(head) slist_header *head; { uint16_t depth; mtx_lock_spin(&ntoskrnl_interlock); depth = head->slh_list.slh_depth; mtx_unlock_spin(&ntoskrnl_interlock); return (depth); } void KeInitializeSpinLock(lock) kspin_lock *lock; { *lock = 0; } #ifdef __i386__ void KefAcquireSpinLockAtDpcLevel(lock) kspin_lock *lock; { #ifdef NTOSKRNL_DEBUG_SPINLOCKS int i = 0; #endif while (atomic_cmpset_acq_int((volatile u_int *)lock, 0, 1) == 0) { /* sit and spin */; #ifdef NTOSKRNL_DEBUG_SPINLOCKS i++; if (i > 200000000) panic("DEADLOCK!"); #endif } } void KefReleaseSpinLockFromDpcLevel(lock) kspin_lock *lock; { atomic_store_rel_int((volatile u_int *)lock, 0); } uint8_t KeAcquireSpinLockRaiseToDpc(kspin_lock *lock) { uint8_t oldirql; if (KeGetCurrentIrql() > DISPATCH_LEVEL) panic("IRQL_NOT_LESS_THAN_OR_EQUAL"); KeRaiseIrql(DISPATCH_LEVEL, &oldirql); KeAcquireSpinLockAtDpcLevel(lock); return (oldirql); } #else void KeAcquireSpinLockAtDpcLevel(kspin_lock *lock) { while (atomic_cmpset_acq_int((volatile u_int *)lock, 0, 1) == 0) /* sit and spin */; } void KeReleaseSpinLockFromDpcLevel(kspin_lock *lock) { atomic_store_rel_int((volatile u_int *)lock, 0); } #endif /* __i386__ */ uintptr_t InterlockedExchange(dst, val) volatile uint32_t *dst; uintptr_t val; { uintptr_t r; mtx_lock_spin(&ntoskrnl_interlock); r = *dst; *dst = val; mtx_unlock_spin(&ntoskrnl_interlock); return (r); } static uint32_t InterlockedIncrement(addend) volatile uint32_t *addend; { atomic_add_long((volatile u_long *)addend, 1); return (*addend); } static uint32_t InterlockedDecrement(addend) volatile uint32_t *addend; { atomic_subtract_long((volatile u_long *)addend, 1); return (*addend); } static void ExInterlockedAddLargeStatistic(addend, inc) uint64_t *addend; uint32_t inc; { mtx_lock_spin(&ntoskrnl_interlock); *addend += inc; mtx_unlock_spin(&ntoskrnl_interlock); }; mdl * IoAllocateMdl(void *vaddr, uint32_t len, uint8_t secondarybuf, uint8_t chargequota, irp *iopkt) { mdl *m; int zone = 0; if (MmSizeOfMdl(vaddr, len) > MDL_ZONE_SIZE) m = ExAllocatePoolWithTag(NonPagedPool, MmSizeOfMdl(vaddr, len), 0); else { m = uma_zalloc(mdl_zone, M_NOWAIT | M_ZERO); zone++; } if (m == NULL) return (NULL); MmInitializeMdl(m, vaddr, len); /* * MmInitializMdl() clears the flags field, so we * have to set this here. If the MDL came from the * MDL UMA zone, tag it so we can release it to * the right place later. 
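 * (IoFreeMdl() checks MDL_ZONE_ALLOCED and returns zone-backed MDLs
 * with uma_zfree(); anything else is released with ExFreePool().)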
*/ if (zone) m->mdl_flags = MDL_ZONE_ALLOCED; if (iopkt != NULL) { if (secondarybuf == TRUE) { mdl *last; last = iopkt->irp_mdl; while (last->mdl_next != NULL) last = last->mdl_next; last->mdl_next = m; } else { if (iopkt->irp_mdl != NULL) panic("leaking an MDL in IoAllocateMdl()"); iopkt->irp_mdl = m; } } return (m); } void IoFreeMdl(m) mdl *m; { if (m == NULL) return; if (m->mdl_flags & MDL_ZONE_ALLOCED) uma_zfree(mdl_zone, m); else ExFreePool(m); } static void * MmAllocateContiguousMemory(size, highest) uint32_t size; uint64_t highest; { void *addr; size_t pagelength = roundup(size, PAGE_SIZE); addr = ExAllocatePoolWithTag(NonPagedPool, pagelength, 0); return (addr); } static void * MmAllocateContiguousMemorySpecifyCache(size, lowest, highest, boundary, cachetype) uint32_t size; uint64_t lowest; uint64_t highest; uint64_t boundary; enum nt_caching_type cachetype; { vm_memattr_t memattr; void *ret; switch (cachetype) { case MmNonCached: memattr = VM_MEMATTR_UNCACHEABLE; break; case MmWriteCombined: memattr = VM_MEMATTR_WRITE_COMBINING; break; case MmNonCachedUnordered: memattr = VM_MEMATTR_UNCACHEABLE; break; case MmCached: case MmHardwareCoherentCached: case MmUSWCCached: default: memattr = VM_MEMATTR_DEFAULT; break; } - ret = (void *)kmem_alloc_contig(kernel_arena, size, M_ZERO | M_NOWAIT, - lowest, highest, PAGE_SIZE, boundary, memattr); + ret = (void *)kmem_alloc_contig(size, M_ZERO | M_NOWAIT, lowest, + highest, PAGE_SIZE, boundary, memattr); if (ret != NULL) malloc_type_allocated(M_DEVBUF, round_page(size)); return (ret); } static void MmFreeContiguousMemory(base) void *base; { ExFreePool(base); } static void MmFreeContiguousMemorySpecifyCache(base, size, cachetype) void *base; uint32_t size; enum nt_caching_type cachetype; { contigfree(base, size, M_DEVBUF); } static uint32_t MmSizeOfMdl(vaddr, len) void *vaddr; size_t len; { uint32_t l; l = sizeof(struct mdl) + (sizeof(vm_offset_t *) * SPAN_PAGES(vaddr, len)); return (l); } /* * The Microsoft documentation says this routine fills in the * page array of an MDL with the _physical_ page addresses that * comprise the buffer, but we don't really want to do that here. * Instead, we just fill in the page array with the kernel virtual * addresses of the buffers. */ void MmBuildMdlForNonPagedPool(m) mdl *m; { vm_offset_t *mdl_pages; int pagecnt, i; pagecnt = SPAN_PAGES(m->mdl_byteoffset, m->mdl_bytecount); if (pagecnt > (m->mdl_size - sizeof(mdl)) / sizeof(vm_offset_t *)) panic("not enough pages in MDL to describe buffer"); mdl_pages = MmGetMdlPfnArray(m); for (i = 0; i < pagecnt; i++) *mdl_pages = (vm_offset_t)m->mdl_startva + (i * PAGE_SIZE); m->mdl_flags |= MDL_SOURCE_IS_NONPAGED_POOL; m->mdl_mappedsystemva = MmGetMdlVirtualAddress(m); } static void * MmMapLockedPages(mdl *buf, uint8_t accessmode) { buf->mdl_flags |= MDL_MAPPED_TO_SYSTEM_VA; return (MmGetMdlVirtualAddress(buf)); } static void * MmMapLockedPagesSpecifyCache(mdl *buf, uint8_t accessmode, uint32_t cachetype, void *vaddr, uint32_t bugcheck, uint32_t prio) { return (MmMapLockedPages(buf, accessmode)); } static void MmUnmapLockedPages(vaddr, buf) void *vaddr; mdl *buf; { buf->mdl_flags &= ~MDL_MAPPED_TO_SYSTEM_VA; } /* * This function has a problem in that it will break if you * compile this module without PAE and try to use it on a PAE * kernel. Unfortunately, there's no way around this at the * moment. It's slightly less broken that using pmap_kextract(). * You'd think the virtual memory subsystem would help us out * here, but it doesn't. 
*/ static uint64_t MmGetPhysicalAddress(void *base) { return (pmap_extract(kernel_map->pmap, (vm_offset_t)base)); } void * MmGetSystemRoutineAddress(ustr) unicode_string *ustr; { ansi_string astr; if (RtlUnicodeStringToAnsiString(&astr, ustr, TRUE)) return (NULL); return (ndis_get_routine_address(ntoskrnl_functbl, astr.as_buf)); } uint8_t MmIsAddressValid(vaddr) void *vaddr; { if (pmap_extract(kernel_map->pmap, (vm_offset_t)vaddr)) return (TRUE); return (FALSE); } void * MmMapIoSpace(paddr, len, cachetype) uint64_t paddr; uint32_t len; uint32_t cachetype; { devclass_t nexus_class; device_t *nexus_devs, devp; int nexus_count = 0; device_t matching_dev = NULL; struct resource *res; int i; vm_offset_t v; /* There will always be at least one nexus. */ nexus_class = devclass_find("nexus"); devclass_get_devices(nexus_class, &nexus_devs, &nexus_count); for (i = 0; i < nexus_count; i++) { devp = nexus_devs[i]; matching_dev = ntoskrnl_finddev(devp, paddr, &res); if (matching_dev) break; } free(nexus_devs, M_TEMP); if (matching_dev == NULL) return (NULL); v = (vm_offset_t)rman_get_virtual(res); if (paddr > rman_get_start(res)) v += paddr - rman_get_start(res); return ((void *)v); } void MmUnmapIoSpace(vaddr, len) void *vaddr; size_t len; { } static device_t ntoskrnl_finddev(dev, paddr, res) device_t dev; uint64_t paddr; struct resource **res; { device_t *children = NULL; device_t matching_dev; int childcnt; struct resource *r; struct resource_list *rl; struct resource_list_entry *rle; uint32_t flags; int i; /* We only want devices that have been successfully probed. */ if (device_is_alive(dev) == FALSE) return (NULL); rl = BUS_GET_RESOURCE_LIST(device_get_parent(dev), dev); if (rl != NULL) { STAILQ_FOREACH(rle, rl, link) { r = rle->res; if (r == NULL) continue; flags = rman_get_flags(r); if (rle->type == SYS_RES_MEMORY && paddr >= rman_get_start(r) && paddr <= rman_get_end(r)) { if (!(flags & RF_ACTIVE)) bus_activate_resource(dev, SYS_RES_MEMORY, 0, r); *res = r; return (dev); } } } /* * If this device has children, do another * level of recursion to inspect them. */ device_get_children(dev, &children, &childcnt); for (i = 0; i < childcnt; i++) { matching_dev = ntoskrnl_finddev(children[i], paddr, res); if (matching_dev != NULL) { free(children, M_TEMP); return (matching_dev); } } /* Won't somebody please think of the children! */ if (children != NULL) free(children, M_TEMP); return (NULL); } /* * Workitems are unlike DPCs, in that they run in a user-mode thread * context rather than at DISPATCH_LEVEL in kernel context. In our * case we run them in kernel context anyway. 
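/*
 * Illustrative stand-alone sketch (not part of this file): the workitem
 * thread below is essentially a "wait for an event, then drain a list of
 * (function, context) pairs" loop.  The same shape in portable C with
 * pthreads, as a rough analogue of the KeWaitForSingleObject()/KeSetEvent()
 * idiom; the helper names are invented for the example.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct work {
	struct work	*next;
	void		(*fn)(void *);
	void		*ctx;
};

static struct work *head;
static int done;
static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;

static void *
worker(void *arg)
{
	struct work *w;

	(void)arg;
	pthread_mutex_lock(&mtx);
	for (;;) {
		while (head != NULL) {
			w = head;
			head = w->next;
			/* Run the item with the queue unlocked, as above. */
			pthread_mutex_unlock(&mtx);
			w->fn(w->ctx);
			free(w);
			pthread_mutex_lock(&mtx);
		}
		if (done)
			break;
		pthread_cond_wait(&cv, &mtx);	/* KeWaitForSingleObject() */
	}
	pthread_mutex_unlock(&mtx);
	return (NULL);
}

static void
enqueue(void (*fn)(void *), void *ctx)
{
	struct work *w = malloc(sizeof(*w));

	if (w == NULL)
		return;
	w->fn = fn;
	w->ctx = ctx;
	pthread_mutex_lock(&mtx);
	w->next = head;
	head = w;
	pthread_mutex_unlock(&mtx);
	pthread_cond_signal(&cv);		/* KeSetEvent() */
}

static void
hello(void *ctx)
{
	printf("workitem ran: %s\n", (const char *)ctx);
}

int
main(void)
{
	pthread_t td;

	pthread_create(&td, NULL, worker, NULL);
	enqueue(hello, "example");
	pthread_mutex_lock(&mtx);
	done = 1;
	pthread_mutex_unlock(&mtx);
	pthread_cond_signal(&cv);
	pthread_join(td, NULL);
	return (0);
}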
*/ static void ntoskrnl_workitem_thread(arg) void *arg; { kdpc_queue *kq; list_entry *l; io_workitem *iw; uint8_t irql; kq = arg; InitializeListHead(&kq->kq_disp); kq->kq_td = curthread; kq->kq_exit = 0; KeInitializeSpinLock(&kq->kq_lock); KeInitializeEvent(&kq->kq_proc, EVENT_TYPE_SYNC, FALSE); while (1) { KeWaitForSingleObject(&kq->kq_proc, 0, 0, TRUE, NULL); KeAcquireSpinLock(&kq->kq_lock, &irql); if (kq->kq_exit) { kq->kq_exit = 0; KeReleaseSpinLock(&kq->kq_lock, irql); break; } while (!IsListEmpty(&kq->kq_disp)) { l = RemoveHeadList(&kq->kq_disp); iw = CONTAINING_RECORD(l, io_workitem, iw_listentry); InitializeListHead((&iw->iw_listentry)); if (iw->iw_func == NULL) continue; KeReleaseSpinLock(&kq->kq_lock, irql); MSCALL2(iw->iw_func, iw->iw_dobj, iw->iw_ctx); KeAcquireSpinLock(&kq->kq_lock, &irql); } KeReleaseSpinLock(&kq->kq_lock, irql); } kproc_exit(0); return; /* notreached */ } static ndis_status RtlCharToInteger(src, base, val) const char *src; uint32_t base; uint32_t *val; { int negative = 0; uint32_t res; if (!src || !val) return (STATUS_ACCESS_VIOLATION); while (*src != '\0' && *src <= ' ') src++; if (*src == '+') src++; else if (*src == '-') { src++; negative = 1; } if (base == 0) { base = 10; if (*src == '0') { src++; if (*src == 'b') { base = 2; src++; } else if (*src == 'o') { base = 8; src++; } else if (*src == 'x') { base = 16; src++; } } } else if (!(base == 2 || base == 8 || base == 10 || base == 16)) return (STATUS_INVALID_PARAMETER); for (res = 0; *src; src++) { int v; if (isdigit(*src)) v = *src - '0'; else if (isxdigit(*src)) v = tolower(*src) - 'a' + 10; else v = base; if (v >= base) return (STATUS_INVALID_PARAMETER); res = res * base + v; } *val = negative ? -res : res; return (STATUS_SUCCESS); } static void ntoskrnl_destroy_workitem_threads(void) { kdpc_queue *kq; int i; for (i = 0; i < WORKITEM_THREADS; i++) { kq = wq_queues + i; kq->kq_exit = 1; KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE); while (kq->kq_exit) tsleep(kq->kq_td->td_proc, PWAIT, "waitiw", hz/10); } } io_workitem * IoAllocateWorkItem(dobj) device_object *dobj; { io_workitem *iw; iw = uma_zalloc(iw_zone, M_NOWAIT); if (iw == NULL) return (NULL); InitializeListHead(&iw->iw_listentry); iw->iw_dobj = dobj; mtx_lock(&ntoskrnl_dispatchlock); iw->iw_idx = wq_idx; WORKIDX_INC(wq_idx); mtx_unlock(&ntoskrnl_dispatchlock); return (iw); } void IoFreeWorkItem(iw) io_workitem *iw; { uma_zfree(iw_zone, iw); } void IoQueueWorkItem(iw, iw_func, qtype, ctx) io_workitem *iw; io_workitem_func iw_func; uint32_t qtype; void *ctx; { kdpc_queue *kq; list_entry *l; io_workitem *cur; uint8_t irql; kq = wq_queues + iw->iw_idx; KeAcquireSpinLock(&kq->kq_lock, &irql); /* * Traverse the list and make sure this workitem hasn't * already been inserted. Queuing the same workitem * twice will hose the list but good. */ l = kq->kq_disp.nle_flink; while (l != &kq->kq_disp) { cur = CONTAINING_RECORD(l, io_workitem, iw_listentry); if (cur == iw) { /* Already queued -- do nothing. 
*/ KeReleaseSpinLock(&kq->kq_lock, irql); return; } l = l->nle_flink; } iw->iw_func = iw_func; iw->iw_ctx = ctx; InsertTailList((&kq->kq_disp), (&iw->iw_listentry)); KeReleaseSpinLock(&kq->kq_lock, irql); KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE); } static void ntoskrnl_workitem(dobj, arg) device_object *dobj; void *arg; { io_workitem *iw; work_queue_item *w; work_item_func f; iw = arg; w = (work_queue_item *)dobj; f = (work_item_func)w->wqi_func; uma_zfree(iw_zone, iw); MSCALL2(f, w, w->wqi_ctx); } /* * The ExQueueWorkItem() API is deprecated in Windows XP. Microsoft * warns that it's unsafe and to use IoQueueWorkItem() instead. The * problem with ExQueueWorkItem() is that it can't guard against * the condition where a driver submits a job to the work queue and * is then unloaded before the job is able to run. IoQueueWorkItem() * acquires a reference to the device's device_object via the * object manager and retains it until after the job has completed, * which prevents the driver from being unloaded before the job * runs. (We don't currently support this behavior, though hopefully * that will change once the object manager API is fleshed out a bit.) * * Having said all that, the ExQueueWorkItem() API remains, because * there are still other parts of Windows that use it, including * NDIS itself: NdisScheduleWorkItem() calls ExQueueWorkItem(). * We fake up the ExQueueWorkItem() API on top of our implementation * of IoQueueWorkItem(). Workitem thread #3 is reserved exclusively * for ExQueueWorkItem() jobs, and we pass a pointer to the work * queue item (provided by the caller) in to IoAllocateWorkItem() * instead of the device_object. We need to save this pointer so * we can apply a sanity check: as with the DPC queue and other * workitem queues, we can't allow the same work queue item to * be queued twice. If it's already pending, we silently return */ void ExQueueWorkItem(w, qtype) work_queue_item *w; uint32_t qtype; { io_workitem *iw; io_workitem_func iwf; kdpc_queue *kq; list_entry *l; io_workitem *cur; uint8_t irql; /* * We need to do a special sanity test to make sure * the ExQueueWorkItem() API isn't used to queue * the same workitem twice. Rather than checking the * io_workitem pointer itself, we test the attached * device object, which is really a pointer to the * legacy work queue item structure. */ kq = wq_queues + WORKITEM_LEGACY_THREAD; KeAcquireSpinLock(&kq->kq_lock, &irql); l = kq->kq_disp.nle_flink; while (l != &kq->kq_disp) { cur = CONTAINING_RECORD(l, io_workitem, iw_listentry); if (cur->iw_dobj == (device_object *)w) { /* Already queued -- do nothing. 
*/ KeReleaseSpinLock(&kq->kq_lock, irql); return; } l = l->nle_flink; } KeReleaseSpinLock(&kq->kq_lock, irql); iw = IoAllocateWorkItem((device_object *)w); if (iw == NULL) return; iw->iw_idx = WORKITEM_LEGACY_THREAD; iwf = (io_workitem_func)ntoskrnl_findwrap((funcptr)ntoskrnl_workitem); IoQueueWorkItem(iw, iwf, qtype, iw); } static void RtlZeroMemory(dst, len) void *dst; size_t len; { bzero(dst, len); } static void RtlSecureZeroMemory(dst, len) void *dst; size_t len; { memset(dst, 0, len); } static void RtlFillMemory(void *dst, size_t len, uint8_t c) { memset(dst, c, len); } static void RtlMoveMemory(dst, src, len) void *dst; const void *src; size_t len; { memmove(dst, src, len); } static void RtlCopyMemory(dst, src, len) void *dst; const void *src; size_t len; { bcopy(src, dst, len); } static size_t RtlCompareMemory(s1, s2, len) const void *s1; const void *s2; size_t len; { size_t i; uint8_t *m1, *m2; m1 = __DECONST(char *, s1); m2 = __DECONST(char *, s2); for (i = 0; i < len && m1[i] == m2[i]; i++); return (i); } void RtlInitAnsiString(dst, src) ansi_string *dst; char *src; { ansi_string *a; a = dst; if (a == NULL) return; if (src == NULL) { a->as_len = a->as_maxlen = 0; a->as_buf = NULL; } else { a->as_buf = src; a->as_len = a->as_maxlen = strlen(src); } } void RtlInitUnicodeString(dst, src) unicode_string *dst; uint16_t *src; { unicode_string *u; int i; u = dst; if (u == NULL) return; if (src == NULL) { u->us_len = u->us_maxlen = 0; u->us_buf = NULL; } else { i = 0; while(src[i] != 0) i++; u->us_buf = src; u->us_len = u->us_maxlen = i * 2; } } ndis_status RtlUnicodeStringToInteger(ustr, base, val) unicode_string *ustr; uint32_t base; uint32_t *val; { uint16_t *uchr; int len, neg = 0; char abuf[64]; char *astr; uchr = ustr->us_buf; len = ustr->us_len; bzero(abuf, sizeof(abuf)); if ((char)((*uchr) & 0xFF) == '-') { neg = 1; uchr++; len -= 2; } else if ((char)((*uchr) & 0xFF) == '+') { neg = 0; uchr++; len -= 2; } if (base == 0) { if ((char)((*uchr) & 0xFF) == 'b') { base = 2; uchr++; len -= 2; } else if ((char)((*uchr) & 0xFF) == 'o') { base = 8; uchr++; len -= 2; } else if ((char)((*uchr) & 0xFF) == 'x') { base = 16; uchr++; len -= 2; } else base = 10; } astr = abuf; if (neg) { strcpy(astr, "-"); astr++; } ntoskrnl_unicode_to_ascii(uchr, astr, len); *val = strtoul(abuf, NULL, base); return (STATUS_SUCCESS); } void RtlFreeUnicodeString(ustr) unicode_string *ustr; { if (ustr->us_buf == NULL) return; ExFreePool(ustr->us_buf); ustr->us_buf = NULL; } void RtlFreeAnsiString(astr) ansi_string *astr; { if (astr->as_buf == NULL) return; ExFreePool(astr->as_buf); astr->as_buf = NULL; } static int atoi(str) const char *str; { return (int)strtol(str, (char **)NULL, 10); } static long atol(str) const char *str; { return strtol(str, (char **)NULL, 10); } static int rand(void) { return (random()); } static void srand(unsigned int seed) { srandom(seed); } static uint8_t IoIsWdmVersionAvailable(uint8_t major, uint8_t minor) { if (major == WDM_MAJOR && minor == WDM_MINOR_WINXP) return (TRUE); return (FALSE); } static int32_t IoOpenDeviceRegistryKey(struct device_object *devobj, uint32_t type, uint32_t mask, void **key) { return (NDIS_STATUS_INVALID_DEVICE_REQUEST); } static ndis_status IoGetDeviceObjectPointer(name, reqaccess, fileobj, devobj) unicode_string *name; uint32_t reqaccess; void *fileobj; device_object *devobj; { return (STATUS_SUCCESS); } static ndis_status IoGetDeviceProperty(devobj, regprop, buflen, prop, reslen) device_object *devobj; uint32_t regprop; uint32_t buflen; void *prop; uint32_t 
*reslen; { driver_object *drv; uint16_t **name; drv = devobj->do_drvobj; switch (regprop) { case DEVPROP_DRIVER_KEYNAME: name = prop; *name = drv->dro_drivername.us_buf; *reslen = drv->dro_drivername.us_len; break; default: return (STATUS_INVALID_PARAMETER_2); break; } return (STATUS_SUCCESS); } static void KeInitializeMutex(kmutex, level) kmutant *kmutex; uint32_t level; { InitializeListHead((&kmutex->km_header.dh_waitlisthead)); kmutex->km_abandoned = FALSE; kmutex->km_apcdisable = 1; kmutex->km_header.dh_sigstate = 1; kmutex->km_header.dh_type = DISP_TYPE_MUTANT; kmutex->km_header.dh_size = sizeof(kmutant) / sizeof(uint32_t); kmutex->km_ownerthread = NULL; } static uint32_t KeReleaseMutex(kmutant *kmutex, uint8_t kwait) { uint32_t prevstate; mtx_lock(&ntoskrnl_dispatchlock); prevstate = kmutex->km_header.dh_sigstate; if (kmutex->km_ownerthread != curthread) { mtx_unlock(&ntoskrnl_dispatchlock); return (STATUS_MUTANT_NOT_OWNED); } kmutex->km_header.dh_sigstate++; kmutex->km_abandoned = FALSE; if (kmutex->km_header.dh_sigstate == 1) { kmutex->km_ownerthread = NULL; ntoskrnl_waittest(&kmutex->km_header, IO_NO_INCREMENT); } mtx_unlock(&ntoskrnl_dispatchlock); return (prevstate); } static uint32_t KeReadStateMutex(kmutex) kmutant *kmutex; { return (kmutex->km_header.dh_sigstate); } void KeInitializeEvent(nt_kevent *kevent, uint32_t type, uint8_t state) { InitializeListHead((&kevent->k_header.dh_waitlisthead)); kevent->k_header.dh_sigstate = state; if (type == EVENT_TYPE_NOTIFY) kevent->k_header.dh_type = DISP_TYPE_NOTIFICATION_EVENT; else kevent->k_header.dh_type = DISP_TYPE_SYNCHRONIZATION_EVENT; kevent->k_header.dh_size = sizeof(nt_kevent) / sizeof(uint32_t); } uint32_t KeResetEvent(kevent) nt_kevent *kevent; { uint32_t prevstate; mtx_lock(&ntoskrnl_dispatchlock); prevstate = kevent->k_header.dh_sigstate; kevent->k_header.dh_sigstate = FALSE; mtx_unlock(&ntoskrnl_dispatchlock); return (prevstate); } uint32_t KeSetEvent(nt_kevent *kevent, uint32_t increment, uint8_t kwait) { uint32_t prevstate; wait_block *w; nt_dispatch_header *dh; struct thread *td; wb_ext *we; mtx_lock(&ntoskrnl_dispatchlock); prevstate = kevent->k_header.dh_sigstate; dh = &kevent->k_header; if (IsListEmpty(&dh->dh_waitlisthead)) /* * If there's nobody in the waitlist, just set * the state to signalled. */ dh->dh_sigstate = 1; else { /* * Get the first waiter. If this is a synchronization * event, just wake up that one thread (don't bother * setting the state to signalled since we're supposed * to automatically clear synchronization events anyway). * * If it's a notification event, or the first * waiter is doing a WAITTYPE_ALL wait, go through * the full wait satisfaction process. */ w = CONTAINING_RECORD(dh->dh_waitlisthead.nle_flink, wait_block, wb_waitlist); we = w->wb_ext; td = we->we_td; if (kevent->k_header.dh_type == DISP_TYPE_NOTIFICATION_EVENT || w->wb_waittype == WAITTYPE_ALL) { if (prevstate == 0) { dh->dh_sigstate = 1; ntoskrnl_waittest(dh, increment); } } else { w->wb_awakened |= TRUE; cv_broadcastpri(&we->we_cv, (w->wb_oldpri - (increment * 4)) > PRI_MIN_KERN ? 
w->wb_oldpri - (increment * 4) : PRI_MIN_KERN); } } mtx_unlock(&ntoskrnl_dispatchlock); return (prevstate); } void KeClearEvent(kevent) nt_kevent *kevent; { kevent->k_header.dh_sigstate = FALSE; } uint32_t KeReadStateEvent(kevent) nt_kevent *kevent; { return (kevent->k_header.dh_sigstate); } /* * The object manager in Windows is responsible for managing * references and access to various types of objects, including * device_objects, events, threads, timers and so on. However, * there's a difference in the way objects are handled in user * mode versus kernel mode. * * In user mode (i.e. Win32 applications), all objects are * managed by the object manager. For example, when you create * a timer or event object, you actually end up with an * object_header (for the object manager's bookkeeping * purposes) and an object body (which contains the actual object * structure, e.g. ktimer, kevent, etc...). This allows Windows * to manage resource quotas and to enforce access restrictions * on basically every kind of system object handled by the kernel. * * However, in kernel mode, you only end up using the object * manager some of the time. For example, in a driver, you create * a timer object by simply allocating the memory for a ktimer * structure and initializing it with KeInitializeTimer(). Hence, * the timer has no object_header and no reference counting or * security/resource checks are done on it. The assumption in * this case is that if you're running in kernel mode, you know * what you're doing, and you're already at an elevated privilege * anyway. * * There are some exceptions to this. The two most important ones * for our purposes are device_objects and threads. We need to use * the object manager to do reference counting on device_objects, * and for threads, you can only get a pointer to a thread's * dispatch header by using ObReferenceObjectByHandle() on the * handle returned by PsCreateSystemThread(). */ static ndis_status ObReferenceObjectByHandle(ndis_handle handle, uint32_t reqaccess, void *otype, uint8_t accessmode, void **object, void **handleinfo) { nt_objref *nr; nr = malloc(sizeof(nt_objref), M_DEVBUF, M_NOWAIT|M_ZERO); if (nr == NULL) return (STATUS_INSUFFICIENT_RESOURCES); InitializeListHead((&nr->no_dh.dh_waitlisthead)); nr->no_obj = handle; nr->no_dh.dh_type = DISP_TYPE_THREAD; nr->no_dh.dh_sigstate = 0; nr->no_dh.dh_size = (uint8_t)(sizeof(struct thread) / sizeof(uint32_t)); TAILQ_INSERT_TAIL(&ntoskrnl_reflist, nr, link); *object = nr; return (STATUS_SUCCESS); } static void ObfDereferenceObject(object) void *object; { nt_objref *nr; nr = object; TAILQ_REMOVE(&ntoskrnl_reflist, nr, link); free(nr, M_DEVBUF); } static uint32_t ZwClose(handle) ndis_handle handle; { return (STATUS_SUCCESS); } static uint32_t WmiQueryTraceInformation(traceclass, traceinfo, infolen, reqlen, buf) uint32_t traceclass; void *traceinfo; uint32_t infolen; uint32_t reqlen; void *buf; { return (STATUS_NOT_FOUND); } static uint32_t WmiTraceMessage(uint64_t loghandle, uint32_t messageflags, void *guid, uint16_t messagenum, ...) { return (STATUS_SUCCESS); } static uint32_t IoWMIRegistrationControl(dobj, action) device_object *dobj; uint32_t action; { return (STATUS_SUCCESS); } /* * This is here just in case the thread returns without calling * PsTerminateSystemThread(). 
*/ static void ntoskrnl_thrfunc(arg) void *arg; { thread_context *thrctx; uint32_t (*tfunc)(void *); void *tctx; uint32_t rval; thrctx = arg; tfunc = thrctx->tc_thrfunc; tctx = thrctx->tc_thrctx; free(thrctx, M_TEMP); rval = MSCALL1(tfunc, tctx); PsTerminateSystemThread(rval); return; /* notreached */ } static ndis_status PsCreateSystemThread(handle, reqaccess, objattrs, phandle, clientid, thrfunc, thrctx) ndis_handle *handle; uint32_t reqaccess; void *objattrs; ndis_handle phandle; void *clientid; void *thrfunc; void *thrctx; { int error; thread_context *tc; struct proc *p; tc = malloc(sizeof(thread_context), M_TEMP, M_NOWAIT); if (tc == NULL) return (STATUS_INSUFFICIENT_RESOURCES); tc->tc_thrctx = thrctx; tc->tc_thrfunc = thrfunc; error = kproc_create(ntoskrnl_thrfunc, tc, &p, RFHIGHPID, NDIS_KSTACK_PAGES, "Windows Kthread %d", ntoskrnl_kth); if (error) { free(tc, M_TEMP); return (STATUS_INSUFFICIENT_RESOURCES); } *handle = p; ntoskrnl_kth++; return (STATUS_SUCCESS); } /* * In Windows, the exit of a thread is an event that you're allowed * to wait on, assuming you've obtained a reference to the thread using * ObReferenceObjectByHandle(). Unfortunately, the only way we can * simulate this behavior is to register each thread we create in a * reference list, and if someone holds a reference to us, we poke * them. */ static ndis_status PsTerminateSystemThread(status) ndis_status status; { struct nt_objref *nr; mtx_lock(&ntoskrnl_dispatchlock); TAILQ_FOREACH(nr, &ntoskrnl_reflist, link) { if (nr->no_obj != curthread->td_proc) continue; nr->no_dh.dh_sigstate = 1; ntoskrnl_waittest(&nr->no_dh, IO_NO_INCREMENT); break; } mtx_unlock(&ntoskrnl_dispatchlock); ntoskrnl_kth--; kproc_exit(0); return (0); /* notreached */ } static uint32_t DbgPrint(char *fmt, ...) { va_list ap; if (bootverbose) { va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); } return (STATUS_SUCCESS); } static void DbgBreakPoint(void) { kdb_enter(KDB_WHY_NDIS, "DbgBreakPoint(): breakpoint"); } static void KeBugCheckEx(code, param1, param2, param3, param4) uint32_t code; u_long param1; u_long param2; u_long param3; u_long param4; { panic("KeBugCheckEx: STOP 0x%X", code); } static void ntoskrnl_timercall(arg) void *arg; { ktimer *timer; struct timeval tv; kdpc *dpc; mtx_lock(&ntoskrnl_dispatchlock); timer = arg; #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_timer_fires++; #endif ntoskrnl_remove_timer(timer); /* * This should never happen, but complain * if it does. */ if (timer->k_header.dh_inserted == FALSE) { mtx_unlock(&ntoskrnl_dispatchlock); printf("NTOS: timer %p fired even though " "it was canceled\n", timer); return; } /* Mark the timer as no longer being on the timer queue. */ timer->k_header.dh_inserted = FALSE; /* Now signal the object and satisfy any waits on it. */ timer->k_header.dh_sigstate = 1; ntoskrnl_waittest(&timer->k_header, IO_NO_INCREMENT); /* * If this is a periodic timer, re-arm it * so it will fire again. We do this before * calling any deferred procedure calls because * it's possible the DPC might cancel the timer, * in which case it would be wrong for us to * re-arm it again afterwards. */ if (timer->k_period) { tv.tv_sec = 0; tv.tv_usec = timer->k_period * 1000; timer->k_header.dh_inserted = TRUE; ntoskrnl_insert_timer(timer, tvtohz(&tv)); #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_timer_reloads++; #endif } dpc = timer->k_dpc; mtx_unlock(&ntoskrnl_dispatchlock); /* If there's a DPC associated with the timer, queue it up. 
*/ if (dpc != NULL) KeInsertQueueDpc(dpc, NULL, NULL); } #ifdef NTOSKRNL_DEBUG_TIMERS static int sysctl_show_timers(SYSCTL_HANDLER_ARGS) { int ret; ret = 0; ntoskrnl_show_timers(); return (sysctl_handle_int(oidp, &ret, 0, req)); } static void ntoskrnl_show_timers() { int i = 0; list_entry *l; mtx_lock_spin(&ntoskrnl_calllock); l = ntoskrnl_calllist.nle_flink; while(l != &ntoskrnl_calllist) { i++; l = l->nle_flink; } mtx_unlock_spin(&ntoskrnl_calllock); printf("\n"); printf("%d timers available (out of %d)\n", i, NTOSKRNL_TIMEOUTS); printf("timer sets: %qu\n", ntoskrnl_timer_sets); printf("timer reloads: %qu\n", ntoskrnl_timer_reloads); printf("timer cancels: %qu\n", ntoskrnl_timer_cancels); printf("timer fires: %qu\n", ntoskrnl_timer_fires); printf("\n"); } #endif /* * Must be called with dispatcher lock held. */ static void ntoskrnl_insert_timer(timer, ticks) ktimer *timer; int ticks; { callout_entry *e; list_entry *l; struct callout *c; /* * Try and allocate a timer. */ mtx_lock_spin(&ntoskrnl_calllock); if (IsListEmpty(&ntoskrnl_calllist)) { mtx_unlock_spin(&ntoskrnl_calllock); #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_show_timers(); #endif panic("out of timers!"); } l = RemoveHeadList(&ntoskrnl_calllist); mtx_unlock_spin(&ntoskrnl_calllock); e = CONTAINING_RECORD(l, callout_entry, ce_list); c = &e->ce_callout; timer->k_callout = c; callout_init(c, 1); callout_reset(c, ticks, ntoskrnl_timercall, timer); } static void ntoskrnl_remove_timer(timer) ktimer *timer; { callout_entry *e; e = (callout_entry *)timer->k_callout; callout_stop(timer->k_callout); mtx_lock_spin(&ntoskrnl_calllock); InsertHeadList((&ntoskrnl_calllist), (&e->ce_list)); mtx_unlock_spin(&ntoskrnl_calllock); } void KeInitializeTimer(timer) ktimer *timer; { if (timer == NULL) return; KeInitializeTimerEx(timer, EVENT_TYPE_NOTIFY); } void KeInitializeTimerEx(timer, type) ktimer *timer; uint32_t type; { if (timer == NULL) return; bzero((char *)timer, sizeof(ktimer)); InitializeListHead((&timer->k_header.dh_waitlisthead)); timer->k_header.dh_sigstate = FALSE; timer->k_header.dh_inserted = FALSE; if (type == EVENT_TYPE_NOTIFY) timer->k_header.dh_type = DISP_TYPE_NOTIFICATION_TIMER; else timer->k_header.dh_type = DISP_TYPE_SYNCHRONIZATION_TIMER; timer->k_header.dh_size = sizeof(ktimer) / sizeof(uint32_t); } /* * DPC subsystem. A Windows Defered Procedure Call has the following * properties: * - It runs at DISPATCH_LEVEL. * - It can have one of 3 importance values that control when it * runs relative to other DPCs in the queue. * - On SMP systems, it can be set to run on a specific processor. * In order to satisfy the last property, we create a DPC thread for * each CPU in the system and bind it to that CPU. Each thread * maintains three queues with different importance levels, which * will be processed in order from lowest to highest. * * In Windows, interrupt handlers run as DPCs. (Not to be confused * with ISRs, which run in interrupt context and can preempt DPCs.) * ISRs are given the highest importance so that they'll take * precedence over timers and other things. */ static void ntoskrnl_dpc_thread(arg) void *arg; { kdpc_queue *kq; kdpc *d; list_entry *l; uint8_t irql; kq = arg; InitializeListHead(&kq->kq_disp); kq->kq_td = curthread; kq->kq_exit = 0; kq->kq_running = FALSE; KeInitializeSpinLock(&kq->kq_lock); KeInitializeEvent(&kq->kq_proc, EVENT_TYPE_SYNC, FALSE); KeInitializeEvent(&kq->kq_done, EVENT_TYPE_SYNC, FALSE); /* * Elevate our priority. 
DPCs are used to run interrupt * handlers, and they should trigger as soon as possible * once scheduled by an ISR. */ thread_lock(curthread); #ifdef NTOSKRNL_MULTIPLE_DPCS sched_bind(curthread, kq->kq_cpu); #endif sched_prio(curthread, PRI_MIN_KERN); thread_unlock(curthread); while (1) { KeWaitForSingleObject(&kq->kq_proc, 0, 0, TRUE, NULL); KeAcquireSpinLock(&kq->kq_lock, &irql); if (kq->kq_exit) { kq->kq_exit = 0; KeReleaseSpinLock(&kq->kq_lock, irql); break; } kq->kq_running = TRUE; while (!IsListEmpty(&kq->kq_disp)) { l = RemoveHeadList((&kq->kq_disp)); d = CONTAINING_RECORD(l, kdpc, k_dpclistentry); InitializeListHead((&d->k_dpclistentry)); KeReleaseSpinLockFromDpcLevel(&kq->kq_lock); MSCALL4(d->k_deferedfunc, d, d->k_deferredctx, d->k_sysarg1, d->k_sysarg2); KeAcquireSpinLockAtDpcLevel(&kq->kq_lock); } kq->kq_running = FALSE; KeReleaseSpinLock(&kq->kq_lock, irql); KeSetEvent(&kq->kq_done, IO_NO_INCREMENT, FALSE); } kproc_exit(0); return; /* notreached */ } static void ntoskrnl_destroy_dpc_threads(void) { kdpc_queue *kq; kdpc dpc; int i; kq = kq_queues; #ifdef NTOSKRNL_MULTIPLE_DPCS for (i = 0; i < mp_ncpus; i++) { #else for (i = 0; i < 1; i++) { #endif kq += i; kq->kq_exit = 1; KeInitializeDpc(&dpc, NULL, NULL); KeSetTargetProcessorDpc(&dpc, i); KeInsertQueueDpc(&dpc, NULL, NULL); while (kq->kq_exit) tsleep(kq->kq_td->td_proc, PWAIT, "dpcw", hz/10); } } static uint8_t ntoskrnl_insert_dpc(head, dpc) list_entry *head; kdpc *dpc; { list_entry *l; kdpc *d; l = head->nle_flink; while (l != head) { d = CONTAINING_RECORD(l, kdpc, k_dpclistentry); if (d == dpc) return (FALSE); l = l->nle_flink; } if (dpc->k_importance == KDPC_IMPORTANCE_LOW) InsertTailList((head), (&dpc->k_dpclistentry)); else InsertHeadList((head), (&dpc->k_dpclistentry)); return (TRUE); } void KeInitializeDpc(dpc, dpcfunc, dpcctx) kdpc *dpc; void *dpcfunc; void *dpcctx; { if (dpc == NULL) return; dpc->k_deferedfunc = dpcfunc; dpc->k_deferredctx = dpcctx; dpc->k_num = KDPC_CPU_DEFAULT; dpc->k_importance = KDPC_IMPORTANCE_MEDIUM; InitializeListHead((&dpc->k_dpclistentry)); } uint8_t KeInsertQueueDpc(dpc, sysarg1, sysarg2) kdpc *dpc; void *sysarg1; void *sysarg2; { kdpc_queue *kq; uint8_t r; uint8_t irql; if (dpc == NULL) return (FALSE); kq = kq_queues; #ifdef NTOSKRNL_MULTIPLE_DPCS KeRaiseIrql(DISPATCH_LEVEL, &irql); /* * By default, the DPC is queued to run on the same CPU * that scheduled it. 
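/*
 * Illustrative stand-alone sketch (not part of this file): the importance
 * handling in ntoskrnl_insert_dpc() above boils down to "low importance
 * goes to the tail, everything else goes to the head", which is what gives
 * higher-importance DPCs earlier execution when the queue is drained.
 * A tiny demonstration of the resulting drain order, using an array in
 * place of the list_entry queue:
 */
#include <stdio.h>

#define NDPC	8

static const char *queue[NDPC];
static int nqueued;

static void
insert_dpc(const char *name, int low_importance)
{
	int i;

	if (low_importance) {
		queue[nqueued++] = name;	/* InsertTailList() */
	} else {
		for (i = nqueued; i > 0; i--)	/* InsertHeadList() */
			queue[i] = queue[i - 1];
		queue[0] = name;
		nqueued++;
	}
}

int
main(void)
{
	int i;

	insert_dpc("low-1", 1);
	insert_dpc("medium-1", 0);
	insert_dpc("low-2", 1);
	insert_dpc("high-1", 0);
	/* Drained from the head: high-1, medium-1, low-1, low-2. */
	for (i = 0; i < nqueued; i++)
		printf("%s\n", queue[i]);
	return (0);
}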
*/ if (dpc->k_num == KDPC_CPU_DEFAULT) kq += curthread->td_oncpu; else kq += dpc->k_num; KeAcquireSpinLockAtDpcLevel(&kq->kq_lock); #else KeAcquireSpinLock(&kq->kq_lock, &irql); #endif r = ntoskrnl_insert_dpc(&kq->kq_disp, dpc); if (r == TRUE) { dpc->k_sysarg1 = sysarg1; dpc->k_sysarg2 = sysarg2; } KeReleaseSpinLock(&kq->kq_lock, irql); if (r == FALSE) return (r); KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE); return (r); } uint8_t KeRemoveQueueDpc(dpc) kdpc *dpc; { kdpc_queue *kq; uint8_t irql; if (dpc == NULL) return (FALSE); #ifdef NTOSKRNL_MULTIPLE_DPCS KeRaiseIrql(DISPATCH_LEVEL, &irql); kq = kq_queues + dpc->k_num; KeAcquireSpinLockAtDpcLevel(&kq->kq_lock); #else kq = kq_queues; KeAcquireSpinLock(&kq->kq_lock, &irql); #endif if (dpc->k_dpclistentry.nle_flink == &dpc->k_dpclistentry) { KeReleaseSpinLockFromDpcLevel(&kq->kq_lock); KeLowerIrql(irql); return (FALSE); } RemoveEntryList((&dpc->k_dpclistentry)); InitializeListHead((&dpc->k_dpclistentry)); KeReleaseSpinLock(&kq->kq_lock, irql); return (TRUE); } void KeSetImportanceDpc(dpc, imp) kdpc *dpc; uint32_t imp; { if (imp != KDPC_IMPORTANCE_LOW && imp != KDPC_IMPORTANCE_MEDIUM && imp != KDPC_IMPORTANCE_HIGH) return; dpc->k_importance = (uint8_t)imp; } void KeSetTargetProcessorDpc(kdpc *dpc, uint8_t cpu) { if (cpu > mp_ncpus) return; dpc->k_num = cpu; } void KeFlushQueuedDpcs(void) { kdpc_queue *kq; int i; /* * Poke each DPC queue and wait * for them to drain. */ #ifdef NTOSKRNL_MULTIPLE_DPCS for (i = 0; i < mp_ncpus; i++) { #else for (i = 0; i < 1; i++) { #endif kq = kq_queues + i; KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE); KeWaitForSingleObject(&kq->kq_done, 0, 0, TRUE, NULL); } } uint32_t KeGetCurrentProcessorNumber(void) { return ((uint32_t)curthread->td_oncpu); } uint8_t KeSetTimerEx(timer, duetime, period, dpc) ktimer *timer; int64_t duetime; uint32_t period; kdpc *dpc; { struct timeval tv; uint64_t curtime; uint8_t pending; if (timer == NULL) return (FALSE); mtx_lock(&ntoskrnl_dispatchlock); if (timer->k_header.dh_inserted == TRUE) { ntoskrnl_remove_timer(timer); #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_timer_cancels++; #endif timer->k_header.dh_inserted = FALSE; pending = TRUE; } else pending = FALSE; timer->k_duetime = duetime; timer->k_period = period; timer->k_header.dh_sigstate = FALSE; timer->k_dpc = dpc; if (duetime < 0) { tv.tv_sec = - (duetime) / 10000000; tv.tv_usec = (- (duetime) / 10) - (tv.tv_sec * 1000000); } else { ntoskrnl_time(&curtime); if (duetime < curtime) tv.tv_sec = tv.tv_usec = 0; else { tv.tv_sec = ((duetime) - curtime) / 10000000; tv.tv_usec = ((duetime) - curtime) / 10 - (tv.tv_sec * 1000000); } } timer->k_header.dh_inserted = TRUE; ntoskrnl_insert_timer(timer, tvtohz(&tv)); #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_timer_sets++; #endif mtx_unlock(&ntoskrnl_dispatchlock); return (pending); } uint8_t KeSetTimer(timer, duetime, dpc) ktimer *timer; int64_t duetime; kdpc *dpc; { return (KeSetTimerEx(timer, duetime, 0, dpc)); } /* * The Windows DDK documentation seems to say that cancelling * a timer that has a DPC will result in the DPC also being * cancelled, but this isn't really the case. 
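/*
 * Illustrative stand-alone sketch (not part of this file): the due-time
 * handling in KeSetTimerEx() above follows the NT convention that the
 * value is in 100-nanosecond units, negative for an interval relative to
 * "now" and positive for an absolute time.  A quick check of the
 * relative-time conversion used in the duetime < 0 branch:
 */
#include <stdio.h>
#include <stdint.h>
#include <sys/time.h>

static struct timeval
duetime_to_tv(int64_t duetime)
{
	struct timeval tv;

	/* Same arithmetic as the duetime < 0 branch above. */
	tv.tv_sec = -duetime / 10000000;
	tv.tv_usec = (-duetime / 10) - (tv.tv_sec * 1000000);
	return (tv);
}

int
main(void)
{
	struct timeval tv;

	/* -50,000,000 units of 100ns = 5 seconds from now. */
	tv = duetime_to_tv(-50000000LL);
	printf("%ld s %ld us\n", (long)tv.tv_sec, (long)tv.tv_usec);

	/* -15,000,000 units of 100ns = 1.5 seconds from now. */
	tv = duetime_to_tv(-15000000LL);
	printf("%ld s %ld us\n", (long)tv.tv_sec, (long)tv.tv_usec);
	return (0);
}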
*/ uint8_t KeCancelTimer(timer) ktimer *timer; { uint8_t pending; if (timer == NULL) return (FALSE); mtx_lock(&ntoskrnl_dispatchlock); pending = timer->k_header.dh_inserted; if (timer->k_header.dh_inserted == TRUE) { timer->k_header.dh_inserted = FALSE; ntoskrnl_remove_timer(timer); #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_timer_cancels++; #endif } mtx_unlock(&ntoskrnl_dispatchlock); return (pending); } uint8_t KeReadStateTimer(timer) ktimer *timer; { return (timer->k_header.dh_sigstate); } static int32_t KeDelayExecutionThread(uint8_t wait_mode, uint8_t alertable, int64_t *interval) { ktimer timer; if (wait_mode != 0) panic("invalid wait_mode %d", wait_mode); KeInitializeTimer(&timer); KeSetTimer(&timer, *interval, NULL); KeWaitForSingleObject(&timer, 0, 0, alertable, NULL); return STATUS_SUCCESS; } static uint64_t KeQueryInterruptTime(void) { int ticks; struct timeval tv; getmicrouptime(&tv); ticks = tvtohz(&tv); return ticks * howmany(10000000, hz); } static struct thread * KeGetCurrentThread(void) { return curthread; } static int32_t KeSetPriorityThread(td, pri) struct thread *td; int32_t pri; { int32_t old; if (td == NULL) return LOW_REALTIME_PRIORITY; if (td->td_priority <= PRI_MIN_KERN) old = HIGH_PRIORITY; else if (td->td_priority >= PRI_MAX_KERN) old = LOW_PRIORITY; else old = LOW_REALTIME_PRIORITY; thread_lock(td); if (pri == HIGH_PRIORITY) sched_prio(td, PRI_MIN_KERN); if (pri == LOW_REALTIME_PRIORITY) sched_prio(td, PRI_MIN_KERN + (PRI_MAX_KERN - PRI_MIN_KERN) / 2); if (pri == LOW_PRIORITY) sched_prio(td, PRI_MAX_KERN); thread_unlock(td); return old; } static void dummy() { printf("ntoskrnl dummy called...\n"); } image_patch_table ntoskrnl_functbl[] = { IMPORT_SFUNC(RtlZeroMemory, 2), IMPORT_SFUNC(RtlSecureZeroMemory, 2), IMPORT_SFUNC(RtlFillMemory, 3), IMPORT_SFUNC(RtlMoveMemory, 3), IMPORT_SFUNC(RtlCharToInteger, 3), IMPORT_SFUNC(RtlCopyMemory, 3), IMPORT_SFUNC(RtlCopyString, 2), IMPORT_SFUNC(RtlCompareMemory, 3), IMPORT_SFUNC(RtlEqualUnicodeString, 3), IMPORT_SFUNC(RtlCopyUnicodeString, 2), IMPORT_SFUNC(RtlUnicodeStringToAnsiString, 3), IMPORT_SFUNC(RtlAnsiStringToUnicodeString, 3), IMPORT_SFUNC(RtlInitAnsiString, 2), IMPORT_SFUNC_MAP(RtlInitString, RtlInitAnsiString, 2), IMPORT_SFUNC(RtlInitUnicodeString, 2), IMPORT_SFUNC(RtlFreeAnsiString, 1), IMPORT_SFUNC(RtlFreeUnicodeString, 1), IMPORT_SFUNC(RtlUnicodeStringToInteger, 3), IMPORT_CFUNC(sprintf, 0), IMPORT_CFUNC(vsprintf, 0), IMPORT_CFUNC_MAP(_snprintf, snprintf, 0), IMPORT_CFUNC_MAP(_vsnprintf, vsnprintf, 0), IMPORT_CFUNC(DbgPrint, 0), IMPORT_SFUNC(DbgBreakPoint, 0), IMPORT_SFUNC(KeBugCheckEx, 5), IMPORT_CFUNC(strncmp, 0), IMPORT_CFUNC(strcmp, 0), IMPORT_CFUNC_MAP(stricmp, strcasecmp, 0), IMPORT_CFUNC(strncpy, 0), IMPORT_CFUNC(strcpy, 0), IMPORT_CFUNC(strlen, 0), IMPORT_CFUNC_MAP(toupper, ntoskrnl_toupper, 0), IMPORT_CFUNC_MAP(tolower, ntoskrnl_tolower, 0), IMPORT_CFUNC_MAP(strstr, ntoskrnl_strstr, 0), IMPORT_CFUNC_MAP(strncat, ntoskrnl_strncat, 0), IMPORT_CFUNC_MAP(strchr, index, 0), IMPORT_CFUNC_MAP(strrchr, rindex, 0), IMPORT_CFUNC(memcpy, 0), IMPORT_CFUNC_MAP(memmove, ntoskrnl_memmove, 0), IMPORT_CFUNC_MAP(memset, ntoskrnl_memset, 0), IMPORT_CFUNC_MAP(memchr, ntoskrnl_memchr, 0), IMPORT_SFUNC(IoAllocateDriverObjectExtension, 4), IMPORT_SFUNC(IoGetDriverObjectExtension, 2), IMPORT_FFUNC(IofCallDriver, 2), IMPORT_FFUNC(IofCompleteRequest, 2), IMPORT_SFUNC(IoAcquireCancelSpinLock, 1), IMPORT_SFUNC(IoReleaseCancelSpinLock, 1), IMPORT_SFUNC(IoCancelIrp, 1), IMPORT_SFUNC(IoConnectInterrupt, 11), 
IMPORT_SFUNC(IoDisconnectInterrupt, 1), IMPORT_SFUNC(IoCreateDevice, 7), IMPORT_SFUNC(IoDeleteDevice, 1), IMPORT_SFUNC(IoGetAttachedDevice, 1), IMPORT_SFUNC(IoAttachDeviceToDeviceStack, 2), IMPORT_SFUNC(IoDetachDevice, 1), IMPORT_SFUNC(IoBuildSynchronousFsdRequest, 7), IMPORT_SFUNC(IoBuildAsynchronousFsdRequest, 6), IMPORT_SFUNC(IoBuildDeviceIoControlRequest, 9), IMPORT_SFUNC(IoAllocateIrp, 2), IMPORT_SFUNC(IoReuseIrp, 2), IMPORT_SFUNC(IoMakeAssociatedIrp, 2), IMPORT_SFUNC(IoFreeIrp, 1), IMPORT_SFUNC(IoInitializeIrp, 3), IMPORT_SFUNC(KeAcquireInterruptSpinLock, 1), IMPORT_SFUNC(KeReleaseInterruptSpinLock, 2), IMPORT_SFUNC(KeSynchronizeExecution, 3), IMPORT_SFUNC(KeWaitForSingleObject, 5), IMPORT_SFUNC(KeWaitForMultipleObjects, 8), IMPORT_SFUNC(_allmul, 4), IMPORT_SFUNC(_alldiv, 4), IMPORT_SFUNC(_allrem, 4), IMPORT_RFUNC(_allshr, 0), IMPORT_RFUNC(_allshl, 0), IMPORT_SFUNC(_aullmul, 4), IMPORT_SFUNC(_aulldiv, 4), IMPORT_SFUNC(_aullrem, 4), IMPORT_RFUNC(_aullshr, 0), IMPORT_RFUNC(_aullshl, 0), IMPORT_CFUNC(atoi, 0), IMPORT_CFUNC(atol, 0), IMPORT_CFUNC(rand, 0), IMPORT_CFUNC(srand, 0), IMPORT_SFUNC(WRITE_REGISTER_USHORT, 2), IMPORT_SFUNC(READ_REGISTER_USHORT, 1), IMPORT_SFUNC(WRITE_REGISTER_ULONG, 2), IMPORT_SFUNC(READ_REGISTER_ULONG, 1), IMPORT_SFUNC(READ_REGISTER_UCHAR, 1), IMPORT_SFUNC(WRITE_REGISTER_UCHAR, 2), IMPORT_SFUNC(ExInitializePagedLookasideList, 7), IMPORT_SFUNC(ExDeletePagedLookasideList, 1), IMPORT_SFUNC(ExInitializeNPagedLookasideList, 7), IMPORT_SFUNC(ExDeleteNPagedLookasideList, 1), IMPORT_FFUNC(InterlockedPopEntrySList, 1), IMPORT_FFUNC(InitializeSListHead, 1), IMPORT_FFUNC(InterlockedPushEntrySList, 2), IMPORT_SFUNC(ExQueryDepthSList, 1), IMPORT_FFUNC_MAP(ExpInterlockedPopEntrySList, InterlockedPopEntrySList, 1), IMPORT_FFUNC_MAP(ExpInterlockedPushEntrySList, InterlockedPushEntrySList, 2), IMPORT_FFUNC(ExInterlockedPopEntrySList, 2), IMPORT_FFUNC(ExInterlockedPushEntrySList, 3), IMPORT_SFUNC(ExAllocatePoolWithTag, 3), IMPORT_SFUNC(ExFreePoolWithTag, 2), IMPORT_SFUNC(ExFreePool, 1), #ifdef __i386__ IMPORT_FFUNC(KefAcquireSpinLockAtDpcLevel, 1), IMPORT_FFUNC(KefReleaseSpinLockFromDpcLevel,1), IMPORT_FFUNC(KeAcquireSpinLockRaiseToDpc, 1), #else /* * For AMD64, we can get away with just mapping * KeAcquireSpinLockRaiseToDpc() directly to KfAcquireSpinLock() * because the calling conventions end up being the same. * On i386, we have to be careful because KfAcquireSpinLock() * is _fastcall but KeAcquireSpinLockRaiseToDpc() isn't. 
*/ IMPORT_SFUNC(KeAcquireSpinLockAtDpcLevel, 1), IMPORT_SFUNC(KeReleaseSpinLockFromDpcLevel, 1), IMPORT_SFUNC_MAP(KeAcquireSpinLockRaiseToDpc, KfAcquireSpinLock, 1), #endif IMPORT_SFUNC_MAP(KeReleaseSpinLock, KfReleaseSpinLock, 1), IMPORT_FFUNC(InterlockedIncrement, 1), IMPORT_FFUNC(InterlockedDecrement, 1), IMPORT_FFUNC(InterlockedExchange, 2), IMPORT_FFUNC(ExInterlockedAddLargeStatistic, 2), IMPORT_SFUNC(IoAllocateMdl, 5), IMPORT_SFUNC(IoFreeMdl, 1), IMPORT_SFUNC(MmAllocateContiguousMemory, 2 + 1), IMPORT_SFUNC(MmAllocateContiguousMemorySpecifyCache, 5 + 3), IMPORT_SFUNC(MmFreeContiguousMemory, 1), IMPORT_SFUNC(MmFreeContiguousMemorySpecifyCache, 3), IMPORT_SFUNC(MmSizeOfMdl, 1), IMPORT_SFUNC(MmMapLockedPages, 2), IMPORT_SFUNC(MmMapLockedPagesSpecifyCache, 6), IMPORT_SFUNC(MmUnmapLockedPages, 2), IMPORT_SFUNC(MmBuildMdlForNonPagedPool, 1), IMPORT_SFUNC(MmGetPhysicalAddress, 1), IMPORT_SFUNC(MmGetSystemRoutineAddress, 1), IMPORT_SFUNC(MmIsAddressValid, 1), IMPORT_SFUNC(MmMapIoSpace, 3 + 1), IMPORT_SFUNC(MmUnmapIoSpace, 2), IMPORT_SFUNC(KeInitializeSpinLock, 1), IMPORT_SFUNC(IoIsWdmVersionAvailable, 2), IMPORT_SFUNC(IoOpenDeviceRegistryKey, 4), IMPORT_SFUNC(IoGetDeviceObjectPointer, 4), IMPORT_SFUNC(IoGetDeviceProperty, 5), IMPORT_SFUNC(IoAllocateWorkItem, 1), IMPORT_SFUNC(IoFreeWorkItem, 1), IMPORT_SFUNC(IoQueueWorkItem, 4), IMPORT_SFUNC(ExQueueWorkItem, 2), IMPORT_SFUNC(ntoskrnl_workitem, 2), IMPORT_SFUNC(KeInitializeMutex, 2), IMPORT_SFUNC(KeReleaseMutex, 2), IMPORT_SFUNC(KeReadStateMutex, 1), IMPORT_SFUNC(KeInitializeEvent, 3), IMPORT_SFUNC(KeSetEvent, 3), IMPORT_SFUNC(KeResetEvent, 1), IMPORT_SFUNC(KeClearEvent, 1), IMPORT_SFUNC(KeReadStateEvent, 1), IMPORT_SFUNC(KeInitializeTimer, 1), IMPORT_SFUNC(KeInitializeTimerEx, 2), IMPORT_SFUNC(KeSetTimer, 3), IMPORT_SFUNC(KeSetTimerEx, 4), IMPORT_SFUNC(KeCancelTimer, 1), IMPORT_SFUNC(KeReadStateTimer, 1), IMPORT_SFUNC(KeInitializeDpc, 3), IMPORT_SFUNC(KeInsertQueueDpc, 3), IMPORT_SFUNC(KeRemoveQueueDpc, 1), IMPORT_SFUNC(KeSetImportanceDpc, 2), IMPORT_SFUNC(KeSetTargetProcessorDpc, 2), IMPORT_SFUNC(KeFlushQueuedDpcs, 0), IMPORT_SFUNC(KeGetCurrentProcessorNumber, 1), IMPORT_SFUNC(ObReferenceObjectByHandle, 6), IMPORT_FFUNC(ObfDereferenceObject, 1), IMPORT_SFUNC(ZwClose, 1), IMPORT_SFUNC(PsCreateSystemThread, 7), IMPORT_SFUNC(PsTerminateSystemThread, 1), IMPORT_SFUNC(IoWMIRegistrationControl, 2), IMPORT_SFUNC(WmiQueryTraceInformation, 5), IMPORT_CFUNC(WmiTraceMessage, 0), IMPORT_SFUNC(KeQuerySystemTime, 1), IMPORT_CFUNC(KeTickCount, 0), IMPORT_SFUNC(KeDelayExecutionThread, 3), IMPORT_SFUNC(KeQueryInterruptTime, 0), IMPORT_SFUNC(KeGetCurrentThread, 0), IMPORT_SFUNC(KeSetPriorityThread, 2), /* * This last entry is a catch-all for any function we haven't * implemented yet. The PE import list patching routine will * use it for any function that doesn't have an explicit match * in this table. */ { NULL, (FUNC)dummy, NULL, 0, WINDRV_WRAP_STDCALL }, /* End of list. */ { NULL, NULL, NULL } }; Index: head/sys/dev/agp/agp.c =================================================================== --- head/sys/dev/agp/agp.c (revision 338106) +++ head/sys/dev/agp/agp.c (revision 338107) @@ -1,1059 +1,1059 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2000 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_agp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(agp, 1); MALLOC_DEFINE(M_AGP, "agp", "AGP data structures"); /* agp_drv.c */ static d_open_t agp_open; static d_close_t agp_close; static d_ioctl_t agp_ioctl; static d_mmap_t agp_mmap; static struct cdevsw agp_cdevsw = { .d_version = D_VERSION, .d_flags = D_NEEDGIANT, .d_open = agp_open, .d_close = agp_close, .d_ioctl = agp_ioctl, .d_mmap = agp_mmap, .d_name = "agp", }; static devclass_t agp_devclass; /* Helper functions for implementing chipset mini drivers. */ u_int8_t agp_find_caps(device_t dev) { int capreg; if (pci_find_cap(dev, PCIY_AGP, &capreg) != 0) capreg = 0; return (capreg); } /* * Find an AGP display device (if any). 
*/ static device_t agp_find_display(void) { devclass_t pci = devclass_find("pci"); device_t bus, dev = 0; device_t *kids; int busnum, numkids, i; for (busnum = 0; busnum < devclass_get_maxunit(pci); busnum++) { bus = devclass_get_device(pci, busnum); if (!bus) continue; if (device_get_children(bus, &kids, &numkids) != 0) continue; for (i = 0; i < numkids; i++) { dev = kids[i]; if (pci_get_class(dev) == PCIC_DISPLAY && pci_get_subclass(dev) == PCIS_DISPLAY_VGA) if (agp_find_caps(dev)) { free(kids, M_TEMP); return dev; } } free(kids, M_TEMP); } return 0; } struct agp_gatt * agp_alloc_gatt(device_t dev) { u_int32_t apsize = AGP_GET_APERTURE(dev); u_int32_t entries = apsize >> AGP_PAGE_SHIFT; struct agp_gatt *gatt; if (bootverbose) device_printf(dev, "allocating GATT for aperture of size %dM\n", apsize / (1024*1024)); if (entries == 0) { device_printf(dev, "bad aperture size\n"); return NULL; } gatt = malloc(sizeof(struct agp_gatt), M_AGP, M_NOWAIT); if (!gatt) return 0; gatt->ag_entries = entries; - gatt->ag_virtual = (void *)kmem_alloc_contig(kernel_arena, - entries * sizeof(u_int32_t), M_NOWAIT | M_ZERO, 0, ~0, PAGE_SIZE, - 0, VM_MEMATTR_WRITE_COMBINING); + gatt->ag_virtual = (void *)kmem_alloc_contig(entries * + sizeof(u_int32_t), M_NOWAIT | M_ZERO, 0, ~0, PAGE_SIZE, 0, + VM_MEMATTR_WRITE_COMBINING); if (!gatt->ag_virtual) { if (bootverbose) device_printf(dev, "contiguous allocation failed\n"); free(gatt, M_AGP); return 0; } gatt->ag_physical = vtophys((vm_offset_t) gatt->ag_virtual); return gatt; } void agp_free_gatt(struct agp_gatt *gatt) { kmem_free(kernel_arena, (vm_offset_t)gatt->ag_virtual, gatt->ag_entries * sizeof(u_int32_t)); free(gatt, M_AGP); } static u_int agp_max[][2] = { {0, 0}, {32, 4}, {64, 28}, {128, 96}, {256, 204}, {512, 440}, {1024, 942}, {2048, 1920}, {4096, 3932} }; #define AGP_MAX_SIZE nitems(agp_max) /** * Sets the PCI resource which represents the AGP aperture. * * If not called, the default AGP aperture resource of AGP_APBASE will * be used. Must be called before agp_generic_attach(). */ void agp_set_aperture_resource(device_t dev, int rid) { struct agp_softc *sc = device_get_softc(dev); sc->as_aperture_rid = rid; } int agp_generic_attach(device_t dev) { struct agp_softc *sc = device_get_softc(dev); int i; u_int memsize; /* * Find and map the aperture, RF_SHAREABLE for DRM but not RF_ACTIVE * because the kernel doesn't need to map it. */ if (sc->as_aperture_rid != -1) { if (sc->as_aperture_rid == 0) sc->as_aperture_rid = AGP_APBASE; sc->as_aperture = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->as_aperture_rid, RF_SHAREABLE); if (!sc->as_aperture) return ENOMEM; } /* * Work out an upper bound for agp memory allocation. This * uses a heurisitc table from the Linux driver. */ memsize = ptoa(realmem) >> 20; for (i = 0; i < AGP_MAX_SIZE; i++) { if (memsize <= agp_max[i][0]) break; } if (i == AGP_MAX_SIZE) i = AGP_MAX_SIZE - 1; sc->as_maxmem = agp_max[i][1] << 20U; /* * The lock is used to prevent re-entry to * agp_generic_bind_memory() since that function can sleep. */ mtx_init(&sc->as_lock, "agp lock", NULL, MTX_DEF); /* * Initialise stuff for the userland device. 
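/*
 * Illustrative stand-alone sketch (not part of this file): the as_maxmem
 * sizing in agp_generic_attach() above walks the agp_max[][] table until
 * it finds the first row whose first column (system memory, MB) is at
 * least the installed memory size, clamping to the last row, and uses the
 * second column (MB) as the AGP allocation ceiling.  The same lookup in
 * isolation:
 */
#include <stdio.h>

static const unsigned agp_max[][2] = {
	{0, 0}, {32, 4}, {64, 28}, {128, 96}, {256, 204},
	{512, 440}, {1024, 942}, {2048, 1920}, {4096, 3932}
};
#define AGP_MAX_SIZE	(sizeof(agp_max) / sizeof(agp_max[0]))

static unsigned
agp_max_mem_mb(unsigned memsize_mb)
{
	unsigned i;

	for (i = 0; i < AGP_MAX_SIZE; i++)
		if (memsize_mb <= agp_max[i][0])
			break;
	if (i == AGP_MAX_SIZE)
		i = AGP_MAX_SIZE - 1;
	return (agp_max[i][1]);
}

int
main(void)
{
	/* A 96MB machine falls into the 128MB row: at most 96MB of AGP memory. */
	printf("96MB RAM   -> %uMB\n", agp_max_mem_mb(96));
	/* Anything beyond the table is clamped to the 4096MB row. */
	printf("8192MB RAM -> %uMB\n", agp_max_mem_mb(8192));
	return (0);
}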
*/ agp_devclass = devclass_find("agp"); TAILQ_INIT(&sc->as_memory); sc->as_nextid = 1; sc->as_devnode = make_dev(&agp_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "agpgart"); sc->as_devnode->si_drv1 = dev; return 0; } void agp_free_cdev(device_t dev) { struct agp_softc *sc = device_get_softc(dev); destroy_dev(sc->as_devnode); } void agp_free_res(device_t dev) { struct agp_softc *sc = device_get_softc(dev); if (sc->as_aperture != NULL) bus_release_resource(dev, SYS_RES_MEMORY, sc->as_aperture_rid, sc->as_aperture); mtx_destroy(&sc->as_lock); } int agp_generic_detach(device_t dev) { agp_free_cdev(dev); agp_free_res(dev); return 0; } /** * Default AGP aperture size detection which simply returns the size of * the aperture's PCI resource. */ u_int32_t agp_generic_get_aperture(device_t dev) { struct agp_softc *sc = device_get_softc(dev); return rman_get_size(sc->as_aperture); } /** * Default AGP aperture size setting function, which simply doesn't allow * changes to resource size. */ int agp_generic_set_aperture(device_t dev, u_int32_t aperture) { u_int32_t current_aperture; current_aperture = AGP_GET_APERTURE(dev); if (current_aperture != aperture) return EINVAL; else return 0; } /* * This does the enable logic for v3, with the same topology * restrictions as in place for v2 -- one bus, one device on the bus. */ static int agp_v3_enable(device_t dev, device_t mdev, u_int32_t mode) { u_int32_t tstatus, mstatus; u_int32_t command; int rq, sba, fw, rate, arqsz, cal; tstatus = pci_read_config(dev, agp_find_caps(dev) + AGP_STATUS, 4); mstatus = pci_read_config(mdev, agp_find_caps(mdev) + AGP_STATUS, 4); /* Set RQ to the min of mode, tstatus and mstatus */ rq = AGP_MODE_GET_RQ(mode); if (AGP_MODE_GET_RQ(tstatus) < rq) rq = AGP_MODE_GET_RQ(tstatus); if (AGP_MODE_GET_RQ(mstatus) < rq) rq = AGP_MODE_GET_RQ(mstatus); /* * ARQSZ - Set the value to the maximum one. * Don't allow the mode register to override values. */ arqsz = AGP_MODE_GET_ARQSZ(mode); if (AGP_MODE_GET_ARQSZ(tstatus) > rq) rq = AGP_MODE_GET_ARQSZ(tstatus); if (AGP_MODE_GET_ARQSZ(mstatus) > rq) rq = AGP_MODE_GET_ARQSZ(mstatus); /* Calibration cycle - don't allow override by mode register */ cal = AGP_MODE_GET_CAL(tstatus); if (AGP_MODE_GET_CAL(mstatus) < cal) cal = AGP_MODE_GET_CAL(mstatus); /* SBA must be supported for AGP v3. */ sba = 1; /* Set FW if all three support it. 
*/ fw = (AGP_MODE_GET_FW(tstatus) & AGP_MODE_GET_FW(mstatus) & AGP_MODE_GET_FW(mode)); /* Figure out the max rate */ rate = (AGP_MODE_GET_RATE(tstatus) & AGP_MODE_GET_RATE(mstatus) & AGP_MODE_GET_RATE(mode)); if (rate & AGP_MODE_V3_RATE_8x) rate = AGP_MODE_V3_RATE_8x; else rate = AGP_MODE_V3_RATE_4x; if (bootverbose) device_printf(dev, "Setting AGP v3 mode %d\n", rate * 4); pci_write_config(dev, agp_find_caps(dev) + AGP_COMMAND, 0, 4); /* Construct the new mode word and tell the hardware */ command = 0; command = AGP_MODE_SET_RQ(0, rq); command = AGP_MODE_SET_ARQSZ(command, arqsz); command = AGP_MODE_SET_CAL(command, cal); command = AGP_MODE_SET_SBA(command, sba); command = AGP_MODE_SET_FW(command, fw); command = AGP_MODE_SET_RATE(command, rate); command = AGP_MODE_SET_MODE_3(command, 1); command = AGP_MODE_SET_AGP(command, 1); pci_write_config(dev, agp_find_caps(dev) + AGP_COMMAND, command, 4); pci_write_config(mdev, agp_find_caps(mdev) + AGP_COMMAND, command, 4); return 0; } static int agp_v2_enable(device_t dev, device_t mdev, u_int32_t mode) { u_int32_t tstatus, mstatus; u_int32_t command; int rq, sba, fw, rate; tstatus = pci_read_config(dev, agp_find_caps(dev) + AGP_STATUS, 4); mstatus = pci_read_config(mdev, agp_find_caps(mdev) + AGP_STATUS, 4); /* Set RQ to the min of mode, tstatus and mstatus */ rq = AGP_MODE_GET_RQ(mode); if (AGP_MODE_GET_RQ(tstatus) < rq) rq = AGP_MODE_GET_RQ(tstatus); if (AGP_MODE_GET_RQ(mstatus) < rq) rq = AGP_MODE_GET_RQ(mstatus); /* Set SBA if all three can deal with SBA */ sba = (AGP_MODE_GET_SBA(tstatus) & AGP_MODE_GET_SBA(mstatus) & AGP_MODE_GET_SBA(mode)); /* Similar for FW */ fw = (AGP_MODE_GET_FW(tstatus) & AGP_MODE_GET_FW(mstatus) & AGP_MODE_GET_FW(mode)); /* Figure out the max rate */ rate = (AGP_MODE_GET_RATE(tstatus) & AGP_MODE_GET_RATE(mstatus) & AGP_MODE_GET_RATE(mode)); if (rate & AGP_MODE_V2_RATE_4x) rate = AGP_MODE_V2_RATE_4x; else if (rate & AGP_MODE_V2_RATE_2x) rate = AGP_MODE_V2_RATE_2x; else rate = AGP_MODE_V2_RATE_1x; if (bootverbose) device_printf(dev, "Setting AGP v2 mode %d\n", rate); /* Construct the new mode word and tell the hardware */ command = 0; command = AGP_MODE_SET_RQ(0, rq); command = AGP_MODE_SET_SBA(command, sba); command = AGP_MODE_SET_FW(command, fw); command = AGP_MODE_SET_RATE(command, rate); command = AGP_MODE_SET_AGP(command, 1); pci_write_config(dev, agp_find_caps(dev) + AGP_COMMAND, command, 4); pci_write_config(mdev, agp_find_caps(mdev) + AGP_COMMAND, command, 4); return 0; } int agp_generic_enable(device_t dev, u_int32_t mode) { device_t mdev = agp_find_display(); u_int32_t tstatus, mstatus; if (!mdev) { AGP_DPF("can't find display\n"); return ENXIO; } tstatus = pci_read_config(dev, agp_find_caps(dev) + AGP_STATUS, 4); mstatus = pci_read_config(mdev, agp_find_caps(mdev) + AGP_STATUS, 4); /* * Check display and bridge for AGP v3 support. AGP v3 allows * more variety in topology than v2, e.g. multiple AGP devices * attached to one bridge, or multiple AGP bridges in one * system. This doesn't attempt to address those situations, * but should work fine for a classic single AGP slot system * with AGP v3. 
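/*
 * Illustrative stand-alone sketch (not part of this file): both enable
 * paths above negotiate the data rate the same way, by ANDing together the
 * rate bit-fields advertised by the bridge, the display device and the
 * requested mode, then picking the fastest bit still set.  The example
 * below assumes the usual AGP v2 rate bit values (1x = 0x1, 2x = 0x2,
 * 4x = 0x4); those constants are an assumption here.
 */
#include <stdio.h>

#define RATE_1X	0x1
#define RATE_2X	0x2
#define RATE_4X	0x4

static unsigned
negotiate_rate(unsigned tstatus, unsigned mstatus, unsigned mode)
{
	unsigned rate = tstatus & mstatus & mode;

	if (rate & RATE_4X)
		return (RATE_4X);
	if (rate & RATE_2X)
		return (RATE_2X);
	return (RATE_1X);
}

int
main(void)
{
	/* Bridge does 4x/2x/1x, card does 2x/1x, the caller asks for anything. */
	printf("negotiated: %ux\n",
	    negotiate_rate(RATE_4X | RATE_2X | RATE_1X,
	    RATE_2X | RATE_1X, RATE_4X | RATE_2X | RATE_1X));
	return (0);
}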
*/ if (AGP_MODE_GET_MODE_3(mode) && AGP_MODE_GET_MODE_3(tstatus) && AGP_MODE_GET_MODE_3(mstatus)) return (agp_v3_enable(dev, mdev, mode)); else return (agp_v2_enable(dev, mdev, mode)); } struct agp_memory * agp_generic_alloc_memory(device_t dev, int type, vm_size_t size) { struct agp_softc *sc = device_get_softc(dev); struct agp_memory *mem; if ((size & (AGP_PAGE_SIZE - 1)) != 0) return 0; if (size > sc->as_maxmem - sc->as_allocated) return 0; if (type != 0) { printf("agp_generic_alloc_memory: unsupported type %d\n", type); return 0; } mem = malloc(sizeof *mem, M_AGP, M_WAITOK); mem->am_id = sc->as_nextid++; mem->am_size = size; mem->am_type = 0; mem->am_obj = vm_object_allocate(OBJT_DEFAULT, atop(round_page(size))); mem->am_physical = 0; mem->am_offset = 0; mem->am_is_bound = 0; TAILQ_INSERT_TAIL(&sc->as_memory, mem, am_link); sc->as_allocated += size; return mem; } int agp_generic_free_memory(device_t dev, struct agp_memory *mem) { struct agp_softc *sc = device_get_softc(dev); if (mem->am_is_bound) return EBUSY; sc->as_allocated -= mem->am_size; TAILQ_REMOVE(&sc->as_memory, mem, am_link); vm_object_deallocate(mem->am_obj); free(mem, M_AGP); return 0; } int agp_generic_bind_memory(device_t dev, struct agp_memory *mem, vm_offset_t offset) { struct agp_softc *sc = device_get_softc(dev); vm_offset_t i, j, k; vm_page_t m; int error; /* Do some sanity checks first. */ if ((offset & (AGP_PAGE_SIZE - 1)) != 0 || offset + mem->am_size > AGP_GET_APERTURE(dev)) { device_printf(dev, "binding memory at bad offset %#x\n", (int)offset); return EINVAL; } /* * Allocate the pages early, before acquiring the lock, * because vm_page_grab() may sleep and we can't hold a mutex * while sleeping. */ VM_OBJECT_WLOCK(mem->am_obj); for (i = 0; i < mem->am_size; i += PAGE_SIZE) { /* * Find a page from the object and wire it * down. This page will be mapped using one or more * entries in the GATT (assuming that PAGE_SIZE >= * AGP_PAGE_SIZE. If this is the first call to bind, * the pages will be allocated and zeroed. */ m = vm_page_grab(mem->am_obj, OFF_TO_IDX(i), VM_ALLOC_WIRED | VM_ALLOC_ZERO); AGP_DPF("found page pa=%#jx\n", (uintmax_t)VM_PAGE_TO_PHYS(m)); } VM_OBJECT_WUNLOCK(mem->am_obj); mtx_lock(&sc->as_lock); if (mem->am_is_bound) { device_printf(dev, "memory already bound\n"); error = EINVAL; VM_OBJECT_WLOCK(mem->am_obj); i = 0; goto bad; } /* * Bind the individual pages and flush the chipset's * TLB. */ VM_OBJECT_WLOCK(mem->am_obj); for (i = 0; i < mem->am_size; i += PAGE_SIZE) { m = vm_page_lookup(mem->am_obj, OFF_TO_IDX(i)); /* * Install entries in the GATT, making sure that if * AGP_PAGE_SIZE < PAGE_SIZE and mem->am_size is not * aligned to PAGE_SIZE, we don't modify too many GATT * entries. */ for (j = 0; j < PAGE_SIZE && i + j < mem->am_size; j += AGP_PAGE_SIZE) { vm_offset_t pa = VM_PAGE_TO_PHYS(m) + j; AGP_DPF("binding offset %#jx to pa %#jx\n", (uintmax_t)offset + i + j, (uintmax_t)pa); error = AGP_BIND_PAGE(dev, offset + i + j, pa); if (error) { /* * Bail out. Reverse all the mappings * and unwire the pages. */ for (k = 0; k < i + j; k += AGP_PAGE_SIZE) AGP_UNBIND_PAGE(dev, offset + k); goto bad; } } vm_page_xunbusy(m); } VM_OBJECT_WUNLOCK(mem->am_obj); /* * Make sure the chipset gets the new mappings. 
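/*
 * Illustrative stand-alone sketch (not part of this file): the nested
 * loops in agp_generic_bind_memory() above install one GATT entry per AGP
 * page while walking the bound object one VM page at a time.  Enumerating
 * the (aperture offset, physical address) pairs the same way, for a
 * hypothetical two-page object where AGP_PAGE_SIZE is half of PAGE_SIZE
 * (both sizes and addresses below are made up to make the split visible):
 */
#include <stdio.h>

#define PAGE_SIZE	8192u	/* hypothetical VM page size */
#define AGP_PAGE_SIZE	4096u	/* hypothetical AGP page size */

int
main(void)
{
	/* Physical pages backing the object, as vm_page_lookup() would find. */
	unsigned long long pages[] = { 0x40000000ULL, 0x7fff0000ULL };
	unsigned long long offset = 0x100000;	/* binding offset in aperture */
	unsigned size = 2 * PAGE_SIZE;
	unsigned i, j;

	for (i = 0; i < size; i += PAGE_SIZE)
		for (j = 0; j < PAGE_SIZE && i + j < size; j += AGP_PAGE_SIZE)
			printf("GATT: aperture %#llx -> pa %#llx\n",
			    offset + i + j, pages[i / PAGE_SIZE] + j);
	return (0);
}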
*/ AGP_FLUSH_TLB(dev); mem->am_offset = offset; mem->am_is_bound = 1; mtx_unlock(&sc->as_lock); return 0; bad: mtx_unlock(&sc->as_lock); VM_OBJECT_ASSERT_WLOCKED(mem->am_obj); for (k = 0; k < mem->am_size; k += PAGE_SIZE) { m = vm_page_lookup(mem->am_obj, OFF_TO_IDX(k)); if (k >= i) vm_page_xunbusy(m); vm_page_lock(m); vm_page_unwire(m, PQ_INACTIVE); vm_page_unlock(m); } VM_OBJECT_WUNLOCK(mem->am_obj); return error; } int agp_generic_unbind_memory(device_t dev, struct agp_memory *mem) { struct agp_softc *sc = device_get_softc(dev); vm_page_t m; int i; mtx_lock(&sc->as_lock); if (!mem->am_is_bound) { device_printf(dev, "memory is not bound\n"); mtx_unlock(&sc->as_lock); return EINVAL; } /* * Unbind the individual pages and flush the chipset's * TLB. Unwire the pages so they can be swapped. */ for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE) AGP_UNBIND_PAGE(dev, mem->am_offset + i); AGP_FLUSH_TLB(dev); VM_OBJECT_WLOCK(mem->am_obj); for (i = 0; i < mem->am_size; i += PAGE_SIZE) { m = vm_page_lookup(mem->am_obj, atop(i)); vm_page_lock(m); vm_page_unwire(m, PQ_INACTIVE); vm_page_unlock(m); } VM_OBJECT_WUNLOCK(mem->am_obj); mem->am_offset = 0; mem->am_is_bound = 0; mtx_unlock(&sc->as_lock); return 0; } /* Helper functions for implementing user/kernel api */ static int agp_acquire_helper(device_t dev, enum agp_acquire_state state) { struct agp_softc *sc = device_get_softc(dev); if (sc->as_state != AGP_ACQUIRE_FREE) return EBUSY; sc->as_state = state; return 0; } static int agp_release_helper(device_t dev, enum agp_acquire_state state) { struct agp_softc *sc = device_get_softc(dev); if (sc->as_state == AGP_ACQUIRE_FREE) return 0; if (sc->as_state != state) return EBUSY; sc->as_state = AGP_ACQUIRE_FREE; return 0; } static struct agp_memory * agp_find_memory(device_t dev, int id) { struct agp_softc *sc = device_get_softc(dev); struct agp_memory *mem; AGP_DPF("searching for memory block %d\n", id); TAILQ_FOREACH(mem, &sc->as_memory, am_link) { AGP_DPF("considering memory block %d\n", mem->am_id); if (mem->am_id == id) return mem; } return 0; } /* Implementation of the userland ioctl api */ static int agp_info_user(device_t dev, agp_info *info) { struct agp_softc *sc = device_get_softc(dev); bzero(info, sizeof *info); info->bridge_id = pci_get_devid(dev); info->agp_mode = pci_read_config(dev, agp_find_caps(dev) + AGP_STATUS, 4); if (sc->as_aperture) info->aper_base = rman_get_start(sc->as_aperture); else info->aper_base = 0; info->aper_size = AGP_GET_APERTURE(dev) >> 20; info->pg_total = info->pg_system = sc->as_maxmem >> AGP_PAGE_SHIFT; info->pg_used = sc->as_allocated >> AGP_PAGE_SHIFT; return 0; } static int agp_setup_user(device_t dev, agp_setup *setup) { return AGP_ENABLE(dev, setup->agp_mode); } static int agp_allocate_user(device_t dev, agp_allocate *alloc) { struct agp_memory *mem; mem = AGP_ALLOC_MEMORY(dev, alloc->type, alloc->pg_count << AGP_PAGE_SHIFT); if (mem) { alloc->key = mem->am_id; alloc->physical = mem->am_physical; return 0; } else { return ENOMEM; } } static int agp_deallocate_user(device_t dev, int id) { struct agp_memory *mem = agp_find_memory(dev, id); if (mem) { AGP_FREE_MEMORY(dev, mem); return 0; } else { return ENOENT; } } static int agp_bind_user(device_t dev, agp_bind *bind) { struct agp_memory *mem = agp_find_memory(dev, bind->key); if (!mem) return ENOENT; return AGP_BIND_MEMORY(dev, mem, bind->pg_start << AGP_PAGE_SHIFT); } static int agp_unbind_user(device_t dev, agp_unbind *unbind) { struct agp_memory *mem = agp_find_memory(dev, unbind->key); if (!mem) return 
ENOENT; return AGP_UNBIND_MEMORY(dev, mem); } static int agp_chipset_flush(device_t dev) { return (AGP_CHIPSET_FLUSH(dev)); } static int agp_open(struct cdev *kdev, int oflags, int devtype, struct thread *td) { device_t dev = kdev->si_drv1; struct agp_softc *sc = device_get_softc(dev); if (!sc->as_isopen) { sc->as_isopen = 1; device_busy(dev); } return 0; } static int agp_close(struct cdev *kdev, int fflag, int devtype, struct thread *td) { device_t dev = kdev->si_drv1; struct agp_softc *sc = device_get_softc(dev); struct agp_memory *mem; /* * Clear the GATT and force release on last close */ while ((mem = TAILQ_FIRST(&sc->as_memory)) != NULL) { if (mem->am_is_bound) AGP_UNBIND_MEMORY(dev, mem); AGP_FREE_MEMORY(dev, mem); } if (sc->as_state == AGP_ACQUIRE_USER) agp_release_helper(dev, AGP_ACQUIRE_USER); sc->as_isopen = 0; device_unbusy(dev); return 0; } static int agp_ioctl(struct cdev *kdev, u_long cmd, caddr_t data, int fflag, struct thread *td) { device_t dev = kdev->si_drv1; switch (cmd) { case AGPIOC_INFO: return agp_info_user(dev, (agp_info *) data); case AGPIOC_ACQUIRE: return agp_acquire_helper(dev, AGP_ACQUIRE_USER); case AGPIOC_RELEASE: return agp_release_helper(dev, AGP_ACQUIRE_USER); case AGPIOC_SETUP: return agp_setup_user(dev, (agp_setup *)data); case AGPIOC_ALLOCATE: return agp_allocate_user(dev, (agp_allocate *)data); case AGPIOC_DEALLOCATE: return agp_deallocate_user(dev, *(int *) data); case AGPIOC_BIND: return agp_bind_user(dev, (agp_bind *)data); case AGPIOC_UNBIND: return agp_unbind_user(dev, (agp_unbind *)data); case AGPIOC_CHIPSET_FLUSH: return agp_chipset_flush(dev); } return EINVAL; } static int agp_mmap(struct cdev *kdev, vm_ooffset_t offset, vm_paddr_t *paddr, int prot, vm_memattr_t *memattr) { device_t dev = kdev->si_drv1; struct agp_softc *sc = device_get_softc(dev); if (offset > AGP_GET_APERTURE(dev)) return -1; if (sc->as_aperture == NULL) return -1; *paddr = rman_get_start(sc->as_aperture) + offset; return 0; } /* Implementation of the kernel api */ device_t agp_find_device() { device_t *children, child; int i, count; if (!agp_devclass) return NULL; if (devclass_get_devices(agp_devclass, &children, &count) != 0) return NULL; child = NULL; for (i = 0; i < count; i++) { if (device_is_attached(children[i])) { child = children[i]; break; } } free(children, M_TEMP); return child; } enum agp_acquire_state agp_state(device_t dev) { struct agp_softc *sc = device_get_softc(dev); return sc->as_state; } void agp_get_info(device_t dev, struct agp_info *info) { struct agp_softc *sc = device_get_softc(dev); info->ai_mode = pci_read_config(dev, agp_find_caps(dev) + AGP_STATUS, 4); if (sc->as_aperture != NULL) info->ai_aperture_base = rman_get_start(sc->as_aperture); else info->ai_aperture_base = 0; info->ai_aperture_size = AGP_GET_APERTURE(dev); info->ai_memory_allowed = sc->as_maxmem; info->ai_memory_used = sc->as_allocated; } int agp_acquire(device_t dev) { return agp_acquire_helper(dev, AGP_ACQUIRE_KERNEL); } int agp_release(device_t dev) { return agp_release_helper(dev, AGP_ACQUIRE_KERNEL); } int agp_enable(device_t dev, u_int32_t mode) { return AGP_ENABLE(dev, mode); } void *agp_alloc_memory(device_t dev, int type, vm_size_t bytes) { return (void *) AGP_ALLOC_MEMORY(dev, type, bytes); } void agp_free_memory(device_t dev, void *handle) { struct agp_memory *mem = (struct agp_memory *) handle; AGP_FREE_MEMORY(dev, mem); } int agp_bind_memory(device_t dev, void *handle, vm_offset_t offset) { struct agp_memory *mem = (struct agp_memory *) handle; return 
AGP_BIND_MEMORY(dev, mem, offset); } int agp_unbind_memory(device_t dev, void *handle) { struct agp_memory *mem = (struct agp_memory *) handle; return AGP_UNBIND_MEMORY(dev, mem); } void agp_memory_info(device_t dev, void *handle, struct agp_memory_info *mi) { struct agp_memory *mem = (struct agp_memory *) handle; mi->ami_size = mem->am_size; mi->ami_physical = mem->am_physical; mi->ami_offset = mem->am_offset; mi->ami_is_bound = mem->am_is_bound; } int agp_bind_pages(device_t dev, vm_page_t *pages, vm_size_t size, vm_offset_t offset) { struct agp_softc *sc; vm_offset_t i, j, k, pa; vm_page_t m; int error; if ((size & (AGP_PAGE_SIZE - 1)) != 0 || (offset & (AGP_PAGE_SIZE - 1)) != 0) return (EINVAL); sc = device_get_softc(dev); mtx_lock(&sc->as_lock); for (i = 0; i < size; i += PAGE_SIZE) { m = pages[OFF_TO_IDX(i)]; KASSERT(m->wire_count > 0, ("agp_bind_pages: page %p hasn't been wired", m)); /* * Install entries in the GATT, making sure that if * AGP_PAGE_SIZE < PAGE_SIZE and size is not * aligned to PAGE_SIZE, we don't modify too many GATT * entries. */ for (j = 0; j < PAGE_SIZE && i + j < size; j += AGP_PAGE_SIZE) { pa = VM_PAGE_TO_PHYS(m) + j; AGP_DPF("binding offset %#jx to pa %#jx\n", (uintmax_t)offset + i + j, (uintmax_t)pa); error = AGP_BIND_PAGE(dev, offset + i + j, pa); if (error) { /* * Bail out. Reverse all the mappings. */ for (k = 0; k < i + j; k += AGP_PAGE_SIZE) AGP_UNBIND_PAGE(dev, offset + k); mtx_unlock(&sc->as_lock); return (error); } } } AGP_FLUSH_TLB(dev); mtx_unlock(&sc->as_lock); return (0); } int agp_unbind_pages(device_t dev, vm_size_t size, vm_offset_t offset) { struct agp_softc *sc; vm_offset_t i; if ((size & (AGP_PAGE_SIZE - 1)) != 0 || (offset & (AGP_PAGE_SIZE - 1)) != 0) return (EINVAL); sc = device_get_softc(dev); mtx_lock(&sc->as_lock); for (i = 0; i < size; i += AGP_PAGE_SIZE) AGP_UNBIND_PAGE(dev, offset + i); AGP_FLUSH_TLB(dev); mtx_unlock(&sc->as_lock); return (0); } Index: head/sys/dev/agp/agp_i810.c =================================================================== --- head/sys/dev/agp/agp_i810.c (revision 338106) +++ head/sys/dev/agp/agp_i810.c (revision 338107) @@ -1,2376 +1,2375 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2000 Doug Rabson * Copyright (c) 2000 Ruslan Ermilov * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Fixes for 830/845G support: David Dawes * 852GM/855GM/865G support added by David Dawes * * This is generic Intel GTT handling code, morphed from the AGP * bridge code. */ #include __FBSDID("$FreeBSD$"); #if 0 #define KTR_AGP_I810 KTR_DEV #else #define KTR_AGP_I810 0 #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DECLARE(M_AGP); struct agp_i810_match; static int agp_i810_check_active(device_t bridge_dev); static int agp_i830_check_active(device_t bridge_dev); static int agp_i915_check_active(device_t bridge_dev); static void agp_82852_set_desc(device_t dev, const struct agp_i810_match *match); static void agp_i810_set_desc(device_t dev, const struct agp_i810_match *match); static void agp_i810_dump_regs(device_t dev); static void agp_i830_dump_regs(device_t dev); static void agp_i855_dump_regs(device_t dev); static void agp_i915_dump_regs(device_t dev); static void agp_i965_dump_regs(device_t dev); static int agp_i810_get_stolen_size(device_t dev); static int agp_i830_get_stolen_size(device_t dev); static int agp_i915_get_stolen_size(device_t dev); static int agp_i810_get_gtt_mappable_entries(device_t dev); static int agp_i830_get_gtt_mappable_entries(device_t dev); static int agp_i915_get_gtt_mappable_entries(device_t dev); static int agp_i810_get_gtt_total_entries(device_t dev); static int agp_i965_get_gtt_total_entries(device_t dev); static int agp_gen5_get_gtt_total_entries(device_t dev); static int agp_i810_install_gatt(device_t dev); static int agp_i830_install_gatt(device_t dev); static int agp_i965_install_gatt(device_t dev); static int agp_g4x_install_gatt(device_t dev); static void agp_i810_deinstall_gatt(device_t dev); static void agp_i830_deinstall_gatt(device_t dev); static void agp_i810_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical, int flags); static void agp_i830_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical, int flags); static void agp_i915_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical, int flags); static void agp_i965_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical, int flags); static void agp_g4x_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical, int flags); static void agp_i810_write_gtt(device_t dev, u_int index, uint32_t pte); static void agp_i915_write_gtt(device_t dev, u_int index, uint32_t pte); static void agp_i965_write_gtt(device_t dev, u_int index, uint32_t pte); static void agp_g4x_write_gtt(device_t dev, u_int index, uint32_t pte); static u_int32_t agp_i810_read_gtt_pte(device_t dev, u_int index); static u_int32_t agp_i915_read_gtt_pte(device_t dev, u_int index); static u_int32_t agp_i965_read_gtt_pte(device_t dev, u_int index); static u_int32_t agp_g4x_read_gtt_pte(device_t dev, u_int index); static vm_paddr_t 
agp_i810_read_gtt_pte_paddr(device_t dev, u_int index); static vm_paddr_t agp_i915_read_gtt_pte_paddr(device_t dev, u_int index); static int agp_i810_set_aperture(device_t dev, u_int32_t aperture); static int agp_i830_set_aperture(device_t dev, u_int32_t aperture); static int agp_i915_set_aperture(device_t dev, u_int32_t aperture); static int agp_i810_chipset_flush_setup(device_t dev); static int agp_i915_chipset_flush_setup(device_t dev); static int agp_i965_chipset_flush_setup(device_t dev); static void agp_i810_chipset_flush_teardown(device_t dev); static void agp_i915_chipset_flush_teardown(device_t dev); static void agp_i965_chipset_flush_teardown(device_t dev); static void agp_i810_chipset_flush(device_t dev); static void agp_i830_chipset_flush(device_t dev); static void agp_i915_chipset_flush(device_t dev); enum { CHIP_I810, /* i810/i815 */ CHIP_I830, /* 830M/845G */ CHIP_I855, /* 852GM/855GM/865G */ CHIP_I915, /* 915G/915GM */ CHIP_I965, /* G965 */ CHIP_G33, /* G33/Q33/Q35 */ CHIP_IGD, /* Pineview */ CHIP_G4X, /* G45/Q45 */ }; /* The i810 through i855 have the registers at BAR 1, and the GATT gets * allocated by us. The i915 has registers in BAR 0 and the GATT is at the * start of the stolen memory, and should only be accessed by the OS through * BAR 3. The G965 has registers and GATT in the same BAR (0) -- first 512KB * is registers, second 512KB is GATT. */ static struct resource_spec agp_i810_res_spec[] = { { SYS_RES_MEMORY, AGP_I810_MMADR, RF_ACTIVE | RF_SHAREABLE }, { -1, 0 } }; static struct resource_spec agp_i915_res_spec[] = { { SYS_RES_MEMORY, AGP_I915_MMADR, RF_ACTIVE | RF_SHAREABLE }, { SYS_RES_MEMORY, AGP_I915_GTTADR, RF_ACTIVE | RF_SHAREABLE }, { -1, 0 } }; static struct resource_spec agp_i965_res_spec[] = { { SYS_RES_MEMORY, AGP_I965_GTTMMADR, RF_ACTIVE | RF_SHAREABLE }, { SYS_RES_MEMORY, AGP_I965_APBASE, RF_ACTIVE | RF_SHAREABLE }, { -1, 0 } }; struct agp_i810_softc { struct agp_softc agp; u_int32_t initial_aperture; /* aperture size at startup */ struct agp_gatt *gatt; u_int32_t dcache_size; /* i810 only */ u_int32_t stolen; /* number of i830/845 gtt entries for stolen memory */ u_int stolen_size; /* BIOS-reserved graphics memory */ u_int gtt_total_entries; /* Total number of gtt ptes */ u_int gtt_mappable_entries; /* Number of gtt ptes mappable by CPU */ device_t bdev; /* bridge device */ void *argb_cursor; /* contigmalloc area for ARGB cursor */ struct resource *sc_res[2]; const struct agp_i810_match *match; int sc_flush_page_rid; struct resource *sc_flush_page_res; void *sc_flush_page_vaddr; int sc_bios_allocated_flush_page; }; static device_t intel_agp; struct agp_i810_driver { int chiptype; int gen; int busdma_addr_mask_sz; struct resource_spec *res_spec; int (*check_active)(device_t); void (*set_desc)(device_t, const struct agp_i810_match *); void (*dump_regs)(device_t); int (*get_stolen_size)(device_t); int (*get_gtt_total_entries)(device_t); int (*get_gtt_mappable_entries)(device_t); int (*install_gatt)(device_t); void (*deinstall_gatt)(device_t); void (*write_gtt)(device_t, u_int, uint32_t); void (*install_gtt_pte)(device_t, u_int, vm_offset_t, int); u_int32_t (*read_gtt_pte)(device_t, u_int); vm_paddr_t (*read_gtt_pte_paddr)(device_t , u_int); int (*set_aperture)(device_t, u_int32_t); int (*chipset_flush_setup)(device_t); void (*chipset_flush_teardown)(device_t); void (*chipset_flush)(device_t); }; static struct { struct intel_gtt base; } intel_private; static const struct agp_i810_driver agp_i810_i810_driver = { .chiptype = CHIP_I810, .gen = 1, 
.busdma_addr_mask_sz = 32, .res_spec = agp_i810_res_spec, .check_active = agp_i810_check_active, .set_desc = agp_i810_set_desc, .dump_regs = agp_i810_dump_regs, .get_stolen_size = agp_i810_get_stolen_size, .get_gtt_mappable_entries = agp_i810_get_gtt_mappable_entries, .get_gtt_total_entries = agp_i810_get_gtt_total_entries, .install_gatt = agp_i810_install_gatt, .deinstall_gatt = agp_i810_deinstall_gatt, .write_gtt = agp_i810_write_gtt, .install_gtt_pte = agp_i810_install_gtt_pte, .read_gtt_pte = agp_i810_read_gtt_pte, .read_gtt_pte_paddr = agp_i810_read_gtt_pte_paddr, .set_aperture = agp_i810_set_aperture, .chipset_flush_setup = agp_i810_chipset_flush_setup, .chipset_flush_teardown = agp_i810_chipset_flush_teardown, .chipset_flush = agp_i810_chipset_flush, }; static const struct agp_i810_driver agp_i810_i815_driver = { .chiptype = CHIP_I810, .gen = 2, .busdma_addr_mask_sz = 32, .res_spec = agp_i810_res_spec, .check_active = agp_i810_check_active, .set_desc = agp_i810_set_desc, .dump_regs = agp_i810_dump_regs, .get_stolen_size = agp_i810_get_stolen_size, .get_gtt_mappable_entries = agp_i830_get_gtt_mappable_entries, .get_gtt_total_entries = agp_i810_get_gtt_total_entries, .install_gatt = agp_i810_install_gatt, .deinstall_gatt = agp_i810_deinstall_gatt, .write_gtt = agp_i810_write_gtt, .install_gtt_pte = agp_i810_install_gtt_pte, .read_gtt_pte = agp_i810_read_gtt_pte, .read_gtt_pte_paddr = agp_i810_read_gtt_pte_paddr, .set_aperture = agp_i810_set_aperture, .chipset_flush_setup = agp_i810_chipset_flush_setup, .chipset_flush_teardown = agp_i810_chipset_flush_teardown, .chipset_flush = agp_i830_chipset_flush, }; static const struct agp_i810_driver agp_i810_i830_driver = { .chiptype = CHIP_I830, .gen = 2, .busdma_addr_mask_sz = 32, .res_spec = agp_i810_res_spec, .check_active = agp_i830_check_active, .set_desc = agp_i810_set_desc, .dump_regs = agp_i830_dump_regs, .get_stolen_size = agp_i830_get_stolen_size, .get_gtt_mappable_entries = agp_i830_get_gtt_mappable_entries, .get_gtt_total_entries = agp_i810_get_gtt_total_entries, .install_gatt = agp_i830_install_gatt, .deinstall_gatt = agp_i830_deinstall_gatt, .write_gtt = agp_i810_write_gtt, .install_gtt_pte = agp_i830_install_gtt_pte, .read_gtt_pte = agp_i810_read_gtt_pte, .read_gtt_pte_paddr = agp_i810_read_gtt_pte_paddr, .set_aperture = agp_i830_set_aperture, .chipset_flush_setup = agp_i810_chipset_flush_setup, .chipset_flush_teardown = agp_i810_chipset_flush_teardown, .chipset_flush = agp_i830_chipset_flush, }; static const struct agp_i810_driver agp_i810_i855_driver = { .chiptype = CHIP_I855, .gen = 2, .busdma_addr_mask_sz = 32, .res_spec = agp_i810_res_spec, .check_active = agp_i830_check_active, .set_desc = agp_82852_set_desc, .dump_regs = agp_i855_dump_regs, .get_stolen_size = agp_i915_get_stolen_size, .get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries, .get_gtt_total_entries = agp_i810_get_gtt_total_entries, .install_gatt = agp_i830_install_gatt, .deinstall_gatt = agp_i830_deinstall_gatt, .write_gtt = agp_i810_write_gtt, .install_gtt_pte = agp_i830_install_gtt_pte, .read_gtt_pte = agp_i810_read_gtt_pte, .read_gtt_pte_paddr = agp_i810_read_gtt_pte_paddr, .set_aperture = agp_i830_set_aperture, .chipset_flush_setup = agp_i810_chipset_flush_setup, .chipset_flush_teardown = agp_i810_chipset_flush_teardown, .chipset_flush = agp_i830_chipset_flush, }; static const struct agp_i810_driver agp_i810_i865_driver = { .chiptype = CHIP_I855, .gen = 2, .busdma_addr_mask_sz = 32, .res_spec = agp_i810_res_spec, .check_active = 
agp_i830_check_active, .set_desc = agp_i810_set_desc, .dump_regs = agp_i855_dump_regs, .get_stolen_size = agp_i915_get_stolen_size, .get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries, .get_gtt_total_entries = agp_i810_get_gtt_total_entries, .install_gatt = agp_i830_install_gatt, .deinstall_gatt = agp_i830_deinstall_gatt, .write_gtt = agp_i810_write_gtt, .install_gtt_pte = agp_i830_install_gtt_pte, .read_gtt_pte = agp_i810_read_gtt_pte, .read_gtt_pte_paddr = agp_i810_read_gtt_pte_paddr, .set_aperture = agp_i915_set_aperture, .chipset_flush_setup = agp_i810_chipset_flush_setup, .chipset_flush_teardown = agp_i810_chipset_flush_teardown, .chipset_flush = agp_i830_chipset_flush, }; static const struct agp_i810_driver agp_i810_i915_driver = { .chiptype = CHIP_I915, .gen = 3, .busdma_addr_mask_sz = 32, .res_spec = agp_i915_res_spec, .check_active = agp_i915_check_active, .set_desc = agp_i810_set_desc, .dump_regs = agp_i915_dump_regs, .get_stolen_size = agp_i915_get_stolen_size, .get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries, .get_gtt_total_entries = agp_i810_get_gtt_total_entries, .install_gatt = agp_i830_install_gatt, .deinstall_gatt = agp_i830_deinstall_gatt, .write_gtt = agp_i915_write_gtt, .install_gtt_pte = agp_i915_install_gtt_pte, .read_gtt_pte = agp_i915_read_gtt_pte, .read_gtt_pte_paddr = agp_i915_read_gtt_pte_paddr, .set_aperture = agp_i915_set_aperture, .chipset_flush_setup = agp_i915_chipset_flush_setup, .chipset_flush_teardown = agp_i915_chipset_flush_teardown, .chipset_flush = agp_i915_chipset_flush, }; static const struct agp_i810_driver agp_i810_g33_driver = { .chiptype = CHIP_G33, .gen = 3, .busdma_addr_mask_sz = 36, .res_spec = agp_i915_res_spec, .check_active = agp_i915_check_active, .set_desc = agp_i810_set_desc, .dump_regs = agp_i965_dump_regs, .get_stolen_size = agp_i915_get_stolen_size, .get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries, .get_gtt_total_entries = agp_i965_get_gtt_total_entries, .install_gatt = agp_i830_install_gatt, .deinstall_gatt = agp_i830_deinstall_gatt, .write_gtt = agp_i915_write_gtt, .install_gtt_pte = agp_i915_install_gtt_pte, .read_gtt_pte = agp_i915_read_gtt_pte, .read_gtt_pte_paddr = agp_i915_read_gtt_pte_paddr, .set_aperture = agp_i915_set_aperture, .chipset_flush_setup = agp_i965_chipset_flush_setup, .chipset_flush_teardown = agp_i965_chipset_flush_teardown, .chipset_flush = agp_i915_chipset_flush, }; static const struct agp_i810_driver agp_i810_igd_driver = { .chiptype = CHIP_IGD, .gen = 3, .busdma_addr_mask_sz = 36, .res_spec = agp_i915_res_spec, .check_active = agp_i915_check_active, .set_desc = agp_i810_set_desc, .dump_regs = agp_i915_dump_regs, .get_stolen_size = agp_i915_get_stolen_size, .get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries, .get_gtt_total_entries = agp_i965_get_gtt_total_entries, .install_gatt = agp_i830_install_gatt, .deinstall_gatt = agp_i830_deinstall_gatt, .write_gtt = agp_i915_write_gtt, .install_gtt_pte = agp_i915_install_gtt_pte, .read_gtt_pte = agp_i915_read_gtt_pte, .read_gtt_pte_paddr = agp_i915_read_gtt_pte_paddr, .set_aperture = agp_i915_set_aperture, .chipset_flush_setup = agp_i965_chipset_flush_setup, .chipset_flush_teardown = agp_i965_chipset_flush_teardown, .chipset_flush = agp_i915_chipset_flush, }; static const struct agp_i810_driver agp_i810_g965_driver = { .chiptype = CHIP_I965, .gen = 4, .busdma_addr_mask_sz = 36, .res_spec = agp_i965_res_spec, .check_active = agp_i915_check_active, .set_desc = agp_i810_set_desc, .dump_regs = agp_i965_dump_regs, 
.get_stolen_size = agp_i915_get_stolen_size, .get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries, .get_gtt_total_entries = agp_i965_get_gtt_total_entries, .install_gatt = agp_i965_install_gatt, .deinstall_gatt = agp_i830_deinstall_gatt, .write_gtt = agp_i965_write_gtt, .install_gtt_pte = agp_i965_install_gtt_pte, .read_gtt_pte = agp_i965_read_gtt_pte, .read_gtt_pte_paddr = agp_i915_read_gtt_pte_paddr, .set_aperture = agp_i915_set_aperture, .chipset_flush_setup = agp_i965_chipset_flush_setup, .chipset_flush_teardown = agp_i965_chipset_flush_teardown, .chipset_flush = agp_i915_chipset_flush, }; static const struct agp_i810_driver agp_i810_g4x_driver = { .chiptype = CHIP_G4X, .gen = 5, .busdma_addr_mask_sz = 36, .res_spec = agp_i965_res_spec, .check_active = agp_i915_check_active, .set_desc = agp_i810_set_desc, .dump_regs = agp_i965_dump_regs, .get_stolen_size = agp_i915_get_stolen_size, .get_gtt_mappable_entries = agp_i915_get_gtt_mappable_entries, .get_gtt_total_entries = agp_gen5_get_gtt_total_entries, .install_gatt = agp_g4x_install_gatt, .deinstall_gatt = agp_i830_deinstall_gatt, .write_gtt = agp_g4x_write_gtt, .install_gtt_pte = agp_g4x_install_gtt_pte, .read_gtt_pte = agp_g4x_read_gtt_pte, .read_gtt_pte_paddr = agp_i915_read_gtt_pte_paddr, .set_aperture = agp_i915_set_aperture, .chipset_flush_setup = agp_i965_chipset_flush_setup, .chipset_flush_teardown = agp_i965_chipset_flush_teardown, .chipset_flush = agp_i915_chipset_flush, }; /* For adding new devices, devid is the id of the graphics controller * (pci:0:2:0, for example). The placeholder (usually at pci:0:2:1) for the * second head should never be added. The bridge_offset is the offset to * subtract from devid to get the id of the hostb that the device is on. */ static const struct agp_i810_match { int devid; char *name; const struct agp_i810_driver *driver; } agp_i810_matches[] = { { .devid = 0x71218086, .name = "Intel 82810 (i810 GMCH) SVGA controller", .driver = &agp_i810_i810_driver }, { .devid = 0x71238086, .name = "Intel 82810-DC100 (i810-DC100 GMCH) SVGA controller", .driver = &agp_i810_i810_driver }, { .devid = 0x71258086, .name = "Intel 82810E (i810E GMCH) SVGA controller", .driver = &agp_i810_i810_driver }, { .devid = 0x11328086, .name = "Intel 82815 (i815 GMCH) SVGA controller", .driver = &agp_i810_i815_driver }, { .devid = 0x35778086, .name = "Intel 82830M (830M GMCH) SVGA controller", .driver = &agp_i810_i830_driver }, { .devid = 0x25628086, .name = "Intel 82845M (845M GMCH) SVGA controller", .driver = &agp_i810_i830_driver }, { .devid = 0x35828086, .name = "Intel 82852/855GM SVGA controller", .driver = &agp_i810_i855_driver }, { .devid = 0x25728086, .name = "Intel 82865G (865G GMCH) SVGA controller", .driver = &agp_i810_i865_driver }, { .devid = 0x25828086, .name = "Intel 82915G (915G GMCH) SVGA controller", .driver = &agp_i810_i915_driver }, { .devid = 0x258A8086, .name = "Intel E7221 SVGA controller", .driver = &agp_i810_i915_driver }, { .devid = 0x25928086, .name = "Intel 82915GM (915GM GMCH) SVGA controller", .driver = &agp_i810_i915_driver }, { .devid = 0x27728086, .name = "Intel 82945G (945G GMCH) SVGA controller", .driver = &agp_i810_i915_driver }, { .devid = 0x27A28086, .name = "Intel 82945GM (945GM GMCH) SVGA controller", .driver = &agp_i810_i915_driver }, { .devid = 0x27AE8086, .name = "Intel 945GME SVGA controller", .driver = &agp_i810_i915_driver }, { .devid = 0x29728086, .name = "Intel 946GZ SVGA controller", .driver = &agp_i810_g965_driver }, { .devid = 0x29828086, .name = "Intel G965 SVGA 
controller", .driver = &agp_i810_g965_driver }, { .devid = 0x29928086, .name = "Intel Q965 SVGA controller", .driver = &agp_i810_g965_driver }, { .devid = 0x29A28086, .name = "Intel G965 SVGA controller", .driver = &agp_i810_g965_driver }, { .devid = 0x29B28086, .name = "Intel Q35 SVGA controller", .driver = &agp_i810_g33_driver }, { .devid = 0x29C28086, .name = "Intel G33 SVGA controller", .driver = &agp_i810_g33_driver }, { .devid = 0x29D28086, .name = "Intel Q33 SVGA controller", .driver = &agp_i810_g33_driver }, { .devid = 0xA0018086, .name = "Intel Pineview SVGA controller", .driver = &agp_i810_igd_driver }, { .devid = 0xA0118086, .name = "Intel Pineview (M) SVGA controller", .driver = &agp_i810_igd_driver }, { .devid = 0x2A028086, .name = "Intel GM965 SVGA controller", .driver = &agp_i810_g965_driver }, { .devid = 0x2A128086, .name = "Intel GME965 SVGA controller", .driver = &agp_i810_g965_driver }, { .devid = 0x2A428086, .name = "Intel GM45 SVGA controller", .driver = &agp_i810_g4x_driver }, { .devid = 0x2E028086, .name = "Intel Eaglelake SVGA controller", .driver = &agp_i810_g4x_driver }, { .devid = 0x2E128086, .name = "Intel Q45 SVGA controller", .driver = &agp_i810_g4x_driver }, { .devid = 0x2E228086, .name = "Intel G45 SVGA controller", .driver = &agp_i810_g4x_driver }, { .devid = 0x2E328086, .name = "Intel G41 SVGA controller", .driver = &agp_i810_g4x_driver }, { .devid = 0x00428086, .name = "Intel Ironlake (D) SVGA controller", .driver = &agp_i810_g4x_driver }, { .devid = 0x00468086, .name = "Intel Ironlake (M) SVGA controller", .driver = &agp_i810_g4x_driver }, { .devid = 0, } }; static const struct agp_i810_match* agp_i810_match(device_t dev) { int i, devid; if (pci_get_class(dev) != PCIC_DISPLAY || (pci_get_subclass(dev) != PCIS_DISPLAY_VGA && pci_get_subclass(dev) != PCIS_DISPLAY_OTHER)) return (NULL); devid = pci_get_devid(dev); for (i = 0; agp_i810_matches[i].devid != 0; i++) { if (agp_i810_matches[i].devid == devid) break; } if (agp_i810_matches[i].devid == 0) return (NULL); else return (&agp_i810_matches[i]); } /* * Find bridge device. 
*/ static device_t agp_i810_find_bridge(device_t dev) { return (pci_find_dbsf(0, 0, 0, 0)); } static void agp_i810_identify(driver_t *driver, device_t parent) { if (device_find_child(parent, "agp", -1) == NULL && agp_i810_match(parent)) device_add_child(parent, "agp", -1); } static int agp_i810_check_active(device_t bridge_dev) { u_int8_t smram; smram = pci_read_config(bridge_dev, AGP_I810_SMRAM, 1); if ((smram & AGP_I810_SMRAM_GMS) == AGP_I810_SMRAM_GMS_DISABLED) return (ENXIO); return (0); } static int agp_i830_check_active(device_t bridge_dev) { int gcc1; gcc1 = pci_read_config(bridge_dev, AGP_I830_GCC1, 1); if ((gcc1 & AGP_I830_GCC1_DEV2) == AGP_I830_GCC1_DEV2_DISABLED) return (ENXIO); return (0); } static int agp_i915_check_active(device_t bridge_dev) { int deven; deven = pci_read_config(bridge_dev, AGP_I915_DEVEN, 4); if ((deven & AGP_I915_DEVEN_D2F0) == AGP_I915_DEVEN_D2F0_DISABLED) return (ENXIO); return (0); } static void agp_82852_set_desc(device_t dev, const struct agp_i810_match *match) { switch (pci_read_config(dev, AGP_I85X_CAPID, 1)) { case AGP_I855_GME: device_set_desc(dev, "Intel 82855GME (855GME GMCH) SVGA controller"); break; case AGP_I855_GM: device_set_desc(dev, "Intel 82855GM (855GM GMCH) SVGA controller"); break; case AGP_I852_GME: device_set_desc(dev, "Intel 82852GME (852GME GMCH) SVGA controller"); break; case AGP_I852_GM: device_set_desc(dev, "Intel 82852GM (852GM GMCH) SVGA controller"); break; default: device_set_desc(dev, "Intel 8285xM (85xGM GMCH) SVGA controller"); break; } } static void agp_i810_set_desc(device_t dev, const struct agp_i810_match *match) { device_set_desc(dev, match->name); } static int agp_i810_probe(device_t dev) { device_t bdev; const struct agp_i810_match *match; int err; if (resource_disabled("agp", device_get_unit(dev))) return (ENXIO); match = agp_i810_match(dev); if (match == NULL) return (ENXIO); bdev = agp_i810_find_bridge(dev); if (bdev == NULL) { if (bootverbose) printf("I810: can't find bridge device\n"); return (ENXIO); } /* * checking whether internal graphics device has been activated. 
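 * Each chipset family exposes this through a different bridge register
 * (SMRAM on i810, GCC1 on i830, DEVEN on i915 and later); if the BIOS
 * has disabled the internal graphics device, the probe is aborted.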
*/ err = match->driver->check_active(bdev); if (err != 0) { if (bootverbose) printf("i810: disabled, not probing\n"); return (err); } match->driver->set_desc(dev, match); return (BUS_PROBE_DEFAULT); } static void agp_i810_dump_regs(device_t dev) { struct agp_i810_softc *sc = device_get_softc(dev); device_printf(dev, "AGP_I810_PGTBL_CTL: %08x\n", bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL)); device_printf(dev, "AGP_I810_MISCC: 0x%04x\n", pci_read_config(sc->bdev, AGP_I810_MISCC, 2)); } static void agp_i830_dump_regs(device_t dev) { struct agp_i810_softc *sc = device_get_softc(dev); device_printf(dev, "AGP_I810_PGTBL_CTL: %08x\n", bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL)); device_printf(dev, "AGP_I830_GCC1: 0x%02x\n", pci_read_config(sc->bdev, AGP_I830_GCC1, 1)); } static void agp_i855_dump_regs(device_t dev) { struct agp_i810_softc *sc = device_get_softc(dev); device_printf(dev, "AGP_I810_PGTBL_CTL: %08x\n", bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL)); device_printf(dev, "AGP_I855_GCC1: 0x%02x\n", pci_read_config(sc->bdev, AGP_I855_GCC1, 1)); } static void agp_i915_dump_regs(device_t dev) { struct agp_i810_softc *sc = device_get_softc(dev); device_printf(dev, "AGP_I810_PGTBL_CTL: %08x\n", bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL)); device_printf(dev, "AGP_I855_GCC1: 0x%02x\n", pci_read_config(sc->bdev, AGP_I855_GCC1, 1)); device_printf(dev, "AGP_I915_MSAC: 0x%02x\n", pci_read_config(sc->bdev, AGP_I915_MSAC, 1)); } static void agp_i965_dump_regs(device_t dev) { struct agp_i810_softc *sc = device_get_softc(dev); device_printf(dev, "AGP_I965_PGTBL_CTL2: %08x\n", bus_read_4(sc->sc_res[0], AGP_I965_PGTBL_CTL2)); device_printf(dev, "AGP_I855_GCC1: 0x%02x\n", pci_read_config(sc->bdev, AGP_I855_GCC1, 1)); device_printf(dev, "AGP_I965_MSAC: 0x%02x\n", pci_read_config(sc->bdev, AGP_I965_MSAC, 1)); } static int agp_i810_get_stolen_size(device_t dev) { struct agp_i810_softc *sc; sc = device_get_softc(dev); sc->stolen = 0; sc->stolen_size = 0; return (0); } static int agp_i830_get_stolen_size(device_t dev) { struct agp_i810_softc *sc; unsigned int gcc1; sc = device_get_softc(dev); gcc1 = pci_read_config(sc->bdev, AGP_I830_GCC1, 1); switch (gcc1 & AGP_I830_GCC1_GMS) { case AGP_I830_GCC1_GMS_STOLEN_512: sc->stolen = (512 - 132) * 1024 / 4096; sc->stolen_size = 512 * 1024; break; case AGP_I830_GCC1_GMS_STOLEN_1024: sc->stolen = (1024 - 132) * 1024 / 4096; sc->stolen_size = 1024 * 1024; break; case AGP_I830_GCC1_GMS_STOLEN_8192: sc->stolen = (8192 - 132) * 1024 / 4096; sc->stolen_size = 8192 * 1024; break; default: sc->stolen = 0; device_printf(dev, "unknown memory configuration, disabling (GCC1 %x)\n", gcc1); return (EINVAL); } return (0); } static int agp_i915_get_stolen_size(device_t dev) { struct agp_i810_softc *sc; unsigned int gcc1, stolen, gtt_size; sc = device_get_softc(dev); /* * Stolen memory is set up at the beginning of the aperture by * the BIOS, consisting of the GATT followed by 4kb for the * BIOS display. 
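 *
 * Purely illustrative numbers (not taken from any particular board):
 * an i915 GMCH reporting 8MB of stolen memory uses a 256KB GTT, so the
 * code below ends up with stolen_size = 8MB and
 * sc->stolen = (8192 - 260) * 1024 / 4096 = 1983 GTT entries that must
 * never be rebound, the extra 4KB being the scratch page accounted for
 * by "gtt_size += 4".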
*/ switch (sc->match->driver->chiptype) { case CHIP_I855: gtt_size = 128; break; case CHIP_I915: gtt_size = 256; break; case CHIP_I965: switch (bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL) & AGP_I810_PGTBL_SIZE_MASK) { case AGP_I810_PGTBL_SIZE_128KB: gtt_size = 128; break; case AGP_I810_PGTBL_SIZE_256KB: gtt_size = 256; break; case AGP_I810_PGTBL_SIZE_512KB: gtt_size = 512; break; case AGP_I965_PGTBL_SIZE_1MB: gtt_size = 1024; break; case AGP_I965_PGTBL_SIZE_2MB: gtt_size = 2048; break; case AGP_I965_PGTBL_SIZE_1_5MB: gtt_size = 1024 + 512; break; default: device_printf(dev, "Bad PGTBL size\n"); return (EINVAL); } break; case CHIP_G33: gcc1 = pci_read_config(sc->bdev, AGP_I855_GCC1, 2); switch (gcc1 & AGP_G33_MGGC_GGMS_MASK) { case AGP_G33_MGGC_GGMS_SIZE_1M: gtt_size = 1024; break; case AGP_G33_MGGC_GGMS_SIZE_2M: gtt_size = 2048; break; default: device_printf(dev, "Bad PGTBL size\n"); return (EINVAL); } break; case CHIP_IGD: case CHIP_G4X: gtt_size = 0; break; default: device_printf(dev, "Bad chiptype\n"); return (EINVAL); } /* GCC1 is called MGGC on i915+ */ gcc1 = pci_read_config(sc->bdev, AGP_I855_GCC1, 1); switch (gcc1 & AGP_I855_GCC1_GMS) { case AGP_I855_GCC1_GMS_STOLEN_1M: stolen = 1024; break; case AGP_I855_GCC1_GMS_STOLEN_4M: stolen = 4 * 1024; break; case AGP_I855_GCC1_GMS_STOLEN_8M: stolen = 8 * 1024; break; case AGP_I855_GCC1_GMS_STOLEN_16M: stolen = 16 * 1024; break; case AGP_I855_GCC1_GMS_STOLEN_32M: stolen = 32 * 1024; break; case AGP_I915_GCC1_GMS_STOLEN_48M: stolen = sc->match->driver->gen > 2 ? 48 * 1024 : 0; break; case AGP_I915_GCC1_GMS_STOLEN_64M: stolen = sc->match->driver->gen > 2 ? 64 * 1024 : 0; break; case AGP_G33_GCC1_GMS_STOLEN_128M: stolen = sc->match->driver->gen > 2 ? 128 * 1024 : 0; break; case AGP_G33_GCC1_GMS_STOLEN_256M: stolen = sc->match->driver->gen > 2 ? 
256 * 1024 : 0; break; case AGP_G4X_GCC1_GMS_STOLEN_96M: if (sc->match->driver->chiptype == CHIP_I965 || sc->match->driver->chiptype == CHIP_G4X) stolen = 96 * 1024; else stolen = 0; break; case AGP_G4X_GCC1_GMS_STOLEN_160M: if (sc->match->driver->chiptype == CHIP_I965 || sc->match->driver->chiptype == CHIP_G4X) stolen = 160 * 1024; else stolen = 0; break; case AGP_G4X_GCC1_GMS_STOLEN_224M: if (sc->match->driver->chiptype == CHIP_I965 || sc->match->driver->chiptype == CHIP_G4X) stolen = 224 * 1024; else stolen = 0; break; case AGP_G4X_GCC1_GMS_STOLEN_352M: if (sc->match->driver->chiptype == CHIP_I965 || sc->match->driver->chiptype == CHIP_G4X) stolen = 352 * 1024; else stolen = 0; break; default: device_printf(dev, "unknown memory configuration, disabling (GCC1 %x)\n", gcc1); return (EINVAL); } gtt_size += 4; sc->stolen_size = stolen * 1024; sc->stolen = (stolen - gtt_size) * 1024 / 4096; return (0); } static int agp_i810_get_gtt_mappable_entries(device_t dev) { struct agp_i810_softc *sc; uint32_t ap; uint16_t miscc; sc = device_get_softc(dev); miscc = pci_read_config(sc->bdev, AGP_I810_MISCC, 2); if ((miscc & AGP_I810_MISCC_WINSIZE) == AGP_I810_MISCC_WINSIZE_32) ap = 32; else ap = 64; sc->gtt_mappable_entries = (ap * 1024 * 1024) >> AGP_PAGE_SHIFT; return (0); } static int agp_i830_get_gtt_mappable_entries(device_t dev) { struct agp_i810_softc *sc; uint32_t ap; uint16_t gmch_ctl; sc = device_get_softc(dev); gmch_ctl = pci_read_config(sc->bdev, AGP_I830_GCC1, 2); if ((gmch_ctl & AGP_I830_GCC1_GMASIZE) == AGP_I830_GCC1_GMASIZE_64) ap = 64; else ap = 128; sc->gtt_mappable_entries = (ap * 1024 * 1024) >> AGP_PAGE_SHIFT; return (0); } static int agp_i915_get_gtt_mappable_entries(device_t dev) { struct agp_i810_softc *sc; uint32_t ap; sc = device_get_softc(dev); ap = AGP_GET_APERTURE(dev); sc->gtt_mappable_entries = ap >> AGP_PAGE_SHIFT; return (0); } static int agp_i810_get_gtt_total_entries(device_t dev) { struct agp_i810_softc *sc; sc = device_get_softc(dev); sc->gtt_total_entries = sc->gtt_mappable_entries; return (0); } static int agp_i965_get_gtt_total_entries(device_t dev) { struct agp_i810_softc *sc; uint32_t pgetbl_ctl; int error; sc = device_get_softc(dev); error = 0; pgetbl_ctl = bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL); switch (pgetbl_ctl & AGP_I810_PGTBL_SIZE_MASK) { case AGP_I810_PGTBL_SIZE_128KB: sc->gtt_total_entries = 128 * 1024 / 4; break; case AGP_I810_PGTBL_SIZE_256KB: sc->gtt_total_entries = 256 * 1024 / 4; break; case AGP_I810_PGTBL_SIZE_512KB: sc->gtt_total_entries = 512 * 1024 / 4; break; /* GTT pagetable sizes bigger than 512KB are not possible on G33! */ case AGP_I810_PGTBL_SIZE_1MB: sc->gtt_total_entries = 1024 * 1024 / 4; break; case AGP_I810_PGTBL_SIZE_2MB: sc->gtt_total_entries = 2 * 1024 * 1024 / 4; break; case AGP_I810_PGTBL_SIZE_1_5MB: sc->gtt_total_entries = (1024 + 512) * 1024 / 4; break; default: device_printf(dev, "Unknown page table size\n"); error = ENXIO; } return (error); } static void agp_gen5_adjust_pgtbl_size(device_t dev, uint32_t sz) { struct agp_i810_softc *sc; uint32_t pgetbl_ctl, pgetbl_ctl2; sc = device_get_softc(dev); /* Disable per-process page table. */ pgetbl_ctl2 = bus_read_4(sc->sc_res[0], AGP_I965_PGTBL_CTL2); pgetbl_ctl2 &= ~AGP_I810_PGTBL_ENABLED; bus_write_4(sc->sc_res[0], AGP_I965_PGTBL_CTL2, pgetbl_ctl2); /* Write the new ggtt size. 
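 * The size decoded from the bridge's GCC1/GGC register by
 * agp_gen5_get_gtt_total_entries() is folded into AGP_I810_PGTBL_CTL
 * here, so that the generic agp_i965_get_gtt_total_entries() path can
 * then derive the number of GTT entries from that register.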
*/ pgetbl_ctl = bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL); pgetbl_ctl &= ~AGP_I810_PGTBL_SIZE_MASK; pgetbl_ctl |= sz; bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, pgetbl_ctl); } static int agp_gen5_get_gtt_total_entries(device_t dev) { struct agp_i810_softc *sc; uint16_t gcc1; sc = device_get_softc(dev); gcc1 = pci_read_config(sc->bdev, AGP_I830_GCC1, 2); switch (gcc1 & AGP_G4x_GCC1_SIZE_MASK) { case AGP_G4x_GCC1_SIZE_1M: case AGP_G4x_GCC1_SIZE_VT_1M: agp_gen5_adjust_pgtbl_size(dev, AGP_I810_PGTBL_SIZE_1MB); break; case AGP_G4x_GCC1_SIZE_VT_1_5M: agp_gen5_adjust_pgtbl_size(dev, AGP_I810_PGTBL_SIZE_1_5MB); break; case AGP_G4x_GCC1_SIZE_2M: case AGP_G4x_GCC1_SIZE_VT_2M: agp_gen5_adjust_pgtbl_size(dev, AGP_I810_PGTBL_SIZE_2MB); break; default: device_printf(dev, "Unknown page table size\n"); return (ENXIO); } return (agp_i965_get_gtt_total_entries(dev)); } static int agp_i810_install_gatt(device_t dev) { struct agp_i810_softc *sc; sc = device_get_softc(dev); /* Some i810s have on-chip memory called dcache. */ if ((bus_read_1(sc->sc_res[0], AGP_I810_DRT) & AGP_I810_DRT_POPULATED) != 0) sc->dcache_size = 4 * 1024 * 1024; else sc->dcache_size = 0; /* According to the specs the gatt on the i810 must be 64k. */ - sc->gatt->ag_virtual = (void *)kmem_alloc_contig(kernel_arena, - 64 * 1024, M_NOWAIT | M_ZERO, 0, ~0, PAGE_SIZE, - 0, VM_MEMATTR_WRITE_COMBINING); + sc->gatt->ag_virtual = (void *)kmem_alloc_contig(64 * 1024, M_NOWAIT | + M_ZERO, 0, ~0, PAGE_SIZE, 0, VM_MEMATTR_WRITE_COMBINING); if (sc->gatt->ag_virtual == NULL) { if (bootverbose) device_printf(dev, "contiguous allocation failed\n"); return (ENOMEM); } sc->gatt->ag_physical = vtophys((vm_offset_t)sc->gatt->ag_virtual); /* Install the GATT. */ bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, sc->gatt->ag_physical | 1); return (0); } static void agp_i830_install_gatt_init(struct agp_i810_softc *sc) { uint32_t pgtblctl; /* * The i830 automatically initializes the 128k gatt on boot. * GATT address is already in there, make sure it's enabled. */ pgtblctl = bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL); pgtblctl |= 1; bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, pgtblctl); sc->gatt->ag_physical = pgtblctl & ~1; } static int agp_i830_install_gatt(device_t dev) { struct agp_i810_softc *sc; sc = device_get_softc(dev); agp_i830_install_gatt_init(sc); return (0); } static int agp_gen4_install_gatt(device_t dev, const vm_size_t gtt_offset) { struct agp_i810_softc *sc; sc = device_get_softc(dev); pmap_change_attr((vm_offset_t)rman_get_virtual(sc->sc_res[0]) + gtt_offset, rman_get_size(sc->sc_res[0]) - gtt_offset, VM_MEMATTR_WRITE_COMBINING); agp_i830_install_gatt_init(sc); return (0); } static int agp_i965_install_gatt(device_t dev) { return (agp_gen4_install_gatt(dev, 512 * 1024)); } static int agp_g4x_install_gatt(device_t dev) { return (agp_gen4_install_gatt(dev, 2 * 1024 * 1024)); } static int agp_i810_attach(device_t dev) { struct agp_i810_softc *sc; int error; sc = device_get_softc(dev); sc->bdev = agp_i810_find_bridge(dev); if (sc->bdev == NULL) return (ENOENT); sc->match = agp_i810_match(dev); agp_set_aperture_resource(dev, sc->match->driver->gen <= 2 ? 
AGP_APBASE : AGP_I915_GMADR); error = agp_generic_attach(dev); if (error) return (error); if (ptoa((vm_paddr_t)Maxmem) > (1ULL << sc->match->driver->busdma_addr_mask_sz) - 1) { device_printf(dev, "agp_i810 does not support physical " "memory above %ju.\n", (uintmax_t)(1ULL << sc->match->driver->busdma_addr_mask_sz) - 1); return (ENOENT); } if (bus_alloc_resources(dev, sc->match->driver->res_spec, sc->sc_res)) { agp_generic_detach(dev); return (ENODEV); } sc->initial_aperture = AGP_GET_APERTURE(dev); sc->gatt = malloc(sizeof(struct agp_gatt), M_AGP, M_WAITOK); sc->gatt->ag_entries = AGP_GET_APERTURE(dev) >> AGP_PAGE_SHIFT; if ((error = sc->match->driver->get_stolen_size(dev)) != 0 || (error = sc->match->driver->install_gatt(dev)) != 0 || (error = sc->match->driver->get_gtt_mappable_entries(dev)) != 0 || (error = sc->match->driver->get_gtt_total_entries(dev)) != 0 || (error = sc->match->driver->chipset_flush_setup(dev)) != 0) { bus_release_resources(dev, sc->match->driver->res_spec, sc->sc_res); free(sc->gatt, M_AGP); agp_generic_detach(dev); return (error); } intel_agp = dev; device_printf(dev, "aperture size is %dM", sc->initial_aperture / 1024 / 1024); if (sc->stolen > 0) printf(", detected %dk stolen memory\n", sc->stolen * 4); else printf("\n"); if (bootverbose) { sc->match->driver->dump_regs(dev); device_printf(dev, "Mappable GTT entries: %d\n", sc->gtt_mappable_entries); device_printf(dev, "Total GTT entries: %d\n", sc->gtt_total_entries); } return (0); } static void agp_i810_deinstall_gatt(device_t dev) { struct agp_i810_softc *sc; sc = device_get_softc(dev); bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, 0); kmem_free(kernel_arena, (vm_offset_t)sc->gatt->ag_virtual, 64 * 1024); } static void agp_i830_deinstall_gatt(device_t dev) { struct agp_i810_softc *sc; unsigned int pgtblctl; sc = device_get_softc(dev); pgtblctl = bus_read_4(sc->sc_res[0], AGP_I810_PGTBL_CTL); pgtblctl &= ~1; bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, pgtblctl); } static int agp_i810_detach(device_t dev) { struct agp_i810_softc *sc; sc = device_get_softc(dev); agp_free_cdev(dev); /* Clear the GATT base. */ sc->match->driver->deinstall_gatt(dev); sc->match->driver->chipset_flush_teardown(dev); /* Put the aperture back the way it started. */ AGP_SET_APERTURE(dev, sc->initial_aperture); free(sc->gatt, M_AGP); bus_release_resources(dev, sc->match->driver->res_spec, sc->sc_res); agp_free_res(dev); return (0); } static int agp_i810_resume(device_t dev) { struct agp_i810_softc *sc; sc = device_get_softc(dev); AGP_SET_APERTURE(dev, sc->initial_aperture); /* Install the GATT. */ bus_write_4(sc->sc_res[0], AGP_I810_PGTBL_CTL, sc->gatt->ag_physical | 1); return (bus_generic_resume(dev)); } /** * Sets the PCI resource size of the aperture on i830-class and below chipsets, * while returning failure on later chipsets when an actual change is * requested. * * This whole function is likely bogus, as the kernel would probably need to * reconfigure the placement of the AGP aperture if a larger size is requested, * which doesn't happen currently. */ static int agp_i810_set_aperture(device_t dev, u_int32_t aperture) { struct agp_i810_softc *sc; u_int16_t miscc; sc = device_get_softc(dev); /* * Double check for sanity. 
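 * The MISCC window-size field on the i810 can only describe a 32MB or
 * a 64MB aperture, so any other size is rejected outright.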
*/ if (aperture != 32 * 1024 * 1024 && aperture != 64 * 1024 * 1024) { device_printf(dev, "bad aperture size %d\n", aperture); return (EINVAL); } miscc = pci_read_config(sc->bdev, AGP_I810_MISCC, 2); miscc &= ~AGP_I810_MISCC_WINSIZE; if (aperture == 32 * 1024 * 1024) miscc |= AGP_I810_MISCC_WINSIZE_32; else miscc |= AGP_I810_MISCC_WINSIZE_64; pci_write_config(sc->bdev, AGP_I810_MISCC, miscc, 2); return (0); } static int agp_i830_set_aperture(device_t dev, u_int32_t aperture) { struct agp_i810_softc *sc; u_int16_t gcc1; sc = device_get_softc(dev); if (aperture != 64 * 1024 * 1024 && aperture != 128 * 1024 * 1024) { device_printf(dev, "bad aperture size %d\n", aperture); return (EINVAL); } gcc1 = pci_read_config(sc->bdev, AGP_I830_GCC1, 2); gcc1 &= ~AGP_I830_GCC1_GMASIZE; if (aperture == 64 * 1024 * 1024) gcc1 |= AGP_I830_GCC1_GMASIZE_64; else gcc1 |= AGP_I830_GCC1_GMASIZE_128; pci_write_config(sc->bdev, AGP_I830_GCC1, gcc1, 2); return (0); } static int agp_i915_set_aperture(device_t dev, u_int32_t aperture) { return (agp_generic_set_aperture(dev, aperture)); } static int agp_i810_method_set_aperture(device_t dev, u_int32_t aperture) { struct agp_i810_softc *sc; sc = device_get_softc(dev); return (sc->match->driver->set_aperture(dev, aperture)); } /** * Writes a GTT entry mapping the page at the given offset from the * beginning of the aperture to the given physical address. Setup the * caching mode according to flags. * * For gen 1, 2 and 3, GTT start is located at AGP_I810_GTT offset * from corresponding BAR start. For gen 4, offset is 512KB + * AGP_I810_GTT, for gen 5 and 6 it is 2MB + AGP_I810_GTT. * * Also, the bits of the physical page address above 4GB needs to be * placed into bits 40-32 of PTE. */ static void agp_i810_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical, int flags) { uint32_t pte; pte = (u_int32_t)physical | I810_PTE_VALID; if (flags == AGP_DCACHE_MEMORY) pte |= I810_PTE_LOCAL; else if (flags == AGP_USER_CACHED_MEMORY) pte |= I830_PTE_SYSTEM_CACHED; agp_i810_write_gtt(dev, index, pte); } static void agp_i810_write_gtt(device_t dev, u_int index, uint32_t pte) { struct agp_i810_softc *sc; sc = device_get_softc(dev); bus_write_4(sc->sc_res[0], AGP_I810_GTT + index * 4, pte); CTR2(KTR_AGP_I810, "810_pte %x %x", index, pte); } static void agp_i830_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical, int flags) { uint32_t pte; pte = (u_int32_t)physical | I810_PTE_VALID; if (flags == AGP_USER_CACHED_MEMORY) pte |= I830_PTE_SYSTEM_CACHED; agp_i810_write_gtt(dev, index, pte); } static void agp_i915_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical, int flags) { uint32_t pte; pte = (u_int32_t)physical | I810_PTE_VALID; if (flags == AGP_USER_CACHED_MEMORY) pte |= I830_PTE_SYSTEM_CACHED; pte |= (physical & 0x0000000f00000000ull) >> 28; agp_i915_write_gtt(dev, index, pte); } static void agp_i915_write_gtt(device_t dev, u_int index, uint32_t pte) { struct agp_i810_softc *sc; sc = device_get_softc(dev); bus_write_4(sc->sc_res[1], index * 4, pte); CTR2(KTR_AGP_I810, "915_pte %x %x", index, pte); } static void agp_i965_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical, int flags) { uint32_t pte; pte = (u_int32_t)physical | I810_PTE_VALID; if (flags == AGP_USER_CACHED_MEMORY) pte |= I830_PTE_SYSTEM_CACHED; pte |= (physical & 0x0000000f00000000ull) >> 28; agp_i965_write_gtt(dev, index, pte); } static void agp_i965_write_gtt(device_t dev, u_int index, uint32_t pte) { struct agp_i810_softc *sc; sc = device_get_softc(dev); 
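	/*
	 * The pte argument was assembled by agp_i965_install_gtt_pte()
	 * above: the low 32 bits of the physical address, bits 35:32 of
	 * the address folded into PTE bits 7:4, and the valid flag.  As a
	 * purely illustrative example, a page at 0x234567000 would become
	 * 0x34567000 | 0x20 | I810_PTE_VALID.  On the G965 the GTT shares
	 * BAR 0 with the MMIO registers (first 512KB registers, GTT right
	 * after), hence the fixed 512KB bias added to the PTE offset.
	 */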
bus_write_4(sc->sc_res[0], index * 4 + (512 * 1024), pte); CTR2(KTR_AGP_I810, "965_pte %x %x", index, pte); } static void agp_g4x_install_gtt_pte(device_t dev, u_int index, vm_offset_t physical, int flags) { uint32_t pte; pte = (u_int32_t)physical | I810_PTE_VALID; if (flags == AGP_USER_CACHED_MEMORY) pte |= I830_PTE_SYSTEM_CACHED; pte |= (physical & 0x0000000f00000000ull) >> 28; agp_g4x_write_gtt(dev, index, pte); } static void agp_g4x_write_gtt(device_t dev, u_int index, uint32_t pte) { struct agp_i810_softc *sc; sc = device_get_softc(dev); bus_write_4(sc->sc_res[0], index * 4 + (2 * 1024 * 1024), pte); CTR2(KTR_AGP_I810, "g4x_pte %x %x", index, pte); } static int agp_i810_bind_page(device_t dev, vm_offset_t offset, vm_offset_t physical) { struct agp_i810_softc *sc = device_get_softc(dev); u_int index; if (offset >= (sc->gatt->ag_entries << AGP_PAGE_SHIFT)) { device_printf(dev, "failed: offset is 0x%08jx, " "shift is %d, entries is %d\n", (intmax_t)offset, AGP_PAGE_SHIFT, sc->gatt->ag_entries); return (EINVAL); } index = offset >> AGP_PAGE_SHIFT; if (sc->stolen != 0 && index < sc->stolen) { device_printf(dev, "trying to bind into stolen memory\n"); return (EINVAL); } sc->match->driver->install_gtt_pte(dev, index, physical, 0); return (0); } static int agp_i810_unbind_page(device_t dev, vm_offset_t offset) { struct agp_i810_softc *sc; u_int index; sc = device_get_softc(dev); if (offset >= (sc->gatt->ag_entries << AGP_PAGE_SHIFT)) return (EINVAL); index = offset >> AGP_PAGE_SHIFT; if (sc->stolen != 0 && index < sc->stolen) { device_printf(dev, "trying to unbind from stolen memory\n"); return (EINVAL); } sc->match->driver->install_gtt_pte(dev, index, 0, 0); return (0); } static u_int32_t agp_i810_read_gtt_pte(device_t dev, u_int index) { struct agp_i810_softc *sc; u_int32_t pte; sc = device_get_softc(dev); pte = bus_read_4(sc->sc_res[0], AGP_I810_GTT + index * 4); return (pte); } static u_int32_t agp_i915_read_gtt_pte(device_t dev, u_int index) { struct agp_i810_softc *sc; u_int32_t pte; sc = device_get_softc(dev); pte = bus_read_4(sc->sc_res[1], index * 4); return (pte); } static u_int32_t agp_i965_read_gtt_pte(device_t dev, u_int index) { struct agp_i810_softc *sc; u_int32_t pte; sc = device_get_softc(dev); pte = bus_read_4(sc->sc_res[0], index * 4 + (512 * 1024)); return (pte); } static u_int32_t agp_g4x_read_gtt_pte(device_t dev, u_int index) { struct agp_i810_softc *sc; u_int32_t pte; sc = device_get_softc(dev); pte = bus_read_4(sc->sc_res[0], index * 4 + (2 * 1024 * 1024)); return (pte); } static vm_paddr_t agp_i810_read_gtt_pte_paddr(device_t dev, u_int index) { struct agp_i810_softc *sc; u_int32_t pte; vm_paddr_t res; sc = device_get_softc(dev); pte = sc->match->driver->read_gtt_pte(dev, index); res = pte & ~PAGE_MASK; return (res); } static vm_paddr_t agp_i915_read_gtt_pte_paddr(device_t dev, u_int index) { struct agp_i810_softc *sc; u_int32_t pte; vm_paddr_t res; sc = device_get_softc(dev); pte = sc->match->driver->read_gtt_pte(dev, index); res = (pte & ~PAGE_MASK) | ((pte & 0xf0) << 28); return (res); } /* * Writing via memory mapped registers already flushes all TLBs. 
*/ static void agp_i810_flush_tlb(device_t dev) { } static int agp_i810_enable(device_t dev, u_int32_t mode) { return (0); } static struct agp_memory * agp_i810_alloc_memory(device_t dev, int type, vm_size_t size) { struct agp_i810_softc *sc; struct agp_memory *mem; vm_page_t m; sc = device_get_softc(dev); if ((size & (AGP_PAGE_SIZE - 1)) != 0 || sc->agp.as_allocated + size > sc->agp.as_maxmem) return (0); if (type == 1) { /* * Mapping local DRAM into GATT. */ if (sc->match->driver->chiptype != CHIP_I810) return (0); if (size != sc->dcache_size) return (0); } else if (type == 2) { /* * Type 2 is the contiguous physical memory type, that hands * back a physical address. This is used for cursors on i810. * Hand back as many single pages with physical as the user * wants, but only allow one larger allocation (ARGB cursor) * for simplicity. */ if (size != AGP_PAGE_SIZE) { if (sc->argb_cursor != NULL) return (0); /* Allocate memory for ARGB cursor, if we can. */ sc->argb_cursor = contigmalloc(size, M_AGP, 0, 0, ~0, PAGE_SIZE, 0); if (sc->argb_cursor == NULL) return (0); } } mem = malloc(sizeof *mem, M_AGP, M_WAITOK); mem->am_id = sc->agp.as_nextid++; mem->am_size = size; mem->am_type = type; if (type != 1 && (type != 2 || size == AGP_PAGE_SIZE)) mem->am_obj = vm_object_allocate(OBJT_DEFAULT, atop(round_page(size))); else mem->am_obj = 0; if (type == 2) { if (size == AGP_PAGE_SIZE) { /* * Allocate and wire down the page now so that we can * get its physical address. */ VM_OBJECT_WLOCK(mem->am_obj); m = vm_page_grab(mem->am_obj, 0, VM_ALLOC_NOBUSY | VM_ALLOC_WIRED | VM_ALLOC_ZERO); VM_OBJECT_WUNLOCK(mem->am_obj); mem->am_physical = VM_PAGE_TO_PHYS(m); } else { /* Our allocation is already nicely wired down for us. * Just grab the physical address. */ mem->am_physical = vtophys(sc->argb_cursor); } } else mem->am_physical = 0; mem->am_offset = 0; mem->am_is_bound = 0; TAILQ_INSERT_TAIL(&sc->agp.as_memory, mem, am_link); sc->agp.as_allocated += size; return (mem); } static int agp_i810_free_memory(device_t dev, struct agp_memory *mem) { struct agp_i810_softc *sc; vm_page_t m; if (mem->am_is_bound) return (EBUSY); sc = device_get_softc(dev); if (mem->am_type == 2) { if (mem->am_size == AGP_PAGE_SIZE) { /* * Unwire the page which we wired in alloc_memory. */ VM_OBJECT_WLOCK(mem->am_obj); m = vm_page_lookup(mem->am_obj, 0); vm_page_lock(m); vm_page_unwire(m, PQ_INACTIVE); vm_page_unlock(m); VM_OBJECT_WUNLOCK(mem->am_obj); } else { contigfree(sc->argb_cursor, mem->am_size, M_AGP); sc->argb_cursor = NULL; } } sc->agp.as_allocated -= mem->am_size; TAILQ_REMOVE(&sc->agp.as_memory, mem, am_link); if (mem->am_obj) vm_object_deallocate(mem->am_obj); free(mem, M_AGP); return (0); } static int agp_i810_bind_memory(device_t dev, struct agp_memory *mem, vm_offset_t offset) { struct agp_i810_softc *sc; vm_offset_t i; /* Do some sanity checks first. */ if ((offset & (AGP_PAGE_SIZE - 1)) != 0 || offset + mem->am_size > AGP_GET_APERTURE(dev)) { device_printf(dev, "binding memory at bad offset %#x\n", (int)offset); return (EINVAL); } sc = device_get_softc(dev); if (mem->am_type == 2 && mem->am_size != AGP_PAGE_SIZE) { mtx_lock(&sc->agp.as_lock); if (mem->am_is_bound) { mtx_unlock(&sc->agp.as_lock); return (EINVAL); } /* The memory's already wired down, just stick it in the GTT. 
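 * The contigmalloc()ed ARGB cursor buffer already supplied a wired,
 * physically contiguous range via am_physical, so unlike
 * agp_generic_bind_memory() no page wiring or VM object locking is
 * needed here; each loop iteration below installs one PTE per AGP page.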
*/ for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE) { sc->match->driver->install_gtt_pte(dev, (offset + i) >> AGP_PAGE_SHIFT, mem->am_physical + i, 0); } mem->am_offset = offset; mem->am_is_bound = 1; mtx_unlock(&sc->agp.as_lock); return (0); } if (mem->am_type != 1) return (agp_generic_bind_memory(dev, mem, offset)); /* * Mapping local DRAM into GATT. */ if (sc->match->driver->chiptype != CHIP_I810) return (EINVAL); for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE) bus_write_4(sc->sc_res[0], AGP_I810_GTT + (i >> AGP_PAGE_SHIFT) * 4, i | 3); return (0); } static int agp_i810_unbind_memory(device_t dev, struct agp_memory *mem) { struct agp_i810_softc *sc; vm_offset_t i; sc = device_get_softc(dev); if (mem->am_type == 2 && mem->am_size != AGP_PAGE_SIZE) { mtx_lock(&sc->agp.as_lock); if (!mem->am_is_bound) { mtx_unlock(&sc->agp.as_lock); return (EINVAL); } for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE) { sc->match->driver->install_gtt_pte(dev, (mem->am_offset + i) >> AGP_PAGE_SHIFT, 0, 0); } mem->am_is_bound = 0; mtx_unlock(&sc->agp.as_lock); return (0); } if (mem->am_type != 1) return (agp_generic_unbind_memory(dev, mem)); if (sc->match->driver->chiptype != CHIP_I810) return (EINVAL); for (i = 0; i < mem->am_size; i += AGP_PAGE_SIZE) { sc->match->driver->install_gtt_pte(dev, i >> AGP_PAGE_SHIFT, 0, 0); } return (0); } static device_method_t agp_i810_methods[] = { /* Device interface */ DEVMETHOD(device_identify, agp_i810_identify), DEVMETHOD(device_probe, agp_i810_probe), DEVMETHOD(device_attach, agp_i810_attach), DEVMETHOD(device_detach, agp_i810_detach), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, agp_i810_resume), /* AGP interface */ DEVMETHOD(agp_get_aperture, agp_generic_get_aperture), DEVMETHOD(agp_set_aperture, agp_i810_method_set_aperture), DEVMETHOD(agp_bind_page, agp_i810_bind_page), DEVMETHOD(agp_unbind_page, agp_i810_unbind_page), DEVMETHOD(agp_flush_tlb, agp_i810_flush_tlb), DEVMETHOD(agp_enable, agp_i810_enable), DEVMETHOD(agp_alloc_memory, agp_i810_alloc_memory), DEVMETHOD(agp_free_memory, agp_i810_free_memory), DEVMETHOD(agp_bind_memory, agp_i810_bind_memory), DEVMETHOD(agp_unbind_memory, agp_i810_unbind_memory), DEVMETHOD(agp_chipset_flush, agp_intel_gtt_chipset_flush), { 0, 0 } }; static driver_t agp_i810_driver = { "agp", agp_i810_methods, sizeof(struct agp_i810_softc), }; static devclass_t agp_devclass; DRIVER_MODULE(agp_i810, vgapci, agp_i810_driver, agp_devclass, 0, 0); MODULE_DEPEND(agp_i810, agp, 1, 1, 1); MODULE_DEPEND(agp_i810, pci, 1, 1, 1); void agp_intel_gtt_clear_range(device_t dev, u_int first_entry, u_int num_entries) { struct agp_i810_softc *sc; u_int i; sc = device_get_softc(dev); for (i = 0; i < num_entries; i++) sc->match->driver->install_gtt_pte(dev, first_entry + i, VM_PAGE_TO_PHYS(bogus_page), 0); sc->match->driver->read_gtt_pte(dev, first_entry + num_entries - 1); } void agp_intel_gtt_insert_pages(device_t dev, u_int first_entry, u_int num_entries, vm_page_t *pages, u_int flags) { struct agp_i810_softc *sc; u_int i; sc = device_get_softc(dev); for (i = 0; i < num_entries; i++) { MPASS(pages[i]->valid == VM_PAGE_BITS_ALL); MPASS(pages[i]->wire_count > 0); sc->match->driver->install_gtt_pte(dev, first_entry + i, VM_PAGE_TO_PHYS(pages[i]), flags); } sc->match->driver->read_gtt_pte(dev, first_entry + num_entries - 1); } struct intel_gtt agp_intel_gtt_get(device_t dev) { struct agp_i810_softc *sc; struct intel_gtt res; sc = device_get_softc(dev); res.stolen_size = sc->stolen_size; res.gtt_total_entries = 
sc->gtt_total_entries; res.gtt_mappable_entries = sc->gtt_mappable_entries; res.do_idle_maps = 0; res.scratch_page_dma = VM_PAGE_TO_PHYS(bogus_page); if (sc->agp.as_aperture != NULL) res.gma_bus_addr = rman_get_start(sc->agp.as_aperture); else res.gma_bus_addr = 0; return (res); } static int agp_i810_chipset_flush_setup(device_t dev) { return (0); } static void agp_i810_chipset_flush_teardown(device_t dev) { /* Nothing to do. */ } static void agp_i810_chipset_flush(device_t dev) { /* Nothing to do. */ } static void agp_i830_chipset_flush(device_t dev) { struct agp_i810_softc *sc; uint32_t hic; int i; sc = device_get_softc(dev); pmap_invalidate_cache(); hic = bus_read_4(sc->sc_res[0], AGP_I830_HIC); bus_write_4(sc->sc_res[0], AGP_I830_HIC, hic | (1U << 31)); for (i = 0; i < 20000 /* 1 sec */; i++) { hic = bus_read_4(sc->sc_res[0], AGP_I830_HIC); if ((hic & (1U << 31)) == 0) break; DELAY(50); } } static int agp_i915_chipset_flush_alloc_page(device_t dev, uint64_t start, uint64_t end) { struct agp_i810_softc *sc; device_t vga; sc = device_get_softc(dev); vga = device_get_parent(dev); sc->sc_flush_page_rid = 100; sc->sc_flush_page_res = BUS_ALLOC_RESOURCE(device_get_parent(vga), dev, SYS_RES_MEMORY, &sc->sc_flush_page_rid, start, end, PAGE_SIZE, RF_ACTIVE); if (sc->sc_flush_page_res == NULL) { device_printf(dev, "Failed to allocate flush page at 0x%jx\n", (uintmax_t)start); return (EINVAL); } sc->sc_flush_page_vaddr = rman_get_virtual(sc->sc_flush_page_res); if (bootverbose) { device_printf(dev, "Allocated flush page phys 0x%jx virt %p\n", (uintmax_t)rman_get_start(sc->sc_flush_page_res), sc->sc_flush_page_vaddr); } return (0); } static void agp_i915_chipset_flush_free_page(device_t dev) { struct agp_i810_softc *sc; device_t vga; sc = device_get_softc(dev); vga = device_get_parent(dev); if (sc->sc_flush_page_res == NULL) return; BUS_DEACTIVATE_RESOURCE(device_get_parent(vga), dev, SYS_RES_MEMORY, sc->sc_flush_page_rid, sc->sc_flush_page_res); BUS_RELEASE_RESOURCE(device_get_parent(vga), dev, SYS_RES_MEMORY, sc->sc_flush_page_rid, sc->sc_flush_page_res); } static int agp_i915_chipset_flush_setup(device_t dev) { struct agp_i810_softc *sc; uint32_t temp; int error; sc = device_get_softc(dev); temp = pci_read_config(sc->bdev, AGP_I915_IFPADDR, 4); if ((temp & 1) != 0) { temp &= ~1; if (bootverbose) device_printf(dev, "Found already configured flush page at 0x%jx\n", (uintmax_t)temp); sc->sc_bios_allocated_flush_page = 1; /* * In the case BIOS initialized the flush pointer (?) * register, expect that BIOS also set up the resource * for the page. 
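 * The page is therefore claimed at exactly the address already
 * programmed into IFPADDR, and sc_bios_allocated_flush_page stays set
 * so that teardown leaves the BIOS-programmed register alone.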
*/ error = agp_i915_chipset_flush_alloc_page(dev, temp, temp + PAGE_SIZE - 1); if (error != 0) return (error); } else { sc->sc_bios_allocated_flush_page = 0; error = agp_i915_chipset_flush_alloc_page(dev, 0, 0xffffffff); if (error != 0) return (error); temp = rman_get_start(sc->sc_flush_page_res); pci_write_config(sc->bdev, AGP_I915_IFPADDR, temp | 1, 4); } return (0); } static void agp_i915_chipset_flush_teardown(device_t dev) { struct agp_i810_softc *sc; uint32_t temp; sc = device_get_softc(dev); if (sc->sc_flush_page_res == NULL) return; if (!sc->sc_bios_allocated_flush_page) { temp = pci_read_config(sc->bdev, AGP_I915_IFPADDR, 4); temp &= ~1; pci_write_config(sc->bdev, AGP_I915_IFPADDR, temp, 4); } agp_i915_chipset_flush_free_page(dev); } static int agp_i965_chipset_flush_setup(device_t dev) { struct agp_i810_softc *sc; uint64_t temp; uint32_t temp_hi, temp_lo; int error; sc = device_get_softc(dev); temp_hi = pci_read_config(sc->bdev, AGP_I965_IFPADDR + 4, 4); temp_lo = pci_read_config(sc->bdev, AGP_I965_IFPADDR, 4); if ((temp_lo & 1) != 0) { temp = ((uint64_t)temp_hi << 32) | (temp_lo & ~1); if (bootverbose) device_printf(dev, "Found already configured flush page at 0x%jx\n", (uintmax_t)temp); sc->sc_bios_allocated_flush_page = 1; /* * In the case BIOS initialized the flush pointer (?) * register, expect that BIOS also set up the resource * for the page. */ error = agp_i915_chipset_flush_alloc_page(dev, temp, temp + PAGE_SIZE - 1); if (error != 0) return (error); } else { sc->sc_bios_allocated_flush_page = 0; error = agp_i915_chipset_flush_alloc_page(dev, 0, ~0); if (error != 0) return (error); temp = rman_get_start(sc->sc_flush_page_res); pci_write_config(sc->bdev, AGP_I965_IFPADDR + 4, (temp >> 32) & UINT32_MAX, 4); pci_write_config(sc->bdev, AGP_I965_IFPADDR, (temp & UINT32_MAX) | 1, 4); } return (0); } static void agp_i965_chipset_flush_teardown(device_t dev) { struct agp_i810_softc *sc; uint32_t temp_lo; sc = device_get_softc(dev); if (sc->sc_flush_page_res == NULL) return; if (!sc->sc_bios_allocated_flush_page) { temp_lo = pci_read_config(sc->bdev, AGP_I965_IFPADDR, 4); temp_lo &= ~1; pci_write_config(sc->bdev, AGP_I965_IFPADDR, temp_lo, 4); } agp_i915_chipset_flush_free_page(dev); } static void agp_i915_chipset_flush(device_t dev) { struct agp_i810_softc *sc; sc = device_get_softc(dev); *(uint32_t *)sc->sc_flush_page_vaddr = 1; } int agp_intel_gtt_chipset_flush(device_t dev) { struct agp_i810_softc *sc; sc = device_get_softc(dev); sc->match->driver->chipset_flush(dev); return (0); } void agp_intel_gtt_unmap_memory(device_t dev, struct sglist *sg_list) { } int agp_intel_gtt_map_memory(device_t dev, vm_page_t *pages, u_int num_entries, struct sglist **sg_list) { struct agp_i810_softc *sc; struct sglist *sg; int i; #if 0 int error; bus_dma_tag_t dmat; #endif if (*sg_list != NULL) return (0); sc = device_get_softc(dev); sg = sglist_alloc(num_entries, M_WAITOK /* XXXKIB */); for (i = 0; i < num_entries; i++) { sg->sg_segs[i].ss_paddr = VM_PAGE_TO_PHYS(pages[i]); sg->sg_segs[i].ss_len = PAGE_SIZE; } #if 0 error = bus_dma_tag_create(bus_get_dma_tag(dev), 1 /* alignment */, 0 /* boundary */, 1ULL << sc->match->busdma_addr_mask_sz /* lowaddr */, BUS_SPACE_MAXADDR /* highaddr */, NULL /* filtfunc */, NULL /* filtfuncarg */, BUS_SPACE_MAXADDR /* maxsize */, BUS_SPACE_UNRESTRICTED /* nsegments */, BUS_SPACE_MAXADDR /* maxsegsz */, 0 /* flags */, NULL /* lockfunc */, NULL /* lockfuncarg */, &dmat); if (error != 0) { sglist_free(sg); return (error); } /* XXXKIB */ #endif *sg_list = sg; 
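	/*
	 * Descriptive note (added comment, not part of the original diff):
	 * the scatter/gather list built above carries one PAGE_SIZE segment
	 * per backing page; agp_intel_gtt_insert_sg_entries() later walks
	 * those segments and installs a GTT PTE for every AGP_PAGE_SIZE
	 * chunk of each segment.
	 */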
return (0); } static void agp_intel_gtt_install_pte(device_t dev, u_int index, vm_paddr_t addr, u_int flags) { struct agp_i810_softc *sc; sc = device_get_softc(dev); sc->match->driver->install_gtt_pte(dev, index, addr, flags); } void agp_intel_gtt_insert_sg_entries(device_t dev, struct sglist *sg_list, u_int first_entry, u_int flags) { struct agp_i810_softc *sc; vm_paddr_t spaddr; size_t slen; u_int i, j; sc = device_get_softc(dev); for (i = j = 0; j < sg_list->sg_nseg; j++) { spaddr = sg_list->sg_segs[i].ss_paddr; slen = sg_list->sg_segs[i].ss_len; for (; slen > 0; i++) { sc->match->driver->install_gtt_pte(dev, first_entry + i, spaddr, flags); spaddr += AGP_PAGE_SIZE; slen -= AGP_PAGE_SIZE; } } sc->match->driver->read_gtt_pte(dev, first_entry + i - 1); } void intel_gtt_clear_range(u_int first_entry, u_int num_entries) { agp_intel_gtt_clear_range(intel_agp, first_entry, num_entries); } void intel_gtt_insert_pages(u_int first_entry, u_int num_entries, vm_page_t *pages, u_int flags) { agp_intel_gtt_insert_pages(intel_agp, first_entry, num_entries, pages, flags); } struct intel_gtt * intel_gtt_get(void) { intel_private.base = agp_intel_gtt_get(intel_agp); return (&intel_private.base); } int intel_gtt_chipset_flush(void) { return (agp_intel_gtt_chipset_flush(intel_agp)); } void intel_gtt_unmap_memory(struct sglist *sg_list) { agp_intel_gtt_unmap_memory(intel_agp, sg_list); } int intel_gtt_map_memory(vm_page_t *pages, u_int num_entries, struct sglist **sg_list) { return (agp_intel_gtt_map_memory(intel_agp, pages, num_entries, sg_list)); } void intel_gtt_insert_sg_entries(struct sglist *sg_list, u_int first_entry, u_int flags) { agp_intel_gtt_insert_sg_entries(intel_agp, sg_list, first_entry, flags); } void intel_gtt_install_pte(u_int index, vm_paddr_t addr, u_int flags) { agp_intel_gtt_install_pte(intel_agp, index, addr, flags); } device_t intel_gtt_get_bridge_device(void) { struct agp_i810_softc *sc; sc = device_get_softc(intel_agp); return (sc->bdev); } vm_paddr_t intel_gtt_read_pte_paddr(u_int entry) { struct agp_i810_softc *sc; sc = device_get_softc(intel_agp); return (sc->match->driver->read_gtt_pte_paddr(intel_agp, entry)); } u_int32_t intel_gtt_read_pte(u_int entry) { struct agp_i810_softc *sc; sc = device_get_softc(intel_agp); return (sc->match->driver->read_gtt_pte(intel_agp, entry)); } void intel_gtt_write(u_int entry, uint32_t val) { struct agp_i810_softc *sc; sc = device_get_softc(intel_agp); return (sc->match->driver->write_gtt(intel_agp, entry, val)); } Index: head/sys/dev/liquidio/lio_network.h =================================================================== --- head/sys/dev/liquidio/lio_network.h (revision 338106) +++ head/sys/dev/liquidio/lio_network.h (revision 338107) @@ -1,293 +1,293 @@ /* * BSD LICENSE * * Copyright(c) 2017 Cavium, Inc.. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * Neither the name of Cavium, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*$FreeBSD$*/ /* \file lio_network.h * \brief Host NIC Driver: Structure and Macro definitions used by NIC Module. */ #ifndef __LIO_NETWORK_H__ #define __LIO_NETWORK_H__ #include "lio_rss.h" #define LIO_MIN_MTU_SIZE 72 #define LIO_MAX_MTU_SIZE (LIO_MAX_FRM_SIZE - LIO_FRM_HEADER_SIZE) #define LIO_MAX_SG 64 #define LIO_MAX_FRAME_SIZE 60000 struct lio_fw_stats_resp { uint64_t rh; struct octeon_link_stats stats; uint64_t status; }; /* LiquidIO per-interface network private data */ struct lio { /* State of the interface. Rx/Tx happens only in the RUNNING state. */ int ifstate; /* * Octeon Interface index number. This device will be represented as * oct in the system. */ int ifidx; /* Octeon Input queue to use to transmit for this network interface. */ int txq; /* * Octeon Output queue from which pkts arrive * for this network interface. */ int rxq; /* Guards each glist */ struct mtx *glist_lock; #define LIO_DEFAULT_STATS_INTERVAL 10000 /* callout timer for stats */ struct callout stats_timer; /* Stats Update Interval in milli Seconds */ uint16_t stats_interval; /* IRQ coalescing driver stats */ struct octeon_intrmod_cfg intrmod_cfg; /* Array of gather component linked lists */ struct lio_stailq_head *ghead; void **glists_virt_base; vm_paddr_t *glists_dma_base; uint32_t glist_entry_size; /* Pointer to the octeon device structure. */ struct octeon_device *oct_dev; struct ifnet *ifp; struct ifmedia ifmedia; int if_flags; /* Link information sent by the core application for this interface. */ struct octeon_link_info linfo; /* counter of link changes */ uint64_t link_changes; /* Size of Tx queue for this octeon device. */ uint32_t tx_qsize; /* Size of Rx queue for this octeon device. */ uint32_t rx_qsize; /* Size of MTU this octeon device. */ uint32_t mtu; /* msg level flag per interface. */ uint32_t msg_enable; /* Interface info */ uint32_t intf_open; /* task queue for rx oom status */ struct lio_tq rx_status_tq; /* VLAN Filtering related */ eventhandler_tag vlan_attach; eventhandler_tag vlan_detach; #ifdef RSS struct lio_rss_params_set rss_set; #endif /* RSS */ }; #define LIO_MAX_CORES 12 /* * \brief Enable or disable feature * @param ifp pointer to network device * @param cmd Command that just requires acknowledgment * @param param1 Parameter to command */ int lio_set_feature(struct ifnet *ifp, int cmd, uint16_t param1); /* * \brief Link control command completion callback * @param nctrl_ptr pointer to control packet structure * * This routine is called by the callback function when a ctrl pkt sent to * core app completes. The nctrl_ptr contains a copy of the command type * and data sent to the core app. This routine is only called if the ctrl * pkt was sent successfully to the core app. 
*/ void lio_ctrl_cmd_completion(void *nctrl_ptr); int lio_setup_io_queues(struct octeon_device *octeon_dev, int ifidx, uint32_t num_iqs, uint32_t num_oqs); int lio_setup_interrupt(struct octeon_device *oct, uint32_t num_ioqs); static inline void * lio_recv_buffer_alloc(uint32_t size) { struct mbuf *mb = NULL; mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); if (mb != NULL) mb->m_pkthdr.len = mb->m_len = size; return ((void *)mb); } static inline void lio_recv_buffer_free(void *buffer) { m_freem((struct mbuf *)buffer); } static inline int lio_get_order(unsigned long size) { int order; size = (size - 1) >> PAGE_SHIFT; order = 0; while (size) { order++; size >>= 1; } return (order); } static inline void * lio_dma_alloc(size_t size, vm_paddr_t *dma_handle) { size_t align; void *mem; align = PAGE_SIZE << lio_get_order(size); - mem = (void *)kmem_alloc_contig(kmem_arena, size, M_WAITOK, 0, ~0ul, - align, 0, VM_MEMATTR_DEFAULT); + mem = (void *)kmem_alloc_contig(size, M_WAITOK, 0, ~0ul, align, 0, + VM_MEMATTR_DEFAULT); if (mem != NULL) *dma_handle = vtophys(mem); else *dma_handle = 0; return (mem); } static inline void lio_dma_free(size_t size, void *cpu_addr) { kmem_free(kmem_arena, (vm_offset_t)cpu_addr, size); } static inline uint64_t lio_map_ring(device_t dev, void *buf, uint32_t size) { vm_paddr_t dma_addr; dma_addr = vtophys(((struct mbuf *)buf)->m_data); return ((uint64_t)dma_addr); } /* * \brief check interface state * @param lio per-network private data * @param state_flag flag state to check */ static inline int lio_ifstate_check(struct lio *lio, int state_flag) { return (atomic_load_acq_int(&lio->ifstate) & state_flag); } /* * \brief set interface state * @param lio per-network private data * @param state_flag flag state to set */ static inline void lio_ifstate_set(struct lio *lio, int state_flag) { atomic_store_rel_int(&lio->ifstate, (atomic_load_acq_int(&lio->ifstate) | state_flag)); } /* * \brief clear interface state * @param lio per-network private data * @param state_flag flag state to clear */ static inline void lio_ifstate_reset(struct lio *lio, int state_flag) { atomic_store_rel_int(&lio->ifstate, (atomic_load_acq_int(&lio->ifstate) & ~(state_flag))); } /* * \brief wait for all pending requests to complete * @param oct Pointer to Octeon device * * Called during shutdown sequence */ static inline int lio_wait_for_pending_requests(struct octeon_device *oct) { int i, pcount = 0; for (i = 0; i < 100; i++) { pcount = atomic_load_acq_int( &oct->response_list[LIO_ORDERED_SC_LIST]. pending_req_count); if (pcount) lio_sleep_timeout(100); else break; } if (pcount) return (1); return (0); } #endif /* __LIO_NETWORK_H__ */ Index: head/sys/dev/xdma/controller/pl330.c =================================================================== --- head/sys/dev/xdma/controller/pl330.c (revision 338106) +++ head/sys/dev/xdma/controller/pl330.c (revision 338107) @@ -1,663 +1,663 @@ /*- * Copyright (c) 2017-2018 Ruslan Bukin * All rights reserved. * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237 * ("CTSRD"), as part of the DARPA CRASH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* ARM PrimeCell DMA Controller (PL330) driver. */ #include __FBSDID("$FreeBSD$"); #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #include #endif #include #include #include "xdma_if.h" #define PL330_DEBUG #undef PL330_DEBUG #ifdef PL330_DEBUG #define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) #else #define dprintf(fmt, ...) #endif #define READ4(_sc, _reg) \ bus_read_4(_sc->res[0], _reg) #define WRITE4(_sc, _reg, _val) \ bus_write_4(_sc->res[0], _reg, _val) #define PL330_NCHANNELS 32 #define PL330_MAXLOAD 2048 struct pl330_channel { struct pl330_softc *sc; xdma_channel_t *xchan; int used; int index; uint8_t *ibuf; bus_addr_t ibuf_phys; uint32_t enqueued; uint32_t capacity; }; struct pl330_fdt_data { uint32_t periph_id; }; struct pl330_softc { device_t dev; struct resource *res[PL330_NCHANNELS + 1]; void *ih[PL330_NCHANNELS]; struct pl330_channel channels[PL330_NCHANNELS]; }; static struct resource_spec pl330_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, { SYS_RES_IRQ, 0, RF_ACTIVE }, { SYS_RES_IRQ, 1, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 2, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 3, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 4, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 5, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 6, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 7, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 8, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 9, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 10, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 11, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 12, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 13, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 14, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 15, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 16, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 17, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 18, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 19, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 20, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 21, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 22, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 23, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 24, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 25, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 26, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 27, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 28, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 29, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 30, RF_ACTIVE | RF_OPTIONAL }, { SYS_RES_IRQ, 31, RF_ACTIVE | RF_OPTIONAL }, { -1, 
0 } }; #define HWTYPE_NONE 0 #define HWTYPE_STD 1 static struct ofw_compat_data compat_data[] = { { "arm,pl330", HWTYPE_STD }, { NULL, HWTYPE_NONE }, }; static void pl330_intr(void *arg) { xdma_transfer_status_t status; struct xdma_transfer_status st; struct pl330_channel *chan; struct xdma_channel *xchan; struct pl330_softc *sc; uint32_t pending; int i; int c; sc = arg; pending = READ4(sc, INTMIS); dprintf("%s: 0x%x, LC0 %x, SAR %x DAR %x\n", __func__, pending, READ4(sc, LC0(0)), READ4(sc, SAR(0)), READ4(sc, DAR(0))); WRITE4(sc, INTCLR, pending); for (c = 0; c < PL330_NCHANNELS; c++) { if ((pending & (1 << c)) == 0) { continue; } chan = &sc->channels[c]; xchan = chan->xchan; st.error = 0; st.transferred = 0; for (i = 0; i < chan->enqueued; i++) { xchan_seg_done(xchan, &st); } /* Accept new requests. */ chan->capacity = PL330_MAXLOAD; /* Finish operation */ status.error = 0; status.transferred = 0; xdma_callback(chan->xchan, &status); } } static uint32_t emit_mov(uint8_t *buf, uint32_t reg, uint32_t val) { buf[0] = DMAMOV; buf[1] = reg; buf[2] = val; buf[3] = val >> 8; buf[4] = val >> 16; buf[5] = val >> 24; return (6); } static uint32_t emit_lp(uint8_t *buf, uint8_t idx, uint32_t iter) { if (idx > 1) return (0); /* We have two loops only. */ buf[0] = DMALP; buf[0] |= (idx << 1); buf[1] = (iter - 1) & 0xff; return (2); } static uint32_t emit_lpend(uint8_t *buf, uint8_t idx, uint8_t burst, uint8_t jump_addr_relative) { buf[0] = DMALPEND; buf[0] |= DMALPEND_NF; buf[0] |= (idx << 2); if (burst) buf[0] |= (1 << 1) | (1 << 0); else buf[0] |= (0 << 1) | (1 << 0); buf[1] = jump_addr_relative; return (2); } static uint32_t emit_ld(uint8_t *buf, uint8_t burst) { buf[0] = DMALD; if (burst) buf[0] |= (1 << 1) | (1 << 0); else buf[0] |= (0 << 1) | (1 << 0); return (1); } static uint32_t emit_st(uint8_t *buf, uint8_t burst) { buf[0] = DMAST; if (burst) buf[0] |= (1 << 1) | (1 << 0); else buf[0] |= (0 << 1) | (1 << 0); return (1); } static uint32_t emit_end(uint8_t *buf) { buf[0] = DMAEND; return (1); } static uint32_t emit_sev(uint8_t *buf, uint32_t ev) { buf[0] = DMASEV; buf[1] = (ev << 3); return (2); } static uint32_t emit_wfp(uint8_t *buf, uint32_t p_id) { buf[0] = DMAWFP; buf[0] |= (1 << 0); buf[1] = (p_id << 3); return (2); } static uint32_t emit_go(uint8_t *buf, uint32_t chan_id, uint32_t addr, uint8_t non_secure) { buf[0] = DMAGO; buf[0] |= (non_secure << 1); buf[1] = chan_id; buf[2] = addr; buf[3] = addr >> 8; buf[4] = addr >> 16; buf[5] = addr >> 24; return (6); } static int pl330_probe(device_t dev) { int hwtype; if (!ofw_bus_status_okay(dev)) return (ENXIO); hwtype = ofw_bus_search_compatible(dev, compat_data)->ocd_data; if (hwtype == HWTYPE_NONE) return (ENXIO); device_set_desc(dev, "ARM PrimeCell DMA Controller (PL330)"); return (BUS_PROBE_DEFAULT); } static int pl330_attach(device_t dev) { struct pl330_softc *sc; phandle_t xref, node; int err; int i; sc = device_get_softc(dev); sc->dev = dev; if (bus_alloc_resources(dev, pl330_spec, sc->res)) { device_printf(dev, "could not allocate resources for device\n"); return (ENXIO); } /* Setup interrupt handler */ for (i = 0; i < PL330_NCHANNELS; i++) { if (sc->res[i + 1] == NULL) break; err = bus_setup_intr(dev, sc->res[i + 1], INTR_TYPE_MISC | INTR_MPSAFE, NULL, pl330_intr, sc, sc->ih[i]); if (err) { device_printf(dev, "Unable to alloc interrupt resource.\n"); return (ENXIO); } } node = ofw_bus_get_node(dev); xref = OF_xref_from_node(node); OF_device_register_xref(xref, dev); return (0); } static int pl330_detach(device_t dev) { struct 
pl330_softc *sc; sc = device_get_softc(dev); return (0); } static int pl330_channel_alloc(device_t dev, struct xdma_channel *xchan) { struct pl330_channel *chan; struct pl330_softc *sc; int i; sc = device_get_softc(dev); for (i = 0; i < PL330_NCHANNELS; i++) { chan = &sc->channels[i]; if (chan->used == 0) { chan->xchan = xchan; xchan->chan = (void *)chan; xchan->caps |= XCHAN_CAP_BUSDMA; chan->index = i; chan->sc = sc; chan->used = 1; - chan->ibuf = (void *)kmem_alloc_contig(kernel_arena, - PAGE_SIZE*8, M_ZERO, 0, ~0, PAGE_SIZE, 0, + chan->ibuf = (void *)kmem_alloc_contig(PAGE_SIZE * 8, + M_ZERO, 0, ~0, PAGE_SIZE, 0, VM_MEMATTR_UNCACHEABLE); chan->ibuf_phys = vtophys(chan->ibuf); return (0); } } return (-1); } static int pl330_channel_free(device_t dev, struct xdma_channel *xchan) { struct pl330_channel *chan; struct pl330_softc *sc; sc = device_get_softc(dev); chan = (struct pl330_channel *)xchan->chan; chan->used = 0; return (0); } static int pl330_channel_capacity(device_t dev, xdma_channel_t *xchan, uint32_t *capacity) { struct pl330_channel *chan; chan = (struct pl330_channel *)xchan->chan; *capacity = chan->capacity; return (0); } static int pl330_ccr_port_width(struct xdma_sglist *sg, uint32_t *addr) { uint32_t reg; reg = 0; switch (sg->src_width) { case 1: reg |= CCR_SRC_BURST_SIZE_1; break; case 2: reg |= CCR_SRC_BURST_SIZE_2; break; case 4: reg |= CCR_SRC_BURST_SIZE_4; break; default: return (-1); } switch (sg->dst_width) { case 1: reg |= CCR_DST_BURST_SIZE_1; break; case 2: reg |= CCR_DST_BURST_SIZE_2; break; case 4: reg |= CCR_DST_BURST_SIZE_4; break; default: return (-1); } *addr |= reg; return (0); } static int pl330_channel_submit_sg(device_t dev, struct xdma_channel *xchan, struct xdma_sglist *sg, uint32_t sg_n) { struct pl330_fdt_data *data; xdma_controller_t *xdma; struct pl330_channel *chan; struct pl330_softc *sc; uint32_t src_addr_lo; uint32_t dst_addr_lo; uint32_t len; uint32_t reg; uint32_t offs; uint32_t cnt; uint8_t *ibuf; uint8_t dbuf[6]; uint8_t offs0, offs1; int err; int i; sc = device_get_softc(dev); xdma = xchan->xdma; data = (struct pl330_fdt_data *)xdma->data; chan = (struct pl330_channel *)xchan->chan; ibuf = chan->ibuf; dprintf("%s: chan->index %d\n", __func__, chan->index); offs = 0; for (i = 0; i < sg_n; i++) { if (sg[i].direction == XDMA_DEV_TO_MEM) reg = CCR_DST_INC; else { reg = CCR_SRC_INC; reg |= (CCR_DST_PROT_PRIV); } err = pl330_ccr_port_width(&sg[i], ®); if (err != 0) return (err); offs += emit_mov(&chan->ibuf[offs], R_CCR, reg); src_addr_lo = (uint32_t)sg[i].src_addr; dst_addr_lo = (uint32_t)sg[i].dst_addr; len = (uint32_t)sg[i].len; dprintf("%s: src %x dst %x len %d periph_id %d\n", __func__, src_addr_lo, dst_addr_lo, len, data->periph_id); offs += emit_mov(&ibuf[offs], R_SAR, src_addr_lo); offs += emit_mov(&ibuf[offs], R_DAR, dst_addr_lo); if (sg[i].src_width != sg[i].dst_width) return (-1); /* Not supported. 
*/ cnt = (len / sg[i].src_width); if (cnt > 128) { offs += emit_lp(&ibuf[offs], 0, cnt / 128); offs0 = offs; offs += emit_lp(&ibuf[offs], 1, 128); offs1 = offs; } else { offs += emit_lp(&ibuf[offs], 0, cnt); offs0 = offs; } offs += emit_wfp(&ibuf[offs], data->periph_id); offs += emit_ld(&ibuf[offs], 1); offs += emit_st(&ibuf[offs], 1); if (cnt > 128) offs += emit_lpend(&ibuf[offs], 1, 1, (offs - offs1)); offs += emit_lpend(&ibuf[offs], 0, 1, (offs - offs0)); } offs += emit_sev(&ibuf[offs], chan->index); offs += emit_end(&ibuf[offs]); emit_go(dbuf, chan->index, chan->ibuf_phys, 0); reg = (dbuf[1] << 24) | (dbuf[0] << 16); WRITE4(sc, DBGINST0, reg); reg = (dbuf[5] << 24) | (dbuf[4] << 16) | (dbuf[3] << 8) | dbuf[2]; WRITE4(sc, DBGINST1, reg); WRITE4(sc, INTCLR, 0xffffffff); WRITE4(sc, INTEN, (1 << chan->index)); chan->enqueued = sg_n; chan->capacity = 0; /* Start operation */ WRITE4(sc, DBGCMD, 0); return (0); } static int pl330_channel_prep_sg(device_t dev, struct xdma_channel *xchan) { struct pl330_channel *chan; struct pl330_softc *sc; sc = device_get_softc(dev); dprintf("%s(%d)\n", __func__, device_get_unit(dev)); chan = (struct pl330_channel *)xchan->chan; chan->capacity = PL330_MAXLOAD; return (0); } static int pl330_channel_control(device_t dev, xdma_channel_t *xchan, int cmd) { struct pl330_channel *chan; struct pl330_softc *sc; sc = device_get_softc(dev); chan = (struct pl330_channel *)xchan->chan; switch (cmd) { case XDMA_CMD_BEGIN: case XDMA_CMD_TERMINATE: case XDMA_CMD_PAUSE: /* TODO: implement me */ return (-1); } return (0); } #ifdef FDT static int pl330_ofw_md_data(device_t dev, pcell_t *cells, int ncells, void **ptr) { struct pl330_fdt_data *data; if (ncells != 1) return (-1); data = malloc(sizeof(struct pl330_fdt_data), M_DEVBUF, (M_WAITOK | M_ZERO)); data->periph_id = cells[0]; *ptr = data; return (0); } #endif static device_method_t pl330_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pl330_probe), DEVMETHOD(device_attach, pl330_attach), DEVMETHOD(device_detach, pl330_detach), /* xDMA Interface */ DEVMETHOD(xdma_channel_alloc, pl330_channel_alloc), DEVMETHOD(xdma_channel_free, pl330_channel_free), DEVMETHOD(xdma_channel_control, pl330_channel_control), /* xDMA SG Interface */ DEVMETHOD(xdma_channel_capacity, pl330_channel_capacity), DEVMETHOD(xdma_channel_prep_sg, pl330_channel_prep_sg), DEVMETHOD(xdma_channel_submit_sg, pl330_channel_submit_sg), #ifdef FDT DEVMETHOD(xdma_ofw_md_data, pl330_ofw_md_data), #endif DEVMETHOD_END }; static driver_t pl330_driver = { "pl330", pl330_methods, sizeof(struct pl330_softc), }; static devclass_t pl330_devclass; EARLY_DRIVER_MODULE(pl330, simplebus, pl330_driver, pl330_devclass, 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_LATE); Index: head/sys/kern/kern_malloc.c =================================================================== --- head/sys/kern/kern_malloc.c (revision 338106) +++ head/sys/kern/kern_malloc.c (revision 338107) @@ -1,1278 +1,1278 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1987, 1991, 1993 * The Regents of the University of California. * Copyright (c) 2005-2009 Robert N. M. Watson * Copyright (c) 2008 Otto Moerbeek (mallocarray) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_malloc.c 8.3 (Berkeley) 1/4/94 */ /* * Kernel malloc(9) implementation -- general purpose kernel memory allocator * based on memory types. Back end is implemented using the UMA(9) zone * allocator. A set of fixed-size buckets are used for smaller allocations, * and a special UMA allocation interface is used for larger allocations. * Callers declare memory types, and statistics are maintained independently * for each memory type. Statistics are maintained per-CPU for performance * reasons. See malloc(9) and comments in malloc.h for a detailed * description. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEBUG_MEMGUARD #include #endif #ifdef DEBUG_REDZONE #include #endif #if defined(INVARIANTS) && defined(__i386__) #include #endif #include #ifdef KDTRACE_HOOKS #include bool __read_frequently dtrace_malloc_enabled; dtrace_malloc_probe_func_t __read_mostly dtrace_malloc_probe; #endif #if defined(INVARIANTS) || defined(MALLOC_MAKE_FAILURES) || \ defined(DEBUG_MEMGUARD) || defined(DEBUG_REDZONE) #define MALLOC_DEBUG 1 #endif /* * When realloc() is called, if the new size is sufficiently smaller than * the old size, realloc() will allocate a new, smaller block to avoid * wasting memory. 'Sufficiently smaller' is defined as: newsize <= * oldsize / 2^n, where REALLOC_FRACTION defines the value of 'n'. */ #ifndef REALLOC_FRACTION #define REALLOC_FRACTION 1 /* new block if <= half the size */ #endif /* * Centrally define some common malloc types. */ MALLOC_DEFINE(M_CACHE, "cache", "Various Dynamically allocated caches"); MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory"); MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers"); static struct malloc_type *kmemstatistics; static int kmemcount; #define KMEM_ZSHIFT 4 #define KMEM_ZBASE 16 #define KMEM_ZMASK (KMEM_ZBASE - 1) #define KMEM_ZMAX 65536 #define KMEM_ZSIZE (KMEM_ZMAX >> KMEM_ZSHIFT) static uint8_t kmemsize[KMEM_ZSIZE + 1]; #ifndef MALLOC_DEBUG_MAXZONES #define MALLOC_DEBUG_MAXZONES 1 #endif static int numzones = MALLOC_DEBUG_MAXZONES; /* * Small malloc(9) memory allocations are allocated from a set of UMA buckets * of various sizes. 
* * XXX: The comment here used to read "These won't be powers of two for * long." It's possible that a significant amount of wasted memory could be * recovered by tuning the sizes of these buckets. */ struct { int kz_size; char *kz_name; uma_zone_t kz_zone[MALLOC_DEBUG_MAXZONES]; } kmemzones[] = { {16, "16", }, {32, "32", }, {64, "64", }, {128, "128", }, {256, "256", }, {512, "512", }, {1024, "1024", }, {2048, "2048", }, {4096, "4096", }, {8192, "8192", }, {16384, "16384", }, {32768, "32768", }, {65536, "65536", }, {0, NULL}, }; /* * Zone to allocate malloc type descriptions from. For ABI reasons, memory * types are described by a data structure passed by the declaring code, but * the malloc(9) implementation has its own data structure describing the * type and statistics. This permits the malloc(9)-internal data structures * to be modified without breaking binary-compiled kernel modules that * declare malloc types. */ static uma_zone_t mt_zone; u_long vm_kmem_size; SYSCTL_ULONG(_vm, OID_AUTO, kmem_size, CTLFLAG_RDTUN, &vm_kmem_size, 0, "Size of kernel memory"); static u_long kmem_zmax = KMEM_ZMAX; SYSCTL_ULONG(_vm, OID_AUTO, kmem_zmax, CTLFLAG_RDTUN, &kmem_zmax, 0, "Maximum allocation size that malloc(9) would use UMA as backend"); static u_long vm_kmem_size_min; SYSCTL_ULONG(_vm, OID_AUTO, kmem_size_min, CTLFLAG_RDTUN, &vm_kmem_size_min, 0, "Minimum size of kernel memory"); static u_long vm_kmem_size_max; SYSCTL_ULONG(_vm, OID_AUTO, kmem_size_max, CTLFLAG_RDTUN, &vm_kmem_size_max, 0, "Maximum size of kernel memory"); static u_int vm_kmem_size_scale; SYSCTL_UINT(_vm, OID_AUTO, kmem_size_scale, CTLFLAG_RDTUN, &vm_kmem_size_scale, 0, "Scale factor for kernel memory size"); static int sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vm, OID_AUTO, kmem_map_size, CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, NULL, 0, sysctl_kmem_map_size, "LU", "Current kmem allocation size"); static int sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vm, OID_AUTO, kmem_map_free, CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, NULL, 0, sysctl_kmem_map_free, "LU", "Free space in kmem"); /* * The malloc_mtx protects the kmemstatistics linked list. */ struct mtx malloc_mtx; #ifdef MALLOC_PROFILE uint64_t krequests[KMEM_ZSIZE + 1]; static int sysctl_kern_mprof(SYSCTL_HANDLER_ARGS); #endif static int sysctl_kern_malloc_stats(SYSCTL_HANDLER_ARGS); /* * time_uptime of the last malloc(9) failure (induced or real). */ static time_t t_malloc_fail; #if defined(MALLOC_MAKE_FAILURES) || (MALLOC_DEBUG_MAXZONES > 1) static SYSCTL_NODE(_debug, OID_AUTO, malloc, CTLFLAG_RD, 0, "Kernel malloc debugging options"); #endif /* * malloc(9) fault injection -- cause malloc failures every (n) mallocs when * the caller specifies M_NOWAIT. If set to 0, no failures are caused. */ #ifdef MALLOC_MAKE_FAILURES static int malloc_failure_rate; static int malloc_nowait_count; static int malloc_failure_count; SYSCTL_INT(_debug_malloc, OID_AUTO, failure_rate, CTLFLAG_RWTUN, &malloc_failure_rate, 0, "Every (n) mallocs with M_NOWAIT will fail"); SYSCTL_INT(_debug_malloc, OID_AUTO, failure_count, CTLFLAG_RD, &malloc_failure_count, 0, "Number of imposed M_NOWAIT malloc failures"); #endif static int sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS) { u_long size; size = uma_size(); return (sysctl_handle_long(oidp, &size, 0, req)); } static int sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS) { u_long size, limit; /* The sysctl is unsigned, implement as a saturation value. 
*/ size = uma_size(); limit = uma_limit(); if (size > limit) size = 0; else size = limit - size; return (sysctl_handle_long(oidp, &size, 0, req)); } /* * malloc(9) uma zone separation -- sub-page buffer overruns in one * malloc type will affect only a subset of other malloc types. */ #if MALLOC_DEBUG_MAXZONES > 1 static void tunable_set_numzones(void) { TUNABLE_INT_FETCH("debug.malloc.numzones", &numzones); /* Sanity check the number of malloc uma zones. */ if (numzones <= 0) numzones = 1; if (numzones > MALLOC_DEBUG_MAXZONES) numzones = MALLOC_DEBUG_MAXZONES; } SYSINIT(numzones, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_set_numzones, NULL); SYSCTL_INT(_debug_malloc, OID_AUTO, numzones, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &numzones, 0, "Number of malloc uma subzones"); /* * Any number that changes regularly is an okay choice for the * offset. Build numbers are pretty good of you have them. */ static u_int zone_offset = __FreeBSD_version; TUNABLE_INT("debug.malloc.zone_offset", &zone_offset); SYSCTL_UINT(_debug_malloc, OID_AUTO, zone_offset, CTLFLAG_RDTUN, &zone_offset, 0, "Separate malloc types by examining the " "Nth character in the malloc type short description."); static void mtp_set_subzone(struct malloc_type *mtp) { struct malloc_type_internal *mtip; const char *desc; size_t len; u_int val; mtip = mtp->ks_handle; desc = mtp->ks_shortdesc; if (desc == NULL || (len = strlen(desc)) == 0) val = 0; else val = desc[zone_offset % len]; mtip->mti_zone = (val % numzones); } static inline u_int mtp_get_subzone(struct malloc_type *mtp) { struct malloc_type_internal *mtip; mtip = mtp->ks_handle; KASSERT(mtip->mti_zone < numzones, ("mti_zone %u out of range %d", mtip->mti_zone, numzones)); return (mtip->mti_zone); } #elif MALLOC_DEBUG_MAXZONES == 0 #error "MALLOC_DEBUG_MAXZONES must be positive." #else static void mtp_set_subzone(struct malloc_type *mtp) { struct malloc_type_internal *mtip; mtip = mtp->ks_handle; mtip->mti_zone = 0; } static inline u_int mtp_get_subzone(struct malloc_type *mtp) { return (0); } #endif /* MALLOC_DEBUG_MAXZONES > 1 */ int malloc_last_fail(void) { return (time_uptime - t_malloc_fail); } /* * An allocation has succeeded -- update malloc type statistics for the * amount of bucket size. Occurs within a critical section so that the * thread isn't preempted and doesn't migrate while updating per-PCU * statistics. */ static void malloc_type_zone_allocated(struct malloc_type *mtp, unsigned long size, int zindx) { struct malloc_type_internal *mtip; struct malloc_type_stats *mtsp; critical_enter(); mtip = mtp->ks_handle; mtsp = &mtip->mti_stats[curcpu]; if (size > 0) { mtsp->mts_memalloced += size; mtsp->mts_numallocs++; } if (zindx != -1) mtsp->mts_size |= 1 << zindx; #ifdef KDTRACE_HOOKS if (__predict_false(dtrace_malloc_enabled)) { uint32_t probe_id = mtip->mti_probes[DTMALLOC_PROBE_MALLOC]; if (probe_id != 0) (dtrace_malloc_probe)(probe_id, (uintptr_t) mtp, (uintptr_t) mtip, (uintptr_t) mtsp, size, zindx); } #endif critical_exit(); } void malloc_type_allocated(struct malloc_type *mtp, unsigned long size) { if (size > 0) malloc_type_zone_allocated(mtp, size, -1); } /* * A free operation has occurred -- update malloc type statistics for the * amount of the bucket size. Occurs within a critical section so that the * thread isn't preempted and doesn't migrate while updating per-CPU * statistics. 
*/ void malloc_type_freed(struct malloc_type *mtp, unsigned long size) { struct malloc_type_internal *mtip; struct malloc_type_stats *mtsp; critical_enter(); mtip = mtp->ks_handle; mtsp = &mtip->mti_stats[curcpu]; mtsp->mts_memfreed += size; mtsp->mts_numfrees++; #ifdef KDTRACE_HOOKS if (__predict_false(dtrace_malloc_enabled)) { uint32_t probe_id = mtip->mti_probes[DTMALLOC_PROBE_FREE]; if (probe_id != 0) (dtrace_malloc_probe)(probe_id, (uintptr_t) mtp, (uintptr_t) mtip, (uintptr_t) mtsp, size, 0); } #endif critical_exit(); } /* * contigmalloc: * * Allocate a block of physically contiguous memory. * * If M_NOWAIT is set, this routine will not block and return NULL if * the allocation fails. */ void * contigmalloc(unsigned long size, struct malloc_type *type, int flags, vm_paddr_t low, vm_paddr_t high, unsigned long alignment, vm_paddr_t boundary) { void *ret; - ret = (void *)kmem_alloc_contig(kernel_arena, size, flags, low, high, - alignment, boundary, VM_MEMATTR_DEFAULT); + ret = (void *)kmem_alloc_contig(size, flags, low, high, alignment, + boundary, VM_MEMATTR_DEFAULT); if (ret != NULL) malloc_type_allocated(type, round_page(size)); return (ret); } void * contigmalloc_domain(unsigned long size, struct malloc_type *type, int domain, int flags, vm_paddr_t low, vm_paddr_t high, unsigned long alignment, vm_paddr_t boundary) { void *ret; ret = (void *)kmem_alloc_contig_domain(domain, size, flags, low, high, alignment, boundary, VM_MEMATTR_DEFAULT); if (ret != NULL) malloc_type_allocated(type, round_page(size)); return (ret); } /* * contigfree: * * Free a block of memory allocated by contigmalloc. * * This routine may not block. */ void contigfree(void *addr, unsigned long size, struct malloc_type *type) { kmem_free(kernel_arena, (vm_offset_t)addr, size); malloc_type_freed(type, round_page(size)); } #ifdef MALLOC_DEBUG static int malloc_dbg(caddr_t *vap, size_t *sizep, struct malloc_type *mtp, int flags) { #ifdef INVARIANTS int indx; KASSERT(mtp->ks_magic == M_MAGIC, ("malloc: bad malloc type magic")); /* * Check that exactly one of M_WAITOK or M_NOWAIT is specified. */ indx = flags & (M_WAITOK | M_NOWAIT); if (indx != M_NOWAIT && indx != M_WAITOK) { static struct timeval lasterr; static int curerr, once; if (once == 0 && ppsratecheck(&lasterr, &curerr, 1)) { printf("Bad malloc flags: %x\n", indx); kdb_backtrace(); flags |= M_WAITOK; once++; } } #endif #ifdef MALLOC_MAKE_FAILURES if ((flags & M_NOWAIT) && (malloc_failure_rate != 0)) { atomic_add_int(&malloc_nowait_count, 1); if ((malloc_nowait_count % malloc_failure_rate) == 0) { atomic_add_int(&malloc_failure_count, 1); t_malloc_fail = time_uptime; *vap = NULL; return (EJUSTRETURN); } } #endif if (flags & M_WAITOK) { KASSERT(curthread->td_intr_nesting_level == 0, ("malloc(M_WAITOK) in interrupt context")); KASSERT(curthread->td_epochnest == 0, ("malloc(M_WAITOK) in epoch context")); } KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(), ("malloc: called with spinlock or critical section held")); #ifdef DEBUG_MEMGUARD if (memguard_cmp_mtp(mtp, *sizep)) { *vap = memguard_alloc(*sizep, flags); if (*vap != NULL) return (EJUSTRETURN); /* This is unfortunate but should not be fatal. */ } #endif #ifdef DEBUG_REDZONE *sizep = redzone_size_ntor(*sizep); #endif return (0); } #endif /* * malloc: * * Allocate a block of memory. * * If M_NOWAIT is set, this routine will not block and return NULL if * the allocation fails. 
*/ void * (malloc)(size_t size, struct malloc_type *mtp, int flags) { int indx; caddr_t va; uma_zone_t zone; #if defined(DEBUG_REDZONE) unsigned long osize = size; #endif #ifdef MALLOC_DEBUG va = NULL; if (malloc_dbg(&va, &size, mtp, flags) != 0) return (va); #endif if (size <= kmem_zmax && (flags & M_EXEC) == 0) { if (size & KMEM_ZMASK) size = (size & ~KMEM_ZMASK) + KMEM_ZBASE; indx = kmemsize[size >> KMEM_ZSHIFT]; zone = kmemzones[indx].kz_zone[mtp_get_subzone(mtp)]; #ifdef MALLOC_PROFILE krequests[size >> KMEM_ZSHIFT]++; #endif va = uma_zalloc(zone, flags); if (va != NULL) size = zone->uz_size; malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx); } else { size = roundup(size, PAGE_SIZE); zone = NULL; va = uma_large_malloc(size, flags); malloc_type_allocated(mtp, va == NULL ? 0 : size); } if (flags & M_WAITOK) KASSERT(va != NULL, ("malloc(M_WAITOK) returned NULL")); else if (va == NULL) t_malloc_fail = time_uptime; #ifdef DEBUG_REDZONE if (va != NULL) va = redzone_setup(va, osize); #endif return ((void *) va); } void * malloc_domain(size_t size, struct malloc_type *mtp, int domain, int flags) { int indx; caddr_t va; uma_zone_t zone; #if defined(DEBUG_REDZONE) unsigned long osize = size; #endif #ifdef MALLOC_DEBUG va = NULL; if (malloc_dbg(&va, &size, mtp, flags) != 0) return (va); #endif if (size <= kmem_zmax && (flags & M_EXEC) == 0) { if (size & KMEM_ZMASK) size = (size & ~KMEM_ZMASK) + KMEM_ZBASE; indx = kmemsize[size >> KMEM_ZSHIFT]; zone = kmemzones[indx].kz_zone[mtp_get_subzone(mtp)]; #ifdef MALLOC_PROFILE krequests[size >> KMEM_ZSHIFT]++; #endif va = uma_zalloc_domain(zone, NULL, domain, flags); if (va != NULL) size = zone->uz_size; malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx); } else { size = roundup(size, PAGE_SIZE); zone = NULL; va = uma_large_malloc_domain(size, domain, flags); malloc_type_allocated(mtp, va == NULL ? 0 : size); } if (flags & M_WAITOK) KASSERT(va != NULL, ("malloc(M_WAITOK) returned NULL")); else if (va == NULL) t_malloc_fail = time_uptime; #ifdef DEBUG_REDZONE if (va != NULL) va = redzone_setup(va, osize); #endif return ((void *) va); } void * mallocarray(size_t nmemb, size_t size, struct malloc_type *type, int flags) { if (WOULD_OVERFLOW(nmemb, size)) panic("mallocarray: %zu * %zu overflowed", nmemb, size); return (malloc(size * nmemb, type, flags)); } #ifdef INVARIANTS static void free_save_type(void *addr, struct malloc_type *mtp, u_long size) { struct malloc_type **mtpp = addr; /* * Cache a pointer to the malloc_type that most recently freed * this memory here. This way we know who is most likely to * have stepped on it later. * * This code assumes that size is a multiple of 8 bytes for * 64 bit machines */ mtpp = (struct malloc_type **) ((unsigned long)mtpp & ~UMA_ALIGN_PTR); mtpp += (size - sizeof(struct malloc_type *)) / sizeof(struct malloc_type *); *mtpp = mtp; } #endif #ifdef MALLOC_DEBUG static int free_dbg(void **addrp, struct malloc_type *mtp) { void *addr; addr = *addrp; KASSERT(mtp->ks_magic == M_MAGIC, ("free: bad malloc type magic")); KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(), ("free: called with spinlock or critical section held")); /* free(NULL, ...) does nothing */ if (addr == NULL) return (EJUSTRETURN); #ifdef DEBUG_MEMGUARD if (is_memguard_addr(addr)) { memguard_free(addr); return (EJUSTRETURN); } #endif #ifdef DEBUG_REDZONE redzone_check(addr); *addrp = redzone_addr_ntor(addr); #endif return (0); } #endif /* * free: * * Free a block of memory allocated by malloc. * * This routine may not block. 
*/ void free(void *addr, struct malloc_type *mtp) { uma_slab_t slab; u_long size; #ifdef MALLOC_DEBUG if (free_dbg(&addr, mtp) != 0) return; #endif /* free(NULL, ...) does nothing */ if (addr == NULL) return; slab = vtoslab((vm_offset_t)addr & (~UMA_SLAB_MASK)); if (slab == NULL) panic("free: address %p(%p) has not been allocated.\n", addr, (void *)((u_long)addr & (~UMA_SLAB_MASK))); if (!(slab->us_flags & UMA_SLAB_MALLOC)) { size = slab->us_keg->uk_size; #ifdef INVARIANTS free_save_type(addr, mtp, size); #endif uma_zfree_arg(LIST_FIRST(&slab->us_keg->uk_zones), addr, slab); } else { size = slab->us_size; uma_large_free(slab); } malloc_type_freed(mtp, size); } void free_domain(void *addr, struct malloc_type *mtp) { uma_slab_t slab; u_long size; #ifdef MALLOC_DEBUG if (free_dbg(&addr, mtp) != 0) return; #endif /* free(NULL, ...) does nothing */ if (addr == NULL) return; slab = vtoslab((vm_offset_t)addr & (~UMA_SLAB_MASK)); if (slab == NULL) panic("free_domain: address %p(%p) has not been allocated.\n", addr, (void *)((u_long)addr & (~UMA_SLAB_MASK))); if (!(slab->us_flags & UMA_SLAB_MALLOC)) { size = slab->us_keg->uk_size; #ifdef INVARIANTS free_save_type(addr, mtp, size); #endif uma_zfree_domain(LIST_FIRST(&slab->us_keg->uk_zones), addr, slab); } else { size = slab->us_size; uma_large_free(slab); } malloc_type_freed(mtp, size); } /* * realloc: change the size of a memory block */ void * realloc(void *addr, size_t size, struct malloc_type *mtp, int flags) { uma_slab_t slab; unsigned long alloc; void *newaddr; KASSERT(mtp->ks_magic == M_MAGIC, ("realloc: bad malloc type magic")); KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(), ("realloc: called with spinlock or critical section held")); /* realloc(NULL, ...) is equivalent to malloc(...) */ if (addr == NULL) return (malloc(size, mtp, flags)); /* * XXX: Should report free of old memory and alloc of new memory to * per-CPU stats. */ #ifdef DEBUG_MEMGUARD if (is_memguard_addr(addr)) return (memguard_realloc(addr, size, mtp, flags)); #endif #ifdef DEBUG_REDZONE slab = NULL; alloc = redzone_get_size(addr); #else slab = vtoslab((vm_offset_t)addr & ~(UMA_SLAB_MASK)); /* Sanity check */ KASSERT(slab != NULL, ("realloc: address %p out of range", (void *)addr)); /* Get the size of the original block */ if (!(slab->us_flags & UMA_SLAB_MALLOC)) alloc = slab->us_keg->uk_size; else alloc = slab->us_size; /* Reuse the original block if appropriate */ if (size <= alloc && (size > (alloc >> REALLOC_FRACTION) || alloc == MINALLOCSIZE)) return (addr); #endif /* !DEBUG_REDZONE */ /* Allocate a new, bigger (or smaller) block */ if ((newaddr = malloc(size, mtp, flags)) == NULL) return (NULL); /* Copy over original contents */ bcopy(addr, newaddr, min(size, alloc)); free(addr, mtp); return (newaddr); } /* * reallocf: same as realloc() but free memory on failure. */ void * reallocf(void *addr, size_t size, struct malloc_type *mtp, int flags) { void *mem; if ((mem = realloc(addr, size, mtp, flags)) == NULL) free(addr, mtp); return (mem); } #ifndef __sparc64__ CTASSERT(VM_KMEM_SIZE_SCALE >= 1); #endif /* * Initialize the kernel memory (kmem) arena. 
*/ void kmeminit(void) { u_long mem_size; u_long tmp; #ifdef VM_KMEM_SIZE if (vm_kmem_size == 0) vm_kmem_size = VM_KMEM_SIZE; #endif #ifdef VM_KMEM_SIZE_MIN if (vm_kmem_size_min == 0) vm_kmem_size_min = VM_KMEM_SIZE_MIN; #endif #ifdef VM_KMEM_SIZE_MAX if (vm_kmem_size_max == 0) vm_kmem_size_max = VM_KMEM_SIZE_MAX; #endif /* * Calculate the amount of kernel virtual address (KVA) space that is * preallocated to the kmem arena. In order to support a wide range * of machines, it is a function of the physical memory size, * specifically, * * min(max(physical memory size / VM_KMEM_SIZE_SCALE, * VM_KMEM_SIZE_MIN), VM_KMEM_SIZE_MAX) * * Every architecture must define an integral value for * VM_KMEM_SIZE_SCALE. However, the definitions of VM_KMEM_SIZE_MIN * and VM_KMEM_SIZE_MAX, which represent respectively the floor and * ceiling on this preallocation, are optional. Typically, * VM_KMEM_SIZE_MAX is itself a function of the available KVA space on * a given architecture. */ mem_size = vm_cnt.v_page_count; if (mem_size <= 32768) /* delphij XXX 128MB */ kmem_zmax = PAGE_SIZE; if (vm_kmem_size_scale < 1) vm_kmem_size_scale = VM_KMEM_SIZE_SCALE; /* * Check if we should use defaults for the "vm_kmem_size" * variable: */ if (vm_kmem_size == 0) { vm_kmem_size = (mem_size / vm_kmem_size_scale) * PAGE_SIZE; if (vm_kmem_size_min > 0 && vm_kmem_size < vm_kmem_size_min) vm_kmem_size = vm_kmem_size_min; if (vm_kmem_size_max > 0 && vm_kmem_size >= vm_kmem_size_max) vm_kmem_size = vm_kmem_size_max; } /* * The amount of KVA space that is preallocated to the * kmem arena can be set statically at compile-time or manually * through the kernel environment. However, it is still limited to * twice the physical memory size, which has been sufficient to handle * the most severe cases of external fragmentation in the kmem arena. */ if (vm_kmem_size / 2 / PAGE_SIZE > mem_size) vm_kmem_size = 2 * mem_size * PAGE_SIZE; vm_kmem_size = round_page(vm_kmem_size); #ifdef DEBUG_MEMGUARD tmp = memguard_fudge(vm_kmem_size, kernel_map); #else tmp = vm_kmem_size; #endif uma_set_limit(tmp); #ifdef DEBUG_MEMGUARD /* * Initialize MemGuard if support compiled in. MemGuard is a * replacement allocator used for detecting tamper-after-free * scenarios as they occur. It is only used for debugging. 
*/ memguard_init(kernel_arena); #endif } /* * Initialize the kernel memory allocator */ /* ARGSUSED*/ static void mallocinit(void *dummy) { int i; uint8_t indx; mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF); kmeminit(); if (kmem_zmax < PAGE_SIZE || kmem_zmax > KMEM_ZMAX) kmem_zmax = KMEM_ZMAX; mt_zone = uma_zcreate("mt_zone", sizeof(struct malloc_type_internal), #ifdef INVARIANTS mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini, #else NULL, NULL, NULL, NULL, #endif UMA_ALIGN_PTR, UMA_ZONE_MALLOC); for (i = 0, indx = 0; kmemzones[indx].kz_size != 0; indx++) { int size = kmemzones[indx].kz_size; char *name = kmemzones[indx].kz_name; int subzone; for (subzone = 0; subzone < numzones; subzone++) { kmemzones[indx].kz_zone[subzone] = uma_zcreate(name, size, #ifdef INVARIANTS mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini, #else NULL, NULL, NULL, NULL, #endif UMA_ALIGN_PTR, UMA_ZONE_MALLOC); } for (;i <= size; i+= KMEM_ZBASE) kmemsize[i >> KMEM_ZSHIFT] = indx; } } SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_SECOND, mallocinit, NULL); void malloc_init(void *data) { struct malloc_type_internal *mtip; struct malloc_type *mtp; KASSERT(vm_cnt.v_page_count != 0, ("malloc_register before vm_init")); mtp = data; if (mtp->ks_magic != M_MAGIC) panic("malloc_init: bad malloc type magic"); mtip = uma_zalloc(mt_zone, M_WAITOK | M_ZERO); mtp->ks_handle = mtip; mtp_set_subzone(mtp); mtx_lock(&malloc_mtx); mtp->ks_next = kmemstatistics; kmemstatistics = mtp; kmemcount++; mtx_unlock(&malloc_mtx); } void malloc_uninit(void *data) { struct malloc_type_internal *mtip; struct malloc_type_stats *mtsp; struct malloc_type *mtp, *temp; uma_slab_t slab; long temp_allocs, temp_bytes; int i; mtp = data; KASSERT(mtp->ks_magic == M_MAGIC, ("malloc_uninit: bad malloc type magic")); KASSERT(mtp->ks_handle != NULL, ("malloc_deregister: cookie NULL")); mtx_lock(&malloc_mtx); mtip = mtp->ks_handle; mtp->ks_handle = NULL; if (mtp != kmemstatistics) { for (temp = kmemstatistics; temp != NULL; temp = temp->ks_next) { if (temp->ks_next == mtp) { temp->ks_next = mtp->ks_next; break; } } KASSERT(temp, ("malloc_uninit: type '%s' not found", mtp->ks_shortdesc)); } else kmemstatistics = mtp->ks_next; kmemcount--; mtx_unlock(&malloc_mtx); /* * Look for memory leaks. */ temp_allocs = temp_bytes = 0; for (i = 0; i < MAXCPU; i++) { mtsp = &mtip->mti_stats[i]; temp_allocs += mtsp->mts_numallocs; temp_allocs -= mtsp->mts_numfrees; temp_bytes += mtsp->mts_memalloced; temp_bytes -= mtsp->mts_memfreed; } if (temp_allocs > 0 || temp_bytes > 0) { printf("Warning: memory type %s leaked memory on destroy " "(%ld allocations, %ld bytes leaked).\n", mtp->ks_shortdesc, temp_allocs, temp_bytes); } slab = vtoslab((vm_offset_t) mtip & (~UMA_SLAB_MASK)); uma_zfree_arg(mt_zone, mtip, slab); } struct malloc_type * malloc_desc2type(const char *desc) { struct malloc_type *mtp; mtx_assert(&malloc_mtx, MA_OWNED); for (mtp = kmemstatistics; mtp != NULL; mtp = mtp->ks_next) { if (strcmp(mtp->ks_shortdesc, desc) == 0) return (mtp); } return (NULL); } static int sysctl_kern_malloc_stats(SYSCTL_HANDLER_ARGS) { struct malloc_type_stream_header mtsh; struct malloc_type_internal *mtip; struct malloc_type_header mth; struct malloc_type *mtp; int error, i; struct sbuf sbuf; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL); mtx_lock(&malloc_mtx); /* * Insert stream header. 
*/ bzero(&mtsh, sizeof(mtsh)); mtsh.mtsh_version = MALLOC_TYPE_STREAM_VERSION; mtsh.mtsh_maxcpus = MAXCPU; mtsh.mtsh_count = kmemcount; (void)sbuf_bcat(&sbuf, &mtsh, sizeof(mtsh)); /* * Insert alternating sequence of type headers and type statistics. */ for (mtp = kmemstatistics; mtp != NULL; mtp = mtp->ks_next) { mtip = (struct malloc_type_internal *)mtp->ks_handle; /* * Insert type header. */ bzero(&mth, sizeof(mth)); strlcpy(mth.mth_name, mtp->ks_shortdesc, MALLOC_MAX_NAME); (void)sbuf_bcat(&sbuf, &mth, sizeof(mth)); /* * Insert type statistics for each CPU. */ for (i = 0; i < MAXCPU; i++) { (void)sbuf_bcat(&sbuf, &mtip->mti_stats[i], sizeof(mtip->mti_stats[i])); } } mtx_unlock(&malloc_mtx); error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } SYSCTL_PROC(_kern, OID_AUTO, malloc_stats, CTLFLAG_RD|CTLTYPE_STRUCT, 0, 0, sysctl_kern_malloc_stats, "s,malloc_type_ustats", "Return malloc types"); SYSCTL_INT(_kern, OID_AUTO, malloc_count, CTLFLAG_RD, &kmemcount, 0, "Count of kernel malloc types"); void malloc_type_list(malloc_type_list_func_t *func, void *arg) { struct malloc_type *mtp, **bufmtp; int count, i; size_t buflen; mtx_lock(&malloc_mtx); restart: mtx_assert(&malloc_mtx, MA_OWNED); count = kmemcount; mtx_unlock(&malloc_mtx); buflen = sizeof(struct malloc_type *) * count; bufmtp = malloc(buflen, M_TEMP, M_WAITOK); mtx_lock(&malloc_mtx); if (count < kmemcount) { free(bufmtp, M_TEMP); goto restart; } for (mtp = kmemstatistics, i = 0; mtp != NULL; mtp = mtp->ks_next, i++) bufmtp[i] = mtp; mtx_unlock(&malloc_mtx); for (i = 0; i < count; i++) (func)(bufmtp[i], arg); free(bufmtp, M_TEMP); } #ifdef DDB DB_SHOW_COMMAND(malloc, db_show_malloc) { struct malloc_type_internal *mtip; struct malloc_type *mtp; uint64_t allocs, frees; uint64_t alloced, freed; int i; db_printf("%18s %12s %12s %12s\n", "Type", "InUse", "MemUse", "Requests"); for (mtp = kmemstatistics; mtp != NULL; mtp = mtp->ks_next) { mtip = (struct malloc_type_internal *)mtp->ks_handle; allocs = 0; frees = 0; alloced = 0; freed = 0; for (i = 0; i < MAXCPU; i++) { allocs += mtip->mti_stats[i].mts_numallocs; frees += mtip->mti_stats[i].mts_numfrees; alloced += mtip->mti_stats[i].mts_memalloced; freed += mtip->mti_stats[i].mts_memfreed; } db_printf("%18s %12ju %12juK %12ju\n", mtp->ks_shortdesc, allocs - frees, (alloced - freed + 1023) / 1024, allocs); if (db_pager_quit) break; } } #if MALLOC_DEBUG_MAXZONES > 1 DB_SHOW_COMMAND(multizone_matches, db_show_multizone_matches) { struct malloc_type_internal *mtip; struct malloc_type *mtp; u_int subzone; if (!have_addr) { db_printf("Usage: show multizone_matches \n"); return; } mtp = (void *)addr; if (mtp->ks_magic != M_MAGIC) { db_printf("Magic %lx does not match expected %x\n", mtp->ks_magic, M_MAGIC); return; } mtip = mtp->ks_handle; subzone = mtip->mti_zone; for (mtp = kmemstatistics; mtp != NULL; mtp = mtp->ks_next) { mtip = mtp->ks_handle; if (mtip->mti_zone != subzone) continue; db_printf("%s\n", mtp->ks_shortdesc); if (db_pager_quit) break; } } #endif /* MALLOC_DEBUG_MAXZONES > 1 */ #endif /* DDB */ #ifdef MALLOC_PROFILE static int sysctl_kern_mprof(SYSCTL_HANDLER_ARGS) { struct sbuf sbuf; uint64_t count; uint64_t waste; uint64_t mem; int error; int rsize; int size; int i; waste = 0; mem = 0; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); sbuf_printf(&sbuf, "\n Size Requests Real Size\n"); for (i = 0; i < KMEM_ZSIZE; i++) { size = i << KMEM_ZSHIFT; rsize = kmemzones[kmemsize[i]].kz_size; count = (long long 
unsigned)krequests[i]; sbuf_printf(&sbuf, "%6d%28llu%11d\n", size, (unsigned long long)count, rsize); if ((rsize * count) > (size * count)) waste += (rsize * count) - (size * count); mem += (rsize * count); } sbuf_printf(&sbuf, "\nTotal memory used:\t%30llu\nTotal Memory wasted:\t%30llu\n", (unsigned long long)mem, (unsigned long long)waste); error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } SYSCTL_OID(_kern, OID_AUTO, mprof, CTLTYPE_STRING|CTLFLAG_RD, NULL, 0, sysctl_kern_mprof, "A", "Malloc Profiling"); #endif /* MALLOC_PROFILE */ Index: head/sys/mips/ingenic/jz4780_lcd.c =================================================================== --- head/sys/mips/ingenic/jz4780_lcd.c (revision 338106) +++ head/sys/mips/ingenic/jz4780_lcd.c (revision 338107) @@ -1,575 +1,575 @@ /*- * Copyright (c) 2016 Jared McNeill * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
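An aside on the malloc statistics export earlier in this hunk (the kern.malloc_stats stream is what libmemstat(3) and vmstat -m ultimately consume): the record layout is one stream header followed, per type, by a type header and mtsh_maxcpus per-CPU stat records. The sketch below is a minimal userland reader built on that layout only; it assumes the exported structures are visible from <sys/malloc.h> and does no error or retry handling.

#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	struct malloc_type_stream_header *mtsh;
	struct malloc_type_header *mth;
	struct malloc_type_stats *mts;
	char *buf, *p;
	size_t len;
	uint64_t inuse;
	uint32_t i, t;

	/* Two-pass read: size first, then the stream itself. */
	if (sysctlbyname("kern.malloc_stats", NULL, &len, NULL, 0) != 0)
		return (1);
	if ((buf = malloc(len)) == NULL ||
	    sysctlbyname("kern.malloc_stats", buf, &len, NULL, 0) != 0)
		return (1);

	p = buf;
	mtsh = (struct malloc_type_stream_header *)p;
	p += sizeof(*mtsh);
	if (mtsh->mtsh_version != MALLOC_TYPE_STREAM_VERSION)
		return (1);
	for (t = 0; t < mtsh->mtsh_count; t++) {
		mth = (struct malloc_type_header *)p;
		p += sizeof(*mth);
		/* One stats record per CPU slot, as written by the kernel. */
		for (inuse = 0, i = 0; i < mtsh->mtsh_maxcpus; i++) {
			mts = (struct malloc_type_stats *)p;
			p += sizeof(*mts);
			inuse += mts->mts_numallocs - mts->mts_numfrees;
		}
		printf("%-24s %ju\n", mth->mth_name, (uintmax_t)inuse);
	}
	free(buf);
	return (0);
}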
* * $FreeBSD$ */ /* * Ingenic JZ4780 LCD Controller */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fb_if.h" #include "hdmi_if.h" #define FB_DEFAULT_W 800 #define FB_DEFAULT_H 600 #define FB_DEFAULT_REF 60 #define FB_BPP 32 #define FB_ALIGN (16 * 4) #define FB_MAX_BW (1920 * 1080 * 60) #define FB_MAX_W 2048 #define FB_MAX_H 2048 #define FB_DIVIDE(x, y) (((x) + ((y) / 2)) / (y)) #define PCFG_MAGIC 0xc7ff2100 #define DOT_CLOCK_TO_HZ(c) ((c) * 1000) #ifndef VM_MEMATTR_WRITE_COMBINING #define VM_MEMATTR_WRITE_COMBINING VM_MEMATTR_UNCACHEABLE #endif struct jzlcd_softc { device_t dev; device_t fbdev; struct resource *res[1]; /* Clocks */ clk_t clk; clk_t clk_pix; /* Framebuffer */ struct fb_info info; size_t fbsize; bus_addr_t paddr; vm_offset_t vaddr; /* HDMI */ eventhandler_tag hdmi_evh; /* Frame descriptor DMA */ bus_dma_tag_t fdesc_tag; bus_dmamap_t fdesc_map; bus_addr_t fdesc_paddr; struct lcd_frame_descriptor *fdesc; }; static struct resource_spec jzlcd_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, { -1, 0 } }; #define LCD_READ(sc, reg) bus_read_4((sc)->res[0], (reg)) #define LCD_WRITE(sc, reg, val) bus_write_4((sc)->res[0], (reg), (val)) static int jzlcd_allocfb(struct jzlcd_softc *sc) { - sc->vaddr = kmem_alloc_contig(kernel_arena, sc->fbsize, - M_NOWAIT | M_ZERO, 0, ~0, FB_ALIGN, 0, VM_MEMATTR_WRITE_COMBINING); + sc->vaddr = kmem_alloc_contig(sc->fbsize, M_NOWAIT | M_ZERO, 0, ~0, + FB_ALIGN, 0, VM_MEMATTR_WRITE_COMBINING); if (sc->vaddr == 0) { device_printf(sc->dev, "failed to allocate FB memory\n"); return (ENOMEM); } sc->paddr = pmap_kextract(sc->vaddr); return (0); } static void jzlcd_freefb(struct jzlcd_softc *sc) { kmem_free(kernel_arena, sc->vaddr, sc->fbsize); } static void jzlcd_start(struct jzlcd_softc *sc) { uint32_t ctrl; /* Clear status registers */ LCD_WRITE(sc, LCDSTATE, 0); LCD_WRITE(sc, LCDOSDS, 0); /* Enable the controller */ ctrl = LCD_READ(sc, LCDCTRL); ctrl |= LCDCTRL_ENA; ctrl &= ~LCDCTRL_DIS; LCD_WRITE(sc, LCDCTRL, ctrl); } static void jzlcd_stop(struct jzlcd_softc *sc) { uint32_t ctrl; ctrl = LCD_READ(sc, LCDCTRL); if ((ctrl & LCDCTRL_ENA) != 0) { /* Disable the controller and wait for it to stop */ ctrl |= LCDCTRL_DIS; LCD_WRITE(sc, LCDCTRL, ctrl); while ((LCD_READ(sc, LCDSTATE) & LCDSTATE_LDD) == 0) DELAY(100); } /* Clear all status except for disable */ LCD_WRITE(sc, LCDSTATE, LCD_READ(sc, LCDSTATE) & ~LCDSTATE_LDD); } static void jzlcd_setup_descriptor(struct jzlcd_softc *sc, const struct videomode *mode, u_int desno) { struct lcd_frame_descriptor *fdesc; int line_sz; /* Frame size is specified in # words */ line_sz = (mode->hdisplay * FB_BPP) >> 3; line_sz = ((line_sz + 3) & ~3) / 4; fdesc = sc->fdesc + desno; if (desno == 0) fdesc->next = sc->fdesc_paddr + sizeof(struct lcd_frame_descriptor); else fdesc->next = sc->fdesc_paddr; fdesc->physaddr = sc->paddr; fdesc->id = desno; fdesc->cmd = LCDCMD_FRM_EN | (line_sz * mode->vdisplay); fdesc->offs = 0; fdesc->pw = 0; fdesc->cnum_pos = LCDPOS_BPP01_18_24 | LCDPOS_PREMULTI01 | (desno == 0 ? 
LCDPOS_COEF_BLE01_1 : LCDPOS_COEF_SLE01); fdesc->dessize = LCDDESSIZE_ALPHA | ((mode->vdisplay - 1) << LCDDESSIZE_HEIGHT_SHIFT) | ((mode->hdisplay - 1) << LCDDESSIZE_WIDTH_SHIFT); } static int jzlcd_set_videomode(struct jzlcd_softc *sc, const struct videomode *mode) { u_int hbp, hfp, hsw, vbp, vfp, vsw; u_int hds, hde, ht, vds, vde, vt; uint32_t ctrl; int error; hbp = mode->htotal - mode->hsync_end; hfp = mode->hsync_start - mode->hdisplay; hsw = mode->hsync_end - mode->hsync_start; vbp = mode->vtotal - mode->vsync_end; vfp = mode->vsync_start - mode->vdisplay; vsw = mode->vsync_end - mode->vsync_start; hds = hsw + hbp; hde = hds + mode->hdisplay; ht = hde + hfp; vds = vsw + vbp; vde = vds + mode->vdisplay; vt = vde + vfp; /* Setup timings */ LCD_WRITE(sc, LCDVAT, (ht << LCDVAT_HT_SHIFT) | (vt << LCDVAT_VT_SHIFT)); LCD_WRITE(sc, LCDDAH, (hds << LCDDAH_HDS_SHIFT) | (hde << LCDDAH_HDE_SHIFT)); LCD_WRITE(sc, LCDDAV, (vds << LCDDAV_VDS_SHIFT) | (vde << LCDDAV_VDE_SHIFT)); LCD_WRITE(sc, LCDHSYNC, hsw); LCD_WRITE(sc, LCDVSYNC, vsw); /* Set configuration */ LCD_WRITE(sc, LCDCFG, LCDCFG_NEWDES | LCDCFG_RECOVER | LCDCFG_24 | LCDCFG_PSM | LCDCFG_CLSM | LCDCFG_SPLM | LCDCFG_REVM | LCDCFG_PCP); ctrl = LCD_READ(sc, LCDCTRL); ctrl &= ~LCDCTRL_BST; ctrl |= LCDCTRL_BST_64 | LCDCTRL_OFUM; LCD_WRITE(sc, LCDCTRL, ctrl); LCD_WRITE(sc, LCDPCFG, PCFG_MAGIC); LCD_WRITE(sc, LCDRGBC, LCDRGBC_RGBFMT); /* Update registers */ LCD_WRITE(sc, LCDSTATE, 0); /* Setup frame descriptors */ jzlcd_setup_descriptor(sc, mode, 0); jzlcd_setup_descriptor(sc, mode, 1); bus_dmamap_sync(sc->fdesc_tag, sc->fdesc_map, BUS_DMASYNC_PREWRITE); /* Setup DMA channels */ LCD_WRITE(sc, LCDDA0, sc->fdesc_paddr + sizeof(struct lcd_frame_descriptor)); LCD_WRITE(sc, LCDDA1, sc->fdesc_paddr); /* Set display clock */ error = clk_set_freq(sc->clk_pix, DOT_CLOCK_TO_HZ(mode->dot_clock), 0); if (error != 0) { device_printf(sc->dev, "failed to set pixel clock to %u Hz\n", DOT_CLOCK_TO_HZ(mode->dot_clock)); return (error); } return (0); } static int jzlcd_configure(struct jzlcd_softc *sc, const struct videomode *mode) { size_t fbsize; int error; fbsize = round_page(mode->hdisplay * mode->vdisplay * (FB_BPP / NBBY)); /* Detach the old FB device */ if (sc->fbdev != NULL) { device_delete_child(sc->dev, sc->fbdev); sc->fbdev = NULL; } /* If the FB size has changed, free the old FB memory */ if (sc->fbsize > 0 && sc->fbsize != fbsize) { jzlcd_freefb(sc); sc->vaddr = 0; } /* Allocate the FB if necessary */ sc->fbsize = fbsize; if (sc->vaddr == 0) { error = jzlcd_allocfb(sc); if (error != 0) { device_printf(sc->dev, "failed to allocate FB memory\n"); return (ENXIO); } } /* Setup video mode */ error = jzlcd_set_videomode(sc, mode); if (error != 0) return (error); /* Attach framebuffer device */ sc->info.fb_name = device_get_nameunit(sc->dev); sc->info.fb_vbase = (intptr_t)sc->vaddr; sc->info.fb_pbase = sc->paddr; sc->info.fb_size = sc->fbsize; sc->info.fb_bpp = sc->info.fb_depth = FB_BPP; sc->info.fb_stride = mode->hdisplay * (FB_BPP / NBBY); sc->info.fb_width = mode->hdisplay; sc->info.fb_height = mode->vdisplay; #ifdef VM_MEMATTR_WRITE_COMBINING sc->info.fb_flags = FB_FLAG_MEMATTR; sc->info.fb_memattr = VM_MEMATTR_WRITE_COMBINING; #endif sc->fbdev = device_add_child(sc->dev, "fbd", device_get_unit(sc->dev)); if (sc->fbdev == NULL) { device_printf(sc->dev, "failed to add fbd child\n"); return (ENOENT); } error = device_probe_and_attach(sc->fbdev); if (error != 0) { device_printf(sc->dev, "failed to attach fbd device\n"); return (error); } return (0); } static 
int jzlcd_get_bandwidth(const struct videomode *mode) { int refresh; refresh = FB_DIVIDE(FB_DIVIDE(DOT_CLOCK_TO_HZ(mode->dot_clock), mode->htotal), mode->vtotal); return mode->hdisplay * mode->vdisplay * refresh; } static int jzlcd_mode_supported(const struct videomode *mode) { /* Width and height must be less than 2048 */ if (mode->hdisplay > FB_MAX_W || mode->vdisplay > FB_MAX_H) return (0); /* Bandwidth check */ if (jzlcd_get_bandwidth(mode) > FB_MAX_BW) return (0); /* Interlace modes not yet supported by the driver */ if ((mode->flags & VID_INTERLACE) != 0) return (0); return (1); } static const struct videomode * jzlcd_find_mode(struct edid_info *ei) { const struct videomode *best; int n, bw, best_bw; /* If the preferred mode is OK, just use it */ if (jzlcd_mode_supported(ei->edid_preferred_mode) != 0) return ei->edid_preferred_mode; /* Pick the mode with the highest bandwidth requirements */ best = NULL; best_bw = 0; for (n = 0; n < ei->edid_nmodes; n++) { if (jzlcd_mode_supported(&ei->edid_modes[n]) == 0) continue; bw = jzlcd_get_bandwidth(&ei->edid_modes[n]); if (bw > FB_MAX_BW) continue; if (best == NULL || bw > best_bw) { best = &ei->edid_modes[n]; best_bw = bw; } } return best; } static void jzlcd_hdmi_event(void *arg, device_t hdmi_dev) { const struct videomode *mode; struct videomode hdmi_mode; struct jzlcd_softc *sc; struct edid_info ei; uint8_t *edid; uint32_t edid_len; int error; sc = arg; edid = NULL; edid_len = 0; mode = NULL; error = HDMI_GET_EDID(hdmi_dev, &edid, &edid_len); if (error != 0) { device_printf(sc->dev, "failed to get EDID: %d\n", error); } else { error = edid_parse(edid, &ei); if (error != 0) { device_printf(sc->dev, "failed to parse EDID: %d\n", error); } else { if (bootverbose) edid_print(&ei); mode = jzlcd_find_mode(&ei); } } /* If a suitable mode could not be found, try the default */ if (mode == NULL) mode = pick_mode_by_ref(FB_DEFAULT_W, FB_DEFAULT_H, FB_DEFAULT_REF); if (mode == NULL) { device_printf(sc->dev, "failed to find usable video mode\n"); return; } if (bootverbose) device_printf(sc->dev, "using %dx%d\n", mode->hdisplay, mode->vdisplay); /* Stop the controller */ jzlcd_stop(sc); /* Configure LCD controller */ error = jzlcd_configure(sc, mode); if (error != 0) { device_printf(sc->dev, "failed to configure FB: %d\n", error); return; } /* Enable HDMI TX */ hdmi_mode = *mode; HDMI_SET_VIDEOMODE(hdmi_dev, &hdmi_mode); /* Start the controller! 
*/ jzlcd_start(sc); } static void jzlcd_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error != 0) return; *(bus_addr_t *)arg = segs[0].ds_addr; } static int jzlcd_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_is_compatible(dev, "ingenic,jz4780-lcd")) return (ENXIO); device_set_desc(dev, "Ingenic JZ4780 LCD Controller"); return (BUS_PROBE_DEFAULT); } static int jzlcd_attach(device_t dev) { struct jzlcd_softc *sc; int error; sc = device_get_softc(dev); sc->dev = dev; if (bus_alloc_resources(dev, jzlcd_spec, sc->res)) { device_printf(dev, "cannot allocate resources for device\n"); goto failed; } if (clk_get_by_ofw_name(dev, 0, "lcd_clk", &sc->clk) != 0 || clk_get_by_ofw_name(dev, 0, "lcd_pixclk", &sc->clk_pix) != 0) { device_printf(dev, "cannot get clocks\n"); goto failed; } if (clk_enable(sc->clk) != 0 || clk_enable(sc->clk_pix) != 0) { device_printf(dev, "cannot enable clocks\n"); goto failed; } error = bus_dma_tag_create( bus_get_dma_tag(dev), sizeof(struct lcd_frame_descriptor), 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(struct lcd_frame_descriptor) * 2, 1, sizeof(struct lcd_frame_descriptor) * 2, 0, NULL, NULL, &sc->fdesc_tag); if (error != 0) { device_printf(dev, "cannot create bus dma tag\n"); goto failed; } error = bus_dmamem_alloc(sc->fdesc_tag, (void **)&sc->fdesc, BUS_DMA_NOCACHE | BUS_DMA_WAITOK | BUS_DMA_ZERO, &sc->fdesc_map); if (error != 0) { device_printf(dev, "cannot allocate dma descriptor\n"); goto dmaalloc_failed; } error = bus_dmamap_load(sc->fdesc_tag, sc->fdesc_map, sc->fdesc, sizeof(struct lcd_frame_descriptor) * 2, jzlcd_dmamap_cb, &sc->fdesc_paddr, 0); if (error != 0) { device_printf(dev, "cannot load dma map\n"); goto dmaload_failed; } sc->hdmi_evh = EVENTHANDLER_REGISTER(hdmi_event, jzlcd_hdmi_event, sc, 0); return (0); dmaload_failed: bus_dmamem_free(sc->fdesc_tag, sc->fdesc, sc->fdesc_map); dmaalloc_failed: bus_dma_tag_destroy(sc->fdesc_tag); failed: if (sc->clk_pix != NULL) clk_release(sc->clk); if (sc->clk != NULL) clk_release(sc->clk); if (sc->res != NULL) bus_release_resources(dev, jzlcd_spec, sc->res); return (ENXIO); } static struct fb_info * jzlcd_fb_getinfo(device_t dev) { struct jzlcd_softc *sc; sc = device_get_softc(dev); return (&sc->info); } static device_method_t jzlcd_methods[] = { /* Device interface */ DEVMETHOD(device_probe, jzlcd_probe), DEVMETHOD(device_attach, jzlcd_attach), /* FB interface */ DEVMETHOD(fb_getinfo, jzlcd_fb_getinfo), DEVMETHOD_END }; static driver_t jzlcd_driver = { "fb", jzlcd_methods, sizeof(struct jzlcd_softc), }; static devclass_t jzlcd_devclass; DRIVER_MODULE(fb, simplebus, jzlcd_driver, jzlcd_devclass, 0, 0); Index: head/sys/mips/mips/busdma_machdep.c =================================================================== --- head/sys/mips/mips/busdma_machdep.c (revision 338106) +++ head/sys/mips/mips/busdma_machdep.c (revision 338107) @@ -1,1525 +1,1524 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. 
The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From i386/busdma_machdep.c,v 1.26 2002/04/19 22:58:09 alfred */ #include __FBSDID("$FreeBSD$"); /* * MIPS bus dma support routines */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_BPAGES 64 #define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3 #define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 /* * On XBurst cores from Ingenic, cache-line writeback is local * only, unless accompanied by invalidation. Invalidations force * dirty line writeout and invalidation requests forwarded to * other cores if other cores have the cache line dirty. */ #if defined(SMP) && defined(CPU_XBURST) #define BUS_DMA_FORCE_WBINV #endif struct bounce_zone; struct bus_dma_tag { bus_dma_tag_t parent; bus_size_t alignment; bus_addr_t boundary; bus_addr_t lowaddr; bus_addr_t highaddr; bus_dma_filter_t *filter; void *filterarg; bus_size_t maxsize; u_int nsegments; bus_size_t maxsegsz; int flags; int ref_count; int map_count; bus_dma_lock_t *lockfunc; void *lockfuncarg; bus_dma_segment_t *segments; struct bounce_zone *bounce_zone; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ vm_offset_t vaddr_nocache; /* kva of bounce buffer uncached */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ bus_addr_t dataaddr; /* client physical address */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; struct sync_list { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ bus_size_t datacount; /* client data count */ }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); #define DMAMAP_UNCACHEABLE 0x08 #define DMAMAP_CACHE_ALIGNED 0x10 struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; int flags; void 
*origbuffer; void *allocbuffer; TAILQ_ENTRY(bus_dmamap) freelist; STAILQ_ENTRY(bus_dmamap) links; bus_dmamap_callback_t *callback; void *callback_arg; int sync_count; struct sync_list *slist; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); /* Default tag, as most drivers provide no parent tag. */ bus_dma_tag_t mips_root_dma_tag; static uma_zone_t dmamap_zone; /* Cache of struct bus_dmamap items */ static busdma_bufalloc_t coherent_allocator; /* Cache of coherent buffers */ static busdma_bufalloc_t standard_allocator; /* Cache of standard buffers */ MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata"); MALLOC_DEFINE(M_BOUNCE, "bounce", "busdma bounce pages"); /* * This is the ctor function passed to uma_zcreate() for the pool of dma maps. * It'll need platform-specific changes if this code is copied. */ static int dmamap_ctor(void *mem, int size, void *arg, int flags) { bus_dmamap_t map; bus_dma_tag_t dmat; map = (bus_dmamap_t)mem; dmat = (bus_dma_tag_t)arg; dmat->map_count++; map->dmat = dmat; map->flags = 0; map->slist = NULL; map->allocbuffer = NULL; map->sync_count = 0; STAILQ_INIT(&map->bpages); return (0); } /* * This is the dtor function passed to uma_zcreate() for the pool of dma maps. * It may need platform-specific changes if this code is copied . */ static void dmamap_dtor(void *mem, int size, void *arg) { bus_dmamap_t map; map = (bus_dmamap_t)mem; map->dmat->map_count--; } static void busdma_init(void *dummy) { /* Create a cache of maps for bus_dmamap_create(). */ dmamap_zone = uma_zcreate("dma maps", sizeof(struct bus_dmamap), dmamap_ctor, dmamap_dtor, NULL, NULL, UMA_ALIGN_PTR, 0); /* Create a cache of buffers in standard (cacheable) memory. */ standard_allocator = busdma_bufalloc_create("buffer", mips_dcache_max_linesize, /* minimum_alignment */ NULL, /* uma_alloc func */ NULL, /* uma_free func */ 0); /* uma_zcreate_flags */ /* * Create a cache of buffers in uncacheable memory, to implement the * BUS_DMA_COHERENT flag. */ coherent_allocator = busdma_bufalloc_create("coherent", mips_dcache_max_linesize, /* minimum_alignment */ busdma_bufalloc_alloc_uncacheable, busdma_bufalloc_free_uncacheable, 0); /* uma_zcreate_flags */ } SYSINIT(busdma, SI_SUB_KMEM, SI_ORDER_FOURTH, busdma_init, NULL); /* * Return true if a match is made. * * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'. * * If paddr is within the bounds of the dma tag then call the filter callback * to check for a match, if there is no filter callback then assume a match. */ static int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) { int retval; retval = 0; do { if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr) || ((paddr & (dmat->alignment - 1)) != 0)) && (dmat->filter == NULL || (*dmat->filter)(dmat->filterarg, paddr) != 0)) retval = 1; dmat = dmat->parent; } while (retval == 0 && dmat != NULL); return (retval); } /* * Check to see if the specified page is in an allowed DMA range. 
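A stand-alone restatement of the run_filter() decision above may help: an address gets bounced when it lies inside the tag's exclusion window (lowaddr, highaddr] or violates the tag's alignment. The optional filter callback and the walk up the parent-tag chain are omitted here, and every name and number below is made up.

#include <stdint.h>
#include <stdio.h>

struct fake_tag {
	uint64_t lowaddr;	/* exclusion window starts above this */
	uint64_t highaddr;	/* ...and ends here */
	uint64_t alignment;	/* power of two */
};

static int
needs_bounce(const struct fake_tag *t, uint64_t paddr)
{
	if (paddr > t->lowaddr && paddr <= t->highaddr)
		return (1);		/* address the device cannot reach */
	if ((paddr & (t->alignment - 1)) != 0)
		return (1);		/* misaligned for this tag */
	return (0);
}

int
main(void)
{
	/* A tag limited to 32-bit addresses, 8-byte alignment. */
	struct fake_tag t = { 0xffffffffULL, ~0ULL, 8 };

	printf("%d\n", needs_bounce(&t, 0x00001000ULL));	/* 0: fine */
	printf("%d\n", needs_bounce(&t, 0x100000000ULL));	/* 1: beyond 4GB */
	printf("%d\n", needs_bounce(&t, 0x00001004ULL));	/* 1: misaligned */
	return (0);
}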
*/ static __inline int _bus_dma_can_bounce(vm_offset_t lowaddr, vm_offset_t highaddr) { int i; for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) { if ((lowaddr >= phys_avail[i] && lowaddr <= phys_avail[i + 1]) || (lowaddr < phys_avail[i] && highaddr > phys_avail[i])) return (1); } return (0); } /* * Convenience function for manipulating driver locks from busdma (during * busdma_swi, for example). Drivers that don't provide their own locks * should specify &Giant to dmat->lockfuncarg. Drivers that use their own * non-mutex locking scheme don't have to use this at all. */ void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) { struct mtx *dmtx; dmtx = (struct mtx *)arg; switch (op) { case BUS_DMA_LOCK: mtx_lock(dmtx); break; case BUS_DMA_UNLOCK: mtx_unlock(dmtx); break; default: panic("Unknown operation 0x%x for busdma_lock_mutex!", op); } } /* * dflt_lock should never get called. It gets put into the dma tag when * lockfunc == NULL, which is only valid if the maps that are associated * with the tag are meant to never be defered. * XXX Should have a way to identify which driver is responsible here. */ static void dflt_lock(void *arg, bus_dma_lock_op_t op) { #ifdef INVARIANTS panic("driver error: busdma dflt_lock called"); #else printf("DRIVER_ERROR: busdma dflt_lock called\n"); #endif } static __inline bus_dmamap_t _busdma_alloc_dmamap(bus_dma_tag_t dmat) { struct sync_list *slist; bus_dmamap_t map; slist = malloc(sizeof(*slist) * dmat->nsegments, M_BUSDMA, M_NOWAIT); if (slist == NULL) return (NULL); map = uma_zalloc_arg(dmamap_zone, dmat, M_NOWAIT); if (map != NULL) map->slist = slist; else free(slist, M_BUSDMA); return (map); } static __inline void _busdma_free_dmamap(bus_dmamap_t map) { free(map->slist, M_BUSDMA); uma_zfree(dmamap_zone, map); } /* * Allocate a device specific dma_tag. 
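Before the tag-creation code itself, a hedged driver-side sketch of how the lockfunc/lockfuncarg pair mentioned in the busdma_lock_mutex() comment above is typically supplied: a driver with its own mutex passes busdma_lock_mutex plus that mutex, one without passes &Giant. The softc layout and all size limits here are hypothetical.

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <machine/bus.h>

/* Hypothetical driver softc; names are illustrative only. */
struct foo_softc {
	struct mtx	sc_mtx;
	bus_dma_tag_t	sc_dmat;
};

static int
foo_dma_init(device_t dev, struct foo_softc *sc)
{
	return (bus_dma_tag_create(
	    bus_get_dma_tag(dev),		/* parent tag */
	    4, 0,				/* alignment, boundary */
	    BUS_SPACE_MAXADDR_32BIT,		/* lowaddr */
	    BUS_SPACE_MAXADDR,			/* highaddr */
	    NULL, NULL,				/* filter, filterarg */
	    MAXPHYS, 1, MAXPHYS,		/* maxsize, nsegments, maxsegsz */
	    0,					/* flags */
	    busdma_lock_mutex, &sc->sc_mtx,	/* lockfunc, lockfuncarg */
	    &sc->sc_dmat));
}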
*/ #define SEG_NB 1024 int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error = 0; /* Return a NULL tag on failure */ *dmat = NULL; if (!parent) parent = mips_root_dma_tag; newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_BUSDMA, M_NOWAIT); if (newtag == NULL) { CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, 0, error); return (ENOMEM); } newtag->parent = parent; newtag->alignment = alignment; newtag->boundary = boundary; newtag->lowaddr = trunc_page((vm_offset_t)lowaddr) + (PAGE_SIZE - 1); newtag->highaddr = trunc_page((vm_offset_t)highaddr) + (PAGE_SIZE - 1); newtag->filter = filter; newtag->filterarg = filterarg; newtag->maxsize = maxsize; newtag->nsegments = nsegments; newtag->maxsegsz = maxsegsz; newtag->flags = flags; if (cpuinfo.cache_coherent_dma) newtag->flags |= BUS_DMA_COHERENT; newtag->ref_count = 1; /* Count ourself */ newtag->map_count = 0; if (lockfunc != NULL) { newtag->lockfunc = lockfunc; newtag->lockfuncarg = lockfuncarg; } else { newtag->lockfunc = dflt_lock; newtag->lockfuncarg = NULL; } newtag->segments = NULL; /* * Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); if (newtag->boundary == 0) newtag->boundary = parent->boundary; else if (parent->boundary != 0) newtag->boundary = MIN(parent->boundary, newtag->boundary); if ((newtag->filter != NULL) || ((parent->flags & BUS_DMA_COULD_BOUNCE) != 0)) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (newtag->filter == NULL) { /* * Short circuit looking at our parent directly * since we have encapsulated all of its information */ newtag->filter = parent->filter; newtag->filterarg = parent->filterarg; newtag->parent = parent->parent; } if (newtag->parent != NULL) atomic_add_int(&parent->ref_count, 1); } if (_bus_dma_can_bounce(newtag->lowaddr, newtag->highaddr) || newtag->alignment > 1) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (((newtag->flags & BUS_DMA_COULD_BOUNCE) != 0) && (flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* Must bounce */ if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_BUSDMA); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; } else newtag->bounce_zone = NULL; if (error != 0) free(newtag, M_BUSDMA); else *dmat = newtag; CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? 
newtag->flags : 0), error); return (error); } int bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain) { return (0); } int bus_dma_tag_destroy(bus_dma_tag_t dmat) { #ifdef KTR bus_dma_tag_t dmat_copy = dmat; #endif if (dmat != NULL) { if (dmat->map_count != 0) return (EBUSY); while (dmat != NULL) { bus_dma_tag_t parent; parent = dmat->parent; atomic_subtract_int(&dmat->ref_count, 1); if (dmat->ref_count == 0) { if (dmat->segments != NULL) free(dmat->segments, M_BUSDMA); free(dmat, M_BUSDMA); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } CTR2(KTR_BUSDMA, "%s tag %p", __func__, dmat_copy); return (0); } #include /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { bus_dmamap_t newmap; int error = 0; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->nsegments, M_BUSDMA, M_NOWAIT); if (dmat->segments == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } } newmap = _busdma_alloc_dmamap(dmat); if (newmap == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } *mapp = newmap; /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or * an active address boundary. */ if (dmat->flags & BUS_DMA_COULD_BOUNCE) { /* Must bounce */ struct bounce_zone *bz; int maxpages; if (dmat->bounce_zone == NULL) { if ((error = alloc_bounce_zone(dmat)) != 0) { _busdma_free_dmamap(newmap); *mapp = NULL; return (error); } } bz = dmat->bounce_zone; /* Initialize the new map */ STAILQ_INIT(&((*mapp)->bpages)); /* * Attempt to add pages to our pool on a per-instance * basis up to a sane limit. */ maxpages = MAX_BPAGES; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { int pages; pages = MAX(atop(dmat->maxsize), 1); pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 1); if (alloc_bounce_pages(dmat, pages) < pages) error = ENOMEM; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { if (error == 0) dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; } else { error = 0; } } bz->map_count++; } CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, error); return (0); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; _busdma_free_dmamap(map); CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into * bus device space based on the constraints lited in the dma tag. * A dmamap to for use with dmamap_load is also allocated. 
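Ahead of the bus_dmamem_alloc() implementation below, a hypothetical driver-side sketch of the lifecycle it serves: buffer and map are handed out together, the map is loaded, and syncs bracket device access. The softc and names are made up (a variant of the earlier hypothetical softc), and the tag is assumed to allow a single segment, so the callback mirrors the single-segment pattern used by the LCD drivers earlier in this diff.

#include <sys/param.h>
#include <sys/bus.h>
#include <machine/bus.h>

struct foo_softc {
	bus_dma_tag_t	sc_dmat;
	bus_dmamap_t	sc_map;
	void		*sc_buf;
	bus_addr_t	sc_busaddr;
};

static void
foo_load_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	if (error == 0)
		*(bus_addr_t *)arg = segs[0].ds_addr;	/* single segment */
}

static int
foo_ring_setup(struct foo_softc *sc, bus_size_t size)
{
	int error;

	error = bus_dmamem_alloc(sc->sc_dmat, &sc->sc_buf,
	    BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, &sc->sc_map);
	if (error != 0)
		return (error);
	error = bus_dmamap_load(sc->sc_dmat, sc->sc_map, sc->sc_buf, size,
	    foo_load_cb, &sc->sc_busaddr, BUS_DMA_NOWAIT);
	if (error != 0) {
		bus_dmamem_free(sc->sc_dmat, sc->sc_buf, sc->sc_map);
		return (error);
	}
	/* ...program sc->sc_busaddr into the device... */
	bus_dmamap_sync(sc->sc_dmat, sc->sc_map, BUS_DMASYNC_PREWRITE);
	return (0);
}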
*/ int bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddrp, int flags, bus_dmamap_t *mapp) { bus_dmamap_t newmap = NULL; busdma_bufalloc_t ba; struct busdma_bufzone *bufzone; vm_memattr_t memattr; void *vaddr; int mflags; if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->nsegments, M_BUSDMA, mflags); if (dmat->segments == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); return (ENOMEM); } } newmap = _busdma_alloc_dmamap(dmat); if (newmap == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); return (ENOMEM); } /* * If all the memory is coherent with DMA then we don't need to * do anything special for a coherent mapping request. */ if (dmat->flags & BUS_DMA_COHERENT) flags &= ~BUS_DMA_COHERENT; if (flags & BUS_DMA_COHERENT) { memattr = VM_MEMATTR_UNCACHEABLE; ba = coherent_allocator; newmap->flags |= DMAMAP_UNCACHEABLE; } else { memattr = VM_MEMATTR_DEFAULT; ba = standard_allocator; } /* All buffers we allocate are cache-aligned. */ newmap->flags |= DMAMAP_CACHE_ALIGNED; if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; /* * Try to find a bufzone in the allocator that holds a cache of buffers * of the right size for this request. If the buffer is too big to be * held in the allocator cache, this returns NULL. */ bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); /* * Allocate the buffer from the uma(9) allocator if... * - It's small enough to be in the allocator (bufzone not NULL). * - The alignment constraint isn't larger than the allocation size * (the allocator aligns buffers to their size boundaries). * - There's no need to handle lowaddr/highaddr exclusion zones. * else allocate non-contiguous pages if... * - The page count that could get allocated doesn't exceed * nsegments also when the maximum segment size is less * than PAGE_SIZE. * - The alignment constraint isn't larger than a page boundary. * - There are no boundary-crossing constraints. * else allocate a block of contiguous pages because one or more of the * constraints is something that only the contig allocator can fulfill. */ if (bufzone != NULL && dmat->alignment <= bufzone->size && !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) { vaddr = uma_zalloc(bufzone->umazone, mflags); } else if (dmat->nsegments >= howmany(dmat->maxsize, MIN(dmat->maxsegsz, PAGE_SIZE)) && dmat->alignment <= PAGE_SIZE && (dmat->boundary % PAGE_SIZE) == 0) { vaddr = (void *)kmem_alloc_attr(dmat->maxsize, mflags, 0, dmat->lowaddr, memattr); } else { - vaddr = (void *)kmem_alloc_contig(kernel_arena, dmat->maxsize, - mflags, 0, dmat->lowaddr, dmat->alignment, dmat->boundary, - memattr); + vaddr = (void *)kmem_alloc_contig(dmat->maxsize, mflags, 0, + dmat->lowaddr, dmat->alignment, dmat->boundary, memattr); } if (vaddr == NULL) { _busdma_free_dmamap(newmap); newmap = NULL; } else { newmap->sync_count = 0; } *vaddrp = vaddr; *mapp = newmap; return (vaddr == NULL ? ENOMEM : 0); } /* * Free a piece of memory and it's allocated dmamap, that was allocated * via bus_dmamem_alloc. Make the same choice for free/contigfree. 
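Isolating the interface change the hunk above applies (and which revision 338107 repeats across the tree): kmem_alloc_contig() loses its explicit arena argument, the arena now being chosen internally. The two call forms, as they appear in bus_dmamem_alloc() above:

/* Before r338107: the caller named the arena explicitly. */
vaddr = (void *)kmem_alloc_contig(kernel_arena, dmat->maxsize, mflags,
    0, dmat->lowaddr, dmat->alignment, dmat->boundary, memattr);

/* After r338107: same constraints, no arena argument. */
vaddr = (void *)kmem_alloc_contig(dmat->maxsize, mflags,
    0, dmat->lowaddr, dmat->alignment, dmat->boundary, memattr);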
*/ void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { struct busdma_bufzone *bufzone; busdma_bufalloc_t ba; if (map->flags & DMAMAP_UNCACHEABLE) ba = coherent_allocator; else ba = standard_allocator; free(map->slist, M_BUSDMA); uma_zfree(dmamap_zone, map); bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); if (bufzone != NULL && dmat->alignment <= bufzone->size && !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) uma_zfree(bufzone->umazone, vaddr); else kmem_free(kernel_arena, (vm_offset_t)vaddr, dmat->maxsize); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); } static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { bus_addr_t curaddr; bus_size_t sgsize; if (map->pagesneeded == 0) { CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->maxsegsz); if (run_filter(dmat, curaddr) != 0) { sgsize = MIN(sgsize, PAGE_SIZE); map->pagesneeded++; } curaddr += sgsize; buflen -= sgsize; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; bus_addr_t paddr; if (map->pagesneeded == 0) { CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = (vm_offset_t)buf; vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { bus_size_t sg_len; KASSERT(kernel_pmap == pmap, ("pmap is not kernel pmap")); sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); paddr = pmap_kextract(vaddr); if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && run_filter(dmat, paddr) != 0) { sg_len = roundup2(sg_len, dmat->alignment); map->pagesneeded++; } vaddr += sg_len; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map,int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->boundary - 1); if (dmat->boundary > 0) { baddr = (curaddr + dmat->boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } /* * Insert chunk into a segment, coalescing with * the previous segment if possible. 
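A quick numeric check of the boundary clamp a few lines above, with made-up values: a 0x1000 boundary and a chunk starting at 0x12345e00 gets its size clamped so the segment stops exactly at the next 4 KiB boundary.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t boundary = 0x1000, curaddr = 0x12345e00, sgsize = 0x400;
	uint64_t bmask = ~(boundary - 1);
	uint64_t baddr = (curaddr + boundary) & bmask;	/* 0x12346000 */

	if (sgsize > baddr - curaddr)
		sgsize = baddr - curaddr;
	printf("clamped sgsize = %#jx\n", (uintmax_t)sgsize);	/* 0x200 */
	return (0);
}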
*/ seg = *segp; if (seg >= 0 && curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && (dmat->boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) { segs[seg].ds_len += sgsize; } else { if (++seg >= dmat->nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. */ int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { bus_addr_t curaddr; bus_size_t sgsize; int error; if (segs == NULL) segs = dmat->segments; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->maxsegsz); if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && run_filter(dmat, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ if (buflen != 0) { bus_dmamap_unload(dmat, map); return (EFBIG); /* XXX better return value here? */ } return (0); } int _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags, segs, segp)); } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrance, and the ending segment on exit. * first indicates if this is the first invocation of this function. */ int _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize; bus_addr_t curaddr; struct sync_list *sl; vm_offset_t vaddr = (vm_offset_t)buf; int error = 0; if (segs == NULL) segs = dmat->segments; if ((flags & BUS_DMA_LOAD_MBUF) != 0) map->flags |= DMAMAP_CACHE_ALIGNED; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } CTR3(KTR_BUSDMA, "lowaddr= %d boundary= %d, " "alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); while (buflen > 0) { /* * Get the physical address for this segment. * * XXX Don't support checking for coherent mappings * XXX in user address space. */ KASSERT(kernel_pmap == pmap, ("pmap is not kernel pmap")); curaddr = pmap_kextract(vaddr); /* * Compute the segment size, and adjust counts. 
*/ sgsize = PAGE_SIZE - ((u_long)curaddr & PAGE_MASK); if (sgsize > dmat->maxsegsz) sgsize = dmat->maxsegsz; if (buflen < sgsize) sgsize = buflen; if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && run_filter(dmat, curaddr)) { curaddr = add_bounce_page(dmat, map, vaddr, curaddr, sgsize); } else { sl = &map->slist[map->sync_count - 1]; if (map->sync_count == 0 || vaddr != sl->vaddr + sl->datacount) { if (++map->sync_count > dmat->nsegments) goto cleanup; sl++; sl->vaddr = vaddr; sl->datacount = sgsize; sl->busaddr = curaddr; } else sl->datacount += sgsize; } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } cleanup: /* * Did we fit? */ if (buflen != 0) { bus_dmamap_unload(dmat, map); error = EFBIG; /* XXX better return value here? */ } return (error); } void _bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { KASSERT(dmat != NULL, ("dmatag is NULL")); KASSERT(map != NULL, ("dmamap is NULL")); map->mem = *mem; map->callback = callback; map->callback_arg = callback_arg; } bus_dma_segment_t * _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { if (segs == NULL) segs = dmat->segments; return (segs); } /* * Release the mapping held by map. */ void bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } map->sync_count = 0; return; } static void bus_dmamap_sync_buf(vm_offset_t buf, int len, bus_dmasync_op_t op, int aligned) { char tmp_cl[mips_dcache_max_linesize], tmp_clend[mips_dcache_max_linesize]; vm_offset_t buf_cl, buf_clend; vm_size_t size_cl, size_clend; int cache_linesize_mask = mips_dcache_max_linesize - 1; /* * dcache invalidation operates on cache line aligned addresses * and could modify areas of memory that share the same cache line * at the beginning and the ending of the buffer. In order to * prevent a data loss we save these chunks in temporary buffer * before invalidation and restore them afer it. * * If the aligned flag is set the buffer is either an mbuf or came from * our allocator caches. In both cases they are always sized and * aligned to cacheline boundaries, so we can skip preserving nearby * data if a transfer appears to overlap cachelines. An mbuf in * particular will usually appear to be overlapped because of offsetting * within the buffer to align the L3 headers, but we know that the bytes * preceeding that offset are part of the same mbuf memory and are not * unrelated adjacent data (and a rule of mbuf handling is that the cpu * is not allowed to touch the mbuf while dma is in progress, including * header fields). 
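Before the code that uses them, a worked computation of buf_cl/size_cl/buf_clend/size_clend with made-up inputs (line size 32, buf = 0x80001234, len = 100): 20 bytes ahead of the buffer and 8 bytes after it share cache lines with it and get saved around the invalidation.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t linesize = 32, mask = linesize - 1;
	uint32_t buf = 0x80001234, len = 100;
	uint32_t buf_cl = buf & ~mask;		/* 0x80001220: first line start */
	uint32_t size_cl = buf & mask;		/* 20 head bytes to preserve */
	uint32_t buf_clend = buf + len;		/* 0x80001298: end of buffer */
	uint32_t size_clend = (linesize - (buf_clend & mask)) & mask;
						/* 8 tail bytes to preserve */

	printf("%#x %u %#x %u\n", buf_cl, size_cl, buf_clend, size_clend);
	return (0);
}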
*/ if (aligned) { size_cl = 0; size_clend = 0; } else { buf_cl = buf & ~cache_linesize_mask; size_cl = buf & cache_linesize_mask; buf_clend = buf + len; size_clend = (mips_dcache_max_linesize - (buf_clend & cache_linesize_mask)) & cache_linesize_mask; } switch (op) { case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE: case BUS_DMASYNC_POSTREAD: /* * Save buffers that might be modified by invalidation */ if (size_cl) memcpy (tmp_cl, (void*)buf_cl, size_cl); if (size_clend) memcpy (tmp_clend, (void*)buf_clend, size_clend); mips_dcache_inv_range(buf, len); /* * Restore them */ if (size_cl) memcpy ((void*)buf_cl, tmp_cl, size_cl); if (size_clend) memcpy ((void*)buf_clend, tmp_clend, size_clend); /* * Copies above have brought corresponding memory * cache lines back into dirty state. Write them back * out and invalidate affected cache lines again if * necessary. */ if (size_cl) mips_dcache_wbinv_range(buf_cl, size_cl); if (size_clend && (size_cl == 0 || buf_clend - buf_cl > mips_dcache_max_linesize)) mips_dcache_wbinv_range(buf_clend, size_clend); break; case BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE: mips_dcache_wbinv_range(buf, len); break; case BUS_DMASYNC_PREREAD: /* * Save buffers that might be modified by invalidation */ if (size_cl) memcpy (tmp_cl, (void *)buf_cl, size_cl); if (size_clend) memcpy (tmp_clend, (void *)buf_clend, size_clend); mips_dcache_inv_range(buf, len); /* * Restore them */ if (size_cl) memcpy ((void *)buf_cl, tmp_cl, size_cl); if (size_clend) memcpy ((void *)buf_clend, tmp_clend, size_clend); /* * Copies above have brought corresponding memory * cache lines back into dirty state. Write them back * out and invalidate affected cache lines again if * necessary. */ if (size_cl) mips_dcache_wbinv_range(buf_cl, size_cl); if (size_clend && (size_cl == 0 || buf_clend - buf_cl > mips_dcache_max_linesize)) mips_dcache_wbinv_range(buf_clend, size_clend); break; case BUS_DMASYNC_PREWRITE: #ifdef BUS_DMA_FORCE_WBINV mips_dcache_wbinv_range(buf, len); #else mips_dcache_wb_range(buf, len); #endif break; } } static void _bus_dmamap_sync_bp(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; STAILQ_FOREACH(bpage, &map->bpages, links) { if (op & BUS_DMASYNC_PREWRITE) { if (bpage->datavaddr != 0) bcopy((void *)bpage->datavaddr, (void *)(bpage->vaddr_nocache != 0 ? bpage->vaddr_nocache : bpage->vaddr), bpage->datacount); else physcopyout(bpage->dataaddr, (void *)(bpage->vaddr_nocache != 0 ? bpage->vaddr_nocache : bpage->vaddr), bpage->datacount); if (bpage->vaddr_nocache == 0) { #ifdef BUS_DMA_FORCE_WBINV mips_dcache_wbinv_range(bpage->vaddr, bpage->datacount); #else mips_dcache_wb_range(bpage->vaddr, bpage->datacount); #endif } dmat->bounce_zone->total_bounced++; } if (op & BUS_DMASYNC_POSTREAD) { if (bpage->vaddr_nocache == 0) { mips_dcache_inv_range(bpage->vaddr, bpage->datacount); } if (bpage->datavaddr != 0) bcopy((void *)(bpage->vaddr_nocache != 0 ? bpage->vaddr_nocache : bpage->vaddr), (void *)bpage->datavaddr, bpage->datacount); else physcopyin((void *)(bpage->vaddr_nocache != 0 ? 
bpage->vaddr_nocache : bpage->vaddr), bpage->dataaddr, bpage->datacount); dmat->bounce_zone->total_bounced++; } } } void bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct sync_list *sl, *end; int aligned; if (op == BUS_DMASYNC_POSTWRITE) return; if (STAILQ_FIRST(&map->bpages)) _bus_dmamap_sync_bp(dmat, map, op); if ((dmat->flags & BUS_DMA_COHERENT) || (map->flags & DMAMAP_UNCACHEABLE)) { if (op & BUS_DMASYNC_PREWRITE) mips_sync(); return; } aligned = (map->flags & DMAMAP_CACHE_ALIGNED) ? 1 : 0; CTR3(KTR_BUSDMA, "%s: op %x flags %x", __func__, op, map->flags); if (map->sync_count) { end = &map->slist[map->sync_count]; for (sl = &map->slist[0]; sl != end; sl++) bus_dmamap_sync_buf(sl->vaddr, sl->datacount, op, aligned); } } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->alignment <= bz->alignment) && (dmat->lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_BUSDMA, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->lowaddr; bz->alignment = MAX(dmat->alignment, PAGE_SIZE); bz->map_count = 0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? 
*/ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_BUSDMA, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_BOUNCE, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_BUSDMA); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); bpage->vaddr_nocache = (vm_offset_t)pmap_mapdev(bpage->busaddr, PAGE_SIZE); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); KASSERT(map != NULL, ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved. 
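 * With BUS_DMA_KEEP_PG_OFFSET set, the client's offset within its page
 * is OR'd into both the KVA and the bus address of the bounce page, so
 * the bounced copy appears at the same in-page offset the caller
 * described; free_bounce_page() masks the offset back off before the
 * page is reused.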
*/ bpage->vaddr |= addr & PAGE_MASK; bpage->busaddr |= addr & PAGE_MASK; } bpage->datavaddr = vaddr; bpage->dataaddr = addr; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } Index: head/sys/powerpc/powerpc/busdma_machdep.c =================================================================== --- head/sys/powerpc/powerpc/busdma_machdep.c (revision 338106) +++ head/sys/powerpc/powerpc/busdma_machdep.c (revision 338107) @@ -1,1229 +1,1229 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1997, 1998 Justin T. Gibbs. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
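Stepping back to the deferral machinery just above (free_bounce_page() handing waiting maps to busdma_swi(), which re-runs the load with the tag's lockfunc held), here is a hypothetical driver-side view of the same path; all names are made up.

static void
foo_tx_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	/*
	 * Runs immediately when bounce pages were available, or later
	 * from busdma_swi() -- with the tag's lockfunc held -- once
	 * free_bounce_page() replenishes the pool.
	 */
	if (error == 0)
		foo_program_descriptors(arg, segs, nseg);	/* made-up helper */
}

static int
foo_tx(struct foo_softc *sc, bus_dmamap_t map, void *buf, bus_size_t len)
{
	int error;

	error = bus_dmamap_load(sc->sc_dmat, map, buf, len, foo_tx_cb, sc,
	    BUS_DMA_WAITOK);
	if (error == EINPROGRESS)
		return (0);	/* queued; foo_tx_cb fires later */
	return (error);
}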
*/ /* * From amd64/busdma_machdep.c, r204214 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "iommu_if.h" #define MAX_BPAGES MIN(8192, physmem/40) struct bounce_zone; struct bus_dma_tag { bus_dma_tag_t parent; bus_size_t alignment; bus_addr_t boundary; bus_addr_t lowaddr; bus_addr_t highaddr; bus_dma_filter_t *filter; void *filterarg; bus_size_t maxsize; u_int nsegments; bus_size_t maxsegsz; int flags; int ref_count; int map_count; bus_dma_lock_t *lockfunc; void *lockfuncarg; struct bounce_zone *bounce_zone; device_t iommu; void *iommu_cookie; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ vm_page_t datapage; /* physical page of client data */ vm_offset_t dataoffs; /* page offset of client data */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; bus_dma_segment_t *segments; int nsegs; bus_dmamap_callback_t *callback; void *callback_arg; STAILQ_ENTRY(bus_dmamap) links; int contigalloc; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); static __inline int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); /* * Return true if a match is made. * * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'. * * If paddr is within the bounds of the dma tag then call the filter callback * to check for a match, if there is no filter callback then assume a match. */ static __inline int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) { int retval; retval = 0; do { if (dmat->filter == NULL && dmat->iommu == NULL && paddr > dmat->lowaddr && paddr <= dmat->highaddr) retval = 1; if (dmat->filter == NULL && (paddr & (dmat->alignment - 1)) != 0) retval = 1; if (dmat->filter != NULL && (*dmat->filter)(dmat->filterarg, paddr) != 0) retval = 1; dmat = dmat->parent; } while (retval == 0 && dmat != NULL); return (retval); } /* * Convenience function for manipulating driver locks from busdma (during * busdma_swi, for example). 
Drivers that don't provide their own locks * should specify &Giant to dmat->lockfuncarg. Drivers that use their own * non-mutex locking scheme don't have to use this at all. */ void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) { struct mtx *dmtx; dmtx = (struct mtx *)arg; switch (op) { case BUS_DMA_LOCK: mtx_lock(dmtx); break; case BUS_DMA_UNLOCK: mtx_unlock(dmtx); break; default: panic("Unknown operation 0x%x for busdma_lock_mutex!", op); } } /* * dflt_lock should never get called. It gets put into the dma tag when * lockfunc == NULL, which is only valid if the maps that are associated * with the tag are meant to never be defered. * XXX Should have a way to identify which driver is responsible here. */ static void dflt_lock(void *arg, bus_dma_lock_op_t op) { panic("driver error: busdma dflt_lock called"); } #define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3 #define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 /* * Allocate a device specific dma_tag. */ int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error = 0; /* Basic sanity checking */ if (boundary != 0 && boundary < maxsegsz) maxsegsz = boundary; if (maxsegsz == 0) { return (EINVAL); } /* Return a NULL tag on failure */ *dmat = NULL; newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_DEVBUF, M_ZERO | M_NOWAIT); if (newtag == NULL) { CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, 0, error); return (ENOMEM); } newtag->parent = parent; newtag->alignment = alignment; newtag->boundary = boundary; newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); newtag->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1); newtag->filter = filter; newtag->filterarg = filterarg; newtag->maxsize = maxsize; newtag->nsegments = nsegments; newtag->maxsegsz = maxsegsz; newtag->flags = flags; newtag->ref_count = 1; /* Count ourself */ newtag->map_count = 0; if (lockfunc != NULL) { newtag->lockfunc = lockfunc; newtag->lockfuncarg = lockfuncarg; } else { newtag->lockfunc = dflt_lock; newtag->lockfuncarg = NULL; } /* Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); if (newtag->boundary == 0) newtag->boundary = parent->boundary; else if (parent->boundary != 0) newtag->boundary = MIN(parent->boundary, newtag->boundary); if (newtag->filter == NULL) { /* * Short circuit looking at our parent directly * since we have encapsulated all of its information */ newtag->filter = parent->filter; newtag->filterarg = parent->filterarg; newtag->parent = parent->parent; } if (newtag->parent != NULL) atomic_add_int(&parent->ref_count, 1); newtag->iommu = parent->iommu; newtag->iommu_cookie = parent->iommu_cookie; } if (newtag->lowaddr < ptoa((vm_paddr_t)Maxmem) && newtag->iommu == NULL) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (newtag->alignment > 1) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (((newtag->flags & BUS_DMA_COULD_BOUNCE) != 0) && (flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* Must bounce */ if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_DEVBUF); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) - 
bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; } if (error != 0) { free(newtag, M_DEVBUF); } else { *dmat = newtag; } CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->flags : 0), error); return (error); } int bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain) { return (0); } int bus_dma_tag_destroy(bus_dma_tag_t dmat) { bus_dma_tag_t dmat_copy; int error; error = 0; dmat_copy = dmat; if (dmat != NULL) { if (dmat->map_count != 0) { error = EBUSY; goto out; } while (dmat != NULL) { bus_dma_tag_t parent; parent = dmat->parent; atomic_subtract_int(&dmat->ref_count, 1); if (dmat->ref_count == 0) { free(dmat, M_DEVBUF); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } out: CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); return (error); } /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { int error; error = 0; *mapp = (bus_dmamap_t)malloc(sizeof(**mapp), M_DEVBUF, M_NOWAIT | M_ZERO); if (*mapp == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or * an active address boundary. */ if (dmat->flags & BUS_DMA_COULD_BOUNCE) { /* Must bounce */ struct bounce_zone *bz; int maxpages; if (dmat->bounce_zone == NULL) { if ((error = alloc_bounce_zone(dmat)) != 0) return (error); } bz = dmat->bounce_zone; /* Initialize the new map */ STAILQ_INIT(&((*mapp)->bpages)); /* * Attempt to add pages to our pool on a per-instance * basis up to a sane limit. */ if (dmat->alignment > 1) maxpages = MAX_BPAGES; else maxpages = MIN(MAX_BPAGES, Maxmem -atop(dmat->lowaddr)); if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { int pages; pages = MAX(atop(dmat->maxsize), 1); pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 1); if (alloc_bounce_pages(dmat, pages) < pages) error = ENOMEM; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { if (error == 0) dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; } else { error = 0; } } bz->map_count++; } (*mapp)->nsegs = 0; (*mapp)->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF, M_NOWAIT); if ((*mapp)->segments == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } if (error == 0) dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, error); return (error); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { if (dmat->flags & BUS_DMA_COULD_BOUNCE) { if (STAILQ_FIRST(&map->bpages) != NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; } free(map->segments, M_DEVBUF); free(map, M_DEVBUF); dmat->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into * bus device space based on the constraints lited in the dma tag. 
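 * A hedged usage sketch (the tag, softc and field names are hypothetical):
 *
 *	error = bus_dmamem_alloc(sc->sc_dmat, &sc->sc_ring,
 *	    BUS_DMA_WAITOK | BUS_DMA_ZERO, &sc->sc_ringmap);
 *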
* A dmamap to for use with dmamap_load is also allocated. */ int bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { vm_memattr_t attr; int mflags; if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; bus_dmamap_create(dmat, flags, mapp); if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; #ifdef NOTYET if (flags & BUS_DMA_NOCACHE) attr = VM_MEMATTR_UNCACHEABLE; else #endif attr = VM_MEMATTR_DEFAULT; /* * XXX: * (dmat->alignment <= dmat->maxsize) is just a quick hack; the exact * alignment guarantees of malloc need to be nailed down, and the * code below should be rewritten to take that into account. * * In the meantime, we'll warn the user if malloc gets it wrong. */ if ((dmat->maxsize <= PAGE_SIZE) && (dmat->alignment <= dmat->maxsize) && dmat->lowaddr >= ptoa((vm_paddr_t)Maxmem) && attr == VM_MEMATTR_DEFAULT) { *vaddr = malloc(dmat->maxsize, M_DEVBUF, mflags); } else { /* * XXX Use Contigmalloc until it is merged into this facility * and handles multi-seg allocations. Nobody is doing * multi-seg allocations yet though. * XXX Certain AGP hardware does. */ - *vaddr = (void *)kmem_alloc_contig(kmem_arena, dmat->maxsize, - mflags, 0ul, dmat->lowaddr, dmat->alignment ? - dmat->alignment : 1ul, dmat->boundary, attr); + *vaddr = (void *)kmem_alloc_contig(dmat->maxsize, mflags, 0ul, + dmat->lowaddr, dmat->alignment ? dmat->alignment : 1ul, + dmat->boundary, attr); (*mapp)->contigalloc = 1; } if (*vaddr == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); return (ENOMEM); } else if (vtophys(*vaddr) & (dmat->alignment - 1)) { printf("bus_dmamem_alloc failed to align memory properly.\n"); } CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, 0); return (0); } /* * Free a piece of memory and it's allociated dmamap, that was allocated * via bus_dmamem_alloc. Make the same choice for free/contigfree. 
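 * (After this revision the contiguous case above is obtained through the
 * arena-less kmem_alloc_contig(), while the matching release below still
 * names kmem_arena explicitly in its kmem_free() call.)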
*/ void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { if (!map->contigalloc) free(vaddr, M_DEVBUF); else kmem_free(kmem_arena, (vm_offset_t)vaddr, dmat->maxsize); bus_dmamap_destroy(dmat, map); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); } static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { bus_addr_t curaddr; bus_size_t sgsize; if (map->pagesneeded == 0) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->maxsegsz); if (run_filter(dmat, curaddr) != 0) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); map->pagesneeded++; } curaddr += sgsize; buflen -= sgsize; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; bus_addr_t paddr; if (map->pagesneeded == 0) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = (vm_offset_t)buf; vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { bus_size_t sg_len; sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); if (pmap == kernel_pmap) paddr = pmap_kextract(vaddr); else paddr = pmap_extract(pmap, vaddr); if (run_filter(dmat, paddr) != 0) { sg_len = roundup2(sg_len, dmat->alignment); map->pagesneeded++; } vaddr += sg_len; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->boundary - 1); if (dmat->boundary > 0) { baddr = (curaddr + dmat->boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } /* * Insert chunk into a segment, coalescing with * previous segment if possible. 
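 * As a worked example (values are hypothetical): with a 0x1000 boundary
 * and curaddr 0x3f00, the clamp above yields baddr 0x4000, so a 0x400
 * byte chunk is cut back to 0x100 bytes and ends at the boundary, while
 * a later chunk that starts exactly where the previous segment ends,
 * keeps it within maxsegsz and stays in the same boundary window is
 * merged into that segment below instead of starting a new one.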
*/ seg = *segp; if (seg == -1) { seg = 0; segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && (dmat->boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. */ int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { bus_addr_t curaddr; bus_size_t sgsize; int error; if (segs == NULL) segs = map->segments; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->maxsegsz); if (map->pagesneeded != 0 && run_filter(dmat, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } int _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags, segs, segp)); } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrance, and the ending segment on exit. */ int _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize; bus_addr_t curaddr; vm_offset_t kvaddr, vaddr; int error; if (segs == NULL) segs = map->segments; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } vaddr = (vm_offset_t)buf; while (buflen > 0) { bus_size_t max_sgsize; /* * Get the physical address for this segment. */ if (pmap == kernel_pmap) { curaddr = pmap_kextract(vaddr); kvaddr = vaddr; } else { curaddr = pmap_extract(pmap, vaddr); kvaddr = 0; } /* * Compute the segment size, and adjust counts. */ max_sgsize = MIN(buflen, dmat->maxsegsz); sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); if (map->pagesneeded != 0 && run_filter(dmat, curaddr)) { sgsize = roundup2(sgsize, dmat->alignment); sgsize = MIN(sgsize, max_sgsize); curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, sgsize); } else { sgsize = MIN(sgsize, max_sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? 
*/ } void _bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { if (dmat->flags & BUS_DMA_COULD_BOUNCE) { map->dmat = dmat; map->mem = *mem; map->callback = callback; map->callback_arg = callback_arg; } } bus_dma_segment_t * _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { map->nsegs = nsegs; if (segs != NULL) memcpy(map->segments, segs, map->nsegs*sizeof(segs[0])); if (dmat->iommu != NULL) IOMMU_MAP(dmat->iommu, map->segments, &map->nsegs, dmat->lowaddr, dmat->highaddr, dmat->alignment, dmat->boundary, dmat->iommu_cookie); if (segs != NULL) memcpy(segs, map->segments, map->nsegs*sizeof(segs[0])); else segs = map->segments; return (segs); } /* * Release the mapping held by map. */ void bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; if (dmat->iommu) { IOMMU_UNMAP(dmat->iommu, map->segments, map->nsegs, dmat->iommu_cookie); map->nsegs = 0; } while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } } void bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; vm_offset_t datavaddr, tempvaddr; if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { /* * Handle data bouncing. We might also * want to add support for invalidating * the caches on broken hardware */ CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " "performing bounce", __func__, dmat, dmat->flags, op); if (op & BUS_DMASYNC_PREWRITE) { while (bpage != NULL) { tempvaddr = 0; datavaddr = bpage->datavaddr; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page( bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)datavaddr, (void *)bpage->vaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } if (op & BUS_DMASYNC_POSTREAD) { while (bpage != NULL) { tempvaddr = 0; datavaddr = bpage->datavaddr; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page( bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)bpage->vaddr, (void *)datavaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } } powerpc_sync(); } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->alignment <= bz->alignment) && (dmat->lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->lowaddr; bz->alignment = MAX(dmat->alignment, PAGE_SIZE); bz->map_count = 
0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? */ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_DEVBUF); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; 
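	/*
	 * A page was reserved for this map by reserve_bounce_pages(), so
	 * the zone's page list should still hold one here; the panics above
	 * and below only catch bugs in that accounting.
	 */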
mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved. */ bpage->vaddr |= addr & PAGE_MASK; bpage->busaddr |= addr & PAGE_MASK; } bpage->datavaddr = vaddr; bpage->datapage = PHYS_TO_VM_PAGE(addr); bpage->dataoffs = addr & PAGE_MASK; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } int bus_dma_tag_set_iommu(bus_dma_tag_t tag, device_t iommu, void *cookie) { tag->iommu = iommu; tag->iommu_cookie = cookie; return (0); } Index: head/sys/vm/vm_extern.h =================================================================== --- head/sys/vm/vm_extern.h (revision 338106) +++ head/sys/vm/vm_extern.h (revision 338107) @@ -1,131 +1,131 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vm_extern.h 8.2 (Berkeley) 1/12/94 * $FreeBSD$ */ #ifndef _VM_EXTERN_H_ #define _VM_EXTERN_H_ struct pmap; struct proc; struct vmspace; struct vnode; struct vmem; #ifdef _KERNEL struct cdev; struct cdevsw; /* These operate on kernel virtual addresses only. */ vm_offset_t kva_alloc(vm_size_t); void kva_free(vm_offset_t, vm_size_t); /* These operate on pageable virtual addresses. */ vm_offset_t kmap_alloc_wait(vm_map_t, vm_size_t); void kmap_free_wakeup(vm_map_t, vm_offset_t, vm_size_t); /* These operate on virtual addresses backed by memory. */ vm_offset_t kmem_alloc_attr(vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr); vm_offset_t kmem_alloc_attr_domain(int domain, vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr); -vm_offset_t kmem_alloc_contig(struct vmem *, vm_size_t size, int flags, +vm_offset_t kmem_alloc_contig(vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr); vm_offset_t kmem_alloc_contig_domain(int domain, vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr); vm_offset_t kmem_malloc(struct vmem *, vm_size_t size, int flags); vm_offset_t kmem_malloc_domain(int domain, vm_size_t size, int flags); void kmem_free(struct vmem *, vm_offset_t, vm_size_t); /* This provides memory for previously allocated address space. */ int kmem_back(vm_object_t, vm_offset_t, vm_size_t, int); int kmem_back_domain(int, vm_object_t, vm_offset_t, vm_size_t, int); void kmem_unback(vm_object_t, vm_offset_t, vm_size_t); /* Bootstrapping. 
*/ void kmem_bootstrap_free(vm_offset_t, vm_size_t); vm_map_t kmem_suballoc(vm_map_t, vm_offset_t *, vm_offset_t *, vm_size_t, boolean_t); void kmem_init(vm_offset_t, vm_offset_t); void kmem_init_zero_region(void); void kmeminit(void); int kernacc(void *, int, int); int useracc(void *, int, int); int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int); void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t, vm_ooffset_t *); int vm_fault_disable_pagefaults(void); void vm_fault_enable_pagefaults(int save); int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags, vm_page_t *m_hold); int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, vm_prot_t prot, vm_page_t *ma, int max_count); int vm_forkproc(struct thread *, struct proc *, struct thread *, struct vmspace *, int); void vm_waitproc(struct proc *); int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, objtype_t, void *, vm_ooffset_t); int vm_mmap_object(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, vm_object_t, vm_ooffset_t, boolean_t, struct thread *); int vm_mmap_to_errno(int rv); int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, int *, struct cdev *, struct cdevsw *, vm_ooffset_t *, vm_object_t *); int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, int *, struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *); void vm_set_page_size(void); void vm_sync_icache(vm_map_t, vm_offset_t, vm_size_t); typedef int (*pmap_pinit_t)(struct pmap *pmap); struct vmspace *vmspace_alloc(vm_offset_t, vm_offset_t, pmap_pinit_t); struct vmspace *vmspace_fork(struct vmspace *, vm_ooffset_t *); int vmspace_exec(struct proc *, vm_offset_t, vm_offset_t); int vmspace_unshare(struct proc *); void vmspace_exit(struct thread *); struct vmspace *vmspace_acquire_ref(struct proc *); void vmspace_free(struct vmspace *); void vmspace_exitfree(struct proc *); void vmspace_switch_aio(struct vmspace *); void vnode_pager_setsize(struct vnode *, vm_ooffset_t); int vslock(void *, size_t); void vsunlock(void *, size_t); struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset); void vm_imgact_unmap_page(struct sf_buf *sf); void vm_thread_dispose(struct thread *td); int vm_thread_new(struct thread *td, int pages); u_int vm_active_count(void); u_int vm_inactive_count(void); u_int vm_laundry_count(void); u_int vm_wait_count(void); #endif /* _KERNEL */ #endif /* !_VM_EXTERN_H_ */ Index: head/sys/vm/vm_kern.c =================================================================== --- head/sys/vm/vm_kern.c (revision 338106) +++ head/sys/vm/vm_kern.c (revision 338107) @@ -1,743 +1,739 @@ /*- * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_kern.c 8.3 (Berkeley) 1/12/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Kernel memory management. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include /* for ticks and hz */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include vm_map_t kernel_map; vm_map_t exec_map; vm_map_t pipe_map; const void *zero_region; CTASSERT((ZERO_REGION_SIZE & PAGE_MASK) == 0); /* NB: Used by kernel debuggers. */ const u_long vm_maxuser_address = VM_MAXUSER_ADDRESS; u_int exec_map_entry_size; u_int exec_map_entries; SYSCTL_ULONG(_vm, OID_AUTO, min_kernel_address, CTLFLAG_RD, SYSCTL_NULL_ULONG_PTR, VM_MIN_KERNEL_ADDRESS, "Min kernel address"); SYSCTL_ULONG(_vm, OID_AUTO, max_kernel_address, CTLFLAG_RD, #if defined(__arm__) || defined(__sparc64__) &vm_max_kernel_address, 0, #else SYSCTL_NULL_ULONG_PTR, VM_MAX_KERNEL_ADDRESS, #endif "Max kernel address"); /* * kva_alloc: * * Allocate a virtual address range with no underlying object and * no initial mapping to physical memory. Any mapping from this * range to physical memory must be explicitly created prior to * its use, typically with pmap_qenter(). Any attempt to create * a mapping on demand through vm_fault() will result in a panic. 
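 * A minimal sketch of the usual pairing (the page array and count are
 * assumed to have been allocated elsewhere):
 *
 *	va = kva_alloc(npages * PAGE_SIZE);
 *	if (va != 0)
 *		pmap_qenter(va, pages, npages);
 *	...
 *	pmap_qremove(va, npages);
 *	kva_free(va, npages * PAGE_SIZE);
 *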
*/ vm_offset_t kva_alloc(vm_size_t size) { vm_offset_t addr; size = round_page(size); if (vmem_alloc(kernel_arena, size, M_BESTFIT | M_NOWAIT, &addr)) return (0); return (addr); } /* * kva_free: * * Release a region of kernel virtual memory allocated * with kva_alloc, and return the physical pages * associated with that region. * * This routine may not block on kernel maps. */ void kva_free(vm_offset_t addr, vm_size_t size) { size = round_page(size); vmem_free(kernel_arena, addr, size); } /* * Allocates a region from the kernel address map and physical pages * within the specified address range to the kernel object. Creates a * wired mapping from this region to these pages, and returns the * region's starting virtual address. The allocated pages are not * necessarily physically contiguous. If M_ZERO is specified through the * given flags, then the pages are zeroed before they are mapped. */ vm_offset_t kmem_alloc_attr_domain(int domain, vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr) { vmem_t *vmem; vm_object_t object = kernel_object; vm_offset_t addr, i, offset; vm_page_t m; int pflags, tries; size = round_page(size); vmem = vm_dom[domain].vmd_kernel_arena; if (vmem_alloc(vmem, size, M_BESTFIT | flags, &addr)) return (0); offset = addr - VM_MIN_KERNEL_ADDRESS; pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED; pflags &= ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL); pflags |= VM_ALLOC_NOWAIT; VM_OBJECT_WLOCK(object); for (i = 0; i < size; i += PAGE_SIZE) { tries = 0; retry: m = vm_page_alloc_contig_domain(object, atop(offset + i), domain, pflags, 1, low, high, PAGE_SIZE, 0, memattr); if (m == NULL) { VM_OBJECT_WUNLOCK(object); if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) { if (!vm_page_reclaim_contig_domain(domain, pflags, 1, low, high, PAGE_SIZE, 0) && (flags & M_WAITOK) != 0) vm_wait_domain(domain); VM_OBJECT_WLOCK(object); tries++; goto retry; } kmem_unback(object, addr, i); vmem_free(vmem, addr, size); return (0); } KASSERT(vm_phys_domain(m) == domain, ("kmem_alloc_attr_domain: Domain mismatch %d != %d", vm_phys_domain(m), domain)); if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) pmap_zero_page(m); m->valid = VM_PAGE_BITS_ALL; pmap_enter(kernel_pmap, addr + i, m, VM_PROT_RW, VM_PROT_RW | PMAP_ENTER_WIRED, 0); } VM_OBJECT_WUNLOCK(object); return (addr); } vm_offset_t kmem_alloc_attr(vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr) { struct vm_domainset_iter di; vm_offset_t addr; int domain; vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags); do { addr = kmem_alloc_attr_domain(domain, size, flags, low, high, memattr); if (addr != 0) break; } while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0); return (addr); } /* * Allocates a region from the kernel address map and physically * contiguous pages within the specified address range to the kernel * object. Creates a wired mapping from this region to these pages, and * returns the region's starting virtual address. If M_ZERO is specified * through the given flags, then the pages are zeroed before they are * mapped. 
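 * With this revision the exported kmem_alloc_contig() wrapper below no
 * longer takes an arena argument; a caller that previously wrote
 * (illustrative sizes and bounds)
 *
 *	va = kmem_alloc_contig(kernel_arena, size, M_WAITOK | M_ZERO, 0,
 *	    ~(vm_paddr_t)0, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
 *
 * now drops the first argument:
 *
 *	va = kmem_alloc_contig(size, M_WAITOK | M_ZERO, 0,
 *	    ~(vm_paddr_t)0, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
 *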
*/ vm_offset_t kmem_alloc_contig_domain(int domain, vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr) { vmem_t *vmem; vm_object_t object = kernel_object; vm_offset_t addr, offset, tmp; vm_page_t end_m, m; u_long npages; int pflags, tries; size = round_page(size); vmem = vm_dom[domain].vmd_kernel_arena; if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr)) return (0); offset = addr - VM_MIN_KERNEL_ADDRESS; pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED; pflags &= ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL); pflags |= VM_ALLOC_NOWAIT; npages = atop(size); VM_OBJECT_WLOCK(object); tries = 0; retry: m = vm_page_alloc_contig_domain(object, atop(offset), domain, pflags, npages, low, high, alignment, boundary, memattr); if (m == NULL) { VM_OBJECT_WUNLOCK(object); if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) { if (!vm_page_reclaim_contig_domain(domain, pflags, npages, low, high, alignment, boundary) && (flags & M_WAITOK) != 0) vm_wait_domain(domain); VM_OBJECT_WLOCK(object); tries++; goto retry; } vmem_free(vmem, addr, size); return (0); } KASSERT(vm_phys_domain(m) == domain, ("kmem_alloc_contig_domain: Domain mismatch %d != %d", vm_phys_domain(m), domain)); end_m = m + npages; tmp = addr; for (; m < end_m; m++) { if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) pmap_zero_page(m); m->valid = VM_PAGE_BITS_ALL; pmap_enter(kernel_pmap, tmp, m, VM_PROT_RW, VM_PROT_RW | PMAP_ENTER_WIRED, 0); tmp += PAGE_SIZE; } VM_OBJECT_WUNLOCK(object); return (addr); } vm_offset_t -kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low, - vm_paddr_t high, u_long alignment, vm_paddr_t boundary, - vm_memattr_t memattr) +kmem_alloc_contig(vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high, + u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr) { struct vm_domainset_iter di; vm_offset_t addr; int domain; - - KASSERT(vmem == kernel_arena, - ("kmem_alloc_contig: Only kernel_arena is supported.")); vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags); do { addr = kmem_alloc_contig_domain(domain, size, flags, low, high, alignment, boundary, memattr); if (addr != 0) break; } while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0); return (addr); } /* * kmem_suballoc: * * Allocates a map to manage a subrange * of the kernel virtual address space. * * Arguments are as follows: * * parent Map to take range from * min, max Returned endpoints of map * size Size of range to find * superpage_align Request that min is superpage aligned */ vm_map_t kmem_suballoc(vm_map_t parent, vm_offset_t *min, vm_offset_t *max, vm_size_t size, boolean_t superpage_align) { int ret; vm_map_t result; size = round_page(size); *min = vm_map_min(parent); ret = vm_map_find(parent, NULL, 0, min, size, 0, superpage_align ? VMFS_SUPER_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_ACC_NO_CHARGE); if (ret != KERN_SUCCESS) panic("kmem_suballoc: bad status return of %d", ret); *max = *min + size; result = vm_map_create(vm_map_pmap(parent), *min, *max); if (result == NULL) panic("kmem_suballoc: cannot create submap"); if (vm_map_submap(parent, *min, *max, result) != KERN_SUCCESS) panic("kmem_suballoc: unable to change range to submap"); return (result); } /* * kmem_malloc: * * Allocate wired-down pages in the kernel's address space. 
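 * (Unlike kmem_alloc_contig() above, kmem_malloc() keeps its struct vmem
 * parameter at this revision, though it is now __unused; callers still
 * conventionally pass kernel_arena, e.g.
 *
 *	va = kmem_malloc(kernel_arena, size, M_WAITOK | M_ZERO);
 *
 * and later release the range with kmem_free(kernel_arena, va, size).)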
*/ vm_offset_t kmem_malloc_domain(int domain, vm_size_t size, int flags) { vmem_t *arena; vm_offset_t addr; int rv; #if VM_NRESERVLEVEL > 0 if (__predict_true((flags & M_EXEC) == 0)) arena = vm_dom[domain].vmd_kernel_arena; else arena = vm_dom[domain].vmd_kernel_rwx_arena; #else arena = vm_dom[domain].vmd_kernel_arena; #endif size = round_page(size); if (vmem_alloc(arena, size, flags | M_BESTFIT, &addr)) return (0); rv = kmem_back_domain(domain, kernel_object, addr, size, flags); if (rv != KERN_SUCCESS) { vmem_free(arena, addr, size); return (0); } return (addr); } vm_offset_t kmem_malloc(struct vmem *vmem __unused, vm_size_t size, int flags) { struct vm_domainset_iter di; vm_offset_t addr; int domain; vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags); do { addr = kmem_malloc_domain(domain, size, flags); if (addr != 0) break; } while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0); return (addr); } /* * kmem_back: * * Allocate physical pages for the specified virtual address range. */ int kmem_back_domain(int domain, vm_object_t object, vm_offset_t addr, vm_size_t size, int flags) { vm_offset_t offset, i; vm_page_t m, mpred; vm_prot_t prot; int pflags; KASSERT(object == kernel_object, ("kmem_back_domain: only supports kernel object.")); offset = addr - VM_MIN_KERNEL_ADDRESS; pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED; pflags &= ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL); if (flags & M_WAITOK) pflags |= VM_ALLOC_WAITFAIL; prot = (flags & M_EXEC) != 0 ? VM_PROT_ALL : VM_PROT_RW; i = 0; VM_OBJECT_WLOCK(object); retry: mpred = vm_radix_lookup_le(&object->rtree, atop(offset + i)); for (; i < size; i += PAGE_SIZE, mpred = m) { m = vm_page_alloc_domain_after(object, atop(offset + i), domain, pflags, mpred); /* * Ran out of space, free everything up and return. Don't need * to lock page queues here as we know that the pages we got * aren't on any queues. */ if (m == NULL) { if ((flags & M_NOWAIT) == 0) goto retry; VM_OBJECT_WUNLOCK(object); kmem_unback(object, addr, i); return (KERN_NO_SPACE); } KASSERT(vm_phys_domain(m) == domain, ("kmem_back_domain: Domain mismatch %d != %d", vm_phys_domain(m), domain)); if (flags & M_ZERO && (m->flags & PG_ZERO) == 0) pmap_zero_page(m); KASSERT((m->oflags & VPO_UNMANAGED) != 0, ("kmem_malloc: page %p is managed", m)); m->valid = VM_PAGE_BITS_ALL; pmap_enter(kernel_pmap, addr + i, m, prot, prot | PMAP_ENTER_WIRED, 0); } VM_OBJECT_WUNLOCK(object); return (KERN_SUCCESS); } int kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags) { struct vm_domainset_iter di; int domain; int ret; KASSERT(object == kernel_object, ("kmem_back: only supports kernel object.")); vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags); do { ret = kmem_back_domain(domain, object, addr, size, flags); if (ret == KERN_SUCCESS) break; } while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0); return (ret); } /* * kmem_unback: * * Unmap and free the physical pages underlying the specified virtual * address range. * * A physical page must exist within the specified object at each index * that is being unmapped. 
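 * In practice this undoes a successful kmem_back() over the same range;
 * a sketch of the pairing (addr and size assumed to come from kva_alloc()):
 *
 *	if (kmem_back(kernel_object, addr, size, M_WAITOK) == KERN_SUCCESS) {
 *		...
 *		kmem_unback(kernel_object, addr, size);
 *	}
 *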
*/ static int _kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size) { vm_page_t m, next; vm_offset_t end, offset; int domain; KASSERT(object == kernel_object, ("kmem_unback: only supports kernel object.")); if (size == 0) return (0); pmap_remove(kernel_pmap, addr, addr + size); offset = addr - VM_MIN_KERNEL_ADDRESS; end = offset + size; VM_OBJECT_WLOCK(object); m = vm_page_lookup(object, atop(offset)); domain = vm_phys_domain(m); for (; offset < end; offset += PAGE_SIZE, m = next) { next = vm_page_next(m); vm_page_unwire(m, PQ_NONE); vm_page_free(m); } VM_OBJECT_WUNLOCK(object); return (domain); } void kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size) { _kmem_unback(object, addr, size); } /* * kmem_free: * * Free memory allocated with kmem_malloc. The size must match the * original allocation. */ void kmem_free(struct vmem *vmem, vm_offset_t addr, vm_size_t size) { struct vmem *arena; int domain; #if VM_NRESERVLEVEL > 0 KASSERT(vmem == kernel_arena || vmem == kernel_rwx_arena, ("kmem_free: Only kernel_arena or kernel_rwx_arena are supported.")); #else KASSERT(vmem == kernel_arena, ("kmem_free: Only kernel_arena is supported.")); #endif size = round_page(size); domain = _kmem_unback(kernel_object, addr, size); #if VM_NRESERVLEVEL > 0 if (__predict_true(vmem == kernel_arena)) arena = vm_dom[domain].vmd_kernel_arena; else arena = vm_dom[domain].vmd_kernel_rwx_arena; #else arena = vm_dom[domain].vmd_kernel_arena; #endif vmem_free(arena, addr, size); } /* * kmap_alloc_wait: * * Allocates pageable memory from a sub-map of the kernel. If the submap * has no room, the caller sleeps waiting for more memory in the submap. * * This routine may block. */ vm_offset_t kmap_alloc_wait(vm_map_t map, vm_size_t size) { vm_offset_t addr; size = round_page(size); if (!swap_reserve(size)) return (0); for (;;) { /* * To make this work for more than one map, use the map's lock * to lock out sleepers/wakers. */ vm_map_lock(map); if (vm_map_findspace(map, vm_map_min(map), size, &addr) == 0) break; /* no space now; see if we can ever get space */ if (vm_map_max(map) - vm_map_min(map) < size) { vm_map_unlock(map); swap_release(size); return (0); } map->needs_wakeup = TRUE; vm_map_unlock_and_wait(map, 0); } vm_map_insert(map, NULL, 0, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, MAP_ACC_CHARGED); vm_map_unlock(map); return (addr); } /* * kmap_free_wakeup: * * Returns memory to a submap of the kernel, and wakes up any processes * waiting for memory in that map. */ void kmap_free_wakeup(vm_map_t map, vm_offset_t addr, vm_size_t size) { vm_map_lock(map); (void) vm_map_delete(map, trunc_page(addr), round_page(addr + size)); if (map->needs_wakeup) { map->needs_wakeup = FALSE; vm_map_wakeup(map); } vm_map_unlock(map); } void kmem_init_zero_region(void) { vm_offset_t addr, i; vm_page_t m; /* * Map a single physical page of zeros to a larger virtual range. * This requires less looping in places that want large amounts of * zeros, while not using much more physical resources. */ addr = kva_alloc(ZERO_REGION_SIZE); m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); for (i = 0; i < ZERO_REGION_SIZE; i += PAGE_SIZE) pmap_qenter(addr + i, &m, 1); pmap_protect(kernel_pmap, addr, addr + ZERO_REGION_SIZE, VM_PROT_READ); zero_region = (const void *)addr; } /* * kmem_init: * * Create the kernel map; insert a mapping covering kernel text, * data, bss, and all space allocated thus far (`boostrap' data). 
The * new map will thus map the range between VM_MIN_KERNEL_ADDRESS and * `start' as allocated, and the range between `start' and `end' as free. */ void kmem_init(vm_offset_t start, vm_offset_t end) { vm_map_t m; m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end); m->system_map = 1; vm_map_lock(m); /* N.B.: cannot use kgdb to debug, starting with this assignment ... */ kernel_map = m; (void) vm_map_insert(m, NULL, (vm_ooffset_t) 0, #ifdef __amd64__ KERNBASE, #else VM_MIN_KERNEL_ADDRESS, #endif start, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT); /* ... and ending with the completion of the above `insert' */ vm_map_unlock(m); } /* * kmem_bootstrap_free: * * Free pages backing preloaded data (e.g., kernel modules) to the * system. Currently only supported on platforms that create a * vm_phys segment for preloaded data. */ void kmem_bootstrap_free(vm_offset_t start, vm_size_t size) { #if defined(__i386__) || defined(__amd64__) struct vm_domain *vmd; vm_offset_t end, va; vm_paddr_t pa; vm_page_t m; end = trunc_page(start + size); start = round_page(start); for (va = start; va < end; va += PAGE_SIZE) { pa = pmap_kextract(va); m = PHYS_TO_VM_PAGE(pa); vmd = vm_pagequeue_domain(m); vm_domain_free_lock(vmd); vm_phys_free_pages(m, 0); vmd->vmd_page_count++; vm_domain_free_unlock(vmd); vm_domain_freecnt_inc(vmd, 1); vm_cnt.v_page_count++; } pmap_remove(kernel_pmap, start, end); (void)vmem_add(kernel_arena, start, end - start, M_WAITOK); #endif } #ifdef DIAGNOSTIC /* * Allow userspace to directly trigger the VM drain routine for testing * purposes. */ static int debug_vm_lowmem(SYSCTL_HANDLER_ARGS) { int error, i; i = 0; error = sysctl_handle_int(oidp, &i, 0, req); if (error) return (error); if ((i & ~(VM_LOW_KMEM | VM_LOW_PAGES)) != 0) return (EINVAL); if (i != 0) EVENTHANDLER_INVOKE(vm_lowmem, i); return (0); } SYSCTL_PROC(_debug, OID_AUTO, vm_lowmem, CTLTYPE_INT | CTLFLAG_RW, 0, 0, debug_vm_lowmem, "I", "set to trigger vm_lowmem event with given flags"); #endif Index: head/sys/x86/iommu/intel_intrmap.c =================================================================== --- head/sys/x86/iommu/intel_intrmap.c (revision 338106) +++ head/sys/x86/iommu/intel_intrmap.c (revision 338107) @@ -1,380 +1,380 @@ /*- * Copyright (c) 2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct dmar_unit *dmar_ir_find(device_t src, uint16_t *rid, int *is_dmar); static void dmar_ir_program_irte(struct dmar_unit *unit, u_int idx, uint64_t low, uint16_t rid); static int dmar_ir_free_irte(struct dmar_unit *unit, u_int cookie); int iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count) { struct dmar_unit *unit; vmem_addr_t vmem_res; u_int idx, i; int error; unit = dmar_ir_find(src, NULL, NULL); if (unit == NULL || !unit->ir_enabled) { for (i = 0; i < count; i++) cookies[i] = -1; return (EOPNOTSUPP); } error = vmem_alloc(unit->irtids, count, M_FIRSTFIT | M_NOWAIT, &vmem_res); if (error != 0) { KASSERT(error != EOPNOTSUPP, ("impossible EOPNOTSUPP from vmem")); return (error); } idx = vmem_res; for (i = 0; i < count; i++) cookies[i] = idx + i; return (0); } int iommu_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie, uint64_t *addr, uint32_t *data) { struct dmar_unit *unit; uint64_t low; uint16_t rid; int is_dmar; unit = dmar_ir_find(src, &rid, &is_dmar); if (is_dmar) { KASSERT(unit == NULL, ("DMAR cannot translate itself")); /* * See VT-d specification, 5.1.6 Remapping Hardware - * Interrupt Programming. */ *data = vector; *addr = MSI_INTEL_ADDR_BASE | ((cpu & 0xff) << 12); if (x2apic_mode) *addr |= ((uint64_t)cpu & 0xffffff00) << 32; else KASSERT(cpu <= 0xff, ("cpu id too big %d", cpu)); return (0); } if (unit == NULL || !unit->ir_enabled || cookie == -1) return (EOPNOTSUPP); low = (DMAR_X2APIC(unit) ? DMAR_IRTE1_DST_x2APIC(cpu) : DMAR_IRTE1_DST_xAPIC(cpu)) | DMAR_IRTE1_V(vector) | DMAR_IRTE1_DLM_FM | DMAR_IRTE1_TM_EDGE | DMAR_IRTE1_RH_DIRECT | DMAR_IRTE1_DM_PHYSICAL | DMAR_IRTE1_P; dmar_ir_program_irte(unit, cookie, low, rid); if (addr != NULL) { /* * See VT-d specification, 5.1.5.2 MSI and MSI-X * Register Programming. 
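 * The handle encoded below is the IRTE index handed out by
 * iommu_alloc_msi_intr() above, and the MSI data word is left zero
 * because, once remapping is active, the vector is taken from the IRTE
 * low word programmed by dmar_ir_program_irte() rather than from the
 * message itself.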
*/ *addr = MSI_INTEL_ADDR_BASE | ((cookie & 0x7fff) << 5) | ((cookie & 0x8000) << 2) | 0x18; *data = 0; } return (0); } int iommu_unmap_msi_intr(device_t src, u_int cookie) { struct dmar_unit *unit; if (cookie == -1) return (0); unit = dmar_ir_find(src, NULL, NULL); return (dmar_ir_free_irte(unit, cookie)); } int iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo) { struct dmar_unit *unit; vmem_addr_t vmem_res; uint64_t low, iorte; u_int idx; int error; uint16_t rid; unit = dmar_find_ioapic(ioapic_id, &rid); if (unit == NULL || !unit->ir_enabled) { *cookie = -1; return (EOPNOTSUPP); } error = vmem_alloc(unit->irtids, 1, M_FIRSTFIT | M_NOWAIT, &vmem_res); if (error != 0) { KASSERT(error != EOPNOTSUPP, ("impossible EOPNOTSUPP from vmem")); return (error); } idx = vmem_res; low = 0; switch (irq) { case IRQ_EXTINT: low |= DMAR_IRTE1_DLM_ExtINT; break; case IRQ_NMI: low |= DMAR_IRTE1_DLM_NMI; break; case IRQ_SMI: low |= DMAR_IRTE1_DLM_SMI; break; default: KASSERT(vector != 0, ("No vector for IRQ %u", irq)); low |= DMAR_IRTE1_DLM_FM | DMAR_IRTE1_V(vector); break; } low |= (DMAR_X2APIC(unit) ? DMAR_IRTE1_DST_x2APIC(cpu) : DMAR_IRTE1_DST_xAPIC(cpu)) | (edge ? DMAR_IRTE1_TM_EDGE : DMAR_IRTE1_TM_LEVEL) | DMAR_IRTE1_RH_DIRECT | DMAR_IRTE1_DM_PHYSICAL | DMAR_IRTE1_P; dmar_ir_program_irte(unit, idx, low, rid); if (hi != NULL) { /* * See VT-d specification, 5.1.5.1 I/OxAPIC * Programming. */ iorte = (1ULL << 48) | ((uint64_t)(idx & 0x7fff) << 49) | ((idx & 0x8000) != 0 ? (1 << 11) : 0) | (edge ? IOART_TRGREDG : IOART_TRGRLVL) | (activehi ? IOART_INTAHI : IOART_INTALO) | IOART_DELFIXED | vector; *hi = iorte >> 32; *lo = iorte; } *cookie = idx; return (0); } int iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie) { struct dmar_unit *unit; u_int idx; idx = *cookie; if (idx == -1) return (0); *cookie = -1; unit = dmar_find_ioapic(ioapic_id, NULL); KASSERT(unit != NULL && unit->ir_enabled, ("unmap: cookie %d unit %p", idx, unit)); return (dmar_ir_free_irte(unit, idx)); } static struct dmar_unit * dmar_ir_find(device_t src, uint16_t *rid, int *is_dmar) { devclass_t src_class; struct dmar_unit *unit; /* * We need to determine if the interrupt source generates FSB * interrupts. If yes, it is either DMAR, in which case * interrupts are not remapped. Or it is HPET, and interrupts * are remapped. For HPET, source id is reported by HPET * record in DMAR ACPI table. */ if (is_dmar != NULL) *is_dmar = FALSE; src_class = device_get_devclass(src); if (src_class == devclass_find("dmar")) { unit = NULL; if (is_dmar != NULL) *is_dmar = TRUE; } else if (src_class == devclass_find("hpet")) { unit = dmar_find_hpet(src, rid); } else { unit = dmar_find(src); if (unit != NULL && rid != NULL) dmar_get_requester(src, rid); } return (unit); } static void dmar_ir_program_irte(struct dmar_unit *unit, u_int idx, uint64_t low, uint16_t rid) { dmar_irte_t *irte; uint64_t high; KASSERT(idx < unit->irte_cnt, ("bad cookie %d %d", idx, unit->irte_cnt)); irte = &(unit->irt[idx]); high = DMAR_IRTE2_SVT_RID | DMAR_IRTE2_SQ_RID | DMAR_IRTE2_SID_RID(rid); device_printf(unit->dev, "programming irte[%d] rid %#x high %#jx low %#jx\n", idx, rid, (uintmax_t)high, (uintmax_t)low); DMAR_LOCK(unit); if ((irte->irte1 & DMAR_IRTE1_P) != 0) { /* * The rte is already valid. Assume that the request * is to remap the interrupt for balancing. Only low * word of rte needs to be changed. Assert that the * high word contains expected value. 
*/ KASSERT(irte->irte2 == high, ("irte2 mismatch, %jx %jx", (uintmax_t)irte->irte2, (uintmax_t)high)); dmar_pte_update(&irte->irte1, low); } else { dmar_pte_store(&irte->irte2, high); dmar_pte_store(&irte->irte1, low); } dmar_qi_invalidate_iec(unit, idx, 1); DMAR_UNLOCK(unit); } static int dmar_ir_free_irte(struct dmar_unit *unit, u_int cookie) { dmar_irte_t *irte; KASSERT(unit != NULL && unit->ir_enabled, ("unmap: cookie %d unit %p", cookie, unit)); KASSERT(cookie < unit->irte_cnt, ("bad cookie %u %u", cookie, unit->irte_cnt)); irte = &(unit->irt[cookie]); dmar_pte_clear(&irte->irte1); dmar_pte_clear(&irte->irte2); DMAR_LOCK(unit); dmar_qi_invalidate_iec(unit, cookie, 1); DMAR_UNLOCK(unit); vmem_free(unit->irtids, cookie, 1); return (0); } static u_int clp2(u_int v) { return (powerof2(v) ? v : 1 << fls(v)); } int dmar_init_irt(struct dmar_unit *unit) { if ((unit->hw_ecap & DMAR_ECAP_IR) == 0) return (0); unit->ir_enabled = 1; TUNABLE_INT_FETCH("hw.dmar.ir", &unit->ir_enabled); if (!unit->ir_enabled) return (0); if (!unit->qi_enabled) { unit->ir_enabled = 0; if (bootverbose) device_printf(unit->dev, "QI disabled, disabling interrupt remapping\n"); return (0); } unit->irte_cnt = clp2(NUM_IO_INTS); - unit->irt = (dmar_irte_t *)(uintptr_t)kmem_alloc_contig(kernel_arena, + unit->irt = (dmar_irte_t *)(uintptr_t)kmem_alloc_contig( unit->irte_cnt * sizeof(dmar_irte_t), M_ZERO | M_WAITOK, 0, dmar_high, PAGE_SIZE, 0, DMAR_IS_COHERENT(unit) ? VM_MEMATTR_DEFAULT : VM_MEMATTR_UNCACHEABLE); if (unit->irt == NULL) return (ENOMEM); unit->irt_phys = pmap_kextract((vm_offset_t)unit->irt); unit->irtids = vmem_create("dmarirt", 0, unit->irte_cnt, 1, 0, M_FIRSTFIT | M_NOWAIT); DMAR_LOCK(unit); dmar_load_irt_ptr(unit); dmar_qi_invalidate_iec_glob(unit); DMAR_UNLOCK(unit); /* * Initialize mappings for already configured interrupt pins. * Required, because otherwise the interrupts fault without * irtes. */ intr_reprogram(); DMAR_LOCK(unit); dmar_enable_ir(unit); DMAR_UNLOCK(unit); return (0); } void dmar_fini_irt(struct dmar_unit *unit) { unit->ir_enabled = 0; if (unit->irt != NULL) { dmar_disable_ir(unit); dmar_qi_invalidate_iec_glob(unit); vmem_destroy(unit->irtids); kmem_free(kernel_arena, (vm_offset_t)unit->irt, unit->irte_cnt * sizeof(dmar_irte_t)); } } Index: head/sys/x86/iommu/intel_qi.c =================================================================== --- head/sys/x86/iommu/intel_qi.c (revision 338106) +++ head/sys/x86/iommu/intel_qi.c (revision 338107) @@ -1,474 +1,474 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static bool dmar_qi_seq_processed(const struct dmar_unit *unit, const struct dmar_qi_genseq *pseq) { return (pseq->gen < unit->inv_waitd_gen || (pseq->gen == unit->inv_waitd_gen && pseq->seq <= unit->inv_waitd_seq_hw)); } static int dmar_enable_qi(struct dmar_unit *unit) { int error; DMAR_ASSERT_LOCKED(unit); unit->hw_gcmd |= DMAR_GCMD_QIE; dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd); DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_QIES) != 0)); return (error); } static int dmar_disable_qi(struct dmar_unit *unit) { int error; DMAR_ASSERT_LOCKED(unit); unit->hw_gcmd &= ~DMAR_GCMD_QIE; dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd); DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_QIES) == 0)); return (error); } static void dmar_qi_advance_tail(struct dmar_unit *unit) { DMAR_ASSERT_LOCKED(unit); dmar_write4(unit, DMAR_IQT_REG, unit->inv_queue_tail); } static void dmar_qi_ensure(struct dmar_unit *unit, int descr_count) { uint32_t head; int bytes; DMAR_ASSERT_LOCKED(unit); bytes = descr_count << DMAR_IQ_DESCR_SZ_SHIFT; for (;;) { if (bytes <= unit->inv_queue_avail) break; /* refill */ head = dmar_read4(unit, DMAR_IQH_REG); head &= DMAR_IQH_MASK; unit->inv_queue_avail = head - unit->inv_queue_tail - DMAR_IQ_DESCR_SZ; if (head <= unit->inv_queue_tail) unit->inv_queue_avail += unit->inv_queue_size; if (bytes <= unit->inv_queue_avail) break; /* * No space in the queue, do busy wait. Hardware must * make a progress. But first advance the tail to * inform the descriptor streamer about entries we * might have already filled, otherwise they could * clog the whole queue.. */ dmar_qi_advance_tail(unit); unit->inv_queue_full++; cpu_spinwait(); } unit->inv_queue_avail -= bytes; } static void dmar_qi_emit(struct dmar_unit *unit, uint64_t data1, uint64_t data2) { DMAR_ASSERT_LOCKED(unit); *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data1; unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2; KASSERT(unit->inv_queue_tail <= unit->inv_queue_size, ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail, (uintmax_t)unit->inv_queue_size)); unit->inv_queue_tail &= unit->inv_queue_size - 1; *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data2; unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2; KASSERT(unit->inv_queue_tail <= unit->inv_queue_size, ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail, (uintmax_t)unit->inv_queue_size)); unit->inv_queue_tail &= unit->inv_queue_size - 1; } static void dmar_qi_emit_wait_descr(struct dmar_unit *unit, uint32_t seq, bool intr, bool memw, bool fence) { DMAR_ASSERT_LOCKED(unit); dmar_qi_emit(unit, DMAR_IQ_DESCR_WAIT_ID | (intr ? DMAR_IQ_DESCR_WAIT_IF : 0) | (memw ? DMAR_IQ_DESCR_WAIT_SW : 0) | (fence ? DMAR_IQ_DESCR_WAIT_FN : 0) | (memw ? 
DMAR_IQ_DESCR_WAIT_SD(seq) : 0), memw ? unit->inv_waitd_seq_hw_phys : 0); } static void dmar_qi_emit_wait_seq(struct dmar_unit *unit, struct dmar_qi_genseq *pseq, bool emit_wait) { struct dmar_qi_genseq gsec; uint32_t seq; KASSERT(pseq != NULL, ("wait descriptor with no place for seq")); DMAR_ASSERT_LOCKED(unit); if (unit->inv_waitd_seq == 0xffffffff) { gsec.gen = unit->inv_waitd_gen; gsec.seq = unit->inv_waitd_seq; dmar_qi_ensure(unit, 1); dmar_qi_emit_wait_descr(unit, gsec.seq, false, true, false); dmar_qi_advance_tail(unit); while (!dmar_qi_seq_processed(unit, &gsec)) cpu_spinwait(); unit->inv_waitd_gen++; unit->inv_waitd_seq = 1; } seq = unit->inv_waitd_seq++; pseq->gen = unit->inv_waitd_gen; pseq->seq = seq; if (emit_wait) { dmar_qi_ensure(unit, 1); dmar_qi_emit_wait_descr(unit, seq, true, true, false); } } static void dmar_qi_wait_for_seq(struct dmar_unit *unit, const struct dmar_qi_genseq *gseq, bool nowait) { DMAR_ASSERT_LOCKED(unit); unit->inv_seq_waiters++; while (!dmar_qi_seq_processed(unit, gseq)) { if (cold || nowait) { cpu_spinwait(); } else { msleep(&unit->inv_seq_waiters, &unit->lock, 0, "dmarse", hz); } } unit->inv_seq_waiters--; } void dmar_qi_invalidate_locked(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size, struct dmar_qi_genseq *pseq, bool emit_wait) { struct dmar_unit *unit; dmar_gaddr_t isize; int am; unit = domain->dmar; DMAR_ASSERT_LOCKED(unit); for (; size > 0; base += isize, size -= isize) { am = calc_am(unit, base, size, &isize); dmar_qi_ensure(unit, 1); dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_PAGE | DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR | DMAR_IQ_DESCR_IOTLB_DID(domain->domain), base | am); } dmar_qi_emit_wait_seq(unit, pseq, emit_wait); dmar_qi_advance_tail(unit); } void dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit) { struct dmar_qi_genseq gseq; DMAR_ASSERT_LOCKED(unit); dmar_qi_ensure(unit, 2); dmar_qi_emit(unit, DMAR_IQ_DESCR_CTX_INV | DMAR_IQ_DESCR_CTX_GLOB, 0); dmar_qi_emit_wait_seq(unit, &gseq, true); dmar_qi_advance_tail(unit); dmar_qi_wait_for_seq(unit, &gseq, false); } void dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit) { struct dmar_qi_genseq gseq; DMAR_ASSERT_LOCKED(unit); dmar_qi_ensure(unit, 2); dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_GLOB | DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR, 0); dmar_qi_emit_wait_seq(unit, &gseq, true); dmar_qi_advance_tail(unit); dmar_qi_wait_for_seq(unit, &gseq, false); } void dmar_qi_invalidate_iec_glob(struct dmar_unit *unit) { struct dmar_qi_genseq gseq; DMAR_ASSERT_LOCKED(unit); dmar_qi_ensure(unit, 2); dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV, 0); dmar_qi_emit_wait_seq(unit, &gseq, true); dmar_qi_advance_tail(unit); dmar_qi_wait_for_seq(unit, &gseq, false); } void dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt) { struct dmar_qi_genseq gseq; u_int c, l; DMAR_ASSERT_LOCKED(unit); KASSERT(start < unit->irte_cnt && start < start + cnt && start + cnt <= unit->irte_cnt, ("inv iec overflow %d %d %d", unit->irte_cnt, start, cnt)); for (; cnt > 0; cnt -= c, start += c) { l = ffs(start | cnt) - 1; c = 1 << l; dmar_qi_ensure(unit, 1); dmar_qi_emit(unit, DMAR_IQ_DESCR_IEC_INV | DMAR_IQ_DESCR_IEC_IDX | DMAR_IQ_DESCR_IEC_IIDX(start) | DMAR_IQ_DESCR_IEC_IM(l), 0); } dmar_qi_ensure(unit, 1); dmar_qi_emit_wait_seq(unit, &gseq, true); dmar_qi_advance_tail(unit); /* * The caller of the function, in particular, * dmar_ir_program_irte(), may be called from the context * where the sleeping is forbidden (in 
fact, the * intr_table_lock mutex may be held, locked from * intr_shuffle_irqs()). Wait for the invalidation completion * using the busy wait. * * The impact on the interrupt input setup code is small, the * expected overhead is comparable with the chipset register * read. It is more harmful for the parallel DMA operations, * since we own the dmar unit lock until whole invalidation * queue is processed, which includes requests possibly issued * before our request. */ dmar_qi_wait_for_seq(unit, &gseq, true); } int dmar_qi_intr(void *arg) { struct dmar_unit *unit; unit = arg; KASSERT(unit->qi_enabled, ("dmar%d: QI is not enabled", unit->unit)); taskqueue_enqueue(unit->qi_taskqueue, &unit->qi_task); return (FILTER_HANDLED); } static void dmar_qi_task(void *arg, int pending __unused) { struct dmar_unit *unit; struct dmar_map_entry *entry; uint32_t ics; unit = arg; DMAR_LOCK(unit); for (;;) { entry = TAILQ_FIRST(&unit->tlb_flush_entries); if (entry == NULL) break; if (!dmar_qi_seq_processed(unit, &entry->gseq)) break; TAILQ_REMOVE(&unit->tlb_flush_entries, entry, dmamap_link); DMAR_UNLOCK(unit); dmar_domain_free_entry(entry, (entry->flags & DMAR_MAP_ENTRY_QI_NF) == 0); DMAR_LOCK(unit); } ics = dmar_read4(unit, DMAR_ICS_REG); if ((ics & DMAR_ICS_IWC) != 0) { ics = DMAR_ICS_IWC; dmar_write4(unit, DMAR_ICS_REG, ics); } if (unit->inv_seq_waiters > 0) wakeup(&unit->inv_seq_waiters); DMAR_UNLOCK(unit); } int dmar_init_qi(struct dmar_unit *unit) { uint64_t iqa; uint32_t ics; int qi_sz; if (!DMAR_HAS_QI(unit) || (unit->hw_cap & DMAR_CAP_CM) != 0) return (0); unit->qi_enabled = 1; TUNABLE_INT_FETCH("hw.dmar.qi", &unit->qi_enabled); if (!unit->qi_enabled) return (0); TAILQ_INIT(&unit->tlb_flush_entries); TASK_INIT(&unit->qi_task, 0, dmar_qi_task, unit); unit->qi_taskqueue = taskqueue_create_fast("dmarqf", M_WAITOK, taskqueue_thread_enqueue, &unit->qi_taskqueue); taskqueue_start_threads(&unit->qi_taskqueue, 1, PI_AV, "dmar%d qi taskq", unit->unit); unit->inv_waitd_gen = 0; unit->inv_waitd_seq = 1; qi_sz = DMAR_IQA_QS_DEF; TUNABLE_INT_FETCH("hw.dmar.qi_size", &qi_sz); if (qi_sz > DMAR_IQA_QS_MAX) qi_sz = DMAR_IQA_QS_MAX; unit->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE; /* Reserve one descriptor to prevent wraparound. */ unit->inv_queue_avail = unit->inv_queue_size - DMAR_IQ_DESCR_SZ; /* The invalidation queue reads by DMARs are always coherent. 
*/ - unit->inv_queue = kmem_alloc_contig(kernel_arena, unit->inv_queue_size, - M_WAITOK | M_ZERO, 0, dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); + unit->inv_queue = kmem_alloc_contig(unit->inv_queue_size, M_WAITOK | + M_ZERO, 0, dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); unit->inv_waitd_seq_hw_phys = pmap_kextract( (vm_offset_t)&unit->inv_waitd_seq_hw); DMAR_LOCK(unit); dmar_write8(unit, DMAR_IQT_REG, 0); iqa = pmap_kextract(unit->inv_queue); iqa |= qi_sz; dmar_write8(unit, DMAR_IQA_REG, iqa); dmar_enable_qi(unit); ics = dmar_read4(unit, DMAR_ICS_REG); if ((ics & DMAR_ICS_IWC) != 0) { ics = DMAR_ICS_IWC; dmar_write4(unit, DMAR_ICS_REG, ics); } dmar_enable_qi_intr(unit); DMAR_UNLOCK(unit); return (0); } void dmar_fini_qi(struct dmar_unit *unit) { struct dmar_qi_genseq gseq; if (!unit->qi_enabled) return; taskqueue_drain(unit->qi_taskqueue, &unit->qi_task); taskqueue_free(unit->qi_taskqueue); unit->qi_taskqueue = NULL; DMAR_LOCK(unit); /* quisce */ dmar_qi_ensure(unit, 1); dmar_qi_emit_wait_seq(unit, &gseq, true); dmar_qi_advance_tail(unit); dmar_qi_wait_for_seq(unit, &gseq, false); /* only after the quisce, disable queue */ dmar_disable_qi_intr(unit); dmar_disable_qi(unit); KASSERT(unit->inv_seq_waiters == 0, ("dmar%d: waiters on disabled queue", unit->unit)); DMAR_UNLOCK(unit); kmem_free(kernel_arena, unit->inv_queue, unit->inv_queue_size); unit->inv_queue = 0; unit->inv_queue_size = 0; unit->qi_enabled = 0; } void dmar_enable_qi_intr(struct dmar_unit *unit) { uint32_t iectl; DMAR_ASSERT_LOCKED(unit); KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported", unit->unit)); iectl = dmar_read4(unit, DMAR_IECTL_REG); iectl &= ~DMAR_IECTL_IM; dmar_write4(unit, DMAR_IECTL_REG, iectl); } void dmar_disable_qi_intr(struct dmar_unit *unit) { uint32_t iectl; DMAR_ASSERT_LOCKED(unit); KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported", unit->unit)); iectl = dmar_read4(unit, DMAR_IECTL_REG); dmar_write4(unit, DMAR_IECTL_REG, iectl | DMAR_IECTL_IM); }
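
The interrupt remapping table allocated in dmar_init_irt() must hold a power-of-two number of IRTEs, so the driver rounds NUM_IO_INTS up with clp2() before calling kmem_alloc_contig(). A minimal userland sketch of the same rounding, not part of the patch; fls() here is the FreeBSD <strings.h> routine and the sample values are arbitrary:

#include <assert.h>
#include <stdio.h>
#include <strings.h>	/* fls(), as in FreeBSD libc */

/* Round v up to the next power of two; mirrors the driver's clp2(). */
static unsigned
clp2(unsigned v)
{
	/* powerof2(v) in the kernel expands to ((v & (v - 1)) == 0). */
	return (((v & (v - 1)) == 0) ? v : 1U << fls(v));
}

int
main(void)
{
	assert(clp2(1) == 1);
	assert(clp2(64) == 64);		/* already a power of two: unchanged */
	assert(clp2(65) == 128);
	assert(clp2(191) == 256);	/* arbitrary sample value, not NUM_IO_INTS */
	printf("clp2 examples OK\n");
	return (0);
}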
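
The index-mask form of the IEC descriptor can only name naturally aligned power-of-two ranges, so dmar_qi_invalidate_iec() peels chunks off [start, start + cnt) with l = ffs(start | cnt) - 1, the largest chunk that is both aligned at start and no larger than cnt. A standalone sketch of that decomposition, illustrative only; iec_chunks() is a hypothetical helper that prints the chunks instead of emitting descriptors:

#include <stdio.h>
#include <strings.h>	/* ffs() */

/*
 * Split [start, start + cnt) into naturally aligned power-of-two chunks,
 * exactly as the loop in dmar_qi_invalidate_iec() does before emitting
 * one IEC descriptor per chunk.
 */
static void
iec_chunks(unsigned start, unsigned cnt)
{
	unsigned c, l;

	for (; cnt > 0; cnt -= c, start += c) {
		l = ffs(start | cnt) - 1;
		c = 1U << l;
		printf("  index %u, mask %u (%u entries)\n", start, l, c);
	}
}

int
main(void)
{
	/* start 6, cnt 10: 2 entries at index 6, then 8 entries at index 8. */
	printf("start 6 cnt 10:\n");
	iec_chunks(6, 10);
	/* start 0, cnt 48: three chunks of 16 entries. */
	printf("start 0 cnt 48:\n");
	iec_chunks(0, 48);
	return (0);
}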
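
Wait-descriptor completion is tracked by a (generation, sequence) pair: dmar_qi_emit_wait_seq() bumps the generation and restarts the sequence at 1 when the 32-bit counter is about to wrap, and dmar_qi_seq_processed() compares the request's pair lexicographically against the software-maintained generation and the sequence value the wait descriptor last wrote back. A tiny model of that comparison; struct genseq and seq_processed() are hypothetical stand-ins, not kernel types:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Hypothetical stand-in for the (inv_waitd_gen, inv_waitd_seq_hw) pair. */
struct genseq {
	uint32_t gen;
	uint32_t seq;
};

/*
 * Mirror of dmar_qi_seq_processed(): a request (pseq) is complete once the
 * completion point (hw) has reached or passed it, ordered on (gen, seq).
 */
static bool
seq_processed(struct genseq hw, struct genseq pseq)
{
	return (pseq.gen < hw.gen ||
	    (pseq.gen == hw.gen && pseq.seq <= hw.seq));
}

int
main(void)
{
	struct genseq hw = { .gen = 3, .seq = 100 };

	assert(seq_processed(hw, (struct genseq){ 3, 100 }));		/* equal */
	assert(seq_processed(hw, (struct genseq){ 2, 4000000000u }));	/* older generation */
	assert(!seq_processed(hw, (struct genseq){ 3, 101 }));		/* not yet written back */
	assert(!seq_processed(hw, (struct genseq){ 4, 1 }));		/* future generation */
	return (0);
}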
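
dmar_qi_ensure() refreshes inv_queue_avail from the hardware head pointer (DMAR_IQH_REG) with byte-ring arithmetic that always subtracts one descriptor, the same reservation dmar_init_qi() makes ("Reserve one descriptor to prevent wraparound"), so the tail never catches up with the head. A minimal sketch of that free-space computation; IQ_SIZE and IQ_DESCR_SZ are made-up values for illustration, not the driver's constants:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define	IQ_SIZE		4096u	/* hypothetical queue size in bytes */
#define	IQ_DESCR_SZ	16u	/* hypothetical descriptor size in bytes */

/*
 * Free space between tail (next software write) and head (hardware read
 * pointer) in a byte ring, keeping one descriptor in reserve; this is the
 * computation dmar_qi_ensure() performs when it refills inv_queue_avail.
 */
static uint32_t
queue_avail(uint32_t head, uint32_t tail)
{
	uint32_t avail;

	avail = head - tail - IQ_DESCR_SZ;
	if (head <= tail)
		avail += IQ_SIZE;
	return (avail);
}

int
main(void)
{
	assert(queue_avail(0, 0) == IQ_SIZE - IQ_DESCR_SZ);		/* empty ring */
	assert(queue_avail(2048, 1024) == 1024 - IQ_DESCR_SZ);		/* head ahead of tail */
	assert(queue_avail(1024, 2048) == IQ_SIZE - 1024 - IQ_DESCR_SZ); /* wrapped */
	printf("queue_avail examples OK\n");
	return (0);
}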