Changeset View
Changeset View
Standalone View
Standalone View
sys/vm/vm_page.h
Show First 20 Lines • Show All 109 Lines • ▼ Show 20 Lines | |||||
* the field without holding that lock. If the underlying | * the field without holding that lock. If the underlying | ||||
* architecture does not support atomic read-modify-write | * architecture does not support atomic read-modify-write | ||||
* operations on the field's type, then the machine-independent | * operations on the field's type, then the machine-independent | ||||
* layer uses a 32-bit atomic on the aligned 32-bit word that | * layer uses a 32-bit atomic on the aligned 32-bit word that | ||||
* contains the dirty field. In the machine-independent layer, | * contains the dirty field. In the machine-independent layer, | ||||
* the implementation of read-modify-write operations on the | * the implementation of read-modify-write operations on the | ||||
* field is encapsulated in vm_page_clear_dirty_mask(). | * field is encapsulated in vm_page_clear_dirty_mask(). | ||||
* | * | ||||
* The page structure contains two counters which prevent page reuse. | * The ref_count field tracks references to the page. References that | ||||
* Both counters are protected by the page lock (P). The hold | * prevent the page from being reclaimable are called wirings and are | ||||
* counter counts transient references obtained via a pmap lookup, and | * counted in the low bits of ref_count. Upper bits are reserved for | ||||
* is also used to prevent page reclamation in situations where it is | * special references that do not prevent reclamation of the page. | ||||
* undesirable to block other accesses to the page. The wire counter | * Specifically, the containing object, if any, holds such a reference, | ||||
* is used to implement mlock(2) and is non-zero for pages containing | * and the page daemon takes a transient reference when it is scanning | ||||
* kernel memory. Pages that are wired or held will not be reclaimed | * a page. Updates to ref_count are atomic unless the page is | ||||
* or laundered by the page daemon, but are treated differently during | * unallocated. To wire a page after it has been allocated, the object | ||||
* a page queue scan: held pages remain at their position in the queue, | * lock must be held, or the page must be busy, or the wiring thread | ||||
* while wired pages are removed from the queue and must later be | * must atomically take a reference and verify that the VPRC_BLOCKED | ||||
* re-enqueued appropriately by the unwiring thread. It is legal to | * bit is not set. No locks are required to unwire a page, but care | ||||
* call vm_page_free() on a held page; doing so causes it to be removed | * must be taken to free the page if that wiring represented the last | ||||
* from its object and page queue, and the page is released to the | * reference to the page. | ||||
* allocator once the last hold reference is dropped. In contrast, | |||||
* wired pages may not be freed. | |||||
* | * | ||||
* In some pmap implementations, the wire count of a page table page is | |||||
* used to track the number of populated entries. | |||||
* | |||||
* The busy lock is an embedded reader-writer lock which protects the | * The busy lock is an embedded reader-writer lock which protects the | ||||
* page's contents and identity (i.e., its <object, pindex> tuple) and | * page's contents and identity (i.e., its <object, pindex> tuple) and | ||||
* interlocks with the object lock (O). In particular, a page may be | * interlocks with the object lock (O). In particular, a page may be | ||||
* busied or unbusied only with the object write lock held. To avoid | * busied or unbusied only with the object write lock held. To avoid | ||||
* bloating the page structure, the busy lock lacks some of the | * bloating the page structure, the busy lock lacks some of the | ||||
* features available to the kernel's general-purpose synchronization | * features available to the kernel's general-purpose synchronization | ||||
* primitives. As a result, busy lock ordering rules are not verified, | * primitives. As a result, busy lock ordering rules are not verified, | ||||
* lock recursion is not detected, and an attempt to xbusy a busy page | * lock recursion is not detected, and an attempt to xbusy a busy page | ||||
* or sbusy an xbusy page results in a panic rather than | * or sbusy an xbusy page results in a panic rather than | ||||
* causing the thread to block. vm_page_sleep_if_busy() can be used to | * causing the thread to block. vm_page_sleep_if_busy() can be used to | ||||
* sleep until the page's busy state changes, after which the caller | * sleep until the page's busy state changes, after which the caller | ||||
* must re-lookup the page and re-evaluate its state. | * must re-lookup the page and re-evaluate its state. | ||||
* | * | ||||
* The queue field is the index of the page queue containing the | * The queue field is the index of the page queue containing the | ||||
* page, or PQ_NONE if the page is not enqueued. The queue lock of a | * page, or PQ_NONE if the page is not enqueued. The queue lock of a | ||||
* page is the page queue lock corresponding to the page queue index, | * page is the page queue lock corresponding to the page queue index, | ||||
* or the page lock (P) for the page if it is not enqueued. To modify | * or the page lock (P) for the page if it is not enqueued. To modify | ||||
* the queue field, the queue lock for the old value of the field must | * the queue field, the queue lock for the old value of the field must | ||||
* be held. It is invalid for a page's queue field to transition | * be held. It is invalid for a page's queue field to transition | ||||
* between two distinct page queue indices. That is, when updating | * between two distinct page queue indices. That is, when updating | ||||
* the queue field, either the new value or the old value must be | * the queue field, either the new value or the old value must be | ||||
* PQ_NONE. | * PQ_NONE. There is one exception to this rule: the page daemon may | ||||
* transition the queue field from PQ_INACTIVE to PQ_NONE immediately | |||||
* prior to freeing a page during an inactive queue scan. At that | |||||
* point the page will have already been physically dequeued, and it | |||||
* is known that no other references to that vm_page structure exist. | |||||
* | * | ||||
* To avoid contention on page queue locks, page queue operations | * To avoid contention on page queue locks, page queue operations | ||||
* (enqueue, dequeue, requeue) are batched using per-CPU queues. | * (enqueue, dequeue, requeue) are batched using per-CPU queues. | ||||
* A deferred operation is requested by inserting an entry into a | * A deferred operation is requested by inserting an entry into a | ||||
* batch queue; the entry is simply a pointer to the page, and the | * batch queue; the entry is simply a pointer to the page, and the | ||||
* request type is encoded in the page's aflags field using the values | * request type is encoded in the page's aflags field using the values | ||||
* in PGA_QUEUE_STATE_MASK. The type-stability of struct vm_pages is | * in PGA_QUEUE_STATE_MASK. The type-stability of struct vm_pages is | ||||
* crucial to this scheme since the processing of entries in a given | * crucial to this scheme since the processing of entries in a given | ||||
* batch queue may be deferred indefinitely. In particular, a page | * batch queue may be deferred indefinitely. In particular, a page | ||||
* may be freed before its pending batch queue entries have been | * may be freed before its pending batch queue entries have been | ||||
* processed. The page lock (P) must be held to schedule a batched | * processed. The page lock (P) must be held to schedule a batched | ||||
* queue operation, and the page queue lock must be held in order to | * queue operation, and the page queue lock must be held in order to | ||||
* process batch queue entries for the page queue. | * process batch queue entries for the page queue. When the page is | ||||
* being freed, the thread freeing the page is permitted to schedule | |||||
* a dequeue of the page without the page lock held. | |||||
*/ | */ | ||||
#if PAGE_SIZE == 4096 | #if PAGE_SIZE == 4096 | ||||
#define VM_PAGE_BITS_ALL 0xffu | #define VM_PAGE_BITS_ALL 0xffu | ||||
typedef uint8_t vm_page_bits_t; | typedef uint8_t vm_page_bits_t; | ||||
#elif PAGE_SIZE == 8192 | #elif PAGE_SIZE == 8192 | ||||
#define VM_PAGE_BITS_ALL 0xffffu | #define VM_PAGE_BITS_ALL 0xffffu | ||||
typedef uint16_t vm_page_bits_t; | typedef uint16_t vm_page_bits_t; | ||||
Show All 13 Lines | struct { | ||||
void *pv; | void *pv; | ||||
} s; | } s; | ||||
struct { | struct { | ||||
u_long p; | u_long p; | ||||
u_long v; | u_long v; | ||||
} memguard; | } memguard; | ||||
} plinks; | } plinks; | ||||
TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ | TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ | ||||
vm_object_t object; /* which object am I in (O,P) */ | vm_object_t object; /* which object am I in (O) */ | ||||
vm_pindex_t pindex; /* offset into object (O,P) */ | vm_pindex_t pindex; /* offset into object (O,P) */ | ||||
vm_paddr_t phys_addr; /* physical address of page (C) */ | vm_paddr_t phys_addr; /* physical address of page (C) */ | ||||
struct md_page md; /* machine dependent stuff */ | struct md_page md; /* machine dependent stuff */ | ||||
u_int wire_count; /* wired down maps refs (P) */ | union { | ||||
u_int wire_count; | |||||
u_int ref_count; /* page references */ | |||||
}; | |||||
kib: Anon unions are not in C99, they appeared in C11. | |||||
Done Inline ActionsHmm, we use them in other core pieces of the kernel, struct mbuf for example, so I thought they were ok. I plan to remove this one in a follow-up diff in any case. markj: Hmm, we use them in other core pieces of the kernel, struct mbuf for example, so I thought they… | |||||
volatile u_int busy_lock; /* busy owners lock */ | volatile u_int busy_lock; /* busy owners lock */ | ||||
uint16_t flags; /* page PG_* flags (P) */ | uint16_t flags; /* page PG_* flags (P) */ | ||||
uint8_t order; /* index of the buddy queue (F) */ | uint8_t order; /* index of the buddy queue (F) */ | ||||
uint8_t pool; /* vm_phys freepool index (F) */ | uint8_t pool; /* vm_phys freepool index (F) */ | ||||
uint8_t aflags; /* access is atomic */ | uint8_t aflags; /* access is atomic */ | ||||
uint8_t oflags; /* page VPO_* flags (O) */ | uint8_t oflags; /* page VPO_* flags (O) */ | ||||
uint8_t queue; /* page queue index (Q) */ | uint8_t queue; /* page queue index (Q) */ | ||||
int8_t psind; /* pagesizes[] index (O) */ | int8_t psind; /* pagesizes[] index (O) */ | ||||
int8_t segind; /* vm_phys segment index (C) */ | int8_t segind; /* vm_phys segment index (C) */ | ||||
u_char act_count; /* page usage count (P) */ | u_char act_count; /* page usage count (P) */ | ||||
/* NOTE that these must support one bit per DEV_BSIZE in a page */ | /* NOTE that these must support one bit per DEV_BSIZE in a page */ | ||||
/* so, on normal X86 kernels, they must be at least 8 bits wide */ | /* so, on normal X86 kernels, they must be at least 8 bits wide */ | ||||
vm_page_bits_t valid; /* map of valid DEV_BSIZE chunks (O) */ | vm_page_bits_t valid; /* map of valid DEV_BSIZE chunks (O) */ | ||||
vm_page_bits_t dirty; /* map of dirty DEV_BSIZE chunks (M) */ | vm_page_bits_t dirty; /* map of dirty DEV_BSIZE chunks (M) */ | ||||
}; | }; | ||||
/* | /* | ||||
* Special bits used in the ref_count field. | |||||
* | |||||
* ref_count is normally used to count wirings that prevent the page from being | |||||
* reclaimed, but also supports several special types of references that do not | |||||
* prevent reclamation. Accesses to the ref_count field must be atomic unless | |||||
* the page is unallocated. | |||||
* | |||||
* VPRC_PDREF is a transient reference acquired by the page daemon when | |||||
* scanning. Pages may be dequeued without the page lock held when they are | |||||
* being freed, and this reference ensures that the page daemon is not | |||||
* simultaneously manipulating the queue state of the page. The page lock must | |||||
* be held to set or clear this bit. | |||||
* | |||||
* VPRC_OBJREF is the reference held by the containing object. It can be set or | |||||
* cleared only when the corresponding object's write lock is held. | |||||
* | |||||
* VPRC_BLOCKED is used to atomically block wirings via pmap lookups while | |||||
* attempting to tear down all mappings of a given page. The page lock and | |||||
* object write lock must both be held in order to set or clear this bit. | |||||
*/ | |||||
#define VPRC_BLOCKED 0x20000000u /* mappings are being removed */ | |||||
#define VPRC_OBJREF 0x40000000u /* object reference, cleared with (O) */ | |||||
#define VPRC_PDREF 0x80000000u /* page daemon reference for scanning */ | |||||
#define _VPRC_REFMASK (VPRC_BLOCKED | VPRC_OBJREF | VPRC_PDREF) | |||||
#define VPRC_WIRE_COUNT(c) ((c) & ~_VPRC_REFMASK) | |||||
#define VPRC_WIRE_COUNT_MAX (~_VPRC_REFMASK) | |||||
/* | |||||
* Page flags stored in oflags: | * Page flags stored in oflags: | ||||
* | * | ||||
* Access to these page flags is synchronized by the lock on the object | * Access to these page flags is synchronized by the lock on the object | ||||
* containing the page (O). | * containing the page (O). | ||||
* | * | ||||
* Note: VPO_UNMANAGED (used by OBJT_DEVICE, OBJT_PHYS and OBJT_SG) | * Note: VPO_UNMANAGED (used by OBJT_DEVICE, OBJT_PHYS and OBJT_SG) | ||||
* indicates that the page is not under PV management but | * indicates that the page is not under PV management but | ||||
* otherwise should be treated as a normal page. Pages not | * otherwise should be treated as a normal page. Pages not | ||||
▲ Show 20 Lines • Show All 321 Lines • ▼ Show 20 Lines | |||||
bool vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m); | bool vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m); | ||||
void vm_page_putfake(vm_page_t m); | void vm_page_putfake(vm_page_t m); | ||||
void vm_page_readahead_finish(vm_page_t m); | void vm_page_readahead_finish(vm_page_t m); | ||||
bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, | bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, | ||||
vm_paddr_t high, u_long alignment, vm_paddr_t boundary); | vm_paddr_t high, u_long alignment, vm_paddr_t boundary); | ||||
bool vm_page_reclaim_contig_domain(int domain, int req, u_long npages, | bool vm_page_reclaim_contig_domain(int domain, int req, u_long npages, | ||||
vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary); | vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary); | ||||
void vm_page_reference(vm_page_t m); | void vm_page_reference(vm_page_t m); | ||||
void vm_page_remove (vm_page_t); | void vm_page_release(vm_page_t m, bool nocache); | ||||
void vm_page_release_locked(vm_page_t m, bool nocache); | |||||
bool vm_page_remove(vm_page_t); | |||||
int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t); | int vm_page_rename(vm_page_t, vm_object_t, vm_pindex_t); | ||||
vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object, | vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object, | ||||
vm_pindex_t pindex); | vm_pindex_t pindex); | ||||
void vm_page_requeue(vm_page_t m); | void vm_page_requeue(vm_page_t m); | ||||
int vm_page_sbusied(vm_page_t m); | int vm_page_sbusied(vm_page_t m); | ||||
vm_page_t vm_page_scan_contig(u_long npages, vm_page_t m_start, | vm_page_t vm_page_scan_contig(u_long npages, vm_page_t m_start, | ||||
vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options); | vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options); | ||||
void vm_page_set_valid_range(vm_page_t m, int base, int size); | void vm_page_set_valid_range(vm_page_t m, int base, int size); | ||||
int vm_page_sleep_if_busy(vm_page_t m, const char *msg); | int vm_page_sleep_if_busy(vm_page_t m, const char *msg); | ||||
vm_offset_t vm_page_startup(vm_offset_t vaddr); | vm_offset_t vm_page_startup(vm_offset_t vaddr); | ||||
void vm_page_sunbusy(vm_page_t m); | void vm_page_sunbusy(vm_page_t m); | ||||
bool vm_page_try_to_free(vm_page_t m); | bool vm_page_try_remove_all(vm_page_t m); | ||||
bool vm_page_try_remove_write(vm_page_t m); | |||||
int vm_page_trysbusy(vm_page_t m); | int vm_page_trysbusy(vm_page_t m); | ||||
void vm_page_unhold_pages(vm_page_t *ma, int count); | void vm_page_unhold_pages(vm_page_t *ma, int count); | ||||
void vm_page_unswappable(vm_page_t m); | void vm_page_unswappable(vm_page_t m); | ||||
bool vm_page_unwire(vm_page_t m, uint8_t queue); | void vm_page_unwire(vm_page_t m, uint8_t queue); | ||||
bool vm_page_unwire_noq(vm_page_t m); | bool vm_page_unwire_noq(vm_page_t m); | ||||
void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); | void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); | ||||
void vm_page_wire (vm_page_t); | void vm_page_wire(vm_page_t); | ||||
bool vm_page_wire_mapped(vm_page_t m); | |||||
void vm_page_xunbusy_hard(vm_page_t m); | void vm_page_xunbusy_hard(vm_page_t m); | ||||
void vm_page_xunbusy_maybelocked(vm_page_t m); | void vm_page_xunbusy_maybelocked(vm_page_t m); | ||||
void vm_page_set_validclean (vm_page_t, int, int); | void vm_page_set_validclean (vm_page_t, int, int); | ||||
void vm_page_clear_dirty (vm_page_t, int, int); | void vm_page_clear_dirty (vm_page_t, int, int); | ||||
void vm_page_set_invalid (vm_page_t, int, int); | void vm_page_set_invalid (vm_page_t, int, int); | ||||
int vm_page_is_valid (vm_page_t, int, int); | int vm_page_is_valid (vm_page_t, int, int); | ||||
void vm_page_test_dirty (vm_page_t); | void vm_page_test_dirty (vm_page_t); | ||||
vm_page_bits_t vm_page_bits(int base, int size); | vm_page_bits_t vm_page_bits(int base, int size); | ||||
▲ Show 20 Lines • Show All 214 Lines • ▼ Show 20 Lines | |||||
{ | { | ||||
uint8_t queue; | uint8_t queue; | ||||
queue = vm_page_queue(m); | queue = vm_page_queue(m); | ||||
return (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE); | return (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE); | ||||
} | } | ||||
/* | /* | ||||
* vm_page_drop: | |||||
* | |||||
* Release a reference to a page and return the old reference count. | |||||
*/ | |||||
static inline u_int | |||||
vm_page_drop(vm_page_t m, u_int val) | |||||
{ | |||||
/* | |||||
* Synchronize with vm_page_free_prep(): ensure that all updates to the | |||||
* page structure are visible before it is freed. | |||||
*/ | |||||
atomic_thread_fence_rel(); | |||||
return (atomic_fetchadd_int(&m->ref_count, val)); | |||||
} | |||||
/* | |||||
* vm_page_wired: | * vm_page_wired: | ||||
* | * | ||||
* Return true if a reference prevents the page from being reclaimable. | * Return true if a reference prevents the page from being reclaimable. | ||||
*/ | */ | ||||
static inline bool | static inline bool | ||||
vm_page_wired(vm_page_t m) | vm_page_wired(vm_page_t m) | ||||
{ | { | ||||
return (m->wire_count > 0); | return (VPRC_WIRE_COUNT(m->ref_count) > 0); | ||||
} | } | ||||
#endif /* _KERNEL */ | #endif /* _KERNEL */ | ||||
#endif /* !_VM_PAGE_ */ | #endif /* !_VM_PAGE_ */ |
Anon unions are not in C99, they appeared in C11.