Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F153191718
D20839.id59345.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
12 KB
Referenced Files
None
Subscribers
None
D20839.id59345.diff
View Options
Index: sys/dev/cxgbe/tom/t4_cpl_io.c
===================================================================
--- sys/dev/cxgbe/tom/t4_cpl_io.c
+++ sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -76,28 +76,6 @@
static void t4_aiotx_cancel(struct kaiocb *job);
static void t4_aiotx_queue_toep(struct socket *so, struct toepcb *toep);
-static size_t
-aiotx_mbuf_pgoff(struct mbuf *m)
-{
- struct aiotx_buffer *ab;
-
- MPASS(IS_AIOTX_MBUF(m));
- ab = m->m_ext.ext_arg1;
- return ((ab->ps.offset + (uintptr_t)m->m_ext.ext_arg2) % PAGE_SIZE);
-}
-
-static vm_page_t *
-aiotx_mbuf_pages(struct mbuf *m)
-{
- struct aiotx_buffer *ab;
- int npages;
-
- MPASS(IS_AIOTX_MBUF(m));
- ab = m->m_ext.ext_arg1;
- npages = (ab->ps.offset + (uintptr_t)m->m_ext.ext_arg2) / PAGE_SIZE;
- return (ab->ps.pages + npages);
-}
-
void
send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp)
{
@@ -647,10 +625,7 @@
i = -1;
for (m = start; m != stop; m = m->m_next) {
- if (IS_AIOTX_MBUF(m))
- rc = sglist_append_vmpages(&sg, aiotx_mbuf_pages(m),
- aiotx_mbuf_pgoff(m), m->m_len);
- else if (m->m_flags & M_NOMAP)
+ if (m->m_flags & M_NOMAP)
rc = sglist_append_mb_ext_pgs(&sg, m);
else
rc = sglist_append(&sg, mtod(m, void *), m->m_len);
@@ -713,7 +688,7 @@
struct sockbuf *sb = &so->so_snd;
int tx_credits, shove, compl, sowwakeup;
struct ofld_tx_sdesc *txsd;
- bool aiotx_mbuf_seen;
+ bool nomap_mbuf_seen;
INP_WLOCK_ASSERT(inp);
KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
@@ -766,14 +741,11 @@
plen = 0;
nsegs = 0;
max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
- aiotx_mbuf_seen = false;
+ nomap_mbuf_seen = false;
for (m = sndptr; m != NULL; m = m->m_next) {
int n;
- if (IS_AIOTX_MBUF(m))
- n = sglist_count_vmpages(aiotx_mbuf_pages(m),
- aiotx_mbuf_pgoff(m), m->m_len);
- else if (m->m_flags & M_NOMAP)
+ if (m->m_flags & M_NOMAP)
n = sglist_count_mb_ext_pgs(m);
else
n = sglist_count(mtod(m, void *), m->m_len);
@@ -802,8 +774,8 @@
break;
}
- if (IS_AIOTX_MBUF(m))
- aiotx_mbuf_seen = true;
+ if (m->m_flags & M_NOMAP)
+ nomap_mbuf_seen = true;
if (max_nsegs_1mbuf < n)
max_nsegs_1mbuf = n;
sb_sndptr = m; /* new sb->sb_sndptr if all goes well */
@@ -852,7 +824,7 @@
panic("%s: excess tx.", __func__);
shove = m == NULL && !(tp->t_flags & TF_MORETOCOME);
- if (plen <= max_imm && !aiotx_mbuf_seen) {
+ if (plen <= max_imm && !nomap_mbuf_seen) {
/* Immediate data tx */
@@ -1910,71 +1882,94 @@
}
/*
- * Use the 'backend3' field in AIO jobs to store the amount of data
- * sent by the AIO job so far and the 'backend4' field to hold an
- * error that should be reported when the job is completed.
+ * Use the 'backend1' field in AIO jobs to hold an error that should
+ * be reported when the job is completed, the 'backend3' field to
+ * store the amount of data sent by the AIO job so far, and the
+ * 'backend4' field to hold a reference count on the job.
+ *
+ * Each unmapped mbuf holds a reference on the job as does the queue
+ * so long as the job is queued.
*/
+#define aio_error backend1
#define aio_sent backend3
-#define aio_error backend4
+#define aio_refs backend4
#define jobtotid(job) \
(((struct toepcb *)(so_sototcpcb((job)->fd_file->f_data)->t_toe))->tid)
-
+
static void
-free_aiotx_buffer(struct aiotx_buffer *ab)
+aiotx_free_job(struct kaiocb *job)
{
- struct kaiocb *job;
long status;
int error;
- if (refcount_release(&ab->refcount) == 0)
+ if (refcount_release(&job->aio_refs) == 0)
return;
- job = ab->job;
- error = job->aio_error;
+ error = (intptr_t)job->aio_error;
status = job->aio_sent;
- vm_page_unhold_pages(ab->ps.pages, ab->ps.npages);
- free(ab, M_CXGBE);
#ifdef VERBOSE_TRACES
CTR5(KTR_CXGBE, "%s: tid %d completed %p len %ld, error %d", __func__,
jobtotid(job), job, status, error);
#endif
- if (error == ECANCELED && status != 0)
+ if (error != 0 && status != 0)
error = 0;
if (error == ECANCELED)
aio_cancel(job);
else if (error)
aio_complete(job, -1, error);
- else
+ else {
+ job->msgsnd = 1;
aio_complete(job, status, 0);
+ }
}
static void
-t4_aiotx_mbuf_free(struct mbuf *m)
+aiotx_free_pgs(struct mbuf *m)
{
- struct aiotx_buffer *ab = m->m_ext.ext_arg1;
+ struct mbuf_ext_pgs *ext_pgs;
+ struct kaiocb *job;
+ struct mtx *mtx;
+ vm_page_t pg;
+ MBUF_EXT_PGS_ASSERT(m);
+ ext_pgs = m->m_ext.ext_pgs;
+ job = m->m_ext.ext_arg1;
#ifdef VERBOSE_TRACES
CTR3(KTR_CXGBE, "%s: completed %d bytes for tid %d", __func__,
- m->m_len, jobtotid(ab->job));
+ m->m_len, jobtotid(job));
#endif
- free_aiotx_buffer(ab);
+
+ mtx = NULL;
+ for (int i = 0; i < ext_pgs->npgs; i++) {
+ pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
+ vm_page_change_lock(pg, &mtx);
+ vm_page_unhold(pg);
+ }
+ if (mtx != NULL)
+ mtx_unlock(mtx);
+
+ aiotx_free_job(job);
}
/*
- * Hold the buffer backing an AIO request and return an AIO transmit
- * buffer.
+ * Allocate a chain of unmapped mbufs describing the next 'len' bytes
+ * of an AIO job.
*/
-static int
-hold_aio(struct kaiocb *job)
+static struct mbuf *
+alloc_aiotx_mbuf(struct kaiocb *job, int len)
{
- struct aiotx_buffer *ab;
struct vmspace *vm;
+ vm_page_t pgs[MBUF_PEXT_MAX_PGS];
+ struct mbuf *m, *top, *last;
+ struct mbuf_ext_pgs *ext_pgs;
vm_map_t map;
- vm_offset_t start, end, pgoff;
- int n;
+ vm_offset_t start;
+ int i, mlen, npages, pgoff;
- MPASS(job->backend1 == NULL);
+ KASSERT(job->aio_sent + len <= job->uaiocb.aio_nbytes,
+ ("%s(%p, %d): request to send beyond end of buffer", __func__,
+ job, len));
/*
* The AIO subsystem will cancel and drain all requests before
@@ -1983,35 +1978,65 @@
*/
vm = job->userproc->p_vmspace;
map = &vm->vm_map;
- start = (uintptr_t)job->uaiocb.aio_buf;
+ start = (uintptr_t)job->uaiocb.aio_buf + job->aio_sent;
pgoff = start & PAGE_MASK;
- end = round_page(start + job->uaiocb.aio_nbytes);
- start = trunc_page(start);
- n = atop(end - start);
- ab = malloc(sizeof(*ab) + n * sizeof(vm_page_t), M_CXGBE, M_WAITOK |
- M_ZERO);
- refcount_init(&ab->refcount, 1);
- ab->ps.pages = (vm_page_t *)(ab + 1);
- ab->ps.npages = vm_fault_quick_hold_pages(map, start, end - start,
- VM_PROT_WRITE, ab->ps.pages, n);
- if (ab->ps.npages < 0) {
- free(ab, M_CXGBE);
- return (EFAULT);
- }
+ top = NULL;
+ last = NULL;
+ while (len > 0) {
+ mlen = imin(len, MBUF_PEXT_MAX_PGS * PAGE_SIZE - pgoff);
+ KASSERT(mlen == len || (start + mlen & PAGE_MASK) == 0,
+ ("%s: next start (%#jx + %#x) is not page aligned",
+ __func__, (uintmax_t)start, mlen));
- KASSERT(ab->ps.npages == n,
- ("hold_aio: page count mismatch: %d vs %d", ab->ps.npages, n));
+ npages = vm_fault_quick_hold_pages(map, start, mlen,
+ VM_PROT_WRITE, pgs, nitems(pgs));
+ if (npages < 0)
+ break;
+
+ m = mb_alloc_ext_pgs(M_WAITOK, false, aiotx_free_pgs);
+ if (m == NULL) {
+ vm_page_unhold_pages(pgs, npages);
+ break;
+ }
+
+ ext_pgs = m->m_ext.ext_pgs;
+ ext_pgs->first_pg_off = pgoff;
+ ext_pgs->npgs = npages;
+ if (npages == 1) {
+ KASSERT(mlen + pgoff <= PAGE_SIZE,
+ ("%s: single page is too large (off %d len %d)",
+ __func__, pgoff, mlen));
+ ext_pgs->last_pg_len = mlen;
+ } else {
+ ext_pgs->last_pg_len = mlen - (PAGE_SIZE - pgoff) -
+ (npages - 2) * PAGE_SIZE;
+ }
+ for (i = 0; i < npages; i++)
+ ext_pgs->pa[i] = VM_PAGE_TO_PHYS(pgs[i]);
+
+ m->m_len = mlen;
+ m->m_ext.ext_size = npages * PAGE_SIZE;
+ m->m_ext.ext_arg1 = job;
+ refcount_acquire(&job->aio_refs);
- ab->ps.offset = pgoff;
- ab->ps.len = job->uaiocb.aio_nbytes;
- ab->job = job;
- job->backend1 = ab;
#ifdef VERBOSE_TRACES
- CTR5(KTR_CXGBE, "%s: tid %d, new pageset %p for job %p, npages %d",
- __func__, jobtotid(job), &ab->ps, job, ab->ps.npages);
+ CTR5(KTR_CXGBE, "%s: tid %d, new mbuf %p for job %p, npages %d",
+ __func__, jobtotid(job), m, job, npages);
#endif
- return (0);
+
+ if (top == NULL)
+ top = m;
+ else
+ last->m_next = m;
+ last = m;
+
+ len -= mlen;
+ start += mlen;
+ pgoff = 0;
+ }
+
+ return (top);
}
static void
@@ -2020,18 +2045,16 @@
struct adapter *sc;
struct sockbuf *sb;
struct file *fp;
- struct aiotx_buffer *ab;
struct inpcb *inp;
struct tcpcb *tp;
struct mbuf *m;
- int error;
+ int error, len;
bool moretocome, sendmore;
sc = td_adapter(toep->td);
sb = &so->so_snd;
SOCKBUF_UNLOCK(sb);
fp = job->fd_file;
- ab = job->backend1;
m = NULL;
#ifdef MAC
@@ -2040,23 +2063,12 @@
goto out;
#endif
- if (ab == NULL) {
- error = hold_aio(job);
- if (error != 0)
- goto out;
- ab = job->backend1;
- }
-
/* Inline sosend_generic(). */
- job->msgsnd = 1;
-
error = sblock(sb, SBL_WAIT);
MPASS(error == 0);
sendanother:
- m = m_get(M_WAITOK, MT_DATA);
-
SOCKBUF_LOCK(sb);
if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
SOCKBUF_UNLOCK(sb);
@@ -2105,14 +2117,14 @@
* Write as much data as the socket permits, but no more than a
* a single sndbuf at a time.
*/
- m->m_len = sbspace(sb);
- if (m->m_len > ab->ps.len - job->aio_sent) {
- m->m_len = ab->ps.len - job->aio_sent;
+ len = sbspace(sb);
+ if (len > job->uaiocb.aio_nbytes - job->aio_sent) {
+ len = job->uaiocb.aio_nbytes - job->aio_sent;
moretocome = false;
} else
moretocome = true;
- if (m->m_len > sc->tt.sndbuf) {
- m->m_len = sc->tt.sndbuf;
+ if (len > sc->tt.sndbuf) {
+ len = sc->tt.sndbuf;
sendmore = true;
} else
sendmore = false;
@@ -2120,7 +2132,14 @@
if (!TAILQ_EMPTY(&toep->aiotx_jobq))
moretocome = true;
SOCKBUF_UNLOCK(sb);
- MPASS(m->m_len != 0);
+ MPASS(len != 0);
+
+ m = alloc_aiotx_mbuf(job, len);
+ if (m == NULL) {
+ sbunlock(sb);
+ error = EFAULT;
+ goto out;
+ }
/* Inlined tcp_usr_send(). */
@@ -2133,12 +2152,8 @@
goto out;
}
- refcount_acquire(&ab->refcount);
- m_extadd(m, NULL, ab->ps.len, t4_aiotx_mbuf_free, ab,
- (void *)(uintptr_t)job->aio_sent, 0, EXT_NET_DRV);
- m->m_ext.ext_flags |= EXT_FLAG_AIOTX;
- job->aio_sent += m->m_len;
-
+ job->aio_sent += m_length(m, NULL);
+
sbappendstream(sb, m, 0);
m = NULL;
@@ -2160,8 +2175,8 @@
goto out;
/*
- * If this is a non-blocking socket and the request has not
- * been fully completed, requeue it until the socket is ready
+ * If this is a blocking socket and the request has not been
+ * fully completed, requeue it until the socket is ready
* again.
*/
if (job->aio_sent < job->uaiocb.aio_nbytes &&
@@ -2177,22 +2192,18 @@
}
/*
- * If the request will not be requeued, drop a reference on
- * the aiotx buffer. Any mbufs in flight should still
- * contain a reference, but this drops the reference that the
- * job owns while it is waiting to queue mbufs to the socket.
+ * If the request will not be requeued, drop the queue's
+ * reference to the job. Any mbufs in flight should still
+ * hold a reference, but this drops the reference that the
+ * queue owns while it is waiting to queue mbufs to the
+ * socket.
*/
- free_aiotx_buffer(ab);
+ aiotx_free_job(job);
out:
if (error) {
- if (ab != NULL) {
- job->aio_error = error;
- free_aiotx_buffer(ab);
- } else {
- MPASS(job->aio_sent == 0);
- aio_complete(job, -1, error);
- }
+ job->aio_error = (void *)(intptr_t)error;
+ aiotx_free_job(job);
}
if (m != NULL)
m_free(m);
@@ -2246,7 +2257,6 @@
static void
t4_aiotx_cancel(struct kaiocb *job)
{
- struct aiotx_buffer *ab;
struct socket *so;
struct sockbuf *sb;
struct tcpcb *tp;
@@ -2263,11 +2273,8 @@
TAILQ_REMOVE(&toep->aiotx_jobq, job, list);
SOCKBUF_UNLOCK(sb);
- ab = job->backend1;
- if (ab != NULL)
- free_aiotx_buffer(ab);
- else
- aio_cancel(job);
+ job->aio_error = (void *)(intptr_t)ECANCELED;
+ aiotx_free_job(job);
}
int
@@ -2293,6 +2300,7 @@
#endif
if (!aio_set_cancel_function(job, t4_aiotx_cancel))
panic("new job was cancelled");
+ refcount_init(&job->aio_refs, 1);
TAILQ_INSERT_TAIL(&toep->aiotx_jobq, job, list);
if (sowriteable(so))
t4_aiotx_queue_toep(so, toep);
Index: sys/dev/cxgbe/tom/t4_tls.c
===================================================================
--- sys/dev/cxgbe/tom/t4_tls.c
+++ sys/dev/cxgbe/tom/t4_tls.c
@@ -1193,7 +1193,6 @@
/* Read the header of the next TLS record. */
sndptr = sbsndmbuf(sb, tls_ofld->sb_off, &sndptroff);
- MPASS(!IS_AIOTX_MBUF(sndptr));
m_copydata(sndptr, sndptroff, sizeof(thdr), (caddr_t)&thdr);
tls_size = htons(thdr.length);
plen = TLS_HEADER_LENGTH + tls_size;
Index: sys/dev/cxgbe/tom/t4_tom.h
===================================================================
--- sys/dev/cxgbe/tom/t4_tom.h
+++ sys/dev/cxgbe/tom/t4_tom.h
@@ -127,11 +127,6 @@
#define PS_WIRED 0x0001 /* Pages wired rather than held. */
#define PS_PPODS_WRITTEN 0x0002 /* Page pods written to the card. */
-#define EXT_FLAG_AIOTX EXT_FLAG_VENDOR1
-
-#define IS_AIOTX_MBUF(m) \
- ((m)->m_flags & M_EXT && (m)->m_ext.ext_flags & EXT_FLAG_AIOTX)
-
struct ddp_buffer {
struct pageset *ps;
@@ -153,12 +148,6 @@
struct mtx lock;
};
-struct aiotx_buffer {
- struct pageset ps;
- struct kaiocb *job;
- int refcount;
-};
-
struct toepcb {
TAILQ_ENTRY(toepcb) link; /* toep_list */
u_int flags; /* miscellaneous flags */
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Apr 20, 5:23 PM (18 h, 18 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31853523
Default Alt Text
D20839.id59345.diff (12 KB)
Attached To
Mode
D20839: Use unmapped (M_NOMAP) mbufs for zero-copy AIO writes via TOE.
Attached
Detach File
Event Timeline
Log In to Comment