#include    ys/cdefs.h> __FBSDID("$FreeBSD: /repoman/r/ncvs/src/sys/kern/kern_subr.c,v 1.88.2.2 2005/01/31 23:26:16 imp Exp $");  #include "opt_zero.h"  #include <sys/param.h> #include <sys/systm.h> #include  #include  #include  #include  #include s/mutex.h> #include <sys/proc.h> #include  #include  #include  #include  #include   #include  #include <vm/vm_page.h>
#include <vm/vm_map.h>
#ifdef ZERO_COPY_SOCKETS
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#endif

SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV,
	"Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");

#ifdef ZERO_COPY_SOCKETS
/* Declared in uipc_socket.c */
extern int so_zero_copy_receive;

/*
 * Hand the kernel page that backs kaddr over to the user mapping at uaddr.
 *
 * Any page already present at uaddr's (object, pindex) is unmapped and
 * freed; the kernel page is then renamed into that slot and marked fully
 * valid.  srcobj is part of the interface but is not used here.
 *
 * Returns EFAULT when uaddr cannot be looked up for writing in mapa and
 * KERN_SUCCESS otherwise.  Panics if the kernel page is busy, held or on a
 * free queue.
 */
static int
vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr,
    vm_offset_t uaddr)
{
	vm_map_t map = mapa;
	vm_page_t kern_pg, user_pg;
	vm_object_t uobject;
	vm_map_entry_t entry;
	vm_pindex_t upindex;
	vm_prot_t prot;
	boolean_t wired;

	/*
	 * First lookup the kernel page.
	 */
	kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr));
	/*
	 * XXX The vm object containing kern_pg needs locking.
	 */
	if ((vm_map_lookup(&map, uaddr, VM_PROT_WRITE, &entry, &uobject,
	    &upindex, &prot, &wired)) != KERN_SUCCESS) {
		return(EFAULT);
	}
	VM_OBJECT_LOCK(uobject);
	if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) {
		/*
		 * Take the page queue lock again after every sleep.
		 * NOTE(review): presumably vm_page_sleep_if_busy() drops
		 * the lock when it sleeps -- confirm.
		 */
		do
			vm_page_lock_queues();
		while (vm_page_sleep_if_busy(user_pg, 1, "vm_pgmoveco"));
		vm_page_busy(user_pg);
		pmap_remove_all(user_pg);
		vm_page_free(user_pg);
	} else
		vm_page_lock_queues();
	if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) ||
	    (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) {
		printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), "
		    "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex,
		    kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 1 : 0,
		    kern_pg->hold_count, (u_long)kern_pg->phys_addr);
		if ((kern_pg->queue - kern_pg->pc) == PQ_FREE)
			panic("vm_pgmoveco: renaming free page");
		else
			panic("vm_pgmoveco: renaming busy page");
	}
	/* Move the kernel page into the user object at the looked-up index. */
	vm_page_busy(kern_pg);
	vm_page_rename(kern_pg, uobject, upindex);
	vm_page_flag_clear(kern_pg, PG_BUSY);
	kern_pg->valid = VM_PAGE_BITS_ALL;
	vm_page_unlock_queues();
	VM_OBJECT_UNLOCK(uobject);
	vm_map_lookup_done(map, entry);
	return(KERN_SUCCESS);
}
#endif /* ZERO_COPY_SOCKETS */

/*
 * Move up to n bytes between the kernel buffer cp and the I/O vector
 * described by uio.  For UIO_READ data goes from cp into the iovecs, for
 * UIO_WRITE from the iovecs into cp.  The current iovec, uio_resid and
 * uio_offset are advanced past whatever was moved.
 *
 * Returns 0, or the error reported by copyin()/copyout() for a
 * UIO_USERSPACE transfer.
 */
int
uiomove(void *cp, int n, struct uio *uio)
{
	struct thread *td = curthread;
	struct iovec *iov;
	u_int cnt;
	int error = 0;
	int save = 0;

	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
	    ("uiomove: mode"));
	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
	    ("uiomove proc"));
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "Calling uiomove()");

	/*
	 * Remember whether TDP_DEADLKTREAT was already set so that it is
	 * only cleared on exit if this call was the one that set it.
	 */
	save = td->td_pflags & TDP_DEADLKTREAT;
	td->td_pflags |= TDP_DEADLKTREAT;

	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			/* Current iovec is exhausted; step to the next one. */
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;

		switch (uio->uio_segflg) {

		case UIO_USERSPACE:
			if (ticks - PCPU_GET(switchticks) >= hogticks)
				uio_yield();
			if (uio->uio_rw == UIO_READ)
				error = copyout(cp, iov->iov_base, cnt);
			else
				error = copyin(iov->iov_base, cp, cnt);
			if (error)
				goto out;
			break;

		case UIO_SYSSPACE:
			if (uio->uio_rw == UIO_READ)
				bcopy(cp, iov->iov_base, cnt);
			else
				bcopy(iov->iov_base, cp, cnt);
			break;
		case UIO_NOCOPY:
			break;
		}
		iov->iov_base = (char *)iov->iov_base + cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		cp = (char *)cp + cnt;
		n -= cnt;
	}
out:
	if (save == 0)
		td->td_pflags &= ~TDP_DEADLKTREAT;
	return (error);
}

/*
 * Wrapper for uiomove() that validates the arguments against a known-good
 * kernel buffer.  Currently, uiomove accepts a signed (n) argument, which
 * is almost definitely a bad thing, so we catch that here as well.  We
 * return a runtime failure, but it might be desirable to generate a runtime
 * assertion failure instead.
 *
 * Returns EINVAL for a negative offset or resid, or an offset that does not
 * fit in an unsigned int; returns 0 without copying when the offset is at
 * or past the end of the buffer.
 */
int
uiomove_frombuf(void *buf, int buflen, struct uio *uio)
{
	unsigned int offset, n;

	if (uio->uio_offset < 0 || uio->uio_resid < 0 ||
	    (offset = uio->uio_offset) != uio->uio_offset)
		return (EINVAL);
	if (buflen <= 0 || offset >= buflen)
		return (0);
	if ((n = buflen - offset) > INT_MAX)
		return (EINVAL);
	return (uiomove((char *)buf + offset, n, uio));
}

#ifdef ZERO_COPY_SOCKETS
/*
 * Experimental support for zero-copy I/O
 *
 * Transfer cnt bytes between cp and the current iovec of a user-space uio.
 * A read is done by page trading (vm_pgmoveco) when zero-copy receive is
 * enabled, obj is a non-NULL OBJT_DEFAULT object, the page is disposable,
 * and the count, both addresses and the uio offset are page aligned;
 * otherwise by copyout().  A write always uses copyin().
 */
static int
userspaceco(void *cp, u_int cnt, struct uio *uio, struct vm_object *obj,
    int disposable)
{
	struct iovec *iov;
	int error;

	iov = uio->uio_iov;
	if (uio->uio_rw == UIO_READ) {
		if ((so_zero_copy_receive != 0)
		 && (obj != NULL)
		 && ((cnt & PAGE_MASK) == 0)
		 && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0)
		 && ((uio->uio_offset & PAGE_MASK) == 0)
		 && ((((intptr_t) cp) & PAGE_MASK) == 0)
		 && (obj->type == OBJT_DEFAULT)
		 && (disposable != 0)) {
			/* SOCKET: use page-trading */
			/*
			 * We only want to call vm_pgmoveco() on
			 * disposable pages, since it gives the
			 * kernel page to the userland process.
			 */
			error =	vm_pgmoveco(&curproc->p_vmspace->vm_map,
			    obj, (vm_offset_t)cp,
			    (vm_offset_t)iov->iov_base);

			/*
			 * If we get an error back, attempt
			 * to use copyout() instead.  The
			 * disposable page should be freed
			 * automatically if we weren't able to move
			 * it into userland.
			 */
			if (error != 0)
				error = copyout(cp, iov->iov_base, cnt);
		} else {
			error = copyout(cp, iov->iov_base, cnt);
		}
	} else {
		error = copyin(iov->iov_base, cp, cnt);
	}
	return (error);
}

/*
 * Zero-copy capable variant of uiomove(): same iovec walking and
 * accounting, but user-space transfers go through userspaceco() with the
 * supplied object and disposable hint.
 *
 * Returns 0, or the first error returned by userspaceco().
 */
int
uiomoveco(void *cp, int n, struct uio *uio, struct vm_object *obj,
    int disposable)
{
	struct iovec *iov;
	u_int cnt;
	int error;

	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
	    ("uiomoveco: mode"));
	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
	    ("uiomoveco proc"));

	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;

		switch (uio->uio_segflg) {

		case UIO_USERSPACE:
			if (ticks - PCPU_GET(switchticks) >= hogticks)
				uio_yield();

			error = userspaceco(cp, cnt, uio, obj, disposable);

			if (error)
				return (error);
			break;

		case UIO_SYSSPACE:
			if (uio->uio_rw == UIO_READ)
				bcopy(cp, iov->iov_base, cnt);
			else
				bcopy(iov->iov_base, cp, cnt);
			break;
		case UIO_NOCOPY:
			break;
		}
		iov->iov_base = (char *)iov->iov_base + cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		cp = (char *)cp + cnt;
		n -= cnt;
	}
	return (0);
}
#endif /* ZERO_COPY_SOCKETS */

/*
 * Give next character to user as result of read.
 *
 * Panics if the uio has no iovecs or no residual space left.  Returns
 * EFAULT if the byte cannot be stored to user space, 0 otherwise.
 */
int
ureadc(int c, struct uio *uio)
{
	struct iovec *iov;
	char *iov_base;

again:
	if (uio->uio_iovcnt == 0 || uio->uio_resid == 0)
		panic("ureadc");
	iov = uio->uio_iov;
	if (iov->iov_len == 0) {
		/* Skip an empty iovec and re-check the remaining ones. */
		uio->uio_iovcnt--;
		uio->uio_iov++;
		goto again;
	}
	switch (uio->uio_segflg) {

	case UIO_USERSPACE:
		if (subyte(iov->iov_base, c) < 0)
			return (EFAULT);
		break;

	case UIO_SYSSPACE:
		iov_base = iov->iov_base;
		*iov_base = c;
		break;

	case UIO_NOCOPY:
		break;
	}
	iov->iov_base = (char *)iov->iov_base + 1;
	iov->iov_len--;
	uio->uio_resid--;
	uio->uio_offset++;
	return (0);
}

/*
 * General routine to allocate a hash table.
 *
 * The table size is the largest power of two that does not exceed
 * elements; size - 1 is stored in *hashmask.  Every bucket is initialised
 * as an empty list.  Panics if elements is not positive.
 */
void *
hashinit(int elements, struct malloc_type *type, u_long *hashmask)
{
	long hashsize;
	LIST_HEAD(generic, generic) *hashtbl;
	int i;

	if (elements <= 0)
		panic("hashinit: bad elements");
	/* Overshoot to the first power of two above elements, then halve. */
	for (hashsize = 1; hashsize <= elements; hashsize <<= 1)
		continue;
	hashsize >>= 1;
	hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK);
	for (i = 0; i < hashsize; i++)
		LIST_INIT(&hashtbl[i]);
	*hashmask = hashsize - 1;
	return (hashtbl);
}

/*
 * Free a table of hashmask + 1 list heads.  Panics if any bucket is still
 * non-empty.
 */
void
hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask)
{
	LIST_HEAD(generic, generic) *hashtbl, *hp;

	hashtbl = vhashtbl;
	for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++)
		if (!LIST_EMPTY(hp))
			panic("hashdestroy: hash not empty");
	free(hashtbl, type);
}

/* Candidate table sizes for phashinit(), in increasing order. */
static int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, 2039,
			2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653,
			7159, 7673, 8191, 12281, 16381, 24571, 32749 };
#define NPRIMES (sizeof(primes) / sizeof(primes[0]))

/*
 * General routine to allocate a prime number sized hash table.
 *
 * The size chosen is the largest entry of primes[] that does not exceed
 * elements (primes[0] when elements is below primes[1]); it is stored in
 * *nentries.  Panics if elements is not positive.
 */
void *
phashinit(int elements, struct malloc_type *type, u_long *nentries)
{
	long hashsize;
	LIST_HEAD(generic, generic) *hashtbl;
	int i;

	if (elements <= 0)
		panic("phashinit: bad elements");
	/* Advance i to the first prime above elements, or to NPRIMES. */
	for (i = 1, hashsize = primes[1]; hashsize <= elements;) {
		i++;
		if (i == NPRIMES)
			break;
		hashsize = primes[i];
	}
	hashsize = primes[i - 1];
	hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK);
	for (i = 0; i < hashsize; i++)
		LIST_INIT(&hashtbl[i]);
	*nentries = hashsize;
	return (hashtbl);
}

/*
 * Set the current thread's priority to its ksegrp user priority and force
 * an involuntary context switch.  Giant is dropped around the switch and
 * picked up again afterwards.
 */
void
uio_yield(void)
{
	struct thread *td;

	td = curthread;
	mtx_lock_spin(&sched_lock);
	DROP_GIANT();
	sched_prio(td, td->td_ksegrp->kg_user_pri); /* XXXKSE */
	mi_switch(SW_INVOL, NULL);
	mtx_unlock_spin(&sched_lock);
	PICKUP_GIANT();
}

/*
 * Copy len bytes from src to dst, where seg says whether src is a user
 * (UIO_USERSPACE, copyin) or kernel (UIO_SYSSPACE, bcopy) address.
 * Panics on any other seg.  Returns 0 or the copyin() error.
 */
int
copyinfrom(const void * __restrict src, void * __restrict dst, size_t len,
    int seg)
{
	int error = 0;

	switch (seg) {
	case UIO_USERSPACE:
		error = copyin(src, dst, len);
		break;
	case UIO_SYSSPACE:
		bcopy(src, dst, len);
		break;
	default:
		panic("copyinfrom: bad seg %d\n", seg);
	}
	return (error);
}

/*
 * String counterpart of copyinfrom(): uses copyinstr() for a user source
 * and copystr() for a kernel source, passing len and copied through.
 * Panics on any other seg.
 */
int
copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len,
    size_t * __restrict copied, int seg)
{
	int error = 0;

	switch (seg) {
	case UIO_USERSPACE:
		error = copyinstr(src, dst, len, copied);
		break;
	case UIO_SYSSPACE:
		error = copystr(src, dst, len, copied);
		break;
	default:
		panic("copyinstrfrom: bad seg %d\n", seg);
	}
	return (error);
}

/*
 * Allocate an M_IOV buffer and copy iovcnt iovecs into it from the user
 * address iovp.  On success *iov points at the new array and 0 is
 * returned.  If iovcnt exceeds UIO_MAXIOV the caller-supplied error value
 * is returned unchanged; if copyin() fails the buffer is freed and its
 * error is returned.  *iov is NULL on every failure path.
 */
int
copyiniov(struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error)
{
	u_int iovlen;

	*iov = NULL;
	if (iovcnt > UIO_MAXIOV)
		return (error);
	iovlen = iovcnt * sizeof (struct iovec);
	*iov = malloc(iovlen, M_IOV, M_WAITOK);
	error = copyin(iovp, *iov, iovlen);
	if (error) {
		free(*iov, M_IOV);
		*iov = NULL;
	}
	return (error);
}
int copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop) { struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); error = copyin(iovp, iov, iovlen); if (error) { free(uio, M_IOV); return (error); } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset =
-1; uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } struct uio * cloneuio(struct uio *uiop) { struct uio *uio; int iovlen; iovlen = uiop->uio_iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); *uio = *uiop; uio->uio_iov = (struct iovec *)(uio + 1); bcopy(uiop->uio_iov, uio->uio_iov, iovlen); return (uio); }#
include <sys/cdefs.h> __FBSDID("$FreeBSD: /repoman/r/ncvs/src/sys/kern/kern_subr.c,v 1.88.2.2 2005/01/31 23:26:16 imp Exp $"); #include "opt_zero.h" #include <sys/param.h> #include ys/systm.h> #include ys/kernel.h> #include <sys/ktr.h> #include #include #include <sys/mutex.h> #include <sys/proc.h> #include <sys/malloc.h> #include #include #include #include #include #include m/vm_page.h>
blubblubbblubblubblubblubblubblub#include m/vm_map.h> #ifdef ZERO_COPY_SOCKETS #include #include #endif SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); #ifdef ZERO_COPY_SOCKETS /* Declared in uipc_socket.c */ extern int so_zero_copy_receive; static int vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr, vm_offset_t uaddr) { vm_map_t map = mapa; vm_page_t kern_pg, user_pg; vm_object
_t uobject; vm_map_entry_t entry; vm_pindex_t upindex, kpindex; vm_prot_t prot; boolean_t wired; /* * First lookup the kernel page. */ kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); /* * XXX The vm object containing kern_pg needs locking. */ if ((vm_map_lookup(&map, uaddr, VM_PROT_WRITE, &entry, &uobject, &upindex, &prot, &wired)) != KERN_SUCCESS) { return(EFAULT); } VM_OBJECT_LOCK(uobject); if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { do vm_page_lock_queues(); while (vm_pa
ge_sleep_if_busy(user_pg, 1, "vm_pgmoveco")); vm_page_busy(user_pg); pmap_remove_all(user_pg); vm_page_free(user_pg); } else vm_page_lock_queues(); if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) || (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) { printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), " "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex, kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 1 : 0, kern_pg->hold_count, (u_long)kern_pg->phys_addr); if
((kern_pg->queue - kern_pg->pc) == PQ_FREE) panic("vm_pgmoveco: renaming free page"); else panic("vm_pgmoveco: renaming busy page"); } kpindex = kern_pg->pindex; vm_page_busy(kern_pg); vm_page_rename(kern_pg, uobject, upindex); vm_page_flag_clear(kern_pg, PG_BUSY); kern_pg->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); VM_OBJECT_UNLOCK(uobject); vm_map_lookup_done(map, entry); return(KERN_SUCCESS); } #endif /* ZERO_COPY_SOCKETS */ int uiomove(void *cp, int n, struct uio *uio) { struct thr
ead *td = curthread; struct iovec *iov; u_int cnt; int error = 0; int save = 0; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomove: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomove proc")); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Calling uiomove()"); save = td->td_pflags & TDP_DEADLKTREAT; td->td_pflags |= TDP_DEADLKTREAT; while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { u
io->uio_iov++; uio->uio_iovcnt--; continue; } if (cnt > n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); if (uio->uio_rw == UIO_READ) error = copyout(cp, iov->iov_base, cnt); else error = copyin(iov->iov_base, cp, cnt); if (error) goto out; break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->io
v_base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -= cnt; } out: if (save == 0) td->td_pflags &= ~TDP_DEADLKTREAT; return (error); } /* * Wrapper for uiomove() that validates the arguments against a known-good * kernel buffer. Currently, uiomove accepts a signed (n) argument, which * is almost definitely a bad thing, so we catch that here as well. We * return a runtime failure, but it might be desirable to generate a runtime * asse
rtion failure instead. */ int uiomove_frombuf(void *buf, int buflen, struct uio *uio) { unsigned int offset, n; if (uio->uio_offset < 0 || uio->uio_resid < 0 || (offset = uio->uio_offset) != uio->uio_offset) return (EINVAL); if (buflen <= 0 || offset >= buflen) return (0); if ((n = buflen - offset) > INT_MAX) return (EINVAL); return (uiomove((char *)buf + offset, n, uio)); } #ifdef ZERO_COPY_SOCKETS /* * Experimental support for zero-copy I/O */ static int userspaceco(void *cp, u_int cn
t, struct uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; int error; iov = uio->uio_iov; if (uio->uio_rw == UIO_READ) { if ((so_zero_copy_receive != 0) && (obj != NULL) && ((cnt & PAGE_MASK) == 0) && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) && ((uio->uio_offset & PAGE_MASK) == 0) && ((((intptr_t) cp) & PAGE_MASK) == 0) && (obj->type == OBJT_DEFAULT) && (disposable != 0)) { /* SOCKET: use page-trading */ /* * We only want to call vm_pgmoveco() on * dispo
seable pages, since it gives the * kernel page to the userland process. */ error =vm_pgmoveco(&curproc->p_vmspace->vm_map, obj, (vm_offset_t)cp, (vm_offset_t)iov->iov_base); /* * If we get an error back, attempt * to use copyout() instead. The * disposable page should be freed * automatically if we weren't able to move * it into userland. */ if (error != 0) error = copyout(cp, iov->iov_base, cnt); } else { error = copyout(cp, iov->iov_base, cnt); } } else { error = copyin(iov->
iov_base, cp, cnt); } return (error); } int uiomoveco(void *cp, int n, struct uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; u_int cnt; int error; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomoveco: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomoveco proc")); while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->uio_iov++; uio->uio_iovcnt--; continue; } if (
cnt > n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); error = userspaceco(cp, cnt, uio, obj, disposable); if (error) return (error); break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt;
n -= cnt; } return (0); } #endif /* ZERO_COPY_SOCKETS */ /* * Give next character to user as result of read. */ int ureadc(int c, struct uio *uio) { struct iovec *iov; char *iov_base; again: if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) panic("ureadc"); iov = uio->uio_iov; if (iov->iov_len == 0) { uio->uio_iovcnt--; uio->uio_iov++; goto again; } switch (uio->uio_segflg) { case UIO_USERSPACE: if (subyte(iov->iov_base, c) < 0) return (EFAULT); break; case UIO_SYSSPACE: iov_base = iov->iov
_base; *iov_base = c; iov->iov_base = iov_base; break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + 1; iov->iov_len--; uio->uio_resid--; uio->uio_offset++; return (0); } /* * General routine to allocate a hash table. */ void * hashinit(int elements, struct malloc_type *type, u_long *hashmask) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("hashinit: bad elements"); for (hashsize = 1; hashsize <= elements; hashsize <<= 1) continue;
hashsize >>= 1; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *hashmask = hashsize - 1; return (hashtbl); } void hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask) { LIST_HEAD(generic, generic) *hashtbl, *hp; hashtbl = vhashtbl; for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++) if (!LIST_EMPTY(hp)) panic("hashdestroy: hash not empty"); free(hashtbl, type); } static int primes[] = { 1, 13, 31,
61, 127, 251, 509, 761, 1021, 1531, 2039, 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; #define NPRIMES (sizeof(primes) / sizeof(primes[0])) /* * General routine to allocate a prime number sized hash table. */ void * phashinit(int elements, struct malloc_type *type, u_long *nentries) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("phashinit: bad elements"); for (i = 1, hashsize = primes[1]; hashs
ize <= elements;) { i++; if (i == NPRIMES) break; hashsize = primes[i]; } hashsize = primes[i - 1]; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *nentries = hashsize; return (hashtbl); } void uio_yield(void) { struct thread *td; td = curthread; mtx_lock_spin(&sched_lock); DROP_GIANT(); sched_prio(td, td->td_ksegrp->kg_user_pri); /* XXXKSE */ mi_switch(SW_INVOL, NULL); mtx_unlock_spin(&sched_lock); PICKUP_GIANT();
} int copyinfrom(const void * __restrict src, void * __restrict dst, size_t len, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyin(src, dst, len); break; case UIO_SYSSPACE: bcopy(src, dst, len); break; default: panic("copyinfrom: bad seg %d\n", seg); } return (error); } int copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len, size_t * __restrict copied, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyinstr(sr
c, dst, len, copied); break; case UIO_SYSSPACE: error = copystr(src, dst, len, copied); break; default: panic("copyinstrfrom: bad seg %d\n", seg); } return (error); } int copyiniov(struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error) { u_int iovlen; *iov = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof (struct iovec); *iov = malloc(iovlen, M_IOV, M_WAITOK); error = copyin(iovp, *iov, iovlen); if (error) { free(*iov, M_IOV); *iov = NULL; } return (error); }
int copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop) { struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); error = copyin(iovp, iov, iovlen); if (error) { free(uio, M_IOV); return (error); } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -
1; uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } struct uio * cloneuio(struct uio *uiop) { struct uio *uio; int iovlen; iovlen = uiop->uio_iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); *uio = *uiop; uio->uio_iov = (struct iovec *)(uio + 1); bcopy(uiop->uio_iov, uio->uio_iov, iovlen); return (uio); }#i
nclude <sys/cdefs.h> __FBSDID("$FreeBSD: /repoman/r/ncvs/src/sys/kern/kern_subr.c,v 1.88.2.2 2005/01/31 23:26:16 imp Exp $"); #include "opt_zero.h" #include <sys/param.h> #include ys/systm.h> #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/limits.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/proc.h> #include <sys/malloc.h> #include #include #include #include #include #include <vm/vm_page.h> #
include <vm/vm_map.h> #ifdef ZERO_COPY_SOCKETS #include #include #endif SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); #ifdef ZERO_COPY_SOCKETS /* Declared in uipc_socket.c */ extern int so_zero_copy_receive; static int vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr, vm_offset_t uaddr) { vm_map_t map = mapa; vm_page_t kern_pg, user_pg; vm_object_
t uobject; vm_map_entry_t entry; vm_pindex_t upindex, kpindex; vm_prot_t prot; boolean_t wired; /* * First lookup the kernel page. */ kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); /* * XXX The vm object containing kern_pg needs locking. */ if ((vm_map_lookup(&map, uaddr, VM_PROT_WRITE, &entry, &uobject, &upindex, &prot, &wired)) != KERN_SUCCESS) { return(EFAULT); } VM_OBJECT_LOCK(uobject); if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { do vm_page_lock_queues(); while (vm_pag
e_sleep_if_busy(user_pg, 1, "vm_pgmoveco")); vm_page_busy(user_pg); pmap_remove_all(user_pg); vm_page_free(user_pg); } else vm_page_lock_queues(); if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) || (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) { printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), " "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex, kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 1 : 0, kern_pg->hold_count, (u_long)kern_pg->phys_addr); if
((kern_pg->queue - kern_pg->pc) == PQ_FREE) panic("vm_pgmoveco: renaming free page"); else panic("vm_pgmoveco: renaming busy page"); } kpindex = kern_pg->pindex; vm_page_busy(kern_pg); vm_page_rename(kern_pg, uobject, upindex); vm_page_flag_clear(kern_pg, PG_BUSY); kern_pg->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); VM_OBJECT_UNLOCK(uobject); vm_map_lookup_done(map, entry); return(KERN_SUCCESS); } #endif /* ZERO_COPY_SOCKETS */ int uiomove(void *cp, int n, struct uio *uio) { struct thre
ad *td = curthread; struct iovec *iov; u_int cnt; int error = 0; int save = 0; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomove: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomove proc")); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Calling uiomove()"); save = td->td_pflags & TDP_DEADLKTREAT; td->td_pflags |= TDP_DEADLKTREAT; while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { ui
o->uio_iov++; uio->uio_iovcnt--; continue; } if (cnt > n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); if (uio->uio_rw == UIO_READ) error = copyout(cp, iov->iov_base, cnt); else error = copyin(iov->iov_base, cp, cnt); if (error) goto out; break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov
_base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -= cnt; } out: if (save == 0) td->td_pflags &= ~TDP_DEADLKTREAT; return (error); } /* * Wrapper for uiomove() that validates the arguments against a known-good * kernel buffer. Currently, uiomove accepts a signed (n) argument, which * is almost definitely a bad thing, so we catch that here as well. We * return a runtime failure, but it might be desirable to generate a runtime * asser
tion failure instead. */ int uiomove_frombuf(void *buf, int buflen, struct uio *uio) { unsigned int offset, n; if (uio->uio_offset < 0 || uio->uio_resid < 0 || (offset = uio->uio_offset) != uio->uio_offset) return (EINVAL); if (buflen <= 0 || offset >= buflen) return (0); if ((n = buflen - offset) > INT_MAX) return (EINVAL); return (uiomove((char *)buf + offset, n, uio)); } #ifdef ZERO_COPY_SOCKETS /* * Experimental support for zero-copy I/O */ static int userspaceco(void *cp, u_int cnt
, struct uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; int error; iov = uio->uio_iov; if (uio->uio_rw == UIO_READ) { if ((so_zero_copy_receive != 0) && (obj != NULL) && ((cnt & PAGE_MASK) == 0) && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) && ((uio->uio_offset & PAGE_MASK) == 0) && ((((intptr_t) cp) & PAGE_MASK) == 0) && (obj->type == OBJT_DEFAULT) && (disposable != 0)) { /* SOCKET: use page-trading */ /* * We only want to call vm_pgmoveco() on * dispos
eable pages, since it gives the * kernel page to the userland process. */ error =vm_pgmoveco(&curproc->p_vmspace->vm_map, obj, (vm_offset_t)cp, (vm_offset_t)iov->iov_base); /* * If we get an error back, attempt * to use copyout() instead. The * disposable page should be freed * automatically if we weren't able to move * it into userland. */ if (error != 0) error = copyout(cp, iov->iov_base, cnt); } else { error = copyout(cp, iov->iov_base, cnt); } } else { error = copyin(iov->i
ov_base, cp, cnt); } return (error); } int uiomoveco(void *cp, int n, struct uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; u_int cnt; int error; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomoveco: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomoveco proc")); while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->uio_iov++; uio->uio_iovcnt--; continue; } if (c
nt > n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); error = userspaceco(cp, cnt, uio, obj, disposable); if (error) return (error); break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n
-= cnt; } return (0); } #endif /* ZERO_COPY_SOCKETS */ /* * Give next character to user as result of read. */ int ureadc(int c, struct uio *uio) { struct iovec *iov; char *iov_base; again: if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) panic("ureadc"); iov = uio->uio_iov; if (iov->iov_len == 0) { uio->uio_iovcnt--; uio->uio_iov++; goto again; } switch (uio->uio_segflg) { case UIO_USERSPACE: if (subyte(iov->iov_base, c) < 0) return (EFAULT); break; case UIO_SYSSPACE: iov_base = iov->iov_
base; *iov_base = c; iov->iov_base = iov_base; break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + 1; iov->iov_len--; uio->uio_resid--; uio->uio_offset++; return (0); } /* * General routine to allocate a hash table. */ void * hashinit(int elements, struct malloc_type *type, u_long *hashmask) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("hashinit: bad elements"); for (hashsize = 1; hashsize <= elements; hashsize <<= 1) continue;
hashsize >>= 1; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *hashmask = hashsize - 1; return (hashtbl); } void hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask) { LIST_HEAD(generic, generic) *hashtbl, *hp; hashtbl = vhashtbl; for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++) if (!LIST_EMPTY(hp)) panic("hashdestroy: hash not empty"); free(hashtbl, type); } static int primes[] = { 1, 13, 31,
61, 127, 251, 509, 761, 1021, 1531, 2039, 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; #define NPRIMES (sizeof(primes) / sizeof(primes[0])) /* * General routine to allocate a prime number sized hash table. */ void * phashinit(int elements, struct malloc_type *type, u_long *nentries) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("phashinit: bad elements"); for (i = 1, hashsize = primes[1]; hashsi
ze <= elements;) { i++; if (i == NPRIMES) break; hashsize = primes[i]; } hashsize = primes[i - 1]; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *nentries = hashsize; return (hashtbl); } void uio_yield(void) { struct thread *td; td = curthread; mtx_lock_spin(&sched_lock); DROP_GIANT(); sched_prio(td, td->td_ksegrp->kg_user_pri); /* XXXKSE */ mi_switch(SW_INVOL, NULL); mtx_unlock_spin(&sched_lock); PICKUP_GIANT(); }
int copyinfrom(const void * __restrict src, void * __restrict dst, size_t len, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyin(src, dst, len); break; case UIO_SYSSPACE: bcopy(src, dst, len); break; default: panic("copyinfrom: bad seg %d\n", seg); } return (error); } int copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len, size_t * __restrict copied, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyinstr(src
, dst, len, copied); break; case UIO_SYSSPACE: error = copystr(src, dst, len, copied); break; default: panic("copyinstrfrom: bad seg %d\n", seg); } return (error); } int copyiniov(struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error) { u_int iovlen; *iov = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof (struct iovec); *iov = malloc(iovlen, M_IOV, M_WAITOK); error = copyin(iovp, *iov, iovlen); if (error) { free(*iov, M_IOV); *iov = NULL; } return (error); }
int copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop) { struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); error = copyin(iovp, iov, iovlen); if (error) { free(uio, M_IOV); return (error); } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1
; uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } struct uio * cloneuio(struct uio *uiop) { struct uio *uio; int iovlen; iovlen = uiop->uio_iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); *uio = *uiop; uio->uio_iov = (struct iovec *)(uio + 1); bcopy(uiop->uio_iov, uio->uio_iov, iovlen); return (uio); }#in
clude <sys/cdefs.h> __FBSDID("$FreeBSD: /repoman/r/ncvs/src/sys/kern/kern_subr.c,v 1.88.2.2 2005/01/31 23:26:16 imp Exp $"); #include "opt_zero.h" #include <sys/param.h> #include <sys/systm.h> #include ys/kernel.h> #include <sys/ktr.h> #include <sys/limits.h> #include <sys/lock.h> #include <sys/mutex.h> #include ys/proc.h> #include #include #include #include #include #include #include m/vm_page.h> #i
nclude <vm/vm_map.h> #ifdef ZERO_COPY_SOCKETS #include #include _object.h> #endif SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); #ifdef ZERO_COPY_SOCKETS /* Declared in uipc_socket.c */ extern int so_zero_copy_receive; static int vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr, vm_offset_t uaddr) { vm_map_t map = mapa; vm_page_t kern_pg, user_pg; vm_object_t
uobject; vm_map_entry_t entry; vm_pindex_t upindex, kpindex; vm_prot_t prot; boolean_t wired; /* * First lookup the kernel page. */ kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); /* * XXX The vm object containing kern_pg needs locking. */ if ((vm_map_lookup(&map, uaddr, VM_PROT_WRITE, &entry, &uobject, &upindex, &prot, &wired)) != KERN_SUCCESS) { return(EFAULT); } VM_OBJECT_LOCK(uobject); if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { do vm_page_lock_queues(); while (vm_page
_sleep_if_busy(user_pg, 1, "vm_pgmoveco")); vm_page_busy(user_pg); pmap_remove_all(user_pg); vm_page_free(user_pg); } else vm_page_lock_queues(); if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) || (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) { printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), " "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex, kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 1 : 0, kern_pg->hold_count, (u_long)kern_pg->phys_addr); if (
(kern_pg->queue - kern_pg->pc) == PQ_FREE) panic("vm_pgmoveco: renaming free page"); else panic("vm_pgmoveco: renaming busy page"); } kpindex = kern_pg->pindex; vm_page_busy(kern_pg); vm_page_rename(kern_pg, uobject, upindex); vm_page_flag_clear(kern_pg, PG_BUSY); kern_pg->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); VM_OBJECT_UNLOCK(uobject); vm_map_lookup_done(map, entry); return(KERN_SUCCESS); } #endif /* ZERO_COPY_SOCKETS */ int uiomove(void *cp, int n, struct uio *uio) { struct threa
d *td = curthread; struct iovec *iov; u_int cnt; int error = 0; int save = 0; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomove: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomove proc")); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Calling uiomove()"); save = td->td_pflags & TDP_DEADLKTREAT; td->td_pflags |= TDP_DEADLKTREAT; while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio
->uio_iov++; uio->uio_iovcnt--; continue; } if (cnt > n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); if (uio->uio_rw == UIO_READ) error = copyout(cp, iov->iov_base, cnt); else error = copyin(iov->iov_base, cp, cnt); if (error) goto out; break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_
base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -= cnt; } out: if (save == 0) td->td_pflags &= ~TDP_DEADLKTREAT; return (error); } /* * Wrapper for uiomove() that validates the arguments against a known-good * kernel buffer. Currently, uiomove accepts a signed (n) argument, which * is almost definitely a bad thing, so we catch that here as well. We * return a runtime failure, but it might be desirable to generate a runtime * assert
ion failure instead. */ int uiomove_frombuf(void *buf, int buflen, struct uio *uio) { unsigned int offset, n; if (uio->uio_offset < 0 || uio->uio_resid < 0 || (offset = uio->uio_offset) != uio->uio_offset) return (EINVAL); if (buflen <= 0 || offset >= buflen) return (0); if ((n = buflen - offset) > INT_MAX) return (EINVAL); return (uiomove((char *)buf + offset, n, uio)); } #ifdef ZERO_COPY_SOCKETS /* * Experimental support for zero-copy I/O */ static int userspaceco(void *cp, u_int cnt,
struct uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; int error; iov = uio->uio_iov; if (uio->uio_rw == UIO_READ) { if ((so_zero_copy_receive != 0) && (obj != NULL) && ((cnt & PAGE_MASK) == 0) && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) && ((uio->uio_offset & PAGE_MASK) == 0) && ((((intptr_t) cp) & PAGE_MASK) == 0) && (obj->type == OBJT_DEFAULT) && (disposable != 0)) { /* SOCKET: use page-trading */ /* * We only want to call vm_pgmoveco() on * dispose
able pages, since it gives the * kernel page to the userland process. */ error =vm_pgmoveco(&curproc->p_vmspace->vm_map, obj, (vm_offset_t)cp, (vm_offset_t)iov->iov_base); /* * If we get an error back, attempt * to use copyout() instead. The * disposable page should be freed * automatically if we weren't able to move * it into userland. */ if (error != 0) error = copyout(cp, iov->iov_base, cnt); } else { error = copyout(cp, iov->iov_base, cnt); } } else { error = copyin(iov->io
v_base, cp, cnt); } return (error); } int uiomoveco(void *cp, int n, struct uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; u_int cnt; int error; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomoveco: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomoveco proc")); while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->uio_iov++; uio->uio_iovcnt--; continue; } if (cn
t > n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); error = userspaceco(cp, cnt, uio, obj, disposable); if (error) return (error); break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n
-= cnt; } return (0); } #endif /* ZERO_COPY_SOCKETS */ /* * Give next character to user as result of read. */ int ureadc(int c, struct uio *uio) { struct iovec *iov; char *iov_base; again: if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) panic("ureadc"); iov = uio->uio_iov; if (iov->iov_len == 0) { uio->uio_iovcnt--; uio->uio_iov++; goto again; } switch (uio->uio_segflg) { case UIO_USERSPACE: if (subyte(iov->iov_base, c) < 0) return (EFAULT); break; case UIO_SYSSPACE: iov_base = iov->iov_b
ase; *iov_base = c; iov->iov_base = iov_base; break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + 1; iov->iov_len--; uio->uio_resid--; uio->uio_offset++; return (0); } /* * General routine to allocate a hash table. */ void * hashinit(int elements, struct malloc_type *type, u_long *hashmask) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("hashinit: bad elements"); for (hashsize = 1; hashsize <= elements; hashsize <<= 1) continue; h
ashsize >>= 1; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *hashmask = hashsize - 1; return (hashtbl); } void hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask) { LIST_HEAD(generic, generic) *hashtbl, *hp; hashtbl = vhashtbl; for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++) if (!LIST_EMPTY(hp)) panic("hashdestroy: hash not empty"); free(hashtbl, type); } static int primes[] = { 1, 13, 31, 6
1, 127, 251, 509, 761, 1021, 1531, 2039, 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; #define NPRIMES (sizeof(primes) / sizeof(primes[0])) /* * General routine to allocate a prime number sized hash table. */ void * phashinit(int elements, struct malloc_type *type, u_long *nentries) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("phashinit: bad elements"); for (i = 1, hashsize = primes[1]; hashsiz
e <= elements;) { i++; if (i == NPRIMES) break; hashsize = primes[i]; } hashsize = primes[i - 1]; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *nentries = hashsize; return (hashtbl); } void uio_yield(void) { struct thread *td; td = curthread; mtx_lock_spin(&sched_lock); DROP_GIANT(); sched_prio(td, td->td_ksegrp->kg_user_pri); /* XXXKSE */ mi_switch(SW_INVOL, NULL); mtx_unlock_spin(&sched_lock); PICKUP_GIANT(); }
int copyinfrom(const void * __restrict src, void * __restrict dst, size_t len, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyin(src, dst, len); break; case UIO_SYSSPACE: bcopy(src, dst, len); break; default: panic("copyinfrom: bad seg %d\n", seg); } return (error); } int copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len, size_t * __restrict copied, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyinstr(src,
dst, len, copied); break; case UIO_SYSSPACE: error = copystr(src, dst, len, copied); break; default: panic("copyinstrfrom: bad seg %d\n", seg); } return (error); } int copyiniov(struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error) { u_int iovlen; *iov = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof (struct iovec); *iov = malloc(iovlen, M_IOV, M_WAITOK); error = copyin(iovp, *iov, iovlen); if (error) { free(*iov, M_IOV); *iov = NULL; } return (error); } i
nt copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop) { struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); error = copyin(iovp, iov, iovlen); if (error) { free(uio, M_IOV); return (error); } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1;
uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } struct uio * cloneuio(struct uio *uiop) { struct uio *uio; int iovlen; iovlen = uiop->uio_iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); *uio = *uiop; uio->uio_iov = (struct iovec *)(uio + 1); bcopy(uiop->uio_iov, uio->uio_iov, iovlen); return (uio); }#inc
lude <sys/cdefs.h> __FBSDID("$FreeBSD: /repoman/r/ncvs/src/sys/kern/kern_subr.c,v 1.88.2.2 2005/01/31 23:26:16 imp Exp $"); #include "opt_zero.h" #include <sys/param.h> #include ys/systm.h> #include <sys/kernel.h> #include <sys/ktr.h> #include ys/limits.h> #include ys/lock.h> #include <sys/mutex.h> #include #include #include #include #include #include #include #include /vm_page.h> #in
clude <vm/vm_map.h> #ifdef ZERO_COPY_SOCKETS #include <vm/vm_param.h> #include <vm/vm_object.h> #endif SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); #ifdef ZERO_COPY_SOCKETS /* Declared in uipc_socket.c */ extern int so_zero_copy_receive; static int vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr, vm_offset_t uaddr) { vm_map_t map = mapa; vm_page_t kern_pg, user_pg; vm_object_t
uobject; vm_map_entry_t entry; vm_pindex_t upindex, kpindex; vm_prot_t prot; boolean_t wired; /* * First lookup the kernel page. */ kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); /* * XXX The vm object containing kern_pg needs locking. */ if ((vm_map_lookup(&map, uaddr, VM_PROT_WRITE, &entry, &uobject, &upindex, &prot, &wired)) != KERN_SUCCESS) { return(EFAULT); } VM_OBJECT_LOCK(uobject); if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { do vm_page_lock_queues(); while (vm_page_
sleep_if_busy(user_pg, 1, "vm_pgmoveco")); vm_page_busy(user_pg); pmap_remove_all(user_pg); vm_page_free(user_pg); } else vm_page_lock_queues(); if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) || (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) { printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), " "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex, kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 1 : 0, kern_pg->hold_count, (u_long)kern_pg->phys_addr); if ((
kern_pg->queue - kern_pg->pc) == PQ_FREE) panic("vm_pgmoveco: renaming free page"); else panic("vm_pgmoveco: renaming busy page"); } kpindex = kern_pg->pindex; vm_page_busy(kern_pg); vm_page_rename(kern_pg, uobject, upindex); vm_page_flag_clear(kern_pg, PG_BUSY); kern_pg->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); VM_OBJECT_UNLOCK(uobject); vm_map_lookup_done(map, entry); return(KERN_SUCCESS); } #endif /* ZERO_COPY_SOCKETS */ int uiomove(void *cp, int n, struct uio *uio) { struct thread
*td = curthread; struct iovec *iov; u_int cnt; int error = 0; int save = 0; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomove: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomove proc")); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Calling uiomove()"); save = td->td_pflags & TDP_DEADLKTREAT; td->td_pflags |= TDP_DEADLKTREAT; while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio-
>uio_iov++; uio->uio_iovcnt--; continue; } if (cnt > n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); if (uio->uio_rw == UIO_READ) error = copyout(cp, iov->iov_base, cnt); else error = copyin(iov->iov_base, cp, cnt); if (error) goto out; break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_b
ase + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -= cnt; } out: if (save == 0) td->td_pflags &= ~TDP_DEADLKTREAT; return (error); } /* * Wrapper for uiomove() that validates the arguments against a known-good * kernel buffer. Currently, uiomove accepts a signed (n) argument, which * is almost definitely a bad thing, so we catch that here as well. We * return a runtime failure, but it might be desirable to generate a runtime * asserti
on failure instead. */ int uiomove_frombuf(void *buf, int buflen, struct uio *uio) { unsigned int offset, n; if (uio->uio_offset < 0 || uio->uio_resid < 0 || (offset = uio->uio_offset) != uio->uio_offset) return (EINVAL); if (buflen <= 0 || offset >= buflen) return (0); if ((n = buflen - offset) > INT_MAX) return (EINVAL); return (uiomove((char *)buf + offset, n, uio)); } #ifdef ZERO_COPY_SOCKETS /* * Experimental support for zero-copy I/O */ static int userspaceco(void *cp, u_int cnt,
struct uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; int error; iov = uio->uio_iov; if (uio->uio_rw == UIO_READ) { if ((so_zero_copy_receive != 0) && (obj != NULL) && ((cnt & PAGE_MASK) == 0) && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) && ((uio->uio_offset & PAGE_MASK) == 0) && ((((intptr_t) cp) & PAGE_MASK) == 0) && (obj->type == OBJT_DEFAULT) && (disposable != 0)) { /* SOCKET: use page-trading */ /* * We only want to call vm_pgmoveco() on * disposea
ble pages, since it gives the * kernel page to the userland process. */ error =vm_pgmoveco(&curproc->p_vmspace->vm_map, obj, (vm_offset_t)cp, (vm_offset_t)iov->iov_base); /* * If we get an error back, attempt * to use copyout() instead. The * disposable page should be freed * automatically if we weren't able to move * it into userland. */ if (error != 0) error = copyout(cp, iov->iov_base, cnt); } else { error = copyout(cp, iov->iov_base, cnt); } } else { error = copyin(iov->iov
_base, cp, cnt); } return (error); } int uiomoveco(void *cp, int n, struct uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; u_int cnt; int error; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomoveco: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomoveco proc")); while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->uio_iov++; uio->uio_iovcnt--; continue; } if (cnt
> n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); error = userspaceco(cp, cnt, uio, obj, disposable); if (error) return (error); break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -
= cnt; } return (0); } #endif /* ZERO_COPY_SOCKETS */ /* * Give next character to user as result of read. */ int ureadc(int c, struct uio *uio) { struct iovec *iov; char *iov_base; again: if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) panic("ureadc"); iov = uio->uio_iov; if (iov->iov_len == 0) { uio->uio_iovcnt--; uio->uio_iov++; goto again; } switch (uio->uio_segflg) { case UIO_USERSPACE: if (subyte(iov->iov_base, c) < 0) return (EFAULT); break; case UIO_SYSSPACE: iov_base = iov->iov_ba
se; *iov_base = c; iov->iov_base = iov_base; break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + 1; iov->iov_len--; uio->uio_resid--; uio->uio_offset++; return (0); } /* * General routine to allocate a hash table. */ void * hashinit(int elements, struct malloc_type *type, u_long *hashmask) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("hashinit: bad elements"); for (hashsize = 1; hashsize <= elements; hashsize <<= 1) continue; ha
shsize >>= 1; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *hashmask = hashsize - 1; return (hashtbl); } void hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask) { LIST_HEAD(generic, generic) *hashtbl, *hp; hashtbl = vhashtbl; for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++) if (!LIST_EMPTY(hp)) panic("hashdestroy: hash not empty"); free(hashtbl, type); } static int primes[] = { 1, 13, 31, 61
, 127, 251, 509, 761, 1021, 1531, 2039, 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; #define NPRIMES (sizeof(primes) / sizeof(primes[0])) /* * General routine to allocate a prime number sized hash table. */ void * phashinit(int elements, struct malloc_type *type, u_long *nentries) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("phashinit: bad elements"); for (i = 1, hashsize = primes[1]; hashsize
<= elements;) { i++; if (i == NPRIMES) break; hashsize = primes[i]; } hashsize = primes[i - 1]; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *nentries = hashsize; return (hashtbl); } void uio_yield(void) { struct thread *td; td = curthread; mtx_lock_spin(&sched_lock); DROP_GIANT(); sched_prio(td, td->td_ksegrp->kg_user_pri); /* XXXKSE */ mi_switch(SW_INVOL, NULL); mtx_unlock_spin(&sched_lock); PICKUP_GIANT(); }
int copyinfrom(const void * __restrict src, void * __restrict dst, size_t len, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyin(src, dst, len); break; case UIO_SYSSPACE: bcopy(src, dst, len); break; default: panic("copyinfrom: bad seg %d\n", seg); } return (error); } int copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len, size_t * __restrict copied, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyinstr(src,
dst, len, copied); break; case UIO_SYSSPACE: error = copystr(src, dst, len, copied); break; default: panic("copyinstrfrom: bad seg %d\n", seg); } return (error); } int copyiniov(struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error) { u_int iovlen; *iov = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof (struct iovec); *iov = malloc(iovlen, M_IOV, M_WAITOK); error = copyin(iovp, *iov, iovlen); if (error) { free(*iov, M_IOV); *iov = NULL; } return (error); } in
t copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop) { struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); error = copyin(iovp, iov, iovlen); if (error) { free(uio, M_IOV); return (error); } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1;
uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } struct uio * cloneuio(struct uio *uiop) { struct uio *uio; int iovlen; iovlen = uiop->uio_iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); *uio = *uiop; uio->uio_iov = (struct iovec *)(uio + 1); bcopy(uiop->uio_iov, uio->uio_iov, iovlen); return (uio); }#incl
ude <sys/cdefs.h> __FBSDID("$FreeBSD: /repoman/r/ncvs/src/sys/kern/kern_subr.c,v 1.88.2.2 2005/01/31 23:26:16 imp Exp $"); #include "opt_zero.h" #include ys/param.h> #include <sys/systm.h> #include ys/kernel.h> #include <sys/ktr.h> #include <sys/limits.h> #include #include #include #include #include #include #include #include #include #include vm_page.h> #inc
lude <vm/vm_map.h> #ifdef ZERO_COPY_SOCKETS #include <vm/vm_param.h> #include <vm/vm_object.h> #endif SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); #ifdef ZERO_COPY_SOCKETS /* Declared in uipc_socket.c */ extern int so_zero_copy_receive; static int vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr, vm_offset_t uaddr) { vm_map_t map = mapa; vm_page_t kern_pg, user_pg; vm_object_t u
object; vm_map_entry_t entry; vm_pindex_t upindex, kpindex; vm_prot_t prot; boolean_t wired; /* * First lookup the kernel page. */ kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); /* * XXX The vm object containing kern_pg needs locking. */ if ((vm_map_lookup(&map, uaddr, VM_PROT_WRITE, &entry, &uobject, &upindex, &prot, &wired)) != KERN_SUCCESS) { return(EFAULT); } VM_OBJECT_LOCK(uobject); if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { do vm_page_lock_queues(); while (vm_page_s
leep_if_busy(user_pg, 1, "vm_pgmoveco")); vm_page_busy(user_pg); pmap_remove_all(user_pg); vm_page_free(user_pg); } else vm_page_lock_queues(); if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) || (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) { printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), " "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex, kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 1 : 0, kern_pg->hold_count, (u_long)kern_pg->phys_addr); if ((k
ern_pg->queue - kern_pg->pc) == PQ_FREE) panic("vm_pgmoveco: renaming free page"); else panic("vm_pgmoveco: renaming busy page"); } kpindex = kern_pg->pindex; vm_page_busy(kern_pg); vm_page_rename(kern_pg, uobject, upindex); vm_page_flag_clear(kern_pg, PG_BUSY); kern_pg->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); VM_OBJECT_UNLOCK(uobject); vm_map_lookup_done(map, entry); return(KERN_SUCCESS); } #endif /* ZERO_COPY_SOCKETS */ int uiomove(void *cp, int n, struct uio *uio) { struct thread
*td = curthread; struct iovec *iov; u_int cnt; int error = 0; int save = 0; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomove: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomove proc")); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Calling uiomove()"); save = td->td_pflags & TDP_DEADLKTREAT; td->td_pflags |= TDP_DEADLKTREAT; while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->
uio_iov++; uio->uio_iovcnt--; continue; } if (cnt > n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); if (uio->uio_rw == UIO_READ) error = copyout(cp, iov->iov_base, cnt); else error = copyin(iov->iov_base, cp, cnt); if (error) goto out; break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_ba
se + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -= cnt; } out: if (save == 0) td->td_pflags &= ~TDP_DEADLKTREAT; return (error); } /* * Wrapper for uiomove() that validates the arguments against a known-good * kernel buffer. Currently, uiomove accepts a signed (n) argument, which * is almost definitely a bad thing, so we catch that here as well. We * return a runtime failure, but it might be desirable to generate a runtime * assertio
n failure instead. */ int uiomove_frombuf(void *buf, int buflen, struct uio *uio) { unsigned int offset, n; if (uio->uio_offset < 0 || uio->uio_resid < 0 || (offset = uio->uio_offset) != uio->uio_offset) return (EINVAL); if (buflen <= 0 || offset >= buflen) return (0); if ((n = buflen - offset) > INT_MAX) return (EINVAL); return (uiomove((char *)buf + offset, n, uio)); } #ifdef ZERO_COPY_SOCKETS /* * Experimental support for zero-copy I/O */ static int userspaceco(void *cp, u_int cnt, s
truct uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; int error; iov = uio->uio_iov; if (uio->uio_rw == UIO_READ) { if ((so_zero_copy_receive != 0) && (obj != NULL) && ((cnt & PAGE_MASK) == 0) && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) && ((uio->uio_offset & PAGE_MASK) == 0) && ((((intptr_t) cp) & PAGE_MASK) == 0) && (obj->type == OBJT_DEFAULT) && (disposable != 0)) { /* SOCKET: use page-trading */ /* * We only want to call vm_pgmoveco() on * disposeab
le pages, since it gives the * kernel page to the userland process. */ error =vm_pgmoveco(&curproc->p_vmspace->vm_map, obj, (vm_offset_t)cp, (vm_offset_t)iov->iov_base); /* * If we get an error back, attempt * to use copyout() instead. The * disposable page should be freed * automatically if we weren't able to move * it into userland. */ if (error != 0) error = copyout(cp, iov->iov_base, cnt); } else { error = copyout(cp, iov->iov_base, cnt); } } else { error = copyin(iov->iov_
base, cp, cnt); } return (error); } int uiomoveco(void *cp, int n, struct uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; u_int cnt; int error; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomoveco: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomoveco proc")); while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->uio_iov++; uio->uio_iovcnt--; continue; } if (cnt
> n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); error = userspaceco(cp, cnt, uio, obj, disposable); if (error) return (error); break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -=
cnt; } return (0); } #endif /* ZERO_COPY_SOCKETS */ /* * Give next character to user as result of read. */ int ureadc(int c, struct uio *uio) { struct iovec *iov; char *iov_base; again: if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) panic("ureadc"); iov = uio->uio_iov; if (iov->iov_len == 0) { uio->uio_iovcnt--; uio->uio_iov++; goto again; } switch (uio->uio_segflg) { case UIO_USERSPACE: if (subyte(iov->iov_base, c) < 0) return (EFAULT); break; case UIO_SYSSPACE: iov_base = iov->iov_bas
e; *iov_base = c; iov->iov_base = iov_base; break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + 1; iov->iov_len--; uio->uio_resid--; uio->uio_offset++; return (0); } /* * General routine to allocate a hash table. */ void * hashinit(int elements, struct malloc_type *type, u_long *hashmask) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("hashinit: bad elements"); for (hashsize = 1; hashsize <= elements; hashsize <<= 1) continue; has
hsize >>= 1; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *hashmask = hashsize - 1; return (hashtbl); } void hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask) { LIST_HEAD(generic, generic) *hashtbl, *hp; hashtbl = vhashtbl; for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++) if (!LIST_EMPTY(hp)) panic("hashdestroy: hash not empty"); free(hashtbl, type); } static int primes[] = { 1, 13, 31, 61,
127, 251, 509, 761, 1021, 1531, 2039, 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; #define NPRIMES (sizeof(primes) / sizeof(primes[0])) /* * General routine to allocate a prime number sized hash table. */ void * phashinit(int elements, struct malloc_type *type, u_long *nentries) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("phashinit: bad elements"); for (i = 1, hashsize = primes[1]; hashsize
<= elements;) { i++; if (i == NPRIMES) break; hashsize = primes[i]; } hashsize = primes[i - 1]; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *nentries = hashsize; return (hashtbl); } void uio_yield(void) { struct thread *td; td = curthread; mtx_lock_spin(&sched_lock); DROP_GIANT(); sched_prio(td, td->td_ksegrp->kg_user_pri); /* XXXKSE */ mi_switch(SW_INVOL, NULL); mtx_unlock_spin(&sched_lock); PICKUP_GIANT(); } i
nt copyinfrom(const void * __restrict src, void * __restrict dst, size_t len, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyin(src, dst, len); break; case UIO_SYSSPACE: bcopy(src, dst, len); break; default: panic("copyinfrom: bad seg %d\n", seg); } return (error); } int copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len, size_t * __restrict copied, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyinstr(src, d
st, len, copied); break; case UIO_SYSSPACE: error = copystr(src, dst, len, copied); break; default: panic("copyinstrfrom: bad seg %d\n", seg); } return (error); } int copyiniov(struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error) { u_int iovlen; *iov = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof (struct iovec); *iov = malloc(iovlen, M_IOV, M_WAITOK); error = copyin(iovp, *iov, iovlen); if (error) { free(*iov, M_IOV); *iov = NULL; } return (error); } int
copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop) { struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); error = copyin(iovp, iov, iovlen); if (error) { free(uio, M_IOV); return (error); } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; u
io->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } struct uio * cloneuio(struct uio *uiop) { struct uio *uio; int iovlen; iovlen = uiop->uio_iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); *uio = *uiop; uio->uio_iov = (struct iovec *)(uio + 1); bcopy(uiop->uio_iov, uio->uio_iov, iovlen); return (uio); }#inclu
de <sys/cdefs.h> __FBSDID("$FreeBSD: /repoman/r/ncvs/src/sys/kern/kern_subr.c,v 1.88.2.2 2005/01/31 23:26:16 imp Exp $"); #include "opt_zero.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/ktr.h> #include ys/limits.h> #include <sys/lock.h> #include ys/mutex.h> #include #include #include #include #include #include #include #include m_page.h> #incl
ude <vm/vm_map.h> #ifdef ZERO_COPY_SOCKETS #include #include ct.h> #endif SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); #ifdef ZERO_COPY_SOCKETS /* Declared in uipc_socket.c */ extern int so_zero_copy_receive; static int vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr, vm_offset_t uaddr) { vm_map_t map = mapa; vm_page_t kern_pg, user_pg; vm_object_t uo
bject; vm_map_entry_t entry; vm_pindex_t upindex, kpindex; vm_prot_t prot; boolean_t wired; /* * First lookup the kernel page. */ kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); /* * XXX The vm object containing kern_pg needs locking. */ if ((vm_map_lookup(&map, uaddr, VM_PROT_WRITE, &entry, &uobject, &upindex, &prot, &wired)) != KERN_SUCCESS) { return(EFAULT); } VM_OBJECT_LOCK(uobject); if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { do vm_page_lock_queues(); while (vm_page_sl
eep_if_busy(user_pg, 1, "vm_pgmoveco")); vm_page_busy(user_pg); pmap_remove_all(user_pg); vm_page_free(user_pg); } else vm_page_lock_queues(); if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) || (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) { printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), " "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex, kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 1 : 0, kern_pg->hold_count, (u_long)kern_pg->phys_addr); if ((ke
rn_pg->queue - kern_pg->pc) == PQ_FREE) panic("vm_pgmoveco: renaming free page"); else panic("vm_pgmoveco: renaming busy page"); } kpindex = kern_pg->pindex; vm_page_busy(kern_pg); vm_page_rename(kern_pg, uobject, upindex); vm_page_flag_clear(kern_pg, PG_BUSY); kern_pg->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); VM_OBJECT_UNLOCK(uobject); vm_map_lookup_done(map, entry); return(KERN_SUCCESS); } #endif /* ZERO_COPY_SOCKETS */ int uiomove(void *cp, int n, struct uio *uio) { struct thread *
td = curthread; struct iovec *iov; u_int cnt; int error = 0; int save = 0; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomove: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomove proc")); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Calling uiomove()"); save = td->td_pflags & TDP_DEADLKTREAT; td->td_pflags |= TDP_DEADLKTREAT; while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->u
io_iov++; uio->uio_iovcnt--; continue; } if (cnt > n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); if (uio->uio_rw == UIO_READ) error = copyout(cp, iov->iov_base, cnt); else error = copyin(iov->iov_base, cp, cnt); if (error) goto out; break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_bas
e + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -= cnt; } out: if (save == 0) td->td_pflags &= ~TDP_DEADLKTREAT; return (error); } /* * Wrapper for uiomove() that validates the arguments against a known-good * kernel buffer. Currently, uiomove accepts a signed (n) argument, which * is almost definitely a bad thing, so we catch that here as well. We * return a runtime failure, but it might be desirable to generate a runtime * assertion
failure instead. */ int uiomove_frombuf(void *buf, int buflen, struct uio *uio) { unsigned int offset, n; if (uio->uio_offset < 0 || uio->uio_resid < 0 || (offset = uio->uio_offset) != uio->uio_offset) return (EINVAL); if (buflen <= 0 || offset >= buflen) return (0); if ((n = buflen - offset) > INT_MAX) return (EINVAL); return (uiomove((char *)buf + offset, n, uio)); } #ifdef ZERO_COPY_SOCKETS /* * Experimental support for zero-copy I/O */ static int userspaceco(void *cp, u_int cnt, st
ruct uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; int error; iov = uio->uio_iov; if (uio->uio_rw == UIO_READ) { if ((so_zero_copy_receive != 0) && (obj != NULL) && ((cnt & PAGE_MASK) == 0) && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) && ((uio->uio_offset & PAGE_MASK) == 0) && ((((intptr_t) cp) & PAGE_MASK) == 0) && (obj->type == OBJT_DEFAULT) && (disposable != 0)) { /* SOCKET: use page-trading */ /* * We only want to call vm_pgmoveco() on * disposeabl
e pages, since it gives the * kernel page to the userland process. */ error =vm_pgmoveco(&curproc->p_vmspace->vm_map, obj, (vm_offset_t)cp, (vm_offset_t)iov->iov_base); /* * If we get an error back, attempt * to use copyout() instead. The * disposable page should be freed * automatically if we weren't able to move * it into userland. */ if (error != 0) error = copyout(cp, iov->iov_base, cnt); } else { error = copyout(cp, iov->iov_base, cnt); } } else { error = copyin(iov->iov_b
ase, cp, cnt); } return (error); } int uiomoveco(void *cp, int n, struct uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; u_int cnt; int error; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomoveco: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomoveco proc")); while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->uio_iov++; uio->uio_iovcnt--; continue; } if (cnt >
n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); error = userspaceco(cp, cnt, uio, obj, disposable); if (error) return (error); break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -=
cnt; } return (0); } #endif /* ZERO_COPY_SOCKETS */ /* * Give next character to user as result of read. */ int ureadc(int c, struct uio *uio) { struct iovec *iov; char *iov_base; again: if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) panic("ureadc"); iov = uio->uio_iov; if (iov->iov_len == 0) { uio->uio_iovcnt--; uio->uio_iov++; goto again; } switch (uio->uio_segflg) { case UIO_USERSPACE: if (subyte(iov->iov_base, c) < 0) return (EFAULT); break; case UIO_SYSSPACE: iov_base = iov->iov_base
; *iov_base = c; iov->iov_base = iov_base; break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + 1; iov->iov_len--; uio->uio_resid--; uio->uio_offset++; return (0); } /* * General routine to allocate a hash table. */ void * hashinit(int elements, struct malloc_type *type, u_long *hashmask) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("hashinit: bad elements"); for (hashsize = 1; hashsize <= elements; hashsize <<= 1) continue; hash
size >>= 1; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *hashmask = hashsize - 1; return (hashtbl); } void hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask) { LIST_HEAD(generic, generic) *hashtbl, *hp; hashtbl = vhashtbl; for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++) if (!LIST_EMPTY(hp)) panic("hashdestroy: hash not empty"); free(hashtbl, type); } static int primes[] = { 1, 13, 31, 61,
127, 251, 509, 761, 1021, 1531, 2039, 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; #define NPRIMES (sizeof(primes) / sizeof(primes[0])) /* * General routine to allocate a prime number sized hash table. */ void * phashinit(int elements, struct malloc_type *type, u_long *nentries) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("phashinit: bad elements"); for (i = 1, hashsize = primes[1]; hashsize <
= elements;) { i++; if (i == NPRIMES) break; hashsize = primes[i]; } hashsize = primes[i - 1]; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *nentries = hashsize; return (hashtbl); } void uio_yield(void) { struct thread *td; td = curthread; mtx_lock_spin(&sched_lock); DROP_GIANT(); sched_prio(td, td->td_ksegrp->kg_user_pri); /* XXXKSE */ mi_switch(SW_INVOL, NULL); mtx_unlock_spin(&sched_lock); PICKUP_GIANT(); } in
t copyinfrom(const void * __restrict src, void * __restrict dst, size_t len, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyin(src, dst, len); break; case UIO_SYSSPACE: bcopy(src, dst, len); break; default: panic("copyinfrom: bad seg %d\n", seg); } return (error); } int copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len, size_t * __restrict copied, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyinstr(src, ds
t, len, copied); break; case UIO_SYSSPACE: error = copystr(src, dst, len, copied); break; default: panic("copyinstrfrom: bad seg %d\n", seg); } return (error); } int copyiniov(struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error) { u_int iovlen; *iov = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof (struct iovec); *iov = malloc(iovlen, M_IOV, M_WAITOK); error = copyin(iovp, *iov, iovlen); if (error) { free(*iov, M_IOV); *iov = NULL; } return (error); } int
copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop) { struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); error = copyin(iovp, iov, iovlen); if (error) { free(uio, M_IOV); return (error); } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; ui
o->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } struct uio * cloneuio(struct uio *uiop) { struct uio *uio; int iovlen; iovlen = uiop->uio_iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); *uio = *uiop; uio->uio_iov = (struct iovec *)(uio + 1); bcopy(uiop->uio_iov, uio->uio_iov, iovlen); return (uio); }#includ
e <sys/cdefs.h> __FBSDID("$FreeBSD: /repoman/r/ncvs/src/sys/kern/kern_subr.c,v 1.88.2.2 2005/01/31 23:26:16 imp Exp $"); #include "opt_zero.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/limits.h> #include <sys/lock.h> #include <sys/mutex.h> #include c.h> #include /malloc.h> #include urcevar.h> #include <sys/sched.h> #include <sys/sysctl.h> #include <sys/vnode.h> #include <vm/vm.h> #include _page.h> #inclu
de <vm/vm_map.h> #ifdef ZERO_COPY_SOCKETS #include #include #endif SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); #ifdef ZERO_COPY_SOCKETS /* Declared in uipc_socket.c */ extern int so_zero_copy_receive; static int vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr, vm_offset_t uaddr) { vm_map_t map = mapa; vm_page_t kern_pg, user_pg; vm_object_t uob
ject; vm_map_entry_t entry; vm_pindex_t upindex, kpindex; vm_prot_t prot; boolean_t wired; /* * First lookup the kernel page. */ kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); /* * XXX The vm object containing kern_pg needs locking. */ if ((vm_map_lookup(&map, uaddr, VM_PROT_WRITE, &entry, &uobject, &upindex, &prot, &wired)) != KERN_SUCCESS) { return(EFAULT); } VM_OBJECT_LOCK(uobject); if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { do vm_page_lock_queues(); while (vm_page_sle
ep_if_busy(user_pg, 1, "vm_pgmoveco")); vm_page_busy(user_pg); pmap_remove_all(user_pg); vm_page_free(user_pg); } else vm_page_lock_queues(); if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) || (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) { printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), " "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex, kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 1 : 0, kern_pg->hold_count, (u_long)kern_pg->phys_addr); if ((ker
n_pg->queue - kern_pg->pc) == PQ_FREE) panic("vm_pgmoveco: renaming free page"); else panic("vm_pgmoveco: renaming busy page"); } kpindex = kern_pg->pindex; vm_page_busy(kern_pg); vm_page_rename(kern_pg, uobject, upindex); vm_page_flag_clear(kern_pg, PG_BUSY); kern_pg->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); VM_OBJECT_UNLOCK(uobject); vm_map_lookup_done(map, entry); return(KERN_SUCCESS); } #endif /* ZERO_COPY_SOCKETS */ int uiomove(void *cp, int n, struct uio *uio) { struct thread *t
d = curthread; struct iovec *iov; u_int cnt; int error = 0; int save = 0; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomove: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomove proc")); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Calling uiomove()"); save = td->td_pflags & TDP_DEADLKTREAT; td->td_pflags |= TDP_DEADLKTREAT; while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->ui
o_iov++; uio->uio_iovcnt--; continue; } if (cnt > n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); if (uio->uio_rw == UIO_READ) error = copyout(cp, iov->iov_base, cnt); else error = copyin(iov->iov_base, cp, cnt); if (error) goto out; break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base
+ cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -= cnt; } out: if (save == 0) td->td_pflags &= ~TDP_DEADLKTREAT; return (error); } /* * Wrapper for uiomove() that validates the arguments against a known-good * kernel buffer. Currently, uiomove accepts a signed (n) argument, which * is almost definitely a bad thing, so we catch that here as well. We * return a runtime failure, but it might be desirable to generate a runtime * assertion
failure instead. */ int uiomove_frombuf(void *buf, int buflen, struct uio *uio) { unsigned int offset, n; if (uio->uio_offset < 0 || uio->uio_resid < 0 || (offset = uio->uio_offset) != uio->uio_offset) return (EINVAL); if (buflen <= 0 || offset >= buflen) return (0); if ((n = buflen - offset) > INT_MAX) return (EINVAL); return (uiomove((char *)buf + offset, n, uio)); } #ifdef ZERO_COPY_SOCKETS /* * Experimental support for zero-copy I/O */ static int userspaceco(void *cp, u_int cnt, str
uct uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; int error; iov = uio->uio_iov; if (uio->uio_rw == UIO_READ) { if ((so_zero_copy_receive != 0) && (obj != NULL) && ((cnt & PAGE_MASK) == 0) && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) && ((uio->uio_offset & PAGE_MASK) == 0) && ((((intptr_t) cp) & PAGE_MASK) == 0) && (obj->type == OBJT_DEFAULT) && (disposable != 0)) { /* SOCKET: use page-trading */ /* * We only want to call vm_pgmoveco() on * disposeable
pages, since it gives the * kernel page to the userland process. */ error =vm_pgmoveco(&curproc->p_vmspace->vm_map, obj, (vm_offset_t)cp, (vm_offset_t)iov->iov_base); /* * If we get an error back, attempt * to use copyout() instead. The * disposable page should be freed * automatically if we weren't able to move * it into userland. */ if (error != 0) error = copyout(cp, iov->iov_base, cnt); } else { error = copyout(cp, iov->iov_base, cnt); } } else { error = copyin(iov->iov_ba
se, cp, cnt); } return (error); } int uiomoveco(void *cp, int n, struct uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; u_int cnt; int error; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomoveco: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomoveco proc")); while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->uio_iov++; uio->uio_iovcnt--; continue; } if (cnt >
n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); error = userspaceco(cp, cnt, uio, obj, disposable); if (error) return (error); break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -= c
nt; } return (0); } #endif /* ZERO_COPY_SOCKETS */ /* * Give next character to user as result of read. */ int ureadc(int c, struct uio *uio) { struct iovec *iov; char *iov_base; again: if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) panic("ureadc"); iov = uio->uio_iov; if (iov->iov_len == 0) { uio->uio_iovcnt--; uio->uio_iov++; goto again; } switch (uio->uio_segflg) { case UIO_USERSPACE: if (subyte(iov->iov_base, c) < 0) return (EFAULT); break; case UIO_SYSSPACE: iov_base = iov->iov_base;
*iov_base = c; iov->iov_base = iov_base; break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + 1; iov->iov_len--; uio->uio_resid--; uio->uio_offset++; return (0); } /* * General routine to allocate a hash table. */ void * hashinit(int elements, struct malloc_type *type, u_long *hashmask) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("hashinit: bad elements"); for (hashsize = 1; hashsize <= elements; hashsize <<= 1) continue; hashs
ize >>= 1; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *hashmask = hashsize - 1; return (hashtbl); } void hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask) { LIST_HEAD(generic, generic) *hashtbl, *hp; hashtbl = vhashtbl; for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++) if (!LIST_EMPTY(hp)) panic("hashdestroy: hash not empty"); free(hashtbl, type); } static int primes[] = { 1, 13, 31, 61, 1
27, 251, 509, 761, 1021, 1531, 2039, 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; #define NPRIMES (sizeof(primes) / sizeof(primes[0])) /* * General routine to allocate a prime number sized hash table. */ void * phashinit(int elements, struct malloc_type *type, u_long *nentries) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("phashinit: bad elements"); for (i = 1, hashsize = primes[1]; hashsize <=
elements;) { i++; if (i == NPRIMES) break; hashsize = primes[i]; } hashsize = primes[i - 1]; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *nentries = hashsize; return (hashtbl); } void uio_yield(void) { struct thread *td; td = curthread; mtx_lock_spin(&sched_lock); DROP_GIANT(); sched_prio(td, td->td_ksegrp->kg_user_pri); /* XXXKSE */ mi_switch(SW_INVOL, NULL); mtx_unlock_spin(&sched_lock); PICKUP_GIANT(); } int
copyinfrom(const void * __restrict src, void * __restrict dst, size_t len, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyin(src, dst, len); break; case UIO_SYSSPACE: bcopy(src, dst, len); break; default: panic("copyinfrom: bad seg %d\n", seg); } return (error); } int copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len, size_t * __restrict copied, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyinstr(src, dst
, len, copied); break; case UIO_SYSSPACE: error = copystr(src, dst, len, copied); break; default: panic("copyinstrfrom: bad seg %d\n", seg); } return (error); } int copyiniov(struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error) { u_int iovlen; *iov = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof (struct iovec); *iov = malloc(iovlen, M_IOV, M_WAITOK); error = copyin(iovp, *iov, iovlen); if (error) { free(*iov, M_IOV); *iov = NULL; } return (error); } int c
opyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop) { struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); error = copyin(iovp, iov, iovlen); if (error) { free(uio, M_IOV); return (error); } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; uio
->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } struct uio * cloneuio(struct uio *uiop) { struct uio *uio; int iovlen; iovlen = uiop->uio_iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); *uio = *uiop; uio->uio_iov = (struct iovec *)(uio + 1); bcopy(uiop->uio_iov, uio->uio_iov, iovlen); return (uio); }#include
<sys/cdefs.h> __FBSDID("$FreeBSD: /repoman/r/ncvs/src/sys/kern/kern_subr.c,v 1.88.2.2 2005/01/31 23:26:16 imp Exp $"); #include "opt_zero.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/limits.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/proc.h> #include <sys/malloc.h> #include <sys/resourcevar.h> #include ys/sched.h> #include <sys/sysctl.h> #include s/vnode.h> #include #include page.h> #includ
e <vm/vm_map.h> #ifdef ZERO_COPY_SOCKETS #include <vm/vm_param.h> #include <vm/vm_object.h> #endif SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); #ifdef ZERO_COPY_SOCKETS /* Declared in uipc_socket.c */ extern int so_zero_copy_receive; static int vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr, vm_offset_t uaddr) { vm_map_t map = mapa; vm_page_t kern_pg, user_pg; vm_object_t uobj
ect; vm_map_entry_t entry; vm_pindex_t upindex, kpindex; vm_prot_t prot; boolean_t wired; /* * First lookup the kernel page. */ kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); /* * XXX The vm object containing kern_pg needs locking. */ if ((vm_map_lookup(&map, uaddr, VM_PROT_WRITE, &entry, &uobject, &upindex, &prot, &wired)) != KERN_SUCCESS) { return(EFAULT); } VM_OBJECT_LOCK(uobject); if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { do vm_page_lock_queues(); while (vm_page_slee
p_if_busy(user_pg, 1, "vm_pgmoveco")); vm_page_busy(user_pg); pmap_remove_all(user_pg); vm_page_free(user_pg); } else vm_page_lock_queues(); if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) || (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) { printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), " "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex, kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 1 : 0, kern_pg->hold_count, (u_long)kern_pg->phys_addr); if ((kern
_pg->queue - kern_pg->pc) == PQ_FREE) panic("vm_pgmoveco: renaming free page"); else panic("vm_pgmoveco: renaming busy page"); } kpindex = kern_pg->pindex; vm_page_busy(kern_pg); vm_page_rename(kern_pg, uobject, upindex); vm_page_flag_clear(kern_pg, PG_BUSY); kern_pg->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); VM_OBJECT_UNLOCK(uobject); vm_map_lookup_done(map, entry); return(KERN_SUCCESS); } #endif /* ZERO_COPY_SOCKETS */ int uiomove(void *cp, int n, struct uio *uio) { struct thread *td
= curthread; struct iovec *iov; u_int cnt; int error = 0; int save = 0; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomove: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomove proc")); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Calling uiomove()"); save = td->td_pflags & TDP_DEADLKTREAT; td->td_pflags |= TDP_DEADLKTREAT; while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->uio
_iov++; uio->uio_iovcnt--; continue; } if (cnt > n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); if (uio->uio_rw == UIO_READ) error = copyout(cp, iov->iov_base, cnt); else error = copyin(iov->iov_base, cp, cnt); if (error) goto out; break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base
+ cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -= cnt; } out: if (save == 0) td->td_pflags &= ~TDP_DEADLKTREAT; return (error); } /* * Wrapper for uiomove() that validates the arguments against a known-good * kernel buffer. Currently, uiomove accepts a signed (n) argument, which * is almost definitely a bad thing, so we catch that here as well. We * return a runtime failure, but it might be desirable to generate a runtime * assertion f
ailure instead. */ int uiomove_frombuf(void *buf, int buflen, struct uio *uio) { unsigned int offset, n; if (uio->uio_offset < 0 || uio->uio_resid < 0 || (offset = uio->uio_offset) != uio->uio_offset) return (EINVAL); if (buflen <= 0 || offset >= buflen) return (0); if ((n = buflen - offset) > INT_MAX) return (EINVAL); return (uiomove((char *)buf + offset, n, uio)); } #ifdef ZERO_COPY_SOCKETS /* * Experimental support for zero-copy I/O */ static int userspaceco(void *cp, u_int cnt, stru
ct uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; int error; iov = uio->uio_iov; if (uio->uio_rw == UIO_READ) { if ((so_zero_copy_receive != 0) && (obj != NULL) && ((cnt & PAGE_MASK) == 0) && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) && ((uio->uio_offset & PAGE_MASK) == 0) && ((((intptr_t) cp) & PAGE_MASK) == 0) && (obj->type == OBJT_DEFAULT) && (disposable != 0)) { /* SOCKET: use page-trading */ /* * We only want to call vm_pgmoveco() on * disposeable
pages, since it gives the * kernel page to the userland process. */ error =vm_pgmoveco(&curproc->p_vmspace->vm_map, obj, (vm_offset_t)cp, (vm_offset_t)iov->iov_base); /* * If we get an error back, attempt * to use copyout() instead. The * disposable page should be freed * automatically if we weren't able to move * it into userland. */ if (error != 0) error = copyout(cp, iov->iov_base, cnt); } else { error = copyout(cp, iov->iov_base, cnt); } } else { error = copyin(iov->iov_bas
e, cp, cnt); } return (error); } int uiomoveco(void *cp, int n, struct uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; u_int cnt; int error; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomoveco: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomoveco proc")); while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->uio_iov++; uio->uio_iovcnt--; continue; } if (cnt > n
) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); error = userspaceco(cp, cnt, uio, obj, disposable); if (error) return (error); break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -= cn
t; } return (0); } #endif /* ZERO_COPY_SOCKETS */ /* * Give next character to user as result of read. */ int ureadc(int c, struct uio *uio) { struct iovec *iov; char *iov_base; again: if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) panic("ureadc"); iov = uio->uio_iov; if (iov->iov_len == 0) { uio->uio_iovcnt--; uio->uio_iov++; goto again; } switch (uio->uio_segflg) { case UIO_USERSPACE: if (subyte(iov->iov_base, c) < 0) return (EFAULT); break; case UIO_SYSSPACE: iov_base = iov->iov_base;
*iov_base = c; iov->iov_base = iov_base; break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + 1; iov->iov_len--; uio->uio_resid--; uio->uio_offset++; return (0); } /* * General routine to allocate a hash table. */ void * hashinit(int elements, struct malloc_type *type, u_long *hashmask) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("hashinit: bad elements"); for (hashsize = 1; hashsize <= elements; hashsize <<= 1) continue; hashsi
ze >>= 1; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *hashmask = hashsize - 1; return (hashtbl); } void hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask) { LIST_HEAD(generic, generic) *hashtbl, *hp; hashtbl = vhashtbl; for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++) if (!LIST_EMPTY(hp)) panic("hashdestroy: hash not empty"); free(hashtbl, type); } static int primes[] = { 1, 13, 31, 61, 12
7, 251, 509, 761, 1021, 1531, 2039, 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; #define NPRIMES (sizeof(primes) / sizeof(primes[0])) /* * General routine to allocate a prime number sized hash table. */ void * phashinit(int elements, struct malloc_type *type, u_long *nentries) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("phashinit: bad elements"); for (i = 1, hashsize = primes[1]; hashsize <=
elements;) { i++; if (i == NPRIMES) break; hashsize = primes[i]; } hashsize = primes[i - 1]; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *nentries = hashsize; return (hashtbl); } void uio_yield(void) { struct thread *td; td = curthread; mtx_lock_spin(&sched_lock); DROP_GIANT(); sched_prio(td, td->td_ksegrp->kg_user_pri); /* XXXKSE */ mi_switch(SW_INVOL, NULL); mtx_unlock_spin(&sched_lock); PICKUP_GIANT(); } int
copyinfrom(const void * __restrict src, void * __restrict dst, size_t len, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyin(src, dst, len); break; case UIO_SYSSPACE: bcopy(src, dst, len); break; default: panic("copyinfrom: bad seg %d\n", seg); } return (error); } int copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len, size_t * __restrict copied, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyinstr(src, dst,
len, copied); break; case UIO_SYSSPACE: error = copystr(src, dst, len, copied); break; default: panic("copyinstrfrom: bad seg %d\n", seg); } return (error); } int copyiniov(struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error) { u_int iovlen; *iov = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof (struct iovec); *iov = malloc(iovlen, M_IOV, M_WAITOK); error = copyin(iovp, *iov, iovlen); if (error) { free(*iov, M_IOV); *iov = NULL; } return (error); } int co
pyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop) { struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); error = copyin(iovp, iov, iovlen); if (error) { free(uio, M_IOV); return (error); } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; uio-
>uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } struct uio * cloneuio(struct uio *uiop) { struct uio *uio; int iovlen; iovlen = uiop->uio_iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); *uio = *uiop; uio->uio_iov = (struct iovec *)(uio + 1); bcopy(uiop->uio_iov, uio->uio_iov, iovlen); return (uio); }#include
<sys/cdefs.h> __FBSDID("$FreeBSD: /repoman/r/ncvs/src/sys/kern/kern_subr.c,v 1.88.2.2 2005/01/31 23:26:16 imp Exp $"); #include "opt_zero.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/limits.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/proc.h> #include <sys/malloc.h> #include <sys/resourcevar.h> #include <sys/sched.h> #include <sys/sysctl.h> #include <sys/vnode.h> #include <vm/vm.h> #include <vm/vm_page.h> #include
<vm/vm_map.h> #ifdef ZERO_COPY_SOCKETS #include m/vm_param.h> #include <vm/vm_object.h> #endif SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); #ifdef ZERO_COPY_SOCKETS /* Declared in uipc_socket.c */ extern int so_zero_copy_receive; static int vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr, vm_offset_t uaddr) { vm_map_t map = mapa; vm_page_t kern_pg, user_pg; vm_object_t uobje
ct; vm_map_entry_t entry; vm_pindex_t upindex, kpindex; vm_prot_t prot; boolean_t wired; /* * First lookup the kernel page. */ kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); /* * XXX The vm object containing kern_pg needs locking. */ if ((vm_map_lookup(&map, uaddr, VM_PROT_WRITE, &entry, &uobject, &upindex, &prot, &wired)) != KERN_SUCCESS) { return(EFAULT); } VM_OBJECT_LOCK(uobject); if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { do vm_page_lock_queues(); while (vm_page_sleep
_if_busy(user_pg, 1, "vm_pgmoveco")); vm_page_busy(user_pg); pmap_remove_all(user_pg); vm_page_free(user_pg); } else vm_page_lock_queues(); if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) || (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) { printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), " "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex, kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 1 : 0, kern_pg->hold_count, (u_long)kern_pg->phys_addr); if ((kern_
pg->queue - kern_pg->pc) == PQ_FREE) panic("vm_pgmoveco: renaming free page"); else panic("vm_pgmoveco: renaming busy page"); } kpindex = kern_pg->pindex; vm_page_busy(kern_pg); vm_page_rename(kern_pg, uobject, upindex); vm_page_flag_clear(kern_pg, PG_BUSY); kern_pg->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); VM_OBJECT_UNLOCK(uobject); vm_map_lookup_done(map, entry); return(KERN_SUCCESS); } #endif /* ZERO_COPY_SOCKETS */ int uiomove(void *cp, int n, struct uio *uio) { struct thread *td
= curthread; struct iovec *iov; u_int cnt; int error = 0; int save = 0; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomove: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomove proc")); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Calling uiomove()"); save = td->td_pflags & TDP_DEADLKTREAT; td->td_pflags |= TDP_DEADLKTREAT; while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->uio_
iov++; uio->uio_iovcnt--; continue; } if (cnt > n) cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); if (uio->uio_rw == UIO_READ) error = copyout(cp, iov->iov_base, cnt); else error = copyin(iov->iov_base, cp, cnt); if (error) goto out; break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base +
cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -= cnt; } out: if (save == 0) td->td_pflags &= ~TDP_DEADLKTREAT; return (error); } /* * Wrapper for uiomove() that validates the arguments against a known-good * kernel buffer. Currently, uiomove accepts a signed (n) argument, which * is almost definitely a bad thing, so we catch that here as well. We * return a runtime failure, but it might be desirable to generate a runtime * assertion fa
ilure instead. */ int uiomove_frombuf(void *buf, int buflen, struct uio *uio) { unsigned int offset, n; if (uio->uio_offset < 0 || uio->uio_resid < 0 || (offset = uio->uio_offset) != uio->uio_offset) return (EINVAL); if (buflen <= 0 || offset >= buflen) return (0); if ((n = buflen - offset) > INT_MAX) return (EINVAL); return (uiomove((char *)buf + offset, n, uio)); } #ifdef ZERO_COPY_SOCKETS /* * Experimental support for zero-copy I/O */ static int userspaceco(void *cp, u_int cnt, struc
t uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; int error; iov = uio->uio_iov; if (uio->uio_rw == UIO_READ) { if ((so_zero_copy_receive != 0) && (obj != NULL) && ((cnt & PAGE_MASK) == 0) && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) && ((uio->uio_offset & PAGE_MASK) == 0) && ((((intptr_t) cp) & PAGE_MASK) == 0) && (obj->type == OBJT_DEFAULT) && (disposable != 0)) { /* SOCKET: use page-trading */ /* * We only want to call vm_pgmoveco() on * disposeable p
ages, since it gives the * kernel page to the userland process. */ error =vm_pgmoveco(&curproc->p_vmspace->vm_map, obj, (vm_offset_t)cp, (vm_offset_t)iov->iov_base); /* * If we get an error back, attempt * to use copyout() instead. The * disposable page should be freed * automatically if we weren't able to move * it into userland. */ if (error != 0) error = copyout(cp, iov->iov_base, cnt); } else { error = copyout(cp, iov->iov_base, cnt); } } else { error = copyin(iov->iov_base
, cp, cnt); } return (error); } int uiomoveco(void *cp, int n, struct uio *uio, struct vm_object *obj, int disposable) { struct iovec *iov; u_int cnt; int error; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomoveco: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomoveco proc")); while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->uio_iov++; uio->uio_iovcnt--; continue; } if (cnt > n)
cnt = n; switch (uio->uio_segflg) { case UIO_USERSPACE: if (ticks - PCPU_GET(switchticks) >= hogticks) uio_yield(); error = userspaceco(cp, cnt, uio, obj, disposable); if (error) return (error); break; case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) bcopy(cp, iov->iov_base, cnt); else bcopy(iov->iov_base, cp, cnt); break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + cnt; iov->iov_len -= cnt; uio->uio_resid -= cnt; uio->uio_offset += cnt; cp = (char *)cp + cnt; n -= cnt
; } return (0); } #endif /* ZERO_COPY_SOCKETS */ /* * Give next character to user as result of read. */ int ureadc(int c, struct uio *uio) { struct iovec *iov; char *iov_base; again: if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) panic("ureadc"); iov = uio->uio_iov; if (iov->iov_len == 0) { uio->uio_iovcnt--; uio->uio_iov++; goto again; } switch (uio->uio_segflg) { case UIO_USERSPACE: if (subyte(iov->iov_base, c) < 0) return (EFAULT); break; case UIO_SYSSPACE: iov_base = iov->iov_base; *
iov_base = c; iov->iov_base = iov_base; break; case UIO_NOCOPY: break; } iov->iov_base = (char *)iov->iov_base + 1; iov->iov_len--; uio->uio_resid--; uio->uio_offset++; return (0); } /* * General routine to allocate a hash table. */ void * hashinit(int elements, struct malloc_type *type, u_long *hashmask) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("hashinit: bad elements"); for (hashsize = 1; hashsize <= elements; hashsize <<= 1) continue; hashsiz
e >>= 1; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *hashmask = hashsize - 1; return (hashtbl); } void hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask) { LIST_HEAD(generic, generic) *hashtbl, *hp; hashtbl = vhashtbl; for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++) if (!LIST_EMPTY(hp)) panic("hashdestroy: hash not empty"); free(hashtbl, type); } static int primes[] = { 1, 13, 31, 61, 127
, 251, 509, 761, 1021, 1531, 2039, 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; #define NPRIMES (sizeof(primes) / sizeof(primes[0])) /* * General routine to allocate a prime number sized hash table. */ void * phashinit(int elements, struct malloc_type *type, u_long *nentries) { long hashsize; LIST_HEAD(generic, generic) *hashtbl; int i; if (elements <= 0) panic("phashinit: bad elements"); for (i = 1, hashsize = primes[1]; hashsize <= e
lements;) { i++; if (i == NPRIMES) break; hashsize = primes[i]; } hashsize = primes[i - 1]; hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); *nentries = hashsize; return (hashtbl); } void uio_yield(void) { struct thread *td; td = curthread; mtx_lock_spin(&sched_lock); DROP_GIANT(); sched_prio(td, td->td_ksegrp->kg_user_pri); /* XXXKSE */ mi_switch(SW_INVOL, NULL); mtx_unlock_spin(&sched_lock); PICKUP_GIANT(); } int c
opyinfrom(const void * __restrict src, void * __restrict dst, size_t len, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyin(src, dst, len); break; case UIO_SYSSPACE: bcopy(src, dst, len); break; default: panic("copyinfrom: bad seg %d\n", seg); } return (error); } int copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len, size_t * __restrict copied, int seg) { int error = 0; switch (seg) { case UIO_USERSPACE: error = copyinstr(src, dst,
len, copied); break; case UIO_SYSSPACE: error = copystr(src, dst, len, copied); break; default: panic("copyinstrfrom: bad seg %d\n", seg); } return (error); } int copyiniov(struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error) { u_int iovlen; *iov = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof (struct iovec); *iov = malloc(iovlen, M_IOV, M_WAITOK); error = copyin(iovp, *iov, iovlen); if (error) { free(*iov, M_IOV); *iov = NULL; } return (error); } int cop
yinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop) { struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); error = copyin(iovp, iov, iovlen); if (error) { free(uio, M_IOV); return (error); } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; uio->
uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } struct uio * cloneuio(struct uio *uiop) { struct uio *uio; int iovlen; iovlen = uiop->uio_iovcnt * sizeof (struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); *uio = *uiop; uio->uio_iov = (struct iovec *)(uio + 1); bcopy(uiop->uio_iov, uio->uio_iov, iovlen); return (uio); }#include <
sys/cdefs.h> __FBSDID("$FreeBSD: /repoman/r/ncvs/src/sys/kern/kern_subr.c,v 1.88.2.2 2005/01/31 23:26:16 imp Exp $"); #include "opt_zero.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/limits.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/proc.h> #include <sys/malloc.h> #include <sys/resourcevar.h> #include <sys/sched.h> #include <sys/sysctl.h> #include <sys/vnode.h> #include <vm/vm.h> #include <vm/vm_page.h> #include
<vm/vm_map.h> #ifdef ZERO_COPY_SOCKETS #include #include #endif SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); #ifdef ZERO_COPY_SOCKETS /* Declared in uipc_socket.c */ extern int so_zero_copy_receive; static int vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr, vm_offset_t uaddr) { vm_map_t map = mapa; vm_page_t kern_pg, user_pg; vm_object_t uobjec
t; vm_map_entry_t entry; vm_pindex_t upindex, kpindex; vm_prot_t prot; boolean_t wired; /* * First lookup the kernel page. */ kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); /* * XXX The vm object containing kern_pg needs locking. */ if ((vm_map_lookup(&map, uaddr, VM_PROT_WRITE, &entry, &uobject, &upindex, &prot, &wired)) != KERN_SUCCESS) { return(EFAULT); } VM_OBJECT_LOCK(uobject); if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { do vm_page_lock_queues(); while (vm_page_sleep_
if_busy(user_pg, 1, "vm_pgmoveco")); vm_page_busy(user_pg); pmap_remove_all(user_pg); vm_page_free(user_pg); } else vm_page_lock_queues(); if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) || (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) { printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), " "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex, kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 1 : 0, kern_pg->hold_count, (u_long)kern_pg->phys_addr); if ((kern_p
g->queue - kern_pg->pc) == PQ_FREE) panic("vm_pgmoveco: renaming free page"); else panic("vm_pgmoveco: renaming busy page"); } kpindex = kern_pg->pindex; vm_page_busy(kern_pg); vm_page_rename(kern_pg, uobject, upindex); vm_page_flag_clear(kern_pg, PG_BUSY); kern_pg->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); VM_OBJECT_UNLOCK(uobject); vm_map_lookup_done(map, entry); return(KERN_SUCCESS); } #endif /* ZERO_COPY_SOCKETS */ int uiomove(void *cp, int n, struct uio *uio) { struct thread *td =
curthread; struct iovec *iov; u_int cnt; int error = 0; int save = 0; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomove: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomove proc")); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Calling uiomove()"); save = td->td_pflags & TDP_DEADLKTREAT; td->td_pflags |= TDP_DEADLKTREAT; while (n > 0 && uio->uio_resid) { iov = uio->uio_iov; cnt = iov->iov_len; if (cnt == 0) { uio->uio_i

Rendering time: 17 seconds.