Index: release/i386/dokern.sh =================================================================== RCS file: /data/FreeBSD/cvs/src/release/i386/Attic/dokern.sh,v retrieving revision 1.58.2.8 diff -u -r1.58.2.8 dokern.sh --- release/i386/dokern.sh 1 Sep 2003 04:22:41 -0000 1.58.2.8 +++ release/i386/dokern.sh 8 Mar 2004 17:50:33 -0000 @@ -15,6 +15,7 @@ -e '/ICMP_BANDLIM/d' \ -e '/PROCFS/d' \ -e '/KTRACE/d' \ + -e '/SYSTRACE/d' \ -e '/SYSVMSG/d' \ -e '/SOFTUPDATES/d' \ -e '/UFS_DIRHASH/d' \ Index: sys/conf/files =================================================================== RCS file: /data/FreeBSD/cvs/src/sys/conf/files,v retrieving revision 1.340.2.142 diff -u -r1.340.2.142 files --- sys/conf/files 22 Aug 2003 20:52:47 -0000 1.340.2.142 +++ sys/conf/files 8 Mar 2004 15:43:07 -0000 @@ -772,6 +772,7 @@ kern/kern_synch.c standard kern/kern_syscalls.c standard kern/kern_sysctl.c standard +kern/kern_systrace.c optional systrace kern/kern_time.c standard kern/kern_timeout.c standard kern/kern_xxx.c standard Index: sys/conf/majors =================================================================== RCS file: /data/FreeBSD/cvs/src/sys/conf/majors,v retrieving revision 1.98.2.20 diff -u -r1.98.2.20 majors --- sys/conf/majors 13 Nov 2002 00:20:16 -0000 1.98.2.20 +++ sys/conf/majors 8 Mar 2004 15:43:07 -0000 @@ -22,6 +22,7 @@ 0 ?? 0-199 see current source 199 ?? 0-199 see current source 200 ?? entries from 200-252 are reserved for local use +251 systrace system call filtering 252 ?? 
entries from 200-252 are reserved for local use 253 reserved 253-255 see current source 255 reserved 253-255 see current source Index: sys/conf/options =================================================================== RCS file: /data/FreeBSD/cvs/src/sys/conf/options,v retrieving revision 1.191.2.58 diff -u -r1.191.2.58 options --- sys/conf/options 24 Aug 2003 08:24:37 -0000 1.191.2.58 +++ sys/conf/options 8 Mar 2004 15:43:07 -0000 @@ -516,6 +516,9 @@ HIFN_RNDTEST opt_hifn.h HIFN_NO_RNG opt_hifn.h +# options for systrace +SYSTRACE opt_systrace.h + # options for safe driver SAFE_DEBUG opt_safe.h SAFE_RNDTEST opt_safe.h Index: sys/i386/conf/GENERIC =================================================================== RCS file: /data/FreeBSD/cvs/src/sys/i386/conf/GENERIC,v retrieving revision 1.246.2.54 diff -u -r1.246.2.54 GENERIC --- sys/i386/conf/GENERIC 28 Apr 2003 03:41:46 -0000 1.246.2.54 +++ sys/i386/conf/GENERIC 8 Mar 2004 15:43:07 -0000 @@ -269,3 +269,6 @@ device firewire # FireWire bus code device sbp # SCSI over FireWire (Requires scbus and da) device fwe # Ethernet over FireWire (non-standard!) 
+ +# systrace +options SYSTRACE # system call filtering via systrace(1) Index: sys/i386/i386/trap.c =================================================================== RCS file: /data/FreeBSD/cvs/src/sys/i386/i386/trap.c,v retrieving revision 1.147.2.11 diff -u -r1.147.2.11 trap.c --- sys/i386/i386/trap.c 27 Feb 2003 19:09:59 -0000 1.147.2.11 +++ sys/i386/i386/trap.c 8 Mar 2004 15:43:07 -0000 @@ -47,6 +47,7 @@ #include "opt_ktrace.h" #include "opt_clock.h" #include "opt_trap.h" +#include "opt_systrace.h" #include #include @@ -57,9 +58,11 @@ #include #include #include +#include #include #include #include +#include #ifdef KTRACE #include #endif @@ -1172,7 +1175,15 @@ STOPEVENT(p, S_SCE, narg); /* MP aware */ - error = (*callp->sy_call)(p, args); +#ifdef SYSTRACE + if ( (error == 0) && (ISSET(p->p_flag, P_SYSTRACE)) ) { + error = systrace_enter(p, code, args, p->p_retval); + + if (error == 0) + error = (*callp->sy_call)(p, args); + } else +#endif + error = (*callp->sy_call)(p, args); /* * MP SAFE (we may or may not have the MP lock at this point) @@ -1230,6 +1241,11 @@ */ have_mplock = userret(p, &frame, sticks, have_mplock); +#ifdef SYSTRACE + if (ISSET(p->p_flag, P_SYSTRACE)) { + systrace_exit(p, code, args, p->p_retval, error); + } +#endif #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) { if (have_mplock == 0) { Index: sys/i386/linux/Makefile =================================================================== RCS file: /data/FreeBSD/cvs/src/sys/i386/linux/Makefile,v retrieving revision 1.4 diff -u -r1.4 Makefile --- sys/i386/linux/Makefile 28 Aug 1999 00:45:21 -0000 1.4 +++ sys/i386/linux/Makefile 8 Mar 2004 15:43:07 -0000 @@ -5,7 +5,8 @@ all: @echo "make linux_sysent.c only" -linux_sysent.c linux_syscall.h linux_proto.h: ../../kern/makesyscalls.sh \ +linux_syscalls.c linux_sysent.c linux_syscall.h linux_proto.h: \ + ../../kern/makesyscalls.sh \ syscalls.master syscalls.conf -mv -f linux_sysent.c linux_sysent.c.bak -mv -f linux_syscall.h linux_syscall.h.bak Index: 
sys/i386/linux/linux_proto.h =================================================================== RCS file: /data/FreeBSD/cvs/src/sys/i386/linux/linux_proto.h,v retrieving revision 1.32.2.10 diff -u -r1.32.2.10 linux_proto.h --- sys/i386/linux/linux_proto.h 2 Jan 2003 20:41:33 -0000 1.32.2.10 +++ sys/i386/linux/linux_proto.h 8 Mar 2004 15:43:07 -0000 @@ -2,8 +2,8 @@ * System call prototypes. * * DO NOT EDIT-- this file is automatically generated. - * $FreeBSD: src/sys/i386/linux/linux_proto.h,v 1.32.2.10 2003/01/02 20:41:33 kan Exp $ - * created from FreeBSD: src/sys/i386/linux/syscalls.master,v 1.30.2.7 2001/11/05 19:08:23 marcel Exp + * $FreeBSD$ + * created from FreeBSD: src/sys/i386/linux/syscalls.master,v 1.30.2.8 2003/01/02 20:41:33 kan Exp */ #ifndef _LINUX_SYSPROTO_H_ Index: sys/i386/linux/linux_syscall.h =================================================================== RCS file: /data/FreeBSD/cvs/src/sys/i386/linux/linux_syscall.h,v retrieving revision 1.27.2.10 diff -u -r1.27.2.10 linux_syscall.h --- sys/i386/linux/linux_syscall.h 2 Jan 2003 20:41:33 -0000 1.27.2.10 +++ sys/i386/linux/linux_syscall.h 8 Mar 2004 15:43:07 -0000 @@ -2,8 +2,8 @@ * System call numbers. * * DO NOT EDIT-- this file is automatically generated. - * $FreeBSD: src/sys/i386/linux/linux_syscall.h,v 1.27.2.10 2003/01/02 20:41:33 kan Exp $ - * created from FreeBSD: src/sys/i386/linux/syscalls.master,v 1.30.2.7 2001/11/05 19:08:23 marcel Exp + * $FreeBSD$ + * created from FreeBSD: src/sys/i386/linux/syscalls.master,v 1.30.2.8 2003/01/02 20:41:33 kan Exp */ #define LINUX_SYS_exit 1 Index: sys/i386/linux/linux_sysent.c =================================================================== RCS file: /data/FreeBSD/cvs/src/sys/i386/linux/linux_sysent.c,v retrieving revision 1.33.2.10 diff -u -r1.33.2.10 linux_sysent.c --- sys/i386/linux/linux_sysent.c 2 Jan 2003 20:41:33 -0000 1.33.2.10 +++ sys/i386/linux/linux_sysent.c 8 Mar 2004 15:43:07 -0000 @@ -2,8 +2,8 @@ * System call switch table. 
* * DO NOT EDIT-- this file is automatically generated. - * $FreeBSD: src/sys/i386/linux/linux_sysent.c,v 1.33.2.10 2003/01/02 20:41:33 kan Exp $ - * created from FreeBSD: src/sys/i386/linux/syscalls.master,v 1.30.2.7 2001/11/05 19:08:23 marcel Exp + * $FreeBSD$ + * created from FreeBSD: src/sys/i386/linux/syscalls.master,v 1.30.2.8 2003/01/02 20:41:33 kan Exp */ #include "opt_compat.h" Index: sys/i386/linux/syscalls.conf =================================================================== RCS file: /data/FreeBSD/cvs/src/sys/i386/linux/syscalls.conf,v retrieving revision 1.5 diff -u -r1.5 syscalls.conf --- sys/i386/linux/syscalls.conf 28 Aug 1999 00:45:25 -0000 1.5 +++ sys/i386/linux/syscalls.conf 8 Mar 2004 15:43:07 -0000 @@ -1,5 +1,5 @@ # $FreeBSD: src/sys/i386/linux/syscalls.conf,v 1.5 1999/08/28 00:45:25 peter Exp $ -sysnames="/dev/null" +sysnames="linux_syscalls.c" sysproto="linux_proto.h" sysproto_h=_LINUX_SYSPROTO_H_ syshdr="linux_syscall.h" Index: sys/kern/kern_exit.c =================================================================== RCS file: /data/FreeBSD/cvs/src/sys/kern/kern_exit.c,v retrieving revision 1.92.2.11 diff -u -r1.92.2.11 kern_exit.c --- sys/kern/kern_exit.c 13 Jan 2003 22:51:16 -0000 1.92.2.11 +++ sys/kern/kern_exit.c 8 Mar 2004 15:43:07 -0000 @@ -41,6 +41,7 @@ #include "opt_compat.h" #include "opt_ktrace.h" +#include "opt_systrace.h" #include #include @@ -66,6 +67,9 @@ #include #include #include + +#include + #include #include #include @@ -271,6 +275,12 @@ vrele(vtmp); } #endif + +#if defined(__i386__) && defined(SYSTRACE) + if (p->p_flag & P_SYSTRACE) + systrace_sys_exit(p); +#endif + /* * Release reference to text vnode */ Index: sys/kern/kern_fork.c =================================================================== RCS file: /data/FreeBSD/cvs/src/sys/kern/kern_fork.c,v retrieving revision 1.72.2.15 diff -u -r1.72.2.15 kern_fork.c --- sys/kern/kern_fork.c 28 Sep 2003 11:08:31 -0000 1.72.2.15 +++ sys/kern/kern_fork.c 8 Mar 2004 15:43:07 
-0000 @@ -40,6 +40,7 @@ */ #include "opt_ktrace.h" +#include "opt_systrace.h" #include #include @@ -59,6 +60,9 @@ #include #include #include + +#include + #include #include #include @@ -497,6 +501,11 @@ if ((p2->p_tracep = p1->p_tracep) != NULL) VREF(p2->p_tracep); } +#endif + +#if defined(__i386__) && defined(SYSTRACE) + if (p1->p_flag & P_SYSTRACE) + systrace_sys_fork(p1, p2); #endif /* Index: sys/sys/file.h =================================================================== RCS file: /data/FreeBSD/cvs/src/sys/sys/file.h,v retrieving revision 1.22.2.7 diff -u -r1.22.2.7 file.h --- sys/sys/file.h 21 Nov 2002 23:39:24 -0000 1.22.2.7 +++ sys/sys/file.h 8 Mar 2004 15:43:07 -0000 @@ -63,6 +63,7 @@ #define DTYPE_FIFO 4 /* fifo (named pipe) */ #define DTYPE_KQUEUE 5 /* event queue */ #define DTYPE_CRYPTO 6 /* crypto */ +#define DTYPE_SYSTRACE 7 /* systrace */ short f_type; /* descriptor type */ u_int f_flag; /* see fcntl.h */ struct ucred *f_cred; /* credentials associated with descriptor */ Index: sys/sys/proc.h =================================================================== RCS file: /data/FreeBSD/cvs/src/sys/sys/proc.h,v retrieving revision 1.99.2.10 diff -u -r1.99.2.10 proc.h --- sys/sys/proc.h 6 Jul 2003 16:35:47 -0000 1.99.2.10 +++ sys/sys/proc.h 8 Mar 2004 15:43:07 -0000 @@ -183,6 +183,7 @@ int p_traceflag; /* Kernel trace points. */ struct vnode *p_tracep; /* Trace to vnode. */ + void *p_systrace; /* Back pointer to systrace */ sigset_t p_siglist; /* Signals arrived but not delivered. */ @@ -277,6 +278,7 @@ /* was P_NOSWAP 0x08000 was: Do not swap upages; p->p_hold */ /* was P_PHYSIO 0x10000 was: Doing physical I/O; use p->p_hold */ + /* Should be moved to machine-dependent areas. */ #define P_OWEUPC 0x20000 /* Owe process an addupc() call at next ast. 
*/ @@ -284,7 +286,7 @@ #define P_SWAPINREQ 0x80000 /* Swapin request due to wakeup */ /* Marked a kernel thread */ -#define P_UNUSED100000 0x100000 +#define P_SYSTRACE 0x100000 /* Process system call tracing active */ #define P_KTHREADP 0x200000 /* Process is really a kernel thread */ #define P_DEADLKTREAT 0x800000 /* lock aquisition - deadlock treatment */ @@ -293,6 +295,7 @@ #define P_OLDMASK 0x2000000 /* need to restore mask before pause */ #define P_ALTSTACK 0x4000000 /* have alternate signal stack */ #define P_INEXEC 0x8000000 /* Process is in execve(). */ + /* * MOVE TO ucred.h? Index: sys/kern/kern_systrace.c =================================================================== RCS file: sys/kern/kern_systrace.c diff -N sys/kern/kern_systrace.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/kern/kern_systrace.c 25 Jan 2004 16:13:20 -0000 @@ -0,0 +1,1894 @@ +/* $NetBSD: kern_systrace.c,v 1.30 2003/06/29 22:31:25 fvdl Exp $ */ + +/* + * Copyright 2002, 2003 Niels Provos + * Copyright 2003 Dr. Rich Murphey + * Copyright 2003 Vladimir Kotal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Niels Provos. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#ifdef __NetBSD__ +__KERNEL_RCSID(0, "$NetBSD: kern_systrace.c,v 1.30 2003/06/29 22:31:25 fvdl Exp $"); +#endif +#include "opt_systrace.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __FreeBSD__5__ +#include +#endif + +#include + +#if __FreeBSD__5__ +/* XXX */ +#include +#include +#include +#else +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + + +#ifdef __FreeBSD__5__ +#define curlwp curthread +#else +#define curlwp curproc +#endif + +#define SYSTRACE_LOCK(lockp, p) lockmgr(lockp, LK_EXCLUSIVE, NULL, p) +#define SYSTRACE_UNLOCK(lockp, p) lockmgr(lockp, LK_RELEASE, NULL, p) + +#ifndef M_XDATA +#ifdef __FreeBSD__ +static MALLOC_DEFINE(M_SYSTRACE, "systrace", "Systrace data"); +#define M_XDATA M_SYSTRACE +#else +#define M_XDATA M_FILE /* XXX */ +#endif +#endif + +static d_open_t systraceopen; + +#ifdef __FreeBSD__5__ +int systracef_read(struct file *fp, struct uio *uio, + struct ucred *active_cred, int flags, + struct thread *td); +int systracef_write(struct file *fp, struct uio 
*uio, + struct ucred *active_cred, int flags, + struct thread *td); +int systracef_poll(struct file *fp, int events, + struct ucred *active_cred, struct thread *td); +#else +int systracef_poll(struct file *fp, int events, struct ucred *, + struct proc *p); +int systracef_read(struct file *, struct uio *, + struct ucred *, int flags, struct proc *p); +int systracef_write(struct file *, struct uio *, + struct ucred *, int flags, struct proc *p); +/* XXX added fcntl according to NetBSD */ +int systracef_fcntl(struct file *, u_int, void *, struct proc *); +#endif + +#ifdef __FreeBSD__5__ +int systracef_ioctl(struct file *fp, u_long com, void *data, + struct ucred *active_cred, struct thread *td); +#else +int systracef_ioctl(struct file *, u_long, caddr_t, struct proc *); +#endif + +int systracef_kqfilter(struct file *, struct knote *); + +#ifdef __FreeBSD__5__ +int systracef_stat(struct file *fp, struct stat *sb, + struct ucred *active_cred, struct thread *td); +int systracef_close(struct file *fp, struct thread *td); +#else +int systracef_stat(struct file *, struct stat *, struct proc *); +int systracef_close(struct file *, struct proc *); +#endif + +struct str_policy { /* syscall policy table; kept on the descriptor's policy list */ + TAILQ_ENTRY(str_policy) next; + + int nr; /* NOTE(review): presumably the policy number -- confirm */ + + const struct emul *emul; /* Is only valid for this emulation */ + + int refcount; /* bumped in systrace_sys_fork() when a child inherits the policy */ + + int nsysent; /* entries in sysent[]; systrace_enter() answers SYSTR_POLICY_NEVER for codes >= nsysent */ + u_char *sysent; /* policy per system call, indexed by syscall code */ +}; + +#define STR_PROC_ONQUEUE 0x01 +#define STR_PROC_WAITANSWER 0x02 +#define STR_PROC_SYSCALLRES 0x04 +#define STR_PROC_REPORT 0x08 /* Report emulation */ +#define STR_PROC_NEEDSEQNR 0x10 /* Answer must quote seqnr */ +#define STR_PROC_SETEUID 0x20 /* Elevate privileges */ +#define STR_PROC_SETEGID 0x40 + +struct str_process { /* one traced process; reached through proc->p_systrace */ + TAILQ_ENTRY(str_process) next; + + struct proc *proc; /* compared with pfind(pid) in systrace_find() */ + const struct emul *oldemul; + uid_t olduid; /* real uid sampled in systrace_enter() */ + gid_t oldgid; /* real gid sampled in systrace_enter() */ + + pid_t pid; + + struct fsystrace *parent; /* descriptor state this process is traced through */ + struct str_policy *policy; /* NULL makes systrace_enter() use SYSTR_POLICY_ASK */ + + struct systrace_replace *replace; + + int flags; /* STR_PROC_* bits */ + short answer; /* read by systrace_enter() after systrace_msg_ask() returns */ + short error; /* errno returned when answer is SYSTR_POLICY_NEVER */ + u_int16_t seqnr; /* expected reply 
sequence number */ + + uid_t seteuid; /* applied by systrace_enter() when STR_PROC_SETEUID is set */ + uid_t saveuid; /* value returned by systrace_seteuid() in systrace_enter() */ + gid_t setegid; /* applied by systrace_enter() when STR_PROC_SETEGID is set */ + gid_t savegid; /* value returned by systrace_setegid() in systrace_enter() */ +}; + +uid_t systrace_seteuid(struct proc *, uid_t); +gid_t systrace_setegid(struct proc *, gid_t); +void systrace_lock(void); +void systrace_unlock(void); + +/* Needs to be called with fst locked */ + +int systrace_attach(struct fsystrace *, pid_t); +int systrace_detach(struct str_process *); +int systrace_answer(struct str_process *, struct systrace_answer *); +int systrace_io(struct str_process *, struct systrace_io *); +int systrace_policy(struct fsystrace *, struct systrace_policy *); +int systrace_preprepl(struct str_process *, struct systrace_replace *); +int systrace_replace(struct str_process *, size_t, register_t []); +int systrace_getcwd(struct fsystrace *, struct str_process *); + +int systrace_processready(struct str_process *); +struct proc *systrace_find(struct str_process *); +struct str_process *systrace_findpid(struct fsystrace *fst, pid_t pid); +void systrace_wakeup(struct fsystrace *); +void systrace_closepolicy(struct fsystrace *, struct str_policy *); +int systrace_insert_process(struct fsystrace *, struct proc *, + struct str_process **); +struct str_policy *systrace_newpolicy(struct fsystrace *, int); +int systrace_msg_child(struct fsystrace *, struct str_process *, pid_t); +int systrace_msg_policyfree(struct fsystrace *, struct str_policy *); +int systrace_msg_ask(struct fsystrace *, struct str_process *, + int, size_t, register_t []); +int systrace_msg_result(struct fsystrace *, struct str_process *, + int, int, size_t, register_t [], register_t []); +int systrace_msg_emul(struct fsystrace *, struct str_process *); +int systrace_msg_ugid(struct fsystrace *, struct str_process *); +int systrace_make_msg(struct str_process *, int, struct str_message *); + +static struct fileops systracefops = { + systracef_read, + systracef_write, + systracef_ioctl, + systracef_poll, + systracef_kqfilter, + systracef_stat, + systracef_close +}; + +#ifdef __FreeBSD__5__ 
+#define SYSTRACE_POOL_INIT(POOL, SIZE, a, b, c, NAME, ALLOCATOR) \ + POOL = uma_zcreate(NAME, SIZE, NULL, \ + NULL, NULL, NULL, UMA_ALIGN_PTR, 0) + +#define SYSTRACE_POOL_GET_WAIT(POOL, ELEM, FLAGS) \ + ELEM = uma_zalloc(POOL, FLAGS) +#define SYSTRACE_POOL_GET(POOL, ELEM, FLAGS) \ + ELEM = uma_zalloc(POOL, FLAGS) + +#define SYSTRACE_POOL_PUT(POOL, ELEM) \ + uma_zfree(POOL, ELEM) +#define PR_NOWAIT M_NOWAIT +#define PR_WAITOK M_WAITOK + +uma_zone_t systr_proc_pl; +uma_zone_t systr_policy_pl; +uma_zone_t systr_msgcontainer_pl; +#else +#define SYSTRACE_POOL_INIT(POOL, SIZE, a, b, c, NAME, ALLOCATOR) \ + POOL = zinit(NAME, SIZE, 0, 0, 10) +/* XXX tweak number of pages to grow zone; nentries and flags are integers, hence 0 rather than NULL */ + +#define SYSTRACE_POOL_GET(POOL, ELEM, FLAGS) \ + ELEM = zalloc(POOL) +/* XXX FLAGS are unused, we cannot get flags from zone(9) in RELENG-4 */ + +/* return one element to the pool */ +#define SYSTRACE_POOL_PUT(POOL, ELEM) \ + zfree(POOL, ELEM) + +vm_zone_t systr_proc_pl; +vm_zone_t systr_policy_pl; +vm_zone_t systr_msgcontainer_pl; +#endif + + + +int systrace_debug = 0; +struct lock systrace_lck; + +#ifdef __FreeBSD__5__ +/* XXX devfs in FreeBSD 5.x */ +#else +#define CDEV_MAJOR 251 +static struct cdevsw systrace_cdevsw = { + /* open */ systraceopen, + /* close */ noclose, + /* read */ noread, + /* write */ nowrite, + /* ioctl */ noioctl, + /* poll */ nopoll, + /* mmap */ nommap, + /* strategy */ nostrategy, + /* name */ "systrace", + /* maj */ CDEV_MAJOR, + /* dump */ nodump, + /* psize */ nopsize, + /* flags */ 0, + /* bmaj */ -1, + /* kqfilter */ nokqfilter, +}; +#endif + +#define DPRINTF(y) if (systrace_debug) printf y; /* NOTE(review): expands to a bare if statement plus ';' -- always call it as its own statement, never as the unbraced body of an if/else */ + +/* ARGSUSED */ +int +systracef_read(struct file *fp, +#ifdef __FreeBSD__5__ + off_t *poff, +#endif + struct uio *uio, struct ucred *active_cred +#if defined(__NetBSD__) || defined(__FreeBSD__) + , int flags +#endif +#ifdef __FreeBSD__5__ + , struct thread *tp +#else + , struct proc *p +#endif +) +{ + struct fsystrace *fst = (struct fsystrace *)fp->f_data; + struct 
str_msgcontainer *cont; + int error = 0; + /* Hands exactly one queued str_message to the reader; any other request size is EINVAL. With nothing queued: returns 0 when no process is traced any more, EAGAIN for FNONBLOCK, otherwise sleeps on fst and retries. */ + if (uio->uio_resid != sizeof(struct str_message)) + return (EINVAL); + + again: + systrace_lock(); + SYSTRACE_LOCK(&fst->lock, curlwp); /* XXX this should probably be curproc */ + systrace_unlock(); + if ((cont = TAILQ_FIRST(&fst->messages)) != NULL) { + error = uiomove((caddr_t)&cont->msg, + sizeof(struct str_message), uio); + if (!error) { + TAILQ_REMOVE(&fst->messages, cont, next); + if (!SYSTR_MSG_NOPROCESS(cont)) + CLR(cont->strp->flags, STR_PROC_ONQUEUE); + SYSTRACE_POOL_PUT(systr_msgcontainer_pl, cont); + + } + } else if (TAILQ_FIRST(&fst->processes) == NULL) { + /* EOF situation */ + ; + } else { + if (fp->f_flag & FNONBLOCK) + error = EAGAIN; + else { + SYSTRACE_UNLOCK(&fst->lock, curlwp); + error = tsleep(fst, PWAIT|PCATCH, "systrrd", 0); + if (error) + goto out; + goto again; + } + + } + + SYSTRACE_UNLOCK(&fst->lock, curlwp); + out: + return (error); +} + +/* ARGSUSED */ +int +systracef_write(struct file *fp, +#ifdef __FreeBSD__5__ + off_t *poff, +#endif + struct uio *uio, struct ucred *active_cred +#if defined(__NetBSD__) || defined(__FreeBSD__) + , int flags +#endif +#ifdef __FreeBSD__5__ + , struct thread *tp +#else + , struct proc *p +#endif +) +{ + return (EIO); /* writes are always rejected */ +} + +#define POLICY_VALID(x) ((x) == SYSTR_POLICY_PERMIT || \ + (x) == SYSTR_POLICY_ASK || \ + (x) == SYSTR_POLICY_NEVER) + +/* ARGSUSED */ +int +systracef_ioctl(struct file *fp, u_long cmd, + caddr_t data, +#ifdef __FreeBSD__5__ + struct ucred *active_cred, struct thread *threadp +#else + struct proc *p +#endif +) +{ +#ifdef __FreeBSD__5__ + struct proc *p = threadp->td_proc; +#endif + int ret = 0; + struct fsystrace *fst = (struct fsystrace *)fp->f_data; + struct filedesc *fdp; + struct str_process *strp = NULL; + pid_t pid = 0; + + DPRINTF(("systrace in systracef_ioctl()\n")); + + switch (cmd) { + case FIONBIO: + case FIOASYNC: + return (0); + + case STRIOCDETACH: + case STRIOCREPORT: + pid = *(pid_t *)data; + if (!pid) + ret = EINVAL; + break; + case 
STRIOCANSWER: + pid = ((struct systrace_answer *)data)->stra_pid; + if (!pid) + ret = EINVAL; + break; + case STRIOCIO: + pid = ((struct systrace_io *)data)->strio_pid; + if (!pid) + ret = EINVAL; + break; + case STRIOCGETCWD: + pid = *(pid_t *)data; + if (!pid) + ret = EINVAL; + break; + case STRIOCATTACH: + case STRIOCRESCWD: + case STRIOCPOLICY: + break; + case STRIOCREPLACE: + pid = ((struct systrace_replace *)data)->strr_pid; + if (!pid) + ret = EINVAL; + break; + default: + ret = EINVAL; + break; + } + + if (ret) + return (ret); + /* Lock hand-over: take the global systrace lock, then this descriptor's lock, then drop the global one. */ + systrace_lock(); + SYSTRACE_LOCK(&fst->lock, curlwp); + systrace_unlock(); + if (pid) { /* the request named a pid: it must be traced on this descriptor, else ESRCH */ + strp = systrace_findpid(fst, pid); + if (strp == NULL) { + ret = ESRCH; + goto unlock; + } + } + + switch (cmd) { + case STRIOCATTACH: + pid = *(pid_t *)data; + if (!pid) + ret = EINVAL; + else + ret = systrace_attach(fst, pid); + DPRINTF(("%s: attach to %u: %d\n", __func__, pid, ret)); + break; + case STRIOCDETACH: + ret = systrace_detach(strp); + break; + case STRIOCREPORT: + SET(strp->flags, STR_PROC_REPORT); + break; + case STRIOCANSWER: + ret = systrace_answer(strp, (struct systrace_answer *)data); + break; + case STRIOCIO: + ret = systrace_io(strp, (struct systrace_io *)data); + break; + case STRIOCPOLICY: + ret = systrace_policy(fst, (struct systrace_policy *)data); + break; + case STRIOCREPLACE: + ret = systrace_preprepl(strp, (struct systrace_replace *)data); + break; + case STRIOCRESCWD: + if (!fst->fd_pid) { /* nothing saved to restore */ + ret = EINVAL; + break; + } + + fdp = p->p_fd; + + /* Release cwd from other process */ + if (fdp->fd_cdir) + vrele(fdp->fd_cdir); + if (fdp->fd_rdir) + vrele(fdp->fd_rdir); + /* This restores the cwd we had before */ + fdp->fd_cdir = fst->fd_cdir; + fdp->fd_rdir = fst->fd_rdir; + + /* Note that we are normal again */ + fst->fd_pid = 0; + fst->fd_cdir = fst->fd_rdir = NULL; + break; + case STRIOCGETCWD: + ret = systrace_getcwd(fst, strp); + break; + default: + ret = EINVAL; + break; + } + + unlock: + 
SYSTRACE_UNLOCK(&fst->lock, curlwp); + + return (ret); +} + +/* XXX use systracef_fcntl ? + + it is not part of the systracefops table (yet) +*/ +#if defined(__NetBSD__) || defined(__FreeBSD__) +/* ARGSUSED */ +int +systracef_fcntl(struct file *fp, u_int cmd, void *data, struct proc *p) +{ + /* FNONBLOCK and FASYNC are accepted as no-ops; every other command is EOPNOTSUPP */ + if (cmd == FNONBLOCK || cmd == FASYNC) + return 0; + + return (EOPNOTSUPP); +} +#endif + +#if defined(__NetBSD__) || defined(__FreeBSD__) +int +systracef_poll(struct file *fp, int events, +#ifdef __FreeBSD__5__ + struct ucred *active_cred, struct thread *p +#else + struct ucred *active_cred, struct proc *p +#endif +) +{ + struct fsystrace *fst = (struct fsystrace *)fp->f_data; + int revents = 0; + /* Only readability is reported: set when the message queue is non-empty, otherwise the caller is recorded on fst->si. */ + if ((events & (POLLIN | POLLRDNORM)) == 0) + return (revents); + + systrace_lock(); + SYSTRACE_LOCK(&fst->lock, p); + systrace_unlock(); + if (!TAILQ_EMPTY(&fst->messages)) + revents |= events & (POLLIN | POLLRDNORM); + if (revents == 0) + selrecord(p, &fst->si); + SYSTRACE_UNLOCK(&fst->lock, p); + + return (revents); +} +#endif + +/* ARGSUSED */ +int +systracef_kqfilter(struct file *fp, struct knote *kn) +{ + return (1); /* unconditional; no knote is attached here */ +} + + +/* ARGSUSED */ +int +systracef_stat(struct file *fp, struct stat *sb, +#ifdef __FreeBSD__5__ + struct ucred *active_cred, struct thread *p +#else + struct proc *p +#endif +) +{ + return (EOPNOTSUPP); +} + +/* ARGSUSED */ +int +systracef_close(struct file *fp, +#ifdef __FreeBSD__5__ + struct thread *p +#else + struct proc *p +#endif +) +{ + struct fsystrace *fst = (struct fsystrace *)fp->f_data; + struct str_process *strp; + struct str_msgcontainer *cont; + struct str_policy *strpol; + + systrace_lock(); + SYSTRACE_LOCK(&fst->lock, curlwp); + systrace_unlock(); + + /* Untrace all processes */ + for (strp = TAILQ_FIRST(&fst->processes); strp; + strp = TAILQ_FIRST(&fst->processes)) { + struct proc *q = strp->proc; /* saved before the detach call; q is used afterwards */ + + systrace_detach(strp); + psignal(q, SIGKILL); + } + + /* Clean up fork and exit messages */ + for (cont = TAILQ_FIRST(&fst->messages); cont; + 
cont = TAILQ_FIRST(&fst->messages)) { + TAILQ_REMOVE(&fst->messages, cont, next); + SYSTRACE_POOL_PUT(systr_msgcontainer_pl, cont); + } + + /* Clean up all policies */ + for (strpol = TAILQ_FIRST(&fst->policies); strpol; + strpol = TAILQ_FIRST(&fst->policies)) + systrace_closepolicy(fst, strpol); + + /* Release vnodes */ + if (fst->fd_cdir) + vrele(fst->fd_cdir); + if (fst->fd_rdir) + vrele(fst->fd_rdir); + SYSTRACE_UNLOCK(&fst->lock, curlwp); + + FREE(fp->f_data, M_XDATA); + fp->f_data = NULL; + + return (0); +} + +void +systrace_lock(void) +{ + SYSTRACE_LOCK(&systrace_lck, curlwp); /* global lock, exclusive */ +} + +void +systrace_unlock(void) +{ + SYSTRACE_UNLOCK(&systrace_lck, curlwp); +} + +void +systrace_init(void) +{ + DPRINTF(("making systrace device for FreeBSD\n")); + make_dev(&systrace_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "systrace"); + lockinit(&systrace_lck, PLOCK, "systrace", 0, 0); + + SYSTRACE_POOL_INIT(systr_proc_pl, sizeof(struct str_process), 0, 0, 0, + "strprocpl", NULL); + SYSTRACE_POOL_INIT(systr_policy_pl, sizeof(struct str_policy), 0, 0, 0, + "strpolpl", NULL); + SYSTRACE_POOL_INIT(systr_msgcontainer_pl, sizeof(struct str_msgcontainer), + 0, 0, 0, "strmsgpl", NULL); +} + +int +systraceopen(dev_t dev, int flag, int mode, +#ifdef __FreeBSD__5__ + struct thread *threadp +#else + struct proc *p +#endif +) +{ +#ifdef __FreeBSD__5__ + struct proc *p = threadp->td_proc; +#endif + struct fsystrace *fst; + struct file *fp; + int error, fd; + /* Cloning open: builds a new struct file backed by systracefops and a zeroed fsystrace, records the opener's privilege and real ids, stores the new descriptor in p_dupfd and returns ENXIO. A falloc() failure is returned unchanged. */ + DPRINTF(("systrace open in kernel\n")); + + /* falloc() will use the descriptor for us. 
*/ + if ((error = falloc( +#ifdef __FreeBSD__5__ + threadp, +#else + p, +#endif + &fp, &fd)) != 0) + return (error); + + DPRINTF(("allocated fd %d for systrace\n", fd)); + + MALLOC(fst, struct fsystrace *, sizeof(*fst), M_XDATA, M_WAITOK); + + memset(fst, 0, sizeof(struct fsystrace)); + lockinit(&fst->lock, PLOCK, "systrace", 0, 0); + + TAILQ_INIT(&fst->processes); + TAILQ_INIT(&fst->messages); + TAILQ_INIT(&fst->policies); + + if (suser( +#ifdef __FreeBSD__5__ + threadp +#else + p +#endif + ) == 0) + fst->issuser = 1; + /* Record the opener's REAL ids: systrace_enter() compares them with the traced process's p_cred->p_ruid/p_rgid. */ + fst->p_ruid = p->p_cred->p_ruid; + fst->p_rgid = p->p_cred->p_rgid; + + fp->f_flag = FREAD | FWRITE; + fp->f_type = +#ifdef __FreeBSD__ + /* XXX NetBSD change log: + + Rename DTYPE_SYSTRACE to DTYPE_MISC. DTYPE_MISC can now be used by + any misc. pseudo-device which requires cloning semantics. + + no DTYPE_MISC in 4.x + */ + DTYPE_SYSTRACE; +#else + DTYPE_MISC; +#endif + fp->f_ops = &systracefops; + fp->f_data = (caddr_t) fst; + + p->p_dupfd = fd; /* XXX return fd in open("/dev/systrace", O_RDONLY) */ + + return (ENXIO); +} + +void +systrace_wakeup(struct fsystrace *fst) +{ + wakeup((caddr_t)fst); /* sleepers in systracef_read() */ + selwakeup(&fst->si); /* pollers recorded by systracef_poll() */ +} + +struct proc * +systrace_find(struct str_process *strp) +{ + struct proc *proc; + + DPRINTF(("%s: Trying to find systr process %p\n", __func__, strp)); + + if ((proc = pfind(strp->pid)) == NULL) + return (NULL); + + if (proc != strp->proc) + return (NULL); + + if (!ISSET(proc->p_flag, P_SYSTRACE)) + return (NULL); + + return (proc); +} + +void +systrace_sys_exit(struct proc *proc) +{ + struct str_process *strp; + struct fsystrace *fst; + + systrace_lock(); + strp = proc->p_systrace; + if (strp != NULL) { + fst = strp->parent; + SYSTRACE_LOCK(&fst->lock, curlwp); + systrace_unlock(); + + /* Insert Exit message */ + systrace_msg_child(fst, strp, -1); + + systrace_detach(strp); + SYSTRACE_UNLOCK(&fst->lock, curlwp); + } else + systrace_unlock(); + CLR(proc->p_flag, P_SYSTRACE); +} + +void +systrace_sys_fork(struct proc 
*oldproc, struct proc *p) +{ + struct str_process *oldstrp, *strp; + struct fsystrace *fst; + + systrace_lock(); + oldstrp = oldproc->p_systrace; + if (oldstrp == NULL) { + systrace_unlock(); + return; + } + + fst = oldstrp->parent; + SYSTRACE_LOCK(&fst->lock, curlwp); + systrace_unlock(); + + if (systrace_insert_process(fst, p, &strp)) { + /* We need to kill the child */ + psignal(p, SIGKILL); + goto out; + } + + /* Reference policy */ + if ((strp->policy = oldstrp->policy) != NULL) + strp->policy->refcount++; + + /* Insert fork message */ + systrace_msg_child(fst, oldstrp, p->p_pid); + out: + SYSTRACE_UNLOCK(&fst->lock, curlwp); +} + +/* Syscall-entry hook. Returns 0 when the system call may run, otherwise the errno the caller must return without running it (EINVAL when no str_process is attached). May sleep in systrace_msg_ask() when the policy is SYSTR_POLICY_ASK. `code' comes from userland as a signed register_t and is range-checked before it indexes any table. */ +int +systrace_enter( +#ifdef __FreeBSD__5__ + struct thread *threadp, +#else + struct proc *p, +#endif + register_t code, void *v, register_t retval[]) +{ +#ifdef __FreeBSD__5__ + struct proc *p = threadp->td_proc; +#endif + const struct sysent *callp; + struct str_process *strp; + struct str_policy *strpolicy; + struct fsystrace *fst; + struct pcred *pc; + int policy, error = 0, maycontrol = 0, issuser = 0; + + systrace_lock(); + DPRINTF(("%s: proc %p, pid %d, syscall code: %d\n", __func__, p, p->p_pid, code)); + strp = p->p_systrace; + if (strp == NULL) { + systrace_unlock(); + return (EINVAL); + } + + + fst = strp->parent; + +#ifdef __FreeBSD__5__ + SYSTRACE_LOCK(&fst->lock, threadp); +#else + SYSTRACE_LOCK(&fst->lock, p); +#endif + + systrace_unlock(); + + /* + * We can not monitor a SUID process unless we are root, + * but we wait until it executes something unprivileged. + * A non-root user may only monitor if the real uid and + * real gid match the monitored process. Changing the + * uid or gid causes P_SUGID to be set. 
+ */ + if (fst->issuser) { + maycontrol = 1; + issuser = 1; + } else if (!(p->p_flag & P_SUGID)) { + maycontrol = fst->p_ruid == p->p_cred->p_ruid && + fst->p_rgid == p->p_cred->p_rgid; + } + + if (!maycontrol) { + policy = SYSTR_POLICY_PERMIT; + } else { + /* Find out current policy */ + if ((strpolicy = strp->policy) == NULL) + policy = SYSTR_POLICY_ASK; + else { + if (code < 0 || code >= strpolicy->nsysent) /* a negative code must not index sysent[] */ + policy = SYSTR_POLICY_NEVER; + else + policy = strpolicy->sysent[code]; + } + } + /* Stay inside sv_table: an out-of-range code uses entry 0, as the i386 syscall() dispatcher does. */ + callp = (p->p_sysent->sv_table) + ((code < 0 || code >= p->p_sysent->sv_size) ? 0 : code); + + /* Fast-path */ + if (policy != SYSTR_POLICY_ASK) { + if (policy != SYSTR_POLICY_PERMIT) { + if (policy > 0) + error = policy; + else + error = EPERM; + } + strp->oldemul = NULL; +#ifdef __FreeBSD__5__ + SYSTRACE_UNLOCK(&fst->lock, threadp); +#else + SYSTRACE_UNLOCK(&fst->lock, p); +#endif + return (error); + } + + /* Puts the current process to sleep, return unlocked */ + error = systrace_msg_ask(fst, strp, code, + (callp->sy_narg & SYF_ARGMASK) * sizeof(register_t), + v); + + /* lock has been released in systrace_msg_ask() */ + fst = NULL; + /* We might have detached by now for some reason */ + if (!error && (strp = p->p_systrace) != NULL) { + /* XXX - do I need to lock here? 
*/ + if (strp->answer == SYSTR_POLICY_NEVER) { + error = strp->error; + if (strp->replace != NULL) { + free(strp->replace, M_XDATA); + strp->replace = NULL; + } + } else { + if (ISSET(strp->flags, STR_PROC_SYSCALLRES)) { +#ifndef __FreeBSD__ + CLR(strp->flags, STR_PROC_SYSCALLRES); +#endif + } + /* Replace the arguments if necessary */ + if (strp->replace != NULL) { + error = systrace_replace(strp, + (callp->sy_narg & SYF_ARGMASK) * sizeof(register_t), + v); + } + } + } + + systrace_lock(); + if ((strp = p->p_systrace) == NULL) + goto out; + + if (error) { + strp->oldemul = NULL; + goto out; + } + + pc = p->p_cred; + strp->olduid = pc->p_ruid; + strp->oldgid = pc->p_rgid; + strp->oldemul = p->p_emuldata; /* XXX */ + + /* Elevate privileges as desired */ + if (issuser) { + if (ISSET(strp->flags, STR_PROC_SETEUID)) + strp->saveuid = systrace_seteuid(p, strp->seteuid); + if (ISSET(strp->flags, STR_PROC_SETEGID)) + strp->savegid = systrace_setegid(p, strp->setegid); + } else + CLR(strp->flags, STR_PROC_SETEUID|STR_PROC_SETEGID); + + out: + systrace_unlock(); + return (error); +} + +void +systrace_exit( +#ifdef __FreeBSD__5__ + struct thread *threadp, +#else + struct proc *p, +#endif + register_t code, void *v, register_t retval[], int error) +{ +#ifdef __FreeBSD__5__ + struct proc *p = threadp->td_proc; +#endif + const struct sysent *callp; + struct str_process *strp; + struct fsystrace *fst; + + /* Report change in emulation */ + systrace_lock(); + strp = p->p_systrace; + DPRINTF(("systrace_exit: 1. 
strp == %p, strp->oldemul == %p\n", \ + strp, strp->oldemul)); + if (strp == NULL) { + systrace_unlock(); + return; + } + DPRINTF(("exit syscall %lu, oldemul %p\n", (u_long)code, strp->oldemul)); + + /* Return to old privileges */ + if (ISSET(strp->flags, STR_PROC_SETEUID)) { + if (p->p_cred->pc_ucred->cr_uid == strp->seteuid) + systrace_seteuid(p, strp->saveuid); + CLR(strp->flags, STR_PROC_SETEUID); + } + if (ISSET(strp->flags, STR_PROC_SETEGID)) { + if (p->p_cred->pc_ucred->cr_gid == strp->setegid) + systrace_setegid(p, strp->savegid); + CLR(strp->flags, STR_PROC_SETEGID); + } + + if (p->p_flag & P_SUGID) { + if ((fst = strp->parent) == NULL || !fst->issuser) { + systrace_unlock(); + return; + } + } + + /* See if we should force a report */ + if (ISSET(strp->flags, STR_PROC_REPORT)) { + CLR(strp->flags, STR_PROC_REPORT); + strp->oldemul = NULL; + } + +#ifndef __FreeBSD__ + /* XXX emulation isssue */ + if (p->p_emul != strp->oldemul && strp != NULL) { + fst = strp->parent; + SYSTRACE_LOCK(&fst->lock, p); + systrace_unlock(); + + /* Old policy is without meaning now */ + if (strp->policy) { + systrace_closepolicy(fst, strp->policy); + strp->policy = NULL; + } + systrace_msg_emul(fst, strp); + } else +#endif + systrace_unlock(); + + /* Report if effective uid or gid changed */ + systrace_lock(); + strp = p->p_systrace; + if (strp != NULL && (strp->olduid != p->p_cred->p_ruid + || strp->oldgid != p->p_cred->p_rgid)) { + fst = strp->parent; +#ifdef __FreeBSD__5__ + SYSTRACE_LOCK(&fst->lock, threadp); +#else + SYSTRACE_LOCK(&fst->lock, p); +#endif + + systrace_unlock(); + + systrace_msg_ugid(fst, strp); + } else + systrace_unlock(); + + /* Report result from system call */ + systrace_lock(); + strp = p->p_systrace; + DPRINTF(("%s: strp %p, strp->flags %x\n", \ + __func__, strp, strp->flags)); + if (strp != NULL && ISSET(strp->flags, STR_PROC_SYSCALLRES)) { + CLR(strp->flags, STR_PROC_SYSCALLRES); + fst = strp->parent; +#ifdef __FreeBSD__5__ + SYSTRACE_LOCK(&fst->lock, 
threadp); +#else + SYSTRACE_LOCK(&fst->lock, p); +#endif + systrace_unlock(); + DPRINTF(("will ask syscall %lu, strp %p\n", (u_long)code, strp)); + + callp = (p->p_sysent->sv_table) + code; + + systrace_msg_result(fst, strp, error, code, + (callp->sy_narg & SYF_ARGMASK) * sizeof(register_t), + v, retval); + } else { + DPRINTF(("will not ask syscall %lu, strp %p\n", (u_long)code, strp)); + systrace_unlock(); + } +} + +uid_t +systrace_seteuid(struct proc *p, uid_t euid) +{ + struct pcred *pc = p->p_cred; + uid_t oeuid = p->p_ucred->cr_uid; + + if (pc->pc_ucred->cr_uid == euid) + return (oeuid); + + /* + * Copy credentials so other references do not see our changes. + */ + pc->pc_ucred = crcopy(pc->pc_ucred); + pc->pc_ucred->cr_uid = euid; + setsugid(p); + + return (oeuid); +} + +gid_t +systrace_setegid(struct proc *p, gid_t egid) +{ + struct pcred *pc = p->p_cred; + gid_t oegid = p->p_ucred->cr_gid; + + if (pc->pc_ucred->cr_gid == egid) + return (oegid); + + /* + * Copy credentials so other references do not see our changes. 
+ */ + pc->pc_ucred = crcopy(pc->pc_ucred); + pc->pc_ucred->cr_gid = egid; + setsugid(p); + + return (oegid); +} + +/* Called with fst locked */ + +int +systrace_answer(struct str_process *strp, struct systrace_answer *ans) +{ + int error = 0; + + DPRINTF(("%s: %u: policy %d\n", __func__, + ans->stra_pid, ans->stra_policy)); + + if (!POLICY_VALID(ans->stra_policy)) { + error = EINVAL; + goto out; + } + + /* Check if answer is in sync with us */ + if (ans->stra_seqnr != strp->seqnr) { + error = ESRCH; + goto out; + } + + DPRINTF(("%s: seq nr ok for strp %p\n", __func__, strp)); + + if ((error = systrace_processready(strp)) != 0) + goto out; + + DPRINTF(("%s: filling strp %p with answer\n", __func__, strp)); + + strp->answer = ans->stra_policy; + strp->error = ans->stra_error; + if (!strp->error) + strp->error = EPERM; + if (ISSET(ans->stra_flags, SYSTR_FLAGS_RESULT)) + SET(strp->flags, STR_PROC_SYSCALLRES); + + /* See if we should elevate privileges for this system call */ + if (ISSET(ans->stra_flags, SYSTR_FLAGS_SETEUID)) { + SET(strp->flags, STR_PROC_SETEUID); + strp->seteuid = ans->stra_seteuid; + } + if (ISSET(ans->stra_flags, SYSTR_FLAGS_SETEGID)) { + SET(strp->flags, STR_PROC_SETEGID); + strp->setegid = ans->stra_setegid; + } + + + /* Clearing the flag indicates to the process that it woke up */ + CLR(strp->flags, STR_PROC_WAITANSWER); + wakeup(strp); + out: + + return (error); +} + +int +systrace_policy(struct fsystrace *fst, struct systrace_policy *pol) +{ + struct str_policy *strpol; + struct str_process *strp; + + switch(pol->strp_op) { + case SYSTR_POLICY_NEW: + DPRINTF(("%s: new, ents %d\n", __func__, + pol->strp_maxents)); + if (pol->strp_maxents <= 0 || pol->strp_maxents > 1024) + return (EINVAL); + strpol = systrace_newpolicy(fst, pol->strp_maxents); + if (strpol == NULL) + return (ENOBUFS); + pol->strp_num = strpol->nr; + break; + case SYSTR_POLICY_ASSIGN: + DPRINTF(("%s: %d -> pid %d\n", __func__, + pol->strp_num, pol->strp_pid)); + + /* Find right 
policy by number */ + TAILQ_FOREACH(strpol, &fst->policies, next) + if (strpol->nr == pol->strp_num) + break; + if (strpol == NULL) + return (EINVAL); + + strp = systrace_findpid(fst, pol->strp_pid); + if (strp == NULL) + return (EINVAL); + +#ifndef __FreeBSD__ + /* XXX Check that emulation matches */ + if (strpol->emul && strpol->emul != strp->proc->p_emul) + return (EINVAL); +#endif + if (strp->policy) + systrace_closepolicy(fst, strp->policy); + strp->policy = strpol; + strpol->refcount++; +#ifndef __FreeBSD__ + /* XXX Record emulation for this policy */ + if (strpol->emul == NULL) + strpol->emul = strp->proc->p_emul; +#endif + break; + case SYSTR_POLICY_MODIFY: + DPRINTF(("%s: %d: code %d -> policy %d\n", __func__, + pol->strp_num, pol->strp_code, pol->strp_policy)); + if (!POLICY_VALID(pol->strp_policy)) + return (EINVAL); + TAILQ_FOREACH(strpol, &fst->policies, next) + if (strpol->nr == pol->strp_num) + break; + if (strpol == NULL) + return (EINVAL); + if (pol->strp_code < 0 || pol->strp_code >= strpol->nsysent) + return (EINVAL); + strpol->sysent[pol->strp_code] = pol->strp_policy; + break; + default: + return (EINVAL); + } + + return (0); +} + +int +systrace_processready(struct str_process *strp) +{ + if (ISSET(strp->flags, STR_PROC_ONQUEUE)) + return (EBUSY); + + if (!ISSET(strp->flags, STR_PROC_WAITANSWER)) + return (EBUSY); + + /* XXX - ignore until systrace knows about lwps. 
:-(
+	if (strp->proc->p_stat != LSSLEEP)
+		return (EBUSY);
+	*/
+	return (0);
+}
+
+/*
+ * Make the calling process adopt the current and root directory of the
+ * traced process strp.
+ *
+ * The caller's own fd_cdir/fd_rdir and the pid of the traced process are
+ * saved in fst first (presumably so that a later STRIOCRESCWD can put them
+ * back -- the restore path is not in this function).  A reference is taken
+ * on each borrowed vnode.
+ *
+ * Returns 0 on success, the error from systrace_processready() when the
+ * traced process is not waiting for an answer, or EINVAL when either
+ * process has no file descriptor table.
+ */
+int
+systrace_getcwd(struct fsystrace *fst, struct str_process *strp)
+{
+	struct filedesc *myfdp, *fdp;
+	int error;
+
+	DPRINTF(("%s: %d\n", __func__, strp->pid));
+
+	error = systrace_processready(strp);
+	if (error)
+		return (error);
+
+#ifdef __FreeBSD__5__
+	myfdp = curlwp->td_proc->p_fd;
+#else
+	myfdp = curlwp->p_fd;
+#endif
+	fdp = strp->proc->p_fd;
+	if (myfdp == NULL || fdp == NULL)
+		return (EINVAL);
+
+	/* Store our current values */
+	fst->fd_pid = strp->pid;
+	fst->fd_cdir = myfdp->fd_cdir;
+	fst->fd_rdir = myfdp->fd_rdir;
+
+	if ((myfdp->fd_cdir = fdp->fd_cdir) != NULL)
+		VREF(myfdp->fd_cdir);
+	if ((myfdp->fd_rdir = fdp->fd_rdir) != NULL)
+		VREF(myfdp->fd_rdir);
+
+	return (0);
+}
+
+/*
+ * Transfer memory between the calling process and the traced process strp
+ * as described by io.
+ *
+ * io->strio_op selects the direction (SYSTR_READ or SYSTR_WRITE),
+ * io->strio_offs is used as the uio offset (i.e. the address inside the
+ * traced process), io->strio_addr is the caller's user-space buffer and
+ * io->strio_len the requested length.  On return io->strio_len has been
+ * reduced by the residual count left in the uio.
+ *
+ * Returns EINVAL for an unknown operation, the error from
+ * systrace_processready(), or whatever the procfs memory routine returns.
+ */
+int
+systrace_io(struct str_process *strp, struct systrace_io *io)
+{
+	/*
+	 * The guard must be spelled exactly like every other 5.x guard in
+	 * this file; with a different spelling p would be a struct proc
+	 * while the procfs_doprocmem() branch below is compiled.
+	 */
+#ifdef __FreeBSD__5__
+	struct thread *p = curthread;
+#else
+	struct proc *p = curproc;
+#endif
+	struct proc *t = strp->proc;
+	struct uio uio;
+	struct iovec iov;
+	int error = 0;
+
+	DPRINTF(("%s: %u: %p(%lu)\n", __func__,
+	    io->strio_pid, io->strio_offs, (u_long)io->strio_len));
+
+	switch (io->strio_op) {
+	case SYSTR_READ:
+		uio.uio_rw = UIO_READ;
+		break;
+	case SYSTR_WRITE:
+		uio.uio_rw = UIO_WRITE;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	error = systrace_processready(strp);
+	if (error)
+		goto out;
+
+	iov.iov_base = io->strio_addr;
+	iov.iov_len = io->strio_len;
+	uio.uio_iov = &iov;
+	uio.uio_iovcnt = 1;
+	uio.uio_offset = (off_t)(long)io->strio_offs;
+	uio.uio_resid = io->strio_len;
+	uio.uio_segflg = UIO_USERSPACE;
+	/* NOTE(review): 5.x names this field differently -- confirm there */
+	uio.uio_procp = p;
+	/* XXX routines from procfs, ensure that PROCFS is compiled in */
+#ifdef __FreeBSD__5__
+	error = procfs_doprocmem(p, t, NULL, NULL, &uio);
+#else
+	error = procfs_domem(p, t, NULL, &uio);
+#endif
+	io->strio_len -= uio.uio_resid;
+ out:
+
+	return (error);
+}
+
+int
+systrace_attach(struct fsystrace *fst, pid_t pid)
+{
+	int error = 0;
+	struct proc *proc, *p =
curproc; + + if ((proc = pfind(pid)) == NULL) { + error = ESRCH; + goto out; + } + + if (ISSET(proc->p_flag, P_INEXEC)) { + error = EAGAIN; + goto out; + } + + /* + * You can't attach to a process if: + * (1) it's the process that's doing the attaching, + */ + if (proc->p_pid == p->p_pid) { + error = EINVAL; + goto out; + } + + /* + * (2) it's a system process + */ + if (ISSET(proc->p_flag, P_SYSTEM)) { + error = EPERM; + goto out; + } + + /* + * (3) it's being traced already + */ + if (ISSET(proc->p_flag, P_SYSTRACE)) { + error = EBUSY; + goto out; + } + + /* + * (4) it's not owned by you, or the last exec + * gave us setuid/setgid privs (unless + * you're root), or... + * + * [Note: once P_SUGID gets set in execve(), it stays + * set until the process does another execve(). Hence + * this prevents a setuid process which revokes it's + * special privilidges using setuid() from being + * traced. This is good security.] + */ +#ifdef __FreeBSD__5__ + if ((proc->p_ucred->cr_ruid != p->p_ucred->cr_ruid || + ISSET(proc->p_flag, P_SUGID)) && + (error = suser(curthread)) != 0) + goto out; +#else + if ((proc->p_cred->p_ruid != p->p_cred->p_ruid || + ISSET(proc->p_flag, P_SUGID)) && + (error = suser(p)) != 0) + goto out; +#endif + /* + * (5) ...it's init, which controls the security level + * of the entire system, and the system was not + * compiled with permanently insecure mode turned + * on. 
+	 */
+	if ((proc->p_pid == 1) && (securelevel > -1)) {
+		error = EPERM;
+		goto out;
+	}
+
+	error = systrace_insert_process(fst, proc, NULL);
+
+ out:
+	return (error);
+}
+
+/*
+ * Prepare to replace arguments.
+ *
+ * Validates the replacement description repl handed in by the monitor,
+ * copies the header and the strr_len bytes at repl->strr_base into a
+ * single kernel allocation and hangs it off strp->replace, with
+ * strr_base re-pointed at the kernel copy.  Any replacement left over
+ * from an earlier call is freed first.
+ *
+ * Returns 0 on success, the error from systrace_processready(), EINVAL
+ * when the description is malformed or larger than the allowed maximum,
+ * or the copyin() error.
+ */
+int
+systrace_preprepl(struct str_process *strp, struct systrace_replace *repl)
+{
+	/* Upper bound for the replacement data, and so for every piece of it */
+	const size_t maxlen = 2048;
+	size_t len;
+	int i, ret = 0;
+
+	ret = systrace_processready(strp);
+	if (ret)
+		return (ret);
+
+	if (strp->replace != NULL) {
+		free(strp->replace, M_XDATA);
+		strp->replace = NULL;
+	}
+
+	if (repl->strr_nrepl < 0 || repl->strr_nrepl > SYSTR_MAXARGS)
+		return (EINVAL);
+
+	for (i = 0, len = 0; i < repl->strr_nrepl; i++) {
+		/* Zero length: strr_off[i] is an immediate value, no data */
+		if (repl->strr_offlen[i] == 0)
+			continue;
+		/*
+		 * Offsets and lengths come straight from the monitor.
+		 * Bound each one before adding them up, otherwise the
+		 * sums below can wrap around and a huge strr_offlen[i]
+		 * would still pass, only to be used as a copyout()
+		 * length in systrace_replace().
+		 */
+		if (repl->strr_offlen[i] > maxlen ||
+		    repl->strr_off[i] > maxlen)
+			return (EINVAL);
+		len += repl->strr_offlen[i];
+		if (len > maxlen)
+			return (EINVAL);
+		if (repl->strr_offlen[i] + repl->strr_off[i] > len)
+			return (EINVAL);
+	}
+
+	/* Make sure that the length adds up */
+	if (repl->strr_len != len)
+		return (EINVAL);
+
+	/* Check against a maximum length */
+	if (repl->strr_len > maxlen)
+		return (EINVAL);
+
+	strp->replace = (struct systrace_replace *)
+	    malloc(sizeof(struct systrace_replace) + len, M_XDATA, M_WAITOK);
+
+	memcpy(strp->replace, repl, sizeof(struct systrace_replace));
+	ret = copyin(repl->strr_base, strp->replace + 1, len);
+	if (ret) {
+		free(strp->replace, M_XDATA);
+		strp->replace = NULL;
+		return (ret);
+	}
+
+	/* Adjust the offset */
+	repl = strp->replace;
+	repl->strr_base = (caddr_t)(repl + 1);
+
+	return (0);
+}
+
+/*
+ * Memory allocation from the 'stack gap' region below the stack.
+ */
+static __inline caddr_t stackgap_init(void);
+static __inline void *stackgap_alloc(caddr_t *, size_t);
+
+#define szsigcode (*(curproc->p_sysent->sv_szsigcode))
+
+/* Return the lowest address of the stack gap of the current process. */
+static __inline caddr_t
+stackgap_init()
+{
+	return (caddr_t)(PS_STRINGS - szsigcode - SPARE_USRSPACE);
+}
+
+/*
+ * Carve sz bytes (rounded up with ALIGN) out of the stack gap at *sgp and
+ * advance *sgp past them.  Returns NULL, leaving *sgp untouched, when the
+ * request would run past the end of the gap.
+ */
+static __inline void *
+stackgap_alloc(sgp, sz)
+	caddr_t *sgp;
+	size_t sz;
+{
+	void *p = (void *) *sgp;
+
+	sz = ALIGN(sz);
+	if (*sgp + sz > (caddr_t)(PS_STRINGS - szsigcode))
+		return NULL;
+	*sgp += sz;
+	return p;
+}
+
+/*
+ * Replace the arguments with arguments from the monitoring process.
+ *
+ * strp->replace must be non-NULL (the caller checks).  Immediate
+ * replacements (strr_offlen[i] == 0) are stored directly into args[];
+ * for the others the data is copied out into the stack gap and the
+ * argument is replaced with the address of that copy.
+ *
+ * The replacement buffer is always freed and strp->replace cleared
+ * before returning.  Returns 0 on success, ENOMEM when the stack gap
+ * cannot hold the data, or EINVAL for a bad argument index or a failed
+ * copyout().
+ */
+int
+systrace_replace(struct str_process *strp, size_t argsize, register_t args[])
+{
+	struct systrace_replace *repl = strp->replace;
+	caddr_t sg, kdata, udata, kbase, ubase;
+	int i, maxarg, ind, ret = 0;
+
+	maxarg = argsize/sizeof(register_t);
+	sg = stackgap_init();
+	if ((ubase = stackgap_alloc(&sg, repl->strr_len)) == NULL) {
+		/*
+		 * Must fail the system call: reporting success here would
+		 * let it run with the original, unreplaced arguments.
+		 */
+		ret = ENOMEM;
+		goto out;
+	}
+
+	kbase = repl->strr_base;
+	for (i = 0; i < maxarg && i < repl->strr_nrepl; i++) {
+		ind = repl->strr_argind[i];
+		if (ind < 0 || ind >= maxarg) {
+			ret = EINVAL;
+			goto out;
+		}
+		if (repl->strr_offlen[i] == 0) {
+			args[ind] = repl->strr_off[i];
+			continue;
+		}
+		kdata = kbase + repl->strr_off[i];
+		udata = ubase + repl->strr_off[i];
+		if (copyout(kdata, udata, repl->strr_offlen[i])) {
+			ret = EINVAL;
+			goto out;
+		}
+
+		/* Replace the argument with the new address */
+		args[ind] = (register_t)(intptr_t)udata;
+	}
+
+ out:
+	free(repl, M_XDATA);
+	strp->replace = NULL;
+	return (ret);
+}
+
+struct str_process *
+systrace_findpid(struct fsystrace *fst, pid_t pid)
+{
+	struct str_process *strp;
+	struct proc *proc = NULL;
+
+	TAILQ_FOREACH(strp, &fst->processes, next)
+		if (strp->pid == pid)
+			break;
+
+	if (strp == NULL)
+		return (NULL);
+
+	proc = systrace_find(strp);
+
+	return (proc ?
strp : NULL); +} + +int +systrace_detach(struct str_process *strp) +{ + struct proc *proc; + struct fsystrace *fst = NULL; + int error = 0; + + DPRINTF(("%s: Trying to detach from %d\n", __func__, strp->pid)); + + if ((proc = systrace_find(strp)) != NULL) { + CLR(proc->p_flag, P_SYSTRACE); + proc->p_systrace = NULL; + } else + error = ESRCH; + + if (ISSET(strp->flags, STR_PROC_WAITANSWER)) { + CLR(strp->flags, STR_PROC_WAITANSWER); + wakeup(strp); + } + + fst = strp->parent; + systrace_wakeup(fst); + + TAILQ_REMOVE(&fst->processes, strp, next); + fst->nprocesses--; + + if (strp->policy) + systrace_closepolicy(fst, strp->policy); + if (strp->replace) + free(strp->replace, M_XDATA); + SYSTRACE_POOL_PUT(systr_proc_pl, strp); + + return (error); +} + +void +systrace_closepolicy(struct fsystrace *fst, struct str_policy *policy) +{ + if (--policy->refcount) + return; + + fst->npolicies--; + + if (policy->nsysent) + free(policy->sysent, M_XDATA); + + TAILQ_REMOVE(&fst->policies, policy, next); + + SYSTRACE_POOL_PUT(systr_policy_pl, policy); +} + + +int +systrace_insert_process(struct fsystrace *fst, struct proc *proc, + struct str_process **pstrp) +{ + struct str_process *strp; + + SYSTRACE_POOL_GET(systr_proc_pl, strp, PR_NOWAIT); + if (strp == NULL) + return (ENOBUFS); + + memset((caddr_t)strp, 0, sizeof(struct str_process)); + strp->pid = proc->p_pid; + strp->proc = proc; + strp->parent = fst; + + TAILQ_INSERT_TAIL(&fst->processes, strp, next); + fst->nprocesses++; + + proc->p_systrace = strp; + SET(proc->p_flag, P_SYSTRACE); + + /* Pass the new pointer back to the caller */ + if (pstrp != NULL) + *pstrp = strp; + + return (0); +} + +struct str_policy * +systrace_newpolicy(struct fsystrace *fst, int maxents) +{ + struct str_policy *pol; + int i; + + if (fst->npolicies > SYSTR_MAX_POLICIES && !fst->issuser) { + struct str_policy *tmp; + + /* Try to find a policy for freeing */ + TAILQ_FOREACH(tmp, &fst->policies, next) { + if (tmp->refcount == 1) + break; + } + + if 
(tmp == NULL)
+			return (NULL);
+
+		/* Notify userland about freed policy */
+		systrace_msg_policyfree(fst, tmp);
+		/* Free this policy */
+		systrace_closepolicy(fst, tmp);
+	}
+
+	SYSTRACE_POOL_GET(systr_policy_pl, pol, PR_NOWAIT);
+	if (pol == NULL)
+		return (NULL);
+
+	DPRINTF(("%s: allocating %d -> %lu\n", __func__,
+	    maxents, (u_long)maxents * sizeof(int)));
+
+	memset((caddr_t)pol, 0, sizeof(struct str_policy));
+
+	pol->sysent = (u_char *)malloc(maxents * sizeof(u_char),
+	    M_XDATA, M_WAITOK);
+	pol->nsysent = maxents;
+	for (i = 0; i < maxents; i++)
+		pol->sysent[i] = SYSTR_POLICY_ASK;
+
+	fst->npolicies++;
+	pol->nr = fst->npolicynr++;
+	pol->refcount = 1;
+
+	TAILQ_INSERT_TAIL(&fst->policies, pol, next);
+
+	return (pol);
+}
+
+/*
+ * Queue a SYSTR_MSG_ASK message for system call `code' of strp and wait
+ * for the answer.
+ *
+ * argsize is the size of the argument block in bytes; at most
+ * SYSTR_MAXARGS register-sized arguments are copied from args[].
+ * Returns whatever systrace_make_msg() returns.
+ */
+int
+systrace_msg_ask(struct fsystrace *fst, struct str_process *strp,
+    int code, size_t argsize, register_t args[])
+{
+	struct str_message msg;
+	struct str_msg_ask *msg_ask = &msg.msg_data.msg_ask;
+	int i;
+
+	/*
+	 * systrace_make_msg() copies the complete msg_data union into the
+	 * queued message, not just the fields set below.  Clear it so the
+	 * unused argument slots, rval[] and result do not carry stale
+	 * kernel stack contents along.
+	 */
+	memset(&msg, 0, sizeof(msg));
+
+	msg_ask->code = code;
+	msg_ask->argsize = argsize;
+	DPRINTF(("%s: copying args (#%d)\n", __func__, \
+	    argsize/sizeof(register_t)));
+	for (i = 0; i < (argsize/sizeof(register_t)) && i < SYSTR_MAXARGS; i++)
+		msg_ask->args[i] = args[i];
+
+	return (systrace_make_msg(strp, SYSTR_MSG_ASK, &msg));
+}
+
+/*
+ * Queue a SYSTR_MSG_RES message reporting the outcome of system call
+ * `code' of strp: its error, its (up to SYSTR_MAXARGS) arguments and the
+ * two return values in rval[].  Returns whatever systrace_make_msg()
+ * returns.
+ */
+int
+systrace_msg_result(struct fsystrace *fst, struct str_process *strp,
+    int error, int code, size_t argsize, register_t args[], register_t rval[])
+{
+	struct str_message msg;
+	struct str_msg_ask *msg_ask = &msg.msg_data.msg_ask;
+	int i;
+
+	/* The whole union is copied by systrace_make_msg(); see above */
+	memset(&msg, 0, sizeof(msg));
+
+	msg_ask->code = code;
+	msg_ask->argsize = argsize;
+	msg_ask->result = error;
+	for (i = 0; i < (argsize/sizeof(register_t)) && i < SYSTR_MAXARGS; i++)
+		msg_ask->args[i] = args[i];
+
+	msg_ask->rval[0] = rval[0];
+	msg_ask->rval[1] = rval[1];
+
+	return (systrace_make_msg(strp, SYSTR_MSG_RES, &msg));
+}
+
+int
+systrace_msg_emul(struct fsystrace *fst, struct str_process *strp)
+{
+	struct str_message msg;
+
+	struct str_msg_emul *msg_emul = &msg.msg_data.msg_emul;
+	struct proc
*p = strp->proc;
+#if 0
+	memcpy(msg_emul->emul, p->p_emul->e_name, SYSTR_EMULEN); /* XXX */
+#endif
+	return (systrace_make_msg(strp, SYSTR_MSG_EMUL, &msg));
+}
+
+/*
+ * Queue a SYSTR_MSG_UGID message carrying the current real uid and gid of
+ * the traced process strp.  Returns whatever systrace_make_msg() returns.
+ */
+int
+systrace_msg_ugid(struct fsystrace *fst, struct str_process *strp)
+{
+	struct str_message msg;
+	struct str_msg_ugid *msg_ugid = &msg.msg_data.msg_ugid;
+	struct proc *p = strp->proc;
+
+	/*
+	 * systrace_make_msg() copies the complete msg_data union, which is
+	 * much larger than the two ids; do not pass stack garbage along.
+	 */
+	memset(&msg, 0, sizeof(msg));
+
+	msg_ugid->uid = p->p_cred->p_ruid;
+	msg_ugid->gid = p->p_cred->p_rgid;
+
+	return (systrace_make_msg(strp, SYSTR_MSG_UGID, &msg));
+}
+
+/*
+ * Wrap the payload of tmsg in a message of the given type for process
+ * strp, queue it on the owning fsystrace and sleep until it is answered.
+ *
+ * Called with fst->lock held; the lock is released before sleeping and is
+ * not re-acquired.  Each call bumps strp->seqnr, which the answer has to
+ * match.
+ *
+ * Returns 0 once STR_PROC_WAITANSWER has been cleared or the process has
+ * been detached, ERESTART when the sleep is interrupted.
+ */
+int
+systrace_make_msg(struct str_process *strp, int type, struct str_message *tmsg)
+{
+	struct str_msgcontainer *cont;
+	struct str_message *msg;
+	struct fsystrace *fst = strp->parent;
+	int st;
+
+	DPRINTF(("%s: constructing msg\n", __func__));
+
+	SYSTRACE_POOL_GET(systr_msgcontainer_pl, cont, PR_WAITOK);
+	memset(cont, 0, sizeof(struct str_msgcontainer));
+	cont->strp = strp;
+
+	msg = &cont->msg;
+
+	/* Copy the already filled in fields */
+	memcpy(&msg->msg_data, &tmsg->msg_data, sizeof(msg->msg_data));
+
+	/* Add the extra fields to the message */
+	msg->msg_seqnr = ++strp->seqnr;
+	msg->msg_type = type;
+	msg->msg_pid = strp->pid;
+	if (strp->policy)
+		msg->msg_policy = strp->policy->nr;
+	else
+		msg->msg_policy = -1;
+
+	SET(strp->flags, STR_PROC_WAITANSWER);
+	if (ISSET(strp->flags, STR_PROC_ONQUEUE)) {
+		/*
+		 * The process is already flagged as queued, so this
+		 * container is not inserted.  Nothing else references
+		 * it; hand it back to the pool instead of losing it.
+		 */
+		SYSTRACE_POOL_PUT(systr_msgcontainer_pl, cont);
+	} else {
+		TAILQ_INSERT_TAIL(&fst->messages, cont, next);
+		SET(strp->flags, STR_PROC_ONQUEUE);
+	}
+
+	systrace_wakeup(fst);
+
+	/* Release the lock - XXX */
+#ifdef __FreeBSD__5__
+	SYSTRACE_UNLOCK(&fst->lock, FIRST_THREAD_IN_PROC(strp->proc)); /* XXX */
+#else
+	SYSTRACE_UNLOCK(&fst->lock, strp->proc);
+#endif
+
+	while (1) {
+		st = tsleep(strp, PWAIT | PCATCH, "systrmsg", 0);
+		if (st != 0)
+			return (ERESTART);
+		/* If we detach, then everything is permitted */
+		if ((strp = curproc->p_systrace) == NULL)
+			return (0);
+		if (!ISSET(strp->flags, STR_PROC_WAITANSWER))
+			break;
+	}
+
+	return (0);
+}
+
+int
+systrace_msg_child(struct fsystrace *fst, struct
str_process *strp, pid_t npid) +{ + struct str_msgcontainer *cont; + struct str_message *msg; + struct str_msg_child *msg_child; + + SYSTRACE_POOL_GET(systr_msgcontainer_pl, cont, PR_WAITOK); + KASSERT(cont != NULL, + ("systrace_msg_child: cont not zallocated")); + memset(cont, 0, sizeof(struct str_msgcontainer)); + cont->strp = strp; + + msg = &cont->msg; + + DPRINTF(("%s: %p: pid %d -> pid %d\n", __func__, + msg, strp->pid, npid)); + + msg_child = &msg->msg_data.msg_child; + + msg->msg_type = SYSTR_MSG_CHILD; + msg->msg_pid = strp->pid; + if (strp->policy) + msg->msg_policy = strp->policy->nr; + else + msg->msg_policy = -1; + msg_child->new_pid = npid; + + TAILQ_INSERT_TAIL(&fst->messages, cont, next); + + systrace_wakeup(fst); + + return (0); +} + +int +systrace_msg_policyfree(struct fsystrace *fst, struct str_policy *strpol) +{ + struct str_msgcontainer *cont; + struct str_message *msg; + + SYSTRACE_POOL_GET(systr_msgcontainer_pl, cont, PR_WAITOK); + memset(cont, 0, sizeof(struct str_msgcontainer)); + + msg = &cont->msg; + + DPRINTF(("%s: free %d\n", __func__, strpol->nr)); + + msg->msg_type = SYSTR_MSG_POLICYFREE; + msg->msg_policy = strpol->nr; + + TAILQ_INSERT_TAIL(&fst->messages, cont, next); + + systrace_wakeup(fst); + + return (0); +} + +/* bpf.c code analogy */ +static void systrace_drvinit __P((void *unused)); + +static void +systrace_drvinit(unused) + void *unused; +{ + DPRINTF(("running systrace_init()\n")); + systrace_init(); +} + +SYSINIT(systracedev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,systrace_drvinit,NULL) Index: sys/sys/systrace.h =================================================================== RCS file: sys/sys/systrace.h diff -N sys/sys/systrace.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/sys/systrace.h 25 Jan 2004 16:13:20 -0000 @@ -0,0 +1,229 @@ +/* $NetBSD: systrace.h,v 1.10 2003/06/03 05:24:00 provos Exp $ */ + +/* + * Copyright 2002 Niels Provos + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Niels Provos. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYSTRACE_H_ +#define _SYSTRACE_H_ + +#include +#include +#include +#if 0 +XXX 5.x specific ? 
+#include +#endif + +#define SYSTR_EMULEN 8 /* sync with sys proc */ + +struct str_msg_emul { + char emul[SYSTR_EMULEN]; +}; + +struct str_msg_ugid { + uid_t uid; + gid_t gid; +}; + +#define SYSTR_MAX_POLICIES 64 +#define SYSTR_MAXARGS 64 + +struct str_msg_ask { + int32_t code; + int32_t argsize; + register_t args[SYSTR_MAXARGS]; + register_t rval[2]; + int32_t result; +}; + +/* Queued on fork or exit of a process */ + +struct str_msg_child { + pid_t new_pid; +}; + +#define SYSTR_MSG_ASK 1 +#define SYSTR_MSG_RES 2 +#define SYSTR_MSG_EMUL 3 +#define SYSTR_MSG_CHILD 4 +#define SYSTR_MSG_UGID 5 +#define SYSTR_MSG_POLICYFREE 6 + +#define SYSTR_MSG_NOPROCESS(x) \ + ((x)->msg.msg_type == SYSTR_MSG_CHILD || \ + (x)->msg.msg_type == SYSTR_MSG_POLICYFREE) + +struct str_message { + int32_t msg_type; + pid_t msg_pid; + u_int16_t msg_seqnr; /* answer has to match seqnr */ + int16_t msg_policy; + union { + struct str_msg_emul msg_emul; + struct str_msg_ugid msg_ugid; + struct str_msg_ask msg_ask; + struct str_msg_child msg_child; + } msg_data; +}; + +struct str_process; +struct str_msgcontainer { + TAILQ_ENTRY(str_msgcontainer) next; + struct str_process *strp; + + struct str_message msg; +}; + + +struct systrace_answer { + pid_t stra_pid; + u_int16_t stra_seqnr; + int16_t reserved; + uid_t stra_seteuid; /* elevated privileges for system call */ + uid_t stra_setegid; + int32_t stra_policy; + int32_t stra_error; + int32_t stra_flags; +}; + +#define SYSTR_READ 1 +#define SYSTR_WRITE 2 + +struct systrace_io { + pid_t strio_pid; + int32_t strio_op; + void *strio_offs; + void *strio_addr; + size_t strio_len; +}; + +#define SYSTR_POLICY_NEW 1 +#define SYSTR_POLICY_ASSIGN 2 +#define SYSTR_POLICY_MODIFY 3 + +struct systrace_policy { + int32_t strp_op; + int32_t strp_num; + union { + struct { + int16_t code; + int16_t policy; + } assign; + pid_t pid; + int32_t maxents; + } strp_data; +}; + +#define strp_pid strp_data.pid +#define strp_maxents strp_data.maxents +#define strp_code 
strp_data.assign.code +#define strp_policy strp_data.assign.policy + +struct systrace_replace { + pid_t strr_pid; + int32_t strr_nrepl; + caddr_t strr_base; /* Base memory */ + size_t strr_len; /* Length of memory */ + int32_t strr_argind[SYSTR_MAXARGS]; + size_t strr_off[SYSTR_MAXARGS]; + size_t strr_offlen[SYSTR_MAXARGS]; +}; + +#define STRIOCATTACH _IOW('s', 101, pid_t) +#define STRIOCDETACH _IOW('s', 102, pid_t) +#define STRIOCANSWER _IOW('s', 103, struct systrace_answer) +#define STRIOCIO _IOWR('s', 104, struct systrace_io) +#define STRIOCPOLICY _IOWR('s', 105, struct systrace_policy) +#define STRIOCGETCWD _IOW('s', 106, pid_t) +#define STRIOCRESCWD _IO('s', 107) +#define STRIOCREPORT _IOW('s', 108, pid_t) +#define STRIOCREPLACE _IOW('s', 109, struct systrace_replace) + +#define SYSTR_POLICY_ASK 0 +#define SYSTR_POLICY_PERMIT 1 +#define SYSTR_POLICY_NEVER 2 + +#define SYSTR_FLAGS_RESULT 0x001 +#define SYSTR_FLAGS_SETEUID 0x002 +#define SYSTR_FLAGS_SETEGID 0x004 + +#ifdef _KERNEL +/* XXX: these shouldn't be here. 
*/ +#define SET(t, f) ((t) |= (f)) +#define ISSET(t, f) ((t) & (f)) +#define CLR(t, f) ((t) &= ~(f)) + +struct fsystrace { + struct lock lock; + struct selinfo si; + + TAILQ_HEAD(strprocessq, str_process) processes; + size_t nprocesses; + + TAILQ_HEAD(strpolicyq, str_policy) policies; + + TAILQ_HEAD(strmessageq, str_msgcontainer) messages; + + size_t npolicynr; + size_t npolicies; + + int issuser; + uid_t p_ruid; + gid_t p_rgid; + + /* cwd magic */ + pid_t fd_pid; + struct vnode *fd_cdir; + struct vnode *fd_rdir; +}; + +/* Internal prototypes */ + +int systrace_enter( +#ifdef __FreeBSD__5__ + struct thread *threadp, +#else + struct proc *p, +#endif + register_t, void *, register_t []); + +void systrace_exit( +#ifdef __FreeBSD__5__ + struct thread *threadp, +#else + struct proc *p, +#endif + register_t, void *, register_t [], int); +void systrace_sys_exit(struct proc *); +void systrace_sys_fork(struct proc *, struct proc *); +void systrace_init(void); + +#endif /* _KERNEL */ +#endif /* !_SYSTRACE_H_ */ Index: sys/sys/tree.h =================================================================== RCS file: sys/sys/tree.h diff -N sys/sys/tree.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/sys/tree.h 25 Jan 2004 16:13:20 -0000 @@ -0,0 +1,675 @@ +/* $OpenBSD: tree.h,v 1.6 2002/06/11 22:09:52 provos Exp $ */ +/* + * Copyright 2002 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_TREE_H_ +#define _SYS_TREE_H_ + +/* + * This file defines data structures for different types of trees: + * splay trees and red-black trees. + * + * A splay tree is a self-organizing data structure. Every operation + * on the tree causes a splay to happen. The splay moves the requested + * node to the root of the tree and partly rebalances it. + * + * This has the benefit that request locality causes faster lookups as + * the requested nodes move to the top of the tree. On the other hand, + * every lookup causes memory writes. + * + * The Balance Theorem bounds the total access time for m operations + * and n inserts on an initially empty tree as O((m + n)lg n). The + * amortized cost for a sequence of m accesses to a splay tree is O(lg n); + * + * A red-black tree is a binary search tree with the node color as an + * extra attribute. It fulfills a set of conditions: + * - every search path from the root to a leaf consists of the + * same number of black nodes, + * - each red node (except for the root) has a black parent, + * - each leaf node is black. + * + * Every operation on a red-black tree is bounded as O(lg n). + * The maximum height of a red-black tree is 2lg (n+1). 
+ */
+/* Declares struct name, the tree head: a single pointer to the root node. */
+#define SPLAY_HEAD(name, type) \
+struct name { \
+	struct type *sph_root; /* root of the tree */ \
+}
+/* Static initializer for an empty head; the root argument is not used. */
+#define SPLAY_INITIALIZER(root) \
+	{ NULL }
+/* Run-time initializer: empties the tree by clearing the root pointer. */
+#define SPLAY_INIT(root) do { \
+	(root)->sph_root = NULL; \
+} while (0)
+/* Anonymous struct to embed in every node: the two child links. */
+#define SPLAY_ENTRY(type) \
+struct { \
+	struct type *spe_left; /* left element */ \
+	struct type *spe_right; /* right element */ \
+}
+/* Accessors; field names the SPLAY_ENTRY member inside the node. */
+#define SPLAY_LEFT(elm, field) (elm)->field.spe_left
+#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right
+#define SPLAY_ROOT(head) (head)->sph_root
+#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL)
+
+/* SPLAY_ROTATE_{LEFT,RIGHT} expect tmp to hold SPLAY_{RIGHT,LEFT} of the root; tmp becomes the new root */
+#define SPLAY_ROTATE_RIGHT(head, tmp, field) do { \
+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \
+	SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
+	(head)->sph_root = tmp; \
+} while (0)
+/* Mirror image of SPLAY_ROTATE_RIGHT: the old root ends up as tmp's left child. */
+#define SPLAY_ROTATE_LEFT(head, tmp, field) do { \
+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \
+	SPLAY_LEFT(tmp, field) = (head)->sph_root; \
+	(head)->sph_root = tmp; \
+} while (0)
+/* Hangs the root on tmp's left link, moves tmp to it, steps the root to its left child; tmp must be an lvalue. */
+#define SPLAY_LINKLEFT(head, tmp, field) do { \
+	SPLAY_LEFT(tmp, field) = (head)->sph_root; \
+	tmp = (head)->sph_root; \
+	(head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \
+} while (0)
+/* Hangs the root on tmp's right link, moves tmp to it, steps the root to its right child; tmp must be an lvalue. */
+#define SPLAY_LINKRIGHT(head, tmp, field) do { \
+	SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
+	tmp = (head)->sph_root; \
+	(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \
+} while (0)
+/* Ends the left/right chains with the root's subtrees, then gives the root the chains hanging off node as children. */
+#define SPLAY_ASSEMBLE(head, node, left, right, field) do { \
+	SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \
+	SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\
+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \
+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \
+} while (0)
+
+/* Generates prototypes and inline functions */
+/* cmp is called with two struct type pointers; its result is tested as <0, ==0 or >0. */
+#define SPLAY_PROTOTYPE(name, type, field, cmp) \
+void name##_SPLAY(struct name *, struct type *); \
+void name##_SPLAY_MINMAX(struct name *, int); \
+struct type *name##_SPLAY_INSERT(struct name *, struct type *); \
+struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \
+ \
+/* Finds the node with the same key as elm (splays the tree first); NULL if absent or empty */ \
+static __inline struct type * \
+name##_SPLAY_FIND(struct name *head, struct type *elm) \
+{ \
+	if (SPLAY_EMPTY(head)) \
+		return(NULL); \
+	name##_SPLAY(head, elm); \
+	if ((cmp)(elm, (head)->sph_root) == 0) \
+		return (head->sph_root); \
+	return (NULL); \
+} \
+/* Splays on elm, then returns the leftmost node of elm's right subtree, or NULL if elm has no right child */ \
+static __inline struct type * \
+name##_SPLAY_NEXT(struct name *head, struct type *elm) \
+{ \
+	name##_SPLAY(head, elm); \
+	if (SPLAY_RIGHT(elm, field) != NULL) { \
+		elm = SPLAY_RIGHT(elm, field); \
+		while (SPLAY_LEFT(elm, field) != NULL) { \
+			elm = SPLAY_LEFT(elm, field); \
+		} \
+	} else \
+		elm = NULL; \
+	return (elm); \
+} \
+/* Runs name##_SPLAY_MINMAX and returns the resulting root; no emptiness check here (SPLAY_MIN/SPLAY_MAX do it) */ \
+static __inline struct type * \
+name##_SPLAY_MIN_MAX(struct name *head, int val) \
+{ \
+	name##_SPLAY_MINMAX(head, val); \
+	return (SPLAY_ROOT(head)); \
+}
+
+/* Main splay operation.
+ * Moves node close to the key of elm to top
+ */
+#define SPLAY_GENERATE(name, type, field, cmp) \
+struct type * \
+name##_SPLAY_INSERT(struct name *head, struct type *elm) \
+{ /* returns NULL once elm is the new root, or the existing node whose key compares equal */ \
+	if (SPLAY_EMPTY(head)) { \
+		SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \
+	} else { \
+		int __comp; \
+		name##_SPLAY(head, elm); \
+		__comp = (cmp)(elm, (head)->sph_root); \
+		if(__comp < 0) { /* elm sorts before the root: root becomes elm's right child */ \
+			SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\
+			SPLAY_RIGHT(elm, field) = (head)->sph_root; \
+			SPLAY_LEFT((head)->sph_root, field) = NULL; \
+		} else if (__comp > 0) { /* elm sorts after the root: root becomes elm's left child */ \
+			SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\
+			SPLAY_LEFT(elm, field) = (head)->sph_root; \
+			SPLAY_RIGHT((head)->sph_root, field) = NULL; \
+		} else \
+			return ((head)->sph_root); \
+	} \
+	(head)->sph_root = (elm); \
+	return (NULL); \
+} \
+/* Unlinks the node whose key compares equal to elm; returns the elm argument, or NULL if no such node */ \
+struct type * \
+name##_SPLAY_REMOVE(struct name *head, struct type *elm) \
+{ \
+	struct type *__tmp; \
+	if (SPLAY_EMPTY(head)) \
+		return (NULL); \
+	name##_SPLAY(head, elm); \
+	if ((cmp)(elm, (head)->sph_root) == 0) { \
+		if (SPLAY_LEFT((head)->sph_root, field) == NULL) { \
+			(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\
+		} else { \
+			__tmp = SPLAY_RIGHT((head)->sph_root, field); \
+			(head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\
+			name##_SPLAY(head, elm); /* re-splay the left subtree; the next line overwrites the new root's right link */ \
+			SPLAY_RIGHT((head)->sph_root, field) = __tmp; \
+		} \
+		return (elm); \
+	} \
+	return (NULL); \
+} \
+/* Makes the node equal to elm, or the last node reached searching for it, the root; the root is passed to cmp unchecked, so head must not be empty */ \
+void \
+name##_SPLAY(struct name *head, struct type *elm) \
+{ \
+	struct type __node, *__left, *__right, *__tmp; \
+	int __comp; \
+/* __node is a scratch node; its links anchor the two chains built by SPLAY_LINK{LEFT,RIGHT} */ \
+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+	__left = __right = &__node; \
+/* loop until the root compares equal to elm or the search runs off the tree */ \
+	while ((__comp = (cmp)(elm, (head)->sph_root))) { \
+		if (__comp < 0) { \
+			__tmp = SPLAY_LEFT((head)->sph_root, field); \
+			if (__tmp == NULL) \
+				break; \
+			if ((cmp)(elm, __tmp) < 0){ \
+				SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+				if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+					break; \
+			} \
+			SPLAY_LINKLEFT(head, __right, field); \
+		} else if (__comp > 0) { \
+			__tmp = SPLAY_RIGHT((head)->sph_root, field); \
+			if (__tmp == NULL) \
+				break; \
+			if ((cmp)(elm, __tmp) > 0){ \
+				SPLAY_ROTATE_LEFT(head, __tmp, field); \
+				if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+					break; \
+			} \
+			SPLAY_LINKRIGHT(head, __left, field); \
+		} \
+	} \
+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
+} \
+ \
+/* Splay with either the minimum or the maximum element \
+ * Used to find min or max element in a non-empty tree; with __comp == 0 the loop never ends. \
+ */ \
+void name##_SPLAY_MINMAX(struct name *head, int __comp) \
+{ \
+	struct type __node, *__left, *__right, *__tmp; \
+/* same scratch-node setup as name##_SPLAY */ \
+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+	__left = __right = &__node; \
+/* __comp is never reassigned, so the walk keeps one direction until a child link is NULL */ \
+	while (1) { \
+		if (__comp < 0) { \
+			__tmp = SPLAY_LEFT((head)->sph_root, field); \
+			if (__tmp == NULL) \
+				break; \
+			if (__comp < 0){ \
+				SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+				if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+					break; \
+			} \
+			SPLAY_LINKLEFT(head, __right, field); \
+		} else if (__comp > 0) { \
+			__tmp = SPLAY_RIGHT((head)->sph_root, field); \
+			if (__tmp == NULL) \
+				break; \
+			if (__comp > 0) { \
+				SPLAY_ROTATE_LEFT(head, __tmp, field); \
+				if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+					break; \
+			} \
+			SPLAY_LINKRIGHT(head, __left, field); \
+		} \
+	} \
+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
+}
+/* Direction values passed to name##_SPLAY_MIN_MAX by SPLAY_MIN and SPLAY_MAX. */
+#define SPLAY_NEGINF -1
+#define SPLAY_INF 1
+/* Front ends: paste the tree name onto the generated function names; SPLAY_MIN/SPLAY_MAX yield NULL for an empty tree. */
+#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y)
+#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y)
+#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y)
+#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y)
+#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL \
+	: name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
+#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? NULL \
+	: name##_SPLAY_MIN_MAX(x, SPLAY_INF))
+/* Iterates from SPLAY_MIN via SPLAY_NEXT; both splay, so the walk itself restructures the tree. */
+#define SPLAY_FOREACH(x, name, head) \
+	for ((x) = SPLAY_MIN(name, head); \
+	    (x) != NULL; \
+	    (x) = SPLAY_NEXT(name, head, x))
+
+/* Macros that define a red-black tree */
+#define RB_HEAD(name, type) \
+struct name { \
+	struct type *rbh_root; /* root of the tree */ \
+}
+/* Static initializer for an empty head; the root argument is not used. */
+#define RB_INITIALIZER(root) \
+	{ NULL }
+/* Run-time initializer: empties the tree by clearing the root pointer. */
+#define RB_INIT(root) do { \
+	(root)->rbh_root = NULL; \
+} while (0)
+/* Values stored in rbe_color, followed by the link structure to embed in every node. */
+#define RB_BLACK 0
+#define RB_RED 1
+#define RB_ENTRY(type) \
+struct { \
+	struct type *rbe_left; /* left element */ \
+	struct type *rbe_right; /* right element */ \
+	struct type *rbe_parent; /* parent element */ \
+	int rbe_color; /* node color */ \
+}
+/* Accessors; field names the RB_ENTRY member inside the node. */
+#define RB_LEFT(elm, field) (elm)->field.rbe_left
+#define RB_RIGHT(elm, field) (elm)->field.rbe_right
+#define RB_PARENT(elm, field) (elm)->field.rbe_parent
+#define RB_COLOR(elm, field) (elm)->field.rbe_color
+#define RB_ROOT(head) (head)->rbh_root
+#define RB_EMPTY(head) (RB_ROOT(head) == NULL)
+/* Initializes elm as a red node with no children and the given parent. */
+#define RB_SET(elm, parent, field) do { \
+	RB_PARENT(elm, field) = parent; \
+	RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \
+	RB_COLOR(elm, field) = RB_RED; \
+} while (0)
+/* Colors the first node black and the second node red. */
+#define RB_SET_BLACKRED(black, red, field) do { \
+	RB_COLOR(black, field) = RB_BLACK; \
+	RB_COLOR(red, field) = RB_RED; \
+} while (0)
+/* Hook called on nodes after their links change; expands to nothing unless RB_AUGMENT was defined earlier. */
+#ifndef RB_AUGMENT
+#define RB_AUGMENT(x)
+#endif
+/* Left rotation at elm: tmp is set to elm's right child, which takes elm's place; elm becomes tmp's left child. */
+#define RB_ROTATE_LEFT(head, elm, tmp, field) do { \
+	(tmp) = RB_RIGHT(elm, field); \
+	if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field))) { \
+		RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \
+	} \
+	RB_AUGMENT(elm); \
+	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \
+		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+			RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+		else \
+			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+		RB_AUGMENT(RB_PARENT(elm, field)); \
+	} else \
+		(head)->rbh_root = (tmp); \
+	RB_LEFT(tmp, field) = (elm); \
+	RB_PARENT(elm, field) = (tmp); \
+	RB_AUGMENT(tmp); \
+} while (0)
+/* Right rotation at elm: tmp is set to elm's left child, which takes elm's place; elm becomes tmp's right child. */
+#define RB_ROTATE_RIGHT(head, elm, tmp, field) do { \
+	(tmp) = RB_LEFT(elm, field); \
+	if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field))) { \
+		RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); \
+	} \
+	RB_AUGMENT(elm); \
+	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \
+		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+			RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+		else \
+			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+		RB_AUGMENT(RB_PARENT(elm, field)); \
+	} else \
+		(head)->rbh_root = (tmp); \
+	RB_RIGHT(tmp, field) = (elm); \
+	RB_PARENT(elm, field) = (tmp); \
+	RB_AUGMENT(tmp); \
+} while (0)
+
+/* Generates prototypes only (no inline functions); the field and cmp arguments are not used here */
+#define RB_PROTOTYPE(name, type, field, cmp) \
+void name##_RB_INSERT_COLOR(struct name *, struct type *); \
+void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\
+struct type *name##_RB_REMOVE(struct name *, struct type *); \
+struct type *name##_RB_INSERT(struct name *, struct type *); \
+struct type *name##_RB_FIND(struct name *, struct type *); \
+struct type *name##_RB_NEXT(struct name *, struct type *); \
+struct type *name##_RB_MINMAX(struct name *, int); \
+ \
+
+/* Main rb operation.
+ * Moves node close to the key of elm to top
+ */
+#define RB_GENERATE(name, type, field, cmp) /* NOTE(review): wording above was carried over from the splay section; the functions below rebalance by recoloring and rotation */ \
+void \
+name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \
+{ /* recolors and rotates while elm's parent is red, then forces the root black */ \
+	struct type *parent, *gparent, *tmp; \
+	while ((parent = RB_PARENT(elm, field)) && \
+	    RB_COLOR(parent, field) == RB_RED) { \
+		gparent = RB_PARENT(parent, field); \
+		if (parent == RB_LEFT(gparent, field)) { \
+			tmp = RB_RIGHT(gparent, field); /* the parent's sibling */ \
+			if (tmp && RB_COLOR(tmp, field) == RB_RED) { /* both red: recolor, continue from the grandparent */ \
+				RB_COLOR(tmp, field) = RB_BLACK; \
+				RB_SET_BLACKRED(parent, gparent, field);\
+				elm = gparent; \
+				continue; \
+			} \
+			if (RB_RIGHT(parent, field) == elm) { /* inner child: rotate it outward, then swap the roles of elm and parent */ \
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				tmp = parent; \
+				parent = elm; \
+				elm = tmp; \
+			} \
+			RB_SET_BLACKRED(parent, gparent, field); \
+			RB_ROTATE_RIGHT(head, gparent, tmp, field); \
+		} else { /* mirror image of the branch above */ \
+			tmp = RB_LEFT(gparent, field); \
+			if (tmp && RB_COLOR(tmp, field) == RB_RED) { \
+				RB_COLOR(tmp, field) = RB_BLACK; \
+				RB_SET_BLACKRED(parent, gparent, field);\
+				elm = gparent; \
+				continue; \
+			} \
+			if (RB_LEFT(parent, field) == elm) { \
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				tmp = parent; \
+				parent = elm; \
+				elm = tmp; \
+			} \
+			RB_SET_BLACKRED(parent, gparent, field); \
+			RB_ROTATE_LEFT(head, gparent, tmp, field); \
+		} \
+	} \
+	RB_COLOR(head->rbh_root, field) = RB_BLACK; \
+} \
+/* Called by name##_RB_REMOVE after a black node was unlinked; elm (may be NULL) is the child that replaced it under parent */ \
+void \
+name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \
+{ \
+	struct type *tmp; \
+	while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) && \
+	    elm != RB_ROOT(head)) { \
+		if (RB_LEFT(parent, field) == elm) { \
+			tmp = RB_RIGHT(parent, field); /* elm's sibling; dereferenced without a NULL check */ \
+			if (RB_COLOR(tmp, field) == RB_RED) { \
+				RB_SET_BLACKRED(tmp, parent, field); \
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				tmp = RB_RIGHT(parent, field); \
+			} \
+			if ((RB_LEFT(tmp, field) == NULL || \
+			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+			    (RB_RIGHT(tmp, field) == NULL || \
+			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+				RB_COLOR(tmp, field) = RB_RED; /* sibling has no red child: recolor it and move one level up */ \
+				elm = parent; \
+				parent = RB_PARENT(elm, field); \
+			} else { \
+				if (RB_RIGHT(tmp, field) == NULL || \
+				    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\
+					struct type *oleft; \
+					if ((oleft = RB_LEFT(tmp, field)))\
+						RB_COLOR(oleft, field) = RB_BLACK;\
+					RB_COLOR(tmp, field) = RB_RED; \
+					RB_ROTATE_RIGHT(head, tmp, oleft, field);\
+					tmp = RB_RIGHT(parent, field); \
+				} \
+				RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+				RB_COLOR(parent, field) = RB_BLACK; \
+				if (RB_RIGHT(tmp, field)) \
+					RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				elm = RB_ROOT(head); /* done: the final coloring below then applies to the root */ \
+				break; \
+			} \
+		} else { /* mirror image of the branch above */ \
+			tmp = RB_LEFT(parent, field); \
+			if (RB_COLOR(tmp, field) == RB_RED) { \
+				RB_SET_BLACKRED(tmp, parent, field); \
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				tmp = RB_LEFT(parent, field); \
+			} \
+			if ((RB_LEFT(tmp, field) == NULL || \
+			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+			    (RB_RIGHT(tmp, field) == NULL || \
+			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+				RB_COLOR(tmp, field) = RB_RED; \
+				elm = parent; \
+				parent = RB_PARENT(elm, field); \
+			} else { \
+				if (RB_LEFT(tmp, field) == NULL || \
+				    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\
+					struct type *oright; \
+					if ((oright = RB_RIGHT(tmp, field)))\
+						RB_COLOR(oright, field) = RB_BLACK;\
+					RB_COLOR(tmp, field) = RB_RED; \
+					RB_ROTATE_LEFT(head, tmp, oright, field);\
+					tmp = RB_LEFT(parent, field); \
+				} \
+				RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+				RB_COLOR(parent, field) = RB_BLACK; \
+				if (RB_LEFT(tmp, field)) \
+					RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				elm = RB_ROOT(head); \
+				break; \
+			} \
+		} \
+	} \
+	if (elm) \
+		RB_COLOR(elm, field) = RB_BLACK; \
+} \
+/* Unlinks elm by pointer (no key lookup, cmp is not called) and returns the pointer that was passed in */ \
+struct type * \
+name##_RB_REMOVE(struct name *head, struct type *elm) \
+{ \
+	struct type *child, *parent, *old = elm; \
+	int color; \
+	if (RB_LEFT(elm, field) == NULL) \
+		child = RB_RIGHT(elm, field); \
+	else if (RB_RIGHT(elm, field) == NULL) \
+		child = RB_LEFT(elm, field); \
+	else { /* two children: the leftmost node of the right subtree replaces old */ \
+		struct type *left; \
+		elm = RB_RIGHT(elm, field); \
+		while ((left = RB_LEFT(elm, field))) \
+			elm = left; \
+		child = RB_RIGHT(elm, field); \
+		parent = RB_PARENT(elm, field); \
+		color = RB_COLOR(elm, field); \
+		if (child) \
+			RB_PARENT(child, field) = parent; \
+		if (parent) { \
+			if (RB_LEFT(parent, field) == elm) \
+				RB_LEFT(parent, field) = child; \
+			else \
+				RB_RIGHT(parent, field) = child; \
+			RB_AUGMENT(parent); \
+		} else \
+			RB_ROOT(head) = child; \
+		if (RB_PARENT(elm, field) == old) \
+			parent = elm; \
+		(elm)->field = (old)->field; /* the replacement takes over old's links and color */ \
+		if (RB_PARENT(old, field)) { \
+			if (RB_LEFT(RB_PARENT(old, field), field) == old)\
+				RB_LEFT(RB_PARENT(old, field), field) = elm;\
+			else \
+				RB_RIGHT(RB_PARENT(old, field), field) = elm;\
+			RB_AUGMENT(RB_PARENT(old, field)); \
+		} else \
+			RB_ROOT(head) = elm; \
+		RB_PARENT(RB_LEFT(old, field), field) = elm; /* old's left child is non-NULL in this branch */ \
+		if (RB_RIGHT(old, field)) \
+			RB_PARENT(RB_RIGHT(old, field), field) = elm; \
+		if (parent) { \
+			left = parent; \
+			do { \
+				RB_AUGMENT(left); \
+			} while ((left = RB_PARENT(left, field))); \
+		} \
+		goto color; \
+	} \
+	parent = RB_PARENT(elm, field); /* at most one child: splice child into elm's place */ \
+	color = RB_COLOR(elm, field); \
+	if (child) \
+		RB_PARENT(child, field) = parent; \
+	if (parent) { \
+		if (RB_LEFT(parent, field) == elm) \
+			RB_LEFT(parent, field) = child; \
+		else \
+			RB_RIGHT(parent, field) = child; \
+		RB_AUGMENT(parent); \
+	} else \
+		RB_ROOT(head) = child; \
+color: \
+	if (color == RB_BLACK) \
+		name##_RB_REMOVE_COLOR(head, parent, child); \
+	return (old); \
+} \
+ \
+/* Inserts a node into the RB tree; returns NULL, or the existing node if one compares equal (elm is then not linked) */ \
+struct type * \
+name##_RB_INSERT(struct name *head, struct type *elm) \
+{ \
+	struct type *tmp; \
+	struct type *parent = NULL; \
+	int comp = 0; \
+	tmp = RB_ROOT(head); \
+	while (tmp) { \
+		parent = tmp; \
+		comp = (cmp)(elm, parent); \
+		if (comp < 0) \
+			tmp = RB_LEFT(tmp, field); \
+		else if (comp > 0) \
+			tmp = RB_RIGHT(tmp, field); \
+		else \
+			return (tmp); \
+	} \
+	RB_SET(elm, parent, field); \
+	if (parent != NULL) { \
+		if (comp < 0) \
+			RB_LEFT(parent, field) = elm; \
+		else \
+			RB_RIGHT(parent, field) = elm; \
+		RB_AUGMENT(parent); \
+	} else \
+		RB_ROOT(head) = elm; \
+	name##_RB_INSERT_COLOR(head, elm); \
+	return (NULL); \
+} \
+ \
+/* Finds the node with the same key as elm; returns NULL if there is none. Does not write to the tree */ \
+struct type * \
+name##_RB_FIND(struct name *head, struct type *elm) \
+{ \
+	struct type *tmp = RB_ROOT(head); \
+	int comp; \
+	while (tmp) { \
+		comp = cmp(elm, tmp); \
+		if (comp < 0) \
+			tmp = RB_LEFT(tmp, field); \
+		else if (comp > 0) \
+			tmp = RB_RIGHT(tmp, field); \
+		else \
+			return (tmp); \
+	} \
+	return (NULL); \
+} \
+/* Returns the in-order successor of elm, or NULL when elm is the last node; head is not used */ \
+struct type * \
+name##_RB_NEXT(struct name *head, struct type *elm) \
+{ \
+	if (RB_RIGHT(elm, field)) { \
+		elm = RB_RIGHT(elm, field); \
+		while (RB_LEFT(elm, field)) \
+			elm = RB_LEFT(elm, field); \
+	} else { \
+		if (RB_PARENT(elm, field) && \
+		    (elm == RB_LEFT(RB_PARENT(elm, field), field))) \
+			elm = RB_PARENT(elm, field); \
+		else { \
+			while (RB_PARENT(elm, field) && \
+			    (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\
+				elm = RB_PARENT(elm, field); \
+			elm = RB_PARENT(elm, field); \
+		} \
+	} \
+	return (elm); \
+} \
+/* Returns the leftmost node when val < 0, otherwise the rightmost; NULL for an empty tree */ \
+struct type * \
+name##_RB_MINMAX(struct name *head, int val) \
+{ \
+	struct type *tmp = RB_ROOT(head); \
+	struct type *parent = NULL; \
+	while (tmp) { \
+		parent = tmp; \
+		if (val < 0) \
+			tmp = RB_LEFT(tmp, field); \
+		else \
+			tmp = RB_RIGHT(tmp, field); \
+	} \
+	return (parent); \
+}
+/* Direction values passed to name##_RB_MINMAX by RB_MIN and RB_MAX. */
+#define RB_NEGINF -1
+#define RB_INF 1
+/* Front ends: paste the tree name onto the generated function names. */
+#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y)
+#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y)
+#define RB_FIND(name, x, y) name##_RB_FIND(x, y)
+#define RB_NEXT(name, x, y) name##_RB_NEXT(x, y)
+#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF)
+#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF)
+/* In-order iteration starting at RB_MIN; the MINMAX and NEXT functions only read the tree. */
+#define RB_FOREACH(x, name, head) \
+	for ((x) = RB_MIN(name, head); \
+	    (x) != NULL; \
+	    (x) = name##_RB_NEXT(head, x))
+
+#endif /* _SYS_TREE_H_ */