Created October 7, 2013 02:06
Recently Aged Ticks algorithm for CIS 657
/*- | |
* Copyright (c) 1982, 1986, 1991, 1993 | |
* The Regents of the University of California. All rights reserved. | |
* (c) UNIX System Laboratories, Inc. | |
* All or some portions of this file are derived from material licensed | |
* to the University of California by American Telephone and Telegraph | |
* Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
* the permission of UNIX System Laboratories, Inc. | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions | |
* are met: | |
* 1. Redistributions of source code must retain the above copyright | |
* notice, this list of conditions and the following disclaimer. | |
* 2. Redistributions in binary form must reproduce the above copyright | |
* notice, this list of conditions and the following disclaimer in the | |
* documentation and/or other materials provided with the distribution. | |
* 3. All advertising materials mentioning features or use of this software | |
* must display the following acknowledgement: | |
* This product includes software developed by the University of | |
* California, Berkeley and its contributors. | |
* 4. Neither the name of the University nor the names of its contributors | |
* may be used to endorse or promote products derived from this software | |
* without specific prior written permission. | |
* | |
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
* SUCH DAMAGE. | |
* | |
* @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 | |
*/ | |
#include <sys/cdefs.h> | |
__FBSDID("$FreeBSD: src/sys/kern/kern_clock.c,v 1.163 2003/10/16 08:39:15 jeff Exp $"); | |
#include "opt_ntp.h" | |
#include "opt_ddb.h" | |
#include "opt_watchdog.h" | |
#include <sys/param.h> | |
#include <sys/systm.h> | |
#include <sys/callout.h> | |
#include <sys/kernel.h> | |
#include <sys/lock.h> | |
#include <sys/ktr.h> | |
#include <sys/mutex.h> | |
#include <sys/proc.h> | |
#include <sys/resource.h> | |
#include <sys/resourcevar.h> | |
#include <sys/sched.h> | |
#include <sys/signalvar.h> | |
#include <sys/smp.h> | |
#include <vm/vm.h> | |
#include <vm/pmap.h> | |
#include <vm/vm_map.h> | |
#include <sys/sysctl.h> | |
#include <sys/bus.h> | |
#include <sys/interrupt.h> | |
#include <sys/limits.h> | |
#include <sys/timetc.h> | |
#include <machine/cpu.h> | |
#ifdef GPROF | |
#include <sys/gmon.h> | |
#endif | |
#ifdef DDB | |
#include <ddb/ddb.h> | |
#endif | |
#ifdef DEVICE_POLLING | |
extern void hardclock_device_poll(void); | |
#endif /* DEVICE_POLLING */ | |
static void initclocks(void *dummy); | |
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL) | |
/* Some of these don't belong here, but it's easiest to concentrate them. */ | |
long cp_time[CPUSTATES]; | |
SYSCTL_OPAQUE(_kern, OID_AUTO, cp_time, CTLFLAG_RD, &cp_time, sizeof(cp_time), | |
"LU", "CPU time statistics"); | |
#ifdef WATCHDOG | |
static int sysctl_watchdog_reset(SYSCTL_HANDLER_ARGS); | |
static void watchdog_fire(void); | |
static int watchdog_enabled; | |
static unsigned int watchdog_ticks; | |
static int watchdog_timeout = 20; | |
SYSCTL_NODE(_debug, OID_AUTO, watchdog, CTLFLAG_RW, 0, "System watchdog"); | |
SYSCTL_INT(_debug_watchdog, OID_AUTO, enabled, CTLFLAG_RW, &watchdog_enabled, | |
0, "Enable the watchdog"); | |
SYSCTL_INT(_debug_watchdog, OID_AUTO, timeout, CTLFLAG_RW, &watchdog_timeout, | |
0, "Timeout for watchdog checkins"); | |
#endif /* WATCHDOG */ | |
/* | |
* Clock handling routines. | |
* | |
* This code is written to operate with two timers that run independently of | |
* each other. | |
* | |
* The main timer, running hz times per second, is used to trigger interval | |
* timers, timeouts and rescheduling as needed. | |
* | |
* The second timer handles kernel and user profiling, | |
* and does resource use estimation. If the second timer is programmable, | |
* it is randomized to avoid aliasing between the two clocks. For example, | |
* the randomization prevents an adversary from always giving up the cpu | |
* just before its quantum expires. Otherwise, it would never accumulate | |
* cpu ticks. The mean frequency of the second timer is stathz. | |
* | |
* If no second timer exists, stathz will be zero; in this case we drive | |
* profiling and statistics off the main clock. This WILL NOT be accurate; | |
* do not do it unless absolutely necessary. | |
* | |
* The statistics clock may (or may not) be run at a higher rate while | |
* profiling. This profile clock runs at profhz. We require that profhz | |
* be an integral multiple of stathz. | |
* | |
* If the statistics clock is running fast, it must be divided by the ratio | |
* profhz/stathz for statistics. (For profiling, every tick counts.) | |
* | |
* Time-of-day is maintained using a "timecounter", which may or may | |
* not be related to the hardware generating the above mentioned | |
* interrupts. | |
*/ | |
int stathz; | |
int profhz; | |
int profprocs; | |
int ticks; | |
int psratio; | |
/* | |
* Initialize clock frequencies and start both clocks running. | |
*/ | |
/* ARGSUSED*/ | |
static void | |
initclocks(dummy) | |
void *dummy; | |
{ | |
register int i; | |
/* | |
* Set divisors to 1 (normal case) and let the machine-specific | |
* code do its bit. | |
*/ | |
cpu_initclocks(); | |
/* | |
* Compute profhz/stathz, and fix profhz if needed. | |
*/ | |
i = stathz ? stathz : hz; | |
if (profhz == 0) | |
profhz = i; | |
psratio = profhz / i; | |
} | |
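/*
 * Worked example (hypothetical values, not from this source): with
 * stathz = 128 and profhz = 1024, the computation above yields
 * psratio = 1024 / 128 = 8.  Per the comment block above, when the
 * statistics are derived from the faster profiling clock, only every
 * eighth profclock() hit is then counted as one statistics tick.
 */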
/* | |
* Each time the real-time timer fires, this function is called on all CPUs. | |
* Note that hardclock() calls hardclock_process() for the boot CPU, so only | |
* the other CPUs in the system need to call this function. | |
*/ | |
void | |
hardclock_process(frame) | |
register struct clockframe *frame; | |
{ | |
struct pstats *pstats; | |
struct thread *td = curthread; | |
struct proc *p = td->td_proc; | |
td->td_runtime++; | |
/* | |
* Run current process's virtual and profile time, as needed. | |
*/ | |
mtx_lock_spin_flags(&sched_lock, MTX_QUIET); | |
if (p->p_flag & P_SA) { | |
/* XXXKSE What to do? */ | |
} else { | |
pstats = p->p_stats; | |
if (CLKF_USERMODE(frame) && | |
timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && | |
itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) { | |
p->p_sflag |= PS_ALRMPEND; | |
td->td_flags |= TDF_ASTPENDING; | |
} | |
if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) && | |
itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) { | |
p->p_sflag |= PS_PROFPEND; | |
td->td_flags |= TDF_ASTPENDING; | |
} | |
} | |
mtx_unlock_spin_flags(&sched_lock, MTX_QUIET); | |
} | |
/* | |
* The real-time timer, interrupting hz times per second. | |
*/ | |
void | |
hardclock(frame) | |
register struct clockframe *frame; | |
{ | |
int need_softclock = 0; | |
CTR0(KTR_CLK, "hardclock fired"); | |
hardclock_process(frame); | |
tc_ticktock(); | |
/* | |
* If no separate statistics clock is available, run it from here. | |
* | |
* XXX: this only works for UP | |
*/ | |
if (stathz == 0) { | |
profclock(frame); | |
statclock(frame); | |
} | |
#ifdef DEVICE_POLLING | |
hardclock_device_poll(); /* this is very short and quick */ | |
#endif /* DEVICE_POLLING */ | |
/* | |
* Process callouts at a very low cpu priority, so we don't keep the | |
* relatively high clock interrupt priority any longer than necessary. | |
*/ | |
mtx_lock_spin_flags(&callout_lock, MTX_QUIET); | |
ticks++; | |
if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) { | |
need_softclock = 1; | |
} else if (softticks + 1 == ticks) | |
++softticks; | |
mtx_unlock_spin_flags(&callout_lock, MTX_QUIET); | |
/* | |
* swi_sched acquires sched_lock, so we don't want to call it with | |
* callout_lock held; incorrect locking order. | |
*/ | |
if (need_softclock) | |
swi_sched(softclock_ih, 0); | |
#ifdef WATCHDOG | |
if (watchdog_enabled > 0 && | |
(int)(ticks - watchdog_ticks) >= (hz * watchdog_timeout)) | |
watchdog_fire(); | |
#endif /* WATCHDOG */ | |
} | |
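/*
 * Example of the watchdog arithmetic above, using the default
 * watchdog_timeout of 20 declared earlier in this file and a
 * hypothetical hz of 100: provided watchdog_enabled is nonzero,
 * watchdog_fire() runs once ticks has advanced by
 * hz * watchdog_timeout = 2000, i.e. 20 seconds after the last update
 * of watchdog_ticks through the debug.watchdog.reset sysctl below.
 */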
/* | |
* Compute number of ticks in the specified amount of time. | |
*/ | |
int | |
tvtohz(tv) | |
struct timeval *tv; | |
{ | |
register unsigned long ticks; | |
register long sec, usec; | |
/* | |
* If the number of usecs in the whole seconds part of the time | |
* difference fits in a long, then the total number of usecs will | |
* fit in an unsigned long. Compute the total and convert it to | |
* ticks, rounding up and adding 1 to allow for the current tick | |
* to expire. Rounding also depends on unsigned long arithmetic | |
* to avoid overflow. | |
* | |
* Otherwise, if the number of ticks in the whole seconds part of | |
* the time difference fits in a long, then convert the parts to | |
* ticks separately and add, using similar rounding methods and | |
* overflow avoidance. This method would work in the previous | |
* case but it is slightly slower and assumes that hz is integral. | |
* | |
* Otherwise, round the time difference down to the maximum | |
* representable value. | |
* | |
* If ints have 32 bits, then the maximum value for any timeout in | |
* 10ms ticks is 248 days. | |
*/ | |
sec = tv->tv_sec; | |
usec = tv->tv_usec; | |
if (usec < 0) { | |
sec--; | |
usec += 1000000; | |
} | |
if (sec < 0) { | |
#ifdef DIAGNOSTIC | |
if (usec > 0) { | |
sec++; | |
usec -= 1000000; | |
} | |
printf("tvotohz: negative time difference %ld sec %ld usec\n", | |
sec, usec); | |
#endif | |
ticks = 1; | |
} else if (sec <= LONG_MAX / 1000000) | |
ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) | |
/ tick + 1; | |
else if (sec <= LONG_MAX / hz) | |
ticks = sec * hz | |
+ ((unsigned long)usec + (tick - 1)) / tick + 1; | |
else | |
ticks = LONG_MAX; | |
if (ticks > INT_MAX) | |
ticks = INT_MAX; | |
return ((int)ticks); | |
} | |
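/*
 * Worked example (assuming the common configuration hz = 100, so
 * tick = 1000000 / hz = 10000 usec): a timeval of 0 sec / 25000 usec
 * falls into the sec <= LONG_MAX / 1000000 case above and gives
 *
 *	ticks = (0 * 1000000 + 25000 + 9999) / 10000 + 1 = 3 + 1 = 4
 *
 * i.e. the request is rounded up to whole ticks, plus one extra tick so
 * that the currently running tick cannot cut the timeout short.
 */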
/* | |
* Start profiling on a process. | |
* | |
* Kernel profiling passes proc0 which never exits and hence | |
* keeps the profile clock running constantly. | |
*/ | |
void | |
startprofclock(p) | |
register struct proc *p; | |
{ | |
/* | |
* XXX; Right now sched_lock protects statclock(), but perhaps | |
* it should be protected later on by a time_lock, which would | |
* cover psdiv, etc. as well. | |
*/ | |
PROC_LOCK_ASSERT(p, MA_OWNED); | |
if (p->p_flag & P_STOPPROF) | |
return; | |
if ((p->p_flag & P_PROFIL) == 0) { | |
mtx_lock_spin(&sched_lock); | |
p->p_flag |= P_PROFIL; | |
if (++profprocs == 1) | |
cpu_startprofclock(); | |
mtx_unlock_spin(&sched_lock); | |
} | |
} | |
/* | |
* Stop profiling on a process. | |
*/ | |
void | |
stopprofclock(p) | |
register struct proc *p; | |
{ | |
PROC_LOCK_ASSERT(p, MA_OWNED); | |
if (p->p_flag & P_PROFIL) { | |
if (p->p_profthreads != 0) { | |
p->p_flag |= P_STOPPROF; | |
while (p->p_profthreads != 0) | |
msleep(&p->p_profthreads, &p->p_mtx, PPAUSE, | |
"stopprof", NULL); | |
p->p_flag &= ~P_STOPPROF; | |
} | |
mtx_lock_spin(&sched_lock); | |
p->p_flag &= ~P_PROFIL; | |
if (--profprocs == 0) | |
cpu_stopprofclock(); | |
mtx_unlock_spin(&sched_lock); | |
} | |
} | |
/* | |
* Statistics clock. Grab profile sample, and if divider reaches 0, | |
* do process and kernel statistics. Most of the statistics are only | |
* used by user-level statistics programs. The main exceptions are | |
* ke->ke_uticks, p->p_sticks, p->p_iticks, and p->p_estcpu. | |
* This should be called by all active processors. | |
*/ | |
void | |
statclock(frame) | |
register struct clockframe *frame; | |
{ | |
struct pstats *pstats; | |
struct rusage *ru; | |
struct vmspace *vm; | |
struct thread *td; | |
struct proc *p; | |
long rss; | |
td = curthread; | |
p = td->td_proc; | |
mtx_lock_spin_flags(&sched_lock, MTX_QUIET); | |
if (CLKF_USERMODE(frame)) { | |
/* | |
* Charge the time as appropriate. | |
*/ | |
if (p->p_flag & P_SA) | |
thread_statclock(1); | |
p->p_uticks++; | |
if (td->td_ksegrp->kg_nice > NZERO) | |
cp_time[CP_NICE]++; | |
else | |
cp_time[CP_USER]++; | |
} else { | |
/* | |
* Came from kernel mode, so we were: | |
* - handling an interrupt, | |
* - doing syscall or trap work on behalf of the current | |
* user process, or | |
* - spinning in the idle loop. | |
* Whichever it is, charge the time as appropriate. | |
* Note that we charge interrupts to the current process, | |
* regardless of whether they are ``for'' that process, | |
* so that we know how much of its real time was spent | |
* in ``non-process'' (i.e., interrupt) work. | |
*/ | |
if ((td->td_ithd != NULL) || td->td_intr_nesting_level >= 2) { | |
p->p_iticks++; | |
cp_time[CP_INTR]++; | |
} else { | |
if (p->p_flag & P_SA) | |
thread_statclock(0); | |
td->td_sticks++; | |
p->p_sticks++; | |
if (p != PCPU_GET(idlethread)->td_proc) | |
cp_time[CP_SYS]++; | |
else | |
cp_time[CP_IDLE]++; | |
} | |
} | |
sched_clock(td); | |
/* Update resource usage integrals and maximums. */ | |
if ((pstats = p->p_stats) != NULL && | |
(ru = &pstats->p_ru) != NULL && | |
(vm = p->p_vmspace) != NULL) { | |
ru->ru_ixrss += pgtok(vm->vm_tsize); | |
ru->ru_idrss += pgtok(vm->vm_dsize); | |
ru->ru_isrss += pgtok(vm->vm_ssize); | |
rss = pgtok(vmspace_resident_count(vm)); | |
if (ru->ru_maxrss < rss) | |
ru->ru_maxrss = rss; | |
} | |
mtx_unlock_spin_flags(&sched_lock, MTX_QUIET); | |
} | |
void | |
profclock(frame) | |
register struct clockframe *frame; | |
{ | |
struct thread *td; | |
#ifdef GPROF | |
struct gmonparam *g; | |
int i; | |
#endif | |
td = curthread; | |
if (CLKF_USERMODE(frame)) { | |
/* | |
* Came from user mode; CPU was in user state. | |
* If this process is being profiled, record the tick. | |
 * If there is no related user location yet, don't | |
* bother trying to count it. | |
*/ | |
td = curthread; | |
if (td->td_proc->p_flag & P_PROFIL) | |
addupc_intr(td, CLKF_PC(frame), 1); | |
} | |
#ifdef GPROF | |
else { | |
/* | |
* Kernel statistics are just like addupc_intr, only easier. | |
*/ | |
g = &_gmonparam; | |
if (g->state == GMON_PROF_ON) { | |
i = CLKF_PC(frame) - g->lowpc; | |
if (i < g->textsize) { | |
i /= HISTFRACTION * sizeof(*g->kcount); | |
g->kcount[i]++; | |
} | |
} | |
} | |
#endif | |
} | |
/* | |
* Return information about system clocks. | |
*/ | |
static int | |
sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS) | |
{ | |
struct clockinfo clkinfo; | |
/* | |
* Construct clockinfo structure. | |
*/ | |
bzero(&clkinfo, sizeof(clkinfo)); | |
clkinfo.hz = hz; | |
clkinfo.tick = tick; | |
clkinfo.profhz = profhz; | |
clkinfo.stathz = stathz ? stathz : hz; | |
return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req)); | |
} | |
SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD, | |
0, 0, sysctl_kern_clockrate, "S,clockinfo", | |
"Rate and period of various kernel clocks"); | |
#ifdef WATCHDOG | |
/* | |
* Reset the watchdog timer to ticks, thus preventing the watchdog | |
* from firing for another watchdog timeout period. | |
*/ | |
static int | |
sysctl_watchdog_reset(SYSCTL_HANDLER_ARGS) | |
{ | |
int ret; | |
ret = 0; | |
watchdog_ticks = ticks; | |
return sysctl_handle_int(oidp, &ret, 0, req); | |
} | |
SYSCTL_PROC(_debug_watchdog, OID_AUTO, reset, CTLFLAG_RW, 0, 0, | |
sysctl_watchdog_reset, "I", "Reset the watchdog"); | |
/* | |
* Handle a watchdog timeout by dumping interrupt information and | |
 * then either dropping to DDB or panicking. | |
*/ | |
static void | |
watchdog_fire(void) | |
{ | |
int nintr; | |
u_int64_t inttotal; | |
u_long *curintr; | |
char *curname; | |
curintr = intrcnt; | |
curname = intrnames; | |
inttotal = 0; | |
nintr = eintrcnt - intrcnt; | |
printf("interrupt total\n"); | |
while (--nintr >= 0) { | |
if (*curintr) | |
printf("%-12s %20lu\n", curname, *curintr); | |
curname += strlen(curname) + 1; | |
inttotal += *curintr++; | |
} | |
printf("Total %20ju\n", (uintmax_t)inttotal); | |
#ifdef DDB | |
db_print_backtrace(); | |
Debugger("watchdog timeout"); | |
#else /* !DDB */ | |
panic("watchdog timeout"); | |
#endif /* DDB */ | |
} | |
#endif /* WATCHDOG */ |
/*- | |
* Copyright (c) 1986, 1989, 1991, 1993 | |
* The Regents of the University of California. All rights reserved. | |
* (c) UNIX System Laboratories, Inc. | |
* All or some portions of this file are derived from material licensed | |
* to the University of California by American Telephone and Telegraph | |
* Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
* the permission of UNIX System Laboratories, Inc. | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions | |
* are met: | |
* 1. Redistributions of source code must retain the above copyright | |
* notice, this list of conditions and the following disclaimer. | |
* 2. Redistributions in binary form must reproduce the above copyright | |
* notice, this list of conditions and the following disclaimer in the | |
* documentation and/or other materials provided with the distribution. | |
* 3. All advertising materials mentioning features or use of this software | |
* must display the following acknowledgement: | |
* This product includes software developed by the University of | |
* California, Berkeley and its contributors. | |
* 4. Neither the name of the University nor the names of its contributors | |
* may be used to endorse or promote products derived from this software | |
* without specific prior written permission. | |
* | |
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
* SUCH DAMAGE. | |
* | |
* @(#)proc.h 8.15 (Berkeley) 5/19/95 | |
* $FreeBSD: src/sys/sys/proc.h,v 1.361 2003/11/15 23:57:19 imp Exp $ | |
*/ | |
#ifndef _SYS_PROC_H_ | |
#define _SYS_PROC_H_ | |
#include <sys/callout.h> /* For struct callout. */ | |
#include <sys/event.h> /* For struct klist. */ | |
#ifndef _KERNEL | |
#include <sys/filedesc.h> | |
#endif | |
#include <sys/_lock.h> | |
#include <sys/_mutex.h> | |
#include <sys/queue.h> | |
#include <sys/priority.h> | |
#include <sys/rtprio.h> /* XXX. */ | |
#include <sys/runq.h> | |
#include <sys/sigio.h> | |
#include <sys/signal.h> | |
#ifndef _KERNEL | |
#include <sys/time.h> /* For structs itimerval, timeval. */ | |
#else | |
#include <sys/pcpu.h> | |
#endif | |
#include <sys/ucontext.h> | |
#include <sys/ucred.h> | |
#include <machine/proc.h> /* Machine-dependent proc substruct. */ | |
/* | |
* One structure allocated per session. | |
* | |
* List of locks | |
* (m) locked by s_mtx mtx | |
* (e) locked by proctree_lock sx | |
* (c) const until freeing | |
*/ | |
struct session { | |
int s_count; /* (m) Ref cnt; pgrps in session. */ | |
struct proc *s_leader; /* (m + e) Session leader. */ | |
struct vnode *s_ttyvp; /* (m) Vnode of controlling tty. */ | |
struct tty *s_ttyp; /* (m) Controlling tty. */ | |
pid_t s_sid; /* (c) Session ID. */ | |
/* (m) Setlogin() name: */ | |
char s_login[roundup(MAXLOGNAME, sizeof(long))]; | |
struct mtx s_mtx; /* Mutex to protect members. */ | |
}; | |
/* | |
* One structure allocated per process group. | |
* | |
* List of locks | |
* (m) locked by pg_mtx mtx | |
* (e) locked by proctree_lock sx | |
* (c) const until freeing | |
*/ | |
struct pgrp { | |
LIST_ENTRY(pgrp) pg_hash; /* (e) Hash chain. */ | |
LIST_HEAD(, proc) pg_members; /* (m + e) Pointer to pgrp members. */ | |
struct session *pg_session; /* (c) Pointer to session. */ | |
struct sigiolst pg_sigiolst; /* (m) List of sigio sources. */ | |
pid_t pg_id; /* (c) Pgrp id. */ | |
int pg_jobc; /* (m) job cntl proc count */ | |
struct mtx pg_mtx; /* Mutex to protect members */ | |
}; | |
/* | |
* pargs, used to hold a copy of the command line, if it had a sane length. | |
*/ | |
struct pargs { | |
u_int ar_ref; /* Reference count. */ | |
u_int ar_length; /* Length. */ | |
u_char ar_args[1]; /* Arguments. */ | |
}; | |
/*- | |
* Description of a process. | |
* | |
* This structure contains the information needed to manage a thread of | |
* control, known in UN*X as a process; it has references to substructures | |
* containing descriptions of things that the process uses, but may share | |
* with related processes. The process structure and the substructures | |
* are always addressable except for those marked "(CPU)" below, | |
* which might be addressable only on a processor on which the process | |
* is running. | |
* | |
* Below is a key of locks used to protect each member of struct proc. The | |
* lock is indicated by a reference to a specific character in parens in the | |
* associated comment. | |
* * - not yet protected | |
* a - only touched by curproc or parent during fork/wait | |
* b - created at fork, never changes | |
* (exception aiods switch vmspaces, but they are also | |
* marked 'P_SYSTEM' so hopefully it will be left alone) | |
* c - locked by proc mtx | |
* d - locked by allproc_lock lock | |
* e - locked by proctree_lock lock | |
* f - session mtx | |
* g - process group mtx | |
* h - callout_lock mtx | |
* i - by curproc or the master session mtx | |
* j - locked by sched_lock mtx | |
* k - only accessed by curthread | |
* l - the attaching proc or attaching proc parent | |
* m - Giant | |
* n - not locked, lazy | |
* o - ktrace lock | |
* p - select lock (sellock) | |
* q - td_contested lock | |
* r - p_peers lock | |
* x - created at fork, only changes during single threading in exec | |
* z - zombie threads/kse/ksegroup lock | |
* | |
* If the locking key specifies two identifiers (for example, p_pptr) then | |
* either lock is sufficient for read access, but both locks must be held | |
* for write access. | |
*/ | |
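/*
 * For example, p_flag below is keyed (c) and p_sflag is keyed (j), so a
 * correct access pattern (a sketch following the key above, not a
 * quotation from the kernel) looks like:
 *
 *	PROC_LOCK(p);
 *	p->p_flag |= P_PROFIL;			(c: proc mtx held)
 *	PROC_UNLOCK(p);
 *
 *	mtx_lock_spin(&sched_lock);
 *	p->p_sflag |= PS_ALRMPEND;		(j: sched_lock held)
 *	mtx_unlock_spin(&sched_lock);
 *
 * The PROC_LOCK() family of macros is defined later in this header.
 */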
struct ithd; | |
struct ke_sched; | |
struct kg_sched; | |
struct nlminfo; | |
struct p_sched; | |
struct td_sched; | |
struct trapframe; | |
struct turnstile; | |
/* | |
* Here we define the four structures used for process information. | |
* | |
 * The first is the thread. It might be thought of as a "Kernel | |
* Schedulable Entity Context". | |
* This structure contains all the information as to where a thread of | |
* execution is now, or was when it was suspended, why it was suspended, | |
* and anything else that will be needed to restart it when it is | |
* rescheduled. Always associated with a KSE when running, but can be | |
* reassigned to an equivalent KSE when being restarted for | |
* load balancing. Each of these is associated with a kernel stack | |
* and a pcb. | |
* | |
* It is important to remember that a particular thread structure only | |
* exists as long as the system call or kernel entrance (e.g. by pagefault) | |
* which it is currently executing. It should therefore NEVER be referenced | |
* by pointers in long lived structures that live longer than a single | |
* request. If several threads complete their work at the same time, | |
* they will all rewind their stacks to the user boundary, report their | |
* completion state, and all but one will be freed. That last one will | |
* be kept to provide a kernel stack and pcb for the NEXT syscall or kernel | |
* entrance. (basically to save freeing and then re-allocating it) The KSE | |
* keeps a cached thread available to allow it to quickly | |
* get one when it needs a new one. There is also a system | |
* cache of free threads. Threads have priority and partake in priority | |
* inheritance schemes. | |
*/ | |
struct thread; | |
/* | |
* The second structure is the Kernel Schedulable Entity. (KSE) | |
* It represents the ability to take a slot in the scheduler queue. | |
* As long as this is scheduled, it could continue to run any threads that | |
* are assigned to the KSEGRP (see later) until either it runs out | |
* of runnable threads of high enough priority, or CPU. | |
* It runs on one CPU and is assigned a quantum of time. When a thread is | |
* blocked, The KSE continues to run and will search for another thread | |
 * in a runnable state amongst those it has. It may decide to return to user | |
* mode with a new 'empty' thread if there are no runnable threads. | |
* Threads are temporarily associated with a KSE for scheduling reasons. | |
*/ | |
struct kse; | |
/* | |
* The KSEGRP is allocated resources across a number of CPUs. | |
 * (Including a number of CPUxQUANTA.) It parcels these QUANTA up among | |
* its KSEs, each of which should be running in a different CPU. | |
* BASE priority and total available quanta are properties of a KSEGRP. | |
* Multiple KSEGRPs in a single process compete against each other | |
* for total quanta in the same way that a forked child competes against | |
 * its parent process. | |
*/ | |
struct ksegrp; | |
/* | |
* A process is the owner of all system resources allocated to a task | |
* except CPU quanta. | |
* All KSEGs under one process see, and have the same access to, these | |
* resources (e.g. files, memory, sockets, permissions kqueues). | |
* A process may compete for CPU cycles on the same basis as a | |
* forked process cluster by spawning several KSEGRPs. | |
*/ | |
struct proc; | |
/*************** | |
* In pictures: | |
With a single run queue used by all processors: | |
 RUNQ: --->KSE---KSE--...              SLEEPQ:[]---THREAD---THREAD---THREAD
             |  /                             []---THREAD
            KSEG---THREAD--THREAD--THREAD     []
                                              []---THREAD---THREAD
(processors run THREADs from the KSEG until they are exhausted or | |
the KSEG exhausts its quantum) | |
With PER-CPU run queues: | |
KSEs on the separate run queues directly | |
They would be given priorities calculated from the KSEG. | |
* | |
*****************/ | |
/* | |
* Kernel runnable context (thread). | |
* This is what is put to sleep and reactivated. | |
* The first KSE available in the correct group will run this thread. | |
* If several are available, use the one on the same CPU as last time. | |
* When waiting to be run, threads are hung off the KSEGRP in priority order. | |
 * With N runnable and queued KSEs in the KSEGRP, the first N threads | |
* are linked to them. Other threads are not yet assigned. | |
*/ | |
struct thread { | |
struct proc *td_proc; /* (*) Associated process. */ | |
struct ksegrp *td_ksegrp; /* (*) Associated KSEG. */ | |
TAILQ_ENTRY(thread) td_plist; /* (*) All threads in this proc. */ | |
TAILQ_ENTRY(thread) td_kglist; /* (*) All threads in this ksegrp. */ | |
/* The two queues below should someday be merged. */ | |
TAILQ_ENTRY(thread) td_slpq; /* (j) Sleep queue. */ | |
TAILQ_ENTRY(thread) td_lockq; /* (j) Lock queue. */ | |
TAILQ_ENTRY(thread) td_runq; /* (j/z) Run queue(s). XXXKSE */ | |
TAILQ_HEAD(, selinfo) td_selq; /* (p) List of selinfos. */ | |
struct turnstile *td_turnstile; /* (k) Associated turnstile. */ | |
/* Cleared during fork1() or thread_sched_upcall(). */ | |
#define td_startzero td_flags | |
int td_flags; /* (j) TDF_* flags. */ | |
int td_inhibitors; /* (j) Why can not run. */ | |
int td_pflags; /* (k) Private thread (TDP_*) flags. */ | |
struct kse *td_last_kse; /* (j) Previous value of td_kse. */ | |
struct kse *td_kse; /* (j) Current KSE if running. */ | |
int td_dupfd; /* (k) Ret value from fdopen. XXX */ | |
void *td_wchan; /* (j) Sleep address. */ | |
const char *td_wmesg; /* (j) Reason for sleep. */ | |
u_char td_lastcpu; /* (j) Last cpu we were on. */ | |
u_char td_oncpu; /* (j) Which cpu we are on. */ | |
short td_locks; /* (k) DEBUG: lockmgr count of locks. */ | |
struct turnstile *td_blocked; /* (j) Lock process is blocked on. */ | |
struct ithd *td_ithd; /* (b) For interrupt threads only. */ | |
const char *td_lockname; /* (j) Name of lock blocked on. */ | |
LIST_HEAD(, turnstile) td_contested; /* (q) Contested locks. */ | |
struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */ | |
int td_intr_nesting_level; /* (k) Interrupt recursion. */ | |
int td_pinned; /* (k) Temporary cpu pin count. */ | |
struct kse_thr_mailbox *td_mailbox; /* (*) Userland mailbox address. */ | |
struct ucred *td_ucred; /* (k) Reference to credentials. */ | |
struct thread *td_standin; /* (*) Use this for an upcall. */ | |
u_int td_prticks; /* (*) Profclock hits in sys for user */ | |
struct kse_upcall *td_upcall; /* (*) Upcall structure. */ | |
uint64_t td_runtime; /* (t) How many cpu ticks we've run. */ | |
u_int64_t td_sticks; /* (j) Statclock hits in system mode. */ | |
u_int td_uuticks; /* (*) Statclock in user, for UTS. */ | |
u_int td_usticks; /* (*) Statclock in kernel, for UTS. */ | |
int td_intrval; /* (*) Return value of TDF_INTERRUPT. */ | |
sigset_t td_oldsigmask; /* (k) Saved mask from pre sigpause. */ | |
sigset_t td_sigmask; /* (c) Current signal mask. */ | |
sigset_t td_siglist; /* (c) Sigs arrived, not delivered. */ | |
sigset_t *td_waitset; /* (c) Wait set for sigwait. */ | |
TAILQ_ENTRY(thread) td_umtx; /* (c?) Link for when we're blocked. */ | |
volatile u_int td_generation; /* (k) Enable detection of preemption */ | |
#define td_endzero td_base_pri | |
/* Copied during fork1() or thread_sched_upcall(). */ | |
#define td_startcopy td_endzero | |
u_char td_base_pri; /* (j) Thread base kernel priority. */ | |
u_char td_priority; /* (j) Thread active priority. */ | |
u_char td_usr_pri; /* (j) Thread active priority before going to kernel. */ | |
#define td_endcopy td_pcb | |
/* | |
* fields that must be manually set in fork1() or thread_sched_upcall() | |
 * or already have been set in the allocator, constructor, etc. | |
*/ | |
struct pcb *td_pcb; /* (k) Kernel VA of pcb and kstack. */ | |
enum { | |
TDS_INACTIVE = 0x0, | |
TDS_INHIBITED, | |
TDS_CAN_RUN, | |
TDS_RUNQ, | |
TDS_RUNNING | |
} td_state; | |
register_t td_retval[2]; /* (k) Syscall aux returns. */ | |
struct callout td_slpcallout; /* (h) Callout for sleep. */ | |
struct trapframe *td_frame; /* (k) */ | |
struct vm_object *td_kstack_obj;/* (a) Kstack object. */ | |
vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */ | |
int td_kstack_pages; /* (a) Size of the kstack. */ | |
struct vm_object *td_altkstack_obj;/* (a) Alternate kstack object. */ | |
vm_offset_t td_altkstack; /* (a) Kernel VA of alternate kstack. */ | |
int td_altkstack_pages; /* (a) Size of the alternate kstack */ | |
u_int td_critnest; /* (k) Critical section nest level. */ | |
struct mdthread td_md; /* (k) Any machine-dependent fields. */ | |
struct td_sched *td_sched; /* (*) Scheduler-specific data. */ | |
}; | |
/* flags kept in td_flags */ | |
#define TDF_INPANIC 0x000002 /* Caused a panic, let it drive crashdump. */ | |
#define TDF_CAN_UNBIND 0x000004 /* Only temporarily bound. */ | |
#define TDF_SINTR 0x000008 /* Sleep is interruptible. */ | |
#define TDF_TIMEOUT 0x000010 /* Timing out during sleep. */ | |
#define TDF_IDLETD 0x000020 /* This is one of the per-CPU idle threads. */ | |
#define TDF_SELECT 0x000040 /* Selecting; wakeup/waiting danger. */ | |
#define TDF_CVWAITQ 0x000080 /* Thread is on a cv_waitq (not slpq). */ | |
#define TDF_TSNOBLOCK 0x000100 /* Don't block on a turnstile due to race. */ | |
#define TDF_ONSLEEPQ 0x000200 /* On the sleep queue. */ | |
#define TDF_ASTPENDING 0x000800 /* Thread has some asynchronous events. */ | |
#define TDF_TIMOFAIL 0x001000 /* Timeout from sleep after we were awake. */ | |
#define TDF_INTERRUPT 0x002000 /* Thread is marked as interrupted. */ | |
#define TDF_USTATCLOCK 0x004000 /* Stat clock hits in userland. */ | |
#define TDF_OWEUPC 0x008000 /* Owe thread an addupc() call at next AST. */ | |
#define TDF_NEEDRESCHED 0x010000 /* Thread needs to yield. */ | |
#define TDF_NEEDSIGCHK 0x020000 /* Thread may need signal delivery. */ | |
#define TDF_SA 0x040000 /* A scheduler activation based thread. */ | |
#define TDF_UMTXWAKEUP 0x080000 /* Libthr thread must not sleep on a umtx. */ | |
#define TDF_DEADLKTREAT 0x800000 /* Lock aquisition - deadlock treatment. */ | |
/* "private" flags kept in td_pflags */ | |
#define TDP_OLDMASK 0x0001 /* Need to restore mask after suspend. */ | |
#define TDP_INKTR 0x0002 /* Thread is currently in KTR code. */ | |
#define TDP_INKTRACE 0x0004 /* Thread is currently in KTRACE code. */ | |
#define TDP_UPCALLING 0x0008 /* This thread is doing an upcall. */ | |
#define TDP_COWINPROGRESS 0x0010 /* Snapshot copy-on-write in progress. */ | |
#define TDI_SUSPENDED 0x0001 /* On suspension queue. */ | |
#define TDI_SLEEPING 0x0002 /* Actually asleep! (tricky). */ | |
#define TDI_SWAPPED 0x0004 /* Stack not in mem.. bad juju if run. */ | |
#define TDI_LOCK 0x0008 /* Stopped on a lock. */ | |
#define TDI_IWAIT 0x0010 /* Awaiting interrupt. */ | |
#define TD_CAN_UNBIND(td) \ | |
(((td)->td_flags & TDF_CAN_UNBIND) == TDF_CAN_UNBIND && \ | |
((td)->td_upcall != NULL)) | |
#define TD_IS_SLEEPING(td) ((td)->td_inhibitors & TDI_SLEEPING) | |
#define TD_ON_SLEEPQ(td) ((td)->td_wchan != NULL) | |
#define TD_IS_SUSPENDED(td) ((td)->td_inhibitors & TDI_SUSPENDED) | |
#define TD_IS_SWAPPED(td) ((td)->td_inhibitors & TDI_SWAPPED) | |
#define TD_ON_LOCK(td) ((td)->td_inhibitors & TDI_LOCK) | |
#define TD_AWAITING_INTR(td) ((td)->td_inhibitors & TDI_IWAIT) | |
#define TD_IS_RUNNING(td) ((td)->td_state == TDS_RUNNING) | |
#define TD_ON_RUNQ(td) ((td)->td_state == TDS_RUNQ) | |
#define TD_CAN_RUN(td) ((td)->td_state == TDS_CAN_RUN) | |
#define TD_IS_INHIBITED(td) ((td)->td_state == TDS_INHIBITED) | |
#define TD_SET_INHIB(td, inhib) do { \ | |
(td)->td_state = TDS_INHIBITED; \ | |
(td)->td_inhibitors |= (inhib); \ | |
} while (0) | |
#define TD_CLR_INHIB(td, inhib) do { \ | |
if (((td)->td_inhibitors & (inhib)) && \ | |
(((td)->td_inhibitors &= ~(inhib)) == 0)) \ | |
(td)->td_state = TDS_CAN_RUN; \ | |
} while (0) | |
#define TD_SET_SLEEPING(td) TD_SET_INHIB((td), TDI_SLEEPING) | |
#define TD_SET_SWAPPED(td) TD_SET_INHIB((td), TDI_SWAPPED) | |
#define TD_SET_LOCK(td) TD_SET_INHIB((td), TDI_LOCK) | |
#define TD_SET_SUSPENDED(td) TD_SET_INHIB((td), TDI_SUSPENDED) | |
#define TD_SET_IWAIT(td) TD_SET_INHIB((td), TDI_IWAIT) | |
#define TD_SET_EXITING(td) TD_SET_INHIB((td), TDI_EXITING) | |
#define TD_CLR_SLEEPING(td) TD_CLR_INHIB((td), TDI_SLEEPING) | |
#define TD_CLR_SWAPPED(td) TD_CLR_INHIB((td), TDI_SWAPPED) | |
#define TD_CLR_LOCK(td) TD_CLR_INHIB((td), TDI_LOCK) | |
#define TD_CLR_SUSPENDED(td) TD_CLR_INHIB((td), TDI_SUSPENDED) | |
#define TD_CLR_IWAIT(td) TD_CLR_INHIB((td), TDI_IWAIT) | |
#define TD_SET_RUNNING(td) do {(td)->td_state = TDS_RUNNING; } while (0) | |
#define TD_SET_RUNQ(td) do {(td)->td_state = TDS_RUNQ; } while (0) | |
#define TD_SET_CAN_RUN(td) do {(td)->td_state = TDS_CAN_RUN; } while (0) | |
#define TD_SET_ON_SLEEPQ(td) do {(td)->td_flags |= TDF_ONSLEEPQ; } while (0) | |
#define TD_CLR_ON_SLEEPQ(td) do { \ | |
(td)->td_flags &= ~TDF_ONSLEEPQ; \ | |
(td)->td_wchan = NULL; \ | |
} while (0) | |
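/*
 * Example of how the inhibitor macros above compose (sketch only):
 * putting a thread to sleep and later waking it might look like
 *
 *	TD_SET_SLEEPING(td);	td_state becomes TDS_INHIBITED and
 *				TDI_SLEEPING is set in td_inhibitors
 *	...
 *	TD_CLR_SLEEPING(td);	TDI_SLEEPING is cleared; td_state only
 *				returns to TDS_CAN_RUN once all inhibitor
 *				bits have been cleared
 */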
/* | |
* The schedulable entity that can be given a context to run. | |
* A process may have several of these. Probably one per processor | |
 * but possibly a few more. In this universe they are grouped | |
* with a KSEG that contains the priority and niceness | |
* for the group. | |
*/ | |
struct kse { | |
struct proc *ke_proc; /* (*) Associated process. */ | |
struct ksegrp *ke_ksegrp; /* (*) Associated KSEG. */ | |
TAILQ_ENTRY(kse) ke_kglist; /* (*) Queue of KSEs in ke_ksegrp. */ | |
TAILQ_ENTRY(kse) ke_kgrlist; /* (*) Queue of KSEs in this state. */ | |
TAILQ_ENTRY(kse) ke_procq; /* (j/z) Run queue. */ | |
#define ke_startzero ke_flags | |
int ke_flags; /* (j) KEF_* flags. */ | |
struct thread *ke_thread; /* (*) Active associated thread. */ | |
fixpt_t ke_pctcpu; /* (j) %cpu during p_swtime. */ | |
u_char ke_oncpu; /* (j) Which cpu we are on. */ | |
char ke_rqindex; /* (j) Run queue index. */ | |
enum { | |
KES_UNUSED = 0x0, | |
KES_IDLE, | |
KES_ONRUNQ, | |
KES_UNQUEUED, /* in transit */ | |
KES_THREAD /* slaved to thread state */ | |
} ke_state; /* (j) KSE status. */ | |
#define ke_endzero ke_dummy | |
u_char ke_dummy; | |
struct ke_sched *ke_sched; /* (*) Scheduler-specific data. */ | |
}; | |
/* flags kept in ke_flags */ | |
#define KEF_SCHED0 0x00001 /* For scheduler-specific use. */ | |
#define KEF_SCHED1 0x00002 /* For scheduler-specific use. */ | |
#define KEF_SCHED2 0x00004 /* For scheduler-specific use. */ | |
#define KEF_SCHED3 0x00008 /* For scheduler-specific use. */ | |
#define KEF_DIDRUN 0x02000 /* KSE actually ran. */ | |
#define KEF_EXIT 0x04000 /* KSE is being killed. */ | |
/* | |
* The upcall management structure. | |
* The upcall is used when returning to userland. If a thread does not have | |
* an upcall on return to userland the thread exports its context and exits. | |
*/ | |
struct kse_upcall { | |
TAILQ_ENTRY(kse_upcall) ku_link; /* List of upcalls in KSEG. */ | |
struct ksegrp *ku_ksegrp; /* Associated KSEG. */ | |
struct thread *ku_owner; /* owning thread */ | |
int ku_flags; /* KUF_* flags. */ | |
struct kse_mailbox *ku_mailbox; /* userland mailbox address. */ | |
stack_t ku_stack; /* userland upcall stack. */ | |
void *ku_func; /* userland upcall function. */ | |
unsigned int ku_mflags; /* cached upcall mailbox flags */ | |
}; | |
#define KUF_DOUPCALL 0x00001 /* Do upcall now, don't wait. */ | |
#define KUF_EXITING 0x00002 /* Upcall structure is exiting. */ | |
/* | |
* Kernel-scheduled entity group (KSEG). The scheduler considers each KSEG to | |
* be an indivisible unit from a time-sharing perspective, though each KSEG may | |
* contain multiple KSEs. | |
*/ | |
struct ksegrp { | |
struct proc *kg_proc; /* (*) Process that contains this KSEG. */ | |
TAILQ_ENTRY(ksegrp) kg_ksegrp; /* (*) Queue of KSEGs in kg_proc. */ | |
TAILQ_HEAD(, kse) kg_kseq; /* (ke_kglist) All KSEs. */ | |
TAILQ_HEAD(, kse) kg_iq; /* (ke_kgrlist) All idle KSEs. */ | |
TAILQ_HEAD(, thread) kg_threads;/* (td_kglist) All threads. */ | |
TAILQ_HEAD(, thread) kg_runq; /* (td_runq) waiting RUNNABLE threads */ | |
TAILQ_HEAD(, thread) kg_slpq; /* (td_runq) NONRUNNABLE threads. */ | |
TAILQ_HEAD(, kse_upcall) kg_upcalls; /* All upcalls in the group. */ | |
#define kg_startzero kg_estcpu | |
u_int kg_estcpu; /* (j) Sum of the same field in KSEs. */ | |
u_int kg_slptime; /* (j) How long completely blocked. */ | |
struct thread *kg_last_assigned; /* (j) Last thread assigned to a KSE. */ | |
int kg_runnable; /* (j) Num runnable threads on queue. */ | |
int kg_runq_kses; /* (j) Num KSEs on runq. */ | |
int kg_idle_kses; /* (j) Num KSEs on iq. */ | |
int kg_numupcalls; /* (j) Num upcalls. */ | |
int kg_upsleeps; /* (c) Num threads in kse_release(). */ | |
struct kse_thr_mailbox *kg_completed; /* (c) Completed thread mboxes. */ | |
int kg_nextupcall; /* (*) Next upcall time. */ | |
int kg_upquantum; /* (*) Quantum to schedule an upcall. */ | |
#define kg_endzero kg_pri_class | |
#define kg_startcopy kg_endzero | |
u_char kg_pri_class; /* (j) Scheduling class. */ | |
u_char kg_user_pri; /* (j) User pri from estcpu and nice. */ | |
char kg_nice; /* (c + j) Process "nice" value. */ | |
#define kg_endcopy kg_numthreads | |
int kg_numthreads; /* (j) Num threads in total. */ | |
int kg_kses; /* (j) Num KSEs in group. */ | |
struct kg_sched *kg_sched; /* (*) Scheduler-specific data. */ | |
}; | |
/* | |
 * The old-fashioned process. May have multiple threads, KSEGRPs | |
* and KSEs. Starts off with a single embedded KSEGRP, KSE and THREAD. | |
*/ | |
struct proc { | |
LIST_ENTRY(proc) p_list; /* (d) List of all processes. */ | |
TAILQ_HEAD(, ksegrp) p_ksegrps; /* (kg_ksegrp) All KSEGs. */ | |
TAILQ_HEAD(, thread) p_threads; /* (td_plist) Threads. (shortcut) */ | |
TAILQ_HEAD(, thread) p_suspended; /* (td_runq) Suspended threads. */ | |
struct ucred *p_ucred; /* (c) Process owner's identity. */ | |
struct filedesc *p_fd; /* (b) Ptr to open files structure. */ | |
struct filedesc_to_leader *p_fdtol; /* (b) Ptr to tracking node */ | |
/* Accumulated stats for all KSEs? */ | |
struct pstats *p_stats; /* (b) Accounting/statistics (CPU). */ | |
struct plimit *p_limit; /* (c*) Process limits. */ | |
struct vm_object *p_upages_obj; /* (a) Upages object. */ | |
struct sigacts *p_sigacts; /* (x) Signal actions, state (CPU). */ | |
/*struct ksegrp p_ksegrp; | |
struct kse p_kse; */ | |
/* | |
* The following don't make too much sense.. | |
* See the td_ or ke_ versions of the same flags | |
*/ | |
int p_flag; /* (c) P_* flags. */ | |
int p_sflag; /* (j) PS_* flags. */ | |
enum { | |
PRS_NEW = 0, /* In creation */ | |
PRS_NORMAL, /* KSEs can be run. */ | |
PRS_ZOMBIE | |
} p_state; /* (j/c) S* process status. */ | |
pid_t p_pid; /* (b) Process identifier. */ | |
LIST_ENTRY(proc) p_hash; /* (d) Hash chain. */ | |
LIST_ENTRY(proc) p_pglist; /* (g + e) List of processes in pgrp. */ | |
struct proc *p_pptr; /* (c + e) Pointer to parent process. */ | |
LIST_ENTRY(proc) p_sibling; /* (e) List of sibling processes. */ | |
LIST_HEAD(, proc) p_children; /* (e) Pointer to list of children. */ | |
struct mtx p_mtx; /* (n) Lock for this struct. */ | |
/* The following fields are all zeroed upon creation in fork. */ | |
#define p_startzero p_oppid | |
pid_t p_oppid; /* (c + e) Save ppid in ptrace. XXX */ | |
struct vmspace *p_vmspace; /* (b) Address space. */ | |
u_int p_swtime; /* (j) Time swapped in or out. */ | |
struct itimerval p_realtimer; /* (c) Alarm timer. */ | |
struct bintime p_runtime; /* (j) Real time. */ | |
u_int64_t p_uu; /* (j) Previous user time in usec. */ | |
u_int64_t p_su; /* (j) Previous system time in usec. */ | |
u_int64_t p_iu; /* (j) Previous intr time in usec. */ | |
u_int64_t p_uticks; /* (j) Statclock hits in user mode. */ | |
u_int64_t p_sticks; /* (j) Statclock hits in system mode. */ | |
u_int64_t p_iticks; /* (j) Statclock hits in intr. */ | |
int p_profthreads; /* (c) Num threads in addupc_task. */ | |
int p_maxthrwaits; /* (c) Max threads num waiters */ | |
int p_traceflag; /* (o) Kernel trace points. */ | |
struct vnode *p_tracevp; /* (c + o) Trace to vnode. */ | |
struct ucred *p_tracecred; /* (o) Credentials to trace with. */ | |
struct vnode *p_textvp; /* (b) Vnode of executable. */ | |
sigset_t p_siglist; /* (c) Sigs not delivered to a td. */ | |
char p_lock; /* (c) Proclock (prevent swap) count. */ | |
struct klist p_klist; /* (c) Knotes attached to this proc. */ | |
struct sigiolst p_sigiolst; /* (c) List of sigio sources. */ | |
int p_sigparent; /* (c) Signal to parent on exit. */ | |
int p_sig; /* (n) For core dump/debugger XXX. */ | |
u_long p_code; /* (n) For core dump/debugger XXX. */ | |
u_int p_stops; /* (c) Stop event bitmask. */ | |
u_int p_stype; /* (c) Stop event type. */ | |
char p_step; /* (c) Process is stopped. */ | |
u_char p_pfsflags; /* (c) Procfs flags. */ | |
struct nlminfo *p_nlminfo; /* (?) Only used by/for lockd. */ | |
void *p_aioinfo; /* (?) ASYNC I/O info. */ | |
struct thread *p_singlethread;/* (c + j) If single threading this is it */ | |
int p_suspcount; /* (c) # threads in suspended mode */ | |
/* End area that is zeroed on creation. */ | |
#define p_endzero p_sigstk | |
/* The following fields are all copied upon creation in fork. */ | |
#define p_startcopy p_endzero | |
stack_t p_sigstk; /* (c) Stack ptr and on-stack flag. */ | |
u_int p_magic; /* (b) Magic number. */ | |
char p_comm[MAXCOMLEN + 1]; /* (b) Process name. */ | |
struct pgrp *p_pgrp; /* (c + e) Pointer to process group. */ | |
struct sysentvec *p_sysent; /* (b) Syscall dispatch info. */ | |
struct pargs *p_args; /* (c) Process arguments. */ | |
rlim_t p_cpulimit; /* (j) Current CPU limit in seconds. */ | |
/* End area that is copied on creation. */ | |
#define p_endcopy p_xstat | |
u_short p_xstat; /* (c) Exit status; also stop sig. */ | |
int p_numthreads; /* (j) Number of threads. */ | |
int p_numksegrps; /* (?) number of ksegrps */ | |
struct mdproc p_md; /* Any machine-dependent fields. */ | |
struct callout p_itcallout; /* (h + c) Interval timer callout. */ | |
struct user *p_uarea; /* (k) Kernel VA of u-area (CPU). */ | |
u_short p_acflag; /* (c) Accounting flags. */ | |
struct rusage *p_ru; /* (a) Exit information. XXX */ | |
struct proc *p_peers; /* (r) */ | |
struct proc *p_leader; /* (b) */ | |
void *p_emuldata; /* (c) Emulator state data. */ | |
struct label *p_label; /* (*) Proc (not subject) MAC label. */ | |
struct p_sched *p_sched; /* (*) Scheduler-specific data. */ | |
}; | |
#define p_rlimit p_limit->pl_rlimit | |
#define p_session p_pgrp->pg_session | |
#define p_pgid p_pgrp->pg_id | |
#define NOCPU 0xff /* For when we aren't on a CPU. (SMP) */ | |
/* Status values (p_stat). */ | |
/* These flags are kept in p_flag. */ | |
#define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */ | |
#define P_CONTROLT 0x00002 /* Has a controlling terminal. */ | |
#define P_KTHREAD 0x00004 /* Kernel thread. (*)*/ | |
#define P_NOLOAD 0x00008 /* Ignore during load avg calculations. */ | |
#define P_PPWAIT 0x00010 /* Parent is waiting for child to exec/exit. */ | |
#define P_PROFIL 0x00020 /* Has started profiling. */ | |
#define P_STOPPROF 0x00040 /* Has thread in requesting to stop prof */ | |
#define P_SUGID 0x00100 /* Had set id privileges since last exec. */ | |
#define P_SYSTEM 0x00200 /* System proc: no sigs, stats or swapping. */ | |
#define P_SINGLE_EXIT 0x00400 /* Threads suspending should exit, not wait. */ | |
#define P_TRACED 0x00800 /* Debugged process being traced. */ | |
#define P_WAITED 0x01000 /* Someone is waiting for us. */ | |
#define P_WEXIT 0x02000 /* Working on exiting. */ | |
#define P_EXEC 0x04000 /* Process called exec. */ | |
#define P_SA 0x08000 /* Using scheduler activations. */ | |
#define P_CONTINUED 0x10000 /* Proc has continued from a stopped state. */ | |
#define P_STOPPED_SIG 0x20000 /* Stopped due to SIGSTOP/SIGTSTP. */ | |
#define P_STOPPED_TRACE 0x40000 /* Stopped because of tracing. */ | |
#define P_STOPPED_SINGLE 0x80000 /* Only one thread can continue */ | |
/* (not to user) */ | |
#define P_PROTECTED 0x100000 /* Do not kill on memory overcommit. */ | |
#define P_SIGEVENT 0x200000 /* Process pending signals changed. */ | |
#define P_JAILED 0x1000000 /* Process is in jail. */ | |
#define P_ALTSTACK 0x2000000 /* Have alternate signal stack. */ | |
#define P_INEXEC 0x4000000 /* Process is in execve(). */ | |
#define P_STOPPED (P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE) | |
#define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED) | |
/* These flags are kept in p_sflag and are protected with sched_lock. */ | |
#define PS_INMEM 0x00001 /* Loaded into memory. */ | |
#define PS_XCPU 0x00002 /* Exceeded CPU limit. */ | |
#define PS_ALRMPEND 0x00020 /* Pending SIGVTALRM needs to be posted. */ | |
#define PS_PROFPEND 0x00040 /* Pending SIGPROF needs to be posted. */ | |
#define PS_SWAPINREQ 0x00100 /* Swapin request due to wakeup. */ | |
#define PS_SWAPPINGOUT 0x00200 /* Process is being swapped out. */ | |
#define PS_SWAPPINGIN 0x04000 /* Process is being swapped in. */ | |
#define PS_MACPEND 0x08000 /* Ast()-based MAC event pending. */ | |
/* used only in legacy conversion code */ | |
#define SIDL 1 /* Process being created by fork. */ | |
#define SRUN 2 /* Currently runnable. */ | |
#define SSLEEP 3 /* Sleeping on an address. */ | |
#define SSTOP 4 /* Process debugging or suspension. */ | |
#define SZOMB 5 /* Awaiting collection by parent. */ | |
#define SWAIT 6 /* Waiting for interrupt. */ | |
#define SLOCK 7 /* Blocked on a lock. */ | |
#define P_MAGIC 0xbeefface | |
#ifdef _KERNEL | |
#ifdef MALLOC_DECLARE | |
MALLOC_DECLARE(M_PARGS); | |
MALLOC_DECLARE(M_PGRP); | |
MALLOC_DECLARE(M_SESSION); | |
MALLOC_DECLARE(M_SUBPROC); | |
MALLOC_DECLARE(M_ZOMBIE); | |
#endif | |
#define FOREACH_PROC_IN_SYSTEM(p) \ | |
LIST_FOREACH((p), &allproc, p_list) | |
#define FOREACH_KSEGRP_IN_PROC(p, kg) \ | |
TAILQ_FOREACH((kg), &(p)->p_ksegrps, kg_ksegrp) | |
#define FOREACH_THREAD_IN_GROUP(kg, td) \ | |
TAILQ_FOREACH((td), &(kg)->kg_threads, td_kglist) | |
#define FOREACH_KSE_IN_GROUP(kg, ke) \ | |
TAILQ_FOREACH((ke), &(kg)->kg_kseq, ke_kglist) | |
#define FOREACH_UPCALL_IN_GROUP(kg, ku) \ | |
TAILQ_FOREACH((ku), &(kg)->kg_upcalls, ku_link) | |
#define FOREACH_THREAD_IN_PROC(p, td) \ | |
TAILQ_FOREACH((td), &(p)->p_threads, td_plist) | |
/* XXXKSE the lines below should probably only be used in 1:1 code */ | |
#define FIRST_THREAD_IN_PROC(p) TAILQ_FIRST(&(p)->p_threads) | |
#define FIRST_KSEGRP_IN_PROC(p) TAILQ_FIRST(&(p)->p_ksegrps) | |
#define FIRST_KSE_IN_KSEGRP(kg) TAILQ_FIRST(&(kg)->kg_kseq) | |
#define FIRST_KSE_IN_PROC(p) FIRST_KSE_IN_KSEGRP(FIRST_KSEGRP_IN_PROC(p)) | |
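/*
 * Typical use of the iterators above (sketch only; note that td_plist is
 * keyed (*) in this version, so callers rely on broader synchronization
 * such as the proc lock or sched_lock):
 *
 *	struct thread *td;
 *
 *	FOREACH_THREAD_IN_PROC(p, td) {
 *		... examine td ...
 *	}
 */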
/* | |
* We use process IDs <= PID_MAX; PID_MAX + 1 must also fit in a pid_t, | |
* as it is used to represent "no process group". | |
*/ | |
#define PID_MAX 99999 | |
#define NO_PID 100000 | |
#define SESS_LEADER(p) ((p)->p_session->s_leader == (p)) | |
#define SESSHOLD(s) ((s)->s_count++) | |
#define SESSRELE(s) { \ | |
if (--(s)->s_count == 0) \ | |
FREE(s, M_SESSION); \ | |
} | |
#define STOPEVENT(p, e, v) do { \ | |
PROC_LOCK(p); \ | |
_STOPEVENT((p), (e), (v)); \ | |
PROC_UNLOCK(p); \ | |
} while (0) | |
#define _STOPEVENT(p, e, v) do { \ | |
PROC_LOCK_ASSERT(p, MA_OWNED); \ | |
if ((p)->p_stops & (e)) \ | |
stopevent((p), (e), (v)); \ | |
} while (0) | |
/* Lock and unlock a process. */ | |
#define PROC_LOCK(p) mtx_lock(&(p)->p_mtx) | |
#define PROC_TRYLOCK(p) mtx_trylock(&(p)->p_mtx) | |
#define PROC_UNLOCK(p) mtx_unlock(&(p)->p_mtx) | |
#define PROC_LOCKED(p) mtx_owned(&(p)->p_mtx) | |
#define PROC_LOCK_ASSERT(p, type) mtx_assert(&(p)->p_mtx, (type)) | |
/* Lock and unlock a process group. */ | |
#define PGRP_LOCK(pg) mtx_lock(&(pg)->pg_mtx) | |
#define PGRP_UNLOCK(pg) mtx_unlock(&(pg)->pg_mtx) | |
#define PGRP_LOCKED(pg) mtx_owned(&(pg)->pg_mtx) | |
#define PGRP_LOCK_ASSERT(pg, type) mtx_assert(&(pg)->pg_mtx, (type)) | |
#define PGRP_LOCK_PGSIGNAL(pg) do { \ | |
if ((pg) != NULL) \ | |
PGRP_LOCK(pg); \ | |
} while (0) | |
#define PGRP_UNLOCK_PGSIGNAL(pg) do { \ | |
if ((pg) != NULL) \ | |
PGRP_UNLOCK(pg); \ | |
} while (0) | |
/* Lock and unlock a session. */ | |
#define SESS_LOCK(s) mtx_lock(&(s)->s_mtx) | |
#define SESS_UNLOCK(s) mtx_unlock(&(s)->s_mtx) | |
#define SESS_LOCKED(s) mtx_owned(&(s)->s_mtx) | |
#define SESS_LOCK_ASSERT(s, type) mtx_assert(&(s)->s_mtx, (type)) | |
/* Hold process U-area in memory, normally for ptrace/procfs work. */ | |
#define PHOLD(p) do { \ | |
PROC_LOCK(p); \ | |
_PHOLD(p); \ | |
PROC_UNLOCK(p); \ | |
} while (0) | |
#define _PHOLD(p) do { \ | |
PROC_LOCK_ASSERT((p), MA_OWNED); \ | |
(p)->p_lock++; \ | |
if (((p)->p_sflag & PS_INMEM) == 0) \ | |
faultin((p)); \ | |
} while (0) | |
#define PRELE(p) do { \ | |
PROC_LOCK((p)); \ | |
_PRELE((p)); \ | |
PROC_UNLOCK((p)); \ | |
} while (0) | |
#define _PRELE(p) do { \ | |
PROC_LOCK_ASSERT((p), MA_OWNED); \ | |
(--(p)->p_lock); \ | |
} while (0) | |
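/*
 * Usage sketch for the hold counters above: a ptrace- or procfs-style
 * consumer brackets its access to the target with
 *
 *	PHOLD(p);	bumps p_lock and faults the process back in
 *			if PS_INMEM is clear
 *	... operate on the U-area / pcb ...
 *	PRELE(p);
 *
 * so that the swapper will not page the U-area out underneath it.
 */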
/* Check whether a thread is safe to be swapped out. */ | |
#define thread_safetoswapout(td) (TD_IS_SLEEPING(td) || TD_IS_SUSPENDED(td)) | |
/* Lock and unlock process arguments. */ | |
#define PARGS_LOCK(p) mtx_lock(&pargs_ref_lock) | |
#define PARGS_UNLOCK(p) mtx_unlock(&pargs_ref_lock) | |
#define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash]) | |
extern LIST_HEAD(pidhashhead, proc) *pidhashtbl; | |
extern u_long pidhash; | |
#define PGRPHASH(pgid) (&pgrphashtbl[(pgid) & pgrphash]) | |
extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl; | |
extern u_long pgrphash; | |
extern struct sx allproc_lock; | |
extern struct sx proctree_lock; | |
extern struct mtx pargs_ref_lock; | |
extern struct mtx ppeers_lock; | |
extern struct proc proc0; /* Process slot for swapper. */ | |
extern struct thread thread0; /* Primary thread in proc0. */ | |
extern struct ksegrp ksegrp0; /* Primary ksegrp in proc0. */ | |
extern struct kse kse0; /* Primary kse in proc0. */ | |
extern struct vmspace vmspace0; /* VM space for proc0. */ | |
extern int hogticks; /* Limit on kernel cpu hogs. */ | |
extern int nprocs, maxproc; /* Current and max number of procs. */ | |
extern int maxprocperuid; /* Max procs per uid. */ | |
extern u_long ps_arg_cache_limit; | |
extern int ps_argsopen; | |
extern int sched_quantum; /* Scheduling quantum in ticks. */ | |
LIST_HEAD(proclist, proc); | |
TAILQ_HEAD(procqueue, proc); | |
TAILQ_HEAD(threadqueue, thread); | |
extern struct proclist allproc; /* List of all processes. */ | |
extern struct proclist zombproc; /* List of zombie processes. */ | |
extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */ | |
extern struct proc *updateproc; /* Process slot for syncer (sic). */ | |
extern struct uma_zone *proc_zone; | |
extern int lastpid; | |
struct proc *pfind(pid_t); /* Find process by id. */ | |
struct pgrp *pgfind(pid_t); /* Find process group by id. */ | |
struct proc *zpfind(pid_t); /* Find zombie process by id. */ | |
void adjustrunqueue(struct thread *, int newpri); | |
void ast(struct trapframe *framep); | |
struct thread *choosethread(void); | |
int cr_cansignal(struct ucred *cred, struct proc *proc, int signum); | |
int enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess); | |
int enterthispgrp(struct proc *p, struct pgrp *pgrp); | |
void faultin(struct proc *p); | |
void fixjobc(struct proc *p, struct pgrp *pgrp, int entering); | |
int fork1(struct thread *, int, int, struct proc **); | |
void fork_exit(void (*)(void *, struct trapframe *), void *, | |
struct trapframe *); | |
void fork_return(struct thread *, struct trapframe *); | |
int inferior(struct proc *p); | |
int leavepgrp(struct proc *p); | |
void mi_switch(void); | |
int p_candebug(struct thread *td, struct proc *p); | |
int p_cansee(struct thread *td, struct proc *p); | |
int p_cansched(struct thread *td, struct proc *p); | |
int p_cansignal(struct thread *td, struct proc *p, int signum); | |
struct pargs *pargs_alloc(int len); | |
void pargs_drop(struct pargs *pa); | |
void pargs_free(struct pargs *pa); | |
void pargs_hold(struct pargs *pa); | |
void procinit(void); | |
void threadinit(void); | |
void proc_linkup(struct proc *p, struct ksegrp *kg, | |
struct kse *ke, struct thread *td); | |
void proc_reparent(struct proc *child, struct proc *newparent); | |
int securelevel_ge(struct ucred *cr, int level); | |
int securelevel_gt(struct ucred *cr, int level); | |
void setrunnable(struct thread *); | |
void setrunqueue(struct thread *); | |
void setsugid(struct proc *p); | |
int sigonstack(size_t sp); | |
void sleepinit(void); | |
void stopevent(struct proc *, u_int, u_int); | |
void cpu_idle(void); | |
extern void (*cpu_idle_hook)(void); /* Hook to machdep CPU idler. */ | |
void cpu_switch(struct thread *old, struct thread *new); | |
void cpu_throw(struct thread *old, struct thread *new) __dead2; | |
void unsleep(struct thread *); | |
void userret(struct thread *, struct trapframe *, u_int); | |
void cpu_exit(struct thread *); | |
void cpu_sched_exit(struct thread *); | |
void exit1(struct thread *, int) __dead2; | |
void cpu_fork(struct thread *, struct proc *, struct thread *, int); | |
void cpu_set_fork_handler(struct thread *, void (*)(void *), void *); | |
/* New in KSE. */ | |
struct ksegrp *ksegrp_alloc(void); | |
void ksegrp_free(struct ksegrp *kg); | |
void ksegrp_stash(struct ksegrp *kg); | |
struct kse *kse_alloc(void); | |
void kse_free(struct kse *ke); | |
void kse_stash(struct kse *ke); | |
void cpu_set_upcall(struct thread *td, struct thread *td0); | |
void cpu_set_upcall_kse(struct thread *td, struct kse_upcall *ku); | |
void cpu_thread_clean(struct thread *); | |
void cpu_thread_exit(struct thread *); | |
void cpu_thread_setup(struct thread *td); | |
void cpu_thread_siginfo(int sig, u_long code, siginfo_t *si); | |
void cpu_thread_swapin(struct thread *); | |
void cpu_thread_swapout(struct thread *); | |
void kse_reassign(struct kse *ke); | |
void kse_link(struct kse *ke, struct ksegrp *kg); | |
void kse_unlink(struct kse *ke); | |
void ksegrp_link(struct ksegrp *kg, struct proc *p); | |
void ksegrp_unlink(struct ksegrp *kg); | |
void thread_signal_add(struct thread *td, int sig); | |
struct thread *thread_alloc(void); | |
void thread_exit(void) __dead2; | |
int thread_export_context(struct thread *td, int willexit); | |
void thread_free(struct thread *td); | |
void thread_link(struct thread *td, struct ksegrp *kg); | |
void thread_reap(void); | |
struct thread *thread_schedule_upcall(struct thread *td, struct kse_upcall *ku); | |
int thread_single(int how); | |
#define SINGLE_NO_EXIT 0 /* values for 'how' */ | |
#define SINGLE_EXIT 1 | |
void thread_single_end(void); | |
void thread_stash(struct thread *td); | |
int thread_suspend_check(int how); | |
void thread_suspend_one(struct thread *td); | |
void thread_unlink(struct thread *td); | |
void thread_unsuspend(struct proc *p); | |
void thread_unsuspend_one(struct thread *td); | |
int thread_userret(struct thread *td, struct trapframe *frame); | |
void thread_user_enter(struct proc *p, struct thread *td); | |
void thread_wait(struct proc *p); | |
int thread_statclock(int user); | |
struct kse_upcall *upcall_alloc(void); | |
void upcall_free(struct kse_upcall *ku); | |
void upcall_link(struct kse_upcall *ku, struct ksegrp *kg); | |
void upcall_unlink(struct kse_upcall *ku); | |
void upcall_remove(struct thread *td); | |
void upcall_stash(struct kse_upcall *ke); | |
void thread_sanity_check(struct thread *td, char *); | |
void thread_stopped(struct proc *p); | |
void thread_switchout(struct thread *td); | |
void thr_exit1(void); | |
#endif /* _KERNEL */ | |
#endif /* !_SYS_PROC_H_ */ |
/*- | |
* Copyright (c) 1982, 1986, 1990, 1991, 1993 | |
* The Regents of the University of California. All rights reserved. | |
* (c) UNIX System Laboratories, Inc. | |
* All or some portions of this file are derived from material licensed | |
* to the University of California by American Telephone and Telegraph | |
* Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
* the permission of UNIX System Laboratories, Inc. | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions | |
* are met: | |
* 1. Redistributions of source code must retain the above copyright | |
* notice, this list of conditions and the following disclaimer. | |
* 2. Redistributions in binary form must reproduce the above copyright | |
* notice, this list of conditions and the following disclaimer in the | |
* documentation and/or other materials provided with the distribution. | |
* 3. All advertising materials mentioning features or use of this software | |
* must display the following acknowledgement: | |
* This product includes software developed by the University of | |
* California, Berkeley and its contributors. | |
* 4. Neither the name of the University nor the names of its contributors | |
* may be used to endorse or promote products derived from this software | |
* without specific prior written permission. | |
* | |
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
* SUCH DAMAGE. | |
*/ | |
#include <sys/cdefs.h> | |
__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.28 2003/11/09 13:45:54 bde Exp $"); | |
#include <sys/param.h> | |
#include <sys/systm.h> | |
#include <sys/kernel.h> | |
#include <sys/ktr.h> | |
#include <sys/lock.h> | |
#include <sys/mutex.h> | |
#include <sys/proc.h> | |
#include <sys/resourcevar.h> | |
#include <sys/sched.h> | |
#include <sys/smp.h> | |
#include <sys/sysctl.h> | |
#include <sys/sx.h> | |
/* | |
* INVERSE_ESTCPU_WEIGHT is only suitable for statclock() frequencies in | |
* the range 100-256 Hz (approximately). | |
*/ | |
#define ESTCPULIM(e) \ | |
min((e), INVERSE_ESTCPU_WEIGHT * (NICE_WEIGHT * (PRIO_MAX - PRIO_MIN) - \ | |
RQ_PPQ) + INVERSE_ESTCPU_WEIGHT - 1) | |
#ifdef SMP | |
#define INVERSE_ESTCPU_WEIGHT (8 * smp_cpus) | |
#else | |
#define INVERSE_ESTCPU_WEIGHT 8 /* 1 / (priorities per estcpu level). */ | |
#endif | |
#define NICE_WEIGHT 1 /* Priorities per nice level. */ | |
struct ke_sched { | |
int ske_cpticks; /* (j) Ticks of cpu time. */ | |
}; | |
static struct ke_sched ke_sched; | |
struct ke_sched *kse0_sched = &ke_sched; | |
struct kg_sched *ksegrp0_sched = NULL; | |
struct p_sched *proc0_sched = NULL; | |
struct td_sched *thread0_sched = NULL; | |
static int sched_quantum; /* Roundrobin scheduling quantum in ticks. */ | |
#define SCHED_QUANTUM (16) /* Default sched quantum */ | |
static struct callout schedcpu_callout; | |
static struct callout roundrobin_callout; | |
static void roundrobin(void *arg); | |
static void schedcpu(void *arg); | |
static void sched_setup(void *dummy); | |
static void maybe_resched(struct thread *td); | |
static void updatepri(struct ksegrp *kg); | |
static void resetpriority(struct ksegrp *kg); | |
SYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL) | |
/* | |
* Global run queue. | |
*/ | |
static struct runq runq; | |
SYSINIT(runq, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runq_init, &runq) | |
static int | |
sysctl_kern_quantum(SYSCTL_HANDLER_ARGS) | |
{ | |
int error, new_val; | |
new_val = sched_quantum * tick; | |
error = sysctl_handle_int(oidp, &new_val, 0, req); | |
if (error != 0 || req->newptr == NULL) | |
return (error); | |
if (new_val < tick) | |
return (EINVAL); | |
sched_quantum = new_val / tick; | |
hogticks = 2 * sched_quantum; | |
return (0); | |
} | |
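/* | |
* Usage sketch for the handler above (assuming hz == 100, so tick == | |
* 10000 microseconds): "sysctl kern.quantum=100000" would set | |
* sched_quantum to 100000 / 10000 = 10 ticks and hogticks to 20, while | |
* any value smaller than one tick is rejected with EINVAL. | |
*/ | |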
SYSCTL_PROC(_kern, OID_AUTO, quantum, CTLTYPE_INT|CTLFLAG_RW, | |
0, sizeof sched_quantum, sysctl_kern_quantum, "I", | |
"Roundrobin scheduling quantum in microseconds"); | |
/* | |
* Arrange to reschedule if necessary, taking the priorities and | |
* schedulers into account. | |
*/ | |
static void | |
maybe_resched(struct thread *td) | |
{ | |
mtx_assert(&sched_lock, MA_OWNED); | |
if (td->td_priority < curthread->td_priority && curthread->td_kse) | |
curthread->td_flags |= TDF_NEEDRESCHED; | |
} | |
/* | |
* Force a switch among equal-priority processes every sched_quantum ticks. | |
* We don't actually need to force a context switch of the current process. | |
* The act of firing the event triggers a context switch to softclock() and | |
* then switching back out again which is equivalent to a preemption, thus | |
* no further work is needed on the local CPU. | |
*/ | |
/* ARGSUSED */ | |
static void | |
roundrobin(void *arg) | |
{ | |
#ifdef SMP | |
mtx_lock_spin(&sched_lock); | |
forward_roundrobin(); | |
mtx_unlock_spin(&sched_lock); | |
#endif | |
callout_reset(&roundrobin_callout, sched_quantum, roundrobin, NULL); | |
} | |
/* | |
* Constants for digital decay and forget: | |
* 90% of (kg_estcpu) usage in 5 * loadav time | |
* 95% of (ke_pctcpu) usage in 60 seconds (load insensitive) | |
* Note that, as ps(1) mentions, this can let percentages | |
* total over 100% (I've seen 137.9% for 3 processes). | |
* | |
* Note that schedclock() updates kg_estcpu and p_cpticks asynchronously. | |
* | |
* We wish to decay away 90% of kg_estcpu in (5 * loadavg) seconds. | |
* That is, the system wants to compute a value of decay such | |
* that the following for loop: | |
* for (i = 0; i < (5 * loadavg); i++) | |
* kg_estcpu *= decay; | |
* will compute | |
* kg_estcpu *= 0.1; | |
* for all values of loadavg: | |
* | |
* Mathematically this loop can be expressed by saying: | |
* decay ** (5 * loadavg) ~= .1 | |
* | |
* The system computes decay as: | |
* decay = (2 * loadavg) / (2 * loadavg + 1) | |
* | |
* We wish to prove that the system's computation of decay | |
* will always fulfill the equation: | |
* decay ** (5 * loadavg) ~= .1 | |
* | |
* If we compute b as: | |
* b = 2 * loadavg | |
* then | |
* decay = b / (b + 1) | |
* | |
* We now need to prove two things: | |
* 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) | |
* 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) | |
* | |
* Facts: | |
* For x close to zero, exp(x) =~ 1 + x, since | |
* exp(x) = 0! + x**1/1! + x**2/2! + ... . | |
* therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. | |
* For x close to zero, ln(1+x) =~ x, since | |
* ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 | |
* therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). | |
* ln(.1) =~ -2.30 | |
* | |
* Proof of (1): | |
* Solve (factor)**(power) =~ .1 given power (5*loadav): | |
* solving for factor, | |
* ln(factor) =~ (-2.30/5*loadav), or | |
* factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = | |
* exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED | |
* | |
* Proof of (2): | |
* Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): | |
* solving for power, | |
* power*ln(b/(b+1)) =~ -2.30, or | |
* power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED | |
* | |
* Actual power values for the implemented algorithm are as follows: | |
* loadav: 1 2 3 4 | |
* power: 5.68 10.32 14.94 19.55 | |
*/ | |
/* calculations for digital decay to forget 90% of usage in 5*loadav sec */ | |
#define loadfactor(loadav) (2 * (loadav)) | |
#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) | |
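/* | |
* Worked check of the decay factor: with loadav == 2, b == 4 and decay == | |
* 4/5; per the power table above, 0.8**10.32 ~= 0.1, so about 90% of | |
* kg_estcpu is forgotten in roughly 5 * loadav seconds. The #if 0 block | |
* below is a minimal userland sketch of the same fixed-point iteration | |
* (it assumes FSHIFT == 11, the usual <sys/param.h> value). | |
*/ | |
#if 0 | |
#include <stdio.h> | |
#define FSHIFT 11 | |
#define FSCALE (1 << FSHIFT) | |
#define loadfactor(loadav) (2 * (loadav)) | |
#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) | |
int | |
main(void) | |
{ | |
unsigned int estcpu = 255; /* starting cpu usage estimate */ | |
long loadfac = loadfactor(1 * FSCALE); /* loadav == 1.0, fixed point */ | |
int i; | |
for (i = 1; i <= 6; i++) { | |
estcpu = decay_cpu(loadfac, estcpu); /* multiply by 2/3 each second */ | |
printf("after %d s: estcpu = %u\n", i, estcpu); | |
} | |
return (0); | |
} | |
#endif | |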
/* decay 95% of `ke_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ | |
static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ | |
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); | |
/* | |
* If `ccpu' is not equal to `exp(-1/20)' and you still want to use the | |
* faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below | |
* and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). | |
* | |
* To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: | |
* 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). | |
* | |
* If you don't want to bother with the faster/more-accurate formula, you | |
* can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate | |
* (more general) method of calculating the %age of CPU used by a process. | |
*/ | |
#define CCPU_SHIFT 11 | |
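/* | |
* Map a thread's recently aged tick count onto a user-mode priority. | |
* Worked example (assuming the usual 4BSD constants PUSER == | |
* PRI_MIN_TIMESHARE == 160, PRI_MAX_TIMESHARE == 223, NICE_WEIGHT == 1, | |
* PRIO_MIN == -20): a thread with td_runtime == 90 and kg_nice == 0 gets | |
* prio = 160 + 90/3 + 1 * (0 - (-20)) = 210, already inside the | |
* time-sharing range, so the clamp leaves it unchanged. | |
*/ | |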
static void | |
reset_td_prio(struct thread *td) | |
{ | |
u_char prio; | |
/* | |
* Divide by 3: we map at most 200 recently aged ticks onto the | |
* roughly 64 available time-sharing priority levels. | |
*/ | |
prio = PUSER + (td->td_runtime / 3) + NICE_WEIGHT * (td->td_ksegrp->kg_nice - PRIO_MIN); | |
prio = min(max(prio, PRI_MIN_TIMESHARE), | |
PRI_MAX_TIMESHARE); | |
td->td_usr_pri = prio; | |
CTR3(KTR_MYKTR, "UPDATE thread (PID = %d) priority (%d -> %d)", td->td_proc->p_pid, td->td_priority, td->td_usr_pri); | |
} | |
/* | |
* Recompute process priorities, every hz ticks. | |
* MP-safe, called without the Giant mutex. | |
*/ | |
/* ARGSUSED */ | |
static void | |
schedcpu(void *arg) | |
{ | |
register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); | |
struct thread *td; | |
struct proc *p; | |
struct kse *ke; | |
struct ksegrp *kg; | |
int awake, realstathz; | |
realstathz = stathz ? stathz : hz; | |
sx_slock(&allproc_lock); | |
FOREACH_PROC_IN_SYSTEM(p) { | |
/* | |
* Prevent state changes and protect run queue. | |
*/ | |
mtx_lock_spin(&sched_lock); | |
/* | |
* Increment time in/out of memory. We ignore overflow; with | |
* 16-bit int's (remember them?) overflow takes 45 days. | |
*/ | |
p->p_swtime++; | |
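/* | |
* Age each thread's recently used ticks once per second: halving | |
* td_runtime makes CPU time consumed long ago count progressively | |
* less toward the priority computed in reset_td_prio(). | |
*/ | |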
FOREACH_THREAD_IN_PROC(p, td) { | |
CTR3(KTR_MYKTR, "AGE thread (PID = %d) tick count (%d -> %d)", td->td_proc->p_pid, td->td_runtime, td->td_runtime/2 ); | |
td->td_runtime /= 2; | |
} | |
FOREACH_KSEGRP_IN_PROC(p, kg) { | |
awake = 0; | |
FOREACH_KSE_IN_GROUP(kg, ke) { | |
/* | |
* Increment sleep time (if sleeping). We | |
* ignore overflow, as above. | |
*/ | |
/* | |
* The kse slptimes are not touched in wakeup | |
* because the thread may not HAVE a KSE. | |
*/ | |
if (ke->ke_state == KES_ONRUNQ) { | |
awake = 1; | |
ke->ke_flags &= ~KEF_DIDRUN; | |
} else if ((ke->ke_state == KES_THREAD) && | |
(TD_IS_RUNNING(ke->ke_thread))) { | |
awake = 1; | |
/* Do not clear KEF_DIDRUN */ | |
} else if (ke->ke_flags & KEF_DIDRUN) { | |
awake = 1; | |
ke->ke_flags &= ~KEF_DIDRUN; | |
} | |
/* | |
* ke_pctcpu is only for ps and ttyinfo(). | |
* Do it per kse, and add them up at the end? | |
* XXXKSE | |
*/ | |
ke->ke_pctcpu = (ke->ke_pctcpu * ccpu) >> | |
FSHIFT; | |
/* | |
* If the kse has been idle the entire second, | |
* stop recalculating its priority until | |
* it wakes up. | |
*/ | |
if (ke->ke_sched->ske_cpticks == 0) | |
continue; | |
#if (FSHIFT >= CCPU_SHIFT) | |
ke->ke_pctcpu += (realstathz == 100) | |
? ((fixpt_t) ke->ke_sched->ske_cpticks) << | |
(FSHIFT - CCPU_SHIFT) : | |
100 * (((fixpt_t) ke->ke_sched->ske_cpticks) | |
<< (FSHIFT - CCPU_SHIFT)) / realstathz; | |
#else | |
ke->ke_pctcpu += ((FSCALE - ccpu) * | |
(ke->ke_sched->ske_cpticks * | |
FSCALE / realstathz)) >> FSHIFT; | |
#endif | |
ke->ke_sched->ske_cpticks = 0; | |
} /* end of kse loop */ | |
/* | |
* If there are ANY running threads in this KSEGRP, | |
* then don't count it as sleeping. | |
*/ | |
if (awake) { | |
if (kg->kg_slptime > 1) { | |
/* | |
* In an ideal world, this should not | |
* happen, because whoever woke us | |
* up from the long sleep should have | |
* unwound the slptime and reset our | |
* priority before we run at the stale | |
* priority. Should KASSERT at some | |
* point when all the cases are fixed. | |
*/ | |
updatepri(kg); | |
} | |
kg->kg_slptime = 0; | |
} else | |
kg->kg_slptime++; | |
if (kg->kg_slptime > 1) | |
continue; | |
kg->kg_estcpu = decay_cpu(loadfac, kg->kg_estcpu); | |
resetpriority(kg); | |
FOREACH_THREAD_IN_GROUP(kg, td) { | |
if (td->td_priority >= PUSER) { | |
sched_prio(td, kg->kg_user_pri); | |
} | |
} | |
} /* end of ksegrp loop */ | |
mtx_unlock_spin(&sched_lock); | |
} /* end of process loop */ | |
sx_sunlock(&allproc_lock); | |
callout_reset(&schedcpu_callout, hz, schedcpu, NULL); | |
} | |
/* | |
* Recalculate the priority of a process after it has slept for a while. | |
* For all load averages >= 1 and max kg_estcpu of 255, sleeping for at | |
* least six times the loadfactor will decay kg_estcpu to zero. | |
*/ | |
static void | |
updatepri(struct ksegrp *kg) | |
{ | |
register fixpt_t loadfac; | |
register unsigned int newcpu; | |
loadfac = loadfactor(averunnable.ldavg[0]); | |
if (kg->kg_slptime > 5 * loadfac) | |
kg->kg_estcpu = 0; | |
else { | |
newcpu = kg->kg_estcpu; | |
kg->kg_slptime--; /* was incremented in schedcpu() */ | |
while (newcpu && --kg->kg_slptime) | |
newcpu = decay_cpu(loadfac, newcpu); | |
kg->kg_estcpu = newcpu; | |
} | |
resetpriority(kg); | |
} | |
/* | |
* Compute the priority of a process when running in user mode. | |
* Arrange to reschedule if the resulting priority is better | |
* than that of the current process. | |
*/ | |
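/* | |
* Worked example (assuming PUSER == 160, the uniprocessor | |
* INVERSE_ESTCPU_WEIGHT of 8, NICE_WEIGHT == 1 and PRIO_MIN == -20): | |
* kg_estcpu == 80 and kg_nice == 0 give newpriority = 160 + 80/8 + 20 = | |
* 190, well inside the time-sharing range, so kg_user_pri becomes 190. | |
*/ | |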
static void | |
resetpriority(struct ksegrp *kg) | |
{ | |
register unsigned int newpriority; | |
struct thread *td; | |
if (kg->kg_pri_class == PRI_TIMESHARE) { | |
newpriority = PUSER + kg->kg_estcpu / INVERSE_ESTCPU_WEIGHT + | |
NICE_WEIGHT * (kg->kg_nice - PRIO_MIN); | |
newpriority = min(max(newpriority, PRI_MIN_TIMESHARE), | |
PRI_MAX_TIMESHARE); | |
kg->kg_user_pri = newpriority; | |
} | |
FOREACH_THREAD_IN_GROUP(kg, td) { | |
maybe_resched(td); /* XXXKSE silly */ | |
} | |
} | |
/* ARGSUSED */ | |
static void | |
sched_setup(void *dummy) | |
{ | |
if (sched_quantum == 0) | |
sched_quantum = SCHED_QUANTUM; | |
hogticks = 2 * sched_quantum; | |
callout_init(&schedcpu_callout, CALLOUT_MPSAFE); | |
callout_init(&roundrobin_callout, 0); | |
/* Kick off timeout driven events by calling first time. */ | |
roundrobin(NULL); | |
schedcpu(NULL); | |
} | |
/* External interfaces start here */ | |
int | |
sched_runnable(void) | |
{ | |
return runq_check(&runq); | |
} | |
int | |
sched_rr_interval(void) | |
{ | |
if (sched_quantum == 0) | |
sched_quantum = SCHED_QUANTUM; | |
return (sched_quantum); | |
} | |
/* | |
* We adjust the priority of the current process. The priority of | |
* a process gets worse as it accumulates CPU time. The cpu usage | |
* estimator (kg_estcpu) is increased here. resetpriority() will | |
* compute a different priority each time kg_estcpu increases by | |
* INVERSE_ESTCPU_WEIGHT | |
* (until MAXPRI is reached). The cpu usage estimator ramps up | |
* quite quickly when the process is running (linearly), and decays | |
* away exponentially, at a rate which is proportionally slower when | |
* the system is busy. The basic principle is that the system will | |
* 90% forget that the process used a lot of CPU time in 5 * loadav | |
* seconds. This causes the system to favor processes which haven't | |
* run much recently, and to round-robin among other processes. | |
*/ | |
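/* | |
* For example, with the uniprocessor INVERSE_ESTCPU_WEIGHT of 8, the | |
* priorities are refreshed below (resetpriority() plus reset_td_prio()) | |
* roughly once for every 8 statclock ticks the ksegrp accumulates, | |
* rather than on every single tick. | |
*/ | |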
void | |
sched_clock(struct thread *td) | |
{ | |
struct ksegrp *kg; | |
struct kse *ke; | |
mtx_assert(&sched_lock, MA_OWNED); | |
kg = td->td_ksegrp; | |
ke = td->td_kse; | |
ke->ke_sched->ske_cpticks++; | |
kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1); | |
if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) { | |
resetpriority(kg); | |
/* Use the recently-aged-ticks priority instead of kg_user_pri here. */ | |
reset_td_prio(td); | |
if (td->td_priority >= PUSER) | |
td->td_priority = td->td_usr_pri; | |
} | |
} | |
/* | |
* Charge the child's scheduling cpu usage to the parent. | |
* | |
* XXXKSE assume only one thread & kse & ksegrp keep estcpu in each ksegrp. | |
* Charge it to the ksegrp that did the wait since process estcpu is sum of | |
* all ksegrps, this is strictly as expected. Assume that the child process | |
* aggregated all the estcpu into the 'built-in' ksegrp. | |
*/ | |
void | |
sched_exit(struct proc *p, struct proc *p1) | |
{ | |
sched_exit_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1)); | |
sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1)); | |
sched_exit_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1)); | |
} | |
void | |
sched_exit_kse(struct kse *ke, struct kse *child) | |
{ | |
} | |
void | |
sched_exit_ksegrp(struct ksegrp *kg, struct ksegrp *child) | |
{ | |
mtx_assert(&sched_lock, MA_OWNED); | |
kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + child->kg_estcpu); | |
} | |
void | |
sched_exit_thread(struct thread *td, struct thread *child) | |
{ | |
} | |
void | |
sched_fork(struct proc *p, struct proc *p1) | |
{ | |
sched_fork_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1)); | |
sched_fork_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1)); | |
sched_fork_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1)); | |
} | |
void | |
sched_fork_kse(struct kse *ke, struct kse *child) | |
{ | |
child->ke_sched->ske_cpticks = 0; | |
} | |
void | |
sched_fork_ksegrp(struct ksegrp *kg, struct ksegrp *child) | |
{ | |
mtx_assert(&sched_lock, MA_OWNED); | |
child->kg_estcpu = kg->kg_estcpu; | |
} | |
void | |
sched_fork_thread(struct thread *td, struct thread *child) | |
{ | |
} | |
void | |
sched_nice(struct ksegrp *kg, int nice) | |
{ | |
PROC_LOCK_ASSERT(kg->kg_proc, MA_OWNED); | |
mtx_assert(&sched_lock, MA_OWNED); | |
kg->kg_nice = nice; | |
resetpriority(kg); | |
} | |
void | |
sched_class(struct ksegrp *kg, int class) | |
{ | |
mtx_assert(&sched_lock, MA_OWNED); | |
kg->kg_pri_class = class; | |
} | |
/* | |
* Adjust the priority of a thread. | |
* This may include moving the thread within the KSEGRP, | |
* changing the assignment of a kse to the thread, | |
* and moving a KSE in the system run queue. | |
*/ | |
void | |
sched_prio(struct thread *td, u_char prio) | |
{ | |
mtx_assert(&sched_lock, MA_OWNED); | |
/* | |
* Normally the "prio" parameter equals kg_user_pri; in that case we | |
* substitute the recently-aged-ticks priority instead. Some callers, | |
* such as yield(), pass other values (e.g. PRI_MAX_TIMESHARE), and | |
* those explicit settings are left untouched. This is only a | |
* heuristic: a manually chosen prio that happens to equal kg_user_pri | |
* cannot be distinguished from the normal case. | |
*/ | |
if (prio == td->td_ksegrp->kg_user_pri) { | |
reset_td_prio(td); | |
prio = td->td_usr_pri; | |
} | |
if (TD_ON_RUNQ(td)) { | |
adjustrunqueue(td, prio); | |
} else { | |
td->td_priority = prio; | |
} | |
} | |
void | |
sched_sleep(struct thread *td, u_char prio) | |
{ | |
mtx_assert(&sched_lock, MA_OWNED); | |
td->td_ksegrp->kg_slptime = 0; | |
td->td_priority = prio; | |
} | |
void | |
sched_switch(struct thread *td) | |
{ | |
struct thread *newtd; | |
struct kse *ke; | |
struct proc *p; | |
ke = td->td_kse; | |
p = td->td_proc; | |
mtx_assert(&sched_lock, MA_OWNED); | |
KASSERT((ke->ke_state == KES_THREAD), ("mi_switch: kse state?")); | |
td->td_lastcpu = td->td_oncpu; | |
td->td_last_kse = ke; | |
td->td_oncpu = NOCPU; | |
td->td_flags &= ~TDF_NEEDRESCHED; | |
/* | |
* At the last moment, if this thread is still marked RUNNING, | |
* then put it back on the run queue as it has not been suspended | |
* or stopped or any thing else similar. | |
*/ | |
if (TD_IS_RUNNING(td)) { | |
/* Put us back on the run queue (kse and all). */ | |
setrunqueue(td); | |
} else if (p->p_flag & P_SA) { | |
/* | |
* We will not be on the run queue. So we must be | |
* sleeping or similar. As it's available, | |
* someone else can use the KSE if they need it. | |
*/ | |
kse_reassign(ke); | |
} | |
newtd = choosethread(); | |
if (td != newtd) | |
cpu_switch(td, newtd); | |
sched_lock.mtx_lock = (uintptr_t)td; | |
td->td_oncpu = PCPU_GET(cpuid); | |
} | |
void | |
sched_wakeup(struct thread *td) | |
{ | |
struct ksegrp *kg; | |
mtx_assert(&sched_lock, MA_OWNED); | |
kg = td->td_ksegrp; | |
if (kg->kg_slptime > 1) | |
updatepri(kg); | |
kg->kg_slptime = 0; | |
setrunqueue(td); | |
maybe_resched(td); | |
} | |
void | |
sched_add(struct thread *td) | |
{ | |
struct kse *ke; | |
ke = td->td_kse; | |
mtx_assert(&sched_lock, MA_OWNED); | |
KASSERT((ke->ke_thread != NULL), ("runq_add: No thread on KSE")); | |
KASSERT((ke->ke_thread->td_kse != NULL), | |
("runq_add: No KSE on thread")); | |
KASSERT(ke->ke_state != KES_ONRUNQ, | |
("runq_add: kse %p (%s) already in run queue", ke, | |
ke->ke_proc->p_comm)); | |
KASSERT(ke->ke_proc->p_sflag & PS_INMEM, | |
("runq_add: process swapped out")); | |
ke->ke_ksegrp->kg_runq_kses++; | |
ke->ke_state = KES_ONRUNQ; | |
runq_add(&runq, ke); | |
} | |
void | |
sched_rem(struct thread *td) | |
{ | |
struct kse *ke; | |
ke = td->td_kse; | |
KASSERT(ke->ke_proc->p_sflag & PS_INMEM, | |
("runq_remove: process swapped out")); | |
KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); | |
mtx_assert(&sched_lock, MA_OWNED); | |
runq_remove(&runq, ke); | |
ke->ke_state = KES_THREAD; | |
ke->ke_ksegrp->kg_runq_kses--; | |
} | |
struct kse * | |
sched_choose(void) | |
{ | |
struct kse *ke; | |
ke = runq_choose(&runq); | |
if (ke != NULL) { | |
runq_remove(&runq, ke); | |
ke->ke_state = KES_THREAD; | |
KASSERT((ke->ke_thread != NULL), | |
("runq_choose: No thread on KSE")); | |
KASSERT((ke->ke_thread->td_kse != NULL), | |
("runq_choose: No KSE on thread")); | |
KASSERT(ke->ke_proc->p_sflag & PS_INMEM, | |
("runq_choose: process swapped out")); | |
} | |
return (ke); | |
} | |
void | |
sched_userret(struct thread *td) | |
{ | |
//struct ksegrp *kg; | |
/* | |
* XXX we cheat slightly on the locking here to avoid locking in | |
* the usual case. Setting td_priority here is essentially an | |
* incomplete workaround for not setting it properly elsewhere. | |
* Now that some interrupt handlers are threads, not setting it | |
* properly elsewhere can clobber it in the window between setting | |
* it here and returning to user mode, so don't waste time setting | |
* it perfectly here. | |
*/ | |
CTR3(KTR_MYKTR, "RESTORE thread (PID = %d) priority (%d -> %d)", td->td_proc->p_pid, td->td_priority, td->td_usr_pri ); | |
/* Restore the user-mode priority computed by reset_td_prio(). */ | |
td->td_priority = td->td_usr_pri; | |
/* | |
kg = td->td_ksegrp; | |
if (td->td_priority != kg->kg_user_pri) { | |
mtx_lock_spin(&sched_lock); | |
td->td_priority = kg->kg_user_pri; | |
mtx_unlock_spin(&sched_lock); | |
} | |
*/ | |
} | |
int | |
sched_sizeof_kse(void) | |
{ | |
return (sizeof(struct kse) + sizeof(struct ke_sched)); | |
} | |
int | |
sched_sizeof_ksegrp(void) | |
{ | |
return (sizeof(struct ksegrp)); | |
} | |
int | |
sched_sizeof_proc(void) | |
{ | |
return (sizeof(struct proc)); | |
} | |
int | |
sched_sizeof_thread(void) | |
{ | |
return (sizeof(struct thread)); | |
} | |
fixpt_t | |
sched_pctcpu(struct thread *td) | |
{ | |
struct kse *ke; | |
ke = td->td_kse; | |
if (ke == NULL) | |
ke = td->td_last_kse; | |
if (ke) | |
return (ke->ke_pctcpu); | |
return (0); | |
} |