Lines Matching +full:kernel +full:- +full:policy
1 // SPDX-License-Identifier: GPL-2.0-only
3 * kernel/sched/syscalls.c
5 * Core kernel scheduler syscalls related code
7 * Copyright (C) 1991-2002 Linus Torvalds
8 * Copyright (C) 1998-2024 Ingo Molnar, Red Hat
19 static inline int __normal_prio(int policy, int rt_prio, int nice) in __normal_prio() argument
23 if (dl_policy(policy)) in __normal_prio()
24 prio = MAX_DL_PRIO - 1; in __normal_prio()
25 else if (rt_policy(policy)) in __normal_prio()
26 prio = MAX_RT_PRIO - 1 - rt_prio; in __normal_prio()
35 * without taking RT-inheritance into account. Might be
42 return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio)); in normal_prio()
50 * RT-boosted. If not then it returns p->normal_prio.
54 p->normal_prio = normal_prio(p); in effective_prio()
60 if (!rt_or_dl_prio(p->prio)) in effective_prio()
61 return p->normal_prio; in effective_prio()
62 return p->prio; in effective_prio()
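The fragments above derive a task's kernel-internal priority from its policy, RT priority and nice value. Below is a minimal, self-contained sketch of the same arithmetic, assuming the constants current kernels use (MAX_RT_PRIO == 100, MAX_DL_PRIO == 0, NICE_TO_PRIO(nice) == 120 + nice); it only illustrates the mapping and is not the kernel code path.

/*
 * Illustrative sketch only, not kernel code: mirrors __normal_prio()
 * under the assumption MAX_RT_PRIO == 100, MAX_DL_PRIO == 0 and
 * NICE_TO_PRIO(nice) == 120 + nice.
 */
#include <stdio.h>

#define EX_MAX_RT_PRIO		100
#define EX_MAX_DL_PRIO		0
#define EX_DEFAULT_PRIO		120

static int ex_normal_prio(int is_dl, int is_rt, int rt_prio, int nice)
{
	if (is_dl)
		return EX_MAX_DL_PRIO - 1;		/* -1 for SCHED_DEADLINE */
	if (is_rt)
		return EX_MAX_RT_PRIO - 1 - rt_prio;	/* 98..0 for FIFO/RR prio 1..99 */
	return EX_DEFAULT_PRIO + nice;			/* 100..139 for nice -20..19 */
}

int main(void)
{
	printf("deadline      -> %d\n", ex_normal_prio(1, 0, 0, 0));	/* -1 */
	printf("fifo prio 50  -> %d\n", ex_normal_prio(0, 1, 50, 0));	/* 49 */
	printf("normal nice 0 -> %d\n", ex_normal_prio(0, 0, 0, 0));	/* 120 */
	return 0;
}

These values line up with the "kernel prio" column of the task_prio() comment table further down the listing.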
84 * allow the 'normal' nice value to be set - but as expected in set_user_nice()
89 p->static_prio = NICE_TO_PRIO(nice); in set_user_nice()
100 p->static_prio = NICE_TO_PRIO(nice); in set_user_nice()
102 old_prio = p->prio; in set_user_nice()
103 p->prio = effective_prio(p); in set_user_nice()
114 p->sched_class->prio_changed(rq, p, old_prio); in set_user_nice()
119 * is_nice_reduction - check if nice value is an actual reduction
128 /* Convert nice value [19,-20] to rlimit style value [1,40]: */ in is_nice_reduction()
135 * can_nice - check if a task can reduce its nice value
147 * sys_nice - change the priority of the current process.
162 increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); in SYSCALL_DEFINE1()
167 return -EPERM; in SYSCALL_DEFINE1()
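is_nice_reduction()/can_nice() gate lowering the nice value on RLIMIT_NICE, using the [19,-20] to [1,40] conversion noted above. A hedged userspace sketch of the same check, built only on standard getrlimit()/setpriority() calls; the ex_nice_to_rlimit() helper is local to the example, not a library function.

/* Userspace sketch: check RLIMIT_NICE before requesting a lower nice value. */
#include <stdio.h>
#include <sys/resource.h>

/* Same [19,-20] -> [1,40] conversion the kernel applies (nice_to_rlimit()). */
static long ex_nice_to_rlimit(long nice)
{
	return 19 - nice + 1;
}

int main(void)
{
	struct rlimit rlim;
	long wanted = -5;			/* nice value we would like */

	if (getrlimit(RLIMIT_NICE, &rlim) == 0 &&
	    ex_nice_to_rlimit(wanted) <= (long)rlim.rlim_cur) {
		/* Allowed without CAP_SYS_NICE; who == 0 means this process. */
		if (setpriority(PRIO_PROCESS, 0, (int)wanted) == 0)
			printf("nice set to %ld\n", wanted);
	} else {
		printf("RLIMIT_NICE too low for nice %ld\n", wanted);
	}
	return 0;
}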
180 * task_prio - return the priority value of a given task.
185 * sched policy return value kernel prio user prio/nice
187 * normal, batch, idle [0 ... 39] [100 ... 139] 0/[-20 ... 19]
188 * fifo, rr [-2 ... -100] [98 ... 0] [1 ... 99]
189 * deadline -101 -1 0
193 return p->prio - MAX_RT_PRIO; in task_prio()
197 * idle_cpu - is a given CPU idle currently?
206 if (rq->curr != rq->idle) in idle_cpu()
209 if (rq->nr_running) in idle_cpu()
213 if (rq->ttwu_pending) in idle_cpu()
221 * available_idle_cpu - is a given CPU idle for enqueuing work.
238 * idle_task - return the idle task for a given CPU.
245 return cpu_rq(cpu)->idle; in idle_task()
253 if (sched_core_enabled(rq) && rq->curr == rq->idle) in sched_core_idle_cpu()
262 * find_process_by_pid - find a process with a matching PID value.
288 * sched_setparam() passes in -1 for its policy, to let the functions in DEFINE_CLASS()
291 #define SETPARAM_POLICY -1 in DEFINE_CLASS()
296 int policy = attr->sched_policy; in DEFINE_CLASS() local
298 if (policy == SETPARAM_POLICY) in DEFINE_CLASS()
299 policy = p->policy; in DEFINE_CLASS()
301 p->policy = policy; in DEFINE_CLASS()
303 if (dl_policy(policy)) in DEFINE_CLASS()
305 else if (fair_policy(policy)) in DEFINE_CLASS()
308 /* rt-policy tasks do not have a timerslack */ in DEFINE_CLASS()
310 p->timer_slack_ns = 0; in DEFINE_CLASS()
311 } else if (p->timer_slack_ns == 0) { in DEFINE_CLASS()
312 /* when switching back to non-rt policy, restore timerslack */ in DEFINE_CLASS()
313 p->timer_slack_ns = p->default_timer_slack_ns; in DEFINE_CLASS()
317 * __sched_setscheduler() ensures attr->sched_priority == 0 when in DEFINE_CLASS()
321 p->rt_priority = attr->sched_priority; in DEFINE_CLASS()
322 p->normal_prio = normal_prio(p); in DEFINE_CLASS()
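The setscheduler_params() fragment above clears a task's timer slack when it moves to an RT policy and restores the saved default when it moves back. A small userspace sketch that observes this through prctl(PR_GET_TIMERSLACK); it assumes a kernel with this behavior and needs the privilege to set SCHED_FIFO.

/* Userspace sketch: observe timer slack around a switch to SCHED_FIFO. */
#include <stdio.h>
#include <sched.h>
#include <sys/prctl.h>

static void show_slack(const char *when)
{
	/* PR_GET_TIMERSLACK returns the current slack in nanoseconds. */
	printf("timer slack %s: %d ns\n", when, (int)prctl(PR_GET_TIMERSLACK));
}

int main(void)
{
	struct sched_param sp = { .sched_priority = 10 };

	show_slack("as SCHED_OTHER");

	/* Needs CAP_SYS_NICE or an RLIMIT_RTPRIO allowance. */
	if (sched_setscheduler(0, SCHED_FIFO, &sp) == 0)
		show_slack("as SCHED_FIFO");

	sp.sched_priority = 0;
	if (sched_setscheduler(0, SCHED_OTHER, &sp) == 0)
		show_slack("back to SCHED_OTHER");

	return 0;
}

On kernels that implement the behavior shown in the fragment, the middle reading should be 0 ns and the final one back at the default (typically 50000 ns).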
335 return (uid_eq(cred->euid, pcred->euid) || in check_same_owner()
336 uid_eq(cred->euid, pcred->uid)); in check_same_owner()
344 int util_min = p->uclamp_req[UCLAMP_MIN].value; in uclamp_validate()
345 int util_max = p->uclamp_req[UCLAMP_MAX].value; in uclamp_validate()
347 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) { in uclamp_validate()
348 util_min = attr->sched_util_min; in uclamp_validate()
351 return -EINVAL; in uclamp_validate()
354 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) { in uclamp_validate()
355 util_max = attr->sched_util_max; in uclamp_validate()
358 return -EINVAL; in uclamp_validate()
361 if (util_min != -1 && util_max != -1 && util_min > util_max) in uclamp_validate()
362 return -EINVAL; in uclamp_validate()
380 /* Reset on sched class change for a non user-defined clamp value. */ in uclamp_reset()
381 if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)) && in uclamp_reset()
382 !uc_se->user_defined) in uclamp_reset()
385 /* Reset on sched_util_{min,max} == -1. */ in uclamp_reset()
387 attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN && in uclamp_reset()
388 attr->sched_util_min == -1) { in uclamp_reset()
393 attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX && in uclamp_reset()
394 attr->sched_util_max == -1) { in uclamp_reset()
407 struct uclamp_se *uc_se = &p->uclamp_req[clamp_id]; in __setscheduler_uclamp()
426 if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP))) in __setscheduler_uclamp()
429 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN && in __setscheduler_uclamp()
430 attr->sched_util_min != -1) { in __setscheduler_uclamp()
431 uclamp_se_set(&p->uclamp_req[UCLAMP_MIN], in __setscheduler_uclamp()
432 attr->sched_util_min, true); in __setscheduler_uclamp()
435 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX && in __setscheduler_uclamp()
436 attr->sched_util_max != -1) { in __setscheduler_uclamp()
437 uclamp_se_set(&p->uclamp_req[UCLAMP_MAX], in __setscheduler_uclamp()
438 attr->sched_util_max, true); in __setscheduler_uclamp()
447 return -EOPNOTSUPP; in uclamp_validate()
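uclamp_validate() and __setscheduler_uclamp() apply the per-task utilization clamps passed in through sched_setattr(). glibc has no wrapper for that syscall, so a userspace sketch has to go through syscall(); the structure layout and flag values below are copied from the uapi headers as an assumption (prefer <linux/sched/types.h> where available), and the call fails with EOPNOTSUPP on kernels without CONFIG_UCLAMP_TASK, as in the stub just above.

/* Userspace sketch: request util clamps [min=128, max=512] for this task. */
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef SCHED_FLAG_UTIL_CLAMP_MIN
#define SCHED_FLAG_UTIL_CLAMP_MIN	0x20
#define SCHED_FLAG_UTIL_CLAMP_MAX	0x40
#endif

/* Assumed to match the uapi struct sched_attr on recent kernels. */
struct ex_sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;
	uint64_t sched_deadline;
	uint64_t sched_period;
	uint32_t sched_util_min;
	uint32_t sched_util_max;
};

int main(void)
{
	struct ex_sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.sched_policy = 0;			/* SCHED_NORMAL */
	attr.sched_flags = SCHED_FLAG_UTIL_CLAMP_MIN | SCHED_FLAG_UTIL_CLAMP_MAX;
	/*
	 * Clamps are in the 0..1024 capacity scale; -1 resets a clamp, and
	 * ORing in SCHED_FLAG_KEEP_POLICY|SCHED_FLAG_KEEP_PARAMS (0x08|0x10)
	 * leaves policy and nice untouched.
	 */
	attr.sched_util_min = 128;
	attr.sched_util_max = 512;

	if (syscall(SYS_sched_setattr, 0, &attr, 0) != 0)
		perror("sched_setattr");	/* EOPNOTSUPP without CONFIG_UCLAMP_TASK */
	return 0;
}

The requested values can be read back through sys_sched_getattr(), which copies p->uclamp_req[] out in the fragments further down.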
456 * event on permitted non-privileged operations:
460 int policy, int reset_on_fork) in user_check_sched_setscheduler() argument
462 if (fair_policy(policy)) { in user_check_sched_setscheduler()
463 if (attr->sched_nice < task_nice(p) && in user_check_sched_setscheduler()
464 !is_nice_reduction(p, attr->sched_nice)) in user_check_sched_setscheduler()
468 if (rt_policy(policy)) { in user_check_sched_setscheduler()
471 /* Can't set/change the rt policy: */ in user_check_sched_setscheduler()
472 if (policy != p->policy && !rlim_rtprio) in user_check_sched_setscheduler()
476 if (attr->sched_priority > p->rt_priority && in user_check_sched_setscheduler()
477 attr->sched_priority > rlim_rtprio) in user_check_sched_setscheduler()
482 * Can't set/change SCHED_DEADLINE policy at all for now in user_check_sched_setscheduler()
487 if (dl_policy(policy)) in user_check_sched_setscheduler()
494 if (task_has_idle_policy(p) && !idle_policy(policy)) { in user_check_sched_setscheduler()
504 if (p->sched_reset_on_fork && !reset_on_fork) in user_check_sched_setscheduler()
511 return -EPERM; in user_check_sched_setscheduler()
520 int oldpolicy = -1, policy = attr->sched_policy; in __sched_setscheduler() local
533 /* Double check policy once rq lock held: */ in __sched_setscheduler()
534 if (policy < 0) { in __sched_setscheduler()
535 reset_on_fork = p->sched_reset_on_fork; in __sched_setscheduler()
536 policy = oldpolicy = p->policy; in __sched_setscheduler()
538 reset_on_fork = !!(attr->sched_flags & SCHED_FLAG_RESET_ON_FORK); in __sched_setscheduler()
540 if (!valid_policy(policy)) in __sched_setscheduler()
541 return -EINVAL; in __sched_setscheduler()
544 if (attr->sched_flags & ~(SCHED_FLAG_ALL | SCHED_FLAG_SUGOV)) in __sched_setscheduler()
545 return -EINVAL; in __sched_setscheduler()
549 * 1..MAX_RT_PRIO-1, valid priority for SCHED_NORMAL, in __sched_setscheduler()
552 if (attr->sched_priority > MAX_RT_PRIO-1) in __sched_setscheduler()
553 return -EINVAL; in __sched_setscheduler()
554 if ((dl_policy(policy) && !__checkparam_dl(attr)) || in __sched_setscheduler()
555 (rt_policy(policy) != (attr->sched_priority != 0))) in __sched_setscheduler()
556 return -EINVAL; in __sched_setscheduler()
559 retval = user_check_sched_setscheduler(p, attr, policy, reset_on_fork); in __sched_setscheduler()
563 if (attr->sched_flags & SCHED_FLAG_SUGOV) in __sched_setscheduler()
564 return -EINVAL; in __sched_setscheduler()
572 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) { in __sched_setscheduler()
582 if (dl_policy(policy) || dl_policy(p->policy)) { in __sched_setscheduler()
588 * Make sure no PI-waiters arrive (or leave) while we are in __sched_setscheduler()
591 * To be able to change p->policy safely, the appropriate in __sched_setscheduler()
598 * Changing the policy of the stop threads is a very bad idea: in __sched_setscheduler()
600 if (p == rq->stop) { in __sched_setscheduler()
601 retval = -EINVAL; in __sched_setscheduler()
605 retval = scx_check_setscheduler(p, policy); in __sched_setscheduler()
613 if (unlikely(policy == p->policy)) { in __sched_setscheduler()
614 if (fair_policy(policy) && in __sched_setscheduler()
615 (attr->sched_nice != task_nice(p) || in __sched_setscheduler()
616 (attr->sched_runtime != p->se.slice))) in __sched_setscheduler()
618 if (rt_policy(policy) && attr->sched_priority != p->rt_priority) in __sched_setscheduler()
620 if (dl_policy(policy) && dl_param_changed(p, attr)) in __sched_setscheduler()
622 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) in __sched_setscheduler()
625 p->sched_reset_on_fork = reset_on_fork; in __sched_setscheduler()
634 * Do not allow real-time tasks into groups that have no runtime in __sched_setscheduler()
637 if (rt_bandwidth_enabled() && rt_policy(policy) && in __sched_setscheduler()
638 task_group(p)->rt_bandwidth.rt_runtime == 0 && in __sched_setscheduler()
640 retval = -EPERM; in __sched_setscheduler()
645 if (dl_bandwidth_enabled() && dl_policy(policy) && in __sched_setscheduler()
646 !(attr->sched_flags & SCHED_FLAG_SUGOV)) { in __sched_setscheduler()
647 cpumask_t *span = rq->rd->span; in __sched_setscheduler()
654 if (!cpumask_subset(span, p->cpus_ptr) || in __sched_setscheduler()
655 rq->rd->dl_bw.bw == 0) { in __sched_setscheduler()
656 retval = -EPERM; in __sched_setscheduler()
663 /* Re-check policy now with rq lock held: */ in __sched_setscheduler()
664 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { in __sched_setscheduler()
665 policy = oldpolicy = -1; in __sched_setscheduler()
677 if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) { in __sched_setscheduler()
678 retval = -EBUSY; in __sched_setscheduler()
682 p->sched_reset_on_fork = reset_on_fork; in __sched_setscheduler()
683 oldprio = p->prio; in __sched_setscheduler()
685 newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice); in __sched_setscheduler()
699 prev_class = p->sched_class; in __sched_setscheduler()
700 next_class = __setscheduler_class(policy, newprio); in __sched_setscheduler()
702 if (prev_class != next_class && p->se.sched_delayed) in __sched_setscheduler()
712 if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { in __sched_setscheduler()
714 p->sched_class = next_class; in __sched_setscheduler()
715 p->prio = newprio; in __sched_setscheduler()
725 if (oldprio < p->prio) in __sched_setscheduler()
759 static int _sched_setscheduler(struct task_struct *p, int policy, in _sched_setscheduler() argument
763 .sched_policy = policy, in _sched_setscheduler()
764 .sched_priority = param->sched_priority, in _sched_setscheduler()
765 .sched_nice = PRIO_TO_NICE(p->static_prio), in _sched_setscheduler()
768 if (p->se.custom_slice) in _sched_setscheduler()
769 attr.sched_runtime = p->se.slice; in _sched_setscheduler()
772 if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { in _sched_setscheduler()
774 policy &= ~SCHED_RESET_ON_FORK; in _sched_setscheduler()
775 attr.sched_policy = policy; in _sched_setscheduler()
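_sched_setscheduler() masks SCHED_RESET_ON_FORK out of the policy word, which is what lets userspace OR the flag into the policy it passes to sched_setscheduler(2). A minimal sketch; the flag value is the uapi one, and setting SCHED_FIFO needs CAP_SYS_NICE or an RLIMIT_RTPRIO allowance.

/* Userspace sketch: SCHED_FIFO for this process, children reset to normal on fork. */
#include <stdio.h>
#include <sched.h>

#ifndef SCHED_RESET_ON_FORK
#define SCHED_RESET_ON_FORK	0x40000000
#endif

int main(void)
{
	struct sched_param sp = { .sched_priority = 5 };

	if (sched_setscheduler(0, SCHED_FIFO | SCHED_RESET_ON_FORK, &sp) != 0) {
		perror("sched_setscheduler");
		return 1;
	}
	/*
	 * sched_getscheduler() reports the policy; kernels that set
	 * sched_reset_on_fork report the flag back too (see the
	 * sys_sched_getscheduler() fragment below).
	 */
	printf("policy now: 0x%x\n", sched_getscheduler(0));
	return 0;
}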
781 * sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
783 * @policy: new policy.
792 int sched_setscheduler(struct task_struct *p, int policy, in sched_setscheduler() argument
795 return _sched_setscheduler(p, policy, param, true); in sched_setscheduler()
810 * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
812 * @policy: new policy.
822 int sched_setscheduler_nocheck(struct task_struct *p, int policy, in sched_setscheduler_nocheck() argument
825 return _sched_setscheduler(p, policy, param, false); in sched_setscheduler_nocheck()
839 * For this reason 'all' FIFO tasks the kernel creates are basically at:
843 * The administrator _MUST_ configure the system, the kernel simply doesn't
874 do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) in do_sched_setscheduler() argument
879 return -EINVAL; in do_sched_setscheduler()
881 return -EFAULT; in do_sched_setscheduler()
885 return -ESRCH; in do_sched_setscheduler()
887 return sched_setscheduler(p, policy, &lparam); in do_sched_setscheduler()
891 * Mimics kernel/events/core.c perf_copy_attr().
901 ret = get_user(size, &uattr->size); in sched_copy_attr()
913 if (ret == -E2BIG) in sched_copy_attr()
918 if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) && in sched_copy_attr()
920 return -EINVAL; in sched_copy_attr()
924 * to be strict and return an error on out-of-bounds values? in sched_copy_attr()
926 attr->sched_nice = clamp(attr->sched_nice, MIN_NICE, MAX_NICE); in sched_copy_attr()
931 put_user(sizeof(*attr), &uattr->size); in sched_copy_attr()
932 return -E2BIG; in sched_copy_attr()
940 attr->sched_priority = p->rt_priority; in get_params()
942 attr->sched_nice = task_nice(p); in get_params()
943 attr->sched_runtime = p->se.slice; in get_params()
948 * sys_sched_setscheduler - set/change the scheduler policy and RT priority
950 * @policy: new policy.
955 SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) in SYSCALL_DEFINE3() argument
957 if (policy < 0) in SYSCALL_DEFINE3()
958 return -EINVAL; in SYSCALL_DEFINE3()
960 return do_sched_setscheduler(pid, policy, param); in SYSCALL_DEFINE3()
964 * sys_sched_setparam - set/change the RT priority of a thread
976 * sys_sched_setattr - same as above, but with extended sched_attr
988 return -EINVAL; in SYSCALL_DEFINE3()
995 return -EINVAL; in SYSCALL_DEFINE3()
1001 return -ESRCH; in SYSCALL_DEFINE3()
1010 * sys_sched_getscheduler - get the policy (scheduling class) of a thread
1013 * Return: On success, the policy of the thread. Otherwise, a negative error
1022 return -EINVAL; in SYSCALL_DEFINE1()
1027 return -ESRCH; in SYSCALL_DEFINE1()
1031 retval = p->policy; in SYSCALL_DEFINE1()
1032 if (p->sched_reset_on_fork) in SYSCALL_DEFINE1()
1039 * sys_sched_getparam - get the RT priority of a thread
1053 return -EINVAL; in SYSCALL_DEFINE2()
1058 return -ESRCH; in SYSCALL_DEFINE2()
1065 lp.sched_priority = p->rt_priority; in SYSCALL_DEFINE2()
1071 return copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; in SYSCALL_DEFINE2()
1075 * sys_sched_getattr - similar to sched_getparam, but with sched_attr
1090 return -EINVAL; in SYSCALL_DEFINE4()
1095 return -ESRCH; in SYSCALL_DEFINE4()
1101 kattr.sched_policy = p->policy; in SYSCALL_DEFINE4()
1102 if (p->sched_reset_on_fork) in SYSCALL_DEFINE4()
1113 kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; in SYSCALL_DEFINE4()
1114 kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; in SYSCALL_DEFINE4()
1136 if (dl_entity_is_special(&p->dl)) in dl_task_check_affinity()
1141 * if admission test is enabled, we only admit -deadline in dl_task_check_affinity()
1146 if (!cpumask_subset(task_rq(p)->rd->span, mask)) in dl_task_check_affinity()
1147 return -EBUSY; in dl_task_check_affinity()
1159 return -ENOMEM; in __sched_setaffinity()
1162 retval = -ENOMEM; in __sched_setaffinity()
1167 cpumask_and(new_mask, ctx->new_mask, cpus_allowed); in __sched_setaffinity()
1169 ctx->new_mask = new_mask; in __sched_setaffinity()
1170 ctx->flags |= SCA_CHECK; in __sched_setaffinity()
1196 if (unlikely((ctx->flags & SCA_USER) && ctx->user_mask)) { in __sched_setaffinity()
1198 ctx->user_mask); in __sched_setaffinity()
1204 retval = -EINVAL; in __sched_setaffinity()
1222 return -ESRCH; in sched_setaffinity()
1224 if (p->flags & PF_NO_SETAFFINITY) in sched_setaffinity()
1225 return -EINVAL; in sched_setaffinity()
1229 if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) in sched_setaffinity()
1230 return -EPERM; in sched_setaffinity()
1238 * With non-SMP configs, user_cpus_ptr/user_mask isn't used and in sched_setaffinity()
1245 return -ENOMEM; in sched_setaffinity()
1268 return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; in get_user_cpu_mask()
1272 * sys_sched_setaffinity - set the CPU affinity of a process
1275 * @user_mask_ptr: user-space pointer to the new CPU mask
1286 return -ENOMEM; in SYSCALL_DEFINE3()
1303 return -ESRCH; in sched_getaffinity()
1309 guard(raw_spinlock_irqsave)(&p->pi_lock); in sched_getaffinity()
1310 cpumask_and(mask, &p->cpus_mask, cpu_active_mask); in sched_getaffinity()
1316 * sys_sched_getaffinity - get the CPU affinity of a process
1319 * @user_mask_ptr: user-space pointer to hold the current CPU mask
1331 return -EINVAL; in SYSCALL_DEFINE3()
1332 if (len & (sizeof(unsigned long)-1)) in SYSCALL_DEFINE3()
1333 return -EINVAL; in SYSCALL_DEFINE3()
1336 return -ENOMEM; in SYSCALL_DEFINE3()
1343 ret = -EFAULT; in SYSCALL_DEFINE3()
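sys_sched_setaffinity()/sys_sched_getaffinity() copy a CPU mask in from and back out to userspace; glibc exposes them as sched_setaffinity()/sched_getaffinity() over cpu_set_t. A short sketch pinning the calling process to CPU 0 and reading the effective mask back:

/* Userspace sketch: pin this process to CPU 0, then read the mask back. */
#define _GNU_SOURCE
#include <stdio.h>
#include <sched.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);
	if (sched_setaffinity(0, sizeof(set), &set) != 0) {
		perror("sched_setaffinity");
		return 1;
	}

	CPU_ZERO(&set);
	if (sched_getaffinity(0, sizeof(set), &set) == 0)
		printf("CPU 0 allowed: %d\n", CPU_ISSET(0, &set));
	return 0;
}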
1359 schedstat_inc(rq->yld_count); in do_sched_yield()
1360 current->sched_class->yield_task(rq); in do_sched_yield()
1370 * sys_sched_yield - yield the current processor to other threads.
1384 * yield - yield the current processor to other threads.
1413 * yield_to - yield the current processor to another thread in
1425 * -ESRCH if there's no task to yield to.
1433 scoped_guard (raw_spinlock_irqsave, &p->pi_lock) { in yield_to()
1442 if (rq->nr_running == 1 && p_rq->nr_running == 1) in yield_to()
1443 return -ESRCH; in yield_to()
1449 if (!curr->sched_class->yield_to_task) in yield_to()
1452 if (curr->sched_class != p->sched_class) in yield_to()
1458 yielded = curr->sched_class->yield_to_task(rq, p); in yield_to()
1460 schedstat_inc(rq->yld_count); in yield_to()
1478 * sys_sched_get_priority_max - return maximum RT priority.
1479 * @policy: scheduling class.
1485 SYSCALL_DEFINE1(sched_get_priority_max, int, policy) in SYSCALL_DEFINE1() argument
1487 int ret = -EINVAL; in SYSCALL_DEFINE1()
1489 switch (policy) { in SYSCALL_DEFINE1()
1492 ret = MAX_RT_PRIO-1; in SYSCALL_DEFINE1()
1506 * sys_sched_get_priority_min - return minimum RT priority.
1507 * @policy: scheduling class.
1513 SYSCALL_DEFINE1(sched_get_priority_min, int, policy) in SYSCALL_DEFINE1() argument
1515 int ret = -EINVAL; in SYSCALL_DEFINE1()
1517 switch (policy) { in SYSCALL_DEFINE1()
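sched_get_priority_max()/sched_get_priority_min() report the valid static priority range for a policy, and sched_rr_get_interval() (whose implementation follows below) reports the round-robin timeslice. A userspace sketch querying both:

/* Userspace sketch: query the FIFO priority range and the RR timeslice. */
#include <stdio.h>
#include <sched.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	printf("SCHED_FIFO priority: %d..%d\n",
	       sched_get_priority_min(SCHED_FIFO),
	       sched_get_priority_max(SCHED_FIFO));	/* typically 1..99 */

	/* pid 0 == this process; per the kerneldoc below, 0 means infinity. */
	if (sched_rr_get_interval(0, &ts) == 0)
		printf("timeslice: %ld.%09ld s\n",
		       (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}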
1538 return -EINVAL; in sched_rr_get_interval()
1543 return -ESRCH; in sched_rr_get_interval()
1551 if (p->sched_class->get_rr_interval) in sched_rr_get_interval()
1552 time_slice = p->sched_class->get_rr_interval(rq, p); in sched_rr_get_interval()
1561 * sys_sched_rr_get_interval - return the default time-slice of a process.
1563 * @interval: userspace pointer to the time-slice value.
1565 * this syscall writes the default time-slice value of a given process
1566 * into the user-space timespec buffer. A value of '0' means infinity.
1568 * Return: On success, 0 and the time-slice is in @interval. Otherwise,