/*
 * include/haproxy/task.h
 * Functions for task management.
 *
 * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef _HAPROXY_TASK_H
#define _HAPROXY_TASK_H

#include <sys/time.h>

#include <import/eb32sctree.h>
#include <import/eb32tree.h>

#include <haproxy/api.h>
#include <haproxy/fd.h>
#include <haproxy/global.h>
#include <haproxy/intops.h>
#include <haproxy/list.h>
#include <haproxy/pool.h>
#include <haproxy/task-t.h>
#include <haproxy/thread.h>
#include <haproxy/ticks.h>

/* Principle of the wait queue.
 *
 * We want to be able to tell whether an expiration date is before or after the
 * current time <now>. We KNOW that expiration dates are never too far apart,
 * because they are measured in ticks (milliseconds). We also know that almost
 * all dates will be in the future, and that a very small part of them will be
 * in the past; they are the ones which have expired since the last time we
 * checked them. Using ticks, we know if a date is in the future or in the
 * past, but we cannot use that to store sorted information because that
 * reference changes all the time.
 *
 * We'll use the fact that the time wraps to sort timers. Timers above <now>
 * are in the future, timers below <now> are in the past. Here, "above" and
 * "below" are to be considered modulo 2^31.
 *
 * Timers are stored sorted in an ebtree. We use the ability of ebtrees to
 * look up values starting from X to only expire tasks between <now> - 2^31
 * and <now>. If the end of the tree is reached while walking over it, we
 * simply loop back to the beginning. That way, we have no problem keeping
 * sorted wrapping timers in a tree, between (now - 24 days) and
 * (now + 24 days). The keys in the tree always reflect their real position,
 * none can be infinite. This reduces the number of checks to be performed.
 *
 * Another nice optimisation is to allow a timer to stay at an old place in the
 * queue as long as it's not further than the real expiration date. That way,
 * we use the tree as a place holder for a minorant of the real expiration
 * date. Since we have a very low chance of hitting a timeout anyway, we can
 * bounce the nodes to their right place when we scan the tree if we encounter
 * a misplaced node once in a while. This even allows us not to remove the
 * infinite timers from the wait queue.
 *
 * So, to summarize, we have:
 *   - node->key always defines the current position in the wait queue
 *   - timer is the real expiration date (possibly infinite)
 *   - node->key is always before or equal to timer
 *
 * The run queue works similarly to the wait queue except that the current date
 * is replaced by an insertion counter which can also wrap without any problem.
 */
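
/* A minimal illustration of the modulo-2^31 ordering described above (a
 * sketch, assuming tick_is_lt() in ticks.h compares two dates through the
 * sign of their wrapped difference): two dates less than 2^31 ms apart still
 * sort correctly even when one of them has wrapped past zero:
 *
 *	unsigned int t1 = 0xFFFFFFF0, t2 = 0x00000010;
 *	// (int)(t1 - t2) == -0x20, i.e. negative, so t1 is "before" t2
 *	// despite t1 > t2 as unsigned values.
 */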

/* The farthest we can look back in a timer tree */
#define TIMER_LOOK_BACK       (1U << 31)

/* tasklets are recognized with nice==-32768 */
#define TASK_IS_TASKLET(t) ((t)->nice == -32768)

/* a few exported variables */
extern unsigned int nb_tasks;     /* total number of tasks */
extern volatile unsigned long global_tasks_mask; /* Mask of threads with tasks in the global runqueue */
extern unsigned int tasks_run_queue;    /* run queue size */
extern unsigned int tasks_run_queue_cur;
extern unsigned int nb_tasks_cur;
extern unsigned int niced_tasks;  /* number of niced tasks in the run queue */

extern struct pool_head *pool_head_task;
extern struct pool_head *pool_head_tasklet;
extern struct pool_head *pool_head_notification;
extern THREAD_LOCAL struct task_per_thread *sched; /* current thread's scheduler context */

#ifdef USE_THREAD
extern struct eb_root timers;      /* sorted timers tree, global */
extern struct eb_root rqueue;      /* tree constituting the global run queue */
#endif

extern struct task_per_thread task_per_thread[MAX_THREADS];

__decl_thread(extern HA_SPINLOCK_T rq_lock);  /* spin lock related to run queue */
__decl_thread(extern HA_RWLOCK_T wq_lock);    /* RW lock related to the wait queue */

void task_kill(struct task *t);
void __task_wakeup(struct task *t, struct eb_root *);
void __task_queue(struct task *task, struct eb_root *wq);

struct work_list *work_list_create(int nbthread,
                                   struct task *(*fct)(struct task *, void *, unsigned short),
                                   void *arg);
void work_list_destroy(struct work_list *work, int nbthread);
unsigned int run_tasks_from_lists(unsigned int budgets[]);

/*
 * This does 3 things:
 *  - wake up all expired tasks
 *  - call all runnable tasks
 *  - return the date of the next event in <next> or eternity.
 */
void process_runnable_tasks();

/*
 * Extracts all expired timers from the timer queue, and wakes up all
 * associated tasks.
 */
void wake_expired_tasks();

/* Checks the next timer for the current thread by looking into its own timer
 * list and the global one. It may return TICK_ETERNITY if no timer is present.
 * Note that the next timer might very well be slightly in the past.
 */
int next_timer_expiry();

/*
 * Deletes every task before running the master polling loop
 */
void mworker_cleantasks();

/* returns non-zero if task <t> is in the run queue, otherwise 0 */
static inline int task_in_rq(struct task *t)
{
	/* leaf_p is NULL when the task is not in the runqueue */
	return t->rq.node.leaf_p != NULL;
}

/* returns non-zero if task <t> is in the wait queue, otherwise 0 */
static inline int task_in_wq(struct task *t)
{
	return t->wq.node.leaf_p != NULL;
}

/* returns true if the current thread has some work to do */
static inline int thread_has_tasks(void)
{
	return (!!(global_tasks_mask & tid_bit) |
	        (sched->rqueue_size > 0) |
	        !!sched->tl_class_mask |
	        !MT_LIST_ISEMPTY(&sched->shared_tasklet_list));
}
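
/* A sketch of how the pieces above fit together (illustrative only; the real
 * event loop lives in run_poll_loop() in haproxy.c and is more involved):
 * each thread wakes expired timers, runs what is runnable, and only considers
 * sleeping in the poller once thread_has_tasks() reports nothing left:
 *
 *	wake_expired_tasks();        // move expired timers to the run queues
 *	process_runnable_tasks();    // run tasks/tasklets within their budget
 *	if (!thread_has_tasks()) {
 *		// may sleep in the poller, bounded by next_timer_expiry()
 *	}
 */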

/* puts the task <t> in run queue with reason flags <f>, and returns <t>.
 * This will put the task in the local runqueue if the task is only runnable
 * by the current thread, in the global runqueue otherwise.
 */
static inline void task_wakeup(struct task *t, unsigned int f)
{
	unsigned short state;

#ifdef USE_THREAD
	struct eb_root *root;

	if (t->thread_mask == tid_bit || global.nbthread == 1)
		root = &sched->rqueue;
	else
		root = &rqueue;
#else
	struct eb_root *root = &sched->rqueue;
#endif

	state = _HA_ATOMIC_OR(&t->state, f);
	while (!(state & (TASK_RUNNING | TASK_QUEUED))) {
		if (_HA_ATOMIC_CAS(&t->state, &state, state | TASK_QUEUED)) {
			__task_wakeup(t, root);
			break;
		}
	}
}

/*
 * Unlink the task from the wait queue, and possibly update the last_timer
 * pointer. A pointer to the task itself is returned. The task *must* already
 * be in the wait queue before calling this function. If unsure, use the safer
 * task_unlink_wq() function.
 */
static inline struct task *__task_unlink_wq(struct task *t)
{
	eb32_delete(&t->wq);
	return t;
}

/* remove a task from its wait queue. It may either be the local wait queue if
 * the task is bound to a single thread, or the global queue. If the task uses
 * a shared wait queue, the global wait queue lock is used.
 */
static inline struct task *task_unlink_wq(struct task *t)
{
	unsigned long locked;

	if (likely(task_in_wq(t))) {
		locked = t->state & TASK_SHARED_WQ;
		BUG_ON(!locked && t->thread_mask != tid_bit);
		if (locked)
			HA_RWLOCK_WRLOCK(TASK_WQ_LOCK, &wq_lock);
		__task_unlink_wq(t);
		if (locked)
			HA_RWLOCK_WRUNLOCK(TASK_WQ_LOCK, &wq_lock);
	}
	return t;
}

/* Place <task> into the wait queue, where it may already be. If the expiration
 * timer is infinite, do nothing and rely on wake_expired_tasks() to clean up.
 * If the task uses a shared wait queue, it's queued into the global wait queue,
 * protected by the global wq_lock; otherwise it necessarily belongs to the
 * current thread's wait queue and is queued without locking.
 */
static inline void task_queue(struct task *task)
{
	/* If we already have a place in the wait queue no later than the
	 * timeout we're trying to set, we'll stay there, because it is very
	 * unlikely that we will reach the timeout anyway. If the timeout
	 * has been disabled, it's useless to leave the queue as well. We'll
	 * rely on wake_expired_tasks() to catch the node and move it to the
	 * proper place should it ever happen. Finally we only add the task
	 * to the queue if it was not there or if it was further than what
	 * we want.
	 */
	if (!tick_isset(task->expire))
		return;

#ifdef USE_THREAD
	if (task->state & TASK_SHARED_WQ) {
		HA_RWLOCK_WRLOCK(TASK_WQ_LOCK, &wq_lock);
		if (!task_in_wq(task) || tick_is_lt(task->expire, task->wq.key))
			__task_queue(task, &timers);
		HA_RWLOCK_WRUNLOCK(TASK_WQ_LOCK, &wq_lock);
	} else
#endif
	{
		BUG_ON(task->thread_mask != tid_bit); // should have TASK_SHARED_WQ
		if (!task_in_wq(task) || tick_is_lt(task->expire, task->wq.key))
			__task_queue(task, &sched->timers);
	}
}

/* change the thread affinity of a task to <thread_mask>.
 * This may only be done from within the running task itself or during its
 * initialization. It will unqueue and requeue the task from the wait queue
 * if it was in it. This is safe against a concurrent task_queue() call because
 * task_queue() itself will unlink again if needed after taking into account
 * the new thread_mask.
 */
static inline void task_set_affinity(struct task *t, unsigned long thread_mask)
{
	if (unlikely(task_in_wq(t))) {
		task_unlink_wq(t);
		t->thread_mask = thread_mask;
		task_queue(t);
	}
	else
		t->thread_mask = thread_mask;
}
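
/* The usual "arm a timeout" pattern with task_queue() above, as a short
 * sketch (tick_add(), MS_TO_TICKS() and now_ms come from ticks.h and the
 * clock code; the 5s delay is just an example): the caller updates ->expire
 * and lets task_queue() decide whether the node really needs to move:
 *
 *	t->expire = tick_add(now_ms, MS_TO_TICKS(5000));
 *	task_queue(t);   // no-op if already queued at an earlier position
 */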
/*
 * Unlink the task from the run queue. The tasks_run_queue size and number of
 * niced tasks are updated too. A pointer to the task itself is returned. The
 * task *must* already be in the run queue before calling this function. If
 * unsure, use the safer task_unlink_rq() function. Note that the pointer to
 * the next run queue entry is neither checked nor updated.
 */
static inline struct task *__task_unlink_rq(struct task *t)
{
	_HA_ATOMIC_SUB(&tasks_run_queue, 1);
#ifdef USE_THREAD
	if (t->state & TASK_GLOBAL)
		_HA_ATOMIC_AND(&t->state, ~TASK_GLOBAL);
	else
#endif
		sched->rqueue_size--;
	eb32sc_delete(&t->rq);
	if (likely(t->nice))
		_HA_ATOMIC_SUB(&niced_tasks, 1);
	return t;
}

/* This function unlinks task <t> from the run queue if it is in it. It also
 * takes care of updating the next run queue task if it was this task.
 */
static inline struct task *task_unlink_rq(struct task *t)
{
	int is_global = t->state & TASK_GLOBAL;

	if (is_global)
		HA_SPIN_LOCK(TASK_RQ_LOCK, &rq_lock);
	if (likely(task_in_rq(t)))
		__task_unlink_rq(t);
	if (is_global)
		HA_SPIN_UNLOCK(TASK_RQ_LOCK, &rq_lock);
	return t;
}

/* schedules tasklet <tl> to run onto thread <thr> or the current thread if
 * <thr> is negative. Note that it is illegal to wake up a foreign tasklet if
 * its tid is negative, and it is illegal to self-assign a tasklet that was
 * at least once scheduled on a specific thread.
 */
static inline void tasklet_wakeup_on(struct tasklet *tl, int thr)
{
	unsigned short state = tl->state;

	do {
		/* do nothing if someone else already added it */
		if (state & TASK_IN_LIST)
			return;
	} while (!_HA_ATOMIC_CAS(&tl->state, &state, state | TASK_IN_LIST));

	/* at this point we're the first ones to add this task to the list */
	if (likely(thr < 0)) {
		/* this tasklet runs on the caller thread */
		if (tl->state & TASK_SELF_WAKING) {
			LIST_ADDQ(&sched->tasklets[TL_BULK], &tl->list);
			sched->tl_class_mask |= 1 << TL_BULK;
		}
		else if ((struct task *)tl == sched->current) {
			_HA_ATOMIC_OR(&tl->state, TASK_SELF_WAKING);
			LIST_ADDQ(&sched->tasklets[TL_BULK], &tl->list);
			sched->tl_class_mask |= 1 << TL_BULK;
		}
		else if (sched->current_queue < 0) {
			LIST_ADDQ(&sched->tasklets[TL_URGENT], &tl->list);
			sched->tl_class_mask |= 1 << TL_URGENT;
		}
		else {
			LIST_ADDQ(&sched->tasklets[sched->current_queue], &tl->list);
			sched->tl_class_mask |= 1 << sched->current_queue;
		}
	} else {
		/* this tasklet runs on a specific thread */
		MT_LIST_ADDQ(&task_per_thread[thr].shared_tasklet_list, (struct mt_list *)&tl->list);
		if (sleeping_thread_mask & (1UL << thr)) {
			_HA_ATOMIC_AND(&sleeping_thread_mask, ~(1UL << thr));
			wake_thread(thr);
		}
	}
	_HA_ATOMIC_ADD(&tasks_run_queue, 1);
}

/* schedules tasklet <tl> to run onto the thread designated by tl->tid, which
 * is either its owner thread if >= 0 or the current thread if < 0.
 */
static inline void tasklet_wakeup(struct tasklet *tl)
{
	tasklet_wakeup_on(tl, tl->tid);
}

/* Try to remove a tasklet from the list. This call is inherently racy and may
 * only be performed on the thread that was supposed to dequeue this tasklet.
 * This way it is safe to call MT_LIST_DEL without first removing the
 * TASK_IN_LIST bit, which must absolutely be removed afterwards in case
 * another thread would want to wake this tasklet up in parallel.
 */
static inline void tasklet_remove_from_tasklet_list(struct tasklet *t)
{
	if (MT_LIST_DEL((struct mt_list *)&t->list)) {
		_HA_ATOMIC_AND(&t->state, ~TASK_IN_LIST);
		_HA_ATOMIC_SUB(&tasks_run_queue, 1);
	}
}
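
/* Illustrative tasklet lifecycle, as a sketch (my_io_cb and ctx are
 * hypothetical names; tasklet_new() is defined further below): a tasklet is
 * set up once, then tasklet_wakeup() may be called any number of times, and
 * redundant wakeups are absorbed by the TASK_IN_LIST bit:
 *
 *	struct tasklet *tl = tasklet_new();
 *	if (tl) {
 *		tl->process = my_io_cb;    // hypothetical handler
 *		tl->context = ctx;         // hypothetical context
 *		tasklet_wakeup(tl);        // queued on the current thread
 *	}
 */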
/*
 * Initialize a new task. The bare minimum is performed (queue pointers and
 * state). The task is returned. This function should not be used outside of
 * task_new(). If the thread mask contains more than one thread, TASK_SHARED_WQ
 * is set.
 */
static inline struct task *task_init(struct task *t, unsigned long thread_mask)
{
	t->wq.node.leaf_p = NULL;
	t->rq.node.leaf_p = NULL;
	t->state = TASK_SLEEPING;
	t->thread_mask = thread_mask;
	if (atleast2(thread_mask))
		t->state |= TASK_SHARED_WQ;
	t->nice = 0;
	t->calls = 0;
	t->call_date = 0;
	t->cpu_time = 0;
	t->lat_time = 0;
	t->expire = TICK_ETERNITY;
	return t;
}

/* Initialize a new tasklet. It's identified as a tasklet by ->nice=-32768. It
 * is expected to run on the calling thread by default; it's up to the caller
 * to change ->tid if it wants to own it.
 */
static inline void tasklet_init(struct tasklet *t)
{
	t->nice = -32768;
	t->calls = 0;
	t->state = 0;
	t->process = NULL;
	t->tid = -1;
	LIST_INIT(&t->list);
}

/* Allocate and initialize a new tasklet, local to the thread by default. The
 * caller may assign its tid if it wants to own the tasklet.
 */
static inline struct tasklet *tasklet_new(void)
{
	struct tasklet *t = pool_alloc(pool_head_tasklet);

	if (t) {
		tasklet_init(t);
	}
	return t;
}

/*
 * Allocate and initialise a new task. The new task is returned, or NULL in
 * case of lack of memory. The task count is incremented. Tasks should only
 * be allocated this way, and must be freed using task_destroy().
 */
static inline struct task *task_new(unsigned long thread_mask)
{
	struct task *t = pool_alloc(pool_head_task);

	if (t) {
		_HA_ATOMIC_ADD(&nb_tasks, 1);
		task_init(t, thread_mask);
	}
	return t;
}

/*
 * Free a task. Its context must have been freed since it will be lost. The
 * task count is decremented. If it is the current task, this one is reset.
 */
static inline void __task_free(struct task *t)
{
	if (t == sched->current) {
		sched->current = NULL;
		__ha_barrier_store();
	}
	BUG_ON(task_in_wq(t) || task_in_rq(t));
	pool_free(pool_head_task, t);
	if (unlikely(stopping))
		pool_flush(pool_head_task);
	_HA_ATOMIC_SUB(&nb_tasks, 1);
}

/* Destroys a task: it's unlinked from the wait queues and is freed if it's
 * the current task or not queued, otherwise it's marked to be freed by the
 * scheduler. It does nothing if <t> is NULL.
 */
static inline void task_destroy(struct task *t)
{
	if (!t)
		return;

	task_unlink_wq(t);
	/* We don't have to explicitly remove from the run queue.
	 * If we are in the runqueue, the test below will set t->process
	 * to NULL, and the task will be free'd when it'll be its turn
	 * to run.
	 */

	/* There's no need to protect t->state with a lock, as the task
	 * has to run on the current thread.
	 */
	if (t == sched->current || !(t->state & (TASK_QUEUED | TASK_RUNNING)))
		__task_free(t);
	else
		t->process = NULL;
}

/* Should only be called by the thread responsible for the tasklet */
static inline void tasklet_free(struct tasklet *tl)
{
	if (MT_LIST_DEL((struct mt_list *)&tl->list))
		_HA_ATOMIC_SUB(&tasks_run_queue, 1);

	pool_free(pool_head_tasklet, tl);
	if (unlikely(stopping))
		pool_flush(pool_head_tasklet);
}

static inline void tasklet_set_tid(struct tasklet *tl, int tid)
{
	tl->tid = tid;
}
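
/* Illustrative task lifecycle using the allocators above, as a sketch
 * (my_timer_cb is a hypothetical callback): a task bound to a single thread
 * takes the lockless local paths, while a multi-thread mask implies
 * TASK_SHARED_WQ and the global wait queue:
 *
 *	struct task *t = task_new(tid_bit);   // bound to this thread only
 *	if (t) {
 *		t->process = my_timer_cb;     // hypothetical handler
 *		t->expire  = tick_add(now_ms, MS_TO_TICKS(1000));
 *		task_queue(t);                // arm the one-second timer
 *		...
 *		task_destroy(t);              // safe even if still queued
 *	}
 */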

/* Ensure <task> will be woken up at most at <when>. If the task is already in
 * the run queue (but not running), nothing is done. It may be used that way
 * with a delay:  task_schedule(task, tick_add(now_ms, delay));
 */
static inline void task_schedule(struct task *task, int when)
{
	/* TODO: mthread, check if there is no risk with this test */
	if (task_in_rq(task))
		return;

#ifdef USE_THREAD
	if (task->state & TASK_SHARED_WQ) {
		/* FIXME: is it really needed to lock the WQ during the check ? */
		HA_RWLOCK_WRLOCK(TASK_WQ_LOCK, &wq_lock);
		if (task_in_wq(task))
			when = tick_first(when, task->expire);

		task->expire = when;
		if (!task_in_wq(task) || tick_is_lt(task->expire, task->wq.key))
			__task_queue(task, &timers);
		HA_RWLOCK_WRUNLOCK(TASK_WQ_LOCK, &wq_lock);
	} else
#endif
	{
		BUG_ON((task->thread_mask & tid_bit) == 0); // should have TASK_SHARED_WQ
		if (task_in_wq(task))
			when = tick_first(when, task->expire);

		task->expire = when;
		if (!task_in_wq(task) || tick_is_lt(task->expire, task->wq.key))
			__task_queue(task, &sched->timers);
	}
}

/* This function registers a new signal. "lua" is the current lua
 * execution context. It contains a pointer to the associated task.
 * "link" is a list head attached to another task that must wake
 * the lua task if an event occurs. This is useful with external
 * events like TCP I/O or sleep functions. This function allocates
 * memory for the signal.
 */
static inline struct notification *notification_new(struct list *purge, struct list *event, struct task *wakeup)
{
	struct notification *com = pool_alloc(pool_head_notification);

	if (!com)
		return NULL;
	LIST_ADDQ(purge, &com->purge_me);
	LIST_ADDQ(event, &com->wake_me);
	HA_SPIN_INIT(&com->lock);
	com->task = wakeup;
	return com;
}

/* This function purges all the pending signals when the LUA execution
 * is finished. This prevents a coprocess from trying to wake a deleted
 * task. This function removes the memory associated with the signal.
 * The purge list is not locked because it is owned by only one
 * process. Before browsing this list, the caller must ensure to be
 * the only one browsing it.
 */
static inline void notification_purge(struct list *purge)
{
	struct notification *com, *back;

	/* Delete all pending communication signals. */
	list_for_each_entry_safe(com, back, purge, purge_me) {
		HA_SPIN_LOCK(NOTIF_LOCK, &com->lock);
		LIST_DEL(&com->purge_me);
		if (!com->task) {
			HA_SPIN_UNLOCK(NOTIF_LOCK, &com->lock);
			pool_free(pool_head_notification, com);
			continue;
		}
		com->task = NULL;
		HA_SPIN_UNLOCK(NOTIF_LOCK, &com->lock);
	}
}

/* In some cases, the disconnected notifications must be cleared.
 * This function just releases memory blocks. The purge list is not
 * locked because it is owned by only one process. Before browsing
 * this list, the caller must ensure to be the only one browsing it.
 * The "com" is not locked because when com->task is NULL, the
 * notification is no longer used.
 */
static inline void notification_gc(struct list *purge)
{
	struct notification *com, *back;

	/* Delete all pending communication signals. */
	list_for_each_entry_safe(com, back, purge, purge_me) {
		if (com->task)
			continue;
		LIST_DEL(&com->purge_me);
		pool_free(pool_head_notification, com);
	}
}
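
/* Illustrative notification flow, as a sketch (the list heads and the task
 * are placeholders for what the Lua layer actually owns): the owner registers
 * with notification_new(), the event side fires notification_wake() (below)
 * when something happens, and the owner purges everything on shutdown:
 *
 *	notification_new(&ctx_purge_list, &event_list, lua_task); // register
 *	...
 *	notification_wake(&event_list);     // event occurred: wake lua_task
 *	...
 *	notification_purge(&ctx_purge_list); // context is being destroyed
 */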

/* This function sends signals. It wakes all the tasks attached
 * to a list head, removes the signal, and frees the used
 * memory. The wake list is not locked because it is owned by
 * only one process. Before browsing this list, the caller must
 * ensure to be the only one browsing it.
 */
static inline void notification_wake(struct list *wake)
{
	struct notification *com, *back;

	/* Wake task and delete all pending communication signals. */
	list_for_each_entry_safe(com, back, wake, wake_me) {
		HA_SPIN_LOCK(NOTIF_LOCK, &com->lock);
		LIST_DEL(&com->wake_me);
		if (!com->task) {
			HA_SPIN_UNLOCK(NOTIF_LOCK, &com->lock);
			pool_free(pool_head_notification, com);
			continue;
		}
		task_wakeup(com->task, TASK_WOKEN_MSG);
		com->task = NULL;
		HA_SPIN_UNLOCK(NOTIF_LOCK, &com->lock);
	}
}

/* This function returns true if some notifications are pending */
static inline int notification_registered(struct list *wake)
{
	return !LIST_ISEMPTY(wake);
}

/* adds list item <item> to work list <work> and wakes up the associated task */
static inline void work_list_add(struct work_list *work, struct mt_list *item)
{
	MT_LIST_TRY_ADDQ(&work->head, item);
	task_wakeup(work->task, TASK_WOKEN_OTHER);
}

#endif /* _HAPROXY_TASK_H */

/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */