/**
 * @file
 * @brief Process (aka task) info storage
 * @details Copyright (c) 2017-2021 Acronis International GmbH
 * @author Ivan Matveev ([email protected])
 * @since $Id: $
 */

#include "task_info_map.h"

#include "debug.h"
#include "file_key_tools.h"
#include "memory.h"
#include "path_tools.h"
#include "task_tools.h"
#include "transport.h"

#include <linux/ktime.h>
#include <linux/mm.h> // get_task_exe_file()

static task_info_map_t global_task_info_map = {0};

static void task_info_context_init(transport_task_context_t *context, transport_id_t id)
{
    WRITE_ONCE(context->data.sent_pid_version, 0);
    WRITE_ONCE(context->data.listening_mask, ~0ULL);
    WRITE_ONCE(context->transport_id, id);
}

// Called when 'pid_version' is changed
static void task_info_context_invalidate(transport_task_context_t *context)
{
    // 'sent_pid_version' can survive the invalidation, everything else must be appropriately updated
    WRITE_ONCE(context->data.listening_mask, ~0ULL);
}

static void task_info_set_pid_version(task_info_t *task_info, uint64_t pid_version, uint64_t artificial_start_time_us)
{
    WRITE_ONCE(task_info->pid_version, pid_version);
    WRITE_ONCE(task_info->artificial_start_time_us, artificial_start_time_us);
}

static task_info_t *task_info_init(task_info_t *task_info,
                                   pid_t pid,
                                   uint64_t unique_pid,
                                   uint64_t pid_version,
                                   const struct path *exe_path,
                                   uint64_t start_time_us)
{
    int i;

    DPRINTF("task_info=%p pid=%i", task_info, pid);

    RB_CLEAR_NODE(&task_info->rb_node);
    task_info->pid = pid;
    atomic_set(&task_info->ref_cnt, 1);
    spin_lock_init(&task_info->spinlock);
    task_info->unique_pid = unique_pid;
    task_info_set_pid_version(task_info, pid_version, start_time_us);

    if (exe_path)
        make_key(&task_info->exe_file_key, exe_path);
    else
        task_info->exe_file_key = (file_key_t){0};

    for (i = 0; i < MAX_TRANSPORT_SIZE; i++) {
        transport_id_t id = transport_id_make_not_taken(i);
        transport_task_context_t *context = &task_info->contexts[i];
        task_info_context_init(context, id);
    }

    return task_info;
}

static task_info_t *task_info_new_with_alloc_flags(pid_t pid,
                                                   uint64_t unique_pid,
                                                   uint64_t pid_version,
                                                   const struct path *exe_path,
                                                   uint64_t start_time_us,
                                                   bool nowait)
{
    task_info_t *task_info = mem_alloc_with_alloc_flags(sizeof(task_info_t), nowait);
    if (task_info) {
        uint64_t boot_time_us;
        TIMESPEC boot_time;
        getboottime(&boot_time);
        // Cast before multiplying: 'tv_sec * 1000000' would overflow a 32-bit 'time_t'
        boot_time_us = (uint64_t)boot_time.tv_sec * 1000000 + boot_time.tv_nsec / 1000;
        task_info_init(task_info, pid, unique_pid, pid_version, exe_path, start_time_us + boot_time_us);
    }
    return task_info;
}

static task_info_t *task_info_ref(task_info_t *task_info)
{
    atomic_inc(&task_info->ref_cnt);
    return task_info;
}

static void task_info_free(task_info_t *task_info)
{
    DPRINTF("task_info=%p", task_info);
    mem_free(task_info);
}

static void task_info_unref(task_info_t *task_info)
{
    DPRINTF("pid=%d ref_cnt=%d", task_info->pid, atomic_read(&task_info->ref_cnt));
    if (atomic_dec_and_test(&task_info->ref_cnt)) {
        task_info_free(task_info);
    }
}

void task_info_maps_init(void)
{
    rwlock_init(&global_task_info_map.lock);
    global_task_info_map.root = RB_ROOT;
    atomic64_set(&global_task_info_map.next_pid_version, 0);
}

// Called on module shutdown
void task_info_maps_clear(void)
{
    struct rb_root root;
    struct rb_node *node;

    write_lock(&global_task_info_map.lock);
    root = global_task_info_map.root;
    global_task_info_map.root = RB_ROOT;
    node = root.rb_node;
    while (node) {
        task_info_t *task_info;
        task_info = rb_entry(node, task_info_t, rb_node);
        rb_erase(&task_info->rb_node, &root);
        node = root.rb_node;
        if (atomic_read(&task_info->ref_cnt) != 1) {
            WPRINTF("task info [%d] ref_cnt[%d] is not equal to 1 when clearing",
                    task_info->pid, atomic_read(&task_info->ref_cnt));
        }
        task_info_unref(task_info);
    }
    write_unlock(&global_task_info_map.lock);
}
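/*
 * Usage sketch (illustrative only, never called): the map hands out referenced
 * 'task_info_t' objects, so every successful lookup must be paired with a
 * 'task_info_put()'. Assumes the public API declared in "task_info_map.h" and
 * that '__maybe_unused' is available from the usual kernel headers.
 */
static void __maybe_unused task_info_refcount_example(pid_t pid)
{
    task_info_t *info = task_info_lookup(pid, 0 /* match any unique_pid */);
    if (!info)
        return;
    /* ... safe to read 'info' fields here, the reference pins the object ... */
    task_info_put(info); /* drop the reference taken by the lookup */
}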
task_info_t *task_info_lookup(pid_t pid, uint64_t unique_pid)
{
    struct rb_node *node;
    task_info_t *task_info = NULL;

    DPRINTF("pid=%d", pid);

    read_lock(&global_task_info_map.lock);
    node = global_task_info_map.root.rb_node;
    while (node) {
        task_info_t *node_task_info = rb_entry(node, task_info_t, rb_node);
        pid_t node_pid = node_task_info->pid;
        if (pid < node_pid) {
            node = node->rb_left;
        } else if (pid > node_pid) {
            node = node->rb_right;
        } else {
            // 'unique_pid == 0' means "match any generation of this pid"
            if (!unique_pid || node_task_info->unique_pid == unique_pid) {
                task_info = task_info_ref(node_task_info);
            } else {
                task_info = NULL;
            }
            break;
        }
    }
    read_unlock(&global_task_info_map.lock);

    DPRINTF_RATELIMITED("task_info=%p pid=%d", task_info, pid);
    return task_info;
}

// Notice that this function may return NULL if it detects pid reuse.
// It is incredibly unlikely, but it is still a possibility.
static task_info_t *task_info_map_insert(task_info_t *new_task_info)
{
    pid_t pid = new_task_info->pid;
    uint64_t unique_pid = new_task_info->unique_pid;
    struct rb_node *parent = NULL;
    struct rb_node **link;

    DPRINTF_RATELIMITED("new_task_info=%p pid=%i", new_task_info, pid);

    write_lock(&global_task_info_map.lock);
    link = &(global_task_info_map.root.rb_node);
    while (*link) {
        task_info_t *node_task_info;
        pid_t node_pid;

        parent = *link;
        node_task_info = rb_entry(parent, task_info_t, rb_node);
        node_pid = node_task_info->pid;
        if (pid < node_pid) {
            link = &parent->rb_left;
        } else if (pid > node_pid) {
            link = &parent->rb_right;
        } else {
            // collision - check unique pid
            if (node_task_info->unique_pid > unique_pid) {
                // the entry in the tree is newer than the one we want to insert
                DPRINTF_RATELIMITED("reuse");
                write_unlock(&global_task_info_map.lock);
                return NULL;
            } else if (node_task_info->unique_pid == unique_pid) {
                DPRINTF_RATELIMITED("collision");
                // the unique pid in the tree equals the one we want to insert,
                // which means we drop the new node and use the one from the tree
                task_info_ref(node_task_info);
                write_unlock(&global_task_info_map.lock);
                return node_task_info;
            } else {
                // pid reuse detected - the entry in the tree is older than the one we want to insert;
                // replace the node in the rbtree
                DPRINTF_RATELIMITED("pid reuse detected");
                task_info_ref(new_task_info);
                rb_replace_node(&node_task_info->rb_node, &new_task_info->rb_node, &global_task_info_map.root);
                RB_CLEAR_NODE(&node_task_info->rb_node);
                write_unlock(&global_task_info_map.lock);
                task_info_unref(node_task_info);
                return new_task_info;
            }
        }
    }

    // take the reference owned by 'global_task_info_map.root'
    task_info_ref(new_task_info);
    rb_link_node(&new_task_info->rb_node, parent, link);
    rb_insert_color(&new_task_info->rb_node, &global_task_info_map.root);
    DPRINTF_RATELIMITED("inserted");
    write_unlock(&global_task_info_map.lock);
    return new_task_info;
}

// Lock-free peek at 'exe_file_key'; may race with writers, hence "may"
static bool task_info_may_need_refresh(const task_info_t *task_info, const file_key_t *key)
{
    file_key_t exe_file_key;
    exe_file_key.ptr = READ_ONCE(task_info->exe_file_key.ptr);
    exe_file_key.ino = READ_ONCE(task_info->exe_file_key.ino);
    exe_file_key.gen = READ_ONCE(task_info->exe_file_key.gen);
    exe_file_key.dev = READ_ONCE(task_info->exe_file_key.dev);
    return 0 != cmp_file_key(&exe_file_key, key);
}
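/*
 * The helper above is the lock-free half of a double-checked pattern:
 * task_info_refresh() below peeks at 'exe_file_key' without the spinlock and
 * only takes the lock (and re-compares) when the key looks stale. A minimal
 * sketch of the idiom, with hypothetical names:
 *
 *     if (!may_have_changed(obj))     // racy READ_ONCE() peek, cheap
 *         return;
 *     spin_lock(&obj->lock);
 *     if (really_changed(obj))        // authoritative re-check under the lock
 *         update(obj);                // publish new values with WRITE_ONCE()
 *     spin_unlock(&obj->lock);
 */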
static void task_info_refresh(task_info_t *task_info, const struct path *exe_path, bool force_advance_pid_version)
{
    file_key_t key;
    int i;

    if (!exe_path)
        return;

    make_key(&key, exe_path);
    if (!force_advance_pid_version) {
        if (!task_info_may_need_refresh(task_info, &key))
            return;
    }

    spin_lock(&task_info->spinlock);
    if (force_advance_pid_version || 0 != cmp_file_key(&key, &task_info->exe_file_key)) {
        // If 'exe_file_key.ptr==0', exe was not provided on creation. This means that we get to keep
        // the same 'pid_version', but 'exe_file_key' should be updated to the current 'exe'.
        // Keep the normal logic for 'force_advance_pid_version'.
        if (force_advance_pid_version || 0 != task_info->exe_file_key.ptr) {
            task_info_set_pid_version(task_info,
                                      atomic64_add_return(1, &global_task_info_map.next_pid_version),
                                      ktime_to_us(ktime_get_real()));
            for (i = 0; i < MAX_TRANSPORT_SIZE; i++) {
                transport_task_context_t *context = &task_info->contexts[i];
                task_info_context_invalidate(context);
            }
        }
        WRITE_ONCE(task_info->exe_file_key.ptr, key.ptr);
        WRITE_ONCE(task_info->exe_file_key.ino, key.ino);
        WRITE_ONCE(task_info->exe_file_key.gen, key.gen);
        WRITE_ONCE(task_info->exe_file_key.dev, key.dev);
    }
    spin_unlock(&task_info->spinlock);
}

task_info_t *task_info_map_add(pid_t pid, uint64_t unique_pid, const struct path *exe_path, uint64_t start_time_us)
{
    return task_info_map_add_with_alloc_flags(pid
                                              , unique_pid
                                              , exe_path
                                              , start_time_us
                                              , false /*nowait*/
                                              , false /*force_advance_pid_version*/);
}

task_info_t *task_info_map_add_with_alloc_flags(pid_t pid,
                                                uint64_t unique_pid,
                                                const struct path *exe_path,
                                                uint64_t start_time_us,
                                                bool nowait,
                                                bool force_advance_pid_version)
{
    task_info_t *task_info;
    task_info_t *new_task_info;

    DPRINTF_RATELIMITED("pid=%d", pid);

    task_info = task_info_lookup(pid, unique_pid);
    if (task_info) {
        DPRINTF_RATELIMITED("pid=%i is already in the map (task_info=%p)", pid, task_info);
        task_info_refresh(task_info, exe_path, force_advance_pid_version);
        return task_info;
    }

    new_task_info = task_info_new_with_alloc_flags(pid
                                                   , unique_pid
                                                   , atomic64_add_return(1, &global_task_info_map.next_pid_version)
                                                   , exe_path
                                                   , start_time_us
                                                   , nowait);
    if (!new_task_info) {
        DPRINTF_RATELIMITED("out of memory");
        return NULL;
    }

    task_info = task_info_map_insert(new_task_info);
    if (task_info != new_task_info) {
        // collision, this is a normal situation
        DPRINTF_RATELIMITED("collision");
        if (task_info)
            task_info_refresh(task_info, exe_path, force_advance_pid_version);
        task_info_unref(new_task_info);
    }
    return task_info;
}

task_info_t *task_info_map_get(struct task_struct *tsk)
{
    struct path *exe_path = NULL;
    task_info_t *task_info;
    struct file *exe_file = get_task_exe_file_compat(tsk);
    if (exe_file) {
        exe_path = &exe_file->f_path;
        if (!path_is_usable(exe_path)) {
            exe_path = NULL;
        }
    }
    task_info = task_info_map_get_with_exe(tsk, exe_path);
    if (exe_file) {
        fput(exe_file);
    }
    return task_info;
}

task_info_t *task_info_map_get_with_exe(struct task_struct *tsk, const struct path *exe_path)
{
    SiTimeMicroseconds start_time = make_process_start_time(tsk);
    return task_info_map_add(tsk->tgid
                             , make_unique_pid(tsk)
                             , exe_path
                             , start_time.microseconds);
}

task_info_t *task_info_map_get_by_pid(pid_t pid, uint64_t target_unique_pid)
{
    task_info_t *task_info;
    struct pid *spid;
    struct task_struct *task;
    uint64_t unique_pid;

    task_info = task_info_lookup(pid, 0);
    if (task_info) {
        return task_info;
    }

    spid = find_get_pid(pid);
    task = get_pid_task(spid, PIDTYPE_PID);
    put_pid(spid);
    if (!task) {
        return NULL;
    }

    unique_pid = make_unique_pid(task);
    if (target_unique_pid && target_unique_pid != unique_pid) {
        put_task_struct(task);
        return NULL;
    }

    task_info = task_info_map_get(task);
    put_task_struct(task);
    return task_info;
}
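/*
 * Usage sketch (illustrative only, never called): resolving the cached info
 * for the current task. 'current' comes from kernel headers this file already
 * pulls in indirectly; the returned object is referenced and must be released
 * with task_info_put(), and NULL means the allocation failed.
 */
static task_info_t * __maybe_unused example_get_current_task_info(void)
{
    return task_info_map_get(current);
}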
task_info_t *task_info_map_get_with_alloc_flags(struct task_struct *tsk, const struct path *exe_path, bool nowait)
{
    SiTimeMicroseconds start_time = make_process_start_time(tsk);
    return task_info_map_add_with_alloc_flags(tsk->tgid
                                              , make_unique_pid(tsk)
                                              , exe_path
                                              , start_time.microseconds
                                              , nowait
                                              , false /*force_advance_pid_version*/);
}

task_info_t *task_info_get(task_info_t *task_info)
{
    if (task_info)
        task_info_ref(task_info);
    return task_info;
}

void task_info_put(task_info_t *task_info)
{
    task_info_unref(task_info);
}

static void task_info_map_remove(task_info_t *task_info)
{
    write_lock(&global_task_info_map.lock);
    if (!RB_EMPTY_NODE(&task_info->rb_node)) {
        rb_erase(&task_info->rb_node, &global_task_info_map.root);
        RB_CLEAR_NODE(&task_info->rb_node);
    }
    if (atomic_read(&task_info->ref_cnt) >= 2) {
        // undo the 'inc' done for 'global_task_info_map.root'
        task_info_unref(task_info);
    }
    write_unlock(&global_task_info_map.lock);
}

int task_info_set_listening_mask(task_info_t *task_info, transport_id_t id, uint64_t listening_mask, uint64_t pid_version)
{
    int map_idx = transport_id_index(id);
    transport_task_context_t *context;
    int ret = -EACCES;

    // '%llx' with a cast: 'listening_mask' is 64-bit, '%lx' would be wrong on 32-bit builds
    DPRINTF_LEVEL(LOG_LEVEL_DEBUG1, "map_id:%lu pid=%i mask=0x%llx",
                  id, task_info->pid, (unsigned long long)listening_mask);

    if (map_idx < 0 || map_idx >= MAX_TRANSPORT_SIZE) {
        return -ENOENT;
    }
    context = &task_info->contexts[map_idx];

    spin_lock(&task_info->spinlock);
    if (id > context->transport_id) {
        task_info_context_init(context, id);
    }
    if (id == context->transport_id) {
        uint64_t context_pid_version = READ_ONCE(task_info->pid_version);
        if (!pid_version || context_pid_version == pid_version) {
            WRITE_ONCE(context->data.listening_mask, listening_mask);
            ret = 0;
        } else {
            ret = -EACCES;
        }
    } else {
        ret = -ESRCH;
    }
    spin_unlock(&task_info->spinlock);
    return ret;
}

void task_info_map_on_exit_event(struct task_struct *tsk)
{
    pid_t pid = tsk->tgid;
    uint64_t unique_pid = make_unique_pid(tsk);
    task_info_t *task_info = NULL;

    task_info = task_info_lookup(pid, unique_pid);
    if (!task_info) {
        DPRINTF("%d is missing in 'map'", pid);
    } else {
        task_info_map_remove(task_info);
        task_info_unref(task_info);
    }
}

bool task_info_can_skip(task_info_t *task_info, const transport_ids_t *ids, uint64_t mask)
{
    int i = 0;
    bool ret = true;

    for (i = 0; i < MAX_TRANSPORT_SIZE; i++) {
        transport_id_t id = ids->ids[i];
        transport_task_context_t *context = &task_info->contexts[i];
        if (0 == id)
            continue;
        if (id > READ_ONCE(context->transport_id)
            || READ_ONCE(context->data.sent_pid_version) != READ_ONCE(task_info->pid_version)) {
            ret = false;
        } else {
            // exec image matches
            // If any flag from 'mask' is set in 'listening_mask' then we should not skip
            ret = !(READ_ONCE(context->data.listening_mask) & mask);
        }
        if (!ret)
            break;
    }
    return ret;
}
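/*
 * Usage sketch (illustrative only, never called): a send-side fast path that
 * drops an event when no attached transport is listening for any bit in
 * 'event_mask'. The caller-side names here are hypothetical.
 */
static bool __maybe_unused example_should_send(task_info_t *info,
                                               const transport_ids_t *ids,
                                               uint64_t event_mask)
{
    return !task_info_can_skip(info, ids, event_mask);
}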
static bool task_info_may_need_to_make_exec_event(const task_info_t *task_info, const transport_ids_t *ids)
{
    int i = 0;
    bool ret = false;

    for (i = 0; i < MAX_TRANSPORT_SIZE; i++) {
        transport_id_t id = ids->ids[i];
        const transport_task_context_t *context = &task_info->contexts[i];
        if (0 == id)
            continue;
        if (id > READ_ONCE(context->transport_id)
            || READ_ONCE(context->data.sent_pid_version) != READ_ONCE(task_info->pid_version)) {
            ret = true;
        }
        if (ret)
            break;
    }
    return ret;
}

bool task_info_need_to_make_exec_event(task_info_t *task_info, const transport_ids_t *ids)
{
    int i = 0;
    bool ret = false;

    // Lock-free pre-check first; take the spinlock only when it looks necessary
    if (!task_info_may_need_to_make_exec_event(task_info, ids))
        return false;

    spin_lock(&task_info->spinlock);
    for (i = 0; i < MAX_TRANSPORT_SIZE; i++) {
        transport_id_t id = ids->ids[i];
        transport_task_context_t *context = &task_info->contexts[i];
        if (0 == id)
            continue;
        if (id > context->transport_id
            || context->data.sent_pid_version != READ_ONCE(task_info->pid_version)) {
            ret = true;
        }
        if (ret)
            break;
    }
    spin_unlock(&task_info->spinlock);
    return ret;
}

static bool task_info_may_want_exec_event(const task_info_t *task_info, transport_id_t transport_id, uint64_t pid_version_to_be_sent)
{
    int i = transport_id_index(transport_id);
    const transport_task_context_t *context = &task_info->contexts[i];
    return READ_ONCE(context->transport_id) != transport_id
        || READ_ONCE(context->data.sent_pid_version) != pid_version_to_be_sent;
}

bool task_info_wants_exec_event(task_info_t *task_info, transport_id_t transport_id, uint64_t pid_version_to_be_sent)
{
    int i = transport_id_index(transport_id);
    bool ret = false;
    transport_task_context_t *context = &task_info->contexts[i];

    if (!task_info_may_want_exec_event(task_info, transport_id, pid_version_to_be_sent))
        return false;

    spin_lock(&task_info->spinlock);
    if (transport_id > context->transport_id) {
        task_info_context_init(context, transport_id);
    }
    if (context->data.sent_pid_version != pid_version_to_be_sent) {
        WRITE_ONCE(context->data.sent_pid_version, pid_version_to_be_sent);
        ret = true;
    }
    spin_unlock(&task_info->spinlock);
    return ret;
}
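/*
 * Usage sketch (illustrative only, never called): the intended send-side
 * handshake. A sender first asks task_info_need_to_make_exec_event() whether
 * any transport is out of date, builds the event, then claims each transport
 * with task_info_wants_exec_event(); that call returns true at most once per
 * (transport, pid_version) pair, which is what suppresses duplicate exec
 * events. 'send_exec_event_to()' is hypothetical, not part of this file.
 */
static void __maybe_unused example_publish_exec(task_info_t *info,
                                                const transport_ids_t *ids)
{
    int i;
    uint64_t version = READ_ONCE(info->pid_version);

    if (!task_info_need_to_make_exec_event(info, ids))
        return;

    for (i = 0; i < MAX_TRANSPORT_SIZE; i++) {
        transport_id_t id = ids->ids[i];
        if (0 == id)
            continue;
        if (task_info_wants_exec_event(info, id, version)) {
            /* send_exec_event_to(id, info); */
        }
    }
}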