/** @file @brief Message transport between kernel and userspace @details Copyright (c) 2017-2021 Acronis International GmbH @author Mikhail Krivtsov ([email protected]) @since $Id: $ */ #include "transport.h" #include "debug.h" #include "device.h" #include "file_contexts.h" #include "ftrace_hooks/ftrace_events.h" #include "ftrace_hooks/fsnotify_listener.h" #include "lsm_common.h" #include "memory.h" #include "message.h" #include "si_fp_properties.h" #include "syscall_common.h" #include "task_info_map.h" #include "tracepoints.h" #include "transport_protocol.h" #include <linux/bitmap.h> #include <asm/io.h> #include <linux/fcntl.h> #include <linux/fs.h> #include <linux/fsnotify.h> #include <linux/list.h> #include <linux/jiffies.h> // msecs_to_jiffies() #include <linux/mutex.h> #ifndef KERNEL_MOCK #include <linux/sched.h> #else #include <mock/mock_sched.h> #endif #include <linux/rcupdate.h> #include <linux/spinlock.h> #include <linux/uaccess.h> // copy_from_user(), copy_to_user() #include <linux/vmalloc.h> #include <linux/wait.h> // wait_event*(), wake_up*() #define TRANSPORT_MSG_SIZE_MAX (1<<10) // This size is reasonable to a good amount of messages, although it is really a bare minimum #define TRANSPORT_MINIMAL_SHARED_DATA_QUEUE_SIZE (128 * 1024) #define TRANSPORT_QUEUE_CAPACITY (0x1000 / sizeof(msg_t *)) #define TRANSPORT_WAIT_REPLY_TIMEOUT_MSECS (60*1000) #define TRANSPORT_WAIT_RUNDOWN_TIMEOUT_MSECS (5*1000) #define TRANSPORT_PRINTF(format, args...) DPRINTF(format, ##args) // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #define DATA_QUEUE_HEADER_SIZE sizeof(shared_data_queue_t) #define DATA_QUEUE_ENTRY_HEADER_SIZE sizeof(data_queue_entry_t) static transport_event_t* transport_event_new(void) { transport_event_t* event = mem_alloc0(sizeof(transport_event_t)); if (!event) return NULL; atomic64_add(1, &g_memory_metrics->total_transport_events); atomic_set(&event->refcount, 1); init_waitqueue_head(&event->msg_wait_queue); return event; } static transport_event_t* transport_event_ref(transport_event_t* event) { atomic_inc(&event->refcount); return event; } static void transport_event_unref(transport_event_t* event) { if (atomic_dec_and_test(&event->refcount)) { mem_free(event); atomic64_sub(1, &g_memory_metrics->total_transport_events); } } transport_global_t transport_global; static void transport_global_init(void) { mutex_init(&transport_global.transport_mutex); transport_global.transport_count = 0; transport_global.transports = (transports_t) {0}; atomic64_set(&transport_global.msg_id_sequence, 0); transport_global.last_transport_seq_num = 0; transport_global.transport_ids = (transport_ids_t) {0}; } // must be called under 'transport_global.transport_mutex' static transport_id_t transport_acquire_id(void) { transport_id_t transport_id = 0; int i; for (i = 0; i < MAX_TRANSPORT_SIZE; i++) { if (0 == READ_ONCE(transport_global.transport_ids.ids[i])) { transport_id = transport_id_make(++transport_global.last_transport_seq_num, i); WRITE_ONCE(transport_global.transport_ids.ids[i], transport_id); break; } } return transport_id; } // must be called under 'transport_global.transport_mutex' static void transport_release_id(transport_id_t id) { int i = transport_id_index(id); if (id == READ_ONCE(transport_global.transport_ids.ids[i])) { WRITE_ONCE(transport_global.transport_ids.ids[i], 0); } else { WPRINTF("transport id %llu not found", id); } } // must be called under 'transport_global.transport_mutex' static void transport_global_register(transport_t *transport) { int 
idx = transport_id_index(transport->transport_id); rcu_assign_pointer(transport_global.transports.transports[idx], transport); WRITE_ONCE(transport_global.transports.control_tgid[idx], transport->control_tgid); } // must be called under 'transport_global.transport_mutex' static void transport_global_unregister(transport_t *transport) { int idx = transport_id_index(transport->transport_id); rcu_assign_pointer(transport_global.transports.transports[idx], NULL); WRITE_ONCE(transport_global.transports.control_tgid[idx], 0); } // must be called under 'transport_global.transport_mutex' static void transport_global_recalculate_combined_event_mask_impl(void) { int i; uint64_t combined_mask = 0; for (i = 0; i < MAX_TRANSPORT_SIZE; i++) { transport_t *transport; transport_id_t transport_id = READ_ONCE(transport_global.transport_ids.ids[i]); if (!transport_id) continue; // do not need to RCU access this pointer because under 'transport_mutex' transport = transport_global.transports.transports[i]; combined_mask |= READ_ONCE(transport->events_mask); } WRITE_ONCE(transport_global.combined_events_mask, combined_mask); } struct event_subtype_relation_t { uint64_t events_mask; uint64_t generated_subtype_mask; }; static const struct event_subtype_relation_t k_subtypes_relations[] = { { MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_OPEN) , MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_OPEN_MODIFY) | MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_OPEN_READ) | MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_OPEN_MAY_CREATE) | MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_OPENDIR) }, { MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_OPEN) , MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_OPEN_MODIFY) | MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_OPEN_READ) | MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_OPEN_MAY_CREATE) | MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_OPENDIR) }, { MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_CLOSE) , MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_CLOSE_NON_WRITE) | MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_CLOSE_WRITE) | MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_CLOSEDIR) }, { MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_CLOSE), MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_CLOSE_NON_WRITE) | MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_CLOSE_WRITE) | MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_CLOSEDIR) }, { MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_MMAP) , MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_MMAP_NON_WRITE) | MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SYNC_MMAP_WRITE) }, { MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_MMAP) , MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_MMAP_NON_WRITE) | MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_NOTIFY_MMAP_WRITE) }, // All fs operations relate to generic subtypes like 'special' { MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_OPEN) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_OPEN) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_CREATE) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_RENAME) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_RENAME) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FSNOTIFY_RENAME) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_UNLINK) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FSNOTIFY_UNLINK) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_CLOSE) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_CLOSE) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FSNOTIFY_OPEN) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_RENAME) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_UNLINK) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FSNOTIFY_CREATE) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_MMAP) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_MMAP) | 
MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_LINK) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_LINK) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_SYNC_FILE_PRE_TRUNCATE) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_NOTIFY_FILE_PRE_TRUNCATE), MSG_TYPE_TO_EVENT_MASK(FP_SI_ST_SPECIAL) }, }; // If any transport includes subtype, combined should have the subtype static void transport_global_recalculate_combined_event_subtype_inclusion_mask_impl(void) { int i; uint64_t combined_mask = 0; for (i = 0; i < MAX_TRANSPORT_SIZE; i++) { int j; transport_t *transport; transport_id_t transport_id = READ_ONCE(transport_global.transport_ids.ids[i]); if (!transport_id) continue; // do not need to RCU access this pointer because under 'transport_mutex' transport = transport_global.transports.transports[i]; for (j = 0; j < (int) ARRAY_SIZE(k_subtypes_relations); j++) { const struct event_subtype_relation_t* relation = &k_subtypes_relations[j]; if (transport->events_mask & relation->events_mask) { combined_mask |= transport->events_subtype_inclusion_mask & relation->generated_subtype_mask; } } } WRITE_ONCE(transport_global.combined_events_subtype_inclusion_mask, combined_mask); } // If all transports excludes subtype, combined excludes subtype static void transport_global_recalculate_combined_event_subtype_exclusion_mask_impl(void) { int i; uint64_t combined_mask = ~0ULL; for (i = 0; i < MAX_TRANSPORT_SIZE; i++) { int j; uint64_t transport_not_excluded_subtypes; transport_t *transport; transport_id_t transport_id = READ_ONCE(transport_global.transport_ids.ids[i]); if (!transport_id) continue; // do not need to RCU access this pointer because under 'transport_mutex' transport = transport_global.transports.transports[i]; transport_not_excluded_subtypes = ~transport->events_subtype_exclusion_mask; for (j = 0; j < (int) ARRAY_SIZE(k_subtypes_relations); j++) { const struct event_subtype_relation_t* relation = &k_subtypes_relations[j]; if (transport->events_mask & relation->events_mask) { // drop bits from the combined mask if it is known that generated events are not being excluded combined_mask &= ~(transport_not_excluded_subtypes & relation->generated_subtype_mask); } } } WRITE_ONCE(transport_global.combined_events_subtype_exclusion_mask, combined_mask); } static void transport_global_recalculate_combined_all_event_masks_impl(void) { transport_global_recalculate_combined_event_mask_impl(); transport_global_recalculate_combined_event_subtype_inclusion_mask_impl(); transport_global_recalculate_combined_event_subtype_exclusion_mask_impl(); } static void transport_global_recalculate_combined_all_event_masks(void) { mutex_lock(&transport_global.transport_mutex); transport_global_recalculate_combined_all_event_masks_impl(); mutex_unlock(&transport_global.transport_mutex); } static void drop_msgs_impl(ring_t *ring) { while (!ring_is_empty(ring)) { msg_t *msg = *(msg_t **) ring_consumer_ptr(ring); msg_unref(msg); ring_consumer_index_move_one(ring); } } /* 'msg ref/unref' for messages stored in 'sent_msgs_set' are invoked in 'msg_reply_wait_count inc/dec'. There is no need for separate 'msg ref/unref' calls. 
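   (Concretely: 'msg_reply_wait_count_inc()' takes the reference when a message is placed into 'sent_msgs_set', and 'msg_reply_wait_count_dec()' releases it when the reply arrives or when the set is dropped here.)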
*/ static void drop_sent_msgs_impl(set_t *set) { void *item_ptr = set_begin_ptr(set); void *end_ptr = set_end_ptr(set); while (item_ptr < end_ptr) { msg_t *msg = *(msg_t **) item_ptr; msg_reply_wait_count_dec(msg); item_ptr = set_ptr_next(set, item_ptr); } set->count = 0; } static void transport_shutdown(transport_t *transport) { DPRINTF("transport=%p", transport); spin_lock(&transport->msg_spinlock); { WRITE_ONCE(transport->events_mask, 0); WRITE_ONCE(transport->shutdown, true); // Discard undelivered messages drop_msgs_impl(&transport->msg_ring); // Discard messages waiting for 'reply' drop_sent_msgs_impl(&transport->sent_msgs_set); } spin_unlock(&transport->msg_spinlock); // wakeup all userspace 'read' waiters wake_up_all(&transport->event->msg_wait_queue); } static const char* transport_name(transport_t* transport) { client_type_t client_type = transport->client_type; switch (client_type) { case CLIENT_UNKNOWN: return "?"; case CLIENT_TEST: return "test"; case CLIENT_AP: return "ap"; case CLIENT_BE: return "be"; case CLIENT_RTP: return "rtp"; } return "?"; } #define TRANSPORT_FMT "%s[%d]" #define TRANSPORT_PRINT(transport) transport_name(transport), transport->client_type // identify and shutdown transport failed to reply static void transport_shutdown_msg(transport_t *transport, msg_t *unreplied_msg) { bool found = false; DPRINTF("transport=%p unreplied_msg=%p", transport, unreplied_msg); spin_lock(&transport->msg_spinlock); { void *item_ptr = set_begin_ptr(&transport->sent_msgs_set); void *end_ptr = set_end_ptr(&transport->sent_msgs_set); while (item_ptr < end_ptr) { if (unreplied_msg == *(msg_t **) item_ptr) { found = true; break; } item_ptr = set_ptr_next(&transport->sent_msgs_set, item_ptr); } } spin_unlock(&transport->msg_spinlock); if (found) { WPRINTF("deactivating transport " TRANSPORT_FMT " on reply wait timeout", TRANSPORT_PRINT(transport)); transport_shutdown(transport); } } // identify and shutdown transport failed to reply static void transport_global_shutdown_msg(msg_t *unreplied_msg) { int i; DPRINTF("unreplied_msg=%p", unreplied_msg); rcu_read_lock(); for (i = 0; i < MAX_TRANSPORT_SIZE; i++) { transport_t *transport = rcu_dereference(transport_global.transports.transports[i]); if (!transport) continue; transport_shutdown_msg(transport, unreplied_msg); } rcu_read_unlock(); } static void transport_disable(transport_t *transport) { DPRINTF("transport=%p", transport); transport_global_unregister(transport); transport_shutdown(transport); transport_release_id(transport->transport_id); release_file_context_entry(transport->transport_id); } static void transport_free(transport_t *transport) { DPRINTF("transport=%p", transport); IPRINTF("message queue items_count_max=%u capacity=%u", ring_items_count_max(&transport->msg_ring), ring_capacity(&transport->msg_ring)); IPRINTF("sent_msgs_set items_count_max=%u capacity=%u", set_items_count_max(&transport->sent_msgs_set), set_fetch_capacity(&transport->sent_msgs_set)); mem_free(ring_buffer(&transport->msg_ring)); mem_free(set_buffer(&transport->sent_msgs_set)); if (transport->queue) { vfree(transport->queue); } transport_event_unref(transport->event); mem_free(transport); atomic64_sub(1, &g_memory_metrics->total_transports); } struct transport_attribute { struct attribute attr; ssize_t (*show)(transport_t* transport, char *buf); }; static ssize_t bytes_written_show(transport_t* transport, char *buf) { return sprintf(buf, "%lu\n", (long) transport->bytes_written); } static ssize_t queue_size_show(transport_t* transport, char *buf) { 
return sprintf(buf, "%u\n", transport->queue_size); } static ssize_t insert_filled_size_show(transport_t* transport, char *buf) { return sprintf(buf, "%u\n", transport->insert_filled_size); } static ssize_t insert_filled_size_max_show(transport_t* transport, char *buf) { return sprintf(buf, "%u\n", transport->insert_filled_size_max); } static ssize_t wait_set_count_show(transport_t* transport, char *buf) { return sprintf(buf, "%u\n", transport->sent_msgs_set.count); } static ssize_t wait_set_count_max_show(transport_t* transport, char *buf) { return sprintf(buf, "%u\n", transport->sent_msgs_set.count_max); } #define TRANSPORT_METRIC_X \ TRANSPORT_METRIC(bytes_written) \ TRANSPORT_METRIC(queue_size) \ TRANSPORT_METRIC(insert_filled_size) \ TRANSPORT_METRIC(insert_filled_size_max) \ TRANSPORT_METRIC(wait_set_count) \ TRANSPORT_METRIC(wait_set_count_max) #define TRANSPORT_METRIC(_name) static const struct transport_attribute s_transport_attr_##_name = __ATTR_RO(_name); TRANSPORT_METRIC_X #undef TRANSPORT_METRIC static const struct attribute *s_transport_attrs[] = { #define TRANSPORT_METRIC(_name) &s_transport_attr_##_name.attr, TRANSPORT_METRIC_X #undef TRANSPORT_METRIC NULL }; #define to_transport(_at) container_of(to_safe_kobject(_at), transport_t, skobj) #define to_transport_attr(_at) container_of(_at, struct transport_attribute, attr) static ssize_t transport_show(struct kobject *kobj, struct attribute *attr, char *buf) { transport_t* transport = to_transport(kobj); struct transport_attribute *transport_attr = to_transport_attr(attr); return transport_attr->show(transport, buf); } const struct sysfs_ops s_transport_sysfs_ops = { .show = transport_show, }; static struct kobj_type s_transport_ktype = { .release = safe_kobject_sysfs_release, .sysfs_ops = &s_transport_sysfs_ops, }; static void transport_del(transport_t *transport) { sysfs_remove_files(&transport->skobj.kobj, s_transport_attrs); safe_kobject_del(&transport->skobj); transport_free(transport); } static bool transport_ring_init(ring_t *ring) { size_t buffer_size = TRANSPORT_QUEUE_CAPACITY * sizeof(msg_t *); msg_t **msgs; bool success; if (!buffer_size) { msgs = NULL; success = true; } else { msgs = mem_alloc0(buffer_size); success = (bool) msgs; } ring_init(ring, msgs, buffer_size, sizeof(msg_t *)); return success; } static bool transport_set_init(set_t *set) { size_t buffer_size = TRANSPORT_QUEUE_CAPACITY * sizeof(msg_t *); msg_t **msgs; bool success; if (!buffer_size) { msgs = NULL; success = true; } else { msgs = mem_alloc0(buffer_size); success = (bool) msgs; } set_init(set, msgs, buffer_size, sizeof(msg_t *)); return success; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // Shared with userspace Data Queue implementation #ifndef smp_store_release #define smp_store_release(p, v) \ do { \ barrier(); \ WRITE_ONCE(*p, v); \ } while (0) #endif #ifndef smp_load_acquire #define smp_load_acquire(p) \ ({ \ typeof(*p) ___p1 = READ_ONCE(*p); \ barrier(); \ ___p1; \ }) #endif #define DATA_QUEUE_ENTRY_AT(queue, v) (data_queue_entry_t*)((uint8_t *)queue->entries + v) #ifdef ROUND_UP #undef ROUND_UP #endif #define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S)) static inline void data_queue_write_new_entry(shared_data_queue_t *queue, uint32_t offset, const SiEvent* event) { data_queue_entry_t *entry = DATA_QUEUE_ENTRY_AT(queue, offset); memcpy(entry, event, event->Size); } // This function is called from data queue 'writer' under the spin_lock as it is NOT thread-safe // As such, reads from 'queue->tail' 
can be done using 'READ_ONCE' (relaxed), writes must be done using 'smp_store_release'. // 'reader' may decide to alter 'queue->head' so 'smp_load_acquire' must be used to read it, writes are NOT allowed. static bool transport_shared_data_queue_enqueue_impl(transport_t *transport, const SiEvent* event, transport_event_t **deferred_wakeup) { uint32_t head, tail, new_tail; uint32_t entry_size = event->Size; shared_data_queue_t *queue = transport->queue; uint32_t queue_size = transport->queue_size; transport->bytes_written += entry_size; // Notice that we are not doing any memory shenanigans here to load tail & head. // The barriers will be done later if it appears that they are necessary. // !!! 'head' might not be synchronized with 'reader', it is OK and will be handled in the end. tail = READ_ONCE(queue->tail); head = smp_load_acquire(&queue->head); // Check for unreasonable 'tail' or 'head', must never happen. if (queue_size < tail || queue_size < head) { WPRINTF("Invalid tail/head detected: tail=%u, head=%u, size=%u" , (unsigned) tail, (unsigned) head, (unsigned) queue_size); return false; } // Start inserting the contents of 'data' in the shared data queue if (tail >= head) { uint32_t filled_size = tail - head; transport->insert_filled_size = filled_size; if (filled_size > transport->insert_filled_size_max) transport->insert_filled_size_max = filled_size; // Tail is further than head, it is a regular scenario. Handle it // head tail // V V // -----|*************|----------------- // ^ ^ // data to be dequeued | // free space if ((tail + entry_size) <= queue_size) { // There is enough room after the 'tail' of the queue, write the entry and move the tail // head tail new_tail // V V V // -----|*************|+++++++|------- // ^ // new entry data_queue_write_new_entry(queue, tail /*off*/, event); new_tail = tail + entry_size; } else if (head > entry_size) { // As the first condition was not satisfied, cannot put data after 'tail' // Have to loop back to the start and there is enough space before userspace 'head' // head tail // V V // |++++++|------------|*************|?? <- zapped entry w/ size>queue_size-tail, if fits // ^ ^ // off new_tail // Need to tell userspace that the current entry is too long to fit. // If there is not enough space to even place an entry header, do nothing. // Otherwise, deliberately zap the entry by putting a 'data_size' that is too big. if ((queue_size - tail) >= DATA_QUEUE_ENTRY_HEADER_SIZE) { data_queue_entry_t *entry_to_zap = DATA_QUEUE_ENTRY_AT(queue, tail); entry_to_zap->size = entry_size; // do not touch 'entry_to_zap->data', it is bogus. entry just says go to the start } // Write data at the beginning of the queue data_queue_write_new_entry(queue, 0 /*off*/, event); new_tail = /*off==0 + */ entry_size; } else { // There is neither enough space after 'tail' nor before 'head', bail WPRINTF("no more space is left in " TRANSPORT_FMT ", head=%u, tail=%u, entry_size=%u, written=%llu", TRANSPORT_PRINT(transport), head, tail, entry_size, transport->bytes_written); return false; } } else { // Catching up to the 'head' from the other side.
// tail head // V V // ****|--------------|*************** uint32_t filled_size = (queue_size - head) + tail; transport->insert_filled_size = filled_size; if (filled_size > transport->insert_filled_size_max) transport->insert_filled_size_max = filled_size; // Insert can still be done if 'head' will not be overrun if ((head - tail) > entry_size) { // tail head // V V // ****|+++++|------|*************** // ^ // new_tail data_queue_write_new_entry(queue, tail, event); new_tail = tail + entry_size; } else { // There is not enough space without overrunning 'head', bail WPRINTF("no more space is left in " TRANSPORT_FMT ", head=%u, tail=%u, entry_size=%u, written=%llu", TRANSPORT_PRINT(transport), head, tail, entry_size, transport->bytes_written); return false; } } // Expose all the content written in this thread as per 'release' semantics. // Reader must do 'smp_load_acquire' on the same variable ('tail') to see 'entries' written. // !!! This logic does NOT enforce 'tail' in 'reader' to be equal to 'tail' in 'writer' new_tail = ROUND_UP(new_tail, sizeof(uint32_t)); smp_store_release(&queue->tail, new_tail); // The new tail was published to the 'queue' but is it necessary to notify the 'reader'? // If in the beginning 'tail == head', it means that userspace has finished reading all the // content and is going to wait or is already waiting for the 'event'. // In such a case it is clear that we must notify the 'reader' no matter what. // Moreover 'reader' cannot move the 'head' past the 'tail' so it is guaranteed that it is // indeed the latest published 'head' by the reader. if (tail != head) { // If 'tail != head', it is not as clear. If it so happened that userspace moved the 'head' // to be equal to 'tail' while 'writer' was adding the new entry, 'reader' will go 'wait'. // So we must refresh the 'head' to ensure we actually do not need to wake up the 'reader'. // The other situation is also valid - 'writer' might delay writes to the head as 'atomic' ops. // We need to make sure userspace will continue consuming events as we wrote the 'tail'. // Whenever userspace detects that its current 'tail==head', it will perform 'smp_mb' // to fetch the new 'tail' we just wrote to ensure it does not need to consume anymore. smp_mb(); head = READ_ONCE(queue->head); } if (tail == head) { // atomic_ops.rst: atomic_read() and atomic_set() DO NOT IMPLY BARRIERS! atomic_set(&transport->queue_event, 1); // The data queue was empty, wake up the 'reader' which is waiting for us. // Use 'smp_wmb' to make sure 'tail' that we stored will be seen by the user. // It is also fine if we did 'smp_mb' before, we will pair with 'smp_rmb' just fine. // Also using 'smp_wmb' to ensure 'atomic_set' did set the 'queue_event'. smp_wmb(); if (deferred_wakeup) { *deferred_wakeup = transport_event_ref(transport->event); } else { wake_up_interruptible(&transport->event->msg_wait_queue); } TRANSPORT_PRINTF("woken up listener ht=%u nt=%u", tail, new_tail); } return true; } static long transport_queue_events_available(transport_t *transport) { uint32_t tail, head; int ev; int shutdown; shared_data_queue_t *queue = READ_ONCE(transport->queue); smp_rmb(); ev = atomic_xchg(&transport->queue_event, 0); if (ev) { TRANSPORT_PRINTF("check ev active"); return 1; } // This should not be necessary but doing it just in case.
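// A minimal sketch, assuming (this is not the userspace source, only an illustration) the 'reader' loop that the barriers in 'transport_shared_data_queue_enqueue_impl()' above pair with:
//   tail = smp_load_acquire(&q->tail);    /* pairs with the writer's smp_store_release(&q->tail) */
//   ... consume entries in [head, tail) ...
//   smp_store_release(&q->head, head);    /* pairs with the writer's smp_load_acquire(&q->head) */
//   re-check 'tail' after a full barrier; if it still equals 'head', block in AT_WAIT_SHARED_DATA_QUEUE until the writer's wakeup releases the wait in 'transport_data_queue_wait()' below.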
tail = READ_ONCE(queue->tail); head = READ_ONCE(queue->head); shutdown = READ_ONCE(transport->shutdown); TRANSPORT_PRINTF("check s=%u h=%u t=%u", shutdown, head, tail); return shutdown || (head != tail); } // This function is called whenever userspace 'reader' deemed that there are no more events to read. // It will be waiting for the data queue to gain new content using 'msg_wait_queue'. // 'wake_up_interruptible' does 'wakeup' when it detects that the queue is being empty. static long transport_data_queue_wait(transport_t *transport) { shared_data_queue_t *queue = READ_ONCE(transport->queue); long ret; if (!queue) { EPRINTF("queue is NULL"); return -EINVAL; } if (wait_event_interruptible_exclusive(transport->event->msg_wait_queue, transport_queue_events_available(transport))) { ret = -EINTR; } else { if (READ_ONCE(transport->shutdown)) { ret = -EIO; } else { ret = 0; } } return ret; } static int transport_data_queue_mmap(transport_t *transport, struct vm_area_struct *vma) { unsigned long sz = vma->vm_end - vma->vm_start; void *ptr; // Technically userspace may mmap part of the buffer but // it is absolutely going to be a BUG later on when code will manage // shared data queue so instead let's complain immediately if (0 != vma->vm_pgoff) { EPRINTF("mmaped offset is not zero"); return -EINVAL; } if (sz != transport->queue_size + DATA_QUEUE_HEADER_SIZE) { EPRINTF("mmaped size is invalid, 0x%lx != 0x%x", sz, transport->queue_size); return -EINVAL; } ptr = READ_ONCE(transport->queue); if (!ptr) { EPRINTF("queue is NULL"); return -EINVAL; } return remap_vmalloc_range(vma, transport->queue, 0); } static long data_queue_create(const data_queue_params_t *params, shared_data_queue_t **pqueue) { shared_data_queue_t *queue; uint32_t size = params->size; // Really 'DATA_QUEUE_HEADER_SIZE' is a bare minimum but // let's use slightly larger size to fit at least a few messages if (size <= TRANSPORT_MINIMAL_SHARED_DATA_QUEUE_SIZE) { EPRINTF("size provided is too small"); return -EINVAL; } // check if size is PAGE_SIZE aligned because it will later be used in 'mmap' if (size & (PAGE_SIZE - 1)) { EPRINTF("size is not PAGE_SIZE aligned"); return -EINVAL; } queue = (shared_data_queue_t*) vmalloc_user(size); if (!queue) return -ENOMEM; *pqueue = queue; return 0; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - static bool transport_send_msg_nowait(transport_t *transport, msg_t *msg, transport_event_t **deferred_wakeup) { bool need_wakeup = false; const SiEvent *event = &msg->event; spin_lock(&transport->msg_spinlock); { if (READ_ONCE(transport->shutdown)) { spin_unlock(&transport->msg_spinlock); return false; } if (FP_SI_CT_WANT_REPLY == event->CallbackType) { unsigned item_index; if (set_is_full(&transport->sent_msgs_set)) { WPRINTF("'sent_msgs_set' overflow for " TRANSPORT_FMT " (capacity=%u)", TRANSPORT_PRINT(transport), set_fetch_capacity(&transport->sent_msgs_set)); spin_unlock(&transport->msg_spinlock); transport_shutdown(transport); return false; } item_index = set_items_count(&transport->sent_msgs_set); /* 'msg ref/unref' for messages stored in 'sent_msgs_set' are invoked in 'msg_reply_wait_count inc/dec'. There is no need for separate 'msg ref/unref' calls. 
*/ *(msg_t **) set_item_ptr(&transport->sent_msgs_set, item_index) = msg_reply_wait_count_inc(msg); set_items_count_set(&transport->sent_msgs_set, item_index + 1); } if (transport->queue) { need_wakeup = false; if (!transport_shared_data_queue_enqueue_impl(transport, event, deferred_wakeup)) { WPRINTF("mmaped queue overflow for " TRANSPORT_FMT, TRANSPORT_PRINT(transport)); spin_unlock(&transport->msg_spinlock); transport_shutdown(transport); return false; } } else { need_wakeup = true; if (ring_is_full(&transport->msg_ring)) { WPRINTF("message queue overflow for " TRANSPORT_FMT " (capacity=%u)", TRANSPORT_PRINT(transport), ring_capacity(&transport->msg_ring)); spin_unlock(&transport->msg_spinlock); transport_shutdown(transport); return false; } *(msg_t **) ring_producer_ptr(&transport->msg_ring) = msg_ref(msg); ring_producer_index_move_one(&transport->msg_ring); } } spin_unlock(&transport->msg_spinlock); if (need_wakeup) { // wakeup userspace reader if (deferred_wakeup) *deferred_wakeup = transport_event_ref(transport->event); else wake_up_interruptible(&transport->event->msg_wait_queue); } return true; } static bool transport_send_hello_nowait(transport_t *transport) { msg_t *msg = hello_msg_new(); bool success; if (!msg) { success = false; } else { success = transport_send_msg_nowait(transport, msg, NULL /*deferred_wakeup*/); msg_unref(msg); } return success; } static bool should_send(transport_t* transport, msg_t* msg) { transport_id_t transport_id = transport->transport_id; bool ret = true; int idx = transport_id_index(transport_id); task_info_t* task_info = msg->task_info; const SiEvent* event = &msg->event; uint16_t operation = event->Operation; if (transport->control_tgid == current->tgid) { return false; } // Check if transport needs an event if (!(READ_ONCE(transport->events_mask) & MSG_TYPE_TO_EVENT_MASK(operation))) { return false; } if (msg->subtype_mask) { if (!(READ_ONCE(transport->events_subtype_inclusion_mask) & msg->subtype_mask)) { return false; } if (READ_ONCE(transport->events_subtype_exclusion_mask) & msg->subtype_mask) { return false; } } // Check if current transport does not need the event if (msg->file_context_msg_info.skipped_transport_ids[idx] == transport_id) { return false; } // Test task info, dependends on the type of the SiEvent // task_info might not be available, in this case do nothing if (!task_info) { return true; } if (FP_SI_OT_NOTIFY_PROCESS_EXEC == operation) { uint64_t pid_version = msg->exec.pid_version; ret = task_info_wants_exec_event(task_info, transport_id, pid_version); } else if (FP_SI_OT_NOTIFY_PROCESS_FORK == operation) { uint64_t pid_version = msg->fork.pid_version; // FORK when task_info is provided is basically an EXEC event so update the pid_version (void) task_info_wants_exec_event(task_info, transport_id, pid_version); // for FORK event, always send } else { uint64_t listening_mask = ~0ULL; if (READ_ONCE(task_info->contexts[idx].transport_id) == transport_id) { listening_mask = READ_ONCE(task_info->contexts[idx].data.listening_mask); } ret = !!(listening_mask & MSG_TYPE_TO_EVENT_MASK(operation)); } return ret; } static bool send_msg_nowait(msg_t *msg) { bool sync = FP_SI_CT_WANT_REPLY == msg->event.CallbackType; int i; bool sent = false; transport_event_t *deferred_wakeups[MAX_TRANSPORT_SIZE]; int deferred_wakeups_count = 0; rcu_read_lock(); for (i = 0; i < MAX_TRANSPORT_SIZE; i++) { transport_t *transport = rcu_dereference(transport_global.transports.transports[i]); if (transport && should_send(transport, msg)) { 
deferred_wakeups[deferred_wakeups_count] = NULL; sent |= transport_send_msg_nowait(transport, msg, &deferred_wakeups[deferred_wakeups_count]); if (deferred_wakeups[deferred_wakeups_count]) { deferred_wakeups_count++; } } } rcu_read_unlock(); for (i = 0; i < deferred_wakeups_count; i++) { // Wake up all deferred wakeups, last one wakeup synchronously to switch to it immediately if (sync && (i == deferred_wakeups_count - 1)) { wake_up_interruptible_sync(&deferred_wakeups[i]->msg_wait_queue); } else { wake_up_interruptible(&deferred_wakeups[i]->msg_wait_queue); } transport_event_unref(deferred_wakeups[i]); } return sent; } static transport_t *transport_new(void) { transport_t *transport = mem_alloc0(sizeof(transport_t)); if (transport) { int err; transport->transport_id = transport_acquire_id(); transport->bytes_written = 0; transport->insert_filled_size = 0; transport->insert_filled_size_max = 0; transport->client_type = CLIENT_UNKNOWN; if (0 == transport->transport_id) { EPRINTF("transport %p failed to acquire transport id", transport); mem_free(transport); return NULL; } transport->event = transport_event_new(); if (!transport->event) { EPRINTF("transport %p failed to allocate event", transport); mem_free(transport); return NULL; } acquire_file_context_entry(transport->transport_id); // remember client's process doing 'open' to auto-ignore it WRITE_ONCE(transport_global.transport_ids.ids[transport_id_index(transport->transport_id)], transport->transport_id); transport->control_tgid = current->tgid; spin_lock_init(&transport->msg_spinlock); WRITE_ONCE(transport->events_mask, MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_HELLO) | MSG_TYPE_TO_EVENT_MASK(FP_SI_OT_PONG)); WRITE_ONCE(transport->events_subtype_inclusion_mask, ~0ULL); WRITE_ONCE(transport->events_subtype_exclusion_mask, 0); WRITE_ONCE(transport->shutdown, false); transport->queue = NULL; atomic_set(&transport->queue_event, 0); if (transport_ring_init(&transport->msg_ring) && transport_set_init(&transport->sent_msgs_set) && transport_send_hello_nowait(transport)) { transport_global_register(transport); } else { transport_disable(transport); transport_free(transport); return NULL; } safe_kobject_init(&transport->skobj); err = kobject_init_and_add(&transport->skobj.kobj, &s_transport_ktype, &g_memory_metrics->skobj.kobj, "transport%lld", transport->transport_id); if (err) { transport_disable(transport); kobject_put(&transport->skobj.kobj); transport_free(transport); return NULL; } err = sysfs_create_files(&transport->skobj.kobj, s_transport_attrs); if (err) { transport_disable(transport); safe_kobject_del(&transport->skobj); transport_free(transport); return NULL; } } DPRINTF("transport=%p", transport); atomic64_add(1, &g_memory_metrics->total_transports); return transport; } int __init transport_mod_init(void) { int ret; transport_global_init(); ret = device_mod_init(); if (ret) { EPRINTF("'device_mod_init()' failure %i", ret); } return ret; } void transport_mod_down(void) { DPRINTF(""); device_mod_down(); DPRINTF(""); } static msg_t *transport_lookup_msg_ref(transport_t *transport, msg_id_t reply_id) { msg_t* msg = NULL; DPRINTF(""); // TODO DK: Is it possible to use radix tree here instead? 
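// For now this is a linear scan of 'sent_msgs_set' under 'msg_spinlock'; the set is bounded by TRANSPORT_QUEUE_CAPACITY entries, so the cost stays modest.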
spin_lock(&transport->msg_spinlock); { void *item_ptr = set_begin_ptr(&transport->sent_msgs_set); void *end_ptr = set_end_ptr(&transport->sent_msgs_set); while (item_ptr < end_ptr) { msg_t *query = *(msg_t **) item_ptr; if (query->id == reply_id) { msg = query; msg_ref(msg); goto unlock; } item_ptr = set_ptr_next(&transport->sent_msgs_set, item_ptr); } } unlock: spin_unlock(&transport->msg_spinlock); DPRINTF("ret=%p", msg); return msg; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - static long install_file(struct file *file, int fd, int *pfd) { if (IS_ERR(file)) { put_unused_fd(fd); return PTR_ERR(file); } // file is consumed so no need to call 'fput' fd_install(fd, file); *pfd = fd; return 0; } static long open_file_with_flags(const char *full_path, int uflags, int mode, int *pfd) { struct file *file; int flags; int fd; DPRINTF("Opening file '%s', uflags '%d', mode '%d'", full_path, uflags, mode); fd = get_unused_fd_compat(); if (fd < 0) { return fd; } flags = uflags #ifdef O_LARGEFILE | O_LARGEFILE #endif #ifdef O_NOATIME | O_NOATIME #endif // 'FMODE_NONOTIFY' refers to 'fanotify' not scanning the file. #ifdef FMODE_NONOTIFY | FMODE_NONOTIFY #endif ; file = filp_open(full_path, flags, mode); return install_file(file, fd, pfd); } static inline long open_file(struct path *path, int *pfd) { struct file *file; int flags; int fd; if (!path->dentry && !path->mnt) { return -ENOENT; } fd = get_unused_fd_compat(); if (fd < 0) { return fd; } flags = O_RDONLY #ifdef O_LARGEFILE | O_LARGEFILE #endif #ifdef O_NOATIME | O_NOATIME #endif // 'FMODE_NONOTIFY' refers to 'fanotify' not scanning the file. #ifdef FMODE_NONOTIFY | FMODE_NONOTIFY #endif ; file = dentry_open_compat(path, flags); if (IS_ERR(file)) { // If open failed, let's try to open via the path. // Notice that this open will be inside 'client' service context // so this 'filp_open' has a good chance of failing as // 'mount namespaces' might be different in the process. // Perhaps a proper solution would be opening the file inside the original // process context, but that would result in having to create 'file' // early or to do extra context switches to the scanned process. // Either way seems inefficient so it is currently avoided. size_t size = PAGE_SIZE; char *buf = __getname(); const char *full_path; if (!buf) { return -ENOMEM; } full_path = d_path(path, buf, size); if (!IS_ERR(full_path)) { file = filp_open(full_path, flags, 0); } __putname(buf); } return install_file(file, fd, pfd); } static long transport_ioctl_handle_open_file_from_msg(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg) { long ret; msg_t* msg; request_msg_img_t *msg_img = MSG_IMG(query_msg); open_file_from_msg_img_t *img = IMG_PAYLOAD(msg_img); if (MSG_SIZE(query_msg) < sizeof(request_msg_img_t) + sizeof(open_file_from_msg_img_t)) { EPRINTF("'%s' message is too short", action_type_to_string(MSG_TYPE(query_msg))); return -EINVAL; } msg = transport_lookup_msg_ref(transport, MSG_ID(query_msg)); if (!msg) { ret = -ESRCH; } else { int fd = -1; struct path path; thread_safe_path_load(img->num == 0 ? 
&msg->path : &msg->path2, &path); msg_unref(msg); ret = open_file(&path, &fd); path_put(&path); if (0 == ret) { ret = open_file_return_msg_new(reply_msg, fd); } } return ret; } static long transport_ioctl_handle_open_file_by_path(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg) { long ret; char *path; size_t pathSize; int fd = -1; request_msg_img_t *msg_img = MSG_IMG(query_msg); open_file_by_path_img_t *img = IMG_PAYLOAD(msg_img); (void) transport; if (MSG_SIZE(query_msg) <= sizeof(request_msg_img_t) + sizeof(open_file_by_path_img_t)) { EPRINTF("'%s' message is too short", action_type_to_string(MSG_TYPE(query_msg))); return -EINVAL; } path = img->path; pathSize = MSG_SIZE(query_msg) - (sizeof(request_msg_img_t) + sizeof(open_file_by_path_img_t)); path[pathSize - 1] = '\0'; ret = open_file_with_flags(path, img->flags, img->mode, &fd); if (0 == ret) { ret = open_file_return_msg_new(reply_msg, fd); } return ret; } static long transport_ioctl_handle_get_version(msg_varsized_t *reply_msg) { return version_info_return_msg_new(reply_msg); } static long transport_ioctl_handle_data_queue_init(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg) { request_msg_img_t *msg_img = MSG_IMG(query_msg); data_queue_params_t *params = IMG_PAYLOAD(msg_img); long err; shared_data_queue_t *queue; uint32_t queue_size; if (MSG_SIZE(query_msg) < sizeof(request_msg_img_t) + sizeof(data_queue_params_t)) { EPRINTF("'%s' message is too short", action_type_to_string(MSG_TYPE(query_msg))); return -EINVAL; } err = data_queue_create(params, &queue); if (err) { return err; } queue_size = params->size - DATA_QUEUE_HEADER_SIZE; { spin_lock(&transport->msg_spinlock); if (transport->queue) { spin_unlock(&transport->msg_spinlock); vfree(queue); return -EEXIST; } transport->queue = queue; transport->queue_size = queue_size; spin_unlock(&transport->msg_spinlock); } return data_queue_offsets_return_msg_new(reply_msg, queue_size); } static int mnt_info_ret_msg_new(msg_varsized_t *msg, bool ok) { size_t msg_img_size; request_msg_img_t *msg_img; mnt_info_ret_img_t *mnt_info_ret_img; msg_sized_t *smsg; msg_img_size = sizeof(request_msg_img_t) + sizeof(mnt_info_ret_img_t); smsg = msg_varsized_init(msg, msg_img_size); if (smsg) { MSG_TYPE(smsg) = RT_GET_MNT_ID_OFFSET; } if (!smsg) { return -ENOMEM; } msg_img = MSG_IMG(smsg); mnt_info_ret_img = IMG_PAYLOAD(msg_img); mnt_info_ret_img->ok = ok; return 0; } #define MNT_ID_FIND_BUFFER_LEN (128) static inline bool file_ok(struct file *file) { struct dentry *dentry; struct inode *inode; if (!file) return false; dentry = file->f_path.dentry; if (!dentry) return false; inode = dentry->d_inode; if (!inode) return false; if (!inode->i_sb) return false; return true; } #ifndef VFSMOUNT_HAS_MNT_ID // return 0 on error, offset on success static int find_mnt_id_offset(mnt_info_img_t *img) { uint32_t i = 0; uintptr_t info_ptr = (uintptr_t)img->data; // offset[-MNT_ID_FIND_BUFFER_LEN, -1] => offset_map[0, MNT_ID_FIND_BUFFER_LEN - 1], // offset[1, MNT_ID_FIND_BUFFER_LEN] => offset_map[MNT_ID_FIND_BUFFER_LEN + 1, MNT_ID_FIND_BUFFER_LEN * 2] DECLARE_BITMAP(offset_map, MNT_ID_FIND_BUFFER_LEN * 2 + 1); bitmap_fill(offset_map, MNT_ID_FIND_BUFFER_LEN * 2 + 1); if (img->count <= 0) { return 0; } for (i = 0; i < (sizeof(struct vfsmount) / sizeof(int)); i++) { clear_bit(i + MNT_ID_FIND_BUFFER_LEN, offset_map); } for (i = 0; i < img->count; i++) { struct compat_fd fd; mnt_info_t *info = (mnt_info_t *)info_ptr; info_ptr += (sizeof(mnt_info_t)); fd = 
compat_fdget(info->fd); if (file_ok(fd_file(fd))) { struct path path = fd_file(fd)->f_path; int *mnt_ptr = (int *)path.mnt; int count = 0; while (count <= MNT_ID_FIND_BUFFER_LEN) { int *mnt_id = (int *)(mnt_ptr + count); if (*mnt_id != info->mntId) { clear_bit(count + MNT_ID_FIND_BUFFER_LEN, offset_map); } mnt_id = (int *)(mnt_ptr - count); if (*mnt_id != info->mntId) { clear_bit(MNT_ID_FIND_BUFFER_LEN - count, offset_map); } count++; } compat_fdput(fd); } } for (i = 0; i < MNT_ID_FIND_BUFFER_LEN * 2; i++) { if (test_bit(i, offset_map)) { return i - MNT_ID_FIND_BUFFER_LEN; } } return 0; } int global_mnt_id_offset = 0; #endif static long transport_ioctl_handle_mnt_info(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg) { int ret; int offset = 0; request_msg_img_t *msg_img = MSG_IMG(query_msg); mnt_info_img_t *img = IMG_PAYLOAD(msg_img); (void)transport; if (MSG_SIZE(query_msg) < sizeof(request_msg_img_t) + sizeof(mnt_info_img_t)) { EPRINTF("'%s' message is too short", action_type_to_string(MSG_TYPE(query_msg))); return -EINVAL; } #ifdef VFSMOUNT_HAS_MNT_ID (void) img; (void) offset; ret = mnt_info_ret_msg_new(reply_msg, true); #else offset = find_mnt_id_offset(img); IPRINTF("mnt offset in vfsmount: %d", offset); if (offset != 0) { WRITE_ONCE(global_mnt_id_offset, offset); ret = mnt_info_ret_msg_new(reply_msg, true); } else { ret = mnt_info_ret_msg_new(reply_msg, false); } #endif return ret; } static long transport_ioctl_process_info(msg_varsized_t *reply_msg, msg_sized_t *query_msg) { long ret; if (query_msg->img_size < (sizeof(request_msg_img_t) + sizeof(get_process_info_img_t))) { EPRINTF("'%s' message is too short", action_type_to_string(MSG_TYPE(query_msg))); ret = -EINVAL; } else { request_msg_img_t *msg_img = MSG_IMG(query_msg); get_process_info_img_t *img = IMG_PAYLOAD(msg_img); pid_t pid = img->pid; ret = process_info_return_msg_new(reply_msg, pid); } DPRINTF("ret=%li", ret); return ret; } static long transport_ioctl_process_pid_version(msg_varsized_t *reply_msg, msg_sized_t *query_msg) { long ret; if (query_msg->img_size < (sizeof(request_msg_img_t) + sizeof(get_process_info_img_t))) { EPRINTF("'%s' message is too short", action_type_to_string(MSG_TYPE(query_msg))); ret = -EINVAL; } else { request_msg_img_t *msg_img = MSG_IMG(query_msg); get_process_info_img_t *img = IMG_PAYLOAD(msg_img); pid_t pid = img->pid; ret = process_pid_version_return_msg_new(reply_msg, pid); } DPRINTF("ret=%li", ret); return ret; } static long transport_ioctl_write_read_msg(transport_t *transport, msg_varsized_t *reply_msg, msg_sized_t *query_msg) { long ret; action_type_t action_type = 0; if (MSG_REPLY(query_msg)) { EPRINTF("'reply' ioctl is not supported"); ret = -EINVAL; goto out; } action_type = MSG_TYPE(query_msg); switch (action_type) { case AT_OPEN_FILE_FROM_MSG: ret = transport_ioctl_handle_open_file_from_msg(transport, reply_msg, query_msg); break; case AT_OPEN_FILE_BY_PATH: ret = transport_ioctl_handle_open_file_by_path(transport, reply_msg, query_msg); break; case AT_GET_VERSION: ret = transport_ioctl_handle_get_version(reply_msg); break; case AT_INIT_SHARED_DATA_QUEUE: ret = transport_ioctl_handle_data_queue_init(transport, reply_msg, query_msg); break; case AT_GET_MNT_ID_OFFSET: ret = transport_ioctl_handle_mnt_info(transport, reply_msg, query_msg); break; case AT_GET_PROCESS_INFO: ret = transport_ioctl_process_info(reply_msg, query_msg); break; case AT_GET_PROCESS_PID_VERSION: ret = transport_ioctl_process_pid_version(reply_msg, query_msg); break; default: 
EPRINTF("Unexpected '%s' message", action_type_to_string(action_type)); HEX_DUMP("query_msg: ", MSG_IMG(query_msg), MSG_SIZE(query_msg)); ret = -EINVAL; break; } out: DPRINTF("action_type=%d ret=%li", (int) action_type, ret); return ret; } static long transport_ioctl_copy_from_user(ioctl_hdr_t *ioctl_hdr, msg_varsized_t *query_msg, void __user *user_data) { long ret; size_t msg_size; msg_sized_t *msg; request_msg_img_t *msg_img; void *payload; if (copy_from_user(ioctl_hdr, user_data, sizeof(ioctl_hdr_t))) { EPRINTF("'copy_from_user()' failure"); ret = -EFAULT; goto out; } msg_size = ioctl_hdr->size; if (msg_size < sizeof(request_msg_img_t)) { EPRINTF("message image is too small"); ret = -EINVAL; goto out; } if (msg_size > TRANSPORT_MSG_SIZE_MAX) { EPRINTF("size > TRANSPORT_MSG_SIZE_MAX"); ret = -E2BIG; goto out; } msg = msg_varsized_init(query_msg, msg_size); if (!msg) { ret = -ENOMEM; goto out; } msg_img = MSG_IMG(msg); payload = (uint8_t *)user_data + sizeof(ioctl_hdr_t); if (copy_from_user(msg_img, payload, msg_size)) { msg_varsized_uninit(query_msg); EPRINTF("'copy_from_user()' failure"); ret = -EFAULT; goto out; } ret = 0; out: DPRINTF("ret=%li", ret); return ret; } static long transport_ioctl_copy_to_user(ioctl_hdr_t *ioctl_hdr, msg_sized_t *reply_msg, void __user *user_data) { long ret; size_t msg_size = MSG_SIZE(reply_msg); size_t capacity; void *payload; request_msg_img_t *msg_img; ioctl_hdr->size = msg_size; if (copy_to_user(user_data, ioctl_hdr, sizeof(ioctl_hdr_t))) { EPRINTF("'copy_to_user()' failure"); ret = -EFAULT; goto out; } capacity = ioctl_hdr->capacity; if (capacity < msg_size) { WPRINTF("capacity=%zu < msg_size=%zu", capacity, msg_size); ret = -ENOSPC; goto out; } payload = (uint8_t *)user_data + sizeof(ioctl_hdr_t); msg_img = MSG_IMG(reply_msg); if (copy_to_user(payload, msg_img, msg_size)) { EPRINTF("'copy_to_user()' failure"); ret = -EFAULT; goto out; } ret = 0; out: DPRINTF("ret=%li", ret); return ret; } long transport_device_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { transport_t *transport = filp->private_data; long ret; if (READ_ONCE(transport->shutdown)) { ret = -EIO; goto out; } switch (cmd) { case IOCTL_WRITE_AND_READ_MSG: case IOCTL_READ_VERSION: { ioctl_hdr_t ioctl_hdr; void *user_data = (void *)arg; msg_varsized_t query_msg; ret = transport_ioctl_copy_from_user(&ioctl_hdr, &query_msg, user_data); if (!ret) { msg_varsized_t reply_msg; ret = transport_ioctl_write_read_msg(transport, &reply_msg, MSG_VARSIZED_GET_SIZED(&query_msg)); if (!ret) { ret = transport_ioctl_copy_to_user(&ioctl_hdr, MSG_VARSIZED_GET_SIZED(&reply_msg), user_data); msg_varsized_uninit(&reply_msg); } msg_varsized_uninit(&query_msg); } break; } default: EPRINTF("Unexpected IOCTL cmd=%u", cmd); ret = -ENOIOCTLCMD; } out: if (-EINVAL == ret) { EPRINTF("ioctl failed with EINVAL, dropping the transport"); transport_shutdown(transport); } DPRINTF("ret=%li", ret); return ret; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ssize_t transport_device_read(struct file *filp, char __user *user_data, size_t size, loff_t *offset) { msg_t *msg; transport_t *transport = filp->private_data; size_t img_size; ssize_t ret; (void) offset; if (filp->f_flags & O_NONBLOCK) { EPRINTF("'non-blocking' mode is not supported yet"); ret = -EINVAL; transport_shutdown(transport); goto out; } if (!size) { EPRINTF("'empty read' is not supported"); ret = -EINVAL; transport_shutdown(transport); goto out; } retry_wait: // We may start with 'wait*()' because it itself 
starts // with 'condition' check. if (wait_event_interruptible_exclusive(transport->event->msg_wait_queue, READ_ONCE(transport->shutdown) || !ring_is_empty(&transport->msg_ring))) { ret = -EINTR; goto out; } // Lock the state and check if processing is actually possible. spin_lock(&transport->msg_spinlock); { if (READ_ONCE(transport->shutdown)) { ret = -EIO; spin_unlock(&transport->msg_spinlock); goto out; } if (ring_is_empty(&transport->msg_ring)) { WPRINTF("wakeup without messages"); spin_unlock(&transport->msg_spinlock); goto retry_wait; } msg = *(msg_t **) ring_consumer_ptr(&transport->msg_ring); img_size = msg->event.Size; DPRINTF("size=%zu img_size=%zu", size, img_size); if (size < img_size) { ret = -ENOSPC; spin_unlock(&transport->msg_spinlock); goto out; } ring_consumer_index_move_one(&transport->msg_ring); } spin_unlock(&transport->msg_spinlock); // 'copy_to_user' MAY sleep (for example in page fault handler) if (copy_to_user(user_data, &msg->event, img_size)) { WPRINTF("'copy_to_user()' failure"); ret = -EFAULT; transport_shutdown(transport); } else { ret = img_size; } msg_unref(msg); out: DPRINTF("ret=%zi", ret); return ret; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // Forward declaration, to be available in the end of the code static long wait_msg_killable_timeout(msg_t* msg, unsigned long timeout_jiffies); static void msg_wait_reply(msg_t *msg) { long ret; // We may start with 'wait*()' because it itself starts // with 'condition' check. DPRINTF("waiting for userspace reply..."); ret = wait_msg_killable_timeout(msg, msecs_to_jiffies(TRANSPORT_WAIT_REPLY_TIMEOUT_MSECS)); if (!ret) { msg_type_t operation = (msg_type_t) msg->event.Operation; // Timeout here means unexpected issue with userspace. FPRINTF("timeout waiting for userspace reply (msg_type=%d/%s)", operation, msg_type_to_string(operation)); HEX_DUMP("msg: ", &msg->event, msg->event.Size); dump_stack(); // identify and shutdown transport failed to reply transport_global_shutdown_msg(msg); } else if (ret < 0) { // Calling process has been interrupted as SIGKILL was received. // In practice this means 'block'. DPRINTF("message was interrupted..."); msg->interrupted = true; } else { // Userspace reply has been received (msg->reply_msg) or // waiting has been explicitly aborted (msg->aborted) for // example on userspace disconnect. DPRINTF("wait finished (msg->block=%i, wc=%d)", msg->block, atomic_read(&msg->reply_wait_count)); } } void send_msg_async(msg_t *msg) { DPRINTF("msg=%p", msg); send_msg_nowait(msg); DPRINTF(""); } void send_msg_async_unref_unchecked(msg_t *msg) { send_msg_async(msg); msg_unref(msg); } static void msg_mark_sync(msg_t *msg) { msg->event.CallbackType = FP_SI_CT_WANT_REPLY; } static bool send_msg_sync_nowait(msg_t *msg) { bool sent; DPRINTF("msg=%p", msg); msg_mark_sync(msg); sent = send_msg_nowait(msg); DPRINTF("msg=%p sent=%i", msg, sent); return sent; } void send_msg_sync(msg_t *msg) { DPRINTF("msg=%p", msg); if (send_msg_sync_nowait(msg)) { msg_wait_reply(msg); } DPRINTF(""); } void send_msg_sync_unref_unchecked(msg_t *msg) { send_msg_sync(msg); thread_safe_path_clear(&msg->path); // TODO: Why path2 is not cleared? 
msg_unref(msg); } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - static int transport_handle_ping_msg(transport_t *transport, msg_sized_t *ping) { int ret; msg_t *msg; bool sync; uint64_t event_uid; transport_event_t *event = NULL; if (ping->img_size < (sizeof(request_msg_img_t) + sizeof(ping_img_t))) { DPRINTF("'ping' message is too short. ignoring it."); ret = -EINVAL; goto out; } event_uid = transport_global_sequence_next(); msg = pong_msg_new(ping, event_uid); if (!msg) { ret = -ENOMEM; goto out; } // reflect ping's 'reply' policy sync = !!MSG_ID(ping); if (sync) { msg->event.CallbackType = FP_SI_CT_WANT_REPLY; } transport_send_msg_nowait(transport, msg, &event); if (event) { wake_up_interruptible_sync(&event->msg_wait_queue); transport_event_unref(event); } msg_wait_reply(msg); ret = 0; msg_unref(msg); out: return ret; } static int transport_handle_set_listening_mask_process(transport_t *transport, msg_sized_t *msg) { request_msg_img_t *msg_img; process_set_listening_mask_img_t *img; pid_t pid; uint64_t events_mask; int ret; task_info_t *info; uint64_t unique_pid; uint64_t pid_version; if (msg->img_size < (sizeof(request_msg_img_t) + sizeof(process_set_listening_mask_img_t))) { DPRINTF("'pid' message is too short. ignoring it."); ret = -EINVAL; goto out; } msg_img = MSG_IMG(msg); img = IMG_PAYLOAD(msg_img); pid = img->pid; events_mask = img->events_mask; unique_pid = img->unique_pid; pid_version = img->pid_version; // If the user gives us a 'pid_version', do not attempt to do weird gets; fetch the current info in the map. if (0 == pid_version) info = task_info_map_get_by_pid(pid, unique_pid); else info = task_info_lookup(pid, unique_pid); if (info) { ret = task_info_set_listening_mask(info, transport->transport_id, events_mask, pid_version); task_info_put(info); } else { ret = -ESRCH; } out: DPRINTF("ret=%i", ret); return ret; } // This msg is received when userspace is processing an event msg static int transport_handle_file_context_add_msg(transport_t *transport, msg_sized_t *add_msg) { request_msg_img_t *add_msg_img; msg_t *msg; int ret = 0; if (add_msg->img_size < (sizeof(request_msg_img_t))) { DPRINTF("message is too short. ignoring it."); ret = -EINVAL; goto out; } add_msg_img = MSG_IMG(add_msg); msg = transport_lookup_msg_ref(transport, add_msg_img->id); if (!msg) { ret = -ESRCH; } else { // At this moment, the event msgs that were sent synchronously still exist, // so we can utilize their information here.
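// The file key, pid_version and open flags captured in that original sync OPEN/WRITE message are copied below into the per-transport open/rw context caches via 'add_open_cache()' / 'add_rw_cache()'.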
if (msg->file_context_msg_info.key.file_key.ptr != 0) { int flags = 0; file_context_info_t info = {0}; uint16_t operation = msg->event.Operation; if (FP_SI_OT_SYNC_FILE_PRE_OPEN == operation || FP_SI_OT_SYNC_FILE_PRE_WRITE == operation) { info.pid_version = msg->open.pid_version; flags = msg->open.flags; info.low = msg->write.low; info.high = msg->write.high; } else { EPRINTF("%s: unsupported msg type", __func__); msg_unref(msg); return -EFAULT; } if ((flags & O_ACCMODE) <= O_RDWR) { flags += 1; } info.msg_info.key = msg->file_context_msg_info.key; if (FP_SI_OT_SYNC_FILE_PRE_OPEN == operation) { file_context_open_file_t *file_node = NULL; file_context_open_process_t *process_node = NULL; add_open_cache(transport->transport_id, &info, &file_node, &process_node); if (process_node) { atomic_or_compat(flags, &process_node->flags); } put_open_cache(file_node, process_node); } else if (FP_SI_OT_SYNC_FILE_PRE_WRITE == operation) { file_context_rw_t *node = NULL; node = add_rw_cache(transport->transport_id, &info, FILE_CONTEXT_WRITE_TABLE); if (node) { put_rw_cache(node); } } } msg_unref(msg); } out: DPRINTF("ret=%i", ret); return ret; } static int transport_handle_set_listening_mask_global(transport_t *transport, msg_sized_t *msg) { request_msg_img_t *msg_img; events_mask_img_t *img; uint64_t mask; int ret; if (msg->img_size < (sizeof(request_msg_img_t) + sizeof(events_mask_img_t))) { DPRINTF("'events' message is too short. ignoring it."); ret = -EINVAL; goto out; } msg_img = MSG_IMG(msg); img = IMG_PAYLOAD(msg_img); mask = img->events_mask; spin_lock(&transport->msg_spinlock); { if (READ_ONCE(transport->shutdown)) { // Do not allow changing the mask when shutdown. // Transport will not be able to receive any events. ret = -EFAULT; } else { WRITE_ONCE(transport->events_mask, mask); ret = 0; } } spin_unlock(&transport->msg_spinlock); transport_global_recalculate_combined_all_event_masks(); out: DPRINTF("ret=%i", ret); return ret; } static int transport_handle_set_listening_subtype_mask(transport_t *transport, uint64_t* target, msg_sized_t *msg) { request_msg_img_t *msg_img; events_mask_img_t *img; uint64_t mask; int ret; if (msg->img_size < (sizeof(request_msg_img_t) + sizeof(events_mask_img_t))) { DPRINTF("'events' message is too short. ignoring it."); ret = -EINVAL; goto out; } msg_img = MSG_IMG(msg); img = IMG_PAYLOAD(msg_img); mask = img->events_mask; spin_lock(&transport->msg_spinlock); { if (READ_ONCE(transport->shutdown)) { // Do not allow changing the mask when shutdown. // Transport will not be able to receive any events. ret = -EFAULT; } else { WRITE_ONCE(*target, mask); ret = 0; } } spin_unlock(&transport->msg_spinlock); transport_global_recalculate_combined_all_event_masks(); out: DPRINTF("ret=%i", ret); return ret; } static int transport_handle_set_client_type(transport_t *transport, msg_sized_t *msg) { request_msg_img_t *msg_img; transport_client_type_img_t *img; int ret = 0; char* name; if (msg->img_size < (sizeof(request_msg_img_t) + sizeof(transport_client_type_img_t))) { ret = -EINVAL; goto out; } msg_img = MSG_IMG(msg); img = IMG_PAYLOAD(msg_img); transport->client_type = img->client_type; name = (char*) mem_alloc(256); if (name) { snprintf(name, 256, "transport%lld_%s", transport->transport_id, transport_name(transport)); ret = kobject_rename(&transport->skobj.kobj, name); mem_free(name); } out: DPRINTF("ret=%i", ret); return ret; } // FIXME: do something with 'reply'. For example merge several replies // into one; link replies into list; extract 'responces' and merge them. 
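// Reply image layout as parsed by 'handle_reply()' below (the authoritative definitions presumably live in 'transport_protocol.h'): a 'request_msg_img_t' header optionally followed by a 'reply_img_t' whose 'type' is a 'reply_type_t'. A short reply that carries no 'reply_img_t' is treated as RT_ALLOW for backward compatibility; RT_BLOCK marks the waiting query as blocked.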
static void handle_reply(msg_t *query_msg, msg_sized_t *reply_msg) { // handle 'long' 'reply' size_t headers_size = sizeof(request_msg_img_t) + sizeof(reply_img_t); // Note: for compatibility with legacy short 'reply_img_t' default 'reply_type' is RT_ALLOW if (MSG_SIZE(reply_msg) >= headers_size) { request_msg_img_t *reply_msg_img = MSG_IMG(reply_msg); reply_img_t *reply_img = IMG_PAYLOAD(reply_msg_img); reply_type_t reply_type = reply_img->type; DPRINTF("MSG_SIZE(reply_msg)=%zu - headers_size=%zu = %zu reply_type=%u", MSG_SIZE(reply_msg), headers_size, MSG_SIZE(reply_msg) - headers_size, reply_type); if (RT_BLOCK == reply_type) { query_msg->block = true; } } } static int transport_handle_reply(transport_t *transport, msg_sized_t *reply) { msg_id_t reply_id = MSG_ID(reply); msg_type_t reply_type = MSG_TYPE(reply); msg_t* msg = NULL; DPRINTF("%lu %d", reply_id, reply_type); // find 'query' matching this 'reply' spin_lock(&transport->msg_spinlock); { void *item_ptr = set_begin_ptr(&transport->sent_msgs_set); void *end_ptr = set_end_ptr(&transport->sent_msgs_set); while (item_ptr < end_ptr) { msg_t *query = *(msg_t **) item_ptr; if (query->id == reply_id) { // remove 'query' from 'set' *(msg_t **) item_ptr = *(msg_t **) set_item_ptr( &transport->sent_msgs_set, set_items_count_dec(&transport->sent_msgs_set)); msg = query; goto unlock; } item_ptr = set_ptr_next(&transport->sent_msgs_set, item_ptr); } WPRINTF("Unexpected 'reply' with type=%i id=%llX", reply_type, reply_id); } unlock: spin_unlock(&transport->msg_spinlock); if (msg) { handle_reply(msg, reply); msg_reply_wait_count_dec(msg); } return msg ? 0 : -ESRCH; } static int transport_handle_msg(transport_t *transport, msg_sized_t *msg) { int ret; if (msg->img_size < sizeof(request_msg_img_t)) { DPRINTF("message image is too small"); ret = -EINVAL; goto out; } if (MSG_REPLY(msg)) { ret = transport_handle_reply(transport, msg); } else { // !reply action_type_t type = MSG_TYPE(msg); DPRINTF("type=%i", type); switch (type) { case AT_PING: ret = transport_handle_ping_msg(transport, msg); break; case AT_WAIT_SHARED_DATA_QUEUE: ret = transport_data_queue_wait(transport); break; case AT_FILE_CONTEXT_ADD: ret = transport_handle_file_context_add_msg(transport, msg); break; case AT_SET_LISTENING_MASK_GLOBAL: ret = transport_handle_set_listening_mask_global(transport, msg); break; case AT_SET_LISTENING_MASK_PROCESS: ret = transport_handle_set_listening_mask_process(transport, msg); break; case AT_SET_LISTENING_SUBTYPE_INCLUSION_MASK: ret = transport_handle_set_listening_subtype_mask(transport, &transport->events_subtype_inclusion_mask, msg); break; case AT_SET_LISTENING_SUBTYPE_EXCLUSION_MASK: ret = transport_handle_set_listening_subtype_mask(transport, &transport->events_subtype_exclusion_mask, msg); break; case AT_SET_TRANSPORT_CLIENT_TYPE: ret = transport_handle_set_client_type(transport, msg); break; default: WPRINTF("Unexpected message type=%i/%s", type, action_type_to_string(type)); ret = -EINVAL; } } out: DPRINTF("ret=%i", ret); return ret; } ssize_t transport_device_write(struct file *filp, const char __user *user_data, size_t size, loff_t *offset) { transport_t *transport = filp->private_data; msg_varsized_t msg; msg_sized_t* smsg; request_msg_img_t *msg_img; ssize_t ret; (void) offset; if (READ_ONCE(transport->shutdown)) { ret = -EIO; goto out; } if (filp->f_flags & O_NONBLOCK) { EPRINTF("'non-blocking' mode is not supported yet"); ret = -EINVAL; transport_shutdown(transport); goto out; } if (!size) { WPRINTF("'zero write' is not supported for 
" TRANSPORT_FMT, TRANSPORT_PRINT(transport)); ret = -EINVAL; transport_shutdown(transport); goto out; } if (size > TRANSPORT_MSG_SIZE_MAX) { WPRINTF("size > TRANSPORT_MSG_SIZE_MAX"); ret = -E2BIG; goto out; } smsg = msg_varsized_init(&msg, size); if (!smsg) { ret = -ENOMEM; goto out; } msg_img = MSG_IMG(smsg); if (copy_from_user(msg_img, user_data, size)) { EPRINTF("'copy_from_user()' failure"); ret = -EFAULT; transport_shutdown(transport); goto free_msg; } ret = transport_handle_msg(transport, smsg); if (ret) { // make sure error code is negative if (ret > 0) { EPRINTF("error code must be negative"); ret = -ret; } goto free_msg; } ret = size; free_msg: msg_varsized_uninit(&msg); out: DPRINTF("ret=%zi", ret); return ret; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - /* Warning: 'transport_open()' and 'transport_release()' may be simultaneously invoked by several threads or processes. Note: We can match different 'transport' instances using 'device' 'major'/'minor' from 'inode->i_rdev'. Pointer to selected 'trasport' can be stored in 'filp->private_data' for later use in '*_read()', '*_write()', etc. Note: We may create 'transport' on 'first open' and destroy it on 'last close'. */ /* There is possibility of 'deadlock' between our 'kernel' and 'userspace' code while processing events generated by our userspace process until registration of our userspace process in 'ignore' list. */ int transport_device_open(struct inode *inode, struct file *filp) { bool ok; transport_t *transport; int ret; bool safe_mode; #ifdef FMODE_NONOTIFY // We are using 'fsnotify' ourselves so avoid raising 'fsnotify' events filp->f_mode |= FMODE_NONOTIFY; #endif DPRINTF("inode->i_rdev: major=%u minor=%u", imajor(inode), iminor(inode)); DPRINTF("filp->f_flags=%X", filp->f_flags); if (filp->f_flags & O_NONBLOCK) { EPRINTF("'non-blocking' mode is not supported yet"); ret = -EINVAL; goto out; } safe_mode = filp->f_flags & O_NOCTTY; #ifndef HOOK_LIST_USE_HLIST if (safe_mode) { EPRINTF("safe mode requires LSM hook list"); ret = -EINVAL; goto out; } #endif #ifndef CONFIG_SECURITY_PATH if (safe_mode) { EPRINTF("safe mode requires LSM security path"); ret = -EINVAL; goto out; } #endif #ifndef FMODE_NONOTIFY if (safe_mode) { EPRINTF("safe mode requires fanotify FMODE_NONOTIFY flag"); ret = -EINVAL; goto out; } #endif mutex_lock(&transport_global.transport_mutex); { DPRINTF("transport_count=%u", transport_global.transport_count); transport = transport_new(); if (!transport) { WPRINTF("'%s()' failure", "transport_new"); ret = -ENOMEM; goto unlock_open_close_mutex; } filp->private_data = transport; if (!transport_global.transport_count) { // FIXME: 'attach' may fail IPRINTF("attaching interceptors"); ret = acquire_file_modify_entry(); if (ret != 0) { goto unlock_open_close_mutex; } mod_rundown_protection_set_ready(); fsnotify_events_listener_init(); register_ftrace_post_events(); lsm_hooks_init(); ret = syscall_hooks_attach(safe_mode); if (ret) { EPRINTF("'%s()' failure %i", "syscall_hooks_attach", ret); lsm_hooks_exit(); unregister_ftrace_post_events(); mod_rundown_protection_set_rundown_active(); ok = mod_rundown_protection_wait_for_rundown_timeout(msecs_to_jiffies(TRANSPORT_WAIT_RUNDOWN_TIMEOUT_MSECS)); if (!ok) { WPRINTF("Failed to wait for module rundown"); } fsnotify_events_listener_deinit(); transport_disable(transport); transport_del(transport); filp->private_data = NULL; release_file_modify_entry(); goto unlock_open_close_mutex; } ret = tracepoints_attach(); if (ret) { EPRINTF("'%s()' 
failure %i", "tracepoints_attach", ret); syscall_hooks_detach(); lsm_hooks_exit(); unregister_ftrace_post_events(); mod_rundown_protection_set_rundown_active(); ok = mod_rundown_protection_wait_for_rundown_timeout(msecs_to_jiffies(TRANSPORT_WAIT_RUNDOWN_TIMEOUT_MSECS)); if (!ok) { WPRINTF("Failed to wait for module rundown"); } fsnotify_events_listener_deinit(); transport_disable(transport); transport_del(transport); filp->private_data = NULL; release_file_modify_entry(); goto unlock_open_close_mutex; } IPRINTF("interceptors attached"); } ++transport_global.transport_count; ret = 0; } unlock_open_close_mutex: mutex_unlock(&transport_global.transport_mutex); out: DPRINTF("ret=%i", ret); if (ret) return ret; #ifdef FMODE_STREAM return stream_open(inode, filp); #else #ifdef FMODE_ATOMIC_POS filp->f_mode &= ~(FMODE_ATOMIC_POS); #endif return nonseekable_open(inode, filp); #endif } // 'release()' means 'close()' int transport_device_release(struct inode *inode, struct file *filp) { bool ok; transport_t *transport = filp->private_data; (void) inode; mutex_lock(&transport_global.transport_mutex); { transport_disable(transport); transport_global_recalculate_combined_all_event_masks_impl(); DPRINTF("transport_count=%u", transport_global.transport_count); if (!--transport_global.transport_count) { IPRINTF("detaching interceptors"); tracepoints_detach(); // FIXME: 'syscall_hooks_detach()' may fail syscall_hooks_detach(); lsm_hooks_exit(); unregister_ftrace_post_events(); mod_rundown_protection_set_rundown_active(); ok = mod_rundown_protection_wait_for_rundown_timeout(msecs_to_jiffies(TRANSPORT_WAIT_RUNDOWN_TIMEOUT_MSECS)); if (!ok) { WPRINTF("Failed to wait for module rundown"); } // It is absolutely crucial to call this after rundown protection!!! fsnotify_events_listener_deinit(); task_info_maps_clear(); release_file_modify_entry(); IPRINTF("interceptors detached"); } } mutex_unlock(&transport_global.transport_mutex); synchronize_rcu(); transport_del(transport); return 0; } int transport_device_mmap(struct file *filp, struct vm_area_struct *vma) { int ret; transport_t *transport = filp->private_data; if (READ_ONCE(transport->shutdown)) { ret = -EIO; goto out; } ret = transport_data_queue_mmap(transport, vma); out: return ret; } static long wait_msg_killable_timeout(msg_t* msg, unsigned long timeout_jiffies) { #ifndef HAVE_WAIT_EVENT_KILLABLE_TIMEOUT // 'wait_event_interruptible_timeout' has to be a define and so is // 'TASK_KILLABLE' and 'TASK_INTERRUPTIBLE'. // I need functionality of 'wait_event_interruptible_timeout' // but 'TASK_INTERRUPTIBLE' replaced with 'TASK_KILLABLE' which // is achieved using the 'define' tricks by redefining 'TASK_INTERRUPTIBLE'. // If the trick won't work, using the regular 'wait_event_timeout'. #if defined(TASK_KILLABLE) && defined(TASK_INTERRUPTIBLE) && defined(wait_event_interruptible_timeout) && !defined(signal_pending) #undef TASK_INTERRUPTIBLE #define TASK_INTERRUPTIBLE TASK_KILLABLE #define signal_pending fatal_signal_pending return wait_event_interruptible_timeout(msg->wait_queue, !atomic_read(&msg->reply_wait_count), timeout_jiffies); #undef TASK_INTERRUPTIBLE #undef signal_pending #else // Something weird is going on, rollback to 'TASK_UNINTERRUPTIBLE' variant. // It should not cause any issues though as far as APL // daemon is responding to events so it is not bad. return wait_event_timeout(msg->wait_queue, !atomic_read(&msg->reply_wait_count), timeout_jiffies); #endif #else // Just use the well defined macros available. 
static long wait_msg_killable_timeout(msg_t *msg, unsigned long timeout_jiffies)
{
#ifndef HAVE_WAIT_EVENT_KILLABLE_TIMEOUT
    // 'wait_event_interruptible_timeout' is a macro, and so are 'TASK_KILLABLE'
    // and 'TASK_INTERRUPTIBLE'.
    // We need the behavior of 'wait_event_interruptible_timeout' but with
    // 'TASK_INTERRUPTIBLE' replaced by 'TASK_KILLABLE', which is achieved by
    // temporarily redefining 'TASK_INTERRUPTIBLE' (and 'signal_pending').
    // If the trick cannot be applied, fall back to the regular 'wait_event_timeout'.
#if defined(TASK_KILLABLE) && defined(TASK_INTERRUPTIBLE) && defined(wait_event_interruptible_timeout) && !defined(signal_pending)
#undef TASK_INTERRUPTIBLE
#define TASK_INTERRUPTIBLE TASK_KILLABLE
#define signal_pending fatal_signal_pending
    return wait_event_interruptible_timeout(msg->wait_queue,
                                            !atomic_read(&msg->reply_wait_count),
                                            timeout_jiffies);
#undef TASK_INTERRUPTIBLE
#undef signal_pending
#else
    // Something unexpected is going on, fall back to the 'TASK_UNINTERRUPTIBLE'
    // variant. This should not cause any issues as long as the APL daemon keeps
    // responding to events.
    return wait_event_timeout(msg->wait_queue,
                              !atomic_read(&msg->reply_wait_count),
                              timeout_jiffies);
#endif
#else
    // Just use the well-defined macro available.
    return wait_event_killable_timeout(msg->wait_queue,
                                       !atomic_read(&msg->reply_wait_count),
                                       timeout_jiffies);
#endif
}
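/*
    Illustrative sketch (assumption: this mirrors how a sender elsewhere in this
    file would block on a queued query; actual call sites may differ):

        long remaining = wait_msg_killable_timeout(msg,
                msecs_to_jiffies(TRANSPORT_WAIT_REPLY_TIMEOUT_MSECS));
        if (remaining > 0) {
            // reply arrived: 'msg->reply_wait_count' reached zero and
            // 'msg->block' reflects the verdict applied by 'handle_reply()'
        } else if (remaining == 0) {
            // timed out waiting for userspace
        } else {
            // interrupted by a fatal signal (killable variants only)
        }
*/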