/** @file file_contexts.c @brief Cache sent fs events @details Copyright (c) 2023 Acronis International GmbH @author Bruce Wang ([email protected]) @since $Id: $ */ #include "compat.h" #include "debug.h" #include "file_contexts.h" #include "file_contexts_priv.h" #include "memory.h" #include <linux/jiffies.h> #ifndef list_first_entry_or_null #define list_first_entry_or_null(ptr, type, member) (list_empty(ptr) ? NULL : list_first_entry(ptr, type, member)) #endif static file_context_manager_t global_fs_event_cache_manager; static KMEM_STRUCT_CACHE_DECLARE(file_context_open_process_node); static KMEM_STRUCT_CACHE_DECLARE(file_context_open_file_node); static KMEM_STRUCT_CACHE_DECLARE(file_context_rw_node); static KMEM_STRUCT_CACHE_DECLARE(file_context_process_node); static KMEM_STRUCT_CACHE_DECLARE(file_context_file_modify_node); static KMEM_STRUCT_CACHE_DECLARE(interval_node); int file_contexts_init(void) { int i = 0; spin_lock_init(&global_fs_event_cache_manager.writer_lock); for (; i < MAX_TRANSPORT_EXTENDED_SIZE; i++) { global_fs_event_cache_manager.tables[i] = NULL; } KMEM_STRUCT_CACHE_NAME(file_context_open_process_node) = NULL; KMEM_STRUCT_CACHE_NAME(file_context_open_file_node) = NULL; KMEM_STRUCT_CACHE_NAME(file_context_rw_node) = NULL; KMEM_STRUCT_CACHE_NAME(file_context_process_node) = NULL; KMEM_STRUCT_CACHE_NAME(file_context_file_modify_node) = NULL; KMEM_STRUCT_CACHE_NAME(interval_node) = NULL; if (!KMEM_STRUCT_CACHE_INIT(file_context_open_process_node, 0, NULL)) { EPRINTF("Failed to create file_context_open_process_node cache"); goto fail; } if (!KMEM_STRUCT_CACHE_INIT(file_context_open_file_node, 0, NULL)) { EPRINTF("Failed to create file_context_open_file_node_t cache"); goto fail; } if (!KMEM_STRUCT_CACHE_INIT(file_context_rw_node, 0, NULL)) { EPRINTF("Failed to create file_context_rw_node cache"); goto fail; } if (!KMEM_STRUCT_CACHE_INIT(file_context_process_node, 0, NULL)) { EPRINTF("Failed to create file_context_process_node cache"); goto fail; } if (!KMEM_STRUCT_CACHE_INIT(file_context_file_modify_node, 0, NULL)) { EPRINTF("Failed to create file_context_file_modify_node cache"); goto fail; } if (!KMEM_STRUCT_CACHE_INIT(interval_node, 0, NULL)) { EPRINTF("Failed to create interval_node cache"); goto fail; } return 0; fail: file_contexts_init_fail_free(); return -ENOMEM; } void file_contexts_init_fail_free(void) { KMEM_STRUCT_CACHE_DEINIT(file_context_open_process_node); KMEM_STRUCT_CACHE_DEINIT(file_context_open_file_node); KMEM_STRUCT_CACHE_DEINIT(file_context_rw_node); KMEM_STRUCT_CACHE_DEINIT(file_context_process_node); KMEM_STRUCT_CACHE_DEINIT(file_context_file_modify_node); KMEM_STRUCT_CACHE_DEINIT(interval_node); } static inline void put_file_context_entry(file_context_tables_t *entry); void file_contexts_deinit(void) { int i = 0; spin_lock(&global_fs_event_cache_manager.writer_lock); for (; i < MAX_TRANSPORT_EXTENDED_SIZE; i++) { if (global_fs_event_cache_manager.tables[i]) { put_file_context_entry(global_fs_event_cache_manager.tables[i]); global_fs_event_cache_manager.tables[i] = NULL; } } spin_unlock(&global_fs_event_cache_manager.writer_lock); // For 'put_file_context_entry' synchronization synchronize_rcu(); rcu_barrier(); KMEM_STRUCT_CACHE_DEINIT(file_context_open_process_node); KMEM_STRUCT_CACHE_DEINIT(file_context_open_file_node); KMEM_STRUCT_CACHE_DEINIT(file_context_rw_node); KMEM_STRUCT_CACHE_DEINIT(file_context_process_node); KMEM_STRUCT_CACHE_DEINIT(file_context_file_modify_node); KMEM_STRUCT_CACHE_DEINIT(interval_node); } static inline void 
free_common_node(file_context_common_node_t *common_node) { if (common_node->pre_free_func) { common_node->pre_free_func(common_node); } kmem_cache_free(common_node->kmem, common_node); } static inline void deferred_free_common_node(struct rcu_head *head) { file_context_common_node_t *common_node = container_of(head, file_context_common_node_t, rcu); free_common_node(common_node); } static inline void get_common_node(file_context_common_node_t *common_node) { atomic_inc(&common_node->ref_count); } static inline bool get_common_node_rcu(file_context_common_node_t *common_node) { return atomic_inc_not_zero(&common_node->ref_count); } static inline void put_common_node(file_context_common_node_t *common_node) { if (atomic_dec_and_test(&common_node->ref_count)) { call_rcu(&common_node->rcu, deferred_free_common_node); } } /* This function requires lock*/ static inline void remove_common_node_from_lru(file_context_common_node_t *common_node) { list_del(&common_node->lru_list_node); common_node->lru_list_node_inserted = false; } /* This function requires lock*/ static inline void remove_and_put_common_node(file_context_common_node_t *common_node, file_context_common_table_t *common_table) { remove_common_node_from_lru(common_node); hash_del_rcu(&common_node->hash_node); common_table->size -= 1; put_common_node(common_node); } static void clear_common_table(file_context_common_table_t *common_table) { file_context_common_node_t *common_node = NULL; /* RCU WRITER */ spin_lock(&common_table->spinlock); while (1) { common_node = list_first_entry_or_null(&common_table->lru_list, file_context_common_node_t, lru_list_node); if (!common_node) { break; } remove_and_put_common_node(common_node, common_table); } spin_unlock(&common_table->spinlock); /* RCU WRITER */ } static void context_entry_clear(file_context_tables_t *tables) { DPRINTF("%s:open_table [%u] [%u]", __func__, tables->open_table.common_table.size, tables->open_table.common_table.max_size); clear_common_table(&tables->open_table.common_table); DPRINTF("%s:open_table [%u] [%u]", __func__, tables->open_table.common_table.size, tables->open_table.common_table.max_size); DPRINTF("%s:read_table [%u] [%u]", __func__, tables->read_table.common_table.size, tables->read_table.common_table.max_size); clear_common_table(&tables->read_table.common_table); DPRINTF("%s:read_table [%u] [%u]", __func__, tables->read_table.common_table.size, tables->read_table.common_table.max_size); DPRINTF("%s:write_table [%u] [%u]", __func__, tables->write_table.common_table.size, tables->write_table.common_table.max_size); clear_common_table(&tables->write_table.common_table); DPRINTF("%s:write_table [%u] [%u]", __func__, tables->write_table.common_table.size, tables->write_table.common_table.max_size); } static inline void deferred_free_context_entry(struct rcu_head *head) { file_context_tables_t *table = container_of(head, file_context_tables_t, rcu); // perhaps this is excessive, but it is better to be safe context_entry_clear(table); vmem_free(table); atomic64_sub(1, &g_memory_metrics->total_file_contexts_tables); } static inline void put_file_context_entry(file_context_tables_t *entry) { if (atomic_dec_and_test(&entry->ref_count)) { call_rcu(&entry->rcu, deferred_free_context_entry); } } static inline file_context_tables_t *get_file_context_entry(transport_id_t transport_id) { file_context_tables_t *entry; int idx = transport_id_index(transport_id); if (idx < 0 || idx >= MAX_TRANSPORT_EXTENDED_SIZE) { return NULL; } rcu_read_lock(); entry = 
rcu_dereference(global_fs_event_cache_manager.tables[idx]);
    if (entry) {
        if (entry->transport_id != transport_id) {
            DPRINTF("Transport id mismatch: %llu != %llu", entry->transport_id, transport_id);
            entry = NULL;
        } else {
            if (!atomic_inc_not_zero(&entry->ref_count)) {
                entry = NULL;
            }
        }
    }
    rcu_read_unlock();
    return entry;
}

// This function does not verify the transport_id
static inline file_context_tables_t *get_file_context_entry_by_idx(int idx)
{
    file_context_tables_t *entry;
    if (idx < 0 || idx >= MAX_TRANSPORT_EXTENDED_SIZE) {
        return NULL;
    }
    rcu_read_lock();
    entry = rcu_dereference(global_fs_event_cache_manager.tables[idx]);
    if (entry) {
        if (!atomic_inc_not_zero(&entry->ref_count)) {
            entry = NULL;
        }
    }
    rcu_read_unlock();
    return entry;
}

void release_file_context_entry(transport_id_t id)
{
    file_context_tables_t* table = NULL;
    int idx = transport_id_index(id);
    spin_lock(&global_fs_event_cache_manager.writer_lock);
    if (global_fs_event_cache_manager.tables[idx]) {
        if (global_fs_event_cache_manager.tables[idx]->transport_id == id) {
            table = global_fs_event_cache_manager.tables[idx];
            rcu_assign_pointer(global_fs_event_cache_manager.tables[idx], NULL);
        } else {
            WPRINTF("release_file_context_entry: %d, id mismatch: %llu, %llu", idx, global_fs_event_cache_manager.tables[idx]->transport_id, id);
        }
    }
    spin_unlock(&global_fs_event_cache_manager.writer_lock);
    if (table) {
        put_file_context_entry(table);
    }
    IPRINTF("release_file_context_entry: %llu\n", id);
}

static void init_file_context_common_table(file_context_common_table_t *table, struct hlist_head *hashtable_head, uint8_t hashbits, unsigned int max_size, unsigned short clean_count)
{
    table->hashtable = hashtable_head;
    INIT_LIST_HEAD(&table->lru_list);
    spin_lock_init(&table->spinlock);
    table->hashbits = hashbits;
    table->max_size = max_size;
    table->clean_count = clean_count;
    table->size = 0;
}

static file_context_tables_t* init_file_context_entry(transport_id_t id)
{
    file_context_tables_t* tables = vmem_alloc(sizeof(file_context_tables_t));
    if (!tables) {
        return NULL;
    }
    atomic64_add(1, &g_memory_metrics->total_file_contexts_tables);
    *tables = (file_context_tables_t){0};
    tables->transport_id = id;
    atomic_set(&tables->ref_count, 1);
    hash_init(tables->open_table.hashtable);
    init_file_context_common_table(&tables->open_table.common_table, tables->open_table.hashtable, FILE_CONTEXT_BIG_TABLE_SIZE_BITS, FILE_CONTEXT_BIG_TBALE_SIZE, FILE_CONTEXT_BIG_TABLE_LRU_CLEAN_SIZE);
    hash_init(tables->read_table.hashtable);
    init_file_context_common_table(&tables->read_table.common_table, tables->read_table.hashtable, FILE_CONTEXT_BIG_TABLE_SIZE_BITS, FILE_CONTEXT_BIG_TBALE_SIZE, FILE_CONTEXT_BIG_TABLE_LRU_CLEAN_SIZE);
    hash_init(tables->write_table.hashtable);
    init_file_context_common_table(&tables->write_table.common_table, tables->write_table.hashtable, FILE_CONTEXT_BIG_TABLE_SIZE_BITS, FILE_CONTEXT_BIG_TBALE_SIZE, FILE_CONTEXT_BIG_TABLE_LRU_CLEAN_SIZE);
    return tables;
}

int acquire_file_context_entry(transport_id_t id)
{
    int err = 0;
    int idx = transport_id_index(id);
    file_context_tables_t *entry = init_file_context_entry(id);
    if (!entry) {
        return -ENOMEM;
    }
    spin_lock(&global_fs_event_cache_manager.writer_lock);
    if (global_fs_event_cache_manager.tables[idx]) {
        WPRINTF("acquire_file_context_entry: %d, already exists", idx);
        err = -EEXIST;
    } else {
        rcu_assign_pointer(global_fs_event_cache_manager.tables[idx], entry);
        entry = NULL;
    }
    spin_unlock(&global_fs_event_cache_manager.writer_lock);
    if (entry) {
        put_file_context_entry(entry);
    }
    return err;
}

/* This function
requires lock*/ static inline void check_common_table_lru(file_context_common_table_t *common_table, unsigned int max_size, unsigned short clean_count) { file_context_common_node_t *common_node; unsigned long now; // Remove nodes if LRU list is too large if (common_table->size > max_size) { while (clean_count) { common_node = list_first_entry_or_null(&common_table->lru_list, file_context_common_node_t, lru_list_node); if (!common_node) { break; } remove_and_put_common_node(common_node, common_table); clean_count--; } } // Clean expired nodes now = jiffies; while (clean_count) { common_node = list_first_entry_or_null(&common_table->lru_list, file_context_common_node_t, lru_list_node); if (!common_node) { break; } if (now < msecs_to_jiffies(FILE_CONTEXT_EXPIRE_TIME_MS) + common_node->last_access_time) { break; } remove_and_put_common_node(common_node, common_table); clean_count--; } } static void insert_common_node(file_context_common_table_t *common_table, file_context_common_node_t *common_node, uint64_t key, uint8_t hashbits) { file_context_common_node_t *search_node; bool inserted = false; common_node->key = key; /* RCU WRITER */ spin_lock(&common_table->spinlock); list_add_tail(&common_node->lru_list_node, &common_table->lru_list); common_node->lru_list_node_inserted = true; // iterate through the hashtable to find the same key, if it exists, prefer the new node hash_for_each_possible_with_hashbits(common_table->hashtable, search_node, hash_node, key, hashbits) { if (common_node->key == search_node->key) { // TODO: it is better to ensure that file_key does not match but this approach is good enough remove_common_node_from_lru(search_node); hlist_replace_rcu(&search_node->hash_node, &common_node->hash_node); put_common_node(search_node); inserted = true; break; } } if (!inserted) { hash_add_rcu_hashbits(common_table->hashtable, &common_node->hash_node, key, hashbits); common_table->size += 1; check_common_table_lru(common_table, common_table->max_size, common_table->clean_count); } spin_unlock(&common_table->spinlock); /* RCU WRITER */ } static void init_common_node(file_context_common_node_t *node, struct kmem_cache* kmem) { node->last_access_time = jiffies; node->kmem = kmem; atomic_set(&node->ref_count, 1); } static inline file_context_common_node_t *find_hash_node(struct hlist_head *head, uint64_t key, uint8_t hashbits) { file_context_common_node_t *tmp = NULL; hash_for_each_possible_with_hashbits(head, tmp, hash_node, key, hashbits) { if (tmp->key == key) { return tmp; } } return NULL; } static inline file_context_common_node_t *find_hash_node_rcu(struct hlist_head *head, uint64_t key, uint8_t hashbits) { file_context_common_node_t *tmp = NULL; hash_for_each_possible_rcu_with_hashbits(head, tmp, hash_node, key, hashbits) { if (tmp->key == key) { return tmp; } } return NULL; } static inline file_context_common_node_t *lookup_common_node(file_context_common_table_t *table, uint64_t key, uint8_t hashbits) { file_context_common_node_t *common_node = NULL; /* RCU READER */ rcu_read_lock(); common_node = find_hash_node_rcu(table->hashtable, key, hashbits); if (common_node) { if (!get_common_node_rcu(common_node)) { common_node = NULL; } } rcu_read_unlock(); /* RCU READER */ if (common_node) { spin_lock(&table->spinlock); common_node->last_access_time = jiffies; if (common_node->lru_list_node_inserted) { list_del(&common_node->lru_list_node); list_add_tail(&common_node->lru_list_node, &table->lru_list); } spin_unlock(&table->spinlock); } return common_node; } static void 
lookup_common_node_all(file_context_table_type_t type , uint64_t key , file_context_common_node_t **common_nodes , const transport_ids_t* ids , bool* found_all) { int idx = 0; *found_all = true; for (; idx < MAX_TRANSPORT_SIZE; idx++) { transport_id_t transport_id = ids->ids[idx]; file_context_tables_t *tables; if (!transport_id) continue; tables = get_file_context_entry(transport_id); common_nodes[idx] = NULL; if (tables) { file_context_common_node_t *common_node = NULL; switch (type) { case FILE_CONTEXT_OPEN_TABLE: common_node = lookup_common_node(&tables->open_table.common_table, key, tables->open_table.common_table.hashbits); break; case FILE_CONTEXT_READ_TABLE: common_node = lookup_common_node(&tables->read_table.common_table, key, tables->read_table.common_table.hashbits); break; case FILE_CONTEXT_WRITE_TABLE: common_node = lookup_common_node(&tables->write_table.common_table, key, tables->write_table.common_table.hashbits); break; } if (common_node) { common_nodes[idx] = common_node; } else { *found_all = false; } put_file_context_entry(tables); } else { *found_all = false; } } } static int remove_common_node_by_key(file_context_common_table_t *common_table, uint64_t key) { file_context_common_node_t *common_node = NULL; /* RCU WRITER */ spin_lock(&common_table->spinlock); common_node = find_hash_node(common_table->hashtable, key, common_table->hashbits); if (common_node) { if (common_node->lru_list_node_inserted) { list_del(&common_node->lru_list_node); common_node->lru_list_node_inserted = false; } hash_del_rcu(&common_node->hash_node); common_table->size -= 1; } spin_unlock(&common_table->spinlock); /* RCU WRITER */ if (common_node) { put_common_node(common_node); return 0; } return -ENOENT; } // this function should be called inside common_table->spinlock // and should put the returned common_node static file_context_common_node_t * remove_common_node_by_key_no_lock(file_context_common_table_t *common_table, uint64_t key) { file_context_common_node_t *common_node = NULL; common_node = find_hash_node(common_table->hashtable, key, common_table->hashbits); if (common_node) { if (common_node->lru_list_node_inserted) { list_del(&common_node->lru_list_node); common_node->lru_list_node_inserted = false; } hash_del_rcu(&common_node->hash_node); common_table->size -= 1; } return common_node; } static int remove_common_cache(int idx, uint64_t key, file_context_table_type_t type) { int ret = 0; file_context_tables_t *tables = get_file_context_entry_by_idx(idx); if (!tables) { return -ENOENT; } switch (type) { case FILE_CONTEXT_OPEN_TABLE: ret = remove_common_node_by_key(&tables->open_table.common_table, key); break; case FILE_CONTEXT_READ_TABLE: ret = remove_common_node_by_key(&tables->read_table.common_table, key); break; case FILE_CONTEXT_WRITE_TABLE: ret = remove_common_node_by_key(&tables->write_table.common_table, key); break; } if (ret == 0) { DPRINTF("remove_common_cache[%d]: %llu", idx, key); } put_file_context_entry(tables); return ret; } void remove_common_cache_all(const file_key_t* file_key) { uint64_t key = file_key->ptr; int i = 0; for (; i < MAX_TRANSPORT_EXTENDED_SIZE; i++) { remove_common_cache(i, key, FILE_CONTEXT_OPEN_TABLE); remove_common_cache(i, key, FILE_CONTEXT_READ_TABLE); remove_common_cache(i, key, FILE_CONTEXT_WRITE_TABLE); } } static inline int cmp_file_context_key(const file_context_key_t *cache_key, const file_context_key_t *current_key) { if (cache_key->file_key.ptr != current_key->file_key.ptr || cache_key->file_key.ino != current_key->file_key.ino || 
cache_key->file_key.gen != current_key->file_key.gen || cache_key->file_key.dev != current_key->file_key.dev) { return -1; } return 0; } static inline int cmp_file_context_update_time(const file_context_key_t *cache_key, const file_context_key_t *current_key) { if (cache_key->i_mtime.tv_sec != current_key->i_mtime.tv_sec || cache_key->i_mtime.tv_nsec != current_key->i_mtime.tv_nsec || cache_key->i_ctime.tv_sec != current_key->i_ctime.tv_sec || cache_key->i_ctime.tv_nsec != current_key->i_ctime.tv_nsec) { return -1; } return 0; } static inline int cmp_file_context(const file_context_key_t *cache_key, const file_context_key_t *current_key) { if (cmp_file_context_key(cache_key, current_key) == 0 && cmp_file_context_update_time(cache_key, current_key) == 0) { return 0; } return -1; } bool check_open_cache(const transport_ids_t* ids, file_context_info_t *info) { file_context_common_node_t *common_nodes[MAX_TRANSPORT_SIZE] = {0}; bool skip; int id = 0; // skip is mapped into 'found_all'. if not all were found, skip is false lookup_common_node_all(FILE_CONTEXT_OPEN_TABLE, info->msg_info.key.file_key.ptr, common_nodes, ids, &skip); for (id = 0; id < MAX_TRANSPORT_SIZE; id++) { file_context_open_file_node_t *file_node = NULL; file_context_open_process_node_t *process_node = NULL; file_context_common_node_t *common_node = NULL; if (common_nodes[id] == NULL) { continue; } file_node = (file_context_open_file_node_t *)container_of(common_nodes[id], file_context_open_file_node_t, common_node); // for open events caching, make sure that both file_ptr key and times match if (cmp_file_context(&file_node->key, &info->msg_info.key) != 0) { skip = false; put_common_node(&file_node->common_node); continue; } common_node = lookup_common_node(&file_node->process_table.common_table, info->pid_version, file_node->process_table.common_table.hashbits); if (!common_node) { skip = false; put_common_node(&file_node->common_node); continue; } process_node = (file_context_open_process_node_t *)container_of(common_node, file_context_open_process_node_t, common_node); if ((info->flags & atomic_read(&process_node->data.flags)) == info->flags) { info->msg_info.skipped_transport_ids[id] = ids->ids[id]; } else { skip = false; } put_common_node(&file_node->common_node); put_common_node(&process_node->common_node); } return skip; } static inline file_context_open_process_node_t *add_open_process_node(file_context_common_table_t *table, const file_context_info_t *info) { file_context_open_process_node_t *open_process_node = NULL; open_process_node = KMEM_NEW0(file_context_open_process_node); if (open_process_node) { init_common_node(&open_process_node->common_node, KMEM_STRUCT_CACHE_NAME(file_context_open_process_node)); get_common_node(&open_process_node->common_node); atomic_set(&open_process_node->data.flags, 0); insert_common_node(table, &open_process_node->common_node, info->pid_version, table->hashbits); } return open_process_node; } static inline void open_file_node_pre_free(void *common_node) { file_context_open_file_node_t *file_node = (file_context_open_file_node_t *)container_of(common_node, file_context_open_file_node_t, common_node); clear_common_table(&file_node->process_table.common_table); } static inline file_context_open_file_node_t *add_open_file_node(file_context_common_table_t *table, const file_context_info_t *info, file_context_open_process_node_t **target_process) { file_context_open_file_node_t *open_file_node = NULL; file_context_open_process_node_t *open_process_node = NULL; open_file_node = 
KMEM_NEW0(file_context_open_file_node);
    if (!open_file_node) {
        return NULL;
    }
    init_common_node(&open_file_node->common_node, KMEM_STRUCT_CACHE_NAME(file_context_open_file_node));
    get_common_node(&open_file_node->common_node);
    open_file_node->common_node.pre_free_func = open_file_node_pre_free;
    hash_init(open_file_node->process_table.hashtable);
    open_file_node->key = info->msg_info.key;
    init_file_context_common_table(&open_file_node->process_table.common_table, open_file_node->process_table.hashtable, FILE_CONTEXT_SMALL_TABLE_SIZE_BITS, FILE_CONTEXT_SMALL_TABLE_MAX_SIZE, FILE_CONTEXT_SMALL_TABLE_LRU_CLEAN_SIZE);
    insert_common_node(table, &open_file_node->common_node, info->msg_info.key.file_key.ptr, table->hashbits);
    open_process_node = add_open_process_node(&open_file_node->process_table.common_table, info);
    if (open_process_node) {
        *target_process = open_process_node;
    }
    return open_file_node;
}

static int add_open_node(file_context_big_table_t *table, const file_context_info_t *info, file_context_open_file_node_t **file_node, file_context_open_process_node_t **process_node)
{
    file_context_open_file_node_t *tmp_file_node = NULL;
    file_context_open_process_node_t *tmp_process_node = NULL;
    file_context_common_node_t *common_node = NULL;
    common_node = lookup_common_node(&table->common_table, info->msg_info.key.file_key.ptr, table->common_table.hashbits);
    if (common_node) {
        tmp_file_node = (file_context_open_file_node_t *)container_of(common_node, file_context_open_file_node_t, common_node);
        // If the key or times mismatch, drop the reference and 'forget' the stale node;
        // the fresh node added below will replace it in the hashtable
        if (cmp_file_context(&tmp_file_node->key, &info->msg_info.key) != 0) {
            put_common_node(common_node);
            common_node = NULL;
        }
    }
    if (common_node) {
        tmp_file_node = (file_context_open_file_node_t *)container_of(common_node, file_context_open_file_node_t, common_node);
        common_node = lookup_common_node(&tmp_file_node->process_table.common_table, info->pid_version, tmp_file_node->process_table.common_table.hashbits);
        if (common_node) {
            tmp_process_node = (file_context_open_process_node_t *)container_of(common_node, file_context_open_process_node_t, common_node);
        } else {
            tmp_process_node = add_open_process_node(&tmp_file_node->process_table.common_table, info);
        }
    } else {
        tmp_file_node = add_open_file_node(&table->common_table, info, &tmp_process_node);
    }
    *file_node = tmp_file_node;
    *process_node = tmp_process_node;
    if (*file_node && *process_node) {
        return 0;
    }
    return -ENOENT;
}

int add_open_cache(transport_id_t id, const file_context_info_t *info, file_context_open_file_t **file_node_data, file_context_open_process_t **process_node_data)
{
    int ret = 0;
    file_context_open_file_node_t* file_node = NULL;
    file_context_open_process_node_t* process_node = NULL;
    file_context_tables_t *tables = get_file_context_entry(id);
    if (!tables) {
        return -ENOENT;
    }
    ret = add_open_node(&tables->open_table, info, &file_node, &process_node);
    if (ret == 0) {
        DPRINTF("add_open_cache[%llu]: %llu, %llu", id, info->msg_info.key.file_key.ptr, info->pid_version);
    } else {
        EPRINTF("add_open_cache failed[%llu]: %llu, %llu", id, info->msg_info.key.file_key.ptr, info->pid_version);
    }
    put_file_context_entry(tables);
    *file_node_data = file_node ? &file_node->data : NULL;
    *process_node_data = process_node ?
&process_node->data : NULL; return ret; } void put_open_cache(file_context_open_file_t *file_node_data, file_context_open_process_t *process_node_data) { file_context_open_file_node_t *file_node; file_context_open_process_node_t *process_node; if (file_node_data) { file_node = container_of(file_node_data, file_context_open_file_node_t, data); put_common_node(&file_node->common_node); } if (process_node_data) { process_node = container_of(process_node_data, file_context_open_process_node_t, data); put_common_node(&process_node->common_node); } } /* This function requires lock*/ static interval_node_t *malloc_interval_node(uint64_t low, uint64_t high, interval_set_t *set) { interval_node_t *node = KMEM_NEW(interval_node); if (!node) { return NULL; } RB_CLEAR_NODE(&node->rb); node->low = low; node->high = high; set->interval_count++; #ifdef INTERVAL_SET_DEBUG set->total_interval_size += (node->high - node->low); #endif return node; } /* This function requires lock*/ static void remove_interval_node(struct rb_node *rb_node, interval_set_t *set) { interval_node_t *node; if (!rb_node) { return; } node = rb_entry(rb_node, interval_node_t, rb); set->interval_count--; #ifdef INTERVAL_SET_DEBUG set->total_interval_size -= (node->high - node->low); #endif rb_erase(rb_node, &set->root); KMEM_DELETE(interval_node, node); } /* This function requires lock*/ void clean_interval_tree(interval_set_t *set) { struct rb_node *rb_node = set->root.rb_node; while (rb_node) { remove_interval_node(rb_node, set); rb_node = set->root.rb_node; } } // node contains (low, high) static bool contain(interval_node_t *node, uint64_t low, uint64_t high) { if (node->low <= low && high <= node->high) return true; return false; } #ifndef list_last_entry #define list_last_entry(ptr, type, member) \ list_entry((ptr)->prev, type, member) #endif /* This function requires lock For each node and new node, possible situation: 1. node contains new node, return true 2. new node contains node, remove overlapped node, check left and right 3. new node is less/greater than node, check left/right 4. 
new node is left/right overlapped with node, extend new node's low/high, remove node, check left/right */ static bool check_overlap(uint64_t *low, uint64_t *high, struct rb_root *root, struct list_head *del_list) { interval_node_t *cur, *next; struct list_head stack; struct rb_node *rb_node; INIT_LIST_HEAD(&stack); rb_node = root->rb_node; if (!rb_node) { return false; } cur = rb_entry(rb_node, interval_node_t, rb); list_add_tail(&cur->stack_node, &stack); while (!list_empty(&stack)) { cur = list_last_entry(&stack, interval_node_t, stack_node); list_del(&cur->stack_node); // assume that s0 is current node, s1 is new node // current node contains new node /* tree: (14,20) (4,7) (21,22) (1,3) (9,13) stack: (14,20) new node: (17,18) (17,18) is contained by (14,20) ____s0---s1=s1--s0____ 14 17 18 20 do nothing, return true */ if (contain(cur, *low, *high)) { return true; } // new node contains current node /* tree: (14,17) (4,7) (18,19) (1,3) (9,13) stack: (14,17) new node: (10,20) (10,20) contains (14,17) ____s1====s0---s0===s1____ 10 14 17 20 after operation: deleted list: (14,17) stack: (18,19), (4,7) */ if ((*low < cur->low) && (*high > cur->high)) { list_add_tail(&cur->del_list_node, del_list); if (cur->rb.rb_right) { next = rb_entry(cur->rb.rb_right, interval_node_t, rb); list_add_tail(&next->stack_node, &stack); } if (cur->rb.rb_left) { next = rb_entry(cur->rb.rb_left, interval_node_t, rb); list_add_tail(&next->stack_node, &stack); } continue; } // new node is less than current node /* tree: (4,7) (1,3) (9,13) stack: (4,7) new node: (0,2) (0,2) is less than (4,7) ____s1==s1__s0---s0____ 0 2 4 7 after operation: deleted list: stack: (1,3) */ if (*high < cur->low) { if (cur->rb.rb_left) { next = rb_entry(cur->rb.rb_left, interval_node_t, rb); list_add_tail(&next->stack_node, &stack); } continue; } // new node is left overlapped with current node /* tree: (4,8) (1,3) (9,13) stack: (4,8) new node: (2,6) (2,6) is left overlapped with (4,8) ____s1==s0xxs1--s0____ 2 4 6 8 after operation: new node->(2, 8) deleted list: (4,8) stack: (1,3) */ else if (*high <= cur->high) { list_add_tail(&cur->del_list_node, del_list); *high = cur->high; if (cur->rb.rb_left) { next = rb_entry(cur->rb.rb_left, interval_node_t, rb); list_add_tail(&next->stack_node, &stack); } continue; } // new node is greater than node /* tree: (10,17) (4,7) (18,19) stack: (10,17) new node: (20,30) (20,30) is greater than (10,17) ____s0-------s0___s1=========s1____ 10 17 20 30 after operation: deleted list: stack: (18,19) */ if (*low > cur->high) { if (cur->rb.rb_right) { next = rb_entry(cur->rb.rb_right, interval_node_t, rb); list_add_tail(&next->stack_node, &stack); } continue; } // new node is right overlapped with node /* tree: (10,17) (4,7) (18,19) stack: (10,17) new node: (15,30) (15,30) is right overlapped with (10,17) s1 ____s0-----s1xxs0=========s1____ 10 15 17 30 after operation: new node->(10, 30) deleted list:(10,17) stack: (18,19) */ else if (cur->low <= *low) { list_add_tail(&cur->del_list_node, del_list); *low = cur->low; if (cur->rb.rb_right) { next = rb_entry(cur->rb.rb_right, interval_node_t, rb); list_add_tail(&next->stack_node, &stack); } continue; } EPRINTF("Something wrong"); } return false; } /* This function requires lock 1. find the overlapped interval 2. remove overlapped interval 3. 
insert new interval */ bool insert_interval(uint64_t low, uint64_t high, interval_set_t *set) { struct rb_node **new, *parent = NULL; interval_node_t *new_node, *cur_node; struct list_head del_list; uint64_t l = low, h = high; INIT_LIST_HEAD(&del_list); if (check_overlap(&l, &h, &set->root, &del_list)) { return true; } new_node = malloc_interval_node(l, h, set); if (!new_node) { return false; } while (!list_empty(&del_list)) { // erase from leaf node cur_node = list_last_entry(&del_list, interval_node_t, del_list_node); list_del(&cur_node->del_list_node); remove_interval_node(&cur_node->rb, set); } new = &set->root.rb_node; while (*new) { parent = *new; cur_node = rb_entry(parent, interval_node_t, rb); if (new_node->high < cur_node->low) new = &parent->rb_left; else new = &parent->rb_right; } rb_link_node(&new_node->rb, parent, new); rb_insert_color(&new_node->rb, &set->root); return true; } /* This function requires lock 1. node contains new node, return true 2. less/greater, check left/right 3. overlapped/new node contains node, return false */ bool check_interval(uint64_t low, uint64_t high, interval_set_t *set) { interval_node_t *cur; struct rb_node *rb_node = set->root.rb_node; while (rb_node) { cur = rb_entry(rb_node, interval_node_t, rb); if (contain(cur, low, high)) return true; if (high < cur->low) { rb_node = rb_node->rb_left; } else if (low > cur->high) { rb_node = rb_node->rb_right; } else { // overlapped return false; } } return false; } static file_context_rw_node_t *add_rw_cache_node(transport_id_t id, file_context_info_t *info, file_context_table_type_t type); /* Send read events only the first time the file is fully read The interval will be reset when the file is changed Skip event when return is true. */ bool check_and_update_read_cache(const transport_ids_t* ids, file_context_info_t *info) { file_context_common_node_t *common_nodes[MAX_TRANSPORT_SIZE] = {0}; bool skip = true; bool found_all = false; int id = 0; loff_t file_size = i_size_read((const struct inode *)info->msg_info.key.file_key.ptr); if (file_size < 0 || file_size > FILE_CONTEXT_MAX_FILE_SIZE) { return true; } lookup_common_node_all(FILE_CONTEXT_READ_TABLE, info->msg_info.key.file_key.ptr, common_nodes, ids, &found_all); for (id = 0; id < MAX_TRANSPORT_SIZE; id++) { transport_id_t transport_id = ids->ids[id]; file_context_rw_node_t *read_node = NULL; // By default skip the read event info->msg_info.skipped_transport_ids[id] = transport_id; if (common_nodes[id]) { read_node = (file_context_rw_node_t *)container_of(common_nodes[id], file_context_rw_node_t, common_node); if (cmp_file_context_key(&read_node->key, &info->msg_info.key) != 0) { put_common_node(common_nodes[id]); common_nodes[id] = NULL; read_node = NULL; } } if (common_nodes[id] == NULL) { read_node = add_rw_cache_node(ids->ids[id], info, FILE_CONTEXT_READ_TABLE); if (read_node) { common_nodes[id] = &read_node->common_node; } } if (common_nodes[id] == NULL) { continue; } spin_lock(&read_node->data.spinlock); if (read_node->data.interval_set.interval_count < (uint64_t)file_size) { insert_interval(info->low, info->high, &read_node->data.interval_set); } if (check_interval(0, file_size, &read_node->data.interval_set)) { if (atomic_cmpxchg(&read_node->data.is_reported, false, true) == false) { // send full read event by setting this flag info->msg_info.skipped_transport_ids[id] = 0; skip = false; } } spin_unlock(&read_node->data.spinlock); put_common_node(common_nodes[id]); } return skip; } // Skip event when return is true. 
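/*
 * Worked example of the interval-set bookkeeping shared by the read and write
 * caches (a sketch only: it is not called anywhere, the offsets are hypothetical,
 * the kmem caches from file_contexts_init() are assumed to be set up, and in the
 * real callers the owning node's data.spinlock is held around these calls):
 *
 *     interval_set_t set = { .root = RB_ROOT };
 *
 *     insert_interval(0, 4096, &set);        // set: [0,4096]
 *     insert_interval(8192, 12288, &set);    // set: [0,4096] [8192,12288]
 *     check_interval(0, 12288, &set);        // false: [4096,8192] is not covered yet
 *
 *     insert_interval(4096, 8192, &set);     // touches both intervals, merged to [0,12288]
 *     check_interval(0, 12288, &set);        // true: the whole range is covered, so
 *                                            //   check_and_update_read_cache would now
 *                                            //   report a full read
 *
 *     clean_interval_tree(&set);             // release the interval_node allocations
 */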
bool check_write_cache(const transport_ids_t* ids, file_context_info_t *info, file_context_table_type_t type) { file_context_common_node_t *common_nodes[MAX_TRANSPORT_SIZE] = {0}; bool skip; int id = 0; lookup_common_node_all(type, info->msg_info.key.file_key.ptr, common_nodes, ids, &skip); for (id = 0; id < MAX_TRANSPORT_SIZE; id++) { file_context_rw_node_t *write_node = NULL; if (common_nodes[id] == NULL) { continue; } write_node = (file_context_rw_node_t *)container_of(common_nodes[id], file_context_rw_node_t, common_node); if (cmp_file_context_key(&write_node->key, &info->msg_info.key) == 0) { spin_lock(&write_node->data.spinlock); if (check_interval(info->low, info->high, &write_node->data.interval_set)) { info->msg_info.skipped_transport_ids[id] = ids->ids[id]; } else { skip = false; } spin_unlock(&write_node->data.spinlock); } else { skip = false; } put_common_node(common_nodes[id]); } return skip; } static inline void rw_node_pre_free(void *common_node) { file_context_rw_node_t *rw_node = (file_context_rw_node_t *)container_of(common_node, file_context_rw_node_t, common_node); spin_lock(&rw_node->data.spinlock); clean_interval_tree(&rw_node->data.interval_set); spin_unlock(&rw_node->data.spinlock); } static file_context_rw_node_t *add_rw_node(file_context_big_table_t *table, file_context_info_t *info) { file_context_rw_node_t *node = NULL; file_context_common_node_t *common_node = NULL; loff_t file_size = i_size_read((const struct inode *)info->msg_info.key.file_key.ptr); if (file_size < 0 || file_size > FILE_CONTEXT_MAX_FILE_SIZE || info->low > (uint64_t) file_size) { return NULL; } if (info->high > (uint64_t) file_size) { info->high = (uint64_t)file_size; } common_node = lookup_common_node(&table->common_table, info->msg_info.key.file_key.ptr, table->common_table.hashbits); if (common_node) { node = (file_context_rw_node_t *)container_of(common_node, file_context_rw_node_t, common_node); // For 'read' and 'write' events, time changes are not important. 
// It is the fact that particular region was accessed that matters if (cmp_file_context_key(&node->key, &info->msg_info.key) != 0) { put_common_node(common_node); node = NULL; } } if (!node) { node = KMEM_NEW0(file_context_rw_node); if (node) { init_common_node(&node->common_node, KMEM_STRUCT_CACHE_NAME(file_context_rw_node)); get_common_node(&node->common_node); node->common_node.pre_free_func = rw_node_pre_free; node->key = info->msg_info.key; spin_lock_init(&node->data.spinlock); node->data.interval_set.root = RB_ROOT; insert_common_node(&table->common_table, &node->common_node, info->msg_info.key.file_key.ptr, table->common_table.hashbits); } } if (!node) { return NULL; } spin_lock(&node->data.spinlock); if (node->data.interval_set.interval_count < (uint64_t)file_size) { insert_interval(info->low, info->high, &node->data.interval_set); } spin_unlock(&node->data.spinlock); return node; } static file_context_rw_node_t *add_rw_cache_node(transport_id_t id, file_context_info_t *info, file_context_table_type_t type) { file_context_rw_node_t *node = NULL; file_context_tables_t *tables = get_file_context_entry(id); if (!tables) { return NULL; } info->low = FILE_CONTEXT_CHUNK_LOWER_BOUND(info->low); info->high = FILE_CONTEXT_CHUNK_UPPER_BOUND(info->high); switch (type) { case FILE_CONTEXT_READ_TABLE: node = add_rw_node(&tables->read_table, info); break; case FILE_CONTEXT_WRITE_TABLE: node = add_rw_node(&tables->write_table, info); break; default: break; } if (node) { DPRINTF("add_rw_cache_node[%llu]: %llu", id, info->msg_info.key.file_key.ptr); } else { EPRINTF("add_rw_cache_node failed[%llu]: %llu", id, info->msg_info.key.file_key.ptr); } put_file_context_entry(tables); return node; } file_context_rw_t *add_rw_cache(transport_id_t id, file_context_info_t *info, file_context_table_type_t type) { file_context_rw_node_t *node = add_rw_cache_node(id, info, type); return node ? 
&node->data : NULL;
}

void put_rw_cache(file_context_rw_t *node_data)
{
    file_context_rw_node_t *node = container_of(node_data, file_context_rw_node_t, data);
    put_common_node(&node->common_node);
}

static inline file_context_process_node_t *lookup_or_add_process_node(file_context_common_table_t *table, const file_context_info_t *info)
{
    file_context_process_node_t *process_node = NULL;
    file_context_common_node_t *common_node = NULL;
    common_node = lookup_common_node(table, info->unique_pid, table->hashbits);
    if (common_node) {
        process_node = (file_context_process_node_t *)container_of(common_node, file_context_process_node_t, common_node);
    }
    if (!process_node) {
        process_node = KMEM_NEW0(file_context_process_node);
        if (!process_node) {
            return NULL;
        }
        init_common_node(&process_node->common_node, KMEM_STRUCT_CACHE_NAME(file_context_process_node));
        get_common_node(&process_node->common_node);
        atomic_set(&process_node->data.flags, 0);
        insert_common_node(table, &process_node->common_node, info->unique_pid, table->hashbits);
    }
    return process_node;
}

static inline void file_modify_node_pre_free(void *common_node)
{
    file_context_file_modify_node_t *file_node = (file_context_file_modify_node_t *)container_of(common_node, file_context_file_modify_node_t, common_node);
    clear_common_table(&file_node->process_table.common_table);
}

static file_context_file_modify_node_t *lookup_or_add_file_modify_node(file_context_common_table_t *table, const file_context_info_t *info)
{
    file_context_file_modify_node_t *tmp_file_node = NULL;
    file_context_common_node_t *common_node = NULL;
    common_node = lookup_common_node(table, info->msg_info.key.file_key.ptr, table->hashbits);
    if (common_node) {
        tmp_file_node = (file_context_file_modify_node_t *)container_of(common_node, file_context_file_modify_node_t, common_node);
        // If the key or times mismatch, drop the reference and 'forget' the stale node;
        // the fresh node added below will replace it in the hashtable
        if (cmp_file_context(&tmp_file_node->key, &info->msg_info.key) != 0) {
            put_common_node(common_node);
            tmp_file_node = NULL;
        }
    }
    if (!tmp_file_node) {
        tmp_file_node = KMEM_NEW0(file_context_file_modify_node);
        if (!tmp_file_node) {
            return NULL;
        }
        init_common_node(&tmp_file_node->common_node, KMEM_STRUCT_CACHE_NAME(file_context_file_modify_node));
        get_common_node(&tmp_file_node->common_node);
        tmp_file_node->common_node.pre_free_func = file_modify_node_pre_free;
        hash_init(tmp_file_node->process_table.hashtable);
        tmp_file_node->key = info->msg_info.key;
        init_file_context_common_table(&tmp_file_node->process_table.common_table, tmp_file_node->process_table.hashtable, FILE_CONTEXT_SMALL_TABLE_SIZE_BITS, FILE_CONTEXT_SMALL_TABLE_MAX_SIZE, FILE_CONTEXT_SMALL_TABLE_LRU_CLEAN_SIZE);
        insert_common_node(table, &tmp_file_node->common_node, info->msg_info.key.file_key.ptr, table->hashbits);
    }
    return tmp_file_node;
}

static bool add_file_modify_cache_(file_context_big_table_t *table, const file_context_info_t *info)
{
    file_context_file_modify_node_t *tmp_file_node = NULL;
    file_context_process_node_t *tmp_process_node = NULL;
    tmp_file_node = lookup_or_add_file_modify_node(&table->common_table, info);
    if (!tmp_file_node) {
        return false;
    }
    tmp_process_node = lookup_or_add_process_node(&tmp_file_node->process_table.common_table, info);
    if (!tmp_process_node) {
        put_common_node(&tmp_file_node->common_node);
        return false;
    }
    atomic_set(&tmp_process_node->data.flags, 1);
    put_common_node(&tmp_process_node->common_node);
    put_common_node(&tmp_file_node->common_node);
    return true;
}

bool add_file_modify_cache(file_context_info_t *info)
{
    bool ret = false;
    file_context_tables_t *tables = get_file_context_entry(MAX_TRANSPORT_EXTENDED_SIZE - 1);
    if (!tables) {
        EPRINTF("add_file_modify_cache: failed to get table");
        return false;
    }
    ret = add_file_modify_cache_(&tables->write_table, info);
    if (ret) {
        DPRINTF("add_file_modify_cache: %llu", info->msg_info.key.file_key.ptr);
    } else {
        EPRINTF("add_file_modify_cache failed: %llu", info->msg_info.key.file_key.ptr);
    }
    put_file_context_entry(tables);
    return ret;
}

// This function should be called on close; it removes the process/file nodes from the tables
bool check_update_file_modify_cache(file_context_info_t *info)
{
    file_context_tables_t *tables;
    bool modified = false;
    file_context_common_node_t *common_node = NULL;
    file_context_file_modify_node_t *file_node = NULL;
    file_context_process_node_t *process_node = NULL;
    tables = get_file_context_entry(MAX_TRANSPORT_EXTENDED_SIZE - 1);
    if (!tables) {
        return false;
    }
    common_node = lookup_common_node(&tables->write_table.common_table, info->msg_info.key.file_key.ptr, tables->write_table.common_table.hashbits);
    if (!common_node) {
        put_file_context_entry(tables);
        return false;
    }
    file_node = (file_context_file_modify_node_t *)container_of(common_node, file_context_file_modify_node_t, common_node);
    if (cmp_file_context(&file_node->key, &info->msg_info.key) != 0) {
        put_common_node(&file_node->common_node);
        put_file_context_entry(tables);
        return false;
    }
    common_node = lookup_common_node(&file_node->process_table.common_table, info->unique_pid, file_node->process_table.common_table.hashbits);
    if (!common_node) {
        put_common_node(&file_node->common_node);
        put_file_context_entry(tables);
        return false;
    }
    process_node = (file_context_process_node_t *)container_of(common_node, file_context_process_node_t, common_node);
    if (atomic_read(&process_node->data.flags)) {
        modified = true;
    }
    {
        // Remove this process node and, if it was the last one, the file node as well
        file_context_common_node_t *common_nodes[2] = {NULL, NULL};
        /* RCU WRITER */
        spin_lock(&file_node->process_table.common_table.spinlock);
        common_nodes[0] = remove_common_node_by_key_no_lock(&file_node->process_table.common_table, info->unique_pid);
        if (file_node->process_table.common_table.size == 0) {
            spin_lock(&tables->write_table.common_table.spinlock);
            common_nodes[1] = remove_common_node_by_key_no_lock(&tables->write_table.common_table, info->msg_info.key.file_key.ptr);
            spin_unlock(&tables->write_table.common_table.spinlock);
        }
        spin_unlock(&file_node->process_table.common_table.spinlock);
        /* RCU WRITER */
        // put common_node from remove_common_node_by_key_no_lock
        if (common_nodes[0]) {
            put_common_node(common_nodes[0]);
        }
        if (common_nodes[1]) {
            put_common_node(common_nodes[1]);
        }
    }
    put_common_node(&file_node->common_node);
    put_common_node(&process_node->common_node);
    // Keep the table reference until its write_table spinlock above is no longer needed
    put_file_context_entry(tables);
    return modified;
}

int acquire_file_modify_entry(void)
{
    file_context_tables_t *entry;
    entry = init_file_context_entry(MAX_TRANSPORT_EXTENDED_SIZE - 1);
    if (!entry) {
        EPRINTF("acquire_file_modify_entry: init_file_context_entry failed");
        return -ENOMEM;
    }
    spin_lock(&global_fs_event_cache_manager.writer_lock);
    if (global_fs_event_cache_manager.tables[MAX_TRANSPORT_EXTENDED_SIZE - 1]) {
        WPRINTF("acquire_file_modify_entry: %d, already exists", MAX_TRANSPORT_EXTENDED_SIZE - 1);
    } else {
        rcu_assign_pointer(global_fs_event_cache_manager.tables[MAX_TRANSPORT_EXTENDED_SIZE - 1], entry);
        entry = NULL;
    }
    spin_unlock(&global_fs_event_cache_manager.writer_lock);
    if (entry) {
        put_file_context_entry(entry);
    }
    IPRINTF("acquire_file_modify_entry\n");
    return 0;
}

void release_file_modify_entry(void)
{
    file_context_tables_t *entry = NULL;
spin_lock(&global_fs_event_cache_manager.writer_lock); if (global_fs_event_cache_manager.tables[MAX_TRANSPORT_EXTENDED_SIZE - 1]) { entry = global_fs_event_cache_manager.tables[MAX_TRANSPORT_EXTENDED_SIZE - 1]; rcu_assign_pointer(global_fs_event_cache_manager.tables[MAX_TRANSPORT_EXTENDED_SIZE - 1], NULL); } spin_unlock(&global_fs_event_cache_manager.writer_lock); if (entry) { put_file_context_entry(entry); } IPRINTF("release_file_modify_entry\n"); }
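
#ifdef FILE_CONTEXTS_USAGE_EXAMPLE
/*
 * Usage sketch of the per-transport open-event cache. This is an illustration
 * only and is compiled out by default: FILE_CONTEXTS_USAGE_EXAMPLE is a
 * hypothetical guard, the transport id and the partially filled
 * file_context_info_t are made up, and a real caller would populate info from
 * the intercepted open event and record the sent flags on the cached process
 * node before dropping the references.
 */
static void file_contexts_usage_example(void)
{
    transport_id_t id = 1;                          /* hypothetical transport */
    transport_ids_t ids = { .ids = { id } };        /* only one subscribed transport */
    file_context_info_t info = { .flags = 1 };      /* made-up event; real callers fill the key, pid, etc. */
    file_context_open_file_t *file_data = NULL;
    file_context_open_process_t *process_data = NULL;

    if (file_contexts_init()) {
        return;
    }
    if (acquire_file_context_entry(id)) {
        goto deinit;
    }

    /* If every subscribed transport already saw this open with these flags,
     * the event can be skipped; otherwise cache it and send it. */
    if (!check_open_cache(&ids, &info)) {
        if (add_open_cache(id, &info, &file_data, &process_data) == 0) {
            /* a real caller would record the sent flags in process_data here */
            put_open_cache(file_data, process_data);
        }
    }

    release_file_context_entry(id);
deinit:
    file_contexts_deinit();
}
#endif /* FILE_CONTEXTS_USAGE_EXAMPLE */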