/**
@file     file_contexts.c
@brief    Cache sent fs events
@details  Copyright (c) 2023 Acronis International GmbH
@author   Bruce Wang ([email protected])
@since    $Id: $
*/

#include "compat.h"
#include "debug.h"
#include "file_contexts.h"
#include "file_contexts_priv.h"
#include "memory.h"

#include <linux/jiffies.h>

#ifndef list_first_entry_or_null
#define list_first_entry_or_null(ptr, type, member) (list_empty(ptr) ? NULL : list_first_entry(ptr, type, member))
#endif

typedef struct
{
    file_context_tree_node_t node;
} file_context_process_node_t;

static file_context_manager_t global_fs_event_cache_manager;
static KMEM_STRUCT_CACHE_DECLARE(file_context_open_process_node);
static KMEM_STRUCT_CACHE_DECLARE(file_context_open_file_node);
static KMEM_STRUCT_CACHE_DECLARE(file_context_rw_node);
static KMEM_STRUCT_CACHE_DECLARE(file_context_process_node);
static KMEM_STRUCT_CACHE_DECLARE(file_context_file_modify_node);
static KMEM_STRUCT_CACHE_DECLARE(interval_node);

int file_contexts_init(void)
{
    int i = 0;
    spin_lock_init(&global_fs_event_cache_manager.writer_lock);
    for (; i < MAX_TRANSPORT_SIZE; i++)
    {
        global_fs_event_cache_manager.tables_for_transport[i] = NULL;
    }
    global_fs_event_cache_manager.close_modified_table = NULL;

    KMEM_STRUCT_CACHE_NAME(file_context_open_process_node) = NULL;
    KMEM_STRUCT_CACHE_NAME(file_context_open_file_node)    = NULL;
    KMEM_STRUCT_CACHE_NAME(file_context_rw_node)           = NULL;
    KMEM_STRUCT_CACHE_NAME(file_context_process_node)      = NULL;
    KMEM_STRUCT_CACHE_NAME(file_context_file_modify_node)  = NULL;
    KMEM_STRUCT_CACHE_NAME(interval_node)                  = NULL;

    if (!KMEM_STRUCT_CACHE_INIT(file_context_open_process_node, 0, NULL))
    {
        EPRINTF("Failed to create file_context_open_process_node cache");
        goto fail;
    }

    if (!KMEM_STRUCT_CACHE_INIT(file_context_open_file_node, 0, NULL))
    {
        EPRINTF("Failed to create file_context_open_file_node_t cache");
        goto fail;
    }

    if (!KMEM_STRUCT_CACHE_INIT(file_context_rw_node, 0, NULL))
    {
        EPRINTF("Failed to create file_context_rw_node cache");
        goto fail;
    }

    if (!KMEM_STRUCT_CACHE_INIT(file_context_process_node, 0, NULL))
    {
        EPRINTF("Failed to create file_context_process_node cache");
        goto fail;
    }

    if (!KMEM_STRUCT_CACHE_INIT(file_context_file_modify_node, 0, NULL))
    {
        EPRINTF("Failed to create file_context_file_modify_node cache");
        goto fail;
    }

    if (!KMEM_STRUCT_CACHE_INIT(interval_node, 0, NULL))
    {
        EPRINTF("Failed to create interval_node cache");
        goto fail;
    }

    return 0;

fail:
    file_contexts_init_fail_free();
    return -ENOMEM;
}
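
// Typical lifecycle, as suggested by the API in this file (the exact call
// sites live elsewhere): file_contexts_init() once at load time; then, per
// transport, acquire_file_context_entry() followed by
// acquire_file_context_table() for each table type, and
// acquire_file_modify_entry() for the shared close-modified cache; the
// check_... / add_... helpers run on the hot path; the release_... functions
// and file_contexts_deinit() undo the above in reverse order on teardown.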

void file_contexts_init_fail_free(void)
{
    KMEM_STRUCT_CACHE_DEINIT(file_context_open_process_node);
    KMEM_STRUCT_CACHE_DEINIT(file_context_open_file_node);
    KMEM_STRUCT_CACHE_DEINIT(file_context_rw_node);
    KMEM_STRUCT_CACHE_DEINIT(file_context_process_node);
    KMEM_STRUCT_CACHE_DEINIT(file_context_file_modify_node);
    KMEM_STRUCT_CACHE_DEINIT(interval_node);
}

static inline void put_file_context_big_table(file_context_big_table_t *entry);
// must be called under global_fs_event_cache_manager.writer_lock
static inline void put_file_context_entry(file_context_tables_t *entry)
{
    if (entry->open_table) {
        put_file_context_big_table(entry->open_table);
        rcu_assign_pointer(entry->open_table, NULL);
    }
    if (entry->read_table) {
        put_file_context_big_table(entry->read_table);
        rcu_assign_pointer(entry->read_table, NULL);
    }
    if (entry->write_table) {
        put_file_context_big_table(entry->write_table);
        rcu_assign_pointer(entry->write_table, NULL);
    }
}

void file_contexts_deinit(void)
{
    int i;
    file_context_tables_t* tables_to_free[MAX_TRANSPORT_SIZE];
    for (i = 0; i < MAX_TRANSPORT_SIZE; i++)
    {
        tables_to_free[i] = NULL;
    }

    spin_lock(&global_fs_event_cache_manager.writer_lock);
    for (i = 0; i < MAX_TRANSPORT_SIZE; i++)
    {
        if (global_fs_event_cache_manager.tables_for_transport[i])
        {
            tables_to_free[i] = global_fs_event_cache_manager.tables_for_transport[i];
            put_file_context_entry(global_fs_event_cache_manager.tables_for_transport[i]);
            rcu_assign_pointer(global_fs_event_cache_manager.tables_for_transport[i], NULL);
        }
    }
    if (global_fs_event_cache_manager.close_modified_table)
    {
        put_file_context_big_table(global_fs_event_cache_manager.close_modified_table);
        rcu_assign_pointer(global_fs_event_cache_manager.close_modified_table, NULL);
    }
    spin_unlock(&global_fs_event_cache_manager.writer_lock);
    synchronize_rcu();

    for (i = 0; i < MAX_TRANSPORT_SIZE; i++)
    {
        if (tables_to_free[i])
            mem_free(tables_to_free[i]);
    }

    // For 'put_file_context_big_table' synchronization
    rcu_barrier();
    KMEM_STRUCT_CACHE_DEINIT(file_context_open_process_node);
    KMEM_STRUCT_CACHE_DEINIT(file_context_open_file_node);
    KMEM_STRUCT_CACHE_DEINIT(file_context_rw_node);
    KMEM_STRUCT_CACHE_DEINIT(file_context_process_node);
    KMEM_STRUCT_CACHE_DEINIT(file_context_file_modify_node);
    KMEM_STRUCT_CACHE_DEINIT(interval_node);
}

static inline void get_common_node(file_context_common_node_t *common_node)
{
    atomic_inc(&common_node->ref_count);
}

static inline bool get_common_node_rcu(file_context_common_node_t *common_node)
{
    return atomic_inc_not_zero(&common_node->ref_count);
}

static inline bool put_test_common_node(file_context_common_node_t *common_node)
{
    return atomic_dec_and_test(&common_node->ref_count);
}

static inline void get_ht_node(file_context_ht_node_t *ht_node)
{
    get_common_node(&ht_node->common);
}

static inline bool get_ht_node_rcu(file_context_ht_node_t *ht_node)
{
    return get_common_node_rcu(&ht_node->common);
}

static inline void put_ht_node(file_context_ht_node_t *ht_node)
{
    if (put_test_common_node(&ht_node->common))
    {
        call_rcu(&ht_node->rcu, ht_node->free_func);
    }
}

static inline void get_tree_node(file_context_tree_node_t *tree_node)
{
    get_common_node(&tree_node->common);
}

static inline void put_tree_node(file_context_tree_node_t *tree_node)
{
    if (put_test_common_node(&tree_node->common))
    {
        tree_node->free_func(tree_node);
    }
}
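
// Note on the two free paths above: hash-table nodes are reachable by readers
// under rcu_read_lock() (see find_hash_node_rcu below), so their final free is
// deferred through call_rcu(). Tree nodes are only reached while holding the
// owning tree's spinlock, so put_tree_node() may free them immediately.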

/* This function requires lock*/
static inline void remove_common_node_from_lru(file_context_common_node_t *common_node)
{
    list_del(&common_node->lru_list_node);
    common_node->lru_list_node_inserted = false;
}

/* This function requires lock*/
static inline void remove_and_put_ht_node(file_context_ht_node_t *ht_node, file_context_common_table_t *common_table)
{
    remove_common_node_from_lru(&ht_node->common);
    hash_del_rcu(&ht_node->node);
    common_table->size -= 1;
    put_ht_node(ht_node);
}

/* This function requires lock, you must put node manually outside of lock*/
static inline void remove_tree_node(file_context_tree_t* tree, file_context_tree_node_t *tree_node)
{
    remove_common_node_from_lru(&tree_node->common);
    rb_erase(&tree_node->node, &tree->tree);
    tree->size -= 1;
}

static void clear_common_table_nolock(file_context_common_table_t *common_table)
{
    while (1)
    {
        file_context_ht_node_t *node = list_first_entry_or_null(&common_table->lru_list, file_context_ht_node_t, common.lru_list_node);
        if (!node)
        {
            break;
        }
        remove_and_put_ht_node(node, common_table);
    }
}

static void clear_tree_nolock(file_context_tree_t* tree)
{
    while (1)
    {
        file_context_tree_node_t *node = list_first_entry_or_null(&tree->lru_list, file_context_tree_node_t, common.lru_list_node);
        if (!node)
        {
            break;
        }
        remove_tree_node(tree, node);
        put_tree_node(node);
    }
}

static inline void deferred_free_big_table(struct rcu_head *head)
{
    file_context_big_table_t *table = container_of(head, file_context_big_table_t, rcu);
    // perhaps this is excessive, but it is better to be safe
    clear_common_table_nolock(&table->common_table);
#ifdef KERNEL_MOCK
    BUG_ON(table->common_table.size != 0);
#endif
    vmem_free(table);
    atomic64_sub(1, &g_memory_metrics->total_file_contexts_tables);
}

static inline void put_file_context_big_table(file_context_big_table_t *table)
{
    if (atomic_dec_and_test(&table->ref_count))
    {
        call_rcu(&table->rcu, deferred_free_big_table);
    }
}

static file_context_big_table_t *get_file_context_big_table_impl(file_context_tables_t *entry, file_context_table_type_t type)
{
    file_context_big_table_t* result = NULL;
    switch (type)
    {
        case FILE_CONTEXT_OPEN_TABLE:
            result = rcu_dereference(entry->open_table);
            break;
        case FILE_CONTEXT_READ_TABLE:
            result = rcu_dereference(entry->read_table);
            break;
        case FILE_CONTEXT_WRITE_TABLE:
            result = rcu_dereference(entry->write_table);
            break;
    }

    if (result) {
        if (!atomic_inc_not_zero(&result->ref_count)) {
            result = NULL;
        }
    }
    return result;
}

static file_context_big_table_t *get_file_context_big_table(transport_id_t transport_id, file_context_table_type_t type)
{
    file_context_big_table_t* result = NULL;
    file_context_tables_t *entry;
    int idx = transport_id_index(transport_id);
    if (idx < 0 || idx >= MAX_TRANSPORT_SIZE)
    {
        return NULL;
    }

    rcu_read_lock();
    entry = rcu_dereference(global_fs_event_cache_manager.tables_for_transport[idx]);
    if (entry) {
        if (entry->transport_id != transport_id) {
            DPRINTF("Transport id mismatch: %ld != %ld", entry->transport_id, transport_id);
        } else {
            result = get_file_context_big_table_impl(entry, type);
        }
    }
    rcu_read_unlock();
    return result;
}

static file_context_big_table_t *get_file_context_close_modified_table(void)
{
    file_context_big_table_t* result = NULL;
    rcu_read_lock();
    result = rcu_dereference(global_fs_event_cache_manager.close_modified_table);
    if (result) {
        if (!atomic_inc_not_zero(&result->ref_count)) {
            result = NULL;
        }
    }
    rcu_read_unlock();
    return result;
}

// This function does not verify the transport_id
static file_context_big_table_t *get_file_context_big_table_by_idx(int idx, file_context_table_type_t type)
{
    file_context_big_table_t* result = NULL;
    file_context_tables_t *entry;
    if (idx < 0 || idx >= MAX_TRANSPORT_SIZE)
    {
        return NULL;
    }

    rcu_read_lock();
    entry = rcu_dereference(global_fs_event_cache_manager.tables_for_transport[idx]);
    if (entry) {
        result = get_file_context_big_table_impl(entry, type);
    }
    rcu_read_unlock();
    return result;
}

void release_file_context_entry(transport_id_t id)
{
    file_context_tables_t* tables = NULL;
    int idx = transport_id_index(id);
    // Guard against a bad index; the lookup helpers in this file bound-check idx as well
    if (idx < 0 || idx >= MAX_TRANSPORT_SIZE)
    {
        return;
    }
    spin_lock(&global_fs_event_cache_manager.writer_lock);
    if (global_fs_event_cache_manager.tables_for_transport[idx])
    {
        if (global_fs_event_cache_manager.tables_for_transport[idx]->transport_id == id) {
            tables = global_fs_event_cache_manager.tables_for_transport[idx];
            put_file_context_entry(tables);
            rcu_assign_pointer(global_fs_event_cache_manager.tables_for_transport[idx], NULL);
        } else {
            WPRINTF("release_file_context_entry: %d, id mismatch: %llu, %llu", idx, global_fs_event_cache_manager.tables_for_transport[idx]->transport_id, id);
        }
    }
    spin_unlock(&global_fs_event_cache_manager.writer_lock);

    if (tables) {
        synchronize_rcu();
        mem_free(tables);
    }

    IPRINTF("release_file_context_entry: %llu", id);
}
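
// Unpublish-then-free pattern used for the shared pointers in this file:
// clear the pointer with rcu_assign_pointer(..., NULL) under writer_lock,
// wait out in-flight readers with synchronize_rcu(), and only then free the
// memory. Big tables additionally defer their own destruction through
// call_rcu() in put_file_context_big_table().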

static void init_file_context_common_table(file_context_common_table_t *table,
                                           uint8_t hashbits,
                                           unsigned int max_size,
                                           unsigned short clean_count,
                                           unsigned long expire_time_ms)
{
    INIT_LIST_HEAD(&table->lru_list);
    spin_lock_init(&table->spinlock);
    table->hashbits = hashbits;
    table->max_size = max_size;
    table->clean_count = clean_count;
    table->expire_time_jiffies = msecs_to_jiffies(expire_time_ms);
    table->size = 0;
    __hash_init(table->hashtable, 1 << hashbits);
}

static void init_file_context_tree(file_context_tree_t *tree, unsigned int max_size, unsigned short clean_count)
{
    tree->tree = RB_ROOT;
    INIT_LIST_HEAD(&tree->lru_list);
    spin_lock_init(&tree->spinlock);
    tree->max_size = max_size;
    tree->clean_count = clean_count;
    tree->size = 0;
}

static file_context_big_table_t* init_big_table(unsigned long expire_time_ms)
{
    file_context_big_table_t* table = vmem_alloc(sizeof(file_context_big_table_t) + sizeof(struct hlist_head) * (1 << FILE_CONTEXT_BIG_TABLE_SIZE_BITS));
    if (!table) {
        return NULL;
    }

    atomic64_add(1, &g_memory_metrics->total_file_contexts_tables);
    atomic_set(&table->ref_count, 1);
    init_file_context_common_table(&table->common_table,
                                   FILE_CONTEXT_BIG_TABLE_SIZE_BITS,
                                   FILE_CONTEXT_BIG_TABLE_SIZE,
                                   FILE_CONTEXT_BIG_TABLE_LRU_CLEAN_SIZE,
                                   expire_time_ms);

    return table;
}
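
// init_big_table() makes a single vmem_alloc() for the header plus the bucket
// array (1 << FILE_CONTEXT_BIG_TABLE_SIZE_BITS struct hlist_head entries);
// this presumes the hashtable member is a trailing flexible array that
// __hash_init() then clears (the struct layout lives in the private header).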

static file_context_tables_t* init_file_context_entry(transport_id_t id)
{
    file_context_tables_t* tables = mem_alloc(sizeof(file_context_tables_t));
    if (!tables) {
        return NULL;
    }

    *tables = (file_context_tables_t){0};
    tables->transport_id = id;
    return tables;
}

int acquire_file_context_entry(transport_id_t id)
{
    int err = 0;
    int idx = transport_id_index(id);
    file_context_tables_t *entry;
    // Guard against a bad index before touching tables_for_transport[]
    if (idx < 0 || idx >= MAX_TRANSPORT_SIZE)
    {
        return -EINVAL;
    }

    entry = init_file_context_entry(id);
    if (!entry) {
        return -ENOMEM;
    }

    spin_lock(&global_fs_event_cache_manager.writer_lock);
    if (global_fs_event_cache_manager.tables_for_transport[idx])
    {
        WPRINTF("acquire_file_context_entry: %d, already exists", idx);
        err = -EEXIST;
    } else {
        rcu_assign_pointer(global_fs_event_cache_manager.tables_for_transport[idx], entry);
        entry = NULL;
    }
    spin_unlock(&global_fs_event_cache_manager.writer_lock);

    if (entry) {
        mem_free(entry);
    }

    return err;
}

typedef void (*evict_fn_t)(void* ctx, file_context_common_node_t*);
static inline void check_lru(struct list_head* lru_list
                           , unsigned int size
                           , unsigned int max_size
                           , unsigned short clean_count
                           , unsigned long expire_time_jiffies
                           , evict_fn_t evict, void* ctx)
{
    unsigned long now;

    // Remove nodes if LRU list is too large
    if (size > max_size) {
        while (clean_count) {
            file_context_common_node_t *node = list_first_entry_or_null(lru_list, file_context_common_node_t, lru_list_node);
            if (!node)
                break;

            evict(ctx, node);
            clean_count--;
        }
    }

    // Clean expired nodes
    now = jiffies;
    while (clean_count) {
        file_context_common_node_t *node = list_first_entry_or_null(lru_list, file_context_common_node_t, lru_list_node);
        if (!node)
            break;

        // Wraparound-safe comparison, consistent with time_after() in deadline_valid()
        if (time_before(now, node->last_access_time + expire_time_jiffies))
            break;

        evict(ctx, node);
        clean_count--;
    }
}
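
// Worked example with hypothetical numbers: for size = 10, max_size = 8 and
// clean_count = 3, the first loop evicts up to 3 of the oldest nodes
// regardless of age; whatever clean_count budget remains is then spent on
// nodes whose last_access_time is older than expire_time_jiffies. At most
// clean_count nodes go per call, bounding the work done under the caller's
// lock.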

static void lru_evict_ht(void* ctx, file_context_common_node_t* node)
{
    file_context_common_table_t *common_table = (file_context_common_table_t *)ctx;
    file_context_ht_node_t *ht_node = container_of(node, file_context_ht_node_t, common);
    remove_and_put_ht_node(ht_node, common_table);
}

/* This function requires lock*/
static inline void check_common_table_lru(file_context_common_table_t *table)
{
    check_lru(&table->lru_list, table->size, table->max_size, table->clean_count, table->expire_time_jiffies, lru_evict_ht, table);
}

static void insert_ht_node(file_context_common_table_t *common_table,
                           file_context_ht_node_t *ht_node,
                           uint64_t key)
{
    file_context_ht_node_t *search_node;
    file_context_common_node_t *common_node = &ht_node->common;
    uint8_t hashbits = common_table->hashbits;
    bool inserted = false;

    common_node->key = key;

    /* RCU WRITER */
    spin_lock(&common_table->spinlock);
    list_add_tail(&common_node->lru_list_node, &common_table->lru_list);
    common_node->lru_list_node_inserted = true;
    // Iterate through the bucket looking for an entry with the same key; if one exists, prefer the new node
    hash_for_each_possible_with_hashbits(common_table->hashtable, search_node, node, key, hashbits)
    {
        if (common_node->key == search_node->common.key)
        {
            // TODO: it would be better to also verify the full file_key here, not just the 64-bit key, but this approach is good enough
            remove_common_node_from_lru(&search_node->common);
            hlist_replace_rcu(&search_node->node, &ht_node->node);
            put_ht_node(search_node);
            inserted = true;
            break;
        }
    }
    if (!inserted)
    {
        hash_add_rcu_hashbits(common_table->hashtable, &ht_node->node, key, hashbits);
        common_table->size += 1;
        check_common_table_lru(common_table);
    }
    spin_unlock(&common_table->spinlock);
    /* RCU WRITER */
}
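
// Readers walking the bucket under rcu_read_lock() see either the old node or
// its replacement across hlist_replace_rcu(); the displaced node stays valid
// until the grace period started by put_ht_node()'s call_rcu() elapses.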

struct tree_evict_context {
    file_context_tree_t* tree;
    struct list_head* to_free_list;
};

static void lru_evict_tree_node(void* ctx, file_context_common_node_t* node)
{
    struct tree_evict_context* context = (struct tree_evict_context*)ctx;
    file_context_tree_node_t* tree_node = container_of(node, file_context_tree_node_t, common);
    remove_tree_node(context->tree, tree_node);
    list_add_tail(&tree_node->free_node, context->to_free_list);
}

/* This function requires lock*/
static inline void check_tree_lru(file_context_tree_t* tree, struct list_head* to_free_list)
{
    struct tree_evict_context context = {tree, to_free_list};
    // FILE_CONTEXT_TREE_EXPIRE_TIME_MS is in milliseconds; check_lru() expects jiffies
    check_lru(&tree->lru_list, tree->size, tree->max_size, tree->clean_count, msecs_to_jiffies(FILE_CONTEXT_TREE_EXPIRE_TIME_MS), lru_evict_tree_node, &context);
}

static void insert_tree_node(file_context_tree_t* tree,
                             file_context_tree_node_t* tree_node,
                             uint64_t key)
{
    struct rb_node **link = &(tree->tree.rb_node);
    struct rb_node *parent = NULL;
    file_context_common_node_t *common_node = &tree_node->common;
    bool inserted = false;
    file_context_tree_node_t *replaced = NULL;
    LIST_HEAD(to_free_list);

    common_node->key = key;

    spin_lock(&tree->spinlock);
    list_add_tail(&common_node->lru_list_node, &tree->lru_list);
    common_node->lru_list_node_inserted = true;
    while (*link)
    {
        file_context_tree_node_t *curr;
        parent = *link;
        curr = container_of(parent, file_context_tree_node_t, node);
        if (key < curr->common.key) {
            link = &parent->rb_left;
        } else if (key > curr->common.key) {
            link = &parent->rb_right;
        } else {
            // TODO: is this a good approach? maybe keep the entry that was already there?
            remove_common_node_from_lru(&curr->common);
            rb_replace_node(&curr->node, &tree_node->node, &tree->tree);
            RB_CLEAR_NODE(&curr->node);
            // Put the displaced node only after the single unlock below;
            // dropping the last reference frees it, which must not happen
            // under the tree spinlock
            replaced = curr;
            inserted = true;
            break;
        }
    }

    if (!inserted)
    {
        rb_link_node(&tree_node->node, parent, link);
        rb_insert_color(&tree_node->node, &tree->tree);
        tree->size += 1;
        check_tree_lru(tree, &to_free_list);
    }
    spin_unlock(&tree->spinlock);

    if (replaced)
        put_tree_node(replaced);

    while (!list_empty(&to_free_list))
    {
        file_context_tree_node_t *to_free_node = list_first_entry(&to_free_list, file_context_tree_node_t, free_node);
        list_del(&to_free_node->free_node);
        put_tree_node(to_free_node);
    }
}

static void init_common_node(file_context_common_node_t *node)
{
    node->last_access_time = jiffies;
    atomic_set(&node->ref_count, 1);
}

static void init_ht_node(file_context_ht_node_t *node, file_contexts_rcu_free_func_t free_func)
{
    node->free_func = free_func;
    init_common_node(&node->common);
}

static void init_tree_node(file_context_tree_node_t* node, file_contexts_tree_free_func_t free_func)
{
    node->free_func = free_func;
    init_common_node(&node->common);
}

static inline file_context_ht_node_t *find_hash_node(struct hlist_head *head, uint64_t key, uint8_t hashbits)
{
    file_context_ht_node_t *tmp = NULL;
    hash_for_each_possible_with_hashbits(head, tmp, node, key, hashbits)
    {
        if (tmp->common.key == key)
        {
            return tmp;
        }
    }
    return NULL;
}

static inline file_context_ht_node_t *find_hash_node_rcu(struct hlist_head *head, uint64_t key, uint8_t hashbits)
{
    file_context_ht_node_t *tmp = NULL;
    hash_for_each_possible_rcu_with_hashbits(head, tmp, node, key, hashbits)
    {
        if (tmp->common.key == key)
        {
            return tmp;
        }
    }
    return NULL;
}

static inline file_context_common_node_t *lookup_common_node(file_context_common_table_t *table, uint64_t key, uint8_t hashbits)
{
    file_context_ht_node_t *ht_node = NULL;
    file_context_common_node_t *common_node = NULL;

    /* RCU READER */
    rcu_read_lock();
    ht_node = find_hash_node_rcu(table->hashtable, key, hashbits);
    if (ht_node)
    {
        if (!get_ht_node_rcu(ht_node)) {
            ht_node = NULL;
        }
    }
    rcu_read_unlock();
    /* RCU READER */

    if (ht_node)
    {
        common_node = &ht_node->common;
        spin_lock(&table->spinlock);
        common_node->last_access_time = jiffies;
        if (common_node->lru_list_node_inserted) {
            list_del(&common_node->lru_list_node);
            list_add_tail(&common_node->lru_list_node, &table->lru_list);
        }
        spin_unlock(&table->spinlock);
    }

    return common_node;
}
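
// Reader pattern used above: find the node under rcu_read_lock(), pin it with
// atomic_inc_not_zero() (which fails if the node is already being torn down),
// then refresh last_access_time and the LRU position under the table
// spinlock. Callers drop the reference with put_ht_node().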

// Must be called under tree lock
static struct rb_node* lookup_tree_rbnode(struct rb_root* tree, uint64_t key)
{
    struct rb_node *node = tree->rb_node;
    while (node) {
        file_context_tree_node_t *tree_node = container_of(node, file_context_tree_node_t, node);
        if (key < tree_node->common.key) {
            node = node->rb_left;
        } else if (key > tree_node->common.key) {
            node = node->rb_right;
        } else {
            return &tree_node->node;
        }
    }

    return NULL;
}

static file_context_common_node_t *lookup_tree_node(file_context_tree_t *tree, uint64_t key)
{
    struct rb_node *node;
    file_context_tree_node_t* tree_node = NULL;

    spin_lock(&tree->spinlock);
    node = lookup_tree_rbnode(&tree->tree, key);
    if (node) {
        tree_node = container_of(node, file_context_tree_node_t, node);
        get_tree_node(tree_node);
    }
    spin_unlock(&tree->spinlock);

    return tree_node ? &tree_node->common : NULL;
}

typedef struct erase_tree_result {
    int size;
    bool erased;
} erase_tree_result_t;

static erase_tree_result_t erase_tree_node(file_context_tree_t *tree, uint64_t key)
{
    struct rb_node *tree_node = NULL;
    file_context_tree_node_t* node = NULL;
    erase_tree_result_t result = {0};

    spin_lock(&tree->spinlock);
    tree_node = lookup_tree_rbnode(&tree->tree, key);
    if (tree_node) {
        node = container_of(tree_node, file_context_tree_node_t, node);
        remove_tree_node(tree, node);
        result.erased = true;
        result.size = tree->size;
    }
    spin_unlock(&tree->spinlock);

    if (node)
        put_tree_node(node);

    return result;
}

static void lookup_common_node_all(file_context_table_type_t type
                                 , uint64_t key
                                 , file_context_common_node_t **common_nodes
                                 , const transport_ids_t* ids
                                 , bool* found_all)
{
    int idx = 0;
    *found_all = true;
    for (; idx < MAX_TRANSPORT_SIZE; idx++)
    {
        transport_id_t transport_id = ids->ids[idx];
        file_context_big_table_t *table;
        if (!transport_id)
            continue;

        table = get_file_context_big_table(transport_id, type);
        common_nodes[idx] = NULL;
        if (table)
        {
            file_context_common_node_t* node = lookup_common_node(&table->common_table, key, table->common_table.hashbits);
            if (node) {
                common_nodes[idx] = node;
            } else {
                *found_all = false;
            }
            put_file_context_big_table(table);
        } else {
            *found_all = false;
        }
    }
}

static int remove_common_node_by_key(file_context_common_table_t *common_table, uint64_t key)
{
    file_context_ht_node_t *ht_node = NULL;

    /* RCU WRITER */
    spin_lock(&common_table->spinlock);
    ht_node = find_hash_node(common_table->hashtable, key, common_table->hashbits);
    if (ht_node)
    {
        file_context_common_node_t *common_node = &ht_node->common;
        if (common_node->lru_list_node_inserted) {
            list_del(&common_node->lru_list_node);
            common_node->lru_list_node_inserted = false;
        }
        hash_del_rcu(&ht_node->node);
        common_table->size -= 1;
    }
    spin_unlock(&common_table->spinlock);
    /* RCU WRITER */

    if (ht_node)
    {
        put_ht_node(ht_node);
        return 0;
    }

    return -ENOENT;
}

// This function takes common_table->spinlock internally, so it must not be called with that lock already held.
// It is currently specialized for the "modify close" cache; you likely do not need to use it elsewhere.
static void remove_common_node(file_context_common_table_t *common_table, file_context_common_node_t* common_node)
{
    file_context_ht_node_t *ht_node = container_of(common_node, file_context_ht_node_t, common);
    bool need_to_put = false;
    spin_lock(&common_table->spinlock);

    if (common_node->lru_list_node_inserted) {
        list_del(&common_node->lru_list_node);
        common_node->lru_list_node_inserted = false;
    }
    // This check is important because node might have been already removed due to LRU eviction
    if (!hlist_unhashed(&ht_node->node)) {
        hash_del_rcu(&ht_node->node);
        common_table->size -= 1;
        need_to_put = true;
    }

    spin_unlock(&common_table->spinlock);

    if (need_to_put) {
        put_ht_node(ht_node);
    }
}

static int remove_common_cache(int idx, uint64_t key, file_context_table_type_t type)
{
    int ret = 0;
    file_context_big_table_t *table = get_file_context_big_table_by_idx(idx, type);
    if (!table)
    {
        return -ENOENT;
    }
    ret = remove_common_node_by_key(&table->common_table, key);
    if (ret == 0)
    {
        DPRINTF("remove_common_cache[%d]: %llu", idx, key);
    }
    put_file_context_big_table(table);
    return ret;
}

void remove_common_cache_all(const file_key_t* file_key)
{
    uint64_t key = file_key->ptr;
    int i = 0;
    for (; i < MAX_TRANSPORT_SIZE; i++)
    {
        remove_common_cache(i, key, FILE_CONTEXT_OPEN_TABLE);
        remove_common_cache(i, key, FILE_CONTEXT_READ_TABLE);
        remove_common_cache(i, key, FILE_CONTEXT_WRITE_TABLE);
    }
    {
        file_context_big_table_t *table = get_file_context_close_modified_table();
        if (table)
        {
            remove_common_node_by_key(&table->common_table, key);
            put_file_context_big_table(table);
        }
    }
}

static inline int cmp_file_context_key(const file_context_key_t *cache_key, const file_context_key_t *current_key)
{
    if (cache_key->file_key.ptr != current_key->file_key.ptr ||
        cache_key->file_key.ino != current_key->file_key.ino ||
        cache_key->file_key.gen != current_key->file_key.gen ||
        cache_key->file_key.dev != current_key->file_key.dev)
    {
        return -1;
    }
    return 0;
}

static inline int cmp_file_context_update_time(const file_context_key_t *cache_key, const file_context_key_t *current_key)
{
    if (cache_key->i_mtime.tv_sec != current_key->i_mtime.tv_sec ||
        cache_key->i_mtime.tv_nsec != current_key->i_mtime.tv_nsec ||
        cache_key->i_ctime.tv_sec != current_key->i_ctime.tv_sec ||
        cache_key->i_ctime.tv_nsec != current_key->i_ctime.tv_nsec)
    {
        return -1;
    }
    return 0;
}

static inline int cmp_file_context(const file_context_key_t *cache_key, const file_context_key_t *current_key)
{
    if (cmp_file_context_key(cache_key, current_key) == 0 &&
        cmp_file_context_update_time(cache_key, current_key) == 0)
    {
        return 0;
    }
    return -1;
}

static bool deadline_valid(unsigned long deadline)
{
    return time_after(deadline, jiffies);
}

static bool flags_valid(int expected_flags, int having_flags)
{
    return (expected_flags & having_flags) == expected_flags;
}

static inline file_context_open_file_node_t *to_open_file_node(file_context_common_node_t *common_node)
{
    return container_of(common_node, file_context_open_file_node_t, node.common);
}

static inline void put_open_file_node(file_context_open_file_node_t *file_node)
{
    put_ht_node(&file_node->node);
}

static inline file_context_open_process_node_t *to_open_process_node(file_context_common_node_t *common_node)
{
    return container_of(common_node, file_context_open_process_node_t, node.common);
}

static inline void put_open_process_node(file_context_open_process_node_t *process_node)
{
    put_tree_node(&process_node->node);
}

bool check_open_cache(const transport_ids_t* ids, file_context_info_t *info)
{
    file_context_common_node_t *common_nodes[MAX_TRANSPORT_SIZE] = {0};
    bool skip;
    int id = 0;

    // 'skip' starts as lookup_common_node_all's 'found_all': if any transport's
    // table or node was missing, we already know the event cannot be skipped
    lookup_common_node_all(FILE_CONTEXT_OPEN_TABLE, info->msg_info.key.file_key.ptr, common_nodes, ids, &skip);

    for (id = 0; id < MAX_TRANSPORT_SIZE; id++)
    {
        file_context_open_file_node_t *file_node = NULL;
        file_context_open_process_node_t *process_node = NULL;
        file_context_common_node_t *common_node = NULL;
        if (common_nodes[id] == NULL)
        {
            continue;
        }
        file_node = to_open_file_node(common_nodes[id]);
        // for open events caching, make sure that both file_ptr key and times match
        if (cmp_file_context(&file_node->key, &info->msg_info.key) != 0)
        {
            skip = false;
            put_open_file_node(file_node);
            continue;
        }

        if (deadline_valid(file_node->data.deadline) && flags_valid(info->params.open.flags, atomic_read(&file_node->data.flags)))
        {
            info->msg_info.skipped_transport_ids[id] = ids->ids[id];
            put_open_file_node(file_node);
            continue;
        }

        common_node = lookup_tree_node(&file_node->process_lookup, info->pid_key);
        put_open_file_node(file_node);
        if (!common_node)
        {
            skip = false;
            continue;
        }
        process_node = to_open_process_node(common_node);
        if (deadline_valid(process_node->data.deadline) && flags_valid(info->params.open.flags, atomic_read(&process_node->data.flags)))
        {
            info->msg_info.skipped_transport_ids[id] = ids->ids[id];
        }
        else
        {
            skip = false;
        }
        put_open_process_node(process_node);
    }
    return skip;
}

static void open_process_node_free(file_context_tree_node_t* node)
{
    file_context_open_process_node_t *process_node = container_of(node, file_context_open_process_node_t, node);
    KMEM_DELETE(file_context_open_process_node, process_node);
}

static inline file_context_open_process_node_t *add_open_process_node(file_context_tree_t *tree,
                                                                      uint64_t pid_key)
{
    file_context_open_process_node_t *open_process_node = KMEM_NEW0(file_context_open_process_node);
    if (!open_process_node)
        return NULL;

    init_tree_node(&open_process_node->node, open_process_node_free);
    get_tree_node(&open_process_node->node);
    atomic_set(&open_process_node->data.flags, 0);
    WRITE_ONCE(open_process_node->data.deadline, 0);
    insert_tree_node(tree, &open_process_node->node, pid_key);
    return open_process_node;
}

static void file_context_open_file_node_free(struct rcu_head *rcu)
{
    file_context_open_file_node_t *file_node = container_of(rcu, file_context_open_file_node_t, node.rcu);
    clear_tree_nolock(&file_node->process_lookup);
    KMEM_DELETE(file_context_open_file_node, file_node);
}

static inline file_context_open_file_node_t *add_open_file_node(file_context_common_table_t *table,
                                                                const file_context_key_t *key)
{
    file_context_ht_node_t* ht_node;
    file_context_open_file_node_t *open_file_node = KMEM_NEW0(file_context_open_file_node);
    if (!open_file_node)
        return NULL;

    ht_node = &open_file_node->node;
    init_ht_node(ht_node, file_context_open_file_node_free);
    get_ht_node(ht_node);

    open_file_node->key = *key;
    atomic_set(&open_file_node->data.flags, 0);
    WRITE_ONCE(open_file_node->data.deadline, 0);
    init_file_context_tree(&open_file_node->process_lookup, FILE_CONTEXT_PROCESS_TREE_MAX_SIZE, FILE_CONTEXT_PROCESS_TREE_LRU_CLEAN_SIZE);

    insert_ht_node(table, ht_node, key->file_key.ptr);
    return open_file_node;
}

static void add_open_node(file_context_big_table_t *table,
                          const file_context_key_t *key, uint64_t pid_key,
                          file_context_open_file_node_t **pfile_node,
                          file_context_open_process_node_t **pprocess_node)
{
    file_context_open_file_node_t *tmp_file_node = NULL;
    file_context_open_process_node_t *tmp_process_node = NULL;

    // Create or find the open node per file key...
    {
        file_context_common_node_t *common_node = lookup_common_node(&table->common_table, key->file_key.ptr, table->common_table.hashbits);
        if (common_node)
        {
            tmp_file_node = to_open_file_node(common_node);
            // If the full key (including times) mismatches, drop the reference
            // and 'forget' that we found a node
            if (cmp_file_context(&tmp_file_node->key, key) != 0)
            {
                put_open_file_node(tmp_file_node);
                tmp_file_node = NULL;
            }
        }

        if (!tmp_file_node)
        {
            tmp_file_node = add_open_file_node(&table->common_table, key);
        }
        *pfile_node = tmp_file_node;
    }

    // ...and, if asked, the process node by pid_key (skipped if the file node
    // could not be found or allocated)
    if (pprocess_node && tmp_file_node)
    {
        file_context_common_node_t *common_node = lookup_tree_node(&tmp_file_node->process_lookup, pid_key);
        if (common_node)
        {
            tmp_process_node = to_open_process_node(common_node);
        }
        else
        {
            tmp_process_node = add_open_process_node(&tmp_file_node->process_lookup, pid_key);
        }
        *pprocess_node = tmp_process_node;
    }
}

static unsigned long to_deadline(uint32_t ttl_s)
{
    unsigned long cur_jiffies = jiffies;
    // If ttl_s is 0, the entry should effectively never expire, so push the deadline far into the future
    if (ttl_s == 0)
        return cur_jiffies + msecs_to_jiffies(FILE_CONTEXT_LONGEST_EXPIRE_TIME_MS);
    else
        return cur_jiffies + msecs_to_jiffies(ttl_s * 1000);
}
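
// Example arithmetic with a hypothetical HZ = 250: to_deadline(5) yields
// jiffies + msecs_to_jiffies(5000) = jiffies + 1250 ticks, so
// deadline_valid() holds for the next five seconds.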

int add_open_cache(transport_id_t id, const file_context_add_cache_request_t* info)
{
    unsigned long deadline;
    int ret = 0;
    file_context_open_file_node_t* file_node = NULL;
    file_context_open_process_node_t* process_node = NULL;
    file_context_big_table_t *table = get_file_context_big_table(id, FILE_CONTEXT_OPEN_TABLE);
    if (!table)
        return -ENOENT;

    deadline = to_deadline(info->ttl_s);
    add_open_node(table, &info->key, info->pid_key, &file_node, info->pid_key ? &process_node : NULL);
    put_file_context_big_table(table);

    if (info->pid_key) {
        if (process_node) {
            atomic_or_compat(info->params.open.flags, &process_node->data.flags);
            WRITE_ONCE(process_node->data.deadline, deadline);
        } else {
            ret = -ENOMEM;
        }
    } else {
        if (file_node) {
            atomic_or_compat(info->params.open.flags, &file_node->data.flags);
            WRITE_ONCE(file_node->data.deadline, deadline);
        } else {
            ret = -ENOMEM;
        }
    }

    if (file_node)
        put_open_file_node(file_node);
    if (process_node)
        put_open_process_node(process_node);

    return ret;
}

/* This function requires lock*/
static interval_node_t *malloc_interval_node(uint64_t low, uint64_t high, interval_set_t *set)
{
    interval_node_t *node = KMEM_NEW(interval_node);
    if (!node)
    {
        return NULL;
    }
    RB_CLEAR_NODE(&node->rb);
    node->low = low;
    node->high = high;
    set->interval_count++;
#ifdef INTERVAL_SET_DEBUG
    set->total_interval_size += (node->high - node->low);
#endif
    return node;
}

/* This function requires lock*/
static void remove_interval_node(struct rb_node *rb_node, interval_set_t *set)
{
    interval_node_t *node;
    if (!rb_node)
    {
        return;
    }
    node = rb_entry(rb_node, interval_node_t, rb);
    set->interval_count--;
#ifdef INTERVAL_SET_DEBUG
    set->total_interval_size -= (node->high - node->low);
#endif
    rb_erase(rb_node, &set->root);
    KMEM_DELETE(interval_node, node);
}

/* This function requires lock*/
void clean_interval_tree(interval_set_t *set)
{
    struct rb_node *rb_node = set->root.rb_node;
    while (rb_node)
    {
        remove_interval_node(rb_node, set);
        rb_node = set->root.rb_node;
    }
}

// Returns true when node's interval fully contains (low, high)
static bool contain(interval_node_t *node, uint64_t low, uint64_t high)
{
    return node->low <= low && high <= node->high;
}

#ifndef list_last_entry
#define list_last_entry(ptr, type, member) \
    list_entry((ptr)->prev, type, member)
#endif

/* This function requires lock
For each node and new node, possible situation:
    1. node contains new node, return true
    2. new node contains node, remove overlapped node, check left and right
    3. new node is less/greater than node, check left/right
    4. new node is left/right overlapped with node, extend new node's low/high, remove node, check left/right
*/
static bool check_overlap(uint64_t *low, uint64_t *high, struct rb_root *root, struct list_head *del_list)
{
    interval_node_t *cur, *next;
    struct list_head stack;
    struct rb_node *rb_node;

    INIT_LIST_HEAD(&stack);
    rb_node = root->rb_node;
    if (!rb_node)
    {
        return false;
    }
    cur = rb_entry(rb_node, interval_node_t, rb);
    list_add_tail(&cur->stack_node, &stack);

    while (!list_empty(&stack))
    {
        cur = list_last_entry(&stack, interval_node_t, stack_node);
        list_del(&cur->stack_node);

        // assume that s0 is current node, s1 is new node

        // current node contains new node
        /*
        tree:
                (14,20)
            (4,7)     (21,22)
        (1,3)   (9,13)

        stack: (14,20)
        new node: (17,18)
        (17,18) is contained by (14,20)

        ____s0---s1=s1--s0____
            14   17 18  20

        do nothing, return true
        */
        if (contain(cur, *low, *high))
        {
            return true;
        }

        // new node contains current node
        /*
        tree:
                (14,17)
            (4,7)     (18,19)
        (1,3)   (9,13)

        stack: (14,17)
        new node: (10,20)
        (10,20) contains (14,17)

        ____s1====s0---s0===s1____
            10    14   17   20

        after operation:
        deleted list: (14,17)
        stack: (18,19), (4,7)
        */
        if ((*low < cur->low) && (*high > cur->high))
        {
            list_add_tail(&cur->del_list_node, del_list);
            if (cur->rb.rb_right)
            {
                next = rb_entry(cur->rb.rb_right, interval_node_t, rb);
                list_add_tail(&next->stack_node, &stack);
            }
            if (cur->rb.rb_left)
            {
                next = rb_entry(cur->rb.rb_left, interval_node_t, rb);
                list_add_tail(&next->stack_node, &stack);
            }
            continue;
        }

        // new node is less than current node
        /*
        tree:
            (4,7)
        (1,3)   (9,13)

        stack: (4,7)
        new node: (0,2)
        (0,2) is less than (4,7)

        ____s1==s1__s0---s0____
            0   2   4    7

        after operation:
        deleted list:
        stack: (1,3)
        */
        if (*high < cur->low)
        {
            if (cur->rb.rb_left)
            {
                next = rb_entry(cur->rb.rb_left, interval_node_t, rb);
                list_add_tail(&next->stack_node, &stack);
            }
            continue;
        }
        // new node is left overlapped with current node
        /*
        tree:
            (4,8)
        (1,3)   (9,13)

        stack: (4,8)
        new node: (2,6)
        (2,6) is left overlapped with (4,8)

        ____s1==s0xxs1--s0____
            2   4   6   8

        after operation:
        new node->(2, 8)
        deleted list: (4,8)
        stack: (1,3)
        */
        else if (*high <= cur->high)
        {
            list_add_tail(&cur->del_list_node, del_list);
            *high = cur->high;
            if (cur->rb.rb_left)
            {
                next = rb_entry(cur->rb.rb_left, interval_node_t, rb);
                list_add_tail(&next->stack_node, &stack);
            }
            continue;
        }

        // new node is greater than node
        /*
        tree:
                (10,17)
            (4,7)     (18,19)

        stack: (10,17)
        new node: (20,30)
        (20,30) is greater than (10,17)

        ____s0-------s0___s1=========s1____
            10       17   20         30

        after operation:
        deleted list:
        stack: (18,19)
        */
        if (*low > cur->high)
        {
            if (cur->rb.rb_right)
            {
                next = rb_entry(cur->rb.rb_right, interval_node_t, rb);
                list_add_tail(&next->stack_node, &stack);
            }
            continue;
        }
        // new node is right overlapped with node
        /*
        tree:
                (10,17)
            (4,7)     (18,19)

        stack: (10,17)
        new node: (15,30)
        (15,30) is right overlapped with (10,17)
               s1
        ____s0-----s1xxs0=========s1____
            10     15  17         30

        after operation:
        new node->(10, 30)
        deleted list:(10,17)
        stack: (18,19)
        */
        else if (cur->low <= *low)
        {
            list_add_tail(&cur->del_list_node, del_list);
            *low = cur->low;
            if (cur->rb.rb_right)
            {
                next = rb_entry(cur->rb.rb_right, interval_node_t, rb);
                list_add_tail(&next->stack_node, &stack);
            }
            continue;
        }

        EPRINTF("Something wrong");
    }

    return false;
}

/* This function requires lock
    1. find the overlapped interval
    2. remove overlapped interval
    3. insert new interval
*/
bool insert_interval(uint64_t low, uint64_t high, interval_set_t *set)
{
    struct rb_node **new, *parent = NULL;
    interval_node_t *new_node, *cur_node;
    struct list_head del_list;
    uint64_t l = low, h = high;

    INIT_LIST_HEAD(&del_list);
    if (check_overlap(&l, &h, &set->root, &del_list))
    {
        return true;
    }

    new_node = malloc_interval_node(l, h, set);
    if (!new_node)
    {
        return false;
    }

    while (!list_empty(&del_list))
    {
        // erase from leaf node
        cur_node = list_last_entry(&del_list, interval_node_t, del_list_node);
        list_del(&cur_node->del_list_node);
        remove_interval_node(&cur_node->rb, set);
    }

    new = &set->root.rb_node;

    while (*new)
    {
        parent = *new;
        cur_node = rb_entry(parent, interval_node_t, rb);
        if (new_node->high < cur_node->low)
            new = &parent->rb_left;
        else
            new = &parent->rb_right;
    }

    rb_link_node(&new_node->rb, parent, new);
    rb_insert_color(&new_node->rb, &set->root);
    return true;
}

/* This function requires lock
    1. node contains new node, return true
    2. less/greater, check left/right
    3. overlapped/new node contains node, return false
*/
bool check_interval(uint64_t low, uint64_t high, interval_set_t *set)
{
    interval_node_t *cur;
    struct rb_node *rb_node = set->root.rb_node;

    while (rb_node)
    {
        cur = rb_entry(rb_node, interval_node_t, rb);
        if (contain(cur, low, high))
            return true;

        if (high < cur->low)
        {
            rb_node = rb_node->rb_left;
        }
        else if (low > cur->high)
        {
            rb_node = rb_node->rb_right;
        }
        else
        {
            // overlapped
            return false;
        }
    }

    return false;
}
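
/*
Usage sketch for the interval set, with hypothetical offsets: starting from an
empty set, insert_interval(0, 4, set) stores (0,4); insert_interval(3, 8, set)
merges the overlap into a single (0,8). After that, check_interval(0, 8, set)
returns true, while check_interval(0, 9, set) returns false because 9 lies
outside every stored interval. As the comments above note, the caller must
hold the owning lock around all of these calls.
*/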

typedef struct rounded_interval_s
{
    uint64_t low;
    uint64_t high;
} rounded_interval_t;

static bool try_round_interval(uint64_t low, uint64_t high, loff_t file_size, rounded_interval_t* out)
{
    if (file_size <= 0
     || file_size > FILE_CONTEXT_MAX_FILE_SIZE
     || low > (uint64_t)file_size
     || high < low)
    {
        return false;
    }

    out->low = FILE_CONTEXT_CHUNK_LOWER_BOUND(low);
    out->high = FILE_CONTEXT_CHUNK_UPPER_BOUND(high);
    return true;
}
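
// Rounding example, assuming the chunk macros align to a fixed chunk size of,
// say, 4096 bytes (the real constants live in the header): a read of bytes
// (5000, 9000) in a 16 KiB file would widen to the chunk-aligned interval
// (4096, 12288), so partially-read chunks still accumulate toward full
// coverage in the interval set.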

static file_context_rw_node_t* to_rw_node(file_context_common_node_t *common_node)
{
    return container_of(common_node, file_context_rw_node_t, node.common);
}

static void put_rw_node(file_context_rw_node_t *rw_node)
{
    put_ht_node(&rw_node->node);
}

static file_context_rw_node_t* add_rw_cache_node(transport_id_t id, const file_context_key_t* key, file_context_table_type_t type);
/*
Send a read event only the first time the file has been fully read.
The accumulated intervals are discarded when the file changes (stale nodes are
detected by the key comparison below).
Skip the event when the return value is true.
*/
bool check_and_update_read_cache(const transport_ids_t* ids, file_context_info_t *info)
{
    file_context_common_node_t *common_nodes[MAX_TRANSPORT_SIZE] = {0};
    bool skip = true;
    bool found_all = false;
    int id = 0;
    // TODO: This cast is unsafe, ask to explicitly provide file_size instead
    loff_t file_size = i_size_read((const struct inode *)(uintptr_t)info->msg_info.key.file_key.ptr);
    rounded_interval_t interval;

    if (!try_round_interval(info->params.rw.low, info->params.rw.high, file_size, &interval))
    {
        return true;
    }

    lookup_common_node_all(FILE_CONTEXT_READ_TABLE, info->msg_info.key.file_key.ptr, common_nodes, ids, &found_all);

    for (id = 0; id < MAX_TRANSPORT_SIZE; id++)
    {
        transport_id_t transport_id = ids->ids[id];
        file_context_rw_node_t *read_node = NULL;

        // By default skip the read event
        info->msg_info.skipped_transport_ids[id] = transport_id;
        if (common_nodes[id])
        {
            read_node = to_rw_node(common_nodes[id]);
            if (cmp_file_context_key(&read_node->key, &info->msg_info.key) != 0)
            {
                put_rw_node(read_node);
                common_nodes[id] = NULL;
                read_node = NULL;
            }
        }

        if (common_nodes[id] == NULL)
        {
            read_node = add_rw_cache_node(ids->ids[id], &info->msg_info.key, FILE_CONTEXT_READ_TABLE);
            if (read_node)
            {
                common_nodes[id] = &read_node->node.common;
            }
        }

        if (common_nodes[id] == NULL)
        {
            continue;
        }

        spin_lock(&read_node->data.spinlock);
        insert_interval(interval.low, interval.high, &read_node->data.interval_set);
        if (check_interval(0, file_size, &read_node->data.interval_set))
        {
            if (atomic_cmpxchg(&read_node->data.is_reported, false, true) == false)
            {
                // send full read event by setting this flag
                info->msg_info.skipped_transport_ids[id] = 0;
                skip = false;
            }
        }
        spin_unlock(&read_node->data.spinlock);
        put_rw_node(read_node);
    }
    return skip;
}

// Skip event when return is true.
bool check_write_cache(const transport_ids_t* ids, file_context_info_t *info)
{
    file_context_common_node_t *common_nodes[MAX_TRANSPORT_SIZE] = {0};
    bool skip;
    int id = 0;
    // TODO: This cast is unsafe, ask to explicitly provide file_size instead
    loff_t file_size = i_size_read((const struct inode *)(uintptr_t)info->msg_info.key.file_key.ptr);
    if (file_size > FILE_CONTEXT_MAX_FILE_SIZE) {
        // Currently we are not handling any files larger than FILE_CONTEXT_MAX_FILE_SIZE
        return true;
    }

    lookup_common_node_all(FILE_CONTEXT_WRITE_TABLE, info->msg_info.key.file_key.ptr, common_nodes, ids, &skip);
    for (id = 0; id < MAX_TRANSPORT_SIZE; id++)
    {
        file_context_rw_node_t *write_node = NULL;
        if (common_nodes[id] == NULL)
        {
            continue;
        }

        write_node = to_rw_node(common_nodes[id]);
        if (cmp_file_context_key(&write_node->key, &info->msg_info.key) == 0)
        {
            spin_lock(&write_node->data.spinlock);
            if (check_interval(info->params.rw.low, info->params.rw.high, &write_node->data.interval_set))
            {
                info->msg_info.skipped_transport_ids[id] = ids->ids[id];
            }
            else
            {
                skip = false;
            }
            spin_unlock(&write_node->data.spinlock);
        }
        else
        {
            skip = false;
        }
        put_rw_node(write_node);
    }
    return skip;
}

static void rw_node_free(struct rcu_head *rcu)
{
    file_context_rw_node_t *rw_node = container_of(rcu, file_context_rw_node_t, node.rcu);
    clean_interval_tree(&rw_node->data.interval_set);
    KMEM_DELETE(file_context_rw_node, rw_node);
}

static file_context_rw_node_t *add_rw_node(file_context_big_table_t* table, const file_context_key_t* key)
{
    file_context_rw_node_t *node = NULL;
    file_context_common_node_t *common_node = lookup_common_node(&table->common_table, key->file_key.ptr, table->common_table.hashbits);
    if (common_node)
    {
        node = to_rw_node(common_node);
        // For 'read' and 'write' events, time changes are not important.
        // It is the fact that particular region was accessed that matters
        if (cmp_file_context_key(&node->key, key) != 0)
        {
            put_rw_node(node);
            node = NULL;
        }
    }

    if (!node)
    {
        node = KMEM_NEW0(file_context_rw_node);
        if (node)
        {
            file_context_ht_node_t* ht_node = &node->node;
            init_ht_node(ht_node, rw_node_free);
            get_ht_node(ht_node);
            node->key = *key;
            spin_lock_init(&node->data.spinlock);
            node->data.interval_set.root = RB_ROOT;

            insert_ht_node(&table->common_table, ht_node, key->file_key.ptr);
        }
    }

    return node;
}

static file_context_rw_node_t* add_rw_cache_node(transport_id_t id, const file_context_key_t* key, file_context_table_type_t type)
{
    file_context_rw_node_t *node = NULL;
    file_context_big_table_t *table = get_file_context_big_table(id, type);
    if (!table) {
        return NULL;
    }

    node = add_rw_node(table, key);
    put_file_context_big_table(table);
    return node;
}

int add_write_cache(transport_id_t id, const file_context_add_cache_request_t* info)
{
    // TODO: This cast is unsafe, ask to explicitly provide file_size instead
    loff_t file_size = i_size_read((const struct inode *)(uintptr_t)info->key.file_key.ptr);
    rounded_interval_t interval;
    file_context_rw_node_t *node = NULL;
    if (!try_round_interval(info->params.rw.low, info->params.rw.high, file_size, &interval))
        return -EINVAL;

    node = add_rw_cache_node(id, &info->key, FILE_CONTEXT_WRITE_TABLE);
    if (!node)
        return -ENOMEM;

    spin_lock(&node->data.spinlock);
    insert_interval(interval.low, interval.high, &node->data.interval_set);
    spin_unlock(&node->data.spinlock);

    put_rw_node(node);
    return 0;
}

static void process_node_free(file_context_tree_node_t* node)
{
    file_context_process_node_t *process_node = container_of(node, file_context_process_node_t, node);
    KMEM_DELETE(file_context_process_node, process_node);
}

static file_context_process_node_t* to_process_node(file_context_common_node_t *common_node)
{
    return container_of(common_node, file_context_process_node_t, node.common);
}

static inline file_context_process_node_t *lookup_or_add_process_node(file_context_tree_t *tree,
                                                                      uint64_t pid_key)
{
    file_context_process_node_t *process_node = NULL;
    file_context_common_node_t *common_node = NULL;
    common_node = lookup_tree_node(tree, pid_key);
    if (common_node)
    {
        process_node = to_process_node(common_node);
    }

    if (!process_node)
    {
        file_context_tree_node_t* tree_node;
        process_node = KMEM_NEW0(file_context_process_node);
        if (!process_node)
            return NULL;

        tree_node = &process_node->node;
        init_tree_node(tree_node, process_node_free);
        get_tree_node(tree_node);
        insert_tree_node(tree, tree_node, pid_key);
    }
    return process_node;
}

static void file_modify_node_free(struct rcu_head *rcu)
{
    file_context_file_modify_node_t *file_node = container_of(rcu, file_context_file_modify_node_t, node.rcu);
    clear_tree_nolock(&file_node->process_lookup);
    KMEM_DELETE(file_context_file_modify_node, file_node);
}

static inline file_context_file_modify_node_t* to_file_modify_node(file_context_common_node_t *common_node)
{
    return container_of(common_node, file_context_file_modify_node_t, node.common);
}

static void put_file_modify_node(file_context_file_modify_node_t *file_node)
{
    put_ht_node(&file_node->node);
}

static file_context_file_modify_node_t *lookup_or_add_file_modify_node(file_context_common_table_t *table, const file_context_key_t* key)
{
    file_context_file_modify_node_t *tmp_file_node = NULL;
    file_context_common_node_t *common_node = NULL;

    common_node = lookup_common_node(table, key->file_key.ptr, table->hashbits);
    if (common_node)
    {
        tmp_file_node = to_file_modify_node(common_node);
        // If the file_key mismatches, drop the reference and 'forget' that we found a node
        if (cmp_file_context_key(&tmp_file_node->key, key) != 0)
        {
            put_file_modify_node(tmp_file_node);
            tmp_file_node = NULL;
        }
    }

    if (!tmp_file_node)
    {
        file_context_ht_node_t* ht_node;
        tmp_file_node = KMEM_NEW0(file_context_file_modify_node);
        if (!tmp_file_node)
            return NULL;

        ht_node = &tmp_file_node->node;
        init_ht_node(ht_node, file_modify_node_free);
        get_ht_node(ht_node);
        tmp_file_node->key = *key;
        init_file_context_tree(&tmp_file_node->process_lookup, FILE_CONTEXT_PROCESS_TREE_MAX_SIZE, FILE_CONTEXT_PROCESS_TREE_LRU_CLEAN_SIZE);
        insert_ht_node(table, ht_node, key->file_key.ptr);
    }

    return tmp_file_node;
}

static inline void put_process_node(file_context_process_node_t *process_node)
{
    put_tree_node(&process_node->node);
}

static bool add_file_modify_cache_(file_context_big_table_t *table, const file_context_add_cache_request_t *info)
{
    file_context_file_modify_node_t *tmp_file_node = NULL;
    file_context_process_node_t *tmp_process_node = NULL;

    tmp_file_node = lookup_or_add_file_modify_node(&table->common_table, &info->key);
    if (!tmp_file_node)
    {
        return false;
    }

    tmp_process_node = lookup_or_add_process_node(&tmp_file_node->process_lookup, info->pid_key);
    if (!tmp_process_node)
    {
        put_file_modify_node(tmp_file_node);
        return false;
    }

    put_process_node(tmp_process_node);
    put_file_modify_node(tmp_file_node);

    return true;
}

bool add_file_modify_cache(const file_context_add_cache_request_t *info)
{
    bool ret = false;
    file_context_big_table_t *table = get_file_context_close_modified_table();
    if (!table)
    {
        return false;
    }

    ret = add_file_modify_cache_(table, info);

    if (ret)
    {
        DPRINTF("add_file_modify_cache: %llu", info->key.file_key.ptr);
    }
    else
    {
        EPRINTF("add_file_modify_cache failed: %llu", info->key.file_key.ptr);
    }

    put_file_context_big_table(table);
    return ret;
}

// This function should be called on close; it removes the process entry (and, when the process tree becomes empty, the file entry) from the tables
bool check_update_file_modify_cache(file_context_info_t *info)
{
    bool modified = false;
    file_context_common_node_t *common_node = NULL;
    file_context_file_modify_node_t *file_node = NULL;
    erase_tree_result_t erase_result;
    file_context_big_table_t *table = get_file_context_close_modified_table();
    if (!table)
    {
        return false;
    }
    common_node = lookup_common_node(&table->common_table, info->msg_info.key.file_key.ptr, table->common_table.hashbits);
    if (!common_node)
    {
        goto out_free_table;
    }

    file_node = to_file_modify_node(common_node);
    if (cmp_file_context_key(&file_node->key, &info->msg_info.key) != 0)
    {
        goto out_free_file;
    }

    erase_result = erase_tree_node(&file_node->process_lookup, info->pid_key);
    if (!erase_result.erased)
    {
        goto out_free_file;
    }

    modified = true;
    // TODO: This is a race with someone who wants to insert new process node - add a mutex around MODIFY calls.
    if (erase_result.size == 0) {
        remove_common_node(&table->common_table, common_node);
    }

out_free_file:
    put_file_modify_node(file_node);
out_free_table:
    put_file_context_big_table(table);
    return modified;
}

int acquire_file_modify_entry(void)
{
    file_context_big_table_t* table = init_big_table(FILE_CONTEXT_BIG_TABLE_EXPIRE_TIME_MS);
    if (!table)
    {
        EPRINTF("acquire_file_modify_entry: init_file_context_entry failed");
        return -ENOMEM;
    }

    spin_lock(&global_fs_event_cache_manager.writer_lock);
    if (global_fs_event_cache_manager.close_modified_table)
    {
        WPRINTF("acquire_file_context_entry already exists");
    }
    else
    {
        rcu_assign_pointer(global_fs_event_cache_manager.close_modified_table, table);
        table = NULL;
    }
    spin_unlock(&global_fs_event_cache_manager.writer_lock);

    if (table)
    {
        put_file_context_big_table(table);
    }

    return 0;
}

void release_file_modify_entry(void)
{
    file_context_big_table_t *table = NULL;

    spin_lock(&global_fs_event_cache_manager.writer_lock);
    if (global_fs_event_cache_manager.close_modified_table)
    {
        table = global_fs_event_cache_manager.close_modified_table;
        rcu_assign_pointer(global_fs_event_cache_manager.close_modified_table, NULL);
    }
    spin_unlock(&global_fs_event_cache_manager.writer_lock);

    if (table)
    {
        put_file_context_big_table(table);
    }
}

static unsigned long to_expire_time_ms(file_context_table_type_t type)
{
    switch (type)
    {
    case FILE_CONTEXT_OPEN_TABLE:
        return FILE_CONTEXT_BIG_TABLE_EXPIRE_TIME_MS;
    case FILE_CONTEXT_READ_TABLE:
    case FILE_CONTEXT_WRITE_TABLE:
        return FILE_CONTEXT_RW_TABLE_EXPIRE_TIME_MS;
    }

    return FILE_CONTEXT_BIG_TABLE_EXPIRE_TIME_MS;
}

int acquire_file_context_table(transport_id_t transport_id, file_context_table_type_t type)
{
    int ret = -EFAULT;
    int idx = transport_id_index(transport_id);
    file_context_tables_t *tables;
    file_context_big_table_t* table;
    // Guard against a bad index before touching tables_for_transport[]
    if (idx < 0 || idx >= MAX_TRANSPORT_SIZE) {
        return -EINVAL;
    }

    table = init_big_table(to_expire_time_ms(type));
    if (!table) {
        return -ENOMEM;
    }

    spin_lock(&global_fs_event_cache_manager.writer_lock);
    tables = global_fs_event_cache_manager.tables_for_transport[idx];
    if (tables && tables->transport_id == transport_id)
    {
        file_context_big_table_t** ptable = NULL;
        switch (type)
        {
        case FILE_CONTEXT_OPEN_TABLE:
            ptable = &tables->open_table;
            break;
        case FILE_CONTEXT_READ_TABLE:
            ptable = &tables->read_table;
            break;
        case FILE_CONTEXT_WRITE_TABLE:
            ptable = &tables->write_table;
            break;
        }

        // Check if table pointer is sane + table does not exist yet
        if (ptable && !(*ptable))
        {
            rcu_assign_pointer(*ptable, table);
            table = NULL;
            ret = 0;
        }
    }
    spin_unlock(&global_fs_event_cache_manager.writer_lock);

    // failure condition when table was not assigned
    if (table)
    {
        put_file_context_big_table(table);
    }

    return ret;
}
