name : syscall_common.c
/**
@file
@brief    Common functions and variables for system call hooks
@details  Copyright (c) 2017-2022 Acronis International GmbH
@author   Mikhail Molchanov ([email protected])
@since    $Id: $
*/

#include <linux/version.h>

#include "syscall_common.h"

#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 8, 0)

#include "compat.h"
#include "debug.h"
#include "fs_syscall_hooks.h"
#include "lsm_common.h"
#include "write_protection.h"

#include <asm/ia32_unistd.h>	// for ia32_sys_call_table '__NR_ia32_*' system call function indices
#include <linux/compiler.h>
#include <linux/delay.h>	// loops_per_jiffy
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/syscalls.h>	// sys_close
#include <linux/types.h>	// bool, size_t, [u]int(8|16|32|64)_t
#include <linux/unistd.h>	// for sys_call_table '__NR_*' system call function indices

// Forward declaration: the definition is further down, attach_hooks() calls it first.
static int syscall_hooks_resolve_symbols(void);
// Syscall table pointers; they stay NULL until syscall_hooks_resolve_symbols()
// stores the addresses it has found.
static void **p_sys_call_table = NULL;
static void **p_ia32_sys_call_table = NULL;
// Set at the end of attach_hooks() and cleared at the end of detach_hooks().
static bool hooks_attached = false;

// Expands to HOOK_TRAMPOLINE_ASM(<trampoline name>, <hook name>) for the given
// (abi, tag) pair, followed by the C prototype of the trampoline.
// NOTE(review): HOOK_TRAMPOLINE_ASM is defined elsewhere; presumably it emits
// an assembly stub that transfers control to the hook - confirm in its header.
#define HOOK_TRAMPOLINE(abi, tag) HOOK_TRAMPOLINE_ASM(SYSCALL_HOOK_TRAMPOLINE(abi, tag), SYSCALL_HOOK_NAME(abi, tag)) \
void SYSCALL_HOOK_TRAMPOLINE(abi, tag) (void);

// Trampolines for the 'sys' ABI; one per entry of hook_table, in the same order.
HOOK_TRAMPOLINE(sys, creat)

HOOK_TRAMPOLINE(sys, open)
HOOK_TRAMPOLINE(sys, openat)

HOOK_TRAMPOLINE(sys, close)

HOOK_TRAMPOLINE(sys, read)
HOOK_TRAMPOLINE(sys, pread64)
HOOK_TRAMPOLINE(sys, readv)
HOOK_TRAMPOLINE(sys, preadv)
HOOK_TRAMPOLINE(sys, preadv2)

HOOK_TRAMPOLINE(sys, write)
HOOK_TRAMPOLINE(sys, pwrite64)
HOOK_TRAMPOLINE(sys, writev)
HOOK_TRAMPOLINE(sys, pwritev)
HOOK_TRAMPOLINE(sys, pwritev2)

HOOK_TRAMPOLINE(sys, rename)
HOOK_TRAMPOLINE(sys, renameat)
HOOK_TRAMPOLINE(sys, renameat2)

HOOK_TRAMPOLINE(sys, unlink)
HOOK_TRAMPOLINE(sys, unlinkat)

// Trampolines for the 'ia32_sys' / 'compat_sys' ABIs; one per entry of
// ia32_hook_table, in the same order.
HOOK_TRAMPOLINE(ia32_sys, creat)

HOOK_TRAMPOLINE(ia32_sys, open)
HOOK_TRAMPOLINE(ia32_sys, openat)

HOOK_TRAMPOLINE(ia32_sys, close)

HOOK_TRAMPOLINE(ia32_sys, read)
HOOK_TRAMPOLINE(compat_sys, pread64)
HOOK_TRAMPOLINE(compat_sys, readv)
HOOK_TRAMPOLINE(compat_sys, preadv)
HOOK_TRAMPOLINE(compat_sys, preadv2)

HOOK_TRAMPOLINE(ia32_sys, write)
HOOK_TRAMPOLINE(compat_sys, pwrite64)
HOOK_TRAMPOLINE(compat_sys, writev)
HOOK_TRAMPOLINE(compat_sys, pwritev)
HOOK_TRAMPOLINE(compat_sys, pwritev2)

HOOK_TRAMPOLINE(ia32_sys, rename)
HOOK_TRAMPOLINE(ia32_sys, renameat)
HOOK_TRAMPOLINE(ia32_sys, renameat2)

HOOK_TRAMPOLINE(ia32_sys, unlink)
HOOK_TRAMPOLINE(ia32_sys, unlinkat)

// Descriptors of the hooks that attach_hooks()/detach_hooks() plant into and
// remove from the table pointed to by p_sys_call_table.
// The number of entries must equal TOTAL_HOOKS_COUNT; this is enforced by a
// BUILD_BUG_ON() in syscall_hooks_resolve_symbols().
// NOTE(review): the third DEFINE_HOOK_DESC argument presumably initializes the
// 'enabled' field - confirm against the macro definition.
struct hook_desc hook_table[] = {
	DEFINE_HOOK_DESC(sys, creat, true),

	DEFINE_HOOK_DESC(sys, open, true),
	DEFINE_HOOK_DESC(sys, openat, true),

	DEFINE_HOOK_DESC(sys, close, true),

	DEFINE_HOOK_DESC(sys, read, true),
	DEFINE_HOOK_DESC(sys, pread64, true),
	DEFINE_HOOK_DESC(sys, readv, true),
	DEFINE_HOOK_DESC(sys, preadv, true),
	DEFINE_HOOK_DESC(sys, preadv2, PREADV2_ENABLED),

	DEFINE_HOOK_DESC(sys, write, true),
	DEFINE_HOOK_DESC(sys, pwrite64, true),
	DEFINE_HOOK_DESC(sys, writev, true),
	DEFINE_HOOK_DESC(sys, pwritev, true),
	DEFINE_HOOK_DESC(sys, pwritev2, PWRITEV2_ENABLED),

	DEFINE_HOOK_DESC(sys, rename, true),
	DEFINE_HOOK_DESC(sys, renameat, true),
	DEFINE_HOOK_DESC(sys, renameat2, RENAMEAT2_ENABLED),

	DEFINE_HOOK_DESC(sys, unlink, true),
	DEFINE_HOOK_DESC(sys, unlinkat, true)
};

// Descriptors of the hooks that attach_hooks()/detach_hooks() plant into and
// remove from the table pointed to by p_ia32_sys_call_table.
// The number of entries must equal TOTAL_IA32_HOOKS_COUNT; this is enforced by
// a BUILD_BUG_ON() in syscall_hooks_resolve_symbols().
// NOTE(review): the difference between DEFINE_IA32_HOOK_DESC and
// DEFINE_COMPAT_HOOK_DESC is not visible here - see their definitions.
struct hook_desc ia32_hook_table[] = {
	DEFINE_IA32_HOOK_DESC(ia32_sys, creat, true),

	DEFINE_IA32_HOOK_DESC(ia32_sys, open, true),
	DEFINE_IA32_HOOK_DESC(ia32_sys, openat, true),

	DEFINE_IA32_HOOK_DESC(ia32_sys, close, true),

	DEFINE_IA32_HOOK_DESC(ia32_sys, read, true),
	DEFINE_COMPAT_HOOK_DESC(compat_sys, pread64, true),
	DEFINE_COMPAT_HOOK_DESC(compat_sys, readv, true),
	DEFINE_COMPAT_HOOK_DESC(compat_sys, preadv, true),
	DEFINE_COMPAT_HOOK_DESC(compat_sys, preadv2, PREADV2_ENABLED),

	DEFINE_IA32_HOOK_DESC(ia32_sys, write, true),
	DEFINE_COMPAT_HOOK_DESC(compat_sys, pwrite64, true),
	DEFINE_COMPAT_HOOK_DESC(compat_sys, writev, true),
	DEFINE_COMPAT_HOOK_DESC(compat_sys, pwritev, true),
	DEFINE_COMPAT_HOOK_DESC(compat_sys, pwritev2, PWRITEV2_ENABLED),

	DEFINE_IA32_HOOK_DESC(ia32_sys, rename, true),
	DEFINE_IA32_HOOK_DESC(ia32_sys, renameat, true),
	DEFINE_IA32_HOOK_DESC(ia32_sys, renameat2, RENAMEAT2_ENABLED),

	DEFINE_IA32_HOOK_DESC(ia32_sys, unlink, true),
	DEFINE_IA32_HOOK_DESC(ia32_sys, unlinkat, true)
};

static bool need_syscall_hooks(void)
{
	size_t i;
	for (i = 0; i < TOTAL_HOOKS_COUNT; i++) {
		if (syscall_replaced_by_lsm(hook_table[i].syscall_nr, false /*ia32*/))
		{
			continue;
		}
		if (hook_table[i].enabled) {
			return true;
		}
	}

	for (i = 0; i < TOTAL_IA32_HOOKS_COUNT; i++) {
		if (syscall_replaced_by_lsm(ia32_hook_table[i].syscall_nr, true /*ia32*/))
		{
			continue;
		}
		if (ia32_hook_table[i].enabled) {
			return true;
		}
	}

	return false;
}

/*
 * Plants every enabled hook of hook_table and ia32_hook_table into the
 * corresponding syscall table, remembering the replaced pointer in
 * 'syscall_orig' of each descriptor.
 *
 * Entries whose syscall is reported by syscall_replaced_by_lsm() are left
 * untouched. The tables are patched with local interrupts disabled and with
 * write protection lifted through disable_write_protect().
 *
 * @param safe_mode  when true (and hooks are needed) nothing is patched
 * @return 0 when no hook is needed or all hooks were planted, -EPERM in safe
 *         mode, otherwise the error of syscall_hooks_resolve_symbols()
 */
static int attach_hooks(bool safe_mode)
{
	unsigned long irq_flags;
	cr0_write_protect_t saved_wp;
	size_t idx;
	int rc;

	if (!need_syscall_hooks())
		return 0;

	if (safe_mode) {
		IPRINTF("safe mode is enabled, failing syscall hooks attachment");
		return -EPERM;
	}

	IPRINTF("attaching syscall hooks...");

	rc = syscall_hooks_resolve_symbols();
	if (rc) {
		return rc;
	}

	local_irq_save(irq_flags);
	saved_wp = disable_write_protect();

	for (idx = 0; idx < TOTAL_HOOKS_COUNT; idx++) {
		struct hook_desc *hook = &hook_table[idx];

		if (syscall_replaced_by_lsm(hook->syscall_nr, false /*ia32*/)) {
			continue;
		}
		if (!hook->enabled) {
			DPRINTF("planting '%s_hook()' was skipped because it isn't enabled on current distro", hook->syscall_name);
			continue;
		}

		// save the current table entry before overwriting it
		hook->syscall_orig = (void *)p_sys_call_table[hook->syscall_nr];
		DPRINTF("'%s_orig()' = %p", hook->syscall_name, hook->syscall_orig);

		p_sys_call_table[hook->syscall_nr] = hook->syscall_hook;
		IPRINTF("'%s_hook()' = %p was planted!", hook->syscall_name, hook->syscall_hook);
	}

	for (idx = 0; idx < TOTAL_IA32_HOOKS_COUNT; idx++) {
		struct hook_desc *hook = &ia32_hook_table[idx];

		if (syscall_replaced_by_lsm(hook->syscall_nr, true /*ia32*/)) {
			continue;
		}
		if (!hook->enabled) {
			DPRINTF("planting '%s_hook()' was skipped because it isn't enabled on current distro", hook->syscall_name);
			continue;
		}

		// save the current table entry before overwriting it
		hook->syscall_orig = (void *)p_ia32_sys_call_table[hook->syscall_nr];
		DPRINTF("'%s_orig()' = %p", hook->syscall_name, hook->syscall_orig);

		p_ia32_sys_call_table[hook->syscall_nr] = hook->syscall_hook;
		IPRINTF("'%s_hook()' = %p was planted!", hook->syscall_name, hook->syscall_hook);
	}

	restore_write_protect(saved_wp);
	hooks_attached = true;
	local_irq_restore(irq_flags);

	IPRINTF("syscall hooks attached");
	return 0;
}

// FIXME: 'detach' may fail if pointers in syscall tables pointing to our
// hooks were overwritten by someone else
static int detach_hooks(void)
{
	unsigned long flags;
	cr0_write_protect_t wp;
	size_t i;
	IPRINTF("detaching syscall hooks...");

	local_irq_save(flags);
	wp = disable_write_protect();

	for (i = 0; i < TOTAL_HOOKS_COUNT; i++) {
		if (syscall_replaced_by_lsm(hook_table[i].syscall_nr, false /*ia32*/))
		{
			continue;
		}
		if (hook_table[i].enabled) {
			if (likely(hook_table[i].syscall_hook == (void *)p_sys_call_table[hook_table[i].syscall_nr])) {
				p_sys_call_table[hook_table[i].syscall_nr] = hook_table[i].syscall_orig;
				IPRINTF("%s_orig = %p was removed!", hook_table[i].syscall_name, hook_table[i].syscall_orig);
			} else {
				EPRINTF("sys_call_table pointer to '%s()' differs from expected, "
					"not replacing it with the original one", hook_table[i].syscall_name);
			}
		} else {
			DPRINTF("planting '%s_orig()' was skipped because corresponding hook isn't enabled on current distro", hook_table[i].syscall_name);
		}
	}

	for (i = 0; i < TOTAL_IA32_HOOKS_COUNT; i++) {
		if (syscall_replaced_by_lsm(ia32_hook_table[i].syscall_nr, true /*ia32*/))
		{
			continue;
		}
		if (ia32_hook_table[i].enabled) {
			if (likely(ia32_hook_table[i].syscall_hook == (void *)p_ia32_sys_call_table[ia32_hook_table[i].syscall_nr])) {
				p_ia32_sys_call_table[ia32_hook_table[i].syscall_nr] = ia32_hook_table[i].syscall_orig;
				IPRINTF("%s_orig = %p was removed!", ia32_hook_table[i].syscall_name, ia32_hook_table[i].syscall_orig);
			} else {
				EPRINTF("ia32_sys_call_table pointer to '%s()' differs from expected, "
					"not replacing it with the original one", ia32_hook_table[i].syscall_name);
			}
		} else {
			DPRINTF("planting '%s_orig()' was skipped because corresponding hook isn't enabled on current distro", ia32_hook_table[i].syscall_name);
		}
	}

	restore_write_protect(wp);
	hooks_attached = false;
	local_irq_restore(flags);

	IPRINTF("syscall hooks detached");
	return 0;
}

/*
 * Public entry point: attaches the syscall hooks unless they are attached
 * already.
 *
 * @param safe_mode  forwarded to attach_hooks()
 * @return -EINVAL when the hooks are already attached, otherwise the result
 *         of attach_hooks()
 */
int syscall_hooks_attach(bool safe_mode)
{
	if (!hooks_attached) {
		IPRINTF("module_refcount()=%lu", module_refcount_compat(THIS_MODULE));
		return attach_hooks(safe_mode);
	}

	EPRINTF("syscall hooks are already attached");
	return -EINVAL;
}

int syscall_hooks_detach(void)
{
	int r = detach_hooks();
	IPRINTF("module_refcount()=%lu", module_refcount_compat(THIS_MODULE));
	return r;
}

/*
    In the easiest case 'syscall table' is 'exported' and is directly
    accessible by its name.

    Debian (since at least Debian 8, kernel v3.16) does not export
    'sys_call_table' nor 'ia32_sys_call_table'.

    Probably 'sys_call_table' symbol was unexported in 2.5.41

    In intermediate case 'syscall table' is 'not exported' but can be
    found by 'kallsyms_lookup_name()' or via 'kallsyms_on_each_symbol()'
    if 'kallsyms_lookup_name()' itself is not exported (for example on
    CentOS 6.x).

    'kallsyms_lookup_name()' was exported since 'stable/v2.6.33' but
    unexported since 'stable/v5.7'.

    In worst case 'syscall table' can be found by some 'heuristic'.

    Heuristic must check if (while searching for 'sys_call_table') we
    just found our variable 'p_sys_call_table' instead of real
    'sys_call_table' (it was actually the case, when our variable was
    named 'sys_call_table').

    Some collected facts:

    CentOS 6.0  2.6.32-71.el6.x86_64    LINUX_VERSION_CODE=2.6.32   RHEL_RELEASE_CODE=6.0
    CentOS 6.5  2.6.32-431.el6.x86_64   LINUX_VERSION_CODE=2.6.32   RHEL_RELEASE_CODE=6.5
    CentOS 6.10 2.6.32-754.el6.x86_64   LINUX_VERSION_CODE=2.6.32   RHEL_RELEASE_CODE=6.9

        'kallsyms_lookup_name' is not exported
        'kallsyms_on_each_symbol' is exported

    CentOS  7.0  3.10.0-123.el7.x86_64  LINUX_VERSION_CODE=3.10.0   RHEL_RELEASE_CODE=7.0
    CentOS  7.9  3.10.0-1160.el7.x86_64 LINUX_VERSION_CODE=3.10.0   RHEL_RELEASE_CODE=7.9
    Debian  9.5  4.9.0-7-amd64          LINUX_VERSION_CODE=4.9.110
    Debian 10.4  4.19.0-9-amd64         LINUX_VERSION_CODE=4.19.118
    Ubuntu 16.04 4.4.0-31-generic       LINUX_VERSION_CODE=4.4.13

        Both 'kallsyms_lookup_name' and 'compat_kallsyms_lookup_name'
        return address of 'sys_call_table' and 'ia32_sys_call_table'

    Debian 8.0   3.16.0-4-amd64         LINUX_VERSION_CODE=3.16.7

        Both 'kallsyms_lookup_name' and 'kallsyms_on_each_symbol' do not
        return address of 'sys_call_table' nor 'ia32_sys_call_table'.

        It is the case when syscall table search heuristic is necessary.
*/
/*
 * NOTE: none of the lookup functions below is marked '__init'. They are
 * called from syscall_hooks_resolve_symbols(), which is reached through
 * syscall_hooks_attach(); neither of those is an init-only function, so the
 * lookups must not live in the init section.
 */
#if defined USE_FIND_SYSCALL_TABLE_HEURISTIC

/*
 * Resolves the kernel symbol 'name' into the local variable of the same name.
 * On failure logs an error and makes the enclosing function return NULL.
 * 'nr' is the syscall index; it is used for the debug trace only.
 */
#define LOOKUP(nr, name) \
	name = (unsigned long) compat_kallsyms_lookup_name(#name); \
	if (!name) { \
		EPRINTF("%s(%s) failure", "kallsyms_lookup_name", #name); \
		return NULL; \
	} else { \
		DPRINTF("%s=0x%X %s=0x%p", #nr, nr, #name, (void *)name); \
	}

/*
 * Searches memory for the 'sys_call_table'.
 *
 * Walks word by word from the address of sys_close() up to the address of
 * 'loops_per_jiffy' and accepts the first position whose slots __NR_close,
 * __NR_write and __NR_writev hold the addresses of the respective syscalls.
 * A match located at our own 'p_sys_call_table' variable is rejected.
 *
 * @return address of the table, or NULL when a symbol lookup failed or no
 *         candidate was found
 */
static unsigned long *heuristic_find_sys_call_table(void)
{
	unsigned long *p;
	unsigned long sys_write;
	unsigned long sys_writev;

	LOOKUP(__NR_write , sys_write)
	LOOKUP(__NR_writev, sys_writev)

	for (p = (unsigned long *) sys_close;
	     p < (unsigned long *) &loops_per_jiffy;
	   ++p)
	{
		if ((unsigned long) sys_close == p[__NR_close]
		&& sys_write  == p[__NR_write]
		&& sys_writev == p[__NR_writev])
		{
			if ((unsigned long *)&p_sys_call_table == p) {
				// we have found our own variable, keep searching
				WPRINTF("p==&%s", "p_sys_call_table");
			} else {
				IPRINTF("heuristic: %s=0x%p", "sys_call_table", p);
				return p;
			}
		}
	}

	EPRINTF("%s not found", "sys_call_table");
	return NULL;
}

/*
 * Searches memory for the 'ia32_sys_call_table'.
 *
 * Same walk as heuristic_find_sys_call_table(), but a position is accepted
 * when its slots __NR_ia32_readv, __NR_ia32_pwritev and __NR_ia32_writev hold
 * the addresses of compat_sys_readv, compat_sys_pwritev and
 * compat_sys_writev. A match located at our own 'p_ia32_sys_call_table'
 * variable is rejected.
 *
 * @return address of the table, or NULL when a symbol lookup failed or no
 *         candidate was found
 */
static unsigned long *heuristic_find_ia32_sys_call_table(void)
{
	unsigned long *p;
	unsigned long compat_sys_readv;
	unsigned long compat_sys_pwritev;
	unsigned long compat_sys_writev;

	LOOKUP(__NR_ia32_readv  , compat_sys_readv)
	LOOKUP(__NR_ia32_pwritev, compat_sys_pwritev)
	LOOKUP(__NR_ia32_writev , compat_sys_writev)

	for (p = (unsigned long *) sys_close;
	     p < (unsigned long *) &loops_per_jiffy;
	   ++p)
	{
		if (compat_sys_readv   == p[__NR_ia32_readv]
		&&  compat_sys_pwritev == p[__NR_ia32_pwritev]
		&&  compat_sys_writev  == p[__NR_ia32_writev])
		{
			if ((unsigned long *)&p_ia32_sys_call_table == p) {
				// we have found our own variable, keep searching
				WPRINTF("p==&%s", "p_ia32_sys_call_table");
			} else {
				IPRINTF("heuristic: %s=0x%p", "ia32_sys_call_table", p);
				return p;
			}
		}
	}

	EPRINTF("%s not found", "ia32_sys_call_table");
	return NULL;
}

#undef LOOKUP
#endif

/*
 * Locates 'sys_call_table': first by symbol name, then (only when
 * USE_FIND_SYSCALL_TABLE_HEURISTIC is defined) by the memory search.
 *
 * @return address of the table or NULL when it could not be found
 */
static unsigned long *find_sys_call_table(void)
{
	unsigned long *p = (unsigned long *) compat_kallsyms_lookup_name("sys_call_table");
#if defined USE_FIND_SYSCALL_TABLE_HEURISTIC
	if (!p) {
		p = heuristic_find_sys_call_table();
	}
#endif
	return p;
}

/*
 * Locates 'ia32_sys_call_table': first by symbol name, then (only when
 * USE_FIND_SYSCALL_TABLE_HEURISTIC is defined) by the memory search.
 *
 * @return address of the table or NULL when it could not be found
 */
static unsigned long *find_ia32_sys_call_table(void)
{
	unsigned long *p = (unsigned long *) compat_kallsyms_lookup_name("ia32_sys_call_table");
#if defined USE_FIND_SYSCALL_TABLE_HEURISTIC
	if (!p) {
		p = heuristic_find_ia32_sys_call_table();
	}
#endif
	return p;
}

/*
 * Fills in p_sys_call_table and p_ia32_sys_call_table.
 *
 * Does nothing when both pointers are already set; otherwise both lookups are
 * performed again. Also checks at build time that the hook tables have
 * exactly TOTAL_HOOKS_COUNT / TOTAL_IA32_HOOKS_COUNT entries.
 *
 * @return 0 on success, -ENOSYS when either table could not be found
 */
static int syscall_hooks_resolve_symbols(void)
{
	// !!! This will fail on modern kernels !!!
	// https://stackoverflow.com/questions/78599971/hooking-syscall-by-modifying-sys-call-table-does-not-work
	BUILD_BUG_ON(TOTAL_HOOKS_COUNT != ARRAY_SIZE(hook_table));
	BUILD_BUG_ON(TOTAL_IA32_HOOKS_COUNT != ARRAY_SIZE(ia32_hook_table));

	if (!p_sys_call_table || !p_ia32_sys_call_table) {
		p_sys_call_table = (void **)find_sys_call_table();
		if (!p_sys_call_table) {
			EPRINTF("'%s' syscall table is unavailable", "x86_64");
			return -ENOSYS;
		}
		IPRINTF("'%s' syscall table is at %p", "x86_64", p_sys_call_table);

		p_ia32_sys_call_table = (void **)find_ia32_sys_call_table();
		if (!p_ia32_sys_call_table) {
			EPRINTF("'%s' syscall table is unavailable", "ia32");
			return -ENOSYS;
		}
		IPRINTF("'%s' syscall table is at %p", "ia32", p_ia32_sys_call_table);
	}

	return 0;
}

#else
/*
 * Stub used when the kernel is 6.8.0 or newer (see the LINUX_VERSION_CODE
 * check at the top of this file): no syscall hooks are attached.
 *
 * @param safe_mode  ignored
 * @return always 0
 */
int syscall_hooks_attach(bool safe_mode)
{
	(void)safe_mode;
	return 0;
}

/*
 * Stub used when the kernel is 6.8.0 or newer (see the LINUX_VERSION_CODE
 * check at the top of this file): there is nothing to detach.
 *
 * @return always 0
 */
int syscall_hooks_detach(void)
{
	return 0;
}
#endif

© 2025 Cubjrnet7