前言
为什么要将系统调用定义成宏SYSCALL_DEFINEx?原因在于漏洞CVE-2009-0029、CVE-2010-3301:在Linux 2.6.28及以前版本的内核中,系统调用的32位参数被放入64位寄存器时没有做符号扩展,可能导致系统崩溃或提权。
内核开发者通过将系统调用的所有输入参数都先转化成long类型(64位),再强制转化到相应的类型来规避这个漏洞。
SYSCALL_DEFINEx的宏定义
// 定义位置:\include\linux\syscalls.h
#define SYSCALL_DEFINE0(sname) \
SYSCALL_METADATA(_##sname, 0); \
asmlinkage long sys_##sname(void)
#define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE4(name, ...) SYSCALL_DEFINEx(4, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
#define SYSCALL_DEFINEx(x, sname, ...) \
SYSCALL_METADATA(sname, x, __VA_ARGS__) \
__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
#define __PROTECT(...) asmlinkage_protect(__VA_ARGS__)
#define __SYSCALL_DEFINEx(x, name, ...) \
asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
__attribute__((alias(__stringify(SyS##name)))); \
static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
{ \
long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \
__MAP(x,__SC_TEST,__VA_ARGS__); \
__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
return ret; \
} \
static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
“##”是连接符,__VA_ARGS__代表前面“…”里面的可变参数
SYSCALL_DEFINEx里面的x代表的是系统调用的参数个数
sys_socket系统调用的宏扩展
socket是我们常用的函数,其对应的系统调用在\net\socket.c定义,函数部分如下:
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
int retval;
struct socket *sock;
int flags;
/* Check the SOCK_* constants for consistency. */
BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
...
}
- 1 预处理时,SYSCALL_DEFINE3首先扩展为SYSCALL_DEFINEx(3, _socket, ...),后者再扩展出SYSCALL_METADATA和如下的__SYSCALL_DEFINEx:
__SYSCALL_DEFINEx(3, _socket, int, family, int, type, int, protocol)
SYSCALL_DEFINEx 同时调用了 SYSCALL_METADATA 和 __SYSCALL_DEFINEx,
而 SYSCALL_METADATA 只用于系统调用的跟踪, 因此下面只关注 __SYSCALL_DEFINEx
- 2 __SYSCALL_DEFINEx(3, …)的宏再次扩展如下:
asmlinkage long sys_socket(__MAP(x,__SC_DECL,int, family, int, type, int, protocol)) \
__attribute__((alias(__stringify(SyS_socket)))); \
static inline long SYSC_socket(__MAP(x,__SC_DECL,int, family, int, type, int, protocol)); \
asmlinkage long SyS_socket(__MAP(x,__SC_LONG,int, family, int, type, int, protocol)); \
asmlinkage long SyS_socket(__MAP(x,__SC_LONG,int, family, int, type, int, protocol)) \
{ \
long ret = SYSC_socket(__MAP(x,__SC_CAST,int, family, int, type, int, protocol)); \
__MAP(x,__SC_TEST,int, family, int, type, int, protocol); \
__PROTECT(x, ret,__MAP(x,__SC_ARGS,int, family, int, type, int, protocol)); \
return ret; \
} \
static inline long SYSC_socket(__MAP(x,__SC_DECL,int, family, int, type, int, protocol))
第一行的sys_socket只是函数声明;最后一行的SYSC_socket才是函数定义:它的末尾没有分号,后面紧跟着SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)之后的函数体,因此它才是真正的socket系统调用函数定义
SyS_socket里面又调用到了SYSC_socket了,为何要使用宏定义这样绕来绕去的?
关键在于__MAP、__SC_DECL、__SC_LONG、__SC_CAST、__SC_TEST、__SC_ARGS这几个宏。
一些奇怪的宏的扩展
// 定义位置:\include\linux\syscalls.h
#define __MAP0(m,...)
#define __MAP1(m,t,a) m(t,a)
#define __MAP2(m,t,a,...) m(t,a), __MAP1(m,__VA_ARGS__)
#define __MAP3(m,t,a,...) m(t,a), __MAP2(m,__VA_ARGS__)
#define __MAP4(m,t,a,...) m(t,a), __MAP3(m,__VA_ARGS__)
#define __MAP5(m,t,a,...) m(t,a), __MAP4(m,__VA_ARGS__)
#define __MAP6(m,t,a,...) m(t,a), __MAP5(m,__VA_ARGS__)
#define __MAP(n,...) __MAP##n(__VA_ARGS__)
#define __SC_DECL(t, a) t a
#define __TYPE_IS_L(t) (__same_type((t)0, 0L))
#define __TYPE_IS_UL(t) (__same_type((t)0, 0UL))
// t为long long或unsigned long long 返回真,t为int或long则返回假
#define __TYPE_IS_LL(t) (__same_type((t)0, 0LL) || __same_type((t)0, 0ULL))
#define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a
#define __SC_CAST(t, a) (t) a
#define __SC_ARGS(t, a) a
#define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long))
// 定义位置:\include\linux/bug.h
/* 其中当e为非0的时候,经过两次非操作得到的结果是1,加上负号就是-1,而struct的位字段宽度不允许为负,这样在编译的时候就会报错;当e=0的时候,经过两次非操作仍然是0,-0的结果还是0,可以正常编译。这样就可以在编译期发现错误 */
#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
__MAP(x,__SC_DECL,int, family, int, type, int, protocol)宏展开后: int family, int type,int protocol
__MAP3(__SC_DECL,int, family, int, type, int, protocol) ==> __SC_DECL(int, family),__MAP2(__SC_DECL,int, type, int, protocol) ==>__SC_DECL(int, family),__SC_DECL(int, type),__SC_DECL(int, protocol) ==> int family,int type,int protocol
__MAP(x,__SC_LONG,int, family, int, type, int, protocol)宏展开后: long family, long type,long protocol
__MAP3(__SC_LONG,int, family, int, type, int, protocol) ==> __SC_LONG(int, family),__SC_LONG(int, type),__SC_LONG(int, protocol) ==> long family, long type,long protocol
其中__typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L))意思是如果t是long long类型就返回long long,否则返回long数据类型(64bit)
__MAP(x,__SC_CAST,int, family, int, type, int, protocol)宏展开后: (int) family, (int) type,(int) protocol
__MAP(x,__SC_CAST,int, family, int, type, int, protocol) ==> __SC_CAST(int, family),__SC_CAST(int, type),__SC_CAST(int, protocol) ==> (int) family, (int) type,(int) protocol
__MAP(x,__SC_TEST,int, family, int, type, int, protocol)宏展开后: (void)BUILD_BUG_ON_ZERO(0), (void)BUILD_BUG_ON_ZERO(0), (void)BUILD_BUG_ON_ZERO(0)
__MAP(x,__SC_TEST,int, family, int, type, int, protocol) ==> __SC_TEST(int, family),__SC_TEST(int, type),__SC_TEST(int, protocol) ==> (void)BUILD_BUG_ON_ZERO(0), (void)BUILD_BUG_ON_ZERO(0), (void)BUILD_BUG_ON_ZERO(0)
__MAP(x,__SC_ARGS,int, family, int, type, int, protocol)宏展开后: family, type, protocol
==> __SC_ARGS(int, family),__SC_ARGS(int, type),__SC_ARGS(int, protocol) ==> family, type, protocol
__PROTECT(x, ret,__MAP(x,__SC_ARGS,int, family, int, type, int, protocol))宏展开后: asmlinkage_protect(x, ret, family, type, protocol)
asmlinkage_protect(x, ret, family, type, protocol) ==> __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), "m" (family), "m" (type), "m" (protocol));
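这条汇编语句的主要作用是让栈上的各个参数在函数返回之前一直保持“存活”:asmlinkage函数的参数栈可能属于汇编语言编写的调用者,编译器若把这些栈槽当作可复用的临时空间(例如做尾调用优化),就会破坏调用者保存的寄存器值;同时ret(eax)也经过该语句,保证其后紧跟的return ret不会被重排。详情如下: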
- “”:Assembler Template(汇编模板)为空,即什么都不做,当然也就不会有任何优化操作了
- “=r” (ret): OutputOperands(输出操作),从寄存器中读取系统调用返回值,并赋给ret变量
- “0” (ret), “m” (family), “m” (type), “m” (protocol):InputOperands(输入操作)
“0” (ret):从C变量ret中读取值,并存储到和输出操作同位置的寄存器中
“m” (family), “m” (type), “m” (protocol):从内存中读取各变量值
扩展最初的宏
asmlinkage long sys_socket(__MAP(x,__SC_DECL,int, family, int, type, int, protocol)) \
__attribute__((alias(__stringify(SyS_socket)))); \
扩展为:
asmlinkage long sys_socket(int family,int type,int protocol) __attribute__((alias("SyS_socket"))); // 将sys_socket设置为SyS_socket函数的别名,调用sys_socket就是调用SyS_socket
一些说明:
// 定义位置:\include\linux\stringify.h
#define __stringify_1(x...) #x // #的作用是将宏参数字符串化,如:#do => "do"
#define __stringify(x...) __stringify_1(x)
因此:__stringify(SyS_socket) ==> “SyS_socket”
最后,完整的扩展为:
#define __SYSCALL_DEFINEx(3, _socket, int, family, int, type, int, protocol) \
asmlinkage long sys_socket(int family,int type,int protocol) __attribute__((alias("SyS_socket"))); \
static inline long SYSC_socket(int family,int type,int protocol); \
asmlinkage long SyS_socket(long family, long type,long protocol); \
asmlinkage long SyS_socket(long family, long type,long protocol) \
{ \
long ret = SYSC_socket((int) family, (int) type,(int) protocol); \
(void)BUILD_BUG_ON_ZERO(0), (void)BUILD_BUG_ON_ZERO(0), (void)BUILD_BUG_ON_ZERO(0); \
__asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), "m" (family), "m" (type), "m" (protocol)); \
return ret; \
} \
static inline long SYSC_socket(int family,int type,int protocol)
{
int retval;
struct socket *sock;
int flags;
/* Check the SOCK_* constants for consistency. */
BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
...
}
系统调用sys_socket函数时,其实是调用SyS_socket函数,SyS_socket函数内部又调用SYSC_socket函数(真正执行socket系统调用代码),最后将没有优化的返回值ret返回
参考