bpftrace系统分析脚本实战

观书散遗帙,探古穷至妙。这篇文章主要讲述bpftrace系统分析脚本实战相关的知识,希望能为你提供帮助。
最近ebpf比较火,作为运维,大家应该比较关注ebpf对系统问题分析定位的能力。下面分享一些我实际在用的bpftrace脚本案例。


案例一,重要文件被删除

有时候线上某个重要文件不翼而飞,我们需要知道是谁删的,可以利用相关跟踪点找到罪魁祸首。

  • 通过静态跟踪点跟踪unlink相关的跟踪点(tracepoint)
#!/usr/local/bin/bpftrace

/*
 * rm.bt - print one line for every unlinkat() system call, showing when it
 * happened, which user and process issued it, and the path being removed.
 *
 * Arguments of tracepoint:syscalls:sys_enter_unlinkat:
 *   int __syscall_nr
 *   int dfd
 *   const char * pathname
 *   int flag
 */

// Print the column header once, before any event is reported.
BEGIN
{
    printf("%-20s %-10s %-10s %-10s %-20s %s\n", "TIME", "USERNAME", "UID", "PID", "COMM", "FNAME");
}

// Fires on entry to unlinkat(); args->pathname is a user-space pointer,
// so it is copied into a printable string with str().
tracepoint:syscalls:sys_enter_unlinkat
{
    printf("%-20s %-10s %-10u %-10u %-20s %s\n", strftime("%F-%T", nsecs), username, uid, pid, comm, str(args->pathname));
}


# 执行结果
$ sudo bpftrace rm.bt
Attaching 2 probes...
TIME                 USERNAME   UID        PID        COMM                 FNAME
2021-02-26-18:35:02  obbobbxy   1008       130183     rm                   abc
2021-02-26-18:35:02  obbobbxy   1008       130183     rm                   123
2021-02-26-18:35:26  obbobbxy   1008       130209     rm                   123
2021-02-26-18:35:34  obbobbxy   1008       130212     rm                   123
2021-02-26-18:35:38  root       0          130508     rm                   /var/rudder/cfengine-community/state/cf_lock.lmdb
2021-02-26-18:35:38  root       0          130509     rm                   /var/rudder/cfengine-community/state/cf_lock.lmdb.lock
2021-02-26-18:35:40  obbobbxy   1008       130704     rm                   abc
2021-02-26-18:35:40  obbobbxy   1008       130704     rm                   123
2021-02-26-18:36:05  root       0          634        systemd-logind       S.gpg-agent
2021-02-26-18:36:05  root       0          634        systemd-logind       S.gpg-agent.extra
2021-02-26-18:36:05  root       0          634        systemd-logind       S.gpg-agent.ssh
2021-02-26-18:36:05  root       0          634        systemd-logind       S.dirmngr
2021-02-26-18:36:05  root       0          634        systemd-logind       S.gpg-agent.browser
2021-02-26-18:36:05  root       0          634        systemd-logind       gnupg
2021-02-26-18:36:05  root       0          634        systemd-logind       private
2021-02-26-18:36:05  root       0          634        systemd-logind       notify
2021-02-26-18:36:05  root       0          634        systemd-logind       transient
2021-02-26-18:36:05  root       0          634        systemd-logind       systemd

  • 通过动态跟踪点跟踪unlink相关的跟踪点(kprobe)
#!/usr/local/bin/bpftrace

// header file path: /usr/src/linux-headers-$(uname -r | sed s/-amd64//)-common/include
#include <linux/dcache.h>

/*
 * watch_rm.bt - report file and directory removals by probing the kernel
 * functions vfs_unlink and vfs_rmdir.
 *
 * Usage: watch_rm.bt [duration_seconds [max_records]]
 *   $1  run time in seconds; 10 is used when it is missing or 0
 *   $2  record limit; 0 or missing disables the limit
 *
 * References:
 *   https://www.kernel.org/doc/htmldocs/filesystems/API-vfs-unlink.html
 *     int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode);
 *   https://docs.huihoo.com/doxygen/linux/kernel/3.7/structdentry.html
 *     struct dentry -> struct qstr d_name
 *   https://docs.huihoo.com/doxygen/linux/kernel/3.7/structqstr.html
 *     struct qstr d_name -> const unsigned char *name
 *   filename: ((struct dentry *)arg1)->d_name.name
 *
 * NOTE(review): arg1 is the dentry only for the signature quoted above;
 * confirm the argument position against the running kernel.
 */

// Initialise the counters and limits, then print the column header.
BEGIN
{
    @t = 0;          // seconds elapsed
    @c = 0;          // records printed so far
    @duration = $1;
    @maxcount = $2;
    if (@duration == 0) {
        @duration = 10;
    }

    printf("%-20s %-5s %-10s %-10s %-10s %-20s %s\n", "TIME", "TYPE", "USERNAME", "UID", "PID", "COMM", "FNAME");
}

// A regular file is being unlinked.
kprobe:vfs_unlink
{
    printf("%-20s %-5s %-10s %-10u %-10u %-20s %s\n", strftime("%F-%T", nsecs), "file", username, uid, pid, comm, str(((struct dentry *)arg1)->d_name.name));
    @c++;
}

// A directory is being removed.
kprobe:vfs_rmdir
{
    printf("%-20s %-5s %-10s %-10u %-10u %-20s %s\n", strftime("%F-%T", nsecs), "dir", username, uid, pid, comm, str(((struct dentry *)arg1)->d_name.name));
    @c++;
}

// Once per second: advance the clock and stop when either limit is reached.
// The record limit is only evaluated here, so more than @maxcount records
// can be printed within the final second.
interval:s:1
{
    @t++;
    if (@t >= @duration) {
        clear(@t);
        clear(@c);
        clear(@maxcount);
        exit();
    }

    if (@maxcount != 0 && @c > @maxcount) {
        clear(@t);
        clear(@c);
        clear(@duration);
        exit();
    }
}


# 脚本默认运行10s,可以通过第一个位置参数指定运行时长(例如 sudo ./watch_rm.bt 30);运行期间会打印所有删除文件/目录的进程。第二个位置参数可以指定最多获取的记录数。
# 例如 sudo ./watch_rm.bt 5 30 表示脚本最多运行5s、最多获取30条记录;如果不指定第二个参数,则只按超时退出。
$ sudo ./watch_rm.bt
Attaching 4 probes...
TIMETYPEUSERNAMEUIDPIDCOMMFNAME
2021-02-23-22:43:59dirobbobbxy1008187638rmdir1
2021-02-23-22:43:59dirobbobbxy1008187639rmdir2
2021-02-23-22:43:59dirobbobbxy1008187640rmdir3
2021-02-23-22:43:59fileobbobbxy1008187641rmfile1
2021-02-23-22:43:59fileobbobbxy1008187642rmfile2
2021-02-23-22:43:59fileobbobbxy1008187643rmfile3
2021-02-23-22:43:59fileobbobbxy1008187644rmfile4
2021-02-23-22:43:59fileobbobbxy1008187645rmfile5
2021-02-23-22:43:59fileobbobbxy1008187646rmfile6
2021-02-23-22:43:59fileobbobbxy1008187647rmfile7
2021-02-23-22:43:59fileobbobbxy1008187648rmfile8
2021-02-23-22:43:59fileobbobbxy1008187649rmfile9

案例二,进程top流量
#!/usr/local/bin/bpftrace

/*
 * socksize.bt - per-process histograms of the byte counts returned by
 * sock_recvmsg and sock_sendmsg, keyed by command name.
 */

#include <linux/fs.h>
#include <net/sock.h>

// On entry, remember the first argument of the call for the current thread.
kprobe:sock_recvmsg,
kprobe:sock_sendmsg
{
    @socket[tid] = arg0;
}

// On return from a receive, add the returned size to the read histogram.
kretprobe:sock_recvmsg
{
    // Presumably filters out error returns, which would show up as very
    // large values - TODO confirm.
    if (retval < 0x7fffffff) {
        @read_bytes[comm] = hist(retval);
    }

    delete(@socket[tid]);
}

// On return from a send, add the returned size to the write histogram.
kretprobe:sock_sendmsg
{
    if (retval < 0x7fffffff) {
        @write_bytes[comm] = hist(retval);
    }

    delete(@socket[tid]);
}

// Drop the bookkeeping map so only the histograms are printed at exit.
END
{
    clear(@socket);
}


# 以直方图形式输出进程socket读写字节数,如果需要,则可以加上pid/sport/dport
$ sudo bpftrace socksize.bt
Attaching 5 probes...
^C

@read_bytes[ospfd]:
[64, 128)1 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|

@read_bytes[sshd]:
[32, 64)1 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|

@read_bytes[java]:
[8, 16)2 |@@@@@@@@@@@|
[16, 32)9 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[32, 64)5 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[64, 128)2 |@@@@@@@@@@@|

@read_bytes[nginx]:
[0]107 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[1]1 ||
[2, 4)0 ||
[4, 8)0 ||
[8, 16)0 ||
[16, 32)0 ||
[32, 64)0 ||
[64, 128)20 |@@@@@@@@@|
[128, 256)113 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[256, 512)103 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[512, 1K)35 |@@@@@@@@@@@@@@@@|
[1K, 2K)5 |@@|


# 使用bcc的tcptop工具也可以做到
$ sudo /usr/share/bcc/tools/tcptop 1 1
Tracing... Output every 1 secs. Hit Ctrl-C to end
23:37:39 loadavg: 0.49 0.48 0.35 2/831 85332

PIDCOMMLADDRRADDRRX_KBTX_KB
82242nginx192.168.3.185:52456192.168.9.57:713024
82242nginx192.168.3.185:47782192.168.7.66:713024
82242nginx192.168.3.185:43064192.168.1.51:713024



案例三,redis 热key发现
# 查看redis源码,可以跟踪lookupkey相关的函数
#!/usr/local/bin/bpftrace

/*
 * redis-uprobe.bt - count key lookups inside /usr/bin/redis-server to find
 * hot keys.
 *
 * Usage: redis-uprobe.bt [duration_seconds]
 *   $1  tracing time in seconds; 10 is used when it is missing or not > 0
 */

// Initialise the timer and announce how long tracing will run.
BEGIN
{
    @t = 0;
    @duration = 10;
    if ($1 > 0) {
        @duration = $1;
    }

    printf("Tracing /usr/bin/redis-server %d seconds: \n\n", @duration);
}

/*
https://github.com/redis/redis/blob/8f9958dc24fa5992d3d10f6b9caf999e1beee4e5/src/server.h#L666:
typedef struct redisObject {
    unsigned type:4;
    unsigned encoding:4;
    unsigned lru:LRU_BITS;
    int refcount;
    void *ptr;
} robj;

https://github.com/redis/redis/blob/8f9958dc24fa5992d3d10f6b9caf999e1beee4e5/src/db.c#L168:
robj *lookupKeyReadOrReply(client *c, robj *key, robj *reply)
*/

// Note: lookupKey is called for both get and set; lookupKeyReadOrReply is
// only called for get.
//u:/usr/bin/redis-server:lookupKeyReadOrReply,
u:/usr/bin/redis-server:lookupKey
{
    // arg1: robj *key
    // $key: string read at key + (4*4) bytes  # unsigned 4 bytes, int 4 bytes
    // NOTE(review): the 16-byte offset depends on the robj layout of the
    // redis build being traced - confirm before relying on the output.
    $key = str(arg1+16);
    @keys[$key] = count();
}

// On exit, print the 10 most frequently looked-up keys.
END
{
    print(@keys, 10);
    clear(@keys);
}

// Once per second: advance the timer and stop after @duration seconds.
interval:s:1
{
    @t++;
    if (@t > @duration) {
        print(@keys);
        clear(@keys);
        clear(@t);
        clear(@duration);
        exit();
    }
}



$ sudo ./redis-uprobe.bt 99999  # 默认10秒后自动退出,可以通过第一个位置参数传入跟踪时长;最多输出top10的key。同时在另外一个终端执行 /usr/bin/redis-benchmark -q 模拟redis请求
Attaching 4 probes...
Tracing /usr/bin/redis-server 99999 seconds:

@keys[counter:__rand_int__]: 100000
@keys[myset]: 200000
@keys[mylist]: 900000
@keys[key:__rand_int__]: 1200000

@duration: 99999
@t: 91



案例四,审计系统shell命令执行记录

通常公司因为某些资质申请或者证书审核的原因,会需要做一些系统安全或者日志审计,需要有服务器命令行执行记录。我们通常使用auditd工具实现;同样地,利用ebpf的uprobe能力也能简单实现相关的功能。
#!/usr/local/bin/bpftrace

/*
 * bash-readline.bt - log every command that /bin/bash executes through
 * shell_execve, with a timestamp, the PID and the user name.
 */

// Print the column header once.
BEGIN
{
    printf("%-20s %-6s %-10s %s\n", "TIME", "PID", "USERNAME", "COMMAND");
}

/*
int shell_execve PARAMS((char *, char **, char **));
shell_execve (command, args, env);
*/
u:/bin/bash:shell_execve
{
    // The timestamp is padded to the same %-20s width as the header so the
    // TIME and PID columns stay separated and aligned.
    printf("%-20s %-6d %-10s ", strftime("%F-%T", nsecs), pid, username);
    join(arg1); // argv
}


$ sudo ./bash-readline.bt
Attaching 2 probes...
TIMEPIDUSERNAMECOMMAND
2021-02-28-19:16:3165488obbobbxybash a.sh
2021-02-28-19:16:3165489obbobbxy/bin/echo 123 300c
2021-02-28-19:16:3165490obbobbxy/bin/echo abc asdfadf
2021-02-28-19:16:3165491obbobbxywhoami
2021-02-28-19:16:3165492obbobbxybpftrace --version
2021-02-28-19:16:3465495obbobbxycat a.sh
2021-02-28-19:17:1565523rootps aux
2021-02-28-19:17:1565524rootgrep /usr/bin/osquery
2021-02-28-19:17:1565525rootgrep -v grep
2021-02-28-19:17:1565526rootwc -l



小结

目前debian9以上系统都是4.x内核,基本上利用bpftrace或者bcc-tools就能实现许多跟踪点的信息跟踪,辅助我们日常工作,提升问题定位能力。
debian11以后系统默认都启用了BTF,CO-RE不远了。

    推荐阅读