Posts malloc源码学习(glibc-2.23)
Post
Cancel

malloc源码学习(glibc-2.23)

学习glibc-2.23源码中malloc相关知识,文章顺序和malloc流程相同。本文仅为自己缕清思路用,因此很多细节和基础没有涉及,可能会比较乱。若你看到了这篇文章,推荐看下面的博客,介绍得可能会更加细致。

参考:

重要结构体


malloc_state(arena)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
struct malloc_state
{
  /* Serialize access.  */
  mutex_t mutex;

  /* Flags (formerly in max_fast).  */
  int flags;

  /* Fastbins */
  mfastbinptr fastbinsY[NFASTBINS];

  /* Base of the topmost chunk -- not otherwise kept in a bin */
  mchunkptr top;

  /* The remainder from the most recent split of a small request */
  mchunkptr last_remainder;

  /* Normal bins packed as described above */
  mchunkptr bins[NBINS * 2 - 2];

  /* Bitmap of bins */
  unsigned int binmap[BINMAPSIZE];

  /* Linked list */
  struct malloc_state *next;

  /* Linked list for free arenas.  Access to this field is serialized
     by free_list_lock in arena.c.  */
  struct malloc_state *next_free;

  /* Number of threads attached to this arena.  0 if the arena is on
     the free list.  Access to this field is serialized by
     free_list_lock in arena.c.  */
  INTERNAL_SIZE_T attached_threads;

  /* Memory allocated from the system in this arena.  */
  INTERNAL_SIZE_T system_mem;
  INTERNAL_SIZE_T max_system_mem;
};

malloc_chunk

1
2
3
4
5
6
7
8
9
10
11
12
struct malloc_chunk {

  INTERNAL_SIZE_T      prev_size;  /* Size of previous chunk (if free).  */
  INTERNAL_SIZE_T      size;       /* Size in bytes, including overhead. */

  struct malloc_chunk* fd;         /* double links -- used only if free. */
  struct malloc_chunk* bk;

  /* Only used for large blocks: pointer to next larger size.  */
  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
  struct malloc_chunk* bk_nextsize;
};

__libc_malloc函数


即malloc函数,负责调用_int_malloc函数进行处理。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
void *
__libc_malloc (size_t bytes)
{
  mstate ar_ptr;
  void *victim;

  void *(*hook) (size_t, const void *)
    = atomic_forced_read (__malloc_hook);
  if (__builtin_expect (hook != NULL, 0))
    return (*hook)(bytes, RETURN_ADDRESS (0));       //程序第一次调用malloc的时候执行,malloc_hook_ini,初始化。之后再重新调用malloc

  arena_get (ar_ptr, bytes);

  victim = _int_malloc (ar_ptr, bytes);
  /* Retry with another arena only if we were able to find a usable arena
     before.  */
  if (!victim && ar_ptr != NULL)
    {
      LIBC_PROBE (memory_malloc_retry, 1, bytes);
      ar_ptr = arena_get_retry (ar_ptr, bytes);
      victim = _int_malloc (ar_ptr, bytes);
    }

  if (ar_ptr != NULL)
    (void) mutex_unlock (&ar_ptr->mutex);

  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
          ar_ptr == arena_for_chunk (mem2chunk (victim)));
  return victim;
}

_int_malloc()函数


malloc处理逻辑全在这个函数中,按流程拆分进行分析。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
static void * _int_malloc (mstate av, size_t bytes)
{
  .............

  //将请求的内存大小转为chunk大小(即加上chunk头等必要信息的大小),nb为转换后的大小
  checked_request2size (bytes, nb);

  //如果没有可分配的arenas,则调用sysmalloc利用mmap进行内存分配
  if (__glibc_unlikely (av == NULL))
    {
      void *p = sysmalloc (nb, av);        
      if (p != NULL)
	alloc_perturb (p, bytes);
      return p;
    }

fastbin处理

fastbin保存在mstate中的fastbinY数组。 64位下,fastbin最大值为0x80=128,保存在全局变量global_max_fast中。mstate中的fastbinY数组,一共有10项,对应大小为:0x20,0x30,0x40,0x50,0x60,0x70,0x80,0x90,0xA0,0xB0。不知道为何有0x90,0xA0和0xB0,后续学习时关注一下。fastbin对应项所标识的大小,不是malloc的大小,而是chunk的大小。这里贴个图。

fastbin

fastbin 是单链表结构,bk指针没有用到,采用先进后出(LIFO)的方式,即:free时把chunk放入链表最前面(也就是fastbin数组对应项指针指向的地方),malloc时从最前面取出chunk分配。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
  /***************
     fastbin处理
   **************/

  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
    {
      idx = fastbin_index (nb);
      mfastbinptr *fb = &fastbin (av, idx);
      mchunkptr pp = *fb;
      do
        {
          victim = pp;
          // fastbin中没有找到对应的可分配chunk,跳出循环
          if (victim == NULL)
            break;
        }
      while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
             != victim);

catomic_compare_and_exchange_val_acq()函数取出链表中第一项,并将该项的下一个chunk放在链表开头(即出链表):

1
2
3
4
5
6
7
8
9
/* The only basic operation needed is compare and exchange.  */
#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
  ({ __typeof (mem) __gmemp = (mem);				      \
     __typeof (*mem) __gret = *__gmemp;				      \
     __typeof (*mem) __gnewval = (newval);			      \
								      \
     if (__gret == (oldval))					      \
       *__gmemp = __gnewval;					      \
     __gret; })

检查分配出来的fastbin项是否合规,并返回指针:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
      // 如果上面do-while循环中,victim为空,则跳过从fastbin中分配chunk的阶段,进入smallbin阶段
      if (victim != 0)
        {
          // 检查victim的chunk大小是否符合当前index
          if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0)) 
            {
              errstr = "malloc(): memory corruption (fast)";
            errout:
              malloc_printerr (check_action, errstr, chunk2mem (victim), av);
              return NULL;
            }
          // 检查
          check_remalloced_chunk (av, victim, nb);
          void *p = chunk2mem (victim);
          alloc_perturb (p, bytes);
          return p;
        }
    }

具体执行检查的函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
  static void
do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
{
  // 去除标志位
  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
  // 如果M标志位不存在,检查当前chunk是否属于av,并检查是否属于main arena
  if (!chunk_is_mmapped (p))
    {
      assert (av == arena_for_chunk (p));
      if (chunk_non_main_arena (p))
        assert (av != &main_arena);
      else
        assert (av == &main_arena);
    }
  // check inuse 标志位
  do_check_inuse_chunk (av, p);

  // 检查size是否对齐,大小是否大于最小size
  assert ((sz & MALLOC_ALIGN_MASK) == 0);
  assert ((unsigned long) (sz) >= MINSIZE);
  /* ... and alignment */
  assert (aligned_OK (chunk2mem (p)));
  /* chunk is less than MINSIZE more than request */
  assert ((long) (sz) - (long) (s) >= 0);
  assert ((long) (sz) - (long) (s + MINSIZE) < 0);
}

注意:鉴于设计fast bin的初衷就是进行快速的小内存分配和释放,因此系统将属于fast bin的chunk的PREV_INUSE位总是设置为1,这样即使当fast bin中有某个chunk同一个free chunk相邻的时候,系统也不会进行自动合并操作,而是保留两者。虽然这样做可能会造成额外的碎片化问题,但瑕不掩瑜。

bins处理

bins包括unsorted bin,small bins,large bins,保存在mstate中的bins数组,共129项。bins[0]无效,bins[1]为unsorted bin。各bin中size步长:

1
2
3
4
5
6
7
    64 bins of size       8
    32 bins of size      64
    16 bins of size     512
     8 bins of size    4096
     4 bins of size   32768
     2 bins of size  262144
     1 bin  of size what's left

其中小于512B的bins在前64项,属于smallbins,其余属于largebins。smallbins中每个bin链表的chunk大小相同,可直接通过size索引至对应的index;largebins中每个bin链表的各chunk大小可以不同,需要通过计算获得index(详见largebins)。

另外有一个需要注意的问题,bins数组一共有NBINS*2-2项,乘以2的原因是:bins数组中,对于每个bin,存储的是一个fd和一个bk(双向链表)

smallbins

如果在fastbin中没有找到合适的chunk,则从small bins中查找。64位下,small bin中chunk的最大大小为1024字节:

1
2
3
4
5
6
7
8
9
10
#define NBINS             128
#define NSMALLBINS         64
#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT  //2*SIZE_SZ
#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
// SMALLBIN_CORRECTION = 0, SMALLBIN_WIDTH = MALLOC_ALIGNMENT = 2*SIZE_SZ
// MIN_LARGE_SIZE = (64 - 0) * 2*8 = 1024
#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
// 是否属于smallbin大小范围内
#define in_smallbin_range(sz)  \
  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)

malloc对small bins的处理:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
  /*
     If a small request, check regular bin.  Since these "smallbins"
     hold one size each, no searching within bins is necessary.
     (For a large request, we need to wait until unsorted chunks are
     processed to find best fit. But for small ones, fits are exact
     anyway, so we can check now, which is faster.)
   */

  if (in_smallbin_range (nb))
    {
      // 通过大小nb找到index,进而找到对应的bin
      idx = smallbin_index (nb);
      bin = bin_at (av, idx);
      // 取出当前bin中的最后一项(先进先出)
      if ((victim = last (bin)) != bin)
        {
          // 如果bin中没有项(前面fastbin也没有找到),则认为需要初始化mstate
          if (victim == 0) /* initialization check */
            malloc_consolidate (av);
          else
            {
              // 有符合条件的chunk。取出前一个chunk(victim->bk)
              bck = victim->bk;
              // 判断下一个chunk的fd是否指向自己
	if (__glibc_unlikely (bck->fd != victim))
                {
                  errstr = "malloc(): smallbin double linked list corrupted";
                  goto errout;
                }
              // 设置inuse标志位。fastbin不用设置,因为fastbin中的chunk不会抹去inuse bit
              set_inuse_bit_at_offset (victim, nb);
              // 取出当前chunk(victim)后,修改前一个chunk为链表中最后一个chunk
              bin->bk = bck;
              bck->fd = bin;

              // 和fastbin一样,做一些标志位的检查和设置
              if (av != &main_arena)
                victim->size |= NON_MAIN_ARENA;
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }
        }
    }

largebins 介绍

大于等于1024字节的chunk放在large bins中,称为large chunk。large bins共63个,分为6组。large chunk使用fd_nextsize和bk_nextsize连接起来。特性:

  • 同一个large bin中的每个chunk大小可以不一样,但是有范围限制,详情见下方表格
  • large chunk可以任意从链表中添加/删除,无需先进先出/先进后出
  • 63个largebins中,共分为6组,每组中各项步长相同,后一组步长为上一组步长的8倍。详情如下表所示
  • 同一个large bin中,每个chunk大小不一定相同,为了加快分配和释放速度,同一个bin链表中各个chunk按照size从大到小的顺序排列;相同大小的chunk按照最近使用顺序排列,并用fd/bk指针链接
  • large bins中的free chunk是用fd_nextsize和bk_nextsize来链起来的,并且不会指向当前large bin的头指针(即数组对应项的地址),最后一个chunk的fd_nextsize指向的是链表中的第一项,而链表的第一项的bk_nextsize指向的是链表中的最后一项。这与smallbins不同,smallbin中的最后一个chunk的fd指向当前smallbin的链表头,而链表中第一个chunk的bk指向的也是链表头。 large bins的结构图如下:

largebins

可以看出,largebin是个多级链表,其中大小相同的chunk各有一个链表,用fd/bk指针链接,并且这些链表的头节点也链接成一个链表,用fd_nextsize/bk_nextsize链接。

large bins的size-index关系:

1
2
3
4
5
6
7
#define largebin_index_64(sz)                                                \
  (((((unsigned long) (sz)) >> 6) <= 48) ?  48 + (((unsigned long) (sz)) >> 6) :\
   ((((unsigned long) (sz)) >> 9) <= 20) ?  91 + (((unsigned long) (sz)) >> 9) :\
   ((((unsigned long) (sz)) >> 12) <= 10) ? 110 + (((unsigned long) (sz)) >> 12) :\
   ((((unsigned long) (sz)) >> 15) <= 4) ? 119 + (((unsigned long) (sz)) >> 15) :\
   ((((unsigned long) (sz)) >> 18) <= 2) ? 124 + (((unsigned long) (sz)) >> 18) :\
   126)

large chunk的具体分配在下面的大循环中介绍。

fastbins consolidate

largebins的处理在后面,此处只是先获取了bin的index。另外要判断一下arena是否存在fastbins,若存在,则对fastbin进行consolidate,合并,目的是为了减少fastbin的碎片。但是为什么要在这里进行整合,不是很清楚。可能是为了整合后增大unsorted bin的命中?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
  /*
     If this is a large request, consolidate fastbins before continuing.
     While it might look excessive to kill all fastbins before
     even seeing if there is space available, this avoids
     fragmentation problems normally associated with fastbins.
     Also, in practice, programs tend to have runs of either small or
     large requests, but less often mixtures, so consolidation is not
     invoked all that often in most programs. And the programs that
     it is called frequently in otherwise tend to fragment.
   */

  else
    {
      idx = largebin_index (nb);
      if (have_fastchunks (av))
        malloc_consolidate (av);
    }

来看看consolidate的过程:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/*
  ------------------------- malloc_consolidate -------------------------

  malloc_consolidate is a specialized version of free() that tears
  down chunks held in fastbins.  Free itself cannot be used for this
  purpose since, among other things, it might place chunks back onto
  fastbins.  So, instead, we need to use a minor variant of the same
  code.

  Also, because this routine needs to be called the first time through
  malloc anyway, it turns out to be the perfect place to trigger
  initialization code.
*/

static void malloc_consolidate(mstate av)
{
  mfastbinptr*    fb;                 /* current fastbin being consolidated */
  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
  mchunkptr       p;                  /* current chunk being consolidated */
  mchunkptr       nextp;              /* next chunk to consolidate */
  mchunkptr       unsorted_bin;       /* bin header */
  mchunkptr       first_unsorted;     /* chunk to link to */

  /* These have same use as in free() */
  mchunkptr       nextchunk;
  INTERNAL_SIZE_T size;
  INTERNAL_SIZE_T nextsize;
  INTERNAL_SIZE_T prevsize;
  int             nextinuse;
  mchunkptr       bck;
  mchunkptr       fwd;

  /*
    If max_fast is 0, we know that av hasn't
    yet been initialized, in which case do so below
  */

  if (get_max_fast () != 0) {
    // 清除arena中fastbins的flag
    clear_fastchunks(av);
    // 获取unsorted bin
    unsorted_bin = unsorted_chunks(av);

    /*
      Remove each chunk from fast bin and consolidate it, placing it
      then in unsorted bin. Among other reasons for doing this,
      placing in unsorted bin avoids needing to calculate actual bins
      until malloc is sure that chunks aren't immediately going to be
      reused anyway.
    */
    // 这段话的意思是,移除fast bin的每个chunk,将其合并后放在unsorted bin中,
    // 而不是放入bins中,这可以避免额外的关于放入具体哪个bins的计算,直到malloc
    // 能够确认这些chunks不会被立即重用。可以看出,unsorted bin的一个用途就是加快
    // 内存的分配和释放,整个操作不用花时间寻找合适的bin。

    maxfb = &fastbin (av, NFASTBINS - 1);
    fb = &fastbin (av, 0);
    // 一个大的do-while循环,遍历fastbins中的每个bin,清除内容
    do {
      // 将fb中的值设置为0,并返回old value
      p = atomic_exchange_acq (fb, 0);
      if (p != 0) {
	do {
	  check_inuse_chunk(av, p);
	  nextp = p->fd;

	  /* Slightly streamlined version of consolidation code in free() */
	  size = p->size & ~(PREV_INUSE|NON_MAIN_ARENA);
	  nextchunk = chunk_at_offset(p, size);
	  nextsize = chunksize(nextchunk);

    // 如果前一个chunk没有在用,也处于free状态
    // 将前一个chunk unlink,从链表取出
	  if (!prev_inuse(p)) {
	    prevsize = p->prev_size;
      // 扩充size
	    size += prevsize;
	    p = chunk_at_offset(p, -((long) prevsize));
	    unlink(av, p, bck, fwd);
	  }

    // 下一个chunk如果不是top chunk
	  if (nextchunk != av->top) {
	    nextinuse = inuse_bit_at_offset(nextchunk, nextsize);

      // 如果下一个chunk也没有使用,unlink从链表取出
      // 否则,nextchunk的previnuse bit清空
      // 因为之前是在fastbin里,该bit为1
	    if (!nextinuse) {
        //扩充size
	      size += nextsize;
	      unlink(av, nextchunk, bck, fwd);
	    } else
	      clear_inuse_bit_at_offset(nextchunk, 0);

      // 将新chunk p放入unsorted bin中链表首部
	    first_unsorted = unsorted_bin->fd;
	    unsorted_bin->fd = p;
	    first_unsorted->bk = p;

	    if (!in_smallbin_range (size)) {
	      p->fd_nextsize = NULL;
	      p->bk_nextsize = NULL;
	    }

      // 设置新chunk p的标志位以及链表指针
	    set_head(p, size | PREV_INUSE);
	    p->bk = unsorted_bin;
	    p->fd = first_unsorted;
	    set_foot(p, size);
	  }

	  else {
      // 如果nextchunk为top chunk,则将p归入top,并修改top指针
	    size += nextsize;
	    set_head(p, size | PREV_INUSE);
	    av->top = p;
	  }

	} while ( (p = nextp) != 0);

      }
    } while (fb++ != maxfb);
  }
  else {
    // 如果fastbin为空,则说明没有初始化mstate,则初始化
    malloc_init_state(av);
    check_malloc_state(av);
  }
}

到这里,malloc并没有从fastbins和smallbins找到可以用的free chunk,并且使用了malloc_consolidate对fastbins进行了整合。接下来进入一个大循环,主要做以下几件事:

  • 第一步:尝试从unsortedbin中分配用户所需的内存,并进行consolidate(如果成功,可能会产生last remainder)。
  • 第二步:如果第一步没有满足需求。尝试从largebins中分配用户所需的内存。
  • 第三步:如果第二步没有满足需求。尝试寻找更大的chunk来使用(与第一步一样,也会产生last remainder)。
  • 第四步:如果第三步没有满足需求。尝试从top chunk中分配用户所需的内存。

unsortedbin

先来看第一步,整合fastbins到unsorted bin之后,先从unsorted bin找合适的chunk进行分配。若在unsorted bin中找到了合适的chunk,则结束分配,否则将unsorted bin中的chunk放入对应的small bin或者large bin中。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
  /*
     Process recently freed or remaindered chunks, taking one only if
     it is exact fit, or, if this a small request, the chunk is remainder from
     the most recent non-exact fit.  Place other traversed chunks in
     bins.  Note that this step is the only place in any routine where
     chunks are placed in bins.

     The outer loop here is needed because we might not realize until
     near the end of malloc that we should have consolidated, so must
     do so and retry. This happens at most once, and only when we would
     otherwise need to expand memory to service a "small" request.
   */

  for (;; )
    {
      int iters = 0;
      // 从unsorted bin的最后一个chunk开始找,先进先出
      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
        {
          bck = victim->bk;
          // 判断当前chunk的大小是否合法
          if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0)
              || __builtin_expect (victim->size > av->system_mem, 0))
            malloc_printerr (check_action, "malloc(): memory corruption",
                             chunk2mem (victim), av);
          size = chunksize (victim);

          /*
             If a small request, try to use last remainder if it is the
             only chunk in unsorted bin.  This helps promote locality for
             runs of consecutive small requests. This is the only
             exception to best-fit, and applies only when there is
             no exact fit for a small chunk.
           */

          // 如果申请的大小属于small bin范围,并且当前chunk是unsorted bin中唯一chunk,
          // 并且chunk是last remainder:
          if (in_smallbin_range (nb) &&
              bck == unsorted_chunks (av) &&
              victim == av->last_remainder &&
              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
            {
              /* 将last remainder切割,切割出来的分配给用户,剩下的作为新的last remainder */
              remainder_size = size - nb;
              remainder = chunk_at_offset (victim, nb);
              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
              av->last_remainder = remainder;
              remainder->bk = remainder->fd = unsorted_chunks (av);
              if (!in_smallbin_range (remainder_size))
                {
                  remainder->fd_nextsize = NULL;
                  remainder->bk_nextsize = NULL;
                }

              // 对分配出来的chunk进行设置,也设置remainder chunk
              set_head (victim, nb | PREV_INUSE |
                        (av != &main_arena ? NON_MAIN_ARENA : 0));
              set_head (remainder, remainder_size | PREV_INUSE);
              set_foot (remainder, remainder_size);

              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }

          /* 如果有victim chunk且不为last remainder,则从unsorted bin链表中取出 */
          unsorted_chunks (av)->bk = bck;
          bck->fd = unsorted_chunks (av);

          /* 如果victim大小和申请的chunk大小一样,则直接取出来分配 */

          if (size == nb)
            {
              set_inuse_bit_at_offset (victim, size);
              if (av != &main_arena)
                victim->size |= NON_MAIN_ARENA;
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }

          // 如果unsorted bin中的chunk大小没有直接满足的,走下面if-else
          // 该if-else,将unsorted bin中的chunk放入其它对应的bin中 
          // 如果申请的大小为small bins大小,则整合入对应的small bin
          if (in_smallbin_range (size))
            {
              // bck=对应的small bin链表头
              // fwd=对应的small bin的第一个chunk
              victim_index = smallbin_index (size);
              bck = bin_at (av, victim_index);
              fwd = bck->fd;
            }
          else
            {
              // 如果大小在large bins范围内,则整合入对应的large bin
              victim_index = largebin_index (size);
              bck = bin_at (av, victim_index);
              fwd = bck->fd;

              /* maintain large bins in sorted order */
              if (fwd != bck)  // large bin非空
                {
                  /* Or with inuse bit to speed comparisons */
                  size |= PREV_INUSE;  // 取出P标志位,加快比较
                  /* if smaller than smallest, bypass loop below */
                  assert ((bck->bk->size & NON_MAIN_ARENA) == 0);
                  // 如果大小小于当前large bin中最小chunk的大小
                  // 则该chunk应该放在链表最后
                  if ((unsigned long) (size) < (unsigned long) (bck->bk->size))
                    {
                      fwd = bck;  // fwd=链表头
                      bck = bck->bk;  // bck=large bin最后一个chunk
                      // 插入victim chunk
                      victim->fd_nextsize = fwd->fd;
                      victim->bk_nextsize = fwd->fd->bk_nextsize;
                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
                    }
                  else
                    {
                      // 否则,从第一个节点开始找,找到第一个大小小于等于size的chunk
                      assert ((fwd->size & NON_MAIN_ARENA) == 0);
                      while ((unsigned long) size < fwd->size)
                        {
                          fwd = fwd->fd_nextsize;
                          assert ((fwd->size & NON_MAIN_ARENA) == 0);
                        }

                      // 如果找到的chunk的大小等于victim的大小,则将victim插入到以该chunk为链表头的链表中,
                      // 插入到链表第二个位置
                      if ((unsigned long) size == (unsigned long) fwd->size)
                        /* Always insert in the second position.  */
                        fwd = fwd->fd;
                      else
                        {
                          // 否则,找到的chunk的大小小于victim的size,
                          // 将victim chunk插入到该chunk之前
                          victim->fd_nextsize = fwd;
                          victim->bk_nextsize = fwd->bk_nextsize;
                          fwd->bk_nextsize = victim;
                          victim->bk_nextsize->fd_nextsize = victim;
                        }
                      bck = fwd->bk;
                    }
                }
              else  // 当前large bin为空,则直接插入victim chunk到large bin中
                victim->fd_nextsize = victim->bk_nextsize = victim;
            }
          
          // 在largebins的bitmap中,将victim_index对应的位置1
          // largebins的bitmap是为了更快的找到非空的large bin,
          // 因为largebins中项目过多,而且大小不同,逐个寻找比较费时,
          // 因此采用bitmap标识对应的large bin是否为空
          mark_bin (av, victim_index);
          victim->bk = bck;
          victim->fd = fwd;
          fwd->bk = victim;
          bck->fd = victim;

          // 最多经过10000次循环,每次都从unsorted bin中找合适的chunk进行分配,
          // 若没有合适的chunk,则将chunk整理到bins中
          // 经过10000次整理,unsorted bin中的chunk都已经放入到对应的bin中了
#define MAX_ITERS       10000
          if (++iters >= MAX_ITERS)
            break;
        }

再谈 largebins(分配)

来看第二步:如果unsorted bin中没有找到匹配的chunk,并且已经对unsorted bin进行了整合,那么就从largebins中找找看吧。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
      /*
         If a large request, scan through the chunks of current bin in
         sorted order to find smallest that fits.  Use the skip list for this.
       */
      // 如果是large chunk请求,现在从largebins中查找合适chunk
      if (!in_smallbin_range (nb))
        {
          // 根据之前获取的large bin的index,定位到对应的large bin
          bin = bin_at (av, idx);

          /* skip scan if empty or largest chunk is too small */
          // 如果bin非空,且bin中最大的chunk大小大于等于要分配的大小nb,则进入分配阶段
          if ((victim = first (bin)) != bin &&
              (unsigned long) (victim->size) >= (unsigned long) (nb))
            {
              // 从后(大小最小的)往前找,找到第一个大小大于等于要申请的大小(nb)的chunk list
              // 这里是chunk list,不是chunk,因为large bin的链表是二级链表,
              // 二级链表保存了所有大小相同的chunk
              victim = victim->bk_nextsize;
              while (((unsigned long) (size = chunksize (victim)) <
                      (unsigned long) (nb)))
                victim = victim->bk_nextsize;

              /* Avoid removing the first entry for a size so that the skip
                 list does not have to be rerouted.  */
              // 避免移除二级链表头部节点,优先从二级链表的第二个节点开始分配
              // 如果命中的节点不是bin的最后一个节点,则取出二级链表的第二个节点
              if (victim != last (bin) && victim->size == victim->fd->size)
                victim = victim->fd;

              // 将该节点从二级链表中unlink,取出,分割该chunk
              remainder_size = size - nb;
              unlink (av, victim, bck, fwd);

              /* Exhaust */
              // 如果分割后剩下的大小已经小于了MINSIZE,则不再切割
              // 因为切割后剩下的chunk已经太小了,无法分配
              if (remainder_size < MINSIZE)
                {
                  set_inuse_bit_at_offset (victim, size);
                  if (av != &main_arena)
                    victim->size |= NON_MAIN_ARENA;
                }
              /* Split */
              else
                {
                  // 如果分割后剩下的大小仍然大于等于MINSIZE,则切割
                  remainder = chunk_at_offset (victim, nb);
                  /* We cannot assume the unsorted list is empty and therefore
                     have to perform a complete insert here.  */
                  // 将切割后剩下的chunk插入unsorted bin中最前方位置
                  bck = unsorted_chunks (av);
                  fwd = bck->fd;
	  if (__glibc_unlikely (fwd->bk != bck))
                    {
                      errstr = "malloc(): corrupted unsorted chunks";
                      goto errout;
                    }
                  remainder->bk = bck;
                  remainder->fd = fwd;
                  bck->fd = remainder;
                  fwd->bk = remainder;
                  if (!in_smallbin_range (remainder_size))
                    {
                      remainder->fd_nextsize = NULL;
                      remainder->bk_nextsize = NULL;
                    }
                  set_head (victim, nb | PREV_INUSE |
                            (av != &main_arena ? NON_MAIN_ARENA : 0));
                  set_head (remainder, remainder_size | PREV_INUSE);
                  set_foot (remainder, remainder_size);
                }
              // 将切割出来的chunk分配,至此,large bin分配完成
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }
        }

接下来是第三步,如果index对应的large bin中没有合适的chunk,则从更大的large bin中找寻。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
      /*
         Search for a chunk by scanning bins, starting with next largest
         bin. This search is strictly by best-fit; i.e., the smallest
         (with ties going to approximately the least recently used) chunk
         that fits is selected.

         The bitmap avoids needing to check that most blocks are nonempty.
         The particular case of skipping all bins during warm-up phases
         when no chunks have been returned yet is faster than it might look.
       */
      // 如果在对应的large bin中没有找到合适的chunk,那么从其它size的large bin中继续寻找
      // 这里为了加快寻找,用了bitmap

      ++idx;
      bin = bin_at (av, idx);
      block = idx2block (idx);
      map = av->binmap[block];
      bit = idx2bit (idx);

      for (;; )
        {
          /* Skip rest of block if there are no more set bits in this block.  */
          if (bit > map || bit == 0)
            {
              do
                {
                  if (++block >= BINMAPSIZE) /* out of bins */
                    goto use_top;
                }
              while ((map = av->binmap[block]) == 0);

              bin = bin_at (av, (block << BINMAPSHIFT));
              bit = 1;
            }

          /* Advance to bin with set bit. There must be one. */
          while ((bit & map) == 0)
            {
              bin = next_bin (bin);
              bit <<= 1;
              assert (bit != 0);
            }

          // 找到了下一个非空bin,看看到底是不是非空
          // 如果的确非空,则从其中找chunk,
          // 否则,将bitmap对应位置1
          /* Inspect the bin. It is likely to be non-empty */
          victim = last (bin);

          /*  If a false alarm (empty bin), clear the bit. */
          if (victim == bin)
            {
              av->binmap[block] = map &= ~bit; /* Write through */
              bin = next_bin (bin);
              bit <<= 1;
            }

          else
            {
              // 这里,从找到的非空bin中分配chunk,过程和large bin分配一样
              // 这里不再从链表尾部向前定位大小合适的chunk了,
              // 因为该large bin里的所有chunk肯定都比nb大,只是单纯检查一下即可
              size = chunksize (victim);

              /*  We know the first chunk in this bin is big enough to use. */
              assert ((unsigned long) (size) >= (unsigned long) (nb));

              remainder_size = size - nb;

              /* unlink */
              unlink (av, victim, bck, fwd);

              /* Exhaust */
              if (remainder_size < MINSIZE)
                {
                  set_inuse_bit_at_offset (victim, size);
                  if (av != &main_arena)
                    victim->size |= NON_MAIN_ARENA;
                }

              /* Split */
              else
                {
                  remainder = chunk_at_offset (victim, nb);

                  /* We cannot assume the unsorted list is empty and therefore
                     have to perform a complete insert here.  */
                  bck = unsorted_chunks (av);
                  fwd = bck->fd;
	  if (__glibc_unlikely (fwd->bk != bck))
                    {
                      errstr = "malloc(): corrupted unsorted chunks 2";
                      goto errout;
                    }
                  remainder->bk = bck;
                  remainder->fd = fwd;
                  bck->fd = remainder;
                  fwd->bk = remainder;

                  /* advertise as last remainder */
                  if (in_smallbin_range (nb))
                    av->last_remainder = remainder;
                  if (!in_smallbin_range (remainder_size))
                    {
                      remainder->fd_nextsize = NULL;
                      remainder->bk_nextsize = NULL;
                    }
                  set_head (victim, nb | PREV_INUSE |
                            (av != &main_arena ? NON_MAIN_ARENA : 0));
                  set_head (remainder, remainder_size | PREV_INUSE);
                  set_foot (remainder, remainder_size);
                }
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }
        }

至此,不论是否从largebins中找到合适chunk,large bin使命结束。

top chunk

进入第四步,如果largebins中没有找到合适chunk,则从top chunk中分配用户所需的内存。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
    use_top:
      /*
         If large enough, split off the chunk bordering the end of memory
         (held in av->top). Note that this is in accord with the best-fit
         search rule.  In effect, av->top is treated as larger (and thus
         less well fitting) than any other available chunk since it can
         be extended to be as large as necessary (up to system
         limitations).
         只要top chunk够大,就分割出用户所需的内存。通常情况下,top chunk是够大的,
         因为它是可扩展的,只要不超过系统的限制。

         We require that av->top always exists (i.e., has size >=
         MINSIZE) after initialization, so if it would otherwise be
         exhausted by current request, it is replenished. (The main
         reason for ensuring it exists is that we may need MINSIZE space
         to put in fenceposts in sysmalloc.)
         top chunk在初始化之后总是存在的,如果top chunk被耗尽了(小于MINSIZE),
         那么就填充它。
       */

      victim = av->top;
      size = chunksize (victim);

      // 保证分割后的top chunk比MINSIZE大
      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
        {
          remainder_size = size - nb;
          remainder = chunk_at_offset (victim, nb);
          av->top = remainder;
          set_head (victim, nb | PREV_INUSE |
                    (av != &main_arena ? NON_MAIN_ARENA : 0));
          set_head (remainder, remainder_size | PREV_INUSE);

          check_malloced_chunk (av, victim, nb);
          void *p = chunk2mem (victim);
          alloc_perturb (p, bytes);
          return p;
        }

      /* When we are using atomic ops to free fast chunks we can get
         here for all block sizes.  */
      // top chunk大小不满足标准,无法从top chunk分配内存给用户
      // 如果fastbins非空,即仍然存在fast chunk,
      // 则调用malloc_consolidate函数,重新整合fastbins,
      // 然后进入下一次大循环,在后面循环中再次找合适的chunk
      else if (have_fastchunks (av))
        {
          malloc_consolidate (av);
          /* restore original bin index */
          if (in_smallbin_range (nb))
            idx = smallbin_index (nb);
          else
            idx = largebin_index (nb);
        }

      /*
         Otherwise, relay to handle system-dependent cases
       */
      // 如果已经没有fast chunk了,那只能走系统调用,执行sysmalloc申请内存
      else
        {
          void *p = sysmalloc (nb, av);
          if (p != NULL)
            alloc_perturb (p, bytes);
          return p;
        }
    }
}

终于结束了,可以看出ptmalloc的开发者为了避免系统调用,真的是做足了功夫。下一步学习free。其实malloc还有一些其它的流程没有分析,比如arena初始化,以及最后sysmalloc函数扩充内存,后续学习时遇到了再进一步分析。

总结:

 fastbinssmall binslarge binsunsorted bins
项数1062631
链表方向单向双向双向双向
进出算法先进后出先进先出自由选择,从后向前先进先出
64位最大值(包括chunk头)1281008最大项无限制/
32位最大值(包括chunk头)64504最大项无限制/
64位idx计算(包括chunk头)size»4 - 2size»4见largebin_index_64宏1
32位idx计算(包括chunk头)size»3 - 2size»3见largebin_index_32宏1
链表中chunk大小是否相同相同相同可以不相同可以不相同
fd/bk(总是用来链接大小相同的节点)链表中的下/上一个free chunk(非物理间隔)链表中的下/上一个free chunk(非物理间隔)bin二级链表,即相同大小chunk所组成的链表,按照最近使用顺序排列链表中的下/上一个free chunk(非物理间隔)
fd_nextsize/bk_nextsize//bin一级链表,即不同大小的chunk list头部节点所组成的链表,从大到小顺序排列/

最后贴一个ptmalloc的脑图,是xuanxuan老师去年发给我的,它在我电脑里已经保存了一年零4个月,但我从来没看过。整理完代码再看,的确清楚了很多。不愧是xuanxuanblingbling。

heap-mindmap

This post is licensed under CC BY 4.0 by the author.