malloc 13us耗时分析定位

发布时间 2023-06-04 19:38:33作者: stupidstan2019

【问题描述】

10个生产者malloc(50KB),1个消费者(模拟业务,1ms后free)

调用malloc接口大概13us,分析耗时

【调用栈】

【编译调试】

root@jch:~/glibc_test/glibc-2.31/build# make install -sj;gcc -g -L /root/glibc-2.31_install/64/lib -Wl,--rpath=/root/glibc-2.31_install/64/libs -Wl,-I /root/glibc-2.31_install/64/lib/ld-2.31.so ../hello.c -o hello -lpthread -ljemalloc  -mcmodel=medium;./hello

 【内部耗时】

1) 打点结果

[TIME=1685875827][140712515462912]user_call_malloc ,count: 18000,avg: 13(us), min:0,max:9155, total: 242970(us)

[TIME1=1685875828][140712515462912]grow_heap ,count: 17961,avg: 7(us), min:0,max:9148, total: 137963(us)

[TIME1=1685875828][140712515462912]sysmalloc_2414_2775_sethead2 ,count: 17974,avg: 4(us), min:0,max:6546, total: 85628(us)

2)grow_heap里的__m_protect耗时7us

3)sysmalloc里的set_head接口

【疑问】

第一个为什么这么耗时

第二个改内存值可能触发系统调用

【测试代码】

#include<stdio.h>
#include<unistd.h>
#include<stdlib.h>
#include<string.h>
#include "sys/time.h"
#include <pthread.h>
#define TOTAL_COUNT (1000 * 1000 * 10 )
#define NUM_Threads 10
#define COUNT (TOTAL_COUNT/NUM_Threads)
#define PRODUCE_COUNT 18000
#define AVERAGE_PRODUCE_COUNT (PRODUCE_COUNT/NUM_Threads)

#define je_malloc malloc
#define je_free free
//#include <jemalloc/jemalloc.h>

static int producer_index=0;
static int consumer_index=0;
typedef unsigned long long uint64;
#define MIN(A, B)        ((B) < (A) ? (B) : (A))
#define MAX(A, B)        ((B) > (A) ? (B) : (A))

uint64 us_clock_now(){
    struct timeval st;
    gettimeofday(&st, NULL);
    return st.tv_sec * 1000 * 1000 + st.tv_usec;
}

#define TIME_BEGIN(name)  uint64 __##name##begin = us_clock_now();

#define TIME_END(name, print_interval_us) \
do { \
    static __thread uint64 __##name##total = 0; \
    static __thread uint64 __##name##count = 0; \
    static __thread uint64 __##name##min = 1000000000000; \
    static __thread uint64 __##name##max = 0; \
    static __thread uint64 __##name##pre_total = 0; \
    static __thread uint64 __##name##pre_count = 0; \
    static __thread uint64 __##name##log_time = 0;  \
    uint64 current = (us_clock_now() - __##name##begin);\
    __##name##min = MIN(current, __##name##min);        \
    __##name##max = MAX(current, __##name##max);        \
    __##name##total += current; \
    __##name##count++; \
    if (us_clock_now() - __##name##log_time > print_interval_us) \
    { \
        __##name##log_time = us_clock_now(); \
        uint64 __##name##this_total = __##name##total - __##name##pre_total; \
        uint64 __##name##this_count = __##name##count - __##name##pre_count; \
        __##name##pre_total = __##name##total; \
        __##name##pre_count = __##name##count;  \
        if (__##name##this_count > 1)       {                           \
        printf("[TIME=%llu][%ld]%-30s,count:%10llu,avg:%10llu(us), min:%llu,max:%llu, total:%10llu(us)\n", \
            us_clock_now()/1000000,pthread_self(), \
            #name,  __##name##this_count, \
            __##name##this_count == 0 ? 0 :__##name##this_total / __##name##this_count, __##name##min, __##name##max, __##name##this_total); \
        }                                  \
        __##name##min=1000000000000;                    \
        __##name##max = 0;\
    } \
} while (0);

static char* threads[NUM_Threads][COUNT];
static uint64 malloc_time[NUM_Threads][COUNT];
static uint64 global_malloc_time[NUM_Threads * COUNT];
static uint64 global_producer_index = 0;
static uint64 global_consumer_index = 0;

pthread_mutex_t gMutex = PTHREAD_MUTEX_INITIALIZER;


void *PrintHello(void *arg)
{
    int id = (int) arg;
    static struct timeval st;
    static struct timeval ed;
    static long time_total=0;
    long time_current;
    uint64 last_produce_time = us_clock_now();
    for(int i=0;i<COUNT;i++){
        producer_index=i;
//        gettimeofday(&st, NULL);
        TIME_BEGIN(user_call_malloc);
        char * s1=(char *)je_malloc(50 * 1000);
        *s1 = 'a';
        TIME_END(user_call_malloc, 10 * 1000 *1000);

//        gettimeofday(&ed, NULL);
        threads[id][i] = s1;
        pthread_mutex_lock(&gMutex);
        global_malloc_time[global_producer_index]= us_clock_now();
        global_producer_index++;
//        malloc_time[id][i]=us_clock_now();
        pthread_mutex_unlock(&gMutex);
//        time_current = (ed.tv_sec - st.tv_sec)  * 1000000 + (ed.tv_usec - st.tv_usec) ;
//        time_total += time_current;
        if(i%AVERAGE_PRODUCE_COUNT==0){
            while(us_clock_now() - last_produce_time < 1000*1000){
                usleep(1);
            }
            last_produce_time = us_clock_now();
        }
    }
    printf("[time_total]:%ld, threadId=%d\n", time_total,id);
    return 0;
}

void *Consumer(void *arg)
{
    int id = (int) arg;
    static struct timeval st;
    static struct timeval ed;
    static long time_total=0;
    long time_current;
    for(id=0;id<NUM_Threads;id++){
        for (int i=0;i<COUNT;i++){
            while(global_malloc_time[global_consumer_index]==0 || us_clock_now() - global_malloc_time[global_consumer_index] <= 1000) {
                usleep(1);
            }
            global_consumer_index++;
            TIME_BEGIN(free);
            je_free(threads[id][i]);
            consumer_index=i;
            if(global_consumer_index % 100000==0){
                printf("gap=%llu,consumer_index=%llu, producer_index=%llu\n",global_producer_index-global_consumer_index, global_consumer_index,global_producer_index);
            }
            TIME_END(free, 10000 * 1000);

        }
    };
    return 0;
}

int main(){
    printf("AVERAGE_PRODUCE_COUNT=%d\n", AVERAGE_PRODUCE_COUNT);
     memset(threads, 0, sizeof(threads));
    memset(global_malloc_time, 0, sizeof(global_malloc_time));
    pthread_t tids[NUM_Threads];
    int i;
    int ret;
    for (i=0; i<NUM_Threads; i++)
    {
        ret = pthread_create(&tids[i], NULL, PrintHello, (void *)i);
        if (ret != 0)
        {
            printf("pthread_create error: error_code = \n");
        }
    }
    usleep(10 * 1000);
//    sleep(2);
    pthread_create(&tids[i], NULL, Consumer, NULL);
    void *a1;
    for (i=0; i<NUM_Threads; i++){
        pthread_join(tids[i],&a1);
    }
    return 0;
}
`