unordered_map和map的耗时

发布时间 2023-04-15 13:16:39作者: BoBro

在实际生产环境中,遇到使用map还是unordered_map的场景。

  • 一方面,有unordered_map需要自定义hash函数,导致构建时比较复杂。而map使用的是比较运算符来判断元素在map中的位置,std::vector有比较运算符,所以构建map比较简单。
  • 另一方面,unordered_map时hash表,查找时间复杂度为o(1), map为红黑树,查找时间复杂度为o(log2(n)).
    为对比具体的时间差异。故复原了实际场景来测试,代码如下:
#include <stdio.h>
#include <iostream>
#include <map>
#include <unordered_map>
#include <numeric>
#include <vector>
#include <chrono>
#include <random>

enum diopiDtype_t {
    diopi_dtype_bool,
    diopi_dtype_int32,
    diopi_dtype_uint32,
    diopi_dtype_float16,
    diopi_dtype_float32,
    diopi_dtype_float64,
    diopi_dtype_int8,
    diopi_dtype_uint8,
    diopi_dtype_int16,
    diopi_dtype_uint16,
    diopi_dtype_int64,
    diopi_dtype_uint64,

};

std::vector<std::vector<diopiDtype_t>> va{
     {diopi_dtype_bool, diopi_dtype_int32},
    {diopi_dtype_bool, diopi_dtype_float16},
    {diopi_dtype_bool, diopi_dtype_float32},

    {diopi_dtype_int8, diopi_dtype_int16},
    {diopi_dtype_int8, diopi_dtype_int32},
    {diopi_dtype_int8, diopi_dtype_float16},
    {diopi_dtype_int8, diopi_dtype_float32},

    {diopi_dtype_uint8, diopi_dtype_int32},
    {diopi_dtype_uint8, diopi_dtype_int64},
    {diopi_dtype_uint8, diopi_dtype_float16},
    {diopi_dtype_uint8, diopi_dtype_float32},

    {diopi_dtype_int16, diopi_dtype_int32},
    {diopi_dtype_int16, diopi_dtype_float16},
    {diopi_dtype_int16, diopi_dtype_float32},

    {diopi_dtype_int32, diopi_dtype_bool},
    {diopi_dtype_int32, diopi_dtype_int8},
    {diopi_dtype_int32, diopi_dtype_int16},
    {diopi_dtype_int32, diopi_dtype_int64},
    {diopi_dtype_int32, diopi_dtype_float16},
    {diopi_dtype_int32, diopi_dtype_float32},

    {diopi_dtype_uint32, diopi_dtype_int64},
    {diopi_dtype_uint32, diopi_dtype_uint64},

    {diopi_dtype_int64, diopi_dtype_int32},
    {diopi_dtype_int64, diopi_dtype_uint32},
    {diopi_dtype_int64, diopi_dtype_float16},
    {diopi_dtype_int64, diopi_dtype_float32},

    {diopi_dtype_uint64, diopi_dtype_uint32},

    {diopi_dtype_float16, diopi_dtype_bool},
    {diopi_dtype_float16, diopi_dtype_int8},
    {diopi_dtype_float16, diopi_dtype_uint8},
    {diopi_dtype_float16, diopi_dtype_int16},
    {diopi_dtype_float16, diopi_dtype_int32},
    {diopi_dtype_float16, diopi_dtype_int64},
    {diopi_dtype_float16, diopi_dtype_float32},

    {diopi_dtype_float32, diopi_dtype_bool},
    {diopi_dtype_float32, diopi_dtype_int8},
    {diopi_dtype_float32, diopi_dtype_uint8},
    {diopi_dtype_float32, diopi_dtype_int16},
    {diopi_dtype_float32, diopi_dtype_int32},
    {diopi_dtype_float32, diopi_dtype_int64},
    {diopi_dtype_float32, diopi_dtype_float16},
    {diopi_dtype_float32, diopi_dtype_float64},

    {diopi_dtype_float64, diopi_dtype_float32}, 
};

struct HashCnnl {
    size_t operator()(const std::vector<diopiDtype_t>& v) const {
        auto f = [&](auto x1, auto x2){return static_cast<size_t>(x1) ^ static_cast<size_t>(x2);};
        return std::reduce(v.begin(), v.end(), 0, f);
    }
};

template<typename Key, typename Map>
void mp_find(Map mp, std::vector<Key> v, int cnt){
    for (int i = 0; i < cnt; ++i){
        for (size_t j = 0; j < v.size(); ++j){
            mp.find(v[j]);
        }
    }
    return;
}

int main(){
    std::unordered_map<std::vector<diopiDtype_t>, int, HashCnnl> myHashMap;
    std::map<std::vector<diopiDtype_t>, int> myMap;
    for (size_t i = 0; i < va.size(); ++i){
        myHashMap.emplace(va[i], i);
        myMap.emplace(va[i], i);
    }
    
    int cnt = 1000;

    size_t n = va.size();
    auto begin = std::chrono::steady_clock::now();
    mp_find(myMap, va, cnt);
    auto end = std::chrono::steady_clock::now();
    std::chrono::duration<double, std::milli> time_elapse_map(end - begin);

    begin = std::chrono::steady_clock::now();
    mp_find(myHashMap, va, cnt);
    end = std::chrono::steady_clock::now();
    std::chrono::duration<double, std::milli> time_elapse_hashMap(end - begin);

    std::cout << "map: " << time_elapse_map.count() << "ms" << std::endl;
    std::cout << "hashMap: " << time_elapse_hashMap.count() << "ms" << std::endl;
    return 0;
}

运行结果如下:
map: 33.081ms
hashMap: 14.675ms
可见在数据规模(map中的元素个数)为42时,map的时间消耗是unordered_map的两倍多。
可见使用unordered_map更合理。