c++ std::hash<std::string> 字符串哈希函数

发布时间 2023-08-08 17:42:24作者: miyanyan

msvc

采用了FNV-1a的哈希算法

// 众所周知 std::string 就是一个 basic_string<char>
template <class _Elem, class _Traits, class _Alloc>
struct hash<basic_string<_Elem, _Traits, _Alloc>> {
    _CXX17_DEPRECATE_ADAPTOR_TYPEDEFS typedef basic_string<_Elem, _Traits, _Alloc> _ARGUMENT_TYPE_NAME;
    _CXX17_DEPRECATE_ADAPTOR_TYPEDEFS typedef size_t _RESULT_TYPE_NAME;

    _NODISCARD size_t operator()(const basic_string<_Elem, _Traits, _Alloc>& _Keyval) const noexcept {
        return _Hash_array_representation(_Keyval.c_str(), _Keyval.size());
    }
};

// 转换成bytes进行计算
template <class _Kty>
_NODISCARD size_t _Hash_array_representation(
    const _Kty* const _First, const size_t _Count) noexcept { // bitwise hashes the representation of an array
    static_assert(is_trivial_v<_Kty>, "Only trivial types can be directly hashed.");
    return _Fnv1a_append_bytes(
        _FNV_offset_basis, reinterpret_cast<const unsigned char*>(_First), _Count * sizeof(_Kty));
}

// 最终使用FNV-1a哈希算法
// These FNV-1a utility functions are extremely performance sensitive,
// check examples like that in VSO-653642 before making changes.
#if defined(_WIN64)
_INLINE_VAR constexpr size_t _FNV_offset_basis = 14695981039346656037ULL;
_INLINE_VAR constexpr size_t _FNV_prime        = 1099511628211ULL;
#else // defined(_WIN64)
_INLINE_VAR constexpr size_t _FNV_offset_basis = 2166136261U;
_INLINE_VAR constexpr size_t _FNV_prime        = 16777619U;
#endif // defined(_WIN64)

_NODISCARD inline size_t _Fnv1a_append_bytes(size_t _Val, const unsigned char* const _First,
    const size_t _Count) noexcept { // accumulate range [_First, _First + _Count) into partial FNV-1a hash _Val
    for (size_t _Idx = 0; _Idx < _Count; ++_Idx) {
        _Val ^= static_cast<size_t>(_First[_Idx]);
        _Val *= _FNV_prime;
    }

    return _Val;
}

参考

Which hashing algorithm is best for uniqueness and speed?