跳转至内容
  • 快速十六进制转八进制

    General Discussion | 综合讨论
    2
    1 赞同
    2 帖子
    42 浏览
    StehsaerS
    改进

    经过 @yyq252517 提醒并测试,直接使用 12 bit 的查找表可以更快:

    consteval std::array<std::array<char, 4>, 4096> hex_to_oct_lookup() noexcept { std::array<std::array<char, 4>, 4096> lookup{}; for (size_t i = 0; i < 4096; ++i) { lookup[i][0] = '0' + ((i >> 0) & 0b111); lookup[i][1] = '0' + ((i >> 3) & 0b111); lookup[i][2] = '0' + ((i >> 6) & 0b111); lookup[i][3] = '0' + ((i >> 9) & 0b111); } return lookup; } // ... std::string hex_to_oct(const std::string& hex) noexcept { const auto hex_length = hex.length(); if (hex_length == 0) return "0"; const auto groups = (hex_length + 2) / 3; const auto oct_digits = groups * 4; std::string res(oct_digits, '0'); for (size_t i = hex_length % 3; i < hex_length; i += 3) { const size_t curr_group = (i + 2) / 3; const uint16_t h0 = hex_lookup[std::bit_cast<uint8_t>(hex[i + 2])]; // [3:0] const uint16_t h1 = hex_lookup[std::bit_cast<uint8_t>(hex[i + 1])]; // [7:4] const uint16_t h2 = hex_lookup[std::bit_cast<uint8_t>(hex[i + 0])]; // [11:8] const uint16_t idx = (h2 << 8) | (h1 << 4) | h0; // [11:0] const auto [o1, o2, o3, o4] = hex_to_oct_lookup_table[idx]; res[curr_group * 4 + 3] = o1; res[curr_group * 4 + 2] = o2; res[curr_group * 4 + 1] = o3; res[curr_group * 4 + 0] = o4; } if (hex_length % 3 != 0) { std::array<uint16_t, 3> h{0, 0, 0}; if (hex_length % 3 == 1) { h[0] = hex_lookup[std::bit_cast<uint8_t>(hex[0])]; } else // hex_length % 3 == 2 { h[1] = hex_lookup[std::bit_cast<uint8_t>(hex[0])]; h[0] = hex_lookup[std::bit_cast<uint8_t>(hex[1])]; } const uint16_t idx = (h[2] << 8) | (h[1] << 4) | h[0]; const auto [o1, o2, o3, o4] = hex_to_oct_lookup_table[idx]; res[3] = o1; res[2] = o2; res[1] = o3; res[0] = o4; } const auto first_non_zero = res.find_first_not_of('0'); if (first_non_zero == std::string::npos) return "0"; return res.substr(first_non_zero); }

    使用和此前一致的测试环境,可以得到:

    Optimized version: 72.16 ms (stddev: 0.45 ms), 1385897448.4948883 hex chars/sec

    比此前快了 15%!我此前低估了现代 CPU 的 L1 缓存系统的能力,看来它处理 4096 × 4 byte = 16 KiB 的查找表也不在话下。

    感谢@yyq252517