QQ群上有群友提到了十六进制转八进制,借此练练手写一个快速十六进制转八进制代码并在此分享:Godbolt链接
consteval std::array<uint8_t, 256> gen_lookup() noexcept
{
std::array<uint8_t, 256> lookup{};
lookup.fill(0);
lookup['0'] = 0;
lookup['1'] = 1;
lookup['2'] = 2;
lookup['3'] = 3;
lookup['4'] = 4;
lookup['5'] = 5;
lookup['6'] = 6;
lookup['7'] = 7;
lookup['8'] = 8;
lookup['9'] = 9;
lookup['a'] = 10;
lookup['b'] = 11;
lookup['c'] = 12;
lookup['d'] = 13;
lookup['e'] = 14;
lookup['f'] = 15;
lookup['A'] = 10;
lookup['B'] = 11;
lookup['C'] = 12;
lookup['D'] = 13;
lookup['E'] = 14;
lookup['F'] = 15;
return lookup;
}
consteval std::array<std::array<char, 2>, 64> hex_to_oct_lookup() noexcept
{
std::array<std::array<char, 2>, 64> lookup{};
for (size_t i = 0; i < 64; ++i)
{
lookup[i][0] = '0' + (i % 8);
lookup[i][1] = '0' + (i / 8);
}
return lookup;
}
constexpr auto hex_lookup = gen_lookup();
constexpr auto hex_to_oct_lookup_table = hex_to_oct_lookup();
std::string hex_to_oct(const std::string& hex) noexcept
{
const auto hex_length = hex.length();
if (hex_length == 0) return "0";
const auto groups = (hex_length + 2) / 3;
const auto oct_digits = groups * 4;
std::string res(oct_digits, '0');
for (size_t i = hex_length % 3; i < hex_length; i += 3)
{
const size_t curr_group = (i + 2) / 3;
const auto h0 = hex_lookup[static_cast<uint8_t>(hex[i + 2])]; // [3:0]
const auto h1 = hex_lookup[static_cast<uint8_t>(hex[i + 1])]; // [7:4]
const auto h2 = hex_lookup[static_cast<uint8_t>(hex[i + 0])]; // [11:8]
const uint8_t o12 = h0 | ((h1 & 0b11) << 4); // [5:0]
const uint8_t o34 = (h1 >> 2) | (h2 << 2); // [11:6]
const auto [o1, o2] = hex_to_oct_lookup_table[o12];
const auto [o3, o4] = hex_to_oct_lookup_table[o34];
res[curr_group * 4 + 3] = o1;
res[curr_group * 4 + 2] = o2;
res[curr_group * 4 + 1] = o3;
res[curr_group * 4 + 0] = o4;
}
if (hex_length % 3 != 0)
{
std::array<uint8_t, 3> h{0, 0, 0};
if (hex_length % 3 == 1)
{
h[0] = hex_lookup[static_cast<uint8_t>(hex[0])];
}
else // hex_length % 3 == 2
{
h[1] = hex_lookup[static_cast<uint8_t>(hex[0])];
h[0] = hex_lookup[static_cast<uint8_t>(hex[1])];
}
const uint8_t o12 = h[0] | ((h[1] & 0b11) << 4); // [5:0]
const uint8_t o34 = (h[1] >> 2) | (h[2] << 2); // [11:6]
const auto [o1, o2] = hex_to_oct_lookup_table[o12];
const auto [o3, o4] = hex_to_oct_lookup_table[o34];
res[3] = o1;
res[2] = o2;
res[1] = o3;
res[0] = o4;
}
const auto first_non_zero = res.find_first_not_of('0');
if (first_non_zero == std::string::npos) return "0";
return res.substr(first_non_zero);
}
优化点
- 用了小型查找表快速将16进制字符转成对应的十六进制数字
- 每12bit为一组转换,同样使用小型查找表将6bit直接转成2个八进制字符
- 全程不使用分支,提高执行效率
- 使用
consteval直接在编译期生成查找表 - 所有查找表均足够小并且按64字节对齐,对齐L1缓存行,提升访存效率
性能测试
测试环境:
- CPU: Intel i7-13620H
- RAM: DDR5-5200 32GiB
- OS: Debian 13 (Linux 6.12.73+deb13-amd64)
转换100M个十六进制字符,对比上述代码和Gemini写的版本(Gemini版本由群友提供):
Optimized version: 84.01 ms (stddev: 1.12 ms), 1190284058.7910337 hex chars/sec
Gemini version: 1464.58 ms (stddev: 31.58 ms), 68279030.44209935 hex chars/sec
上述代码可以达到1.2G/s的速度,按照4.9GHz的主频,大约每字符4周期
正确性测试
使用Python脚本生成从1~40位16进制数测例,并使用Xmake并行测试,全部通过
设计妥协
- 为了快速转换,没有错误检测,所有非法字符均映射为0
Gemini版本代码
static std::string hex_to_oct_gemini_version(std::string hex)
{
// 补齐长度为 3 的倍数,方便分段
int padding = (3 - hex.length() % 3) % 3;
std::string prefix(padding, '0');
hex = prefix + hex;
std::string res = "";
// 每次处理 3 个 16 进制字符
for (int i = 0; i < hex.length(); i += 3)
{
// 1. 将 3 位 16 进制转为 12 位整数
int combined = 0;
for (int j = 0; j < 3; ++j)
{
char c = hex[i + j];
int v = (std::isdigit(c) ? c - '0' : std::toupper(c) - 'A' + 10);
combined = (combined << 4) | v;
}
// 2. 将 12 位整数转为 4 位 8 进制
std::string chunk = "";
for (int j = 0; j < 4; ++j)
{
chunk = std::to_string(combined & 7) + chunk;
combined >>= 3;
}
res += chunk;
}
// 最后处理前导零
size_t start = res.find_first_not_of('0');
return (start == std::string::npos) ? "0" : res.substr(start);
}
明显可见不少槽点:
- 非常多的三目运算(
std::isdigit、std::toupper等内置函数里面可能仍然有分支),降低执行效率 - 用了过多的临时
std::string,尽管有SSO优化,效率仍然低下 - 没有提前给
std::string预留空间 - 直接用
std::to_string转换过于重量级,且两个std::string相连接也会有额外的开销