|
|
/* Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
it under the terms of the GNU General Public License, version 2.0,
|
|
|
as published by the Free Software Foundation.
|
|
|
|
|
|
This program is also distributed with certain software (including
|
|
|
but not limited to OpenSSL) that is licensed under separate terms,
|
|
|
as designated in a particular file or component or in included license
|
|
|
documentation. The authors of MySQL hereby grant you an additional
|
|
|
permission to link the program and your derivative works with the
|
|
|
separately licensed software that they have included with MySQL.
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
GNU General Public License, version 2.0, for more details.
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
along with this program; if not, write to the Free Software
|
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
|
|
|
|
|
/*
|
|
|
Bug#16403708 SUBOPTIMAL CODE IN MY_STRNXFRM_SIMPLE()
|
|
|
Bug#68476 Suboptimal code in my_strnxfrm_simple()
|
|
|
|
|
|
Below we test some alternative implementations for my_strnxfrm_simple.
|
|
|
In order to do benchmarking, configure in optimized mode, and
|
|
|
generate a separate executable for this file:
|
|
|
cmake -DMERGE_UNITTESTS=0
|
|
|
You may want to tweak some constants below:
|
|
|
- experiment with num_iterations
|
|
|
run './strings_strnxfrm-t --disable-tap-output'
|
|
|
to see timing reports for your platform.
|
|
|
|
|
|
|
|
|
Benchmarking with gcc and clang indicates that:
|
|
|
|
|
|
There is insignificant difference between my_strnxfrm_simple and strnxfrm_new
|
|
|
when src != dst
|
|
|
|
|
|
my_strnxfrm_simple() is significantly faster than strnxfrm_new
|
|
|
when src == dst, especially for long strings.
|
|
|
|
|
|
Loop unrolling gives significant speedup for large strings.
|
|
|
*/
|
|
|
|
|
|
#include <gtest/gtest.h>
|
|
|
#include <inttypes.h>
|
|
|
#include <sys/types.h>
|
|
|
#include <algorithm>
|
|
|
#include <memory>
|
|
|
#include <string>
|
|
|
#include <unordered_map>
|
|
|
#include <utility>
|
|
|
#include <vector>
|
|
|
|
|
|
#include "my_inttypes.h"
|
|
|
#include "my_sys.h"
|
|
|
#include "template_utils.h"
|
|
|
#include "unittest/gunit/benchmark.h"
|
|
|
#include "unittest/gunit/strnxfrm.h"
|
|
|
|
|
|
using std::make_pair;
|
|
|
using std::max;
|
|
|
using std::pair;
|
|
|
using std::string;
|
|
|
using std::to_string;
|
|
|
using std::unordered_map;
|
|
|
|
|
|
namespace strnxfrm_unittest {
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
// Simply print out an array.
|
|
|
void print_array(const uchar *arr, size_t len) {
|
|
|
for (size_t i = 0; i < len; ++i) {
|
|
|
fprintf(stderr, " %02x", arr[i]);
|
|
|
if ((i % 8) == 7 || i == len - 1) fprintf(stderr, "\n");
|
|
|
}
|
|
|
fprintf(stderr, "\n");
|
|
|
}
|
|
|
|
|
|
// A function to compare two arrays and print them out in its entirety
|
|
|
// (for easier context) if they are not equal.
|
|
|
void expect_arrays_equal(const uchar *expected, const uchar *got, size_t len) {
|
|
|
int num_err = 0;
|
|
|
for (size_t i = 0; i < len && num_err < 5; ++i) {
|
|
|
EXPECT_EQ(expected[i], got[i]);
|
|
|
if (expected[i] != got[i]) ++num_err;
|
|
|
}
|
|
|
if (num_err) {
|
|
|
fprintf(stderr, "Expected:\n");
|
|
|
for (size_t i = 0; i < len; ++i) {
|
|
|
fprintf(stderr, " %c%02x", expected[i] != got[i] ? '*' : ' ',
|
|
|
expected[i]);
|
|
|
if ((i % 8) == 7 || i == len - 1) fprintf(stderr, "\n");
|
|
|
}
|
|
|
fprintf(stderr, "\nGot:\n");
|
|
|
for (size_t i = 0; i < len; ++i) {
|
|
|
fprintf(stderr, " %c%02x", expected[i] != got[i] ? '*' : ' ', got[i]);
|
|
|
if ((i % 8) == 7 || i == len - 1) fprintf(stderr, "\n");
|
|
|
}
|
|
|
fprintf(stderr, "\n");
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// Returns the collation registered under `name`, looked up through a
// charset loader initialised with my_charset_loader_init_mysys().
CHARSET_INFO *init_collation(const char *name) {
  MY_CHARSET_LOADER mysys_loader;
  my_charset_loader_init_mysys(&mysys_loader);

  CHARSET_INFO *const collation =
      my_collation_get_by_name(&mysys_loader, name, MYF(0));
  return collation;
}
|
|
|
|
|
|
// Orders `a` and `b` by their my_strnxfrm() output under `cs`: the common
// prefix of the two transformed keys is compared with memcmp(), and if that
// ties, the shorter key sorts first. Returns <0, 0 or >0 accordingly.
// Each key is produced into a 256-byte buffer.
int compare_through_strxfrm(CHARSET_INFO *cs, const char *a, const char *b) {
  uchar key_a[256];
  uchar key_b[256];

  const int len_a = my_strnxfrm(cs, key_a, sizeof(key_a),
                                pointer_cast<const uchar *>(a), strlen(a));
  const int len_b = my_strnxfrm(cs, key_b, sizeof(key_b),
                                pointer_cast<const uchar *>(b), strlen(b));

  if (false)  // Enable this for debugging.
  {
    fprintf(stderr, "\n\nstrxfrm for '%s':\n", a);
    print_array(key_a, len_a);
    fprintf(stderr, "strxfrm for '%s':\n", b);
    print_array(key_b, len_b);
  }

  const int shared_len = std::min(len_a, len_b);
  const int prefix_order = memcmp(key_a, key_b, shared_len);
  if (prefix_order != 0) return prefix_order;

  // Identical prefixes: decide on key length alone.
  if (len_a < len_b) return -1;
  return (len_a == len_b) ? 0 : 1;
}
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
// Number of times each StrnxfrmTest timing body repeats its call.
#if defined(DBUG_OFF)
// Optimized build. Set this so that each test case takes a few seconds when
// benchmarking, and set it back to a small value before pushing!!
// const size_t num_iterations= 20000000ULL;
const size_t num_iterations = 2ULL;
#else
// There is no point in benchmarking anything in debug mode.
const size_t num_iterations = 1ULL;
#endif
|
|
|
|
|
|
// Fixture for the timing tests below. The test parameter is a buffer length;
// SetUp() stores it and sizes a source and a destination buffer to that
// length, both filled with 0x20.
class StrnxfrmTest : public ::testing::TestWithParam<size_t> {
 protected:
  // Marked `override` (instead of a bare `virtual`) so that the compiler
  // rejects this declaration if it ever stops matching the framework hook.
  void SetUp() override {
    m_length = GetParam();
    m_src.assign(m_length, 0x20);
    m_dst.assign(m_length, 0x20);
  }

  std::vector<uchar> m_src;  // Input buffer handed to the strnxfrm variants.
  std::vector<uchar> m_dst;  // Separate output buffer for the SrcDst tests.
  size_t m_length = 0;       // Length of both buffers; set in SetUp().
};
|
|
|
|
|
|
size_t test_values[] = {1, 10, 100, 1000};
|
|
|
|
|
|
INSTANTIATE_TEST_CASE_P(Strnxfrm, StrnxfrmTest,
|
|
|
::testing::ValuesIn(test_values));
|
|
|
|
|
|
// Calls strnxfrm_orig() num_iterations times, reading from m_src and writing
// to the separate buffer m_dst.
TEST_P(StrnxfrmTest, OriginalSrcDst) {
  CHARSET_INFO *const collation = init_collation("latin1_swedish_ci");
  uchar *const dst = m_dst.data();
  uchar *const src = m_src.data();
  for (size_t remaining = num_iterations; remaining != 0; --remaining) {
    strnxfrm_orig(collation, dst, m_length, m_length, src, m_length, 192);
  }
}
|
|
|
|
|
|
// Calls strnxfrm_orig_unrolled() num_iterations times, reading from m_src and
// writing to the separate buffer m_dst.
TEST_P(StrnxfrmTest, OriginalUnrolledSrcDst) {
  CHARSET_INFO *const collation = init_collation("latin1_swedish_ci");
  uchar *const dst = m_dst.data();
  uchar *const src = m_src.data();
  for (size_t remaining = num_iterations; remaining != 0; --remaining) {
    strnxfrm_orig_unrolled(collation, dst, m_length, m_length, src, m_length,
                           192);
  }
}
|
|
|
|
|
|
// Calls strnxfrm_new() num_iterations times, reading from m_src and writing
// to the separate buffer m_dst.
TEST_P(StrnxfrmTest, ModifiedSrcDst) {
  CHARSET_INFO *const collation = init_collation("latin1_swedish_ci");
  uchar *const dst = m_dst.data();
  uchar *const src = m_src.data();
  for (size_t remaining = num_iterations; remaining != 0; --remaining) {
    strnxfrm_new(collation, dst, m_length, m_length, src, m_length, 192);
  }
}
|
|
|
|
|
|
// Calls strnxfrm_new_unrolled() num_iterations times, reading from m_src and
// writing to the separate buffer m_dst.
TEST_P(StrnxfrmTest, ModifiedUnrolledSrcDst) {
  CHARSET_INFO *const collation = init_collation("latin1_swedish_ci");
  uchar *const dst = m_dst.data();
  uchar *const src = m_src.data();
  for (size_t remaining = num_iterations; remaining != 0; --remaining) {
    strnxfrm_new_unrolled(collation, dst, m_length, m_length, src, m_length,
                          192);
  }
}
|
|
|
|
|
|
// Calls strnxfrm_orig() num_iterations times in place: m_src is passed as
// both the destination and the source.
TEST_P(StrnxfrmTest, OriginalSrcSrc) {
  CHARSET_INFO *const collation = init_collation("latin1_swedish_ci");
  uchar *const buffer = m_src.data();
  for (size_t remaining = num_iterations; remaining != 0; --remaining) {
    strnxfrm_orig(collation, buffer, m_length, m_length, buffer, m_length,
                  192);
  }
}
|
|
|
|
|
|
// Calls strnxfrm_orig_unrolled() num_iterations times in place: m_src is
// passed as both the destination and the source.
TEST_P(StrnxfrmTest, OriginalUnrolledSrcSrc) {
  CHARSET_INFO *const collation = init_collation("latin1_swedish_ci");
  uchar *const buffer = m_src.data();
  for (size_t remaining = num_iterations; remaining != 0; --remaining) {
    strnxfrm_orig_unrolled(collation, buffer, m_length, m_length, buffer,
                           m_length, 192);
  }
}
|
|
|
|
|
|
// Calls strnxfrm_new() num_iterations times in place: m_src is passed as both
// the destination and the source.
TEST_P(StrnxfrmTest, ModifiedSrcSrc) {
  CHARSET_INFO *const collation = init_collation("latin1_swedish_ci");
  uchar *const buffer = m_src.data();
  for (size_t remaining = num_iterations; remaining != 0; --remaining) {
    strnxfrm_new(collation, buffer, m_length, m_length, buffer, m_length, 192);
  }
}
|
|
|
|
|
|
// Calls strnxfrm_new_unrolled() num_iterations times in place: m_src is
// passed as both the destination and the source.
TEST_P(StrnxfrmTest, ModifiedUnrolledSrcSrc) {
  CHARSET_INFO *const collation = init_collation("latin1_swedish_ci");
  uchar *const buffer = m_src.data();
  for (size_t remaining = num_iterations; remaining != 0; --remaining) {
    strnxfrm_new_unrolled(collation, buffer, m_length, m_length, buffer,
                          m_length, 192);
  }
}
|
|
|
|
|
|
// Transforms a short mixed-script string with utf8_bin for every even output
// limit below 32 and checks that the bytes produced are a prefix of the
// recorded, padded answer.
TEST(StrXfrmTest, SimpleUTF8Correctness) {
  CHARSET_INFO *const collation = init_collation("utf8_bin");

  const char *const input = "abc æøå 日本語";

  static const unsigned char full_answer_with_pad[32] = {
      // abc
      0x00, 0x61, 0x00, 0x62, 0x00, 0x63,
      // space
      0x00, 0x20,
      // æøå
      0x00, 0xe6, 0x00, 0xf8, 0x00, 0xe5,
      // space
      0x00, 0x20,
      // 日本語
      0x65, 0xe5, 0x67, 0x2c, 0x8a, 0x9e,
      // space for padding
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20};

  unsigned char output[32];
  const size_t input_len = strlen(input);
  for (size_t limit = 0; limit < sizeof(output); limit += 2) {
    // Pre-fill with 0xff so stale bytes cannot pass for real output.
    memset(output, 0xff, sizeof(output));
    my_strnxfrm(collation, output, limit, pointer_cast<const uchar *>(input),
                input_len);
    expect_arrays_equal(full_answer_with_pad, output, limit);
  }
}
|
|
|
|
|
|
// Transforms a short mixed-script string with utf8mb4_0900_ai_ci for every
// even output limit below 30 and checks that the bytes produced are a prefix
// of the recorded answer.
TEST(StrXfrmTest, SimpleUTF8MB4Correctness) {
  CHARSET_INFO *const collation = init_collation("utf8mb4_0900_ai_ci");

  const char *const input = "abc æøå 日本語";

  static const unsigned char full_answer_with_pad[30] = {
      // abc
      0x1c, 0x47, 0x1c, 0x60, 0x1c, 0x7a,
      // space
      0x02, 0x09,
      // æøå
      0x1c, 0x47, 0x1c, 0xaa, 0x1d, 0xdd, 0x1c, 0x47,
      // space
      0x02, 0x09,
      // 日本語
      0xfb, 0x40, 0xe5, 0xe5, 0xfb, 0x40, 0xe7, 0x2c, 0xfb, 0x41, 0x8a, 0x9e};

  unsigned char output[30];
  const size_t input_len = strlen(input);
  for (size_t limit = 0; limit < sizeof(output); limit += 2) {
    // Pre-fill with 0xff so stale bytes cannot pass for real output.
    memset(output, 0xff, sizeof(output));
    my_strnxfrm(collation, output, limit, pointer_cast<const uchar *>(input),
                input_len);
    expect_arrays_equal(full_answer_with_pad, output, limit);
  }
}
|
|
|
|
|
|
// Transforms a short mixed-script string with utf8mb4_0900_as_ci for every
// even output limit below 62 and checks that the bytes produced are a prefix
// of the recorded answer (two weight levels joined by a 0x00 0x00 separator).
TEST(StrXfrmTest, UTF8MB4Correctness_as_ci) {
  CHARSET_INFO *const collation = init_collation("utf8mb4_0900_as_ci");

  const char *const input = "abc æøå 日本語";

  static const unsigned char full_answer_with_pad[62] = {
      // First level: abc
      0x1c, 0x47, 0x1c, 0x60, 0x1c, 0x7a,
      // space
      0x02, 0x09,
      // æøå
      0x1c, 0x47, 0x1c, 0xaa, 0x1d, 0xdd, 0x1c, 0x47,
      // space
      0x02, 0x09,
      // 日本語
      0xfb, 0x40, 0xe5, 0xe5, 0xfb, 0x40, 0xe7, 0x2c, 0xfb, 0x41, 0x8a, 0x9e,
      // level separator
      0x00, 0x00,
      // Second level, same character order as above.
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x01, 0x10, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x2F, 0x00, 0x20, 0x00, 0x29, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20};

  unsigned char output[62];
  const size_t input_len = strlen(input);
  for (size_t limit = 0; limit < sizeof(output); limit += 2) {
    // Pre-fill with 0xff so stale bytes cannot pass for real output.
    memset(output, 0xff, sizeof(output));
    my_strnxfrm(collation, output, limit, pointer_cast<const uchar *>(input),
                input_len);
    expect_arrays_equal(full_answer_with_pad, output, limit);
  }
}
|
|
|
|
|
|
// Spot checks on utf8mb4_0900_as_ci using comparisons of transformed keys.
TEST(StrXfrmTest, UTF8MB4Correctness_as_ci_1) {
  CHARSET_INFO *const cs = init_collation("utf8mb4_0900_as_ci");

  // Case insensitive: strings differing only in letter case compare equal.
  EXPECT_EQ(compare_through_strxfrm(cs, "Abc", "aBC"), 0);

  // Accent sensitive: differently accented letters must not compare equal.
  EXPECT_NE(compare_through_strxfrm(cs, "ǍḄÇ", "ÁḆĈ"), 0);
  EXPECT_NE(compare_through_strxfrm(cs, u8"\uA73A", u8"\uA738"), 0);

  // Hangul decomposition: the two code points are expected to compare equal.
  EXPECT_EQ(compare_through_strxfrm(cs, u8"\uAC00", u8"\u326E"), 0);
}
|
|
|
|
|
|
// Transforms a string covering several scripts with utf8mb4_ja_0900_as_cs for
// every even output limit below 156 and checks that the bytes produced are a
// prefix of the recorded three-level answer.
TEST(StrXfrmTest, JapaneseUTF8MB4) {
  CHARSET_INFO *const collation = init_collation("utf8mb4_ja_0900_as_cs");

  const char *const input =
      // latin 'aAʬʭ'
      "\x61\x41\xCA\xAC\xCA\xAD"
      // Hiragana and Katakana 'ぁンはばぱ'
      "\xE3\x81\x81\xE3\x83\xB3\xE3\x81\xAF\xE3\x81\xB0\xE3\x81\xB1"
      // Japanese Han '亜熙憐'
      "\xE4\xBA\x9C\xE7\x86\x99\xE6\x86\x90"
      // Other Han '﨎㐀'
      "\xEF\xA8\x8E\xE3\x90\x80"
      // Greek, Coptic etc. 'αⲁаⳤퟻ'
      "\xCE\xB1\xE2\xB2\x81\xD0\xB0\xE2\xB3\xA4\xED\x9F\xBB";

  static const unsigned char full_answer_with_pad[156] = {
      // Level 1 (62 bytes including the trailing level separator).
      0x1C, 0x47, 0x1C, 0x47, 0x1F, 0xB1, 0x1F, 0xB5,
      0x1F, 0xB6, 0x1F, 0xE7, 0x1F, 0xD0, 0x1F, 0xD0,
      0x1F, 0xD0, 0x54, 0xA4, 0x6D, 0x76, 0x60, 0x00,
      0xFB, 0x41, 0xFA, 0x0E, 0xFB, 0x80, 0xB4, 0x00,
      0xFB, 0x86, 0x1F, 0xB9, 0xFB, 0x86, 0x1F, 0xE6,
      0xFB, 0x86, 0x20, 0x22, 0xFB, 0x86, 0x1F, 0xF1,
      0xFB, 0x86, 0x1F, 0xE6, 0xFB, 0x86, 0x1F, 0xF0,
      0xFB, 0x86, 0x3D, 0x59,
      0x00, 0x00,  // Level separator
      // Level 2 (48 bytes including the trailing level separator).
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x37, 0x00, 0x20, 0x00, 0x38, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x00,  // Level separator
      // Level 3 (46 bytes).
      0x00, 0x02, 0x00, 0x08, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x0D, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
      0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x04,
      0x00, 0x04, 0x00, 0x04, 0x00, 0x02};

  unsigned char output[sizeof(full_answer_with_pad)];
  const size_t input_len = strlen(input);
  for (size_t limit = 0; limit < sizeof(output); limit += 2) {
    // Pre-fill with 0xff so stale bytes cannot pass for real output.
    memset(output, 0xff, sizeof(output));
    my_strnxfrm(collation, output, limit, pointer_cast<const uchar *>(input),
                input_len);
    expect_arrays_equal(full_answer_with_pad, output, limit);
  }
}
|
|
|
|
|
|
// Part one: transforms a multi-script string with utf8mb4_ja_0900_as_cs_ks
// for every even output limit and checks the bytes against the recorded
// four-level answer. Part two: checks that Hiragana/Katakana variants of the
// same text compare equal under utf8mb4_ja_0900_as_cs but are ordered under
// utf8mb4_ja_0900_as_cs_ks.
TEST(StrXfrmTest, Japanese_ks_UTF8MB4) {
  CHARSET_INFO *const collation = init_collation("utf8mb4_ja_0900_as_cs_ks");

  /*
    Weights of Japanese Han, Other Han, Greek, Coptic are not changed
    compared to the test result of collation utf8mb4_ja_0900_as_cs (in test
    JapaneseUTF8MB4 above). But an additional quaternary weight is added for
    Hiragana and Katakana characters.
  */
  const char *const input =
      // latin 'aAʬʭ'
      "\x61\x41\xCA\xAC\xCA\xAD"
      // Hiragana and Katakana 'ぁンはばぱ'
      "\xE3\x81\x81\xE3\x83\xB3\xE3\x81\xAF\xE3\x81\xB0\xE3\x81\xB1"
      // Japanese Han '亜熙憐'
      "\xE4\xBA\x9C\xE7\x86\x99\xE6\x86\x90"
      // Other Han '﨎㐀'
      "\xEF\xA8\x8E\xE3\x90\x80"
      // Greek, Coptic etc. 'αⲁаⳤퟻ'
      "\xCE\xB1\xE2\xB2\x81\xD0\xB0\xE2\xB3\xA4\xED\x9F\xBB"
      // Prefix context 'さー' and 'サー'
      "\xE3\x81\x95\xE3\x83\xBC\xE3\x82\xB5\xE3\x83\xBC";

  static const unsigned char full_answer_with_pad[] = {
      // Level 1 (70 bytes including the trailing level separator).
      0x1C, 0x47, 0x1C, 0x47, 0x1F, 0xB1, 0x1F, 0xB5,
      0x1F, 0xB6, 0x1F, 0xE7, 0x1F, 0xD0, 0x1F, 0xD0,
      0x1F, 0xD0, 0x54, 0xA4, 0x6D, 0x76, 0x60, 0x00,
      0xFB, 0x41, 0xFA, 0x0E, 0xFB, 0x80, 0xB4, 0x00,
      0xFB, 0x86, 0x1F, 0xB9, 0xFB, 0x86, 0x1F, 0xE6,
      0xFB, 0x86, 0x20, 0x22, 0xFB, 0x86, 0x1F, 0xF1,
      0xFB, 0x86, 0x1F, 0xE6, 0xFB, 0x86, 0x1F, 0xF0,
      0xFB, 0x86, 0x3D, 0x59, 0x1F, 0xC1, 0x1F, 0xB6,
      0x1F, 0xC1, 0x1F, 0xB6,
      0x00, 0x00,  // Level separator
      // Level 2 (56 bytes including the trailing level separator).
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x37, 0x00, 0x20, 0x00, 0x38, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
      0x00, 0x00,  // Level separator
      // Level 3 (60 bytes including the trailing level separator).
      0x00, 0x02, 0x00, 0x08, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x0D, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
      0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x04,
      0x00, 0x04, 0x00, 0x04, 0x00, 0x02, 0x00, 0x0E,
      0x00, 0x0C, 0x00, 0x21, 0x00, 0x0E, 0x00, 0x0C,
      0x00, 0x21,
      0x00, 0x00,  // Level separator
      // Level 4 (18 bytes).
      0x00, 0x02, 0x00, 0x08, 0x00, 0x02, 0x00, 0x02,
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x08,
      0x00, 0x08};

  unsigned char output[sizeof(full_answer_with_pad)];
  const size_t input_len = strlen(input);
  for (size_t limit = 0; limit < sizeof(output); limit += 2) {
    // Pre-fill with 0xff so stale bytes cannot pass for real output.
    memset(output, 0xff, sizeof(output));
    my_strnxfrm(collation, output, limit, pointer_cast<const uchar *>(input),
                input_len);
    expect_arrays_equal(full_answer_with_pad, output, limit);
  }

  CHARSET_INFO *as_cs = init_collation("utf8mb4_ja_0900_as_cs");
  CHARSET_INFO *as_cs_ks = init_collation("utf8mb4_ja_0900_as_cs_ks");

  // utf8 "にほんご"
  const char *str1 = "\xE3\x81\xAB\xE3\x81\xBB\xE3\x82\x93\xE3\x81\x94";
  // utf8 "ニホンゴ"
  const char *str2 = "\xE3\x83\x8B\xE3\x83\x9B\xE3\x83\xB3\xE3\x82\xB4";
  EXPECT_EQ(compare_through_strxfrm(as_cs, str1, str2), 0);
  EXPECT_LT(compare_through_strxfrm(as_cs_ks, str1, str2), 0);

  // All four Hiragana/Katakana spellings of the same two-character text.
  str1 = "\xE3\x81\xAF\xE3\x81\xAF";              // utf8 "はは"
  str2 = "\xE3\x81\xAF\xE3\x83\x8F";              // utf8 "はハ"
  const char *str3 = "\xE3\x83\x8F\xE3\x81\xAF";  // utf8 "ハは"
  const char *str4 = "\xE3\x83\x8F\xE3\x83\x8F";  // utf8 "ハハ"

  // Without the kana-sensitive level they are all equal ...
  EXPECT_EQ(compare_through_strxfrm(as_cs, str1, str2), 0);
  EXPECT_EQ(compare_through_strxfrm(as_cs, str2, str3), 0);
  EXPECT_EQ(compare_through_strxfrm(as_cs, str3, str4), 0);

  // ... and with it they are strictly increasing in the order listed.
  EXPECT_LT(compare_through_strxfrm(as_cs_ks, str1, str2), 0);
  EXPECT_LT(compare_through_strxfrm(as_cs_ks, str2, str3), 0);
  EXPECT_LT(compare_through_strxfrm(as_cs_ks, str3, str4), 0);
}
|
|
|
|
|
|
// Checks the exact utf8mb4_ja_0900_as_cs sort keys of HE followed by two
// different marks.
TEST(StrXfrmTest, JapaneseUTF8MB4_1) {
  CHARSET_INFO *const collation = init_collation("utf8mb4_ja_0900_as_cs");

  // Japanese HE followed by the mark encoded as E3 82 99 (U+3099).
  // NOTE(review): this was originally labelled "Handakuten mark"; Unicode
  // lists U+3099 as the combining voiced sound mark -- confirm the naming.
  const char *const he_with_mark = "\xE3\x81\xB8\xE3\x82\x99";
  // Japanese HE followed by the mark encoded as E3 82 9E (U+309E).
  // NOTE(review): this was originally labelled "voiced length mark"; Unicode
  // lists U+309E as the Hiragana voiced iteration mark -- confirm the naming.
  const char *const he_with_voiced_mark = "\xE3\x81\xB8\xE3\x82\x9E";

  /*
    When the second mark (U+309E) follows 'HE', the string should sort before
    'HE followed by the first mark (U+3099)' on the tertiary level.
  */
  static const unsigned char answer1[] = {0x1F, 0xD3, 0x00, 0x00, 0x00,
                                          0x20, 0x00, 0x37, 0x00, 0x00,
                                          0x00, 0x0E, 0x00, 0x02};
  static const unsigned char answer2[] = {
      0x1F, 0xD3, 0x1F, 0xD3, 0x00, 0x00, 0x00, 0x20, 0x00, 0x20, 0x00,
      0x37, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x01, 0x00, 0x21};

  // Transforms `text` into a 0xff-prefilled scratch buffer, limited to
  // `answer_len` output bytes, and compares the result with `answer`.
  const auto verify = [collation](const char *text,
                                  const unsigned char *answer,
                                  size_t answer_len) {
    unsigned char scratch[32];
    memset(scratch, 0xff, sizeof(scratch));
    my_strnxfrm(collation, scratch, answer_len,
                pointer_cast<const uchar *>(text), strlen(text));
    expect_arrays_equal(answer, scratch, answer_len);
  };

  verify(he_with_mark, answer1, sizeof(answer1));
  verify(he_with_voiced_mark, answer2, sizeof(answer2));
}
|
|
|
|
|
|
// Transforms "abc" followed by four spaces with utf8mb4_0900_as_cs for every
// even output limit below the full key length and checks that the bytes
// produced are a prefix of the recorded three-level answer.
TEST(StrXfrmTest, UTF8MB4PadCorrectness) {
  CHARSET_INFO *cs = init_collation("utf8mb4_0900_as_cs");

  // The expected key below holds weights for exactly four trailing spaces on
  // every level, so the input must contain four of them. They are written as
  // \x20 escapes so the count cannot be lost to whitespace normalisation.
  const char *src = "abc\x20\x20\x20\x20";

  // Sized by its initialisers (46 bytes) rather than by a separate literal,
  // so the table and the scratch buffer cannot drift apart.
  static const unsigned char full_answer[] = {
      0x1c, 0x47, 0x1c, 0x60, 0x1c, 0x7a,              // abc
      0x02, 0x09, 0x02, 0x09, 0x02, 0x09, 0x02, 0x09,  // Four spaces.
      0x00, 0x00,                                      // Level separator.
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20,              // Accents for abc.
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,  // Accents for spaces.
      0x00, 0x00,                                      // Level separator.
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02,              // Case for abc.
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,  // Case for spaces.
  };

  unsigned char buf[sizeof(full_answer)];

  for (size_t maxlen = 0; maxlen < sizeof(buf); maxlen += 2) {
    SCOPED_TRACE("maxlen=" + to_string(maxlen) + "/" + to_string(sizeof(buf)));
    // Pre-fill with 0xff so stale bytes cannot pass for real output.
    memset(buf, 0xff, sizeof(buf));
    my_strnxfrm(cs, buf, maxlen, pointer_cast<const uchar *>(src), strlen(src));
    expect_arrays_equal(full_answer, buf, maxlen);
  }
}
|
|
|
|
|
|
// Calls the collation's strnxfrm hook with a null source pointer of length
// zero and MY_STRXFRM_PAD_TO_MAXLEN, and expects the whole 256-byte output
// buffer to end up zeroed.
TEST(StrXfrmTest, NullPointer) {
  CHARSET_INFO *const cs = init_collation("utf8mb4_0900_ai_ci");

  unsigned char buf[256];
  // Start from a non-zero pattern so that untouched bytes are detected.
  memset(buf, 0x33, sizeof(buf));

  cs->coll->strnxfrm(cs, buf, sizeof(buf), sizeof(buf), nullptr, 0,
                     MY_STRXFRM_PAD_TO_MAXLEN);

  size_t i = 0;
  while (i < sizeof(buf)) {
    EXPECT_EQ(0, buf[i]);
    ++i;
  }
}
|
|
|
|
|
|
// Benchmark based on reduced test case in Bug #83247 / #24788778.
|
|
|
//
|
|
|
// Note: This benchmark does not exercise any real multibyte characters;
|
|
|
// it is mostly exercising padding. If we change the test string to contain
|
|
|
// e.g. Japanese characters, performance goes down by ~20%.
|
|
|
static void BM_SimpleUTF8(size_t num_iterations) {
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
CHARSET_INFO *cs = init_collation("utf8_bin");
|
|
|
|
|
|
static constexpr int key_cols = 12;
|
|
|
static constexpr int set_key_cols = 6; // Only the first half is set.
|
|
|
static constexpr int key_col_chars = 80;
|
|
|
static constexpr int bytes_per_char = 3;
|
|
|
static constexpr int key_bytes = key_col_chars * bytes_per_char;
|
|
|
static constexpr int buffer_bytes = key_cols * key_bytes;
|
|
|
|
|
|
unsigned char source[buffer_bytes];
|
|
|
unsigned char dest[buffer_bytes];
|
|
|
|
|
|
const char *content = "PolyFilla27773";
|
|
|
const int len = strlen(content);
|
|
|
memset(source, 0, sizeof(source));
|
|
|
|
|
|
for (int k = 0, offset = 0; k < set_key_cols; ++k, offset += key_bytes) {
|
|
|
memcpy(source + offset, content, len);
|
|
|
}
|
|
|
|
|
|
StartBenchmarkTiming();
|
|
|
for (size_t i = 0; i < num_iterations; ++i) {
|
|
|
for (int k = 0, offset = 0; k < key_cols; ++k, offset += key_bytes) {
|
|
|
if (k < set_key_cols) {
|
|
|
my_strnxfrm(cs, dest + offset, key_bytes, source + offset, len);
|
|
|
} else {
|
|
|
my_strnxfrm(cs, dest + offset, key_bytes, source + offset, 0);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
StopBenchmarkTiming();
|
|
|
}
|
|
|
BENCHMARK(BM_SimpleUTF8)
|
|
|
|
|
|
// Verifies using my_charpos to find the length of a string.
|
|
|
// hp_hash.c does this extensively. Not really a strnxfrm benchmark,
|
|
|
// but belongs to the same optimization effort.
|
|
|
static void BM_UTF8MB4StringLength(size_t num_iterations) {
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");
|
|
|
|
|
|
// Some English text, then some Norwegian text, then some Japanese,
|
|
|
// and then a few emoji (the last with skin tone modifiers).
|
|
|
const char *content =
|
|
|
"Premature optimization is the root of all evil. "
|
|
|
"Våre norske tegn bør æres. 日本語が少しわかります。 "
|
|
|
"✌️🐶👩🏽";
|
|
|
const int len = strlen(content);
|
|
|
int tot_len = 0;
|
|
|
|
|
|
StartBenchmarkTiming();
|
|
|
for (size_t i = 0; i < num_iterations; ++i) {
|
|
|
tot_len += my_charpos(cs, content, content + len, len / cs->mbmaxlen);
|
|
|
}
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
EXPECT_NE(0, tot_len);
|
|
|
SetBytesProcessed(num_iterations * strlen(content));
|
|
|
}
|
|
|
BENCHMARK(BM_UTF8MB4StringLength)
|
|
|
|
|
|
// Benchmark testing the default recommended collation for 8.0, without
|
|
|
// stressing padding as much, but still testing only Latin letters.
|
|
|
static void BM_SimpleUTF8MB4(size_t num_iterations) {
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");
|
|
|
|
|
|
const char *content =
|
|
|
"This is a rather long string that contains only "
|
|
|
"simple letters that are available in ASCII. This is a common special "
|
|
|
"case that warrants a benchmark on its own, even if the character set "
|
|
|
"and collation supports much more complicated scenarios.";
|
|
|
const int len = strlen(content);
|
|
|
|
|
|
// Just recorded from a trial run on the string above.
|
|
|
static constexpr uchar expected[] = {
|
|
|
0x1e, 0x95, 0x1d, 0x18, 0x1d, 0x32, 0x1e, 0x71, 0x02, 0x09, 0x1d, 0x32,
|
|
|
0x1e, 0x71, 0x02, 0x09, 0x1c, 0x47, 0x02, 0x09, 0x1e, 0x33, 0x1c, 0x47,
|
|
|
0x1e, 0x95, 0x1d, 0x18, 0x1c, 0xaa, 0x1e, 0x33, 0x02, 0x09, 0x1d, 0x77,
|
|
|
0x1d, 0xdd, 0x1d, 0xb9, 0x1c, 0xf4, 0x02, 0x09, 0x1e, 0x71, 0x1e, 0x95,
|
|
|
0x1e, 0x33, 0x1d, 0x32, 0x1d, 0xb9, 0x1c, 0xf4, 0x02, 0x09, 0x1e, 0x95,
|
|
|
0x1d, 0x18, 0x1c, 0x47, 0x1e, 0x95, 0x02, 0x09, 0x1c, 0x7a, 0x1d, 0xdd,
|
|
|
0x1d, 0xb9, 0x1e, 0x95, 0x1c, 0x47, 0x1d, 0x32, 0x1d, 0xb9, 0x1e, 0x71,
|
|
|
0x02, 0x09, 0x1d, 0xdd, 0x1d, 0xb9, 0x1d, 0x77, 0x1f, 0x0b, 0x02, 0x09,
|
|
|
0x1e, 0x71, 0x1d, 0x32, 0x1d, 0xaa, 0x1e, 0x0c, 0x1d, 0x77, 0x1c, 0xaa,
|
|
|
0x02, 0x09, 0x1d, 0x77, 0x1c, 0xaa, 0x1e, 0x95, 0x1e, 0x95, 0x1c, 0xaa,
|
|
|
0x1e, 0x33, 0x1e, 0x71, 0x02, 0x09, 0x1e, 0x95, 0x1d, 0x18, 0x1c, 0x47,
|
|
|
0x1e, 0x95, 0x02, 0x09, 0x1c, 0x47, 0x1e, 0x33, 0x1c, 0xaa, 0x02, 0x09,
|
|
|
0x1c, 0x47, 0x1e, 0xe3, 0x1c, 0x47, 0x1d, 0x32, 0x1d, 0x77, 0x1c, 0x47,
|
|
|
0x1c, 0x60, 0x1d, 0x77, 0x1c, 0xaa, 0x02, 0x09, 0x1d, 0x32, 0x1d, 0xb9,
|
|
|
0x02, 0x09, 0x1c, 0x47, 0x1e, 0x71, 0x1c, 0x7a, 0x1d, 0x32, 0x1d, 0x32,
|
|
|
0x02, 0x77, 0x02, 0x09, 0x1e, 0x95, 0x1d, 0x18, 0x1d, 0x32, 0x1e, 0x71,
|
|
|
0x02, 0x09, 0x1d, 0x32, 0x1e, 0x71, 0x02, 0x09, 0x1c, 0x47, 0x02, 0x09,
|
|
|
0x1c, 0x7a, 0x1d, 0xdd, 0x1d, 0xaa, 0x1d, 0xaa, 0x1d, 0xdd, 0x1d, 0xb9,
|
|
|
0x02, 0x09, 0x1e, 0x71, 0x1e, 0x0c, 0x1c, 0xaa, 0x1c, 0x7a, 0x1d, 0x32,
|
|
|
0x1c, 0x47, 0x1d, 0x77, 0x02, 0x09, 0x1c, 0x7a, 0x1c, 0x47, 0x1e, 0x71,
|
|
|
0x1c, 0xaa, 0x02, 0x09, 0x1e, 0x95, 0x1d, 0x18, 0x1c, 0x47, 0x1e, 0x95,
|
|
|
0x02, 0x09, 0x1e, 0xf5, 0x1c, 0x47, 0x1e, 0x33, 0x1e, 0x33, 0x1c, 0x47,
|
|
|
0x1d, 0xb9, 0x1e, 0x95, 0x1e, 0x71, 0x02, 0x09, 0x1c, 0x47, 0x02, 0x09,
|
|
|
0x1c, 0x60, 0x1c, 0xaa, 0x1d, 0xb9, 0x1c, 0x7a, 0x1d, 0x18, 0x1d, 0xaa,
|
|
|
0x1c, 0x47, 0x1e, 0x33, 0x1d, 0x65, 0x02, 0x09, 0x1d, 0xdd, 0x1d, 0xb9,
|
|
|
0x02, 0x09, 0x1d, 0x32, 0x1e, 0x95, 0x1e, 0x71, 0x02, 0x09, 0x1d, 0xdd,
|
|
|
0x1e, 0xf5, 0x1d, 0xb9, 0x02, 0x22, 0x02, 0x09, 0x1c, 0xaa, 0x1e, 0xe3,
|
|
|
0x1c, 0xaa, 0x1d, 0xb9, 0x02, 0x09, 0x1d, 0x32, 0x1c, 0xe5, 0x02, 0x09,
|
|
|
0x1e, 0x95, 0x1d, 0x18, 0x1c, 0xaa, 0x02, 0x09, 0x1c, 0x7a, 0x1d, 0x18,
|
|
|
0x1c, 0x47, 0x1e, 0x33, 0x1c, 0x47, 0x1c, 0x7a, 0x1e, 0x95, 0x1c, 0xaa,
|
|
|
0x1e, 0x33, 0x02, 0x09, 0x1e, 0x71, 0x1c, 0xaa, 0x1e, 0x95, 0x02, 0x09,
|
|
|
0x1c, 0x47, 0x1d, 0xb9, 0x1c, 0x8f, 0x02, 0x09, 0x1c, 0x7a, 0x1d, 0xdd,
|
|
|
0x1d, 0x77, 0x1d, 0x77, 0x1c, 0x47, 0x1e, 0x95, 0x1d, 0x32, 0x1d, 0xdd,
|
|
|
0x1d, 0xb9, 0x02, 0x09, 0x1e, 0x71, 0x1e, 0xb5, 0x1e, 0x0c, 0x1e, 0x0c,
|
|
|
0x1d, 0xdd, 0x1e, 0x33, 0x1e, 0x95, 0x1e, 0x71, 0x02, 0x09, 0x1d, 0xaa,
|
|
|
0x1e, 0xb5, 0x1c, 0x7a, 0x1d, 0x18, 0x02, 0x09, 0x1d, 0xaa, 0x1d, 0xdd,
|
|
|
0x1e, 0x33, 0x1c, 0xaa, 0x02, 0x09, 0x1c, 0x7a, 0x1d, 0xdd, 0x1d, 0xaa,
|
|
|
0x1e, 0x0c, 0x1d, 0x77, 0x1d, 0x32, 0x1c, 0x7a, 0x1c, 0x47, 0x1e, 0x95,
|
|
|
0x1c, 0xaa, 0x1c, 0x8f, 0x02, 0x09, 0x1e, 0x71, 0x1c, 0x7a, 0x1c, 0xaa,
|
|
|
0x1d, 0xb9, 0x1c, 0x47, 0x1e, 0x33, 0x1d, 0x32, 0x1d, 0xdd, 0x1e, 0x71,
|
|
|
0x02, 0x77};
|
|
|
uchar dest[sizeof(expected)];
|
|
|
|
|
|
StartBenchmarkTiming();
|
|
|
for (size_t i = 0; i < num_iterations; ++i) {
|
|
|
my_strnxfrm(cs, dest, sizeof(dest),
|
|
|
reinterpret_cast<const uchar *>(content), len);
|
|
|
}
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
expect_arrays_equal(expected, dest, sizeof(dest));
|
|
|
|
|
|
SetBytesProcessed(num_iterations * strlen(content));
|
|
|
}
|
|
|
BENCHMARK(BM_SimpleUTF8MB4)
|
|
|
|
|
|
// Benchmark testing a wider variety of character sets on a more complicated
|
|
|
// collation (the recommended default collation for 8.0), without stressing
|
|
|
// padding as much.
|
|
|
static void BM_MixedUTF8MB4(size_t num_iterations) {
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");
|
|
|
|
|
|
// Some English text, then some Norwegian text, then some Japanese,
|
|
|
// and then a few emoji (the last with skin tone modifiers).
|
|
|
const char *content =
|
|
|
"Premature optimization is the root of all evil. "
|
|
|
"Våre norske tegn bør æres. 日本語が少しわかります。 "
|
|
|
"✌️🐶👩🏽";
|
|
|
const int len = strlen(content);
|
|
|
|
|
|
// Just recorded from a trial run on the string above.
|
|
|
static constexpr uchar expected[] = {
|
|
|
0x1e, 0x0c, 0x1e, 0x33, 0x1c, 0xaa, 0x1d, 0xaa, 0x1c, 0x47, 0x1e, 0x95,
|
|
|
0x1e, 0xb5, 0x1e, 0x33, 0x1c, 0xaa, 0x02, 0x09, 0x1d, 0xdd, 0x1e, 0x0c,
|
|
|
0x1e, 0x95, 0x1d, 0x32, 0x1d, 0xaa, 0x1d, 0x32, 0x1f, 0x21, 0x1c, 0x47,
|
|
|
0x1e, 0x95, 0x1d, 0x32, 0x1d, 0xdd, 0x1d, 0xb9, 0x02, 0x09, 0x1d, 0x32,
|
|
|
0x1e, 0x71, 0x02, 0x09, 0x1e, 0x95, 0x1d, 0x18, 0x1c, 0xaa, 0x02, 0x09,
|
|
|
0x1e, 0x33, 0x1d, 0xdd, 0x1d, 0xdd, 0x1e, 0x95, 0x02, 0x09, 0x1d, 0xdd,
|
|
|
0x1c, 0xe5, 0x02, 0x09, 0x1c, 0x47, 0x1d, 0x77, 0x1d, 0x77, 0x02, 0x09,
|
|
|
0x1c, 0xaa, 0x1e, 0xe3, 0x1d, 0x32, 0x1d, 0x77, 0x02, 0x77, 0x02, 0x09,
|
|
|
0x1e, 0xe3, 0x1c, 0x47, 0x1e, 0x33, 0x1c, 0xaa, 0x02, 0x09, 0x1d, 0xb9,
|
|
|
0x1d, 0xdd, 0x1e, 0x33, 0x1e, 0x71, 0x1d, 0x65, 0x1c, 0xaa, 0x02, 0x09,
|
|
|
0x1e, 0x95, 0x1c, 0xaa, 0x1c, 0xf4, 0x1d, 0xb9, 0x02, 0x09, 0x1c, 0x60,
|
|
|
0x1d, 0xdd, 0x1e, 0x33, 0x02, 0x09, 0x1c, 0x47, 0x1c, 0xaa, 0x1e, 0x33,
|
|
|
0x1c, 0xaa, 0x1e, 0x71, 0x02, 0x77, 0x02, 0x09, 0xfb, 0x40, 0xe5, 0xe5,
|
|
|
0xfb, 0x40, 0xe7, 0x2c, 0xfb, 0x41, 0x8a, 0x9e, 0x3d, 0x60, 0xfb, 0x40,
|
|
|
0xdc, 0x11, 0x3d, 0x66, 0x3d, 0x87, 0x3d, 0x60, 0x3d, 0x83, 0x3d, 0x79,
|
|
|
0x3d, 0x67, 0x02, 0x8a, 0x02, 0x09, 0x0a, 0x2d, 0x13, 0xdf, 0x14, 0x12,
|
|
|
0x13, 0xa6};
|
|
|
uchar dest[sizeof(expected)];
|
|
|
|
|
|
StartBenchmarkTiming();
|
|
|
for (size_t i = 0; i < num_iterations; ++i) {
|
|
|
my_strnxfrm(cs, dest, sizeof(dest),
|
|
|
reinterpret_cast<const uchar *>(content), len);
|
|
|
}
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
expect_arrays_equal(expected, dest, sizeof(dest));
|
|
|
SetBytesProcessed(num_iterations * strlen(content));
|
|
|
}
|
|
|
BENCHMARK(BM_MixedUTF8MB4)
|
|
|
|
|
|
static void BM_MixedUTF8MB4_AS_CI(size_t num_iterations) {
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
CHARSET_INFO *cs = init_collation("utf8mb4_0900_as_ci");
|
|
|
|
|
|
// Some English text, then some Norwegian text, then some Japanese,
|
|
|
// and then a few emoji (the last with skin tone modifiers).
|
|
|
const char *content =
|
|
|
"Premature optimization is the root of all evil. "
|
|
|
"Våre norske tegn bør æres. 日本語が少しわかります。 "
|
|
|
"✌️🐶👩🏽";
|
|
|
const int len = strlen(content);
|
|
|
|
|
|
// Just recorded from a trial run on the string above.
|
|
|
static constexpr uchar expected[] = {
|
|
|
0x1e, 0x0c, 0x1e, 0x33, 0x1c, 0xaa, 0x1d, 0xaa, 0x1c, 0x47, 0x1e, 0x95,
|
|
|
0x1e, 0xb5, 0x1e, 0x33, 0x1c, 0xaa, 0x02, 0x09, 0x1d, 0xdd, 0x1e, 0x0c,
|
|
|
0x1e, 0x95, 0x1d, 0x32, 0x1d, 0xaa, 0x1d, 0x32, 0x1f, 0x21, 0x1c, 0x47,
|
|
|
0x1e, 0x95, 0x1d, 0x32, 0x1d, 0xdd, 0x1d, 0xb9, 0x02, 0x09, 0x1d, 0x32,
|
|
|
0x1e, 0x71, 0x02, 0x09, 0x1e, 0x95, 0x1d, 0x18, 0x1c, 0xaa, 0x02, 0x09,
|
|
|
0x1e, 0x33, 0x1d, 0xdd, 0x1d, 0xdd, 0x1e, 0x95, 0x02, 0x09, 0x1d, 0xdd,
|
|
|
0x1c, 0xe5, 0x02, 0x09, 0x1c, 0x47, 0x1d, 0x77, 0x1d, 0x77, 0x02, 0x09,
|
|
|
0x1c, 0xaa, 0x1e, 0xe3, 0x1d, 0x32, 0x1d, 0x77, 0x02, 0x77, 0x02, 0x09,
|
|
|
0x1e, 0xe3, 0x1c, 0x47, 0x1e, 0x33, 0x1c, 0xaa, 0x02, 0x09, 0x1d, 0xb9,
|
|
|
0x1d, 0xdd, 0x1e, 0x33, 0x1e, 0x71, 0x1d, 0x65, 0x1c, 0xaa, 0x02, 0x09,
|
|
|
0x1e, 0x95, 0x1c, 0xaa, 0x1c, 0xf4, 0x1d, 0xb9, 0x02, 0x09, 0x1c, 0x60,
|
|
|
0x1d, 0xdd, 0x1e, 0x33, 0x02, 0x09, 0x1c, 0x47, 0x1c, 0xaa, 0x1e, 0x33,
|
|
|
0x1c, 0xaa, 0x1e, 0x71, 0x02, 0x77, 0x02, 0x09, 0xfb, 0x40, 0xe5, 0xe5,
|
|
|
0xfb, 0x40, 0xe7, 0x2c, 0xfb, 0x41, 0x8a, 0x9e, 0x3d, 0x60, 0xfb, 0x40,
|
|
|
0xdc, 0x11, 0x3d, 0x66, 0x3d, 0x87, 0x3d, 0x60, 0x3d, 0x83, 0x3d, 0x79,
|
|
|
0x3d, 0x67, 0x02, 0x8a, 0x02, 0x09, 0x0a, 0x2d, 0x13, 0xdf, 0x14, 0x12,
|
|
|
0x13, 0xa6, 0x00, 0x00, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x29, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x2F, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x01, 0x10, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x37, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20};
|
|
|
uchar dest[sizeof(expected)];
|
|
|
|
|
|
StartBenchmarkTiming();
|
|
|
for (size_t i = 0; i < num_iterations; ++i) {
|
|
|
my_strnxfrm(cs, dest, sizeof(dest),
|
|
|
reinterpret_cast<const uchar *>(content), len);
|
|
|
}
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
expect_arrays_equal(expected, dest, sizeof(dest));
|
|
|
SetBytesProcessed(num_iterations * strlen(content));
|
|
|
}
|
|
|
BENCHMARK(BM_MixedUTF8MB4_AS_CI)
|
|
|
|
|
|
// Case-sensitive, accent-sensitive benchmark, using the same string as
|
|
|
// BM_SimpleUTF8MB4. This will naturally be slower, since many more weights
|
|
|
// need to be generated.
|
|
|
static void BM_MixedUTF8MB4_AS_CS(size_t num_iterations) {
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
CHARSET_INFO *cs = init_collation("utf8mb4_0900_as_cs");
|
|
|
|
|
|
// Some English text, then some Norwegian text, then some Japanese,
|
|
|
// and then a few emoji (the last with skin tone modifiers).
|
|
|
const char *content =
|
|
|
"Premature optimization is the root of all evil. "
|
|
|
"Våre norske tegn bør æres. 日本語が少しわかります。 "
|
|
|
"✌️🐶👩🏽";
|
|
|
const int len = strlen(content);
|
|
|
|
|
|
// Just recorded from a trial run on the string above.
|
|
|
static constexpr uchar expected[] = {
|
|
|
// Primary weights.
|
|
|
0x1e,
|
|
|
0x0c,
|
|
|
0x1e,
|
|
|
0x33,
|
|
|
0x1c,
|
|
|
0xaa,
|
|
|
0x1d,
|
|
|
0xaa,
|
|
|
0x1c,
|
|
|
0x47,
|
|
|
0x1e,
|
|
|
0x95,
|
|
|
0x1e,
|
|
|
0xb5,
|
|
|
0x1e,
|
|
|
0x33,
|
|
|
0x1c,
|
|
|
0xaa,
|
|
|
0x02,
|
|
|
0x09,
|
|
|
0x1d,
|
|
|
0xdd,
|
|
|
0x1e,
|
|
|
0x0c,
|
|
|
0x1e,
|
|
|
0x95,
|
|
|
0x1d,
|
|
|
0x32,
|
|
|
0x1d,
|
|
|
0xaa,
|
|
|
0x1d,
|
|
|
0x32,
|
|
|
0x1f,
|
|
|
0x21,
|
|
|
0x1c,
|
|
|
0x47,
|
|
|
0x1e,
|
|
|
0x95,
|
|
|
0x1d,
|
|
|
0x32,
|
|
|
0x1d,
|
|
|
0xdd,
|
|
|
0x1d,
|
|
|
0xb9,
|
|
|
0x02,
|
|
|
0x09,
|
|
|
0x1d,
|
|
|
0x32,
|
|
|
0x1e,
|
|
|
0x71,
|
|
|
0x02,
|
|
|
0x09,
|
|
|
0x1e,
|
|
|
0x95,
|
|
|
0x1d,
|
|
|
0x18,
|
|
|
0x1c,
|
|
|
0xaa,
|
|
|
0x02,
|
|
|
0x09,
|
|
|
0x1e,
|
|
|
0x33,
|
|
|
0x1d,
|
|
|
0xdd,
|
|
|
0x1d,
|
|
|
0xdd,
|
|
|
0x1e,
|
|
|
0x95,
|
|
|
0x02,
|
|
|
0x09,
|
|
|
0x1d,
|
|
|
0xdd,
|
|
|
0x1c,
|
|
|
0xe5,
|
|
|
0x02,
|
|
|
0x09,
|
|
|
0x1c,
|
|
|
0x47,
|
|
|
0x1d,
|
|
|
0x77,
|
|
|
0x1d,
|
|
|
0x77,
|
|
|
0x02,
|
|
|
0x09,
|
|
|
0x1c,
|
|
|
0xaa,
|
|
|
0x1e,
|
|
|
0xe3,
|
|
|
0x1d,
|
|
|
0x32,
|
|
|
0x1d,
|
|
|
0x77,
|
|
|
0x02,
|
|
|
0x77,
|
|
|
0x02,
|
|
|
0x09,
|
|
|
0x1e,
|
|
|
0xe3,
|
|
|
0x1c,
|
|
|
0x47,
|
|
|
0x1e,
|
|
|
0x33,
|
|
|
0x1c,
|
|
|
0xaa,
|
|
|
0x02,
|
|
|
0x09,
|
|
|
0x1d,
|
|
|
0xb9,
|
|
|
0x1d,
|
|
|
0xdd,
|
|
|
0x1e,
|
|
|
0x33,
|
|
|
0x1e,
|
|
|
0x71,
|
|
|
0x1d,
|
|
|
0x65,
|
|
|
0x1c,
|
|
|
0xaa,
|
|
|
0x02,
|
|
|
0x09,
|
|
|
0x1e,
|
|
|
0x95,
|
|
|
0x1c,
|
|
|
0xaa,
|
|
|
0x1c,
|
|
|
0xf4,
|
|
|
0x1d,
|
|
|
0xb9,
|
|
|
0x02,
|
|
|
0x09,
|
|
|
0x1c,
|
|
|
0x60,
|
|
|
0x1d,
|
|
|
0xdd,
|
|
|
0x1e,
|
|
|
0x33,
|
|
|
0x02,
|
|
|
0x09,
|
|
|
0x1c,
|
|
|
0x47,
|
|
|
0x1c,
|
|
|
0xaa,
|
|
|
0x1e,
|
|
|
0x33,
|
|
|
0x1c,
|
|
|
0xaa,
|
|
|
0x1e,
|
|
|
0x71,
|
|
|
0x02,
|
|
|
0x77,
|
|
|
0x02,
|
|
|
0x09,
|
|
|
0xfb,
|
|
|
0x40,
|
|
|
0xe5,
|
|
|
0xe5,
|
|
|
0xfb,
|
|
|
0x40,
|
|
|
0xe7,
|
|
|
0x2c,
|
|
|
0xfb,
|
|
|
0x41,
|
|
|
0x8a,
|
|
|
0x9e,
|
|
|
0x3d,
|
|
|
0x60,
|
|
|
0xfb,
|
|
|
0x40,
|
|
|
0xdc,
|
|
|
0x11,
|
|
|
0x3d,
|
|
|
0x66,
|
|
|
0x3d,
|
|
|
0x87,
|
|
|
0x3d,
|
|
|
0x60,
|
|
|
0x3d,
|
|
|
0x83,
|
|
|
0x3d,
|
|
|
0x79,
|
|
|
0x3d,
|
|
|
0x67,
|
|
|
0x02,
|
|
|
0x8a,
|
|
|
0x02,
|
|
|
0x09,
|
|
|
0x0a,
|
|
|
0x2d,
|
|
|
0x13,
|
|
|
0xdf,
|
|
|
0x14,
|
|
|
0x12,
|
|
|
0x13,
|
|
|
0xa6,
|
|
|
// Level separator.
|
|
|
0x00,
|
|
|
0x00,
|
|
|
// Secondary weights.
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x29,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x2f,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x01,
|
|
|
0x10,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x37,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
0x00,
|
|
|
0x20,
|
|
|
// Level separator.
|
|
|
0x00,
|
|
|
0x00,
|
|
|
// Tertiary weights.
|
|
|
0x00,
|
|
|
0x08,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x08,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x04,
|
|
|
0x00,
|
|
|
0x04,
|
|
|
0x00,
|
|
|
0x04,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x0e,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x0e,
|
|
|
0x00,
|
|
|
0x0e,
|
|
|
0x00,
|
|
|
0x0e,
|
|
|
0x00,
|
|
|
0x0e,
|
|
|
0x00,
|
|
|
0x0e,
|
|
|
0x00,
|
|
|
0x0e,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
0x00,
|
|
|
0x02,
|
|
|
};
|
|
|
uchar dest[sizeof(expected)];
|
|
|
|
|
|
size_t ret = 0;
|
|
|
StartBenchmarkTiming();
|
|
|
for (size_t i = 0; i < num_iterations; ++i) {
|
|
|
ret = my_strnxfrm(cs, dest, sizeof(dest),
|
|
|
pointer_cast<const uchar *>(content), len);
|
|
|
}
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
EXPECT_EQ(sizeof(expected), ret);
|
|
|
expect_arrays_equal(expected, dest, ret);
|
|
|
SetBytesProcessed(num_iterations * strlen(content));
|
|
|
}
|
|
|
BENCHMARK(BM_MixedUTF8MB4_AS_CS)
|
|
|
|
|
|
// Specifically benchmark Japanese text.
|
|
|
static void BM_JapaneseUTF8MB4(size_t num_iterations) {
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");
|
|
|
const char *content =
|
|
|
"データの保存とアクセスを行うストレージエンジンがSQLパーサとは"
|
|
|
"分離独立しており、用途に応じたストレージエンジンを選択できる"
|
|
|
"「マルチストレージエンジン」方式を採用している。";
|
|
|
const int len = strlen(content);
|
|
|
|
|
|
// Just recorded from a trial run on the string above.
|
|
|
static constexpr uchar expected[] = {
|
|
|
0x3d, 0x6d, 0x1c, 0x0e, 0x3d, 0x6a, 0x3d, 0x73, 0xfb, 0x40, 0xcf, 0xdd,
|
|
|
0xfb, 0x40, 0xdb, 0x58, 0x3d, 0x6e, 0x3d, 0x5a, 0x3d, 0x62, 0x3d, 0x68,
|
|
|
0x3d, 0x67, 0x3d, 0x8a, 0xfb, 0x41, 0x88, 0x4c, 0x3d, 0x5c, 0x3d, 0x67,
|
|
|
0x3d, 0x6e, 0x3d, 0x85, 0x1c, 0x0e, 0x3d, 0x66, 0x3d, 0x5e, 0x3d, 0x8b,
|
|
|
0x3d, 0x66, 0x3d, 0x8b, 0x3d, 0x60, 0x1e, 0x71, 0x1e, 0x21, 0x1d, 0x77,
|
|
|
0x3d, 0x74, 0x1c, 0x0e, 0x3d, 0x65, 0x3d, 0x6e, 0x3d, 0x74, 0xfb, 0x40,
|
|
|
0xd2, 0x06, 0xfb, 0x41, 0x96, 0xe2, 0xfb, 0x40, 0xf2, 0xec, 0xfb, 0x40,
|
|
|
0xfa, 0xcb, 0x3d, 0x66, 0x3d, 0x6d, 0x3d, 0x5f, 0x3d, 0x83, 0x02, 0x31,
|
|
|
0xfb, 0x40, 0xf5, 0x28, 0xfb, 0x41, 0x90, 0x14, 0x3d, 0x70, 0xfb, 0x40,
|
|
|
0xdf, 0xdc, 0x3d, 0x66, 0x3d, 0x6a, 0x3d, 0x67, 0x3d, 0x6e, 0x3d, 0x85,
|
|
|
0x1c, 0x0e, 0x3d, 0x66, 0x3d, 0x5e, 0x3d, 0x8b, 0x3d, 0x66, 0x3d, 0x8b,
|
|
|
0x3d, 0x8a, 0xfb, 0x41, 0x90, 0x78, 0xfb, 0x40, 0xe2, 0x9e, 0x3d, 0x6d,
|
|
|
0x3d, 0x61, 0x3d, 0x84, 0x03, 0x73, 0x3d, 0x79, 0x3d, 0x84, 0x3d, 0x6b,
|
|
|
0x3d, 0x67, 0x3d, 0x6e, 0x3d, 0x85, 0x1c, 0x0e, 0x3d, 0x66, 0x3d, 0x5e,
|
|
|
0x3d, 0x8b, 0x3d, 0x66, 0x3d, 0x8b, 0x03, 0x74, 0xfb, 0x40, 0xe5, 0xb9,
|
|
|
0xfb, 0x40, 0xdf, 0x0f, 0x3d, 0x8a, 0xfb, 0x40, 0xe3, 0xa1, 0xfb, 0x40,
|
|
|
0xf5, 0x28, 0x3d, 0x66, 0x3d, 0x6d, 0x3d, 0x5b, 0x3d, 0x84, 0x02, 0x8a};
|
|
|
uchar dest[sizeof(expected)];
|
|
|
|
|
|
StartBenchmarkTiming();
|
|
|
for (size_t i = 0; i < num_iterations; ++i) {
|
|
|
my_strnxfrm(cs, dest, sizeof(dest),
|
|
|
reinterpret_cast<const uchar *>(content), len);
|
|
|
}
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
expect_arrays_equal(expected, dest, sizeof(dest));
|
|
|
SetBytesProcessed(num_iterations * strlen(content));
|
|
|
}
|
|
|
BENCHMARK(BM_JapaneseUTF8MB4)
|
|
|
|
|
|
/*
|
|
|
A benchmark that illustrates the potential perils of not including the
|
|
|
range [0x00,0x20) in our fast path; newlines throw us off the fast path
|
|
|
and reduce speed.
|
|
|
|
|
|
The newlines are spaced a bit randomly in order not to create a perfectly
|
|
|
predictable pattern for the branch predictor (benchmark paranoia).
|
|
|
*/
|
|
|
static void BM_NewlineFilledUTF8MB4(size_t num_iterations) {
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");
|
|
|
|
|
|
const char *content =
|
|
|
"This is a\n prett\ny unrealist\nic case; a\nn "
|
|
|
"Eng\nlish sente\nnce where\n we'\nve added a new\nline every te\nn "
|
|
|
"bytes or\n so.\n";
|
|
|
const int len = strlen(content);
|
|
|
|
|
|
// Just recorded from a trial run on the string above.
|
|
|
static constexpr uchar expected[] = {
|
|
|
0x1e, 0x95, 0x1d, 0x18, 0x1d, 0x32, 0x1e, 0x71, 0x02, 0x09, 0x1d, 0x32,
|
|
|
0x1e, 0x71, 0x02, 0x09, 0x1c, 0x47, 0x02, 0x02, 0x02, 0x09, 0x1e, 0x0c,
|
|
|
0x1e, 0x33, 0x1c, 0xaa, 0x1e, 0x95, 0x1e, 0x95, 0x02, 0x02, 0x1f, 0x0b,
|
|
|
0x02, 0x09, 0x1e, 0xb5, 0x1d, 0xb9, 0x1e, 0x33, 0x1c, 0xaa, 0x1c, 0x47,
|
|
|
0x1d, 0x77, 0x1d, 0x32, 0x1e, 0x71, 0x1e, 0x95, 0x02, 0x02, 0x1d, 0x32,
|
|
|
0x1c, 0x7a, 0x02, 0x09, 0x1c, 0x7a, 0x1c, 0x47, 0x1e, 0x71, 0x1c, 0xaa,
|
|
|
0x02, 0x34, 0x02, 0x09, 0x1c, 0x47, 0x02, 0x02, 0x1d, 0xb9, 0x02, 0x09,
|
|
|
0x1c, 0xaa, 0x1d, 0xb9, 0x1c, 0xf4, 0x02, 0x02, 0x1d, 0x77, 0x1d, 0x32,
|
|
|
0x1e, 0x71, 0x1d, 0x18, 0x02, 0x09, 0x1e, 0x71, 0x1c, 0xaa, 0x1d, 0xb9,
|
|
|
0x1e, 0x95, 0x1c, 0xaa, 0x02, 0x02, 0x1d, 0xb9, 0x1c, 0x7a, 0x1c, 0xaa,
|
|
|
0x02, 0x09, 0x1e, 0xf5, 0x1d, 0x18, 0x1c, 0xaa, 0x1e, 0x33, 0x1c, 0xaa,
|
|
|
0x02, 0x02, 0x02, 0x09, 0x1e, 0xf5, 0x1c, 0xaa, 0x03, 0x05, 0x02, 0x02,
|
|
|
0x1e, 0xe3, 0x1c, 0xaa, 0x02, 0x09, 0x1c, 0x47, 0x1c, 0x8f, 0x1c, 0x8f,
|
|
|
0x1c, 0xaa, 0x1c, 0x8f, 0x02, 0x09, 0x1c, 0x47, 0x02, 0x09, 0x1d, 0xb9,
|
|
|
0x1c, 0xaa, 0x1e, 0xf5, 0x02, 0x02, 0x1d, 0x77, 0x1d, 0x32, 0x1d, 0xb9,
|
|
|
0x1c, 0xaa, 0x02, 0x09, 0x1c, 0xaa, 0x1e, 0xe3, 0x1c, 0xaa, 0x1e, 0x33,
|
|
|
0x1f, 0x0b, 0x02, 0x09, 0x1e, 0x95, 0x1c, 0xaa, 0x02, 0x02, 0x1d, 0xb9,
|
|
|
0x02, 0x09, 0x1c, 0x60, 0x1f, 0x0b, 0x1e, 0x95, 0x1c, 0xaa, 0x1e, 0x71,
|
|
|
0x02, 0x09, 0x1d, 0xdd, 0x1e, 0x33, 0x02, 0x02, 0x02, 0x09, 0x1e, 0x71,
|
|
|
0x1d, 0xdd, 0x02, 0x77, 0x02, 0x02};
|
|
|
uchar dest[sizeof(expected)];
|
|
|
|
|
|
StartBenchmarkTiming();
|
|
|
for (size_t i = 0; i < num_iterations; ++i) {
|
|
|
my_strnxfrm(cs, dest, sizeof(dest),
|
|
|
reinterpret_cast<const uchar *>(content), len);
|
|
|
}
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
expect_arrays_equal(expected, dest, sizeof(dest));
|
|
|
SetBytesProcessed(num_iterations * strlen(content));
|
|
|
}
|
|
|
BENCHMARK(BM_NewlineFilledUTF8MB4)
|
|
|
|
|
|
static void BM_HashSimpleUTF8MB4(size_t num_iterations) {
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");
|
|
|
|
|
|
const char *content =
|
|
|
"This is a rather long string that contains only "
|
|
|
"simple letters that are available in ASCII. This is a common special "
|
|
|
"case that warrants a benchmark on its own, even if the character set "
|
|
|
"and collation supports much more complicated scenarios.";
|
|
|
const int len = strlen(content);
|
|
|
|
|
|
uint64 nr1 = 1, nr2 = 4;
|
|
|
|
|
|
StartBenchmarkTiming();
|
|
|
for (size_t i = 0; i < num_iterations; ++i) {
|
|
|
cs->coll->hash_sort(cs, reinterpret_cast<const uchar *>(content), len, &nr1,
|
|
|
&nr2);
|
|
|
}
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
/*
|
|
|
Just to keep the compiler from optimizing away everything; this is highly
|
|
|
unlikely to ever happen given hash function that's not totally broken.
|
|
|
Don't test for an exact value; it will vary by platform and number
|
|
|
of iterations.
|
|
|
*/
|
|
|
EXPECT_FALSE(nr1 == 0 && nr2 == 0);
|
|
|
}
|
|
|
BENCHMARK(BM_HashSimpleUTF8MB4)
|
|
|
|
|
|
/*
|
|
|
Test a non-trivial collation with contractions, to highlight
|
|
|
the performance difference.
|
|
|
*/
|
|
|
static void BM_Hungarian_AS_CS(size_t num_iterations) {
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
CHARSET_INFO *cs = init_collation("utf8mb4_hu_0900_as_cs");
|
|
|
|
|
|
// Text snippet from Wikipedia.
|
|
|
const char *content =
|
|
|
"A MySQL adatbázisok adminisztrációjára a mellékelt "
|
|
|
"parancssori eszközöket (mysql és mysqladmin) használhatjuk.";
|
|
|
const int len = strlen(content);
|
|
|
|
|
|
// Just recorded from a trial run on the string above.
|
|
|
static constexpr uchar expected[] = {
|
|
|
0x1c, 0x47, 0x02, 0x09, 0x1d, 0xaa, 0x1f, 0x0b, 0x1e, 0x71, 0x1e, 0x21,
|
|
|
0x1d, 0x77, 0x02, 0x09, 0x1c, 0x47, 0x1c, 0x8f, 0x1c, 0x47, 0x1e, 0x95,
|
|
|
0x1c, 0x60, 0x1c, 0x47, 0x1f, 0x21, 0x1d, 0x32, 0x1e, 0x71, 0x1d, 0xdd,
|
|
|
0x1d, 0x65, 0x02, 0x09, 0x1c, 0x47, 0x1c, 0x8f, 0x1d, 0xaa, 0x1d, 0x32,
|
|
|
0x1d, 0xb9, 0x1d, 0x32, 0x1e, 0x71, 0x54, 0xa5, 0x1e, 0x95, 0x1e, 0x33,
|
|
|
0x1c, 0x47, 0x1c, 0x7a, 0x1d, 0x32, 0x1d, 0xdd, 0x1d, 0x4c, 0x1c, 0x47,
|
|
|
0x1e, 0x33, 0x1c, 0x47, 0x02, 0x09, 0x1c, 0x47, 0x02, 0x09, 0x1d, 0xaa,
|
|
|
0x1c, 0xaa, 0x1d, 0x77, 0x1d, 0x77, 0x1c, 0xaa, 0x1d, 0x65, 0x1c, 0xaa,
|
|
|
0x1d, 0x77, 0x1e, 0x95, 0x02, 0x09, 0x1e, 0x0c, 0x1c, 0x47, 0x1e, 0x33,
|
|
|
0x1c, 0x47, 0x1d, 0xb9, 0x1c, 0x7a, 0x54, 0xa5, 0x1e, 0x71, 0x1d, 0xdd,
|
|
|
0x1e, 0x33, 0x1d, 0x32, 0x02, 0x09, 0x1c, 0xaa, 0x1e, 0x71, 0x54, 0xa5,
|
|
|
0x1d, 0x65, 0x1d, 0xdd, 0x54, 0xa5, 0x1f, 0x21, 0x1d, 0xdd, 0x54, 0xa5,
|
|
|
0x1d, 0x65, 0x1c, 0xaa, 0x1e, 0x95, 0x02, 0x09, 0x03, 0x17, 0x1d, 0xaa,
|
|
|
0x1f, 0x0b, 0x1e, 0x71, 0x1e, 0x21, 0x1d, 0x77, 0x02, 0x09, 0x1c, 0xaa,
|
|
|
0x1e, 0x71, 0x02, 0x09, 0x1d, 0xaa, 0x1f, 0x0b, 0x1e, 0x71, 0x1e, 0x21,
|
|
|
0x1d, 0x77, 0x1c, 0x47, 0x1c, 0x8f, 0x1d, 0xaa, 0x1d, 0x32, 0x1d, 0xb9,
|
|
|
0x03, 0x18, 0x02, 0x09, 0x1d, 0x18, 0x1c, 0x47, 0x1e, 0x71, 0x54, 0xa5,
|
|
|
0x1d, 0xb9, 0x1c, 0x47, 0x1d, 0x77, 0x1d, 0x18, 0x1c, 0x47, 0x1e, 0x95,
|
|
|
0x1d, 0x4c, 0x1e, 0xb5, 0x1d, 0x65, 0x02, 0x77, 0x00, 0x00, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x24, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x24, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x24, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x24, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x24, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x24,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x24, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x00, 0x00, 0x08,
|
|
|
0x00, 0x02, 0x00, 0x08, 0x00, 0x02, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x08, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x08,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x08, 0x00, 0x02, 0x00, 0x08, 0x00, 0x02, 0x00, 0x08, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x08, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02};
|
|
|
uchar dest[sizeof(expected)] = {0};
|
|
|
|
|
|
size_t ret = 0;
|
|
|
StartBenchmarkTiming();
|
|
|
for (size_t i = 0; i < num_iterations; ++i) {
|
|
|
ret = my_strnxfrm(cs, dest, sizeof(dest),
|
|
|
pointer_cast<const uchar *>(content), len);
|
|
|
}
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
EXPECT_EQ(sizeof(expected), ret);
|
|
|
expect_arrays_equal(expected, dest, ret);
|
|
|
|
|
|
SetBytesProcessed(num_iterations * strlen(content));
|
|
|
}
|
|
|
BENCHMARK(BM_Hungarian_AS_CS)
|
|
|
|
|
|
static void BM_Japanese_AS_CS(size_t num_iterations) {
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
CHARSET_INFO *cs = init_collation("utf8mb4_ja_0900_as_cs");
|
|
|
const char *content =
|
|
|
"サーバー SQL モードの設定方法。この設定は、たとえば"
|
|
|
"別のデータベースシステムからのコードとの互換性を保ったり、特定の状況に"
|
|
|
"ついてのエラー処理を制御したりするために、SQL の構文およびセマンティクス"
|
|
|
"の特定の側面を変更します。";
|
|
|
const int len = strlen(content);
|
|
|
|
|
|
// Just recorded from a trial run on the string above.
|
|
|
static constexpr uchar expected[] = {
|
|
|
0x1F, 0xC1, 0x1F, 0xB6, 0x1F, 0xD0, 0x1F, 0xB6, 0x02, 0x09, 0x1E, 0x71,
|
|
|
0x1E, 0x21, 0x1D, 0x77, 0x02, 0x09, 0x1F, 0xD9, 0x1F, 0xBB, 0x1F, 0xCA,
|
|
|
0x1F, 0xCF, 0x5A, 0xC2, 0x5C, 0x45, 0x5E, 0x8C, 0x5E, 0x8E, 0x02, 0x8A,
|
|
|
0x1F, 0xC0, 0x1F, 0xCF, 0x5A, 0xC2, 0x5C, 0x45, 0x1F, 0xD0, 0x02, 0x31,
|
|
|
0x1F, 0xC6, 0x1F, 0xCA, 0x1F, 0xBA, 0x1F, 0xD0, 0x5E, 0x5B, 0x1F, 0xCF,
|
|
|
0x1F, 0xC9, 0x1F, 0xBA, 0x1F, 0xC6, 0x1F, 0xD3, 0x1F, 0xBA, 0x1F, 0xC3,
|
|
|
0x1F, 0xC2, 0x1F, 0xC3, 0x1F, 0xC9, 0x1F, 0xD7, 0x1F, 0xBC, 0x1F, 0xDE,
|
|
|
0x1F, 0xCF, 0x1F, 0xC0, 0x1F, 0xBB, 0x1F, 0xCA, 0x1F, 0xCA, 0x1F, 0xCF,
|
|
|
0x57, 0xD2, 0x56, 0x34, 0x5A, 0x90, 0x1F, 0xE6, 0x5E, 0x6C, 0x1F, 0xC8,
|
|
|
0x1F, 0xC6, 0x1F, 0xDF, 0x02, 0x31, 0x5C, 0xDA, 0x5C, 0x45, 0x1F, 0xCF,
|
|
|
0x5A, 0x1C, 0x56, 0xEE, 0x1F, 0xCC, 0x1F, 0xC8, 0x1F, 0xB7, 0x1F, 0xC9,
|
|
|
0x1F, 0xCF, 0x1F, 0xBA, 0x1F, 0xDE, 0x1F, 0xB6, 0x59, 0xB1, 0x5F, 0xA6,
|
|
|
0x1F, 0xE6, 0x5A, 0x8C, 0x57, 0xD9, 0x1F, 0xC2, 0x1F, 0xC6, 0x1F, 0xDF,
|
|
|
0x1F, 0xC3, 0x1F, 0xE0, 0x1F, 0xC6, 0x1F, 0xD8, 0x1F, 0xCC, 0x02, 0x31,
|
|
|
0x1E, 0x71, 0x1E, 0x21, 0x1D, 0x77, 0x02, 0x09, 0x1F, 0xCF, 0x58, 0x0E,
|
|
|
0x5E, 0x47, 0x1F, 0xBB, 0x1F, 0xDD, 0x1F, 0xD1, 0x1F, 0xC4, 0x1F, 0xD5,
|
|
|
0x1F, 0xE7, 0x1F, 0xC9, 0x1F, 0xB7, 0x1F, 0xBE, 0x1F, 0xC3, 0x1F, 0xCF,
|
|
|
0x5C, 0xDA, 0x5C, 0x45, 0x1F, 0xCF, 0x5B, 0x45, 0x5F, 0x17, 0x1F, 0xE6,
|
|
|
0x5E, 0x60, 0x58, 0x0A, 0x1F, 0xC2, 0x1F, 0xD5, 0x1F, 0xC3, 0x02, 0x8A,
|
|
|
0x00, 0x00, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x37, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x37, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x0C, 0x00, 0x21,
|
|
|
0x00, 0x0E, 0x00, 0x02, 0x00, 0x0C, 0x00, 0x21, 0x00, 0x02, 0x00, 0x08,
|
|
|
0x00, 0x08, 0x00, 0x08, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0C, 0x00, 0x21,
|
|
|
0x00, 0x0E, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x0E, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x0C,
|
|
|
0x00, 0x21, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x0C, 0x00, 0x21,
|
|
|
0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
|
|
|
0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0C, 0x00, 0x21, 0x00, 0x0E,
|
|
|
0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x0E, 0x00, 0x02, 0x00, 0x0D, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x0E,
|
|
|
0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
|
|
|
0x00, 0x0C, 0x00, 0x21, 0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
|
|
|
0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x08, 0x00, 0x08,
|
|
|
0x00, 0x08, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x0E,
|
|
|
0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
|
|
|
0x00, 0x0E, 0x00, 0x0D, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02};
|
|
|
uchar dest[sizeof(expected)];
|
|
|
|
|
|
StartBenchmarkTiming();
|
|
|
for (size_t i = 0; i < num_iterations; ++i) {
|
|
|
my_strnxfrm(cs, dest, sizeof(dest),
|
|
|
reinterpret_cast<const uchar *>(content), len);
|
|
|
}
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
expect_arrays_equal(expected, dest, sizeof(dest));
|
|
|
SetBytesProcessed(num_iterations * strlen(content));
|
|
|
}
|
|
|
BENCHMARK(BM_Japanese_AS_CS)
|
|
|
|
|
|
static void BM_Japanese_AS_CS_KS(size_t num_iterations) {
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
CHARSET_INFO *cs = init_collation("utf8mb4_ja_0900_as_cs_ks");
|
|
|
const char *content =
|
|
|
"サーバー SQL モードの設定方法。この設定は、たとえば"
|
|
|
"別のデータベースシステムからのコードとの互換性を保ったり、特定の状況に"
|
|
|
"ついてのエラー処理を制御したりするために、SQL の構文およびセマンティクス"
|
|
|
"の特定の側面を変更します。";
|
|
|
const int len = strlen(content);
|
|
|
|
|
|
// Just recorded from a trial run on the string above.
|
|
|
static constexpr uchar expected[] = {
|
|
|
0x1F, 0xC1, 0x1F, 0xB6, 0x1F, 0xD0, 0x1F, 0xB6, 0x02, 0x09, 0x1E, 0x71,
|
|
|
0x1E, 0x21, 0x1D, 0x77, 0x02, 0x09, 0x1F, 0xD9, 0x1F, 0xBB, 0x1F, 0xCA,
|
|
|
0x1F, 0xCF, 0x5A, 0xC2, 0x5C, 0x45, 0x5E, 0x8C, 0x5E, 0x8E, 0x02, 0x8A,
|
|
|
0x1F, 0xC0, 0x1F, 0xCF, 0x5A, 0xC2, 0x5C, 0x45, 0x1F, 0xD0, 0x02, 0x31,
|
|
|
0x1F, 0xC6, 0x1F, 0xCA, 0x1F, 0xBA, 0x1F, 0xD0, 0x5E, 0x5B, 0x1F, 0xCF,
|
|
|
0x1F, 0xC9, 0x1F, 0xBA, 0x1F, 0xC6, 0x1F, 0xD3, 0x1F, 0xBA, 0x1F, 0xC3,
|
|
|
0x1F, 0xC2, 0x1F, 0xC3, 0x1F, 0xC9, 0x1F, 0xD7, 0x1F, 0xBC, 0x1F, 0xDE,
|
|
|
0x1F, 0xCF, 0x1F, 0xC0, 0x1F, 0xBB, 0x1F, 0xCA, 0x1F, 0xCA, 0x1F, 0xCF,
|
|
|
0x57, 0xD2, 0x56, 0x34, 0x5A, 0x90, 0x1F, 0xE6, 0x5E, 0x6C, 0x1F, 0xC8,
|
|
|
0x1F, 0xC6, 0x1F, 0xDF, 0x02, 0x31, 0x5C, 0xDA, 0x5C, 0x45, 0x1F, 0xCF,
|
|
|
0x5A, 0x1C, 0x56, 0xEE, 0x1F, 0xCC, 0x1F, 0xC8, 0x1F, 0xB7, 0x1F, 0xC9,
|
|
|
0x1F, 0xCF, 0x1F, 0xBA, 0x1F, 0xDE, 0x1F, 0xB6, 0x59, 0xB1, 0x5F, 0xA6,
|
|
|
0x1F, 0xE6, 0x5A, 0x8C, 0x57, 0xD9, 0x1F, 0xC2, 0x1F, 0xC6, 0x1F, 0xDF,
|
|
|
0x1F, 0xC3, 0x1F, 0xE0, 0x1F, 0xC6, 0x1F, 0xD8, 0x1F, 0xCC, 0x02, 0x31,
|
|
|
0x1E, 0x71, 0x1E, 0x21, 0x1D, 0x77, 0x02, 0x09, 0x1F, 0xCF, 0x58, 0x0E,
|
|
|
0x5E, 0x47, 0x1F, 0xBB, 0x1F, 0xDD, 0x1F, 0xD1, 0x1F, 0xC4, 0x1F, 0xD5,
|
|
|
0x1F, 0xE7, 0x1F, 0xC9, 0x1F, 0xB7, 0x1F, 0xBE, 0x1F, 0xC3, 0x1F, 0xCF,
|
|
|
0x5C, 0xDA, 0x5C, 0x45, 0x1F, 0xCF, 0x5B, 0x45, 0x5F, 0x17, 0x1F, 0xE6,
|
|
|
0x5E, 0x60, 0x58, 0x0A, 0x1F, 0xC2, 0x1F, 0xD5, 0x1F, 0xC3, 0x02, 0x8A,
|
|
|
0x00, 0x00, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x37, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x37, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x37,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x0C, 0x00, 0x21,
|
|
|
0x00, 0x0E, 0x00, 0x02, 0x00, 0x0C, 0x00, 0x21, 0x00, 0x02, 0x00, 0x08,
|
|
|
0x00, 0x08, 0x00, 0x08, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0C, 0x00, 0x21,
|
|
|
0x00, 0x0E, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x0E, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x0C,
|
|
|
0x00, 0x21, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x0C, 0x00, 0x21,
|
|
|
0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
|
|
|
0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0C, 0x00, 0x21, 0x00, 0x0E,
|
|
|
0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x0E, 0x00, 0x02, 0x00, 0x0D, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x0E,
|
|
|
0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
|
|
|
0x00, 0x0C, 0x00, 0x21, 0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
|
|
|
0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x08, 0x00, 0x08,
|
|
|
0x00, 0x08, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x0E,
|
|
|
0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E,
|
|
|
0x00, 0x0E, 0x00, 0x0D, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x02, 0x00, 0x0E, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x0E, 0x00, 0x02, 0x00, 0x00,
|
|
|
0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08,
|
|
|
0x00, 0x08, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x08, 0x00, 0x08,
|
|
|
0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08,
|
|
|
0x00, 0x08, 0x00, 0x08, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x08,
|
|
|
0x00, 0x08, 0x00, 0x08, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08,
|
|
|
0x00, 0x08, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02};
|
|
|
uchar dest[sizeof(expected)];
|
|
|
|
|
|
StartBenchmarkTiming();
|
|
|
for (size_t i = 0; i < num_iterations; ++i) {
|
|
|
my_strnxfrm(cs, dest, sizeof(dest),
|
|
|
reinterpret_cast<const uchar *>(content), len);
|
|
|
}
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
expect_arrays_equal(expected, dest, sizeof(dest));
|
|
|
SetBytesProcessed(num_iterations * strlen(content));
|
|
|
}
|
|
|
BENCHMARK(BM_Japanese_AS_CS_KS)
|
|
|
|
|
|
/*
  Transforms a short mixed Han/Latin string with utf8mb4_zh_0900_as_cs into
  output buffers of every even size below the full reference length, and
  checks that each result is a prefix of the reference weight string.
*/
TEST(StrXfrmTest, ChineseUTF8MB4) {
  CHARSET_INFO *collation = init_collation("utf8mb4_zh_0900_as_cs");

  const char *input =
      // First and last Han character listed in zh.xml.
      "\xE9\x98\xBF\xE5\x92\x97"
      // The last Han character.
      "\xF0\xAC\xBA\xA1"
      // Latin characters; some of them are used as Bopomofo.
      "\xC4\x81\x61\x62\xC5\xAB\x75\x55\xC7\x96\x5A"
      // The last character with an explicit weight in the DUCET.
      "\xF0\x94\x99\x86"
      // Non-Han characters that get an implicit weight.
      "\xF0\x97\x86\xA0\xF0\xAC\xBA\xA2\xF0\xAE\xAF\xA0\xF0\xB3\x8C\xB3";

  static const unsigned char full_answer_with_pad[116] = {
      // ---- level 1 ----
      // First and last Han character listed in zh.xml.
      0x1C, 0x47, 0xBD, 0xBE,
      // The last Han character.
      0xBD, 0xC3, 0xCE, 0xA1,
      // Latin characters; some of them are used as Bopomofo.
      0xBD, 0xC4, 0xBD, 0xC4, 0xBD, 0xDD, 0xC0, 0x32, 0xC0, 0x32, 0xC0, 0x32,
      0xC0, 0x32, 0xC0, 0x9E,
      // The last character with an explicit weight in the DUCET.
      0xF6, 0x20,
      // Non-Han characters that get an implicit weight.
      0xF6, 0x21, 0x81, 0xA0, 0xF6, 0x27, 0xCE, 0xA2, 0xF6, 0x27, 0xEB, 0xE0,
      0xF6, 0x28, 0xB3, 0x33,

      // ---- level separator ----
      0x00, 0x00,

      // ---- level 2 ----
      // First and last Han character listed in zh.xml.
      0x00, 0x20, 0x00, 0x20,
      // The last Han character.
      0x00, 0x20,
      // Latin characters; some of them are used as Bopomofo.
      0x00, 0x1F, 0x01, 0x16, 0x00, 0x20, 0x00, 0x20, 0x00, 0x1F, 0x01, 0x16,
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x01, 0x16, 0x00, 0x20,
      // The last character with an explicit weight in the DUCET.
      0x00, 0x20,
      // Non-Han characters that get an implicit weight.
      0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,

      // ---- level separator ----
      0x00, 0x00,

      // ---- level 3 ----
      // First and last Han character listed in zh.xml.
      0x00, 0x02, 0x00, 0x02,
      // The last Han character.
      0x00, 0x02,
      // Latin characters; some of them are used as Bopomofo.
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x08,
      0x00, 0x08, 0x00, 0x08,
      // The last character with an explicit weight in the DUCET.
      0x00, 0x02,
      // Non-Han characters that get an implicit weight.
      0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02};

  unsigned char out[sizeof(full_answer_with_pad)];
  const uchar *input_bytes = pointer_cast<const uchar *>(input);

  size_t limit = 0;
  while (limit < sizeof(out)) {
    // Poison the buffer so stale bytes from a previous round cannot match.
    memset(out, 0xff, sizeof(out));
    my_strnxfrm(collation, out, limit, input_bytes, strlen(input));
    expect_arrays_equal(full_answer_with_pad, out, limit);
    limit += 2;
  }
}
|
|
|
|
|
|
static void BM_Chinese_AS_CS(size_t num_iterations) {
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
CHARSET_INFO *cs = init_collation("utf8mb4_zh_0900_as_cs");
|
|
|
const char *content =
|
|
|
"春江潮水连海平,海上明月共潮生。"
|
|
|
"滟滟随波千万里,何处春江无月明!"
|
|
|
"江流宛转绕芳甸,月照花林皆似霰;"
|
|
|
"空里流霜不觉飞,汀上白沙看不见。"
|
|
|
"江天一色无纤尘,皎皎空中孤月轮。"
|
|
|
"江畔何人初见月?江月何年初照人?"
|
|
|
"人生代代无穷已,江月年年只相似。"
|
|
|
"不知江月待何人,但见长江送流水。"
|
|
|
"白云一片去悠悠,青枫浦上不胜愁。"
|
|
|
"谁家今夜扁舟子?何处相思明月楼?";
|
|
|
const int len = strlen(content);
|
|
|
|
|
|
// Just recorded from a trial run on the string above.
|
|
|
static constexpr uchar expected[] = {
|
|
|
0x2C, 0xD0, 0x4F, 0xF1, 0x28, 0x08, 0x87, 0xE8, 0x60, 0x4C, 0x42, 0xEF,
|
|
|
0x75, 0x93, 0x02, 0x22, 0x42, 0xEF, 0x83, 0x8A, 0x6C, 0x4F, 0xAF, 0x96,
|
|
|
0x3F, 0x58, 0x28, 0x08, 0x84, 0xCF, 0x02, 0x8A, 0xA3, 0xA4, 0xA3, 0xA4,
|
|
|
0x8A, 0x5F, 0x23, 0x71, 0x78, 0xA8, 0x93, 0x1A, 0x5E, 0xD9, 0x02, 0x22,
|
|
|
0x44, 0xAC, 0x2B, 0xD5, 0x2C, 0xD0, 0x4F, 0xF1, 0x96, 0x31, 0xAF, 0x96,
|
|
|
0x6C, 0x4F, 0x02, 0x60, 0x4F, 0xF1, 0x63, 0x7B, 0x92, 0xDD, 0xBA, 0x2E,
|
|
|
0x7F, 0x07, 0x39, 0x15, 0x32, 0xB2, 0x02, 0x22, 0xAF, 0x96, 0xB4, 0x41,
|
|
|
0x47, 0xD7, 0x62, 0x27, 0x51, 0x4C, 0x85, 0xE9, 0x81, 0x86, 0x02, 0x34,
|
|
|
0x59, 0x09, 0x5E, 0xD9, 0x63, 0x7B, 0x87, 0xBA, 0x24, 0x78, 0x56, 0x5A,
|
|
|
0x39, 0x48, 0x02, 0x22, 0x8F, 0x74, 0x83, 0x8A, 0x1E, 0x4D, 0x82, 0x46,
|
|
|
0x57, 0xD9, 0x24, 0x78, 0x4F, 0x79, 0x02, 0x8A, 0x4F, 0xF1, 0x8E, 0x8A,
|
|
|
0xA6, 0x3E, 0x81, 0xEE, 0x96, 0x31, 0x99, 0x9E, 0x28, 0x97, 0x02, 0x22,
|
|
|
0x50, 0xC2, 0x50, 0xC2, 0x59, 0x09, 0xB8, 0x20, 0x3F, 0xCC, 0xAF, 0x96,
|
|
|
0x66, 0xC9, 0x02, 0x8A, 0x4F, 0xF1, 0x72, 0xB6, 0x44, 0xAC, 0x7F, 0x11,
|
|
|
0x2B, 0x7B, 0x4F, 0x79, 0xAF, 0x96, 0x02, 0x66, 0x4F, 0xF1, 0xAF, 0x96,
|
|
|
0x44, 0xAC, 0x6F, 0xD5, 0x2B, 0x7B, 0xB4, 0x41, 0x7F, 0x11, 0x02, 0x66,
|
|
|
0x7F, 0x11, 0x84, 0xCF, 0x2F, 0xE2, 0x2F, 0xE2, 0x96, 0x31, 0x7B, 0xE1,
|
|
|
0xA7, 0x41, 0x02, 0x22, 0x4F, 0xF1, 0xAF, 0x96, 0x6F, 0xD5, 0x6F, 0xD5,
|
|
|
0xB6, 0xC3, 0x9B, 0x15, 0x85, 0xE9, 0x02, 0x8A, 0x24, 0x78, 0xB6, 0x2E,
|
|
|
0x4F, 0xF1, 0xAF, 0x96, 0x2F, 0xF4, 0x44, 0xAC, 0x7F, 0x11, 0x02, 0x22,
|
|
|
0x30, 0x86, 0x4F, 0x79, 0xB3, 0xDD, 0x4F, 0xF1, 0x89, 0x2A, 0x63, 0x7B,
|
|
|
0x87, 0xE8, 0x02, 0x8A, 0x1E, 0x4D, 0xB0, 0x1B, 0xA6, 0x3E, 0x75, 0x00,
|
|
|
0x7D, 0x93, 0xAB, 0xAF, 0xAB, 0xAF, 0x02, 0x22, 0x7B, 0x7D, 0x3A, 0x63,
|
|
|
0x76, 0xA2, 0x83, 0x8A, 0x24, 0x78, 0x85, 0x16, 0x2B, 0x2D, 0x02, 0x8A,
|
|
|
0x84, 0x30, 0x4D, 0xF3, 0x52, 0x63, 0xA5, 0xC7, 0x21, 0xE0, 0xB8, 0x87,
|
|
|
0xBC, 0x16, 0x02, 0x66, 0x44, 0xAC, 0x2B, 0xD5, 0x9B, 0x15, 0x88, 0x52,
|
|
|
0x6C, 0x4F, 0xAF, 0x96, 0x64, 0xA1, 0x02, 0x66, 0x00, 0x00, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x20,
|
|
|
0x00, 0x20, 0x00, 0x20, 0x00, 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x03, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x03, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x03, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x03,
|
|
|
0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02,
|
|
|
0x00, 0x02, 0x00, 0x03};
|
|
|
uchar dest[sizeof(expected)];
|
|
|
|
|
|
StartBenchmarkTiming();
|
|
|
for (size_t i = 0; i < num_iterations; ++i) {
|
|
|
my_strnxfrm(cs, dest, sizeof(dest),
|
|
|
reinterpret_cast<const uchar *>(content), len);
|
|
|
}
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
expect_arrays_equal(expected, dest, sizeof(dest));
|
|
|
SetBytesProcessed(num_iterations * strlen(content));
|
|
|
}
|
|
|
BENCHMARK(BM_Chinese_AS_CS)
|
|
|
|
|
|
static void BM_UTF8MB4_bin(size_t num_iterations) {
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
CHARSET_INFO *cs = init_collation("utf8mb4_bin");
|
|
|
const char *content =
|
|
|
"Premature optimization is the root of all evil. "
|
|
|
"Våre norske tegn bør æres. 日本語が少しわかります。 "
|
|
|
"✌️🐶👩🏽";
|
|
|
const int len = strlen(content);
|
|
|
|
|
|
// Just recorded from a trial run on the string above.
|
|
|
static constexpr uchar expected[] = {
|
|
|
0x00, 0x00, 0x50, 0x00, 0x00, 0x72, 0x00, 0x00, 0x65, 0x00, 0x00, 0x6D,
|
|
|
0x00, 0x00, 0x61, 0x00, 0x00, 0x74, 0x00, 0x00, 0x75, 0x00, 0x00, 0x72,
|
|
|
0x00, 0x00, 0x65, 0x00, 0x00, 0x20, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x70,
|
|
|
0x00, 0x00, 0x74, 0x00, 0x00, 0x69, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x69,
|
|
|
0x00, 0x00, 0x7A, 0x00, 0x00, 0x61, 0x00, 0x00, 0x74, 0x00, 0x00, 0x69,
|
|
|
0x00, 0x00, 0x6F, 0x00, 0x00, 0x6E, 0x00, 0x00, 0x20, 0x00, 0x00, 0x69,
|
|
|
0x00, 0x00, 0x73, 0x00, 0x00, 0x20, 0x00, 0x00, 0x74, 0x00, 0x00, 0x68,
|
|
|
0x00, 0x00, 0x65, 0x00, 0x00, 0x20, 0x00, 0x00, 0x72, 0x00, 0x00, 0x6F,
|
|
|
0x00, 0x00, 0x6F, 0x00, 0x00, 0x74, 0x00, 0x00, 0x20, 0x00, 0x00, 0x6F,
|
|
|
0x00, 0x00, 0x66, 0x00, 0x00, 0x20, 0x00, 0x00, 0x61, 0x00, 0x00, 0x6C,
|
|
|
0x00, 0x00, 0x6C, 0x00, 0x00, 0x20, 0x00, 0x00, 0x65, 0x00, 0x00, 0x76,
|
|
|
0x00, 0x00, 0x69, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x20,
|
|
|
0x00, 0x00, 0x56, 0x00, 0x00, 0xE5, 0x00, 0x00, 0x72, 0x00, 0x00, 0x65,
|
|
|
0x00, 0x00, 0x20, 0x00, 0x00, 0x6E, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x72,
|
|
|
0x00, 0x00, 0x73, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x65, 0x00, 0x00, 0x20,
|
|
|
0x00, 0x00, 0x74, 0x00, 0x00, 0x65, 0x00, 0x00, 0x67, 0x00, 0x00, 0x6E,
|
|
|
0x00, 0x00, 0x20, 0x00, 0x00, 0x62, 0x00, 0x00, 0xF8, 0x00, 0x00, 0x72,
|
|
|
0x00, 0x00, 0x20, 0x00, 0x00, 0xE6, 0x00, 0x00, 0x72, 0x00, 0x00, 0x65,
|
|
|
0x00, 0x00, 0x73, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x20, 0x00, 0x65, 0xE5,
|
|
|
0x00, 0x67, 0x2C, 0x00, 0x8A, 0x9E, 0x00, 0x30, 0x4C, 0x00, 0x5C, 0x11,
|
|
|
0x00, 0x30, 0x57, 0x00, 0x30, 0x8F, 0x00, 0x30, 0x4B, 0x00, 0x30, 0x8A,
|
|
|
0x00, 0x30, 0x7E, 0x00, 0x30, 0x59, 0x00, 0x30, 0x02, 0x00, 0x00, 0x20,
|
|
|
0x00, 0x27, 0x0C, 0x00, 0xFE, 0x0F, 0x01, 0xF4, 0x36, 0x01, 0xF4, 0x69,
|
|
|
0x01, 0xF3, 0xFD};
|
|
|
uchar dest[sizeof(expected)];
|
|
|
|
|
|
StartBenchmarkTiming();
|
|
|
for (size_t i = 0; i < num_iterations; ++i) {
|
|
|
my_strnxfrm(cs, dest, sizeof(dest),
|
|
|
reinterpret_cast<const uchar *>(content), len);
|
|
|
}
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
expect_arrays_equal(expected, dest, sizeof(dest));
|
|
|
SetBytesProcessed(num_iterations * strlen(content));
|
|
|
}
|
|
|
BENCHMARK(BM_UTF8MB4_bin)
|
|
|
|
|
|
static void BM_UTF8MB4_0900_bin(size_t num_iterations) {
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
CHARSET_INFO *cs = init_collation("utf8mb4_0900_bin");
|
|
|
const char *content =
|
|
|
"Premature optimization is the root of all evil. "
|
|
|
"Våre norske tegn bør æres. 日本語が少しわかります。 "
|
|
|
"✌️🐶👩🏽";
|
|
|
const int len = strlen(content);
|
|
|
|
|
|
uchar *dest = new uchar[len];
|
|
|
|
|
|
StartBenchmarkTiming();
|
|
|
for (size_t i = 0; i < num_iterations; ++i) {
|
|
|
my_strnxfrm(cs, dest, len, reinterpret_cast<const uchar *>(content), len);
|
|
|
}
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
/*
|
|
|
utf8mb4_0900_bin_nopad gives the weight that has same bytes and length as
|
|
|
source string.
|
|
|
*/
|
|
|
expect_arrays_equal((const uchar *)content, dest, len);
|
|
|
delete[] dest;
|
|
|
SetBytesProcessed(num_iterations * len);
|
|
|
}
|
|
|
BENCHMARK(BM_UTF8MB4_0900_bin)
|
|
|
|
|
|
// The classic MySQL latin1 collation, for reference.
|
|
|
static void BM_Latin1_CI(size_t num_iterations) {
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
CHARSET_INFO *cs = init_collation("latin1_swedish_ci");
|
|
|
|
|
|
const char *content =
|
|
|
"Alla människor är födda fria och lika i värde "
|
|
|
"och rättigheter. De är utrustade med förnuft och samvete och bör "
|
|
|
"handla gentemot varandra i en anda av broderskap.";
|
|
|
const int len = strlen(content);
|
|
|
|
|
|
/*
|
|
|
Just recorded from a trial run on the string above.
|
|
|
The entire last row is padding.
|
|
|
*/
|
|
|
static constexpr uchar expected[] = {
|
|
|
0x41, 0x4c, 0x4c, 0x41, 0x20, 0x4d, 0x41, 0xa4, 0x4e, 0x4e, 0x49, 0x53,
|
|
|
0x4b, 0x4f, 0x52, 0x20, 0x41, 0xa4, 0x52, 0x20, 0x46, 0x41, 0xb6, 0x44,
|
|
|
0x44, 0x41, 0x20, 0x46, 0x52, 0x49, 0x41, 0x20, 0x4f, 0x43, 0x48, 0x20,
|
|
|
0x4c, 0x49, 0x4b, 0x41, 0x20, 0x49, 0x20, 0x56, 0x41, 0xa4, 0x52, 0x44,
|
|
|
0x45, 0x20, 0x4f, 0x43, 0x48, 0x20, 0x52, 0x41, 0xa4, 0x54, 0x54, 0x49,
|
|
|
0x47, 0x48, 0x45, 0x54, 0x45, 0x52, 0x2e, 0x20, 0x44, 0x45, 0x20, 0x41,
|
|
|
0xa4, 0x52, 0x20, 0x55, 0x54, 0x52, 0x55, 0x53, 0x54, 0x41, 0x44, 0x45,
|
|
|
0x20, 0x4d, 0x45, 0x44, 0x20, 0x46, 0x41, 0xb6, 0x52, 0x4e, 0x55, 0x46,
|
|
|
0x54, 0x20, 0x4f, 0x43, 0x48, 0x20, 0x53, 0x41, 0x4d, 0x56, 0x45, 0x54,
|
|
|
0x45, 0x20, 0x4f, 0x43, 0x48, 0x20, 0x42, 0x41, 0xb6, 0x52, 0x20, 0x48,
|
|
|
0x41, 0x4e, 0x44, 0x4c, 0x41, 0x20, 0x47, 0x45, 0x4e, 0x54, 0x45, 0x4d,
|
|
|
0x4f, 0x54, 0x20, 0x56, 0x41, 0x52, 0x41, 0x4e, 0x44, 0x52, 0x41, 0x20,
|
|
|
0x49, 0x20, 0x45, 0x4e, 0x20, 0x41, 0x4e, 0x44, 0x41, 0x20, 0x41, 0x56,
|
|
|
0x20, 0x42, 0x52, 0x4f, 0x44, 0x45, 0x52, 0x53, 0x4b, 0x41, 0x50, 0x2e,
|
|
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
|
|
};
|
|
|
uchar dest[sizeof(expected)];
|
|
|
|
|
|
size_t ret = 0;
|
|
|
StartBenchmarkTiming();
|
|
|
for (size_t i = 0; i < num_iterations; ++i) {
|
|
|
ret = cs->coll->strnxfrm(cs, dest, sizeof(dest), sizeof(dest),
|
|
|
pointer_cast<const uchar *>(content), len,
|
|
|
MY_STRXFRM_PAD_TO_MAXLEN);
|
|
|
}
|
|
|
StopBenchmarkTiming();
|
|
|
|
|
|
EXPECT_EQ(sizeof(expected), ret);
|
|
|
expect_arrays_equal(expected, dest, ret);
|
|
|
|
|
|
SetBytesProcessed(num_iterations * strlen(content));
|
|
|
}
|
|
|
BENCHMARK(BM_Latin1_CI)
|
|
|
|
|
|
// Since the UCA collations are NO PAD, strnncollsp should heed spaces.
|
|
|
TEST(PadCollationTest, BasicTest) {
|
|
|
constexpr char foo[] = "foo";
|
|
|
constexpr char foosp[] = "foo ";
|
|
|
constexpr char bar[] = "bar";
|
|
|
constexpr char foobar[] = "foobar";
|
|
|
|
|
|
CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");
|
|
|
auto my_strnncollsp = cs->coll->strnncollsp;
|
|
|
|
|
|
// "foo" == "foo"
|
|
|
EXPECT_EQ(my_strnncollsp(cs, pointer_cast<const uchar *>(foo), strlen(foo),
|
|
|
pointer_cast<const uchar *>(foo), strlen(foo)),
|
|
|
0);
|
|
|
// "foo" < "foo "
|
|
|
EXPECT_LT(my_strnncollsp(cs, pointer_cast<const uchar *>(foo), strlen(foo),
|
|
|
pointer_cast<const uchar *>(foosp), strlen(foosp)),
|
|
|
0);
|
|
|
// "foo" > "bar"
|
|
|
EXPECT_GT(my_strnncollsp(cs, pointer_cast<const uchar *>(foo), strlen(foo),
|
|
|
pointer_cast<const uchar *>(bar), strlen(bar)),
|
|
|
0);
|
|
|
// "foo" < "foobar".
|
|
|
EXPECT_LT(my_strnncollsp(cs, pointer_cast<const uchar *>(foo), strlen(foo),
|
|
|
pointer_cast<const uchar *>(foobar), strlen(foobar)),
|
|
|
0);
|
|
|
|
|
|
// Exactly the same tests in reverse.
|
|
|
|
|
|
// "foo " > "foo"
|
|
|
EXPECT_GT(
|
|
|
my_strnncollsp(cs, pointer_cast<const uchar *>(foosp), strlen(foosp),
|
|
|
pointer_cast<const uchar *>(foo), strlen(foo)),
|
|
|
0);
|
|
|
// "bar" < "foo"
|
|
|
EXPECT_LT(my_strnncollsp(cs, pointer_cast<const uchar *>(bar), strlen(bar),
|
|
|
pointer_cast<const uchar *>(foo), strlen(foo)),
|
|
|
0);
|
|
|
// "foobar" > "foo".
|
|
|
EXPECT_GT(
|
|
|
my_strnncollsp(cs, pointer_cast<const uchar *>(foobar), strlen(foobar),
|
|
|
pointer_cast<const uchar *>(foo), strlen(foo)),
|
|
|
0);
|
|
|
}
|
|
|
|
|
|
/*
  Compares pairs of strings through their strnxfrm() weight strings for
  three utf8mb4_0900 collations, focusing on how ordinary spaces,
  non-breaking spaces and tabs influence the ordering.
*/
TEST(StrxfrmTest, NoPadCollation) {
  CHARSET_INFO *coll_ai_ci = init_collation("utf8mb4_0900_ai_ci");
  CHARSET_INFO *coll_as_cs = init_collation("utf8mb4_0900_as_cs");
  CHARSET_INFO *coll_as_ci = init_collation("utf8mb4_0900_as_ci");

  // Sanity: equal strings compare equal, clearly different ones do not.
  EXPECT_EQ(compare_through_strxfrm(coll_ai_ci, "abc", "abc"), 0);
  EXPECT_NE(compare_through_strxfrm(coll_as_ci, "abc", "Ǎḅç"), 0);
  EXPECT_NE(compare_through_strxfrm(coll_ai_ci, "abc", "def"), 0);
  EXPECT_NE(compare_through_strxfrm(coll_as_ci, "abc", "def"), 0);

  // Trailing spaces are significant in every one of these collations.
  EXPECT_LT(compare_through_strxfrm(coll_ai_ci, "abc", "abc "), 0);
  EXPECT_LT(compare_through_strxfrm(coll_as_ci, "abc", "Ǎḅç "), 0);
  EXPECT_LT(compare_through_strxfrm(coll_as_cs, "abc", "abc "), 0);
  EXPECT_LT(compare_through_strxfrm(coll_as_cs, "abc", "Abc "), 0);

  // The same holds for other kinds of spaces.
  EXPECT_LT(compare_through_strxfrm(coll_ai_ci, "abc", u8"abc \u00a0"), 0);

  // A non-breaking space is _equal_ to a plain space under ai_ci and as_ci,
  // but sorts _after_ it under as_cs.
  EXPECT_EQ(compare_through_strxfrm(coll_ai_ci, "abc ", u8"abc\u00a0"), 0);
  EXPECT_EQ(compare_through_strxfrm(coll_as_ci, "abc ", u8"abc\u00a0"), 0);
  EXPECT_LT(compare_through_strxfrm(coll_as_cs, "abc ", u8"abc\u00a0"), 0);

  // ...and likewise when the space sits inside the string.
  EXPECT_EQ(compare_through_strxfrm(coll_ai_ci, "a c", u8"a\u00a0c"), 0);
  EXPECT_EQ(compare_through_strxfrm(coll_as_ci, "a c", u8"a\u00a0c"), 0);
  EXPECT_LT(compare_through_strxfrm(coll_as_cs, "a c", u8"a\u00a0c"), 0);

  // An embedded space must not be stripped.
  EXPECT_LT(compare_through_strxfrm(coll_ai_ci, "ab c", "abc"), 0);
  EXPECT_LT(compare_through_strxfrm(coll_as_ci, "ab c", "abc"), 0);
  EXPECT_LT(compare_through_strxfrm(coll_as_cs, "ab c", "abc"), 0);

  // Relative order of whitespace characters, as given by DUCET.
  EXPECT_GT(compare_through_strxfrm(coll_as_ci, " ", "\t"), 0);
  EXPECT_GT(compare_through_strxfrm(coll_as_cs, " ", "\t"), 0);
  EXPECT_LT(compare_through_strxfrm(coll_as_cs, "", "\t"), 0);
}
|
|
|
|
|
|
/*
  Checks, via strnxfrm() weight strings, that utf8mb4_hu_0900_ai_ci treats
  the digraph "cs" as a letter of its own.
*/
TEST(StrxfrmTest, Contractions) {
  CHARSET_INFO *hungarian = init_collation("utf8mb4_hu_0900_ai_ci");

  // Sanity: identical and case-differing strings are equal, others are not.
  EXPECT_EQ(compare_through_strxfrm(hungarian, "abc", "abc"), 0);
  EXPECT_NE(compare_through_strxfrm(hungarian, "abc", "def"), 0);
  EXPECT_EQ(compare_through_strxfrm(hungarian, "abc", "Abc"), 0);

  // "cs" is a separate letter ordered as c < cs < d, hence:
  EXPECT_LT(compare_through_strxfrm(hungarian, "c", "cs"), 0);
  EXPECT_LT(compare_through_strxfrm(hungarian, "cs", "d"), 0);
  EXPECT_LT(compare_through_strxfrm(hungarian, "ct", "cst"), 0);
  EXPECT_LT(compare_through_strxfrm(hungarian, "cst", "dt"), 0);

  // Example pair taken from Wikipedia.
  EXPECT_LT(compare_through_strxfrm(hungarian, "cukor", "csak"), 0);
}
|
|
|
|
|
|
/*
|
|
|
This test is disabled by default since it needs ~10 seconds to run,
|
|
|
even in optimized mode.
|
|
|
*/
|
|
|
TEST(BitfiddlingTest, DISABLED_FastOutOfRange) {
|
|
|
unsigned char bytes[4];
|
|
|
for (int a = 0; a < 256; ++a) {
|
|
|
bytes[0] = a;
|
|
|
for (int b = 0; b < 256; ++b) {
|
|
|
bytes[1] = b;
|
|
|
for (int c = 0; c < 256; ++c) {
|
|
|
bytes[2] = c;
|
|
|
for (int d = 0; d < 256; ++d) {
|
|
|
bytes[3] = d;
|
|
|
bool any_out_of_range_slow =
|
|
|
(a < 0x20 || a > 0x7e) || (b < 0x20 || b > 0x7e) ||
|
|
|
(c < 0x20 || c > 0x7e) || (d < 0x20 || d > 0x7e);
|
|
|
|
|
|
uint32 four_bytes;
|
|
|
memcpy(&four_bytes, bytes, sizeof(four_bytes));
|
|
|
bool any_out_of_range_fast =
|
|
|
(((four_bytes + 0x01010101u) & 0x80808080) ||
|
|
|
((four_bytes - 0x20202020u) & 0x80808080));
|
|
|
|
|
|
EXPECT_EQ(any_out_of_range_slow, any_out_of_range_fast);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
A version of FastOutOfRange that tests the analogous trick for 16-bit
|
|
|
integers instead (much, much faster).
|
|
|
*/
|
|
|
TEST(BitfiddlingTest, FastOutOfRange16) {
|
|
|
unsigned char bytes[2];
|
|
|
for (int a = 0; a < 256; ++a) {
|
|
|
bytes[0] = a;
|
|
|
for (int b = 0; b < 256; ++b) {
|
|
|
bytes[1] = b;
|
|
|
bool any_out_of_range_slow =
|
|
|
(a < 0x20 || a > 0x7e) || (b < 0x20 || b > 0x7e);
|
|
|
|
|
|
uint16 two_bytes;
|
|
|
memcpy(&two_bytes, bytes, sizeof(two_bytes));
|
|
|
bool any_out_of_range_fast =
|
|
|
(((two_bytes + uint16{0x0101}) & uint16{0x8080}) ||
|
|
|
((two_bytes - uint16{0x2020}) & uint16{0x8080}));
|
|
|
|
|
|
EXPECT_EQ(any_out_of_range_slow, any_out_of_range_fast);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/*
  Hashes a NUL-terminated string with the collation's hash_sort() handler.

  @param cs   collation whose hash_sort() is used
  @param str  NUL-terminated input string

  @return the first of the two hash accumulators after the call; the
          accumulators start out as 1 and 4 respectively.
*/
uint64 hash(CHARSET_INFO *cs, const char *str) {
  uint64 first_acc = 1;
  uint64 second_acc = 4;
  const uchar *bytes = pointer_cast<const uchar *>(str);
  cs->coll->hash_sort(cs, bytes, strlen(str), &first_acc, &second_acc);
  return first_acc;
}
|
|
|
|
|
|
/*
|
|
|
NOTE: In this entire test, there's an infinitesimal chance
|
|
|
that something that we expect doesn't match, still matches
|
|
|
by pure accident.
|
|
|
*/
|
|
|
TEST(PadCollationTest, HashSort) {
|
|
|
CHARSET_INFO *ai_ci = init_collation("utf8mb4_0900_ai_ci");
|
|
|
CHARSET_INFO *as_cs = init_collation("utf8mb4_0900_as_cs");
|
|
|
|
|
|
// Basic sanity checks.
|
|
|
EXPECT_EQ(hash(ai_ci, "abc"), hash(ai_ci, "abc"));
|
|
|
EXPECT_NE(hash(ai_ci, "abc"), hash(ai_ci, "def"));
|
|
|
|
|
|
// Spaces from the end should matter, no matter the collation.
|
|
|
EXPECT_NE(hash(ai_ci, "abc"), hash(ai_ci, "abc "));
|
|
|
EXPECT_NE(hash(as_cs, "abc"), hash(as_cs, "abc "));
|
|
|
EXPECT_NE(hash(as_cs, "abc"), hash(as_cs, "Abc "));
|
|
|
|
|
|
// Same with other types of spaces.
|
|
|
EXPECT_NE(hash(ai_ci, "abc"), hash(ai_ci, u8"abc \u00a0"));
|
|
|
|
|
|
// Non-breaking space should compare _equal_ to space in ai_ci,
|
|
|
// but _inequal_ in as_cs.
|
|
|
EXPECT_EQ(hash(ai_ci, "abc "), hash(ai_ci, u8"abc\u00a0"));
|
|
|
EXPECT_NE(hash(as_cs, "abc "), hash(as_cs, u8"abc\u00a0"));
|
|
|
EXPECT_NE(hash(as_cs, "abc"), hash(as_cs, u8"abc\u00a0"));
|
|
|
|
|
|
// Also in the middle of the string.
|
|
|
EXPECT_EQ(hash(ai_ci, "a c"), hash(ai_ci, u8"a\u00a0c"));
|
|
|
EXPECT_NE(hash(as_cs, "a c"), hash(as_cs, u8"a\u00a0c"));
|
|
|
|
|
|
// Verify that space in the middle of the string isn't stripped.
|
|
|
EXPECT_NE(hash(ai_ci, "ab c"), hash(ai_ci, "abc"));
|
|
|
EXPECT_NE(hash(as_cs, "ab c"), hash(as_cs, "abc"));
|
|
|
}
|
|
|
|
|
|
/*
  Checks that hash_sort() copes with a null pointer for a zero-length
  input, and that hashing a run of spaces does not crash.
*/
TEST(HashTest, NullPointer) {
  CHARSET_INFO *cs = init_collation("utf8mb4_0900_ai_ci");
  uint64 nr1 = 1, nr2 = 4;

  /*
    We should get the same hash from the empty string no matter what
    the pointer is.
  */
  cs->coll->hash_sort(cs, nullptr, 0, &nr1, &nr2);
  EXPECT_EQ(nr1, hash(cs, ""));

  /*
    Hash eight spaces. The length is derived from the buffer itself so that
    it can never exceed the number of bytes actually available; passing a
    hard-coded 8 together with a shorter literal reads out of bounds.
  */
  static constexpr char spaces[] = "        ";
  static_assert(sizeof(spaces) - 1 == 8, "expected exactly eight spaces");
  cs->coll->hash_sort(cs, pointer_cast<const uchar *>(spaces),
                      sizeof(spaces) - 1, &nr1, &nr2);
  // Don't care what the values are, just that we don't crash.
}
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
// Test that strnxfrmlen() holds for all single characters.
|
|
|
void test_strnxfrmlen(CHARSET_INFO *cs) {
|
|
|
pair<size_t, my_wc_t> longest{0, 0};
|
|
|
|
|
|
uchar inbuf[16], outbuf[256]; // Ought to be enough for anyone.
|
|
|
const size_t max_len = cs->coll->strnxfrmlen(cs, cs->mbmaxlen);
|
|
|
|
|
|
for (my_wc_t ch = 0; ch <= 0x10ffff; ++ch) {
|
|
|
size_t in_len = cs->cset->wc_mb(cs, ch, inbuf, inbuf + sizeof(inbuf));
|
|
|
if (in_len <= 0) {
|
|
|
continue; // Not representable in this character set.
|
|
|
}
|
|
|
size_t out_len =
|
|
|
cs->coll->strnxfrm(cs, outbuf, sizeof(outbuf), 1, inbuf, in_len, 0);
|
|
|
EXPECT_LE(out_len, max_len);
|
|
|
if (out_len > max_len) {
|
|
|
fprintf(stderr, "U+%04lX needed more room than strnxfrmlen() claimed\n",
|
|
|
ch);
|
|
|
fprintf(stderr, "Weight string:");
|
|
|
for (size_t i = 0; i < out_len; ++i) {
|
|
|
fprintf(stderr, " %02x", outbuf[i]);
|
|
|
}
|
|
|
fprintf(stderr, "\n\n");
|
|
|
}
|
|
|
|
|
|
longest = max(longest, make_pair(out_len, ch));
|
|
|
}
|
|
|
|
|
|
fprintf(stderr,
|
|
|
"Longest character in '%s': U+%04lX, %d bytes (strnxfrm_len=%d)\n",
|
|
|
cs->name, longest.second, static_cast<int>(longest.first),
|
|
|
static_cast<int>(max_len));
|
|
|
}
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
/*
  Runs test_strnxfrmlen() for every entry in all_charsets that is present
  and flagged MY_CS_AVAILABLE.
*/
TEST(StrxfrmLenTest, StrnxfrmLenIsLongEnoughForAllCharacters) {
  // Initialize a first collation before walking all_charsets.
  init_collation("utf8mb4_0900_ai_ci");

  for (CHARSET_INFO *candidate : all_charsets) {
    const bool usable =
        candidate != nullptr && (candidate->state & MY_CS_AVAILABLE);
    if (!usable) {
      continue;
    }
    // Tag any failure reported below with the collation's name.
    SCOPED_TRACE(candidate->name);
    test_strnxfrmlen(init_collation(candidate->name));
  }
}
|
|
|
|
|
|
// Golden hashes for a test string. These may be stored on disk, so we need to
|
|
|
// make sure that they never change.
|
|
|
struct GoldenHashResult {
|
|
|
pair<uint64, uint64> hash_value;
|
|
|
};
|
|
|
|
|
|
/*
  Checks that hash_sort() still produces the recorded golden values for every
  available collation.

  A fixed test string is converted into each character set with my_convert(),
  hashed starting from nr1 = 4 and nr2 = 1, and the two resulting values are
  compared against the table below.
*/
TEST(StrmxfrmHashTest, HashStability) {
  // Load one collation to get everything going.
  init_collation("utf8mb4_0900_ai_ci");

  // Reference values. Please keep this list sorted.
  unordered_map<string, GoldenHashResult> expected = {
      {"armscii8_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"armscii8_general_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"ascii_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"ascii_general_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"big5_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"big5_chinese_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"binary", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp1250_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp1250_croatian_ci", {{0xe25aa32298f78f4aLL, 0x000002b0LL}}},
      {"cp1250_czech_cs", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp1250_general_ci", {{0x81c46f6c6b06f8fcLL, 0x000002b0LL}}},
      {"cp1250_polish_ci", {{0xe25aa32298f78f4aLL, 0x000002b0LL}}},
      {"cp1251_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp1251_bulgarian_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"cp1251_general_ci", {{0xce71da5364c300a4LL, 0x000002b0LL}}},
      {"cp1251_general_cs", {{0xff44ce45c6d3d142LL, 0x000002b0LL}}},
      {"cp1251_ukrainian_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"cp1256_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp1256_general_ci", {{0x44ed84e7ad4a6c1cLL, 0x000002b0LL}}},
      {"cp1257_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp1257_general_ci", {{0x15219f243a38ad58LL, 0x000002b0LL}}},
      {"cp1257_lithuanian_ci", {{0xaa3ef638e5e056e8LL, 0x000002b0LL}}},
      {"cp850_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp850_general_ci", {{0xf32b1cf4087a0b08LL, 0x000002b0LL}}},
      {"cp852_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp852_general_ci", {{0x60dce9bffdeccd52LL, 0x000002b0LL}}},
      {"cp866_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp866_general_ci", {{0xce71da5364c300a4LL, 0x000002b0LL}}},
      {"cp932_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"cp932_japanese_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"dec8_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"dec8_swedish_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"eucjpms_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"eucjpms_japanese_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"euckr_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"euckr_korean_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"gb18030_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"gb18030_chinese_ci", {{0xb7b6676124243e73LL, 0x00000abdLL}}},
      {"gb18030_unicode_520_ci", {{0x5c1f019a21e3d464LL, 0x0000055fLL}}},
      {"gb2312_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"gb2312_chinese_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"gbk_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"gbk_chinese_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"geostd8_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"geostd8_general_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"greek_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"greek_general_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"hebrew_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"hebrew_general_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"hp8_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"hp8_english_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"keybcs2_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"keybcs2_general_ci", {{0xd2d54c0201229650LL, 0x000002b0LL}}},
      {"koi8r_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"koi8r_general_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"koi8u_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"koi8u_general_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"latin1_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"latin1_danish_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"latin1_general_ci", {{0xd7d424d55cb8f402LL, 0x000002b0LL}}},
      {"latin1_general_cs", {{0x96b2a3f94ffe41f9LL, 0x000002b0LL}}},
      {"latin1_german1_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"latin1_german2_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"latin1_spanish_ci", {{0xd7d424d55cb8f402LL, 0x000002b0LL}}},
      {"latin1_swedish_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"latin2_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"latin2_croatian_ci", {{0xe25aa32298f78f4aLL, 0x000002b0LL}}},
      {"latin2_czech_cs", {{0xba89a4855c3a88b6LL, 0x000002b0LL}}},
      {"latin2_general_ci", {{0xd9179195a5ddebf8LL, 0x000002b0LL}}},
      {"latin2_hungarian_ci", {{0xba89a4855c3a88b6LL, 0x000002b0LL}}},
      {"latin5_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"latin5_turkish_ci", {{0x68989a162aab9f1cLL, 0x000002b0LL}}},
      {"latin7_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"latin7_estonian_cs", {{0xa281f3df87b89fe1LL, 0x000002b0LL}}},
      {"latin7_general_ci", {{0xc6808727382ffb41LL, 0x000002b0LL}}},
      {"latin7_general_cs", {{0xf70d2b9f0d640804LL, 0x000002b0LL}}},
      {"macce_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"macce_general_ci", {{0xb27ca521eb9b7492LL, 0x000002b0LL}}},
      {"macroman_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"macroman_general_ci", {{0x3254bac0fa3625efLL, 0x000002b0LL}}},
      {"sjis_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"sjis_japanese_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"swe7_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"swe7_swedish_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"tis620_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"tis620_thai_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"ucs2_bin", {{0x1877f0a25b18b4c6LL, 0x0000055fLL}}},
      {"ucs2_croatian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_czech_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"ucs2_danish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_esperanto_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_estonian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_general_ci", {{0xfb66c3f2301bd579LL, 0x0000055fLL}}},
      {"ucs2_general_mysql500_ci", {{0xfb66c3f2301bd579LL, 0x0000055fLL}}},
      {"ucs2_german2_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_hungarian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_icelandic_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_latvian_ci", {{0x6473871765c3455cLL, 0x0000055fLL}}},
      {"ucs2_lithuanian_ci", {{0xccb8395ef1969f40LL, 0x00000553LL}}},
      {"ucs2_persian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_polish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_roman_ci", {{0xf40d4b3c957fccdcLL, 0x0000055fLL}}},
      {"ucs2_romanian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_sinhala_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_slovak_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"ucs2_slovenian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_spanish2_ci", {{0x3e79d9277da1beb4LL, 0x00000547LL}}},
      {"ucs2_spanish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_swedish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_turkish_ci", {{0x3fb28acb6e515c9cLL, 0x0000055fLL}}},
      {"ucs2_unicode_520_ci", {{0x5c1f019a21e3d464LL, 0x0000055fLL}}},
      {"ucs2_unicode_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ucs2_vietnamese_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"ujis_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"ujis_japanese_ci", {{0xdae43ea5cabac97cLL, 0x000002b0LL}}},
      {"utf16_bin", {{0x1877f0a25b18b4c6LL, 0x0000055fLL}}},
      {"utf16_croatian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_czech_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"utf16_danish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_esperanto_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_estonian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_general_ci", {{0xfb66c3f2301bd579LL, 0x0000055fLL}}},
      {"utf16_german2_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_hungarian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_icelandic_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_latvian_ci", {{0x6473871765c3455cLL, 0x0000055fLL}}},
      {"utf16_lithuanian_ci", {{0xccb8395ef1969f40LL, 0x00000553LL}}},
      {"utf16_persian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_polish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_roman_ci", {{0xf40d4b3c957fccdcLL, 0x0000055fLL}}},
      {"utf16_romanian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_sinhala_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_slovak_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"utf16_slovenian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_spanish2_ci", {{0x3e79d9277da1beb4LL, 0x00000547LL}}},
      {"utf16_spanish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_swedish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_turkish_ci", {{0x3fb28acb6e515c9cLL, 0x0000055fLL}}},
      {"utf16_unicode_520_ci", {{0x5c1f019a21e3d464LL, 0x0000055fLL}}},
      {"utf16_unicode_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16_vietnamese_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf16le_bin", {{0x3da26ce08ecbfaf9LL, 0x0000055fLL}}},
      {"utf16le_general_ci", {{0xfb66c3f2301bd579LL, 0x0000055fLL}}},
      {"utf32_bin", {{0x353330032692faLL, 0x00000abdLL}}},
      {"utf32_croatian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_czech_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"utf32_danish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_esperanto_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_estonian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_general_ci", {{0x353330032692faLL, 0x00000abdLL}}},
      {"utf32_german2_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_hungarian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_icelandic_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_latvian_ci", {{0x6473871765c3455cLL, 0x0000055fLL}}},
      {"utf32_lithuanian_ci", {{0xccb8395ef1969f40LL, 0x00000553LL}}},
      {"utf32_persian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_polish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_roman_ci", {{0xf40d4b3c957fccdcLL, 0x0000055fLL}}},
      {"utf32_romanian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_sinhala_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_slovak_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"utf32_slovenian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_spanish2_ci", {{0x3e79d9277da1beb4LL, 0x00000547LL}}},
      {"utf32_spanish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_swedish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_turkish_ci", {{0x3fb28acb6e515c9cLL, 0x0000055fLL}}},
      {"utf32_unicode_520_ci", {{0x5c1f019a21e3d464LL, 0x0000055fLL}}},
      {"utf32_unicode_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf32_vietnamese_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"utf8_croatian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8_czech_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"utf8_danish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8_esperanto_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8_estonian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8_general_ci", {{0xfb66c3f2301bd579LL, 0x0000055fLL}}},
      {"utf8_general_mysql500_ci", {{0xfb66c3f2301bd579LL, 0x0000055fLL}}},
      {"utf8_german2_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8_hungarian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8_icelandic_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8_latvian_ci", {{0x6473871765c3455cLL, 0x0000055fLL}}},
      {"utf8_lithuanian_ci", {{0xccb8395ef1969f40LL, 0x00000553LL}}},
      {"utf8_persian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8_polish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8_roman_ci", {{0xf40d4b3c957fccdcLL, 0x0000055fLL}}},
      {"utf8_romanian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8_sinhala_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8_slovak_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"utf8_slovenian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8_spanish2_ci", {{0x3e79d9277da1beb4LL, 0x00000547LL}}},
      {"utf8_spanish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8_swedish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8_tolower_ci", {{0x8eab9a2c403c8eb9LL, 0x0000055fLL}}},
      {"utf8_turkish_ci", {{0x3fb28acb6e515c9cLL, 0x0000055fLL}}},
      {"utf8_unicode_520_ci", {{0x5c1f019a21e3d464LL, 0x0000055fLL}}},
      {"utf8_unicode_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8_vietnamese_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_0900_as_ci", {{0xfc978781d49d0d9bLL, 0x00000001LL}}},
      {"utf8mb4_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_0900_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"utf8mb4_bin", {{0xb6240d9a0a0f7efcLL, 0x000002b0LL}}},
      {"utf8mb4_croatian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_cs_0900_ai_ci", {{0x36582be4fafa0bbbLL, 0x00000001LL}}},
      {"utf8mb4_cs_0900_as_cs", {{0xac403419684d8c71LL, 0x00000001LL}}},
      {"utf8mb4_czech_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"utf8mb4_da_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_da_0900_as_cs", {{0xbd24fdcb7b0cf519LL, 0x00000001LL}}},
      {"utf8mb4_danish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_de_pb_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_de_pb_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_eo_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_eo_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_es_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_es_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_es_trad_0900_ai_ci", {{0x555a77b8a263f17fLL, 0x00000001LL}}},
      {"utf8mb4_es_trad_0900_as_cs", {{0xae993a138c5c030dLL, 0x00000001LL}}},
      {"utf8mb4_esperanto_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_estonian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_et_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_et_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_general_ci", {{0xfb66c3f2301bd579LL, 0x0000055fLL}}},
      {"utf8mb4_german2_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_hr_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_hr_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_hu_0900_ai_ci", {{0x3162e9e9cebb9148LL, 0x00000001LL}}},
      {"utf8mb4_hu_0900_as_cs", {{0x88842661c548eec1LL, 0x00000001LL}}},
      {"utf8mb4_hungarian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_icelandic_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_is_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_is_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_ja_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_ja_0900_as_cs_ks", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_la_0900_ai_ci", {{0x2928cd07bca9a85dLL, 0x00000001LL}}},
      {"utf8mb4_la_0900_as_cs", {{0x29a7f3eb43a9819LL, 0x00000001LL}}},
      {"utf8mb4_latvian_ci", {{0x6473871765c3455cLL, 0x0000055fLL}}},
      {"utf8mb4_lithuanian_ci", {{0xccb8395ef1969f40LL, 0x00000553LL}}},
      {"utf8mb4_lt_0900_ai_ci", {{0xcd5ce469f67f6792LL, 0x00000001LL}}},
      {"utf8mb4_lt_0900_as_cs", {{0xe2e6dc41a4d6b3c1LL, 0x00000001LL}}},
      {"utf8mb4_lv_0900_ai_ci", {{0xcd5ce469f67f6792LL, 0x00000001LL}}},
      {"utf8mb4_lv_0900_as_cs", {{0xfe377cec9551f0f4LL, 0x00000001LL}}},
      {"utf8mb4_persian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_pl_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_pl_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_polish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_ro_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_ro_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_roman_ci", {{0xf40d4b3c957fccdcLL, 0x0000055fLL}}},
      {"utf8mb4_romanian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_ru_0900_ai_ci", {{0xb55bc2bf5ab2bf53LL, 0x00000001LL}}},
      {"utf8mb4_ru_0900_as_cs", {{0x36f5a31292841899LL, 0x00000001LL}}},
      {"utf8mb4_sinhala_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_sk_0900_ai_ci", {{0x36582be4fafa0bbbLL, 0x00000001LL}}},
      {"utf8mb4_sk_0900_as_cs", {{0xac403419684d8c71LL, 0x00000001LL}}},
      {"utf8mb4_sl_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_sl_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_slovak_ci", {{0x1dc65c2738ed47c0LL, 0x00000553LL}}},
      {"utf8mb4_slovenian_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_spanish2_ci", {{0x3e79d9277da1beb4LL, 0x00000547LL}}},
      {"utf8mb4_spanish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_sv_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_sv_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_swedish_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_tr_0900_ai_ci", {{0x7ea67be76364740fLL, 0x00000001LL}}},
      {"utf8mb4_tr_0900_as_cs", {{0xfa4556e24336675eLL, 0x00000001LL}}},
      {"utf8mb4_turkish_ci", {{0x3fb28acb6e515c9cLL, 0x0000055fLL}}},
      {"utf8mb4_unicode_520_ci", {{0x5c1f019a21e3d464LL, 0x0000055fLL}}},
      {"utf8mb4_unicode_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_vi_0900_ai_ci", {{0x3329a425d0f7f8d3LL, 0x00000001LL}}},
      {"utf8mb4_vi_0900_as_cs", {{0xcfb3e3073c9f5a19LL, 0x00000001LL}}},
      {"utf8mb4_vietnamese_ci", {{0x3acdfaa93364f55cLL, 0x0000055fLL}}},
      {"utf8mb4_zh_0900_as_cs", {{0x23c370d9ac589d1fLL, 0x00000001LL}}},
  };

  // The text that gets hashed. Its exact bytes determine the golden values.
  string test_str =
      "This is a fairly long string. It does not contain any special "
      "characters since they are probably not universally supported across all "
      "character sets, but should at least be enough to make the nr1 value go "
      "up past the 32-bit mark.";

  for (CHARSET_INFO *charset : all_charsets) {
    // Only collations that exist and are flagged as available are checked.
    if (charset == nullptr || (charset->state & MY_CS_AVAILABLE) == 0) {
      continue;
    }
    init_collation(charset->name);

    // Re-encode the test string into the character set under test.
    char converted[4096];
    uint conversion_errors;
    size_t converted_len = my_convert(
        converted, sizeof(converted), charset, test_str.data(),
        test_str.size(), &my_charset_utf8mb4_0900_ai_ci, &conversion_errors);
    ASSERT_EQ(0, conversion_errors);

    // Hash the converted bytes, starting from fixed seed values.
    uint64 nr1 = 4, nr2 = 1;
    charset->coll->hash_sort(charset, pointer_cast<const uchar *>(converted),
                             converted_len, &nr1, &nr2);

    // Flip this condition to true to print source lines that can be pasted
    // into the "expected" table above.
    if (false) {
      printf(" {\"%s\", {{0x%016" PRIx64 "LL, 0x%" PRIx64 "LL}}},\n",
             charset->name, nr1, nr2);
      continue;
    }

    ASSERT_EQ(1, expected.count(charset->name))
        << "Character set " << charset->name << " is missing in the database";
    SCOPED_TRACE(charset->name);

    // The entry is known to exist at this point, so operator[] inserts nothing.
    const GoldenHashResult &golden = expected[charset->name];
    EXPECT_EQ(golden.hash_value.first, nr1);
    EXPECT_EQ(golden.hash_value.second, nr2);
  }
}
|
|
|
|
|
|
} // namespace strnxfrm_unittest
|
|
|
|