用于EagleEye3.0 规则集漏报和误报测试的示例项目,项目收集于github和gitee
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

508 lines
17 KiB

3 months ago
/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
as published by the Free Software Foundation.
This program is also distributed with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation. The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have included with MySQL.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License, version 2.0, for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
// First include (the generated) my_config.h, to get correct platform defines.
#include "my_config.h"
#include <gtest/gtest.h>
#include <random>
#include <vector>
#include "extra/lz4/my_xxhash.h"
#include "include/my_murmur3.h"
#include "my_alloc.h"
#include "sql/hash_join_buffer.h"
#include "sql/hash_join_iterator.h"
#include "sql/item_cmpfunc.h"
#include "sql/row_iterator.h"
#include "sql/sql_executor.h"
#include "sql/sql_optimizer.h"
#include "sql_string.h"
#include "unittest/gunit/benchmark.h"
#include "unittest/gunit/fake_integer_iterator.h"
#include "unittest/gunit/fake_string_iterator.h"
#include "unittest/gunit/fake_table.h"
#include "unittest/gunit/parsertest.h"
#include "unittest/gunit/temptable/mock_field_long.h"
#include "unittest/gunit/temptable/mock_field_varstring.h"
#include "unittest/gunit/test_utils.h"
namespace hash_join_unittest {
using std::vector;
// Builds the table collection for a ten-table join in which every table has
// ten nullable columns. For simplicity, every object is allocated on the
// supplied MEM_ROOT, which takes care of releasing the memory.
//
// If store_data is true, all bits in each table's write set are set and the
// value 1000 is stored in every field of every table.
static hash_join_buffer::TableCollection CreateTenTableJoin(MEM_ROOT *mem_root,
                                                            bool store_data) {
  constexpr int kNumColumns = 10;
  constexpr bool kColumnsNullable = true;
  constexpr int kNumTablesInJoin = 10;

  vector<QEP_TAB *> join_tables;
  for (int table_no = 0; table_no < kNumTablesInJoin; ++table_no) {
    Fake_TABLE *table =
        new (mem_root) Fake_TABLE(kNumColumns, kColumnsNullable);

    QEP_TAB *tab = new (mem_root) QEP_TAB;
    tab->set_qs(new (mem_root) QEP_shared);
    tab->set_table(table);
    tab->table_ref = table->pos_in_table_list;
    join_tables.push_back(tab);

    if (!store_data) continue;

    // Make every column writable before filling it with a value.
    bitmap_set_all(table->write_set);
    uint field_no = 0;
    while (field_no < table->s->fields) {
      table->field[field_no]->store(1000, false /* is_unsigned */);
      ++field_no;
    }
  }

  return hash_join_buffer::TableCollection(join_tables);
}
static void BM_StoreFromTableBuffersNoData(size_t num_iterations) {
StopBenchmarkTiming();
my_testing::Server_initializer initializer;
initializer.SetUp();
MEM_ROOT mem_root;
hash_join_buffer::TableCollection table_collection =
CreateTenTableJoin(&mem_root, false);
String buffer;
buffer.reserve(1024);
StartBenchmarkTiming();
for (size_t i = 0; i < num_iterations; ++i) {
ASSERT_FALSE(
hash_join_buffer::StoreFromTableBuffers(table_collection, &buffer));
ASSERT_GT(buffer.length(), 0);
}
StopBenchmarkTiming();
initializer.TearDown();
}
BENCHMARK(BM_StoreFromTableBuffersNoData)
static void BM_StoreFromTableBuffersWithData(size_t num_iterations) {
StopBenchmarkTiming();
my_testing::Server_initializer initializer;
initializer.SetUp();
MEM_ROOT mem_root;
hash_join_buffer::TableCollection table_collection =
CreateTenTableJoin(&mem_root, true);
String buffer;
buffer.reserve(1024);
StartBenchmarkTiming();
for (size_t i = 0; i < num_iterations; ++i) {
ASSERT_FALSE(
hash_join_buffer::StoreFromTableBuffers(table_collection, &buffer));
ASSERT_GT(buffer.length(), 0);
}
StopBenchmarkTiming();
initializer.TearDown();
}
BENCHMARK(BM_StoreFromTableBuffersWithData)
// Return eight bytes of data: the values 1 through 8, in increasing order.
static vector<uchar> GetShortData() {
  constexpr uchar kNumBytes = 8;
  vector<uchar> bytes;
  for (uchar value = 1; value <= kNumBytes; ++value) {
    bytes.push_back(value);
  }
  return bytes;
}
// Return 1024 bytes of data. The byte at index i holds the value i truncated
// to uchar, i.e. i modulo 256.
static vector<uchar> GetLongData() {
  constexpr int kDataSize = 1024;
  // Start with an empty vector and only reserve capacity. Constructing the
  // vector with kDataSize elements and then appending would yield
  // 2 * kDataSize bytes, the first half of which are all zero.
  vector<uchar> data;
  data.reserve(kDataSize);
  for (int i = 0; i < kDataSize; ++i) {
    data.push_back(static_cast<uchar>(i));
  }
  return data;
}
static void BM_Murmur3ShortData(size_t num_iterations) {
StopBenchmarkTiming();
vector<uchar> data = GetShortData();
StartBenchmarkTiming();
size_t sum = 0;
for (size_t i = 0; i < num_iterations; ++i) {
sum += murmur3_32(&data[0], data.size(), 0);
}
StopBenchmarkTiming();
// The sum variable is just to assure that the compiler doesn't optimize away
// the entire for loop.
EXPECT_NE(0, sum);
SetBytesProcessed(num_iterations * data.size());
}
BENCHMARK(BM_Murmur3ShortData)
static void BM_MurmurLongData(size_t num_iterations) {
StopBenchmarkTiming();
vector<uchar> data = GetLongData();
StartBenchmarkTiming();
size_t sum = 0;
for (size_t i = 0; i < num_iterations; ++i) {
sum += murmur3_32(&data[0], data.size(), 0);
}
StopBenchmarkTiming();
// The sum variable is just to assure that the compiler doesn't optimize away
// the entire for loop.
EXPECT_NE(0, sum);
SetBytesProcessed(num_iterations * data.size());
}
BENCHMARK(BM_MurmurLongData)
static void BM_XXHash64ShortData(size_t num_iterations) {
StopBenchmarkTiming();
vector<uchar> data = GetShortData();
StartBenchmarkTiming();
size_t sum = 0;
for (size_t i = 0; i < num_iterations; ++i) {
sum += MY_XXH64(&data[0], data.size(), 0);
}
StopBenchmarkTiming();
// The sum variable is just to assure that the compiler doesn't optimize away
// the entire for loop.
EXPECT_NE(0, sum);
SetBytesProcessed(num_iterations * data.size());
}
BENCHMARK(BM_XXHash64ShortData)
static void BM_XXHash64LongData(size_t num_iterations) {
StopBenchmarkTiming();
vector<uchar> data = GetLongData();
StartBenchmarkTiming();
size_t sum = 0;
for (size_t i = 0; i < num_iterations; ++i) {
sum += MY_XXH64(&data[0], data.size(), 0);
}
StopBenchmarkTiming();
// The sum variable is just to assure that the compiler doesn't optimize away
// the entire for loop.
EXPECT_NE(0, sum);
SetBytesProcessed(num_iterations * data.size());
}
BENCHMARK(BM_XXHash64LongData)
// A class that takes care of setting up an environment for testing a hash join
// iterator. The constructors set up two tables (left and right), as well as
// two (fake) iterators that read data from these two tables. Both tables have
// only one column, and the join condition between the two tables is a simple
// equality between these two columns. There are two different constructors;
// one for integer data sets and one for string data sets.
//
// The user must provide the data contents for both tables in the constructor.
class HashJoinTestHelper {
 public:
  // Iterators over the left and right table contents.
  //
  // NOTE(review): these members are declared before m_mem_root and are
  // therefore destroyed after it, although the iterators are allocated on
  // m_mem_root. The tests in this file move both iterators out of the helper
  // before it goes out of scope; confirm that before keeping them in here.
  unique_ptr_destroy_only<RowIterator> left_iterator;
  unique_ptr_destroy_only<RowIterator> right_iterator;

  // QEP_TABs for the left and right table; both refer to the same JOIN.
  QEP_TAB *left_qep_tab;
  QEP_TAB *right_qep_tab;

  // Equality condition between the first field of the left table and the
  // first field of the right table.
  Item_func_eq *join_condition;

  // Sets up two single-column integer tables (non-nullable "column1") and
  // fake integer iterators that are handed left_dataset and right_dataset.
  HashJoinTestHelper(Server_initializer *initializer,
                     const vector<int> &left_dataset,
                     const vector<int> &right_dataset) {
    m_left_table_field.reset(
        new (&m_mem_root) Mock_field_long("column1", false /* is_nullable */));
    Fake_TABLE *left_table =
        new (&m_mem_root) Fake_TABLE(m_left_table_field.get());

    m_right_table_field.reset(
        new (&m_mem_root) Mock_field_long("column1", false /* is_nullable */));
    Fake_TABLE *right_table =
        new (&m_mem_root) Fake_TABLE(m_right_table_field.get());

    SetupFakeTables(initializer, left_table, right_table);

    // The datasets are const references, so they are passed as-is. Wrapping
    // them in move() would not turn the copy into a move.
    left_iterator.reset(new (&m_mem_root) FakeIntegerIterator(
        initializer->thd(), left_table,
        down_cast<Field_long *>(left_table->field[0]), left_dataset));
    right_iterator.reset(new (&m_mem_root) FakeIntegerIterator(
        initializer->thd(), right_table,
        down_cast<Field_long *>(right_table->field[0]), right_dataset));
  }

  // Sets up two single-column string tables (non-nullable "column1" of
  // length 255) and fake string iterators that are handed left_dataset and
  // right_dataset.
  HashJoinTestHelper(Server_initializer *initializer,
                     const vector<std::string> &left_dataset,
                     const vector<std::string> &right_dataset) {
    m_left_table_field.reset(new (&m_mem_root) Mock_field_varstring(
        nullptr, "column1", 255 /* length */, false /* is_nullable */));
    Fake_TABLE *left_table =
        new (&m_mem_root) Fake_TABLE(m_left_table_field.get());

    m_right_table_field.reset(new (&m_mem_root) Mock_field_varstring(
        nullptr, "column1", 255 /* length */, false /* is_nullable */));
    Fake_TABLE *right_table =
        new (&m_mem_root) Fake_TABLE(m_right_table_field.get());

    SetupFakeTables(initializer, left_table, right_table);

    // The datasets are const references, so they are passed as-is. Wrapping
    // them in move() would not turn the copy into a move.
    left_iterator.reset(new (&m_mem_root) FakeStringIterator(
        initializer->thd(), left_table,
        down_cast<Field_varstring *>(left_table->field[0]), left_dataset));
    right_iterator.reset(new (&m_mem_root) FakeStringIterator(
        initializer->thd(), right_table,
        down_cast<Field_varstring *>(right_table->field[0]), right_dataset));
  }

 private:
  // Shared setup for both constructors: sets all bits in the read and write
  // sets of both tables, parses a two-table join statement to obtain a
  // SELECT_LEX, creates a JOIN plus one QEP_TAB per table, and builds the
  // equality join condition on the first field of each table.
  void SetupFakeTables(Server_initializer *initializer, Fake_TABLE *left_table,
                       Fake_TABLE *right_table) {
    bitmap_set_all(left_table->write_set);
    bitmap_set_all(left_table->read_set);
    bitmap_set_all(right_table->write_set);
    bitmap_set_all(right_table->read_set);

    SELECT_LEX *select_lex =
        parse(initializer,
              "SELECT * FROM t1 JOIN t2 ON (t1.column1 = t2.column1);", 0);
    JOIN *join = new (&m_mem_root) JOIN(initializer->thd(), select_lex);

    left_qep_tab = new (&m_mem_root) QEP_TAB;
    left_qep_tab->set_qs(new (&m_mem_root) QEP_shared);
    left_qep_tab->set_table(left_table);
    left_qep_tab->table_ref = left_table->pos_in_table_list;
    left_qep_tab->set_join(join);

    right_qep_tab = new (&m_mem_root) QEP_TAB;
    right_qep_tab->set_qs(new (&m_mem_root) QEP_shared);
    right_qep_tab->set_table(right_table);
    right_qep_tab->table_ref = right_table->pos_in_table_list;
    right_qep_tab->set_join(join);

    // NOTE(review): the items below are created with plain new rather than on
    // m_mem_root; presumably Item supplies its own operator new. Confirm.
    join_condition = new Item_func_eq(new Item_field(left_table->field[0]),
                                      new Item_field(right_table->field[0]));
    join_condition->set_cmp_func();
  }

  // For simplicity, we allocate everything on a MEM_ROOT that takes care of
  // releasing any memory. However, we must ensure that the destructor is called
  // for Mock_field_varstring. Wrapping the fields in a unique_ptr_destroy_only
  // will ensure this.
  MEM_ROOT m_mem_root;
  unique_ptr_destroy_only<Field> m_left_table_field;
  unique_ptr_destroy_only<Field> m_right_table_field;
};
// Joins two single-row integer tables that both contain the value 3 and
// expects exactly one result row followed by EOF (-1).
TEST(HashJoinTest, JoinIntOneToOneMatch) {
  my_testing::Server_initializer initializer;
  initializer.SetUp();

  // Named vectors keep the choice between the integer and the string
  // constructor of HashJoinTestHelper explicit.
  const vector<int> left_rows = {3};
  const vector<int> right_rows = {3};
  HashJoinTestHelper helper(&initializer, left_rows, right_rows);

  constexpr int kMaxMemoryBytes = 10 * 1024 * 1024;  // 10 MB
  HashJoinIterator iterator(
      initializer.thd(), std::move(helper.left_iterator),
      {helper.left_qep_tab}, std::move(helper.right_iterator),
      helper.right_qep_tab, kMaxMemoryBytes, {helper.join_condition}, true);
  ASSERT_FALSE(iterator.Init());

  // One matching row ...
  EXPECT_EQ(0, iterator.Read());
  EXPECT_EQ(3, helper.left_qep_tab->table()->field[0]->val_int());

  // ... and then end of data.
  EXPECT_EQ(-1, iterator.Read());

  initializer.TearDown();
}
// Joins two integer tables that have no values in common and expects the
// first Read() to return EOF (-1).
TEST(HashJoinTest, JoinIntNoMatch) {
  my_testing::Server_initializer initializer;
  initializer.SetUp();

  HashJoinTestHelper helper(&initializer, {2, 4}, {3, 5});

  constexpr int kMaxMemoryBytes = 10 * 1024 * 1024;  // 10 MB
  HashJoinIterator iterator(
      initializer.thd(), std::move(helper.left_iterator),
      {helper.left_qep_tab}, std::move(helper.right_iterator),
      helper.right_qep_tab, kMaxMemoryBytes, {helper.join_condition}, true);
  ASSERT_FALSE(iterator.Init());

  EXPECT_EQ(-1, iterator.Read());

  initializer.TearDown();
}
// Joins a left table holding a single 2 with a right table holding two 2s and
// expects two result rows before EOF (-1).
TEST(HashJoinTest, JoinIntOneToManyMatch) {
  my_testing::Server_initializer initializer;
  initializer.SetUp();

  HashJoinTestHelper helper(&initializer, {2}, {2, 2});

  constexpr int kMaxMemoryBytes = 10 * 1024 * 1024;  // 10 MB
  HashJoinIterator iterator(
      initializer.thd(), std::move(helper.left_iterator),
      {helper.left_qep_tab}, std::move(helper.right_iterator),
      helper.right_qep_tab, kMaxMemoryBytes, {helper.join_condition}, true);
  ASSERT_FALSE(iterator.Init());

  // First result row.
  EXPECT_EQ(0, iterator.Read());
  EXPECT_EQ(2, helper.left_qep_tab->table()->field[0]->val_int());

  // Second result row.
  EXPECT_EQ(0, iterator.Read());
  EXPECT_EQ(2, helper.left_qep_tab->table()->field[0]->val_int());

  // No more rows.
  EXPECT_EQ(-1, iterator.Read());

  initializer.TearDown();
}
// Joins two single-row string tables that both contain "abc" and expects
// exactly one result row followed by EOF (-1).
TEST(HashJoinTest, JoinStringOneToOneMatch) {
  my_testing::Server_initializer initializer;
  initializer.SetUp();

  HashJoinTestHelper helper(&initializer, {"abc"}, {"abc"});

  constexpr int kMaxMemoryBytes = 10 * 1024 * 1024;  // 10 MB
  HashJoinIterator iterator(
      initializer.thd(), std::move(helper.left_iterator),
      {helper.left_qep_tab}, std::move(helper.right_iterator),
      helper.right_qep_tab, kMaxMemoryBytes, {helper.join_condition}, true);
  ASSERT_FALSE(iterator.Init());

  EXPECT_EQ(0, iterator.Read());

  // Read the string value of the left table's only column after the join.
  String value_buffer;
  String *value = helper.left_qep_tab->table()->field[0]->val_str(&value_buffer);
  EXPECT_EQ(std::string(value->ptr(), value->length()), std::string("abc"));

  EXPECT_EQ(-1, iterator.Read());

  initializer.TearDown();
}
// Do a benchmark of HashJoinIterator::Init(). This function is responsible for
// building the hash table, and this step is also known as the "build phase".
//
// The table that the hash table is built from is a single-column table with
// 10000 uniformly distributed values between [0, 10000). We give the hash table
// enough memory so that it doesn't spill out to disk.
static void BM_HashTableIteratorBuild(size_t num_iterations) {
StopBenchmarkTiming();
my_testing::Server_initializer initializer;
initializer.SetUp();
const int min_value = 0;
const int max_value = 10000;
const int seed = 8834245;
std::mt19937 generator(seed);
std::uniform_int_distribution<> distribution(min_value, max_value);
vector<int> left_dataset;
vector<int> right_dataset;
for (int i = 0; i < (max_value - min_value); ++i) {
left_dataset.push_back(distribution(generator));
right_dataset.push_back(distribution(generator));
}
HashJoinTestHelper test_helper(&initializer, left_dataset, right_dataset);
HashJoinIterator hash_join_iterator(
initializer.thd(), std::move(test_helper.left_iterator),
{test_helper.left_qep_tab}, std::move(test_helper.right_iterator),
test_helper.right_qep_tab, 10 * 1024 * 1024 /* 10 MB */,
{test_helper.join_condition}, true);
StartBenchmarkTiming();
for (size_t i = 0; i < num_iterations; ++i) {
ASSERT_FALSE(hash_join_iterator.Init());
}
StopBenchmarkTiming();
initializer.TearDown();
}
BENCHMARK(BM_HashTableIteratorBuild)
// Do a benchmark of HashJoinIterator::Read(). This function will read a row
// from the right table, and look for a matching row in the hash table. This is
// also known as the "probe phase".
//
// The table that the hash table is built from is a single-column table with
// 10000 uniformly distributed values between [0, 10000). We give the hash table
// enough memory so that is doesn't spill out to disk.
static void BM_HashTableIteratorProbe(size_t num_iterations) {
StopBenchmarkTiming();
my_testing::Server_initializer initializer;
initializer.SetUp();
const int min_value = 0;
const int max_value = 10000;
const int seed = 8834245;
std::mt19937 generator(seed);
std::uniform_int_distribution<> distribution(min_value, max_value);
vector<int> left_dataset;
vector<int> right_dataset;
for (int i = 0; i < (max_value - min_value); ++i) {
left_dataset.push_back(distribution(generator));
right_dataset.push_back(distribution(generator));
}
HashJoinTestHelper test_helper(&initializer, left_dataset, right_dataset);
HashJoinIterator hash_join_iterator(
initializer.thd(), std::move(test_helper.left_iterator),
{test_helper.left_qep_tab}, std::move(test_helper.right_iterator),
test_helper.right_qep_tab, 10 * 1024 * 1024 /* 10 MB */,
{test_helper.join_condition}, true);
for (size_t i = 0; i < num_iterations; ++i) {
ASSERT_FALSE(hash_join_iterator.Init());
StartBenchmarkTiming();
int result;
do {
result = hash_join_iterator.Read();
} while (result == 0);
StopBenchmarkTiming();
}
initializer.TearDown();
}
BENCHMARK(BM_HashTableIteratorProbe)
} // namespace hash_join_unittest