#ifndef SQL_OPTIMIZER_INCLUDED #define SQL_OPTIMIZER_INCLUDED /* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2.0, as published by the Free Software Foundation. This program is also distributed with certain software (including but not limited to OpenSSL) that is licensed under separate terms, as designated in a particular file or component or in included license documentation. The authors of MySQL hereby grant you an additional permission to link the program and your derivative works with the separately licensed software that they have included with MySQL. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0, for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /** @file sql/sql_optimizer.h Classes used for query optimizations. */ /* This structure is used to collect info on potentially sargable predicates in order to check whether they become sargable after reading const tables. We form a bitmap of indexes that can be used for sargable predicates. Only such indexes are involved in range analysis. 
*/ #include #include #include #include #include "my_alloc.h" #include "my_base.h" #include "my_compiler.h" #include "my_dbug.h" #include "my_table_map.h" #include "sql/field.h" #include "sql/item.h" #include "sql/item_subselect.h" #include "sql/mem_root_array.h" #include "sql/opt_explain_format.h" // Explain_sort_clause #include "sql/row_iterator.h" #include "sql/sql_array.h" #include "sql/sql_class.h" #include "sql/sql_const.h" #include "sql/sql_executor.h" // Next_select_func #include "sql/sql_lex.h" #include "sql/sql_list.h" #include "sql/sql_opt_exec_shared.h" #include "sql/sql_select.h" // Key_use #include "sql/table.h" #include "sql/temp_table_param.h" #include "template_utils.h" class COND_EQUAL; class Item_sum; class Opt_trace_context; class Window; struct MYSQL_LOCK; typedef Bounds_checked_array Item_null_array; // Key_use has a trivial destructor, no need to run it from Mem_root_array. typedef Mem_root_array Key_use_array; class Cost_model_server; struct SARGABLE_PARAM { Field *field; /* field against which to check sargability */ Item **arg_value; /* values of potential keys for lookups */ uint num_values; /* number of values in the above array */ }; struct ROLLUP { enum State { STATE_NONE, STATE_INITED, STATE_READY }; State state; Item_null_array null_items; Ref_item_array *ref_item_arrays; List *fields_list; ///< SELECT list List *all_fields; ///< Including hidden fields }; /** Wrapper for ORDER* pointer to trace origins of ORDER list As far as ORDER is just a head object of ORDER expression chain, we need some wrapper object to associate flags with the whole ORDER list. */ class ORDER_with_src { /** Private empty class to implement type-safe NULL assignment This private utility class allows us to implement a constructor from NULL and only NULL (or 0 -- this is the same thing) and an assignment operator from NULL. 
Assignments from other pointers still prohibited since other pointer types are incompatible with the "null" type, and the casting is impossible outside of ORDER_with_src class, since the "null" type is private. */ struct null {}; public: ORDER *order; ///< ORDER expression that we are wrapping with this class Explain_sort_clause src; ///< origin of order list private: int flags; ///< bitmap of Explain_sort_property public: ORDER_with_src() { clean(); } ORDER_with_src(ORDER *order_arg, Explain_sort_clause src_arg) : order(order_arg), src(src_arg), flags(order_arg ? ESP_EXISTS : ESP_none) {} /** Type-safe NULL assignment See a commentary for the "null" type above. */ ORDER_with_src &operator=(null *) { clean(); return *this; } /** Type-safe constructor from NULL See a commentary for the "null" type above. */ ORDER_with_src(null *) { clean(); } /** Transparent access to the wrapped order list These operators are safe, since we don't do any conversion of ORDER_with_src value, but just an access to the wrapped ORDER pointer value. We can use ORDER_with_src objects instead ORDER pointers in a transparent way without accessor functions. @note This operator also implements safe "operator bool()" functionality. 
*/ operator ORDER *() { return order; } operator const ORDER *() const { return order; } ORDER *operator->() const { return order; } void clean() { order = NULL; src = ESC_none; flags = ESP_none; } int get_flags() const { DBUG_ASSERT(order); return flags; } }; class JOIN { JOIN(const JOIN &rhs); /**< not implemented */ JOIN &operator=(const JOIN &rhs); /**< not implemented */ public: JOIN(THD *thd_arg, SELECT_LEX *select) : select_lex(select), unit(select->master_unit()), thd(thd_arg), join_tab(NULL), qep_tab(NULL), best_ref(NULL), map2table(NULL), map2qep_tab(NULL), sort_by_table(NULL), tables(0), primary_tables(0), const_tables(0), tmp_tables(0), send_group_parts(0), streaming_aggregation(false), seen_first_record(false), // @todo Can this be substituted with select->is_explicitly_grouped()? grouped(select->is_explicitly_grouped()), do_send_rows(true), all_table_map(0), // Inner tables may always be considered to be constant: const_table_map(INNER_TABLE_BIT), found_const_table_map(INNER_TABLE_BIT), deps_of_remaining_lateral_derived_tables(0), send_records(0), found_records(0), examined_rows(0), row_limit(0), m_select_limit(0), fetch_limit(HA_POS_ERROR), best_positions(NULL), positions(NULL), first_select(sub_select), best_read(0.0), best_rowcount(0), sort_cost(0.0), windowing_cost(0.0), // Needed in case optimizer short-cuts, set properly in // make_tmp_tables_info() fields(&select->item_list), group_fields(), group_fields_cache(), sum_funcs(NULL), sum_funcs_end(), tmp_table_param(thd_arg->mem_root), lock(thd->lock), rollup(), // @todo Can this be substituted with select->is_implicitly_grouped()? 
implicit_grouping(select->is_implicitly_grouped()), select_distinct(select->is_distinct()), group_optimized_away(false), simple_order(false), simple_group(false), m_ordered_index_usage(ORDERED_INDEX_VOID), skip_sort_order(false), need_tmp_before_win(false), has_lateral(false), keyuse_array(thd->mem_root), all_fields(select->all_fields), fields_list(select->fields_list), tmp_all_fields(nullptr), tmp_fields_list(nullptr), error(0), order(select->order_list.first, ESC_ORDER_BY), group_list(select->group_list.first, ESC_GROUP_BY), m_windows(select->m_windows), m_windows_sort(false), m_windowing_steps(false), explain_flags(), /* Those four members are meaningless before JOIN::optimize(), so force a crash if they are used before that. */ where_cond((Item *)1), having_cond((Item *)1), having_for_explain((Item *)1), tables_list((TABLE_LIST *)1), cond_equal(NULL), return_tab(0), ref_items(nullptr), ref_slice_immediately_before_group_by(nullptr), current_ref_item_slice(REF_SLICE_SAVED_BASE), recursive_iteration_count(0), zero_result_cause(NULL), child_subquery_can_materialize(false), allow_outer_refs(false), sj_tmp_tables(), sjm_exec_list(), group_sent(false), calc_found_rows(false), with_json_agg(select->json_agg_func_used()), optimized(false), executed(false), plan_state(NO_PLAN), select_count(false) { rollup.state = ROLLUP::STATE_NONE; if (select->order_list.first) explain_flags.set(ESC_ORDER_BY, ESP_EXISTS); if (select->group_list.first) explain_flags.set(ESC_GROUP_BY, ESP_EXISTS); if (select->is_distinct()) explain_flags.set(ESC_DISTINCT, ESP_EXISTS); if (m_windows.elements > 0) explain_flags.set(ESC_WINDOWING, ESP_EXISTS); // Calculate the number of groups for (ORDER *group = group_list; group; group = group->next) send_group_parts++; } /// Query block that is optimized and executed using this JOIN SELECT_LEX *const select_lex; /// Query expression referring this query block SELECT_LEX_UNIT *const unit; /// Thread handler THD *const thd; /** Optimal query execution 
plan. Initialized with a tentative plan in JOIN::make_join_plan() and later replaced with the optimal plan in get_best_combination(). */ JOIN_TAB *join_tab; /// Array of QEP_TABs QEP_TAB *qep_tab; /** Array of plan operators representing the current (partial) best plan. The array is allocated in JOIN::make_join_plan() and is valid only inside this function. Initially (*best_ref[i]) == join_tab[i]. The optimizer reorders best_ref. */ JOIN_TAB **best_ref; JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs QEP_TAB **map2qep_tab; ///< mapping between table indexes and QEP_TABs /* The table which has an index that allows to produce the required ordering. A special value of 0x1 means that the ordering will be produced by passing 1st non-const table to filesort(). NULL means no such table exists. */ TABLE *sort_by_table; /** Before plan has been created, "tables" denote number of input tables in the query block and "primary_tables" is equal to "tables". After plan has been created (after JOIN::get_best_combination()), the JOIN_TAB objects are enumerated as follows: - "tables" gives the total number of allocated JOIN_TAB objects - "primary_tables" gives the number of input tables, including materialized temporary tables from semi-join operation. - "const_tables" are those tables among primary_tables that are detected to be constant. - "tmp_tables" is 0, 1 or 2 (more if windows) and counts the maximum possible number of intermediate tables in post-processing (ie sorting and duplicate removal). Later, tmp_tables will be adjusted to the correct number of intermediate tables, @see JOIN::make_tmp_tables_info. - The remaining tables (ie. tables - primary_tables - tmp_tables) are input tables to materialized semi-join operations. The tables are ordered as follows in the join_tab array: 1. const primary table 2. non-const primary tables 3. intermediate sort/group tables 4. possible holes in array 5. 
semi-joined tables used with materialization strategy */ uint tables; ///< Total number of tables in query block uint primary_tables; ///< Number of primary input tables in query block uint const_tables; ///< Number of primary tables deemed constant uint tmp_tables; ///< Number of temporary tables used by query uint send_group_parts; /** Indicates that the data will be aggregated (typically GROUP BY), _and_ that it is already processed in an order that is compatible with the grouping in use (e.g. because we are scanning along an index, or because an earlier step sorted the data in a group-compatible order). Note that this flag changes value at multiple points during optimization; if it's set when a temporary table is created, this means we aggregate into said temporary table (end_write_group is chosen instead of end_write), but if it's set later, it means that we can aggregate as we go, just before sending the data to the client (end_send_group is chosen instead of end_send). @see make_group_fields, alloc_group_fields, JOIN::exec */ bool streaming_aggregation; bool seen_first_record; ///< Whether we've seen at least one row already bool grouped; ///< If query contains GROUP BY clause bool do_send_rows; ///< If true, send produced rows using query_result table_map all_table_map; ///< Set of tables contained in query table_map const_table_map; ///< Set of tables found to be const /** Const tables which are either: - not empty - empty but inner to a LEFT JOIN, thus "considered" not empty for the rest of execution (a NULL-complemented row will be used). */ table_map found_const_table_map; /** Used in some loops which scan the JOIN's tables: it is the bitmap of all tables which are dependencies of lateral derived tables which the loop has not yet processed. 
*/ table_map deps_of_remaining_lateral_derived_tables; /* Number of records produced after join + group operation */ ha_rows send_records; ha_rows found_records; ha_rows examined_rows; ha_rows row_limit; // m_select_limit is used to decide if we are likely to scan the whole table. ha_rows m_select_limit; /** Used to fetch no more than given amount of rows per one fetch operation of server side cursor. The value is checked in end_send and end_send_group in fashion, similar to offset_limit_cnt: - fetch_limit= HA_POS_ERROR if there is no cursor. - when we open a cursor, we set fetch_limit to 0, - on each fetch iteration we add num_rows to fetch to fetch_limit */ ha_rows fetch_limit; /** This is the result of join optimization. @note This is a scratch array, not used after get_best_combination(). */ POSITION *best_positions; /******* Join optimization state members start *******/ /* Current join optimization state */ POSITION *positions; /* We also maintain a stack of join optimization states in * join->positions[] */ /******* Join optimization state members end *******/ Next_select_func first_select; /** The cost of best complete join plan found so far during optimization, after optimization phase - cost of picked join order (not taking into account the changes made by test_if_skip_sort_order()). */ double best_read; /** The estimated row count of the plan with best read time (see above). */ ha_rows best_rowcount; /// Expected cost of filesort. double sort_cost; /// Expected cost of windowing; double windowing_cost; List *fields; List group_fields, group_fields_cache; Item_sum **sum_funcs, ***sum_funcs_end; /** Describes a temporary table. Each tmp table has its own tmp_table_param. The one here has two roles: - is transiently used as a model by create_intermediate_table(), to build the tmp table's own tmp_table_param. - is also used as description of the pseudo-tmp-table of grouping (REF_SLICE_ORDERED_GROUP_BY) (e.g. in end_send_group()). 
*/ Temp_table_param tmp_table_param; MYSQL_LOCK *lock; ROLLUP rollup; ///< Used with rollup bool implicit_grouping; ///< True if aggregated but no GROUP BY /** At construction time, set if SELECT DISTINCT. May be reset to false later, when we set up a temporary table operation that deduplicates for us. */ bool select_distinct; /** If we have the GROUP BY statement in the query, but the group_list was emptied by optimizer, this flag is true. It happens when fields in the GROUP BY are from constant table */ bool group_optimized_away; /* simple_xxxxx is set if ORDER/GROUP BY doesn't include any references to other tables than the first non-constant table in the JOIN. It's also set if ORDER/GROUP BY is empty. Used for deciding for or against using a temporary table to compute GROUP/ORDER BY. */ bool simple_order, simple_group; /* m_ordered_index_usage is set if an ordered index access should be used instead of a filesort when computing ORDER/GROUP BY. */ enum { ORDERED_INDEX_VOID, // No ordered index avail. ORDERED_INDEX_GROUP_BY, // Use index for GROUP BY ORDERED_INDEX_ORDER_BY // Use index for ORDER BY } m_ordered_index_usage; /** Is set if we have a GROUP BY and we have ORDER BY on a constant or when sorting isn't required. */ bool skip_sort_order; /** If true we need a temporary table on the result set before any windowing steps, e.g. for DISTINCT or we have a query ORDER BY. See details in JOIN::optimize */ bool need_tmp_before_win; /// If JOIN has lateral derived tables (is set at start of planning) bool has_lateral; /// Used and updated by JOIN::make_join_plan() and optimize_keyuse() Key_use_array keyuse_array; /// List storing all expressions used in query block List &all_fields; /// List storing all expressions of select list List &fields_list; /** This is similar to tmp_fields_list, but it also contains necessary extras: expressions added for ORDER BY, GROUP BY, window clauses, underlying items of split items. 
*/ List *tmp_all_fields; /** Array of pointers to lists of expressions. Each list represents the SELECT list at a certain stage of execution. This array is only used when the query makes use of tmp tables: after writing to tmp table (e.g. for GROUP BY), if this write also does a function's calculation (e.g. of SUM), after the write the function's value is in a column of the tmp table. If a SELECT list expression is the SUM, and we now want to read that materialized SUM and send it forward, a new expression (Item_field type instead of Item_sum), is needed. The new expressions are listed in JOIN::tmp_fields_list[x]; 'x' is a number (REF_SLICE_). Same is applicable to tmp_all_fields. @see JOIN::make_tmp_tables_info() */ List *tmp_fields_list; int error; ///< set in optimize(), exec(), prepare_result() /** ORDER BY and GROUP BY lists, to transform with prepare,optimize and exec */ ORDER_with_src order, group_list; /** Any window definitions */ List m_windows; /** True if a window requires a certain order of rows, which implies that any order of rows coming out of the pre-window join will be disturbed. */ bool m_windows_sort; /// If we have set up tmp tables for windowing, @see make_tmp_tables_info bool m_windowing_steps; /** Buffer to gather GROUP BY, ORDER BY and DISTINCT QEP details for EXPLAIN */ Explain_format_flags explain_flags; /** JOIN::having_cond is initially equal to select_lex->having_cond, but may later be changed by optimizations performed by JOIN. The relationship between the JOIN::having_cond condition and the associated variable select_lex->having_value is so that having_value can be: - COND_UNDEF if a having clause was not specified in the query or if it has not been optimized yet - COND_TRUE if the having clause is always true, in which case JOIN::having_cond is set to NULL. 
- COND_FALSE if the having clause is impossible, in which case JOIN::having_cond is set to NULL - COND_OK otherwise, meaning that the having clause needs to be further evaluated All of the above also applies to the where_cond/select_lex->cond_value pair. */ /** Optimized WHERE clause item tree (valid for one single execution). Used in JOIN execution if no tables. Otherwise, attached in pieces to JOIN_TABs and then not used in JOIN execution. Printed by EXPLAIN EXTENDED. Initialized by SELECT_LEX::get_optimizable_conditions(). */ Item *where_cond; /** Optimized HAVING clause item tree (valid for one single execution). Used in JOIN execution, as last "row filtering" step. With one exception: may be pushed to the JOIN_TABs of temporary tables used in DISTINCT / GROUP BY (see JOIN::make_tmp_tables_info()); in that case having_cond is set to NULL, but is first saved to having_for_explain so that EXPLAIN EXTENDED can still print it. Initialized by SELECT_LEX::get_optimizable_conditions(). */ Item *having_cond; Item *having_for_explain; ///< Saved optimized HAVING for EXPLAIN /** Pointer set to select_lex->get_table_list() at the start of optimization. May be changed (to NULL) only if optimize_aggregated_query() optimizes tables away. */ TABLE_LIST *tables_list; COND_EQUAL *cond_equal; /* Join tab to return to. Points to an element of join->join_tab array, or to join->join_tab[-1]. This is used at execution stage to shortcut join enumeration. Currently shortcutting is done to handle outer joins or handle semi-joins with FirstMatch strategy. */ plan_idx return_tab; /** ref_items is an array of 5 slices, each containing an array of Item pointers. ref_items is used in different phases of query execution. - slice 0 is initially the same as SELECT_LEX::base_ref_items, ie it is the set of items referencing fields from base tables. During optimization and execution it may be temporarily overwritten by slice 1-3. 
- slice 1 is a representation of the used items when being read from the first temporary table. - slice 2 is a representation of the used items when being read from the second temporary table. - slice 3 is a representation of the used items when used in aggregation but no actual temporary table is needed. - slice 4 is a copy of the original slice 0. It is created if slice overwriting is necessary, and it is used to restore original values in slice 0 after having been overwritten. - slices 5 -> N are used by windowing: first are all the window's out tmp tables, the next indexes are reserved for the windows' frame buffers (in the same order), if any, e.g. One window: 5: window 1's out table 6: window 1's FB Two windows: 5: window 1's out table 6: window 2's out table 7: window 1's FB 8: window 2's FB and so on. Slice 0 is allocated for the lifetime of a statement, whereas slices 1-4 are associated with a single optimization. The size of slice 0 determines the slice size used when allocating the other slices. */ Ref_item_array *ref_items; // cardinality: REF_SLICE_SAVED_BASE + 1 + #windows*2 /** If slice REF_SLICE_ORDERED_GROUP_BY has been created, this is the QEP_TAB which is right before calculation of items in this slice. */ QEP_TAB *ref_slice_immediately_before_group_by; /** The slice currently stored in ref_items[0]. Used to restore the base ref_items slice from the "save" slice after it has been overwritten by another slice (1-3). */ uint current_ref_item_slice; /** Used only if this query block is recursive. Contains count of all executions of this recursive query block, since the last this->reset(). */ uint recursive_iteration_count; /** <> NULL if optimization has determined that execution will produce an empty result before aggregation, contains a textual explanation on why result is empty. Implicitly grouped queries may still produce an aggregation row. 
@todo - suggest to set to "Preparation determined that query is empty" when SELECT_LEX::is_empty_query() is true. */ const char *zero_result_cause; /** True if, at this stage of processing, subquery materialization is allowed for children subqueries of this JOIN (those in the SELECT list, in WHERE, etc). If false, and we have to evaluate a subquery at this stage, then we must choose EXISTS. */ bool child_subquery_can_materialize; /** True if plan search is allowed to use references to expressions outer to this JOIN (for example may set up a 'ref' access looking up an outer expression in the index, etc). */ bool allow_outer_refs; /* Temporary tables used to weed-out semi-join duplicates */ List sj_tmp_tables; List sjm_exec_list; /* end of allocation caching storage */ /** Exec time only: true <=> current group has been sent */ bool group_sent; /// If true, calculate found rows for this query block bool calc_found_rows; /** This will force tmp table to NOT use index + update for group operation as it'll cause [de]serialization for each json aggregated value and is very ineffective (times worse). Server should use filesort, or tmp table + filesort to resolve GROUP BY with JSON aggregate functions. */ bool with_json_agg; /** If set, "fields" has been replaced with a set of Item_refs for rollup processing; see the AggregateIterator constructor for more details. This is used when constructing iterators only; it is not used during execution. */ bool replaced_items_for_rollup = false; /// True if plan is const, ie it will return zero or one rows. bool plan_is_const() const { return const_tables == primary_tables; } /** True if plan contains one non-const primary table (ie not including tables taking part in semi-join materialization). 
*/ bool plan_is_single_table() { return primary_tables - const_tables == 1; } bool optimize(); void reset(); void exec(); bool prepare_result(); bool destroy(); bool alloc_func_list(); bool make_sum_func_list(List &all_fields, List &send_fields, bool before_group_by, bool recompute = false); /** Overwrites one slice of ref_items with the contents of another slice. In the normal case, dst and src have the same size(). However: the rollup slices may have smaller size than slice_sz. */ void copy_ref_item_slice(uint dst_slice, uint src_slice) { copy_ref_item_slice(ref_items[dst_slice], ref_items[src_slice]); } void copy_ref_item_slice(Ref_item_array dst_arr, Ref_item_array src_arr) { DBUG_ASSERT(dst_arr.size() >= src_arr.size()); void *dest = dst_arr.array(); const void *src = src_arr.array(); if (!src_arr.is_null()) memcpy(dest, src, src_arr.size() * src_arr.element_size()); } /** Allocate a ref_item slice, assume that slice size is in ref_items[0] @param thd_arg thread handler @param sliceno The slice number to allocate in JOIN::ref_items @returns false if success, true if error */ bool alloc_ref_item_slice(THD *thd_arg, int sliceno) { DBUG_ASSERT(sliceno > 0 && ref_items[sliceno].is_null()); size_t count = ref_items[0].size(); Item **slice = pointer_cast(thd_arg->alloc(sizeof(Item *) * count)); if (slice == NULL) return true; ref_items[sliceno] = Ref_item_array(slice, count); return false; } /** Overwrite the base slice of ref_items with the slice supplied as argument. @param sliceno number to overwrite the base slice with, must be 1-4 or 4 + windowno. 
*/ void set_ref_item_slice(uint sliceno) { DBUG_ASSERT((int)sliceno >= 1); if (current_ref_item_slice != sliceno) { copy_ref_item_slice(REF_SLICE_ACTIVE, sliceno); DBUG_PRINT("info", ("ref slice %u -> %u", current_ref_item_slice, sliceno)); current_ref_item_slice = sliceno; } } /// @note do also consider Switch_ref_item_slice uint get_ref_item_slice() const { return current_ref_item_slice; } /** Returns the clone of fields_list which is appropriate for evaluating expressions at the current stage of execution; which stage is denoted by the value of current_ref_item_slice. */ List *get_current_fields(); bool optimize_rollup(); bool rollup_process_const_fields(); bool rollup_make_fields(List &all_fields, List &fields, Item_sum ***func); bool switch_slice_for_rollup_fields(List &all_fields, List &fields); bool rollup_send_data(uint idx); bool rollup_write_data(uint idx, QEP_TAB *qep_tab); bool finalize_table_conditions(); /** Release memory and, if possible, the open tables held by this execution plan (and nested plans). It's used to release some tables before the end of execution in order to increase concurrency and reduce memory consumption. */ void join_free(); /** Cleanup this JOIN. Not a full cleanup. reusable? */ void cleanup(); bool clear_fields(table_map *save_nullinfo); void restore_fields(table_map save_nullinfo); /** Return whether the caller should send a row even if the join produced no rows if: - there is an aggregate function (sum_func_count!=0), and - the query is not grouped, and - a possible HAVING clause evaluates to TRUE. @note: if there is a having clause, it must be evaluated before returning the row. 
*/ bool send_row_on_empty_set() const { return (do_send_rows && tmp_table_param.sum_func_count != 0 && group_list == NULL && !group_optimized_away && select_lex->having_value != Item::COND_FALSE); } bool generate_derived_keys(); void finalize_derived_keys(); bool get_best_combination(); bool attach_join_conditions(plan_idx last_tab); private: bool attach_join_condition_to_nest(plan_idx first_inner, plan_idx last_tab, Item *join_cond, bool is_sj_mat_cond); public: bool update_equalities_for_sjm(); bool add_sorting_to_table(uint idx, ORDER_with_src *order, bool force_stable_sort = false); bool decide_subquery_strategy(); void refine_best_rowcount(); void recalculate_deps_of_remaining_lateral_derived_tables( table_map plan_tables, uint idx); bool clear_corr_derived_tmp_tables(); void mark_const_table(JOIN_TAB *table, Key_use *key); /// State of execution plan. Currently used only for EXPLAIN enum enum_plan_state { NO_PLAN, ///< No plan is ready yet ZERO_RESULT, ///< Zero result cause is set NO_TABLES, ///< Plan has no tables PLAN_READY ///< Plan is ready }; /// See enum_plan_state enum_plan_state get_plan_state() const { return plan_state; } bool is_optimized() const { return optimized; } void set_optimized() { optimized = true; } bool is_executed() const { return executed; } void set_executed() { executed = true; } /** Retrieve the cost model object to be used for this join. @return Cost model object for the join */ const Cost_model_server *cost_model() const { DBUG_ASSERT(thd != NULL); return thd->cost_model(); } /** Check if FTS index only access is possible */ bool fts_index_access(JOIN_TAB *tab); Next_select_func get_end_select_func(); /** Propagate dependencies between tables due to outer join relations. @returns false if success, true if error */ bool propagate_dependencies(); /** Returns whether one should send the current row on to the output, or ignore it. (In particular, this implements OFFSET handling in the non-iterator executor.) 
*/ bool should_send_current_row() { if (!do_send_rows) { return false; } if (unit->offset_limit_cnt > 0) { --unit->offset_limit_cnt; return false; } else { return true; } } /** Handle offloading of query parts to the underlying engines, when such is supported by their implementation. @returns 0 if success, 1 if error */ int push_to_engines(); RowIterator *root_iterator() const { return m_root_iterator.get(); } unique_ptr_destroy_only release_root_iterator() { return move(m_root_iterator); } void set_root_iterator(unique_ptr_destroy_only iterator) { m_root_iterator = move(iterator); } private: bool optimized; ///< flag to avoid double optimization in EXPLAIN /** Set by exec(), reset by reset(). Note that this needs to be set _during_ the query (not only when it's done executing), or the dynamic range optimizer will not understand which tables have been read. */ bool executed; /// Final execution plan state. Currently used only for EXPLAIN enum_plan_state plan_state; public: /* When join->select_count is set, tables will not be optimized away. The call to records() will be delayed until the execution phase and the counting will be done on an index of Optimizer's choice. The index will be decided in find_shortest_key(), called from optimize_aggregated_query(). */ bool select_count; private: /** Create a temporary table to be used for processing DISTINCT/ORDER BY/GROUP BY. @note Will modify JOIN object wrt sort/group attributes @param tab the JOIN_TAB object to attach created table to @param tmp_table_fields List of items that will be used to define column types of the table. @param tmp_table_group Group key to use for temporary table, NULL if none. @param save_sum_fields If true, do not replace Item_sum items in @c tmp_fields list with Item_field items referring to fields in temporary table. 
@returns false on success, true on failure
  */
  bool create_intermediate_table(QEP_TAB *tab, List *tmp_table_fields,
                                 ORDER_with_src &tmp_table_group,
                                 bool save_sum_fields);

  /**
    Optimize distinct when used on a subset of the tables.

    E.g.: SELECT DISTINCT t1.a FROM t1,t2 WHERE t1.b=t2.b
    In this case we can stop scanning t2 when we have found one t1.a
  */
  void optimize_distinct();

  /**
    Function sets FT hints, initializes FT handlers and
    checks if FT index can be used as covered.
  */
  bool optimize_fts_query();

  bool prune_table_partitions();

  /**
    Initialize key dependencies for join tables.

    For each of the first "tables" entries of join_tab, copies the
    "dependent" map into "key_dependent".

    TODO figure out necessity of this method. Current test
         suite passed without this initialization.
  */
  void init_key_dependencies() {
    JOIN_TAB *const tab_end = join_tab + tables;
    for (JOIN_TAB *tab = join_tab; tab < tab_end; tab++)
      tab->key_dependent = tab->dependent;
  }

 private:
  void set_prefix_tables();
  void cleanup_item_list(List &items) const;
  void set_semijoin_embedding();
  bool make_join_plan();
  bool init_planner_arrays();
  bool extract_const_tables();
  bool extract_func_dependent_tables();
  void update_sargable_from_const(SARGABLE_PARAM *sargables);
  bool estimate_rowcount();
  void optimize_keyuse();
  void set_semijoin_info();

  /**
    A utility function - apply heuristics and optimize access methods to
    tables.

    @note Side effect - this function could set 'Impossible WHERE' zero
    result.
  */
  void adjust_access_methods();

  void update_depend_map();
  void update_depend_map(ORDER *order);

  /**
    Fill in outer join related info for the execution plan structure.

    For each outer join operation left after simplification of the
    original query the function sets up the following pointers in the
    linear structure join->join_tab representing the selected execution
    plan.
    The first inner table t0 for the operation is set to refer to the last
    inner table tk through the field t0->last_inner.
    Any inner table ti for the operation is set to refer to the first
    inner table ti->first_inner.
The first inner table t0 for the operation is set to refer to the first
    inner table of the embedding outer join operation, if there is any,
    through the field t0->first_upper.
    The on expression for the outer join operation is attached to the
    corresponding first inner table through the field t0->on_expr_ref.
    Here ti are structures of the JOIN_TAB type.

    EXAMPLE. For the query:
    @code
        SELECT * FROM t1
                      LEFT JOIN
                      (t2, t3 LEFT JOIN t4 ON t3.a=t4.a)
                      ON (t1.a=t2.a AND t1.b=t3.b)
          WHERE t1.c > 5,
    @endcode

    given the execution plan with the table order t1,t2,t3,t4
    is selected, the following references will be set:
    t4->last_inner=[t4], t4->first_inner=[t4], t4->first_upper=[t2]
    t2->last_inner=[t4], t2->first_inner=t3->first_inner=[t2],
    on expression (t1.a=t2.a AND t1.b=t3.b) will be attached to
    *t2->on_expr_ref, while t3.a=t4.a will be attached to *t4->on_expr_ref.

    @note
    The function assumes that the simplification procedure has been
    already applied to the join query (see simplify_joins).
    This function can be called only after the execution plan
    has been chosen.
  */
  void make_outerjoin_info();

  /**
    Initialize ref access for all tables that use it.

    @return False if success, True if error

    @note We cannot set up fields used for ref access before we have sorted
          the items within multiple equalities according to the final order
          of the tables involved in the join operation. Currently, this
          occurs in @see substitute_for_best_equal_field().
*/
  bool init_ref_access();
  bool alloc_qep(uint n);
  void unplug_join_tabs();
  bool setup_semijoin_materialized_table(JOIN_TAB *tab, uint tableno,
                                         POSITION *inner_pos,
                                         POSITION *sjm_pos);

  bool add_having_as_tmp_table_cond(uint curr_tmp_table);
  bool make_tmp_tables_info();
  void set_plan_state(enum_plan_state plan_state_arg);
  bool compare_costs_of_subquery_strategies(
      Item_exists_subselect::enum_exec_method *method);
  ORDER *remove_const(ORDER *first_order, Item *cond, bool change_list,
                      bool *simple_order, bool group_by);

  /**
    Check whether this is a subquery that can be evaluated by index
    look-ups. If so, change subquery engine to
    subselect_indexsubquery_engine.

    @retval  1   engine was changed
    @retval  0   engine wasn't changed
    @retval -1   OOM
  */
  int replace_index_subquery();

  /**
    Optimize DISTINCT, GROUP BY, ORDER BY clauses

    @retval false ok
    @retval true  an error occurred
  */
  bool optimize_distinct_group_order();

  /**
    Test if an index could be used to replace filesort for ORDER BY/GROUP BY

    @details
      Investigate whether we may use an ordered index as part of either
      DISTINCT, GROUP BY or ORDER BY execution. An ordered index may be
      used for only the first of any of these terms to be executed. This
      is reflected in the order in which we check for
      test_if_skip_sort_order() below. However we do not check for DISTINCT
      here, as it would have been transformed to a GROUP BY at this stage
      if it is a candidate for ordered index optimization.
      If a decision was made to use an ordered index, the availability
      of such an access path is stored in 'm_ordered_index_usage' for later
      use by 'execute' or 'explain'
  */
  void test_skip_sort();

  bool alloc_indirection_slices();

  /**
    If possible, convert the executor structures to a set of row iterators,
    storing the result in m_root_iterator. If not, m_root_iterator will
    remain nullptr.
*/ void create_iterators(); /** Create iterators with the knowledge that there are going to be zero rows coming from tables (before aggregation); typically because we know that all of them would be filtered away by WHERE (e.g. SELECT * FROM t1 WHERE 1=2). This will normally yield no output rows, but if we have implicit aggregation, it might yield a single one. */ void create_iterators_for_zero_rows(); /** @{ Helpers for create_iterators. */ void create_table_iterators(); unique_ptr_destroy_only create_root_iterator_for_join(); unique_ptr_destroy_only attach_iterators_for_having_and_limit( unique_ptr_destroy_only iterator); /** @} */ /** An iterator you can read from to get all records for this query. May be nullptr even after create_iterators() if the current query is not supported by the iterator executor. */ unique_ptr_destroy_only m_root_iterator; }; /** RAII class to ease the temporary switching to a different slice of the ref item array. */ class Switch_ref_item_slice { JOIN *join; uint saved; public: Switch_ref_item_slice(JOIN *join_arg, uint new_v) : join(join_arg), saved(join->get_ref_item_slice()) { if (!join->ref_items[new_v].is_null()) join->set_ref_item_slice(new_v); } ~Switch_ref_item_slice() { join->set_ref_item_slice(saved); } }; /** RAII class to ease the call of LEX::mark_broken() if error. Used during preparation and optimization of DML queries. 
*/ class Prepare_error_tracker { public: Prepare_error_tracker(THD *thd_arg) : thd(thd_arg) {} ~Prepare_error_tracker() { if (unlikely(thd->is_error())) thd->lex->mark_broken(); } private: THD *const thd; }; bool uses_index_fields_only(Item *item, TABLE *tbl, uint keyno, bool other_tbls_ok); bool remove_eq_conds(THD *thd, Item *cond, Item **retcond, Item::cond_result *cond_value); bool optimize_cond(THD *thd, Item **conds, COND_EQUAL **cond_equal, List *join_list, Item::cond_result *cond_value); Item *substitute_for_best_equal_field(THD *thd, Item *cond, COND_EQUAL *cond_equal, JOIN_TAB **table_join_idx); bool build_equal_items(THD *thd, Item *cond, Item **retcond, COND_EQUAL *inherited, bool do_inherit, List *join_list, COND_EQUAL **cond_equal_ref); bool is_indexed_agg_distinct(JOIN *join, List *out_args); Key_use_array *create_keyuse_for_table(THD *thd, uint keyparts, Item_field **fields, List outer_exprs); Item_field *get_best_field(Item_field *item_field, COND_EQUAL *cond_equal); Item *make_cond_for_table(THD *thd, Item *cond, table_map tables, table_map used_table, bool exclude_expensive_cond); uint build_bitmap_for_nested_joins(List *join_list, uint first_unused); /** Create an order list that consists of all non-const fields and items. This is usable for e.g. converting DISTINCT into GROUP or ORDER BY. Try to put the items in "order_list" first, to allow one to optimize away a later ORDER BY. */ ORDER *create_order_from_distinct(THD *thd, Ref_item_array ref_item_array, ORDER *order_list, List &fields, bool skip_aggregates, bool convert_bit_fields_to_long, bool *all_order_by_fields_used); /** Returns true if arguments are a temporal Field having no date, part and a temporal expression having a date part. 
@param f Field @param v Expression */ inline bool field_time_cmp_date(const Field *f, const Item *v) { return f->is_temporal() && !f->is_temporal_with_date() && v->is_temporal_with_date(); } bool substitute_gc(THD *thd, SELECT_LEX *select_lex, Item *where_cond, ORDER *group_list, ORDER *order); /// RAII class to manage JOIN::deps_of_remaining_lateral_derived_tables class Deps_of_remaining_lateral_derived_tables { JOIN *join; table_map saved; /// All lateral tables not part of this map should be ignored table_map plan_tables; public: /** Constructor. @param j the JOIN @param plan_tables_arg @see JOIN::deps_of_remaining_lateral_derived_tables */ Deps_of_remaining_lateral_derived_tables(JOIN *j, table_map plan_tables_arg) : join(j), saved(join->deps_of_remaining_lateral_derived_tables), plan_tables(plan_tables_arg) {} ~Deps_of_remaining_lateral_derived_tables() { restore(); } void restore() { join->deps_of_remaining_lateral_derived_tables = saved; } void assert_unchanged() { DBUG_ASSERT(join->deps_of_remaining_lateral_derived_tables == saved); } void recalculate(uint next_idx) { if (join->has_lateral) /* No cur_tab given, so assume we start from a place in the plan which may be backward or forward compared to where we were before: recalculate. */ join->recalculate_deps_of_remaining_lateral_derived_tables(plan_tables, next_idx); } void recalculate(JOIN_TAB *cur_tab, uint next_idx) { /* We have just added cur_tab to the plan; if it's not lateral, the map doesn't change, no need to recalculate it. */ if (join->has_lateral && cur_tab->table_ref->is_derived() && cur_tab->table_ref->derived_unit()->m_lateral_deps) recalculate(next_idx); } void init() { // Normally done once in a run of JOIN::optimize(). if (join->has_lateral) { recalculate(join->const_tables); // Forget stale value: saved = join->deps_of_remaining_lateral_derived_tables; } } }; /** Estimates how many times a subquery will be executed as part of a query execution. 
If it is a cacheable subquery, the estimate tells how many times the subquery will be executed if it is not cached. @param[in] subquery the Item that represents the subquery @param[in,out] trace optimizer trace context @return the number of times the subquery is expected to be executed */ double calculate_subquery_executions(const Item_subselect *subquery, Opt_trace_context *trace); /** Class which presents a view of the current candidate table order for a JOIN. */ class Candidate_table_order { public: Candidate_table_order(const JOIN *join) : m_join(join) {} /// Returns the number of tables in the candidate plan. size_t size() const { return m_join->tables; } /// Returns the table reference at the given position in the candidate plan. const TABLE_LIST *table_ref(size_t position) const { return m_join->positions[position].table->table_ref; } private: const JOIN *const m_join; }; extern const char *antijoin_null_cond; /** Checks if an Item, which is constant for execution, can be evaluated during optimization. It cannot be evaluated if it contains a subquery and the OPTION_NO_SUBQUERY_DURING_OPTIMIZATION query option is active. @param item the Item to check @param select the query block that contains the Item @return false if this Item contains a subquery and subqueries cannot be evaluated during optimization, or true otherwise */ bool evaluate_during_optimization(const Item *item, const SELECT_LEX *select); #endif /* SQL_OPTIMIZER_INCLUDED */