Ответ 1
Хорошо написанный вопрос, который показывает некоторые исследования.
Из любопытства я попробовал MySQL 5.6, чтобы узнать, что тут нужно сказать об этих запросах.
Во-первых, обратите внимание, что запросы разные:
- изменение значения от "1" до "-1" для существующего/несуществующего ключевой случай - это одно.
- изменение "second_1.num IS NOT NULL" на "second_1.num IS NULL" в предложении WHERE является другим.
Использование EXPLAIN дает разные планы:
EXPLAIN SELECT `first`.num
FROM `first`
LEFT JOIN `second` AS second_1 ON second_1.num = -1 # non-existent key
LEFT JOIN `second` AS second_2 ON second_2.num = -2 # non-existent key
LEFT JOIN `second` AS second_3 ON second_3.num = -3 # non-existent key
LEFT JOIN `second` AS second_4 ON second_4.num = -4 # non-existent key
LEFT JOIN `second` AS second_5 ON second_5.num = -5 # non-existent key
LEFT JOIN `second` AS second_6 ON second_6.num = -6 # non-existent key
WHERE second_1.num IS NULL
AND second_2.num IS NULL
AND second_3.num IS NULL
AND second_4.num IS NULL
AND second_5.num IS NULL
AND second_6.num IS NULL
;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE first index NULL key_num 4 NULL 1000 Using index
1 SIMPLE second_1 ref key_num key_num 4 const 1 Using where; Not exists; Using index
1 SIMPLE second_2 ref key_num key_num 4 const 1 Using where; Not exists; Using index
1 SIMPLE second_3 ref key_num key_num 4 const 1 Using where; Not exists; Using index
1 SIMPLE second_4 ref key_num key_num 4 const 1 Using where; Not exists; Using index
1 SIMPLE second_5 ref key_num key_num 4 const 1 Using where; Not exists; Using index
1 SIMPLE second_6 ref key_num key_num 4 const 1 Using where; Not exists; Using index
в отличие от
EXPLAIN SELECT `first`.num
FROM `first`
LEFT JOIN `second` AS second_1 ON second_1.num = 1 # existent key
LEFT JOIN `second` AS second_2 ON second_2.num = 2 # existent key
LEFT JOIN `second` AS second_3 ON second_3.num = 3 # existent key
LEFT JOIN `second` AS second_4 ON second_4.num = 4 # existent key
LEFT JOIN `second` AS second_5 ON second_5.num = 5 # existent key
LEFT JOIN `second` AS second_6 ON second_6.num = 6 # existent key
WHERE second_1.num IS NOT NULL
AND second_2.num IS NOT NULL
AND second_3.num IS NOT NULL
AND second_4.num IS NOT NULL
AND second_5.num IS NOT NULL
AND second_6.num IS NOT NULL
;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE second_1 ref key_num key_num 4 const 1 Using index
1 SIMPLE second_2 ref key_num key_num 4 const 1 Using index
1 SIMPLE second_3 ref key_num key_num 4 const 1 Using index
1 SIMPLE second_4 ref key_num key_num 4 const 1 Using index
1 SIMPLE second_5 ref key_num key_num 4 const 1 Using index
1 SIMPLE second_6 ref key_num key_num 4 const 1 Using index
1 SIMPLE first index NULL key_num 4 NULL 1000 Using index; Using join buffer (Block Nested Loop)
Используя формат JSON, мы имеем:
EXPLAIN FORMAT=JSON SELECT `first`.num
FROM `first`
LEFT JOIN `second` AS second_1 ON second_1.num = -1 # non-existent key
LEFT JOIN `second` AS second_2 ON second_2.num = -2 # non-existent key
LEFT JOIN `second` AS second_3 ON second_3.num = -3 # non-existent key
LEFT JOIN `second` AS second_4 ON second_4.num = -4 # non-existent key
LEFT JOIN `second` AS second_5 ON second_5.num = -5 # non-existent key
LEFT JOIN `second` AS second_6 ON second_6.num = -6 # non-existent key
WHERE second_1.num IS NULL
AND second_2.num IS NULL
AND second_3.num IS NULL
AND second_4.num IS NULL
AND second_5.num IS NULL
AND second_6.num IS NULL
;
EXPLAIN
{
"query_block": {
"select_id": 1,
"nested_loop": [
{
"table": {
"table_name": "first",
"access_type": "index",
"key": "key_num",
"key_length": "4",
"rows": 1000,
"filtered": 100,
"using_index": true
}
},
{
"table": {
"table_name": "second_1",
"access_type": "ref",
"possible_keys": [
"key_num"
],
"key": "key_num",
"key_length": "4",
"ref": [
"const"
],
"rows": 1,
"filtered": 100,
"not_exists": true,
"using_index": true,
"attached_condition": "<if>(found_match(second_1), isnull(`test`.`second_1`.`num`), true)"
}
},
{
"table": {
"table_name": "second_2",
"access_type": "ref",
"possible_keys": [
"key_num"
],
"key": "key_num",
"key_length": "4",
"ref": [
"const"
],
"rows": 1,
"filtered": 100,
"not_exists": true,
"using_index": true,
"attached_condition": "<if>(found_match(second_2), isnull(`test`.`second_2`.`num`), true)"
}
},
{
"table": {
"table_name": "second_3",
"access_type": "ref",
"possible_keys": [
"key_num"
],
"key": "key_num",
"key_length": "4",
"ref": [
"const"
],
"rows": 1,
"filtered": 100,
"not_exists": true,
"using_index": true,
"attached_condition": "<if>(found_match(second_3), isnull(`test`.`second_3`.`num`), true)"
}
},
{
"table": {
"table_name": "second_4",
"access_type": "ref",
"possible_keys": [
"key_num"
],
"key": "key_num",
"key_length": "4",
"ref": [
"const"
],
"rows": 1,
"filtered": 100,
"not_exists": true,
"using_index": true,
"attached_condition": "<if>(found_match(second_4), isnull(`test`.`second_4`.`num`), true)"
}
},
{
"table": {
"table_name": "second_5",
"access_type": "ref",
"possible_keys": [
"key_num"
],
"key": "key_num",
"key_length": "4",
"ref": [
"const"
],
"rows": 1,
"filtered": 100,
"not_exists": true,
"using_index": true,
"attached_condition": "<if>(found_match(second_5), isnull(`test`.`second_5`.`num`), true)"
}
},
{
"table": {
"table_name": "second_6",
"access_type": "ref",
"possible_keys": [
"key_num"
],
"key": "key_num",
"key_length": "4",
"ref": [
"const"
],
"rows": 1,
"filtered": 100,
"not_exists": true,
"using_index": true,
"attached_condition": "<if>(found_match(second_6), isnull(`test`.`second_6`.`num`), true)"
}
}
]
}
}
в отличие от
EXPLAIN FORMAT=JSON SELECT `first`.num
FROM `first`
LEFT JOIN `second` AS second_1 ON second_1.num = 1 # existent key
LEFT JOIN `second` AS second_2 ON second_2.num = 2 # existent key
LEFT JOIN `second` AS second_3 ON second_3.num = 3 # existent key
LEFT JOIN `second` AS second_4 ON second_4.num = 4 # existent key
LEFT JOIN `second` AS second_5 ON second_5.num = 5 # existent key
LEFT JOIN `second` AS second_6 ON second_6.num = 6 # existent key
WHERE second_1.num IS NOT NULL
AND second_2.num IS NOT NULL
AND second_3.num IS NOT NULL
AND second_4.num IS NOT NULL
AND second_5.num IS NOT NULL
AND second_6.num IS NOT NULL
;
EXPLAIN
{
"query_block": {
"select_id": 1,
"nested_loop": [
{
"table": {
"table_name": "second_1",
"access_type": "ref",
"possible_keys": [
"key_num"
],
"key": "key_num",
"key_length": "4",
"ref": [
"const"
],
"rows": 1,
"filtered": 100,
"using_index": true
}
},
{
"table": {
"table_name": "second_2",
"access_type": "ref",
"possible_keys": [
"key_num"
],
"key": "key_num",
"key_length": "4",
"ref": [
"const"
],
"rows": 1,
"filtered": 100,
"using_index": true
}
},
{
"table": {
"table_name": "second_3",
"access_type": "ref",
"possible_keys": [
"key_num"
],
"key": "key_num",
"key_length": "4",
"ref": [
"const"
],
"rows": 1,
"filtered": 100,
"using_index": true
}
},
{
"table": {
"table_name": "second_4",
"access_type": "ref",
"possible_keys": [
"key_num"
],
"key": "key_num",
"key_length": "4",
"ref": [
"const"
],
"rows": 1,
"filtered": 100,
"using_index": true
}
},
{
"table": {
"table_name": "second_5",
"access_type": "ref",
"possible_keys": [
"key_num"
],
"key": "key_num",
"key_length": "4",
"ref": [
"const"
],
"rows": 1,
"filtered": 100,
"using_index": true
}
},
{
"table": {
"table_name": "second_6",
"access_type": "ref",
"possible_keys": [
"key_num"
],
"key": "key_num",
"key_length": "4",
"ref": [
"const"
],
"rows": 1,
"filtered": 100,
"using_index": true
}
},
{
"table": {
"table_name": "first",
"access_type": "index",
"key": "key_num",
"key_length": "4",
"rows": 1000,
"filtered": 100,
"using_index": true,
"using_join_buffer": "Block Nested Loop"
}
}
]
}
}
Глядя на таблицу io, инструментализованную схемой производительности во время выполнения, мы имеем:
truncate table performance_schema.objects_summary_global_by_type;
select * from performance_schema.objects_summary_global_by_type
where OBJECT_NAME in ("first", "second");
OBJECT_TYPE OBJECT_SCHEMA OBJECT_NAME COUNT_STAR SUM_TIMER_WAIT MIN_TIMER_WAIT AVG_TIMER_WAIT MAX_TIMER_WAIT
TABLE test first 0 0 0 0 0
TABLE test second 0 0 0 0 0
SELECT `first`.num
FROM `first`
LEFT JOIN `second` AS second_1 ON second_1.num = -1 # non-existent key
LEFT JOIN `second` AS second_2 ON second_2.num = -2 # non-existent key
LEFT JOIN `second` AS second_3 ON second_3.num = -3 # non-existent key
LEFT JOIN `second` AS second_4 ON second_4.num = -4 # non-existent key
LEFT JOIN `second` AS second_5 ON second_5.num = -5 # non-existent key
LEFT JOIN `second` AS second_6 ON second_6.num = -6 # non-existent key
WHERE second_1.num IS NULL
AND second_2.num IS NULL
AND second_3.num IS NULL
AND second_4.num IS NULL
AND second_5.num IS NULL
AND second_6.num IS NULL
;
(...)
select * from performance_schema.objects_summary_global_by_type
where OBJECT_NAME in ("first", "second");
OBJECT_TYPE OBJECT_SCHEMA OBJECT_NAME COUNT_STAR SUM_TIMER_WAIT MIN_TIMER_WAIT AVG_TIMER_WAIT MAX_TIMER_WAIT
TABLE test first 1003 5705014442 1026171 5687889 87356557
TABLE test second 6012 271786533972 537266 45207298 1123939292
в отличие от:
select * from performance_schema.objects_summary_global_by_type
where OBJECT_NAME in ("first", "second");
OBJECT_TYPE OBJECT_SCHEMA OBJECT_NAME COUNT_STAR SUM_TIMER_WAIT MIN_TIMER_WAIT AVG_TIMER_WAIT MAX_TIMER_WAIT
TABLE test first 1003 5211074603 969338 5195454 61066176
TABLE test second 24 458656783 510085 19110361 66229860
Запрос, который масштабируется, практически не имеет таблицы IO в таблице second
.
Запрос, который не масштабируется, делает 6K table IO в таблице second
, или в 6 раз больше размера таблицы first
.
Это потому, что планы запросов разные, в свою очередь, потому что запросы разные (IS NOT NULL по сравнению с IS NULL).
Я думаю, что отвечает на вопрос, связанный с производительностью.
Обратите внимание, что оба запроса возвращают 1000 строк в моих тестах, которые могут быть не такими, какие вы хотите. Прежде чем настраивать запрос, чтобы сделать его быстрее, убедитесь, что он работает так, как ожидалось.