Note:
This topic has been translated from a Chinese forum by GPT and might contain errors.Original topic: SQL查询结果不一致,会少数据

【TiDB Usage Environment】Production Environment
【TiDB Version】v6.5.3
【Situation Description】
- Forced to use TiFlash MPP through parameter settings
- Inconsistent query results multiple times, missing one fixed data row
- If the cluster automatically selects the query engine, the results are normal
- If the 3 left joins in the SQL are rewritten into two and then unioned, the results are normal
【explain analyze execution plan comparison】
【SQL as follows】
SELECT
full_data.field1 AS field1,
full_data.field2 AS field2,
(CASE WHEN t1.total IS NULL THEN 0 ELSE t1.total END) +(CASE WHEN t2.total IS NULL THEN 0 ELSE t2.total END) AS total
FROM
(SELECT
CASE
WHEN group0.group_id IS NOT NULL THEN CONCAT(group0.group_id, '(组)')
ELSE dimension0.`patent_type`
END AS field1,
CASE
WHEN group1.group_id IS NOT NULL THEN CONCAT(group1.group_id, '(组)')
ELSE dimension1.`country`
END AS field2
FROM dw_ext.`t_1692603597355` t
INNER JOIN ads_tab11 dimension0 ON dimension0.pdoc_id = t.pdoc_id
LEFT JOIN biz.ads_tab22 group0 ON group0.group_value = dimension0.`patent_type` and group0.field = 'PATENT_TYPE' and group0.identifier_id = '0fb4b7e7fe0642618a1e7288faf7d8ae'
INNER JOIN ads_tab33 dimension1 ON dimension1.pdoc_id = t.pdoc_id
LEFT JOIN biz.ads_tab22 group1 ON group1.group_value = dimension1.`country` and group1.field = 'COUNTRY' and group1.identifier_id = '0fb4b7e7fe0642618a1e7288faf7d8ae'
WHERE 1=1
AND dimension0.`patent_type` in ('A','B','U','D') AND dimension0.`patent_type` IS NOT NULL
AND dimension1.`country` in ('US','CN') AND dimension1.`country` IS NOT NULL
GROUP BY field1,field2
HAVING COUNT(t.pdoc_id) > 0
) full_data
LEFT JOIN
(
SELECT field1,field2,COUNT(`pn`) AS total
FROM (
SELECT
CASE
WHEN group0.group_id IS NOT NULL THEN CONCAT(group0.group_id, '(组)')
ELSE dimension0.`patent_type`
END AS field1,
CASE
WHEN group1.group_id IS NOT NULL THEN CONCAT(group1.group_id, '(组)')
ELSE dimension1.`country`
END AS field2,dimension1.`pn`
FROM dw_ext.`t_1692603597355` t
INNER JOIN ads_tab11 dimension0 ON dimension0.pdoc_id = t.pdoc_id
LEFT JOIN biz.ads_tab22 group0 ON group0.group_value = dimension0.`patent_type` and group0.field = 'PATENT_TYPE' and group0.identifier_id = '0fb4b7e7fe0642618a1e7288faf7d8ae'
INNER JOIN ads_tab33 dimension1 ON dimension1.pdoc_id = t.pdoc_id
LEFT JOIN biz.ads_tab22 group1 ON group1.group_value = dimension1.`country` and group1.field = 'COUNTRY' and group1.identifier_id = '0fb4b7e7fe0642618a1e7288faf7d8ae'
WHERE 1=1
AND dimension0.`patent_type` in ('A','B','U','D') AND dimension0.`patent_type` IS NOT NULL
AND dimension1.`country` in ('US','CN') AND dimension1.`country` IS NOT NULL
AND dimension1.`pn` IS NOT NULL
AND dimension1.`country` IN ('US')
GROUP BY t.pdoc_id, dimension1.`pn`,field1,field2
) T
GROUP BYfield1,field2
ORDER BY total DESC,field1 ASC,field2 ASC
) t1 ON
t1.field1 = full_data.field1 AND
t1.field2 = full_data.field2
LEFT JOIN
(
SELECT field1,field2,COUNT(`pn`) AS total
FROM (
SELECT
CASE
WHEN group0.group_id IS NOT NULL THEN CONCAT(group0.group_id, '(组)')
ELSE dimension0.`patent_type`
END AS field1,
CASE
WHEN group1.group_id IS NOT NULL THEN CONCAT(group1.group_id, '(组)')
ELSE dimension1.`country`
END AS field2,dimension1.`pn`
FROM
dw_ext.`t_1692603597355` t
INNER JOIN ads_tab11 dimension0 ON dimension0.pdoc_id = t.pdoc_id
LEFT JOIN biz.ads_tab22 group0 ON group0.group_value = dimension0.`patent_type` and group0.field = 'PATENT_TYPE' and group0.identifier_id = '0fb4b7e7fe0642618a1e7288faf7d8ae'
INNER JOIN ads_tab33 dimension1 ON dimension1.pdoc_id = t.pdoc_id
LEFT JOIN biz.ads_tab22 group1 ON group1.group_value = dimension1.`country` and group1.field = 'COUNTRY' and group1.identifier_id = '0fb4b7e7fe0642618a1e7288faf7d8ae'
WHERE 1=1
AND dimension0.`patent_type` in ('A','B','U','D') AND dimension0.`patent_type` IS NOT NULL
AND dimension1.`country` in ('US','CN') AND dimension1.`country` IS NOT NULL
AND dimension1.`pn` IS NOT NULL
AND dimension1.`country` IN ('CN')
GROUP BY t.pdoc_id, dimension1.`pn`,field1,field2
) T
GROUP BY field1,field2
ORDER BY total DESC,field1 ASC,field2 ASC
) t2 ON
t2.field1 = full_data.field1 AND
t2.field2 = full_data.field2
GROUP BY full_data.field1,full_data.field2
HAVING total > 0
ORDER BY total DESC,field1 ASC,field2 ASC
LIMIT 30000;