JOIN 和子查询

最近更新时间:2024-04-17 17:02:01

我的收藏
TDSQL 支持对 SELECT 语句、多表 DELETE 和 UPDATE 操作的 join。

分表间 join 示例

如果分表之间带有分表键相等的条件,则相当于单机 join。
--构建两张测试表:
DROP TABLE IF EXISTS `test_join_shard_table1`;
CREATE TABLE `test_join_shard_table1` (
`id` int(10) NOT NULL,
`b` varchar(10) NOT NULL DEFAULT '',
`c` int(10) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin shardkey=id;
INSERT INTO test_join_shard_table1 (id, b, c) VALUES
(1,"test1",1), (2,"test2",2), (3,"test3",3),
(4,"test4",4), (5,"test5",5), (6,"test6",6),
(7,"test7",7), (8,"test8",8), (9,"testX",11);

DROP TABLE IF EXISTS `test_join_shard_table2`;
CREATE TABLE `test_join_shard_table2` (
`id` int(10) NOT NULL,
`d` datetime,
`c` int(10) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin shardkey=id;
INSERT INTO test_join_shard_table2 (id, d, c) VALUES
(1,NOW(),1), (2,NOW(),2), (3,NOW(),3),
(4,NOW(),4), (5,NOW(),5), (6,NOW(),6),
(7,NOW(),7), (8,NOW(),8), (9,NOW(),10);

--检查分布式测试表的数据分布情况:
/*sets:allsets*/ select * from test_join_shard_table1;
/*sets:allsets*/ select * from test_join_shard_table2;

--执行带INNER JOIN的SELECT查询语句
SELECT test1.id, test1.b AS NAME, test2.d AS TIME
FROM test_join_shard_table1 test1
INNER JOIN test_join_shard_table2 test2
ON test1.c=test2.c
ORDER BY NAME;

--执行带LEFT JOIN的SELECT查询语句
SELECT test1.id, test1.b AS NAME, test2.d AS TIME
FROM test_join_shard_table1 test1
LEFT JOIN test_join_shard_table2 test2
ON test1.c=test2.c
ORDER BY NAME;

--执行带RIGHT JOIN的SELECT查询语句
SELECT test1.id, test1.b AS NAME, test2.d AS TIME
FROM test_join_shard_table1 test1
RIGHT JOIN test_join_shard_table2 test2
ON test1.c=test2.c
ORDER BY NAME;

--执行带FULL JOIN的SELECT查询语句,笛卡尔积
SELECT test1.id, test1.b AS NAME, test2.d AS TIME
FROM test_join_shard_table1 test1
CROSS JOIN test_join_shard_table2 test2
ORDER BY NAME;

分表和广播表 join 示例

跨分片的分表与广播表,效果相当于单机 join。
--构建两张测试表:
DROP TABLE IF EXISTS `test_join_shard_table1`;
CREATE TABLE `test_join_shard_table1` (
`id` int(10) NOT NULL,
`b` varchar(10) NOT NULL DEFAULT '',
`c` int(10) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin shardkey=id;
INSERT INTO test_join_shard_table1 (id, b, c) VALUES
(1,"test1",1), (2,"test2",2), (3,"test3",3),
(4,"test4",4), (5,"test5",5), (6,"test6",6),
(7,"test7",7), (8,"test8",8), (9,"testX",11);

DROP TABLE IF EXISTS `test_join_group_table2`;
CREATE TABLE `test_join_group_table2` (
`id` int(10) NOT NULL,
`d` datetime,
`c` int(10) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin shardkey=noshardkey_allset;
INSERT INTO test_join_group_table2 (id, d, c) VALUES
(1,NOW(),1), (2,NOW(),2), (3,NOW(),3),
(4,NOW(),4), (5,NOW(),5), (6,NOW(),6),
(7,NOW(),7), (8,NOW(),8), (9,NOW(),10);

--检查分布式测试表的数据分布情况:
/*sets:allsets*/ select * from test_join_shard_table1;
/*sets:allsets*/ select * from test_join_group_table2;

--执行带INNER JOIN的SELECT查询语句
SELECT test1.id, test1.b AS NAME, test2.d AS TIME
FROM test_join_shard_table1 test1
INNER JOIN test_join_group_table2 test2
ON test1.c=test2.c
ORDER BY NAME;

--执行带LEFT JOIN的SELECT查询语句
SELECT test1.id, test1.b AS NAME, test2.d AS TIME
FROM test_join_shard_table1 test1
LEFT JOIN test_join_group_table2 test2
ON test1.c=test2.c
ORDER BY NAME;

--执行带RIGHT JOIN的SELECT查询语句
SELECT test1.id, test1.b AS NAME, test2.d AS TIME
FROM test_join_shard_table1 test1
RIGHT JOIN test_join_group_table2 test2
ON test1.c=test2.c
ORDER BY NAME;

--执行带FULL JOIN的SELECT查询语句,笛卡尔积
SELECT test1.id, test1.b AS NAME, test2.d AS TIME
FROM test_join_shard_table1 test1
CROSS JOIN test_join_group_table2 test2
ORDER BY NAME;

分表和单表 join 示例

--构建两张测试表:
DROP TABLE IF EXISTS `test_join_shard_table1`;
CREATE TABLE `test_join_shard_table1` (
`id` int(10) NOT NULL,
`b` varchar(10) NOT NULL DEFAULT '',
`c` int(10) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin shardkey=id;
INSERT INTO test_join_shard_table1 (id, b, c) VALUES
(1,"test1",1), (2,"test2",2), (3,"test3",3),
(4,"test4",4), (5,"test5",5), (6,"test6",6),
(7,"test7",7), (8,"test8",8), (9,"testX",11);

DROP TABLE IF EXISTS `test_join_noshard_table2`;
CREATE TABLE `test_join_noshard_table2` (
`id` int(10) NOT NULL,
`d` datetime,
`c` int(10) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
INSERT INTO test_join_noshard_table2 (id, d, c) VALUES
(1,NOW(),1), (2,NOW(),2), (3,NOW(),3),
(4,NOW(),4), (5,NOW(),5), (6,NOW(),6),
(7,NOW(),7), (8,NOW(),8), (9,NOW(),10);

--检查分布式测试表的数据分布情况:
/*sets:allsets*/ select * from test_join_shard_table1;
--检查单片表的数据:
select * from test_join_noshard_table2;

--执行带INNER JOIN的SELECT查询语句
SELECT test1.id, test1.b AS NAME, test2.d AS TIME
FROM test_join_shard_table1 test1
INNER JOIN test_join_noshard_table2 test2
ON test1.c=test2.c
ORDER BY NAME;

--执行带LEFT JOIN的SELECT查询语句
SELECT test1.id, test1.b AS NAME, test2.d AS TIME
FROM test_join_shard_table1 test1
LEFT JOIN test_join_noshard_table2 test2
ON test1.c=test2.c
ORDER BY NAME;

--执行带RIGHT JOIN的SELECT查询语句
SELECT test1.id, test1.b AS NAME, test2.d AS TIME
FROM test_join_shard_table1 test1
RIGHT JOIN test_join_noshard_table2 test2
ON test1.c=test2.c
ORDER BY NAME;

--执行带FULL JOIN的SELECT查询语句,笛卡尔积
SELECT test1.id, test1.b AS NAME, test2.d AS TIME
FROM test_join_shard_table1 test1
CROSS JOIN test_join_noshard_table2 test2
ORDER BY NAME;

跨分片 update/delete join 示例

--创建测试表:
DROP TABLE IF EXISTS `test_join_shard_table1`;
CREATE TABLE `test_join_shard_table1` (
`id` int(10) NOT NULL,
`b` varchar(10) NOT NULL DEFAULT '',
`c` int(10) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin shardkey=id;
INSERT INTO test_join_shard_table1 (id, b, c) VALUES
(1,"test1",1), (2,"test2",2), (3,"test3",3),
(4,"test4",4), (5,"test5",5), (6,"test6",6),
(7,"test7",7), (8,"test8",8), (9,"testX",11);

DROP TABLE IF EXISTS `test_join_shard_table2`;
CREATE TABLE `test_join_shard_table2` (
`id` int(10) NOT NULL,
`d` datetime,
`c` int(10) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin shardkey=id;
INSERT INTO test_join_shard_table2 (id, d, c) VALUES
(1,NOW(),1), (2,NOW(),2), (3,NOW(),3),
(4,NOW(),4), (5,NOW(),5), (6,NOW(),6),
(7,NOW(),7), (8,NOW(),8), (9,NOW(),10);

--检测分布式测试表的数据分布情况
/*sets:allsets*/ select * from test_join_shard_table1;
/*sets:allsets*/ select * from test_join_shard_table2;

--UPDATE…JOIN…ON…SET语句,单字段:
UPDATE test_join_shard_table1 test1
INNER JOIN test_join_shard_table2 test2
ON test1.c=test2.c SET test1.b="TEXTXXXXX"
WHERE test1.id>7;
SELECT * FROM test_join_shard_table1;

--UPDATE…JOIN…ON…SET语句,同一表多字段
UPDATE test_join_shard_table1 test1
INNER JOIN test_join_shard_table2 test2
ON test1.c=test2.c
SET test1.b="TEXTSSSS", test1.c=88
WHERE test1.id>7;
SELECT * FROM test_join_shard_table1;

--DELETE…FROM…JOIN…ON语句
DELETE test1 FROM test_join_shard_table1 test1
INNER JOIN test_join_shard_table2 test2
ON test1.c=test2.c
WHERE test1.id>7;
SELECT * FROM test_join_shard_table1;

union 语法

UNION 将来自多个 SELECT 语句的结果组合到一个结果集中。
语法如下:
SELECT ...
UNION [ALL | DISTINCT] SELECT ...
[UNION [ALL | DISTINCT] SELECT ...]
示例:
DROP TABLE IF EXISTS t1;
create table t1 (a int primary key, b int) shardkey=a;
DROP TABLE IF EXISTS t2;
create table t2 (a int primary key, b int) shardkey=a;
select * from t1 where t1.a in (select a from t2) union select * from t2 where t2.a>22;
各种表的组合场景:
分表:
DROP TABLE IF EXISTS s1;
create table s1 (a int primary key, b int) shardkey=a;
DROP TABLE IF EXISTS s2;
create table s2 (a int primary key, b int) shardkey=a;
单表:
DROP TABLE IF EXISTS ns1;
create table ns1 (a int primary key, b int);
DROP TABLE IF EXISTS ns2;
create table ns2 (a int primary key, b int);
广播表:
DROP TABLE IF EXISTS g1;
create table g1 (a int primary key, b int) shardkey=noshardkey_allset;
DROP TABLE IF EXISTS g2;
create table g2 (a int primary key, b int) shardkey=noshardkey_allset;
二级分区表:
DROP TABLE IF EXISTS p1;
create table p1 (a int, b int, PRIMARY KEY(a)) shardkey=a PARTITION BY range (b) (PARTITION p0 values less than (100), PARTITION p1 values less than (200));
DROP TABLE IF EXISTS p2;
create table p2 (a int, b int, PRIMARY KEY(a)) shardkey=a PARTITION BY range (b) (PARTITION p0 values less than (100), PARTITION p1 values less than (200));

各种类型表之间的union
select * from s1 union select * from s2;
select * from ns1 union select * from ns2;
select * from g1 union select * from g2;
select * from s1 union select * from ns1;
select * from p1 union select * from p2;
select * from s1 where not exists (select * from s2 where s2.a=s1.a order by s2.a) or b<10 union select * from s2 where s2.a>22;
select a, sum(b) from s1 group by a union select * from s2;
select a, sum(b) from s1 union select * from s2;
select distinct(a) from s1 union select a from s2;
select distinct(a), b from s1 union select a,b from s2;

子查询带有 shardkey 的 derived table

mysql> select a from (select * from test1 where a=1) as t;
+---+
| a |
+---+
| 1 |
+---+
1 row in set (0.00 sec)
说明:
子查询时不指定 shardkey,也可查询结果。

复杂 SQL

对于不能满足推荐方式的 SQL,由于需要做跨节点的数据交互,所以性能会差一些。 包括:
包含子查询的查询。
多表的 join 查询,且参与查询的各表的分区字段(shardkey)不相等,或者同时涉及不同类型的表,例如单表和分表。
对于这类复杂查询,通过条件下推,将真正参与查询的数据从后端数据库中抽取出来,存放在本地的临时表中,然后对临时表中的数据进行计算。
因此用户需要明确指定参与查询的表的条件,避免因为抽取大量数据而性能受损。
mysql> create table test1 ( a int key, b int, c char(20) ) shardkey=a;
Query OK, 0 rows affected (1.56 sec)

mysql> create table test2 ( a int key, d int, e char(20) ) shardkey=a;
Query OK, 0 rows affected (1.46 sec)

mysql> insert into test1 (a,b,c) values(1,2,"record1"),(2,3,"record2");
Query OK, 2 rows affected (0.02 sec)

mysql> insert into test2 (a,d,e) values(1,3,"test2_record1"),(2,3,"test2_record2");
Query OK, 2 rows affected (0.02 sec)

mysql> select * from test1 join test2 on test1.b=test2.d;
+---+------+---------+---+------+---------------+
| a | b | c | a | d | e |
+---+------+---------+---+------+---------------+
| 2 | 3 | record2 | 1 | 3 | test2_record1 |
| 2 | 3 | record2 | 2 | 3 | test2_record2 |
+---+------+---------+---+------+---------------+
2 rows in set (0.00 sec)

MySQL> select * from test1 where exists (select * from test2 where test2.a=test1.b);
+---+------+---------+
| a | b | c |
+---+------+---------+
| 1 | 2 | record1 |
+---+------+---------+
1 row in set (0.00 sec)
分布式实例还支持丰富的复杂 update/delete/insert 操作。
需要注意的是,这类查询是在与之对应的 select 基础上实现的,因此也需要将数据加载至网关临时表,建议用户尽量在查询中指定明确的查询条件,避免大量数据的加载带来性能损耗。 另外,网关在加载数据时默认不会对加载的数据进行上锁,这与官方的 MySQL 行为存在略微的差异;如需加锁可以通过修改 proxy 配置来实现。
MySQL [th]> update test1 set test1.c="record" where exists(select 1 from test2 where test1.b=test2.d);
Query OK, 1 row affected (0.00 sec)

MySQL [th]> update test1, test2 set test1.b=2 where test1.b=test2.d;
Query OK, 1 row affected (0.00 sec)

MySQL [th]> insert into test1 select cast(rand()*1024 as unsigned), d, e from test2;
Query OK, 2 rows affected (0.00 sec)

MySQL [th]> delete from test1 where b in (select b from test2);
Query OK, 6 rows affected (0.00 sec)

MySQL [th]> delete from test2.* using test1 right join test2 on test1.a=test2.a where test1.a is null;
Query OK, 2 rows affected (0.00 sec)