MySQL 8.0.13及更高版本支持函数索引,MySQL5.7-MySQL8.0.12通过MySQL创建虚拟列的方式来实现函数索引.
数据为美联航 1987年10月至1992年12月数据
数据量为26995097条
表结构
root@localhost 11:13: [test]> show create table ontime \G;
*************************** 1. row ***************************
Table: ontime
Create Table: CREATE TABLE `ontime` (
`Year` varchar(10) DEFAULT NULL,
`Quarter` tinyint DEFAULT NULL,
`Month` varchar(10) DEFAULT NULL,
`DayofMonth` tinyint DEFAULT NULL,
`DayOfWeek` tinyint DEFAULT NULL,
`FlightDate` date DEFAULT NULL,
`UniqueCarrier` char(7) DEFAULT NULL,
`AirlineID` int DEFAULT NULL,
`Carrier` char(2) DEFAULT NULL,
`TailNum` varchar(50) DEFAULT NULL,
`FlightNum` varchar(10) DEFAULT NULL,
`OriginAirportID` int DEFAULT NULL,
`OriginAirportSeqID` int DEFAULT NULL,
`OriginCityMarketID` int DEFAULT NULL,
`Origin` char(5) DEFAULT NULL,
`OriginCityName` varchar(100) DEFAULT NULL,
`OriginState` char(2) DEFAULT NULL,
`OriginStateFips` varchar(10) DEFAULT NULL,
`OriginStateName` varchar(100) DEFAULT NULL,
`OriginWac` int DEFAULT NULL,
`DestAirportID` int DEFAULT NULL,
`DestAirportSeqID` int DEFAULT NULL,
`DestCityMarketID` int DEFAULT NULL,
`Dest` char(5) DEFAULT NULL,
`DestCityName` varchar(100) DEFAULT NULL,
`DestState` char(2) DEFAULT NULL,
`DestStateFips` varchar(10) DEFAULT NULL,
`DestStateName` varchar(100) DEFAULT NULL,
`DestWac` int DEFAULT NULL,
`CRSDepTime` int DEFAULT NULL,
`DepTime` int DEFAULT NULL,
`DepDelay` int DEFAULT NULL,
`DepDelayMinutes` int DEFAULT NULL,
`DepDel15` int DEFAULT NULL,
`DepartureDelayGroups` int DEFAULT NULL,
`DepTimeBlk` varchar(20) DEFAULT NULL,
`TaxiOut` int DEFAULT NULL,
`WheelsOff` int DEFAULT NULL,
`WheelsOn` int DEFAULT NULL,
`TaxiIn` int DEFAULT NULL,
`CRSArrTime` int DEFAULT NULL,
`ArrTime` int DEFAULT NULL,
`ArrDelay` int DEFAULT NULL,
`ArrDelayMinutes` int DEFAULT NULL,
`ArrDel15` int DEFAULT NULL,
`ArrivalDelayGroups` int DEFAULT NULL,
`ArrTimeBlk` varchar(20) DEFAULT NULL,
`Cancelled` tinyint DEFAULT NULL,
`CancellationCode` char(1) DEFAULT NULL,
`Diverted` tinyint DEFAULT NULL,
`CRSElapsedTime` int DEFAULT NULL,
`ActualElapsedTime` int DEFAULT NULL,
`AirTime` int DEFAULT NULL,
`Flights` int DEFAULT NULL,
`Distance` int DEFAULT NULL,
`DistanceGroup` tinyint DEFAULT NULL,
`CarrierDelay` int DEFAULT NULL,
`WeatherDelay` int DEFAULT NULL,
`NASDelay` int DEFAULT NULL,
`SecurityDelay` int DEFAULT NULL,
`LateAircraftDelay` int DEFAULT NULL,
`FirstDepTime` varchar(10) DEFAULT NULL,
`TotalAddGTime` varchar(10) DEFAULT NULL,
`LongestAddGTime` varchar(10) DEFAULT NULL,
`DivAirportLandings` varchar(10) DEFAULT NULL,
`DivReachedDest` varchar(10) DEFAULT NULL,
`DivActualElapsedTime` varchar(10) DEFAULT NULL,
`DivArrDelay` varchar(10) DEFAULT NULL,
`DivDistance` varchar(10) DEFAULT NULL,
`Div1Airport` varchar(10) DEFAULT NULL,
`Div1AirportID` int DEFAULT NULL,
`Div1AirportSeqID` int DEFAULT NULL,
`Div1WheelsOn` varchar(10) DEFAULT NULL,
`Div1TotalGTime` varchar(10) DEFAULT NULL,
`Div1LongestGTime` varchar(10) DEFAULT NULL,
`Div1WheelsOff` varchar(10) DEFAULT NULL,
`Div1TailNum` varchar(10) DEFAULT NULL,
`Div2Airport` varchar(10) DEFAULT NULL,
`Div2AirportID` int DEFAULT NULL,
`Div2AirportSeqID` int DEFAULT NULL,
`Div2WheelsOn` varchar(10) DEFAULT NULL,
`Div2TotalGTime` varchar(10) DEFAULT NULL,
`Div2LongestGTime` varchar(10) DEFAULT NULL,
`Div2WheelsOff` varchar(10) DEFAULT NULL,
`Div2TailNum` varchar(10) DEFAULT NULL,
`Div3Airport` varchar(10) DEFAULT NULL,
`Div3AirportID` int DEFAULT NULL,
`Div3AirportSeqID` int DEFAULT NULL,
`Div3WheelsOn` varchar(10) DEFAULT NULL,
`Div3TotalGTime` varchar(10) DEFAULT NULL,
`Div3LongestGTime` varchar(10) DEFAULT NULL,
`Div3WheelsOff` varchar(10) DEFAULT NULL,
`Div3TailNum` varchar(10) DEFAULT NULL,
`Div4Airport` varchar(10) DEFAULT NULL,
`Div4AirportID` int DEFAULT NULL,
`Div4AirportSeqID` int DEFAULT NULL,
`Div4WheelsOn` varchar(10) DEFAULT NULL,
`Div4TotalGTime` varchar(10) DEFAULT NULL,
`Div4LongestGTime` varchar(10) DEFAULT NULL,
`Div4WheelsOff` varchar(10) DEFAULT NULL,
`Div4TailNum` varchar(10) DEFAULT NULL,
`Div5Airport` varchar(10) DEFAULT NULL,
`Div5AirportID` int DEFAULT NULL,
`Div5AirportSeqID` int DEFAULT NULL,
`Div5WheelsOn` varchar(10) DEFAULT NULL,
`Div5TotalGTime` varchar(10) DEFAULT NULL,
`Div5LongestGTime` varchar(10) DEFAULT NULL,
`Div5WheelsOff` varchar(10) DEFAULT NULL,
`Div5TailNum` varchar(10) DEFAULT NULL,
`id` int NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`id`),
KEY `idx_year_month` (`Year`,`Month`),
KEY `idx1` ((concat(`Year`,`Month`))),
KEY `idx2` ((substr(`FlightDate`,1,7)))
) ENGINE=InnoDB AUTO_INCREMENT=26995098 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci
1 row in set (0.00 sec)
root@localhost 11:13: [test]> select * from ontime limit 1\G;
*************************** 1. row ***************************
Year: 1987
Quarter: 4
Month: 10
DayofMonth: 30
DayOfWeek: 5
FlightDate: 1987-10-30
UniqueCarrier: PS
AirlineID: 19391
Carrier: PS
TailNum:
FlightNum: 1442
OriginAirportID: 13796
OriginAirportSeqID: 1379601
OriginCityMarketID: 32457
Origin: OAK
OriginCityName: Oakland, CA
OriginState: CA
OriginStateFips: 06
OriginStateName: California
OriginWac: 91
DestAirportID: 10800
DestAirportSeqID: 1080001
DestCityMarketID: 32575
Dest: BUR
DestCityName: Burbank, CA
DestState: CA
DestStateFips: 06
DestStateName: California
DestWac: 91
CRSDepTime: 937
DepTime: 936
DepDelay: -1
DepDelayMinutes: 0
DepDel15: 0
DepartureDelayGroups: -1
DepTimeBlk: 0900-0959
TaxiOut: 0
WheelsOff: 0
WheelsOn: 0
TaxiIn: 0
CRSArrTime: 1040
ArrTime: 1046
ArrDelay: 6
ArrDelayMinutes: 6
ArrDel15: 0
ArrivalDelayGroups: 0
ArrTimeBlk: 1000-1059
Cancelled: 0
CancellationCode:
Diverted: 0
CRSElapsedTime: 63
ActualElapsedTime: 70
AirTime: 0
Flights: 1
Distance: 325
DistanceGroup: 2
CarrierDelay: 0
WeatherDelay: 0
NASDelay: 0
SecurityDelay: 0
LateAircraftDelay: 0
FirstDepTime:
TotalAddGTime:
LongestAddGTime:
DivAirportLandings:
DivReachedDest:
DivActualElapsedTime:
DivArrDelay:
DivDistance:
Div1Airport:
Div1AirportID: 0
Div1AirportSeqID: 0
Div1WheelsOn:
Div1TotalGTime:
Div1LongestGTime:
Div1WheelsOff:
Div1TailNum:
Div2Airport:
Div2AirportID: 0
Div2AirportSeqID: 0
Div2WheelsOn:
Div2TotalGTime:
Div2LongestGTime:
Div2WheelsOff:
Div2TailNum:
Div3Airport:
Div3AirportID: 0
Div3AirportSeqID: 0
Div3WheelsOn:
Div3TotalGTime:
Div3LongestGTime:
Div3WheelsOff:
Div3TailNum:
Div4Airport:
Div4AirportID: 0
Div4AirportSeqID: 0
Div4WheelsOn:
Div4TotalGTime:
Div4LongestGTime:
Div4WheelsOff:
Div4TailNum:
Div5Airport:
Div5AirportID: 0
Div5AirportSeqID: 0
Div5WheelsOn:
Div5TotalGTime:
Div5LongestGTime:
Div5WheelsOff:
Div5TailNum:
id: 1
1 row in set (0.00 sec)
以下通过不同方法对每月航班起降数进行查询
1.通过函数索引的方式
方式1.将Year
,Month
建concat函数索引通过concat方式查询
索引
KEY `idx1` ((concat(`Year`,`Month`)))
sql执行时间59.77秒
select concat(Year,Month),count(*) from ontime group by concat(Year,Month)
执行计划
root@localhost 11:24: [test]> explain analyze select concat(Year,Month),count(*) from ontime group by concat(Year,Month)\G;
*************************** 1. row ***************************
EXPLAIN: -> Group aggregate: count(0) (actual time=792.919..61206.119 rows=63 loops=1)
-> Index scan on ontime using idx1 (cost=2811420.62 rows=26160759) (actual time=0.439..57474.986 rows=26995096 loops=1)
方式2.建立FlightDate的函数索引
KEY `idx2` ((substr(`FlightDate`,1,7)))
select substr(`FlightDate`,1,7) from ontime group by substr(`FlightDate`,1,7)
执行时间58.35秒
执行计划
root@localhost 11:34: [test]> explain analyze select substr(`FlightDate`,1,7),count(*) from ontime group by substr(`FlightDate`,1,7)\G;
*************************** 1. row ***************************
EXPLAIN: -> Group aggregate: count(0) (actual time=793.544..60148.968 rows=63 loops=1)
-> Index scan on ontime using idx2 (cost=2811420.62 rows=26160759) (actual time=0.330..56187.935 rows=26995096 loops=1)
2.通过虚拟列的方式
alter table ontime add column `tdate1` varchar(20) GENERATED ALWAYS AS (concat(`Year`,`Month`)) VIRTUAL;
建立索引
create index idx3 on ontime(tdate1)
执行时间7.7秒
select count(*),tdate1 from ontime group by tdate1;
执行计划
root@localhost 12:24: [test]> explain analyze select count(*),tdate1 from ontime group by tdate1\G;
*************************** 1. row ***************************
EXPLAIN: -> Group aggregate: count(0) (actual time=170.576..9275.766 rows=63 loops=1)
-> Index scan on ontime using idx3 (cost=2786889.85 rows=26160030) (actual time=0.046..5824.499 rows=26995096 loops=1)
3.通过先分组再进行concat的方式
执行时间9.07秒
select concat(t1.Year,t1.Month),t1.c1 from (select Year,Month,count(*) c1 from ontime group by Year,Month) t1
执行计划
root@localhost 13:39: [test]> explain analyze select concat(t1.Year,t1.Month),t1.c1 from (select Year,Month,count(*) c1 from ontime group by Year,Month) t1\G;
*************************** 1. row ***************************
EXPLAIN: -> Table scan on t1 (actual time=0.001..0.008 rows=63 loops=1)
-> Materialize (actual time=10521.980..10521.990 rows=63 loops=1)
-> Group aggregate: count(0) (actual time=184.608..10521.799 rows=63 loops=1)
-> Index scan on ontime using idx_year_month (cost=2786889.85 rows=26160030) (actual time=0.057..6632.761 rows=26995096 loops=1)
结论:虽然从执行计划上看cost似乎区别不大,但是虚拟列在进行group by分组查询时执行时间最短,另外建议类似查询可利用clickhouse,oracle in memory,以及tiflash等列式存储来进行查询获得更快的速度.
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。