In the previous chapter we saw that MySQL cannot use the INSTANT algorithm to extend a column's length and falls back to INPLACE. Since INPLACE sounds slower than INSTANT, it is tempting to conclude that INPLACE always does more than just update metadata. That conclusion is wrong.
In fact, INPLACE can also be metadata-only; extending a VARCHAR column's length is exactly such a case (the manual's online-DDL table lists "Extending VARCHAR column size" as in-place with no table rebuild).
Let's set up a test table:
create table db1.t20250730_3(c1 int, c2 varchar(20), c3 varchar(30));
insert into db1.t20250730_3 values(1,'r1c2','r1c3');
....
Then we use the following Python script to record a checksum for every page of the table's data file:
#!/usr/bin/env python
# check_page.py: print one MD5 per 16 KiB InnoDB page of the given file
import hashlib
import sys

filename = sys.argv[1]
with open(filename, 'rb') as f:
    while True:
        data = f.read(16384)  # default InnoDB page size
        if data == b'':
            break
        print(hashlib.md5(data).digest().hex())
Record the checksums before the change:
python3 check_page.py /data/mysql_3306/mysqldata/db1/t20250730_3.ibd > /tmp/t20250730_3_before.txt
Then extend the column length:
alter table db1.t20250730_3 modify column c2 varchar(50),ALGORITHM=INPLACE;
It finishes in an instant, which strongly suggests only metadata was touched. Now checksum the data file again and compare:
python3 check_page.py /data/mysql_3306/mysqldata/db1/t20250730_3.ibd > /tmp/t20250730_3_after.txt
diff /tmp/t20250730_3_before.txt /tmp/t20250730_3_after.txt
The diff shows that only line 4 differs. Line 4 corresponds to page 4 counting from 1 (page number 3 counting from 0), which in a MySQL 8.0 .ibd file is the SDI page, i.e. the page that stores the table's serialized dictionary information — its metadata.
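We can double-check that the changed page really is the SDI page by reading its FIL_PAGE_TYPE, the 2 bytes at offset 24 of the page header; in 8.0 the SDI page type is 17853 (FIL_PAGE_SDI). A minimal sketch, reusing the demo path:

import struct

with open('/data/mysql_3306/mysqldata/db1/t20250730_3.ibd', 'rb') as f:
    f.seek(3 * 16384)  # page 3 counting from 0 = line 4 of the checksum list
    page = f.read(16384)
page_type = struct.unpack('>H', page[24:26])[0]
print(page_type, page_type == 17853)  # 17853 == FIL_PAGE_SDI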
In other words, extending a VARCHAR column only requires a metadata change, as long as the new maximum size does not cross the 255-byte boundary.
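Why 255 bytes? In InnoDB's row format, a VARCHAR column whose maximum size fits in 255 bytes needs only one length byte per value, while a larger maximum may need two, so crossing that boundary changes the row layout and can no longer be metadata-only. A small sketch of the rule (length_prefix_bytes is an illustrative helper, not a MySQL API; the byte widths assume the 8.0 default charset utf8mb4):

def length_prefix_bytes(max_chars, bytes_per_char):
    # one length byte if every possible value fits in 255 bytes, else two
    return 1 if max_chars * bytes_per_char <= 255 else 2

print(length_prefix_bytes(20, 4))   # varchar(20)  ->  80 bytes max -> 1
print(length_prefix_bytes(50, 4))   # varchar(50)  -> 200 bytes max -> 1, so our ALTER stayed metadata-only
print(length_prefix_bytes(100, 4))  # varchar(100) -> 400 bytes max -> 2, needs more than a metadata change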
So after writing all that code ourselves, it turns out the server supports this out of the box...
The built-in INPLACE path is clearly the convenient one, but since we already wrote the INSTANT-style tooling, let's demonstrate it anyway. The point is to share the principle and the approach (not recommended for real use; for learning only).
Our script likewise extends the column by modifying only metadata. The rough logic: rewrite the column's char_length (and column_type_utf8) in the SDI stored in the table's .ibd, then patch the matching char_length in the table's mysql.columns record inside mysql.ibd, recomputing the page checksums in both files.
Let's just test it. To keep things simple we only patch the structures on disk, so the instance must be shut down cleanly first (for example with mysqladmin shutdown); the full script is at the end of the post.
# first argument: the data directory, i.e. select @@datadir
# second argument: the column to change, as database.table.column
# third argument: the column's new size (in characters)
python3 offline_ddl_instant_modify_column.py /data/mysql_3306/mysqldata db1.t20250730_3.c3 100
The script also enforces the 255-byte boundary described above; it only applies the change when every check passes.
After the change succeeds, verify inside the database (for example with show create table, and by inserting a value longer than the old limit):
The column definition has indeed changed, and the column really can hold 66 characters now, so our script does modify the metadata correctly (even if it is not of much practical use).
Note that we did not modify the index information tied to the column here, so if the modified column originally had an index on it, that index effectively becomes a prefix index.
To sum up: extending a column's length with the INPLACE algorithm only requires a metadata change, and that is the approach to use.
Because we were not familiar enough with MySQL's internals, we ended up writing a script that duplicates built-in functionality... It is of little practical use, but still worth keeping as a reference: some day we may need to patch other metadata, so it was not a complete loss.
The lesson: read the official documentation and verify before you build!
References:
https://dev.mysql.com/doc/refman/8.0/en/innodb-online-ddl-operations.html
Appendix: full source code (for learning only; never run it against a production system):
#!/usr/bin/env python
# write by ddcw @https://github.com/ddcw
# usage: python3 offline_ddl_instant_modify_column.py /data/mysql_3414/mysqldata dbname.tablename.columnname new_size
# this script is very dangerous !!!
# backup files before use
# support: mysql 8.0
import datetime
import struct
import json
import zlib
import copy
import sys
import os

def create_crc32c_table():
    # 256-entry lookup table for CRC-32C (Castagnoli), the polynomial
    # InnoDB uses for page checksums
    poly = 0x82f63b78
    table = []
    for i in range(256):
        crc = i
        for _ in range(8):
            if crc & 1:
                crc = (crc >> 1) ^ poly
            else:
                crc >>= 1
        table.append(crc)
    return table

crc32_slice_table = create_crc32c_table()

def calculate_crc32c(data):
    crc = 0xFFFFFFFF
    for byte in data:
        crc = crc32_slice_table[(crc ^ byte) & 0xFF] ^ (crc >> 8)
    return crc ^ 0xFFFFFFFF
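# quick self-test (added for illustration): the published CRC-32C check value
# for b'123456789' is 0xE3069283, so the table above was built correctly
assert calculate_crc32c(b'123456789') == 0xE3069283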
def str2dict(data):
    # parse se_private_data strings like "id=5;root=4;" into a dict
    dd = {}
    for x in data.split(';'):
        if x == '':
            continue
        k, v = x.split('=')
        dd[k] = v
    return dd

def REPACK_PAGE(data):
    # recompute the InnoDB page checksum: CRC-32C of header bytes 4..26 XORed
    # with CRC-32C of body bytes 38..-8, written to the first 4 bytes and to
    # the trailer; the operation is idempotent, so repacking twice is harmless
    c1 = calculate_crc32c(data[4:26])
    c2 = calculate_crc32c(data[38:-8])
    c3 = struct.pack('>L', (c1 ^ c2) & (2**32 - 1))
    return c3 + data[4:-8] + c3 + data[-4:]
def MODIFY_FILE_INPLACE(filename, pageid, offset, new_value, pagesize=16384):
    # overwrite len(new_value) bytes at (pageid, offset) and re-checksum the page
    with open(filename, 'r+b') as f:
        f.seek(pageid * pagesize, 0)
        data = f.read(pagesize)
        newdata = data[:offset] + new_value + data[offset + len(new_value):]
        data = REPACK_PAGE(newdata)
        f.seek(pageid * pagesize, 0)
        f.write(data)
    return True
def GET_PAGEID_FROM_MYSQL_COLUMNS_NON_LEAF(data):
    # follow the infimum's next-record pointer and return the child page id
    # stored in the first record of a non-leaf page
    offset = 99
    offset += struct.unpack('>h', data[offset-2:offset])[0]
    return struct.unpack('>L', data[offset+8:offset+8+4])[0]  # first rec page id

def GET_INFO_FROM_MYSQL_COLUMNS_LEAF(data):
    # (currently unused) walk the records of a mysql.columns leaf page
    offset = struct.unpack('>h', data[97:99])[0] + 99
    rdata = {}
    while True:
        if offset == 112:  # supremum reached
            break
        rec1, rec2, rec_next = struct.unpack('>HBh', data[offset-5:offset])  # 5-byte record header
        rec = (rec1 << 8) + rec2
        nullable_bitmask = int.from_bytes(data[offset-5-3:offset-5], 'big')
        colname_size = struct.unpack('>B', data[offset-5-3-1:offset-5-3])[0]
        col_id = struct.unpack('>Q', data[offset:][:8])[0]
        table_id = struct.unpack('>Q', data[offset+21:][:8])[0]
        col_name = data[offset+29:][:colname_size].decode()
        ordinal_position, col_type, is_nullable, is_zerofill, is_unsigned, char_length = struct.unpack('>LBBBBL', data[offset+29+colname_size:][:12])
        rdata[col_name] = (col_id, table_id, ordinal_position, col_type, char_length)
        offset += rec_next
    return rdata
def GET_SDI_FROM_IBD(filename, pageid):
    # walk the SDI pages starting at `pageid` and return:
    # ({'db.table': sdi_dict, ...}, first table_id seen, (page_id, offset) of the last SDI record)
    rdata = {}  # {'db.table': {}, }
    rdata2 = 0
    rdata3 = 0, 0  # page_id, offset
    f = open(filename, 'rb')
    while pageid < 4294967295:  # 0xFFFFFFFF (FIL_NULL) ends the page chain
        _ = f.seek(16384 * pageid, 0)
        data = f.read(16384)
        offset = 99  # infimum record
        while True:
            offset += struct.unpack('>h', data[offset-2:offset])[0]  # next-record pointer
            if offset >= 16384 or offset == 112:  # off the page / supremum
                break
            dtype, table_id = struct.unpack('>LQ', data[offset:offset+12])
            if rdata2 == 0:
                rdata2 = table_id
            dunzip_len, dzip_len = struct.unpack('>LL', data[offset+25:offset+33])
            unzbdata = b''
            if data[offset-7:offset-5] == b'\x14\xc0':  # zipped SDI lives on overflow pages
                SPACE_ID, PAGENO, BLOB_HEADER, REAL_SIZE = struct.unpack('>3LQ', data[offset+33:offset+33+20])
                if REAL_SIZE != dzip_len:
                    print('SDI overflow size mismatch:', REAL_SIZE, '!=', dzip_len)
                    sys.exit(1)
                while True:
                    f.seek(PAGENO*16384, 0)
                    ndata = f.read(16384)
                    REAL_SIZE, PAGENO = struct.unpack('>LL', ndata[38:46])
                    unzbdata += ndata[46:-8]
                    if PAGENO == 4294967295:
                        break
                unzbdata = zlib.decompress(unzbdata)
            else:
                unzbdata = zlib.decompress(data[offset+33:offset+33+dzip_len])
            dic_info = json.loads(unzbdata.decode())
            table_name = dic_info['dd_object']['name']
            if 'schema_ref' not in dic_info['dd_object']:
                table_schema = 'mysql'
            else:
                table_schema = dic_info['dd_object']['schema_ref']
            rdata3 = pageid, offset
            rdata[f"{table_schema}.{table_name}"] = dic_info
        pageid = struct.unpack('>L', data[12:16])[0]  # FIL_PAGE_NEXT
    f.close()
    return rdata, rdata2, rdata3
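# Illustrative usage (commented out): dump one column's SDI entry of the demo
# table; page 3 is the usual SDI root for a small standalone .ibd file.
# sdi, tid, loc = GET_SDI_FROM_IBD('/data/mysql_3306/mysqldata/db1/t20250730_3.ibd', 3)
# print(json.dumps(sdi['db1.t20250730_3']['dd_object']['columns'][1], indent=2))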
def GET_INFO_FROM_MYSQL_COLUMNS(filename, pageid, table_id, column_name, char_length, log):
    # scan mysql.columns leaf pages for (table_id, column_name) and return the
    # (page_id, file offset) of that record's 4-byte char_length field
    f = open(filename, 'rb')
    rdata = 0, 0  # pageid, offset
    while pageid < 4294967295:
        f.seek(pageid*16384, 0)
        data = f.read(16384)
        offset = 99
        while True:
            offset += struct.unpack('>h', data[offset-2:offset])[0]
            if offset >= 16384 or offset == 112:
                break
            nullable_bitmask = int.from_bytes(data[offset-5-3:offset-5], 'big')
            colname_size = struct.unpack('>B', data[offset-5-3-1:offset-5-3])[0]
            col_id = struct.unpack('>Q', data[offset:][:8])[0]
            tbl_id = struct.unpack('>Q', data[offset+21:][:8])[0]
            colname = data[offset+29:][:colname_size].decode()
            ordinal_position, col_type, is_nullable, is_zerofill, is_unsigned, charlength = struct.unpack('>LBBBBL', data[offset+29+colname_size:][:12])
            if tbl_id != table_id or colname != column_name:
                continue  # offset was already advanced at the top of the loop
            if col_type != 16:  # MYSQL_TYPE_VARCHAR
                log.error(col_type, '!=16 (MYSQL_TYPE_VARCHAR)')
            if charlength != char_length:
                log.error('table.ibd varchar', char_length, '!= mysql.columns varchar', charlength)
            rdata = pageid, offset+29+colname_size+12-4  # points at char_length
        pageid = struct.unpack('>L', data[12:16])[0]
    f.close()
    return rdata
def CHECK_USER_TABLE(filename, dbname, table_name, column_name, newsize, log):
    # validate the requested change against the SDI in the table's .ibd and,
    # if every check passes, build the rewritten (re-checksummed) SDI page
    table_id = 0
    pageid = 0
    offset = 0
    char_length = 0  # current max length in bytes
    status = False
    rdata = b''  # the rebuilt page, when status is True
    log.info(f'\n\n\t\t==== CHECK {filename} START ====')
    data = b''
    newsize_char_length = 0
    with open(filename, 'rb') as f:
        data = f.read(16384)
    sdi_version, sdino = struct.unpack('>LL', data[10505:][:8])  # SDI root page no, from the FSP header page
    log.info(f'ROOT PAGE NO: {sdino}')
    sdi_info, table_id, pageid_offset = GET_SDI_FROM_IBD(filename, sdino)
    pageid, offset = pageid_offset
    sdi_info = sdi_info[f'{dbname}.{table_name}']
    new_sdi_info = copy.deepcopy(sdi_info)
    for x in range(len(sdi_info['dd_object']['columns'])):
        col = sdi_info['dd_object']['columns'][x]
        if col['name'] == column_name:
            char_length = col['char_length']
            collation_id_maxlen = int(COLLATION_DICT[str(col['collation_id'])])
            newsize_char_length = collation_id_maxlen*newsize
            oldsize = char_length//collation_id_maxlen
            if col['type'] != 16 or col['column_type_utf8'] != f'varchar({oldsize})':
                log.error(f"only support varchar: current type:{col['type']} {col['column_type_utf8']} varchar({oldsize})")
            if newsize <= oldsize:
                log.error(f"new size({newsize}) must be greater than old size({oldsize})")
            if char_length < 255:
                # a column below the 255-byte boundary must stay below it,
                # otherwise the number of length bytes in the rows would change
                newsize_max = 255//collation_id_maxlen
                if newsize > newsize_max:
                    log.error(f"new size({newsize}) must be less than {newsize_max}")
            log.info(f"column: {dbname}.{table_name} {column_name} {col['column_type_utf8']} --> varchar({newsize})")
            new_sdi_info['dd_object']['columns'][x]['column_type_utf8'] = f"varchar({newsize})"
            new_sdi_info['dd_object']['columns'][x]['char_length'] = collation_id_maxlen * newsize
            status = True
    for x in range(len(sdi_info['dd_object']['indexes'])):
        idx = sdi_info['dd_object']['indexes'][x]
        for y in range(len(idx['elements'])):
            element = idx['elements'][y]
            col = sdi_info['dd_object']['columns'][element['column_opx']]
            if col['name'] == column_name:
                log.info(f"index: {idx['name']} has column {column_name}")
                if element['length'] != 4294967295 and element['length'] == col['char_length']:
                    new_sdi_info['dd_object']['indexes'][x]['elements'][y]['length'] = newsize_char_length
                    log.info(f"index: {idx['name']} column({column_name}) {element['length']} --> {newsize_char_length}")
    if status:
        with open(filename, 'rb') as f1:
            _ = f1.seek(pageid*16384)
            data = f1.read(16384)
        new_sdi = json.dumps(new_sdi_info).encode()
        new_dunzip_len = len(new_sdi)
        new_sdi = zlib.compress(new_sdi)
        new_dzip_len = len(new_sdi)
        # splice the new compressed SDI into the record, rewrite its reversed
        # 2-byte length prefix and the (uncompressed, compressed) sizes, then
        # re-checksum; assumes the new compressed SDI still fits in this page
        rdata = data[:offset-7] + struct.pack('>BB', new_dzip_len % 256, new_dzip_len//256 + 128)
        rdata += data[offset-5:offset+33-8] + struct.pack('>LL', new_dunzip_len, new_dzip_len)
        rdata += new_sdi
        rdata += data[offset+33+new_dzip_len:]
        rdata = REPACK_PAGE(rdata)
    log.info(f'\n\t\t==== CHECK {filename} FINISH ====\n')
    return table_id, pageid, offset, char_length, status, rdata, newsize_char_length
def CHECK_MYSQL_IBD_COLUMNS(filename, table_id, column_name, newsize, char_length, log):
    # locate the char_length field of (table_id, column_name) inside the
    # mysql.columns table stored in the shared mysql.ibd tablespace
    pageid = 0
    offset = 0
    status = False
    log.info(f"\n\n\t\t==== CHECK {filename} START ====")
    f = open(filename, 'rb')
    data = f.read(1024)
    flags = struct.unpack('>L', data[54:58])[0]
    ZIP_SSIZE = (flags & 30) >> 1
    PAGE_SSIZE = (flags & 960) >> 6
    SHARED = (flags & 2048) >> 11
    SDI = (flags & 16384) >> 14
    if ZIP_SSIZE == 0 and PAGE_SSIZE == 0 and SHARED == 1 and SDI == 1:
        log.info(filename, 'is a shared tablespace.')
    else:
        log.error(filename, 'is not mysql.ibd')
    _ = f.seek(16384*2, 0)
    data = f.read(16384)
    sdi_segment = data[50:50+192*2]
    pageid = struct.unpack('>L', sdi_segment[192:][64:68])[0]
    sdi_info, sdi_info2, sdi_info3 = GET_SDI_FROM_IBD(filename, pageid)
    if 'mysql.columns' not in sdi_info:
        log.error('table mysql.columns not exists in', filename)
    dd = sdi_info['mysql.columns']
    pageid = int(str2dict(dd['dd_object']['indexes'][0]['se_private_data'])['root'])
    log.info(filename, 'mysql.columns root pageid', pageid)
    while True:
        _ = f.seek(pageid*16384, 0)
        data = f.read(16384)
        if data[64:66] != b'\x00\x00':  # PAGE_LEVEL > 0: non-leaf, descend left
            pageid = GET_PAGEID_FROM_MYSQL_COLUMNS_NON_LEAF(data)
        else:
            break
    log.info(filename, 'mysql.columns leaf(first) pageid', pageid)
    pageid, offset = GET_INFO_FROM_MYSQL_COLUMNS(filename, pageid, table_id, column_name, char_length, log)
    status = True
    f.close()
    log.info(f"\n\t\t==== CHECK {filename} FINISH ====\n")
    return pageid, offset, status
class LOG(object):
    def __init__(self, filename=None):
        self.filename = filename
        if filename is None:
            self.f = sys.stdout
        else:
            self.f = open(filename, 'a')
    def _write(self, msg):
        self.f.write(msg)
    def info(self, *args):
        msg = f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] [INFO ] {' '.join([str(x) for x in args])}\n"
        return self._write(msg)
    def error(self, *args):
        # every failed check logs and aborts before anything is written to disk
        msg = f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] [ERROR] {' '.join([str(x) for x in args])}\n"
        self._write(msg)
        sys.exit(1)
    def close(self):
        if self.filename is not None:
            self.f.close()
# select concat("{",group_concat(concat("'",co.id,"':'",cs.maxlen,"'")),"}") from information_schema.collations co join information_schema.character_sets cs on co.character_set_name = cs.character_set_name;
COLLATION_DICT = {'1':'2','84':'2','3':'1','69':'1','4':'1','80':'1','6':'1','72':'1','7':'1','74':'1','5':'1','8':'1','15':'1','31':'1','47':'1','48':'1','49':'1','94':'1','2':'1','9':'1','21':'1','27':'1','77':'1','10':'1','82':'1','11':'1','65':'1','12':'3','91':'3','13':'2','88':'2','16':'1','71':'1','18':'1','89':'1','19':'2','85':'2','22':'1','75':'1','24':'2','86':'2','25':'1','70':'1','26':'1','34':'1','44':'1','66':'1','99':'1','28':'2','87':'2','30':'1','78':'1','32':'1','64':'1','33':'3','76':'3','83':'3','192':'3','193':'3','194':'3','195':'3','196':'3','197':'3','198':'3','199':'3','200':'3','201':'3','202':'3','203':'3','204':'3','205':'3','206':'3','207':'3','208':'3','209':'3','210':'3','211':'3','212':'3','213':'3','214':'3','215':'3','223':'3','35':'2','90':'2','128':'2','129':'2','130':'2','131':'2','132':'2','133':'2','134':'2','135':'2','136':'2','137':'2','138':'2','139':'2','140':'2','141':'2','142':'2','143':'2','144':'2','145':'2','146':'2','147':'2','148':'2','149':'2','150':'2','151':'2','159':'2','36':'1','68':'1','37':'1','73':'1','38':'1','43':'1','39':'1','53':'1','40':'1','81':'1','20':'1','41':'1','42':'1','79':'1','14':'1','23':'1','50':'1','51':'1','52':'1','54':'4','55':'4','101':'4','102':'4','103':'4','104':'4','105':'4','106':'4','107':'4','108':'4','109':'4','110':'4','111':'4','112':'4','113':'4','114':'4','115':'4','116':'4','117':'4','118':'4','119':'4','120':'4','121':'4','122':'4','123':'4','124':'4','56':'4','62':'4','57':'1','67':'1','29':'1','58':'1','59':'1','60':'4','61':'4','160':'4','161':'4','162':'4','163':'4','164':'4','165':'4','166':'4','167':'4','168':'4','169':'4','170':'4','171':'4','172':'4','173':'4','174':'4','175':'4','176':'4','177':'4','178':'4','179':'4','180':'4','181':'4','182':'4','183':'4','63':'1','92':'1','93':'1','95':'2','96':'2','97':'3','98':'3','248':'4','249':'4','250':'4','45':'4','46':'4','224':'4','225':'4','226':'4','227':'4','228':'4','229':'4','230':'4','231':'4','232':'4','233':'4','234':'4','235':'4','236':'4','237':'4','238':'4','239':'4','240':'4','241':'4','242':'4','243':'4','244':'4','245':'4','246':'4','247':'4','255':'4','256':'4','257':'4','258':'4','259':'4','260':'4','261':'4','262':'4','263':'4','264':'4','265':'4','266':'4','267':'4','268':'4','269':'4','270':'4','271':'4','273':'4','274':'4','275':'4','277':'4','278':'4','279':'4','280':'4','281':'4','282':'4','283':'4','284':'4','285':'4','286':'4','287':'4','288':'4','289':'4','290':'4','291':'4','292':'4','293':'4','294':'4','296':'4','297':'4','298':'4','300':'4','303':'4','304':'4','305':'4','306':'4','307':'4','308':'4','309':'4','310':'4','311':'4','312':'4','313':'4','314':'4','315':'4','316':'4','317':'4','318':'4','319':'4','320':'4','321':'4','322':'4','323':'4'}
if __name__ == "__main__":
    log = LOG()
    if len(sys.argv) != 4:
        log.error(f'USAGE: python3 {sys.argv[0]} @@datadir dbname.tablename.columnname newsize')
    datadir = sys.argv[1]
    db_table_column = sys.argv[2]
    new_size = int(sys.argv[3])
    mysql_ibd_filename = os.path.join(datadir, 'mysql.ibd')
    db_name = db_table_column.split('.')[0]
    table_name = db_table_column.split('.')[1]
    table_filename = os.path.join(datadir, db_name, table_name) + '.ibd'
    column_name = db_table_column.split('.')[2]
    if os.path.exists(mysql_ibd_filename):
        log.info('mysql.ibd filename:', mysql_ibd_filename)
    else:
        log.error('mysql.ibd', mysql_ibd_filename, 'does not exist')
    if os.path.exists(table_filename):
        log.info('table filename:', table_filename)
    else:
        log.error('table filename', table_filename, 'does not exist')
    log.info('table column name:', column_name)
    # table.ibd: (table_id, pageid, offset, char_length, status, rdata, newsize_char_length)
    user_table_info = CHECK_USER_TABLE(table_filename, db_name, table_name, column_name, new_size, log)
    table_id = user_table_info[0]
    user_table_pageid = user_table_info[1]
    user_table_offset = user_table_info[2]
    char_length = user_table_info[3]
    user_table_status = user_table_info[4]
    user_table_newdata = user_table_info[5]
    newsize_char_length = user_table_info[6]
    if not user_table_status:
        log.error('CHECK_USER_TABLE', table_filename, 'failed')
    log.info(f'{table_filename} tableid:{table_id}')
    log.info(f'{table_filename} pageid:{user_table_pageid}')
    log.info(f'{table_filename} offset:{user_table_offset}')
    log.info(f'{table_filename} char_length:{char_length}')
    # mysql.ibd: (pageid, offset, status)
    mysql_column_table_info = CHECK_MYSQL_IBD_COLUMNS(mysql_ibd_filename, table_id, column_name, new_size, char_length, log)
    mysql_column_pageid, mysql_column_offset, mysql_column_status = mysql_column_table_info
    if not mysql_column_status:
        log.error('CHECK_MYSQL_IBD', mysql_ibd_filename, 'failed')
    log.info(f"{mysql_ibd_filename} pageid:{mysql_column_pageid}")
    log.info(f"{mysql_ibd_filename} offset:{mysql_column_offset}")
    # modify: rewrite the SDI page in the table's .ibd first, then patch
    # char_length in the mysql.columns record inside mysql.ibd
    if mysql_column_status and user_table_status:
        log.info('modify', table_filename, 'start')
        with open(table_filename, 'r+b') as f:
            f.seek(user_table_pageid*16384)
            f.write(REPACK_PAGE(user_table_newdata))  # repack is idempotent; the page was already checksummed
        log.info('modify', table_filename, 'finish')
        newdata = struct.pack('>L', newsize_char_length)
        log.info('modify', mysql_ibd_filename, 'start', newsize_char_length)
        MODIFY_FILE_INPLACE(mysql_ibd_filename, mysql_column_pageid, mysql_column_offset, newdata, pagesize=16384)
        log.info('modify', mysql_ibd_filename, 'finish')
    else:
        log.error('unknown error')
Original statement: this article was published on the Tencent Cloud Developer Community with the author's authorization; reproduction without permission is prohibited.
For infringement concerns, contact cloudcommunity@tencent.com for removal.