最近给集群扩展 ASM 空间时引发了集群异常,事后分析原因是多路径磁盘的残留信息导致设备识别出错。这里测试一下残留信息的产生及清理操作,顺便整理 HBA 卡的常用操作命令,供以后查阅。
[root@dbrac1 ~]# lspci | grep -i fibre
0b:00.0 Fibre Channel: QLogic Corp. ISP2532-based 8Gb Fibre Channel to PCI Express HBA (rev 02)
-- qlogic
[root@dbrac1 ~]# modinfo qla2xxx | grep version
version: 8.07.00.16.06.7-k
srcversion: C5AC2EED3547B0A71A137C1
vermagic: 2.6.32-573.el6.x86_64 SMP mod_unload modversions
-- emulex
[root@dbrac1 ~]# modinfo lpfc | grep version
version: 0:10.6.0.20
srcversion: C7EDDC41F4AB73368AAD4F4
vermagic: 2.6.32-573.el6.x86_64 SMP mod_unload modversions
[root@dbrac1 ~]# more /sys/class/fc_host/host*/port_name
0x5001438018744582
[root@dbrac1 ~]# cat /sys/class/fc_host/host7/port_state
Online
[root@dbrac1 ~]# cat /sys/class/fc_host/host7/port_id
0x010100
[root@dbrac1 ~]# cat /sys/class/fc_host/host7/supported_speeds
1 Gbit, 2 Gbit, 4 Gbit, 8 Gbit
[root@dbrac1 ~]# cat /sys/class/fc_host/host7/supported_classes
Class 3
[root@dbrac1 ~]# cat /sys/class/fc_host/host7/speed
4 Gbit
[root@dbrac1 ~]# cat /sys/class/fc_host/host7/port_type
NPort (fabric via point-to-point) <--- 表示与光纤交换机相连
-- 若显示为 LPort (private loop),则表示与其它 HBA 卡直连(来源网络)
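-- 扫描指定 SCSI 主机上新增的设备/LUN(将 $HOST 替换为 host0、host7 等)
echo "- - -" > /sys/class/scsi_host/$HOST/scan
-- 发起 LIP(Loop Initialization Protocol),重新初始化 FC 链路并重新发现设备
echo "1" > /sys/class/fc_host/host0/issue_lip
-- 重新扫描单个已存在的磁盘(例如 LUN 容量变化后),将 $DEVICE 替换为 sda、sdb、sdc 等
echo 1 > /sys/block/$DEVICE/device/rescan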
[root@dbrac1 ~]# multipath -l
mpathi (360014380125d8a670000b000002f0000) dm-11 HP,HSV360
size=20G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=0 status=active
`- 8:0:0:7 sdh 8:112 active undef running
[root@dbrac1 ~]# vim /etc/multipath.conf
defaults {
user_friendly_names yes
}
multipaths {
multipath {
no_path_retry fail
wwid 360014380125d8a670000b000002f0000
alias ASM-TEST
}
}
-- 重启多路径
[root@dbrac1 ~]# /etc/init.d/multipathd restart
ok
正在关闭multipathd 端口监控程序: [确定]
正在启动守护进程multipathd: [确定]
[root@dbrac1 ~]# multipath -l
ASM-TEST (360014380125d8a670000b000002f0000) dm-11 HP,HSV360
size=20G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=0 status=active
`- 8:0:0:7 sdh 8:112 failed undef running
[root@dbrac1 ~]# multipath -F
Nov 25 14:01:58 | ASM-DATA4: map in use
Nov 25 14:01:58 | ASM-DATA3: map in use
Nov 25 14:01:58 | ASM-DATA2: map in use
Nov 25 14:01:58 | ASM-CRS: map in use
Nov 25 14:01:58 | ASM-DATA1: map in use
Nov 25 14:01:58 | ASM-ARCH: map in use
[root@dbrac1 ~]# multipath -v2
Nov 25 14:02:03 | mpatha: ignoring map
create: mpathh (360014380125d8a670000b00000290000) undef HP,HSV360
size=10G features='0' hwhandler='0' wp=undef
`-+- policy='round-robin 0' prio=1 status=undef
`- 8:0:0:7 sdh 8:112 undef ready running
[root@dbrac1 ~]# multipath -l
mpathh (360014380125d8a670000b00000290000) dm-11 HP,HSV360
size=20G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=0 status=active
`- 8:0:0:7 sdh 8:112 active undef running
[root@dbrac1 ~]# ll /sys/block/sd
sda/ sdb/ sdc/ sdd/ sde/ sdf/ sdg/ sdh/ <=== sdh 为残留的旧设备信息
[root@dbrac1 ~]# echo 1 > /sys/block/sdh/device/delete
[root@dbrac1 ~]# multipath -l
......(输出中已不再出现 sdh 及 mpathh (360014380125d8a670000b00000290000))
-- sdh 已被清理
[root@dbrac1 ~]# ll /dev/sd
sda sda1 sda2 sdb sdc sdd sde sdf sdg
[root@dbrac1 ~]# echo "- - -" > /sys/class/scsi_host/host7/scan
[root@dbrac1 ~]# multipath -l
mpathh (360014380125d8a670000b00000290000) dm-11 HP,HSV360
size=10G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=0 status=active
`- 8:0:0:7 sdh 8:112 active undef running
-- 以下测试:删除正在被 ASM 使用的磁盘路径(sdb / ASM-DATA1)会产生什么影响
[root@dbrac1 ~]# multipath -l
ASM-DATA1 (360014380125d8a670000a000013e0000) dm-6 HP,HSV360
size=500G features='0' hwhandler='0' wp=rw
`-+- policy='round-robin 0' prio=0 status=active
`- 7:0:0:1 sdb 8:16 active undef running
[root@dbrac1 ~]# echo 1 > /sys/block/sdb/device/delete
Nov 26 14:30:58 dbrac1 multipathd: sdb: remove path (uevent)
Nov 26 14:30:58 dbrac1 multipathd: ASM-DATA1: map in use
Nov 26 14:30:58 dbrac1 multipathd: ASM-DATA1: can't flush
Nov 26 14:30:58 dbrac1 multipathd: ASM-DATA1: load table [0 1048576000 multipath 0 0 0 0]
Nov 26 14:30:58 dbrac1 multipathd: sdb [8:16]: path removed from map ASM-DATA1
Nov 26 14:30:58 dbrac1 kernel: end_request: I/O error, dev dm-6, sector 962656
Nov 26 14:31:00 dbrac1 kernel: end_request: I/O error, dev dm-6, sector 960544
Nov 26 14:31:00 dbrac1 kernel: end_request: I/O error, dev dm-6, sector 960608
Nov 26 14:31:00 dbrac1 kernel: end_request: I/O error, dev dm-6, sector 4088
Nov 26 14:31:01 dbrac1 kernel: end_request: I/O error, dev dm-6, sector 0
Nov 26 14:31:04 dbrac1 kernel: end_request: I/O error, dev dm-6, sector 0
Nov 26 14:31:04 dbrac1 kernel: end_request: I/O error, dev dm-6, sector 0
Nov 26 14:31:10 dbrac1 kernel: end_request: I/O error, dev dm-6, sector 1048575872
Nov 26 14:31:10 dbrac1 kernel: Buffer I/O error on device dm-6, logical block 131071984
Nov 26 14:31:10 dbrac1 kernel: end_request: I/O error, dev dm-6, sector 1048575872
Nov 26 14:31:10 dbrac1 kernel: Buffer I/O error on device dm-6, logical block 131071984
Nov 26 14:31:10 dbrac1 kernel: end_request: I/O error, dev dm-6, sector 1048575984
Nov 26 14:31:10 dbrac1 kernel: Buffer I/O error on device dm-6, logical block 131071998
Nov 26 14:31:10 dbrac1 kernel: end_request: I/O error, dev dm-6, sector 1048575984
Nov 26 14:31:10 dbrac1 kernel: Buffer I/O error on device dm-6, logical block 131071998
Nov 26 14:31:10 dbrac1 kernel: end_request: I/O error, dev dm-6, sector 0
Nov 26 14:31:10 dbrac1 kernel: Buffer I/O error on device dm-6, logical block 0
Nov 26 14:31:10 dbrac1 kernel: end_request: I/O error, dev dm-6, sector 0
Nov 26 14:31:10 dbrac1 kernel: Buffer I/O error on device dm-6, logical block 0
Nov 26 14:31:10 dbrac1 kernel: end_request: I/O error, dev dm-6, sector 8
Nov 26 14:31:10 dbrac1 kernel: Buffer I/O error on device dm-6, logical block 1
Nov 26 14:31:10 dbrac1 kernel: end_request: I/O error, dev dm-6, sector 1048575992
Nov 26 14:31:10 dbrac1 kernel: Buffer I/O error on device dm-6, logical block 131071999
Nov 26 14:31:10 dbrac1 kernel: end_request: I/O error, dev dm-6, sector 1048575992
Nov 26 14:31:10 dbrac1 kernel: Buffer I/O error on device dm-6, logical block 131071999
Nov 26 14:31:10 dbrac1 kernel: end_request: I/O error, dev dm-6, sector 1048575992
[grid@dbrac1 ~]$ crsctl stat res -t
......
ora.dbrac.db
1 ONLINE OFFLINE Instance Shutdown
2 ONLINE ONLINE dbrac2 Open
......
2024-11-26 14:31:01.015:
[crsd(6374)]CRS-2765:Resource 'ora.dbrac.db' has failed on server 'dbrac1'.
Tue Nov 26 14:30:58 2024
Errors in file /u01/oracle/diag/rdbms/dbrac/dbrac1/trace/dbrac1_lmon_8289.trc:
ORA-27072: File I/O error
Linux-x86_64 Error: 5: Input/output error
Additional information: 4
Additional information: 962656
Additional information: -1
WARNING: Read Failed. group:3 disk:0 AU:470 offset:49152 size:16384
WARNING: failed to read mirror side 1 of virtual extent 4 logical extent 0 of file 267 in group [3.985158147] from disk DATA_0000 allocation unit 470 reason error; if possible, will try another mirror side
Errors in file /u01/oracle/diag/rdbms/dbrac/dbrac1/trace/dbrac1_lmon_8289.trc:
ORA-00202: control file: '+DATA/dbrac_standby/controlfile/current.267.1096467161'
ORA-15081: failed to submit an I/O operation to a disk
Tue Nov 26 14:31:00 2024
Errors in file /u01/oracle/diag/rdbms/dbrac/dbrac1/trace/dbrac1_ckpt_8363.trc:
ORA-27072: File I/O error
Linux-x86_64 Error: 5: Input/output error
Additional information: 4
Additional information: 960544
Additional information: -1
WARNING: Read Failed. group:3 disk:0 AU:469 offset:16384 size:16384
WARNING: failed to read mirror side 1 of virtual extent 0 logical extent 0 of file 267 in group [3.985158147] from disk DATA_0000 allocation unit 469 reason error; if possible, will try another mirror side
Errors in file /u01/oracle/diag/rdbms/dbrac/dbrac1/trace/dbrac1_ckpt_8363.trc:
ORA-00202: control file: '+DATA/dbrac_standby/controlfile/current.267.1096467161'
ORA-15081: failed to submit an I/O operation to a disk
Errors in file /u01/oracle/diag/rdbms/dbrac/dbrac1/trace/dbrac1_ckpt_8363.trc:
ORA-27061: waiting for async I/Os failed
Linux-x86_64 Error: 5: Input/output error
Additional information: -1
Additional information: 16384
WARNING: Write Failed. group:3 disk:0 AU:469 offset:49152 size:16384
Errors in file /u01/oracle/diag/rdbms/dbrac/dbrac1/trace/dbrac1_ckpt_8363.trc:
ORA-15080: synchronous I/O operation to a disk failed
WARNING: failed to write mirror side 1 of virtual extent 0 logical extent 0 of file 267 in group 3 on disk 0 allocation unit 469
Errors in file /u01/oracle/diag/rdbms/dbrac/dbrac1/trace/dbrac1_ckpt_8363.trc:
ORA-00206: error in writing (block 3, # blocks 1) of control file
ORA-00202: control file: '+DATA/dbrac_standby/controlfile/current.267.1096467161'
ORA-15081: failed to submit an I/O operation to a disk
ORA-15081: failed to submit an I/O operation to a disk
Errors in file /u01/oracle/diag/rdbms/dbrac/dbrac1/trace/dbrac1_ckpt_8363.trc:
ORA-00221: error on write to control file
ORA-00206: error in writing (block 3, # blocks 1) of control file
ORA-00202: control file: '+DATA/dbrac_standby/controlfile/current.267.1096467161'
ORA-15081: failed to submit an I/O operation to a disk
ORA-15081: failed to submit an I/O operation to a disk
Tue Nov 26 14:31:00 2024
System state dump requested by (instance=1, osid=8363 (CKPT)), summary=[abnormal instance termination].
System State dumped to trace file /u01/oracle/diag/rdbms/dbrac/dbrac1/trace/dbrac1_diag_8266.trc
CKPT (ospid: 8363): terminating the instance due to error 221
Tue Nov 26 14:31:01 2024
ORA-1092 : opitsk aborting process
Tue Nov 26 14:31:01 2024
ORA-1092 : opitsk aborting process
Tue Nov 26 14:31:01 2024
License high water mark = 77
Dumping diagnostic data in directory=[cdmp_20241126143100], requested by (instance=1, osid=8363 (CKPT)), summary=[abnormal instance termination].
Instance terminated by CKPT, pid = 8363
USER (ospid: 30515): terminating the instance
Instance terminated by USER, pid = 30515
[root@dbrac1 ~]# echo "- - -" > /sys/class/scsi_host/host7/scan
[root@dbrac1 ~]# tail -f /var/log/messages
Nov 26 14:31:38 dbrac1 -bash[24909]: HISTORY: IP=10.10.6.15 PID=24909 PPID=24907 SID=24909 UID=0 USER=root LOGIN=root CMD=echo "- - -" > /sys/class/scsi_host/host7/scan
Nov 26 14:31:38 dbrac1 kernel: scsi 7:0:0:1: Direct-Access HP HSV360 1100 PQ: 0 ANSI: 5
Nov 26 14:31:38 dbrac1 kernel: sd 7:0:0:1: Attached scsi generic sg4 type 0
Nov 26 14:31:38 dbrac1 kernel: sd 7:0:0:1: [sdb] 1048576000 512-byte logical blocks: (536 GB/500 GiB)
Nov 26 14:31:38 dbrac1 kernel: sd 7:0:0:1: [sdb] Write Protect is off
Nov 26 14:31:38 dbrac1 kernel: sd 7:0:0:1: [sdb] Write cache: disabled, read cache: enabled, supports DPO and FUA
Nov 26 14:31:38 dbrac1 kernel: sdb: unknown partition table
Nov 26 14:31:38 dbrac1 kernel: sd 7:0:0:1: [sdb] Attached SCSI disk
Nov 26 14:31:38 dbrac1 multipathd: sdb: add path (uevent)
Nov 26 14:31:38 dbrac1 multipathd: ASM-DATA1: load table [0 1048576000 multipath 0 0 1 1 round-robin 0 1 1 8:16 1]
Nov 26 14:31:38 dbrac1 multipathd: sdb [8:16]: path added to devmap ASM-DATA1
-- 启库
[grid@dbrac1 ~]$ srvctl start database -d dbrac
[grid@dbrac1 ~]$ crsctl stat res -t
......
ora.dbrac.db
1 ONLINE ONLINE dbrac1 Open
2 ONLINE ONLINE dbrac2 Open
......
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。