349 lines
11 KiB
Go
349 lines
11 KiB
Go
package main
|
|
|
|
import (
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// ctrlDrive is the RAID controller's view of a single physical drive. It
// carries what smartctl cannot report by itself: predictive-failure and
// media/other error counters, the controller firmware state, and the physical
// enclosure:slot position. Records are matched to a smartctl megaraid
// passthrough device through DeviceID (== the megaraid,N index).
type ctrlDrive struct {
	// DeviceID is the controller device index used as the lookup key.
	DeviceID string
	// Enclosure and Slot give the physical location of the drive.
	Enclosure, Slot string
	// MediaErr, OtherErr and Predictive are the controller-side media error,
	// other error and predictive-failure counters.
	MediaErr, OtherErr, Predictive int
	// SmartAlert is true when the controller reports a S.M.A.R.T alert.
	SmartAlert bool
	// FwState is the drive state as reported by the controller tool.
	FwState string
	// TempC is the drive temperature in Celsius; nil when none was parsed.
	TempC *int
	// Inquiry is the legacy single-line MegaCLI/storcli inquiry (serial model fw).
	Inquiry string
	// Model, Serial and Firmware form the structured identity (perccli2);
	// used for controller-only drives.
	Model, Serial, Firmware string
	// Rotation is "SSD"/"NVMe" derived from controller media/interface, when known.
	Rotation string
}
|
|
|
|
// controllerIndex enumerates all RAID controllers found, preferring modern
|
|
// tools (storcli/perccli) then MegaCLI, and returns a DeviceID->ctrlDrive map.
|
|
// If no controller CLI is present (plain HBA / onboard SATA / NVMe) it returns
|
|
// an empty map — that's fine, smartctl still covers those drives directly.
|
|
func controllerIndex() map[string]ctrlDrive {
|
|
idx := map[string]ctrlDrive{}
|
|
|
|
// perccli2 (8.x) is JSON-native. Its plain-text "show all" adds a second
|
|
// status column that breaks positional parsing, so query JSON ('J') and use
|
|
// the dedicated parser. Tried first since it covers the newest controllers.
|
|
for _, bin := range []string{"perccli2", "/opt/MegaRAID/perccli2/perccli2"} {
|
|
p := lookPath(bin)
|
|
if p == "" {
|
|
continue
|
|
}
|
|
drives := parsePerccli2(run(p, "/call/eall/sall", "show", "all", "J"))
|
|
for _, cd := range drives {
|
|
mergeCtrl(idx, cd)
|
|
}
|
|
if len(drives) > 0 {
|
|
break
|
|
}
|
|
}
|
|
|
|
// storcli / perccli (classic) share the same text "show all" layout (perccli
|
|
// is Dell's rebrand). Try each installed binary until one returns drives, so
|
|
// a host with several tools present still resolves.
|
|
for _, bin := range []string{"storcli64", "storcli", "perccli64", "perccli",
|
|
"/opt/MegaRAID/storcli/storcli64", "/opt/MegaRAID/perccli/perccli64"} {
|
|
p := lookPath(bin)
|
|
if p == "" {
|
|
continue
|
|
}
|
|
drives := parseStorcli(run(p, "/call/eall/sall", "show", "all"))
|
|
for _, cd := range drives {
|
|
mergeCtrl(idx, cd)
|
|
}
|
|
if len(drives) > 0 {
|
|
break
|
|
}
|
|
}
|
|
|
|
// MegaCLI (older controllers). Same try-until-data approach.
|
|
for _, bin := range []string{"MegaCli64", "MegaCli", "megacli",
|
|
"/opt/MegaRAID/MegaCli/MegaCli64", "/usr/sbin/megacli"} {
|
|
p := lookPath(bin)
|
|
if p == "" {
|
|
continue
|
|
}
|
|
drives := parseMegacliPDList(run(p, "-PDList", "-aAll"))
|
|
for _, cd := range drives {
|
|
mergeCtrl(idx, cd)
|
|
}
|
|
if len(drives) > 0 {
|
|
break
|
|
}
|
|
}
|
|
return idx
|
|
}
|
|
|
|
// mergeCtrl records cd under its DeviceID, keeping the first writer so the
|
|
// preferred tool (queried earlier) wins and a later tool can't clobber it.
|
|
// Entries without a DeviceID are dropped — they can't be matched to a drive.
|
|
func mergeCtrl(idx map[string]ctrlDrive, cd ctrlDrive) {
|
|
if cd.DeviceID == "" {
|
|
return
|
|
}
|
|
if _, exists := idx[cd.DeviceID]; !exists {
|
|
idx[cd.DeviceID] = cd
|
|
}
|
|
}
|
|
|
|
// afterColon extracts the value part of a "Key : Value" line as emitted by
// MegaCLI/storcli: everything after the first colon, with surrounding
// whitespace removed. It returns "" when s has no colon.
func afterColon(s string) string {
	parts := strings.SplitN(s, ":", 2)
	if len(parts) < 2 {
		return ""
	}
	return strings.TrimSpace(parts[1])
}
|
|
|
|
// parseMegacliPDList parses `MegaCli -PDList -aAll`. Record boundary is the
|
|
// "Enclosure Device ID" line.
|
|
func parseMegacliPDList(text string) []ctrlDrive {
|
|
var drives []ctrlDrive
|
|
var cur ctrlDrive
|
|
have := false
|
|
flush := func() {
|
|
if have && (cur.DeviceID != "" || cur.Slot != "") {
|
|
drives = append(drives, cur)
|
|
}
|
|
}
|
|
for _, raw := range strings.Split(text, "\n") {
|
|
s := strings.TrimSpace(raw)
|
|
switch {
|
|
case strings.HasPrefix(s, "Enclosure Device ID"):
|
|
flush()
|
|
cur = ctrlDrive{Enclosure: afterColon(s)}
|
|
have = true
|
|
case strings.HasPrefix(s, "Slot Number"):
|
|
cur.Slot = afterColon(s)
|
|
case strings.HasPrefix(s, "Device Id"):
|
|
cur.DeviceID = afterColon(s)
|
|
case strings.HasPrefix(s, "Media Error Count"):
|
|
cur.MediaErr = atoiSafe(afterColon(s))
|
|
case strings.HasPrefix(s, "Other Error Count"):
|
|
cur.OtherErr = atoiSafe(afterColon(s))
|
|
case strings.HasPrefix(s, "Predictive Failure Count"):
|
|
cur.Predictive = atoiSafe(afterColon(s))
|
|
// MegaCLI phrases this as "Drive has flagged a S.M.A.R.T alert : No";
|
|
// the older "S.M.A.R.T alert flagged by drive" form is kept for safety.
|
|
case strings.HasPrefix(s, "Drive has flagged a S.M.A.R.T alert"),
|
|
strings.HasPrefix(s, "S.M.A.R.T alert flagged by drive"):
|
|
cur.SmartAlert = strings.Contains(s, "Yes")
|
|
case strings.HasPrefix(s, "Firmware state"):
|
|
cur.FwState = afterColon(s)
|
|
case strings.HasPrefix(s, "Drive Temperature"):
|
|
if m := regexp.MustCompile(`(\d+)\s*C`).FindStringSubmatch(s); m != nil {
|
|
cur.TempC = pInt(atoiSafe(m[1]))
|
|
}
|
|
case strings.HasPrefix(s, "Inquiry Data"):
|
|
cur.Inquiry = afterColon(s)
|
|
}
|
|
}
|
|
flush()
|
|
return drives
|
|
}
|
|
|
|
// parseStorcli parses `storcli /call/eall/sall show all`. A physical drive is
|
|
// introduced by a bare summary header ("Drive /c0/e64/s0 :") followed by a
|
|
// table row ("64:0 22 Onln ...") that carries the DID (== the megaraid index
|
|
// smartctl uses) and the controller state. The same drive then repeats sub-
|
|
// section headers ("Drive .../s0 - Detailed Information :", "... State :",
|
|
// "... Device attributes :") that must NOT open a new record — only the bare
|
|
// summary header does — so detail fields ("Key = Value") accumulate into one
|
|
// record across those sections.
|
|
func parseStorcli(text string) []ctrlDrive {
|
|
var drives []ctrlDrive
|
|
var cur ctrlDrive
|
|
have := false
|
|
// hdr matches only the bare summary header (path then ": " at end), not the
|
|
// "- Detailed Information"/"State"/"Device attributes" sub-section headers.
|
|
hdr := regexp.MustCompile(`^Drive /c\d+/e(\d+)/s(\d+)\s*:$`)
|
|
// row matches the summary table data row "EID:Slt DID State ..."; this
|
|
// storcli version reports the DID here, never as a "DID = N" line.
|
|
row := regexp.MustCompile(`^(\d+):(\d+)\s+(\d+)\s+(\S+)`)
|
|
flush := func() {
|
|
if have && (cur.DeviceID != "" || cur.Slot != "") {
|
|
drives = append(drives, cur)
|
|
}
|
|
}
|
|
kv := func(s string) (string, string, bool) {
|
|
if i := strings.Index(s, "="); i >= 0 {
|
|
return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:]), true
|
|
}
|
|
return "", "", false
|
|
}
|
|
for _, raw := range strings.Split(text, "\n") {
|
|
s := strings.TrimSpace(raw)
|
|
|
|
// New drive record: only the bare summary header opens one.
|
|
if m := hdr.FindStringSubmatch(s); m != nil {
|
|
flush()
|
|
cur = ctrlDrive{Enclosure: m[1], Slot: m[2]}
|
|
have = true
|
|
continue
|
|
}
|
|
if !have {
|
|
continue
|
|
}
|
|
// Summary table row supplies the DID and controller state.
|
|
if m := row.FindStringSubmatch(s); m != nil && cur.DeviceID == "" {
|
|
cur.DeviceID = m[3]
|
|
cur.FwState = m[4]
|
|
continue
|
|
}
|
|
k, v, ok := kv(s)
|
|
if !ok {
|
|
continue
|
|
}
|
|
switch k {
|
|
case "DID":
|
|
cur.DeviceID = v
|
|
case "Media Error Count":
|
|
cur.MediaErr = atoiSafe(v)
|
|
case "Other Error Count":
|
|
cur.OtherErr = atoiSafe(v)
|
|
case "Predictive Failure Count":
|
|
cur.Predictive = atoiSafe(v)
|
|
case "S.M.A.R.T alert flagged by drive":
|
|
cur.SmartAlert = strings.EqualFold(v, "Yes")
|
|
case "Firmware state", "State":
|
|
if cur.FwState == "" {
|
|
cur.FwState = v
|
|
}
|
|
case "Drive Temperature":
|
|
if m := regexp.MustCompile(`(\d+)\s*C`).FindStringSubmatch(v); m != nil {
|
|
cur.TempC = pInt(atoiSafe(m[1]))
|
|
}
|
|
case "Model Number", "Manufacturer Identification":
|
|
if cur.Inquiry == "" {
|
|
cur.Inquiry = v
|
|
}
|
|
}
|
|
}
|
|
flush()
|
|
return drives
|
|
}
|
|
|
|
// parsePerccli2 parses `perccli2 /call/eall/sall show all J` (JSON). perccli2
|
|
// (8.x) renames the classic DID to PID and splits the single State column into
|
|
// State (RAID role: Conf/UConf/GHS/JBOD) and Status (health: Online/Offline/
|
|
// Failed/Missing); the latter is what maps to FwState. Drives nest under
|
|
// Controllers[].Response Data.Drives List[]; health counters sit directly in
|
|
// "Drive Detailed Information" (SAS/SATA) or under its "LU/NS Properties" for
|
|
// NVMe namespaces.
|
|
func parsePerccli2(text string) []ctrlDrive {
|
|
m := loadJSON(text)
|
|
if m == nil {
|
|
return nil
|
|
}
|
|
controllers, ok := m["Controllers"].([]interface{})
|
|
if !ok {
|
|
return nil
|
|
}
|
|
var drives []ctrlDrive
|
|
for _, c := range controllers {
|
|
cm, ok := c.(map[string]interface{})
|
|
if !ok {
|
|
continue
|
|
}
|
|
list, ok := jLeaf(cm, "Response Data", "Drives List").([]interface{})
|
|
if !ok {
|
|
continue
|
|
}
|
|
for _, it := range list {
|
|
dm, ok := it.(map[string]interface{})
|
|
if !ok {
|
|
continue
|
|
}
|
|
info := jObj(dm, "Drive Information")
|
|
if info == nil {
|
|
continue
|
|
}
|
|
cd := ctrlDrive{}
|
|
|
|
// Location + identity from the summary block.
|
|
if es := jStr(info, "EID:Slt"); es != "" {
|
|
if i := strings.Index(es, ":"); i >= 0 {
|
|
cd.Enclosure = strings.TrimSpace(es[:i])
|
|
cd.Slot = strings.TrimSpace(es[i+1:])
|
|
}
|
|
}
|
|
if pid := jInt(info, "PID"); pid != nil {
|
|
cd.DeviceID = strconv.Itoa(*pid)
|
|
}
|
|
cd.FwState = jStr(info, "Status") // Health verdict, not the RAID role.
|
|
cd.Model = jStr(info, "Model")
|
|
cd.Rotation = perccli2Rotation(jStr(info, "Intf"), jStr(info, "Med"))
|
|
|
|
// Detail block: identity fallbacks, temperature, error counters.
|
|
if detail := jObj(dm, "Drive Detailed Information"); detail != nil {
|
|
cd.Serial = jStr(detail, "Serial Number")
|
|
cd.Firmware = jStr(detail, "Firmware Revision Level")
|
|
if cd.Model == "" {
|
|
cd.Model = jStr(detail, "Model")
|
|
}
|
|
if t := jInt(detail, "Temperature(C)"); t != nil {
|
|
cd.TempC = t
|
|
}
|
|
// Counters live in detail (SAS/SATA) or LU/NS Properties (NVMe).
|
|
props := jObj(detail, "LU/NS Properties")
|
|
ci := func(key string) int {
|
|
if v := jInt(detail, key); v != nil {
|
|
return *v
|
|
}
|
|
if props != nil {
|
|
if v := jInt(props, key); v != nil {
|
|
return *v
|
|
}
|
|
}
|
|
return 0
|
|
}
|
|
cd.MediaErr = ci("Media Error Count")
|
|
cd.OtherErr = ci("Other Error Count")
|
|
cd.Predictive = ci("Predictive Failure Count")
|
|
}
|
|
|
|
if cd.DeviceID != "" || cd.Slot != "" {
|
|
drives = append(drives, cd)
|
|
}
|
|
}
|
|
}
|
|
return drives
|
|
}
|
|
|
|
// perccli2Rotation turns a perccli2 interface/media pair into a rotation
// label. An "NVMe" interface yields "NVMe" regardless of the media value;
// otherwise "SSD" media yields "SSD". Both comparisons ignore case. Any other
// combination yields "".
func perccli2Rotation(intf, med string) string {
	if strings.EqualFold(intf, "NVMe") {
		return "NVMe"
	}
	if strings.EqualFold(med, "SSD") {
		return "SSD"
	}
	return ""
}
|
|
|
|
// atoiSafeRe matches the first run of ASCII digits with an optional leading
// minus sign. It is compiled once at package initialisation so atoiSafe does
// not rebuild it on every call.
var atoiSafeRe = regexp.MustCompile(`-?\d+`)

// atoiSafe extracts the first integer found in s (leading sign allowed) and
// returns 0 when none is present, since controller output often wraps the
// number in units or surrounding labels. It also returns 0 when the digits do
// not fit in an int.
func atoiSafe(s string) int {
	digits := atoiSafeRe.FindString(s)
	if digits == "" {
		return 0
	}
	n, err := strconv.Atoi(digits)
	if err != nil {
		return 0 // out of range for int
	}
	return n
}
|