package main import ( "regexp" "strconv" "strings" ) // ctrlDrive holds the RAID-controller-side view of one physical drive — data // smartctl cannot see (predictive-failure, firmware state, controller media/ // other error counters, physical enclosure:slot). Keyed for matching to a // smartctl megaraid passthrough by DeviceID (== the megaraid,N index). type ctrlDrive struct { DeviceID string Enclosure string Slot string MediaErr int OtherErr int Predictive int SmartAlert bool FwState string TempC *int Inquiry string // Inquiry is the legacy single-line MegaCLI/storcli inquiry (serial model fw). Model string // Model is the structured identity (perccli2); used for controller-only drives. Serial string Firmware string Rotation string // Rotation is "SSD"/"NVMe" derived from controller media/interface, when known. } // controllerIndex enumerates all RAID controllers found, preferring modern // tools (storcli/perccli) then MegaCLI, and returns a DeviceID->ctrlDrive map. // If no controller CLI is present (plain HBA / onboard SATA / NVMe) it returns // an empty map — that's fine, smartctl still covers those drives directly. func controllerIndex() map[string]ctrlDrive { idx := map[string]ctrlDrive{} // perccli2 (8.x) is JSON-native. Its plain-text "show all" adds a second // status column that breaks positional parsing, so query JSON ('J') and use // the dedicated parser. Tried first since it covers the newest controllers. for _, bin := range []string{"perccli2", "/opt/MegaRAID/perccli2/perccli2"} { p := lookPath(bin) if p == "" { continue } drives := parsePerccli2(run(p, "/call/eall/sall", "show", "all", "J")) for _, cd := range drives { mergeCtrl(idx, cd) } if len(drives) > 0 { break } } // storcli / perccli (classic) share the same text "show all" layout (perccli // is Dell's rebrand). Try each installed binary until one returns drives, so // a host with several tools present still resolves. for _, bin := range []string{"storcli64", "storcli", "perccli64", "perccli", "/opt/MegaRAID/storcli/storcli64", "/opt/MegaRAID/perccli/perccli64"} { p := lookPath(bin) if p == "" { continue } drives := parseStorcli(run(p, "/call/eall/sall", "show", "all")) for _, cd := range drives { mergeCtrl(idx, cd) } if len(drives) > 0 { break } } // MegaCLI (older controllers). Same try-until-data approach. for _, bin := range []string{"MegaCli64", "MegaCli", "megacli", "/opt/MegaRAID/MegaCli/MegaCli64", "/usr/sbin/megacli"} { p := lookPath(bin) if p == "" { continue } drives := parseMegacliPDList(run(p, "-PDList", "-aAll")) for _, cd := range drives { mergeCtrl(idx, cd) } if len(drives) > 0 { break } } return idx } // mergeCtrl records cd under its DeviceID, keeping the first writer so the // preferred tool (queried earlier) wins and a later tool can't clobber it. // Entries without a DeviceID are dropped — they can't be matched to a drive. func mergeCtrl(idx map[string]ctrlDrive, cd ctrlDrive) { if cd.DeviceID == "" { return } if _, exists := idx[cd.DeviceID]; !exists { idx[cd.DeviceID] = cd } } // afterColon returns the trimmed text following the first colon, or "". It reads // the "Key : Value" lines MegaCLI/storcli emit. func afterColon(s string) string { if i := strings.Index(s, ":"); i >= 0 { return strings.TrimSpace(s[i+1:]) } return "" } // parseMegacliPDList parses `MegaCli -PDList -aAll`. Record boundary is the // "Enclosure Device ID" line. func parseMegacliPDList(text string) []ctrlDrive { var drives []ctrlDrive var cur ctrlDrive have := false flush := func() { if have && (cur.DeviceID != "" || cur.Slot != "") { drives = append(drives, cur) } } for _, raw := range strings.Split(text, "\n") { s := strings.TrimSpace(raw) switch { case strings.HasPrefix(s, "Enclosure Device ID"): flush() cur = ctrlDrive{Enclosure: afterColon(s)} have = true case strings.HasPrefix(s, "Slot Number"): cur.Slot = afterColon(s) case strings.HasPrefix(s, "Device Id"): cur.DeviceID = afterColon(s) case strings.HasPrefix(s, "Media Error Count"): cur.MediaErr = atoiSafe(afterColon(s)) case strings.HasPrefix(s, "Other Error Count"): cur.OtherErr = atoiSafe(afterColon(s)) case strings.HasPrefix(s, "Predictive Failure Count"): cur.Predictive = atoiSafe(afterColon(s)) // MegaCLI phrases this as "Drive has flagged a S.M.A.R.T alert : No"; // the older "S.M.A.R.T alert flagged by drive" form is kept for safety. case strings.HasPrefix(s, "Drive has flagged a S.M.A.R.T alert"), strings.HasPrefix(s, "S.M.A.R.T alert flagged by drive"): cur.SmartAlert = strings.Contains(s, "Yes") case strings.HasPrefix(s, "Firmware state"): cur.FwState = afterColon(s) case strings.HasPrefix(s, "Drive Temperature"): if m := regexp.MustCompile(`(\d+)\s*C`).FindStringSubmatch(s); m != nil { cur.TempC = pInt(atoiSafe(m[1])) } case strings.HasPrefix(s, "Inquiry Data"): cur.Inquiry = afterColon(s) } } flush() return drives } // parseStorcli parses `storcli /call/eall/sall show all`. A physical drive is // introduced by a bare summary header ("Drive /c0/e64/s0 :") followed by a // table row ("64:0 22 Onln ...") that carries the DID (== the megaraid index // smartctl uses) and the controller state. The same drive then repeats sub- // section headers ("Drive .../s0 - Detailed Information :", "... State :", // "... Device attributes :") that must NOT open a new record — only the bare // summary header does — so detail fields ("Key = Value") accumulate into one // record across those sections. func parseStorcli(text string) []ctrlDrive { var drives []ctrlDrive var cur ctrlDrive have := false // hdr matches only the bare summary header (path then ": " at end), not the // "- Detailed Information"/"State"/"Device attributes" sub-section headers. hdr := regexp.MustCompile(`^Drive /c\d+/e(\d+)/s(\d+)\s*:$`) // row matches the summary table data row "EID:Slt DID State ..."; this // storcli version reports the DID here, never as a "DID = N" line. row := regexp.MustCompile(`^(\d+):(\d+)\s+(\d+)\s+(\S+)`) flush := func() { if have && (cur.DeviceID != "" || cur.Slot != "") { drives = append(drives, cur) } } kv := func(s string) (string, string, bool) { if i := strings.Index(s, "="); i >= 0 { return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:]), true } return "", "", false } for _, raw := range strings.Split(text, "\n") { s := strings.TrimSpace(raw) // New drive record: only the bare summary header opens one. if m := hdr.FindStringSubmatch(s); m != nil { flush() cur = ctrlDrive{Enclosure: m[1], Slot: m[2]} have = true continue } if !have { continue } // Summary table row supplies the DID and controller state. if m := row.FindStringSubmatch(s); m != nil && cur.DeviceID == "" { cur.DeviceID = m[3] cur.FwState = m[4] continue } k, v, ok := kv(s) if !ok { continue } switch k { case "DID": cur.DeviceID = v case "Media Error Count": cur.MediaErr = atoiSafe(v) case "Other Error Count": cur.OtherErr = atoiSafe(v) case "Predictive Failure Count": cur.Predictive = atoiSafe(v) case "S.M.A.R.T alert flagged by drive": cur.SmartAlert = strings.EqualFold(v, "Yes") case "Firmware state", "State": if cur.FwState == "" { cur.FwState = v } case "Drive Temperature": if m := regexp.MustCompile(`(\d+)\s*C`).FindStringSubmatch(v); m != nil { cur.TempC = pInt(atoiSafe(m[1])) } case "Model Number", "Manufacturer Identification": if cur.Inquiry == "" { cur.Inquiry = v } } } flush() return drives } // parsePerccli2 parses `perccli2 /call/eall/sall show all J` (JSON). perccli2 // (8.x) renames the classic DID to PID and splits the single State column into // State (RAID role: Conf/UConf/GHS/JBOD) and Status (health: Online/Offline/ // Failed/Missing); the latter is what maps to FwState. Drives nest under // Controllers[].Response Data.Drives List[]; health counters sit directly in // "Drive Detailed Information" (SAS/SATA) or under its "LU/NS Properties" for // NVMe namespaces. func parsePerccli2(text string) []ctrlDrive { m := loadJSON(text) if m == nil { return nil } controllers, ok := m["Controllers"].([]interface{}) if !ok { return nil } var drives []ctrlDrive for _, c := range controllers { cm, ok := c.(map[string]interface{}) if !ok { continue } list, ok := jLeaf(cm, "Response Data", "Drives List").([]interface{}) if !ok { continue } for _, it := range list { dm, ok := it.(map[string]interface{}) if !ok { continue } info := jObj(dm, "Drive Information") if info == nil { continue } cd := ctrlDrive{} // Location + identity from the summary block. if es := jStr(info, "EID:Slt"); es != "" { if i := strings.Index(es, ":"); i >= 0 { cd.Enclosure = strings.TrimSpace(es[:i]) cd.Slot = strings.TrimSpace(es[i+1:]) } } if pid := jInt(info, "PID"); pid != nil { cd.DeviceID = strconv.Itoa(*pid) } cd.FwState = jStr(info, "Status") // Health verdict, not the RAID role. cd.Model = jStr(info, "Model") cd.Rotation = perccli2Rotation(jStr(info, "Intf"), jStr(info, "Med")) // Detail block: identity fallbacks, temperature, error counters. if detail := jObj(dm, "Drive Detailed Information"); detail != nil { cd.Serial = jStr(detail, "Serial Number") cd.Firmware = jStr(detail, "Firmware Revision Level") if cd.Model == "" { cd.Model = jStr(detail, "Model") } if t := jInt(detail, "Temperature(C)"); t != nil { cd.TempC = t } // Counters live in detail (SAS/SATA) or LU/NS Properties (NVMe). props := jObj(detail, "LU/NS Properties") ci := func(key string) int { if v := jInt(detail, key); v != nil { return *v } if props != nil { if v := jInt(props, key); v != nil { return *v } } return 0 } cd.MediaErr = ci("Media Error Count") cd.OtherErr = ci("Other Error Count") cd.Predictive = ci("Predictive Failure Count") } if cd.DeviceID != "" || cd.Slot != "" { drives = append(drives, cd) } } } return drives } // perccli2Rotation maps a perccli2 interface/media pair to a rotation label. func perccli2Rotation(intf, med string) string { switch { case strings.EqualFold(intf, "NVMe"): return "NVMe" case strings.EqualFold(med, "SSD"): return "SSD" default: return "" } } // atoiSafe extracts the first integer found in s (leading sign allowed) and // returns 0 when none is present, since controller output often wraps the number // in units or surrounding labels. func atoiSafe(s string) int { s = strings.TrimSpace(s) if m := regexp.MustCompile(`-?\d+`).FindString(s); m != "" { if n, err := strconv.Atoi(m); err == nil { return n } } return 0 }