package main import ( "os" "path/filepath" "strings" "testing" ) // readFixture loads a captured real-world sample from testdata/. func readFixture(t *testing.T, name string) string { t.Helper() b, err := os.ReadFile(filepath.Join("testdata", name)) if err != nil { t.Fatalf("read fixture %s: %v", name, err) } return string(b) } // ---- Real smartctl JSON: Samsung 870 EVO via sat+megaraid (smartmontools 7.0) ---- func TestParseSmartJSON_RealMegaraidSSD(t *testing.T) { d := &Drive{} parseSmartJSON(loadJSON(readFixture(t, "smart_ata_ssd_megaraid.json")), d) if d.Model != "Samsung SSD 870 EVO 2TB" { t.Errorf("model = %q", d.Model) } if d.Serial != "S624NS0RC00003M" { t.Errorf("serial = %q", d.Serial) } if d.Firmware != "SVT02B6Q" { t.Errorf("firmware = %q", d.Firmware) } if d.Rotation != "SSD" { t.Errorf("rotation = %q, want SSD", d.Rotation) } if d.Capacity != "2.00 TB" { t.Errorf("capacity = %q, want 2.00 TB", d.Capacity) } if d.SmartHealth != "PASSED" { t.Errorf("health = %q", d.SmartHealth) } if iv(d.PowerOnHours) != 33518 { t.Errorf("poh = %v, want 33518", d.PowerOnHours) } if iv(d.PowerCycleCount) != 7 { t.Errorf("power_cycles = %v, want 7", d.PowerCycleCount) } if iv(d.TempC) != 31 { t.Errorf("temp = %v, want 31", d.TempC) } // Healthy drive: all defect counters present and zero. if iv(d.Reallocated) != 0 || iv(d.UdmaCrc) != 0 || iv(d.ReportedUncorrect) != 0 { t.Errorf("defects: realloc=%v crc=%v reported=%v, want 0/0/0", d.Reallocated, d.UdmaCrc, d.ReportedUncorrect) } // Samsung Wear_Leveling_Count (ID177) value 93 -> 7% consumed. if d.WearSrc != "Samsung/ID177" || iv(d.WearPctConsumed) != 7 { t.Errorf("wear src=%q consumed=%v, want Samsung/ID177 / 7", d.WearSrc, d.WearPctConsumed) } d.HaveSmart = true finalizeDerived(d) if d.Recommendation != "OK" { t.Errorf("rec = %q (score %d), want OK", d.Recommendation, d.RiskScore) } if iv(d.DefectTotal) != 0 { t.Errorf("defect_total = %v, want 0", d.DefectTotal) } } // ---- Real smartctl JSON: Micron 5400 via sat+megaraid, no controller CLI ---- // util01 (Ubuntu 24, smartmontools 7.4) has megaraid drives but no storcli/ // MegaCLI, so no controller data merges. The drive exposes ID173/202/233 wear // attributes at once, and its model is a bare part number (no vendor prefix). func TestParseSmartJSON_RealMicronSSD(t *testing.T) { d := &Drive{} parseSmartJSON(loadJSON(readFixture(t, "smart_ata_ssd_micron.json")), d) if d.Model != "MTFDDAK960TGA-1BC1ZABDA" { t.Errorf("model = %q", d.Model) } if isPseudoDevice(d) { t.Errorf("bare part-number model wrongly flagged as pseudo device") } if d.Rotation != "SSD" { t.Errorf("rotation = %q, want SSD", d.Rotation) } if d.Capacity != "0.96 TB" { t.Errorf("capacity = %q, want 0.96 TB", d.Capacity) } if d.SmartHealth != "PASSED" { t.Errorf("health = %q", d.SmartHealth) } if iv(d.PowerOnHours) != 20238 || iv(d.PowerCycleCount) != 12 { t.Errorf("poh=%v cycles=%v, want 20238/12", d.PowerOnHours, d.PowerCycleCount) } // ID173 wins the wearAttr precedence; VALUE 100 -> 0% consumed (fresh). if d.WearSrc != "Micron/ID173" || iv(d.WearPctConsumed) != 0 { t.Errorf("wear src=%q consumed=%v, want Micron/ID173 / 0", d.WearSrc, d.WearPctConsumed) } d.HaveSmart = true finalizeDerived(d) if d.Recommendation != "OK" { t.Errorf("rec = %q (score %d), want OK", d.Recommendation, d.RiskScore) } } // ---- Real NVMe JSON: health log drives wear/identity (direct-attached SSD) ---- func TestParseSmartJSON_RealNVMe(t *testing.T) { d := &Drive{} parseSmartJSON(loadJSON(readFixture(t, "smart_nvme.json")), d) if d.Model != "Force MP510" { t.Errorf("model = %q", d.Model) } if d.Rotation != "NVMe" { t.Errorf("rotation = %q, want NVMe", d.Rotation) } if iv(d.PowerOnHours) != 42811 { t.Errorf("poh = %v, want 42811", d.PowerOnHours) } // NVMe percentage_used 6 -> 6% consumed. if d.WearSrc != "NVMe/percentage_used" || iv(d.WearPctConsumed) != 6 { t.Errorf("wear src=%q consumed=%v, want NVMe/percentage_used / 6", d.WearSrc, d.WearPctConsumed) } if iv(d.NvmeCriticalWarning) != 0 || iv(d.NvmeAvailSpare) != 100 { t.Errorf("nvme crit=%v spare=%v, want 0/100", d.NvmeCriticalWarning, d.NvmeAvailSpare) } d.HaveSmart = true finalizeDerived(d) // Healthy NVMe, 42811h (>4yr -> age +4), wear 6% -> OK. if d.Recommendation != "OK" { t.Errorf("rec = %q (score %d), want OK", d.Recommendation, d.RiskScore) } } // ---- Real SAS SSD JSON: endurance + grown-defect/error-counter health ---- // SAMSUNG ARFX0920S5xnNTRI behind a SAS HBA (tama, smartmontools 7.5). SAS // drives have no ATA attribute table; hard-defect signals come from the SCSI // logs (grown defect list, error counter log, pending defects). func TestParseSmartJSON_RealSASSSD(t *testing.T) { d := &Drive{} parseSmartJSON(loadJSON(readFixture(t, "smart_sas_ssd.json")), d) if d.Model != "SAMSUNG ARFX0920S5xnNTRI" || d.Serial != "S43YNF0K000001" { t.Errorf("identity model=%q serial=%q", d.Model, d.Serial) } if !strings.HasPrefix(d.Transport, "SAS") { t.Errorf("transport = %q, want SAS*", d.Transport) } if d.Rotation != "SSD" { t.Errorf("rotation = %q, want SSD", d.Rotation) } if d.SmartHealth != "PASSED" { t.Errorf("health = %q", d.SmartHealth) } if iv(d.PowerOnHours) != 2487 || iv(d.TempC) != 56 { t.Errorf("poh=%v temp=%v, want 2487/56", d.PowerOnHours, d.TempC) } // SCSI endurance indicator 0% -> 0% consumed. if d.WearSrc != "SCSI/endurance" || iv(d.WearPctConsumed) != 0 { t.Errorf("wear src=%q consumed=%v, want SCSI/endurance / 0", d.WearSrc, d.WearPctConsumed) } // Healthy SAS drive: grown defects, uncorrected errors, pending all zero // (and non-nil, since the SCSI logs were present). if iv(d.Reallocated) != 0 || iv(d.Uncorrectable) != 0 || iv(d.Pending) != 0 { t.Errorf("defects: grown=%v uncorrected=%v pending=%v, want 0/0/0", d.Reallocated, d.Uncorrectable, d.Pending) } d.HaveSmart = true finalizeDerived(d) if d.Recommendation != "OK" { t.Errorf("rec = %q (score %d), want OK", d.Recommendation, d.RiskScore) } } // ---- SAS hard-defect signals drive the score (synthetic, both JSON + text) ---- // The captured SAS drives are healthy; verify the SCSI error counter log and // pending-defect count actually feed the scorer when nonzero. func TestSASUncorrectedErrorsScored(t *testing.T) { const j = `{ "model_name": "SEAGATE ST4000NM", "device": {"type": "scsi", "protocol": "SCSI"}, "smart_status": {"passed": true}, "scsi_grown_defect_list": 3, "scsi_error_counter_log": { "read": {"total_uncorrected_errors": 2}, "write": {"total_uncorrected_errors": 0}, "verify": {"total_uncorrected_errors": 1} }, "scsi_pending_defects": {"count": 4} }` d := &Drive{} parseSmartJSON(loadJSON(j), d) if iv(d.Uncorrectable) != 3 { // 2 read + 1 verify t.Errorf("uncorrectable = %v, want 3", d.Uncorrectable) } if iv(d.Pending) != 4 { t.Errorf("pending = %v, want 4", d.Pending) } if iv(d.Reallocated) != 3 { // grown defect list t.Errorf("reallocated(grown) = %v, want 3", d.Reallocated) } d.HaveSmart = true finalizeDerived(d) if d.Recommendation != "REPLACE_NOW" { t.Errorf("rec = %q (score %d), want REPLACE_NOW", d.Recommendation, d.RiskScore) } // Text path: same signals from `smartctl -a` SAS output. const text = `Vendor: SEAGATE Product: ST4000NM SMART Health Status: OK Elements in grown defect list: 3 Error counter log: read: 0 0 0 0 0 2620.555 2 write: 0 0 0 0 0 2091.250 0 verify: 0 0 0 0 0 46.845 1 Pending defect count:4 Pending Defects ` dt := &Drive{} parseSmartText(text, dt) if iv(dt.Uncorrectable) != 3 || iv(dt.Pending) != 4 || iv(dt.Reallocated) != 3 { t.Errorf("text path: uncorrected=%v pending=%v grown=%v, want 3/4/3", dt.Uncorrectable, dt.Pending, dt.Reallocated) } } // ---- iSCSI LUNs (LIO/IET) are dropped via the SCSI transport, not by model ---- func TestISCSILunFilter(t *testing.T) { d := &Drive{} parseSmartJSON(loadJSON(readFixture(t, "smart_iscsi_lio.json")), d) if d.Transport != "iSCSI" { t.Fatalf("transport = %q, want iSCSI", d.Transport) } if !isPseudoDevice(d) { t.Errorf("LIO-ORG iSCSI LUN not flagged as pseudo device") } // Transport alone is authoritative even if the model looks like a real drive. if !isPseudoDevice(&Drive{Model: "Samsung SSD 860 EVO 1TB", Transport: "iSCSI"}) { t.Errorf("iSCSI transport not honored over a real-looking model") } // LIO model token still catches it on the text path (no transport field). if !isPseudoDevice(&Drive{Model: "LIO-ORG k8s1"}) { t.Errorf("LIO-ORG model token not matched") } // A real SAS/SATA drive (transport SAS) is not filtered. if isPseudoDevice(&Drive{Model: "HGST HUH721010ALE604", Transport: "SAS"}) { t.Errorf("real SAS drive wrongly flagged as pseudo device") } } // ---- Pseudo devices must be filtered: iSCSI LUN + RAID controller VD ---- func TestPseudoDeviceFilter(t *testing.T) { iscsi := &Drive{} parseSmartJSON(loadJSON(readFixture(t, "smart_iscsi_virtual_disk.json")), iscsi) if iscsi.Model != "IET VIRTUAL-DISK" { t.Fatalf("iscsi model = %q", iscsi.Model) } if !isPseudoDevice(iscsi) { t.Errorf("IET VIRTUAL-DISK not flagged as pseudo device") } vd := &Drive{} parseSmartJSON(loadJSON(readFixture(t, "smart_raid_vd.json")), vd) if vd.Model != "BROADCOM MR9560-16i" { t.Fatalf("raid vd model = %q", vd.Model) } if !isPseudoDevice(vd) { t.Errorf("BROADCOM MR9560-16i VD not flagged as pseudo device") } avago := &Drive{} parseSmartJSON(loadJSON(readFixture(t, "smart_raid_vd_avago.json")), avago) if avago.Model != "AVAGO MR9363-4i" { t.Fatalf("avago vd model = %q", avago.Model) } if !isPseudoDevice(avago) { t.Errorf("AVAGO MR9363-4i VD not flagged as pseudo device") } // SMART-less RAID virtual disk identified by model (DELL RAID / PERC VDs). for _, model := range []string{"DELL RAID", "DELL PERC H730"} { if !isPseudoDevice(&Drive{Model: model}) { t.Errorf("RAID VD %q not flagged as pseudo device", model) } } // Real drives behind these controllers must NOT be filtered. for _, model := range []string{"Samsung SSD 870 EVO 2TB", "MICRON_M510DC_MTFDDAK960MBP", "HFS960G3H2X069N", "Dell DC NVMe PM9A3 RI U.2 960GB"} { if isPseudoDevice(&Drive{Model: model}) { t.Errorf("real drive %q wrongly flagged as pseudo device", model) } } } // ---- Real storcli: DID comes from the table row; sub-headers don't fragment ---- func TestParseStorcli_Real(t *testing.T) { drives := parseStorcli(readFixture(t, "storcli_show_all.txt")) if len(drives) != 4 { t.Fatalf("parsed %d drives, want 4", len(drives)) } byDID := map[string]ctrlDrive{} for _, cd := range drives { if cd.DeviceID == "" { t.Errorf("drive at %s:%s has empty DID", cd.Enclosure, cd.Slot) } byDID[cd.DeviceID] = cd } // Slot->DID->OtherErr mapping pulled straight from the capture. wantOther := map[string]struct { slot string otherErr int }{ "22": {"0", 7}, "23": {"1", 6}, "20": {"2", 21}, "21": {"3", 12}, } for did, want := range wantOther { cd, ok := byDID[did] if !ok { t.Errorf("DID %s missing", did) continue } if cd.Enclosure != "64" || cd.Slot != want.slot { t.Errorf("DID %s location %s:%s, want 64:%s", did, cd.Enclosure, cd.Slot, want.slot) } if cd.OtherErr != want.otherErr { t.Errorf("DID %s other_err=%d, want %d", did, cd.OtherErr, want.otherErr) } if cd.MediaErr != 0 || cd.Predictive != 0 || cd.SmartAlert { t.Errorf("DID %s media=%d pred=%d alert=%v, want 0/0/false", did, cd.MediaErr, cd.Predictive, cd.SmartAlert) } if cd.FwState != "Onln" { t.Errorf("DID %s fw_state=%q, want Onln", did, cd.FwState) } } } // ---- Real storcli, second version/enclosure: parser generalizes ---- // storcli v007.1907, enclosure 252, Micron SSDs (jarvis01-kvm92). Confirms the // summary-header + table-row DID logic is not specific to one storcli build. func TestParseStorcli_RealV2(t *testing.T) { drives := parseStorcli(readFixture(t, "storcli_show_all_v2.txt")) if len(drives) != 4 { t.Fatalf("parsed %d drives, want 4", len(drives)) } // DID N lives in slot (N-4), enclosure 252; all Online with zero errors. for _, cd := range drives { if cd.DeviceID == "" { t.Errorf("drive at %s:%s has empty DID", cd.Enclosure, cd.Slot) } if cd.Enclosure != "252" { t.Errorf("DID %s enclosure=%q, want 252", cd.DeviceID, cd.Enclosure) } if cd.FwState != "Onln" { t.Errorf("DID %s fw_state=%q, want Onln", cd.DeviceID, cd.FwState) } if cd.MediaErr != 0 || cd.OtherErr != 0 || cd.Predictive != 0 { t.Errorf("DID %s errors media=%d other=%d pred=%d, want 0/0/0", cd.DeviceID, cd.MediaErr, cd.OtherErr, cd.Predictive) } } byDID := map[string]ctrlDrive{} for _, cd := range drives { byDID[cd.DeviceID] = cd } if cd, ok := byDID["4"]; !ok || cd.Slot != "0" { t.Errorf("DID 4 -> %s:%s, want 252:0", cd.Enclosure, cd.Slot) } if cd, ok := byDID["7"]; !ok || cd.Slot != "3" { t.Errorf("DID 7 -> %s:%s, want 252:3", cd.Enclosure, cd.Slot) } } // ---- Real perccli2 JSON: DID->PID, State vs Status, NVMe namespace counters ---- // perccli2 (8.x) is JSON-native; the text form adds a second status column that // breaks positional parsing, so parsePerccli2 consumes the JSON. Fixture is a // real `perccli2 /call/eall/sall show all J` (Dell PM9A3 NVMe behind a PERC). func TestParsePerccli2_Real(t *testing.T) { drives := parsePerccli2(readFixture(t, "perccli2_show_all.json")) if len(drives) != 2 { t.Fatalf("parsed %d drives, want 2", len(drives)) } byPID := map[string]ctrlDrive{} for _, cd := range drives { byPID[cd.DeviceID] = cd } cd, ok := byPID["275"] if !ok { t.Fatal("PID 275 missing") } if cd.Enclosure != "284" || cd.Slot != "0" { t.Errorf("PID 275 location %s:%s, want 284:0", cd.Enclosure, cd.Slot) } // FwState comes from Status (Online), NOT State (Conf). if cd.FwState != "Online" { t.Errorf("PID 275 fw_state=%q, want Online", cd.FwState) } if cd.Model != "Dell DC NVMe PM9A3 RI U.2 960GB" || cd.Serial != "S6JGNA0X000001" { t.Errorf("PID 275 model=%q serial=%q", cd.Model, cd.Serial) } if cd.Rotation != "NVMe" { t.Errorf("PID 275 rotation=%q, want NVMe", cd.Rotation) } if iv(cd.TempC) != 30 { t.Errorf("PID 275 temp=%v, want 30", cd.TempC) } // NVMe error counters live under "LU/NS Properties", not directly in detail. if cd.MediaErr != 0 || cd.OtherErr != 0 || cd.Predictive != 0 { t.Errorf("PID 275 errors media=%d other=%d pred=%d, want 0/0/0", cd.MediaErr, cd.OtherErr, cd.Predictive) } if cd, ok := byPID["276"]; !ok || cd.Slot != "1" || iv(cd.TempC) != 32 { t.Errorf("PID 276 -> %s:%s temp=%v, want 284:1 / 32", cd.Enclosure, cd.Slot, cd.TempC) } } // ---- Controller-only drive: surfaced from controller data, scored not NO_DATA ---- func TestPerccli2ControllerOnlyDrive(t *testing.T) { drives := parsePerccli2(readFixture(t, "perccli2_show_all.json")) byPID := map[string]ctrlDrive{} for _, cd := range drives { byPID[cd.DeviceID] = cd } cd := byPID["275"] // Healthy controller-only drive: identity + health from controller only. d := &Drive{} applyController(d, cd) d.HaveSmart = false finalizeDerived(d) if d.enclosureSlot() != "284:0" { t.Errorf("enclosure_slot = %q, want 284:0", d.enclosureSlot()) } if d.Model != "Dell DC NVMe PM9A3 RI U.2 960GB" || d.Rotation != "NVMe" { t.Errorf("identity model=%q rotation=%q", d.Model, d.Rotation) } if d.Recommendation != "OK" { t.Errorf("healthy controller-only rec=%q (score %d), want OK", d.Recommendation, d.RiskScore) } // A failed controller drive is surfaced (scored), never dropped as NO_DATA. cd.FwState = "Failed" df := &Drive{} applyController(df, cd) df.HaveSmart = false finalizeDerived(df) if df.Recommendation == "NO_DATA" { t.Errorf("failed controller-only drive scored NO_DATA") } if df.RiskScore < 40 { t.Errorf("failed controller-only drive score=%d, want >=40", df.RiskScore) } } // ---- Controller merge by DID, where DID != slot (perccli v007.2616) ---- // On util01 the slot<->DID numbering is crossed (slot 0 = DID 1, slot 1 = // DID 0). smartctl's "megaraid,N" index equals the controller DID, so matching // controller data by DID (not slot) must still yield the correct enclosure:slot. func TestStorcli_DIDMatchingCrossed(t *testing.T) { idx := map[string]ctrlDrive{} for _, cd := range parseStorcli(readFixture(t, "perccli_show_all.txt")) { mergeCtrl(idx, cd) } cases := map[string]string{"0": "252:1", "1": "252:0"} // megaraid index -> enclosure:slot. for did, wantLoc := range cases { cd, ok := idx[did] if !ok { t.Errorf("DID %s missing from index", did) continue } d := &Drive{DeviceID: did} // DeviceID is the smartctl megaraid,N index. applyController(d, cd) if d.enclosureSlot() != wantLoc { t.Errorf("megaraid,%s -> %q, want %q", did, d.enclosureSlot(), wantLoc) } } } // ---- Real MegaCLI: matches storcli on the same hardware ---- func TestParseMegacli_Real(t *testing.T) { drives := parseMegacliPDList(readFixture(t, "megacli_pdlist.txt")) if len(drives) != 4 { t.Fatalf("parsed %d drives, want 4", len(drives)) } byDID := map[string]ctrlDrive{} for _, cd := range drives { byDID[cd.DeviceID] = cd } cd, ok := byDID["20"] if !ok { t.Fatal("DID 20 missing") } // DID 20 == slot 2 (cross-checks the storcli capture above). if cd.Enclosure != "64" || cd.Slot != "2" || cd.OtherErr != 21 { t.Errorf("DID 20 = %s:%s other=%d, want 64:2 / 21", cd.Enclosure, cd.Slot, cd.OtherErr) } if cd.FwState != "Online, Spun Up" { t.Errorf("DID 20 fw_state=%q", cd.FwState) } if iv(cd.TempC) != 31 { t.Errorf("DID 20 temp=%v, want 31", cd.TempC) } if cd.SmartAlert { t.Errorf("DID 20 smart alert set, want false") } } // ---- MegaCLI SMART-alert line uses "Drive has flagged a S.M.A.R.T alert" ---- func TestMegacliSmartAlertPhrasing(t *testing.T) { text := `Enclosure Device ID: 64 Slot Number: 5 Device Id: 99 Firmware state: Online, Spun Up Drive has flagged a S.M.A.R.T alert : Yes ` drives := parseMegacliPDList(text) if len(drives) != 1 { t.Fatalf("parsed %d drives, want 1", len(drives)) } if !drives[0].SmartAlert { t.Errorf("smart alert not detected from MegaCLI phrasing") } } // ---- Controller state scoring: storcli abbreviations are not faults ---- func TestFwStateScoring(t *testing.T) { cases := []struct { state string wantPts bool // true => the +40 fw_state penalty should apply }{ {"Onln", false}, {"Online, Spun Up", false}, {"GHS", false}, {"JBOD", false}, {"", false}, {"Offln", true}, {"Failed", true}, {"Rebuild", true}, } for _, c := range cases { d := &Drive{HaveSmart: true, Model: "X", SmartHealth: "PASSED", FwState: c.state} score, _, _ := scoreDrive(d) got := score >= 40 if got != c.wantPts { t.Errorf("fw_state %q: penalized=%v (score %d), want %v", c.state, got, score, c.wantPts) } } }