drive-health-metrics/drive_test.go
James Coleman ddafa90a02
Some checks failed
Go package / build (push) Has been cancelled
first commit
2026-06-22 17:16:34 -05:00

279 lines
9.1 KiB
Go

package main
import "testing"
// ---- JSON path (smartmontools >= 7.0): ATA SSD with a reallocated sector ----
// ataJSON is the JSON fixture consumed by TestParseSmartJSON_ATA (through
// loadJSON and parseSmartJSON). It describes an ATA device with
// rotation_rate 0 and a passing smart_status, and its attribute table carries
// raw values of 8 for Reallocated_Sector_Ct (ID 5), 26280 for Power_On_Hours
// (ID 9), 3 for UDMA_CRC_Error_Count (ID 199) and 12 for
// Percent_Lifetime_Remain (ID 202).
const ataJSON = `{
"model_name": "Micron_1300_MTFDDAK512TDL",
"serial_number": "21512A3B4C5D",
"firmware_version": "M5MU000",
"user_capacity": {"bytes": 512110190592},
"rotation_rate": 0,
"smart_status": {"passed": true},
"power_on_time": {"hours": 26280},
"power_cycle_count": 42,
"temperature": {"current": 31},
"ata_smart_attributes": {"table": [
{"id": 5, "name": "Reallocated_Sector_Ct", "value": 100, "worst": 100, "thresh": 10, "when_failed": "", "raw": {"value": 8, "string": "8"}},
{"id": 9, "name": "Power_On_Hours", "value": 95, "worst": 95, "thresh": 0, "when_failed": "", "raw": {"value": 26280, "string": "26280"}},
{"id": 199,"name": "UDMA_CRC_Error_Count", "value": 100, "worst": 100, "thresh": 0, "when_failed": "", "raw": {"value": 3, "string": "3"}},
{"id": 202,"name": "Percent_Lifetime_Remain","value": 88, "worst": 88, "thresh": 1, "when_failed": "", "raw": {"value": 12, "string": "12"}}
]}
}`
// TestParseSmartJSON_ATA runs parseSmartJSON over the ataJSON fixture and
// checks the identity, counter, wear and health fields it fills in on the
// Drive. It then marks the drive as having SMART data, calls finalizeDerived,
// and checks the resulting recommendation and defect total.
func TestParseSmartJSON_ATA(t *testing.T) {
	const (
		wantModel   = "Micron_1300_MTFDDAK512TDL"
		wantWearSrc = "Intel/ID202"
		wantRec     = "REPLACE_SOON"
	)

	drv := new(Drive)
	doc := loadJSON(ataJSON)
	parseSmartJSON(doc, drv)

	// Fields populated directly by the parser.
	if got := drv.Model; got != wantModel {
		t.Errorf("model = %q", got)
	}
	if got := drv.Rotation; got != "SSD" {
		t.Errorf("rotation = %q, want SSD", got)
	}
	if got := iv(drv.Reallocated); got != 8 {
		t.Errorf("reallocated = %v, want 8", drv.Reallocated)
	}
	if got := iv(drv.UdmaCrc); got != 3 {
		t.Errorf("udma_crc = %v, want 3", drv.UdmaCrc)
	}
	if got := iv(drv.PowerOnHours); got != 26280 {
		t.Errorf("poh = %v, want 26280", drv.PowerOnHours)
	}
	if wearOK := drv.WearSrc == wantWearSrc && iv(drv.WearPctConsumed) == 12; !wearOK {
		t.Errorf("wear src=%q consumed=%v, want Intel/ID202 / 12", drv.WearSrc, drv.WearPctConsumed)
	}
	if got := drv.SmartHealth; got != "PASSED" {
		t.Errorf("health = %q", got)
	}

	// Derived fields: the drive must be flagged as having SMART data before
	// finalizeDerived is asked to score it.
	drv.HaveSmart = true
	finalizeDerived(drv)

	// Expected scoring: reallocated 8 -> min(40+40,100)=80; udma 3 -> 9;
	// total 89 -> REPLACE_SOON.
	if got := drv.Recommendation; got != wantRec {
		t.Errorf("rec = %q (score %d), want REPLACE_SOON", got, drv.RiskScore)
	}
	if got := iv(drv.DefectTotal); got != 8 {
		t.Errorf("defect_total = %v, want 8", drv.DefectTotal)
	}
}
// ---- Text path (CentOS 6/7, smartmontools 5.x/6.x: no JSON) ----
// ataText is the plain-text fixture consumed by TestParseSmartText_ATA. Its
// banner identifies smartctl 6.5; it reports a PASSED overall-health result
// and an attribute table with raw values of 0 for Reallocated_Sector_Ct
// (ID 5), 51000 for Power_On_Hours (ID 9) and 5 for Current_Pending_Sector
// (ID 197), plus a Media_Wearout_Indicator (ID 233) whose VALUE column is 072.
const ataText = `smartctl 6.5 2016-05-07 r4318 [x86_64-linux] (local build)
=== START OF INFORMATION SECTION ===
Device Model: INTEL SSDSC2BB480G6
Serial Number: BTWA12345678480BGN
Firmware Version: G2010140
User Capacity: 480,103,981,056 bytes [480 GB]
Rotation Rate: Solid State Device
=== START OF READ SMART DATA SECTION ===
SMART overall-health self-assessment test result: PASSED
ID# ATTRIBUTE_NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE
5 Reallocated_Sector_Ct 0x0032 100 100 000 Old_age Always - 0
9 Power_On_Hours 0x0032 100 100 000 Old_age Always - 51000
197 Current_Pending_Sector 0x0012 100 100 000 Old_age Always - 5
233 Media_Wearout_Indicator 0x0032 072 072 000 Old_age Always - 0
`
// TestParseSmartText_ATA runs parseSmartText over the ataText fixture and
// checks the model, health, pending-sector, power-on-hours and wear fields it
// fills in. It then marks the drive as having SMART data, calls
// finalizeDerived, and checks the resulting recommendation.
func TestParseSmartText_ATA(t *testing.T) {
	const (
		wantModel   = "INTEL SSDSC2BB480G6"
		wantWearSrc = "Generic/ID233"
		wantRec     = "REPLACE_SOON"
	)

	drv := new(Drive)
	parseSmartText(ataText, drv)

	// Fields populated directly by the parser.
	if got := drv.Model; got != wantModel {
		t.Errorf("model = %q", got)
	}
	if got := drv.SmartHealth; got != "PASSED" {
		t.Errorf("health = %q", got)
	}
	if got := iv(drv.Pending); got != 5 {
		t.Errorf("pending = %v, want 5", drv.Pending)
	}
	if got := iv(drv.PowerOnHours); got != 51000 {
		t.Errorf("poh = %v, want 51000", drv.PowerOnHours)
	}
	if wearOK := drv.WearSrc == wantWearSrc && iv(drv.WearPctConsumed) == 28; !wearOK {
		t.Errorf("wear src=%q consumed=%v, want Generic/ID233 / 28", drv.WearSrc, drv.WearPctConsumed)
	}

	// Derived fields are computed only after the drive is flagged as having
	// SMART data.
	drv.HaveSmart = true
	finalizeDerived(drv)

	// Expected scoring: pending 5 -> min(50+25,100)=75 -> REPLACE_SOON.
	if got := drv.Recommendation; got != wantRec {
		t.Errorf("rec = %q (score %d), want REPLACE_SOON", got, drv.RiskScore)
	}
}
// ---- NVMe text path ----
// nvmeText is the plain-text fixture consumed by TestParseSmartText_NVMe. Its
// banner identifies smartctl 7.2 and it contains an NVMe health log section
// with Critical Warning 0x04, Available Spare 8% against a threshold of 10%,
// Percentage Used 96%, and zero media/data-integrity errors.
const nvmeText = `smartctl 7.2 2020-12-30 r5155 [x86_64-linux]
=== START OF INFORMATION SECTION ===
Model Number: Samsung SSD 980 PRO 1TB
Serial Number: S5GXNX0R123456
Firmware Version: 5B2QGXA7
=== START OF SMART DATA SECTION ===
SMART overall-health self-assessment test result: PASSED
SMART/Health Information (NVMe Log 0x02)
Critical Warning: 0x04
Temperature: 40 Celsius
Available Spare: 8%
Available Spare Threshold: 10%
Percentage Used: 96%
Power On Hours: 30,123
Power Cycles: 210
Media and Data Integrity Errors: 0
`
// TestParseSmartText_NVMe runs parseSmartText over the nvmeText fixture and
// checks the rotation label, critical-warning value, available-spare figures
// and wear percentage it fills in. It then marks the drive as having SMART
// data, calls finalizeDerived, and checks the resulting recommendation.
func TestParseSmartText_NVMe(t *testing.T) {
	d := &Drive{}
	parseSmartText(nvmeText, d)

	if d.Rotation != "NVMe" {
		t.Errorf("rotation = %q, want NVMe", d.Rotation)
	}

	// NvmeCriticalWarning is a pointer: report the nil case explicitly and
	// dereference otherwise, so a failure shows the parsed value rather than
	// a memory address.
	switch cw := d.NvmeCriticalWarning; {
	case cw == nil:
		t.Error("critical_warning = <nil>, want 4")
	case *cw != 4:
		t.Errorf("critical_warning = %v, want 4", *cw)
	}

	// NOTE(review): the iv-wrapped fields below are still printed as-is with
	// %v; if they are pointers too, their messages show addresses — confirm
	// against the Drive definition.
	if iv(d.NvmeAvailSpare) != 8 || iv(d.NvmeAvailSpareThresh) != 10 {
		t.Errorf("spare=%v thresh=%v, want 8/10", d.NvmeAvailSpare, d.NvmeAvailSpareThresh)
	}
	if iv(d.WearPctConsumed) != 96 {
		t.Errorf("wear consumed = %v, want 96", d.WearPctConsumed)
	}

	d.HaveSmart = true
	finalizeDerived(d)

	// Expected scoring: crit warning +60, spare<=thresh +40, wear96 +80 ->
	// >=100 REPLACE_NOW.
	if d.Recommendation != "REPLACE_NOW" {
		t.Errorf("rec = %q (score %d), want REPLACE_NOW", d.Recommendation, d.RiskScore)
	}
}
// ---- NO_DATA: nothing readable, no controller flags ----
// TestNoData checks that finalizeDerived, given a Drive with HaveSmart false
// and no other fields set, yields the NO_DATA recommendation and leaves
// DefectTotal nil.
func TestNoData(t *testing.T) {
	drv := &Drive{HaveSmart: false}
	finalizeDerived(drv)

	if rec := drv.Recommendation; rec != "NO_DATA" {
		t.Errorf("rec = %q, want NO_DATA", rec)
	}
	if total := drv.DefectTotal; total != nil {
		t.Errorf("defect_total = %v, want nil (blank)", total)
	}
}
// ---- MegaCLI PDList parsing + controller-driven scoring ----
// megacliText is the fixture consumed by TestMegacliAndScore through
// parseMegacliPDList. It holds two physical-drive records, each opening with
// an "Enclosure Device ID" line: Device Id 11 (enclosure 64, slot 3) with
// Media Error Count 369, Predictive Failure Count 1 and a flagged S.M.A.R.T
// alert, and Device Id 8 (enclosure 64, slot 0) with Media Error Count 0.
const megacliText = `
Enclosure Device ID: 64
Slot Number: 3
Device Id: 11
WWN: 5000C500A1B2C3D4
Media Error Count: 369
Other Error Count: 2
Predictive Failure Count: 1
Drive has flagged a S.M.A.R.T alert : Yes
Firmware state: Online, Spun Up
Inquiry Data: BTWA12345678 INTELSSDSC2BB480G6 G2010140
Drive Temperature: 35C (95.00 F)
Enclosure Device ID: 64
Slot Number: 0
Device Id: 8
Firmware state: Online, Spun Up
Media Error Count: 0
`
// TestMegacliAndScore parses the megacliText fixture with parseMegacliPDList,
// checks the counters and location recorded for device 11, then applies that
// controller record to a Drive without SMART data and checks the
// recommendation and enclosure:slot string produced after finalizeDerived.
func TestMegacliAndScore(t *testing.T) {
	parsed := parseMegacliPDList(megacliText)
	if n := len(parsed); n != 2 {
		t.Fatalf("parsed %d drives, want 2", n)
	}

	// Index the parsed records by device ID so the one under test can be
	// picked out regardless of its position.
	byDeviceID := make(map[string]ctrlDrive, len(parsed))
	for _, rec := range parsed {
		byDeviceID[rec.DeviceID] = rec
	}
	dev11, found := byDeviceID["11"]
	if !found {
		t.Fatal("device 11 not found")
	}

	countersOK := dev11.MediaErr == 369 && dev11.Predictive == 1 && dev11.SmartAlert
	if !countersOK {
		t.Errorf("dev11 media=%d pred=%d alert=%v", dev11.MediaErr, dev11.Predictive, dev11.SmartAlert)
	}
	locationOK := dev11.Enclosure == "64" && dev11.Slot == "3"
	if !locationOK {
		t.Errorf("dev11 location %s:%s, want 64:3", dev11.Enclosure, dev11.Slot)
	}

	// Controller-only drive (no smartctl): predictive + alert -> not NO_DATA.
	drv := &Drive{HaveSmart: false}
	applyController(drv, dev11)
	finalizeDerived(drv)

	// Expected scoring: predictive +70, alert +50, media 369 +30 = 150 ->
	// REPLACE_NOW.
	if got := drv.Recommendation; got != "REPLACE_NOW" {
		t.Errorf("rec = %q (score %d), want REPLACE_NOW", got, drv.RiskScore)
	}
	if drv.enclosureSlot() != "64:3" {
		t.Errorf("enclosure_slot = %q", drv.enclosureSlot())
	}
}
// ---- MegaCLI Inquiry Data identity fallback (real-world layouts) ----

// TestApplyControllerInquiry checks the identity fields applyController fills
// in from a controller record's Inquiry string. The fallback must anchor the
// serial on the first token and the firmware on the last, and treat whatever
// remains in between (which may itself contain spaces) as the model. The
// samples are the real MegaCLI "Inquiry Data" forms documented alongside the
// parser; a naive positional serial/model/firmware split mis-handled them.
func TestApplyControllerInquiry(t *testing.T) {
	type inquiryCase struct {
		label    string
		raw      string
		serial   string
		model    string
		firmware string
	}

	table := []inquiryCase{
		{
			label:    "model with internal space",
			raw:      "50026B727A005DED KINGSTON SEDC400S37480G SAFM02.H",
			serial:   "50026B727A005DED",
			model:    "KINGSTON SEDC400S37480G",
			firmware: "SAFM02.H",
		},
		{
			label:    "two tokens: serial + firmware only",
			raw:      "ZRT0CQ55ST12000NM000J-2TY103 SN02",
			serial:   "ZRT0CQ55ST12000NM000J-2TY103",
			model:    "",
			firmware: "SN02",
		},
		{
			label:    "clean three tokens",
			raw:      "BTWA12345678 INTELSSDSC2BB480G6 G2010140",
			serial:   "BTWA12345678",
			model:    "INTELSSDSC2BB480G6",
			firmware: "G2010140",
		},
	}

	for _, tc := range table {
		t.Run(tc.label, func(t *testing.T) {
			drv := new(Drive)
			applyController(drv, ctrlDrive{Inquiry: tc.raw})

			identityOK := drv.Serial == tc.serial &&
				drv.Model == tc.model &&
				drv.Firmware == tc.firmware
			if identityOK {
				return
			}
			t.Errorf("got serial=%q model=%q fw=%q; want serial=%q model=%q fw=%q",
				drv.Serial, drv.Model, drv.Firmware, tc.serial, tc.model, tc.firmware)
		})
	}
}
// ---- Output smoke: CSV header + influx line shape ----
// TestOutputShapes is a smoke test for the two serializers: the output of
// recordsToCSV must begin with the "collected_at" header column, and the
// output of recordsToInflux must begin with influxMeasurement.
func TestOutputShapes(t *testing.T) {
	const csvHeaderPrefix = "collected_at"

	d := &Drive{Hostname: "kvm60", Model: "X", Serial: "S1", SmartHealth: "PASSED"}
	finalizeDerived(d)

	csv := recordsToCSV([]*Drive{d})
	// Guard on the full prefix length, not just emptiness: a non-empty result
	// shorter than the prefix must be reported as a failure rather than
	// panicking in the slice expression.
	if len(csv) < len(csvHeaderPrefix) || csv[:len(csvHeaderPrefix)] != csvHeaderPrefix {
		t.Errorf("csv header malformed: %.40q", csv)
	}

	inf := recordsToInflux([]*Drive{d}, 1700000000000000000)
	if len(inf) < len(influxMeasurement) || inf[:len(influxMeasurement)] != influxMeasurement {
		t.Errorf("influx line malformed: %.60q", inf)
	}
}