package main import ( "fmt" "sort" "strings" "time" ) // collect discovers every drive, queries SMART, attaches controller data, and // scores it. func collect() ([]*Drive, int64) { host := hostname() if app != nil && app.config != nil && app.config.Hostname != "" { host = app.config.Hostname } collectedAt := time.Now().UTC().Format("2006-01-02T15:04:05Z") tsNs := time.Now().Unix() * 1e9 st := newSmartTool() ctrl := controllerIndex() devices := st.scan() // Fallback: no scan-open results but we do have controller drives -> probe // a base device by megaraid index. if len(devices) == 0 && len(ctrl) > 0 { base := findBaseDev() ids := make([]string, 0, len(ctrl)) for id := range ctrl { ids = append(ids, id) } sort.Strings(ids) for _, id := range ids { for _, tmpl := range megaraidDtypes { devices = append(devices, scanned{ path: base, dtype: fmt.Sprintf(tmpl, id), megaraidN: id, }) } } } var drives []*Drive matched := map[string]bool{} // Controller IDs covered by a smartctl device. for _, sc := range devices { if sc.megaraidN != "" { matched[sc.megaraidN] = true } d := &Drive{ CollectedAt: collectedAt, Hostname: host, DeviceID: sc.megaraidN, } ok := st.querySmart(sc.path, sc.dtype, d) // Skip iSCSI LUNs and RAID virtual disks; they are not physical drives. if isPseudoDevice(d) { continue } // Attach controller-side data by megaraid index == DeviceID. if sc.megaraidN != "" { if cd, found := ctrl[sc.megaraidN]; found { applyController(d, cd) } } // Determine whether real SMART attribute data was obtained. d.HaveSmart = ok && d.Model != "" && (d.PowerOnHours != nil || d.WearPctRemaining != nil || d.SmartHealth == "PASSED" || d.SmartHealth == "FAILED" || d.SmartHealth == "PASSED_BY_ATTR") finalizeDerived(d) drives = append(drives, d) } // Emit controller-only drives: physical drives the controller reports but // smartctl cannot reach (e.g. NVMe behind a PERC). Health comes entirely // from the controller (Status, predictive-failure, media/other counters). ids := make([]string, 0, len(ctrl)) for id := range ctrl { ids = append(ids, id) } sort.Strings(ids) for _, id := range ids { if matched[id] { continue } d := &Drive{CollectedAt: collectedAt, Hostname: host, DeviceID: id} applyController(d, ctrl[id]) if isPseudoDevice(d) { continue } d.HaveSmart = false finalizeDerived(d) drives = append(drives, d) } return drives, tsNs } // applyController fills controller-side fields and uses MegaCLI/storcli inquiry // as an identity fallback when smartctl passthrough failed. func applyController(d *Drive, cd ctrlDrive) { d.Enclosure = cd.Enclosure d.Slot = cd.Slot d.MediaErrCtrl = cd.MediaErr d.OtherErrCtrl = cd.OtherErr d.PredictiveFailureCtrl = cd.Predictive d.SmartAlertCtrl = cd.SmartAlert d.FwState = cd.FwState // Identity fallback for when smartctl could not read the drive. Prefer the // structured fields (perccli2); else split the legacy single-line Inquiry. if cd.Model != "" || cd.Serial != "" || cd.Firmware != "" { if d.Serial == "" { d.Serial = cd.Serial } if d.Model == "" { d.Model = cd.Model } if d.Firmware == "" { d.Firmware = cd.Firmware } } else if cd.Inquiry != "" { // Legacy MegaCLI "Inquiry Data" packs " " on // one line, where the model itself can contain spaces and the token count // varies. Serial is always first and the firmware revision always last, so // anchor on those and treat everything between as the model. parts := strings.Fields(cd.Inquiry) if d.Serial == "" && len(parts) >= 1 { d.Serial = parts[0] } if d.Firmware == "" && len(parts) >= 2 { d.Firmware = parts[len(parts)-1] } if d.Model == "" && len(parts) >= 3 { d.Model = strings.Join(parts[1:len(parts)-1], " ") } } if d.Rotation == "" { d.Rotation = cd.Rotation } if d.TempC == nil { d.TempC = cd.TempC } } // finalizeDerived computes defect_total, power_on_years, and the risk score. func finalizeDerived(d *Drive) { // Aggregate drive-attributable defect counters. nil only when NONE was // readable, so NO_DATA rows stay blank instead of showing a misleading 0. defectParts := []*int{ d.Reallocated, d.Pending, d.Uncorrectable, d.ReportedUncorrect, d.RuntimeBadBlocks, d.EndToEnd, } anyKnown := false sum := 0 for _, p := range defectParts { if p != nil { anyKnown = true sum += *p } } if anyKnown { d.DefectTotal = pInt(sum) } if d.PowerOnHours != nil && *d.PowerOnHours > 0 { y := float64(*d.PowerOnHours) / 8760.0 d.PowerOnYears = pF(float64(int(y*100+0.5)) / 100) // Round to two decimals. } d.RiskScore, d.Recommendation, d.RiskReasons = scoreDrive(d) } // findBaseDev returns a real base block device to anchor the megaraid // passthrough fallback probe, skipping loop and md devices and defaulting to // /dev/sda when lsblk yields nothing usable. func findBaseDev() string { out := run("lsblk", "-dno", "NAME") for _, ln := range strings.Split(out, "\n") { name := strings.TrimSpace(ln) if name != "" && !strings.Contains(name, "loop") && !strings.HasPrefix(name, "md") { return "/dev/" + name } } return "/dev/sda" }