first commit

This commit is contained in:
James Coleman 2026-06-22 17:14:03 -05:00
commit 8ca6e0494f
45 changed files with 7962 additions and 0 deletions

29
.github/workflows/release.yaml vendored Normal file
View file

@ -0,0 +1,29 @@
# Publishes a GoReleaser release whenever a GitHub release is created.
on:
  release:
    types: [created]
# The GITHUB_TOKEN passed to GoReleaser below is granted write access to
# repository contents and packages.
permissions:
  contents: write
  packages: write
jobs:
  goreleaser:
    runs-on: ubuntu-latest
    steps:
      -
        name: Checkout
        uses: actions/checkout@v4
        with:
          # Fetch the complete history rather than a shallow clone.
          fetch-depth: 0
      -
        # NOTE(review): no go-version is set here, while the test workflow pins
        # '1.21' — confirm the release toolchain is the one intended.
        name: Set up Go
        uses: actions/setup-go@v4
      -
        name: Run GoReleaser
        uses: goreleaser/goreleaser-action@v6
        with:
          distribution: goreleaser
          version: latest
          args: release --clean
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

21
.github/workflows/test_golang.yaml vendored Normal file
View file

@ -0,0 +1,21 @@
# Builds and tests the module on every push.
name: Go package
on: [push]
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      # Same major version of the checkout action as the release workflow.
      - uses: actions/checkout@v4
      - name: Set up Go
        uses: actions/setup-go@v4
        with:
          go-version: '1.21'
      - name: Build
        run: go build -v ./...
      - name: Test
        run: go test -v ./...

3
.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
# Build output
/dist/
/drive_health
# Binary produced by a plain `go build` in the repository root.
/drive-health-metrics

50
.goreleaser.yaml Normal file
View file

@ -0,0 +1,50 @@
# GoReleaser config for drive-health-metrics.
# https://goreleaser.com
#
# CGO is disabled so the binary is fully static (no glibc dependency) and runs
# unmodified across modern Linux distributions.
version: 2
project_name: drive-health-metrics
# Hooks run before the build: tidy the module and run the unit tests.
before:
  hooks:
    - go mod tidy
    - go test ./...
builds:
  - id: drive-health-metrics
    main: .
    binary: drive-health-metrics
    env:
      - CGO_ENABLED=0
    flags:
      - -trimpath
    # Stamp main.version, main.commit and main.date into the binary.
    ldflags:
      - -s -w -X main.version={{ .Version }} -X main.commit={{ .ShortCommit }} -X main.date={{ .Date }}
    # Only linux/amd64 is built.
    goos:
      - linux
    goarch:
      - amd64
archives:
  - id: default
    format: tar.gz
    name_template: "{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
    files:
      - README.md
checksum:
  name_template: "checksums.txt"
snapshot:
  version_template: "{{ incpatch .Version }}-snapshot"
# Changelog is built from git history, oldest first, leaving out commits whose
# subject starts with docs:, test: or chore:.
changelog:
  use: git
  sort: asc
  filters:
    exclude:
      - "^docs:"
      - "^test:"
      - "^chore:"

19
LICENSE.txt Normal file
View file

@ -0,0 +1,19 @@
Copyright (c) 2026 Mr. Gecko's Media (James Coleman). http://mrgeckosmedia.com/
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

36
Makefile Normal file
View file

@ -0,0 +1,36 @@
BINARY := drive-health-metrics
# VERSION is the single source of truth for the version string. COMMIT and DATE
# are derived from git and the build clock.
VERSION ?= $(shell cat VERSION 2>/dev/null || echo dev)
COMMIT := $(shell git rev-parse --short HEAD 2>/dev/null)
DATE := $(shell date -u '+%Y-%m-%dT%H:%M:%SZ')
# LDFLAGS stamps main.version, main.commit and main.date into the binary.
LDFLAGS := -s -w -X main.version=$(VERSION) -X main.commit=$(COMMIT) -X main.date=$(DATE)
.PHONY: all build test vet fmt snapshot release clean tools
## all: run the tests, then build
all: test build
## build: native static binary into dist/
build:
	CGO_ENABLED=0 go build -trimpath -ldflags '$(LDFLAGS)' -o dist/$(BINARY) .
## test: run the unit tests (smartctl/text/NVMe/MegaCLI parsers + scoring)
test:
	go test ./...
## vet: run go vet over every package
vet:
	go vet ./...
## fmt: rewrite the top-level Go files with gofmt
fmt:
	gofmt -w *.go
## snapshot: local GoReleaser build without publishing (artifacts in dist/)
snapshot:
	goreleaser release --snapshot --clean
## release: full GoReleaser release (CI runs this on a tag)
release:
	goreleaser release --clean
## clean: remove the dist/ build output
clean:
	rm -rf dist

98
README.md Normal file
View file

@ -0,0 +1,98 @@
# drive-health-metrics
Collects per-drive SMART health from **every physical drive** on a host
— direct SATA/SAS, NVMe, and drives hidden behind a RAID controller
(MegaCLI / storcli / perccli) — scores each drive, and exports the result as
**CSV**, **InfluxDB** (line protocol / API push / Kafka), and **Prometheus**.
## Modes
The tool runs one-shot by default, or as a long-lived service with `--server`.
### One-shot (default)
Writes CSV or InfluxDB line protocol to stdout once and exits. Run as root
(SMART access requires it):
```
drive-health-metrics # CSV to stdout
drive-health-metrics --format influx # InfluxDB line protocol to stdout (Telegraf exec input)
drive-health-metrics --version
```
### Service (`--server`)
Runs continuously, exposing a Prometheus `/metrics` endpoint and (when
configured) pushing to InfluxDB and/or Kafka on a schedule. Each scrape and
each push re-collects fresh SMART data.
```
drive-health-metrics --server # Prometheus on :9101/metrics
drive-health-metrics --server --http-port 9200 # override the port
drive-health-metrics --server -c /etc/drive-health-metrics.yaml
```
Send `SIGHUP` to reload the configuration without a full restart.
The InfluxDB measurement and Prometheus metric prefix are both `drive_health`
(e.g. `drive_health_risk_score`, `drive_health_temp_c`). Identity columns
(serial, model, enclosure_slot, …) are attached as tags/labels.
## Configuration
Service mode reads an optional YAML config, searched in this order: the path
given to `-c`/`--config`, then `./config.yaml`,
`~/.config/drive-health-metrics/config.yaml`, and
`/etc/drive-health-metrics.yaml`. Without a file, sensible defaults apply
(Prometheus enabled on `:9101/metrics`, no Influx push).
```yaml
# config.yaml
hostname: "" # host tag/label; defaults to the system hostname
http_output:
enabled: true # Prometheus /metrics endpoint
bind_addr: "" # default: all interfaces
port: 9101
metrics_path: /metrics
influx_output:
frequency: 60s # push interval; 0 (default) disables the push
# InfluxDB v2 API (all four required to enable)
influx_server: https://influx.example.com:8086
token: my-token
org: my-org
bucket: drive-health
# Kafka (brokers + topic required to enable)
kafka_brokers: ["kafka1:9092", "kafka2:9092"]
kafka_topic: telegraf
kafka_username: ""
kafka_password: ""
kafka_insecure_skip_verify: false
kafka_output_format: lineprotocol # lineprotocol (default) or json
```
## Recommendation scoring
Each drive gets a `risk_score` and a `recommendation`:
| Recommendation | Meaning |
|----------------|---------|
| `REPLACE_NOW` | hard defect — drive failing/failed (score ≥ 100) |
| `REPLACE_SOON` | serious wear or accumulating defects (≥ 50) |
| `MONITOR` | early warning signs (≥ 20) |
| `OK` | no meaningful defects (< 20) |
| `NO_DATA` | SMART unreadable **and** no controller red flags — re-collect, don't replace |
Only real, drive-attributable defects add meaningful score; missing/unreadable
data is never treated as a failure.
## Building
```
make build # native static binary -> dist/drive-health-metrics
make test # unit tests (parsers + scoring + exporters)
make snapshot # local GoReleaser build, no publish
```

1
VERSION Normal file
View file

@ -0,0 +1 @@
0.1.0

189
collect.go Normal file
View file

@ -0,0 +1,189 @@
package main
import (
"fmt"
"sort"
"strings"
"time"
)
// collect runs one collection pass: it discovers every drive, queries SMART,
// attaches controller-side data, and scores each drive. It returns the drive
// records and the collection timestamp in nanoseconds (whole-second
// resolution).
func collect() ([]*Drive, int64) {
	host := hostname()
	if app != nil && app.config != nil && app.config.Hostname != "" {
		host = app.config.Hostname
	}

	// Read the clock once so CollectedAt and the returned timestamp always
	// describe the same instant.
	now := time.Now().UTC()
	collectedAt := now.Format("2006-01-02T15:04:05Z")
	tsNs := now.Unix() * 1e9

	st := newSmartTool()
	ctrl := controllerIndex()

	// Sorted controller IDs keep both the fallback probe order and the
	// controller-only output order deterministic.
	ids := make([]string, 0, len(ctrl))
	for id := range ctrl {
		ids = append(ids, id)
	}
	sort.Strings(ids)

	devices := st.scan()

	// Fallback: no scan-open results but we do have controller drives -> probe
	// a base device by megaraid index, once per passthrough type variant.
	if len(devices) == 0 && len(ctrl) > 0 {
		base := findBaseDev()
		for _, id := range ids {
			for _, tmpl := range megaraidDtypes {
				devices = append(devices, scanned{
					path: base, dtype: fmt.Sprintf(tmpl, id), megaraidN: id,
				})
			}
		}
	}

	var drives []*Drive
	matched := map[string]bool{} // Controller IDs covered by a smartctl device.
	// rowOf maps a passthrough target (device path + megaraid index) to its row
	// in drives. The fallback above lists several type variants for the same
	// target; they must collapse into a single row instead of reporting the
	// same physical drive once per variant.
	rowOf := map[string]int{}
	for _, sc := range devices {
		key := ""
		if sc.megaraidN != "" {
			matched[sc.megaraidN] = true
			key = sc.path + "\x00" + sc.megaraidN
			// An earlier variant already read this drive; skip the re-query.
			if i, seen := rowOf[key]; seen && drives[i].HaveSmart {
				continue
			}
		}

		d := &Drive{
			CollectedAt: collectedAt,
			Hostname:    host,
			DeviceID:    sc.megaraidN,
		}
		ok := st.querySmart(sc.path, sc.dtype, d)

		// Skip iSCSI LUNs and RAID virtual disks; they are not physical drives.
		if isPseudoDevice(d) {
			continue
		}

		// Attach controller-side data by megaraid index == DeviceID.
		if sc.megaraidN != "" {
			if cd, found := ctrl[sc.megaraidN]; found {
				applyController(d, cd)
			}
		}

		// Determine whether real SMART attribute data was obtained.
		d.HaveSmart = ok && d.Model != "" && (d.PowerOnHours != nil ||
			d.WearPctRemaining != nil ||
			d.SmartHealth == "PASSED" || d.SmartHealth == "FAILED" ||
			d.SmartHealth == "PASSED_BY_ATTR")
		finalizeDerived(d)

		if key != "" {
			if i, seen := rowOf[key]; seen {
				// The existing row has no SMART data (checked above); upgrade
				// it only when this variant actually read the drive.
				if d.HaveSmart {
					drives[i] = d
				}
				continue
			}
			rowOf[key] = len(drives)
		}
		drives = append(drives, d)
	}

	// Emit controller-only drives: physical drives the controller reports but
	// smartctl cannot reach (e.g. NVMe behind a PERC). Health comes entirely
	// from the controller (Status, predictive-failure, media/other counters).
	for _, id := range ids {
		if matched[id] {
			continue
		}
		d := &Drive{CollectedAt: collectedAt, Hostname: host, DeviceID: id}
		applyController(d, ctrl[id])
		if isPseudoDevice(d) {
			continue
		}
		d.HaveSmart = false
		finalizeDerived(d)
		drives = append(drives, d)
	}
	return drives, tsNs
}
// applyController copies the controller's view of a drive onto d. Location,
// error counters, alert flags and firmware state are always overwritten;
// identity, rotation and temperature only fill gaps that smartctl left empty.
func applyController(d *Drive, cd ctrlDrive) {
	d.Enclosure, d.Slot = cd.Enclosure, cd.Slot
	d.MediaErrCtrl, d.OtherErrCtrl = cd.MediaErr, cd.OtherErr
	d.PredictiveFailureCtrl = cd.Predictive
	d.SmartAlertCtrl = cd.SmartAlert
	d.FwState = cd.FwState

	// fill writes val into *dst only when *dst is still empty.
	fill := func(dst *string, val string) {
		if *dst == "" {
			*dst = val
		}
	}

	switch {
	case cd.Model != "" || cd.Serial != "" || cd.Firmware != "":
		// Structured identity (perccli2) takes priority over the legacy line.
		fill(&d.Serial, cd.Serial)
		fill(&d.Model, cd.Model)
		fill(&d.Firmware, cd.Firmware)
	case cd.Inquiry != "":
		// The legacy "Inquiry Data" line is "<serial> <model...> <firmware>".
		// The model may contain spaces, so anchor on the first token (serial)
		// and the last (firmware) and join whatever lies between.
		tokens := strings.Fields(cd.Inquiry)
		n := len(tokens)
		if n >= 1 {
			fill(&d.Serial, tokens[0])
		}
		if n >= 2 {
			fill(&d.Firmware, tokens[n-1])
		}
		if n >= 3 {
			fill(&d.Model, strings.Join(tokens[1:n-1], " "))
		}
	}

	fill(&d.Rotation, cd.Rotation)
	if d.TempC == nil {
		d.TempC = cd.TempC
	}
}
// finalizeDerived fills the derived fields of d: the summed defect total, the
// power-on time in years, and the risk score with its recommendation and
// reasons.
func finalizeDerived(d *Drive) {
	// Sum the drive-attributable defect counters. DefectTotal stays nil when
	// none of them was readable, so NO_DATA rows stay blank rather than
	// showing a misleading 0.
	total, readable := 0, false
	for _, counter := range [...]*int{
		d.Reallocated, d.Pending, d.Uncorrectable,
		d.ReportedUncorrect, d.RuntimeBadBlocks, d.EndToEnd,
	} {
		if counter == nil {
			continue
		}
		readable = true
		total += *counter
	}
	if readable {
		d.DefectTotal = pInt(total)
	}

	if hours := d.PowerOnHours; hours != nil && *hours > 0 {
		years := float64(*hours) / 8760.0
		// Round to two decimals.
		d.PowerOnYears = pF(float64(int(years*100+0.5)) / 100)
	}

	d.RiskScore, d.Recommendation, d.RiskReasons = scoreDrive(d)
}
// findBaseDev picks the block device used to anchor the megaraid passthrough
// fallback probe: the first name listed by `lsblk -dno NAME` that does not
// contain "loop" and does not start with "md". When lsblk yields nothing
// usable it returns /dev/sda.
func findBaseDev() string {
	for _, line := range strings.Split(run("lsblk", "-dno", "NAME"), "\n") {
		dev := strings.TrimSpace(line)
		if dev == "" || strings.Contains(dev, "loop") || strings.HasPrefix(dev, "md") {
			continue
		}
		return "/dev/" + dev
	}
	return "/dev/sda"
}

131
config.go Normal file
View file

@ -0,0 +1,131 @@
package main
import (
"log"
"os"
"os/user"
"path"
"path/filepath"
"time"
"github.com/kkyr/fig"
)
// Config is the service-mode configuration, loaded from YAML (via fig) and
// overridable by flags. It only governs the output exporters; drive discovery
// and SMART collection auto-detect their tools and need no configuration.
type Config struct {
	// Hostname is used as the host tag/label on every metric. When empty it is
	// resolved from the system hostname.
	Hostname string `fig:"hostname"`

	// HTTP configures the Prometheus HTTP exporter; Influx configures the
	// scheduled InfluxDB/Kafka push.
	HTTP   HTTPOutputConfig   `fig:"http_output"`
	Influx InfluxOutputConfig `fig:"influx_output"`
}
// HTTPOutputConfig configures the Prometheus HTTP exporter. The defaults are
// Enabled=true, Port=9101 and MetricsPath="/metrics"; BindAddr has no default.
type HTTPOutputConfig struct {
	Enabled     bool   `fig:"enabled"`      // Enabled turns the exporter on.
	BindAddr    string `fig:"bind_addr"`    // BindAddr is the listen address.
	Port        uint   `fig:"port"`         // Port is the listen port.
	MetricsPath string `fig:"metrics_path"` // MetricsPath is the URL path of the metrics endpoint.
}
// InfluxOutputConfig configures the scheduled InfluxDB output. Metrics are
// pushed every Frequency to InfluxDB's v2 API and/or to Kafka. A zero Frequency
// (or no destination configured) disables the output.
type InfluxOutputConfig struct {
	// Frequency is the push interval.
	Frequency time.Duration `fig:"frequency"`

	// Kafka destination settings.
	KafkaBrokers            []string `fig:"kafka_brokers"`
	KafkaTopic              string   `fig:"kafka_topic"`
	KafkaUsername           string   `fig:"kafka_username"`
	KafkaPassword           string   `fig:"kafka_password"`
	KafkaInsecureSkipVerify bool     `fig:"kafka_insecure_skip_verify"`
	KafkaOutputFormat       string   `fig:"kafka_output_format"` // lineprotocol (default) or json.

	// InfluxDB v2 API destination settings.
	InfluxServer string `fig:"influx_server"`
	Token        string `fig:"token"`
	Org          string `fig:"org"`
	Bucket       string `fig:"bucket"`
}
// defaultConfig builds the baseline configuration that a config file (if any)
// is loaded on top of: the Prometheus exporter enabled on port 9101 at
// /metrics, and Kafka output in line-protocol format.
func defaultConfig() *Config {
	cfg := new(Config)

	cfg.HTTP.Enabled = true
	cfg.HTTP.Port = 9101
	cfg.HTTP.MetricsPath = "/metrics"

	cfg.Influx.KafkaOutputFormat = "lineprotocol"

	return cfg
}
// findConfigFile returns the first configuration file that exists, preferring
// the explicitly configured path (configPath), then ./config.yaml, then
// ~/.config/drive-health-metrics/config.yaml, and finally
// /etc/drive-health-metrics.yaml. It returns "" when none is found —
// configuration is optional. Directories never count as a configuration file.
func findConfigFile(configPath string) string {
	// isFile reports whether p can be stat'ed and is not a directory.
	isFile := func(p string) bool {
		info, err := os.Stat(p)
		return err == nil && !info.IsDir()
	}

	if configPath != "" {
		if isFile(configPath) {
			return configPath
		}
		// The search continues below, so do not claim that defaults apply yet.
		log.Printf("Configured config path %q is not a readable file, searching default locations", configPath)
	}

	var candidates []string
	if local, err := filepath.Abs("./config.yaml"); err == nil {
		candidates = append(candidates, local)
	}
	// Skip the per-user location when the home directory is unknown rather
	// than probing a path built from an empty prefix.
	if usr, err := user.Current(); err == nil && usr.HomeDir != "" {
		candidates = append(candidates,
			filepath.Join(usr.HomeDir, ".config", "drive-health-metrics", "config.yaml"))
	}
	candidates = append(candidates, "/etc/drive-health-metrics.yaml")

	for _, c := range candidates {
		if isFile(c) {
			return c
		}
	}
	return ""
}
// ReadConfig populates a.config. It never fails: it starts from the defaults,
// loads a configuration file over them when one is found (a load error is
// logged and otherwise ignored), applies the HTTP flag overrides, and resolves
// the host tag from the system when it is still unset.
func (a *App) ReadConfig() {
	cfg := defaultConfig()

	if file := findConfigFile(a.flags.ConfigPath); file != "" {
		// fig takes the file name and its search directory separately.
		dir, name := path.Split(file)
		if dir == "" {
			dir = "."
		}
		err := fig.Load(cfg, fig.File(name), fig.Dirs(dir))
		if err != nil {
			log.Printf("Error parsing configuration %q: %s", file, err)
		}
	}

	// Flags win over the file for the HTTP output; zero values mean "not set".
	if bind := a.flags.HTTPBind; bind != "" {
		cfg.HTTP.BindAddr = bind
	}
	if port := a.flags.HTTPPort; port != 0 {
		cfg.HTTP.Port = port
	}
	if mp := a.flags.HTTPMetricsPath; mp != "" {
		cfg.HTTP.MetricsPath = mp
	}

	// Fall back to the system hostname for the host tag.
	if cfg.Hostname == "" {
		cfg.Hostname = hostname()
	}

	a.config = cfg
}

349
controller.go Normal file
View file

@ -0,0 +1,349 @@
package main
import (
"regexp"
"strconv"
"strings"
)
// ctrlDrive holds the RAID-controller-side view of one physical drive — data
// smartctl cannot see (predictive-failure, firmware state, controller media/
// other error counters, physical enclosure:slot). Keyed for matching to a
// smartctl megaraid passthrough by DeviceID (== the megaraid,N index).
type ctrlDrive struct {
	DeviceID   string // DeviceID is the controller's device index (MegaCLI "Device Id", storcli DID, perccli2 PID).
	Enclosure  string // Enclosure and Slot give the physical location.
	Slot       string
	MediaErr   int    // MediaErr is the controller's "Media Error Count".
	OtherErr   int    // OtherErr is the controller's "Other Error Count".
	Predictive int    // Predictive is the controller's "Predictive Failure Count".
	SmartAlert bool   // SmartAlert is true when the controller reports a S.M.A.R.T alert flagged by the drive.
	FwState    string // FwState is the controller-reported drive state.
	TempC      *int   // TempC is the drive temperature in Celsius; nil when not reported.
	Inquiry    string // Inquiry is the legacy single-line MegaCLI/storcli inquiry (serial model fw).
	Model      string // Model is the structured identity (perccli2); used for controller-only drives.
	Serial     string
	Firmware   string
	Rotation   string // Rotation is "SSD"/"NVMe" derived from controller media/interface, when known.
}
// controllerIndex queries every RAID controller CLI family it can find and
// returns a DeviceID->ctrlDrive map. Families are tried newest first
// (perccli2, then storcli/perccli, then MegaCLI); because mergeCtrl keeps the
// first entry per DeviceID, data from an earlier family is never overwritten.
// With no controller CLI installed (plain HBA / onboard SATA / NVMe) the map is
// empty — that is fine, smartctl still covers those drives directly.
func controllerIndex() map[string]ctrlDrive {
	found := map[string]ctrlDrive{}

	// probe tries the binaries of one tool family in order, merging whatever
	// each installed binary reports, and stops at the first one that returns
	// any drives, so a host with several tools present still resolves.
	probe := func(parse func(string) []ctrlDrive, args []string, bins ...string) {
		for _, candidate := range bins {
			exe := lookPath(candidate)
			if exe == "" {
				continue
			}
			parsed := parse(run(exe, args...))
			for _, cd := range parsed {
				mergeCtrl(found, cd)
			}
			if len(parsed) > 0 {
				return
			}
		}
	}

	// perccli2 (8.x) is JSON-native. Its plain-text "show all" adds a second
	// status column that breaks positional parsing, so query JSON ('J') and use
	// the dedicated parser. Tried first since it covers the newest controllers.
	probe(parsePerccli2, []string{"/call/eall/sall", "show", "all", "J"},
		"perccli2", "/opt/MegaRAID/perccli2/perccli2")

	// storcli / perccli (classic) share the same text "show all" layout
	// (perccli is Dell's rebrand).
	probe(parseStorcli, []string{"/call/eall/sall", "show", "all"},
		"storcli64", "storcli", "perccli64", "perccli",
		"/opt/MegaRAID/storcli/storcli64", "/opt/MegaRAID/perccli/perccli64")

	// MegaCLI (older controllers).
	probe(parseMegacliPDList, []string{"-PDList", "-aAll"},
		"MegaCli64", "MegaCli", "megacli",
		"/opt/MegaRAID/MegaCli/MegaCli64", "/usr/sbin/megacli")

	return found
}
// mergeCtrl stores cd in idx under its DeviceID unless that ID is already
// present: the first writer wins, so a tool queried earlier cannot be clobbered
// by a later one. An entry with an empty DeviceID is ignored because it cannot
// be matched to a drive.
func mergeCtrl(idx map[string]ctrlDrive, cd ctrlDrive) {
	id := cd.DeviceID
	if id == "" {
		return
	}
	if _, taken := idx[id]; taken {
		return
	}
	idx[id] = cd
}
// afterColon extracts the value from a "Key : Value" line as emitted by
// MegaCLI/storcli: everything after the first colon, with surrounding
// whitespace removed. It returns "" when the line contains no colon.
func afterColon(s string) string {
	_, value, found := strings.Cut(s, ":")
	if !found {
		return ""
	}
	return strings.TrimSpace(value)
}
// megacliTempRe extracts the Celsius reading from a MegaCLI "Drive
// Temperature" line. It is compiled once at package scope rather than once
// per matching line.
var megacliTempRe = regexp.MustCompile(`(\d+)\s*C`)

// parseMegacliPDList parses the output of `MegaCli -PDList -aAll` into one
// ctrlDrive per physical drive. An "Enclosure Device ID" line opens a new
// record; the "Key : Value" lines that follow fill it. A record is kept only
// when it carries a device ID or a slot number.
func parseMegacliPDList(text string) []ctrlDrive {
	var drives []ctrlDrive
	var cur ctrlDrive
	have := false // have is true once the first record has been opened.
	flush := func() {
		if have && (cur.DeviceID != "" || cur.Slot != "") {
			drives = append(drives, cur)
		}
	}
	for _, raw := range strings.Split(text, "\n") {
		s := strings.TrimSpace(raw)
		switch {
		case strings.HasPrefix(s, "Enclosure Device ID"):
			flush()
			cur = ctrlDrive{Enclosure: afterColon(s)}
			have = true
		case strings.HasPrefix(s, "Slot Number"):
			cur.Slot = afterColon(s)
		case strings.HasPrefix(s, "Device Id"):
			cur.DeviceID = afterColon(s)
		case strings.HasPrefix(s, "Media Error Count"):
			cur.MediaErr = atoiSafe(afterColon(s))
		case strings.HasPrefix(s, "Other Error Count"):
			cur.OtherErr = atoiSafe(afterColon(s))
		case strings.HasPrefix(s, "Predictive Failure Count"):
			cur.Predictive = atoiSafe(afterColon(s))
		// MegaCLI phrases this as "Drive has flagged a S.M.A.R.T alert : No";
		// the older "S.M.A.R.T alert flagged by drive" form is kept for safety.
		case strings.HasPrefix(s, "Drive has flagged a S.M.A.R.T alert"),
			strings.HasPrefix(s, "S.M.A.R.T alert flagged by drive"):
			cur.SmartAlert = strings.Contains(s, "Yes")
		case strings.HasPrefix(s, "Firmware state"):
			cur.FwState = afterColon(s)
		case strings.HasPrefix(s, "Drive Temperature"):
			// TempC stays nil when the line carries no "<digits>C" reading.
			if m := megacliTempRe.FindStringSubmatch(s); m != nil {
				cur.TempC = pInt(atoiSafe(m[1]))
			}
		case strings.HasPrefix(s, "Inquiry Data"):
			cur.Inquiry = afterColon(s)
		}
	}
	flush()
	return drives
}
// Patterns used by parseStorcli, compiled once at package scope.
var (
	// storcliDriveHdrRe matches only the bare summary header (path then ":" at
	// end), not the "- Detailed Information"/"State"/"Device attributes"
	// sub-section headers.
	storcliDriveHdrRe = regexp.MustCompile(`^Drive /c\d+/e(\d+)/s(\d+)\s*:$`)
	// storcliRowRe matches the summary table data row "EID:Slt DID State ...".
	storcliRowRe = regexp.MustCompile(`^(\d+):(\d+)\s+(\d+)\s+(\S+)`)
	// storcliTempRe extracts the Celsius reading from a temperature value.
	storcliTempRe = regexp.MustCompile(`(\d+)\s*C`)
)

// parseStorcli parses `storcli /call/eall/sall show all`. A physical drive is
// introduced by a bare summary header ("Drive /c0/e64/s0 :") followed by a
// table row ("64:0 22 Onln ...") that carries the DID (== the megaraid index
// smartctl uses) and the controller state. The same drive then repeats sub-
// section headers ("Drive .../s0 - Detailed Information :", "... State :",
// "... Device attributes :") that must NOT open a new record — only the bare
// summary header does — so detail fields ("Key = Value") accumulate into one
// record across those sections.
//
// Identity is captured in the structured Serial/Model/Firmware fields ("SN",
// "Model Number", "Firmware Revision"). Inquiry is still filled as before, but
// it holds a bare model or manufacturer here, not the "<serial> <model>
// <firmware>" line that consumers of Inquiry split apart, so the structured
// fields are what keep the model from being mistaken for a serial.
func parseStorcli(text string) []ctrlDrive {
	var drives []ctrlDrive
	var cur ctrlDrive
	have := false // have is true once the first record has been opened.
	flush := func() {
		if have && (cur.DeviceID != "" || cur.Slot != "") {
			drives = append(drives, cur)
		}
	}
	// setOnce keeps the first value seen for a field.
	setOnce := func(dst *string, v string) {
		if *dst == "" {
			*dst = v
		}
	}
	for _, raw := range strings.Split(text, "\n") {
		s := strings.TrimSpace(raw)
		// New drive record: only the bare summary header opens one.
		if m := storcliDriveHdrRe.FindStringSubmatch(s); m != nil {
			flush()
			cur = ctrlDrive{Enclosure: m[1], Slot: m[2]}
			have = true
			continue
		}
		if !have {
			continue
		}
		// Summary table row supplies the DID and controller state.
		if m := storcliRowRe.FindStringSubmatch(s); m != nil && cur.DeviceID == "" {
			cur.DeviceID = m[3]
			cur.FwState = m[4]
			continue
		}
		k, v, ok := strings.Cut(s, "=")
		if !ok {
			continue
		}
		k, v = strings.TrimSpace(k), strings.TrimSpace(v)
		switch k {
		case "DID":
			cur.DeviceID = v
		case "Media Error Count":
			cur.MediaErr = atoiSafe(v)
		case "Other Error Count":
			cur.OtherErr = atoiSafe(v)
		case "Predictive Failure Count":
			cur.Predictive = atoiSafe(v)
		case "S.M.A.R.T alert flagged by drive":
			cur.SmartAlert = strings.EqualFold(v, "Yes")
		case "Firmware state", "State":
			setOnce(&cur.FwState, v)
		case "Drive Temperature":
			if m := storcliTempRe.FindStringSubmatch(v); m != nil {
				cur.TempC = pInt(atoiSafe(m[1]))
			}
		case "SN":
			setOnce(&cur.Serial, v)
		case "Firmware Revision":
			setOnce(&cur.Firmware, v)
		case "Model Number":
			setOnce(&cur.Model, v)
			setOnce(&cur.Inquiry, v)
		case "Manufacturer Identification":
			setOnce(&cur.Inquiry, v)
		}
	}
	flush()
	return drives
}
// parsePerccli2 parses the JSON output of `perccli2 /call/eall/sall show all J`.
// perccli2 (8.x) renames the classic DID to PID and splits the single State
// column into State (RAID role: Conf/UConf/GHS/JBOD) and Status (health:
// Online/Offline/Failed/Missing); Status is what maps to FwState. Drives nest
// under Controllers[]."Response Data"."Drives List"[]. Health counters sit
// directly in "Drive Detailed Information" (SAS/SATA) or under its "LU/NS
// Properties" (NVMe namespaces). It returns nil when the text is not the
// expected JSON shape.
func parsePerccli2(text string) []ctrlDrive {
	root := loadJSON(text)
	if root == nil {
		return nil
	}
	ctrls, isList := root["Controllers"].([]interface{})
	if !isList {
		return nil
	}

	// decode turns one "Drives List" entry into a ctrlDrive. keep is false when
	// the entry has no summary block or yields neither a device ID nor a slot.
	decode := func(entry map[string]interface{}) (cd ctrlDrive, keep bool) {
		summary := jObj(entry, "Drive Information")
		if summary == nil {
			return cd, false
		}

		// Location + identity from the summary block.
		if loc := jStr(summary, "EID:Slt"); loc != "" {
			if sep := strings.Index(loc, ":"); sep >= 0 {
				cd.Enclosure = strings.TrimSpace(loc[:sep])
				cd.Slot = strings.TrimSpace(loc[sep+1:])
			}
		}
		if pid := jInt(summary, "PID"); pid != nil {
			cd.DeviceID = strconv.Itoa(*pid)
		}
		cd.FwState = jStr(summary, "Status") // Health verdict, not the RAID role.
		cd.Model = jStr(summary, "Model")
		cd.Rotation = perccli2Rotation(jStr(summary, "Intf"), jStr(summary, "Med"))

		// Detail block: identity fallbacks, temperature, error counters.
		if detail := jObj(entry, "Drive Detailed Information"); detail != nil {
			cd.Serial = jStr(detail, "Serial Number")
			cd.Firmware = jStr(detail, "Firmware Revision Level")
			if cd.Model == "" {
				cd.Model = jStr(detail, "Model")
			}
			cd.TempC = jInt(detail, "Temperature(C)")

			// A counter is read from the detail block first, then from the
			// LU/NS Properties block, and defaults to 0.
			props := jObj(detail, "LU/NS Properties")
			count := func(key string) int {
				v := jInt(detail, key)
				if v == nil && props != nil {
					v = jInt(props, key)
				}
				if v == nil {
					return 0
				}
				return *v
			}
			cd.MediaErr = count("Media Error Count")
			cd.OtherErr = count("Other Error Count")
			cd.Predictive = count("Predictive Failure Count")
		}

		return cd, cd.DeviceID != "" || cd.Slot != ""
	}

	var drives []ctrlDrive
	for _, rawCtrl := range ctrls {
		ctrlObj, isObj := rawCtrl.(map[string]interface{})
		if !isObj {
			continue
		}
		entries, isList := jLeaf(ctrlObj, "Response Data", "Drives List").([]interface{})
		if !isList {
			continue
		}
		for _, rawEntry := range entries {
			entry, isObj := rawEntry.(map[string]interface{})
			if !isObj {
				continue
			}
			if cd, keep := decode(entry); keep {
				drives = append(drives, cd)
			}
		}
	}
	return drives
}
// perccli2Rotation derives a rotation label from a perccli2 interface/media
// pair: "NVMe" when the interface is NVMe, otherwise "SSD" when the media is
// SSD, otherwise "". Both comparisons ignore case.
func perccli2Rotation(intf, med string) string {
	if strings.EqualFold(intf, "NVMe") {
		return "NVMe"
	}
	if strings.EqualFold(med, "SSD") {
		return "SSD"
	}
	return ""
}
// firstIntRe matches the first (optionally negative) run of digits in a
// string. It is compiled once at package scope because atoiSafe is called for
// every counter line the controller parsers see.
var firstIntRe = regexp.MustCompile(`-?\d+`)

// atoiSafe extracts the first integer found in s (leading sign allowed) and
// returns 0 when none is present or the digits do not fit in an int, since
// controller output often wraps the number in units or surrounding labels.
func atoiSafe(s string) int {
	// FindString yields "" when there is no match, which Atoi rejects.
	n, err := strconv.Atoi(firstIntRe.FindString(s))
	if err != nil {
		return 0
	}
	return n
}

180
discover.go Normal file
View file

@ -0,0 +1,180 @@
package main
import (
"regexp"
"strconv"
"strings"
)
// smartTool holds the resolved smartctl binary and whether it can emit JSON
// (smartmontools >= 7.0). On CentOS 6/7 jsonCapable is false and we parse text.
type smartTool struct {
	bin         string // bin is the smartctl path or command name to execute.
	jsonCapable bool   // jsonCapable selects -j JSON parsing over -a text parsing.
}
// newSmartTool locates smartctl (falling back to the bare command name when no
// known location resolves) and inspects its `--version` banner: a major
// version of 7 or higher marks the tool as JSON-capable, anything else is
// parsed as text.
func newSmartTool() smartTool {
	resolved := lookPath("smartctl", "/usr/sbin/smartctl", "/sbin/smartctl", "/usr/local/sbin/smartctl")
	if resolved == "" {
		resolved = "smartctl"
	}
	tool := smartTool{bin: resolved}

	// The banner looks like "smartctl 7.2 2020-12-30 r5155 ...".
	banner := run(resolved, "--version")
	versionRe := regexp.MustCompile(`smartctl\s+(\d+)\.(\d+)`)
	if parts := versionRe.FindStringSubmatch(banner); parts != nil {
		if major, _ := strconv.Atoi(parts[1]); major >= 7 {
			tool.jsonCapable = true
		}
	}
	return tool
}
// scanned describes one device from `smartctl --scan-open`.
type scanned struct {
	path      string // path is the device node from the first column of the scan line.
	dtype     string // dtype is the value following -d on the scan line.
	megaraidN string // megaraidN is empty when the device is not a megaraid passthrough.
	comment   string // comment is whatever follows the optional '#' on the scan line.
}
// Patterns for `smartctl --scan-open` output.
var (
	// scanLine splits a scan line into device path, -d type, and the text
	// after the optional '#'.
	scanLine = regexp.MustCompile(`^(\S+)\s+-d\s+(\S+)\s*#?(.*)$`)
	// megaraidIdx captures N from a "megaraid,N" device type.
	megaraidIdx = regexp.MustCompile(`megaraid,(\d+)`)
)
// scan lists the devices reported by `smartctl --scan-open`. Blank lines,
// lines starting with '#', and lines that do not match the scan format are
// ignored, as are entries whose comment mentions VIRTUAL-DISK (iSCSI IET
// virtual disks are not physical drives). Megaraid passthrough entries get
// their controller index recorded in megaraidN.
func (s smartTool) scan() []scanned {
	var found []scanned
	for _, line := range strings.Split(run(s.bin, "--scan-open"), "\n") {
		line = strings.TrimSpace(line)
		if line == "" || line[0] == '#' {
			continue
		}
		fields := scanLine.FindStringSubmatch(line)
		if fields == nil {
			continue
		}
		if strings.Contains(strings.ToUpper(fields[3]), "VIRTUAL-DISK") {
			continue
		}
		entry := scanned{path: fields[1], dtype: fields[2], comment: fields[3]}
		if idx := megaraidIdx.FindStringSubmatch(entry.dtype); idx != nil {
			entry.megaraidN = idx[1]
		}
		found = append(found, entry)
	}
	return found
}
// querySmart runs `smartctl -a` once against path (adding -d dtype when dtype
// is set) and fills d from the output. With a JSON-capable smartctl it adds -j
// and parses JSON; otherwise it parses the text report. It returns true when
// usable SMART data was parsed. It makes a single attempt with the given
// dtype; trying alternative passthrough types is left to the caller.
func (s smartTool) querySmart(path, dtype string, d *Drive) bool {
	argv := make([]string, 0, 5)
	if s.jsonCapable {
		argv = append(argv, "-j")
	}
	argv = append(argv, "-a")
	if dtype != "" {
		argv = append(argv, "-d", dtype)
	}
	argv = append(argv, path)
	output := run(s.bin, argv...)

	if !s.jsonCapable {
		// Text path: only parse output that looks like a real smartctl report.
		if !looksLikeSmartText(output) {
			return false
		}
		parseSmartText(output, d)
		d.DevicePath, d.Dtype = path, dtype
		return d.Model != "" || d.SmartHealth != "UNKNOWN"
	}

	doc := loadJSON(output)
	// Capture identity + transport even when SMART is unusable, so pseudo-
	// device filtering can recognize SMART-less controller VDs (e.g. "DELL
	// RAID") and iSCSI LUNs that expose no usable SMART.
	if d.Model == "" {
		d.Model = first(jStr(doc, "model_name"), jStr(doc, "scsi_model_name"))
	}
	if d.Transport == "" {
		d.Transport = jStr(doc, "scsi_transport_protocol", "name")
	}
	if !jsonUsable(doc) {
		return false
	}
	parseSmartJSON(doc, d)
	d.DevicePath, d.Dtype = path, dtype
	return true
}
// megaraidDtypes holds the smartctl -d templates for megaraid passthrough, in
// the order they are tried when the scan did not pin a type. Each %s takes the
// megaraid index.
var megaraidDtypes = []string{
	"sat+megaraid,%s",
	"megaraid,%s",
	"scsi+megaraid,%s",
}
// pseudoDeviceModels holds lowercase model substrings marking devices that are
// not physical drives: iSCSI targets and RAID controller virtual disks.
// `smartctl --scan-open` presents these as plain "-d scsi" with no VIRTUAL-DISK
// hint in the scan comment, so they are filtered after identity is read.
// Extend this list as new controller families appear in the fleet.
var pseudoDeviceModels = []string{
	// iSCSI IET LUNs (e.g. "IET VIRTUAL-DISK").
	"virtual-disk",
	"virtual disk",
	// Linux-IO iSCSI target LUNs (text-path fallback).
	"lio-org",
	// RAID controller virtual disks report the HBA vendor/model as their
	// identity (e.g. "AVAGO MR9363-4i", "BROADCOM MR9560-16i", "DELL PERC
	// H730", "DELL RAID"). These tokens appear on controllers, never on bare
	// drives.
	"avago",
	"broadcom",
	"lsi",
	"megaraid",
	"perc",
	"adaptec",
	"microsemi",
	"dell raid",
}
// isPseudoDevice reports whether d is an iSCSI target or a RAID controller
// virtual disk rather than a physical drive. An iSCSI transport decides
// immediately (it covers LIO, IET, and any other target software); otherwise
// the lowercased model is checked against pseudoDeviceModels, which catches
// RAID virtual disks and the legacy text path that has no transport field. A
// drive with no model is never classified as pseudo by the model check.
func isPseudoDevice(d *Drive) bool {
	if strings.EqualFold(d.Transport, "iSCSI") {
		return true
	}
	model := strings.ToLower(d.Model)
	if model == "" {
		return false
	}
	for i := 0; i < len(pseudoDeviceModels); i++ {
		if strings.Contains(model, pseudoDeviceModels[i]) {
			return true
		}
	}
	return false
}
// looksLikeSmartText decides whether raw is smartctl text output worth
// parsing. Blank output is rejected; anything else qualifies when it contains
// at least one identity or health section marker. This keeps the text path
// from acting on error messages or empty output.
func looksLikeSmartText(raw string) bool {
	if len(strings.TrimSpace(raw)) == 0 {
		return false
	}
	has := func(marker string) bool { return strings.Contains(raw, marker) }
	return has("=== START OF INFORMATION SECTION ===") ||
		has("Device Model:") ||
		has("Model Number:") ||
		has("Product:") ||
		has("SMART overall-health") ||
		has("SMART Health Status")
}

BIN
drive-health-metrics Executable file

Binary file not shown.

88
drive.go Normal file
View file

@ -0,0 +1,88 @@
package main
// Drive is the normalized, vendor-agnostic health record for one physical
// drive. Nullable numeric fields use *int / *float64 so that "unknown" (the
// counter could not be read) is distinguishable from a real zero — this
// distinction drives the NO_DATA recommendation and keeps NO_DATA rows blank
// instead of misleadingly showing 0.
type Drive struct {
// Collection context.
// NOTE(review): the timestamp format of CollectedAt is not visible from this
// declaration — confirm where it is populated.
CollectedAt string
Hostname string
// Where/how smartctl reached the drive.
DevicePath string
Dtype string
Transport string // Transport is the SCSI transport ("iSCSI", "SAS", ...); used to drop iSCSI LUNs.
// Physical location reported by the RAID controller (if any).
DeviceID string // DeviceID is the megaraid,N index used by smartctl.
Enclosure string
Slot string
// Identity.
Serial string
Model string
Firmware string
Capacity string
Rotation string // Rotation is "SSD", "NVMe", or "7200 rpm".
SmartHealth string // PASSED | FAILED | PASSED_BY_ATTR | OK | UNKNOWN
// Drive-attributable defect counters (nil = not readable).
Reallocated *int
ReallocatedEvents *int
Pending *int
Uncorrectable *int
ReportedUncorrect *int
RuntimeBadBlocks *int
EndToEnd *int
UdmaCrc *int
// RAID controller signals (MegaCLI / storcli / perccli).
// Unlike the pointer fields above these are plain values, so a zero/false
// here cannot be told apart from "the controller reported nothing".
MediaErrCtrl int
OtherErrCtrl int
PredictiveFailureCtrl int
SmartAlertCtrl bool
FwState string
// Wear (vendor-normalized; remaining = % life left, consumed = 100 - that).
WearPctRemaining *int
WearPctWorst *int
WearPctConsumed *int
WearSrc string
UnusedReservePct *int
HostWrittenTB *float64
// NVMe-specific health (from the NVMe SMART/Health log).
NvmeCriticalWarning *int
NvmeAvailSpare *int
NvmeAvailSpareThresh *int
NvmeMediaErrors *int
// Age.
PowerOnHours *int
PowerOnYears *float64
PowerCycleCount *int
TempC *int
// Derived.
// NOTE(review): the tests set HaveSmart before calling finalizeDerived and
// read DefectTotal, RiskScore and Recommendation afterwards, so these look
// like finalizeDerived's input flag and outputs — confirm against its body.
HaveSmart bool
DefectTotal *int
RiskScore int
Recommendation string
RiskReasons string
// Diagnostics.
SmartctlMessages string
}
// ---- Small pointer helpers ----
// pInt returns a pointer to a freshly allocated int holding n, for filling the
// nullable *int fields of Drive.
func pInt(n int) *int {
	p := new(int)
	*p = n
	return p
}
// pF returns a pointer to a freshly allocated float64 holding f, for filling
// the nullable *float64 fields of Drive.
func pF(f float64) *float64 {
	p := new(float64)
	*p = f
	return p
}
// iv dereferences p, treating a nil pointer ("unknown") as 0.
func iv(p *int) int {
	if p != nil {
		return *p
	}
	return 0
}

279
drive_test.go Normal file
View file

@ -0,0 +1,279 @@
package main
import "testing"
// ---- JSON path (smartmontools >= 7.0): ATA SSD with a reallocated sector ----

// ataJSON is a smartctl JSON fixture for a SATA SSD (rotation_rate 0, SMART
// status passed). Its attribute table carries the raw values asserted by
// TestParseSmartJSON_ATA: ID 5 = 8 reallocated sectors, ID 9 = 26280 power-on
// hours, ID 199 = 3 UDMA CRC errors and ID 202 = 12.
const ataJSON = `{
"model_name": "Micron_1300_MTFDDAK512TDL",
"serial_number": "21512A3B4C5D",
"firmware_version": "M5MU000",
"user_capacity": {"bytes": 512110190592},
"rotation_rate": 0,
"smart_status": {"passed": true},
"power_on_time": {"hours": 26280},
"power_cycle_count": 42,
"temperature": {"current": 31},
"ata_smart_attributes": {"table": [
{"id": 5, "name": "Reallocated_Sector_Ct", "value": 100, "worst": 100, "thresh": 10, "when_failed": "", "raw": {"value": 8, "string": "8"}},
{"id": 9, "name": "Power_On_Hours", "value": 95, "worst": 95, "thresh": 0, "when_failed": "", "raw": {"value": 26280, "string": "26280"}},
{"id": 199,"name": "UDMA_CRC_Error_Count", "value": 100, "worst": 100, "thresh": 0, "when_failed": "", "raw": {"value": 3, "string": "3"}},
{"id": 202,"name": "Percent_Lifetime_Remain","value": 88, "worst": 88, "thresh": 1, "when_failed": "", "raw": {"value": 12, "string": "12"}}
]}
}`
// TestParseSmartJSON_ATA parses the ataJSON fixture and checks identity,
// counters, wear and health, then runs finalizeDerived and checks the resulting
// recommendation and defect total.
//
// Counter fields on Drive are *int, so failure messages print iv(field) with %d;
// formatting the pointer itself with %v would show a memory address instead of
// the value that was actually parsed.
func TestParseSmartJSON_ATA(t *testing.T) {
	d := &Drive{}
	parseSmartJSON(loadJSON(ataJSON), d)
	if d.Model != "Micron_1300_MTFDDAK512TDL" {
		t.Errorf("model = %q", d.Model)
	}
	if d.Rotation != "SSD" {
		t.Errorf("rotation = %q, want SSD", d.Rotation)
	}
	if got := iv(d.Reallocated); got != 8 {
		t.Errorf("reallocated = %d, want 8", got)
	}
	if got := iv(d.UdmaCrc); got != 3 {
		t.Errorf("udma_crc = %d, want 3", got)
	}
	if got := iv(d.PowerOnHours); got != 26280 {
		t.Errorf("poh = %d, want 26280", got)
	}
	if d.WearSrc != "Intel/ID202" || iv(d.WearPctConsumed) != 12 {
		t.Errorf("wear src=%q consumed=%d, want Intel/ID202 / 12", d.WearSrc, iv(d.WearPctConsumed))
	}
	if d.SmartHealth != "PASSED" {
		t.Errorf("health = %q", d.SmartHealth)
	}
	d.HaveSmart = true
	finalizeDerived(d)
	// reallocated 8 -> min(40+40,100)=80; udma 3 -> 9; total 89 -> REPLACE_SOON.
	if d.Recommendation != "REPLACE_SOON" {
		t.Errorf("rec = %q (score %d), want REPLACE_SOON", d.Recommendation, d.RiskScore)
	}
	if got := iv(d.DefectTotal); got != 8 {
		t.Errorf("defect_total = %d, want 8", got)
	}
}
// ---- Text path (CentOS 6/7, smartmontools 5.x/6.x: no JSON) ----

// ataText is a smartctl 6.5 text-output fixture for an Intel SATA SSD with a
// PASSED overall-health line. TestParseSmartText_ATA relies on its attribute
// rows: ID 9 raw 51000 (power-on hours), ID 197 raw 5 (pending sectors) and
// ID 233 with normalized VALUE 072, which the test expects to be reported as
// 28% wear consumed.
const ataText = `smartctl 6.5 2016-05-07 r4318 [x86_64-linux] (local build)
=== START OF INFORMATION SECTION ===
Device Model: INTEL SSDSC2BB480G6
Serial Number: BTWA12345678480BGN
Firmware Version: G2010140
User Capacity: 480,103,981,056 bytes [480 GB]
Rotation Rate: Solid State Device
=== START OF READ SMART DATA SECTION ===
SMART overall-health self-assessment test result: PASSED
ID# ATTRIBUTE_NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE
5 Reallocated_Sector_Ct 0x0032 100 100 000 Old_age Always - 0
9 Power_On_Hours 0x0032 100 100 000 Old_age Always - 51000
197 Current_Pending_Sector 0x0012 100 100 000 Old_age Always - 5
233 Media_Wearout_Indicator 0x0032 072 072 000 Old_age Always - 0
`
// TestParseSmartText_ATA parses the ataText fixture through the legacy text
// path and checks identity, health, pending sectors, power-on hours and wear,
// then runs finalizeDerived and checks the recommendation.
//
// Counter fields on Drive are *int, so failure messages print iv(field) with %d;
// formatting the pointer itself with %v would show a memory address instead of
// the value that was actually parsed.
func TestParseSmartText_ATA(t *testing.T) {
	d := &Drive{}
	parseSmartText(ataText, d)
	if d.Model != "INTEL SSDSC2BB480G6" {
		t.Errorf("model = %q", d.Model)
	}
	if d.SmartHealth != "PASSED" {
		t.Errorf("health = %q", d.SmartHealth)
	}
	if got := iv(d.Pending); got != 5 {
		t.Errorf("pending = %d, want 5", got)
	}
	if got := iv(d.PowerOnHours); got != 51000 {
		t.Errorf("poh = %d, want 51000", got)
	}
	if d.WearSrc != "Generic/ID233" || iv(d.WearPctConsumed) != 28 {
		t.Errorf("wear src=%q consumed=%d, want Generic/ID233 / 28", d.WearSrc, iv(d.WearPctConsumed))
	}
	d.HaveSmart = true
	finalizeDerived(d)
	// pending 5 -> min(50+25,100)=75 -> REPLACE_SOON.
	if d.Recommendation != "REPLACE_SOON" {
		t.Errorf("rec = %q (score %d), want REPLACE_SOON", d.Recommendation, d.RiskScore)
	}
}
// ---- NVMe text path ----

// nvmeText is a smartctl 7.2 text-output fixture for an NVMe SSD in poor
// shape: critical warning 0x04, available spare 8% against a 10% threshold and
// 96% of rated life used. TestParseSmartText_NVMe asserts these values and the
// resulting REPLACE_NOW recommendation.
const nvmeText = `smartctl 7.2 2020-12-30 r5155 [x86_64-linux]
=== START OF INFORMATION SECTION ===
Model Number: Samsung SSD 980 PRO 1TB
Serial Number: S5GXNX0R123456
Firmware Version: 5B2QGXA7
=== START OF SMART DATA SECTION ===
SMART overall-health self-assessment test result: PASSED
SMART/Health Information (NVMe Log 0x02)
Critical Warning: 0x04
Temperature: 40 Celsius
Available Spare: 8%
Available Spare Threshold: 10%
Percentage Used: 96%
Power On Hours: 30,123
Power Cycles: 210
Media and Data Integrity Errors: 0
`
// TestParseSmartText_NVMe parses the nvmeText fixture and checks the NVMe
// health-log fields (critical warning, spare, wear), then runs finalizeDerived
// and checks the recommendation.
//
// The NVMe fields on Drive are *int. Failure messages print the dereferenced
// value (or say "nil" explicitly) instead of formatting the pointer with %v,
// which would only show a memory address.
func TestParseSmartText_NVMe(t *testing.T) {
	d := &Drive{}
	parseSmartText(nvmeText, d)
	if d.Rotation != "NVMe" {
		t.Errorf("rotation = %q, want NVMe", d.Rotation)
	}
	// A missing critical warning must fail too, so nil is not folded into 0 here.
	if cw := d.NvmeCriticalWarning; cw == nil {
		t.Errorf("critical_warning = nil, want 4")
	} else if *cw != 4 {
		t.Errorf("critical_warning = %d, want 4", *cw)
	}
	if iv(d.NvmeAvailSpare) != 8 || iv(d.NvmeAvailSpareThresh) != 10 {
		t.Errorf("spare=%d thresh=%d, want 8/10", iv(d.NvmeAvailSpare), iv(d.NvmeAvailSpareThresh))
	}
	if got := iv(d.WearPctConsumed); got != 96 {
		t.Errorf("wear consumed = %d, want 96", got)
	}
	d.HaveSmart = true
	finalizeDerived(d)
	// crit warning +60, spare<=thresh +40, wear96 +80 -> >=100 REPLACE_NOW.
	if d.Recommendation != "REPLACE_NOW" {
		t.Errorf("rec = %q (score %d), want REPLACE_NOW", d.Recommendation, d.RiskScore)
	}
}
// ---- NO_DATA: nothing readable, no controller flags ----

// TestNoData checks that a drive with no SMART data and no controller signals
// is classified NO_DATA and that its defect total stays nil (rendered blank)
// rather than being reported as 0.
func TestNoData(t *testing.T) {
	d := &Drive{HaveSmart: false}
	finalizeDerived(d)
	if d.Recommendation != "NO_DATA" {
		t.Errorf("rec = %q, want NO_DATA", d.Recommendation)
	}
	if d.DefectTotal != nil {
		// Dereference for the message: %v on the *int would print an address.
		t.Errorf("defect_total = %d, want nil (blank)", *d.DefectTotal)
	}
}
// ---- MegaCLI PDList parsing + controller-driven scoring ----

// megacliText is a MegaCLI PDList fixture holding two physical-drive records,
// each introduced by an "Enclosure Device ID" line. The first (Device Id 11,
// slot 3) carries media/other/predictive error counts, a S.M.A.R.T alert and
// Inquiry Data; the second (Device Id 8, slot 0) is clean. TestMegacliAndScore
// expects exactly two parsed drives.
const megacliText = `
Enclosure Device ID: 64
Slot Number: 3
Device Id: 11
WWN: 5000C500A1B2C3D4
Media Error Count: 369
Other Error Count: 2
Predictive Failure Count: 1
Drive has flagged a S.M.A.R.T alert : Yes
Firmware state: Online, Spun Up
Inquiry Data: BTWA12345678 INTELSSDSC2BB480G6 G2010140
Drive Temperature: 35C (95.00 F)
Enclosure Device ID: 64
Slot Number: 0
Device Id: 8
Firmware state: Online, Spun Up
Media Error Count: 0
`
// TestMegacliAndScore parses the megacliText fixture, checks the controller
// record for device 11, and then verifies that a drive known only through the
// controller (no smartctl data) is still scored from the controller signals.
func TestMegacliAndScore(t *testing.T) {
	parsed := parseMegacliPDList(megacliText)
	if n := len(parsed); n != 2 {
		t.Fatalf("parsed %d drives, want 2", n)
	}

	// Index the records by device id so the assertions do not depend on order.
	byID := make(map[string]ctrlDrive, len(parsed))
	for _, rec := range parsed {
		byID[rec.DeviceID] = rec
	}
	cd, found := byID["11"]
	if !found {
		t.Fatal("device 11 not found")
	}
	if !(cd.MediaErr == 369 && cd.Predictive == 1 && cd.SmartAlert) {
		t.Errorf("dev11 media=%d pred=%d alert=%v", cd.MediaErr, cd.Predictive, cd.SmartAlert)
	}
	if !(cd.Enclosure == "64" && cd.Slot == "3") {
		t.Errorf("dev11 location %s:%s, want 64:3", cd.Enclosure, cd.Slot)
	}

	// Controller-only drive (no smartctl): predictive + alert -> not NO_DATA.
	var d Drive
	applyController(&d, cd)
	finalizeDerived(&d)
	// predictive +70, alert +50, media 369 +30 = 150 -> REPLACE_NOW.
	if d.Recommendation != "REPLACE_NOW" {
		t.Errorf("rec = %q (score %d), want REPLACE_NOW", d.Recommendation, d.RiskScore)
	}
	if loc := d.enclosureSlot(); loc != "64:3" {
		t.Errorf("enclosure_slot = %q", loc)
	}
}
// ---- MegaCLI Inquiry Data identity fallback (real-world layouts) ----

// TestApplyControllerInquiry pins applyController's inquiry fallback: the first
// token is the serial, the last token is the firmware, and whatever remains in
// between (which may itself contain spaces, or be empty) is the model. The
// samples are the real MegaCLI "Inquiry Data" forms documented alongside the
// parser; a naive positional serial/model/firmware split mis-handled them.
func TestApplyControllerInquiry(t *testing.T) {
	type inquiryCase struct {
		name       string
		inquiry    string
		wantSerial string
		wantModel  string
		wantFw     string
	}
	table := []inquiryCase{
		{
			name:       "model with internal space",
			inquiry:    "50026B727A005DED KINGSTON SEDC400S37480G SAFM02.H",
			wantSerial: "50026B727A005DED",
			wantModel:  "KINGSTON SEDC400S37480G",
			wantFw:     "SAFM02.H",
		},
		{
			name:       "two tokens: serial + firmware only",
			inquiry:    "ZRT0CQ55ST12000NM000J-2TY103 SN02",
			wantSerial: "ZRT0CQ55ST12000NM000J-2TY103",
			wantModel:  "",
			wantFw:     "SN02",
		},
		{
			name:       "clean three tokens",
			inquiry:    "BTWA12345678 INTELSSDSC2BB480G6 G2010140",
			wantSerial: "BTWA12345678",
			wantModel:  "INTELSSDSC2BB480G6",
			wantFw:     "G2010140",
		},
	}
	for _, tc := range table {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			var d Drive
			applyController(&d, ctrlDrive{Inquiry: tc.inquiry})
			if d.Serial == tc.wantSerial && d.Model == tc.wantModel && d.Firmware == tc.wantFw {
				return
			}
			t.Errorf("got serial=%q model=%q fw=%q; want serial=%q model=%q fw=%q",
				d.Serial, d.Model, d.Firmware, tc.wantSerial, tc.wantModel, tc.wantFw)
		})
	}
}
// ---- Output smoke: CSV header + influx line shape ----

// TestOutputShapes is a smoke test for the two one-shot encoders: the CSV
// output must begin with the "collected_at" header column and the InfluxDB line
// must begin with the measurement name.
func TestOutputShapes(t *testing.T) {
	d := &Drive{Hostname: "kvm60", Model: "X", Serial: "S1", SmartHealth: "PASSED"}
	finalizeDerived(d)

	// Compare lengths before slicing, as the influx check below does: slicing a
	// non-empty output shorter than the header would panic instead of failing
	// the test with a readable message.
	const header = "collected_at"
	csv := recordsToCSV([]*Drive{d})
	if len(csv) < len(header) || csv[:len(header)] != header {
		t.Errorf("csv header malformed: %.40q", csv)
	}
	inf := recordsToInflux([]*Drive{d}, 1700000000000000000)
	if len(inf) < len(influxMeasurement) || inf[:len(influxMeasurement)] != influxMeasurement {
		t.Errorf("influx line malformed: %.60q", inf)
	}
}

46
exec.go Normal file
View file

@ -0,0 +1,46 @@
package main
import (
"os"
"os/exec"
"strings"
)
// run starts name with args, waits for it to finish and hands back everything
// it wrote to stdout and stderr, interleaved, as one string.
//
// The command's error (including a non-zero exit status) is deliberately
// discarded: smartctl encodes disk-health flags in a bitmask exit code (e.g.
// bit 0 = command-line error, bits 3-7 = disk health flags) while still
// printing valid JSON/text, and MegaCLI is similarly noisy. Callers want
// whatever was printed; a command that could not be started yields "".
func run(name string, args ...string) string {
	output, _ := exec.Command(name, args...).CombinedOutput()
	return string(output)
}
// lookPath returns the first candidate that resolves to an executable, or ""
// when none does. Candidates are tried in order.
//
// Every candidate goes through exec.LookPath, which searches PATH for bare
// names and tries a candidate containing a path separator directly, without
// consulting PATH. In both cases it rejects directories and files that are not
// executable, so an absolute fallback that exists but cannot be run no longer
// shadows a later, usable candidate.
func lookPath(candidates ...string) string {
	for _, c := range candidates {
		if p, err := exec.LookPath(c); err == nil {
			return p
		}
	}
	return ""
}
// hostname returns the host identity, preferring the FQDN from "hostname -f",
// then os.Hostname, then "unknown" so records always carry a host tag.
//
// "hostname -f" is run directly rather than through run: run merges stderr
// into the result and ignores the exit status, so a failed lookup would turn
// the tool's error message into the host tag. Here only stdout of a successful
// invocation is accepted. An empty os.Hostname result is likewise skipped so
// the "unknown" fallback applies.
func hostname() string {
	if out, err := exec.Command("hostname", "-f").Output(); err == nil {
		if h := strings.TrimSpace(string(out)); h != "" {
			return h
		}
	}
	if h, err := os.Hostname(); err == nil && h != "" {
		return h
	}
	return "unknown"
}

83
exporter.go Normal file
View file

@ -0,0 +1,83 @@
package main
import (
"github.com/prometheus/client_golang/prometheus"
)
// DriveExporter is a Prometheus collector that, on each scrape, discovers the
// host's drives and emits their numeric health columns as gauges. The label set
// (the schema's string columns) is attached to every metric so the Prometheus
// and InfluxDB outputs describe each drive identically.
type DriveExporter struct {
// descs holds one descriptor per gauge column, keyed by the column name;
// NewDriveExporter fills it and Collect looks descriptors up by that name.
descs map[string]*prometheus.Desc
labels []column // String columns carried as labels.
gauges []column // Numeric columns emitted as gauges.
// collect discovers the drives; a field so tests can inject a fixed set
// without touching real hardware. Collect discards the int64 second result.
collect func() ([]*Drive, int64)
}
// NewDriveExporter assembles a collector from the schema: the string columns
// become the label names shared by every metric, and each numeric column gets
// its own gauge descriptor named "<namespace>_<column>". Drive discovery is
// wired to the package-level collect function.
func NewDriveExporter() *DriveExporter {
	exp := &DriveExporter{
		labels:  labelColumns(),
		gauges:  gaugeColumns(),
		collect: collect,
	}

	names := make([]string, 0, len(exp.labels))
	for _, col := range exp.labels {
		names = append(names, col.name)
	}

	exp.descs = make(map[string]*prometheus.Desc, len(exp.gauges))
	for _, col := range exp.gauges {
		fqName := namespace + "_" + col.name
		help := "drive health metric: " + col.name
		exp.descs[col.name] = prometheus.NewDesc(fqName, help, names, nil)
	}
	return exp
}
// Reload is a no-op; the exporter holds no configurable state.
// NOTE(review): presumably kept so the exporter fits a reload hook shared with
// other components — the caller is not visible from here; confirm.
func (e *DriveExporter) Reload() {}
// Describe publishes each gauge descriptor held by the exporter on ch. The
// descriptors live in a map, so they are sent in no particular order.
func (e *DriveExporter) Describe(ch chan<- *prometheus.Desc) {
	for name := range e.descs {
		ch <- e.descs[name]
	}
}
// Collect runs drive discovery and, for every drive, sends one gauge per
// numeric column on ch. All gauges of a drive share the same identity label
// values; columns whose value is unknown for that drive are left out.
func (e *DriveExporter) Collect(ch chan<- prometheus.Metric) {
	found, _ := e.collect()
	for _, drv := range found {
		// Label values are computed once per drive and reused for each gauge.
		identity := make([]string, 0, len(e.labels))
		for _, lc := range e.labels {
			identity = append(identity, format(lc.raw(drv)))
		}
		for _, gc := range e.gauges {
			if v, known := gaugeValue(gc, drv); known {
				ch <- prometheus.MustNewConstMetric(e.descs[gc.name], prometheus.GaugeValue, v, identity...)
			}
		}
	}
}
// gaugeValue turns the raw value of column c for drive d into the float64 a
// Prometheus gauge needs. int and float64 convert directly and bool maps to
// 1/0. The second result is false when the raw value is nil (unknown) or of
// any other type, in which case the caller should emit nothing.
func gaugeValue(c column, d *Drive) (float64, bool) {
	switch v := c.raw(d).(type) {
	case int:
		return float64(v), true
	case float64:
		return v, true
	case bool:
		var asFloat float64
		if v {
			asFloat = 1
		}
		return asFloat, true
	}
	// nil and unsupported types both mean "no sample".
	return 0, false
}

119
exporter_test.go Normal file
View file

@ -0,0 +1,119 @@
package main
import (
"encoding/json"
"strings"
"testing"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
)
// sampleDrive builds a fully populated drive record (identity, location, a few
// SMART counters and a controller alert) and runs finalizeDerived on it, so the
// output encoders under test see derived fields as well.
func sampleDrive() *Drive {
	var d Drive

	// Identity and location.
	d.Hostname = "kvm60"
	d.Model = "Samsung SSD"
	d.Serial = "S1"
	d.Firmware = "1B6Q"
	d.SmartHealth = "PASSED"
	d.Enclosure = "64"
	d.Slot = "3"

	// Health readings: one int, one float and one bool are needed by the tests.
	d.WearSrc = "nvme"
	d.TempC = pInt(34)
	d.PowerOnHours = pInt(17520)
	d.PowerCycleCount = pInt(12)
	d.WearPctConsumed = pInt(7)
	d.HostWrittenTB = pF(12.5)
	d.SmartAlertCtrl = true

	finalizeDerived(&d)
	return &d
}
// TestDriveExporterCollect registers the exporter with a fresh registry, feeds
// it a fixed drive and checks that gathered gauges carry the namespace prefix,
// the expected values (int, float and bool columns) and the shared identity
// labels.
func TestDriveExporterCollect(t *testing.T) {
	app = &App{config: defaultConfig()}
	app.config.Hostname = "kvm60"

	exp := NewDriveExporter()
	// Swap in a canned drive set so no real hardware is queried.
	exp.collect = func() ([]*Drive, int64) {
		return []*Drive{sampleDrive()}, 0
	}

	registry := prometheus.NewRegistry()
	registry.MustRegister(exp)
	families, err := registry.Gather()
	if err != nil {
		t.Fatalf("gather: %v", err)
	}
	index := make(map[string]*dto.MetricFamily, len(families))
	for _, fam := range families {
		index[fam.GetName()] = fam
	}

	// One representative column per value type, plus a derived one.
	expected := map[string]float64{
		"drive_health_temp_c":            34,
		"drive_health_power_cycle_count": 12,
		"drive_health_host_written_tb":   12.5,
		"drive_health_smart_alert_ctrl":  1, // bool true -> 1
		"drive_health_risk_score":        float64(sampleDrive().RiskScore),
	}
	for metric, want := range expected {
		fam, present := index[metric]
		if !present {
			t.Errorf("missing metric %s", metric)
			continue
		}
		sample := fam.GetMetric()[0]
		if got := sample.GetGauge().GetValue(); got != want {
			t.Errorf("%s = %v, want %v", metric, got, want)
		}
		// The identity labels must ride along on every gauge.
		attached := map[string]string{}
		for _, pair := range sample.GetLabel() {
			attached[pair.GetName()] = pair.GetValue()
		}
		if !(attached["serial"] == "S1" && attached["hostname"] == "kvm60" && attached["enclosure_slot"] == "64:3") {
			t.Errorf("%s labels = %v", metric, attached)
		}
	}
}
// TestRecordsToInfluxJSON checks the InfluxDB JSON encoder: one line per
// drive, each a JSON object with the measurement name, string tags, typed
// fields and a timestamp converted from nanoseconds to microseconds.
func TestRecordsToInfluxJSON(t *testing.T) {
	out := recordsToInfluxJSON([]*Drive{sampleDrive()}, 1700000000000000000)
	lines := strings.Split(strings.TrimSpace(string(out)), "\n")
	if len(lines) != 1 {
		t.Fatalf("got %d lines, want 1: %q", len(lines), out)
	}

	var point struct {
		Name      string            `json:"name"`
		Tags      map[string]string `json:"tags"`
		Fields    map[string]any    `json:"fields"`
		Timestamp int64             `json:"timestamp"`
	}
	if err := json.Unmarshal([]byte(lines[0]), &point); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}

	if point.Name != influxMeasurement {
		t.Errorf("name = %q, want %q", point.Name, influxMeasurement)
	}
	if !(point.Tags["serial"] == "S1" && point.Tags["model"] == "Samsung SSD") {
		t.Errorf("tags = %v", point.Tags)
	}
	if point.Timestamp != 1700000000000000 {
		t.Errorf("timestamp = %d, want microseconds", point.Timestamp)
	}

	// An int field must decode as a JSON number, a bool field as a real bool.
	temp, isNumber := point.Fields["temp_c"].(float64)
	if !isNumber || temp != 34 {
		t.Errorf("temp_c field = %v", point.Fields["temp_c"])
	}
	alert, isBool := point.Fields["smart_alert_ctrl"].(bool)
	if !isBool || !alert {
		t.Errorf("smart_alert_ctrl field = %v", point.Fields["smart_alert_ctrl"])
	}
}

63
flags.go Normal file
View file

@ -0,0 +1,63 @@
package main
import (
"flag"
"fmt"
"os"
)
// Flags holds the command-line arguments. One-shot output (CSV / InfluxDB line
// protocol to stdout) remains the default; -server switches to the long-lived
// service that exposes the Prometheus endpoint and pushes to Influx.
type Flags struct {
// ConfigPath is set by -config / -c; it defaults to "".
ConfigPath string
// One-shot output controls (default mode).
Format string // Format is set by -format ("csv" by default, or "influx").
// Service mode.
Server bool // Server is set by -server.
// HTTP output overrides (service mode).
// ParseFlags registers these with empty/zero defaults.
// NOTE(review): presumably an empty/zero value means "keep the configured
// value" — the merge into the configuration is not visible here; confirm.
HTTPBind string
HTTPPort uint
HTTPMetricsPath string
}
// ParseFlags registers the command-line flags, parses os.Args into a.flags and,
// when -version / -v was given, prints the version and exits with status 0.
//
// Flags are registered on the process-wide default flag set, so this is meant
// to be called once at startup.
func (a *App) ParseFlags() {
	a.flags = new(Flags)

	flag.Usage = func() {
		// Write the banner to the flag set's own output, the same destination
		// PrintDefaults uses (stderr unless redirected), so the whole help text
		// lands on one stream instead of being split between stdout and stderr.
		fmt.Fprintf(flag.CommandLine.Output(), "%s: %s.\n\nUsage:\n", serviceName, serviceDescription)
		flag.PrintDefaults()
	}

	// Version.
	var printVer bool
	flag.BoolVar(&printVer, "version", false, "print version and exit")
	flag.BoolVar(&printVer, "v", false, "print version and exit (shorthand)")

	// Configuration path override.
	usage := "load configuration from `FILE`"
	flag.StringVar(&a.flags.ConfigPath, "config", "", usage)
	flag.StringVar(&a.flags.ConfigPath, "c", "", usage+" (shorthand)")

	// One-shot output controls.
	flag.StringVar(&a.flags.Format, "format", "csv", "output format: csv | influx")

	// Service mode.
	flag.BoolVar(&a.flags.Server, "server", false, "run as a service: Prometheus HTTP endpoint and scheduled InfluxDB output")

	// HTTP output overrides (service mode).
	flag.StringVar(&a.flags.HTTPBind, "http-bind", "", "bind address for the HTTP server")
	flag.UintVar(&a.flags.HTTPPort, "http-port", 0, "bind port for the HTTP server")
	flag.StringVar(&a.flags.HTTPMetricsPath, "http-metrics-path", "", "path for the Prometheus metrics endpoint")

	flag.Parse()

	if printVer {
		printVersion()
		os.Exit(0)
	}
}

34
go.mod Normal file
View file

@ -0,0 +1,34 @@
module github.com/grmrgecko/drive-health-metrics
go 1.20
require (
github.com/gorilla/handlers v1.5.1
github.com/influxdata/influxdb-client-go/v2 v2.12.3
github.com/kkyr/fig v0.3.2
github.com/prometheus/client_golang v1.16.0
github.com/prometheus/client_model v0.3.0
github.com/segmentio/kafka-go v0.4.42
)
require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/deepmap/oapi-codegen v1.8.2 // indirect
github.com/felixge/httpsnoop v1.0.1 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839 // indirect
github.com/klauspost/compress v1.15.9 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/mitchellh/mapstructure v1.4.1 // indirect
github.com/pelletier/go-toml v1.9.3 // indirect
github.com/pierrec/lz4/v4 v4.1.15 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/common v0.42.0 // indirect
github.com/prometheus/procfs v0.10.1 // indirect
github.com/rogpeppe/go-internal v1.11.0 // indirect
golang.org/x/net v0.7.0 // indirect
golang.org/x/sys v0.26.0 // indirect
google.golang.org/protobuf v1.30.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

161
go.sum Normal file
View file

@ -0,0 +1,161 @@
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cyberdelia/templates v0.0.0-20141128023046-ca7fffd4298c/go.mod h1:GyV+0YP4qX0UQ7r2MoYZ+AvYDp12OF5yg4q8rGnyNh4=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/deepmap/oapi-codegen v1.8.2 h1:SegyeYGcdi0jLLrpbCMoJxnUUn8GBXHsvr4rbzjuhfU=
github.com/deepmap/oapi-codegen v1.8.2/go.mod h1:YLgSKSDv/bZQB7N4ws6luhozi3cEdRktEqrX88CvjIw=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ=
github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/getkin/kin-openapi v0.61.0/go.mod h1:7Yn5whZr5kJi6t+kShccXS8ae1APpYTW6yheSwk8Yi4=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/go-chi/chi/v5 v5.0.0/go.mod h1:BBug9lr0cqtdAhsu6R4AAdvufI0/XBzAQSsUqJpoZOs=
github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/golangci/lint-1 v0.0.0-20181222135242-d2cdd8c08219/go.mod h1:/X8TswGSh1pIozq4ZwCfxS0WA5JGXguxk94ar/4c87Y=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH4=
github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q=
github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
github.com/influxdata/influxdb-client-go/v2 v2.12.3 h1:28nRlNMRIV4QbtIUvxhWqaxn0IpXeMSkY/uJa/O/vC4=
github.com/influxdata/influxdb-client-go/v2 v2.12.3/go.mod h1:IrrLUbCjjfkmRuaCiGQg4m2GbkaeJDcuWoxiWdQEbA0=
github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839 h1:W9WBk7wlPfJLvMCdtV4zPulc4uCPrlywQOmbFOhgQNU=
github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
github.com/kkyr/fig v0.3.2 h1:+vMj52FL6RJUxeKOBB6JXIMyyi1/2j1ERDrZXjoBjzM=
github.com/kkyr/fig v0.3.2/go.mod h1:ItUILF8IIzgZOMhx5xpJ1W/bviQsWRKOwKXfE/tqUoA=
github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/labstack/echo/v4 v4.2.1/go.mod h1:AA49e0DZ8kk5jTOOCKNuPR6oTnBS0dYiM4FW1e6jwpg=
github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k=
github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/matryer/moq v0.0.0-20190312154309-6cfb0558e1bd/go.mod h1:9ELz6aaclSIGnZBoaSLZ3NAl1VTufbOrXBPvtcy6WiQ=
github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
github.com/mattn/go-colorable v0.1.7/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ=
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
github.com/mitchellh/mapstructure v1.4.1 h1:CpVNEelQCZBooIPDn+AR3NpivK/TIKU8bDxdASFVQag=
github.com/mitchellh/mapstructure v1.4.1/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/pelletier/go-toml v1.9.3 h1:zeC5b1GviRUyKYd6OJPvBU/mcVDVoL1OhT17FCt5dSQ=
github.com/pelletier/go-toml v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pierrec/lz4/v4 v4.1.15 h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0=
github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8=
github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc=
github.com/prometheus/client_model v0.3.0 h1:UBgGFHqYdG/TPFD1B1ogZywDqEkwp3fBMvqdiQ7Xew4=
github.com/prometheus/client_model v0.3.0/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w=
github.com/prometheus/common v0.42.0 h1:EKsfXEYo4JpWMHH5cg+KOUWeuJSov1Id8zGR8eeI1YM=
github.com/prometheus/common v0.42.0/go.mod h1:xBwqVerjNdUDjgODMpudtOMwlOwf2SaTr1yjz4b7Zbc=
github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg=
github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM=
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
github.com/segmentio/kafka-go v0.4.42 h1:qffhBZCz4WcWyNuHEclHjIMLs2slp6mZO8px+5W5tfU=
github.com/segmentio/kafka-go v0.4.42/go.mod h1:d0g15xPMqoUookug0OU75DhGZxXwCFxSLeJ4uphwJzg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ=
github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c=
github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY=
github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4=
github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8=
github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200826173525-f9321e4c35a6/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

90
http.go Normal file
View file

@ -0,0 +1,90 @@
package main
import (
"context"
"fmt"
"log"
"net"
"net/http"
"os"
"github.com/gorilla/handlers"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// HTTPOutput serves the Prometheus metrics endpoint and a small landing page
// over HTTP.
type HTTPOutput struct {
// server is the underlying HTTP server; Reload sets its Addr and AddHandlers
// installs its Handler.
server *http.Server
// config points at the HTTP section of the application configuration
// (assigned from app.config.HTTP in Reload).
config *HTTPOutputConfig
}
// NewHTTPOutput builds an HTTPOutput around a fresh http.Server and runs Reload
// once, so the configuration, listen address, and handlers are all in place
// before the value is handed back.
func NewHTTPOutput() *HTTPOutput {
	out := &HTTPOutput{server: &http.Server{}}
	out.Reload()
	return out
}
// AddHandlers installs a new request multiplexer on the server. The configured
// metrics path is served by the Prometheus handler for app.registry, wrapped in
// a combined-format access logger writing to stdout; every other path falls
// through to "/" and gets a small HTML landing page linking to the metrics.
func (s *HTTPOutput) AddHandlers() {
	router := http.NewServeMux()
	s.server.Handler = router

	metrics := promhttp.HandlerFor(app.registry, promhttp.HandlerOpts{})
	router.Handle(s.config.MetricsPath, handlers.CombinedLoggingHandler(os.Stdout, metrics))

	// The landing page reads the metrics path at request time, so it follows
	// whatever s.config currently points at.
	landing := func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte(`<html>
<head><title>Drive Health Metrics</title></head>
<body>
<h1>Drive Health Metrics</h1>
<p><a href='` + s.config.MetricsPath + `'>Metrics</a></p>
</body>
</html>`))
	}
	router.HandleFunc("/", landing)
}
// Reload refreshes the configuration pointer, recomputes the listen address,
// and rebuilds the handlers.
//
// The address is assembled with net.JoinHostPort so that IPv6 bind addresses
// are bracketed ("::1" becomes "[::1]:9100"); plain "%s:%d" formatting yields
// an address net.Listen cannot parse for IPv6 literals. A bind address that
// is already bracketed in the configuration is unwrapped first so it is not
// bracketed twice. Hostnames, IPv4 addresses, and the empty bind address
// produce the same string as before.
func (s *HTTPOutput) Reload() {
	s.config = &app.config.HTTP

	host := s.config.BindAddr
	if n := len(host); n >= 2 && host[0] == '[' && host[n-1] == ']' {
		host = host[1 : n-1]
	}
	s.server.Addr = net.JoinHostPort(host, fmt.Sprint(s.config.Port))

	s.AddHandlers()
}
// OutputEnabled returns the Enabled flag of the current HTTP configuration.
func (s *HTTPOutput) OutputEnabled() bool {
	cfg := s.config
	return cfg.Enabled
}
// Start runs StartWithIsListening in its own goroutine and waits for its
// readiness signal, which is sent once the listener is bound (or immediately
// when the HTTP output is disabled).
func (s *HTTPOutput) Start(ctx context.Context) {
	ready := make(chan bool)
	go s.StartWithIsListening(ctx, ready)
	<-ready
}
// StartWithIsListening runs the server, signalling on isListening once the
// listener is bound, and shutting down when the context is cancelled. When the
// HTTP output is disabled it signals immediately and returns.
//
// Every call serves from a fresh http.Server carrying over the current address
// and handler. An http.Server cannot be reused after Shutdown (Serve returns
// ErrServerClosed straight away), so reusing the same instance would leave the
// endpoint permanently down after the first cancel-and-restart cycle. The new
// server is stored back on s so later in-place reloads (Reload / AddHandlers)
// keep acting on the instance that is actually serving.
//
// A failure to bind the listen address is fatal to the process.
func (s *HTTPOutput) StartWithIsListening(ctx context.Context, isListening chan bool) {
	if !s.config.Enabled {
		isListening <- true
		return
	}

	srv := &http.Server{Addr: s.server.Addr, Handler: s.server.Handler}
	s.server = srv

	// The shutdown goroutine uses the local srv rather than s.server: by the
	// time ctx is cancelled, s.server may already belong to the next start.
	go func() {
		<-ctx.Done()
		if err := srv.Shutdown(context.Background()); err != nil {
			log.Println("Error shutting down http server:", err)
		}
	}()

	log.Println("Starting http server:", srv.Addr)
	l, err := net.Listen("tcp", srv.Addr)
	if err != nil {
		log.Fatal("Listen: ", err)
	}
	isListening <- true

	// ErrServerClosed is the normal result of Shutdown and is not reported.
	if err := srv.Serve(l); err != nil && err != http.ErrServerClosed {
		log.Println("HTTP server failure:", err)
	}
}

146
influx.go Normal file
View file

@ -0,0 +1,146 @@
package main
import (
"bufio"
"bytes"
"context"
"crypto/tls"
"log"
"time"
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
"github.com/segmentio/kafka-go"
"github.com/segmentio/kafka-go/sasl/plain"
)
// InfluxOutput pushes drive metrics, as InfluxDB line protocol or JSON, to the
// InfluxDB v2 API and/or Kafka on a fixed schedule.
type InfluxOutput struct {
// kwriter is the Kafka writer; nil when Kafka output is not configured.
kwriter *kafka.Writer
// client points at the InfluxDB v2 client; nil when the InfluxDB API output
// is not configured.
client *influxdb2.Client
// config points at the Influx section of the application configuration
// (assigned from app.config.Influx in Reload).
config *InfluxOutputConfig
}
// NewInfluxOutput allocates an InfluxOutput and runs Reload once so its
// destinations reflect the current configuration.
func NewInfluxOutput() *InfluxOutput {
	out := &InfluxOutput{}
	out.Reload()
	return out
}
// Reload re-reads the Influx configuration and rebuilds both destinations from
// scratch. The Kafka writer is created only when brokers and a topic are set;
// the InfluxDB client only when server, token, org, and bucket are all set.
// A destination whose settings are incomplete is left nil. The previous writer
// and client are simply replaced here; this method does not close them.
func (i *InfluxOutput) Reload() {
	cfg := &app.config.Influx
	i.config = cfg
	i.kwriter, i.client = nil, nil

	// Kafka destination.
	if kafkaConfigured := len(cfg.KafkaBrokers) != 0 && cfg.KafkaTopic != ""; kafkaConfigured {
		d := &kafka.Dialer{
			Timeout:   10 * time.Second,
			DualStack: true,
			TLS:       &tls.Config{InsecureSkipVerify: cfg.KafkaInsecureSkipVerify},
		}
		// SASL/PLAIN credentials are attached only when a username is given.
		if cfg.KafkaUsername != "" {
			d.SASLMechanism = plain.Mechanism{
				Username: cfg.KafkaUsername,
				Password: cfg.KafkaPassword,
			}
		}
		writerCfg := kafka.WriterConfig{
			Brokers: cfg.KafkaBrokers,
			Topic:   cfg.KafkaTopic,
			Dialer:  d,
		}
		i.kwriter = kafka.NewWriter(writerCfg)
	}

	// InfluxDB v2 API destination.
	influxConfigured := cfg.InfluxServer != "" && cfg.Token != "" && cfg.Org != "" && cfg.Bucket != ""
	if influxConfigured {
		cl := influxdb2.NewClient(cfg.InfluxServer, cfg.Token)
		i.client = &cl
	}
}
// CollectAndLineprotocolFormat runs a collection and returns the result
// rendered by recordsToInflux (InfluxDB line protocol) as bytes.
func (i *InfluxOutput) CollectAndLineprotocolFormat() []byte {
	return []byte(recordsToInflux(collect()))
}
// CollectAndJSONFormat runs a collection and returns the result rendered by
// recordsToInfluxJSON.
func (i *InfluxOutput) CollectAndJSONFormat() []byte {
	return recordsToInfluxJSON(collect())
}
// OutputEnabled is true only when a non-zero push frequency is configured and
// at least one destination (Kafka writer or InfluxDB client) exists.
func (i *InfluxOutput) OutputEnabled() bool {
	if i.config.Frequency == 0 {
		return false
	}
	return i.kwriter != nil || i.client != nil
}
// Start runs the scheduled push loop until the context is cancelled. It returns
// immediately when the output is not enabled.
//
// On cancellation it closes the Kafka writer and InfluxDB client that were
// current when the loop started, not whatever i.kwriter / i.client point at by
// then. Reload replaces both fields before a restart cancels this loop, so
// closing the fields themselves would close the replacements the next loop is
// about to use (every later write would fail) and leak the originals.
// Destinations swapped in by a reload that did not restart the loop are not
// closed here.
func (i *InfluxOutput) Start(ctx context.Context) {
	if !i.OutputEnabled() {
		return
	}

	// Resources owned by this run of the loop.
	kwriter, client := i.kwriter, i.client

	ticker := time.NewTicker(i.config.Frequency)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			i.push(ctx)
		case <-ctx.Done():
			if kwriter != nil {
				if err := kwriter.Close(); err != nil {
					log.Println("Error closing Kafka writer:", err)
				}
			}
			if client != nil {
				(*client).Close()
			}
			return
		}
	}
}
// push collects metrics once and writes them to every configured destination.
//
// A single collect() call feeds both destinations, so Kafka and InfluxDB see
// the same drives and timestamp and the drives are queried once per tick.
// Kafka receives one message per line of the configured format ("json" or,
// by default, line protocol), keyed by the configured hostname. InfluxDB
// receives the whole line-protocol document in one write. Write failures are
// logged and do not stop the other destination.
func (i *InfluxOutput) push(ctx context.Context) {
	kwriter, client := i.kwriter, i.client
	if kwriter == nil && client == nil {
		return
	}

	drives, tsNs := collect()

	// Kafka receives one message per drive in the configured format.
	if kwriter != nil {
		var data []byte
		if i.config.KafkaOutputFormat == "json" {
			data = recordsToInfluxJSON(drives, tsNs)
		} else {
			data = []byte(recordsToInflux(drives, tsNs))
		}

		var messages []kafka.Message
		routingKey := []byte(app.config.Hostname)
		scanner := bufio.NewScanner(bytes.NewReader(data))
		scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
		scanner.Split(bufio.ScanLines)
		for scanner.Scan() {
			// scanner.Bytes() aliases the scanner's buffer, which is
			// overwritten by later Scan calls; each message needs its own
			// copy because it is retained until WriteMessages runs.
			line := scanner.Bytes()
			value := make([]byte, len(line)+1)
			copy(value, line)
			value[len(line)] = '\n'
			messages = append(messages, kafka.Message{Key: routingKey, Value: value})
		}
		if err := scanner.Err(); err != nil {
			log.Println("Unable to split Kafka payload into messages:", err)
		}
		if len(messages) != 0 {
			if err := kwriter.WriteMessages(ctx, messages...); err != nil {
				log.Println("Unable to write to Kafka:", err)
			}
		}
	}

	// InfluxDB API receives the full line-protocol document.
	if client != nil {
		data := recordsToInflux(drives, tsNs)
		if len(data) != 0 {
			writeAPI := (*client).WriteAPIBlocking(i.config.Org, i.config.Bucket)
			if err := writeAPI.WriteRecord(ctx, data); err != nil {
				log.Println("Unable to write to InfluxDB:", err)
			}
		}
	}
}

114
jsonutil.go Normal file
View file

@ -0,0 +1,114 @@
package main
import (
"encoding/json"
"regexp"
"strconv"
"strings"
)
// loadJSON decodes a JSON object from raw (smartctl -j output). Surrounding
// whitespace is ignored. If the text as a whole does not decode, a second
// attempt is made starting at the first '{', which tolerates leading noise
// such as controller warnings printed before the document. Returns nil for
// empty input or when neither attempt decodes.
func loadJSON(raw string) map[string]interface{} {
	text := strings.TrimSpace(raw)
	if text == "" {
		return nil
	}

	attempts := []string{text}
	if brace := strings.IndexByte(text, '{'); brace >= 0 {
		attempts = append(attempts, text[brace:])
	}

	for _, candidate := range attempts {
		var doc map[string]interface{}
		if json.Unmarshal([]byte(candidate), &doc) == nil {
			return doc
		}
	}
	return nil
}
// jObj walks m along keys, descending one nested object per key, and returns
// the object reached. It returns nil as soon as a key is absent or its value is
// not an object. With no keys it returns m itself.
func jObj(m map[string]interface{}, keys ...string) map[string]interface{} {
	node := m
	for idx := 0; idx < len(keys); idx++ {
		if node == nil {
			return nil
		}
		child, isObject := node[keys[idx]].(map[string]interface{})
		if !isObject {
			return nil
		}
		node = child
	}
	return node
}
// jInt looks up the value at the key path and returns it as *int. JSON numbers
// (decoded as float64) are converted with int(); strings are trimmed and parsed
// with strconv.Atoi. Any other type, a missing path, or an unparsable string
// yields nil.
func jInt(m map[string]interface{}, keys ...string) *int {
	switch leaf := jLeaf(m, keys...).(type) {
	case float64:
		converted := int(leaf)
		return &converted
	case string:
		parsed, err := strconv.Atoi(strings.TrimSpace(leaf))
		if err != nil {
			return nil
		}
		return &parsed
	default:
		return nil
	}
}
// jStr returns the string at the key path with surrounding whitespace removed,
// or "" when the path is missing or the value is not a string.
func jStr(m map[string]interface{}, keys ...string) string {
	text, isString := jLeaf(m, keys...).(string)
	if !isString {
		return ""
	}
	return strings.TrimSpace(text)
}
// jBoolPtr returns a pointer to the boolean at the key path, or nil when the
// path is missing or the value is not a boolean.
func jBoolPtr(m map[string]interface{}, keys ...string) *bool {
	flag, isBool := jLeaf(m, keys...).(bool)
	if !isBool {
		return nil
	}
	return &flag
}
// jLeaf returns the undecoded value stored under the last key of the path,
// after resolving all preceding keys as nested objects via jObj. It returns nil
// when no keys are given, when the parent object cannot be reached, or when the
// final key is absent.
func jLeaf(m map[string]interface{}, keys ...string) interface{} {
	n := len(keys)
	if n == 0 {
		return nil
	}
	prefix, last := keys[:n-1], keys[n-1]
	if parent := jObj(m, prefix...); parent != nil {
		return parent[last]
	}
	return nil
}
// leadingInt matches optional leading whitespace followed by a run of digits,
// capturing the digits.
var leadingInt = regexp.MustCompile(`^\s*(\d+)`)

// firstInt parses the digits at the start of s ("345 hours" -> 345), ignoring
// leading whitespace. Parsing stops at the first non-digit, so a comma-grouped
// number such as "12,345" yields 12; use parseIntLoose for those. The boolean
// is false when s does not start with digits or the digits do not fit in int.
func firstInt(s string) (int, bool) {
	match := leadingInt.FindStringSubmatch(s)
	if match == nil {
		return 0, false
	}
	digits := match[1]
	value, err := strconv.Atoi(digits)
	ok := err == nil
	return value, ok
}
// looseIntPattern matches the first run of digits, with an optional leading
// minus sign. It is compiled once at package scope rather than on every call.
var looseIntPattern = regexp.MustCompile(`-?\d+`)

// parseIntLoose removes thousands-separator commas from s and parses the first
// integer (optionally negative) found anywhere in the remaining text, so
// "12,345 sectors" yields 12345 and "temp -5C" yields -5. Only commas are
// removed; a space inside a number ends it ("1 234" yields 1). The boolean is
// false when s contains no digits or the value does not fit in int.
func parseIntLoose(s string) (int, bool) {
	s = strings.TrimSpace(strings.ReplaceAll(s, ",", ""))
	m := looseIntPattern.FindString(s)
	if m == "" {
		return 0, false
	}
	n, err := strconv.Atoi(m)
	return n, err == nil
}

137
main.go Normal file
View file

@ -0,0 +1,137 @@
// Command drive-health-metrics collects per-drive SMART health from every physical
// drive on a host — direct SATA/SAS, NVMe, and drives hidden behind a RAID
// controller (MegaCLI / storcli / perccli) — scores each drive, and exports the
// result. By default it emits CSV or InfluxDB line protocol once and exits;
// with -server it runs as a service exposing a Prometheus endpoint and pushing to
// InfluxDB/Kafka on a schedule.
package main
import (
"context"
"fmt"
"log"
"os"
"os/signal"
"syscall"
"github.com/prometheus/client_golang/prometheus"
)
// Basic application info. namespace is the Prometheus metric prefix and matches
// the InfluxDB measurement name.
const (
// serviceName is the service's name.
serviceName = "drive-health-metrics"
// serviceDescription is a one-line summary of what the service does.
serviceDescription = "Collects and exports per-drive SMART health metrics"
// namespace is the Prometheus metric prefix.
namespace = "drive_health"
)
// App holds the shared application state: parsed flags, configuration, the
// Prometheus registry, and the exporter/outputs.
type App struct {
// flags holds the parsed command-line flags (main calls ParseFlags before use).
flags *Flags
// config holds the loaded configuration (main calls ReadConfig at startup;
// runServer calls it again on SIGHUP).
config *Config
// registry is the Prometheus registry the drive exporter is registered with;
// set by runServer.
registry *prometheus.Registry
// driveExporter is the Prometheus collector for drive metrics; set by runServer.
driveExporter *DriveExporter
// httpOutput serves the Prometheus endpoint; set by runServer.
httpOutput *HTTPOutput
// influxOutput pushes to InfluxDB/Kafka on a schedule; set by runServer.
influxOutput *InfluxOutput
}
// app is the global application state. It is allocated at the top of main and
// read directly by the outputs.
var app *App
func main() {
app = new(App)
app.ParseFlags()
app.ReadConfig()
switch {
case app.flags.Server:
runServer()
default:
runOneShot()
}
}
// runOneShot performs a single collection and prints it to stdout as CSV or
// InfluxDB line protocol, depending on the format flag. An unknown format is
// reported on stderr and exits with status 2. When no drives are collected a
// warning is written to stderr and nothing is printed.
func runOneShot() {
	format := app.flags.Format
	if format != "csv" && format != "influx" {
		fmt.Fprintf(os.Stderr, "invalid --format %q (want csv|influx)\n", format)
		os.Exit(2)
	}

	drives, tsNs := collect()
	if len(drives) == 0 {
		fmt.Fprintln(os.Stderr, "WARNING: no drive records collected")
		return
	}

	var rendered string
	switch format {
	case "csv":
		rendered = recordsToCSV(drives)
	default:
		rendered = recordsToInflux(drives, tsNs)
	}
	fmt.Println(rendered)
}
// runServer runs the long-lived service: a Prometheus HTTP endpoint plus the
// scheduled InfluxDB output, reloading configuration on SIGHUP and shutting down
// on SIGINT/SIGTERM. It exits the process via log.Fatalln when neither output is
// enabled at startup.
func runServer() {
// Build the exporter and registry.
app.driveExporter = NewDriveExporter()
reg := prometheus.NewRegistry()
reg.MustRegister(app.driveExporter)
app.registry = reg
// Build the outputs. Both constructors read app.config, and the HTTP output
// also reads app.registry, so they must run after the steps above.
app.httpOutput = NewHTTPOutput()
app.influxOutput = NewInfluxOutput()
if !app.httpOutput.OutputEnabled() && !app.influxOutput.OutputEnabled() {
log.Fatalln("No output services are enabled (set http_output.enabled or configure influx_output).")
}
// Monitor signals. The channel is buffered with capacity one.
c := make(chan os.Signal, 1)
signal.Notify(c, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP)
// Each outer iteration owns one background context for the output services;
// the inner loop applies SIGHUP reloads in place and only breaks out (to
// recreate the context and restart the services) when a config change
// requires it.
for {
ctx, cancel := context.WithCancel(context.Background())
go app.httpOutput.Start(ctx)
go app.influxOutput.Start(ctx)
restart := false
for !restart {
sig := <-c
if sig != syscall.SIGHUP {
// Termination/interruption: stop the services and exit.
// NOTE(review): this returns right after cancel() without waiting for
// the output goroutines to finish shutting down.
cancel()
return
}
log.Println("Reloading configurations")
// Snapshot the pre-reload state for the comparisons below.
// NOTE(review): oldConfig is the pointer app.config held before
// ReadConfig; the comparisons only detect changes if ReadConfig installs a
// new Config value rather than mutating the existing one in place — confirm.
oldConfig := app.config
influxWasEnabled := app.influxOutput.OutputEnabled()
app.ReadConfig()
app.httpOutput.Reload()
app.influxOutput.Reload()
// The HTTP service restarts when its bind address, port, or enabled flag
// changed.
httpNeedsRestart := oldConfig.HTTP.BindAddr != app.config.HTTP.BindAddr ||
oldConfig.HTTP.Port != app.config.HTTP.Port ||
oldConfig.HTTP.Enabled != app.config.HTTP.Enabled
// The Influx loop restarts when its enabled state or push frequency changed.
influxNeedsRestart := app.influxOutput.OutputEnabled() != influxWasEnabled ||
oldConfig.Influx.Frequency != app.config.Influx.Frequency
restart = httpNeedsRestart || influxNeedsRestart
}
// A restart-worthy change occurred: stop the current services and loop
// to start them on a fresh context.
cancel()
}
}

178
output.go Normal file
View file

@ -0,0 +1,178 @@
package main
import (
"bytes"
"encoding/json"
"fmt"
"sort"
"strconv"
"strings"
)
// influxMeasurement is the InfluxDB measurement name written by recordsToInflux
// and recordsToInfluxJSON; it is defined as the Prometheus namespace so both
// outputs describe the same series.
const influxMeasurement = namespace
// enclosureSlot renders the drive's physical location. With both values known
// the result is "<enclosure>:<slot>"; with only one known, that value alone;
// with neither, the empty string.
func (d *Drive) enclosureSlot() string {
	if d.Slot == "" {
		return d.Enclosure
	}
	if d.Enclosure == "" {
		return d.Slot
	}
	return d.Enclosure + ":" + d.Slot
}
// csvEscape returns s as a CSV field per RFC 4180: when s contains a comma, a
// double quote, a line feed, or a carriage return, it is wrapped in double
// quotes with embedded quotes doubled; otherwise it is returned unchanged.
// Carriage returns must be quoted too, since a bare CR inside an unquoted
// field can be read as a record terminator.
func csvEscape(s string) string {
	if strings.ContainsAny(s, ",\"\n\r") {
		return "\"" + strings.ReplaceAll(s, "\"", "\"\"") + "\""
	}
	return s
}
// recordsToCSV builds a CSV document from drives: a header row of column names
// followed by one row per drive, each cell formatted and CSV-escaped, with
// columns in schema (columns) order. Rows are separated by "\n" and there is no
// trailing newline.
func recordsToCSV(drives []*Drive) string {
	rows := make([]string, 0, len(drives)+1)

	header := make([]string, 0, len(columns))
	for _, col := range columns {
		header = append(header, col.name)
	}
	rows = append(rows, strings.Join(header, ","))

	for _, drive := range drives {
		cells := make([]string, 0, len(columns))
		for _, col := range columns {
			cells = append(cells, csvEscape(format(col.raw(drive))))
		}
		rows = append(rows, strings.Join(cells, ","))
	}
	return strings.Join(rows, "\n")
}
// influxTagReplacer backslash-escapes the characters the line protocol treats
// as delimiters inside a tag value. It is built once at package scope instead
// of on every call.
var influxTagReplacer = strings.NewReplacer(" ", `\ `, ",", `\,`, "=", `\=`)

// influxTagEscape escapes spaces, commas, and equals signs in an InfluxDB tag
// value, which the line protocol treats as delimiters.
func influxTagEscape(s string) string {
	return influxTagReplacer.Replace(s)
}
// influxStringFieldEscaper escapes a string field value for the line protocol.
// Inside a double-quoted field value both the backslash and the double quote
// must be backslash-escaped; escaping only the quote lets a value containing or
// ending in a backslash corrupt the line.
var influxStringFieldEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`)

// recordsToInflux renders the drives as InfluxDB line protocol, one line per
// drive: tag columns become tags, the remaining (non-csvOnly) columns become
// typed fields (int "i" suffix, float, bool, or quoted string), all sharing the
// collection timestamp tsNs. Tags with an empty formatted value and fields
// whose raw value is nil are omitted. A drive with no usable fields is skipped.
// Lines are joined with "\n" and there is no trailing newline.
func recordsToInflux(drives []*Drive, tsNs int64) string {
	lines := make([]string, 0, len(drives))
	for _, d := range drives {
		// Tags.
		tags := map[string]string{}
		for _, c := range columns {
			if !c.influxTag {
				continue
			}
			if v := format(c.raw(d)); v != "" {
				tags[c.name] = influxTagEscape(v)
			}
		}

		// Fields: every non-csvOnly, non-tag column with a value.
		fields := map[string]string{}
		for _, c := range columns {
			if c.csvOnly || c.influxTag {
				continue
			}
			r := c.raw(d)
			if r == nil {
				continue
			}
			switch c.kind {
			case kindInt:
				fields[c.name] = strconv.Itoa(r.(int)) + "i"
			case kindFloat:
				fields[c.name] = format(r)
			case kindBool:
				fields[c.name] = format(r) // "true"/"false".
			default:
				fields[c.name] = "\"" + influxStringFieldEscaper.Replace(format(r)) + "\""
			}
		}
		if len(fields) == 0 {
			continue
		}

		// Sorted key order keeps the output deterministic across runs.
		tagStr := joinSorted(tags)
		fieldStr := joinSorted(fields)
		if tagStr != "" {
			lines = append(lines, fmt.Sprintf("%s,%s %s %d", influxMeasurement, tagStr, fieldStr, tsNs))
		} else {
			lines = append(lines, fmt.Sprintf("%s %s %d", influxMeasurement, fieldStr, tsNs))
		}
	}
	return strings.Join(lines, "\n")
}
// recordsToInfluxJSON emits one JSON object per drive, each followed by a
// newline, of the form {name, tags, fields, timestamp}. Tag columns with a
// non-empty formatted value become tags; non-tag, non-csvOnly columns with a
// non-nil raw value become fields, keeping their raw type. The timestamp is
// tsNs divided by 1000 (microseconds). Drives with no fields, or whose object
// fails to encode, are skipped.
func recordsToInfluxJSON(drives []*Drive, tsNs int64) []byte {
	var out bytes.Buffer
	// Encode writes the object followed by '\n', and writes nothing on error.
	enc := json.NewEncoder(&out)
	micros := tsNs / 1000

	for _, drive := range drives {
		tagSet := make(map[string]string)
		fieldSet := make(map[string]interface{})
		for _, col := range columns {
			if col.influxTag {
				if text := format(col.raw(drive)); text != "" {
					tagSet[col.name] = text
				}
				continue
			}
			if col.csvOnly {
				continue
			}
			// Raw values are already typed (int, float64, bool, or string).
			if value := col.raw(drive); value != nil {
				fieldSet[col.name] = value
			}
		}
		if len(fieldSet) == 0 {
			continue
		}

		point := map[string]interface{}{
			"name":      influxMeasurement,
			"tags":      tagSet,
			"fields":    fieldSet,
			"timestamp": micros,
		}
		if err := enc.Encode(point); err != nil {
			continue
		}
	}
	return out.Bytes()
}
// joinSorted renders m as comma-separated "key=value" pairs ordered by key, so
// line-protocol tag and field sets come out the same on every run. An empty or
// nil map yields "".
func joinSorted(m map[string]string) string {
	var ordered []string
	for key := range m {
		ordered = append(ordered, key)
	}
	sort.Strings(ordered)

	var sb strings.Builder
	for idx, key := range ordered {
		if idx > 0 {
			sb.WriteByte(',')
		}
		sb.WriteString(key)
		sb.WriteByte('=')
		sb.WriteString(m[key])
	}
	return sb.String()
}

562
realdata_test.go Normal file
View file

@ -0,0 +1,562 @@
package main
import (
"os"
"path/filepath"
"strings"
"testing"
)
// readFixture returns the contents of testdata/<name> as a string, failing the
// test immediately if the file cannot be read.
func readFixture(t *testing.T, name string) string {
	t.Helper()
	path := filepath.Join("testdata", name)
	data, err := os.ReadFile(path)
	if err != nil {
		t.Fatalf("read fixture %s: %v", name, err)
	}
	return string(data)
}
// ---- Real smartctl JSON: Samsung 870 EVO via sat+megaraid (smartmontools 7.0) ----
// Checks identity, health, counters, wear, and the derived recommendation
// parsed from the captured fixture.
func TestParseSmartJSON_RealMegaraidSSD(t *testing.T) {
	drive := new(Drive)
	parseSmartJSON(loadJSON(readFixture(t, "smart_ata_ssd_megaraid.json")), drive)

	// Identity and classification.
	if got := drive.Model; got != "Samsung SSD 870 EVO 2TB" {
		t.Errorf("model = %q", got)
	}
	if got := drive.Serial; got != "S624NS0RC00003M" {
		t.Errorf("serial = %q", got)
	}
	if got := drive.Firmware; got != "SVT02B6Q" {
		t.Errorf("firmware = %q", got)
	}
	if got := drive.Rotation; got != "SSD" {
		t.Errorf("rotation = %q, want SSD", got)
	}
	if got := drive.Capacity; got != "2.00 TB" {
		t.Errorf("capacity = %q, want 2.00 TB", got)
	}
	if got := drive.SmartHealth; got != "PASSED" {
		t.Errorf("health = %q", got)
	}

	// Usage counters.
	if iv(drive.PowerOnHours) != 33518 {
		t.Errorf("poh = %v, want 33518", drive.PowerOnHours)
	}
	if iv(drive.PowerCycleCount) != 7 {
		t.Errorf("power_cycles = %v, want 7", drive.PowerCycleCount)
	}
	if iv(drive.TempC) != 31 {
		t.Errorf("temp = %v, want 31", drive.TempC)
	}

	// A healthy drive reports every defect counter, each at zero.
	if iv(drive.Reallocated) != 0 || iv(drive.UdmaCrc) != 0 || iv(drive.ReportedUncorrect) != 0 {
		t.Errorf("defects: realloc=%v crc=%v reported=%v, want 0/0/0",
			drive.Reallocated, drive.UdmaCrc, drive.ReportedUncorrect)
	}

	// Samsung Wear_Leveling_Count (ID177) at value 93 means 7% consumed.
	if drive.WearSrc != "Samsung/ID177" || iv(drive.WearPctConsumed) != 7 {
		t.Errorf("wear src=%q consumed=%v, want Samsung/ID177 / 7", drive.WearSrc, drive.WearPctConsumed)
	}

	// Derived values.
	drive.HaveSmart = true
	finalizeDerived(drive)
	if got := drive.Recommendation; got != "OK" {
		t.Errorf("rec = %q (score %d), want OK", got, drive.RiskScore)
	}
	if iv(drive.DefectTotal) != 0 {
		t.Errorf("defect_total = %v, want 0", drive.DefectTotal)
	}
}
// ---- Real smartctl JSON: Micron 5400 via sat+megaraid, no controller CLI ----
// Captured on util01 (Ubuntu 24, smartmontools 7.4), which has megaraid drives
// but neither storcli nor MegaCLI, so no controller data is merged. The drive
// exposes the ID173/202/233 wear attributes together, and its model is a bare
// part number without a vendor prefix.
func TestParseSmartJSON_RealMicronSSD(t *testing.T) {
	drive := new(Drive)
	parseSmartJSON(loadJSON(readFixture(t, "smart_ata_ssd_micron.json")), drive)

	if got := drive.Model; got != "MTFDDAK960TGA-1BC1ZABDA" {
		t.Errorf("model = %q", got)
	}
	if isPseudoDevice(drive) {
		t.Errorf("bare part-number model wrongly flagged as pseudo device")
	}
	if got := drive.Rotation; got != "SSD" {
		t.Errorf("rotation = %q, want SSD", got)
	}
	if got := drive.Capacity; got != "0.96 TB" {
		t.Errorf("capacity = %q, want 0.96 TB", got)
	}
	if got := drive.SmartHealth; got != "PASSED" {
		t.Errorf("health = %q", got)
	}
	if iv(drive.PowerOnHours) != 20238 || iv(drive.PowerCycleCount) != 12 {
		t.Errorf("poh=%v cycles=%v, want 20238/12", drive.PowerOnHours, drive.PowerCycleCount)
	}

	// ID173 takes precedence among the wear attributes; VALUE 100 means 0%
	// consumed (fresh).
	if drive.WearSrc != "Micron/ID173" || iv(drive.WearPctConsumed) != 0 {
		t.Errorf("wear src=%q consumed=%v, want Micron/ID173 / 0", drive.WearSrc, drive.WearPctConsumed)
	}

	drive.HaveSmart = true
	finalizeDerived(drive)
	if got := drive.Recommendation; got != "OK" {
		t.Errorf("rec = %q (score %d), want OK", got, drive.RiskScore)
	}
}
// ---- Real NVMe JSON: health log drives wear/identity (direct-attached SSD) ----
func TestParseSmartJSON_RealNVMe(t *testing.T) {
	drive := new(Drive)
	parseSmartJSON(loadJSON(readFixture(t, "smart_nvme.json")), drive)

	if got := drive.Model; got != "Force MP510" {
		t.Errorf("model = %q", got)
	}
	if got := drive.Rotation; got != "NVMe" {
		t.Errorf("rotation = %q, want NVMe", got)
	}
	if iv(drive.PowerOnHours) != 42811 {
		t.Errorf("poh = %v, want 42811", drive.PowerOnHours)
	}

	// percentage_used of 6 in the NVMe health log means 6% consumed.
	if drive.WearSrc != "NVMe/percentage_used" || iv(drive.WearPctConsumed) != 6 {
		t.Errorf("wear src=%q consumed=%v, want NVMe/percentage_used / 6", drive.WearSrc, drive.WearPctConsumed)
	}
	if iv(drive.NvmeCriticalWarning) != 0 || iv(drive.NvmeAvailSpare) != 100 {
		t.Errorf("nvme crit=%v spare=%v, want 0/100", drive.NvmeCriticalWarning, drive.NvmeAvailSpare)
	}

	drive.HaveSmart = true
	finalizeDerived(drive)
	// Healthy NVMe at 42811h (>4yr -> age +4) with 6% wear should still be OK.
	if got := drive.Recommendation; got != "OK" {
		t.Errorf("rec = %q (score %d), want OK", got, drive.RiskScore)
	}
}
// ---- Real SAS SSD JSON: endurance + grown-defect/error-counter health ----
// SAMSUNG ARFX0920S5xnNTRI behind a SAS HBA (tama, smartmontools 7.5). SAS
// drives have no ATA attribute table, so the hard-defect signals are taken from
// the SCSI logs: grown defect list, error counter log, and pending defects.
func TestParseSmartJSON_RealSASSSD(t *testing.T) {
	drive := new(Drive)
	parseSmartJSON(loadJSON(readFixture(t, "smart_sas_ssd.json")), drive)

	if drive.Model != "SAMSUNG ARFX0920S5xnNTRI" || drive.Serial != "S43YNF0K000001" {
		t.Errorf("identity model=%q serial=%q", drive.Model, drive.Serial)
	}
	if got := drive.Transport; !strings.HasPrefix(got, "SAS") {
		t.Errorf("transport = %q, want SAS*", got)
	}
	if got := drive.Rotation; got != "SSD" {
		t.Errorf("rotation = %q, want SSD", got)
	}
	if got := drive.SmartHealth; got != "PASSED" {
		t.Errorf("health = %q", got)
	}
	if iv(drive.PowerOnHours) != 2487 || iv(drive.TempC) != 56 {
		t.Errorf("poh=%v temp=%v, want 2487/56", drive.PowerOnHours, drive.TempC)
	}

	// A SCSI endurance indicator of 0% means 0% consumed.
	if drive.WearSrc != "SCSI/endurance" || iv(drive.WearPctConsumed) != 0 {
		t.Errorf("wear src=%q consumed=%v, want SCSI/endurance / 0", drive.WearSrc, drive.WearPctConsumed)
	}

	// Healthy SAS drive: grown defects, uncorrected errors, and pending
	// defects are all zero (and non-nil, because the SCSI logs were present).
	if iv(drive.Reallocated) != 0 || iv(drive.Uncorrectable) != 0 || iv(drive.Pending) != 0 {
		t.Errorf("defects: grown=%v uncorrected=%v pending=%v, want 0/0/0",
			drive.Reallocated, drive.Uncorrectable, drive.Pending)
	}

	drive.HaveSmart = true
	finalizeDerived(drive)
	if got := drive.Recommendation; got != "OK" {
		t.Errorf("rec = %q (score %d), want OK", got, drive.RiskScore)
	}
}
// ---- SAS hard-defect signals drive the score (synthetic, both JSON + text) ----
// The captured SAS drives are healthy; verify the SCSI error counter log and
// pending-defect count actually feed the scorer when nonzero. The same signals
// are supplied twice: once as smartctl JSON (parseSmartJSON) and once as
// `smartctl -a` text (parseSmartText), and both must yield the same counters.
func TestSASUncorrectedErrorsScored(t *testing.T) {
// Synthetic JSON document with nonzero grown defects, uncorrected errors, and
// pending defects.
const j = `{
"model_name": "SEAGATE ST4000NM",
"device": {"type": "scsi", "protocol": "SCSI"},
"smart_status": {"passed": true},
"scsi_grown_defect_list": 3,
"scsi_error_counter_log": {
"read": {"total_uncorrected_errors": 2},
"write": {"total_uncorrected_errors": 0},
"verify": {"total_uncorrected_errors": 1}
},
"scsi_pending_defects": {"count": 4}
}`
d := &Drive{}
parseSmartJSON(loadJSON(j), d)
if iv(d.Uncorrectable) != 3 { // 2 read + 1 verify
t.Errorf("uncorrectable = %v, want 3", d.Uncorrectable)
}
if iv(d.Pending) != 4 {
t.Errorf("pending = %v, want 4", d.Pending)
}
if iv(d.Reallocated) != 3 { // grown defect list
t.Errorf("reallocated(grown) = %v, want 3", d.Reallocated)
}
// With these defects present the scorer is expected to recommend replacement.
d.HaveSmart = true
finalizeDerived(d)
if d.Recommendation != "REPLACE_NOW" {
t.Errorf("rec = %q (score %d), want REPLACE_NOW", d.Recommendation, d.RiskScore)
}
// Text path: same signals from `smartctl -a` SAS output.
const text = `Vendor: SEAGATE
Product: ST4000NM
SMART Health Status: OK
Elements in grown defect list: 3
Error counter log:
read: 0 0 0 0 0 2620.555 2
write: 0 0 0 0 0 2091.250 0
verify: 0 0 0 0 0 46.845 1
Pending defect count:4 Pending Defects
`
dt := &Drive{}
parseSmartText(text, dt)
if iv(dt.Uncorrectable) != 3 || iv(dt.Pending) != 4 || iv(dt.Reallocated) != 3 {
t.Errorf("text path: uncorrected=%v pending=%v grown=%v, want 3/4/3",
dt.Uncorrectable, dt.Pending, dt.Reallocated)
}
}
// ---- iSCSI LUNs (LIO/IET) are dropped via the SCSI transport, not by model ----
func TestISCSILunFilter(t *testing.T) {
	lun := new(Drive)
	parseSmartJSON(loadJSON(readFixture(t, "smart_iscsi_lio.json")), lun)
	if got := lun.Transport; got != "iSCSI" {
		t.Fatalf("transport = %q, want iSCSI", got)
	}
	if !isPseudoDevice(lun) {
		t.Errorf("LIO-ORG iSCSI LUN not flagged as pseudo device")
	}

	// The transport alone decides, even when the model looks like a real drive.
	realLooking := &Drive{Model: "Samsung SSD 860 EVO 1TB", Transport: "iSCSI"}
	if !isPseudoDevice(realLooking) {
		t.Errorf("iSCSI transport not honored over a real-looking model")
	}

	// On the text path there is no transport field; the LIO model token must
	// still catch it.
	modelOnly := &Drive{Model: "LIO-ORG k8s1"}
	if !isPseudoDevice(modelOnly) {
		t.Errorf("LIO-ORG model token not matched")
	}

	// A real SAS/SATA drive (transport SAS) must pass through unfiltered.
	sasDrive := &Drive{Model: "HGST HUH721010ALE604", Transport: "SAS"}
	if isPseudoDevice(sasDrive) {
		t.Errorf("real SAS drive wrongly flagged as pseudo device")
	}
}
// ---- Pseudo devices must be filtered: iSCSI LUN + RAID controller VD ----
func TestPseudoDeviceFilter(t *testing.T) {
	// IET iSCSI virtual disk.
	iscsi := new(Drive)
	parseSmartJSON(loadJSON(readFixture(t, "smart_iscsi_virtual_disk.json")), iscsi)
	if got := iscsi.Model; got != "IET VIRTUAL-DISK" {
		t.Fatalf("iscsi model = %q", got)
	}
	if !isPseudoDevice(iscsi) {
		t.Errorf("IET VIRTUAL-DISK not flagged as pseudo device")
	}

	// Broadcom RAID controller virtual disk.
	vd := new(Drive)
	parseSmartJSON(loadJSON(readFixture(t, "smart_raid_vd.json")), vd)
	if got := vd.Model; got != "BROADCOM MR9560-16i" {
		t.Fatalf("raid vd model = %q", got)
	}
	if !isPseudoDevice(vd) {
		t.Errorf("BROADCOM MR9560-16i VD not flagged as pseudo device")
	}

	// Avago RAID controller virtual disk.
	avago := new(Drive)
	parseSmartJSON(loadJSON(readFixture(t, "smart_raid_vd_avago.json")), avago)
	if got := avago.Model; got != "AVAGO MR9363-4i" {
		t.Fatalf("avago vd model = %q", got)
	}
	if !isPseudoDevice(avago) {
		t.Errorf("AVAGO MR9363-4i VD not flagged as pseudo device")
	}

	// SMART-less RAID virtual disks are recognised by model alone (DELL RAID /
	// PERC VDs).
	virtualModels := []string{"DELL RAID", "DELL PERC H730"}
	for _, model := range virtualModels {
		if !isPseudoDevice(&Drive{Model: model}) {
			t.Errorf("RAID VD %q not flagged as pseudo device", model)
		}
	}

	// Real drives sitting behind these controllers must NOT be filtered.
	realModels := []string{
		"Samsung SSD 870 EVO 2TB",
		"MICRON_M510DC_MTFDDAK960MBP",
		"HFS960G3H2X069N",
		"Dell DC NVMe PM9A3 RI U.2 960GB",
	}
	for _, model := range realModels {
		if isPseudoDevice(&Drive{Model: model}) {
			t.Errorf("real drive %q wrongly flagged as pseudo device", model)
		}
	}
}
// ---- Real storcli: DID comes from the table row; sub-headers don't fragment ----
func TestParseStorcli_Real(t *testing.T) {
	parsed := parseStorcli(readFixture(t, "storcli_show_all.txt"))
	if n := len(parsed); n != 4 {
		t.Fatalf("parsed %d drives, want 4", n)
	}

	// Index the parsed drives by device ID, flagging any without one.
	index := make(map[string]ctrlDrive, len(parsed))
	for _, entry := range parsed {
		if entry.DeviceID == "" {
			t.Errorf("drive at %s:%s has empty DID", entry.Enclosure, entry.Slot)
		}
		index[entry.DeviceID] = entry
	}

	// Slot -> DID -> OtherErr mapping, taken directly from the capture.
	type expectation struct {
		slot     string
		otherErr int
	}
	expected := map[string]expectation{
		"22": {"0", 7},
		"23": {"1", 6},
		"20": {"2", 21},
		"21": {"3", 12},
	}
	for did, want := range expected {
		cd, found := index[did]
		if !found {
			t.Errorf("DID %s missing", did)
			continue
		}
		if cd.Enclosure != "64" || cd.Slot != want.slot {
			t.Errorf("DID %s location %s:%s, want 64:%s", did, cd.Enclosure, cd.Slot, want.slot)
		}
		if cd.OtherErr != want.otherErr {
			t.Errorf("DID %s other_err=%d, want %d", did, cd.OtherErr, want.otherErr)
		}
		if cd.MediaErr != 0 || cd.Predictive != 0 || cd.SmartAlert {
			t.Errorf("DID %s media=%d pred=%d alert=%v, want 0/0/false",
				did, cd.MediaErr, cd.Predictive, cd.SmartAlert)
		}
		if cd.FwState != "Onln" {
			t.Errorf("DID %s fw_state=%q, want Onln", did, cd.FwState)
		}
	}
}
// ---- Real storcli, second version/enclosure: the parser generalizes ----
// Capture from storcli v007.1907 with enclosure 252 and Micron SSDs
// (jarvis01-kvm92). It shows the summary-header + table-row DID logic is not
// tied to a single storcli build.
func TestParseStorcli_RealV2(t *testing.T) {
	parsed := parseStorcli(readFixture(t, "storcli_show_all_v2.txt"))
	if n := len(parsed); n != 4 {
		t.Fatalf("parsed %d drives, want 4", n)
	}

	// Every drive sits in enclosure 252, is Online, and has zero errors. The
	// DID index is built in the same pass.
	index := make(map[string]ctrlDrive, len(parsed))
	for i := range parsed {
		drv := parsed[i]
		if drv.DeviceID == "" {
			t.Errorf("drive at %s:%s has empty DID", drv.Enclosure, drv.Slot)
		}
		if enc := drv.Enclosure; enc != "252" {
			t.Errorf("DID %s enclosure=%q, want 252", drv.DeviceID, enc)
		}
		if state := drv.FwState; state != "Onln" {
			t.Errorf("DID %s fw_state=%q, want Onln", drv.DeviceID, state)
		}
		if !(drv.MediaErr == 0 && drv.OtherErr == 0 && drv.Predictive == 0) {
			t.Errorf("DID %s errors media=%d other=%d pred=%d, want 0/0/0",
				drv.DeviceID, drv.MediaErr, drv.OtherErr, drv.Predictive)
		}
		index[drv.DeviceID] = drv
	}

	// DID N lives in slot (N-4); spot-check both ends of the range.
	if drv, found := index["4"]; !(found && drv.Slot == "0") {
		t.Errorf("DID 4 -> %s:%s, want 252:0", drv.Enclosure, drv.Slot)
	}
	if drv, found := index["7"]; !(found && drv.Slot == "3") {
		t.Errorf("DID 7 -> %s:%s, want 252:3", drv.Enclosure, drv.Slot)
	}
}
// ---- Real perccli2 JSON: DID->PID, State vs Status, NVMe namespace counters ----
// perccli2 (8.x) is JSON-native. Its text form gains a second status column that
// breaks positional parsing, so parsePerccli2 reads the JSON instead. The fixture
// is a real `perccli2 /call/eall/sall show all J` capture (Dell PM9A3 NVMe behind
// a PERC).
func TestParsePerccli2_Real(t *testing.T) {
	parsed := parsePerccli2(readFixture(t, "perccli2_show_all.json"))
	if n := len(parsed); n != 2 {
		t.Fatalf("parsed %d drives, want 2", n)
	}
	index := make(map[string]ctrlDrive, len(parsed))
	for i := range parsed {
		index[parsed[i].DeviceID] = parsed[i]
	}

	nvme0, found := index["275"]
	if !found {
		t.Fatal("PID 275 missing")
	}
	if !(nvme0.Enclosure == "284" && nvme0.Slot == "0") {
		t.Errorf("PID 275 location %s:%s, want 284:0", nvme0.Enclosure, nvme0.Slot)
	}
	// FwState is taken from Status (Online), NOT from State (Conf).
	if state := nvme0.FwState; state != "Online" {
		t.Errorf("PID 275 fw_state=%q, want Online", state)
	}
	if !(nvme0.Model == "Dell DC NVMe PM9A3 RI U.2 960GB" && nvme0.Serial == "S6JGNA0X000001") {
		t.Errorf("PID 275 model=%q serial=%q", nvme0.Model, nvme0.Serial)
	}
	if rot := nvme0.Rotation; rot != "NVMe" {
		t.Errorf("PID 275 rotation=%q, want NVMe", rot)
	}
	if iv(nvme0.TempC) != 30 {
		t.Errorf("PID 275 temp=%v, want 30", nvme0.TempC)
	}
	// NVMe error counters sit under "LU/NS Properties", not directly in detail.
	if !(nvme0.MediaErr == 0 && nvme0.OtherErr == 0 && nvme0.Predictive == 0) {
		t.Errorf("PID 275 errors media=%d other=%d pred=%d, want 0/0/0",
			nvme0.MediaErr, nvme0.OtherErr, nvme0.Predictive)
	}

	if nvme1, found := index["276"]; !found || nvme1.Slot != "1" || iv(nvme1.TempC) != 32 {
		t.Errorf("PID 276 -> %s:%s temp=%v, want 284:1 / 32", nvme1.Enclosure, nvme1.Slot, nvme1.TempC)
	}
}
// ---- Controller-only drive: surfaced from controller data, scored not NO_DATA ----
func TestPerccli2ControllerOnlyDrive(t *testing.T) {
drives := parsePerccli2(readFixture(t, "perccli2_show_all.json"))
byPID := map[string]ctrlDrive{}
for _, cd := range drives {
byPID[cd.DeviceID] = cd
}
cd := byPID["275"]
// Healthy controller-only drive: identity + health from controller only.
d := &Drive{}
applyController(d, cd)
d.HaveSmart = false
finalizeDerived(d)
if d.enclosureSlot() != "284:0" {
t.Errorf("enclosure_slot = %q, want 284:0", d.enclosureSlot())
}
if d.Model != "Dell DC NVMe PM9A3 RI U.2 960GB" || d.Rotation != "NVMe" {
t.Errorf("identity model=%q rotation=%q", d.Model, d.Rotation)
}
if d.Recommendation != "OK" {
t.Errorf("healthy controller-only rec=%q (score %d), want OK", d.Recommendation, d.RiskScore)
}
// A failed controller drive is surfaced (scored), never dropped as NO_DATA.
cd.FwState = "Failed"
df := &Drive{}
applyController(df, cd)
df.HaveSmart = false
finalizeDerived(df)
if df.Recommendation == "NO_DATA" {
t.Errorf("failed controller-only drive scored NO_DATA")
}
if df.RiskScore < 40 {
t.Errorf("failed controller-only drive score=%d, want >=40", df.RiskScore)
}
}
// ---- Controller merge by DID when DID != slot (perccli v007.2616) ----
// On util01 the slot<->DID numbering is crossed: slot 0 is DID 1 and slot 1 is
// DID 0. smartctl's "megaraid,N" index equals the controller DID, so looking up
// controller data by DID (not by slot) must still produce the right
// enclosure:slot.
func TestStorcli_DIDMatchingCrossed(t *testing.T) {
	index := map[string]ctrlDrive{}
	parsed := parseStorcli(readFixture(t, "perccli_show_all.txt"))
	for i := range parsed {
		mergeCtrl(index, parsed[i])
	}

	// megaraid index -> expected enclosure:slot.
	wantLocation := map[string]string{
		"0": "252:1",
		"1": "252:0",
	}
	for did, want := range wantLocation {
		ctrl, found := index[did]
		if !found {
			t.Errorf("DID %s missing from index", did)
			continue
		}
		// DeviceID carries the smartctl megaraid,N index.
		drv := &Drive{DeviceID: did}
		applyController(drv, ctrl)
		if got := drv.enclosureSlot(); got != want {
			t.Errorf("megaraid,%s -> %q, want %q", did, got, want)
		}
	}
}
// ---- Real MegaCLI capture: agrees with storcli on the same hardware ----
func TestParseMegacli_Real(t *testing.T) {
	parsed := parseMegacliPDList(readFixture(t, "megacli_pdlist.txt"))
	if n := len(parsed); n != 4 {
		t.Fatalf("parsed %d drives, want 4", n)
	}
	index := make(map[string]ctrlDrive, len(parsed))
	for i := range parsed {
		index[parsed[i].DeviceID] = parsed[i]
	}

	drv, found := index["20"]
	if !found {
		t.Fatal("DID 20 missing")
	}
	// DID 20 is slot 2, which cross-checks the storcli capture.
	if !(drv.Enclosure == "64" && drv.Slot == "2" && drv.OtherErr == 21) {
		t.Errorf("DID 20 = %s:%s other=%d, want 64:2 / 21", drv.Enclosure, drv.Slot, drv.OtherErr)
	}
	if state := drv.FwState; state != "Online, Spun Up" {
		t.Errorf("DID 20 fw_state=%q", state)
	}
	if iv(drv.TempC) != 31 {
		t.Errorf("DID 20 temp=%v, want 31", drv.TempC)
	}
	if alert := drv.SmartAlert; alert {
		t.Errorf("DID 20 smart alert set, want false")
	}
}
// ---- MegaCLI words its SMART alert as "Drive has flagged a S.M.A.R.T alert" ----
// A minimal single-drive PD list with that line set to Yes must parse to one
// drive with SmartAlert raised.
func TestMegacliSmartAlertPhrasing(t *testing.T) {
	const pdList = `Enclosure Device ID: 64
Slot Number: 5
Device Id: 99
Firmware state: Online, Spun Up
Drive has flagged a S.M.A.R.T alert : Yes
`
	parsed := parseMegacliPDList(pdList)
	if n := len(parsed); n != 1 {
		t.Fatalf("parsed %d drives, want 1", n)
	}
	if alert := parsed[0].SmartAlert; !alert {
		t.Errorf("smart alert not detected from MegaCLI phrasing")
	}
}
// ---- Controller state scoring: storcli abbreviations are not faults ----
// Each case scores an otherwise clean drive (SMART PASSED, no counters), so a
// score of 40 or more can only come from the fw_state penalty.
func TestFwStateScoring(t *testing.T) {
	table := []struct {
		fwState   string
		penalized bool // whether the +40 fw_state penalty is expected
	}{
		{fwState: "Onln", penalized: false},
		{fwState: "Online, Spun Up", penalized: false},
		{fwState: "GHS", penalized: false},
		{fwState: "JBOD", penalized: false},
		{fwState: "", penalized: false},
		{fwState: "Offln", penalized: true},
		{fwState: "Failed", penalized: true},
		{fwState: "Rebuild", penalized: true},
	}
	for i := range table {
		tc := table[i]
		drv := &Drive{HaveSmart: true, Model: "X", SmartHealth: "PASSED", FwState: tc.fwState}
		total, _, _ := scoreDrive(drv)
		if penalized := total >= 40; penalized != tc.penalized {
			t.Errorf("fw_state %q: penalized=%v (score %d), want %v", tc.fwState, penalized, total, tc.penalized)
		}
	}
}

158
schema.go Normal file
View file

@ -0,0 +1,158 @@
package main
import "strconv"
// colKind is a column's value type. It drives formatting (CSV), typing (InfluxDB
// "i" suffix / JSON number / bool), and the tag-vs-field and label-vs-gauge
// splits the outputs derive from the table.
type colKind int
// The column kinds. kindStr is the zero value; labelColumns selects kindStr
// columns and gaugeColumns selects every other kind.
const (
kindStr colKind = iota // string-valued column
kindInt // integer-valued column
kindFloat // floating-point column
kindBool // boolean column
)
// column is the single source of truth for one output column: its name, value
// type, where it appears, and how to read it from a Drive. Every output (CSV,
// InfluxDB line protocol/JSON, Prometheus) is driven by this one table, so a new
// column is added in exactly one place.
//
// The output partitions are derived, not duplicated:
// - InfluxDB tag : kindStr column with influxTag set.
// - InfluxDB field : any non-csvOnly column that is not a tag.
// - Prometheus label : every non-csvOnly kindStr column.
// - Prometheus gauge : every non-csvOnly numeric (non-kindStr) column.
// - csvOnly : present in CSV only (timestamp, risk_reasons).
type column struct {
// name is the column's output name; Drive.field looks columns up by it.
name string
// kind is the column's value type (see colKind).
kind colKind
// influxTag marks a kindStr column as an InfluxDB tag.
influxTag bool
// csvOnly restricts the column to the CSV output; labelColumns and
// gaugeColumns both skip it.
csvOnly bool
// value reads the column from d. It returns the typed Go value (string,
// *int, int, *float64, float64, or bool); a nil pointer means "unknown" and
// renders blank / is skipped by the metric outputs.
value func(d *Drive) any
}
// columns is the ordered output schema. CSV emits these in this order; the
// metric outputs sort their own tag/field sets independently.
//
// Each entry's value closure reads one Drive field (or, for enclosure_slot, the
// enclosureSlot method). Pointer-typed fields may be nil, which column.raw
// reports as absent.
var columns = []column{
// Collection metadata and drive identity.
{name: "collected_at", kind: kindStr, csvOnly: true, value: func(d *Drive) any { return d.CollectedAt }},
{name: "hostname", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.Hostname }},
{name: "device_path", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.DevicePath }},
{name: "dtype", kind: kindStr, value: func(d *Drive) any { return d.Dtype }},
{name: "enclosure_slot", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.enclosureSlot() }},
{name: "device_id", kind: kindStr, value: func(d *Drive) any { return d.DeviceID }},
{name: "serial", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.Serial }},
{name: "model", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.Model }},
{name: "firmware", kind: kindStr, value: func(d *Drive) any { return d.Firmware }},
{name: "capacity", kind: kindStr, value: func(d *Drive) any { return d.Capacity }},
{name: "rotation", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.Rotation }},
// SMART verdict and defect / link-error counters.
{name: "smart_health", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.SmartHealth }},
{name: "defect_total", kind: kindInt, value: func(d *Drive) any { return d.DefectTotal }},
{name: "udma_crc_errors", kind: kindInt, value: func(d *Drive) any { return d.UdmaCrc }},
// RAID-controller counters and state.
{name: "media_errors_ctrl", kind: kindInt, value: func(d *Drive) any { return d.MediaErrCtrl }},
{name: "other_errors_ctrl", kind: kindInt, value: func(d *Drive) any { return d.OtherErrCtrl }},
{name: "predictive_failure_ctrl", kind: kindInt, value: func(d *Drive) any { return d.PredictiveFailureCtrl }},
{name: "smart_alert_ctrl", kind: kindBool, value: func(d *Drive) any { return d.SmartAlertCtrl }},
{name: "fw_state", kind: kindStr, value: func(d *Drive) any { return d.FwState }},
// Wear, reserve, and host writes.
{name: "wear_pct_consumed", kind: kindInt, value: func(d *Drive) any { return d.WearPctConsumed }},
{name: "wear_src", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.WearSrc }},
{name: "unused_reserve_pct", kind: kindInt, value: func(d *Drive) any { return d.UnusedReservePct }},
{name: "host_written_tb", kind: kindFloat, value: func(d *Drive) any { return d.HostWrittenTB }},
// NVMe health log.
{name: "nvme_critical_warning", kind: kindInt, value: func(d *Drive) any { return d.NvmeCriticalWarning }},
{name: "nvme_avail_spare", kind: kindInt, value: func(d *Drive) any { return d.NvmeAvailSpare }},
{name: "nvme_avail_spare_thresh", kind: kindInt, value: func(d *Drive) any { return d.NvmeAvailSpareThresh }},
// Age, power cycles, and temperature.
{name: "power_on_years", kind: kindFloat, value: func(d *Drive) any { return d.PowerOnYears }},
{name: "power_cycle_count", kind: kindInt, value: func(d *Drive) any { return d.PowerCycleCount }},
{name: "temp_c", kind: kindInt, value: func(d *Drive) any { return d.TempC }},
// Scoring result.
{name: "risk_score", kind: kindInt, value: func(d *Drive) any { return d.RiskScore }},
{name: "recommendation", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.Recommendation }},
{name: "risk_reasons", kind: kindStr, csvOnly: true, value: func(d *Drive) any { return d.RiskReasons }},
}
// raw reads the column from d and normalizes "unknown" to nil. A nil value, a
// nil *int or *float64, and an empty string all come back as nil; non-nil
// pointers are dereferenced, and every other value (int, float64, bool) is
// returned unchanged because it is always present. Callers use a nil return as
// the "absent" test.
func (c column) raw(d *Drive) any {
	v := c.value(d)
	if v == nil {
		return nil
	}
	if p, isInt := v.(*int); isInt {
		if p != nil {
			return *p
		}
		return nil
	}
	if p, isFloat := v.(*float64); isFloat {
		if p != nil {
			return *p
		}
		return nil
	}
	if s, isStr := v.(string); isStr {
		if s != "" {
			return s
		}
		return nil
	}
	// int, float64, bool — always present.
	return v
}
// format turns a raw column value into its display string, the textual form
// shared by CSV and line protocol. Strings pass through, ints and bools use
// their decimal / "true"/"false" spelling, and float64 uses the shortest 'f'
// representation. A nil value, or any type not listed here, renders as "".
func format(v any) string {
	if v == nil {
		return ""
	}
	switch val := v.(type) {
	case string:
		return val
	case int:
		return strconv.Itoa(val)
	case float64:
		return strconv.FormatFloat(val, 'f', -1, 64)
	case bool:
		return strconv.FormatBool(val)
	}
	return ""
}
// field looks up the column called name and returns its display string for d.
// The result is "" when the value is unknown or no column has that name. This is
// the canonical text form shared by the CSV output and the test helpers.
func (d *Drive) field(name string) string {
	for i := range columns {
		if columns[i].name != name {
			continue
		}
		return format(columns[i].raw(d))
	}
	return ""
}
// labelColumns selects, in schema order, every kindStr column that is not
// csvOnly. These are the columns carried as Prometheus labels; the ones with
// influxTag set are also the InfluxDB tags.
func labelColumns() []column {
	var labels []column
	for i := range columns {
		col := columns[i]
		if col.csvOnly || col.kind != kindStr {
			continue
		}
		labels = append(labels, col)
	}
	return labels
}
// gaugeColumns selects, in schema order, every column that is neither a string
// nor csvOnly. These are the numeric columns emitted as Prometheus gauges and
// InfluxDB fields.
func gaugeColumns() []column {
	var gauges []column
	for i := range columns {
		col := columns[i]
		if col.csvOnly || col.kind == kindStr {
			continue
		}
		gauges = append(gauges, col)
	}
	return gauges
}

173
score.go Normal file
View file

@ -0,0 +1,173 @@
package main
import (
"fmt"
"strings"
)
// scoreDrive grades d and returns (risk score, recommendation, reasons). The
// reasons string joins one entry per rule that fired, in rule order, with "; ".
//
// Scoring principles:
//
//   - Only real, drive-attributable defects add meaningful score.
//   - Missing or unreadable data is never treated as a failure (no points).
//   - Wear and age are graded to nudge toward planned replacement.
//
// Score to recommendation:
//
//	>= 100 -> REPLACE_NOW   (hard defect: drive is failing or failed)
//	>= 50  -> REPLACE_SOON  (serious wear or accumulating defects)
//	>= 20  -> MONITOR       (early warning signs)
//	<  20  -> OK
//
// A drive with no SMART data and no controller signal of any kind is returned
// as NO_DATA with score 0, meaning re-collect rather than replace.
func scoreDrive(d *Drive) (int, string, string) {
	var (
		total   int
		reasons []string
	)
	// penalize adds points to the running score and records why.
	penalize := func(points int, reason string) {
		total += points
		reasons = append(reasons, reason)
	}
	// atMost limits value to ceiling.
	atMost := func(value, ceiling int) int {
		if value > ceiling {
			return ceiling
		}
		return value
	}

	// ---- Hard physical defects (drive-attributable) ----
	// Each counter scores base + count*step, capped at ceiling.
	hardDefects := []struct {
		count     int
		base      int
		step      int
		ceiling   int
		reasonFmt string
	}{
		{iv(d.Reallocated), 40, 5, 100, "reallocated=%d(+%d)"},
		{iv(d.Pending), 50, 5, 100, "pending=%d(+%d)"},
		{iv(d.Uncorrectable), 60, 5, 100, "uncorrectable=%d(+%d)"},
		{iv(d.ReportedUncorrect), 0, 10, 60, "reported_uncorrect=%d(+%d)"},
		{iv(d.EndToEnd), 0, 20, 80, "end_to_end_err=%d(+%d)"},
		{iv(d.RuntimeBadBlocks), 0, 5, 40, "runtime_bad_blocks=%d(+%d)"},
	}
	for _, hd := range hardDefects {
		if hd.count <= 0 {
			continue
		}
		pts := atMost(hd.base+hd.count*hd.step, hd.ceiling)
		penalize(pts, fmt.Sprintf(hd.reasonFmt, hd.count, pts))
	}

	// ---- SMART self-assessment: only an EXPLICIT failure is penalized ----
	if d.SmartHealth == "FAILED" {
		penalize(100, "SMART_health=FAILED(+100)")
	}

	// ---- SATA link quality (cabling/backplane, not the NAND) ----
	if crcErrs := iv(d.UdmaCrc); crcErrs > 0 {
		pts := atMost(crcErrs*3, 25)
		penalize(pts, fmt.Sprintf("udma_crc=%d(+%d)", crcErrs, pts))
	}

	// ---- RAID-controller signals ----
	if d.PredictiveFailureCtrl > 0 {
		penalize(70, "ctrl_predictive_failure(+70)")
	}
	if d.SmartAlertCtrl {
		penalize(50, "ctrl_smart_alert(+50)")
	}

	// A controller-reported state costs points only when it is not a healthy
	// one. MegaCLI spells states out ("Online, Spun Up", "Hotspare"); storcli
	// abbreviates ("Onln", "GHS"/"DHS" for hot spares, "JBOD"). All are fine,
	// as is an empty state.
	state := strings.ToLower(d.FwState)
	healthy := state == ""
	for _, token := range []string{"online", "onln", "hotspare", "ghs", "dhs", "jbod"} {
		if healthy {
			break
		}
		healthy = strings.Contains(state, token)
	}
	if !healthy {
		penalize(40, fmt.Sprintf("fw_state=%s(+40)", d.FwState))
	}

	// MegaCLI/storcli media errors: a soft signal, graded gently and capped.
	mediaErrs := d.MediaErrCtrl
	if mediaErrs >= 100 {
		penalize(30, fmt.Sprintf("ctrl_media_errors=%d(+30)", mediaErrs))
	} else if mediaErrs >= 20 {
		penalize(15, fmt.Sprintf("ctrl_media_errors=%d(+15)", mediaErrs))
	} else if mediaErrs > 0 {
		penalize(5, fmt.Sprintf("ctrl_media_errors=%d(+5)", mediaErrs))
	}

	// ---- NVMe critical warning bitmask (any bit set is a real alert) ----
	if warn := d.NvmeCriticalWarning; warn != nil && *warn > 0 {
		penalize(60, fmt.Sprintf("nvme_critical_warning=0x%02x(+60)", *warn))
	}
	// NVMe spare at or below its threshold -> reserve exhaustion.
	if spare, thresh := d.NvmeAvailSpare, d.NvmeAvailSpareThresh; spare != nil && thresh != nil && *spare <= *thresh {
		penalize(40, fmt.Sprintf("nvme_avail_spare<=thresh(%d<=%d)(+40)", *spare, *thresh))
	}

	// ---- Wear (graded; only meaningful with real SMART data) ----
	if d.WearPctConsumed != nil {
		used := *d.WearPctConsumed
		if used >= 95 {
			penalize(80, fmt.Sprintf("wear_consumed=%d%%(+80)", used))
		} else if used >= 90 {
			penalize(55, fmt.Sprintf("wear_consumed=%d%%(+55)", used))
		} else if used >= 80 {
			penalize(30, fmt.Sprintf("wear_consumed=%d%%(+30)", used))
		} else if used >= 70 {
			penalize(15, fmt.Sprintf("wear_consumed=%d%%(+15)", used))
		} else if used >= 60 {
			penalize(8, fmt.Sprintf("wear_consumed=%d%%(+8)", used))
		}
	}

	// ---- Reserve-block exhaustion (Micron ID180 VALUE -> remaining %) ----
	if reserve := d.UnusedReservePct; reserve != nil && *reserve <= 10 {
		penalize(30, fmt.Sprintf("reserve_blocks_low(val=%d)(+30)", *reserve))
	}

	// ---- Age (gentle nudge only) ----
	poweredHours := iv(d.PowerOnHours)
	if poweredHours >= 61320 { // Older than 7 years.
		penalize(15, fmt.Sprintf("age=%dh(+15)", poweredHours))
	} else if poweredHours >= 52560 { // Older than 6 years.
		penalize(8, fmt.Sprintf("age=%dh(+8)", poweredHours))
	} else if poweredHours >= 43800 { // Older than 5 years.
		penalize(4, fmt.Sprintf("age=%dh(+4)", poweredHours))
	}

	// ---- Decide recommendation ----
	// NO_DATA applies only when nothing observed the drive: no SMART, no
	// controller error signals, and no controller state. A controller-only
	// drive (e.g. NVMe behind a PERC) reports a FwState, so it is scored on
	// controller evidence instead.
	observed := d.HaveSmart || mediaErrs != 0 || d.PredictiveFailureCtrl != 0 ||
		d.SmartAlertCtrl || d.FwState != ""
	if !observed {
		return 0, "NO_DATA", "smartctl returned no usable SMART data; re-collect"
	}

	recommendation := "OK"
	if total >= 20 {
		recommendation = "MONITOR"
	}
	if total >= 50 {
		recommendation = "REPLACE_SOON"
	}
	if total >= 100 {
		recommendation = "REPLACE_NOW"
	}

	if len(reasons) == 0 {
		return total, recommendation, "no defects detected"
	}
	return total, recommendation, strings.Join(reasons, "; ")
}

284
smart_json.go Normal file
View file

@ -0,0 +1,284 @@
package main
import (
"fmt"
"strings"
)
// wearAttr maps vendor SSD-life attribute IDs to a source label. For all of
// these the normalized VALUE expresses "% life remaining".
//
// Order matters: applyAtaCounters walks this list from the top and stops at the
// first attribute that is present with a normalized value, so earlier entries
// take precedence when a drive reports more than one.
var wearAttr = []struct {
id int // SMART attribute ID
src string // label recorded as the drive's wear source
}{
{173, "Micron/ID173"},
{202, "Intel/ID202"},
{231, "Intel/ID231"},
{177, "Samsung/ID177"},
{233, "Generic/ID233"},
}
// ataAttr is one parsed row of an ATA SMART attribute table, shared by the JSON
// and text paths. raw is the attribute's raw counter; value/worst are the
// vendor-normalized current/worst values. Pointer fields are nil when the
// source row did not yield a parseable number.
type ataAttr struct {
value *int // normalized current VALUE
worst *int // normalized WORST
// whenFailed is the WHEN_FAILED column (JSON "when_failed"); attrsFailed
// treats "now" and "past", compared case-insensitively, as a failure.
whenFailed string
raw *int // raw counter (leading integer of RAW_VALUE)
}
// attrRaw looks up attribute id and returns its raw counter. The result is nil
// when the attribute is missing from attrs or carries no raw value.
func attrRaw(attrs map[int]ataAttr, id int) *int {
	// A missing key yields the zero ataAttr, whose raw pointer is nil.
	return attrs[id].raw
}
// attrVal looks up attribute id and returns its normalized current value. The
// result is nil when the attribute is missing from attrs or has no value.
func attrVal(attrs map[int]ataAttr, id int) *int {
	// A missing key yields the zero ataAttr, whose value pointer is nil.
	return attrs[id].value
}
// attrsFailed reports whether at least one attribute is marked as failing now
// or as having failed in the past. It backs the PASSED_BY_ATTR/FAILED verdict
// used when no explicit overall health result is available.
func attrsFailed(attrs map[int]ataAttr) bool {
	for id := range attrs {
		switch strings.ToLower(attrs[id].whenFailed) {
		case "now", "past":
			return true
		}
	}
	return false
}
// applyAtaCounters copies the parsed ATA attribute table into d's defect, wear,
// reserve-block, and host-write fields. Both the JSON and text paths call it.
// Power-on hours come from attribute 9 only if a dedicated field has not already
// set them. Path-specific fallbacks (power-cycle/temperature on text, SCSI/NVMe
// on JSON) are left to the callers.
func applyAtaCounters(attrs map[int]ataAttr, d *Drive) {
	rawOf := func(id int) *int { return attrRaw(attrs, id) }

	// Defect and link-error counters (raw values).
	d.Reallocated = rawOf(5)
	d.ReallocatedEvents = rawOf(196)
	d.Pending = rawOf(197)
	d.Uncorrectable = rawOf(198)
	d.UdmaCrc = rawOf(199)
	d.ReportedUncorrect = rawOf(187)
	d.RuntimeBadBlocks = rawOf(183)
	d.EndToEnd = rawOf(184)
	if d.PowerOnHours == nil {
		d.PowerOnHours = rawOf(9)
	}

	// Wear: the first listed vendor attribute with a normalized value wins
	// (VALUE = % remaining).
	for _, candidate := range wearAttr {
		row, present := attrs[candidate.id]
		if !present || row.value == nil {
			continue
		}
		d.WearPctRemaining = row.value
		d.WearPctWorst = row.worst
		d.WearSrc = candidate.src
		d.WearPctConsumed = pInt(100 - *row.value)
		break
	}

	// Micron ID180 reserve blocks (VALUE = % remaining).
	if reserve, present := attrs[180]; present {
		d.UnusedReservePct = reserve.value
	}
	// ID246 host writes: raw LBA count converted to TB at 512 bytes per LBA.
	if written := rawOf(246); written != nil && *written > 0 {
		d.HostWrittenTB = pF(float64(*written) * 512.0 / 1e12)
	}
}
// parseSmartJSON populates d from a decoded `smartctl -j` object. It handles
// ATA/SATA (attribute table), SAS/SCSI (endurance, grown defects, error counter
// log), and NVMe (SMART/Health log). A nil object leaves d untouched. Later
// sources only fill fields that earlier, more specific sources left unset.
func parseSmartJSON(j map[string]interface{}, d *Drive) {
	if j == nil {
		return
	}

	// ---- Identity ----
	d.Model = first(jStr(j, "model_name"), jStr(j, "scsi_model_name"))
	d.Serial = jStr(j, "serial_number")
	d.Transport = jStr(j, "scsi_transport_protocol", "name")
	d.Firmware = first(jStr(j, "firmware_version"), jStr(j, "scsi_revision"), jStr(j, "revision"))
	if capBytes := jInt(j, "user_capacity", "bytes"); capBytes != nil && *capBytes > 0 {
		d.Capacity = fmt.Sprintf("%.2f TB", float64(*capBytes)/1e12)
	}

	// ---- Media type ----
	// A zero or absent rotation_rate is reported as SSD; NVMe overrides that
	// when the device type or the presence of the NVMe health log says so.
	d.Rotation = "SSD"
	if rate := jInt(j, "rotation_rate"); rate != nil && *rate != 0 {
		d.Rotation = fmt.Sprintf("%d rpm", *rate)
	}
	isNVMe := strings.Contains(strings.ToLower(jStr(j, "device", "type")), "nvme") ||
		jObj(j, "nvme_smart_health_information_log") != nil
	if isNVMe {
		d.Rotation = "NVMe"
	}

	d.PowerOnHours = jInt(j, "power_on_time", "hours")
	d.PowerCycleCount = jInt(j, "power_cycle_count")
	d.TempC = jInt(j, "temperature", "current")

	// ---- ATA attribute table ----
	attrs := make(map[int]ataAttr)
	rows, _ := jLeaf(j, "ata_smart_attributes", "table").([]interface{})
	for _, row := range rows {
		entry, isObj := row.(map[string]interface{})
		if !isObj {
			continue
		}
		id := jInt(entry, "id")
		if id == nil {
			continue
		}
		parsed := ataAttr{
			value:      jInt(entry, "value"),
			worst:      jInt(entry, "worst"),
			whenFailed: jStr(entry, "when_failed"),
		}
		// The leading integer of raw.string is preferred because raw.value
		// overflows for some attributes; raw.value is the fallback.
		if text := jStr(entry, "raw", "string"); text != "" {
			if n, ok := firstInt(text); ok {
				parsed.raw = &n
			}
		}
		if parsed.raw == nil {
			parsed.raw = jInt(entry, "raw", "value")
		}
		attrs[*id] = parsed
	}

	// ---- SMART health verdict ----
	// An explicit smart_status.passed wins; otherwise the verdict is derived
	// from the attribute table, or UNKNOWN when there is no table either.
	switch passed := jBoolPtr(j, "smart_status", "passed"); {
	case passed != nil && *passed:
		d.SmartHealth = "PASSED"
	case passed != nil:
		d.SmartHealth = "FAILED"
	case len(attrs) == 0:
		d.SmartHealth = "UNKNOWN"
	case attrsFailed(attrs):
		d.SmartHealth = "FAILED"
	default:
		d.SmartHealth = "PASSED_BY_ATTR"
	}

	// Defect, wear, reserve, and host-write fields shared with the text path.
	applyAtaCounters(attrs, d)

	// ---- SCSI/SAS endurance + grown defect list ----
	if d.WearPctRemaining == nil {
		if used := jInt(j, "scsi_percentage_used_endurance_indicator"); used != nil {
			d.WearPctConsumed = used
			d.WearPctRemaining = pInt(100 - *used)
			d.WearSrc = "SCSI/endurance"
		}
	}
	if d.Reallocated == nil {
		if grown := jInt(j, "scsi_grown_defect_list"); grown != nil {
			d.Reallocated = grown
		}
	}

	// SAS drives carry no ATA attribute table; their hard-defect signals live
	// in the SCSI logs. They are mapped onto fields the scorer already grades:
	// the sum of uncorrected read/write/verify errors becomes uncorrectable
	// sectors, and the pending (to-be-reassigned) defect count becomes current
	// pending sectors.
	if d.Uncorrectable == nil {
		if counters := jObj(j, "scsi_error_counter_log"); counters != nil {
			uncorrected, seen := 0, false
			for _, op := range [...]string{"read", "write", "verify"} {
				n := jInt(counters, op, "total_uncorrected_errors")
				if n == nil {
					continue
				}
				seen = true
				uncorrected += *n
			}
			if seen {
				d.Uncorrectable = pInt(uncorrected)
			}
		}
	}
	if d.Pending == nil {
		if pending := jInt(j, "scsi_pending_defects", "count"); pending != nil {
			d.Pending = pending
		}
	}

	// ---- NVMe SMART/Health log ----
	if health := jObj(j, "nvme_smart_health_information_log"); health != nil {
		d.NvmeCriticalWarning = jInt(health, "critical_warning")
		d.NvmeAvailSpare = jInt(health, "available_spare")
		d.NvmeAvailSpareThresh = jInt(health, "available_spare_threshold")
		d.NvmeMediaErrors = jInt(health, "media_errors")
		if d.PowerOnHours == nil {
			d.PowerOnHours = jInt(health, "power_on_hours")
		}
		if d.PowerCycleCount == nil {
			d.PowerCycleCount = jInt(health, "power_cycles")
		}
		if d.TempC == nil {
			d.TempC = jInt(health, "temperature")
		}
		if used := jInt(health, "percentage_used"); used != nil {
			d.WearPctConsumed = used
			d.WearPctRemaining = pInt(100 - *used)
			d.WearSrc = "NVMe/percentage_used"
		}
		// NVMe media+data integrity errors stand in for uncorrectable sectors.
		if d.Uncorrectable == nil && d.NvmeMediaErrors != nil {
			d.Uncorrectable = d.NvmeMediaErrors
		}
	}

	// ---- smartctl passthrough diagnostics ----
	if msgs, isList := jLeaf(j, "smartctl", "messages").([]interface{}); isList {
		var texts []string
		for _, raw := range msgs {
			msg, isObj := raw.(map[string]interface{})
			if !isObj {
				continue
			}
			if text := jStr(msg, "string"); text != "" {
				texts = append(texts, text)
			}
		}
		d.SmartctlMessages = strings.Join(texts, "; ")
	}
}
// jsonUsable reports whether j carries both an identity and some health data.
// Identity means a non-empty model_name or scsi_model_name. Health data means
// any of: the ATA attribute object, smart_status, the NVMe health log, or the
// SCSI percentage-used endurance indicator. A nil object is never usable.
func jsonUsable(j map[string]interface{}) bool {
	if j == nil {
		return false
	}
	if jStr(j, "model_name") == "" && jStr(j, "scsi_model_name") == "" {
		return false
	}
	switch {
	case jObj(j, "ata_smart_attributes") != nil:
		return true
	case jObj(j, "smart_status") != nil:
		return true
	case jObj(j, "nvme_smart_health_information_log") != nil:
		return true
	}
	return jInt(j, "scsi_percentage_used_endurance_indicator") != nil
}
// first scans vals left to right and returns the first one that is not the
// empty string. It returns "" when every value is empty or none are given.
func first(vals ...string) string {
	for i := 0; i < len(vals); i++ {
		if candidate := vals[i]; len(candidate) > 0 {
			return candidate
		}
	}
	return ""
}

303
smart_text.go Normal file
View file

@ -0,0 +1,303 @@
package main
import (
"fmt"
"regexp"
"strings"
)
// Patterns used by parseSmartText. They are compiled once at package
// initialization rather than on every parsed drive.
var (
	// smartTextCapacityRe captures the digit/comma byte count that precedes
	// "bytes" in a "User Capacity:" value such as
	// "1,920,383,410,176 bytes [1.92 TB]".
	smartTextCapacityRe = regexp.MustCompile(`([\d,]+)\s*bytes`)
	// smartTextRPMRe captures the number that precedes "rpm" in a lowercased
	// "Rotation Rate:" value.
	smartTextRPMRe = regexp.MustCompile(`(\d+)\s*rpm`)
)

// parseSmartText parses `smartctl -a` PLAIN TEXT output into d. This is the path
// used on CentOS 6/7, where smartmontools (5.x/6.x) predates `--json` (7.0,
// 2019). It handles ATA/SATA, NVMe, and SAS/SCSI layouts.
//
// Blank input leaves d untouched. Otherwise identity, rotation, and the overall
// health verdict are read first, and then exactly one layout-specific parser
// runs: NVMe if an NVMe log marker is present, else ATA if an attribute table is
// found, else the SAS/SCSI fallback.
func parseSmartText(text string, d *Drive) {
	if strings.TrimSpace(text) == "" {
		return
	}
	lines := strings.Split(text, "\n")
	// Default media type; overridden below by a Rotation Rate line or an NVMe marker.
	d.Rotation = "SSD"

	// val returns the trimmed remainder of the first line that contains
	// prefix anywhere in it, or "" when no line does.
	val := func(prefix string) string {
		for _, ln := range lines {
			if i := strings.Index(ln, prefix); i >= 0 {
				return strings.TrimSpace(ln[i+len(prefix):])
			}
		}
		return ""
	}

	// ---- Identity ----
	d.Model = first(val("Device Model:"), val("Model Number:"), val("Product:"), val("Model Family:"))
	d.Serial = first(val("Serial Number:"), val("Serial number:"))
	d.Firmware = first(val("Firmware Version:"), val("Revision:"))
	if uc := val("User Capacity:"); uc != "" {
		// "1,920,383,410,176 bytes [1.92 TB]"
		if m := smartTextCapacityRe.FindStringSubmatch(uc); m != nil {
			if n, ok := parseIntLoose(m[1]); ok && n > 0 {
				d.Capacity = fmt.Sprintf("%.2f TB", float64(n)/1e12)
			}
		}
	}
	if rr := val("Rotation Rate:"); rr != "" {
		lower := strings.ToLower(rr)
		if strings.Contains(lower, "solid state") {
			d.Rotation = "SSD"
		} else if m := smartTextRPMRe.FindStringSubmatch(lower); m != nil {
			d.Rotation = m[1] + " rpm"
		}
	}

	// An NVMe health-log marker anywhere in the output selects the NVMe layout.
	isNVMe := false
	for _, ln := range lines {
		if strings.Contains(ln, "NVMe Log") || strings.Contains(ln, "SMART/Health Information (NVMe") {
			isNVMe = true
			break
		}
	}
	if isNVMe {
		d.Rotation = "NVMe"
	}

	// ---- SMART overall health ----
	// ATA:  "SMART overall-health self-assessment test result: PASSED"
	// SCSI: "SMART Health Status: OK"
	if h := val("self-assessment test result:"); h != "" {
		switch up := strings.ToUpper(h); {
		case strings.Contains(up, "PASS"):
			d.SmartHealth = "PASSED"
		case strings.Contains(up, "FAIL"):
			d.SmartHealth = "FAILED"
		default:
			d.SmartHealth = "UNKNOWN"
		}
	} else if h := val("SMART Health Status:"); h != "" {
		// Any SCSI status other than one containing "OK" counts as a failure.
		if strings.Contains(strings.ToUpper(h), "OK") {
			d.SmartHealth = "PASSED"
		} else {
			d.SmartHealth = "FAILED"
		}
	} else {
		d.SmartHealth = "UNKNOWN"
	}

	if isNVMe {
		parseNVMeText(val, d)
		return
	}
	if attrs := parseATAAttrTable(lines); len(attrs) > 0 {
		applyATAAttrs(attrs, d)
		return
	}
	// SAS/SCSI fallback fields.
	parseSCSIText(val, d)
	parseSCSIErrors(lines, d)
}
// parseATAAttrTable reads the "Vendor Specific SMART Attributes" table out of
// smartctl text output and returns it keyed by attribute ID:
//
//	ID# ATTRIBUTE_NAME          FLAG     VALUE WORST THRESH TYPE      UPDATED  WHEN_FAILED RAW_VALUE
//	  5 Reallocated_Sector_Ct   0x0033   100   100   010    Pre-fail  Always       -       0
//
// Rows start after the header line and end at the first blank line. Rows with
// fewer than ten fields or a non-numeric ID are skipped. The returned map is
// empty (never nil) when no table is found.
func parseATAAttrTable(lines []string) map[int]ataAttr {
	table := make(map[int]ataAttr)
	isHeader := func(ln string) bool {
		return strings.Contains(ln, "ATTRIBUTE_NAME") && strings.Contains(ln, "RAW_VALUE")
	}

	// Find the first row after the header.
	start := -1
	for i := range lines {
		if isHeader(lines[i]) {
			start = i + 1
			break
		}
	}
	if start < 0 {
		return table
	}

	for _, ln := range lines[start:] {
		if isHeader(ln) {
			continue
		}
		if strings.TrimSpace(ln) == "" {
			break
		}
		cols := strings.Fields(ln)
		if len(cols) < 10 {
			continue
		}
		id, ok := firstInt(cols[0])
		if !ok {
			continue
		}
		row := ataAttr{whenFailed: cols[8]}
		if value, ok := firstInt(cols[3]); ok {
			row.value = &value
		}
		if worst, ok := firstInt(cols[4]); ok {
			row.worst = &worst
		}
		// RAW_VALUE is everything from field 9 onward; keep its leading integer.
		if raw, ok := firstInt(strings.Join(cols[9:], " ")); ok {
			row.raw = &raw
		}
		table[id] = row
	}
	return table
}
// applyATAAttrs folds the parsed ATA attribute table into d on the text path. It
// derives a health-by-attribute verdict if the overall result was UNKNOWN, fills
// the shared defect/wear/reserve fields, and then applies the text-only
// fallbacks for power-cycle count (attribute 12) and temperature (attribute 194).
func applyATAAttrs(attrs map[int]ataAttr, d *Drive) {
	// With no overall verdict, judge health from the attribute flags.
	if len(attrs) > 0 && d.SmartHealth == "UNKNOWN" {
		d.SmartHealth = "PASSED_BY_ATTR"
		if attrsFailed(attrs) {
			d.SmartHealth = "FAILED"
		}
	}

	// Defect, wear, reserve, and host-write fields shared with the JSON path.
	applyAtaCounters(attrs, d)

	// Text-only fallbacks; the JSON path reads these from dedicated fields.
	if d.PowerCycleCount == nil {
		d.PowerCycleCount = attrRaw(attrs, 12)
	}
	if d.TempC == nil {
		d.TempC = attrRaw(attrs, 194)
	}
}
// parseNVMeText reads the "SMART/Health Information (NVMe Log)" section into d.
// val is the caller's prefix lookup, which returns the text following a label
// or "" if the label is absent. Power-on hours, power cycles, and temperature
// are filled only when d does not already have them.
func parseNVMeText(val func(string) string, d *Drive) {
	// leadingInt looks up label and parses the leading integer of a non-empty value.
	leadingInt := func(label string) (int, bool) {
		s := val(label)
		if s == "" {
			return 0, false
		}
		return firstInt(s)
	}

	// Critical warning bitmask, printed as e.g. "0x00".
	if warning := val("Critical Warning:"); warning != "" {
		if n, ok := parseHexOrInt(warning); ok {
			d.NvmeCriticalWarning = &n
		}
	}
	if n, ok := leadingInt("Available Spare:"); ok {
		d.NvmeAvailSpare = &n
	}
	if n, ok := leadingInt("Available Spare Threshold:"); ok {
		d.NvmeAvailSpareThresh = &n
	}
	if n, ok := leadingInt("Percentage Used:"); ok {
		d.WearPctConsumed = &n
		d.WearPctRemaining = pInt(100 - n)
		d.WearSrc = "NVMe/percentage_used"
	}

	// Media errors also stand in for uncorrectable sectors when that is unset.
	if mediaErrs := val("Media and Data Integrity Errors:"); mediaErrs != "" {
		if n, ok := parseIntLoose(mediaErrs); ok {
			d.NvmeMediaErrors = &n
			if d.Uncorrectable == nil {
				d.Uncorrectable = &n
			}
		}
	}

	if d.PowerOnHours == nil {
		if n, ok := parseIntLoose(val("Power On Hours:")); ok {
			d.PowerOnHours = &n
		}
	}
	if d.PowerCycleCount == nil {
		if n, ok := parseIntLoose(val("Power Cycles:")); ok {
			d.PowerCycleCount = &n
		}
	}
	if d.TempC == nil {
		if n, ok := firstInt(val("Temperature:")); ok {
			d.TempC = &n
		}
	}
}
// scsiPoweredUpHoursRe matches the first run of digits and commas in the
// "number of hours powered up" value (e.g. "= 12345.67" yields "12345").
// It is compiled once here rather than on every parseSCSIText call.
var scsiPoweredUpHoursRe = regexp.MustCompile(`([\d,]+)`)

// parseSCSIText fills SAS/SCSI fields (temperature, grown defect list,
// endurance, power-on hours) that smartctl prints as "Label: value" lines.
// val is the shared prefix lookup that returns the text following a label.
//
// Temperature, grown defects, endurance, and the "hours powered up" figure
// overwrite whatever d holds; the "Accumulated power on time" form is used
// only when power-on hours are still unset afterwards.
func parseSCSIText(val func(string) string, d *Drive) {
	if t := val("Current Drive Temperature:"); t != "" {
		if n, ok := firstInt(t); ok {
			d.TempC = &n
		}
	}
	if g := val("Elements in grown defect list:"); g != "" {
		if n, ok := parseIntLoose(g); ok {
			d.Reallocated = &n
		}
	}
	if e := val("Percentage used endurance indicator:"); e != "" {
		if n, ok := firstInt(e); ok {
			// The endurance indicator may exceed 100 on a drive past its
			// rated life; report 0% remaining rather than a negative value.
			// WearPctConsumed keeps the raw figure.
			remaining := 100 - n
			if remaining < 0 {
				remaining = 0
			}
			d.WearPctConsumed = &n
			d.WearPctRemaining = pInt(remaining)
			d.WearSrc = "SCSI/endurance"
		}
	}
	if h := val("number of hours powered up"); h != "" {
		// "= 12345.67": keep the whole-hours part only.
		if m := scsiPoweredUpHoursRe.FindString(h); m != "" {
			if n, ok := parseIntLoose(m); ok {
				d.PowerOnHours = &n
			}
		}
	}
	// Newer smartctl prints "Accumulated power on time, hours:minutes 2487:44".
	if d.PowerOnHours == nil {
		if h := val("Accumulated power on time, hours:minutes"); h != "" {
			if n, ok := firstInt(h); ok {
				d.PowerOnHours = &n
			}
		}
	}
}
// parseSCSIErrors derives the SAS counterparts of ATA uncorrectable and
// pending sectors from the "Error counter log" table and the pending-defect
// count. Both need the full line list (the error log spans several lines),
// so the val() prefix lookup is not enough here. Fields already set on d are
// left untouched.
func parseSCSIErrors(lines []string, d *Drive) {
	if d.Uncorrectable == nil {
		// The read:/write:/verify: rows each end in a "total uncorrected
		// errors" column; add up whichever of them parse.
		total, found := 0, false
		for _, line := range lines {
			cols := strings.Fields(line)
			if len(cols) < 7 {
				continue
			}
			if op := cols[0]; op != "read:" && op != "write:" && op != "verify:" {
				continue
			}
			n, ok := parseIntLoose(cols[len(cols)-1])
			if !ok {
				continue
			}
			total += n
			found = true
		}
		if found {
			d.Uncorrectable = pInt(total)
		}
	}
	if d.Pending != nil {
		return
	}
	// "  Pending defect count:0 Pending Defects" (no space after the colon).
	const marker = "Pending defect count:"
	for _, line := range lines {
		at := strings.Index(line, marker)
		if at < 0 {
			continue
		}
		// Only the first matching line is considered, parseable or not.
		if n, ok := firstInt(line[at+len(marker):]); ok {
			d.Pending = pInt(n)
		}
		return
	}
}
// parseHexOrInt reads s (surrounding whitespace ignored) as hexadecimal when
// it starts with "0x" or "0X", and hands it to parseIntLoose otherwise. The
// NVMe critical-warning field, for instance, arrives as "0x00". A hex prefix
// with no hex digits after it yields (0, false).
func parseHexOrInt(s string) (int, bool) {
	trimmed := strings.TrimSpace(s)
	hasHexPrefix := len(trimmed) >= 2 && trimmed[0] == '0' &&
		(trimmed[1] == 'x' || trimmed[1] == 'X')
	if !hasHexPrefix {
		return parseIntLoose(trimmed)
	}
	// Scan the digits that follow the prefix; the prefix case no longer matters.
	var parsed int
	if _, err := fmt.Sscanf(trimmed[2:], "%x", &parsed); err != nil {
		return 0, false
	}
	return parsed, true
}

189
testdata/megacli_pdlist.txt vendored Normal file
View file

@ -0,0 +1,189 @@
Adapter #0
Enclosure Device ID: 64
Slot Number: 0
Drive's position: DiskGroup: 0, Span: 0, Arm: 0
Enclosure position: 1
Device Id: 22
WWN: 5002538f31000001
Sequence Number: 2
Media Error Count: 0
Other Error Count: 7
Predictive Failure Count: 0
Last Predictive Failure Event Seq Number: 0
PD Type: SATA
Raw Size: 1.819 TB [0xe8e088b0 Sectors]
Non Coerced Size: 1.818 TB [0xe8d088b0 Sectors]
Coerced Size: 1.818 TB [0xe8d00000 Sectors]
Sector Size: 512
Firmware state: Online, Spun Up
Device Firmware Level: 2B6Q
Shield Counter: 0
Successful diagnostics completion on : N/A
SAS Address(0): 0x3b07b250d55b7500
Connected Port Number: 0(path0)
Inquiry Data: S624NS0RA00001J Samsung SSD 870 EVO 2TB SVT02B6Q
FDE Capable: Capable
FDE Enable: Disable
Secured: Unsecured
Locked: Unlocked
Needs EKM Attention: No
Foreign State: None
Device Speed: 6.0Gb/s
Link Speed: 6.0Gb/s
Media Type: Solid State Device
Drive: Not Certified
Drive Temperature :31C (87.80 F)
PI Eligibility: No
Drive is formatted for PI information: No
PI: No PI
Drive's NCQ setting : Enabled
Port-0 :
Port status: Active
Port's Linkspeed: 6.0Gb/s
Drive has flagged a S.M.A.R.T alert : No
Enclosure Device ID: 64
Slot Number: 1
Drive's position: DiskGroup: 0, Span: 0, Arm: 1
Enclosure position: 1
Device Id: 23
WWN: 5002538f31000002
Sequence Number: 2
Media Error Count: 0
Other Error Count: 6
Predictive Failure Count: 0
Last Predictive Failure Event Seq Number: 0
PD Type: SATA
Raw Size: 1.819 TB [0xe8e088b0 Sectors]
Non Coerced Size: 1.818 TB [0xe8d088b0 Sectors]
Coerced Size: 1.818 TB [0xe8d00000 Sectors]
Sector Size: 512
Firmware state: Online, Spun Up
Device Firmware Level: 2B6Q
Shield Counter: 0
Successful diagnostics completion on : N/A
SAS Address(0): 0x3b07b250d55b7501
Connected Port Number: 1(path0)
Inquiry Data: S624NS0RA00002L Samsung SSD 870 EVO 2TB SVT02B6Q
FDE Capable: Capable
FDE Enable: Disable
Secured: Unsecured
Locked: Unlocked
Needs EKM Attention: No
Foreign State: None
Device Speed: 6.0Gb/s
Link Speed: 6.0Gb/s
Media Type: Solid State Device
Drive: Not Certified
Drive Temperature :30C (86.00 F)
PI Eligibility: No
Drive is formatted for PI information: No
PI: No PI
Drive's NCQ setting : Enabled
Port-0 :
Port status: Active
Port's Linkspeed: 6.0Gb/s
Drive has flagged a S.M.A.R.T alert : No
Enclosure Device ID: 64
Slot Number: 2
Drive's position: DiskGroup: 0, Span: 0, Arm: 2
Enclosure position: 1
Device Id: 20
WWN: 5002538f31000003
Sequence Number: 2
Media Error Count: 0
Other Error Count: 21
Predictive Failure Count: 0
Last Predictive Failure Event Seq Number: 0
PD Type: SATA
Raw Size: 1.819 TB [0xe8e088b0 Sectors]
Non Coerced Size: 1.818 TB [0xe8d088b0 Sectors]
Coerced Size: 1.818 TB [0xe8d00000 Sectors]
Sector Size: 512
Firmware state: Online, Spun Up
Device Firmware Level: 2B6Q
Shield Counter: 0
Successful diagnostics completion on : N/A
SAS Address(0): 0x3b07b250d55b7502
Connected Port Number: 2(path0)
Inquiry Data: S624NS0RC00003M Samsung SSD 870 EVO 2TB SVT02B6Q
FDE Capable: Capable
FDE Enable: Disable
Secured: Unsecured
Locked: Unlocked
Needs EKM Attention: No
Foreign State: None
Device Speed: 6.0Gb/s
Link Speed: 6.0Gb/s
Media Type: Solid State Device
Drive: Not Certified
Drive Temperature :31C (87.80 F)
PI Eligibility: No
Drive is formatted for PI information: No
PI: No PI
Drive's NCQ setting : Enabled
Port-0 :
Port status: Active
Port's Linkspeed: 6.0Gb/s
Drive has flagged a S.M.A.R.T alert : No
Enclosure Device ID: 64
Slot Number: 3
Drive's position: DiskGroup: 0, Span: 0, Arm: 3
Enclosure position: 1
Device Id: 21
WWN: 5002538f31000004
Sequence Number: 2
Media Error Count: 0
Other Error Count: 12
Predictive Failure Count: 0
Last Predictive Failure Event Seq Number: 0
PD Type: SATA
Raw Size: 1.819 TB [0xe8e088b0 Sectors]
Non Coerced Size: 1.818 TB [0xe8d088b0 Sectors]
Coerced Size: 1.818 TB [0xe8d00000 Sectors]
Sector Size: 512
Firmware state: Online, Spun Up
Device Firmware Level: 2B6Q
Shield Counter: 0
Successful diagnostics completion on : N/A
SAS Address(0): 0x3b07b250d55b7503
Connected Port Number: 3(path0)
Inquiry Data: S624NS0RA00004W Samsung SSD 870 EVO 2TB SVT02B6Q
FDE Capable: Capable
FDE Enable: Disable
Secured: Unsecured
Locked: Unlocked
Needs EKM Attention: No
Foreign State: None
Device Speed: 6.0Gb/s
Link Speed: 6.0Gb/s
Media Type: Solid State Device
Drive: Not Certified
Drive Temperature :30C (86.00 F)
PI Eligibility: No
Drive is formatted for PI information: No
PI: No PI
Drive's NCQ setting : Enabled
Port-0 :
Port status: Active
Port's Linkspeed: 6.0Gb/s
Drive has flagged a S.M.A.R.T alert : No
Exit Code: 0x00

185
testdata/perccli2_show_all.json vendored Normal file
View file

@ -0,0 +1,185 @@
{
"Controllers":[
{
"Command Status" : {
"CLI Version" : "008.0004.0000.0022 Apr 28, 2023",
"Operating system" : "Linux6.8.0-124-generic",
"Controller" : "0",
"Status" : "Success",
"Description" : "Show Drive Information Succeeded."
},
"Response Data" : {
"Drives List" : [
{
"Drive Information" : {
"EID:Slt" : "284:0",
"PID" : 275,
"State" : "Conf",
"Status" : "Online",
"DG" : 0,
"Size" : "893.75 GiB",
"Intf" : "NVMe",
"Med" : "SSD",
"SED_Type" : "-",
"SeSz" : "512B",
"Model" : "Dell DC NVMe PM9A3 RI U.2 960GB ",
"Sp" : "U",
"LU/NS Count" : 1,
"Alt-EID" : "-"
},
"LU/NS Information" : [
{
"PID" : 275,
"LUN/NSID" : "0/1",
"Status" : "Online",
"Size" : "893.75 GiB"
}
],
"Drive Detailed Information" : {
"Shield Counter" : 0,
"Temperature(C)" : 30,
"Serial Number" : "S6JGNA0X000001 ",
"Vendor" : "NVMe ",
"Model" : "Dell DC NVMe PM9A3 RI U.2 960GB ",
"WWN" : "3100166D00000001",
"Firmware Revision Level" : "1.0.0 ",
"Logical Sector Size" : "512B",
"Physical Sector Size" : "512B",
"Raw size" : "894.252 GiB [0x6fc81ab0 Sectors]",
"Coerced size" : "893.75 GiB [0x6fb80000 Sectors]",
"Capable Speed" : "16.0GT/s",
"Capable Link Width" : "x4",
"Negotiated Link Width" : "x2",
"Drive position" : "DriveGroup:0, Span:0, Row:0",
"Sequence Number" : 2,
"Commissioned Spare" : "No",
"Emergency Spare" : "No",
"Successful Shield Diagnostics completed on(Localtime yyyy/mm/dd hh:mm:sec)" : "NA",
"SED Capable" : "No",
"ISE Capable" : "Yes",
"T10 Power Mode" : "No",
"Needs EKM Attention" : "No",
"Secured By EKM" : "No",
"Certified" : "Yes",
"Supported Data Format" : "PRP",
"Device port count" : 1,
"Path Information" : [
{
"WWID" : "0x4100166d002538c1",
"DevicePID" : 275,
"Path" : "Primary",
"Negotiated Speed" : "16.0GT/s",
"Num Phys/Lanes used" : "Unknown"
}
],
"LU/NS Properties" : {
"Media Error Count" : 0,
"Other Error Count" : 0,
"Predictive Failure Count" : 0,
"Last Predictive Failure Event Sequence Number" : 0,
"Logical Sector Size" : "512B",
"Physical Sector Size" : "512B",
"Raw size" : "894.252 GiB [0x6fc81ab0 Sectors]",
"Coerced size" : "893.75 GiB [0x6fb80000 Sectors]",
"FW managed drive security" : "No",
"Secured" : "No",
"Locked" : "No",
"PI Formatted" : "No",
"PI type" : "No PI",
"Number of bytes of user data in LBA" : "512B",
"Current Write Cache" : "Off",
"Default Write Cache" : "Off",
"Write Cache Changeable" : "No"
},
"Inquiry Data" : "4d 14 28 10 53 36 4a 47 4e 41 30 58 31 30 31 34 38 33 20 20 20 20 20 20 44 65 6c 6c 20 44 43 20 4e 56 4d 65 20 50 4d 39 41 33 20 52 49 20 55 2e 32 20 39 36 30 47 42 20 20 20 20 20 20 20 20 20 31 2e 30 2e 30 20 20 20 02 38 25 00 00 09 06 00 00 04 01 00 00 12 7a 00 00 12 7a 00 00 03 00 00 80 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 "
}
},
{
"Drive Information" : {
"EID:Slt" : "284:1",
"PID" : 276,
"State" : "Conf",
"Status" : "Online",
"DG" : 0,
"Size" : "893.75 GiB",
"Intf" : "NVMe",
"Med" : "SSD",
"SED_Type" : "-",
"SeSz" : "512B",
"Model" : "Dell DC NVMe PM9A3 RI U.2 960GB ",
"Sp" : "U",
"LU/NS Count" : 1,
"Alt-EID" : "-"
},
"LU/NS Information" : [
{
"PID" : 276,
"LUN/NSID" : "0/1",
"Status" : "Online",
"Size" : "893.75 GiB"
}
],
"Drive Detailed Information" : {
"Shield Counter" : 0,
"Temperature(C)" : 32,
"Serial Number" : "S6JGNA0X000002 ",
"Vendor" : "NVMe ",
"Model" : "Dell DC NVMe PM9A3 RI U.2 960GB ",
"WWN" : "3100167200000002",
"Firmware Revision Level" : "1.0.0 ",
"Logical Sector Size" : "512B",
"Physical Sector Size" : "512B",
"Raw size" : "894.252 GiB [0x6fc81ab0 Sectors]",
"Coerced size" : "893.75 GiB [0x6fb80000 Sectors]",
"Capable Speed" : "16.0GT/s",
"Capable Link Width" : "x4",
"Negotiated Link Width" : "x2",
"Drive position" : "DriveGroup:0, Span:0, Row:1",
"Sequence Number" : 2,
"Commissioned Spare" : "No",
"Emergency Spare" : "No",
"Successful Shield Diagnostics completed on(Localtime yyyy/mm/dd hh:mm:sec)" : "NA",
"SED Capable" : "No",
"ISE Capable" : "Yes",
"T10 Power Mode" : "No",
"Needs EKM Attention" : "No",
"Secured By EKM" : "No",
"Certified" : "Yes",
"Supported Data Format" : "PRP",
"Device port count" : 1,
"Path Information" : [
{
"WWID" : "0x41001672002538c1",
"DevicePID" : 276,
"Path" : "Primary",
"Negotiated Speed" : "16.0GT/s",
"Num Phys/Lanes used" : "Unknown"
}
],
"LU/NS Properties" : {
"Media Error Count" : 0,
"Other Error Count" : 0,
"Predictive Failure Count" : 0,
"Last Predictive Failure Event Sequence Number" : 0,
"Logical Sector Size" : "512B",
"Physical Sector Size" : "512B",
"Raw size" : "894.252 GiB [0x6fc81ab0 Sectors]",
"Coerced size" : "893.75 GiB [0x6fb80000 Sectors]",
"FW managed drive security" : "No",
"Secured" : "No",
"Locked" : "No",
"PI Formatted" : "No",
"PI type" : "No PI",
"Number of bytes of user data in LBA" : "512B",
"Current Write Cache" : "Off",
"Default Write Cache" : "Off",
"Write Cache Changeable" : "No"
},
"Inquiry Data" : "4d 14 28 10 53 36 4a 47 4e 41 30 58 31 30 31 34 38 38 20 20 20 20 20 20 44 65 6c 6c 20 44 43 20 4e 56 4d 65 20 50 4d 39 41 33 20 52 49 20 55 2e 32 20 39 36 30 47 42 20 20 20 20 20 20 20 20 20 31 2e 30 2e 30 20 20 20 02 38 25 00 00 09 06 00 00 04 01 00 00 12 7a 00 00 12 7a 00 00 03 00 00 80 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 "
}
}
]
}
}
]
}

209
testdata/perccli_show_all.txt vendored Normal file
View file

@ -0,0 +1,209 @@
CLI Version = 007.2616.0000.0000 Dec 06, 2023
Operating system = Linux 6.8.0-124-generic
Controller = 0
Status = Success
Description = Show Drive Information Succeeded.
Drive /c0/e252/s0 :
=================
-----------------------------------------------------------------------------
EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type
-----------------------------------------------------------------------------
252:0 1 Onln 0 893.750 GB SATA SSD N N 512B HFS960G3H2X069N U -
-----------------------------------------------------------------------------
EID=Enclosure Device ID|Slt=Slot No|DID=Device ID|DG=DriveGroup
DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare
UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface
Med=Media Type|SED=Self Encryptive Drive|PI=PI Eligible
SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign
UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded
CFShld=Configured shielded|Cpybck=CopyBack|CBShld=Copyback Shielded
UBUnsp=UBad Unsupported|Rbld=Rebuild
Drive /c0/e252/s0 - Detailed Information :
========================================
Drive /c0/e252/s0 State :
=======================
Shield Counter = 0
Media Error Count = 0
Other Error Count = 0
Drive Temperature = 36C (96.80 F)
Predictive Failure Count = 0
S.M.A.R.T alert flagged by drive = No
Drive /c0/e252/s0 Device attributes :
===================================
SN = ESC3N5538I0000001
Manufacturer Id = ATA
Model Number = HFS960G3H2X069N
NAND Vendor = NA
WWN = 5ACE42E000000001
Firmware Revision = DZ02
Raw size = 894.252 GB [0x6fc81ab0 Sectors]
Coerced size = 893.750 GB [0x6fb80000 Sectors]
Non Coerced size = 893.752 GB [0x6fb81ab0 Sectors]
Device Speed = 6.0Gb/s
Link Speed = 6.0Gb/s
NCQ setting = Enabled
Write Cache = Enabled
Logical Sector Size = 512B
Physical Sector Size = 4 KB
Connector Name = 00 x1
Drive /c0/e252/s0 Policies/Settings :
===================================
Drive position = DriveGroup:0, Span:0, Row:0
Enclosure position = 1
Connected Port Number = 8(path0)
Sequence Number = 2
Commissioned Spare = No
Emergency Spare = No
Last Predictive Failure Event Sequence Number = 0
Successful diagnostics completion on = N/A
FDE Type = None
SED Capable = No
SED Enabled = No
Secured = No
Cryptographic Erase Capable = Yes
Sanitize Support = CryptoErase, BlockErase
Locked = No
Needs EKM Attention = No
PI Eligible = No
Drive is formatted for PI = No
PI type = No PI
Number of bytes of user data in LBA = 0 KB
Certified = Yes
Wide Port Capable = No
Multipath = No
Port Information :
================
-----------------------------------------
Port Status Linkspeed SAS address
-----------------------------------------
0 Active 6.0Gb/s 0x3f4ee0806c023d08
-----------------------------------------
Inquiry Data =
40 00 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00
00 00 00 00 20 20 45 20 43 53 4e 33 35 35 38 33
31 49 30 38 4f 33 51 30 00 00 00 00 00 00 20 20
20 20 5a 44 32 30 46 48 39 53 30 36 33 47 32 48
30 58 39 36 20 4e 20 20 20 20 20 20 20 20 20 20
20 20 20 20 20 20 20 20 20 20 20 20 20 20 10 80
00 40 00 2f 00 40 00 00 00 00 06 00 ff 3f 10 00
3f 00 10 fc fb 00 01 bd ff ff ff 0f 00 00 07 00
Drive /c0/e252/s1 :
=================
-----------------------------------------------------------------------------
EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type
-----------------------------------------------------------------------------
252:1 0 Onln 0 893.750 GB SATA SSD N N 512B HFS960G3H2X069N U -
-----------------------------------------------------------------------------
EID=Enclosure Device ID|Slt=Slot No|DID=Device ID|DG=DriveGroup
DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare
UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface
Med=Media Type|SED=Self Encryptive Drive|PI=PI Eligible
SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign
UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded
CFShld=Configured shielded|Cpybck=CopyBack|CBShld=Copyback Shielded
UBUnsp=UBad Unsupported|Rbld=Rebuild
Drive /c0/e252/s1 - Detailed Information :
========================================
Drive /c0/e252/s1 State :
=======================
Shield Counter = 0
Media Error Count = 0
Other Error Count = 0
Drive Temperature = 36C (96.80 F)
Predictive Failure Count = 0
S.M.A.R.T alert flagged by drive = No
Drive /c0/e252/s1 Device attributes :
===================================
SN = ESC3N5538I0000002
Manufacturer Id = ATA
Model Number = HFS960G3H2X069N
NAND Vendor = NA
WWN = 5ACE42E000000002
Firmware Revision = DZ02
Raw size = 894.252 GB [0x6fc81ab0 Sectors]
Coerced size = 893.750 GB [0x6fb80000 Sectors]
Non Coerced size = 893.752 GB [0x6fb81ab0 Sectors]
Device Speed = 6.0Gb/s
Link Speed = 6.0Gb/s
NCQ setting = Enabled
Write Cache = Enabled
Logical Sector Size = 512B
Physical Sector Size = 4 KB
Connector Name = 00 x1
Drive /c0/e252/s1 Policies/Settings :
===================================
Drive position = DriveGroup:0, Span:0, Row:1
Enclosure position = 0
Connected Port Number = 9(path0)
Sequence Number = 2
Commissioned Spare = No
Emergency Spare = No
Last Predictive Failure Event Sequence Number = 0
Successful diagnostics completion on = N/A
FDE Type = None
SED Capable = No
SED Enabled = No
Secured = No
Cryptographic Erase Capable = Yes
Sanitize Support = CryptoErase, BlockErase
Locked = No
Needs EKM Attention = No
PI Eligible = No
Drive is formatted for PI = No
PI type = No PI
Number of bytes of user data in LBA = 0 KB
Certified = Yes
Wide Port Capable = No
Multipath = No
Port Information :
================
-----------------------------------------
Port Status Linkspeed SAS address
-----------------------------------------
0 Active 6.0Gb/s 0x3f4ee0806c023d09
-----------------------------------------
Inquiry Data =
40 00 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00
00 00 00 00 20 20 45 20 43 53 4e 33 35 35 38 33
31 49 30 38 4f 33 50 30 00 00 00 00 00 00 20 20
20 20 5a 44 32 30 46 48 39 53 30 36 33 47 32 48
30 58 39 36 20 4e 20 20 20 20 20 20 20 20 20 20
20 20 20 20 20 20 20 20 20 20 20 20 20 20 10 80
00 40 00 2f 00 40 00 00 00 00 06 00 ff 3f 10 00
3f 00 10 fc fb 00 01 bd ff ff ff 0f 00 00 07 00

796
testdata/smart_ata_ssd_megaraid.json vendored Normal file
View file

@ -0,0 +1,796 @@
{
"json_format_version": [
1,
0
],
"smartctl": {
"version": [
7,
0
],
"svn_revision": "4883",
"platform_info": "x86_64-linux-5.4.225-200.el7.x86_64",
"build_info": "(local build)",
"argv": [
"smartctl",
"-j",
"-a",
"-d",
"sat+megaraid,20",
"/dev/bus/8"
],
"messages": [
{
"string": "Warning: This result is based on an Attribute check.",
"severity": "warning"
}
],
"exit_status": 4
},
"device": {
"name": "/dev/bus/8",
"info_name": "/dev/bus/8 [megaraid_disk_20] [SAT]",
"type": "sat+megaraid,20",
"protocol": "ATA"
},
"model_name": "Samsung SSD 870 EVO 2TB",
"serial_number": "S624NS0RC00003M",
"wwn": {
"naa": 5,
"oui": 9528,
"id": 65259200556
},
"firmware_version": "SVT02B6Q",
"user_capacity": {
"blocks": 3907029168,
"bytes": 2000398934016
},
"logical_block_size": 512,
"physical_block_size": 512,
"rotation_rate": 0,
"form_factor": {
"ata_value": 3,
"name": "2.5 inches"
},
"in_smartctl_database": false,
"ata_version": {
"string": "ACS-4 T13/BSR INCITS 529 revision 5",
"major_value": 2556,
"minor_value": 94
},
"sata_version": {
"string": "SATA 3.3",
"value": 511
},
"interface_speed": {
"max": {
"sata_value": 14,
"string": "6.0 Gb/s",
"units_per_second": 60,
"bits_per_unit": 100000000
},
"current": {
"sata_value": 3,
"string": "6.0 Gb/s",
"units_per_second": 60,
"bits_per_unit": 100000000
}
},
"local_time": {
"time_t": 1782134221,
"asctime": "Mon Jun 22 09:17:01 2026 EDT"
},
"smart_status": {
"passed": true
},
"ata_smart_data": {
"offline_data_collection": {
"status": {
"value": 0,
"string": "was never started"
},
"completion_seconds": 0
},
"self_test": {
"status": {
"value": 0,
"string": "completed without error",
"passed": true
},
"polling_minutes": {
"short": 2,
"extended": 160
}
},
"capabilities": {
"values": [
83,
3
],
"exec_offline_immediate_supported": true,
"offline_is_aborted_upon_new_cmd": false,
"offline_surface_scan_supported": false,
"self_tests_supported": true,
"conveyance_self_test_supported": false,
"selective_self_test_supported": true,
"attribute_autosave_enabled": true,
"error_logging_supported": true,
"gp_logging_supported": true
}
},
"ata_sct_capabilities": {
"value": 61,
"error_recovery_control_supported": true,
"feature_control_supported": true,
"data_table_supported": true
},
"ata_smart_attributes": {
"revision": 1,
"table": [
{
"id": 5,
"name": "Reallocated_Sector_Ct",
"value": 100,
"worst": 100,
"thresh": 10,
"when_failed": "",
"flags": {
"value": 51,
"string": "PO--CK ",
"prefailure": true,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 9,
"name": "Power_On_Hours",
"value": 93,
"worst": 93,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 33518,
"string": "33518"
}
},
{
"id": 12,
"name": "Power_Cycle_Count",
"value": 99,
"worst": 99,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 7,
"string": "7"
}
},
{
"id": 177,
"name": "Wear_Leveling_Count",
"value": 93,
"worst": 93,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 19,
"string": "PO--C- ",
"prefailure": true,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": false
},
"raw": {
"value": 157,
"string": "157"
}
},
{
"id": 179,
"name": "Used_Rsvd_Blk_Cnt_Tot",
"value": 100,
"worst": 100,
"thresh": 10,
"when_failed": "",
"flags": {
"value": 19,
"string": "PO--C- ",
"prefailure": true,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": false
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 181,
"name": "Program_Fail_Cnt_Total",
"value": 100,
"worst": 100,
"thresh": 10,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 182,
"name": "Erase_Fail_Count_Total",
"value": 100,
"worst": 100,
"thresh": 10,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 183,
"name": "Runtime_Bad_Block",
"value": 100,
"worst": 100,
"thresh": 10,
"when_failed": "",
"flags": {
"value": 19,
"string": "PO--C- ",
"prefailure": true,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": false
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 187,
"name": "Reported_Uncorrect",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 190,
"name": "Airflow_Temperature_Cel",
"value": 69,
"worst": 60,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 31,
"string": "31"
}
},
{
"id": 195,
"name": "Hardware_ECC_Recovered",
"value": 200,
"worst": 200,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 26,
"string": "-O-RC- ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": true,
"event_count": true,
"auto_keep": false
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 199,
"name": "UDMA_CRC_Error_Count",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 62,
"string": "-OSRCK ",
"prefailure": false,
"updated_online": true,
"performance": true,
"error_rate": true,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 235,
"name": "Unknown_Attribute",
"value": 99,
"worst": 99,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 18,
"string": "-O--C- ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": false
},
"raw": {
"value": 5,
"string": "5"
}
},
{
"id": 241,
"name": "Total_LBAs_Written",
"value": 99,
"worst": 99,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 332951560676,
"string": "332951560676"
}
},
{
"id": 252,
"name": "Unknown_Attribute",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 273,
"string": "273"
}
}
]
},
"power_on_time": {
"hours": 33518
},
"power_cycle_count": 7,
"temperature": {
"current": 31
},
"ata_smart_error_log": {
"summary": {
"revision": 1,
"count": 0
}
},
"ata_smart_self_test_log": {
"standard": {
"revision": 1,
"table": [
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33510
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33486
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33462
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33438
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33414
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33390
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33366
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33342
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33318
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33294
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33270
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33246
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33222
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33198
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33174
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33150
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33126
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33102
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33078
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33054
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 33030
}
],
"count": 21,
"error_count_total": 0,
"error_count_outdated": 0
}
},
"ata_smart_selective_self_test_log": {
"revision": 1,
"table": [
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
},
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
},
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
},
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
},
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
}
],
"current_read_scan": {
"lba_min": 0,
"lba_max": 65535,
"status": {
"value": 0,
"string": "was never started"
}
},
"flags": {
"value": 0,
"remainder_scan_enabled": false
},
"power_up_scan_resume_minutes": 0
}
}

905
testdata/smart_ata_ssd_micron.json vendored Normal file
View file

@ -0,0 +1,905 @@
{
"json_format_version": [
1,
0
],
"smartctl": {
"version": [
7,
4
],
"pre_release": false,
"svn_revision": "5530",
"platform_info": "x86_64-linux-6.8.0-124-generic",
"build_info": "(local build)",
"argv": [
"smartctl",
"-j",
"-a",
"-d",
"sat+megaraid,0",
"/dev/bus/0"
],
"drive_database_version": {
"string": "7.3/5528"
},
"messages": [
{
"string": "Warning: This result is based on an Attribute check.",
"severity": "warning"
}
],
"exit_status": 0
},
"local_time": {
"time_t": 1782135879,
"asctime": "Mon Jun 22 09:44:39 2026 EDT"
},
"device": {
"name": "/dev/bus/0",
"info_name": "/dev/bus/0 [megaraid_disk_00] [SAT]",
"type": "sat+megaraid,0",
"protocol": "ATA"
},
"model_name": "MTFDDAK960TGA-1BC1ZABDA",
"serial_number": "232442000000",
"wwn": {
"naa": 5,
"oui": 41077,
"id": 5415414560
},
"ata_additional_product_id": "DELL(tm)",
"firmware_version": "D4DK003",
"user_capacity": {
"blocks": 1875385008,
"bytes": 960197124096
},
"logical_block_size": 512,
"physical_block_size": 4096,
"rotation_rate": 0,
"form_factor": {
"ata_value": 3,
"name": "2.5 inches"
},
"trim": {
"supported": true,
"deterministic": true,
"zeroed": true
},
"in_smartctl_database": false,
"ata_version": {
"string": "ACS-4 (minor revision not indicated)",
"major_value": 4088,
"minor_value": 65535
},
"sata_version": {
"string": "SATA 3.3",
"value": 511
},
"interface_speed": {
"max": {
"sata_value": 14,
"string": "6.0 Gb/s",
"units_per_second": 60,
"bits_per_unit": 100000000
},
"current": {
"sata_value": 3,
"string": "6.0 Gb/s",
"units_per_second": 60,
"bits_per_unit": 100000000
}
},
"smart_support": {
"available": true,
"enabled": true
},
"smart_status": {
"passed": true
},
"ata_smart_data": {
"offline_data_collection": {
"status": {
"value": 0,
"string": "was never started"
},
"completion_seconds": 3348
},
"self_test": {
"status": {
"value": 0,
"string": "completed without error",
"passed": true
},
"polling_minutes": {
"short": 2,
"extended": 57,
"conveyance": 3
}
},
"capabilities": {
"values": [
123,
3
],
"exec_offline_immediate_supported": true,
"offline_is_aborted_upon_new_cmd": false,
"offline_surface_scan_supported": true,
"self_tests_supported": true,
"conveyance_self_test_supported": true,
"selective_self_test_supported": true,
"attribute_autosave_enabled": true,
"error_logging_supported": true,
"gp_logging_supported": true
}
},
"ata_sct_capabilities": {
"value": 61,
"error_recovery_control_supported": true,
"feature_control_supported": true,
"data_table_supported": true
},
"ata_smart_attributes": {
"revision": 16,
"table": [
{
"id": 1,
"name": "Raw_Read_Error_Rate",
"value": 100,
"worst": 100,
"thresh": 50,
"when_failed": "",
"flags": {
"value": 46,
"string": "-OSR-K ",
"prefailure": false,
"updated_online": true,
"performance": true,
"error_rate": true,
"event_count": false,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 5,
"name": "Reallocated_Sector_Ct",
"value": 100,
"worst": 100,
"thresh": 10,
"when_failed": "",
"flags": {
"value": 51,
"string": "PO--CK ",
"prefailure": true,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 9,
"name": "Power_On_Hours",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 20238,
"string": "20238"
}
},
{
"id": 12,
"name": "Power_Cycle_Count",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 12,
"string": "12"
}
},
{
"id": 13,
"name": "Read_Soft_Error_Rate",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 173,
"name": "Unknown_Attribute",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 94,
"string": "94"
}
},
{
"id": 174,
"name": "Unknown_Attribute",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 9,
"string": "9"
}
},
{
"id": 175,
"name": "Program_Fail_Count_Chip",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 46,
"string": "46"
}
},
{
"id": 179,
"name": "Used_Rsvd_Blk_Cnt_Tot",
"value": 100,
"worst": 100,
"thresh": 10,
"when_failed": "",
"flags": {
"value": 51,
"string": "PO--CK ",
"prefailure": true,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 180,
"name": "Unused_Rsvd_Blk_Cnt_Tot",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 6323,
"string": "6323"
}
},
{
"id": 181,
"name": "Program_Fail_Cnt_Total",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 182,
"name": "Erase_Fail_Count_Total",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 183,
"name": "Runtime_Bad_Block",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 184,
"name": "End-to-End_Error",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 188,
"name": "Command_Timeout",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 90,
"string": "90"
}
},
{
"id": 194,
"name": "Temperature_Celsius",
"value": 71,
"worst": 63,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 34,
"string": "-O---K ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": false,
"auto_keep": true
},
"raw": {
"value": 158914838557,
"string": "29 (Min/Max 16/37)"
}
},
{
"id": 195,
"name": "Hardware_ECC_Recovered",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 196,
"name": "Reallocated_Event_Count",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 198,
"name": "Offline_Uncorrectable",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 48,
"string": "----CK ",
"prefailure": false,
"updated_online": false,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 199,
"name": "UDMA_CRC_Error_Count",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 201,
"name": "Unknown_SSD_Attribute",
"value": 100,
"worst": 100,
"thresh": 1,
"when_failed": "",
"flags": {
"value": 51,
"string": "PO--CK ",
"prefailure": true,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 202,
"name": "Unknown_SSD_Attribute",
"value": 100,
"worst": 100,
"thresh": 5,
"when_failed": "",
"flags": {
"value": 51,
"string": "PO--CK ",
"prefailure": true,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 5865,
"string": "5865"
}
},
{
"id": 206,
"name": "Unknown_SSD_Attribute",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 14,
"string": "-OSR-- ",
"prefailure": false,
"updated_online": true,
"performance": true,
"error_rate": true,
"event_count": false,
"auto_keep": false
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 210,
"name": "Unknown_Attribute",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 0,
"string": "0"
}
},
{
"id": 233,
"name": "Media_Wearout_Indicator",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 34970317773,
"string": "34970317773"
}
},
{
"id": 235,
"name": "Unknown_Attribute",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 34970317773,
"string": "34970317773"
}
},
{
"id": 245,
"name": "Unknown_Attribute",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 48,
"string": "----CK ",
"prefailure": false,
"updated_online": false,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 100,
"string": "100"
}
},
{
"id": 247,
"name": "Unknown_Attribute",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 1092919960,
"string": "1092919960"
}
},
{
"id": 248,
"name": "Unknown_Attribute",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 451491208,
"string": "451491208"
}
},
{
"id": 241,
"name": "Total_LBAs_Written",
"value": 100,
"worst": 100,
"thresh": 0,
"when_failed": "",
"flags": {
"value": 50,
"string": "-O--CK ",
"prefailure": false,
"updated_online": true,
"performance": false,
"error_rate": false,
"event_count": true,
"auto_keep": true
},
"raw": {
"value": 49421157344,
"string": "49421157344"
}
}
]
},
"power_on_time": {
"hours": 20238
},
"power_cycle_count": 12,
"temperature": {
"current": 29
},
"ata_smart_error_log": {
"summary": {
"revision": 1,
"count": 0
}
},
"ata_smart_self_test_log": {
"standard": {
"revision": 1,
"table": [
{
"type": {
"value": 2,
"string": "Extended offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 1
},
{
"type": {
"value": 1,
"string": "Short offline"
},
"status": {
"value": 0,
"string": "Completed without error",
"passed": true
},
"lifetime_hours": 1
}
],
"count": 2,
"error_count_total": 0,
"error_count_outdated": 0
}
},
"ata_smart_selective_self_test_log": {
"revision": 1,
"table": [
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
},
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
},
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
},
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
},
{
"lba_min": 0,
"lba_max": 0,
"status": {
"value": 0,
"string": "Not_testing"
}
}
],
"flags": {
"value": 0,
"remainder_scan_enabled": false
},
"power_up_scan_resume_minutes": 0
}
}

88
testdata/smart_iscsi_lio.json vendored Normal file
View file

@ -0,0 +1,88 @@
{
"json_format_version": [
1,
0
],
"smartctl": {
"version": [
7,
5
],
"pre_release": false,
"svn_revision": "5714",
"platform_info": "x86_64-linux-6.18.26-2-lts",
"build_info": "(local build)",
"argv": [
"smartctl",
"-j",
"-a",
"-d",
"scsi",
"/dev/sdj"
],
"exit_status": 4
},
"local_time": {
"time_t": 1782138466,
"asctime": "Mon Jun 22 09:27:46 2026 CDT"
},
"device": {
"name": "/dev/sdj",
"info_name": "/dev/sdj",
"type": "scsi",
"protocol": "SCSI"
},
"scsi_vendor": "LIO-ORG",
"scsi_product": "MainServer",
"model_name": "LIO-ORG MainServer",
"scsi_model_name": "LIO-ORG MainServer",
"scsi_revision": "4.0",
"scsi_version": "SPC-4",
"user_capacity": {
"blocks": 4294967296,
"bytes": 2199023255552
},
"logical_block_size": 512,
"physical_block_size": 65536,
"scsi_lb_provisioning": {
"name": "fully provisioned",
"value": 0,
"management_enabled": {
"name": "LBPME",
"value": 0
},
"read_zeros": {
"name": "LBPRZ",
"value": 0
}
},
"rotation_rate": 0,
"logical_unit_id": "0x6001405277a9bf9a82147e4b954ece39",
"serial_number": "00000000-0000-0000-0000-000000000000",
"device_type": {
"scsi_terminology": "Peripheral Device Type [PDT]",
"scsi_value": 0,
"name": "disk"
},
"scsi_transport_protocol": {
"name": "iSCSI",
"value": 5
},
"smart_support": {
"available": true,
"enabled": true
},
"temperature_warning": {
"enabled": false
},
"smart_status": {
"passed": true
},
"temperature": {
"current": 0,
"drive_trip": 0
},
"seagate_farm_log": {
"supported": false
}
}

56
testdata/smart_iscsi_virtual_disk.json vendored Normal file
View file

@ -0,0 +1,56 @@
{
"json_format_version": [
1,
0
],
"smartctl": {
"version": [
7,
0
],
"svn_revision": "4883",
"platform_info": "x86_64-linux-5.4.225-200.el7.x86_64",
"build_info": "(local build)",
"argv": [
"smartctl",
"-j",
"-a",
"-d",
"scsi",
"/dev/sdb"
],
"exit_status": 4
},
"device": {
"name": "/dev/sdb",
"info_name": "/dev/sdb",
"type": "scsi",
"protocol": "SCSI"
},
"vendor": "IET",
"product": "VIRTUAL-DISK",
"model_name": "IET VIRTUAL-DISK",
"revision": "0001",
"scsi_version": "SPC-3",
"user_capacity": {
"blocks": 2831155200,
"bytes": 1449551462400
},
"logical_block_size": 512,
"serial_number": "000000",
"device_type": {
"scsi_value": 0,
"name": "disk"
},
"local_time": {
"time_t": 1782134221,
"asctime": "Mon Jun 22 09:17:01 2026 EDT"
},
"smart_status": {
"passed": true
},
"temperature": {
"current": 0,
"drive_trip": 0
}
}

328
testdata/smart_nvme.json vendored Normal file
View file

@ -0,0 +1,328 @@
{
"json_format_version": [
1,
0
],
"smartctl": {
"version": [
7,
5
],
"pre_release": false,
"svn_revision": "5714",
"platform_info": "x86_64-linux-6.18.34-1-lts",
"build_info": "(local build)",
"argv": [
"smartctl",
"-j",
"-a",
"-d",
"nvme",
"/dev/nvme0"
],
"exit_status": 0
},
"local_time": {
"time_t": 1782138345,
"asctime": "Mon Jun 22 09:25:45 2026 CDT"
},
"device": {
"name": "/dev/nvme0",
"info_name": "/dev/nvme0",
"type": "nvme",
"protocol": "NVMe"
},
"model_name": "Force MP510",
"serial_number": "21038270000000000000",
"firmware_version": "ECFM13.3",
"nvme_pci_vendor": {
"id": 6535,
"subsystem_id": 6535
},
"nvme_ieee_oui_identifier": 6584743,
"nvme_total_capacity": 4000787030016,
"nvme_unallocated_capacity": 0,
"nvme_controller_id": 1,
"nvme_version": {
"string": "1.3",
"value": 66304
},
"nvme_number_of_namespaces": 1,
"nvme_namespaces": [
{
"id": 1,
"size": {
"blocks": 7814037168,
"bytes": 4000787030016
},
"capacity": {
"blocks": 7814037168,
"bytes": 4000787030016
},
"utilization": {
"blocks": 7814037168,
"bytes": 4000787030016
},
"formatted_lba_size": 512,
"eui64": {
"oui": 6584743,
"ext_id": 299576073406
},
"features": {
"value": 0,
"thin_provisioning": false,
"na_fields": false,
"dealloc_or_unwritten_block_error": false,
"uid_reuse": false,
"np_fields": false,
"other": 0
},
"lba_formats": [
{
"formatted": true,
"data_bytes": 512,
"metadata_bytes": 0,
"relative_performance": 2
},
{
"formatted": false,
"data_bytes": 4096,
"metadata_bytes": 0,
"relative_performance": 1
}
]
}
],
"user_capacity": {
"blocks": 7814037168,
"bytes": 4000787030016
},
"logical_block_size": 512,
"smart_support": {
"available": true,
"enabled": true
},
"nvme_firmware_update_capabilities": {
"value": 18,
"slots": 1,
"first_slot_is_read_only": false,
"activiation_without_reset": true,
"multiple_update_detection": false,
"other": 0
},
"nvme_optional_admin_commands": {
"value": 23,
"security_send_receive": true,
"format_nvm": true,
"firmware_download": true,
"namespace_management": false,
"self_test": true,
"directives": false,
"mi_send_receive": false,
"virtualization_management": false,
"doorbell_buffer_config": false,
"get_lba_status": false,
"command_and_feature_lockdown": false,
"other": 0
},
"nvme_optional_nvm_commands": {
"value": 93,
"compare": true,
"write_uncorrectable": false,
"dataset_management": true,
"write_zeroes": true,
"save_select_feature_nonzero": true,
"reservations": false,
"timestamp": true,
"verify": false,
"copy": false,
"other": 0
},
"nvme_log_page_attributes": {
"value": 8,
"smart_health_per_namespace": false,
"commands_effects_log": false,
"extended_get_log_page_cmd": false,
"telemetry_log": true,
"persistent_event_log": false,
"supported_log_pages_log": false,
"telemetry_data_area_4": false,
"other": 0
},
"nvme_maximum_data_transfer_pages": 512,
"nvme_composite_temperature_threshold": {
"warning": 75,
"critical": 80
},
"temperature": {
"op_limit_max": 75,
"critical_limit_max": 80,
"current": 42
},
"nvme_power_states": [
{
"non_operational_state": false,
"relative_read_latency": 0,
"relative_read_throughput": 0,
"relative_write_latency": 0,
"relative_write_throughput": 0,
"entry_latency_us": 0,
"exit_latency_us": 0,
"max_power": {
"value": 1533,
"scale": 2,
"units_per_watt": 100
}
},
{
"non_operational_state": false,
"relative_read_latency": 1,
"relative_read_throughput": 1,
"relative_write_latency": 1,
"relative_write_throughput": 1,
"entry_latency_us": 0,
"exit_latency_us": 0,
"max_power": {
"value": 965,
"scale": 2,
"units_per_watt": 100
}
},
{
"non_operational_state": false,
"relative_read_latency": 2,
"relative_read_throughput": 2,
"relative_write_latency": 2,
"relative_write_throughput": 2,
"entry_latency_us": 0,
"exit_latency_us": 0,
"max_power": {
"value": 682,
"scale": 2,
"units_per_watt": 100
}
},
{
"non_operational_state": true,
"relative_read_latency": 3,
"relative_read_throughput": 3,
"relative_write_latency": 3,
"relative_write_throughput": 3,
"entry_latency_us": 2000,
"exit_latency_us": 2000,
"max_power": {
"value": 490,
"scale": 1,
"units_per_watt": 10000
}
},
{
"non_operational_state": true,
"relative_read_latency": 4,
"relative_read_throughput": 4,
"relative_write_latency": 4,
"relative_write_throughput": 4,
"entry_latency_us": 25000,
"exit_latency_us": 25000,
"max_power": {
"value": 18,
"scale": 1,
"units_per_watt": 10000
}
}
],
"smart_status": {
"passed": true,
"nvme": {
"value": 0
}
},
"nvme_smart_health_information_log": {
"nsid": -1,
"critical_warning": 0,
"temperature": 42,
"available_spare": 100,
"available_spare_threshold": 5,
"percentage_used": 6,
"data_units_read": 221438663,
"data_units_written": 439320520,
"host_reads": 15653023750,
"host_writes": 8150052010,
"controller_busy_time": 21564,
"power_cycles": 289,
"power_on_hours": 42811,
"unsafe_shutdowns": 239,
"media_errors": 0,
"num_err_log_entries": 2340,
"warning_temp_time": 0,
"critical_comp_time": 0
},
"spare_available": {
"current_percent": 100,
"threshold_percent": 5
},
"endurance_used": {
"current_percent": 6
},
"power_cycle_count": 289,
"power_on_time": {
"hours": 42811
},
"nvme_error_information_log": {
"size": 63,
"read": 16,
"unread": 0,
"table": [
{
"error_count": 2340,
"submission_queue_id": 0,
"command_id": 16,
"status_field": {
"value": 8194,
"do_not_retry": false,
"status_code_type": 0,
"status_code": 2,
"string": "Invalid Field in Command"
},
"phase_tag": false,
"parm_error_location": 40,
"lba": {
"value": 0
},
"nsid": 0
}
]
},
"nvme_self_test_log": {
"nsid": -1,
"current_self_test_operation": {
"value": 0,
"string": "No self-test in progress"
},
"table": [
{
"self_test_code": {
"value": 1,
"string": "Short"
},
"self_test_result": {
"value": 2,
"string": "Aborted: Controller Reset"
},
"power_on_hours": 5892
},
{
"self_test_code": {
"value": 1,
"string": "Short"
},
"self_test_result": {
"value": 0,
"string": "Completed without error"
},
"power_on_hours": 5801
}
]
}
}

55
testdata/smart_raid_vd.json vendored Normal file
View file

@ -0,0 +1,55 @@
{
"json_format_version": [
1,
0
],
"smartctl": {
"version": [
7,
2
],
"svn_revision": "5155",
"platform_info": "x86_64-linux-6.10.6-1.el9.x86_64",
"build_info": "(local build)",
"argv": [
"smartctl",
"-j",
"-a",
"-d",
"scsi",
"/dev/sda"
],
"exit_status": 4
},
"device": {
"name": "/dev/sda",
"info_name": "/dev/sda",
"type": "scsi",
"protocol": "SCSI"
},
"vendor": "BROADCOM",
"product": "MR9560-16i",
"model_name": "BROADCOM MR9560-16i",
"revision": "5.26",
"scsi_version": "SPC-3",
"user_capacity": {
"blocks": 93746888704,
"bytes": 47998407016448
},
"logical_block_size": 512,
"physical_block_size": 4096,
"rotation_rate": 0,
"serial_number": "00000000000000000000000000000001",
"device_type": {
"scsi_value": 0,
"name": "disk"
},
"local_time": {
"time_t": 1782134423,
"asctime": "Mon Jun 22 09:20:23 2026 EDT"
},
"temperature": {
"current": 0,
"drive_trip": 0
}
}

54
testdata/smart_raid_vd_avago.json vendored Normal file
View file

@ -0,0 +1,54 @@
{
"json_format_version": [
1,
0
],
"smartctl": {
"version": [
7,
0
],
"svn_revision": "4883",
"platform_info": "x86_64-linux-5.4.134-200.el7.x86_64",
"build_info": "(local build)",
"argv": [
"smartctl",
"-j",
"-a",
"-d",
"scsi",
"/dev/sda"
],
"exit_status": 4
},
"device": {
"name": "/dev/sda",
"info_name": "/dev/sda",
"type": "scsi",
"protocol": "SCSI"
},
"vendor": "AVAGO",
"product": "MR9363-4i",
"model_name": "AVAGO MR9363-4i",
"revision": "4.68",
"scsi_version": "SPC-3",
"user_capacity": {
"blocks": 3748659200,
"bytes": 1919313510400
},
"logical_block_size": 512,
"physical_block_size": 4096,
"serial_number": "00000000000000000000000000000002",
"device_type": {
"scsi_value": 0,
"name": "disk"
},
"local_time": {
"time_t": 1782135089,
"asctime": "Mon Jun 22 09:31:29 2026 EDT"
},
"temperature": {
"current": 0,
"drive_trip": 0
}
}

155
testdata/smart_sas_ssd.json vendored Normal file
View file

@ -0,0 +1,155 @@
{
"json_format_version": [
1,
0
],
"smartctl": {
"version": [
7,
5
],
"pre_release": false,
"svn_revision": "5714",
"platform_info": "x86_64-linux-6.18.26-2-lts",
"build_info": "(local build)",
"argv": [
"smartctl",
"-j",
"-a",
"-d",
"scsi",
"/dev/sdb"
],
"exit_status": 0
},
"local_time": {
"time_t": 1782138465,
"asctime": "Mon Jun 22 09:27:45 2026 CDT"
},
"device": {
"name": "/dev/sdb",
"info_name": "/dev/sdb",
"type": "scsi",
"protocol": "SCSI"
},
"scsi_vendor": "SAMSUNG",
"scsi_product": "ARFX0920S5xnNTRI",
"model_name": "SAMSUNG ARFX0920S5xnNTRI",
"scsi_model_name": "SAMSUNG ARFX0920S5xnNTRI",
"scsi_revision": "3P00",
"scsi_version": "SPC-5",
"user_capacity": {
"blocks": 223621814,
"bytes": 915954950144
},
"logical_block_size": 4096,
"scsi_lb_provisioning": {
"name": "resource provisioned",
"value": 1,
"management_enabled": {
"name": "LBPME",
"value": 1
},
"read_zeros": {
"name": "LBPRZ",
"value": 1
}
},
"rotation_rate": 0,
"form_factor": {
"scsi_value": 3,
"name": "2.5 inches"
},
"logical_unit_id": "0x5002538b48c8c360",
"serial_number": "S43YNF0K000001",
"device_type": {
"scsi_terminology": "Peripheral Device Type [PDT]",
"scsi_value": 0,
"name": "disk"
},
"scsi_transport_protocol": {
"name": "SAS (SPL-4)",
"value": 6
},
"smart_support": {
"available": true,
"enabled": true
},
"temperature_warning": {
"enabled": true
},
"smart_status": {
"passed": true
},
"scsi_percentage_used_endurance_indicator": 0,
"endurance_used": {
"current_percent": 0
},
"temperature": {
"current": 56,
"drive_trip": 70
},
"power_on_time": {
"hours": 2487,
"minutes": 44
},
"scsi_start_stop_cycle_counter": {
"year_of_manufacture": "2018",
"week_of_manufacture": "51",
"accumulated_start_stop_cycles": 12,
"specified_load_unload_count_over_device_lifetime": 0,
"accumulated_load_unload_cycles": 0
},
"scsi_grown_defect_list": 0,
"seagate_farm_log": {
"supported": false
},
"scsi_error_counter_log": {
"read": {
"errors_corrected_by_eccfast": 0,
"errors_corrected_by_eccdelayed": 0,
"errors_corrected_by_rereads_rewrites": 0,
"total_errors_corrected": 0,
"correction_algorithm_invocations": 0,
"gigabytes_processed": "2620.555",
"total_uncorrected_errors": 0
},
"write": {
"errors_corrected_by_eccfast": 0,
"errors_corrected_by_eccdelayed": 0,
"errors_corrected_by_rereads_rewrites": 0,
"total_errors_corrected": 0,
"correction_algorithm_invocations": 0,
"gigabytes_processed": "2091.250",
"total_uncorrected_errors": 0
},
"verify": {
"errors_corrected_by_eccfast": 0,
"errors_corrected_by_eccdelayed": 0,
"errors_corrected_by_rereads_rewrites": 0,
"total_errors_corrected": 0,
"correction_algorithm_invocations": 0,
"gigabytes_processed": "46.845",
"total_uncorrected_errors": 0
}
},
"scsi_pending_defects": {
"count": 0
},
"scsi_self_test_0": {
"code": {
"value": 0,
"string": "Default"
},
"result": {
"value": 0,
"string": "Completed"
},
"power_on_time": {
"hours": 2,
"aka": "accumulated_power_on_hours"
}
},
"scsi_extended_self_test_seconds": 3600
}

397
testdata/storcli_show_all.txt vendored Normal file
View file

@ -0,0 +1,397 @@
CLI Version = 007.1420.0000.0000 Dec 10, 2020
Operating system = Linux 5.4.225-200.el7.x86_64
Controller = 0
Status = Success
Description = Show Drive Information Succeeded.
Drive /c0/e64/s0 :
================
-----------------------------------------------------------------------------------
EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type
-----------------------------------------------------------------------------------
64:0 22 Onln 0 1.818 TB SATA SSD Y N 512B Samsung SSD 870 EVO 2TB U -
-----------------------------------------------------------------------------------
EID=Enclosure Device ID|Slt=Slot No.|DID=Device ID|DG=DriveGroup
DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare
UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface
Med=Media Type|SED=Self Encryptive Drive|PI=Protection Info
SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign
UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded
CFShld=Configured shielded|Cpybck=CopyBack|CBShld=Copyback Shielded
UBUnsp=UBad Unsupported|Rbld=Rebuild
Drive /c0/e64/s0 - Detailed Information :
=======================================
Drive /c0/e64/s0 State :
======================
Shield Counter = 0
Media Error Count = 0
Other Error Count = 7
Drive Temperature = 31C (87.80 F)
Predictive Failure Count = 0
S.M.A.R.T alert flagged by drive = No
Drive /c0/e64/s0 Device attributes :
==================================
SN = S624NS0RA00001J
Manufacturer Id = ATA
Model Number = Samsung SSD 870 EVO 2TB
NAND Vendor = NA
WWN = 5002538F31000001
Firmware Revision = SVT02B6Q
Raw size = 1.819 TB [0xe8e088b0 Sectors]
Coerced size = 1.818 TB [0xe8d00000 Sectors]
Non Coerced size = 1.818 TB [0xe8d088b0 Sectors]
Device Speed = 6.0Gb/s
Link Speed = 6.0Gb/s
NCQ setting = Enabled
Write Cache = Disabled
Logical Sector Size = 512B
Physical Sector Size = 512B
Connector Name = 00 x1
Drive /c0/e64/s0 Policies/Settings :
==================================
Drive position = DriveGroup:0
Enclosure position = 1
Connected Port Number = 0(path0)
Sequence Number = 2
Commissioned Spare = No
Emergency Spare = No
Last Predictive Failure Event Sequence Number = 0
Successful diagnostics completion on = N/A
FDE Type = TCG Opal
SED Capable = Yes
SED Enabled = No
Secured = No
Cryptographic Erase Capable = No
Sanitize Support = Not supported
Locked = No
Needs EKM Attention = No
PI Eligible = No
Certified = No
Wide Port Capable = No
Multipath = No
Port Information :
================
-----------------------------------------
Port Status Linkspeed SAS address
-----------------------------------------
0 Active 6.0Gb/s 0x3b07b250d55b7500
-----------------------------------------
Inquiry Data =
40 00 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00
00 00 00 00 36 53 34 32 53 4e 52 30 30 41 30 32
30 33 20 4a 20 20 20 20 00 00 00 00 00 00 56 53
30 54 42 32 51 36 61 53 73 6d 6e 75 20 67 53 53
20 44 37 38 20 30 56 45 20 4f 54 32 20 42 20 20
20 20 20 20 20 20 20 20 20 20 20 20 20 20 01 80
01 40 00 2f 00 40 00 02 00 02 07 00 ff 3f 10 00
3f 00 10 fc fb 00 01 01 ff ff ff 0f 00 00 07 00
Drive /c0/e64/s1 :
================
-----------------------------------------------------------------------------------
EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type
-----------------------------------------------------------------------------------
64:1 23 Onln 0 1.818 TB SATA SSD Y N 512B Samsung SSD 870 EVO 2TB U -
-----------------------------------------------------------------------------------
EID=Enclosure Device ID|Slt=Slot No.|DID=Device ID|DG=DriveGroup
DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare
UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface
Med=Media Type|SED=Self Encryptive Drive|PI=Protection Info
SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign
UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded
CFShld=Configured shielded|Cpybck=CopyBack|CBShld=Copyback Shielded
UBUnsp=UBad Unsupported|Rbld=Rebuild
Drive /c0/e64/s1 - Detailed Information :
=======================================
Drive /c0/e64/s1 State :
======================
Shield Counter = 0
Media Error Count = 0
Other Error Count = 6
Drive Temperature = 30C (86.00 F)
Predictive Failure Count = 0
S.M.A.R.T alert flagged by drive = No
Drive /c0/e64/s1 Device attributes :
==================================
SN = S624NS0RA00002L
Manufacturer Id = ATA
Model Number = Samsung SSD 870 EVO 2TB
NAND Vendor = NA
WWN = 5002538F31000002
Firmware Revision = SVT02B6Q
Raw size = 1.819 TB [0xe8e088b0 Sectors]
Coerced size = 1.818 TB [0xe8d00000 Sectors]
Non Coerced size = 1.818 TB [0xe8d088b0 Sectors]
Device Speed = 6.0Gb/s
Link Speed = 6.0Gb/s
NCQ setting = Enabled
Write Cache = Disabled
Logical Sector Size = 512B
Physical Sector Size = 512B
Connector Name = 00 x1
Drive /c0/e64/s1 Policies/Settings :
==================================
Drive position = DriveGroup:0
Enclosure position = 0
Connected Port Number = 1(path0)
Sequence Number = 2
Commissioned Spare = No
Emergency Spare = No
Last Predictive Failure Event Sequence Number = 0
Successful diagnostics completion on = N/A
FDE Type = TCG Opal
SED Capable = Yes
SED Enabled = No
Secured = No
Cryptographic Erase Capable = No
Sanitize Support = Not supported
Locked = No
Needs EKM Attention = No
PI Eligible = No
Certified = No
Wide Port Capable = No
Multipath = No
Port Information :
================
-----------------------------------------
Port Status Linkspeed SAS address
-----------------------------------------
0 Active 6.0Gb/s 0x3b07b250d55b7501
-----------------------------------------
Inquiry Data =
40 00 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00
00 00 00 00 36 53 34 32 53 4e 52 30 30 41 34 33
31 38 20 4c 20 20 20 20 00 00 00 00 00 00 56 53
30 54 42 32 51 36 61 53 73 6d 6e 75 20 67 53 53
20 44 37 38 20 30 56 45 20 4f 54 32 20 42 20 20
20 20 20 20 20 20 20 20 20 20 20 20 20 20 01 80
01 40 00 2f 00 40 00 02 00 02 07 00 ff 3f 10 00
3f 00 10 fc fb 00 01 01 ff ff ff 0f 00 00 07 00
Drive /c0/e64/s2 :
================
-----------------------------------------------------------------------------------
EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type
-----------------------------------------------------------------------------------
64:2 20 Onln 0 1.818 TB SATA SSD Y N 512B Samsung SSD 870 EVO 2TB U -
-----------------------------------------------------------------------------------
EID=Enclosure Device ID|Slt=Slot No.|DID=Device ID|DG=DriveGroup
DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare
UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface
Med=Media Type|SED=Self Encryptive Drive|PI=Protection Info
SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign
UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded
CFShld=Configured shielded|Cpybck=CopyBack|CBShld=Copyback Shielded
UBUnsp=UBad Unsupported|Rbld=Rebuild
Drive /c0/e64/s2 - Detailed Information :
=======================================
Drive /c0/e64/s2 State :
======================
Shield Counter = 0
Media Error Count = 0
Other Error Count = 21
Drive Temperature = 31C (87.80 F)
Predictive Failure Count = 0
S.M.A.R.T alert flagged by drive = No
Drive /c0/e64/s2 Device attributes :
==================================
SN = S624NS0RC00003M
Manufacturer Id = ATA
Model Number = Samsung SSD 870 EVO 2TB
NAND Vendor = NA
WWN = 5002538F31000003
Firmware Revision = SVT02B6Q
Raw size = 1.819 TB [0xe8e088b0 Sectors]
Coerced size = 1.818 TB [0xe8d00000 Sectors]
Non Coerced size = 1.818 TB [0xe8d088b0 Sectors]
Device Speed = 6.0Gb/s
Link Speed = 6.0Gb/s
NCQ setting = Enabled
Write Cache = Disabled
Logical Sector Size = 512B
Physical Sector Size = 512B
Connector Name = 00 x1
Drive /c0/e64/s2 Policies/Settings :
==================================
Drive position = DriveGroup:0
Enclosure position = 0
Connected Port Number = 2(path0)
Sequence Number = 2
Commissioned Spare = No
Emergency Spare = No
Last Predictive Failure Event Sequence Number = 0
Successful diagnostics completion on = N/A
FDE Type = TCG Opal
SED Capable = Yes
SED Enabled = No
Secured = No
Cryptographic Erase Capable = No
Sanitize Support = Not supported
Locked = No
Needs EKM Attention = No
PI Eligible = No
Certified = No
Wide Port Capable = No
Multipath = No
Port Information :
================
-----------------------------------------
Port Status Linkspeed SAS address
-----------------------------------------
0 Active 6.0Gb/s 0x3b07b250d55b7502
-----------------------------------------
Inquiry Data =
40 00 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00
00 00 00 00 36 53 34 32 53 4e 52 30 30 43 38 35
31 37 20 4d 20 20 20 20 00 00 00 00 00 00 56 53
30 54 42 32 51 36 61 53 73 6d 6e 75 20 67 53 53
20 44 37 38 20 30 56 45 20 4f 54 32 20 42 20 20
20 20 20 20 20 20 20 20 20 20 20 20 20 20 01 80
01 40 00 2f 00 40 00 02 00 02 07 00 ff 3f 10 00
3f 00 10 fc fb 00 01 01 ff ff ff 0f 00 00 07 00
Drive /c0/e64/s3 :
================
-----------------------------------------------------------------------------------
EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type
-----------------------------------------------------------------------------------
64:3 21 Onln 0 1.818 TB SATA SSD Y N 512B Samsung SSD 870 EVO 2TB U -
-----------------------------------------------------------------------------------
EID=Enclosure Device ID|Slt=Slot No.|DID=Device ID|DG=DriveGroup
DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare
UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface
Med=Media Type|SED=Self Encryptive Drive|PI=Protection Info
SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign
UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded
CFShld=Configured shielded|Cpybck=CopyBack|CBShld=Copyback Shielded
UBUnsp=UBad Unsupported|Rbld=Rebuild
Drive /c0/e64/s3 - Detailed Information :
=======================================
Drive /c0/e64/s3 State :
======================
Shield Counter = 0
Media Error Count = 0
Other Error Count = 12
Drive Temperature = 30C (86.00 F)
Predictive Failure Count = 0
S.M.A.R.T alert flagged by drive = No
Drive /c0/e64/s3 Device attributes :
==================================
SN = S624NS0RA00004W
Manufacturer Id = ATA
Model Number = Samsung SSD 870 EVO 2TB
NAND Vendor = NA
WWN = 5002538F31000004
Firmware Revision = SVT02B6Q
Raw size = 1.819 TB [0xe8e088b0 Sectors]
Coerced size = 1.818 TB [0xe8d00000 Sectors]
Non Coerced size = 1.818 TB [0xe8d088b0 Sectors]
Device Speed = 6.0Gb/s
Link Speed = 6.0Gb/s
NCQ setting = Enabled
Write Cache = Disabled
Logical Sector Size = 512B
Physical Sector Size = 512B
Connector Name = 00 x1
Drive /c0/e64/s3 Policies/Settings :
==================================
Drive position = DriveGroup:0
Enclosure position = 0
Connected Port Number = 3(path0)
Sequence Number = 2
Commissioned Spare = No
Emergency Spare = No
Last Predictive Failure Event Sequence Number = 0
Successful diagnostics completion on = N/A
FDE Type = TCG Opal
SED Capable = Yes
SED Enabled = No
Secured = No
Cryptographic Erase Capable = No
Sanitize Support = Not supported
Locked = No
Needs EKM Attention = No
PI Eligible = No
Certified = No
Wide Port Capable = No
Multipath = No
Port Information :
================
-----------------------------------------
Port Status Linkspeed SAS address
-----------------------------------------
0 Active 6.0Gb/s 0x3b07b250d55b7503
-----------------------------------------
Inquiry Data =
40 00 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00
00 00 00 00 36 53 34 32 53 4e 52 30 30 41 39 30
36 36 20 57 20 20 20 20 00 00 00 00 00 00 56 53
30 54 42 32 51 36 61 53 73 6d 6e 75 20 67 53 53
20 44 37 38 20 30 56 45 20 4f 54 32 20 42 20 20
20 20 20 20 20 20 20 20 20 20 20 20 20 20 01 80
01 40 00 2f 00 40 00 02 00 02 07 00 ff 3f 10 00
3f 00 10 fc fb 00 01 01 ff ff ff 0f 00 00 07 00

397
testdata/storcli_show_all_v2.txt vendored Normal file
View file

@ -0,0 +1,397 @@
CLI Version = 007.1907.0000.0000 Sep 13, 2021
Operating system = Linux 5.4.134-200.el7.x86_64
Controller = 0
Status = Success
Description = Show Drive Information Succeeded.
Drive /c0/e252/s0 :
=================
-----------------------------------------------------------------------------------------
EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type
-----------------------------------------------------------------------------------------
252:0 4 Onln 0 893.750 GB SATA SSD Y N 512B MICRON_M510DC_MTFDDAK960MBP U -
-----------------------------------------------------------------------------------------
EID=Enclosure Device ID|Slt=Slot No|DID=Device ID|DG=DriveGroup
DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare
UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface
Med=Media Type|SED=Self Encryptive Drive|PI=Protection Info
SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign
UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded
CFShld=Configured shielded|Cpybck=CopyBack|CBShld=Copyback Shielded
UBUnsp=UBad Unsupported|Rbld=Rebuild
Drive /c0/e252/s0 - Detailed Information :
========================================
Drive /c0/e252/s0 State :
=======================
Shield Counter = 0
Media Error Count = 0
Other Error Count = 0
Drive Temperature = 24C (75.20 F)
Predictive Failure Count = 0
S.M.A.R.T alert flagged by drive = No
Drive /c0/e252/s0 Device attributes :
===================================
SN = 163100000001
Manufacturer Id = ATA
Model Number = MICRON_M510DC_MTFDDAK960MBP
NAND Vendor = NA
WWN = 500A075100000001
Firmware Revision = 0013
Raw size = 894.252 GB [0x6fc81ab0 Sectors]
Coerced size = 893.750 GB [0x6fb80000 Sectors]
Non Coerced size = 893.752 GB [0x6fb81ab0 Sectors]
Device Speed = 6.0Gb/s
Link Speed = 6.0Gb/s
NCQ setting = Enabled
Write Cache = N/A
Logical Sector Size = 512B
Physical Sector Size = 4 KB
Connector Name = Port 0 - 3 x1
Drive /c0/e252/s0 Policies/Settings :
===================================
Drive position = DriveGroup:0, Span:0, Row:0
Enclosure position = 1
Connected Port Number = 0(path0)
Sequence Number = 2
Commissioned Spare = No
Emergency Spare = No
Last Predictive Failure Event Sequence Number = 0
Successful diagnostics completion on = N/A
FDE Type = None
SED Capable = Yes
SED Enabled = No
Secured = No
Cryptographic Erase Capable = Yes
Sanitize Support = Not supported
Locked = No
Needs EKM Attention = No
PI Eligible = No
Certified = No
Wide Port Capable = No
Multipath = No
Port Information :
================
-----------------------------------------
Port Status Linkspeed SAS address
-----------------------------------------
0 Active 6.0Gb/s 0x4433221100000000
-----------------------------------------
Inquiry Data =
40 04 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00
00 00 00 00 20 20 20 20 20 20 20 20 36 31 31 33
33 31 35 37 35 43 31 35 00 00 00 00 00 00 30 30
33 31 20 20 20 20 49 4d 52 43 4e 4f 4d 5f 31 35
44 30 5f 43 54 4d 44 46 41 44 39 4b 30 36 42 4d
20 50 20 20 20 20 20 20 20 20 20 20 20 20 10 80
01 40 00 2f 01 40 00 00 00 00 07 00 ff 3f 10 00
3f 00 10 fc fb 00 10 f1 ff ff ff 0f 00 00 07 00
Drive /c0/e252/s1 :
=================
-----------------------------------------------------------------------------------------
EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type
-----------------------------------------------------------------------------------------
252:1 5 Onln 0 893.750 GB SATA SSD Y N 512B MICRON_M510DC_MTFDDAK960MBP U -
-----------------------------------------------------------------------------------------
EID=Enclosure Device ID|Slt=Slot No|DID=Device ID|DG=DriveGroup
DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare
UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface
Med=Media Type|SED=Self Encryptive Drive|PI=Protection Info
SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign
UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded
CFShld=Configured shielded|Cpybck=CopyBack|CBShld=Copyback Shielded
UBUnsp=UBad Unsupported|Rbld=Rebuild
Drive /c0/e252/s1 - Detailed Information :
========================================
Drive /c0/e252/s1 State :
=======================
Shield Counter = 0
Media Error Count = 0
Other Error Count = 0
Drive Temperature = 22C (71.60 F)
Predictive Failure Count = 0
S.M.A.R.T alert flagged by drive = No
Drive /c0/e252/s1 Device attributes :
===================================
SN = 163100000002
Manufacturer Id = ATA
Model Number = MICRON_M510DC_MTFDDAK960MBP
NAND Vendor = NA
WWN = 500A075100000002
Firmware Revision = 0013
Raw size = 894.252 GB [0x6fc81ab0 Sectors]
Coerced size = 893.750 GB [0x6fb80000 Sectors]
Non Coerced size = 893.752 GB [0x6fb81ab0 Sectors]
Device Speed = 6.0Gb/s
Link Speed = 6.0Gb/s
NCQ setting = Enabled
Write Cache = N/A
Logical Sector Size = 512B
Physical Sector Size = 4 KB
Connector Name = Port 0 - 3 x1
Drive /c0/e252/s1 Policies/Settings :
===================================
Drive position = DriveGroup:0, Span:0, Row:1
Enclosure position = 0
Connected Port Number = 1(path0)
Sequence Number = 2
Commissioned Spare = No
Emergency Spare = No
Last Predictive Failure Event Sequence Number = 0
Successful diagnostics completion on = N/A
FDE Type = None
SED Capable = Yes
SED Enabled = No
Secured = No
Cryptographic Erase Capable = Yes
Sanitize Support = Not supported
Locked = No
Needs EKM Attention = No
PI Eligible = No
Certified = No
Wide Port Capable = No
Multipath = No
Port Information :
================
-----------------------------------------
Port Status Linkspeed SAS address
-----------------------------------------
0 Active 6.0Gb/s 0x4433221101000000
-----------------------------------------
Inquiry Data =
40 04 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00
00 00 00 00 20 20 20 20 20 20 20 20 36 31 31 33
33 31 35 37 45 44 30 46 00 00 00 00 00 00 30 30
33 31 20 20 20 20 49 4d 52 43 4e 4f 4d 5f 31 35
44 30 5f 43 54 4d 44 46 41 44 39 4b 30 36 42 4d
20 50 20 20 20 20 20 20 20 20 20 20 20 20 10 80
01 40 00 2f 01 40 00 00 00 00 07 00 ff 3f 10 00
3f 00 10 fc fb 00 10 f1 ff ff ff 0f 00 00 07 00
Drive /c0/e252/s2 :
=================
-----------------------------------------------------------------------------------------
EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type
-----------------------------------------------------------------------------------------
252:2 6 Onln 0 893.750 GB SATA SSD Y N 512B MICRON_M510DC_MTFDDAK960MBP U -
-----------------------------------------------------------------------------------------
EID=Enclosure Device ID|Slt=Slot No|DID=Device ID|DG=DriveGroup
DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare
UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface
Med=Media Type|SED=Self Encryptive Drive|PI=Protection Info
SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign
UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded
CFShld=Configured shielded|Cpybck=CopyBack|CBShld=Copyback Shielded
UBUnsp=UBad Unsupported|Rbld=Rebuild
Drive /c0/e252/s2 - Detailed Information :
========================================
Drive /c0/e252/s2 State :
=======================
Shield Counter = 0
Media Error Count = 0
Other Error Count = 0
Drive Temperature = 20C (68.00 F)
Predictive Failure Count = 0
S.M.A.R.T alert flagged by drive = No
Drive /c0/e252/s2 Device attributes :
===================================
SN = 165000000003
Manufacturer Id = ATA
Model Number = MICRON_M510DC_MTFDDAK960MBP
NAND Vendor = NA
WWN = 500A075100000003
Firmware Revision = 0013
Raw size = 894.252 GB [0x6fc81ab0 Sectors]
Coerced size = 893.750 GB [0x6fb80000 Sectors]
Non Coerced size = 893.752 GB [0x6fb81ab0 Sectors]
Device Speed = 6.0Gb/s
Link Speed = 6.0Gb/s
NCQ setting = Enabled
Write Cache = N/A
Logical Sector Size = 512B
Physical Sector Size = 4 KB
Connector Name = Port 0 - 3 x1
Drive /c0/e252/s2 Policies/Settings :
===================================
Drive position = DriveGroup:0, Span:1, Row:0
Enclosure position = 0
Connected Port Number = 2(path0)
Sequence Number = 2
Commissioned Spare = No
Emergency Spare = No
Last Predictive Failure Event Sequence Number = 0
Successful diagnostics completion on = N/A
FDE Type = None
SED Capable = Yes
SED Enabled = No
Secured = No
Cryptographic Erase Capable = Yes
Sanitize Support = Not supported
Locked = No
Needs EKM Attention = No
PI Eligible = No
Certified = No
Wide Port Capable = No
Multipath = No
Port Information :
================
-----------------------------------------
Port Status Linkspeed SAS address
-----------------------------------------
0 Active 6.0Gb/s 0x4433221102000000
-----------------------------------------
Inquiry Data =
40 04 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00
00 00 00 00 20 20 20 20 20 20 20 20 36 31 30 35
35 31 30 31 35 30 42 42 00 00 00 00 00 00 30 30
33 31 20 20 20 20 49 4d 52 43 4e 4f 4d 5f 31 35
44 30 5f 43 54 4d 44 46 41 44 39 4b 30 36 42 4d
20 50 20 20 20 20 20 20 20 20 20 20 20 20 10 80
01 40 00 2f 01 40 00 00 00 00 07 00 ff 3f 10 00
3f 00 10 fc fb 00 10 f1 ff ff ff 0f 00 00 07 00
Drive /c0/e252/s3 :
=================
-----------------------------------------------------------------------------------------
EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type
-----------------------------------------------------------------------------------------
252:3 7 Onln 0 893.750 GB SATA SSD Y N 512B MICRON_M510DC_MTFDDAK960MBP U -
-----------------------------------------------------------------------------------------
EID=Enclosure Device ID|Slt=Slot No|DID=Device ID|DG=DriveGroup
DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare
UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface
Med=Media Type|SED=Self Encryptive Drive|PI=Protection Info
SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign
UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded
CFShld=Configured shielded|Cpybck=CopyBack|CBShld=Copyback Shielded
UBUnsp=UBad Unsupported|Rbld=Rebuild
Drive /c0/e252/s3 - Detailed Information :
========================================
Drive /c0/e252/s3 State :
=======================
Shield Counter = 0
Media Error Count = 0
Other Error Count = 0
Drive Temperature = 20C (68.00 F)
Predictive Failure Count = 0
S.M.A.R.T alert flagged by drive = No
Drive /c0/e252/s3 Device attributes :
===================================
SN = 165000000004
Manufacturer Id = ATA
Model Number = MICRON_M510DC_MTFDDAK960MBP
NAND Vendor = NA
WWN = 500A075100000004
Firmware Revision = 0013
Raw size = 894.252 GB [0x6fc81ab0 Sectors]
Coerced size = 893.750 GB [0x6fb80000 Sectors]
Non Coerced size = 893.752 GB [0x6fb81ab0 Sectors]
Device Speed = 6.0Gb/s
Link Speed = 6.0Gb/s
NCQ setting = Enabled
Write Cache = N/A
Logical Sector Size = 512B
Physical Sector Size = 4 KB
Connector Name = Port 0 - 3 x1
Drive /c0/e252/s3 Policies/Settings :
===================================
Drive position = DriveGroup:0, Span:1, Row:1
Enclosure position = 0
Connected Port Number = 3(path0)
Sequence Number = 2
Commissioned Spare = No
Emergency Spare = No
Last Predictive Failure Event Sequence Number = 0
Successful diagnostics completion on = N/A
FDE Type = None
SED Capable = Yes
SED Enabled = No
Secured = No
Cryptographic Erase Capable = Yes
Sanitize Support = Not supported
Locked = No
Needs EKM Attention = No
PI Eligible = No
Certified = No
Wide Port Capable = No
Multipath = No
Port Information :
================
-----------------------------------------
Port Status Linkspeed SAS address
-----------------------------------------
0 Active 6.0Gb/s 0x4433221103000000
-----------------------------------------
Inquiry Data =
40 04 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00
00 00 00 00 20 20 20 20 20 20 20 20 36 31 30 35
35 31 30 31 35 30 37 42 00 00 00 00 00 00 30 30
33 31 20 20 20 20 49 4d 52 43 4e 4f 4d 5f 31 35
44 30 5f 43 54 4d 44 46 41 44 39 4b 30 36 42 4d
20 50 20 20 20 20 20 20 20 20 20 20 20 20 10 80
01 40 00 2f 01 40 00 00 00 00 07 00 ff 3f 10 00
3f 00 10 fc fb 00 10 f1 ff ff ff 0f 00 00 07 00

24
version.go Normal file
View file

@ -0,0 +1,24 @@
package main
import "fmt"
// Build metadata. The values below are the defaults of an ordinary `go build`;
// release builds overwrite them at link time via -ldflags -X (see Makefile;
// .goreleaser.yaml sets the same three variables). version is sourced from the
// VERSION file so that a single file is the authoritative version, while commit
// and date are filled from git and the build clock.
var (
	// version is the release version; "dev" means nothing was injected.
	version = "dev"
	// commit is the git commit the binary was built from; empty unless injected.
	commit string
	// date is the build timestamp; empty unless injected.
	date string
)
// printVersion prints the program name and version on the first line of
// standard output, then one extra line each for the commit and the build date.
// A detail line is skipped when its value is empty, i.e. when nothing was
// injected for it at build time.
func printVersion() {
	fmt.Println("drive-health-metrics", version)

	// Optional detail lines, in output order: format string paired with value.
	details := []struct {
		format string
		value  string
	}{
		{" commit: %s\n", commit},
		{" built: %s\n", date},
	}
	for _, d := range details {
		if d.value == "" {
			continue
		}
		fmt.Printf(d.format, d.value)
	}
}