commit ddafa90a02c880596c4e78bacd73d83283918975 Author: James Coleman Date: Mon Jun 22 17:14:03 2026 -0500 first commit diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..1d1dd80 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,29 @@ +on: + release: + types: [created] + +permissions: + contents: write + packages: write + +jobs: + goreleaser: + runs-on: ubuntu-latest + steps: + - + name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - + name: Set up Go + uses: actions/setup-go@v4 + - + name: Run GoReleaser + uses: goreleaser/goreleaser-action@v6 + with: + distribution: goreleaser + version: latest + args: release --clean + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/test_golang.yaml b/.github/workflows/test_golang.yaml new file mode 100644 index 0000000..9cef003 --- /dev/null +++ b/.github/workflows/test_golang.yaml @@ -0,0 +1,21 @@ +name: Go package + +on: [push] + +jobs: + build: + + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: '1.21' + + - name: Build + run: go build -v ./... + + - name: Test + run: go test -v ./... diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e63356e --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +# Build output +/dist/ +/drive_health diff --git a/.goreleaser.yaml b/.goreleaser.yaml new file mode 100644 index 0000000..615e175 --- /dev/null +++ b/.goreleaser.yaml @@ -0,0 +1,50 @@ +# GoReleaser config for drive-health-metrics. +# https://goreleaser.com +# +# CGO is disabled so the binary is fully static (no glibc dependency) and runs +# unmodified across modern Linux distributions. +version: 2 + +project_name: drive-health-metrics + +before: + hooks: + - go mod tidy + - go test ./... + +builds: + - id: drive-health-metrics + main: . 
+ binary: drive-health-metrics + env: + - CGO_ENABLED=0 + flags: + - -trimpath + ldflags: + - -s -w -X main.version={{ .Version }} -X main.commit={{ .ShortCommit }} -X main.date={{ .Date }} + goos: + - linux + goarch: + - amd64 + +archives: + - id: default + format: tar.gz + name_template: "{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}" + files: + - README.md + +checksum: + name_template: "checksums.txt" + +snapshot: + version_template: "{{ incpatch .Version }}-snapshot" + +changelog: + use: git + sort: asc + filters: + exclude: + - "^docs:" + - "^test:" + - "^chore:" diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..83d4a90 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,19 @@ +Copyright (c) 2026 Mr. Gecko's Media (James Coleman). http://mrgeckosmedia.com/ + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..0cbc03b --- /dev/null +++ b/Makefile @@ -0,0 +1,36 @@ +BINARY := drive-health-metrics +# VERSION is the single source of truth for the version string. COMMIT and DATE +# are derived from git and the build clock. +VERSION ?= $(shell cat VERSION 2>/dev/null || echo dev) +COMMIT := $(shell git rev-parse --short HEAD 2>/dev/null) +DATE := $(shell date -u '+%Y-%m-%dT%H:%M:%SZ') +LDFLAGS := -s -w -X main.version=$(VERSION) -X main.commit=$(COMMIT) -X main.date=$(DATE) + +.PHONY: all build test vet fmt snapshot release clean tools + +all: test build + +## build: native static binary into dist/ +build: + CGO_ENABLED=0 go build -trimpath -ldflags '$(LDFLAGS)' -o dist/$(BINARY) . + +## test: run the unit tests (smartctl/text/NVMe/MegaCLI parsers + scoring) +test: + go test ./... + +vet: + go vet ./... + +fmt: + gofmt -w *.go + +## snapshot: local GoReleaser build without publishing (artifacts in dist/) +snapshot: + goreleaser release --snapshot --clean + +## release: full GoReleaser release (CI runs this on a tag) +release: + goreleaser release --clean + +clean: + rm -rf dist diff --git a/README.md b/README.md new file mode 100644 index 0000000..ca53d07 --- /dev/null +++ b/README.md @@ -0,0 +1,98 @@ +# drive-health-metrics + +Collects per-drive SMART health from **every physical drive** on a host +— direct SATA/SAS, NVMe, and drives hidden behind a RAID controller +(MegaCLI / storcli / perccli) — scores each drive, and exports the result as +**CSV**, **InfluxDB** (line protocol / API push / Kafka), and **Prometheus**. + +## Modes + +The tool runs one-shot by default and as a long-lived service with `--server`. + +### One-shot (default) + +Writes CSV or InfluxDB line protocol to stdout once and exits. 
Run as root +(SMART access requires it): + +``` +drive-health-metrics # CSV to stdout +drive-health-metrics --format influx # InfluxDB line protocol to stdout (Telegraf exec input) +drive-health-metrics --version +``` + +### Service (`--server`) + +Runs continuously, exposing a Prometheus `/metrics` endpoint and (when +configured) pushing to InfluxDB and/or Kafka on a schedule. Each scrape and +each push re-collects fresh SMART data. + +``` +drive-health-metrics --server # Prometheus on :9101/metrics +drive-health-metrics --server --http-port 9200 # override the port +drive-health-metrics --server -c /etc/drive-health-metrics.yaml +``` + +Send `SIGHUP` to reload the configuration without a full restart. + +The InfluxDB measurement and Prometheus metric prefix are both `drive_health` +(e.g. `drive_health_risk_score`, `drive_health_temp_c`). Identity columns +(serial, model, enclosure_slot, …) are attached as tags/labels. + +## Configuration + +Service mode reads an optional YAML config, searched in this order: the path +given to `-c`/`--config`, then `./config.yaml`, +`~/.config/drive-health-metrics/config.yaml`, and +`/etc/drive-health-metrics.yaml`. Without a file, sensible defaults apply +(Prometheus enabled on `:9101/metrics`, no Influx push). 
+ +```yaml +# config.yaml +hostname: "" # host tag/label; defaults to the system hostname + +http_output: + enabled: true # Prometheus /metrics endpoint + bind_addr: "" # default: all interfaces + port: 9101 + metrics_path: /metrics + +influx_output: + frequency: 60s # push interval; 0 (default) disables the push + + # InfluxDB v2 API (all four required to enable) + influx_server: https://influx.example.com:8086 + token: my-token + org: my-org + bucket: drive-health + + # Kafka (brokers + topic required to enable) + kafka_brokers: ["kafka1:9092", "kafka2:9092"] + kafka_topic: telegraf + kafka_username: "" + kafka_password: "" + kafka_insecure_skip_verify: false + kafka_output_format: lineprotocol # lineprotocol (default) or json +``` + +## Recommendation scoring + +Each drive gets a `risk_score` and a `recommendation`: + +| Recommendation | Meaning | +|----------------|---------| +| `REPLACE_NOW` | hard defect — drive failing/failed (score ≥ 100) | +| `REPLACE_SOON` | serious wear or accumulating defects (≥ 50) | +| `MONITOR` | early warning signs (≥ 20) | +| `OK` | no meaningful defects (< 20) | +| `NO_DATA` | SMART unreadable **and** no controller red flags — re-collect, don't replace | + +Only real, drive-attributable defects add meaningful score; missing/unreadable +data is never treated as a failure. + +## Building + +``` +make build # native static binary -> dist/drive-health-metrics +make test # unit tests (parsers + scoring + exporters) +make snapshot # local GoReleaser build, no publish +``` diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..6e8bf73 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.1.0 diff --git a/collect.go b/collect.go new file mode 100644 index 0000000..3783b1d --- /dev/null +++ b/collect.go @@ -0,0 +1,189 @@ +package main + +import ( + "fmt" + "sort" + "strings" + "time" +) + +// collect discovers every drive, queries SMART, attaches controller data, and +// scores it. 
+func collect() ([]*Drive, int64) { + host := hostname() + if app != nil && app.config != nil && app.config.Hostname != "" { + host = app.config.Hostname + } + collectedAt := time.Now().UTC().Format("2006-01-02T15:04:05Z") + tsNs := time.Now().Unix() * 1e9 + + st := newSmartTool() + ctrl := controllerIndex() + devices := st.scan() + + // Fallback: no scan-open results but we do have controller drives -> probe + // a base device by megaraid index. + if len(devices) == 0 && len(ctrl) > 0 { + base := findBaseDev() + ids := make([]string, 0, len(ctrl)) + for id := range ctrl { + ids = append(ids, id) + } + sort.Strings(ids) + for _, id := range ids { + for _, tmpl := range megaraidDtypes { + devices = append(devices, scanned{ + path: base, dtype: fmt.Sprintf(tmpl, id), megaraidN: id, + }) + } + } + } + + var drives []*Drive + matched := map[string]bool{} // Controller IDs covered by a smartctl device. + for _, sc := range devices { + if sc.megaraidN != "" { + matched[sc.megaraidN] = true + } + d := &Drive{ + CollectedAt: collectedAt, + Hostname: host, + DeviceID: sc.megaraidN, + } + + ok := st.querySmart(sc.path, sc.dtype, d) + + // Skip iSCSI LUNs and RAID virtual disks; they are not physical drives. + if isPseudoDevice(d) { + continue + } + + // Attach controller-side data by megaraid index == DeviceID. + if sc.megaraidN != "" { + if cd, found := ctrl[sc.megaraidN]; found { + applyController(d, cd) + } + } + + // Determine whether real SMART attribute data was obtained. + d.HaveSmart = ok && d.Model != "" && (d.PowerOnHours != nil || + d.WearPctRemaining != nil || + d.SmartHealth == "PASSED" || d.SmartHealth == "FAILED" || + d.SmartHealth == "PASSED_BY_ATTR") + + finalizeDerived(d) + drives = append(drives, d) + } + + // Emit controller-only drives: physical drives the controller reports but + // smartctl cannot reach (e.g. NVMe behind a PERC). Health comes entirely + // from the controller (Status, predictive-failure, media/other counters). 
+ ids := make([]string, 0, len(ctrl)) + for id := range ctrl { + ids = append(ids, id) + } + sort.Strings(ids) + for _, id := range ids { + if matched[id] { + continue + } + d := &Drive{CollectedAt: collectedAt, Hostname: host, DeviceID: id} + applyController(d, ctrl[id]) + if isPseudoDevice(d) { + continue + } + d.HaveSmart = false + finalizeDerived(d) + drives = append(drives, d) + } + return drives, tsNs +} + +// applyController fills controller-side fields and uses MegaCLI/storcli inquiry +// as an identity fallback when smartctl passthrough failed. +func applyController(d *Drive, cd ctrlDrive) { + d.Enclosure = cd.Enclosure + d.Slot = cd.Slot + d.MediaErrCtrl = cd.MediaErr + d.OtherErrCtrl = cd.OtherErr + d.PredictiveFailureCtrl = cd.Predictive + d.SmartAlertCtrl = cd.SmartAlert + d.FwState = cd.FwState + + // Identity fallback for when smartctl could not read the drive. Prefer the + // structured fields (perccli2); else split the legacy single-line Inquiry. + if cd.Model != "" || cd.Serial != "" || cd.Firmware != "" { + if d.Serial == "" { + d.Serial = cd.Serial + } + if d.Model == "" { + d.Model = cd.Model + } + if d.Firmware == "" { + d.Firmware = cd.Firmware + } + } else if cd.Inquiry != "" { + // Legacy MegaCLI "Inquiry Data" packs " " on + // one line, where the model itself can contain spaces and the token count + // varies. Serial is always first and the firmware revision always last, so + // anchor on those and treat everything between as the model. + parts := strings.Fields(cd.Inquiry) + if d.Serial == "" && len(parts) >= 1 { + d.Serial = parts[0] + } + if d.Firmware == "" && len(parts) >= 2 { + d.Firmware = parts[len(parts)-1] + } + if d.Model == "" && len(parts) >= 3 { + d.Model = strings.Join(parts[1:len(parts)-1], " ") + } + } + if d.Rotation == "" { + d.Rotation = cd.Rotation + } + if d.TempC == nil { + d.TempC = cd.TempC + } +} + +// finalizeDerived computes defect_total, power_on_years, and the risk score. 
+func finalizeDerived(d *Drive) { + // Aggregate drive-attributable defect counters. nil only when NONE was + // readable, so NO_DATA rows stay blank instead of showing a misleading 0. + defectParts := []*int{ + d.Reallocated, d.Pending, d.Uncorrectable, + d.ReportedUncorrect, d.RuntimeBadBlocks, d.EndToEnd, + } + anyKnown := false + sum := 0 + for _, p := range defectParts { + if p != nil { + anyKnown = true + sum += *p + } + } + if anyKnown { + d.DefectTotal = pInt(sum) + } + + if d.PowerOnHours != nil && *d.PowerOnHours > 0 { + y := float64(*d.PowerOnHours) / 8760.0 + d.PowerOnYears = pF(float64(int(y*100+0.5)) / 100) // Round to two decimals. + } + + d.RiskScore, d.Recommendation, d.RiskReasons = scoreDrive(d) +} + +// findBaseDev returns a real base block device to anchor the megaraid +// passthrough fallback probe, skipping loop and md devices and defaulting to +// /dev/sda when lsblk yields nothing usable. +func findBaseDev() string { + out := run("lsblk", "-dno", "NAME") + for _, ln := range strings.Split(out, "\n") { + name := strings.TrimSpace(ln) + if name != "" && !strings.Contains(name, "loop") && !strings.HasPrefix(name, "md") { + return "/dev/" + name + } + } + return "/dev/sda" +} diff --git a/config.go b/config.go new file mode 100644 index 0000000..97891f4 --- /dev/null +++ b/config.go @@ -0,0 +1,131 @@ +package main + +import ( + "log" + "os" + "os/user" + "path" + "path/filepath" + "time" + + "github.com/kkyr/fig" +) + +// Config is the service-mode configuration, loaded from YAML (via fig) and +// overridable by flags. It only governs the output exporters; drive discovery +// and SMART collection auto-detect their tools and need no configuration. +type Config struct { + // Hostname is used as the host tag/label on every metric. When empty it is + // resolved from the system hostname. + Hostname string `fig:"hostname"` + + // Metric outputs. 
// InfluxOutputConfig configures the scheduled InfluxDB output. Metrics are
// pushed every Frequency to InfluxDB's v2 API and/or to Kafka. A zero Frequency
// (or no destination configured) disables the output.
//
// The README describes what makes a destination "configured": the InfluxDB v2
// API needs influx_server, token, org, and bucket; Kafka needs kafka_brokers
// and kafka_topic. NOTE(review): that check lives in the exporter code, not in
// this struct — confirm there.
type InfluxOutputConfig struct {
	// Frequency is the push interval (YAML key "frequency", e.g. 60s). The zero
	// value disables the push.
	Frequency time.Duration `fig:"frequency"`

	// Kafka destination. KafkaOutputFormat defaults to "lineprotocol" (set by
	// defaultConfig); the README lists "json" as the alternative.
	KafkaBrokers            []string `fig:"kafka_brokers"`
	KafkaTopic              string   `fig:"kafka_topic"`
	KafkaUsername           string   `fig:"kafka_username"`
	KafkaPassword           string   `fig:"kafka_password"`
	KafkaInsecureSkipVerify bool     `fig:"kafka_insecure_skip_verify"`
	KafkaOutputFormat       string   `fig:"kafka_output_format"` // lineprotocol (default) or json.

	// InfluxDB v2 API destination: server URL, API token, organization, bucket.
	InfluxServer string `fig:"influx_server"`
	Token        string `fig:"token"`
	Org          string `fig:"org"`
	Bucket       string `fig:"bucket"`
}
// findConfigFile returns the first configuration file that exists, preferring
// the -config flag (configPath), then ./config.yaml, then
// ~/.config/drive-health-metrics/config.yaml, and finally
// /etc/drive-health-metrics.yaml. It returns "" when none is found —
// configuration is optional.
//
// Only regular paths count: a directory is never returned, since it cannot be
// parsed as a config file. When configPath is set but unusable the problem is
// logged and the search continues through the default locations.
func findConfigFile(configPath string) string {
	// isFile reports whether p can be stat'ed and is not a directory.
	isFile := func(p string) bool {
		info, err := os.Stat(p)
		return err == nil && !info.IsDir()
	}

	if configPath != "" {
		if isFile(configPath) {
			return configPath
		}
		log.Printf("Configured config path %q is not a usable file, searching the default locations", configPath)
	}

	var candidates []string
	if local, err := filepath.Abs("./config.yaml"); err == nil {
		candidates = append(candidates, local)
	}
	// Skip the per-user location when the home directory is unknown; joining an
	// empty home would silently produce a path relative to the working dir.
	if usr, err := user.Current(); err == nil && usr.HomeDir != "" {
		candidates = append(candidates,
			filepath.Join(usr.HomeDir, ".config", "drive-health-metrics", "config.yaml"))
	}
	candidates = append(candidates, "/etc/drive-health-metrics.yaml")

	for _, c := range candidates {
		if isFile(c) {
			return c
		}
	}
	return ""
}
+ if a.flags.HTTPBind != "" { + config.HTTP.BindAddr = a.flags.HTTPBind + } + if a.flags.HTTPPort != 0 { + config.HTTP.Port = a.flags.HTTPPort + } + if a.flags.HTTPMetricsPath != "" { + config.HTTP.MetricsPath = a.flags.HTTPMetricsPath + } + + a.config = config +} diff --git a/controller.go b/controller.go new file mode 100644 index 0000000..9690850 --- /dev/null +++ b/controller.go @@ -0,0 +1,349 @@ +package main + +import ( + "regexp" + "strconv" + "strings" +) + +// ctrlDrive holds the RAID-controller-side view of one physical drive — data +// smartctl cannot see (predictive-failure, firmware state, controller media/ +// other error counters, physical enclosure:slot). Keyed for matching to a +// smartctl megaraid passthrough by DeviceID (== the megaraid,N index). +type ctrlDrive struct { + DeviceID string + Enclosure string + Slot string + MediaErr int + OtherErr int + Predictive int + SmartAlert bool + FwState string + TempC *int + Inquiry string // Inquiry is the legacy single-line MegaCLI/storcli inquiry (serial model fw). + Model string // Model is the structured identity (perccli2); used for controller-only drives. + Serial string + Firmware string + Rotation string // Rotation is "SSD"/"NVMe" derived from controller media/interface, when known. +} + +// controllerIndex enumerates all RAID controllers found, preferring modern +// tools (storcli/perccli) then MegaCLI, and returns a DeviceID->ctrlDrive map. +// If no controller CLI is present (plain HBA / onboard SATA / NVMe) it returns +// an empty map — that's fine, smartctl still covers those drives directly. +func controllerIndex() map[string]ctrlDrive { + idx := map[string]ctrlDrive{} + + // perccli2 (8.x) is JSON-native. Its plain-text "show all" adds a second + // status column that breaks positional parsing, so query JSON ('J') and use + // the dedicated parser. Tried first since it covers the newest controllers. 
+ for _, bin := range []string{"perccli2", "/opt/MegaRAID/perccli2/perccli2"} { + p := lookPath(bin) + if p == "" { + continue + } + drives := parsePerccli2(run(p, "/call/eall/sall", "show", "all", "J")) + for _, cd := range drives { + mergeCtrl(idx, cd) + } + if len(drives) > 0 { + break + } + } + + // storcli / perccli (classic) share the same text "show all" layout (perccli + // is Dell's rebrand). Try each installed binary until one returns drives, so + // a host with several tools present still resolves. + for _, bin := range []string{"storcli64", "storcli", "perccli64", "perccli", + "/opt/MegaRAID/storcli/storcli64", "/opt/MegaRAID/perccli/perccli64"} { + p := lookPath(bin) + if p == "" { + continue + } + drives := parseStorcli(run(p, "/call/eall/sall", "show", "all")) + for _, cd := range drives { + mergeCtrl(idx, cd) + } + if len(drives) > 0 { + break + } + } + + // MegaCLI (older controllers). Same try-until-data approach. + for _, bin := range []string{"MegaCli64", "MegaCli", "megacli", + "/opt/MegaRAID/MegaCli/MegaCli64", "/usr/sbin/megacli"} { + p := lookPath(bin) + if p == "" { + continue + } + drives := parseMegacliPDList(run(p, "-PDList", "-aAll")) + for _, cd := range drives { + mergeCtrl(idx, cd) + } + if len(drives) > 0 { + break + } + } + return idx +} + +// mergeCtrl records cd under its DeviceID, keeping the first writer so the +// preferred tool (queried earlier) wins and a later tool can't clobber it. +// Entries without a DeviceID are dropped — they can't be matched to a drive. +func mergeCtrl(idx map[string]ctrlDrive, cd ctrlDrive) { + if cd.DeviceID == "" { + return + } + if _, exists := idx[cd.DeviceID]; !exists { + idx[cd.DeviceID] = cd + } +} + +// afterColon returns the trimmed text following the first colon, or "". It reads +// the "Key : Value" lines MegaCLI/storcli emit. 
// afterColon extracts the value from a "Key : Value" line as emitted by
// MegaCLI/storcli: everything after the first colon, with surrounding
// whitespace removed. A line without a colon yields "".
func afterColon(s string) string {
	parts := strings.SplitN(s, ":", 2)
	if len(parts) < 2 {
		return ""
	}
	return strings.TrimSpace(parts[1])
}
A physical drive is +// introduced by a bare summary header ("Drive /c0/e64/s0 :") followed by a +// table row ("64:0 22 Onln ...") that carries the DID (== the megaraid index +// smartctl uses) and the controller state. The same drive then repeats sub- +// section headers ("Drive .../s0 - Detailed Information :", "... State :", +// "... Device attributes :") that must NOT open a new record — only the bare +// summary header does — so detail fields ("Key = Value") accumulate into one +// record across those sections. +func parseStorcli(text string) []ctrlDrive { + var drives []ctrlDrive + var cur ctrlDrive + have := false + // hdr matches only the bare summary header (path then ": " at end), not the + // "- Detailed Information"/"State"/"Device attributes" sub-section headers. + hdr := regexp.MustCompile(`^Drive /c\d+/e(\d+)/s(\d+)\s*:$`) + // row matches the summary table data row "EID:Slt DID State ..."; this + // storcli version reports the DID here, never as a "DID = N" line. + row := regexp.MustCompile(`^(\d+):(\d+)\s+(\d+)\s+(\S+)`) + flush := func() { + if have && (cur.DeviceID != "" || cur.Slot != "") { + drives = append(drives, cur) + } + } + kv := func(s string) (string, string, bool) { + if i := strings.Index(s, "="); i >= 0 { + return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:]), true + } + return "", "", false + } + for _, raw := range strings.Split(text, "\n") { + s := strings.TrimSpace(raw) + + // New drive record: only the bare summary header opens one. + if m := hdr.FindStringSubmatch(s); m != nil { + flush() + cur = ctrlDrive{Enclosure: m[1], Slot: m[2]} + have = true + continue + } + if !have { + continue + } + // Summary table row supplies the DID and controller state. 
+ if m := row.FindStringSubmatch(s); m != nil && cur.DeviceID == "" { + cur.DeviceID = m[3] + cur.FwState = m[4] + continue + } + k, v, ok := kv(s) + if !ok { + continue + } + switch k { + case "DID": + cur.DeviceID = v + case "Media Error Count": + cur.MediaErr = atoiSafe(v) + case "Other Error Count": + cur.OtherErr = atoiSafe(v) + case "Predictive Failure Count": + cur.Predictive = atoiSafe(v) + case "S.M.A.R.T alert flagged by drive": + cur.SmartAlert = strings.EqualFold(v, "Yes") + case "Firmware state", "State": + if cur.FwState == "" { + cur.FwState = v + } + case "Drive Temperature": + if m := regexp.MustCompile(`(\d+)\s*C`).FindStringSubmatch(v); m != nil { + cur.TempC = pInt(atoiSafe(m[1])) + } + case "Model Number", "Manufacturer Identification": + if cur.Inquiry == "" { + cur.Inquiry = v + } + } + } + flush() + return drives +} + +// parsePerccli2 parses `perccli2 /call/eall/sall show all J` (JSON). perccli2 +// (8.x) renames the classic DID to PID and splits the single State column into +// State (RAID role: Conf/UConf/GHS/JBOD) and Status (health: Online/Offline/ +// Failed/Missing); the latter is what maps to FwState. Drives nest under +// Controllers[].Response Data.Drives List[]; health counters sit directly in +// "Drive Detailed Information" (SAS/SATA) or under its "LU/NS Properties" for +// NVMe namespaces. +func parsePerccli2(text string) []ctrlDrive { + m := loadJSON(text) + if m == nil { + return nil + } + controllers, ok := m["Controllers"].([]interface{}) + if !ok { + return nil + } + var drives []ctrlDrive + for _, c := range controllers { + cm, ok := c.(map[string]interface{}) + if !ok { + continue + } + list, ok := jLeaf(cm, "Response Data", "Drives List").([]interface{}) + if !ok { + continue + } + for _, it := range list { + dm, ok := it.(map[string]interface{}) + if !ok { + continue + } + info := jObj(dm, "Drive Information") + if info == nil { + continue + } + cd := ctrlDrive{} + + // Location + identity from the summary block. 
// perccli2Rotation derives the rotation label from perccli2's interface and
// media columns. An NVMe interface takes precedence and yields "NVMe"; failing
// that, SSD media yields "SSD"; anything else yields "" (unknown). Both
// comparisons ignore case.
func perccli2Rotation(intf, med string) string {
	if strings.EqualFold(intf, "NVMe") {
		return "NVMe"
	}
	if strings.EqualFold(med, "SSD") {
		return "SSD"
	}
	return ""
}
// firstIntRe matches the first run of decimal digits, with an optional leading
// minus sign. It is compiled once at package scope because atoiSafe is called
// for every numeric field of every drive.
var firstIntRe = regexp.MustCompile(`-?\d+`)

// atoiSafe extracts the first integer found in s (a leading minus sign is
// honoured) and returns 0 when none is present, since controller output often
// wraps the number in units or surrounding labels. A digit run too large for
// an int also yields 0.
func atoiSafe(s string) int {
	m := firstIntRe.FindString(s)
	if m == "" {
		return 0
	}
	n, err := strconv.Atoi(m)
	if err != nil {
		return 0 // Out of range for int.
	}
	return n
}
+func (s smartTool) scan() []scanned { + out := run(s.bin, "--scan-open") + var res []scanned + for _, ln := range strings.Split(out, "\n") { + ln = strings.TrimSpace(ln) + if ln == "" || strings.HasPrefix(ln, "#") { + continue + } + m := scanLine.FindStringSubmatch(ln) + if m == nil { + continue + } + path, dtype, comment := m[1], m[2], m[3] + if strings.Contains(strings.ToUpper(comment), "VIRTUAL-DISK") { + continue // Skip iSCSI IET virtual disks; they are not physical drives. + } + sc := scanned{path: path, dtype: dtype, comment: comment} + if mn := megaraidIdx.FindStringSubmatch(dtype); mn != nil { + sc.megaraidN = mn[1] + } + res = append(res, sc) + } + return res +} + +// querySmart runs smartctl against a device. With a JSON-capable smartctl it +// parses -j; otherwise it parses -a text. On the megaraid path, when no +// explicit -d works it tries the common passthrough type variants. +func (s smartTool) querySmart(path, dtype string, d *Drive) bool { + args := func(extra ...string) []string { + a := []string{} + if s.jsonCapable { + a = append(a, "-j") + } + a = append(a, "-a") + if dtype != "" { + a = append(a, "-d", dtype) + } + a = append(a, extra...) + a = append(a, path) + return a + } + + raw := run(s.bin, args()...) + if s.jsonCapable { + j := loadJSON(raw) + // Capture identity + transport even when SMART is unusable, so pseudo- + // device filtering can recognize SMART-less controller VDs (e.g. "DELL + // RAID") and iSCSI LUNs that expose no usable SMART. + if d.Model == "" { + d.Model = first(jStr(j, "model_name"), jStr(j, "scsi_model_name")) + } + if d.Transport == "" { + d.Transport = jStr(j, "scsi_transport_protocol", "name") + } + if jsonUsable(j) { + parseSmartJSON(j, d) + d.DevicePath = path + d.Dtype = dtype + return true + } + return false + } + // Text path: usability check is "did we get a model + some health/attrs". 
+ if looksLikeSmartText(raw) { + parseSmartText(raw, d) + d.DevicePath = path + d.Dtype = dtype + return d.Model != "" || d.SmartHealth != "UNKNOWN" + } + return false +} + +// megaraidDtypes lists the megaraid passthrough type variants to try when the +// scan didn't pin one. +var megaraidDtypes = []string{"sat+megaraid,%s", "megaraid,%s", "scsi+megaraid,%s"} + +// pseudoDeviceModels lists lowercase model substrings that identify devices +// which are not physical drives: iSCSI targets and RAID controller virtual +// disks. `smartctl --scan-open` presents these as plain "-d scsi" with no +// VIRTUAL-DISK hint in the scan comment, so they are filtered after identity is +// read. Extend this list as new controller families appear in the fleet. +var pseudoDeviceModels = []string{ + "virtual-disk", "virtual disk", // iSCSI IET LUNs (e.g. "IET VIRTUAL-DISK"). + "lio-org", // Linux-IO iSCSI target LUNs (text-path fallback). + // RAID controller virtual disks report the HBA vendor/model as their + // identity (e.g. "AVAGO MR9363-4i", "BROADCOM MR9560-16i", "DELL PERC + // H730", "DELL RAID"). These tokens appear on controllers, never on bare + // drives. + "avago", "broadcom", "lsi", "megaraid", "perc", "adaptec", "microsemi", + "dell raid", +} + +// isPseudoDevice reports whether a queried device is an iSCSI target or a RAID +// controller virtual disk rather than a physical drive. An iSCSI SCSI transport +// is the authoritative signal (covers LIO, IET, any target software); the model +// token list catches RAID virtual disks and the legacy text path that has no +// transport field. 
// looksLikeSmartText reports whether raw resembles genuine smartctl text
// output: it must be non-blank and contain at least one of the identity or
// health section markers (matched case-sensitively). This keeps the text
// parser from acting on error messages or empty output.
func looksLikeSmartText(raw string) bool {
	if len(strings.TrimSpace(raw)) == 0 {
		return false
	}
	switch {
	case strings.Contains(raw, "=== START OF INFORMATION SECTION ==="),
		strings.Contains(raw, "Device Model:"),
		strings.Contains(raw, "Model Number:"),
		strings.Contains(raw, "Product:"),
		strings.Contains(raw, "SMART overall-health"),
		strings.Contains(raw, "SMART Health Status"):
		return true
	}
	return false
}
+ Serial string + Model string + Firmware string + Capacity string + Rotation string // Rotation is "SSD", "NVMe", or "7200 rpm". + + SmartHealth string // PASSED | FAILED | PASSED_BY_ATTR | OK | UNKNOWN + + // Drive-attributable defect counters (nil = not readable). + Reallocated *int + ReallocatedEvents *int + Pending *int + Uncorrectable *int + ReportedUncorrect *int + RuntimeBadBlocks *int + EndToEnd *int + UdmaCrc *int + + // RAID controller signals (MegaCLI / storcli / perccli). + MediaErrCtrl int + OtherErrCtrl int + PredictiveFailureCtrl int + SmartAlertCtrl bool + FwState string + + // Wear (vendor-normalized; remaining = % life left, consumed = 100 - that). + WearPctRemaining *int + WearPctWorst *int + WearPctConsumed *int + WearSrc string + UnusedReservePct *int + HostWrittenTB *float64 + + // NVMe-specific health (from the NVMe SMART/Health log). + NvmeCriticalWarning *int + NvmeAvailSpare *int + NvmeAvailSpareThresh *int + NvmeMediaErrors *int + + // Age. + PowerOnHours *int + PowerOnYears *float64 + PowerCycleCount *int + TempC *int + + // Derived. + HaveSmart bool + DefectTotal *int + RiskScore int + Recommendation string + RiskReasons string + + // Diagnostics. 
// ---- Small pointer helpers ----

// pInt returns a pointer to a private copy of n, for filling nullable int
// fields.
func pInt(n int) *int {
	v := n
	return &v
}

// pF returns a pointer to a private copy of f, for filling nullable float
// fields.
func pF(f float64) *float64 {
	v := f
	return &v
}

// iv dereferences p, treating a nil pointer (value unknown) as 0.
func iv(p *int) int {
	if p != nil {
		return *p
	}
	return 0
}
iv(d.WearPctConsumed) != 12 { + t.Errorf("wear src=%q consumed=%v, want Intel/ID202 / 12", d.WearSrc, d.WearPctConsumed) + } + if d.SmartHealth != "PASSED" { + t.Errorf("health = %q", d.SmartHealth) + } + + d.HaveSmart = true + finalizeDerived(d) + // reallocated 8 -> min(40+40,100)=80; udma 3 -> 9; total 89 -> REPLACE_SOON. + if d.Recommendation != "REPLACE_SOON" { + t.Errorf("rec = %q (score %d), want REPLACE_SOON", d.Recommendation, d.RiskScore) + } + if iv(d.DefectTotal) != 8 { + t.Errorf("defect_total = %v, want 8", d.DefectTotal) + } +} + +// ---- Text path (CentOS 6/7, smartmontools 5.x/6.x: no JSON) ---- + +const ataText = `smartctl 6.5 2016-05-07 r4318 [x86_64-linux] (local build) + +=== START OF INFORMATION SECTION === +Device Model: INTEL SSDSC2BB480G6 +Serial Number: BTWA12345678480BGN +Firmware Version: G2010140 +User Capacity: 480,103,981,056 bytes [480 GB] +Rotation Rate: Solid State Device + +=== START OF READ SMART DATA SECTION === +SMART overall-health self-assessment test result: PASSED + +ID# ATTRIBUTE_NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE + 5 Reallocated_Sector_Ct 0x0032 100 100 000 Old_age Always - 0 + 9 Power_On_Hours 0x0032 100 100 000 Old_age Always - 51000 +197 Current_Pending_Sector 0x0012 100 100 000 Old_age Always - 5 +233 Media_Wearout_Indicator 0x0032 072 072 000 Old_age Always - 0 +` + +func TestParseSmartText_ATA(t *testing.T) { + d := &Drive{} + parseSmartText(ataText, d) + + if d.Model != "INTEL SSDSC2BB480G6" { + t.Errorf("model = %q", d.Model) + } + if d.SmartHealth != "PASSED" { + t.Errorf("health = %q", d.SmartHealth) + } + if iv(d.Pending) != 5 { + t.Errorf("pending = %v, want 5", d.Pending) + } + if iv(d.PowerOnHours) != 51000 { + t.Errorf("poh = %v, want 51000", d.PowerOnHours) + } + if d.WearSrc != "Generic/ID233" || iv(d.WearPctConsumed) != 28 { + t.Errorf("wear src=%q consumed=%v, want Generic/ID233 / 28", d.WearSrc, d.WearPctConsumed) + } + + d.HaveSmart = true + finalizeDerived(d) + // pending 
5 -> min(50+25,100)=75 -> REPLACE_SOON. + if d.Recommendation != "REPLACE_SOON" { + t.Errorf("rec = %q (score %d), want REPLACE_SOON", d.Recommendation, d.RiskScore) + } +} + +// ---- NVMe text path ---- + +const nvmeText = `smartctl 7.2 2020-12-30 r5155 [x86_64-linux] + +=== START OF INFORMATION SECTION === +Model Number: Samsung SSD 980 PRO 1TB +Serial Number: S5GXNX0R123456 +Firmware Version: 5B2QGXA7 +=== START OF SMART DATA SECTION === +SMART overall-health self-assessment test result: PASSED + +SMART/Health Information (NVMe Log 0x02) +Critical Warning: 0x04 +Temperature: 40 Celsius +Available Spare: 8% +Available Spare Threshold: 10% +Percentage Used: 96% +Power On Hours: 30,123 +Power Cycles: 210 +Media and Data Integrity Errors: 0 +` + +func TestParseSmartText_NVMe(t *testing.T) { + d := &Drive{} + parseSmartText(nvmeText, d) + + if d.Rotation != "NVMe" { + t.Errorf("rotation = %q, want NVMe", d.Rotation) + } + if d.NvmeCriticalWarning == nil || *d.NvmeCriticalWarning != 4 { + t.Errorf("critical_warning = %v, want 4", d.NvmeCriticalWarning) + } + if iv(d.NvmeAvailSpare) != 8 || iv(d.NvmeAvailSpareThresh) != 10 { + t.Errorf("spare=%v thresh=%v, want 8/10", d.NvmeAvailSpare, d.NvmeAvailSpareThresh) + } + if iv(d.WearPctConsumed) != 96 { + t.Errorf("wear consumed = %v, want 96", d.WearPctConsumed) + } + + d.HaveSmart = true + finalizeDerived(d) + // crit warning +60, spare<=thresh +40, wear96 +80 -> >=100 REPLACE_NOW. 
+ if d.Recommendation != "REPLACE_NOW" { + t.Errorf("rec = %q (score %d), want REPLACE_NOW", d.Recommendation, d.RiskScore) + } +} + +// ---- NO_DATA: nothing readable, no controller flags ---- + +func TestNoData(t *testing.T) { + d := &Drive{HaveSmart: false} + finalizeDerived(d) + if d.Recommendation != "NO_DATA" { + t.Errorf("rec = %q, want NO_DATA", d.Recommendation) + } + if d.DefectTotal != nil { + t.Errorf("defect_total = %v, want nil (blank)", d.DefectTotal) + } +} + +// ---- MegaCLI PDList parsing + controller-driven scoring ---- + +const megacliText = ` +Enclosure Device ID: 64 +Slot Number: 3 +Device Id: 11 +WWN: 5000C500A1B2C3D4 +Media Error Count: 369 +Other Error Count: 2 +Predictive Failure Count: 1 +Drive has flagged a S.M.A.R.T alert : Yes +Firmware state: Online, Spun Up +Inquiry Data: BTWA12345678 INTELSSDSC2BB480G6 G2010140 +Drive Temperature: 35C (95.00 F) + +Enclosure Device ID: 64 +Slot Number: 0 +Device Id: 8 +Firmware state: Online, Spun Up +Media Error Count: 0 +` + +func TestMegacliAndScore(t *testing.T) { + drives := parseMegacliPDList(megacliText) + if len(drives) != 2 { + t.Fatalf("parsed %d drives, want 2", len(drives)) + } + idx := map[string]ctrlDrive{} + for _, cd := range drives { + idx[cd.DeviceID] = cd + } + cd, ok := idx["11"] + if !ok { + t.Fatal("device 11 not found") + } + if cd.MediaErr != 369 || cd.Predictive != 1 || !cd.SmartAlert { + t.Errorf("dev11 media=%d pred=%d alert=%v", cd.MediaErr, cd.Predictive, cd.SmartAlert) + } + if cd.Enclosure != "64" || cd.Slot != "3" { + t.Errorf("dev11 location %s:%s, want 64:3", cd.Enclosure, cd.Slot) + } + + // Controller-only drive (no smartctl): predictive + alert -> not NO_DATA. + d := &Drive{HaveSmart: false} + applyController(d, cd) + finalizeDerived(d) + // predictive +70, alert +50, media 369 +30 = 150 -> REPLACE_NOW. 
+ if d.Recommendation != "REPLACE_NOW" { + t.Errorf("rec = %q (score %d), want REPLACE_NOW", d.Recommendation, d.RiskScore) + } + if d.enclosureSlot() != "64:3" { + t.Errorf("enclosure_slot = %q", d.enclosureSlot()) + } +} + +// ---- MegaCLI Inquiry Data identity fallback (real-world layouts) ---- + +// applyController's inquiry fallback must anchor serial=first / firmware=last and +// treat the (space-containing) remainder as the model. These samples are the real +// MegaCLI "Inquiry Data" forms documented alongside the parser; the naive +// serial/model/firmware positional split mis-handled them. +func TestApplyControllerInquiry(t *testing.T) { + cases := []struct { + name string + inquiry string + wantSerial, wantModel, wantFw string + }{ + { + "model with internal space", + "50026B727A005DED KINGSTON SEDC400S37480G SAFM02.H", + "50026B727A005DED", "KINGSTON SEDC400S37480G", "SAFM02.H", + }, + { + "two tokens: serial + firmware only", + "ZRT0CQ55ST12000NM000J-2TY103 SN02", + "ZRT0CQ55ST12000NM000J-2TY103", "", "SN02", + }, + { + "clean three tokens", + "BTWA12345678 INTELSSDSC2BB480G6 G2010140", + "BTWA12345678", "INTELSSDSC2BB480G6", "G2010140", + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + d := &Drive{} + applyController(d, ctrlDrive{Inquiry: c.inquiry}) + if d.Serial != c.wantSerial || d.Model != c.wantModel || d.Firmware != c.wantFw { + t.Errorf("got serial=%q model=%q fw=%q; want serial=%q model=%q fw=%q", + d.Serial, d.Model, d.Firmware, c.wantSerial, c.wantModel, c.wantFw) + } + }) + } +} + +// ---- Output smoke: CSV header + influx line shape ---- + +func TestOutputShapes(t *testing.T) { + d := &Drive{Hostname: "kvm60", Model: "X", Serial: "S1", SmartHealth: "PASSED"} + finalizeDerived(d) + csv := recordsToCSV([]*Drive{d}) + if len(csv) == 0 || csv[:len("collected_at")] != "collected_at" { + t.Errorf("csv header malformed: %.40q", csv) + } + inf := recordsToInflux([]*Drive{d}, 1700000000000000000) + if len(inf) < 
// lookPath returns the first usable executable among candidates, or "" when
// none qualifies. Candidates are tried in the order given:
//
//   - a bare name (no path separator) is resolved through PATH with
//     exec.LookPath, which itself verifies executability;
//   - a candidate containing a path separator is treated as a filesystem path
//     and accepted only when it exists, is not a directory, and has at least
//     one execute permission bit set.
//
// The execute-bit check keeps the path branch consistent with the PATH branch,
// so a stray non-executable file at a fallback location does not shadow a
// working binary listed later.
func lookPath(candidates ...string) string {
	for _, c := range candidates {
		if !strings.ContainsRune(c, os.PathSeparator) {
			if p, err := exec.LookPath(c); err == nil {
				return p
			}
			continue
		}
		fi, err := os.Stat(c)
		if err != nil || fi.IsDir() {
			continue
		}
		// NOTE(review): relies on Unix permission bits; the release config
		// builds for linux only — revisit if other platforms are added.
		if fi.Mode().Perm()&0111 == 0 {
			continue
		}
		return c
	}
	return ""
}
The label set +// (the schema's string columns) is attached to every metric so the Prometheus +// and InfluxDB outputs describe each drive identically. +type DriveExporter struct { + descs map[string]*prometheus.Desc + labels []column // String columns carried as labels. + gauges []column // Numeric columns emitted as gauges. + // collect discovers the drives; a field so tests can inject a fixed set + // without touching real hardware. + collect func() ([]*Drive, int64) +} + +// NewDriveExporter builds the collector with one gauge descriptor per numeric +// column, labelled with the schema's string columns. +func NewDriveExporter() *DriveExporter { + labels, gauges := labelColumns(), gaugeColumns() + labelNames := make([]string, len(labels)) + for i, c := range labels { + labelNames[i] = c.name + } + descs := make(map[string]*prometheus.Desc, len(gauges)) + for _, c := range gauges { + descs[c.name] = prometheus.NewDesc(namespace+"_"+c.name, "drive health metric: "+c.name, labelNames, nil) + } + return &DriveExporter{descs: descs, labels: labels, gauges: gauges, collect: collect} +} + +// Reload is a no-op; the exporter holds no configurable state. +func (e *DriveExporter) Reload() {} + +// Describe sends every metric descriptor to the channel. +func (e *DriveExporter) Describe(ch chan<- *prometheus.Desc) { + for _, d := range e.descs { + ch <- d + } +} + +// Collect discovers the drives and emits a gauge per numeric column, with the +// shared identity label set. +func (e *DriveExporter) Collect(ch chan<- prometheus.Metric) { + drives, _ := e.collect() + for _, d := range drives { + labelValues := make([]string, len(e.labels)) + for i, c := range e.labels { + labelValues[i] = format(c.raw(d)) + } + for _, c := range e.gauges { + val, ok := gaugeValue(c, d) + if !ok { + continue + } + ch <- prometheus.MustNewConstMetric(e.descs[c.name], prometheus.GaugeValue, val, labelValues...) 
+ } + } +} + +// gaugeValue converts a numeric column's value to a float for Prometheus, +// reporting ok=false when the value is unknown. +func gaugeValue(c column, d *Drive) (float64, bool) { + r := c.raw(d) + switch t := r.(type) { + case nil: + return 0, false + case int: + return float64(t), true + case float64: + return t, true + case bool: + if t { + return 1, true + } + return 0, true + default: + return 0, false + } +} diff --git a/exporter_test.go b/exporter_test.go new file mode 100644 index 0000000..a0d4a54 --- /dev/null +++ b/exporter_test.go @@ -0,0 +1,119 @@ +package main + +import ( + "encoding/json" + "strings" + "testing" + + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" +) + +// sampleDrive returns a populated drive for exercising the output encoders. +func sampleDrive() *Drive { + d := &Drive{ + Hostname: "kvm60", + Model: "Samsung SSD", + Serial: "S1", + Firmware: "1B6Q", + SmartHealth: "PASSED", + Enclosure: "64", + Slot: "3", + WearSrc: "nvme", + TempC: pInt(34), + PowerOnHours: pInt(17520), + PowerCycleCount: pInt(12), + WearPctConsumed: pInt(7), + HostWrittenTB: pF(12.5), + SmartAlertCtrl: true, + } + finalizeDerived(d) + return d +} + +// The Prometheus collector must emit numeric gauges named with the namespace +// prefix, carrying the shared identity label set with consistent cardinality. +func TestDriveExporterCollect(t *testing.T) { + app = &App{config: defaultConfig()} + app.config.Hostname = "kvm60" + + exp := NewDriveExporter() + // Inject a fixed drive set so Collect runs without touching real hardware. 
+ exp.collect = func() ([]*Drive, int64) { return []*Drive{sampleDrive()}, 0 } + reg := prometheus.NewRegistry() + reg.MustRegister(exp) + + mfs, err := reg.Gather() + if err != nil { + t.Fatalf("gather: %v", err) + } + + byName := map[string]*dto.MetricFamily{} + for _, mf := range mfs { + byName[mf.GetName()] = mf + } + + // A representative int, float, and bool field must be present and typed. + checks := map[string]float64{ + "drive_health_temp_c": 34, + "drive_health_power_cycle_count": 12, + "drive_health_host_written_tb": 12.5, + "drive_health_smart_alert_ctrl": 1, // bool true -> 1 + "drive_health_risk_score": float64(sampleDrive().RiskScore), + } + for name, want := range checks { + mf, ok := byName[name] + if !ok { + t.Errorf("missing metric %s", name) + continue + } + m := mf.GetMetric()[0] + if got := m.GetGauge().GetValue(); got != want { + t.Errorf("%s = %v, want %v", name, got, want) + } + // Identity labels must be attached. + labels := map[string]string{} + for _, l := range m.GetLabel() { + labels[l.GetName()] = l.GetValue() + } + if labels["serial"] != "S1" || labels["hostname"] != "kvm60" || labels["enclosure_slot"] != "64:3" { + t.Errorf("%s labels = %v", name, labels) + } + } +} + +// The InfluxDB JSON encoder must produce one typed object per drive with tags, +// fields, and a microsecond timestamp. 
+func TestRecordsToInfluxJSON(t *testing.T) { + out := recordsToInfluxJSON([]*Drive{sampleDrive()}, 1700000000000000000) + lines := strings.Split(strings.TrimSpace(string(out)), "\n") + if len(lines) != 1 { + t.Fatalf("got %d lines, want 1: %q", len(lines), out) + } + + var obj struct { + Name string `json:"name"` + Tags map[string]string `json:"tags"` + Fields map[string]interface{} `json:"fields"` + Timestamp int64 `json:"timestamp"` + } + if err := json.Unmarshal([]byte(lines[0]), &obj); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if obj.Name != influxMeasurement { + t.Errorf("name = %q, want %q", obj.Name, influxMeasurement) + } + if obj.Tags["serial"] != "S1" || obj.Tags["model"] != "Samsung SSD" { + t.Errorf("tags = %v", obj.Tags) + } + if obj.Timestamp != 1700000000000000 { + t.Errorf("timestamp = %d, want microseconds", obj.Timestamp) + } + // int field decodes as a JSON number; bool field as a real bool. + if v, ok := obj.Fields["temp_c"].(float64); !ok || v != 34 { + t.Errorf("temp_c field = %v", obj.Fields["temp_c"]) + } + if v, ok := obj.Fields["smart_alert_ctrl"].(bool); !ok || !v { + t.Errorf("smart_alert_ctrl field = %v", obj.Fields["smart_alert_ctrl"]) + } +} diff --git a/flags.go b/flags.go new file mode 100644 index 0000000..cfcc0a8 --- /dev/null +++ b/flags.go @@ -0,0 +1,63 @@ +package main + +import ( + "flag" + "fmt" + "os" +) + +// Flags holds the command-line arguments. One-shot output (CSV / InfluxDB line +// protocol to stdout) remains the default; -server switches to the long-lived +// service that exposes the Prometheus endpoint and pushes to Influx. +type Flags struct { + ConfigPath string + + // One-shot output controls (default mode). + Format string + + // Service mode. + Server bool + + // HTTP output overrides (service mode). + HTTPBind string + HTTPPort uint + HTTPMetricsPath string +} + +// ParseFlags parses the command line into app.flags, printing the version and +// exiting when -version is supplied. 
+func (a *App) ParseFlags() { + a.flags = new(Flags) + flag.Usage = func() { + fmt.Printf("%s: %s.\n\nUsage:\n", serviceName, serviceDescription) + flag.PrintDefaults() + } + + // Version. + var printVer bool + flag.BoolVar(&printVer, "version", false, "print version and exit") + flag.BoolVar(&printVer, "v", false, "print version and exit (shorthand)") + + // Configuration path override. + usage := "load configuration from `FILE`" + flag.StringVar(&a.flags.ConfigPath, "config", "", usage) + flag.StringVar(&a.flags.ConfigPath, "c", "", usage+" (shorthand)") + + // One-shot output controls. + flag.StringVar(&a.flags.Format, "format", "csv", "output format: csv | influx") + + // Service mode. + flag.BoolVar(&a.flags.Server, "server", false, "run as a service: Prometheus HTTP endpoint and scheduled InfluxDB output") + + // HTTP output overrides (service mode). + flag.StringVar(&a.flags.HTTPBind, "http-bind", "", "bind address for the HTTP server") + flag.UintVar(&a.flags.HTTPPort, "http-port", 0, "bind port for the HTTP server") + flag.StringVar(&a.flags.HTTPMetricsPath, "http-metrics-path", "", "path for the Prometheus metrics endpoint") + + flag.Parse() + + if printVer { + printVersion() + os.Exit(0) + } +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..6930d95 --- /dev/null +++ b/go.mod @@ -0,0 +1,34 @@ +module github.com/grmrgecko/drive-health-metrics + +go 1.20 + +require ( + github.com/gorilla/handlers v1.5.1 + github.com/influxdata/influxdb-client-go/v2 v2.12.3 + github.com/kkyr/fig v0.3.2 + github.com/prometheus/client_golang v1.16.0 + github.com/prometheus/client_model v0.3.0 + github.com/segmentio/kafka-go v0.4.42 +) + +require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/deepmap/oapi-codegen v1.8.2 // indirect + github.com/felixge/httpsnoop v1.0.1 // indirect + github.com/golang/protobuf v1.5.3 // indirect + github.com/influxdata/line-protocol 
v0.0.0-20200327222509-2487e7298839 // indirect + github.com/klauspost/compress v1.15.9 // indirect + github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect + github.com/mitchellh/mapstructure v1.4.1 // indirect + github.com/pelletier/go-toml v1.9.3 // indirect + github.com/pierrec/lz4/v4 v4.1.15 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/prometheus/common v0.42.0 // indirect + github.com/prometheus/procfs v0.10.1 // indirect + github.com/rogpeppe/go-internal v1.11.0 // indirect + golang.org/x/net v0.7.0 // indirect + golang.org/x/sys v0.26.0 // indirect + google.golang.org/protobuf v1.30.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..ce8fcb8 --- /dev/null +++ b/go.sum @@ -0,0 +1,161 @@ +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cyberdelia/templates v0.0.0-20141128023046-ca7fffd4298c/go.mod h1:GyV+0YP4qX0UQ7r2MoYZ+AvYDp12OF5yg4q8rGnyNh4= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/deepmap/oapi-codegen v1.8.2 h1:SegyeYGcdi0jLLrpbCMoJxnUUn8GBXHsvr4rbzjuhfU= +github.com/deepmap/oapi-codegen v1.8.2/go.mod h1:YLgSKSDv/bZQB7N4ws6luhozi3cEdRktEqrX88CvjIw= +github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= +github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ= +github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= 
+github.com/getkin/kin-openapi v0.61.0/go.mod h1:7Yn5whZr5kJi6t+kShccXS8ae1APpYTW6yheSwk8Yi4= +github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/go-chi/chi/v5 v5.0.0/go.mod h1:BBug9lr0cqtdAhsu6R4AAdvufI0/XBzAQSsUqJpoZOs= +github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= +github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golangci/lint-1 v0.0.0-20181222135242-d2cdd8c08219/go.mod h1:/X8TswGSh1pIozq4ZwCfxS0WA5JGXguxk94ar/4c87Y= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH4= +github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q= +github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= +github.com/influxdata/influxdb-client-go/v2 v2.12.3 h1:28nRlNMRIV4QbtIUvxhWqaxn0IpXeMSkY/uJa/O/vC4= +github.com/influxdata/influxdb-client-go/v2 v2.12.3/go.mod h1:IrrLUbCjjfkmRuaCiGQg4m2GbkaeJDcuWoxiWdQEbA0= +github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839 h1:W9WBk7wlPfJLvMCdtV4zPulc4uCPrlywQOmbFOhgQNU= +github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo= +github.com/kkyr/fig v0.3.2 h1:+vMj52FL6RJUxeKOBB6JXIMyyi1/2j1ERDrZXjoBjzM= 
+github.com/kkyr/fig v0.3.2/go.mod h1:ItUILF8IIzgZOMhx5xpJ1W/bviQsWRKOwKXfE/tqUoA= +github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY= +github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/labstack/echo/v4 v4.2.1/go.mod h1:AA49e0DZ8kk5jTOOCKNuPR6oTnBS0dYiM4FW1e6jwpg= +github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k= +github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/matryer/moq v0.0.0-20190312154309-6cfb0558e1bd/go.mod h1:9ELz6aaclSIGnZBoaSLZ3NAl1VTufbOrXBPvtcy6WiQ= +github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-colorable v0.1.7/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= +github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= +github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= +github.com/mitchellh/mapstructure 
v1.4.1 h1:CpVNEelQCZBooIPDn+AR3NpivK/TIKU8bDxdASFVQag= +github.com/mitchellh/mapstructure v1.4.1/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/pelletier/go-toml v1.9.3 h1:zeC5b1GviRUyKYd6OJPvBU/mcVDVoL1OhT17FCt5dSQ= +github.com/pelletier/go-toml v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= +github.com/pierrec/lz4/v4 v4.1.15 h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0= +github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8= +github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc= +github.com/prometheus/client_model v0.3.0 h1:UBgGFHqYdG/TPFD1B1ogZywDqEkwp3fBMvqdiQ7Xew4= +github.com/prometheus/client_model v0.3.0/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w= +github.com/prometheus/common v0.42.0 h1:EKsfXEYo4JpWMHH5cg+KOUWeuJSov1Id8zGR8eeI1YM= +github.com/prometheus/common v0.42.0/go.mod h1:xBwqVerjNdUDjgODMpudtOMwlOwf2SaTr1yjz4b7Zbc= +github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg= +github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM= +github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= +github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/segmentio/kafka-go v0.4.42 h1:qffhBZCz4WcWyNuHEclHjIMLs2slp6mZO8px+5W5tfU= +github.com/segmentio/kafka-go v0.4.42/go.mod 
h1:d0g15xPMqoUookug0OU75DhGZxXwCFxSLeJ4uphwJzg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= +github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= +github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= +github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= +github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= +github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= +github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= +github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 
+golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200826173525-f9321e4c35a6/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 
+golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= +google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod 
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/http.go b/http.go new file mode 100644 index 0000000..6dbd5a9 --- /dev/null +++ b/http.go @@ -0,0 +1,90 @@ +package main + +import ( + "context" + "fmt" + "log" + "net" + "net/http" + "os" + + "github.com/gorilla/handlers" + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +// HTTPOutput serves the Prometheus metrics endpoint. +type HTTPOutput struct { + server *http.Server + config *HTTPOutputConfig +} + +// NewHTTPOutput creates the HTTP output and applies the current configuration. +func NewHTTPOutput() *HTTPOutput { + s := new(HTTPOutput) + s.server = &http.Server{} + s.Reload() + return s +} + +// AddHandlers (re)builds the request multiplexer: the metrics endpoint plus a +// landing page linking to it. +func (s *HTTPOutput) AddHandlers() { + mux := http.NewServeMux() + s.server.Handler = mux + + mux.Handle(s.config.MetricsPath, handlers.CombinedLoggingHandler(os.Stdout, promhttp.HandlerFor(app.registry, promhttp.HandlerOpts{}))) + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(` + Drive Health Metrics + +

Drive Health Metrics

+

Metrics

+ + `)) + }) +} + +// Reload refreshes the configuration, listen address, and handlers. +func (s *HTTPOutput) Reload() { + s.config = &app.config.HTTP + s.server.Addr = fmt.Sprintf("%s:%d", s.config.BindAddr, s.config.Port) + s.AddHandlers() +} + +// OutputEnabled reports whether the HTTP output is enabled. +func (s *HTTPOutput) OutputEnabled() bool { + return s.config.Enabled +} + +// Start launches the server and blocks until it is accepting connections. +func (s *HTTPOutput) Start(ctx context.Context) { + isListening := make(chan bool) + go s.StartWithIsListening(ctx, isListening) + <-isListening +} + +// StartWithIsListening runs the server, signalling on isListening once the +// listener is bound, and shutting down when the context is cancelled. +func (s *HTTPOutput) StartWithIsListening(ctx context.Context, isListening chan bool) { + if !s.config.Enabled { + isListening <- true + return + } + + go func() { + <-ctx.Done() + if err := s.server.Shutdown(context.Background()); err != nil { + log.Println("Error shutting down http server:", err) + } + }() + + log.Println("Starting http server:", s.server.Addr) + l, err := net.Listen("tcp", s.server.Addr) + if err != nil { + log.Fatal("Listen: ", err) + } + isListening <- true + if err := s.server.Serve(l); err != nil && err != http.ErrServerClosed { + log.Println("HTTP server failure:", err) + } +} diff --git a/influx.go b/influx.go new file mode 100644 index 0000000..f9ae278 --- /dev/null +++ b/influx.go @@ -0,0 +1,146 @@ +package main + +import ( + "bufio" + "bytes" + "context" + "crypto/tls" + "log" + "time" + + influxdb2 "github.com/influxdata/influxdb-client-go/v2" + "github.com/segmentio/kafka-go" + "github.com/segmentio/kafka-go/sasl/plain" +) + +// InfluxOutput pushes drive metrics, as InfluxDB line protocol or JSON, to the +// InfluxDB v2 API and/or Kafka on a fixed schedule. 
+type InfluxOutput struct { + kwriter *kafka.Writer + client *influxdb2.Client + config *InfluxOutputConfig +} + +// NewInfluxOutput creates the output and applies the current configuration. +func NewInfluxOutput() *InfluxOutput { + i := new(InfluxOutput) + i.Reload() + return i +} + +// Reload rebuilds the Kafka writer and InfluxDB client from the configuration. +// A destination is only configured when its required settings are present. +func (i *InfluxOutput) Reload() { + i.config = &app.config.Influx + i.kwriter = nil + i.client = nil + + // Kafka output. + if len(i.config.KafkaBrokers) != 0 && i.config.KafkaTopic != "" { + dialer := &kafka.Dialer{ + Timeout: 10 * time.Second, + DualStack: true, + TLS: &tls.Config{InsecureSkipVerify: i.config.KafkaInsecureSkipVerify}, + } + if i.config.KafkaUsername != "" { + dialer.SASLMechanism = plain.Mechanism{ + Username: i.config.KafkaUsername, + Password: i.config.KafkaPassword, + } + } + i.kwriter = kafka.NewWriter(kafka.WriterConfig{ + Brokers: i.config.KafkaBrokers, + Topic: i.config.KafkaTopic, + Dialer: dialer, + }) + } + + // InfluxDB v2 API output. + if i.config.InfluxServer != "" && i.config.Token != "" && i.config.Org != "" && i.config.Bucket != "" { + c := influxdb2.NewClient(i.config.InfluxServer, i.config.Token) + i.client = &c + } +} + +// CollectAndLineprotocolFormat discovers the drives and renders them as InfluxDB +// line protocol. +func (i *InfluxOutput) CollectAndLineprotocolFormat() []byte { + drives, tsNs := collect() + return []byte(recordsToInflux(drives, tsNs)) +} + +// CollectAndJSONFormat discovers the drives and renders them as InfluxDB JSON. +func (i *InfluxOutput) CollectAndJSONFormat() []byte { + drives, tsNs := collect() + return recordsToInfluxJSON(drives, tsNs) +} + +// OutputEnabled reports whether a destination is configured and a push interval +// is set. 
+func (i *InfluxOutput) OutputEnabled() bool { + return (i.kwriter != nil || i.client != nil) && i.config.Frequency != 0 +} + +// Start runs the scheduled push loop until the context is cancelled. +func (i *InfluxOutput) Start(ctx context.Context) { + if !i.OutputEnabled() { + return + } + + ticker := time.NewTicker(i.config.Frequency) + defer ticker.Stop() + for { + select { + case <-ticker.C: + i.push(ctx) + + case <-ctx.Done(): + if i.kwriter != nil { + i.kwriter.Close() + } + if i.client != nil { + (*i.client).Close() + } + return + } + } +} + +// push collects metrics once and writes them to every configured destination. +func (i *InfluxOutput) push(ctx context.Context) { + // Kafka receives one message per drive in the configured format. + if i.kwriter != nil { + var data []byte + if i.config.KafkaOutputFormat == "json" { + data = i.CollectAndJSONFormat() + } else { + data = i.CollectAndLineprotocolFormat() + } + + var messages []kafka.Message + routingKey := []byte(app.config.Hostname) + scanner := bufio.NewScanner(bytes.NewReader(data)) + scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) + scanner.Split(bufio.ScanLines) + for scanner.Scan() { + b := append(scanner.Bytes(), '\n') + messages = append(messages, kafka.Message{Key: routingKey, Value: b}) + } + if len(messages) != 0 { + if err := i.kwriter.WriteMessages(ctx, messages...); err != nil { + log.Println("Unable to write to Kafka:", err) + } + } + } + + // InfluxDB API receives the full line-protocol document. 
+ if i.client != nil { + data := i.CollectAndLineprotocolFormat() + if len(data) != 0 { + writeAPI := (*i.client).WriteAPIBlocking(i.config.Org, i.config.Bucket) + if err := writeAPI.WriteRecord(ctx, string(data)); err != nil { + log.Println("Unable to write to InfluxDB:", err) + } + } + } +} diff --git a/jsonutil.go b/jsonutil.go new file mode 100644 index 0000000..2a78af7 --- /dev/null +++ b/jsonutil.go @@ -0,0 +1,114 @@ +package main + +import ( + "encoding/json" + "regexp" + "strconv" + "strings" +) + +// loadJSON parses smartctl -j output. If leading noise precedes the object +// (rare, but some controllers emit warnings before the JSON), it retries from +// the first '{'. +func loadJSON(raw string) map[string]interface{} { + raw = strings.TrimSpace(raw) + if raw == "" { + return nil + } + var m map[string]interface{} + if err := json.Unmarshal([]byte(raw), &m); err == nil { + return m + } + if i := strings.IndexByte(raw, '{'); i >= 0 { + if err := json.Unmarshal([]byte(raw[i:]), &m); err == nil { + return m + } + } + return nil +} + +// jObj navigates nested maps by key path, returning the leaf map or nil. +func jObj(m map[string]interface{}, keys ...string) map[string]interface{} { + cur := m + for _, k := range keys { + if cur == nil { + return nil + } + v, ok := cur[k].(map[string]interface{}) + if !ok { + return nil + } + cur = v + } + return cur +} + +// jInt returns an *int for a numeric leaf (JSON numbers decode as float64). +func jInt(m map[string]interface{}, keys ...string) *int { + v := jLeaf(m, keys...) + switch t := v.(type) { + case float64: + n := int(t) + return &n + case string: + if n, err := strconv.Atoi(strings.TrimSpace(t)); err == nil { + return &n + } + } + return nil +} + +// jStr returns a trimmed string leaf, or "". +func jStr(m map[string]interface{}, keys ...string) string { + if s, ok := jLeaf(m, keys...).(string); ok { + return strings.TrimSpace(s) + } + return "" +} + +// jBoolPtr returns *bool for a boolean leaf. 
+func jBoolPtr(m map[string]interface{}, keys ...string) *bool { + if b, ok := jLeaf(m, keys...).(bool); ok { + return &b + } + return nil +} + +// jLeaf returns the raw value at the key path (the final key looked up in its +// parent map), or nil when any segment along the path is missing. +func jLeaf(m map[string]interface{}, keys ...string) interface{} { + if len(keys) == 0 { + return nil + } + parent := jObj(m, keys[:len(keys)-1]...) + if parent == nil { + return nil + } + return parent[keys[len(keys)-1]] +} + +var leadingInt = regexp.MustCompile(`^\s*(\d+)`) + +// firstInt extracts the leading run of digits from a string ("345 hours" -> 345). +// It stops at the first non-digit, so for comma-grouped numbers ("12,345") use +// parseIntLoose, which strips separators first. +func firstInt(s string) (int, bool) { + m := leadingInt.FindStringSubmatch(s) + if m == nil { + return 0, false + } + n, err := strconv.Atoi(m[1]) + return n, err == nil +} + +// parseIntLoose strips commas/spaces and parses an integer anywhere in s. +func parseIntLoose(s string) (int, bool) { + s = strings.TrimSpace(strings.ReplaceAll(s, ",", "")) + // Take the leading run of digits (and optional sign). + m := regexp.MustCompile(`-?\d+`).FindString(s) + if m == "" { + return 0, false + } + n, err := strconv.Atoi(m) + return n, err == nil +} diff --git a/main.go b/main.go new file mode 100644 index 0000000..733ecd7 --- /dev/null +++ b/main.go @@ -0,0 +1,137 @@ +// Command drive-health-metrics collects per-drive SMART health from every physical +// drive on a host — direct SATA/SAS, NVMe, and drives hidden behind a RAID +// controller (MegaCLI / storcli / perccli) — scores each drive, and exports the +// result. By default it emits CSV or InfluxDB line protocol once and exits; +// with -server it runs as a service exposing a Prometheus endpoint and pushing to +// InfluxDB/Kafka on a schedule. 
+package main + +import ( + "context" + "fmt" + "log" + "os" + "os/signal" + "syscall" + + "github.com/prometheus/client_golang/prometheus" +) + +// Basic application info. namespace is the Prometheus metric prefix and matches +// the InfluxDB measurement name. +const ( + serviceName = "drive-health-metrics" + serviceDescription = "Collects and exports per-drive SMART health metrics" + namespace = "drive_health" +) + +// App holds the shared application state: parsed flags, configuration, the +// Prometheus registry, and the exporter/outputs. +type App struct { + flags *Flags + config *Config + registry *prometheus.Registry + driveExporter *DriveExporter + httpOutput *HTTPOutput + influxOutput *InfluxOutput +} + +// app is the global application state. +var app *App + +func main() { + app = new(App) + app.ParseFlags() + app.ReadConfig() + + switch { + case app.flags.Server: + runServer() + default: + runOneShot() + } +} + +// runOneShot collects once and writes CSV or InfluxDB line protocol to stdout. +func runOneShot() { + switch app.flags.Format { + case "csv", "influx": + default: + fmt.Fprintf(os.Stderr, "invalid --format %q (want csv|influx)\n", app.flags.Format) + os.Exit(2) + } + + drives, tsNs := collect() + if len(drives) == 0 { + fmt.Fprintln(os.Stderr, "WARNING: no drive records collected") + return + } + + if app.flags.Format == "csv" { + fmt.Println(recordsToCSV(drives)) + } else { + fmt.Println(recordsToInflux(drives, tsNs)) + } +} + +// runServer runs the long-lived service: a Prometheus HTTP endpoint plus the +// scheduled InfluxDB output, reloading configuration on SIGHUP and shutting down +// on SIGINT/SIGTERM. +func runServer() { + // Build the exporter and registry. + app.driveExporter = NewDriveExporter() + reg := prometheus.NewRegistry() + reg.MustRegister(app.driveExporter) + app.registry = reg + + // Build the outputs. 
+ app.httpOutput = NewHTTPOutput() + app.influxOutput = NewInfluxOutput() + + if !app.httpOutput.OutputEnabled() && !app.influxOutput.OutputEnabled() { + log.Fatalln("No output services are enabled (set http_output.enabled or configure influx_output).") + } + + // Monitor signals. + c := make(chan os.Signal, 1) + signal.Notify(c, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP) + + // Each outer iteration owns one background context for the output services; + // the inner loop applies SIGHUP reloads in place and only breaks out (to + // recreate the context and restart the services) when a config change + // requires it. + for { + ctx, cancel := context.WithCancel(context.Background()) + go app.httpOutput.Start(ctx) + go app.influxOutput.Start(ctx) + + restart := false + for !restart { + sig := <-c + if sig != syscall.SIGHUP { + // Termination/interruption: stop the services and exit. + cancel() + return + } + + log.Println("Reloading configurations") + oldConfig := app.config + influxWasEnabled := app.influxOutput.OutputEnabled() + + app.ReadConfig() + app.httpOutput.Reload() + app.influxOutput.Reload() + + httpNeedsRestart := oldConfig.HTTP.BindAddr != app.config.HTTP.BindAddr || + oldConfig.HTTP.Port != app.config.HTTP.Port || + oldConfig.HTTP.Enabled != app.config.HTTP.Enabled + influxNeedsRestart := app.influxOutput.OutputEnabled() != influxWasEnabled || + oldConfig.Influx.Frequency != app.config.Influx.Frequency + restart = httpNeedsRestart || influxNeedsRestart + } + + // A restart-worthy change occurred: stop the current services and loop + // to start them on a fresh context. + cancel() + } +} diff --git a/output.go b/output.go new file mode 100644 index 0000000..add3c73 --- /dev/null +++ b/output.go @@ -0,0 +1,178 @@ +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "sort" + "strconv" + "strings" +) + +// influxMeasurement is the InfluxDB measurement name; it matches the Prometheus +// namespace so both outputs describe the same series. 
+const influxMeasurement = namespace + +// enclosureSlot formats the physical location as ":", falling +// back to whichever single value is known. +func (d *Drive) enclosureSlot() string { + switch { + case d.Enclosure != "" && d.Slot != "": + return d.Enclosure + ":" + d.Slot + case d.Slot != "": + return d.Slot + default: + return d.Enclosure + } +} + +// csvEscape quotes s and doubles embedded quotes when it contains a comma, +// quote, or newline, per RFC 4180. +func csvEscape(s string) string { + if strings.ContainsAny(s, ",\"\n") { + return "\"" + strings.ReplaceAll(s, "\"", "\"\"") + "\"" + } + return s +} + +// recordsToCSV renders the drives as a CSV document: the header row followed by +// one escaped row per drive, in schema (columns) order. +func recordsToCSV(drives []*Drive) string { + var b strings.Builder + names := make([]string, len(columns)) + for i, c := range columns { + names[i] = c.name + } + b.WriteString(strings.Join(names, ",")) + for _, d := range drives { + b.WriteByte('\n') + cells := make([]string, len(columns)) + for i, c := range columns { + cells[i] = csvEscape(format(c.raw(d))) + } + b.WriteString(strings.Join(cells, ",")) + } + return b.String() +} + +// influxTagEscape escapes spaces, commas, and equals signs in an InfluxDB tag +// value, which the line protocol treats as delimiters. +func influxTagEscape(s string) string { + r := strings.NewReplacer(" ", `\ `, ",", `\,`, "=", `\=`) + return r.Replace(s) +} + +// recordsToInflux renders the drives as InfluxDB line protocol, one line per +// drive: tag columns become tags, the remaining (non-csvOnly) columns become +// typed fields (int "i" suffix, float, bool, or quoted string), all sharing the +// collection timestamp. A drive with no usable fields is skipped. +func recordsToInflux(drives []*Drive, tsNs int64) string { + var lines []string + for _, d := range drives { + // Tags. 
+ tags := map[string]string{} + for _, c := range columns { + if !c.influxTag { + continue + } + if v := format(c.raw(d)); v != "" { + tags[c.name] = influxTagEscape(v) + } + } + // Fields: every non-csvOnly, non-tag column with a value. + fields := map[string]string{} + for _, c := range columns { + if c.csvOnly || c.influxTag { + continue + } + r := c.raw(d) + if r == nil { + continue + } + switch c.kind { + case kindInt: + fields[c.name] = strconv.Itoa(r.(int)) + "i" + case kindFloat: + fields[c.name] = format(r) + case kindBool: + fields[c.name] = format(r) // "true"/"false". + default: + fields[c.name] = "\"" + strings.ReplaceAll(format(r), "\"", "\\\"") + "\"" + } + } + if len(fields) == 0 { + continue + } + tagStr := joinSorted(tags) + fieldStr := joinSorted(fields) + if tagStr != "" { + lines = append(lines, fmt.Sprintf("%s,%s %s %d", influxMeasurement, tagStr, fieldStr, tsNs)) + } else { + lines = append(lines, fmt.Sprintf("%s %s %d", influxMeasurement, fieldStr, tsNs)) + } + } + return strings.Join(lines, "\n") +} + +// recordsToInfluxJSON renders the drives as newline-delimited InfluxDB JSON, one +// object per drive ({name, tags, fields, timestamp}). It applies the same +// tag/field split and typing as recordsToInflux; the timestamp is microseconds. +// A drive with no usable fields is skipped. +func recordsToInfluxJSON(drives []*Drive, tsNs int64) []byte { + var buff bytes.Buffer + tsMicro := tsNs / 1000 + for _, d := range drives { + // Tags. + tags := map[string]string{} + for _, c := range columns { + if !c.influxTag { + continue + } + if v := format(c.raw(d)); v != "" { + tags[c.name] = v + } + } + // Typed fields. + fields := map[string]interface{}{} + for _, c := range columns { + if c.csvOnly || c.influxTag { + continue + } + r := c.raw(d) + if r == nil { + continue + } + fields[c.name] = r // int, float64, bool, or string — already typed. 
+ } + if len(fields) == 0 { + continue + } + serialized, err := json.Marshal(map[string]interface{}{ + "name": influxMeasurement, + "tags": tags, + "fields": fields, + "timestamp": tsMicro, + }) + if err != nil { + continue + } + buff.Write(serialized) + buff.WriteByte('\n') + } + return buff.Bytes() +} + +// joinSorted joins m as "k=v" pairs in key order, keeping line-protocol tag and +// field sets deterministic across runs. +func joinSorted(m map[string]string) string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + sort.Strings(keys) + parts := make([]string, len(keys)) + for i, k := range keys { + parts[i] = k + "=" + m[k] + } + return strings.Join(parts, ",") +} diff --git a/realdata_test.go b/realdata_test.go new file mode 100644 index 0000000..e55a95e --- /dev/null +++ b/realdata_test.go @@ -0,0 +1,562 @@ +package main + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +// readFixture loads a captured real-world sample from testdata/. 
+func readFixture(t *testing.T, name string) string { + t.Helper() + b, err := os.ReadFile(filepath.Join("testdata", name)) + if err != nil { + t.Fatalf("read fixture %s: %v", name, err) + } + return string(b) +} + +// ---- Real smartctl JSON: Samsung 870 EVO via sat+megaraid (smartmontools 7.0) ---- + +func TestParseSmartJSON_RealMegaraidSSD(t *testing.T) { + d := &Drive{} + parseSmartJSON(loadJSON(readFixture(t, "smart_ata_ssd_megaraid.json")), d) + + if d.Model != "Samsung SSD 870 EVO 2TB" { + t.Errorf("model = %q", d.Model) + } + if d.Serial != "S624NS0RC00003M" { + t.Errorf("serial = %q", d.Serial) + } + if d.Firmware != "SVT02B6Q" { + t.Errorf("firmware = %q", d.Firmware) + } + if d.Rotation != "SSD" { + t.Errorf("rotation = %q, want SSD", d.Rotation) + } + if d.Capacity != "2.00 TB" { + t.Errorf("capacity = %q, want 2.00 TB", d.Capacity) + } + if d.SmartHealth != "PASSED" { + t.Errorf("health = %q", d.SmartHealth) + } + if iv(d.PowerOnHours) != 33518 { + t.Errorf("poh = %v, want 33518", d.PowerOnHours) + } + if iv(d.PowerCycleCount) != 7 { + t.Errorf("power_cycles = %v, want 7", d.PowerCycleCount) + } + if iv(d.TempC) != 31 { + t.Errorf("temp = %v, want 31", d.TempC) + } + // Healthy drive: all defect counters present and zero. + if iv(d.Reallocated) != 0 || iv(d.UdmaCrc) != 0 || iv(d.ReportedUncorrect) != 0 { + t.Errorf("defects: realloc=%v crc=%v reported=%v, want 0/0/0", + d.Reallocated, d.UdmaCrc, d.ReportedUncorrect) + } + // Samsung Wear_Leveling_Count (ID177) value 93 -> 7% consumed. 
+ if d.WearSrc != "Samsung/ID177" || iv(d.WearPctConsumed) != 7 { + t.Errorf("wear src=%q consumed=%v, want Samsung/ID177 / 7", d.WearSrc, d.WearPctConsumed) + } + + d.HaveSmart = true + finalizeDerived(d) + if d.Recommendation != "OK" { + t.Errorf("rec = %q (score %d), want OK", d.Recommendation, d.RiskScore) + } + if iv(d.DefectTotal) != 0 { + t.Errorf("defect_total = %v, want 0", d.DefectTotal) + } +} + +// ---- Real smartctl JSON: Micron 5400 via sat+megaraid, no controller CLI ---- +// util01 (Ubuntu 24, smartmontools 7.4) has megaraid drives but no storcli/ +// MegaCLI, so no controller data merges. The drive exposes ID173/202/233 wear +// attributes at once, and its model is a bare part number (no vendor prefix). +func TestParseSmartJSON_RealMicronSSD(t *testing.T) { + d := &Drive{} + parseSmartJSON(loadJSON(readFixture(t, "smart_ata_ssd_micron.json")), d) + + if d.Model != "MTFDDAK960TGA-1BC1ZABDA" { + t.Errorf("model = %q", d.Model) + } + if isPseudoDevice(d) { + t.Errorf("bare part-number model wrongly flagged as pseudo device") + } + if d.Rotation != "SSD" { + t.Errorf("rotation = %q, want SSD", d.Rotation) + } + if d.Capacity != "0.96 TB" { + t.Errorf("capacity = %q, want 0.96 TB", d.Capacity) + } + if d.SmartHealth != "PASSED" { + t.Errorf("health = %q", d.SmartHealth) + } + if iv(d.PowerOnHours) != 20238 || iv(d.PowerCycleCount) != 12 { + t.Errorf("poh=%v cycles=%v, want 20238/12", d.PowerOnHours, d.PowerCycleCount) + } + // ID173 wins the wearAttr precedence; VALUE 100 -> 0% consumed (fresh). 
+ if d.WearSrc != "Micron/ID173" || iv(d.WearPctConsumed) != 0 { + t.Errorf("wear src=%q consumed=%v, want Micron/ID173 / 0", d.WearSrc, d.WearPctConsumed) + } + + d.HaveSmart = true + finalizeDerived(d) + if d.Recommendation != "OK" { + t.Errorf("rec = %q (score %d), want OK", d.Recommendation, d.RiskScore) + } +} + +// ---- Real NVMe JSON: health log drives wear/identity (direct-attached SSD) ---- +func TestParseSmartJSON_RealNVMe(t *testing.T) { + d := &Drive{} + parseSmartJSON(loadJSON(readFixture(t, "smart_nvme.json")), d) + + if d.Model != "Force MP510" { + t.Errorf("model = %q", d.Model) + } + if d.Rotation != "NVMe" { + t.Errorf("rotation = %q, want NVMe", d.Rotation) + } + if iv(d.PowerOnHours) != 42811 { + t.Errorf("poh = %v, want 42811", d.PowerOnHours) + } + // NVMe percentage_used 6 -> 6% consumed. + if d.WearSrc != "NVMe/percentage_used" || iv(d.WearPctConsumed) != 6 { + t.Errorf("wear src=%q consumed=%v, want NVMe/percentage_used / 6", d.WearSrc, d.WearPctConsumed) + } + if iv(d.NvmeCriticalWarning) != 0 || iv(d.NvmeAvailSpare) != 100 { + t.Errorf("nvme crit=%v spare=%v, want 0/100", d.NvmeCriticalWarning, d.NvmeAvailSpare) + } + + d.HaveSmart = true + finalizeDerived(d) + // Healthy NVMe, 42811h (>4yr -> age +4), wear 6% -> OK. + if d.Recommendation != "OK" { + t.Errorf("rec = %q (score %d), want OK", d.Recommendation, d.RiskScore) + } +} + +// ---- Real SAS SSD JSON: endurance + grown-defect/error-counter health ---- +// SAMSUNG ARFX0920S5xnNTRI behind a SAS HBA (tama, smartmontools 7.5). SAS +// drives have no ATA attribute table; hard-defect signals come from the SCSI +// logs (grown defect list, error counter log, pending defects). 
+func TestParseSmartJSON_RealSASSSD(t *testing.T) { + d := &Drive{} + parseSmartJSON(loadJSON(readFixture(t, "smart_sas_ssd.json")), d) + + if d.Model != "SAMSUNG ARFX0920S5xnNTRI" || d.Serial != "S43YNF0K000001" { + t.Errorf("identity model=%q serial=%q", d.Model, d.Serial) + } + if !strings.HasPrefix(d.Transport, "SAS") { + t.Errorf("transport = %q, want SAS*", d.Transport) + } + if d.Rotation != "SSD" { + t.Errorf("rotation = %q, want SSD", d.Rotation) + } + if d.SmartHealth != "PASSED" { + t.Errorf("health = %q", d.SmartHealth) + } + if iv(d.PowerOnHours) != 2487 || iv(d.TempC) != 56 { + t.Errorf("poh=%v temp=%v, want 2487/56", d.PowerOnHours, d.TempC) + } + // SCSI endurance indicator 0% -> 0% consumed. + if d.WearSrc != "SCSI/endurance" || iv(d.WearPctConsumed) != 0 { + t.Errorf("wear src=%q consumed=%v, want SCSI/endurance / 0", d.WearSrc, d.WearPctConsumed) + } + // Healthy SAS drive: grown defects, uncorrected errors, pending all zero + // (and non-nil, since the SCSI logs were present). + if iv(d.Reallocated) != 0 || iv(d.Uncorrectable) != 0 || iv(d.Pending) != 0 { + t.Errorf("defects: grown=%v uncorrected=%v pending=%v, want 0/0/0", + d.Reallocated, d.Uncorrectable, d.Pending) + } + + d.HaveSmart = true + finalizeDerived(d) + if d.Recommendation != "OK" { + t.Errorf("rec = %q (score %d), want OK", d.Recommendation, d.RiskScore) + } +} + +// ---- SAS hard-defect signals drive the score (synthetic, both JSON + text) ---- +// The captured SAS drives are healthy; verify the SCSI error counter log and +// pending-defect count actually feed the scorer when nonzero. 
func TestSASUncorrectedErrorsScored(t *testing.T) {
	// JSON path: a synthetic SCSI object carrying nonzero grown-defect,
	// uncorrected-error and pending-defect counts.
	const j = `{
		"model_name": "SEAGATE ST4000NM",
		"device": {"type": "scsi", "protocol": "SCSI"},
		"smart_status": {"passed": true},
		"scsi_grown_defect_list": 3,
		"scsi_error_counter_log": {
			"read": {"total_uncorrected_errors": 2},
			"write": {"total_uncorrected_errors": 0},
			"verify": {"total_uncorrected_errors": 1}
		},
		"scsi_pending_defects": {"count": 4}
	}`
	d := &Drive{}
	parseSmartJSON(loadJSON(j), d)
	if iv(d.Uncorrectable) != 3 { // 2 read + 1 verify
		t.Errorf("uncorrectable = %v, want 3", d.Uncorrectable)
	}
	if iv(d.Pending) != 4 {
		t.Errorf("pending = %v, want 4", d.Pending)
	}
	if iv(d.Reallocated) != 3 { // grown defect list
		t.Errorf("reallocated(grown) = %v, want 3", d.Reallocated)
	}
	// Any one of these defect classes already scores >= 55 in scoreDrive, so
	// together they must reach the REPLACE_NOW band (>= 100).
	d.HaveSmart = true
	finalizeDerived(d)
	if d.Recommendation != "REPLACE_NOW" {
		t.Errorf("rec = %q (score %d), want REPLACE_NOW", d.Recommendation, d.RiskScore)
	}

	// Text path: same signals from `smartctl -a` SAS output.
	const text = `Vendor: SEAGATE
Product: ST4000NM
SMART Health Status: OK
Elements in grown defect list: 3
Error counter log:
read: 0 0 0 0 0 2620.555 2
write: 0 0 0 0 0 2091.250 0
verify: 0 0 0 0 0 46.845 1
 Pending defect count:4 Pending Defects
`
	dt := &Drive{}
	parseSmartText(text, dt)
	if iv(dt.Uncorrectable) != 3 || iv(dt.Pending) != 4 || iv(dt.Reallocated) != 3 {
		t.Errorf("text path: uncorrected=%v pending=%v grown=%v, want 3/4/3",
			dt.Uncorrectable, dt.Pending, dt.Reallocated)
	}
}

// ---- iSCSI LUNs (LIO/IET) are dropped via the SCSI transport, not by model ----
func TestISCSILunFilter(t *testing.T) {
	d := &Drive{}
	parseSmartJSON(loadJSON(readFixture(t, "smart_iscsi_lio.json")), d)
	// Fatal: the remaining checks are meaningless if the transport was not parsed.
	if d.Transport != "iSCSI" {
		t.Fatalf("transport = %q, want iSCSI", d.Transport)
	}
	if !isPseudoDevice(d) {
		t.Errorf("LIO-ORG iSCSI LUN not flagged as pseudo device")
	}
	// Transport alone is authoritative even if the model looks like a real drive.
	if !isPseudoDevice(&Drive{Model: "Samsung SSD 860 EVO 1TB", Transport: "iSCSI"}) {
		t.Errorf("iSCSI transport not honored over a real-looking model")
	}
	// LIO model token still catches it on the text path (no transport field).
	if !isPseudoDevice(&Drive{Model: "LIO-ORG k8s1"}) {
		t.Errorf("LIO-ORG model token not matched")
	}
	// A real SAS/SATA drive (transport SAS) is not filtered.
	if isPseudoDevice(&Drive{Model: "HGST HUH721010ALE604", Transport: "SAS"}) {
		t.Errorf("real SAS drive wrongly flagged as pseudo device")
	}
}

// ---- Pseudo devices must be filtered: iSCSI LUN + RAID controller VD ----

func TestPseudoDeviceFilter(t *testing.T) {
	// Each fixture is first checked for the expected model (fatal on mismatch,
	// since that means the wrong fixture or a parse failure), then for the
	// pseudo-device verdict.
	iscsi := &Drive{}
	parseSmartJSON(loadJSON(readFixture(t, "smart_iscsi_virtual_disk.json")), iscsi)
	if iscsi.Model != "IET VIRTUAL-DISK" {
		t.Fatalf("iscsi model = %q", iscsi.Model)
	}
	if !isPseudoDevice(iscsi) {
		t.Errorf("IET VIRTUAL-DISK not flagged as pseudo device")
	}

	vd := &Drive{}
	parseSmartJSON(loadJSON(readFixture(t, "smart_raid_vd.json")), vd)
	if vd.Model != "BROADCOM MR9560-16i" {
		t.Fatalf("raid vd model = %q", vd.Model)
	}
	if !isPseudoDevice(vd) {
		t.Errorf("BROADCOM MR9560-16i VD not flagged as pseudo device")
	}

	avago := &Drive{}
	parseSmartJSON(loadJSON(readFixture(t, "smart_raid_vd_avago.json")), avago)
	if avago.Model != "AVAGO MR9363-4i" {
		t.Fatalf("avago vd model = %q", avago.Model)
	}
	if !isPseudoDevice(avago) {
		t.Errorf("AVAGO MR9363-4i VD not flagged as pseudo device")
	}

	// SMART-less RAID virtual disk identified by model (DELL RAID / PERC VDs).
	for _, model := range []string{"DELL RAID", "DELL PERC H730"} {
		if !isPseudoDevice(&Drive{Model: model}) {
			t.Errorf("RAID VD %q not flagged as pseudo device", model)
		}
	}

	// Real drives behind these controllers must NOT be filtered.
	for _, model := range []string{"Samsung SSD 870 EVO 2TB", "MICRON_M510DC_MTFDDAK960MBP",
		"HFS960G3H2X069N", "Dell DC NVMe PM9A3 RI U.2 960GB"} {
		if isPseudoDevice(&Drive{Model: model}) {
			t.Errorf("real drive %q wrongly flagged as pseudo device", model)
		}
	}
}

// ---- Real storcli: DID comes from the table row; sub-headers don't fragment ----

func TestParseStorcli_Real(t *testing.T) {
	drives := parseStorcli(readFixture(t, "storcli_show_all.txt"))
	if len(drives) != 4 {
		t.Fatalf("parsed %d drives, want 4", len(drives))
	}

	// Index by device ID, flagging any drive that was parsed without one.
	byDID := map[string]ctrlDrive{}
	for _, cd := range drives {
		if cd.DeviceID == "" {
			t.Errorf("drive at %s:%s has empty DID", cd.Enclosure, cd.Slot)
		}
		byDID[cd.DeviceID] = cd
	}

	// Slot->DID->OtherErr mapping pulled straight from the capture.
	wantOther := map[string]struct {
		slot     string
		otherErr int
	}{
		"22": {"0", 7}, "23": {"1", 6}, "20": {"2", 21}, "21": {"3", 12},
	}
	for did, want := range wantOther {
		cd, ok := byDID[did]
		if !ok {
			t.Errorf("DID %s missing", did)
			continue
		}
		if cd.Enclosure != "64" || cd.Slot != want.slot {
			t.Errorf("DID %s location %s:%s, want 64:%s", did, cd.Enclosure, cd.Slot, want.slot)
		}
		if cd.OtherErr != want.otherErr {
			t.Errorf("DID %s other_err=%d, want %d", did, cd.OtherErr, want.otherErr)
		}
		if cd.MediaErr != 0 || cd.Predictive != 0 || cd.SmartAlert {
			t.Errorf("DID %s media=%d pred=%d alert=%v, want 0/0/false",
				did, cd.MediaErr, cd.Predictive, cd.SmartAlert)
		}
		if cd.FwState != "Onln" {
			t.Errorf("DID %s fw_state=%q, want Onln", did, cd.FwState)
		}
	}
}

// ---- Real storcli, second version/enclosure: parser generalizes ----
// storcli v007.1907, enclosure 252, Micron SSDs (jarvis01-kvm92). Confirms the
// summary-header + table-row DID logic is not specific to one storcli build.
func TestParseStorcli_RealV2(t *testing.T) {
	drives := parseStorcli(readFixture(t, "storcli_show_all_v2.txt"))
	if len(drives) != 4 {
		t.Fatalf("parsed %d drives, want 4", len(drives))
	}
	// DID N lives in slot (N-4), enclosure 252; all Online with zero errors.
	for _, cd := range drives {
		if cd.DeviceID == "" {
			t.Errorf("drive at %s:%s has empty DID", cd.Enclosure, cd.Slot)
		}
		if cd.Enclosure != "252" {
			t.Errorf("DID %s enclosure=%q, want 252", cd.DeviceID, cd.Enclosure)
		}
		if cd.FwState != "Onln" {
			t.Errorf("DID %s fw_state=%q, want Onln", cd.DeviceID, cd.FwState)
		}
		if cd.MediaErr != 0 || cd.OtherErr != 0 || cd.Predictive != 0 {
			t.Errorf("DID %s errors media=%d other=%d pred=%d, want 0/0/0",
				cd.DeviceID, cd.MediaErr, cd.OtherErr, cd.Predictive)
		}
	}
	// Spot-check the first and last DID -> slot pairs.
	byDID := map[string]ctrlDrive{}
	for _, cd := range drives {
		byDID[cd.DeviceID] = cd
	}
	if cd, ok := byDID["4"]; !ok || cd.Slot != "0" {
		t.Errorf("DID 4 -> %s:%s, want 252:0", cd.Enclosure, cd.Slot)
	}
	if cd, ok := byDID["7"]; !ok || cd.Slot != "3" {
		t.Errorf("DID 7 -> %s:%s, want 252:3", cd.Enclosure, cd.Slot)
	}
}

// ---- Real perccli2 JSON: DID->PID, State vs Status, NVMe namespace counters ----
// perccli2 (8.x) is JSON-native; the text form adds a second status column that
// breaks positional parsing, so parsePerccli2 consumes the JSON. Fixture is a
// real `perccli2 /call/eall/sall show all J` (Dell PM9A3 NVMe behind a PERC).
func TestParsePerccli2_Real(t *testing.T) {
	drives := parsePerccli2(readFixture(t, "perccli2_show_all.json"))
	if len(drives) != 2 {
		t.Fatalf("parsed %d drives, want 2", len(drives))
	}
	byPID := map[string]ctrlDrive{}
	for _, cd := range drives {
		byPID[cd.DeviceID] = cd
	}
	cd, ok := byPID["275"]
	if !ok {
		t.Fatal("PID 275 missing")
	}
	if cd.Enclosure != "284" || cd.Slot != "0" {
		t.Errorf("PID 275 location %s:%s, want 284:0", cd.Enclosure, cd.Slot)
	}
	// FwState comes from Status (Online), NOT State (Conf).
	if cd.FwState != "Online" {
		t.Errorf("PID 275 fw_state=%q, want Online", cd.FwState)
	}
	if cd.Model != "Dell DC NVMe PM9A3 RI U.2 960GB" || cd.Serial != "S6JGNA0X000001" {
		t.Errorf("PID 275 model=%q serial=%q", cd.Model, cd.Serial)
	}
	if cd.Rotation != "NVMe" {
		t.Errorf("PID 275 rotation=%q, want NVMe", cd.Rotation)
	}
	if iv(cd.TempC) != 30 {
		t.Errorf("PID 275 temp=%v, want 30", cd.TempC)
	}
	// NVMe error counters live under "LU/NS Properties", not directly in detail.
	if cd.MediaErr != 0 || cd.OtherErr != 0 || cd.Predictive != 0 {
		t.Errorf("PID 275 errors media=%d other=%d pred=%d, want 0/0/0",
			cd.MediaErr, cd.OtherErr, cd.Predictive)
	}
	// Second drive: only slot and temperature are spot-checked.
	if cd, ok := byPID["276"]; !ok || cd.Slot != "1" || iv(cd.TempC) != 32 {
		t.Errorf("PID 276 -> %s:%s temp=%v, want 284:1 / 32", cd.Enclosure, cd.Slot, cd.TempC)
	}
}

// ---- Controller-only drive: surfaced from controller data, scored not NO_DATA ----
func TestPerccli2ControllerOnlyDrive(t *testing.T) {
	drives := parsePerccli2(readFixture(t, "perccli2_show_all.json"))
	byPID := map[string]ctrlDrive{}
	for _, cd := range drives {
		byPID[cd.DeviceID] = cd
	}
	cd := byPID["275"]

	// Healthy controller-only drive: identity + health from controller only.
	d := &Drive{}
	applyController(d, cd)
	d.HaveSmart = false
	finalizeDerived(d)
	if d.enclosureSlot() != "284:0" {
		t.Errorf("enclosure_slot = %q, want 284:0", d.enclosureSlot())
	}
	if d.Model != "Dell DC NVMe PM9A3 RI U.2 960GB" || d.Rotation != "NVMe" {
		t.Errorf("identity model=%q rotation=%q", d.Model, d.Rotation)
	}
	if d.Recommendation != "OK" {
		t.Errorf("healthy controller-only rec=%q (score %d), want OK", d.Recommendation, d.RiskScore)
	}

	// A failed controller drive is surfaced (scored), never dropped as NO_DATA.
	// cd is a copy taken from the map, so mutating it does not affect byPID.
	cd.FwState = "Failed"
	df := &Drive{}
	applyController(df, cd)
	df.HaveSmart = false
	finalizeDerived(df)
	if df.Recommendation == "NO_DATA" {
		t.Errorf("failed controller-only drive scored NO_DATA")
	}
	// scoreDrive adds 40 for a non-healthy fw_state.
	if df.RiskScore < 40 {
		t.Errorf("failed controller-only drive score=%d, want >=40", df.RiskScore)
	}
}

// ---- Controller merge by DID, where DID != slot (perccli v007.2616) ----
// On util01 the slot<->DID numbering is crossed (slot 0 = DID 1, slot 1 =
// DID 0). smartctl's "megaraid,N" index equals the controller DID, so matching
// controller data by DID (not slot) must still yield the correct enclosure:slot.
func TestStorcli_DIDMatchingCrossed(t *testing.T) {
	idx := map[string]ctrlDrive{}
	for _, cd := range parseStorcli(readFixture(t, "perccli_show_all.txt")) {
		mergeCtrl(idx, cd)
	}
	cases := map[string]string{"0": "252:1", "1": "252:0"} // megaraid index -> enclosure:slot.
	for did, wantLoc := range cases {
		cd, ok := idx[did]
		if !ok {
			t.Errorf("DID %s missing from index", did)
			continue
		}
		d := &Drive{DeviceID: did} // DeviceID is the smartctl megaraid,N index.
		applyController(d, cd)
		if d.enclosureSlot() != wantLoc {
			t.Errorf("megaraid,%s -> %q, want %q", did, d.enclosureSlot(), wantLoc)
		}
	}
}

// ---- Real MegaCLI: matches storcli on the same hardware ----

func TestParseMegacli_Real(t *testing.T) {
	drives := parseMegacliPDList(readFixture(t, "megacli_pdlist.txt"))
	if len(drives) != 4 {
		t.Fatalf("parsed %d drives, want 4", len(drives))
	}
	byDID := map[string]ctrlDrive{}
	for _, cd := range drives {
		byDID[cd.DeviceID] = cd
	}
	cd, ok := byDID["20"]
	if !ok {
		t.Fatal("DID 20 missing")
	}
	// DID 20 == slot 2 (cross-checks the storcli capture above).
	if cd.Enclosure != "64" || cd.Slot != "2" || cd.OtherErr != 21 {
		t.Errorf("DID 20 = %s:%s other=%d, want 64:2 / 21", cd.Enclosure, cd.Slot, cd.OtherErr)
	}
	// MegaCLI spells the state out rather than abbreviating it.
	if cd.FwState != "Online, Spun Up" {
		t.Errorf("DID 20 fw_state=%q", cd.FwState)
	}
	if iv(cd.TempC) != 31 {
		t.Errorf("DID 20 temp=%v, want 31", cd.TempC)
	}
	if cd.SmartAlert {
		t.Errorf("DID 20 smart alert set, want false")
	}
}

// ---- MegaCLI SMART-alert line uses "Drive has flagged a S.M.A.R.T alert" ----

func TestMegacliSmartAlertPhrasing(t *testing.T) {
	// Minimal synthetic PD record containing only the fields under test.
	text := `Enclosure Device ID: 64
Slot Number: 5
Device Id: 99
Firmware state: Online, Spun Up
Drive has flagged a S.M.A.R.T alert : Yes
`
	drives := parseMegacliPDList(text)
	if len(drives) != 1 {
		t.Fatalf("parsed %d drives, want 1", len(drives))
	}
	if !drives[0].SmartAlert {
		t.Errorf("smart alert not detected from MegaCLI phrasing")
	}
}

// ---- Controller state scoring: storcli abbreviations are not faults ----

func TestFwStateScoring(t *testing.T) {
	cases := []struct {
		state   string
		wantPts bool // true => the +40 fw_state penalty should apply
	}{
		{"Onln", false},
		{"Online, Spun Up", false},
		{"GHS", false},
		{"JBOD", false},
		{"", false},
		{"Offln", true},
		{"Failed", true},
		{"Rebuild", true},
	}
	for _, c := range cases {
		// Every other field is left clean, so a score of 40 or more can only
		// come from the fw_state penalty.
		d := &Drive{HaveSmart: true, Model: "X", SmartHealth: "PASSED", FwState: c.state}
		score, _, _ := scoreDrive(d)
		got := score >= 40
		if got != c.wantPts {
			t.Errorf("fw_state %q: penalized=%v (score %d), want %v", c.state, got, score, c.wantPts)
		}
	}
}
diff --git a/schema.go b/schema.go
new file mode 100644
index 0000000..d78cbdc
--- /dev/null
+++ b/schema.go
@@ -0,0 +1,158 @@
package main

import "strconv"

// colKind is a column's value type. It drives formatting (CSV), typing (InfluxDB
// "i" suffix / JSON number / bool), and the tag-vs-field and label-vs-gauge
// splits the outputs derive from the table.
type colKind int

// The column kinds. kindStr is the zero value, so a column literal that omits
// kind is a string column.
const (
	kindStr   colKind = iota // string value
	kindInt                  // int or *int value
	kindFloat                // float64 or *float64 value
	kindBool                 // bool value
)

// column is the single source of truth for one output column: its name, value
// type, where it appears, and how to read it from a Drive. Every output (CSV,
// InfluxDB line protocol/JSON, Prometheus) is driven by this one table, so a new
// column is added in exactly one place.
//
// The output partitions are derived, not duplicated:
//   - InfluxDB tag     : kindStr column with influxTag set.
//   - InfluxDB field   : any non-csvOnly column that is not a tag.
//   - Prometheus label : every non-csvOnly kindStr column.
//   - Prometheus gauge : every non-csvOnly non-kindStr column (this includes
//     kindBool as well as the numeric kinds).
//   - csvOnly          : present in CSV only (timestamp, risk_reasons).
type column struct {
	name      string  // output column / metric name
	kind      colKind // value type; see colKind
	influxTag bool    // emit as an InfluxDB tag rather than a field
	csvOnly   bool    // emit in CSV only
	// value reads the column from d. It returns the typed Go value (string,
	// *int, int, *float64, float64, or bool); a nil pointer means "unknown" and
	// renders blank / is skipped by the metric outputs.
	value func(d *Drive) any
}

// columns is the ordered output schema. CSV emits these in this order; the
// metric outputs sort their own tag/field sets independently.
var columns = []column{
	// Collection context and identity.
	{name: "collected_at", kind: kindStr, csvOnly: true, value: func(d *Drive) any { return d.CollectedAt }},
	{name: "hostname", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.Hostname }},
	{name: "device_path", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.DevicePath }},
	{name: "dtype", kind: kindStr, value: func(d *Drive) any { return d.Dtype }},
	{name: "enclosure_slot", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.enclosureSlot() }},
	{name: "device_id", kind: kindStr, value: func(d *Drive) any { return d.DeviceID }},
	{name: "serial", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.Serial }},
	{name: "model", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.Model }},
	{name: "firmware", kind: kindStr, value: func(d *Drive) any { return d.Firmware }},
	{name: "capacity", kind: kindStr, value: func(d *Drive) any { return d.Capacity }},
	{name: "rotation", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.Rotation }},
	// Health verdict and defect counters.
	{name: "smart_health", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.SmartHealth }},
	{name: "defect_total", kind: kindInt, value: func(d *Drive) any { return d.DefectTotal }},
	{name: "udma_crc_errors", kind: kindInt, value: func(d *Drive) any { return d.UdmaCrc }},
	// RAID-controller signals.
	{name: "media_errors_ctrl", kind: kindInt, value: func(d *Drive) any { return d.MediaErrCtrl }},
	{name: "other_errors_ctrl", kind: kindInt, value: func(d *Drive) any { return d.OtherErrCtrl }},
	{name: "predictive_failure_ctrl", kind: kindInt, value: func(d *Drive) any { return d.PredictiveFailureCtrl }},
	{name: "smart_alert_ctrl", kind: kindBool, value: func(d *Drive) any { return d.SmartAlertCtrl }},
	{name: "fw_state", kind: kindStr, value: func(d *Drive) any { return d.FwState }},
	// Wear, reserve and host writes.
	{name: "wear_pct_consumed", kind: kindInt, value: func(d *Drive) any { return d.WearPctConsumed }},
	{name: "wear_src", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.WearSrc }},
	{name: "unused_reserve_pct", kind: kindInt, value: func(d *Drive) any { return d.UnusedReservePct }},
	{name: "host_written_tb", kind: kindFloat, value: func(d *Drive) any { return d.HostWrittenTB }},
	// NVMe health log.
	{name: "nvme_critical_warning", kind: kindInt, value: func(d *Drive) any { return d.NvmeCriticalWarning }},
	{name: "nvme_avail_spare", kind: kindInt, value: func(d *Drive) any { return d.NvmeAvailSpare }},
	{name: "nvme_avail_spare_thresh", kind: kindInt, value: func(d *Drive) any { return d.NvmeAvailSpareThresh }},
	// Age, power cycles and temperature.
	{name: "power_on_years", kind: kindFloat, value: func(d *Drive) any { return d.PowerOnYears }},
	{name: "power_cycle_count", kind: kindInt, value: func(d *Drive) any { return d.PowerCycleCount }},
	{name: "temp_c", kind: kindInt, value: func(d *Drive) any { return d.TempC }},
	// Derived score.
	{name: "risk_score", kind: kindInt, value: func(d *Drive) any { return d.RiskScore }},
	{name: "recommendation", kind: kindStr, influxTag: true, value: func(d *Drive) any { return d.Recommendation }},
	{name: "risk_reasons", kind: kindStr, csvOnly: true, value: func(d *Drive) any { return d.RiskReasons }},
}

// raw returns the column's typed value for d, or nil when the value is unknown
// (a nil pointer) or a blank string. Callers that need a presence test treat a
// nil return as "absent".
//
// Pointer values are dereferenced, so the result is always an untyped nil or a
// plain string, int, float64 or bool.
func (c column) raw(d *Drive) any {
	switch t := c.value(d).(type) {
	case nil:
		return nil
	case *int:
		// A typed nil pointer stored in an interface is not == nil, so it has
		// to be checked explicitly here.
		if t == nil {
			return nil
		}
		return *t
	case *float64:
		if t == nil {
			return nil
		}
		return *t
	case string:
		if t == "" {
			return nil
		}
		return t
	default:
		return t // int, float64, bool — always present.
	}
}

// format renders a raw value as its display string ("" for an absent value),
// matching the CSV/line-protocol textual form.
+func format(v any) string { + switch t := v.(type) { + case nil: + return "" + case string: + return t + case int: + return strconv.Itoa(t) + case float64: + return strconv.FormatFloat(t, 'f', -1, 64) + case bool: + if t { + return "true" + } + return "false" + default: + return "" + } +} + +// field returns the column's display string for d, or "" when unknown. It is the +// canonical text form shared by the CSV output and the test helpers. +func (d *Drive) field(name string) string { + for _, c := range columns { + if c.name == name { + return format(c.raw(d)) + } + } + return "" +} + +// labelColumns returns the string columns carried as Prometheus labels and (the +// influxTag subset) as InfluxDB tags — every non-csvOnly kindStr column. +func labelColumns() []column { + var out []column + for _, c := range columns { + if c.kind == kindStr && !c.csvOnly { + out = append(out, c) + } + } + return out +} + +// gaugeColumns returns the numeric columns emitted as Prometheus gauges and +// InfluxDB fields — every non-csvOnly column that is not a string. +func gaugeColumns() []column { + var out []column + for _, c := range columns { + if c.kind != kindStr && !c.csvOnly { + out = append(out, c) + } + } + return out +} diff --git a/score.go b/score.go new file mode 100644 index 0000000..6f17f3a --- /dev/null +++ b/score.go @@ -0,0 +1,173 @@ +package main + +import ( + "fmt" + "strings" +) + +// scoreDrive computes a drive's risk score, recommendation, and the reason +// string behind them. The scoring rules are deliberate: +// +// - Only real, drive-attributable defects add meaningful score. +// - Missing or unreadable data is never treated as a failure (no points). +// - Wear and age are graded to nudge toward planned replacement. 
//
// The score maps to a recommendation:
//
//	>= 100 -> REPLACE_NOW   (hard defect: drive is failing or failed)
//	>= 50  -> REPLACE_SOON  (serious wear or accumulating defects)
//	>= 20  -> MONITOR       (early warning signs)
//	<  20  -> OK
//
// A drive with no SMART data, no controller red flags, and no controller-
// reported state scores NO_DATA, meaning re-collect rather than replace.
//
// It returns the score, the recommendation, and the reasons joined with "; "
// ("no defects detected" when nothing scored).
func scoreDrive(d *Drive) (int, string, string) {
	score := 0
	var reasons []string
	// add records one scoring contribution together with its explanation.
	add := func(pts int, msg string) {
		score += pts
		reasons = append(reasons, msg)
	}
	// min caps the per-signal contributions below.
	min := func(a, b int) int {
		if a < b {
			return a
		}
		return b
	}

	// ---- Hard physical defects (drive-attributable) ----
	// Each has a base penalty plus 5 per unit, capped at 100.
	if realloc := iv(d.Reallocated); realloc > 0 {
		pts := min(40+realloc*5, 100)
		add(pts, fmt.Sprintf("reallocated=%d(+%d)", realloc, pts))
	}
	if pending := iv(d.Pending); pending > 0 {
		pts := min(50+pending*5, 100)
		add(pts, fmt.Sprintf("pending=%d(+%d)", pending, pts))
	}
	if uncorr := iv(d.Uncorrectable); uncorr > 0 {
		pts := min(60+uncorr*5, 100)
		add(pts, fmt.Sprintf("uncorrectable=%d(+%d)", uncorr, pts))
	}
	// These three have no base penalty, only a per-unit rate and a cap.
	if reported := iv(d.ReportedUncorrect); reported > 0 {
		pts := min(reported*10, 60)
		add(pts, fmt.Sprintf("reported_uncorrect=%d(+%d)", reported, pts))
	}
	if e2e := iv(d.EndToEnd); e2e > 0 {
		pts := min(e2e*20, 80)
		add(pts, fmt.Sprintf("end_to_end_err=%d(+%d)", e2e, pts))
	}
	if badblk := iv(d.RuntimeBadBlocks); badblk > 0 {
		pts := min(badblk*5, 40)
		add(pts, fmt.Sprintf("runtime_bad_blocks=%d(+%d)", badblk, pts))
	}

	// ---- SMART self-assessment: only penalize an EXPLICIT failure ----
	if d.SmartHealth == "FAILED" {
		add(100, "SMART_health=FAILED(+100)")
	}

	// ---- SATA link quality (cabling/backplane, not the NAND) ----
	if crc := iv(d.UdmaCrc); crc > 0 {
		pts := min(crc*3, 25)
		add(pts, fmt.Sprintf("udma_crc=%d(+%d)", crc, pts))
	}

	// ---- RAID-controller signals ----
	if d.PredictiveFailureCtrl > 0 {
		add(70, "ctrl_predictive_failure(+70)")
	}
	if d.SmartAlertCtrl {
		add(50, "ctrl_smart_alert(+50)")
	}
	// Penalize a controller-reported state only when it is not a healthy one.
	// MegaCLI spells these out ("Online, Spun Up", "Hotspare") while storcli
	// abbreviates ("Onln", "GHS"/"DHS" for hot spares, "JBOD"); all are fine.
	// An empty state (no controller data) is also treated as healthy.
	fw := strings.ToLower(d.FwState)
	fwHealthy := fw == "" ||
		strings.Contains(fw, "online") || strings.Contains(fw, "onln") ||
		strings.Contains(fw, "hotspare") || strings.Contains(fw, "ghs") ||
		strings.Contains(fw, "dhs") || strings.Contains(fw, "jbod")
	if !fwHealthy {
		add(40, fmt.Sprintf("fw_state=%s(+40)", d.FwState))
	}

	// MegaCLI/storcli media errors: soft signal, graded gently and capped.
	me := d.MediaErrCtrl
	switch {
	case me >= 100:
		add(30, fmt.Sprintf("ctrl_media_errors=%d(+30)", me))
	case me >= 20:
		add(15, fmt.Sprintf("ctrl_media_errors=%d(+15)", me))
	case me > 0:
		add(5, fmt.Sprintf("ctrl_media_errors=%d(+5)", me))
	}

	// ---- NVMe critical warning bitmask (any bit set is a real alert) ----
	if d.NvmeCriticalWarning != nil && *d.NvmeCriticalWarning > 0 {
		add(60, fmt.Sprintf("nvme_critical_warning=0x%02x(+60)", *d.NvmeCriticalWarning))
	}
	// NVMe spare below threshold -> reserve exhaustion.
	// Only evaluated when the drive reported both values.
	if d.NvmeAvailSpare != nil && d.NvmeAvailSpareThresh != nil &&
		*d.NvmeAvailSpare <= *d.NvmeAvailSpareThresh {
		add(40, fmt.Sprintf("nvme_avail_spare<=thresh(%d<=%d)(+40)",
			*d.NvmeAvailSpare, *d.NvmeAvailSpareThresh))
	}

	// ---- Wear (graded; only meaningful with real SMART data) ----
	// Below 60% consumed adds nothing.
	if d.WearPctConsumed != nil {
		wc := *d.WearPctConsumed
		switch {
		case wc >= 95:
			add(80, fmt.Sprintf("wear_consumed=%d%%(+80)", wc))
		case wc >= 90:
			add(55, fmt.Sprintf("wear_consumed=%d%%(+55)", wc))
		case wc >= 80:
			add(30, fmt.Sprintf("wear_consumed=%d%%(+30)", wc))
		case wc >= 70:
			add(15, fmt.Sprintf("wear_consumed=%d%%(+15)", wc))
		case wc >= 60:
			add(8, fmt.Sprintf("wear_consumed=%d%%(+8)", wc))
		}
	}

	// ---- Reserve-block exhaustion (Micron ID180 VALUE -> remaining %) ----
	if d.UnusedReservePct != nil && *d.UnusedReservePct <= 10 {
		add(30, fmt.Sprintf("reserve_blocks_low(val=%d)(+30)", *d.UnusedReservePct))
	}

	// ---- Age (gentle nudge only) ----
	// Thresholds are multiples of 8760 h (one 365-day year); under 5 years
	// adds nothing.
	hours := iv(d.PowerOnHours)
	switch {
	case hours >= 61320: // Older than 7 years.
		add(15, fmt.Sprintf("age=%dh(+15)", hours))
	case hours >= 52560: // Older than 6 years.
		add(8, fmt.Sprintf("age=%dh(+8)", hours))
	case hours >= 43800: // Older than 5 years.
		add(4, fmt.Sprintf("age=%dh(+4)", hours))
	}

	// ---- Decide recommendation ----
	// NO_DATA only when nothing observed the drive: no SMART, no controller error
	// signals, and no controller state. A controller-only drive (e.g. NVMe behind
	// a PERC) reports a FwState, so it is scored on controller evidence instead.
	// The NO_DATA result always carries score 0, discarding anything added above.
	if !d.HaveSmart && me == 0 && d.PredictiveFailureCtrl == 0 && !d.SmartAlertCtrl && d.FwState == "" {
		return 0, "NO_DATA", "smartctl returned no usable SMART data; re-collect"
	}

	var rec string
	switch {
	case score >= 100:
		rec = "REPLACE_NOW"
	case score >= 50:
		rec = "REPLACE_SOON"
	case score >= 20:
		rec = "MONITOR"
	default:
		rec = "OK"
	}

	if len(reasons) == 0 {
		return score, rec, "no defects detected"
	}
	return score, rec, strings.Join(reasons, "; ")
}
diff --git a/smart_json.go b/smart_json.go
new file mode 100644
index 0000000..447ff48
--- /dev/null
+++ b/smart_json.go
@@ -0,0 +1,284 @@
package main

import (
	"fmt"
	"strings"
)

// wearAttr maps vendor SSD-life attribute IDs to a source label. For all of
// these the normalized VALUE expresses "% life remaining".
//
// Order matters: applyAtaCounters uses the first entry present in the table.
var wearAttr = []struct {
	id  int
	src string
}{
	{173, "Micron/ID173"},
	{202, "Intel/ID202"},
	{231, "Intel/ID231"},
	{177, "Samsung/ID177"},
	{233, "Generic/ID233"},
}

// ataAttr is one parsed row of an ATA SMART attribute table, shared by the JSON
// and text paths. raw is the attribute's raw counter; value/worst are the
// vendor-normalized current/worst values.
type ataAttr struct {
	value      *int   // normalized current value; nil when not parsed
	worst      *int   // normalized worst value; nil when not parsed
	whenFailed string // failure marker as reported; see attrsFailed
	raw        *int   // raw counter; nil when not parsed
}

// attrRaw returns the raw counter for attribute id, or nil when absent.
func attrRaw(attrs map[int]ataAttr, id int) *int {
	if a, ok := attrs[id]; ok {
		return a.raw
	}
	return nil
}

// attrVal returns the normalized current value for attribute id, or nil.
func attrVal(attrs map[int]ataAttr, id int) *int {
	if a, ok := attrs[id]; ok {
		return a.value
	}
	return nil
}

// attrsFailed reports whether any attribute is flagged failed now or in the
// past — the basis for the PASSED_BY_ATTR/FAILED verdict when no explicit
// overall health result is available.
+func attrsFailed(attrs map[int]ataAttr) bool { + for _, a := range attrs { + wf := strings.ToLower(a.whenFailed) + if wf == "now" || wf == "past" { + return true + } + } + return false +} + +// applyAtaCounters maps the parsed ATA attribute table onto the defect, wear, +// reserve-block, and host-write fields shared by the JSON and text paths. Power- +// on hours fall back to attribute 9 only when not already set from a dedicated +// field. Path-specific fallbacks (power-cycle/temperature on text, SCSI/NVMe on +// JSON) stay with their callers. +func applyAtaCounters(attrs map[int]ataAttr, d *Drive) { + d.Reallocated = attrRaw(attrs, 5) + d.ReallocatedEvents = attrRaw(attrs, 196) + d.Pending = attrRaw(attrs, 197) + d.Uncorrectable = attrRaw(attrs, 198) + d.UdmaCrc = attrRaw(attrs, 199) + d.ReportedUncorrect = attrRaw(attrs, 187) + d.RuntimeBadBlocks = attrRaw(attrs, 183) + d.EndToEnd = attrRaw(attrs, 184) + if d.PowerOnHours == nil { + d.PowerOnHours = attrRaw(attrs, 9) + } + + // Wear (vendor-normalized; VALUE = % remaining). + for _, w := range wearAttr { + if v := attrVal(attrs, w.id); v != nil { + d.WearPctRemaining = v + if a, ok := attrs[w.id]; ok { + d.WearPctWorst = a.worst + } + d.WearSrc = w.src + d.WearPctConsumed = pInt(100 - *v) + break + } + } + + // Micron ID180 reserve blocks (VALUE = % remaining) and ID246 host writes. + if a, ok := attrs[180]; ok { + d.UnusedReservePct = a.value + } + if lba := attrRaw(attrs, 246); lba != nil && *lba > 0 { + d.HostWrittenTB = pF(float64(*lba) * 512.0 / 1e12) + } +} + +// parseSmartJSON fills d from a smartctl -j object (ATA/SATA, SAS/SCSI, or NVMe). 
+func parseSmartJSON(j map[string]interface{}, d *Drive) { + if j == nil { + return + } + + d.Model = first(jStr(j, "model_name"), jStr(j, "scsi_model_name")) + d.Serial = jStr(j, "serial_number") + d.Transport = jStr(j, "scsi_transport_protocol", "name") + d.Firmware = first(jStr(j, "firmware_version"), jStr(j, "scsi_revision"), jStr(j, "revision")) + + if cap := jInt(j, "user_capacity", "bytes"); cap != nil && *cap > 0 { + d.Capacity = fmt.Sprintf("%.2f TB", float64(*cap)/1e12) + } + + switch rr := jInt(j, "rotation_rate"); { + case rr != nil && *rr == 0: + d.Rotation = "SSD" + case rr != nil: + d.Rotation = fmt.Sprintf("%d rpm", *rr) + default: + d.Rotation = "SSD" // Absent rotation_rate: assume SSD; NVMe is corrected just below. + } + if strings.Contains(strings.ToLower(jStr(j, "device", "type")), "nvme") || + jObj(j, "nvme_smart_health_information_log") != nil { + d.Rotation = "NVMe" + } + + d.PowerOnHours = jInt(j, "power_on_time", "hours") + d.PowerCycleCount = jInt(j, "power_cycle_count") + d.TempC = jInt(j, "temperature", "current") + + // ---- ATA attribute table ---- + attrs := map[int]ataAttr{} + if table, ok := jLeaf(j, "ata_smart_attributes", "table").([]interface{}); ok { + for _, it := range table { + a, ok := it.(map[string]interface{}) + if !ok { + continue + } + id := jInt(a, "id") + if id == nil { + continue + } + at := ataAttr{ + value: jInt(a, "value"), + worst: jInt(a, "worst"), + whenFailed: jStr(a, "when_failed"), + } + // Prefer the leading integer of raw.string (raw.value overflows + // for some attributes); fall back to raw.value. 
+ if rs := jStr(a, "raw", "string"); rs != "" { + if n, ok := firstInt(rs); ok { + at.raw = &n + } + } + if at.raw == nil { + at.raw = jInt(a, "raw", "value") + } + attrs[*id] = at + } + } + + // ---- SMART health verdict ---- + if passed := jBoolPtr(j, "smart_status", "passed"); passed != nil { + if *passed { + d.SmartHealth = "PASSED" + } else { + d.SmartHealth = "FAILED" + } + } else if len(attrs) > 0 { + if attrsFailed(attrs) { + d.SmartHealth = "FAILED" + } else { + d.SmartHealth = "PASSED_BY_ATTR" + } + } else { + d.SmartHealth = "UNKNOWN" + } + + // Defect, wear, reserve, and host-write fields shared with the text path. + applyAtaCounters(attrs, d) + + // ---- SCSI/SAS endurance + grown defect list ---- + if d.WearPctRemaining == nil { + if pu := jInt(j, "scsi_percentage_used_endurance_indicator"); pu != nil { + d.WearPctConsumed = pu + d.WearPctRemaining = pInt(100 - *pu) + d.WearSrc = "SCSI/endurance" + } + } + if grown := jInt(j, "scsi_grown_defect_list"); grown != nil && d.Reallocated == nil { + d.Reallocated = grown + } + // SAS drives have no ATA attribute table; their hard-defect signals live in + // the SCSI logs. Map them onto the fields the scorer already grades: total + // uncorrected read/write/verify errors -> uncorrectable sectors, and the + // pending (to-be-reassigned) defect count -> current pending sectors. 
+ if d.Uncorrectable == nil { + if ec := jObj(j, "scsi_error_counter_log"); ec != nil { + sum, any := 0, false + for _, op := range []string{"read", "write", "verify"} { + if u := jInt(ec, op, "total_uncorrected_errors"); u != nil { + any = true + sum += *u + } + } + if any { + d.Uncorrectable = pInt(sum) + } + } + } + if d.Pending == nil { + if pd := jInt(j, "scsi_pending_defects", "count"); pd != nil { + d.Pending = pd + } + } + + // ---- NVMe SMART/Health log ---- + if nv := jObj(j, "nvme_smart_health_information_log"); nv != nil { + d.NvmeCriticalWarning = jInt(nv, "critical_warning") + d.NvmeAvailSpare = jInt(nv, "available_spare") + d.NvmeAvailSpareThresh = jInt(nv, "available_spare_threshold") + d.NvmeMediaErrors = jInt(nv, "media_errors") + if d.PowerOnHours == nil { + d.PowerOnHours = jInt(nv, "power_on_hours") + } + if d.PowerCycleCount == nil { + d.PowerCycleCount = jInt(nv, "power_cycles") + } + if d.TempC == nil { + d.TempC = jInt(nv, "temperature") + } + if pu := jInt(nv, "percentage_used"); pu != nil { + d.WearPctConsumed = pu + d.WearPctRemaining = pInt(100 - *pu) + d.WearSrc = "NVMe/percentage_used" + } + // Treat NVMe media+data integrity errors like uncorrectable sectors. + if d.Uncorrectable == nil && d.NvmeMediaErrors != nil { + d.Uncorrectable = d.NvmeMediaErrors + } + } + + // Capture smartctl passthrough diagnostics. + if msgs, ok := jLeaf(j, "smartctl", "messages").([]interface{}); ok { + var parts []string + for _, mm := range msgs { + if mo, ok := mm.(map[string]interface{}); ok { + if s := jStr(mo, "string"); s != "" { + parts = append(parts, s) + } + } + } + d.SmartctlMessages = strings.Join(parts, "; ") + } +} + +// jsonUsable reports whether the parsed object carries real identity + health. 
+func jsonUsable(j map[string]interface{}) bool { + if j == nil { + return false + } + hasID := jStr(j, "model_name") != "" || jStr(j, "scsi_model_name") != "" + if !hasID { + return false + } + if jObj(j, "ata_smart_attributes") != nil || + jObj(j, "smart_status") != nil || + jObj(j, "nvme_smart_health_information_log") != nil || + jInt(j, "scsi_percentage_used_endurance_indicator") != nil { + return true + } + return false +} + +// first returns the first non-empty string in vals, or "". +func first(vals ...string) string { + for _, v := range vals { + if v != "" { + return v + } + } + return "" +} diff --git a/smart_text.go b/smart_text.go new file mode 100644 index 0000000..93306e1 --- /dev/null +++ b/smart_text.go @@ -0,0 +1,303 @@ +package main + +import ( + "fmt" + "regexp" + "strings" +) + +// parseSmartText parses `smartctl -a` PLAIN TEXT output. This is the path used +// on CentOS 6/7 where smartmontools (5.x/6.x) predates `--json` (7.0, 2019). +// Handles ATA/SATA, NVMe, and SAS/SCSI layouts. 
+func parseSmartText(text string, d *Drive) { + if strings.TrimSpace(text) == "" { + return + } + lines := strings.Split(text, "\n") + + d.Rotation = "SSD" + val := func(prefix string) string { + for _, ln := range lines { + if i := strings.Index(ln, prefix); i >= 0 { + return strings.TrimSpace(ln[i+len(prefix):]) + } + } + return "" + } + + // ---- Identity ---- + d.Model = first(val("Device Model:"), val("Model Number:"), val("Product:"), val("Model Family:")) + d.Serial = first(val("Serial Number:"), val("Serial number:")) + d.Firmware = first(val("Firmware Version:"), val("Revision:")) + + if uc := val("User Capacity:"); uc != "" { + // "1,920,383,410,176 bytes [1.92 TB]" + if m := regexp.MustCompile(`([\d,]+)\s*bytes`).FindStringSubmatch(uc); m != nil { + if n, ok := parseIntLoose(m[1]); ok && n > 0 { + d.Capacity = fmt.Sprintf("%.2f TB", float64(n)/1e12) + } + } + } + + if rr := val("Rotation Rate:"); rr != "" { + if strings.Contains(strings.ToLower(rr), "solid state") { + d.Rotation = "SSD" + } else if m := regexp.MustCompile(`(\d+)\s*rpm`).FindStringSubmatch(strings.ToLower(rr)); m != nil { + d.Rotation = m[1] + " rpm" + } + } + isNVMe := false + for _, ln := range lines { + if strings.Contains(ln, "NVMe Log") || strings.Contains(ln, "SMART/Health Information (NVMe") { + isNVMe = true + break + } + } + if isNVMe { + d.Rotation = "NVMe" + } + + // ---- SMART overall health ---- + // ATA: "SMART overall-health self-assessment test result: PASSED" + // SCSI: "SMART Health Status: OK" + if h := val("self-assessment test result:"); h != "" { + up := strings.ToUpper(h) + if strings.Contains(up, "PASS") { + d.SmartHealth = "PASSED" + } else if strings.Contains(up, "FAIL") { + d.SmartHealth = "FAILED" + } else { + d.SmartHealth = "UNKNOWN" + } + } else if h := val("SMART Health Status:"); h != "" { + if strings.Contains(strings.ToUpper(h), "OK") { + d.SmartHealth = "PASSED" + } else { + d.SmartHealth = "FAILED" + } + } else { + d.SmartHealth = "UNKNOWN" + } + + if 
isNVMe { + parseNVMeText(val, d) + return + } + if attrs := parseATAAttrTable(lines); len(attrs) > 0 { + applyATAAttrs(attrs, d) + return + } + // SAS/SCSI fallback fields. + parseSCSIText(val, d) + parseSCSIErrors(lines, d) +} + +// parseATAAttrTable parses the "Vendor Specific SMART Attributes" table: +// +// ID# ATTRIBUTE_NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE +// 5 Reallocated_Sector_Ct 0x0033 100 100 010 Pre-fail Always - 0 +func parseATAAttrTable(lines []string) map[int]ataAttr { + attrs := map[int]ataAttr{} + inTable := false + for _, ln := range lines { + if strings.Contains(ln, "ATTRIBUTE_NAME") && strings.Contains(ln, "RAW_VALUE") { + inTable = true + continue + } + if !inTable { + continue + } + if strings.TrimSpace(ln) == "" { + break + } + f := strings.Fields(ln) + if len(f) < 10 { + continue + } + id, ok := firstInt(f[0]) + if !ok { + continue + } + ta := ataAttr{whenFailed: f[8]} + if v, ok := firstInt(f[3]); ok { + ta.value = &v + } + if w, ok := firstInt(f[4]); ok { + ta.worst = &w + } + // RAW_VALUE is the remainder from field 9 onward; take leading int. + if r, ok := firstInt(strings.Join(f[9:], " ")); ok { + ta.raw = &r + } + attrs[id] = ta + } + return attrs +} + +// applyATAAttrs maps the parsed ATA attribute table onto d: the health-by- +// attribute verdict when the overall result was unknown, the shared defect/wear/ +// reserve fields, and the text-path-only power-cycle and temperature fallbacks. +func applyATAAttrs(attrs map[int]ataAttr, d *Drive) { + // Health-by-attribute when the overall verdict was unknown. + if d.SmartHealth == "UNKNOWN" && len(attrs) > 0 { + if attrsFailed(attrs) { + d.SmartHealth = "FAILED" + } else { + d.SmartHealth = "PASSED_BY_ATTR" + } + } + + // Defect, wear, reserve, and host-write fields shared with the JSON path. + applyAtaCounters(attrs, d) + + // Text-path fallbacks: JSON reads these from dedicated fields instead. 
+ if d.PowerCycleCount == nil { + d.PowerCycleCount = attrRaw(attrs, 12) + } + if d.TempC == nil { + d.TempC = attrRaw(attrs, 194) + } +} + +// parseNVMeText fills NVMe health fields from the "SMART/Health Information +// (NVMe Log)" section using the shared val() prefix lookup. +func parseNVMeText(val func(string) string, d *Drive) { + if cw := val("Critical Warning:"); cw != "" { + // "0x00" + if n, ok := parseHexOrInt(cw); ok { + d.NvmeCriticalWarning = &n + } + } + if as := val("Available Spare:"); as != "" { + if n, ok := firstInt(as); ok { + d.NvmeAvailSpare = &n + } + } + if at := val("Available Spare Threshold:"); at != "" { + if n, ok := firstInt(at); ok { + d.NvmeAvailSpareThresh = &n + } + } + if pu := val("Percentage Used:"); pu != "" { + if n, ok := firstInt(pu); ok { + d.WearPctConsumed = &n + d.WearPctRemaining = pInt(100 - n) + d.WearSrc = "NVMe/percentage_used" + } + } + if me := val("Media and Data Integrity Errors:"); me != "" { + if n, ok := parseIntLoose(me); ok { + d.NvmeMediaErrors = &n + if d.Uncorrectable == nil { + d.Uncorrectable = &n + } + } + } + if d.PowerOnHours == nil { + if n, ok := parseIntLoose(val("Power On Hours:")); ok { + d.PowerOnHours = &n + } + } + if d.PowerCycleCount == nil { + if n, ok := parseIntLoose(val("Power Cycles:")); ok { + d.PowerCycleCount = &n + } + } + if d.TempC == nil { + if n, ok := firstInt(val("Temperature:")); ok { + d.TempC = &n + } + } +} + +// parseSCSIText fills SAS/SCSI fields (temperature, grown defect list, +// endurance, power-on hours) that smartctl prints as "Label: value" lines. 
+func parseSCSIText(val func(string) string, d *Drive) { + if t := val("Current Drive Temperature:"); t != "" { + if n, ok := firstInt(t); ok { + d.TempC = &n + } + } + if g := val("Elements in grown defect list:"); g != "" { + if n, ok := parseIntLoose(g); ok { + d.Reallocated = &n + } + } + if e := val("Percentage used endurance indicator:"); e != "" { + if n, ok := firstInt(e); ok { + d.WearPctConsumed = &n + d.WearPctRemaining = pInt(100 - n) + d.WearSrc = "SCSI/endurance" + } + } + if h := val("number of hours powered up"); h != "" { + // "= 12345.67" + if m := regexp.MustCompile(`([\d,]+)`).FindString(h); m != "" { + if n, ok := parseIntLoose(m); ok { + d.PowerOnHours = &n + } + } + } + // Newer smartctl prints "Accumulated power on time, hours:minutes 2487:44". + if d.PowerOnHours == nil { + if h := val("Accumulated power on time, hours:minutes"); h != "" { + if n, ok := firstInt(h); ok { + d.PowerOnHours = &n + } + } + } +} + +// parseSCSIErrors handles the SAS "Error counter log" and pending-defect count, +// the SAS analog of ATA uncorrectable/pending sectors. These need the full line +// list (the error log is a multi-line table), not just the val() prefix lookup. +func parseSCSIErrors(lines []string, d *Drive) { + if d.Uncorrectable == nil { + // Each of read:/write:/verify: ends in a "total uncorrected errors" count. + sum, any := 0, false + for _, ln := range lines { + f := strings.Fields(ln) + if len(f) < 7 { + continue + } + switch f[0] { + case "read:", "write:", "verify:": + if n, ok := parseIntLoose(f[len(f)-1]); ok { + any = true + sum += n + } + } + } + if any { + d.Uncorrectable = pInt(sum) + } + } + if d.Pending == nil { + // " Pending defect count:0 Pending Defects" (no space after the colon). 
+ for _, ln := range lines { + if i := strings.Index(ln, "Pending defect count:"); i >= 0 { + if n, ok := firstInt(ln[i+len("Pending defect count:"):]); ok { + d.Pending = pInt(n) + } + break + } + } + } +} + +// parseHexOrInt parses s as hex when it carries a 0x/0X prefix, otherwise as a +// loose decimal int. The NVMe critical-warning field arrives as "0x00". +func parseHexOrInt(s string) (int, bool) { + s = strings.TrimSpace(s) + if strings.HasPrefix(s, "0x") || strings.HasPrefix(s, "0X") { + var n int + if _, err := fmt.Sscanf(s, "0x%x", &n); err == nil { + return n, true + } + if _, err := fmt.Sscanf(s, "0X%x", &n); err == nil { + return n, true + } + return 0, false + } + return parseIntLoose(s) +} diff --git a/testdata/megacli_pdlist.txt b/testdata/megacli_pdlist.txt new file mode 100644 index 0000000..58a4adb --- /dev/null +++ b/testdata/megacli_pdlist.txt @@ -0,0 +1,189 @@ + +Adapter #0 + +Enclosure Device ID: 64 +Slot Number: 0 +Drive's position: DiskGroup: 0, Span: 0, Arm: 0 +Enclosure position: 1 +Device Id: 22 +WWN: 5002538f31000001 +Sequence Number: 2 +Media Error Count: 0 +Other Error Count: 7 +Predictive Failure Count: 0 +Last Predictive Failure Event Seq Number: 0 +PD Type: SATA + +Raw Size: 1.819 TB [0xe8e088b0 Sectors] +Non Coerced Size: 1.818 TB [0xe8d088b0 Sectors] +Coerced Size: 1.818 TB [0xe8d00000 Sectors] +Sector Size: 512 +Firmware state: Online, Spun Up +Device Firmware Level: 2B6Q +Shield Counter: 0 +Successful diagnostics completion on : N/A +SAS Address(0): 0x3b07b250d55b7500 +Connected Port Number: 0(path0) +Inquiry Data: S624NS0RA00001J Samsung SSD 870 EVO 2TB SVT02B6Q +FDE Capable: Capable +FDE Enable: Disable +Secured: Unsecured +Locked: Unlocked +Needs EKM Attention: No +Foreign State: None +Device Speed: 6.0Gb/s +Link Speed: 6.0Gb/s +Media Type: Solid State Device +Drive: Not Certified +Drive Temperature :31C (87.80 F) +PI Eligibility: No +Drive is formatted for PI information: No +PI: No PI +Drive's NCQ setting : Enabled +Port-0 
: +Port status: Active +Port's Linkspeed: 6.0Gb/s +Drive has flagged a S.M.A.R.T alert : No + + + +Enclosure Device ID: 64 +Slot Number: 1 +Drive's position: DiskGroup: 0, Span: 0, Arm: 1 +Enclosure position: 1 +Device Id: 23 +WWN: 5002538f31000002 +Sequence Number: 2 +Media Error Count: 0 +Other Error Count: 6 +Predictive Failure Count: 0 +Last Predictive Failure Event Seq Number: 0 +PD Type: SATA + +Raw Size: 1.819 TB [0xe8e088b0 Sectors] +Non Coerced Size: 1.818 TB [0xe8d088b0 Sectors] +Coerced Size: 1.818 TB [0xe8d00000 Sectors] +Sector Size: 512 +Firmware state: Online, Spun Up +Device Firmware Level: 2B6Q +Shield Counter: 0 +Successful diagnostics completion on : N/A +SAS Address(0): 0x3b07b250d55b7501 +Connected Port Number: 1(path0) +Inquiry Data: S624NS0RA00002L Samsung SSD 870 EVO 2TB SVT02B6Q +FDE Capable: Capable +FDE Enable: Disable +Secured: Unsecured +Locked: Unlocked +Needs EKM Attention: No +Foreign State: None +Device Speed: 6.0Gb/s +Link Speed: 6.0Gb/s +Media Type: Solid State Device +Drive: Not Certified +Drive Temperature :30C (86.00 F) +PI Eligibility: No +Drive is formatted for PI information: No +PI: No PI +Drive's NCQ setting : Enabled +Port-0 : +Port status: Active +Port's Linkspeed: 6.0Gb/s +Drive has flagged a S.M.A.R.T alert : No + + + +Enclosure Device ID: 64 +Slot Number: 2 +Drive's position: DiskGroup: 0, Span: 0, Arm: 2 +Enclosure position: 1 +Device Id: 20 +WWN: 5002538f31000003 +Sequence Number: 2 +Media Error Count: 0 +Other Error Count: 21 +Predictive Failure Count: 0 +Last Predictive Failure Event Seq Number: 0 +PD Type: SATA + +Raw Size: 1.819 TB [0xe8e088b0 Sectors] +Non Coerced Size: 1.818 TB [0xe8d088b0 Sectors] +Coerced Size: 1.818 TB [0xe8d00000 Sectors] +Sector Size: 512 +Firmware state: Online, Spun Up +Device Firmware Level: 2B6Q +Shield Counter: 0 +Successful diagnostics completion on : N/A +SAS Address(0): 0x3b07b250d55b7502 +Connected Port Number: 2(path0) +Inquiry Data: S624NS0RC00003M Samsung SSD 870 EVO 2TB 
SVT02B6Q +FDE Capable: Capable +FDE Enable: Disable +Secured: Unsecured +Locked: Unlocked +Needs EKM Attention: No +Foreign State: None +Device Speed: 6.0Gb/s +Link Speed: 6.0Gb/s +Media Type: Solid State Device +Drive: Not Certified +Drive Temperature :31C (87.80 F) +PI Eligibility: No +Drive is formatted for PI information: No +PI: No PI +Drive's NCQ setting : Enabled +Port-0 : +Port status: Active +Port's Linkspeed: 6.0Gb/s +Drive has flagged a S.M.A.R.T alert : No + + + +Enclosure Device ID: 64 +Slot Number: 3 +Drive's position: DiskGroup: 0, Span: 0, Arm: 3 +Enclosure position: 1 +Device Id: 21 +WWN: 5002538f31000004 +Sequence Number: 2 +Media Error Count: 0 +Other Error Count: 12 +Predictive Failure Count: 0 +Last Predictive Failure Event Seq Number: 0 +PD Type: SATA + +Raw Size: 1.819 TB [0xe8e088b0 Sectors] +Non Coerced Size: 1.818 TB [0xe8d088b0 Sectors] +Coerced Size: 1.818 TB [0xe8d00000 Sectors] +Sector Size: 512 +Firmware state: Online, Spun Up +Device Firmware Level: 2B6Q +Shield Counter: 0 +Successful diagnostics completion on : N/A +SAS Address(0): 0x3b07b250d55b7503 +Connected Port Number: 3(path0) +Inquiry Data: S624NS0RA00004W Samsung SSD 870 EVO 2TB SVT02B6Q +FDE Capable: Capable +FDE Enable: Disable +Secured: Unsecured +Locked: Unlocked +Needs EKM Attention: No +Foreign State: None +Device Speed: 6.0Gb/s +Link Speed: 6.0Gb/s +Media Type: Solid State Device +Drive: Not Certified +Drive Temperature :30C (86.00 F) +PI Eligibility: No +Drive is formatted for PI information: No +PI: No PI +Drive's NCQ setting : Enabled +Port-0 : +Port status: Active +Port's Linkspeed: 6.0Gb/s +Drive has flagged a S.M.A.R.T alert : No + + + + +Exit Code: 0x00 diff --git a/testdata/perccli2_show_all.json b/testdata/perccli2_show_all.json new file mode 100644 index 0000000..dbffd22 --- /dev/null +++ b/testdata/perccli2_show_all.json @@ -0,0 +1,185 @@ +{ +"Controllers":[ +{ + "Command Status" : { + "CLI Version" : "008.0004.0000.0022 Apr 28, 2023", + "Operating system" 
: "Linux6.8.0-124-generic", + "Controller" : "0", + "Status" : "Success", + "Description" : "Show Drive Information Succeeded." + }, + "Response Data" : { + "Drives List" : [ + { + "Drive Information" : { + "EID:Slt" : "284:0", + "PID" : 275, + "State" : "Conf", + "Status" : "Online", + "DG" : 0, + "Size" : "893.75 GiB", + "Intf" : "NVMe", + "Med" : "SSD", + "SED_Type" : "-", + "SeSz" : "512B", + "Model" : "Dell DC NVMe PM9A3 RI U.2 960GB ", + "Sp" : "U", + "LU/NS Count" : 1, + "Alt-EID" : "-" + }, + "LU/NS Information" : [ + { + "PID" : 275, + "LUN/NSID" : "0/1", + "Status" : "Online", + "Size" : "893.75 GiB" + } + ], + "Drive Detailed Information" : { + "Shield Counter" : 0, + "Temperature(C)" : 30, + "Serial Number" : "S6JGNA0X000001 ", + "Vendor" : "NVMe ", + "Model" : "Dell DC NVMe PM9A3 RI U.2 960GB ", + "WWN" : "3100166D00000001", + "Firmware Revision Level" : "1.0.0 ", + "Logical Sector Size" : "512B", + "Physical Sector Size" : "512B", + "Raw size" : "894.252 GiB [0x6fc81ab0 Sectors]", + "Coerced size" : "893.75 GiB [0x6fb80000 Sectors]", + "Capable Speed" : "16.0GT/s", + "Capable Link Width" : "x4", + "Negotiated Link Width" : "x2", + "Drive position" : "DriveGroup:0, Span:0, Row:0", + "Sequence Number" : 2, + "Commissioned Spare" : "No", + "Emergency Spare" : "No", + "Successful Shield Diagnostics completed on(Localtime yyyy/mm/dd hh:mm:sec)" : "NA", + "SED Capable" : "No", + "ISE Capable" : "Yes", + "T10 Power Mode" : "No", + "Needs EKM Attention" : "No", + "Secured By EKM" : "No", + "Certified" : "Yes", + "Supported Data Format" : "PRP", + "Device port count" : 1, + "Path Information" : [ + { + "WWID" : "0x4100166d002538c1", + "DevicePID" : 275, + "Path" : "Primary", + "Negotiated Speed" : "16.0GT/s", + "Num Phys/Lanes used" : "Unknown" + } + ], + "LU/NS Properties" : { + "Media Error Count" : 0, + "Other Error Count" : 0, + "Predictive Failure Count" : 0, + "Last Predictive Failure Event Sequence Number" : 0, + "Logical Sector Size" : "512B", + 
"Physical Sector Size" : "512B", + "Raw size" : "894.252 GiB [0x6fc81ab0 Sectors]", + "Coerced size" : "893.75 GiB [0x6fb80000 Sectors]", + "FW managed drive security" : "No", + "Secured" : "No", + "Locked" : "No", + "PI Formatted" : "No", + "PI type" : "No PI", + "Number of bytes of user data in LBA" : "512B", + "Current Write Cache" : "Off", + "Default Write Cache" : "Off", + "Write Cache Changeable" : "No" + }, + "Inquiry Data" : "4d 14 28 10 53 36 4a 47 4e 41 30 58 31 30 31 34 38 33 20 20 20 20 20 20 44 65 6c 6c 20 44 43 20 4e 56 4d 65 20 50 4d 39 41 33 20 52 49 20 55 2e 32 20 39 36 30 47 42 20 20 20 20 20 20 20 20 20 31 2e 30 2e 30 20 20 20 02 38 25 00 00 09 06 00 00 04 01 00 00 12 7a 00 00 12 7a 00 00 03 00 00 80 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 " + } + }, + { + "Drive Information" : { + "EID:Slt" : "284:1", + "PID" : 276, + "State" : "Conf", + "Status" : "Online", + "DG" : 0, + "Size" : "893.75 GiB", + "Intf" : "NVMe", + "Med" : "SSD", + "SED_Type" : "-", + "SeSz" : "512B", + "Model" : "Dell DC NVMe PM9A3 RI U.2 960GB ", + "Sp" : "U", + "LU/NS Count" : 1, + "Alt-EID" : "-" + }, + "LU/NS Information" : [ + { + "PID" : 276, + "LUN/NSID" : "0/1", + "Status" : "Online", + "Size" : "893.75 GiB" + } + ], + "Drive Detailed Information" : { + "Shield Counter" : 0, + "Temperature(C)" : 32, + "Serial Number" : "S6JGNA0X000002 ", + "Vendor" : "NVMe ", + "Model" : "Dell DC NVMe PM9A3 RI U.2 960GB ", + "WWN" : "3100167200000002", + "Firmware Revision Level" : "1.0.0 ", + "Logical Sector Size" : "512B", + "Physical Sector Size" : "512B", + "Raw size" : "894.252 GiB [0x6fc81ab0 Sectors]", + "Coerced size" : "893.75 GiB [0x6fb80000 Sectors]", + "Capable Speed" : "16.0GT/s", + "Capable Link Width" : "x4", + "Negotiated Link Width" : "x2", + "Drive position" : "DriveGroup:0, Span:0, Row:1", + "Sequence Number" : 2, + "Commissioned Spare" : "No", + "Emergency Spare" : "No", + "Successful Shield Diagnostics completed 
on(Localtime yyyy/mm/dd hh:mm:sec)" : "NA", + "SED Capable" : "No", + "ISE Capable" : "Yes", + "T10 Power Mode" : "No", + "Needs EKM Attention" : "No", + "Secured By EKM" : "No", + "Certified" : "Yes", + "Supported Data Format" : "PRP", + "Device port count" : 1, + "Path Information" : [ + { + "WWID" : "0x41001672002538c1", + "DevicePID" : 276, + "Path" : "Primary", + "Negotiated Speed" : "16.0GT/s", + "Num Phys/Lanes used" : "Unknown" + } + ], + "LU/NS Properties" : { + "Media Error Count" : 0, + "Other Error Count" : 0, + "Predictive Failure Count" : 0, + "Last Predictive Failure Event Sequence Number" : 0, + "Logical Sector Size" : "512B", + "Physical Sector Size" : "512B", + "Raw size" : "894.252 GiB [0x6fc81ab0 Sectors]", + "Coerced size" : "893.75 GiB [0x6fb80000 Sectors]", + "FW managed drive security" : "No", + "Secured" : "No", + "Locked" : "No", + "PI Formatted" : "No", + "PI type" : "No PI", + "Number of bytes of user data in LBA" : "512B", + "Current Write Cache" : "Off", + "Default Write Cache" : "Off", + "Write Cache Changeable" : "No" + }, + "Inquiry Data" : "4d 14 28 10 53 36 4a 47 4e 41 30 58 31 30 31 34 38 38 20 20 20 20 20 20 44 65 6c 6c 20 44 43 20 4e 56 4d 65 20 50 4d 39 41 33 20 52 49 20 55 2e 32 20 39 36 30 47 42 20 20 20 20 20 20 20 20 20 31 2e 30 2e 30 20 20 20 02 38 25 00 00 09 06 00 00 04 01 00 00 12 7a 00 00 12 7a 00 00 03 00 00 80 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 " + } + } + ] + } +} +] +} diff --git a/testdata/perccli_show_all.txt b/testdata/perccli_show_all.txt new file mode 100644 index 0000000..b92b932 --- /dev/null +++ b/testdata/perccli_show_all.txt @@ -0,0 +1,209 @@ +CLI Version = 007.2616.0000.0000 Dec 06, 2023 +Operating system = Linux 6.8.0-124-generic +Controller = 0 +Status = Success +Description = Show Drive Information Succeeded. 
+ + +Drive /c0/e252/s0 : +================= + +----------------------------------------------------------------------------- +EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type +----------------------------------------------------------------------------- +252:0 1 Onln 0 893.750 GB SATA SSD N N 512B HFS960G3H2X069N U - +----------------------------------------------------------------------------- + +EID=Enclosure Device ID|Slt=Slot No|DID=Device ID|DG=DriveGroup +DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare +UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface +Med=Media Type|SED=Self Encryptive Drive|PI=PI Eligible +SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign +UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded +CFShld=Configured shielded|Cpybck=CopyBack|CBShld=Copyback Shielded +UBUnsp=UBad Unsupported|Rbld=Rebuild + + +Drive /c0/e252/s0 - Detailed Information : +======================================== + +Drive /c0/e252/s0 State : +======================= +Shield Counter = 0 +Media Error Count = 0 +Other Error Count = 0 +Drive Temperature = 36C (96.80 F) +Predictive Failure Count = 0 +S.M.A.R.T alert flagged by drive = No + + +Drive /c0/e252/s0 Device attributes : +=================================== +SN = ESC3N5538I0000001 +Manufacturer Id = ATA +Model Number = HFS960G3H2X069N +NAND Vendor = NA +WWN = 5ACE42E000000001 +Firmware Revision = DZ02 +Raw size = 894.252 GB [0x6fc81ab0 Sectors] +Coerced size = 893.750 GB [0x6fb80000 Sectors] +Non Coerced size = 893.752 GB [0x6fb81ab0 Sectors] +Device Speed = 6.0Gb/s +Link Speed = 6.0Gb/s +NCQ setting = Enabled +Write Cache = Enabled +Logical Sector Size = 512B +Physical Sector Size = 4 KB +Connector Name = 00 x1 + + +Drive /c0/e252/s0 Policies/Settings : +=================================== +Drive position = DriveGroup:0, Span:0, Row:0 +Enclosure position = 1 +Connected Port Number = 8(path0) +Sequence Number = 2 
+Commissioned Spare = No +Emergency Spare = No +Last Predictive Failure Event Sequence Number = 0 +Successful diagnostics completion on = N/A +FDE Type = None +SED Capable = No +SED Enabled = No +Secured = No +Cryptographic Erase Capable = Yes +Sanitize Support = CryptoErase, BlockErase +Locked = No +Needs EKM Attention = No +PI Eligible = No +Drive is formatted for PI = No +PI type = No PI +Number of bytes of user data in LBA = 0 KB +Certified = Yes +Wide Port Capable = No +Multipath = No + +Port Information : +================ + +----------------------------------------- +Port Status Linkspeed SAS address +----------------------------------------- + 0 Active 6.0Gb/s 0x3f4ee0806c023d08 +----------------------------------------- + + +Inquiry Data = +40 00 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 +00 00 00 00 20 20 45 20 43 53 4e 33 35 35 38 33 +31 49 30 38 4f 33 51 30 00 00 00 00 00 00 20 20 +20 20 5a 44 32 30 46 48 39 53 30 36 33 47 32 48 +30 58 39 36 20 4e 20 20 20 20 20 20 20 20 20 20 +20 20 20 20 20 20 20 20 20 20 20 20 20 20 10 80 +00 40 00 2f 00 40 00 00 00 00 06 00 ff 3f 10 00 +3f 00 10 fc fb 00 01 bd ff ff ff 0f 00 00 07 00 + + + +Drive /c0/e252/s1 : +================= + +----------------------------------------------------------------------------- +EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type +----------------------------------------------------------------------------- +252:1 0 Onln 0 893.750 GB SATA SSD N N 512B HFS960G3H2X069N U - +----------------------------------------------------------------------------- + +EID=Enclosure Device ID|Slt=Slot No|DID=Device ID|DG=DriveGroup +DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare +UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface +Med=Media Type|SED=Self Encryptive Drive|PI=PI Eligible +SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign +UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded +CFShld=Configured 
shielded|Cpybck=CopyBack|CBShld=Copyback Shielded +UBUnsp=UBad Unsupported|Rbld=Rebuild + + +Drive /c0/e252/s1 - Detailed Information : +======================================== + +Drive /c0/e252/s1 State : +======================= +Shield Counter = 0 +Media Error Count = 0 +Other Error Count = 0 +Drive Temperature = 36C (96.80 F) +Predictive Failure Count = 0 +S.M.A.R.T alert flagged by drive = No + + +Drive /c0/e252/s1 Device attributes : +=================================== +SN = ESC3N5538I0000002 +Manufacturer Id = ATA +Model Number = HFS960G3H2X069N +NAND Vendor = NA +WWN = 5ACE42E000000002 +Firmware Revision = DZ02 +Raw size = 894.252 GB [0x6fc81ab0 Sectors] +Coerced size = 893.750 GB [0x6fb80000 Sectors] +Non Coerced size = 893.752 GB [0x6fb81ab0 Sectors] +Device Speed = 6.0Gb/s +Link Speed = 6.0Gb/s +NCQ setting = Enabled +Write Cache = Enabled +Logical Sector Size = 512B +Physical Sector Size = 4 KB +Connector Name = 00 x1 + + +Drive /c0/e252/s1 Policies/Settings : +=================================== +Drive position = DriveGroup:0, Span:0, Row:1 +Enclosure position = 0 +Connected Port Number = 9(path0) +Sequence Number = 2 +Commissioned Spare = No +Emergency Spare = No +Last Predictive Failure Event Sequence Number = 0 +Successful diagnostics completion on = N/A +FDE Type = None +SED Capable = No +SED Enabled = No +Secured = No +Cryptographic Erase Capable = Yes +Sanitize Support = CryptoErase, BlockErase +Locked = No +Needs EKM Attention = No +PI Eligible = No +Drive is formatted for PI = No +PI type = No PI +Number of bytes of user data in LBA = 0 KB +Certified = Yes +Wide Port Capable = No +Multipath = No + +Port Information : +================ + +----------------------------------------- +Port Status Linkspeed SAS address +----------------------------------------- + 0 Active 6.0Gb/s 0x3f4ee0806c023d09 +----------------------------------------- + + +Inquiry Data = +40 00 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 +00 00 00 00 20 20 45 20 43 53 4e 33 35 
35 38 33 +31 49 30 38 4f 33 50 30 00 00 00 00 00 00 20 20 +20 20 5a 44 32 30 46 48 39 53 30 36 33 47 32 48 +30 58 39 36 20 4e 20 20 20 20 20 20 20 20 20 20 +20 20 20 20 20 20 20 20 20 20 20 20 20 20 10 80 +00 40 00 2f 00 40 00 00 00 00 06 00 ff 3f 10 00 +3f 00 10 fc fb 00 01 bd ff ff ff 0f 00 00 07 00 + + + + + diff --git a/testdata/smart_ata_ssd_megaraid.json b/testdata/smart_ata_ssd_megaraid.json new file mode 100644 index 0000000..e6b1198 --- /dev/null +++ b/testdata/smart_ata_ssd_megaraid.json @@ -0,0 +1,796 @@ +{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 0 + ], + "svn_revision": "4883", + "platform_info": "x86_64-linux-5.4.225-200.el7.x86_64", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-j", + "-a", + "-d", + "sat+megaraid,20", + "/dev/bus/8" + ], + "messages": [ + { + "string": "Warning: This result is based on an Attribute check.", + "severity": "warning" + } + ], + "exit_status": 4 + }, + "device": { + "name": "/dev/bus/8", + "info_name": "/dev/bus/8 [megaraid_disk_20] [SAT]", + "type": "sat+megaraid,20", + "protocol": "ATA" + }, + "model_name": "Samsung SSD 870 EVO 2TB", + "serial_number": "S624NS0RC00003M", + "wwn": { + "naa": 5, + "oui": 9528, + "id": 65259200556 + }, + "firmware_version": "SVT02B6Q", + "user_capacity": { + "blocks": 3907029168, + "bytes": 2000398934016 + }, + "logical_block_size": 512, + "physical_block_size": 512, + "rotation_rate": 0, + "form_factor": { + "ata_value": 3, + "name": "2.5 inches" + }, + "in_smartctl_database": false, + "ata_version": { + "string": "ACS-4 T13/BSR INCITS 529 revision 5", + "major_value": 2556, + "minor_value": 94 + }, + "sata_version": { + "string": "SATA 3.3", + "value": 511 + }, + "interface_speed": { + "max": { + "sata_value": 14, + "string": "6.0 Gb/s", + "units_per_second": 60, + "bits_per_unit": 100000000 + }, + "current": { + "sata_value": 3, + "string": "6.0 Gb/s", + "units_per_second": 60, + "bits_per_unit": 100000000 + } + }, + "local_time": 
{ + "time_t": 1782134221, + "asctime": "Mon Jun 22 09:17:01 2026 EDT" + }, + "smart_status": { + "passed": true + }, + "ata_smart_data": { + "offline_data_collection": { + "status": { + "value": 0, + "string": "was never started" + }, + "completion_seconds": 0 + }, + "self_test": { + "status": { + "value": 0, + "string": "completed without error", + "passed": true + }, + "polling_minutes": { + "short": 2, + "extended": 160 + } + }, + "capabilities": { + "values": [ + 83, + 3 + ], + "exec_offline_immediate_supported": true, + "offline_is_aborted_upon_new_cmd": false, + "offline_surface_scan_supported": false, + "self_tests_supported": true, + "conveyance_self_test_supported": false, + "selective_self_test_supported": true, + "attribute_autosave_enabled": true, + "error_logging_supported": true, + "gp_logging_supported": true + } + }, + "ata_sct_capabilities": { + "value": 61, + "error_recovery_control_supported": true, + "feature_control_supported": true, + "data_table_supported": true + }, + "ata_smart_attributes": { + "revision": 1, + "table": [ + { + "id": 5, + "name": "Reallocated_Sector_Ct", + "value": 100, + "worst": 100, + "thresh": 10, + "when_failed": "", + "flags": { + "value": 51, + "string": "PO--CK ", + "prefailure": true, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 9, + "name": "Power_On_Hours", + "value": 93, + "worst": 93, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 33518, + "string": "33518" + } + }, + { + "id": 12, + "name": "Power_Cycle_Count", + "value": 99, + "worst": 99, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": 
true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 7, + "string": "7" + } + }, + { + "id": 177, + "name": "Wear_Leveling_Count", + "value": 93, + "worst": 93, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 19, + "string": "PO--C- ", + "prefailure": true, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": false + }, + "raw": { + "value": 157, + "string": "157" + } + }, + { + "id": 179, + "name": "Used_Rsvd_Blk_Cnt_Tot", + "value": 100, + "worst": 100, + "thresh": 10, + "when_failed": "", + "flags": { + "value": 19, + "string": "PO--C- ", + "prefailure": true, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 181, + "name": "Program_Fail_Cnt_Total", + "value": 100, + "worst": 100, + "thresh": 10, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 182, + "name": "Erase_Fail_Count_Total", + "value": 100, + "worst": 100, + "thresh": 10, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 183, + "name": "Runtime_Bad_Block", + "value": 100, + "worst": 100, + "thresh": 10, + "when_failed": "", + "flags": { + "value": 19, + "string": "PO--C- ", + "prefailure": true, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 187, + 
"name": "Reported_Uncorrect", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 190, + "name": "Airflow_Temperature_Cel", + "value": 69, + "worst": 60, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 31, + "string": "31" + } + }, + { + "id": 195, + "name": "Hardware_ECC_Recovered", + "value": 200, + "worst": 200, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 26, + "string": "-O-RC- ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": true, + "event_count": true, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 199, + "name": "UDMA_CRC_Error_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 62, + "string": "-OSRCK ", + "prefailure": false, + "updated_online": true, + "performance": true, + "error_rate": true, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 235, + "name": "Unknown_Attribute", + "value": 99, + "worst": 99, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 18, + "string": "-O--C- ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": false + }, + "raw": { + "value": 5, + "string": "5" + } + }, + { + "id": 241, + "name": "Total_LBAs_Written", + "value": 99, + "worst": 99, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + 
"updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 332951560676, + "string": "332951560676" + } + }, + { + "id": 252, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 273, + "string": "273" + } + } + ] + }, + "power_on_time": { + "hours": 33518 + }, + "power_cycle_count": 7, + "temperature": { + "current": 31 + }, + "ata_smart_error_log": { + "summary": { + "revision": 1, + "count": 0 + } + }, + "ata_smart_self_test_log": { + "standard": { + "revision": 1, + "table": [ + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33510 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33486 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33462 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33438 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33414 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33390 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + 
"status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33366 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33342 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33318 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33294 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33270 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33246 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33222 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33198 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33174 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33150 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33126 + }, + { + "type": { + "value": 1, + "string": "Short 
offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33102 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33078 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33054 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 33030 + } + ], + "count": 21, + "error_count_total": 0, + "error_count_outdated": 0 + } + }, + "ata_smart_selective_self_test_log": { + "revision": 1, + "table": [ + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + } + ], + "current_read_scan": { + "lba_min": 0, + "lba_max": 65535, + "status": { + "value": 0, + "string": "was never started" + } + }, + "flags": { + "value": 0, + "remainder_scan_enabled": false + }, + "power_up_scan_resume_minutes": 0 + } +} + diff --git a/testdata/smart_ata_ssd_micron.json b/testdata/smart_ata_ssd_micron.json new file mode 100644 index 0000000..1abcd06 --- /dev/null +++ b/testdata/smart_ata_ssd_micron.json @@ -0,0 +1,905 @@ +{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 4 + ], + "pre_release": false, + "svn_revision": "5530", + "platform_info": "x86_64-linux-6.8.0-124-generic", + "build_info": "(local 
build)", + "argv": [ + "smartctl", + "-j", + "-a", + "-d", + "sat+megaraid,0", + "/dev/bus/0" + ], + "drive_database_version": { + "string": "7.3/5528" + }, + "messages": [ + { + "string": "Warning: This result is based on an Attribute check.", + "severity": "warning" + } + ], + "exit_status": 0 + }, + "local_time": { + "time_t": 1782135879, + "asctime": "Mon Jun 22 09:44:39 2026 EDT" + }, + "device": { + "name": "/dev/bus/0", + "info_name": "/dev/bus/0 [megaraid_disk_00] [SAT]", + "type": "sat+megaraid,0", + "protocol": "ATA" + }, + "model_name": "MTFDDAK960TGA-1BC1ZABDA", + "serial_number": "232442000000", + "wwn": { + "naa": 5, + "oui": 41077, + "id": 5415414560 + }, + "ata_additional_product_id": "DELL(tm)", + "firmware_version": "D4DK003", + "user_capacity": { + "blocks": 1875385008, + "bytes": 960197124096 + }, + "logical_block_size": 512, + "physical_block_size": 4096, + "rotation_rate": 0, + "form_factor": { + "ata_value": 3, + "name": "2.5 inches" + }, + "trim": { + "supported": true, + "deterministic": true, + "zeroed": true + }, + "in_smartctl_database": false, + "ata_version": { + "string": "ACS-4 (minor revision not indicated)", + "major_value": 4088, + "minor_value": 65535 + }, + "sata_version": { + "string": "SATA 3.3", + "value": 511 + }, + "interface_speed": { + "max": { + "sata_value": 14, + "string": "6.0 Gb/s", + "units_per_second": 60, + "bits_per_unit": 100000000 + }, + "current": { + "sata_value": 3, + "string": "6.0 Gb/s", + "units_per_second": 60, + "bits_per_unit": 100000000 + } + }, + "smart_support": { + "available": true, + "enabled": true + }, + "smart_status": { + "passed": true + }, + "ata_smart_data": { + "offline_data_collection": { + "status": { + "value": 0, + "string": "was never started" + }, + "completion_seconds": 3348 + }, + "self_test": { + "status": { + "value": 0, + "string": "completed without error", + "passed": true + }, + "polling_minutes": { + "short": 2, + "extended": 57, + "conveyance": 3 + } + }, + "capabilities": 
{ + "values": [ + 123, + 3 + ], + "exec_offline_immediate_supported": true, + "offline_is_aborted_upon_new_cmd": false, + "offline_surface_scan_supported": true, + "self_tests_supported": true, + "conveyance_self_test_supported": true, + "selective_self_test_supported": true, + "attribute_autosave_enabled": true, + "error_logging_supported": true, + "gp_logging_supported": true + } + }, + "ata_sct_capabilities": { + "value": 61, + "error_recovery_control_supported": true, + "feature_control_supported": true, + "data_table_supported": true + }, + "ata_smart_attributes": { + "revision": 16, + "table": [ + { + "id": 1, + "name": "Raw_Read_Error_Rate", + "value": 100, + "worst": 100, + "thresh": 50, + "when_failed": "", + "flags": { + "value": 46, + "string": "-OSR-K ", + "prefailure": false, + "updated_online": true, + "performance": true, + "error_rate": true, + "event_count": false, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 5, + "name": "Reallocated_Sector_Ct", + "value": 100, + "worst": 100, + "thresh": 10, + "when_failed": "", + "flags": { + "value": 51, + "string": "PO--CK ", + "prefailure": true, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 9, + "name": "Power_On_Hours", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 20238, + "string": "20238" + } + }, + { + "id": 12, + "name": "Power_Cycle_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": 
true + }, + "raw": { + "value": 12, + "string": "12" + } + }, + { + "id": 13, + "name": "Read_Soft_Error_Rate", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 173, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 94, + "string": "94" + } + }, + { + "id": 174, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 9, + "string": "9" + } + }, + { + "id": 175, + "name": "Program_Fail_Count_Chip", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 46, + "string": "46" + } + }, + { + "id": 179, + "name": "Used_Rsvd_Blk_Cnt_Tot", + "value": 100, + "worst": 100, + "thresh": 10, + "when_failed": "", + "flags": { + "value": 51, + "string": "PO--CK ", + "prefailure": true, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 180, + "name": "Unused_Rsvd_Blk_Cnt_Tot", + "value": 100, + "worst": 100, + "thresh": 0, + 
"when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 6323, + "string": "6323" + } + }, + { + "id": 181, + "name": "Program_Fail_Cnt_Total", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 182, + "name": "Erase_Fail_Count_Total", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 183, + "name": "Runtime_Bad_Block", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 184, + "name": "End-to-End_Error", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 188, + "name": "Command_Timeout", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + 
"event_count": true, + "auto_keep": true + }, + "raw": { + "value": 90, + "string": "90" + } + }, + { + "id": 194, + "name": "Temperature_Celsius", + "value": 71, + "worst": 63, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 34, + "string": "-O---K ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": true + }, + "raw": { + "value": 158914838557, + "string": "29 (Min/Max 16/37)" + } + }, + { + "id": 195, + "name": "Hardware_ECC_Recovered", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 196, + "name": "Reallocated_Event_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 198, + "name": "Offline_Uncorrectable", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 48, + "string": "----CK ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 199, + "name": "UDMA_CRC_Error_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 201, + "name": 
"Unknown_SSD_Attribute", + "value": 100, + "worst": 100, + "thresh": 1, + "when_failed": "", + "flags": { + "value": 51, + "string": "PO--CK ", + "prefailure": true, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 202, + "name": "Unknown_SSD_Attribute", + "value": 100, + "worst": 100, + "thresh": 5, + "when_failed": "", + "flags": { + "value": 51, + "string": "PO--CK ", + "prefailure": true, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 5865, + "string": "5865" + } + }, + { + "id": 206, + "name": "Unknown_SSD_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 14, + "string": "-OSR-- ", + "prefailure": false, + "updated_online": true, + "performance": true, + "error_rate": true, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 210, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 233, + "name": "Media_Wearout_Indicator", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 34970317773, + "string": "34970317773" + } + }, + { + "id": 235, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + 
"prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 34970317773, + "string": "34970317773" + } + }, + { + "id": 245, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 48, + "string": "----CK ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 100, + "string": "100" + } + }, + { + "id": 247, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 1092919960, + "string": "1092919960" + } + }, + { + "id": 248, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 451491208, + "string": "451491208" + } + }, + { + "id": 241, + "name": "Total_LBAs_Written", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 49421157344, + "string": "49421157344" + } + } + ] + }, + "power_on_time": { + "hours": 20238 + }, + "power_cycle_count": 12, + "temperature": { + "current": 29 + }, + "ata_smart_error_log": { + "summary": { + "revision": 1, + "count": 0 + } + }, + "ata_smart_self_test_log": { + "standard": { + "revision": 1, + "table": [ + { + 
"type": { + "value": 2, + "string": "Extended offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 1 + }, + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 1 + } + ], + "count": 2, + "error_count_total": 0, + "error_count_outdated": 0 + } + }, + "ata_smart_selective_self_test_log": { + "revision": 1, + "table": [ + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + }, + { + "lba_min": 0, + "lba_max": 0, + "status": { + "value": 0, + "string": "Not_testing" + } + } + ], + "flags": { + "value": 0, + "remainder_scan_enabled": false + }, + "power_up_scan_resume_minutes": 0 + } +} + diff --git a/testdata/smart_iscsi_lio.json b/testdata/smart_iscsi_lio.json new file mode 100644 index 0000000..2086356 --- /dev/null +++ b/testdata/smart_iscsi_lio.json @@ -0,0 +1,88 @@ +{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 5 + ], + "pre_release": false, + "svn_revision": "5714", + "platform_info": "x86_64-linux-6.18.26-2-lts", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-j", + "-a", + "-d", + "scsi", + "/dev/sdj" + ], + "exit_status": 4 + }, + "local_time": { + "time_t": 1782138466, + "asctime": "Mon Jun 22 09:27:46 2026 CDT" + }, + "device": { + "name": "/dev/sdj", + "info_name": "/dev/sdj", + "type": "scsi", + "protocol": "SCSI" + }, + "scsi_vendor": "LIO-ORG", + "scsi_product": "MainServer", + "model_name": "LIO-ORG MainServer", + "scsi_model_name": "LIO-ORG MainServer", + "scsi_revision": "4.0", + 
"scsi_version": "SPC-4", + "user_capacity": { + "blocks": 4294967296, + "bytes": 2199023255552 + }, + "logical_block_size": 512, + "physical_block_size": 65536, + "scsi_lb_provisioning": { + "name": "fully provisioned", + "value": 0, + "management_enabled": { + "name": "LBPME", + "value": 0 + }, + "read_zeros": { + "name": "LBPRZ", + "value": 0 + } + }, + "rotation_rate": 0, + "logical_unit_id": "0x6001405277a9bf9a82147e4b954ece39", + "serial_number": "00000000-0000-0000-0000-000000000000", + "device_type": { + "scsi_terminology": "Peripheral Device Type [PDT]", + "scsi_value": 0, + "name": "disk" + }, + "scsi_transport_protocol": { + "name": "iSCSI", + "value": 5 + }, + "smart_support": { + "available": true, + "enabled": true + }, + "temperature_warning": { + "enabled": false + }, + "smart_status": { + "passed": true + }, + "temperature": { + "current": 0, + "drive_trip": 0 + }, + "seagate_farm_log": { + "supported": false + } +} diff --git a/testdata/smart_iscsi_virtual_disk.json b/testdata/smart_iscsi_virtual_disk.json new file mode 100644 index 0000000..b4d9ed6 --- /dev/null +++ b/testdata/smart_iscsi_virtual_disk.json @@ -0,0 +1,56 @@ +{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 0 + ], + "svn_revision": "4883", + "platform_info": "x86_64-linux-5.4.225-200.el7.x86_64", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-j", + "-a", + "-d", + "scsi", + "/dev/sdb" + ], + "exit_status": 4 + }, + "device": { + "name": "/dev/sdb", + "info_name": "/dev/sdb", + "type": "scsi", + "protocol": "SCSI" + }, + "vendor": "IET", + "product": "VIRTUAL-DISK", + "model_name": "IET VIRTUAL-DISK", + "revision": "0001", + "scsi_version": "SPC-3", + "user_capacity": { + "blocks": 2831155200, + "bytes": 1449551462400 + }, + "logical_block_size": 512, + "serial_number": "000000", + "device_type": { + "scsi_value": 0, + "name": "disk" + }, + "local_time": { + "time_t": 1782134221, + "asctime": "Mon Jun 22 09:17:01 2026 EDT" + }, + 
"smart_status": { + "passed": true + }, + "temperature": { + "current": 0, + "drive_trip": 0 + } +} diff --git a/testdata/smart_nvme.json b/testdata/smart_nvme.json new file mode 100644 index 0000000..0dd147c --- /dev/null +++ b/testdata/smart_nvme.json @@ -0,0 +1,328 @@ +{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 5 + ], + "pre_release": false, + "svn_revision": "5714", + "platform_info": "x86_64-linux-6.18.34-1-lts", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-j", + "-a", + "-d", + "nvme", + "/dev/nvme0" + ], + "exit_status": 0 + }, + "local_time": { + "time_t": 1782138345, + "asctime": "Mon Jun 22 09:25:45 2026 CDT" + }, + "device": { + "name": "/dev/nvme0", + "info_name": "/dev/nvme0", + "type": "nvme", + "protocol": "NVMe" + }, + "model_name": "Force MP510", + "serial_number": "21038270000000000000", + "firmware_version": "ECFM13.3", + "nvme_pci_vendor": { + "id": 6535, + "subsystem_id": 6535 + }, + "nvme_ieee_oui_identifier": 6584743, + "nvme_total_capacity": 4000787030016, + "nvme_unallocated_capacity": 0, + "nvme_controller_id": 1, + "nvme_version": { + "string": "1.3", + "value": 66304 + }, + "nvme_number_of_namespaces": 1, + "nvme_namespaces": [ + { + "id": 1, + "size": { + "blocks": 7814037168, + "bytes": 4000787030016 + }, + "capacity": { + "blocks": 7814037168, + "bytes": 4000787030016 + }, + "utilization": { + "blocks": 7814037168, + "bytes": 4000787030016 + }, + "formatted_lba_size": 512, + "eui64": { + "oui": 6584743, + "ext_id": 299576073406 + }, + "features": { + "value": 0, + "thin_provisioning": false, + "na_fields": false, + "dealloc_or_unwritten_block_error": false, + "uid_reuse": false, + "np_fields": false, + "other": 0 + }, + "lba_formats": [ + { + "formatted": true, + "data_bytes": 512, + "metadata_bytes": 0, + "relative_performance": 2 + }, + { + "formatted": false, + "data_bytes": 4096, + "metadata_bytes": 0, + "relative_performance": 1 + } + ] + } + ], + "user_capacity": { + 
"blocks": 7814037168, + "bytes": 4000787030016 + }, + "logical_block_size": 512, + "smart_support": { + "available": true, + "enabled": true + }, + "nvme_firmware_update_capabilities": { + "value": 18, + "slots": 1, + "first_slot_is_read_only": false, + "activiation_without_reset": true, + "multiple_update_detection": false, + "other": 0 + }, + "nvme_optional_admin_commands": { + "value": 23, + "security_send_receive": true, + "format_nvm": true, + "firmware_download": true, + "namespace_management": false, + "self_test": true, + "directives": false, + "mi_send_receive": false, + "virtualization_management": false, + "doorbell_buffer_config": false, + "get_lba_status": false, + "command_and_feature_lockdown": false, + "other": 0 + }, + "nvme_optional_nvm_commands": { + "value": 93, + "compare": true, + "write_uncorrectable": false, + "dataset_management": true, + "write_zeroes": true, + "save_select_feature_nonzero": true, + "reservations": false, + "timestamp": true, + "verify": false, + "copy": false, + "other": 0 + }, + "nvme_log_page_attributes": { + "value": 8, + "smart_health_per_namespace": false, + "commands_effects_log": false, + "extended_get_log_page_cmd": false, + "telemetry_log": true, + "persistent_event_log": false, + "supported_log_pages_log": false, + "telemetry_data_area_4": false, + "other": 0 + }, + "nvme_maximum_data_transfer_pages": 512, + "nvme_composite_temperature_threshold": { + "warning": 75, + "critical": 80 + }, + "temperature": { + "op_limit_max": 75, + "critical_limit_max": 80, + "current": 42 + }, + "nvme_power_states": [ + { + "non_operational_state": false, + "relative_read_latency": 0, + "relative_read_throughput": 0, + "relative_write_latency": 0, + "relative_write_throughput": 0, + "entry_latency_us": 0, + "exit_latency_us": 0, + "max_power": { + "value": 1533, + "scale": 2, + "units_per_watt": 100 + } + }, + { + "non_operational_state": false, + "relative_read_latency": 1, + "relative_read_throughput": 1, + 
"relative_write_latency": 1, + "relative_write_throughput": 1, + "entry_latency_us": 0, + "exit_latency_us": 0, + "max_power": { + "value": 965, + "scale": 2, + "units_per_watt": 100 + } + }, + { + "non_operational_state": false, + "relative_read_latency": 2, + "relative_read_throughput": 2, + "relative_write_latency": 2, + "relative_write_throughput": 2, + "entry_latency_us": 0, + "exit_latency_us": 0, + "max_power": { + "value": 682, + "scale": 2, + "units_per_watt": 100 + } + }, + { + "non_operational_state": true, + "relative_read_latency": 3, + "relative_read_throughput": 3, + "relative_write_latency": 3, + "relative_write_throughput": 3, + "entry_latency_us": 2000, + "exit_latency_us": 2000, + "max_power": { + "value": 490, + "scale": 1, + "units_per_watt": 10000 + } + }, + { + "non_operational_state": true, + "relative_read_latency": 4, + "relative_read_throughput": 4, + "relative_write_latency": 4, + "relative_write_throughput": 4, + "entry_latency_us": 25000, + "exit_latency_us": 25000, + "max_power": { + "value": 18, + "scale": 1, + "units_per_watt": 10000 + } + } + ], + "smart_status": { + "passed": true, + "nvme": { + "value": 0 + } + }, + "nvme_smart_health_information_log": { + "nsid": -1, + "critical_warning": 0, + "temperature": 42, + "available_spare": 100, + "available_spare_threshold": 5, + "percentage_used": 6, + "data_units_read": 221438663, + "data_units_written": 439320520, + "host_reads": 15653023750, + "host_writes": 8150052010, + "controller_busy_time": 21564, + "power_cycles": 289, + "power_on_hours": 42811, + "unsafe_shutdowns": 239, + "media_errors": 0, + "num_err_log_entries": 2340, + "warning_temp_time": 0, + "critical_comp_time": 0 + }, + "spare_available": { + "current_percent": 100, + "threshold_percent": 5 + }, + "endurance_used": { + "current_percent": 6 + }, + "power_cycle_count": 289, + "power_on_time": { + "hours": 42811 + }, + "nvme_error_information_log": { + "size": 63, + "read": 16, + "unread": 0, + "table": [ + { + 
"error_count": 2340, + "submission_queue_id": 0, + "command_id": 16, + "status_field": { + "value": 8194, + "do_not_retry": false, + "status_code_type": 0, + "status_code": 2, + "string": "Invalid Field in Command" + }, + "phase_tag": false, + "parm_error_location": 40, + "lba": { + "value": 0 + }, + "nsid": 0 + } + ] + }, + "nvme_self_test_log": { + "nsid": -1, + "current_self_test_operation": { + "value": 0, + "string": "No self-test in progress" + }, + "table": [ + { + "self_test_code": { + "value": 1, + "string": "Short" + }, + "self_test_result": { + "value": 2, + "string": "Aborted: Controller Reset" + }, + "power_on_hours": 5892 + }, + { + "self_test_code": { + "value": 1, + "string": "Short" + }, + "self_test_result": { + "value": 0, + "string": "Completed without error" + }, + "power_on_hours": 5801 + } + ] + } +} + diff --git a/testdata/smart_raid_vd.json b/testdata/smart_raid_vd.json new file mode 100644 index 0000000..6db4a13 --- /dev/null +++ b/testdata/smart_raid_vd.json @@ -0,0 +1,55 @@ +{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 2 + ], + "svn_revision": "5155", + "platform_info": "x86_64-linux-6.10.6-1.el9.x86_64", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-j", + "-a", + "-d", + "scsi", + "/dev/sda" + ], + "exit_status": 4 + }, + "device": { + "name": "/dev/sda", + "info_name": "/dev/sda", + "type": "scsi", + "protocol": "SCSI" + }, + "vendor": "BROADCOM", + "product": "MR9560-16i", + "model_name": "BROADCOM MR9560-16i", + "revision": "5.26", + "scsi_version": "SPC-3", + "user_capacity": { + "blocks": 93746888704, + "bytes": 47998407016448 + }, + "logical_block_size": 512, + "physical_block_size": 4096, + "rotation_rate": 0, + "serial_number": "00000000000000000000000000000001", + "device_type": { + "scsi_value": 0, + "name": "disk" + }, + "local_time": { + "time_t": 1782134423, + "asctime": "Mon Jun 22 09:20:23 2026 EDT" + }, + "temperature": { + "current": 0, + "drive_trip": 0 + } +} diff 
--git a/testdata/smart_raid_vd_avago.json b/testdata/smart_raid_vd_avago.json new file mode 100644 index 0000000..1d63943 --- /dev/null +++ b/testdata/smart_raid_vd_avago.json @@ -0,0 +1,54 @@ +{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 0 + ], + "svn_revision": "4883", + "platform_info": "x86_64-linux-5.4.134-200.el7.x86_64", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-j", + "-a", + "-d", + "scsi", + "/dev/sda" + ], + "exit_status": 4 + }, + "device": { + "name": "/dev/sda", + "info_name": "/dev/sda", + "type": "scsi", + "protocol": "SCSI" + }, + "vendor": "AVAGO", + "product": "MR9363-4i", + "model_name": "AVAGO MR9363-4i", + "revision": "4.68", + "scsi_version": "SPC-3", + "user_capacity": { + "blocks": 3748659200, + "bytes": 1919313510400 + }, + "logical_block_size": 512, + "physical_block_size": 4096, + "serial_number": "00000000000000000000000000000002", + "device_type": { + "scsi_value": 0, + "name": "disk" + }, + "local_time": { + "time_t": 1782135089, + "asctime": "Mon Jun 22 09:31:29 2026 EDT" + }, + "temperature": { + "current": 0, + "drive_trip": 0 + } +} diff --git a/testdata/smart_sas_ssd.json b/testdata/smart_sas_ssd.json new file mode 100644 index 0000000..43fcf86 --- /dev/null +++ b/testdata/smart_sas_ssd.json @@ -0,0 +1,155 @@ +{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 5 + ], + "pre_release": false, + "svn_revision": "5714", + "platform_info": "x86_64-linux-6.18.26-2-lts", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-j", + "-a", + "-d", + "scsi", + "/dev/sdb" + ], + "exit_status": 0 + }, + "local_time": { + "time_t": 1782138465, + "asctime": "Mon Jun 22 09:27:45 2026 CDT" + }, + "device": { + "name": "/dev/sdb", + "info_name": "/dev/sdb", + "type": "scsi", + "protocol": "SCSI" + }, + "scsi_vendor": "SAMSUNG", + "scsi_product": "ARFX0920S5xnNTRI", + "model_name": "SAMSUNG ARFX0920S5xnNTRI", + "scsi_model_name": "SAMSUNG 
ARFX0920S5xnNTRI", + "scsi_revision": "3P00", + "scsi_version": "SPC-5", + "user_capacity": { + "blocks": 223621814, + "bytes": 915954950144 + }, + "logical_block_size": 4096, + "scsi_lb_provisioning": { + "name": "resource provisioned", + "value": 1, + "management_enabled": { + "name": "LBPME", + "value": 1 + }, + "read_zeros": { + "name": "LBPRZ", + "value": 1 + } + }, + "rotation_rate": 0, + "form_factor": { + "scsi_value": 3, + "name": "2.5 inches" + }, + "logical_unit_id": "0x5002538b48c8c360", + "serial_number": "S43YNF0K000001", + "device_type": { + "scsi_terminology": "Peripheral Device Type [PDT]", + "scsi_value": 0, + "name": "disk" + }, + "scsi_transport_protocol": { + "name": "SAS (SPL-4)", + "value": 6 + }, + "smart_support": { + "available": true, + "enabled": true + }, + "temperature_warning": { + "enabled": true + }, + "smart_status": { + "passed": true + }, + "scsi_percentage_used_endurance_indicator": 0, + "endurance_used": { + "current_percent": 0 + }, + "temperature": { + "current": 56, + "drive_trip": 70 + }, + "power_on_time": { + "hours": 2487, + "minutes": 44 + }, + "scsi_start_stop_cycle_counter": { + "year_of_manufacture": "2018", + "week_of_manufacture": "51", + "accumulated_start_stop_cycles": 12, + "specified_load_unload_count_over_device_lifetime": 0, + "accumulated_load_unload_cycles": 0 + }, + "scsi_grown_defect_list": 0, + "seagate_farm_log": { + "supported": false + }, + "scsi_error_counter_log": { + "read": { + "errors_corrected_by_eccfast": 0, + "errors_corrected_by_eccdelayed": 0, + "errors_corrected_by_rereads_rewrites": 0, + "total_errors_corrected": 0, + "correction_algorithm_invocations": 0, + "gigabytes_processed": "2620.555", + "total_uncorrected_errors": 0 + }, + "write": { + "errors_corrected_by_eccfast": 0, + "errors_corrected_by_eccdelayed": 0, + "errors_corrected_by_rereads_rewrites": 0, + "total_errors_corrected": 0, + "correction_algorithm_invocations": 0, + "gigabytes_processed": "2091.250", + 
"total_uncorrected_errors": 0 + }, + "verify": { + "errors_corrected_by_eccfast": 0, + "errors_corrected_by_eccdelayed": 0, + "errors_corrected_by_rereads_rewrites": 0, + "total_errors_corrected": 0, + "correction_algorithm_invocations": 0, + "gigabytes_processed": "46.845", + "total_uncorrected_errors": 0 + } + }, + "scsi_pending_defects": { + "count": 0 + }, + "scsi_self_test_0": { + "code": { + "value": 0, + "string": "Default" + }, + "result": { + "value": 0, + "string": "Completed" + }, + "power_on_time": { + "hours": 2, + "aka": "accumulated_power_on_hours" + } + }, + "scsi_extended_self_test_seconds": 3600 +} + diff --git a/testdata/storcli_show_all.txt b/testdata/storcli_show_all.txt new file mode 100644 index 0000000..396857b --- /dev/null +++ b/testdata/storcli_show_all.txt @@ -0,0 +1,397 @@ +CLI Version = 007.1420.0000.0000 Dec 10, 2020 +Operating system = Linux 5.4.225-200.el7.x86_64 +Controller = 0 +Status = Success +Description = Show Drive Information Succeeded. + + +Drive /c0/e64/s0 : +================ + +----------------------------------------------------------------------------------- +EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type +----------------------------------------------------------------------------------- +64:0 22 Onln 0 1.818 TB SATA SSD Y N 512B Samsung SSD 870 EVO 2TB U - +----------------------------------------------------------------------------------- + +EID=Enclosure Device ID|Slt=Slot No.|DID=Device ID|DG=DriveGroup +DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare +UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface +Med=Media Type|SED=Self Encryptive Drive|PI=Protection Info +SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign +UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded +CFShld=Configured shielded|Cpybck=CopyBack|CBShld=Copyback Shielded +UBUnsp=UBad Unsupported|Rbld=Rebuild + + +Drive /c0/e64/s0 - Detailed Information : 
+======================================= + +Drive /c0/e64/s0 State : +====================== +Shield Counter = 0 +Media Error Count = 0 +Other Error Count = 7 +Drive Temperature = 31C (87.80 F) +Predictive Failure Count = 0 +S.M.A.R.T alert flagged by drive = No + + +Drive /c0/e64/s0 Device attributes : +================================== +SN = S624NS0RA00001J +Manufacturer Id = ATA +Model Number = Samsung SSD 870 EVO 2TB +NAND Vendor = NA +WWN = 5002538F31000001 +Firmware Revision = SVT02B6Q +Raw size = 1.819 TB [0xe8e088b0 Sectors] +Coerced size = 1.818 TB [0xe8d00000 Sectors] +Non Coerced size = 1.818 TB [0xe8d088b0 Sectors] +Device Speed = 6.0Gb/s +Link Speed = 6.0Gb/s +NCQ setting = Enabled +Write Cache = Disabled +Logical Sector Size = 512B +Physical Sector Size = 512B +Connector Name = 00 x1 + + +Drive /c0/e64/s0 Policies/Settings : +================================== +Drive position = DriveGroup:0 +Enclosure position = 1 +Connected Port Number = 0(path0) +Sequence Number = 2 +Commissioned Spare = No +Emergency Spare = No +Last Predictive Failure Event Sequence Number = 0 +Successful diagnostics completion on = N/A +FDE Type = TCG Opal +SED Capable = Yes +SED Enabled = No +Secured = No +Cryptographic Erase Capable = No +Sanitize Support = Not supported +Locked = No +Needs EKM Attention = No +PI Eligible = No +Certified = No +Wide Port Capable = No +Multipath = No + +Port Information : +================ + +----------------------------------------- +Port Status Linkspeed SAS address +----------------------------------------- + 0 Active 6.0Gb/s 0x3b07b250d55b7500 +----------------------------------------- + + +Inquiry Data = +40 00 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 +00 00 00 00 36 53 34 32 53 4e 52 30 30 41 30 32 +30 33 20 4a 20 20 20 20 00 00 00 00 00 00 56 53 +30 54 42 32 51 36 61 53 73 6d 6e 75 20 67 53 53 +20 44 37 38 20 30 56 45 20 4f 54 32 20 42 20 20 +20 20 20 20 20 20 20 20 20 20 20 20 20 20 01 80 +01 40 00 2f 00 40 00 02 00 02 07 00 ff 3f 10 00 
+3f 00 10 fc fb 00 01 01 ff ff ff 0f 00 00 07 00 + + + +Drive /c0/e64/s1 : +================ + +----------------------------------------------------------------------------------- +EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type +----------------------------------------------------------------------------------- +64:1 23 Onln 0 1.818 TB SATA SSD Y N 512B Samsung SSD 870 EVO 2TB U - +----------------------------------------------------------------------------------- + +EID=Enclosure Device ID|Slt=Slot No.|DID=Device ID|DG=DriveGroup +DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare +UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface +Med=Media Type|SED=Self Encryptive Drive|PI=Protection Info +SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign +UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded +CFShld=Configured shielded|Cpybck=CopyBack|CBShld=Copyback Shielded +UBUnsp=UBad Unsupported|Rbld=Rebuild + + +Drive /c0/e64/s1 - Detailed Information : +======================================= + +Drive /c0/e64/s1 State : +====================== +Shield Counter = 0 +Media Error Count = 0 +Other Error Count = 6 +Drive Temperature = 30C (86.00 F) +Predictive Failure Count = 0 +S.M.A.R.T alert flagged by drive = No + + +Drive /c0/e64/s1 Device attributes : +================================== +SN = S624NS0RA00002L +Manufacturer Id = ATA +Model Number = Samsung SSD 870 EVO 2TB +NAND Vendor = NA +WWN = 5002538F31000002 +Firmware Revision = SVT02B6Q +Raw size = 1.819 TB [0xe8e088b0 Sectors] +Coerced size = 1.818 TB [0xe8d00000 Sectors] +Non Coerced size = 1.818 TB [0xe8d088b0 Sectors] +Device Speed = 6.0Gb/s +Link Speed = 6.0Gb/s +NCQ setting = Enabled +Write Cache = Disabled +Logical Sector Size = 512B +Physical Sector Size = 512B +Connector Name = 00 x1 + + +Drive /c0/e64/s1 Policies/Settings : +================================== +Drive position = DriveGroup:0 +Enclosure position = 0 
+Connected Port Number = 1(path0) +Sequence Number = 2 +Commissioned Spare = No +Emergency Spare = No +Last Predictive Failure Event Sequence Number = 0 +Successful diagnostics completion on = N/A +FDE Type = TCG Opal +SED Capable = Yes +SED Enabled = No +Secured = No +Cryptographic Erase Capable = No +Sanitize Support = Not supported +Locked = No +Needs EKM Attention = No +PI Eligible = No +Certified = No +Wide Port Capable = No +Multipath = No + +Port Information : +================ + +----------------------------------------- +Port Status Linkspeed SAS address +----------------------------------------- + 0 Active 6.0Gb/s 0x3b07b250d55b7501 +----------------------------------------- + + +Inquiry Data = +40 00 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 +00 00 00 00 36 53 34 32 53 4e 52 30 30 41 34 33 +31 38 20 4c 20 20 20 20 00 00 00 00 00 00 56 53 +30 54 42 32 51 36 61 53 73 6d 6e 75 20 67 53 53 +20 44 37 38 20 30 56 45 20 4f 54 32 20 42 20 20 +20 20 20 20 20 20 20 20 20 20 20 20 20 20 01 80 +01 40 00 2f 00 40 00 02 00 02 07 00 ff 3f 10 00 +3f 00 10 fc fb 00 01 01 ff ff ff 0f 00 00 07 00 + + + +Drive /c0/e64/s2 : +================ + +----------------------------------------------------------------------------------- +EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type +----------------------------------------------------------------------------------- +64:2 20 Onln 0 1.818 TB SATA SSD Y N 512B Samsung SSD 870 EVO 2TB U - +----------------------------------------------------------------------------------- + +EID=Enclosure Device ID|Slt=Slot No.|DID=Device ID|DG=DriveGroup +DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare +UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface +Med=Media Type|SED=Self Encryptive Drive|PI=Protection Info +SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign +UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded +CFShld=Configured 
shielded|Cpybck=CopyBack|CBShld=Copyback Shielded +UBUnsp=UBad Unsupported|Rbld=Rebuild + + +Drive /c0/e64/s2 - Detailed Information : +======================================= + +Drive /c0/e64/s2 State : +====================== +Shield Counter = 0 +Media Error Count = 0 +Other Error Count = 21 +Drive Temperature = 31C (87.80 F) +Predictive Failure Count = 0 +S.M.A.R.T alert flagged by drive = No + + +Drive /c0/e64/s2 Device attributes : +================================== +SN = S624NS0RC00003M +Manufacturer Id = ATA +Model Number = Samsung SSD 870 EVO 2TB +NAND Vendor = NA +WWN = 5002538F31000003 +Firmware Revision = SVT02B6Q +Raw size = 1.819 TB [0xe8e088b0 Sectors] +Coerced size = 1.818 TB [0xe8d00000 Sectors] +Non Coerced size = 1.818 TB [0xe8d088b0 Sectors] +Device Speed = 6.0Gb/s +Link Speed = 6.0Gb/s +NCQ setting = Enabled +Write Cache = Disabled +Logical Sector Size = 512B +Physical Sector Size = 512B +Connector Name = 00 x1 + + +Drive /c0/e64/s2 Policies/Settings : +================================== +Drive position = DriveGroup:0 +Enclosure position = 0 +Connected Port Number = 2(path0) +Sequence Number = 2 +Commissioned Spare = No +Emergency Spare = No +Last Predictive Failure Event Sequence Number = 0 +Successful diagnostics completion on = N/A +FDE Type = TCG Opal +SED Capable = Yes +SED Enabled = No +Secured = No +Cryptographic Erase Capable = No +Sanitize Support = Not supported +Locked = No +Needs EKM Attention = No +PI Eligible = No +Certified = No +Wide Port Capable = No +Multipath = No + +Port Information : +================ + +----------------------------------------- +Port Status Linkspeed SAS address +----------------------------------------- + 0 Active 6.0Gb/s 0x3b07b250d55b7502 +----------------------------------------- + + +Inquiry Data = +40 00 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 +00 00 00 00 36 53 34 32 53 4e 52 30 30 43 38 35 +31 37 20 4d 20 20 20 20 00 00 00 00 00 00 56 53 +30 54 42 32 51 36 61 53 73 6d 6e 75 20 67 53 53 +20 44 37 
38 20 30 56 45 20 4f 54 32 20 42 20 20 +20 20 20 20 20 20 20 20 20 20 20 20 20 20 01 80 +01 40 00 2f 00 40 00 02 00 02 07 00 ff 3f 10 00 +3f 00 10 fc fb 00 01 01 ff ff ff 0f 00 00 07 00 + + + +Drive /c0/e64/s3 : +================ + +----------------------------------------------------------------------------------- +EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type +----------------------------------------------------------------------------------- +64:3 21 Onln 0 1.818 TB SATA SSD Y N 512B Samsung SSD 870 EVO 2TB U - +----------------------------------------------------------------------------------- + +EID=Enclosure Device ID|Slt=Slot No.|DID=Device ID|DG=DriveGroup +DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare +UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface +Med=Media Type|SED=Self Encryptive Drive|PI=Protection Info +SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign +UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded +CFShld=Configured shielded|Cpybck=CopyBack|CBShld=Copyback Shielded +UBUnsp=UBad Unsupported|Rbld=Rebuild + + +Drive /c0/e64/s3 - Detailed Information : +======================================= + +Drive /c0/e64/s3 State : +====================== +Shield Counter = 0 +Media Error Count = 0 +Other Error Count = 12 +Drive Temperature = 30C (86.00 F) +Predictive Failure Count = 0 +S.M.A.R.T alert flagged by drive = No + + +Drive /c0/e64/s3 Device attributes : +================================== +SN = S624NS0RA00004W +Manufacturer Id = ATA +Model Number = Samsung SSD 870 EVO 2TB +NAND Vendor = NA +WWN = 5002538F31000004 +Firmware Revision = SVT02B6Q +Raw size = 1.819 TB [0xe8e088b0 Sectors] +Coerced size = 1.818 TB [0xe8d00000 Sectors] +Non Coerced size = 1.818 TB [0xe8d088b0 Sectors] +Device Speed = 6.0Gb/s +Link Speed = 6.0Gb/s +NCQ setting = Enabled +Write Cache = Disabled +Logical Sector Size = 512B +Physical Sector Size = 512B +Connector Name = 00 x1 + 
+ +Drive /c0/e64/s3 Policies/Settings : +================================== +Drive position = DriveGroup:0 +Enclosure position = 0 +Connected Port Number = 3(path0) +Sequence Number = 2 +Commissioned Spare = No +Emergency Spare = No +Last Predictive Failure Event Sequence Number = 0 +Successful diagnostics completion on = N/A +FDE Type = TCG Opal +SED Capable = Yes +SED Enabled = No +Secured = No +Cryptographic Erase Capable = No +Sanitize Support = Not supported +Locked = No +Needs EKM Attention = No +PI Eligible = No +Certified = No +Wide Port Capable = No +Multipath = No + +Port Information : +================ + +----------------------------------------- +Port Status Linkspeed SAS address +----------------------------------------- + 0 Active 6.0Gb/s 0x3b07b250d55b7503 +----------------------------------------- + + +Inquiry Data = +40 00 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 +00 00 00 00 36 53 34 32 53 4e 52 30 30 41 39 30 +36 36 20 57 20 20 20 20 00 00 00 00 00 00 56 53 +30 54 42 32 51 36 61 53 73 6d 6e 75 20 67 53 53 +20 44 37 38 20 30 56 45 20 4f 54 32 20 42 20 20 +20 20 20 20 20 20 20 20 20 20 20 20 20 20 01 80 +01 40 00 2f 00 40 00 02 00 02 07 00 ff 3f 10 00 +3f 00 10 fc fb 00 01 01 ff ff ff 0f 00 00 07 00 + + + + + diff --git a/testdata/storcli_show_all_v2.txt b/testdata/storcli_show_all_v2.txt new file mode 100644 index 0000000..711ade7 --- /dev/null +++ b/testdata/storcli_show_all_v2.txt @@ -0,0 +1,397 @@ +CLI Version = 007.1907.0000.0000 Sep 13, 2021 +Operating system = Linux 5.4.134-200.el7.x86_64 +Controller = 0 +Status = Success +Description = Show Drive Information Succeeded. 
+ + +Drive /c0/e252/s0 : +================= + +----------------------------------------------------------------------------------------- +EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type +----------------------------------------------------------------------------------------- +252:0 4 Onln 0 893.750 GB SATA SSD Y N 512B MICRON_M510DC_MTFDDAK960MBP U - +----------------------------------------------------------------------------------------- + +EID=Enclosure Device ID|Slt=Slot No|DID=Device ID|DG=DriveGroup +DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare +UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface +Med=Media Type|SED=Self Encryptive Drive|PI=Protection Info +SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign +UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded +CFShld=Configured shielded|Cpybck=CopyBack|CBShld=Copyback Shielded +UBUnsp=UBad Unsupported|Rbld=Rebuild + + +Drive /c0/e252/s0 - Detailed Information : +======================================== + +Drive /c0/e252/s0 State : +======================= +Shield Counter = 0 +Media Error Count = 0 +Other Error Count = 0 +Drive Temperature = 24C (75.20 F) +Predictive Failure Count = 0 +S.M.A.R.T alert flagged by drive = No + + +Drive /c0/e252/s0 Device attributes : +=================================== +SN = 163100000001 +Manufacturer Id = ATA +Model Number = MICRON_M510DC_MTFDDAK960MBP +NAND Vendor = NA +WWN = 500A075100000001 +Firmware Revision = 0013 +Raw size = 894.252 GB [0x6fc81ab0 Sectors] +Coerced size = 893.750 GB [0x6fb80000 Sectors] +Non Coerced size = 893.752 GB [0x6fb81ab0 Sectors] +Device Speed = 6.0Gb/s +Link Speed = 6.0Gb/s +NCQ setting = Enabled +Write Cache = N/A +Logical Sector Size = 512B +Physical Sector Size = 4 KB +Connector Name = Port 0 - 3 x1 + + +Drive /c0/e252/s0 Policies/Settings : +=================================== +Drive position = DriveGroup:0, Span:0, Row:0 +Enclosure position = 1 
+Connected Port Number = 0(path0) +Sequence Number = 2 +Commissioned Spare = No +Emergency Spare = No +Last Predictive Failure Event Sequence Number = 0 +Successful diagnostics completion on = N/A +FDE Type = None +SED Capable = Yes +SED Enabled = No +Secured = No +Cryptographic Erase Capable = Yes +Sanitize Support = Not supported +Locked = No +Needs EKM Attention = No +PI Eligible = No +Certified = No +Wide Port Capable = No +Multipath = No + +Port Information : +================ + +----------------------------------------- +Port Status Linkspeed SAS address +----------------------------------------- + 0 Active 6.0Gb/s 0x4433221100000000 +----------------------------------------- + + +Inquiry Data = +40 04 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 +00 00 00 00 20 20 20 20 20 20 20 20 36 31 31 33 +33 31 35 37 35 43 31 35 00 00 00 00 00 00 30 30 +33 31 20 20 20 20 49 4d 52 43 4e 4f 4d 5f 31 35 +44 30 5f 43 54 4d 44 46 41 44 39 4b 30 36 42 4d +20 50 20 20 20 20 20 20 20 20 20 20 20 20 10 80 +01 40 00 2f 01 40 00 00 00 00 07 00 ff 3f 10 00 +3f 00 10 fc fb 00 10 f1 ff ff ff 0f 00 00 07 00 + + + +Drive /c0/e252/s1 : +================= + +----------------------------------------------------------------------------------------- +EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type +----------------------------------------------------------------------------------------- +252:1 5 Onln 0 893.750 GB SATA SSD Y N 512B MICRON_M510DC_MTFDDAK960MBP U - +----------------------------------------------------------------------------------------- + +EID=Enclosure Device ID|Slt=Slot No|DID=Device ID|DG=DriveGroup +DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare +UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface +Med=Media Type|SED=Self Encryptive Drive|PI=Protection Info +SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign +UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded +CFShld=Configured 
shielded|Cpybck=CopyBack|CBShld=Copyback Shielded +UBUnsp=UBad Unsupported|Rbld=Rebuild + + +Drive /c0/e252/s1 - Detailed Information : +======================================== + +Drive /c0/e252/s1 State : +======================= +Shield Counter = 0 +Media Error Count = 0 +Other Error Count = 0 +Drive Temperature = 22C (71.60 F) +Predictive Failure Count = 0 +S.M.A.R.T alert flagged by drive = No + + +Drive /c0/e252/s1 Device attributes : +=================================== +SN = 163100000002 +Manufacturer Id = ATA +Model Number = MICRON_M510DC_MTFDDAK960MBP +NAND Vendor = NA +WWN = 500A075100000002 +Firmware Revision = 0013 +Raw size = 894.252 GB [0x6fc81ab0 Sectors] +Coerced size = 893.750 GB [0x6fb80000 Sectors] +Non Coerced size = 893.752 GB [0x6fb81ab0 Sectors] +Device Speed = 6.0Gb/s +Link Speed = 6.0Gb/s +NCQ setting = Enabled +Write Cache = N/A +Logical Sector Size = 512B +Physical Sector Size = 4 KB +Connector Name = Port 0 - 3 x1 + + +Drive /c0/e252/s1 Policies/Settings : +=================================== +Drive position = DriveGroup:0, Span:0, Row:1 +Enclosure position = 0 +Connected Port Number = 1(path0) +Sequence Number = 2 +Commissioned Spare = No +Emergency Spare = No +Last Predictive Failure Event Sequence Number = 0 +Successful diagnostics completion on = N/A +FDE Type = None +SED Capable = Yes +SED Enabled = No +Secured = No +Cryptographic Erase Capable = Yes +Sanitize Support = Not supported +Locked = No +Needs EKM Attention = No +PI Eligible = No +Certified = No +Wide Port Capable = No +Multipath = No + +Port Information : +================ + +----------------------------------------- +Port Status Linkspeed SAS address +----------------------------------------- + 0 Active 6.0Gb/s 0x4433221101000000 +----------------------------------------- + + +Inquiry Data = +40 04 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 +00 00 00 00 20 20 20 20 20 20 20 20 36 31 31 33 +33 31 35 37 45 44 30 46 00 00 00 00 00 00 30 30 +33 31 20 20 20 20 49 4d 52 43 4e 
4f 4d 5f 31 35 +44 30 5f 43 54 4d 44 46 41 44 39 4b 30 36 42 4d +20 50 20 20 20 20 20 20 20 20 20 20 20 20 10 80 +01 40 00 2f 01 40 00 00 00 00 07 00 ff 3f 10 00 +3f 00 10 fc fb 00 10 f1 ff ff ff 0f 00 00 07 00 + + + +Drive /c0/e252/s2 : +================= + +----------------------------------------------------------------------------------------- +EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type +----------------------------------------------------------------------------------------- +252:2 6 Onln 0 893.750 GB SATA SSD Y N 512B MICRON_M510DC_MTFDDAK960MBP U - +----------------------------------------------------------------------------------------- + +EID=Enclosure Device ID|Slt=Slot No|DID=Device ID|DG=DriveGroup +DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare +UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface +Med=Media Type|SED=Self Encryptive Drive|PI=Protection Info +SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign +UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded +CFShld=Configured shielded|Cpybck=CopyBack|CBShld=Copyback Shielded +UBUnsp=UBad Unsupported|Rbld=Rebuild + + +Drive /c0/e252/s2 - Detailed Information : +======================================== + +Drive /c0/e252/s2 State : +======================= +Shield Counter = 0 +Media Error Count = 0 +Other Error Count = 0 +Drive Temperature = 20C (68.00 F) +Predictive Failure Count = 0 +S.M.A.R.T alert flagged by drive = No + + +Drive /c0/e252/s2 Device attributes : +=================================== +SN = 165000000003 +Manufacturer Id = ATA +Model Number = MICRON_M510DC_MTFDDAK960MBP +NAND Vendor = NA +WWN = 500A075100000003 +Firmware Revision = 0013 +Raw size = 894.252 GB [0x6fc81ab0 Sectors] +Coerced size = 893.750 GB [0x6fb80000 Sectors] +Non Coerced size = 893.752 GB [0x6fb81ab0 Sectors] +Device Speed = 6.0Gb/s +Link Speed = 6.0Gb/s +NCQ setting = Enabled +Write Cache = N/A +Logical Sector Size = 512B 
+Physical Sector Size = 4 KB +Connector Name = Port 0 - 3 x1 + + +Drive /c0/e252/s2 Policies/Settings : +=================================== +Drive position = DriveGroup:0, Span:1, Row:0 +Enclosure position = 0 +Connected Port Number = 2(path0) +Sequence Number = 2 +Commissioned Spare = No +Emergency Spare = No +Last Predictive Failure Event Sequence Number = 0 +Successful diagnostics completion on = N/A +FDE Type = None +SED Capable = Yes +SED Enabled = No +Secured = No +Cryptographic Erase Capable = Yes +Sanitize Support = Not supported +Locked = No +Needs EKM Attention = No +PI Eligible = No +Certified = No +Wide Port Capable = No +Multipath = No + +Port Information : +================ + +----------------------------------------- +Port Status Linkspeed SAS address +----------------------------------------- + 0 Active 6.0Gb/s 0x4433221102000000 +----------------------------------------- + + +Inquiry Data = +40 04 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 +00 00 00 00 20 20 20 20 20 20 20 20 36 31 30 35 +35 31 30 31 35 30 42 42 00 00 00 00 00 00 30 30 +33 31 20 20 20 20 49 4d 52 43 4e 4f 4d 5f 31 35 +44 30 5f 43 54 4d 44 46 41 44 39 4b 30 36 42 4d +20 50 20 20 20 20 20 20 20 20 20 20 20 20 10 80 +01 40 00 2f 01 40 00 00 00 00 07 00 ff 3f 10 00 +3f 00 10 fc fb 00 10 f1 ff ff ff 0f 00 00 07 00 + + + +Drive /c0/e252/s3 : +================= + +----------------------------------------------------------------------------------------- +EID:Slt DID State DG Size Intf Med SED PI SeSz Model Sp Type +----------------------------------------------------------------------------------------- +252:3 7 Onln 0 893.750 GB SATA SSD Y N 512B MICRON_M510DC_MTFDDAK960MBP U - +----------------------------------------------------------------------------------------- + +EID=Enclosure Device ID|Slt=Slot No|DID=Device ID|DG=DriveGroup +DHS=Dedicated Hot Spare|UGood=Unconfigured Good|GHS=Global Hotspare +UBad=Unconfigured Bad|Sntze=Sanitize|Onln=Online|Offln=Offline|Intf=Interface +Med=Media 
Type|SED=Self Encryptive Drive|PI=Protection Info +SeSz=Sector Size|Sp=Spun|U=Up|D=Down|T=Transition|F=Foreign +UGUnsp=UGood Unsupported|UGShld=UGood shielded|HSPShld=Hotspare shielded +CFShld=Configured shielded|Cpybck=CopyBack|CBShld=Copyback Shielded +UBUnsp=UBad Unsupported|Rbld=Rebuild + + +Drive /c0/e252/s3 - Detailed Information : +======================================== + +Drive /c0/e252/s3 State : +======================= +Shield Counter = 0 +Media Error Count = 0 +Other Error Count = 0 +Drive Temperature = 20C (68.00 F) +Predictive Failure Count = 0 +S.M.A.R.T alert flagged by drive = No + + +Drive /c0/e252/s3 Device attributes : +=================================== +SN = 165000000004 +Manufacturer Id = ATA +Model Number = MICRON_M510DC_MTFDDAK960MBP +NAND Vendor = NA +WWN = 500A075100000004 +Firmware Revision = 0013 +Raw size = 894.252 GB [0x6fc81ab0 Sectors] +Coerced size = 893.750 GB [0x6fb80000 Sectors] +Non Coerced size = 893.752 GB [0x6fb81ab0 Sectors] +Device Speed = 6.0Gb/s +Link Speed = 6.0Gb/s +NCQ setting = Enabled +Write Cache = N/A +Logical Sector Size = 512B +Physical Sector Size = 4 KB +Connector Name = Port 0 - 3 x1 + + +Drive /c0/e252/s3 Policies/Settings : +=================================== +Drive position = DriveGroup:0, Span:1, Row:1 +Enclosure position = 0 +Connected Port Number = 3(path0) +Sequence Number = 2 +Commissioned Spare = No +Emergency Spare = No +Last Predictive Failure Event Sequence Number = 0 +Successful diagnostics completion on = N/A +FDE Type = None +SED Capable = Yes +SED Enabled = No +Secured = No +Cryptographic Erase Capable = Yes +Sanitize Support = Not supported +Locked = No +Needs EKM Attention = No +PI Eligible = No +Certified = No +Wide Port Capable = No +Multipath = No + +Port Information : +================ + +----------------------------------------- +Port Status Linkspeed SAS address +----------------------------------------- + 0 Active 6.0Gb/s 0x4433221103000000 
+----------------------------------------- + + +Inquiry Data = +40 04 ff 3f 37 c8 10 00 00 00 00 00 3f 00 00 00 +00 00 00 00 20 20 20 20 20 20 20 20 36 31 30 35 +35 31 30 31 35 30 37 42 00 00 00 00 00 00 30 30 +33 31 20 20 20 20 49 4d 52 43 4e 4f 4d 5f 31 35 +44 30 5f 43 54 4d 44 46 41 44 39 4b 30 36 42 4d +20 50 20 20 20 20 20 20 20 20 20 20 20 20 10 80 +01 40 00 2f 01 40 00 00 00 00 07 00 ff 3f 10 00 +3f 00 10 fc fb 00 10 f1 ff ff ff 0f 00 00 07 00 + + + + + diff --git a/version.go b/version.go new file mode 100644 index 0000000..cc1efb8 --- /dev/null +++ b/version.go @@ -0,0 +1,24 @@ +package main + +import "fmt" + +// Build metadata injected at build time via -ldflags -X (see Makefile). version +// is sourced from the VERSION file so a single file is the authoritative version; +// commit and date are filled from git and the build clock. +var ( + version = "dev" + commit = "" + date = "" +) + +// printVersion writes the version line plus commit and build date when those +// were injected at build time. +func printVersion() { + fmt.Println("drive-health-metrics", version) + if commit != "" { + fmt.Printf(" commit: %s\n", commit) + } + if date != "" { + fmt.Printf(" built: %s\n", date) + } +}