// Copyright 2020 Lars Hoogestraat
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
|
|
|
|
package main
|
|
|
|
import (
|
|
"errors"
|
|
"flag"
|
|
"fmt"
|
|
"math"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/gosnmp/gosnmp"
|
|
)
|
|
|
|
// status is the result state of a check. main converts it to an int and uses
// it as the exit code of the process.
type status int
|
|
|
|
// Result states of a check. The values are handed to os.Exit, so they are
// also the exit codes of the program: 0 = ok, 1 = warning, 2 = critical,
// 3 = unknown.
//
// The constants are untyped, which allows them to be used both as a status
// return value and as a plain int argument for os.Exit.
const (
	ok = iota
	warning
	critical
	unknown
)
|
|
|
|
// SNMP object identifiers queried by the checks. All of them live below the
// 1.3.6.1.4.1.789 enterprise subtree.
const (
	// Product information, fetched with a single Get.
	productVersion         = "1.3.6.1.4.1.789.1.1.2.0"
	productFirmwareVersion = "1.3.6.1.4.1.789.1.1.6.0"

	// File system statistics. diskSpaceUsage appends "." and the volume ID
	// to these OIDs before fetching them.
	dfFileSys               = "1.3.6.1.4.1.789.1.5.4.1.2"
	dfPerCentKBytesCapacity = "1.3.6.1.4.1.789.1.5.4.1.6"
	df64TotalKBytes         = "1.3.6.1.4.1.789.1.5.4.1.29"
	df64UsedKBytes          = "1.3.6.1.4.1.789.1.5.4.1.30"
	df64AvailKBytes         = "1.3.6.1.4.1.789.1.5.4.1.31"

	// Disk summary values.
	diskActiveCount               = "1.3.6.1.4.1.789.1.6.4.2"
	diskReconstructingCount       = "1.3.6.1.4.1.789.1.6.4.3"
	diskReconstructingParityCount = "1.3.6.1.4.1.789.1.6.4.4"
	diskVerifyingParityCount      = "1.3.6.1.4.1.789.1.6.4.5"
	diskScrubbingCount            = "1.3.6.1.4.1.789.1.6.4.6"
	diskFailedCount               = "1.3.6.1.4.1.789.1.6.4.7.0"
	diskSpareCount                = "1.3.6.1.4.1.789.1.6.4.8"
	diskAddingSpareCount          = "1.3.6.1.4.1.789.1.6.4.9"
	diskFailedMessage             = "1.3.6.1.4.1.789.1.6.4.10"
	diskPrefailedCount            = "1.3.6.1.4.1.789.1.6.4.11"

	// Enclosure state, read by walking the subtree.
	enclFansFailed        = "1.3.6.1.4.1.789.1.21.1.2.1.18"
	enclElectronicsFailed = "1.3.6.1.4.1.789.1.21.1.2.1.33"
)
|
|
|
|
// genericFlags holds the connection options shared by every sub command.
type genericFlags struct {
	// host is the SNMP target, set with -H.
	host string
	// community is the SNMP community, set with -C.
	community string
}
|
|
|
|
// diskfailedFlags holds the thresholds of the diskfailed sub command.
type diskfailedFlags struct {
	// critical is the number of failed disks at which the check turns
	// critical, set with -c. Values <= 0 disable the threshold.
	critical int
	// warning is the number of failed disks at which the check turns
	// warning, set with -w. Values <= 0 disable the threshold.
	warning int
}
|
|
|
|
// dfFlags holds the options of the df sub command.
type dfFlags struct {
	// volumeID is the numeric index of the volume, set with -i. The value -1
	// means that the volume has to be looked up by its path instead.
	volumeID int
	// volume is the path of the volume to monitor, set with -p.
	volume string
	// listVolumes requests a listing of all volumes instead of a check,
	// set with -l.
	listVolumes bool
	// critical is the used-space percentage at which the check turns
	// critical, set with -c. Values <= 0 disable the threshold.
	critical int
	// warning is the used-space percentage at which the check turns
	// warning, set with -w. Values <= 0 disable the threshold.
	warning int
}
|
|
|
|
// usage prints a short description of the tool and the list of available
// sub commands to stdout.
func usage() {
	fmt.Println("check_netapp is a command line tool to check SNMP values from NetApp for e.g. icinga")
	fmt.Println("Usage:")
	// Keep this list in sync with the sub commands handled in main.
	fmt.Println("\t ./check_netapp (df|version|fw|fansfailed|elecfailed|diskfailed)")
}
|
|
|
|
func main() {
|
|
stat := ok
|
|
|
|
defer func() {
|
|
if gosnmp.Default.Conn != nil {
|
|
err := gosnmp.Default.Conn.Close()
|
|
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
os.Exit(unknown)
|
|
}
|
|
}
|
|
|
|
os.Exit(stat)
|
|
}()
|
|
|
|
genericFlags := genericFlags{}
|
|
|
|
dfFlags := dfFlags{}
|
|
|
|
version := flag.NewFlagSet("version", flag.ExitOnError)
|
|
version.StringVar(&genericFlags.host, "H", "localhost", "The host to connect")
|
|
version.StringVar(&genericFlags.community, "C", "public", "The community to connect")
|
|
|
|
fw := flag.NewFlagSet("fw", flag.ExitOnError)
|
|
fw.StringVar(&genericFlags.host, "H", "localhost", "The host to connect")
|
|
fw.StringVar(&genericFlags.community, "C", "public", "The community to connect")
|
|
|
|
fansFailed := flag.NewFlagSet("fansfailed", flag.ExitOnError)
|
|
fansFailed.StringVar(&genericFlags.host, "H", "localhost", "The host to connect")
|
|
fansFailed.StringVar(&genericFlags.community, "C", "public", "The community to connect")
|
|
|
|
elecFailed := flag.NewFlagSet("elecfailed", flag.ExitOnError)
|
|
elecFailed.StringVar(&genericFlags.host, "H", "localhost", "The host to connect")
|
|
elecFailed.StringVar(&genericFlags.community, "C", "public", "The community to connect")
|
|
|
|
df := flag.NewFlagSet("df", flag.ExitOnError)
|
|
df.StringVar(&genericFlags.host, "H", "localhost", "The host to connect")
|
|
df.StringVar(&genericFlags.community, "C", "public", "The community to connect")
|
|
df.IntVar(&dfFlags.volumeID, "i", -1, "The ID of the volume. The last digits returned by the volume list of e.g. .1.3.6.1.4.1.789.1.5.4.1.2.1135 the id would be 1135.")
|
|
df.StringVar(&dfFlags.volume, "p", "", "The path of the volume to monitor. Could be slow, walks over all volumes")
|
|
df.BoolVar(&dfFlags.listVolumes, "l", false, "List all volumes reported by SNMP")
|
|
df.IntVar(&dfFlags.critical, "c", -1, "Percentage of space used before going critical")
|
|
df.IntVar(&dfFlags.warning, "w", -1, "Percentage of space used before sending a warning")
|
|
|
|
diskfailedFlags := diskfailedFlags{}
|
|
|
|
diskfailed := flag.NewFlagSet("diskfailed", flag.ExitOnError)
|
|
diskfailed.StringVar(&genericFlags.host, "H", "localhost", "The host to connect")
|
|
diskfailed.StringVar(&genericFlags.community, "C", "public", "The community to connect")
|
|
diskfailed.IntVar(&diskfailedFlags.critical, "c", -1, "number of failed disks before going critical")
|
|
diskfailed.IntVar(&diskfailedFlags.warning, "w", -1, "number of failed disks before sending a warning")
|
|
|
|
if len(os.Args) < 2 {
|
|
usage()
|
|
stat = unknown
|
|
return
|
|
}
|
|
|
|
switch os.Args[1] {
|
|
case "df":
|
|
df.Parse(os.Args[2:])
|
|
|
|
err := genericFlags.connect()
|
|
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
stat = unknown
|
|
return
|
|
}
|
|
|
|
msg, status, err := dfFlags.diskSpaceUsage()
|
|
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
stat = int(status)
|
|
return
|
|
}
|
|
stat = int(status)
|
|
fmt.Print(msg)
|
|
return
|
|
case "version":
|
|
version.Parse(os.Args[2:])
|
|
|
|
err := genericFlags.connect()
|
|
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
stat = unknown
|
|
return
|
|
}
|
|
|
|
msg, status, err := genericFlags.getVersion()
|
|
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
stat = int(status)
|
|
return
|
|
}
|
|
|
|
stat = int(status)
|
|
fmt.Print(msg)
|
|
return
|
|
case "fw":
|
|
fw.Parse(os.Args[2:])
|
|
|
|
err := genericFlags.connect()
|
|
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
stat = unknown
|
|
return
|
|
}
|
|
|
|
msg, status, err := genericFlags.getFirmwareVersion()
|
|
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
stat = int(status)
|
|
return
|
|
}
|
|
|
|
stat = int(status)
|
|
fmt.Print(msg)
|
|
return
|
|
case "diskfailed":
|
|
diskfailed.Parse(os.Args[2:])
|
|
|
|
err := genericFlags.connect()
|
|
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
stat = unknown
|
|
return
|
|
}
|
|
|
|
msg, status, err := diskfailedFlags.getDiskFailed()
|
|
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
stat = int(status)
|
|
return
|
|
}
|
|
|
|
stat = int(status)
|
|
fmt.Print(msg)
|
|
return
|
|
case "elecfailed":
|
|
elecFailed.Parse(os.Args[2:])
|
|
|
|
err := genericFlags.connect()
|
|
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
stat = unknown
|
|
return
|
|
}
|
|
|
|
msg, status, err := genericFlags.getElectronicFailed()
|
|
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
stat = int(status)
|
|
return
|
|
}
|
|
|
|
stat = int(status)
|
|
fmt.Print(msg)
|
|
return
|
|
case "fansfailed":
|
|
fansFailed.Parse(os.Args[2:])
|
|
|
|
err := genericFlags.connect()
|
|
|
|
if err != nil {
|
|
fmt.Println(err)
|
|
os.Exit(critical)
|
|
}
|
|
|
|
msg, status, err := genericFlags.getFansFailed()
|
|
|
|
if err != nil {
|
|
stat = int(status)
|
|
return
|
|
}
|
|
fmt.Print(msg)
|
|
stat = int(status)
|
|
return
|
|
default:
|
|
usage()
|
|
stat = unknown
|
|
return
|
|
}
|
|
}
|
|
|
|
// connect established connection with the snmp host
|
|
func (f genericFlags) connect() error {
|
|
gosnmp.Default.Target = f.host
|
|
gosnmp.Default.Community = f.community
|
|
|
|
err := gosnmp.Default.Connect()
|
|
|
|
if err != nil {
|
|
fmt.Printf("error: Connect(): %v", err)
|
|
os.Exit(unknown)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// getVersion returns the product version of the NetApp (OID .1.3.6.1.4.1.789.1.1.2.0)
|
|
// http://oidref.com/1.3.6.1.4.1.789.1.1.2.0
|
|
func (f genericFlags) getVersion() (string, status, error) {
|
|
version, err := gosnmp.Default.Get([]string{productVersion})
|
|
|
|
if err != nil {
|
|
return "", unknown, fmt.Errorf("Get(%s) err: %v", productVersion, err)
|
|
}
|
|
|
|
str, err := getStringValues(version.Variables)
|
|
|
|
if err != nil {
|
|
return "", unknown, err
|
|
}
|
|
|
|
return str[0], ok, nil
|
|
}
|
|
|
|
// getElectronicFailed the list of failed electronics elements. Valid only if enclElectronicsPresent shows that some are present.
|
|
// http://oidref.com/1.3.6.1.4.1.789.1.21.1.2.1.33
|
|
func (f genericFlags) getElectronicFailed() (string, status, error) {
|
|
elecFailed, err := gosnmp.Default.WalkAll(enclElectronicsFailed)
|
|
|
|
if err != nil {
|
|
return "", unknown, fmt.Errorf("WalkAll(%s) err: %v", enclElectronicsFailed, err)
|
|
}
|
|
|
|
values, err := getStringValues(elecFailed)
|
|
|
|
if err != nil {
|
|
return "", unknown, err
|
|
}
|
|
|
|
elecFailedMsg := ""
|
|
|
|
for _, str := range values {
|
|
if str != "" {
|
|
elecFailedMsg += fmt.Sprintf("%s\n", str)
|
|
}
|
|
}
|
|
|
|
if elecFailedMsg != "" {
|
|
return "", critical, errors.New(elecFailedMsg)
|
|
}
|
|
|
|
return "Electronic is ok", ok, nil
|
|
}
|
|
|
|
// getFansFailed the list of failed fans or fan modules in this enclosure. Fans are numbered as described in enclFansPresent.
|
|
// http://oidref.com/1.3.6.1.4.1.789.1.21.1.2.1.18
|
|
func (f genericFlags) getFansFailed() (string, status, error) {
|
|
fansFailed, err := gosnmp.Default.WalkAll(enclFansFailed)
|
|
|
|
if err != nil {
|
|
return "", unknown, fmt.Errorf("WalkAll(%s) err: %v", enclElectronicsFailed, err)
|
|
}
|
|
|
|
fansFailedMsg := ""
|
|
|
|
values, err := getStringValues(fansFailed)
|
|
|
|
if err != nil {
|
|
return "", unknown, err
|
|
}
|
|
|
|
for _, str := range values {
|
|
if str != "" {
|
|
fansFailedMsg += fmt.Sprintf("%s\n", str)
|
|
}
|
|
}
|
|
|
|
if fansFailedMsg != "" {
|
|
return "", critical, errors.New(fansFailedMsg)
|
|
}
|
|
|
|
return "Fans are ok", ok, nil
|
|
}
|
|
|
|
// getFirmwareVersion returns the firmware version of the NetApp
|
|
// http://oidref.com/1.3.6.1.4.1.789.1.1.6.0
|
|
func (f genericFlags) getFirmwareVersion() (string, status, error) {
|
|
version, err := gosnmp.Default.Get([]string{productFirmwareVersion})
|
|
|
|
if err != nil {
|
|
return "", unknown, fmt.Errorf("Get(%s) err: %v", productFirmwareVersion, err)
|
|
}
|
|
|
|
str, err := getStringValues(version.Variables)
|
|
|
|
if err != nil {
|
|
return "", unknown, err
|
|
}
|
|
|
|
return fmt.Sprintf("Firmware version: %s", str[0]), ok, nil
|
|
}
|
|
|
|
// getDiskFailed returns the number of failed disk with the failure message if any
|
|
// http://oidref.com/1.3.6.1.4.1.789.1.6.4.7.0
|
|
func (f diskfailedFlags) getDiskFailed() (string, status, error) {
|
|
failedDisk, err := gosnmp.Default.Get([]string{diskFailedCount})
|
|
|
|
if err != nil {
|
|
return "", unknown, fmt.Errorf("Get(%s) err: %v", diskFailedCount, err)
|
|
}
|
|
|
|
str, err := getStringValues(failedDisk.Variables)
|
|
if err != nil {
|
|
return "", unknown, err
|
|
}
|
|
|
|
iFailedDisk, err := strconv.Atoi(str[0])
|
|
if err != nil {
|
|
return "", unknown, err
|
|
}
|
|
|
|
failedDiskMsg, err := f.getDiskFailedMessage(iFailedDisk)
|
|
|
|
if err != nil {
|
|
return "", unknown, err
|
|
}
|
|
|
|
if f.critical > 0 && iFailedDisk >= f.critical {
|
|
return "", critical, fmt.Errorf(failedDiskMsg)
|
|
} else if f.warning > 0 && iFailedDisk >= f.warning {
|
|
return "", warning, fmt.Errorf(failedDiskMsg)
|
|
}
|
|
|
|
return failedDiskMsg, ok, nil
|
|
}
|
|
|
|
func (f diskfailedFlags) getDiskFailedMessage(failedDisks int) (string, error) {
|
|
failedDiskMsg := fmt.Sprintf("%d disk(s) has failures", failedDisks)
|
|
|
|
if failedDisks > 0 {
|
|
msg, err := gosnmp.Default.Get([]string{diskFailedMessage})
|
|
|
|
if err != nil {
|
|
return "", fmt.Errorf("Get(%s) err: %v", diskFailedMessage, err)
|
|
}
|
|
|
|
str, err := getStringValues(msg.Variables)
|
|
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
failedDiskMsg += fmt.Sprintf("\nDescription: %s", str[0])
|
|
}
|
|
|
|
return failedDiskMsg, nil
|
|
}
|
|
|
|
// diskSpaceUsage returns the percentage of used space of a volume
|
|
// http://oidref.com/1.3.6.1.4.1.789.1.5.4.1.2
|
|
func (f dfFlags) diskSpaceUsage() (string, status, error) {
|
|
volID := f.volumeID
|
|
|
|
if volID == -1 {
|
|
results, err := gosnmp.Default.WalkAll(dfFileSys)
|
|
|
|
if err != nil {
|
|
return "", unknown, fmt.Errorf("WalkAll(%s): %v", dfFileSys, err)
|
|
}
|
|
|
|
if f.listVolumes {
|
|
volumes := ""
|
|
for _, v := range results {
|
|
volumes += fmt.Sprintf("%s %s\n", v.Name, v.Value)
|
|
}
|
|
|
|
return volumes, ok, nil
|
|
}
|
|
|
|
volID, err = getVolumeID(f.volume, results)
|
|
|
|
if err != nil {
|
|
return "", unknown, err
|
|
}
|
|
}
|
|
|
|
volName, err := gosnmp.Default.Get([]string{dfFileSys + "." + strconv.Itoa(volID)})
|
|
|
|
if err != nil {
|
|
return "", unknown, fmt.Errorf("Get(%s) err: %v", dfFileSys+"."+strconv.Itoa(volID), err)
|
|
}
|
|
|
|
strVolName, err := getStringValues(volName.Variables)
|
|
|
|
if err != nil {
|
|
return "", unknown, err
|
|
}
|
|
|
|
//Get total space
|
|
total, err := gosnmp.Default.Get([]string{df64TotalKBytes + "." + strconv.Itoa(volID)})
|
|
|
|
if err != nil {
|
|
return "", unknown, fmt.Errorf("Get(%s) err: %v", df64TotalKBytes+"."+strconv.Itoa(volID), err)
|
|
}
|
|
|
|
iTotal, err := getUint64Values(total.Variables)
|
|
|
|
if err != nil {
|
|
return "", unknown, err
|
|
}
|
|
|
|
//Get used space
|
|
used, err := gosnmp.Default.Get([]string{df64UsedKBytes + "." + strconv.Itoa(volID)})
|
|
|
|
if err != nil {
|
|
return "", unknown, fmt.Errorf("Get(%s) err: %v", df64UsedKBytes+"."+strconv.Itoa(volID), err)
|
|
}
|
|
|
|
iUsed, err := getUint64Values(used.Variables)
|
|
|
|
if err != nil {
|
|
return "", unknown, err
|
|
}
|
|
|
|
//Get available space
|
|
available, err := gosnmp.Default.Get([]string{df64AvailKBytes + "." + strconv.Itoa(volID)})
|
|
|
|
if err != nil {
|
|
return "", unknown, fmt.Errorf("Get(%s) err: %v", df64AvailKBytes+"."+strconv.Itoa(volID), err)
|
|
}
|
|
|
|
iAvailable, err := getUint64Values(available.Variables)
|
|
|
|
if err != nil {
|
|
return "", unknown, err
|
|
}
|
|
|
|
percentageUsed := 100 / float64(iTotal[0]) * float64(iUsed[0])
|
|
|
|
if int(percentageUsed) >= f.critical && f.critical > 0 {
|
|
return "", critical, fmt.Errorf("Space of volume %s has usage of %.2f%%. Space total: %s - Space available: %s - Space used: %s", strVolName, percentageUsed, hr(iTotal[0]), hr(iAvailable[0]), hr(iUsed[0]))
|
|
}
|
|
|
|
if int(percentageUsed) >= f.warning && f.warning > 0 {
|
|
return "", warning, fmt.Errorf("Space of volume %s has usage of %.2f%%. Space total: %s - Space available: %s - Space used: %s", strVolName, percentageUsed, hr(iTotal[0]), hr(iAvailable[0]), hr(iUsed[0]))
|
|
}
|
|
|
|
str := fmt.Sprintf("Space of volume %s has usage of %.2f%%. Space total: %s - Space available: %s - Space used: %s", strVolName, percentageUsed, hr(iTotal[0]), hr(iAvailable[0]), hr(iUsed[0]))
|
|
return str, ok, nil
|
|
}
|
|
|
|
func getVolumeID(volName string, in []gosnmp.SnmpPDU) (int, error) {
|
|
for _, v := range in {
|
|
value := v.Value.(string)
|
|
|
|
if value == volName {
|
|
pos := strings.LastIndex(v.Name, ".")
|
|
|
|
if pos == -1 {
|
|
return -1, fmt.Errorf("error: unexpected snmp response %s, please check via snmpget", v.Name)
|
|
}
|
|
|
|
volID, err := strconv.Atoi(v.Name[pos+1:])
|
|
|
|
if err != nil {
|
|
return -1, fmt.Errorf("error: could not convert unexpected value %s into integer", v.Name[pos+1:])
|
|
}
|
|
|
|
return volID, nil
|
|
}
|
|
}
|
|
|
|
return -1, fmt.Errorf("error: volume %s not found", volName)
|
|
}
|
|
|
|
func getStringValues(vars []gosnmp.SnmpPDU) ([]string, error) {
|
|
values, err := getValues(vars)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var str []string
|
|
for _, v := range values {
|
|
if v2, ok := v.(string); ok {
|
|
str = append(str, v2)
|
|
}
|
|
}
|
|
|
|
return str, nil
|
|
}
|
|
|
|
func getUint64Values(vars []gosnmp.SnmpPDU) ([]uint64, error) {
|
|
values, err := getValues(vars)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var uints []uint64
|
|
|
|
for _, v := range values {
|
|
if v2, ok := v.(uint64); ok {
|
|
uints = append(uints, v2)
|
|
}
|
|
}
|
|
|
|
return uints, nil
|
|
}
|
|
|
|
func getValues(vars []gosnmp.SnmpPDU) ([]interface{}, error) {
|
|
var values []interface{}
|
|
for _, variable := range vars {
|
|
switch variable.Type {
|
|
case gosnmp.OctetString:
|
|
values = append(values, string(variable.Value.([]byte)))
|
|
case gosnmp.Counter64:
|
|
values = append(values, uint64(variable.Value.(uint64)))
|
|
case gosnmp.NoSuchInstance:
|
|
return nil, fmt.Errorf("no such instance, requested object instance with OID %s could not be returned", variable.Name)
|
|
case gosnmp.NoSuchObject:
|
|
return nil, fmt.Errorf("no such object, requested object instance with OID %s could not be returned", variable.Name)
|
|
case gosnmp.UnknownType:
|
|
return nil, fmt.Errorf("unknown type, object instance with OID %s contained an unknown type", variable.Name)
|
|
}
|
|
|
|
return values, nil
|
|
}
|
|
|
|
return nil, errors.New("unexpected type, object instance with OID %s contained an unknown type")
|
|
}
|
|
|
|
// hr returns a human readable representation of the size fs, which is given
// in kilobytes (the callers pass the df64*KBytes values).
//
// Sizes below 1024 KB are printed as an integer number of KB. Larger sizes
// are scaled by powers of 1024 up to PB and printed with one decimal place,
// rounded up. A size of 0 is printed as "0".
func hr(fs uint64) string {
	if fs == 0 {
		return "0"
	}

	sizes := []string{"KB", "MB", "GB", "TB", "PB"}

	// Determine the unit with integer arithmetic; a floating point logarithm
	// can land just below the exact value for powers of 1024. The largest
	// unit is also used for everything beyond it, so the index stays valid.
	exp := 0
	for v := fs; v >= 1024 && exp < len(sizes)-1; v /= 1024 {
		exp++
	}

	if exp == 0 {
		return fmt.Sprintf("%d %s", fs, sizes[exp])
	}

	val := float64(fs) / math.Pow(1024, float64(exp))

	return fmt.Sprintf("%.1f %s", math.Ceil(val*10)/10, sizes[exp])
}
|