You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

583 lines
14 KiB

// Copyright 2020 Lars Hoogestraat
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
import (
"errors"
"flag"
"fmt"
"math"
"os"
"strconv"
"strings"
"github.com/soniah/gosnmp"
)
// status is the outcome of a single check. main converts the value to an int
// and uses it as the process exit code.
type status int
// Check outcomes, numbered 0 to 3 via iota. The constants are untyped, so they
// can be returned where a status is expected and also be handed to os.Exit
// directly.
// NOTE(review): the numbering looks like the Nagios/Icinga plugin exit-code
// convention (0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN) — confirm.
const (
ok = iota
warning
critical
unknown
)
// SNMP object identifiers below the NetApp enterprise subtree
// (1.3.6.1.4.1.789) that the checks in this file query.
//
// Values that are fetched with a plain Get must name a concrete instance, so
// scalar objects carry the trailing ".0". Table columns are either walked or
// get the row index appended by the caller.
const (
	// Scalars fetched with Get by the version and fw subcommands.
	productVersion         = "1.3.6.1.4.1.789.1.1.2.0"
	productFirmwareVersion = "1.3.6.1.4.1.789.1.1.6.0"

	// Columns of the volume table. dfFileSys is walked to list volumes and to
	// resolve a volume path to its index; the df64* columns are fetched with
	// the volume index appended.
	dfFileSys               = "1.3.6.1.4.1.789.1.5.4.1.2"
	dfPerCentKBytesCapacity = "1.3.6.1.4.1.789.1.5.4.1.6"
	df64TotalKBytes         = "1.3.6.1.4.1.789.1.5.4.1.29"
	df64UsedKBytes          = "1.3.6.1.4.1.789.1.5.4.1.30"
	df64AvailKBytes         = "1.3.6.1.4.1.789.1.5.4.1.31"

	// Disk summary objects. Only diskFailedCount and diskFailedMessage are
	// queried (with Get) by the code in this file; the remaining entries are
	// declared without an instance suffix.
	diskActiveCount               = "1.3.6.1.4.1.789.1.6.4.2"
	diskReconstructingCount       = "1.3.6.1.4.1.789.1.6.4.3"
	diskReconstructingParityCount = "1.3.6.1.4.1.789.1.6.4.4"
	diskVerifyingParityCount      = "1.3.6.1.4.1.789.1.6.4.5"
	diskScrubbingCount            = "1.3.6.1.4.1.789.1.6.4.6"
	diskFailedCount               = "1.3.6.1.4.1.789.1.6.4.7.0"
	diskSpareCount                = "1.3.6.1.4.1.789.1.6.4.8"
	diskAddingSpareCount          = "1.3.6.1.4.1.789.1.6.4.9"
	// diskFailedMessage is fetched with Get just like diskFailedCount, so it
	// needs the ".0" instance suffix as well; without it the agent answers
	// with noSuchInstance and the failure description can never be read.
	diskFailedMessage  = "1.3.6.1.4.1.789.1.6.4.10.0"
	diskPrefailedCount = "1.3.6.1.4.1.789.1.6.4.11"

	// Enclosure table columns, walked by the fansfailed and elecfailed
	// subcommands.
	enclFansFailed        = "1.3.6.1.4.1.789.1.21.1.2.1.18"
	enclElectronicsFailed = "1.3.6.1.4.1.789.1.21.1.2.1.33"
)
// genericFlags holds the connection options every subcommand accepts: the
// SNMP host (-H) and the community string (-C).
type genericFlags struct {
	host, community string
}
// diskfailedFlags holds the thresholds of the diskfailed subcommand: the
// number of failed disks at which the check turns critical (-c) or warning
// (-w). getDiskFailed only honours a threshold that is greater than zero.
type diskfailedFlags struct {
	critical, warning int
}
// dfFlags holds the options of the df subcommand.
type dfFlags struct {
	// Volume selection: the numeric SNMP index (-i, -1 when not given) or the
	// volume path (-p), which is resolved by walking all volumes.
	volumeID int
	volume   string

	// listVolumes (-l) requests a listing of all volumes reported by SNMP.
	listVolumes bool

	// Usage thresholds in percent (-c and -w); diskSpaceUsage only honours a
	// threshold that is greater than zero.
	critical, warning int
}
// usage prints a one-line description of the tool followed by the list of
// subcommands understood by main to standard output.
func usage() {
	fmt.Println("check_netapp is a command line tool to check SNMP values from NetApp for e.g. icinga")
	fmt.Println("Usage:")
	// The df subcommand is implemented by main and therefore listed as well.
	fmt.Println("\t ./check_netapp (version|fw|fansfailed|elecfailed|diskfailed|df)")
}
// main selects the check named by the first command line argument, parses the
// flags of that subcommand, connects to the SNMP host and runs the check.
//
// The result of the check becomes the process exit code. Anything that
// prevents the check from running (missing or unknown subcommand, connection
// problems, a panic inside a check) is reported as unknown.
func main() {
	stat := ok
	defer func() {
		// The os.Exit below would otherwise hide a panic completely: the
		// process would terminate with the current value of stat (ok unless a
		// check already finished) and without any output.
		if r := recover(); r != nil {
			fmt.Println(r)
			stat = unknown
		}
		if gosnmp.Default.Conn != nil {
			if err := gosnmp.Default.Conn.Close(); err != nil {
				fmt.Println(err)
				os.Exit(unknown)
			}
		}
		os.Exit(stat)
	}()

	var (
		conn     genericFlags
		dfOpts   dfFlags
		diskOpts diskfailedFlags
	)

	// newFlagSet creates the flag set of one subcommand with the connection
	// flags shared by all of them.
	newFlagSet := func(name string) *flag.FlagSet {
		fs := flag.NewFlagSet(name, flag.ExitOnError)
		fs.StringVar(&conn.host, "H", "localhost", "The host to connect")
		fs.StringVar(&conn.community, "C", "public", "The community to connect")
		return fs
	}

	dfSet := newFlagSet("df")
	dfSet.IntVar(&dfOpts.volumeID, "i", -1, "The ID of the volume. The last digits returned by the volume list of e.g. .1.3.6.1.4.1.789.1.5.4.1.2.1135 the id would be 1135.")
	dfSet.StringVar(&dfOpts.volume, "p", "", "The path of the volume to monitor. Could be slow, walks over all volumes")
	dfSet.BoolVar(&dfOpts.listVolumes, "l", false, "List all volumes reported by SNMP")
	dfSet.IntVar(&dfOpts.critical, "c", -1, "Percentage of space used before going critical")
	dfSet.IntVar(&dfOpts.warning, "w", -1, "Percentage of space used before sending a warning")

	diskSet := newFlagSet("diskfailed")
	diskSet.IntVar(&diskOpts.critical, "c", -1, "number of failed disks before going critical")
	diskSet.IntVar(&diskOpts.warning, "w", -1, "number of failed disks before sending a warning")

	// The checks have value receivers, so each one is wrapped in a closure
	// that reads the option struct only when it runs, i.e. after the flags
	// have been parsed.
	type subcommand struct {
		flags *flag.FlagSet
		run   func() (status, error)
	}
	subcommands := map[string]subcommand{
		"df":         {dfSet, func() (status, error) { return dfOpts.diskSpaceUsage() }},
		"version":    {newFlagSet("version"), func() (status, error) { return conn.getVersion() }},
		"fw":         {newFlagSet("fw"), func() (status, error) { return conn.getFirmwareVersion() }},
		"diskfailed": {diskSet, func() (status, error) { return diskOpts.getDiskFailed() }},
		"elecfailed": {newFlagSet("elecfailed"), func() (status, error) { return conn.getElectronicFailed() }},
		"fansfailed": {newFlagSet("fansfailed"), func() (status, error) { return conn.getFansFailed() }},
	}

	if len(os.Args) < 2 {
		usage()
		stat = unknown
		return
	}
	cmd, known := subcommands[os.Args[1]]
	if !known {
		usage()
		stat = unknown
		return
	}

	// With flag.ExitOnError the flag package terminates the process itself on
	// invalid input; the error branch only guards against that changing.
	if err := cmd.flags.Parse(os.Args[2:]); err != nil {
		fmt.Println(err)
		stat = unknown
		return
	}

	// A connection problem is reported as unknown for every subcommand.
	if err := conn.connect(); err != nil {
		fmt.Println(err)
		stat = unknown
		return
	}

	// A check returns its message as an error whenever the result is not ok;
	// the status is used as exit code in either case.
	result, err := cmd.run()
	if err != nil {
		fmt.Println(err)
	}
	stat = int(result)
}
// connect configures the package-wide gosnmp.Default client with the host and
// community from f and opens the connection.
//
// When the connection cannot be opened, the problem is printed to standard
// output and the process ends with the unknown exit code, so a caller only
// ever receives a nil error.
func (f genericFlags) connect() error {
	gosnmp.Default.Target, gosnmp.Default.Community = f.host, f.community

	if err := gosnmp.Default.Connect(); err != nil {
		fmt.Printf("error: Connect(): %v", err)
		os.Exit(unknown)
	}
	return nil
}
// getVersion fetches the product version of the NetApp
// (OID .1.3.6.1.4.1.789.1.1.2.0) and prints it to standard output.
//
// It returns ok after printing the version, or unknown together with an error
// when the request fails or the response carries no usable value.
func (f genericFlags) getVersion() (status, error) {
	version, err := gosnmp.Default.Get([]string{productVersion})
	if err != nil {
		return unknown, fmt.Errorf("Get(%s) err: %v", productVersion, err)
	}
	if err = checkType(version.Variables[0]); err != nil {
		return unknown, err
	}
	// Format with %s instead of asserting .(string): the output is the same
	// for a string value, and a []byte payload is printed as text rather than
	// causing a panic. getFirmwareVersion prints its value the same way.
	fmt.Printf("%s", version.Variables[0].Value)
	return ok, nil
}
// getElectronicFailed walks the enclElectronicsFailed column and collects
// every non-empty entry.
//
// It returns critical with the collected entries (one per line) as error when
// at least one entry is non-empty, unknown with an error when the walk fails
// or an entry cannot be interpreted, and ok after printing a confirmation
// otherwise.
func (f genericFlags) getElectronicFailed() (status, error) {
	elecFailed, err := gosnmp.Default.WalkAll(enclElectronicsFailed)
	if err != nil {
		return unknown, fmt.Errorf("WalkAll(%s) err: %v", enclElectronicsFailed, err)
	}

	var failures strings.Builder
	for _, v := range elecFailed {
		if err = checkType(v); err != nil {
			return unknown, err
		}
		// Accept textual payloads delivered either as string or as []byte and
		// report anything else as an error instead of panicking on a failed
		// type assertion.
		var text string
		switch val := v.Value.(type) {
		case string:
			text = val
		case []byte:
			text = string(val)
		default:
			return unknown, fmt.Errorf("unexpected value type %T for OID %s", v.Value, v.Name)
		}
		if text != "" {
			failures.WriteString(text)
			failures.WriteByte('\n')
		}
	}

	if failures.Len() > 0 {
		return critical, errors.New(failures.String())
	}
	fmt.Print("Electronic is ok")
	return ok, nil
}
// getFansFailed walks the enclFansFailed column and collects every non-empty
// entry.
//
// It returns critical with the collected entries (one per line) as error when
// at least one entry is non-empty, unknown with an error when the walk fails
// or an entry cannot be interpreted, and ok after printing a confirmation
// otherwise.
func (f genericFlags) getFansFailed() (status, error) {
	fansFailed, err := gosnmp.Default.WalkAll(enclFansFailed)
	if err != nil {
		// Name the OID that was actually walked in the error message.
		return unknown, fmt.Errorf("WalkAll(%s) err: %v", enclFansFailed, err)
	}

	var failures strings.Builder
	for _, v := range fansFailed {
		if err = checkType(v); err != nil {
			return unknown, err
		}
		// Accept textual payloads delivered either as string or as []byte and
		// report anything else as an error instead of panicking on a failed
		// type assertion.
		var text string
		switch val := v.Value.(type) {
		case string:
			text = val
		case []byte:
			text = string(val)
		default:
			return unknown, fmt.Errorf("unexpected value type %T for OID %s", v.Value, v.Name)
		}
		if text != "" {
			failures.WriteString(text)
			failures.WriteByte('\n')
		}
	}

	if failures.Len() > 0 {
		return critical, errors.New(failures.String())
	}
	fmt.Print("Fans are ok")
	return ok, nil
}
// getFirmwareVersion fetches the firmware version of the NetApp
// (OID 1.3.6.1.4.1.789.1.1.6.0) and prints it to standard output.
//
// It returns ok after printing, or unknown together with an error when the
// request fails or checkType rejects the response.
func (f genericFlags) getFirmwareVersion() (status, error) {
	oids := []string{productFirmwareVersion}

	resp, getErr := gosnmp.Default.Get(oids)
	if getErr != nil {
		return unknown, fmt.Errorf("Get(%s) err: %v", productFirmwareVersion, getErr)
	}

	firmware := resp.Variables[0]
	if typeErr := checkType(firmware); typeErr != nil {
		return unknown, typeErr
	}

	fmt.Printf("Firmware version: %s", firmware.Value)
	return ok, nil
}
// getDiskFailed fetches the number of failed disks and compares it with the
// configured thresholds.
//
// A threshold is only evaluated when it is greater than zero. The critical
// threshold is checked first, then the warning threshold; in both cases the
// disk message is returned as error. When no threshold is reached the message
// is printed and ok is returned. Request failures and unusable responses
// result in unknown.
func (f diskfailedFlags) getDiskFailed() (status, error) {
	failedDisk, err := gosnmp.Default.Get([]string{diskFailedCount})
	if err != nil {
		return unknown, fmt.Errorf("Get(%s) err: %v", diskFailedCount, err)
	}
	if err = checkType(failedDisk.Variables[0]); err != nil {
		return unknown, err
	}

	// Report an unexpected payload type instead of panicking on the
	// assertion.
	iFailedDisk, isInt := failedDisk.Variables[0].Value.(int)
	if !isInt {
		return unknown, fmt.Errorf("unexpected value type %T for OID %s", failedDisk.Variables[0].Value, diskFailedCount)
	}

	failedDiskMsg, err := f.getDiskFailedMessage(iFailedDisk)
	if err != nil {
		return unknown, err
	}

	// The message contains text supplied by the device, so it must not be
	// used as a format string: a '%' in it would garble the output.
	switch {
	case f.critical > 0 && iFailedDisk >= f.critical:
		return critical, errors.New(failedDiskMsg)
	case f.warning > 0 && iFailedDisk >= f.warning:
		return warning, errors.New(failedDiskMsg)
	}

	fmt.Print(failedDiskMsg)
	return ok, nil
}
// getDiskFailedMessage builds the text reported by the diskfailed check.
//
// The text always starts with the number of failed disks. Only when that
// number is greater than zero is diskFailedMessage fetched and appended as a
// description on a second line. An error is returned when that request fails
// or checkType rejects the response.
func (f diskfailedFlags) getDiskFailedMessage(failedDisks int) (string, error) {
	summary := fmt.Sprintf("%d disk(s) has failures", failedDisks)
	if failedDisks <= 0 {
		return summary, nil
	}

	resp, getErr := gosnmp.Default.Get([]string{diskFailedMessage})
	if getErr != nil {
		return "", fmt.Errorf("Get(%s) err: %v", diskFailedMessage, getErr)
	}

	description := resp.Variables[0]
	if typeErr := checkType(description); typeErr != nil {
		return "", typeErr
	}

	return summary + fmt.Sprintf("\nDescription: %s", description.Value), nil
}
// diskSpaceUsage reports the percentage of used space of one volume.
//
// The volume is selected by f.volumeID. When no ID was given (-1) all volumes
// are walked first: with f.listVolumes set they are printed and ok is
// returned, otherwise f.volume is resolved to its ID.
//
// For the selected volume the name and the total, used and available KBytes
// are fetched. The usage is compared with the critical and then the warning
// threshold (each only when greater than zero); when one is reached the
// report is returned as error with the matching status. Otherwise the report
// is printed and ok is returned. Request failures and unusable responses
// result in unknown.
func (f dfFlags) diskSpaceUsage() (status, error) {
	volID := f.volumeID
	if volID == -1 {
		results, err := gosnmp.Default.WalkAll(dfFileSys)
		if err != nil {
			return unknown, fmt.Errorf("WalkAll(%s): %v", dfFileSys, err)
		}
		if f.listVolumes {
			for _, v := range results {
				fmt.Printf("%s %s\n", v.Name, v.Value)
			}
			return ok, nil
		}
		volID, err = getVolumeID(f.volume, results)
		if err != nil {
			return unknown, err
		}
	}
	index := "." + strconv.Itoa(volID)

	// fetch returns the value of one volume table column for the selected
	// volume.
	fetch := func(column string) (interface{}, error) {
		oid := column + index
		resp, err := gosnmp.Default.Get([]string{oid})
		if err != nil {
			return nil, fmt.Errorf("Get(%s) err: %v", oid, err)
		}
		if err = checkType(resp.Variables[0]); err != nil {
			return nil, err
		}
		return resp.Variables[0].Value, nil
	}

	// fetchKBytes is fetch for the 64 bit size columns. An unexpected payload
	// type is reported as an error instead of panicking on the assertion.
	fetchKBytes := func(column string) (uint64, error) {
		value, err := fetch(column)
		if err != nil {
			return 0, err
		}
		kbytes, isUint64 := value.(uint64)
		if !isUint64 {
			return 0, fmt.Errorf("unexpected value type %T for OID %s", value, column+index)
		}
		return kbytes, nil
	}

	volName, err := fetch(dfFileSys)
	if err != nil {
		return unknown, err
	}
	// %s renders a string as well as a []byte payload as text.
	strVolName := fmt.Sprintf("%s", volName)

	itotal, err := fetchKBytes(df64TotalKBytes)
	if err != nil {
		return unknown, err
	}
	iused, err := fetchKBytes(df64UsedKBytes)
	if err != nil {
		return unknown, err
	}
	iavailable, err := fetchKBytes(df64AvailKBytes)
	if err != nil {
		return unknown, err
	}

	// A volume that reports a total of 0 would turn the division into NaN;
	// treat it as 0% used instead.
	var percUsed float64
	if itotal > 0 {
		percUsed = 100 / float64(itotal) * float64(iused)
	}

	report := fmt.Sprintf("Space of volume %s has usage of %.2f%%. Space total: %s - Space available: %s - Space used: %s", strVolName, percUsed, hr(itotal), hr(iavailable), hr(iused))

	switch {
	case f.critical > 0 && int(percUsed) >= f.critical:
		return critical, errors.New(report)
	case f.warning > 0 && int(percUsed) >= f.warning:
		return warning, errors.New(report)
	}

	fmt.Print(report)
	return ok, nil
}
// getVolumeID searches the walked volume entries in for the one whose value
// equals volName and returns its ID, i.e. the number after the last dot of
// the entry's OID.
//
// An error is returned (together with -1) when no entry matches, or when the
// OID of the matching entry does not end in a number.
func getVolumeID(volName string, in []gosnmp.SnmpPDU) (int, error) {
	for _, v := range in {
		// Volume names may be delivered as string or as []byte; entries of
		// any other type cannot match a name and are skipped rather than
		// causing a panic on a failed type assertion.
		var value string
		switch raw := v.Value.(type) {
		case string:
			value = raw
		case []byte:
			value = string(raw)
		default:
			continue
		}
		if value != volName {
			continue
		}

		pos := strings.LastIndex(v.Name, ".")
		if pos == -1 {
			return -1, fmt.Errorf("error: unexpected snmp response %s, please check via snmpget", v.Name)
		}
		volID, err := strconv.Atoi(v.Name[pos+1:])
		if err != nil {
			return -1, fmt.Errorf("error: could not convert unexpected value %s into integer", v.Name[pos+1:])
		}
		return volID, nil
	}
	return -1, fmt.Errorf("error: volume %s not found", volName)
}
// checkType rejects response variables that do not carry a value. It returns
// an error naming the OID when the type of pdu is NoSuchInstance,
// NoSuchObject or UnknownType, and nil for every other type.
func checkType(pdu gosnmp.SnmpPDU) error {
	switch pdu.Type {
	case gosnmp.NoSuchInstance:
		return fmt.Errorf("no such instance, requested object instance with OID %s could not be returned", pdu.Name)
	case gosnmp.NoSuchObject:
		return fmt.Errorf("no such object, requested object instance with OID %s could not be returned", pdu.Name)
	case gosnmp.UnknownType:
		return fmt.Errorf("unknown type, object instance with OID %s contained an unknown type", pdu.Name)
	default:
		return nil
	}
}
// hr returns a human readable representation of a size given in KBytes, which
// is the unit of the values the callers in this file pass in.
//
// 0 is returned as "0". Values below 1024 are printed as a whole number of
// KB. Larger values are scaled by powers of 1024 to MB, GB, TB or PB and
// printed with one decimal place, rounded up. PB is the largest unit; bigger
// values are expressed in PB as well.
func hr(fs uint64) string {
	if fs == 0 {
		return "0"
	}

	units := []string{"KB", "MB", "GB", "TB", "PB"}

	// Determine the power of 1024 with integer arithmetic: a floating point
	// logarithm may land just below a whole number for exact powers. The
	// exponent is capped at the last unit so the lookup below stays in range.
	exp := 0
	for rest := fs; rest >= 1024 && exp < len(units)-1; rest /= 1024 {
		exp++
	}

	if exp == 0 {
		return fmt.Sprintf("%d %s", fs, units[0])
	}

	val := float64(fs) / math.Pow(1024, float64(exp))
	return fmt.Sprintf("%.1f %s", math.Ceil(val*10)/10, units[exp])
}