esxi-monitor/cmd/watchdog/main.go
2023-08-09 14:31:25 +08:00

137 lines
2.3 KiB
Go
Executable File

package main
import (
"context"
"fmt"
"os"
"time"
"github.com/go-ping/ping"
"github.com/tursom/esxi-monitor/vmomi"
)
// watchdog holds the monitoring state for a single watched virtual machine.
type watchdog struct {
	// addr is the address that gets pinged to decide whether the VM is alive.
	addr string
	// vm is the virtual machine that is reset once life runs out.
	vm *vmomi.VirtualMachine
	// life is the number of failed ping rounds left before the VM is reset.
	// It starts at 3 and is restored to 3 after a successful ping or a reset attempt.
	life int
}
// main reads config.yaml, connects to the ESXi SDK endpoint, pairs every
// configured watch entry with the VM of the same name, and then hands the
// resulting list to doWatch. Any failure during start-up panics.
func main() {
	cfg, err := parseConfig("config.yaml")
	if err != nil {
		panic(err)
	}

	// Index the configured watch entries by VM name for quick lookup below.
	targets := map[string]*watchVm{}
	for _, entry := range cfg.Watch {
		targets[entry.Name] = entry
	}

	client, err := vmomi.Connect("https://esxi/sdk", cfg.User, cfg.Password)
	if err != nil {
		panic(err)
	}

	vms, err := vmomi.ListVms(client.Client)
	if err != nil {
		panic(err)
	}

	// One watchdog per VM that has a matching watch entry; other VMs are ignored.
	dogs := []*watchdog{}
	for _, vm := range vms {
		if target, ok := targets[vm.Name()]; ok {
			fmt.Printf("watching virtual machine: addr=%s, name=%s\n", target.Addr, vm.Name())
			dogs = append(dogs, &watchdog{
				addr: target.Addr,
				vm:   vm,
				life: 3,
			})
		}
	}

	doWatch(dogs)
}
// doWatch checks every watched VM once per minute.
//
// watches is the list of watchdog entries (ping address plus VM handle) to
// monitor. On each tick every entry is passed to doWatch0. If any of those
// calls reports that a reconnect is needed, doWatch finishes the current round
// and then returns; otherwise it keeps running.
func doWatch(watches []*watchdog) {
	t := time.NewTicker(time.Minute)
	// Stop the ticker on return so its resources are released.
	defer t.Stop()

	for range t.C {
		fmt.Println("in ping tick")
		needReconnect := false
		for _, w := range watches {
			// doWatch0 is on the left of || so that every entry is still
			// checked after an earlier one has already asked for a reconnect.
			needReconnect = doWatch0(w) || needReconnect
		}
		if needReconnect {
			return
		}
	}
}
// doWatch0 performs one liveness check for w.
//
// It sends 4 pings to w.addr with an overall timeout of 15 seconds. If at
// least one reply arrives, w.life is restored to 3. If none arrives, w.life is
// decremented, and once it reaches zero the VM is reset and w.life is restored
// to 3. Errors creating or running the pinger are logged to stderr and leave
// w.life untouched.
//
// The result is true only when resetting the VM failed; every other path
// returns false.
func doWatch0(w *watchdog) (needReconnect bool) {
	p, err := ping.NewPinger(w.addr)
	if err != nil {
		_, _ = fmt.Fprintf(os.Stderr, "ping to target failed: %s\n", err)
		return false
	}
	p.Count = 4
	p.Timeout = 15 * time.Second

	fmt.Printf("start ping to %s\n", w.addr)
	if err = p.Run(); err != nil {
		_, _ = fmt.Fprintf(os.Stderr, "ping to target failed: %s\n", err)
		return false
	}
	fmt.Printf("finished ping to %s\n", w.addr)

	// Any reply at all counts as alive and refills the life counter.
	if p.Statistics().PacketsRecv != 0 {
		w.life = 3
		return false
	}

	w.life--
	fmt.Printf("ping vm %s(%s) failed, left %d life\n", w.addr, w.vm.Name(), w.life)
	if w.life > 0 {
		return false
	}

	// Out of lives: reset the VM and start counting again from 3.
	fmt.Printf("restarting vm %s(%s)\n", w.addr, w.vm.Name())
	w.life = 3
	resetErr := reset(w.vm)
	if resetErr == nil {
		return false
	}
	_, _ = fmt.Fprintf(os.Stderr, "restart vm failed: %s\n", resetErr)
	return true
}
// reset asks vm to reset and waits up to one minute for the resulting task.
//
// It returns the error from starting the reset, or the error from waiting on
// the task, or nil when both succeed.
func reset(vm *vmomi.VirtualMachine) error {
	task, err := vm.Reset()
	if err != nil {
		return err
	}

	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	// Always release the context's timer, even when the task finishes early.
	defer cancel()

	err = task.Wait(ctx)
	return err
}