137 lines
2.3 KiB
Go
Executable File
137 lines
2.3 KiB
Go
Executable File
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"time"
|
|
|
|
"github.com/go-ping/ping"
|
|
|
|
"github.com/tursom/esxi-monitor/vmomi"
|
|
)
|
|
|
|
type (
|
|
watchdog struct {
|
|
addr string
|
|
vm *vmomi.VirtualMachine
|
|
life int
|
|
}
|
|
)
|
|
|
|
func main() {
|
|
cfg, err := parseConfig("config.yaml")
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
watchTargets := make(map[string]*watchVm)
|
|
for _, watch := range cfg.Watch {
|
|
watchTargets[watch.Name] = watch
|
|
}
|
|
|
|
client, err := vmomi.Connect("https://esxi/sdk", cfg.User, cfg.Password)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
vms, err := vmomi.ListVms(client.Client)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
// ip -> vm instance
|
|
watches := make([]*watchdog, 0)
|
|
for _, vm := range vms {
|
|
target, contains := watchTargets[vm.Name()]
|
|
if !contains {
|
|
continue
|
|
}
|
|
|
|
fmt.Printf("watching virtual machine: addr=%s, name=%s\n", target.Addr, vm.Name())
|
|
watches = append(watches, &watchdog{
|
|
addr: target.Addr,
|
|
vm: vm,
|
|
life: 3,
|
|
})
|
|
}
|
|
|
|
doWatch(watches)
|
|
}
|
|
|
|
// watches map ip -> vm instance
|
|
func doWatch(watches []*watchdog) {
|
|
t := time.NewTicker(time.Minute)
|
|
for {
|
|
<-t.C
|
|
|
|
fmt.Println("in ping tick")
|
|
|
|
needReconnect := false
|
|
for _, w := range watches {
|
|
needReconnect = doWatch0(w) || needReconnect
|
|
}
|
|
|
|
if needReconnect {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func doWatch0(w *watchdog) (needReconnect bool) {
|
|
pinger, err := ping.NewPinger(w.addr)
|
|
if err != nil {
|
|
_, _ = fmt.Fprintf(os.Stderr, "ping to target failed: %s\n", err)
|
|
return
|
|
}
|
|
|
|
pinger.Timeout = time.Second * 15
|
|
pinger.Count = 4
|
|
|
|
fmt.Printf("start ping to %s\n", w.addr)
|
|
err = pinger.Run()
|
|
if err != nil {
|
|
_, _ = fmt.Fprintf(os.Stderr, "ping to target failed: %s\n", err)
|
|
return
|
|
}
|
|
fmt.Printf("finished ping to %s\n", w.addr)
|
|
|
|
statistics := pinger.Statistics()
|
|
if statistics.PacketsRecv != 0 {
|
|
w.life = 3
|
|
return
|
|
}
|
|
|
|
w.life--
|
|
fmt.Printf("ping vm %s(%s) failed, left %d life\n", w.addr, w.vm.Name(), w.life)
|
|
|
|
if w.life > 0 {
|
|
return
|
|
}
|
|
|
|
fmt.Printf("restarting vm %s(%s)\n", w.addr, w.vm.Name())
|
|
|
|
w.life = 3
|
|
if err := reset(w.vm); err != nil {
|
|
_, _ = fmt.Fprintf(os.Stderr, "restart vm failed: %s\n", err)
|
|
return true
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
func reset(vm *vmomi.VirtualMachine) error {
|
|
task, err := vm.Reset()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
ctx, _ := context.WithTimeout(context.Background(), time.Minute)
|
|
err = task.Wait(ctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|