package main

import (
	"context"
	"fmt"
	"os"
	"time"

	"github.com/go-ping/ping"
	"github.com/tursom/esxi-monitor/vmomi"
)

// initialLife is the number of consecutive ping rounds with zero replies
// that a watched virtual machine may accumulate before it is reset.
const initialLife = 3

// watchdog tracks the ping health of one watched virtual machine.
type watchdog struct {
	addr string                // address that is pinged on every tick
	vm   *vmomi.VirtualMachine // virtual machine that is reset when life hits zero
	life int                   // remaining failed ping rounds before a reset
}

// main reads config.yaml, connects to the ESXi SDK endpoint, builds one
// watchdog per configured virtual machine that the host reports, and then
// runs the ping loop. Any setup error panics.
func main() {
	cfg, err := parseConfig("config.yaml")
	if err != nil {
		panic(err)
	}

	// Index the configured watch entries by virtual machine name.
	watchTargets := make(map[string]*watchVm)
	for _, watch := range cfg.Watch {
		watchTargets[watch.Name] = watch
	}

	client, err := vmomi.Connect("https://esxi/sdk", cfg.User, cfg.Password)
	if err != nil {
		panic(err)
	}

	vms, err := vmomi.ListVms(client.Client)
	if err != nil {
		panic(err)
	}

	// Keep only the virtual machines that have a configured watch entry.
	watches := make([]*watchdog, 0, len(watchTargets))
	for _, vm := range vms {
		target, contains := watchTargets[vm.Name()]
		if !contains {
			continue
		}

		fmt.Printf("watching virtual machine: addr=%s, name=%s\n", target.Addr, vm.Name())
		watches = append(watches, &watchdog{
			addr: target.Addr,
			vm:   vm,
			life: initialLife,
		})
	}

	// doWatch only returns when a reset attempt failed; main then returns too.
	// NOTE(review): presumably an external supervisor restarts the process so
	// that a fresh connection is established - confirm.
	doWatch(watches)
}

// doWatch checks every watchdog in watches once per minute. It returns after
// the first tick in which at least one doWatch0 call reported that a
// reconnect is needed; every watchdog is still checked during that tick.
func doWatch(watches []*watchdog) {
	t := time.NewTicker(time.Minute)
	// Release the ticker when the loop is abandoned.
	defer t.Stop()

	for range t.C {
		fmt.Println("in ping tick")

		needReconnect := false
		for _, w := range watches {
			// Always run the check; never short-circuit it away.
			if doWatch0(w) {
				needReconnect = true
			}
		}
		if needReconnect {
			return
		}
	}
}

// doWatch0 pings w.addr once (4 packets, 15 second timeout) and updates
// w.life from the outcome:
//
//   - if the pinger cannot be created or run, the error is logged and w.life
//     is left untouched;
//   - if at least one reply was received, w.life is restored to initialLife;
//   - otherwise w.life is decremented, and once it reaches zero the virtual
//     machine is reset and w.life is restored.
//
// It reports true only when that reset attempt failed.
func doWatch0(w *watchdog) (needReconnect bool) {
	pinger, err := ping.NewPinger(w.addr)
	if err != nil {
		_, _ = fmt.Fprintf(os.Stderr, "ping to target failed: %s\n", err)
		return false
	}
	pinger.Timeout = time.Second * 15
	pinger.Count = 4

	fmt.Printf("start ping to %s\n", w.addr)
	if err = pinger.Run(); err != nil {
		_, _ = fmt.Fprintf(os.Stderr, "ping to target failed: %s\n", err)
		return false
	}
	fmt.Printf("finished ping to %s\n", w.addr)

	if pinger.Statistics().PacketsRecv != 0 {
		// The target answered: forget earlier failures.
		w.life = initialLife
		return false
	}

	w.life--
	fmt.Printf("ping vm %s(%s) failed, left %d life\n", w.addr, w.vm.Name(), w.life)
	if w.life > 0 {
		return false
	}

	fmt.Printf("restarting vm %s(%s)\n", w.addr, w.vm.Name())
	// Restore the budget before resetting so the machine gets a full set of
	// attempts again whether or not the reset succeeds.
	w.life = initialLife
	if err := reset(w.vm); err != nil {
		_, _ = fmt.Fprintf(os.Stderr, "restart vm failed: %s\n", err)
		return true
	}
	return false
}

// reset starts a reset of vm and waits up to one minute for the resulting
// task. It returns the error from starting the reset or from the wait.
func reset(vm *vmomi.VirtualMachine) error {
	task, err := vm.Reset()
	if err != nil {
		return err
	}

	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	// Free the context's timer as soon as the wait is over.
	defer cancel()

	return task.Wait(ctx)
}