🚸 improve: 报警优化

This commit is contained in:
naiba 2021-01-06 09:35:04 +08:00
parent d0dd44bda9
commit fa9da8b3b6
2 changed files with 26 additions and 19 deletions

View File

@ -50,6 +50,5 @@ func initDB() {
func main() {
go controller.ServeWeb(dao.Conf.HTTPPort)
go rpc.ServeRPC(5555)
go alertmanager.Start()
select {}
alertmanager.Start()
}

View File

@ -4,6 +4,7 @@ import (
"crypto/md5"
"encoding/hex"
"fmt"
"log"
"sync"
"time"
@ -44,7 +45,19 @@ func Start() {
alertsLock.Unlock()
time.Sleep(time.Second * 10)
go checkStatus()
var lastPrint time.Time
var checkCount uint64
for {
startedAt := time.Now()
checkStatus()
checkCount++
if lastPrint.Before(startedAt.Add(-1 * time.Hour)) {
log.Println("报警规则检测每小时", checkCount, "次", startedAt, time.Now())
checkCount = 0
lastPrint = startedAt
}
time.Sleep(time.Until(startedAt.Add(time.Second * dao.SnapshotDelay)))
}
}
func OnRefreshOrAddAlert(alert model.AlertRule) {
@ -101,28 +114,22 @@ func OnDeleteNotification(id uint64) {
}
func checkStatus() {
startedAt := time.Now()
defer func() {
time.Sleep(time.Until(startedAt.Add(time.Second * dao.SnapshotDelay)))
checkStatus()
}()
alertsLock.RLock()
defer alertsLock.RUnlock()
dao.ServerLock.RLock()
defer dao.ServerLock.RUnlock()
for j := 0; j < len(alerts); j++ {
for _, alert := range alerts {
// 跳过未启用
if alerts[j].Enable == nil || !*alerts[j].Enable {
if alert.Enable == nil || !*alert.Enable {
continue
}
for _, server := range dao.ServerList {
// 监测点
alertsStore[alerts[j].ID][server.ID] = append(alertsStore[alerts[j].
ID][server.ID], alerts[j].Snapshot(server))
alertsStore[alert.ID][server.ID] = append(alertsStore[alert.
ID][server.ID], alert.Snapshot(server))
// 发送通知
max, desc := alerts[j].Check(alertsStore[alerts[j].ID][server.ID])
max, desc := alert.Check(alertsStore[alert.ID][server.ID])
if desc != "" {
nID := getNotificationHash(server, desc)
var flag bool
@ -136,7 +143,8 @@ func checkStatus() {
nHistory.Duration = time.Hour * 24
}
nHistory.Until = time.Now().Add(nHistory.Duration)
dao.Cache.Set(nID, nHistory, nHistory.Duration)
// 缓存有效期加 10 分钟
dao.Cache.Set(nID, nHistory, nHistory.Duration+time.Minute*10)
}
} else {
// 新提醒直接通知
@ -147,15 +155,15 @@ func checkStatus() {
}, firstNotificationDelay)
}
if flag {
message := fmt.Sprintf("逮到咯,快去看看!服务器:%s(%s),报警规则:%s%s", server.Name, server.Host.IP, alerts[j].Name, desc)
message := fmt.Sprintf("逮到咯,快去看看!服务器:%s(%s),报警规则:%s%s", server.Name, server.Host.IP, alert.Name, desc)
go sendNotification(message)
}
}
// 清理旧数据
if max > 0 {
for k := 0; k < len(alertsStore[alerts[j].ID][server.ID]); k++ {
if max < len(alertsStore[alerts[j].ID][server.ID][k]) {
alertsStore[alerts[j].ID][server.ID][k] = alertsStore[alerts[j].ID][server.ID][k][len(alertsStore[alerts[j].ID][server.ID][k])-max:]
for k := 0; k < len(alertsStore[alert.ID][server.ID]); k++ {
if max < len(alertsStore[alert.ID][server.ID][k]) {
alertsStore[alert.ID][server.ID][k] = alertsStore[alert.ID][server.ID][k][len(alertsStore[alert.ID][server.ID][k])-max:]
}
}
}