🚸 improve: 报警优化

This commit is contained in:
naiba 2021-01-06 09:35:04 +08:00
parent d0dd44bda9
commit fa9da8b3b6
2 changed files with 26 additions and 19 deletions

View File

@@ -50,6 +50,5 @@ func initDB() {
func main() { func main() {
go controller.ServeWeb(dao.Conf.HTTPPort) go controller.ServeWeb(dao.Conf.HTTPPort)
go rpc.ServeRPC(5555) go rpc.ServeRPC(5555)
go alertmanager.Start() alertmanager.Start()
select {}
} }

View File

@@ -4,6 +4,7 @@ import (
"crypto/md5" "crypto/md5"
"encoding/hex" "encoding/hex"
"fmt" "fmt"
"log"
"sync" "sync"
"time" "time"
@@ -44,7 +45,19 @@ func Start() {
alertsLock.Unlock() alertsLock.Unlock()
time.Sleep(time.Second * 10) time.Sleep(time.Second * 10)
go checkStatus() var lastPrint time.Time
var checkCount uint64
for {
startedAt := time.Now()
checkStatus()
checkCount++
if lastPrint.Before(startedAt.Add(-1 * time.Hour)) {
log.Println("报警规则检测每小时", checkCount, "次", startedAt, time.Now())
checkCount = 0
lastPrint = startedAt
}
time.Sleep(time.Until(startedAt.Add(time.Second * dao.SnapshotDelay)))
}
} }
func OnRefreshOrAddAlert(alert model.AlertRule) { func OnRefreshOrAddAlert(alert model.AlertRule) {
@@ -101,28 +114,22 @@ func OnDeleteNotification(id uint64) {
} }
func checkStatus() { func checkStatus() {
startedAt := time.Now()
defer func() {
time.Sleep(time.Until(startedAt.Add(time.Second * dao.SnapshotDelay)))
checkStatus()
}()
alertsLock.RLock() alertsLock.RLock()
defer alertsLock.RUnlock() defer alertsLock.RUnlock()
dao.ServerLock.RLock() dao.ServerLock.RLock()
defer dao.ServerLock.RUnlock() defer dao.ServerLock.RUnlock()
for j := 0; j < len(alerts); j++ { for _, alert := range alerts {
// 跳过未启用 // 跳过未启用
if alerts[j].Enable == nil || !*alerts[j].Enable { if alert.Enable == nil || !*alert.Enable {
continue continue
} }
for _, server := range dao.ServerList { for _, server := range dao.ServerList {
// 监测点 // 监测点
alertsStore[alerts[j].ID][server.ID] = append(alertsStore[alerts[j]. alertsStore[alert.ID][server.ID] = append(alertsStore[alert.
ID][server.ID], alerts[j].Snapshot(server)) ID][server.ID], alert.Snapshot(server))
// 发送通知 // 发送通知
max, desc := alerts[j].Check(alertsStore[alerts[j].ID][server.ID]) max, desc := alert.Check(alertsStore[alert.ID][server.ID])
if desc != "" { if desc != "" {
nID := getNotificationHash(server, desc) nID := getNotificationHash(server, desc)
var flag bool var flag bool
@@ -136,7 +143,8 @@ func checkStatus() {
nHistory.Duration = time.Hour * 24 nHistory.Duration = time.Hour * 24
} }
nHistory.Until = time.Now().Add(nHistory.Duration) nHistory.Until = time.Now().Add(nHistory.Duration)
dao.Cache.Set(nID, nHistory, nHistory.Duration) // 缓存有效期加 10 分钟
dao.Cache.Set(nID, nHistory, nHistory.Duration+time.Minute*10)
} }
} else { } else {
// 新提醒直接通知 // 新提醒直接通知
@@ -147,15 +155,15 @@ func checkStatus() {
}, firstNotificationDelay) }, firstNotificationDelay)
} }
if flag { if flag {
message := fmt.Sprintf("逮到咯,快去看看!服务器:%s(%s),报警规则:%s%s", server.Name, server.Host.IP, alerts[j].Name, desc) message := fmt.Sprintf("逮到咯,快去看看!服务器:%s(%s),报警规则:%s%s", server.Name, server.Host.IP, alert.Name, desc)
go sendNotification(message) go sendNotification(message)
} }
} }
// 清理旧数据 // 清理旧数据
if max > 0 { if max > 0 {
for k := 0; k < len(alertsStore[alerts[j].ID][server.ID]); k++ { for k := 0; k < len(alertsStore[alert.ID][server.ID]); k++ {
if max < len(alertsStore[alerts[j].ID][server.ID][k]) { if max < len(alertsStore[alert.ID][server.ID][k]) {
alertsStore[alerts[j].ID][server.ID][k] = alertsStore[alerts[j].ID][server.ID][k][len(alertsStore[alerts[j].ID][server.ID][k])-max:] alertsStore[alert.ID][server.ID][k] = alertsStore[alert.ID][server.ID][k][len(alertsStore[alert.ID][server.ID][k])-max:]
} }
} }
} }