️ improve: 服务监控相关优化

This commit is contained in:
naiba 2021-01-16 18:04:47 +08:00
parent 161102a37b
commit df0eca5a74
6 changed files with 109 additions and 107 deletions

View File

@ -182,27 +182,7 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。
## 变更日志
`dashboard 0.2.2` `agent 0.2.2`
- `dashboard 0.2.2`
- 修复一直报警SSL证书问题
- `agent 0.2.2`
- 修复双栈IP只能获取到v6的问题 #61
- `dashboard 0.2.1` `agent 0.2.1`
- dashboard
- 修复了默认开启IP变更通知
- hotaru 主题的服务状态页面
- **新增可以指定服务器忽略监控规则**
- 修复info透明 @ilay1678
- agent
- 优化了 IPv6/IPv4 双栈问题
- 增加 SSL 证书过期、即将过期提醒
最新:`dashboard 0.2.3` `agent 0.2.3`,只记录最后一次更新导致必须更新面板的说明。
- `dashboard 0.2.0` `agent 0.2.0` **重大更新**

View File

@ -147,81 +147,85 @@ func run(cmd *cobra.Command, args []string) {
continue
}
err = receiveTasks(tasks)
log.Printf("receiveCommand exit to main: %v", err)
log.Printf("receiveTasks exit to main: %v", err)
retry()
}
}
// receiveTasks reads tasks from the stream in a loop and returns the first
// error reported by tasks.Recv().
//
// NOTE(review): this listing appears to show removed and added lines of the
// change side by side: the inline switch that fills and reports `result` and
// the trailing `go doTask(task)` look like the old and new handling of the
// same task, and the two defer/var lines look like before/after pairs.
// Confirm which lines are live before relying on this description.
func receiveTasks(tasks pb.NezhaService_RequestTaskClient) error {
var err error
var task *pb.Task
// Arguments of a deferred call are evaluated when the defer statement runs,
// so these log lines record time.Now(), task and err as they are at function
// entry, not as they are when the function returns.
defer log.Printf("receiveTasks exit %v %v => %v", time.Now(), task, err)
defer log.Printf("receiveTasks exit %v => %v", time.Now(), err)
for {
// This declaration shadows the function-level task inside the loop body.
var task *pb.Task
task, err = tasks.Recv()
if err != nil {
return err
}
// The result carries the task's id and type back to the reporter.
var result pb.TaskResult
result.Id = task.GetId()
result.Type = task.GetType()
switch task.GetType() {
case model.MonitorTypeHTTPGET:
start := time.Now()
resp, err := httpClient.Get(task.GetData())
if err == nil {
// Delay is stored in milliseconds (microseconds / 1000).
result.Delay = float32(time.Now().Sub(start).Microseconds()) / 1000.0
// Any status outside 200-299 is turned into an error.
if resp.StatusCode > 299 || resp.StatusCode < 200 {
err = errors.New("\n应用错误" + resp.Status)
}
}
if err == nil {
// Only https:// targets get a certificate check; [8:] strips the
// "https://" prefix before handing the rest to cert.NewCert.
if strings.HasPrefix(task.GetData(), "https://") {
c := cert.NewCert(task.GetData()[8:])
if c.Error != "" {
if strings.Contains(c.Error, "expired") {
result.Data = "SSL证书错误证书已过期"
} else {
result.Data = "SSL证书错误" + c.Error
}
} else {
// Issuer and expiry are reported as "issuer|notAfter".
result.Data = c.Issuer + "|" + c.NotAfter
result.Successful = true
}
}
} else {
result.Data = err.Error()
}
case model.MonitorTypeICMPPing:
pinger, err := ping.NewPinger(task.GetData())
if err == nil {
pinger.Count = 10
err = pinger.Run() // Blocks until finished.
}
if err == nil {
stat := pinger.Statistics()
// Average round-trip time, in milliseconds.
result.Delay = float32(stat.AvgRtt.Microseconds()) / 1000.0
result.Successful = true
} else {
result.Data = err.Error()
}
case model.MonitorTypeTCPPing:
start := time.Now()
conn, err := net.DialTimeout("tcp", task.GetData(), time.Second*10)
if err == nil {
conn.Close()
result.Delay = float32(time.Now().Sub(start).Microseconds()) / 1000.0
result.Successful = true
} else {
result.Data = err.Error()
}
default:
log.Printf("Unknown action: %v", task)
}
client.ReportTask(ctx, &result)
// Hands the task to doTask on its own goroutine.
go doTask(task)
}
}
// doTask runs one monitoring task and sends the outcome to the dashboard via
// client.ReportTask. The result always carries the task's id and type.
//
// Handling per task type:
//   - model.MonitorTypeHTTPGET: issues a GET to task.GetData(). A status
//     outside 200-299 is a failure. For https:// targets the certificate is
//     inspected with cert.NewCert and "issuer|notAfter" is stored in Data;
//     plain http:// targets succeed on a 2xx response alone.
//   - model.MonitorTypeICMPPing: sends 10 echo requests with a 20 second
//     timeout; Delay is the average round-trip time.
//   - model.MonitorTypeTCPPing: dials the address with a 10 second timeout and
//     writes a short probe line; Delay covers dial, write and close.
//
// Delay is always expressed in milliseconds. On failure Data holds the error
// text and Successful stays false. Unknown task types are logged and reported
// with an otherwise empty result.
func doTask(task *pb.Task) {
	var result pb.TaskResult
	result.Id = task.GetId()
	result.Type = task.GetType()

	switch task.GetType() {
	case model.MonitorTypeHTTPGET:
		start := time.Now()
		resp, err := httpClient.Get(task.GetData())
		if err == nil {
			result.Delay = float32(time.Since(start).Microseconds()) / 1000.0
			// The body is never read; close it so the underlying connection
			// is released instead of leaking once per probe.
			resp.Body.Close()
			if resp.StatusCode > 299 || resp.StatusCode < 200 {
				err = errors.New("\n应用错误" + resp.Status)
			}
		}
		if err != nil {
			result.Data = err.Error()
			break
		}
		if !strings.HasPrefix(task.GetData(), "https://") {
			// Plain HTTP has no certificate to inspect: a 2xx response is
			// already a successful check.
			result.Successful = true
			break
		}
		// Strip the "https://" prefix before handing the rest to cert.NewCert.
		c := cert.NewCert(task.GetData()[8:])
		switch {
		case c.Error == "":
			result.Data = c.Issuer + "|" + c.NotAfter
			result.Successful = true
		case strings.Contains(c.Error, "expired"):
			result.Data = "SSL证书错误证书已过期"
		default:
			result.Data = "SSL证书错误" + c.Error
		}

	case model.MonitorTypeICMPPing:
		pinger, err := ping.NewPinger(task.GetData())
		if err == nil {
			pinger.Count = 10
			pinger.Timeout = time.Second * 20
			err = pinger.Run() // Blocks until finished.
		}
		if err == nil && pinger.PacketsRecv == 0 {
			// Run returned without error but nothing came back; without this
			// check the target would be reported as up with a 0 ms delay.
			err = errors.New("no ICMP echo reply received")
		}
		if err != nil {
			result.Data = err.Error()
			break
		}
		result.Delay = float32(pinger.Statistics().AvgRtt.Microseconds()) / 1000.0
		result.Successful = true

	case model.MonitorTypeTCPPing:
		start := time.Now()
		conn, err := net.DialTimeout("tcp", task.GetData(), time.Second*10)
		if err != nil {
			result.Data = err.Error()
			break
		}
		// Best-effort probe: the write result is deliberately not checked, a
		// successful dial is what marks the target as reachable.
		conn.Write([]byte("ping\n"))
		conn.Close()
		result.Delay = float32(time.Since(start).Microseconds()) / 1000.0
		result.Successful = true

	default:
		log.Printf("Unknown action: %v", task)
	}

	client.ReportTask(ctx, &result)
}
func reportState() {
var lastReportHostInfo time.Time
var err error

View File

@ -52,6 +52,6 @@ func initDB() {
// main starts controller.ServeWeb, rpc.ServeRPC and rpc.DispatchTask in their
// own goroutines and then calls alertmanager.Start().
//
// NOTE(review): the two DispatchTask lines (10 minutes and 3 minutes) look like
// the before/after of one change shown side by side — confirm which is live.
func main() {
go controller.ServeWeb(dao.Conf.HTTPPort)
go rpc.ServeRPC(5555) // presumably the RPC listen port — confirm
go rpc.DispatchTask(time.Minute * 10)
go rpc.DispatchTask(time.Minute * 3)
alertmanager.Start()
}

View File

@ -32,6 +32,7 @@ func DispatchTask(duration time.Duration) {
var hasAliveAgent bool
dao.DB.Find(&tasks)
dao.ServerLock.RLock()
startedAt := time.Now()
for i := 0; i < len(tasks); i++ {
if index >= uint64(len(dao.SortedServerList)) {
index = 0
@ -50,6 +51,6 @@ func DispatchTask(duration time.Duration) {
index++
}
dao.ServerLock.RUnlock()
time.Sleep(duration)
time.Sleep(time.Until(startedAt.Add(duration)))
}
}

View File

@ -15,6 +15,35 @@ import (
)
// main runs one of the manual probes defined in this file. Only icmp is
// enabled; the other probes are kept as commented-out calls so they can be
// switched in by hand.
func main() {
icmp()
// tcpping()
// httpWithSSLInfo()
// diskinfo()
}
// tcpping dials example.com:80 over TCP with a 10 second timeout, writes a
// short probe line, closes the connection and prints the elapsed time twice:
// as whole microseconds and as fractional milliseconds. It panics if the dial
// fails.
func tcpping() {
	start := time.Now()
	conn, err := net.DialTimeout("tcp", "example.com:80", time.Second*10)
	if err != nil {
		panic(err)
	}
	// Best-effort probe: the write result is deliberately not checked.
	conn.Write([]byte("ping\n"))
	conn.Close()

	// Sample the clock once so both printed values describe the same
	// measurement instead of two slightly different instants.
	elapsed := time.Since(start).Microseconds()
	fmt.Println(elapsed, float32(elapsed)/1000.0)
}
// diskinfo logs the device name together with the Total and Used figures of
// every partition returned by disk.Partitions. Lookup errors are ignored;
// partitions whose usage cannot be read are skipped.
func diskinfo() {
	// Disk information
	partitions, _ := disk.Partitions(false)
	for i := range partitions {
		usage, _ := disk.Usage(partitions[i].Mountpoint)
		if usage == nil {
			continue
		}
		log.Printf("%s %d %d", partitions[i].Device, usage.Total, usage.Used)
	}
}
func httpWithSSLInfo() {
// 跳过 SSL 检查
transCfg := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
@ -25,31 +54,19 @@ func main() {
// SSL 证书信息获取
c := cert.NewCert("expired-ecc-dv.ssl.com")
fmt.Println(c.Error)
// TCP
conn, err := net.DialTimeout("tcp", "example.com:80", time.Second*10)
}
// icmp creates a pinger, runs it and prints the number of packets received
// together with the average round-trip time in milliseconds. Errors from
// creating or running the pinger cause a panic.
//
// NOTE(review): this listing appears to mix removed and added lines of the
// change (a second `pinger, err :=`, a reference to `conn`, and the disk loop
// look like leftovers of the previous main body). Confirm which lines are
// live before relying on this description.
func icmp() {
pinger, err := ping.NewPinger("10.10.10.2")
if err != nil {
panic(err) // Blocks until finished.
}
pinger.Count = 3000
pinger.Timeout = 10 * time.Second
if err = pinger.Run(); err != nil {
panic(err)
}
println(conn)
// ICMP Ping
pinger, err := ping.NewPinger("example.com")
if err != nil {
panic(err)
}
pinger.Count = 3
err = pinger.Run() // Blocks until finished.
if err != nil {
panic(err)
}
fmt.Printf("%+v", pinger.Statistics())
// Disk information
dparts, _ := disk.Partitions(false)
for _, part := range dparts {
u, _ := disk.Usage(part.Mountpoint)
if u != nil {
log.Printf("%s %d %d", part.Device, u.Total, u.Used)
}
}
// AvgRtt is converted from microseconds to milliseconds for display.
fmt.Println(pinger.PacketsRecv, float32(pinger.Statistics().AvgRtt.Microseconds())/1000.0)
}
func cmdExec() {

View File

@ -26,7 +26,7 @@ var SortedServerList []*model.Server
// ServerLock is a package-level read/write mutex; presumably it guards
// SortedServerList — confirm against the callers.
var ServerLock sync.RWMutex
// Version is the version string of this build.
// NOTE(review): the two assignments below look like the before/after of a
// version bump shown side by side — confirm which one is live.
var Version = "v0.2.2"
var Version = "v0.2.3"
func ReSortServer() {
SortedServerList = []*model.Server{}