⚡️ improve: 服务监控相关优化
This commit is contained in:
parent
161102a37b
commit
df0eca5a74
22
README.md
22
README.md
@ -182,27 +182,7 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。
|
||||
|
||||
## 变更日志
|
||||
|
||||
`dashboard 0.2.2` `agent 0.2.2`
|
||||
|
||||
- `dashboard 0.2.2`
|
||||
|
||||
- 修复一直报警SSL证书问题
|
||||
|
||||
- `agent 0.2.2`
|
||||
|
||||
- 修复双栈IP只能获取到v6的问题 #61
|
||||
|
||||
- `dashboard 0.2.1` `agent 0.2.1`
|
||||
|
||||
- dashboard
|
||||
- 修复了默认开启IP变更通知
|
||||
- hotaru 主题的服务状态页面
|
||||
- **新增可以指定服务器忽略监控规则**
|
||||
- 修复info透明 @ilay1678
|
||||
|
||||
- agent
|
||||
- 优化了 IPv6/IPv4 双栈问题
|
||||
- 增加 SSL 证书过期、即将过期提醒
|
||||
最新:`dashboard 0.2.3` `agent 0.2.3`,只记录最后一次更新导致必须更新面板的说明。
|
||||
|
||||
- `dashboard 0.2.0` `agent 0.2.0` **重大更新**
|
||||
|
||||
|
@ -147,81 +147,85 @@ func run(cmd *cobra.Command, args []string) {
|
||||
continue
|
||||
}
|
||||
err = receiveTasks(tasks)
|
||||
log.Printf("receiveCommand exit to main: %v", err)
|
||||
log.Printf("receiveTasks exit to main: %v", err)
|
||||
retry()
|
||||
}
|
||||
}
|
||||
|
||||
func receiveTasks(tasks pb.NezhaService_RequestTaskClient) error {
|
||||
var err error
|
||||
var task *pb.Task
|
||||
|
||||
defer log.Printf("receiveTasks exit %v %v => %v", time.Now(), task, err)
|
||||
defer log.Printf("receiveTasks exit %v => %v", time.Now(), err)
|
||||
for {
|
||||
var task *pb.Task
|
||||
task, err = tasks.Recv()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var result pb.TaskResult
|
||||
result.Id = task.GetId()
|
||||
result.Type = task.GetType()
|
||||
switch task.GetType() {
|
||||
case model.MonitorTypeHTTPGET:
|
||||
start := time.Now()
|
||||
resp, err := httpClient.Get(task.GetData())
|
||||
if err == nil {
|
||||
result.Delay = float32(time.Now().Sub(start).Microseconds()) / 1000.0
|
||||
if resp.StatusCode > 299 || resp.StatusCode < 200 {
|
||||
err = errors.New("\n应用错误:" + resp.Status)
|
||||
}
|
||||
}
|
||||
if err == nil {
|
||||
if strings.HasPrefix(task.GetData(), "https://") {
|
||||
c := cert.NewCert(task.GetData()[8:])
|
||||
if c.Error != "" {
|
||||
if strings.Contains(c.Error, "expired") {
|
||||
result.Data = "SSL证书错误:证书已过期"
|
||||
} else {
|
||||
result.Data = "SSL证书错误:" + c.Error
|
||||
}
|
||||
} else {
|
||||
result.Data = c.Issuer + "|" + c.NotAfter
|
||||
result.Successful = true
|
||||
}
|
||||
}
|
||||
} else {
|
||||
result.Data = err.Error()
|
||||
}
|
||||
case model.MonitorTypeICMPPing:
|
||||
pinger, err := ping.NewPinger(task.GetData())
|
||||
if err == nil {
|
||||
pinger.Count = 10
|
||||
err = pinger.Run() // Blocks until finished.
|
||||
}
|
||||
if err == nil {
|
||||
stat := pinger.Statistics()
|
||||
result.Delay = float32(stat.AvgRtt.Microseconds()) / 1000.0
|
||||
result.Successful = true
|
||||
} else {
|
||||
result.Data = err.Error()
|
||||
}
|
||||
case model.MonitorTypeTCPPing:
|
||||
start := time.Now()
|
||||
conn, err := net.DialTimeout("tcp", task.GetData(), time.Second*10)
|
||||
if err == nil {
|
||||
conn.Close()
|
||||
result.Delay = float32(time.Now().Sub(start).Microseconds()) / 1000.0
|
||||
result.Successful = true
|
||||
} else {
|
||||
result.Data = err.Error()
|
||||
}
|
||||
default:
|
||||
log.Printf("Unknown action: %v", task)
|
||||
}
|
||||
client.ReportTask(ctx, &result)
|
||||
go doTask(task)
|
||||
}
|
||||
}
|
||||
|
||||
func doTask(task *pb.Task) {
|
||||
var result pb.TaskResult
|
||||
result.Id = task.GetId()
|
||||
result.Type = task.GetType()
|
||||
switch task.GetType() {
|
||||
case model.MonitorTypeHTTPGET:
|
||||
start := time.Now()
|
||||
resp, err := httpClient.Get(task.GetData())
|
||||
if err == nil {
|
||||
result.Delay = float32(time.Now().Sub(start).Microseconds()) / 1000.0
|
||||
if resp.StatusCode > 299 || resp.StatusCode < 200 {
|
||||
err = errors.New("\n应用错误:" + resp.Status)
|
||||
}
|
||||
}
|
||||
if err == nil {
|
||||
if strings.HasPrefix(task.GetData(), "https://") {
|
||||
c := cert.NewCert(task.GetData()[8:])
|
||||
if c.Error != "" {
|
||||
if strings.Contains(c.Error, "expired") {
|
||||
result.Data = "SSL证书错误:证书已过期"
|
||||
} else {
|
||||
result.Data = "SSL证书错误:" + c.Error
|
||||
}
|
||||
} else {
|
||||
result.Data = c.Issuer + "|" + c.NotAfter
|
||||
result.Successful = true
|
||||
}
|
||||
}
|
||||
} else {
|
||||
result.Data = err.Error()
|
||||
}
|
||||
case model.MonitorTypeICMPPing:
|
||||
pinger, err := ping.NewPinger(task.GetData())
|
||||
if err == nil {
|
||||
pinger.Count = 10
|
||||
pinger.Timeout = time.Second * 20
|
||||
err = pinger.Run() // Blocks until finished.
|
||||
}
|
||||
if err == nil {
|
||||
result.Delay = float32(pinger.Statistics().AvgRtt.Microseconds()) / 1000.0
|
||||
result.Successful = true
|
||||
} else {
|
||||
result.Data = err.Error()
|
||||
}
|
||||
case model.MonitorTypeTCPPing:
|
||||
start := time.Now()
|
||||
conn, err := net.DialTimeout("tcp", task.GetData(), time.Second*10)
|
||||
if err == nil {
|
||||
conn.Write([]byte("ping\n"))
|
||||
conn.Close()
|
||||
result.Delay = float32(time.Now().Sub(start).Microseconds()) / 1000.0
|
||||
result.Successful = true
|
||||
} else {
|
||||
result.Data = err.Error()
|
||||
}
|
||||
default:
|
||||
log.Printf("Unknown action: %v", task)
|
||||
}
|
||||
client.ReportTask(ctx, &result)
|
||||
}
|
||||
|
||||
func reportState() {
|
||||
var lastReportHostInfo time.Time
|
||||
var err error
|
||||
|
@ -52,6 +52,6 @@ func initDB() {
|
||||
func main() {
|
||||
go controller.ServeWeb(dao.Conf.HTTPPort)
|
||||
go rpc.ServeRPC(5555)
|
||||
go rpc.DispatchTask(time.Minute * 10)
|
||||
go rpc.DispatchTask(time.Minute * 3)
|
||||
alertmanager.Start()
|
||||
}
|
||||
|
@ -32,6 +32,7 @@ func DispatchTask(duration time.Duration) {
|
||||
var hasAliveAgent bool
|
||||
dao.DB.Find(&tasks)
|
||||
dao.ServerLock.RLock()
|
||||
startedAt := time.Now()
|
||||
for i := 0; i < len(tasks); i++ {
|
||||
if index >= uint64(len(dao.SortedServerList)) {
|
||||
index = 0
|
||||
@ -50,6 +51,6 @@ func DispatchTask(duration time.Duration) {
|
||||
index++
|
||||
}
|
||||
dao.ServerLock.RUnlock()
|
||||
time.Sleep(duration)
|
||||
time.Sleep(time.Until(startedAt.Add(duration)))
|
||||
}
|
||||
}
|
||||
|
@ -15,6 +15,35 @@ import (
|
||||
)
|
||||
|
||||
func main() {
|
||||
icmp()
|
||||
// tcpping()
|
||||
// httpWithSSLInfo()
|
||||
// diskinfo()
|
||||
}
|
||||
|
||||
// tcpping dials example.com:80 over TCP with a 10-second timeout, writes a
// short payload, closes the connection and prints the elapsed time twice: as
// whole microseconds and as fractional milliseconds.
//
// It panics if the dial fails.
func tcpping() {
	start := time.Now()
	conn, err := net.DialTimeout("tcp", "example.com:80", time.Second*10)
	if err != nil {
		panic(err)
	}
	// Best-effort write: a failure is logged but does not abort the timing.
	if _, werr := conn.Write([]byte("ping\n")); werr != nil {
		log.Printf("tcpping write: %v", werr)
	}
	conn.Close()
	// Sample the clock once so both printed values describe the same instant.
	elapsed := time.Since(start)
	fmt.Println(elapsed.Microseconds(), float32(elapsed.Microseconds())/1000.0)
}
|
||||
|
||||
func diskinfo() {
|
||||
// 硬盘信息
|
||||
dparts, _ := disk.Partitions(false)
|
||||
for _, part := range dparts {
|
||||
u, _ := disk.Usage(part.Mountpoint)
|
||||
if u != nil {
|
||||
log.Printf("%s %d %d", part.Device, u.Total, u.Used)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func httpWithSSLInfo() {
|
||||
// 跳过 SSL 检查
|
||||
transCfg := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
||||
@ -25,31 +54,19 @@ func main() {
|
||||
// SSL 证书信息获取
|
||||
c := cert.NewCert("expired-ecc-dv.ssl.com")
|
||||
fmt.Println(c.Error)
|
||||
// TCP
|
||||
conn, err := net.DialTimeout("tcp", "example.com:80", time.Second*10)
|
||||
}
|
||||
|
||||
func icmp() {
|
||||
pinger, err := ping.NewPinger("10.10.10.2")
|
||||
if err != nil {
|
||||
panic(err) // Blocks until finished.
|
||||
}
|
||||
pinger.Count = 3000
|
||||
pinger.Timeout = 10 * time.Second
|
||||
if err = pinger.Run(); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
println(conn)
|
||||
// ICMP Ping
|
||||
pinger, err := ping.NewPinger("example.com")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
pinger.Count = 3
|
||||
err = pinger.Run() // Blocks until finished.
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
fmt.Printf("%+v", pinger.Statistics())
|
||||
// 硬盘信息
|
||||
dparts, _ := disk.Partitions(false)
|
||||
for _, part := range dparts {
|
||||
u, _ := disk.Usage(part.Mountpoint)
|
||||
if u != nil {
|
||||
log.Printf("%s %d %d", part.Device, u.Total, u.Used)
|
||||
}
|
||||
}
|
||||
fmt.Println(pinger.PacketsRecv, float32(pinger.Statistics().AvgRtt.Microseconds())/1000.0)
|
||||
}
|
||||
|
||||
func cmdExec() {
|
||||
|
@ -26,7 +26,7 @@ var SortedServerList []*model.Server
|
||||
|
||||
// ServerLock is the package-level read/write mutex for the server data.
// NOTE(review): DispatchTask read-locks it while walking SortedServerList;
// confirm the full set of fields it is meant to guard.
var ServerLock sync.RWMutex

// Version is the release version string. Only the post-commit value is kept;
// declaring it twice would not compile.
var Version = "v0.2.3"
|
||||
|
||||
func ReSortServer() {
|
||||
SortedServerList = []*model.Server{}
|
||||
|
Loading…
Reference in New Issue
Block a user