Authored by jack

连续三次触发阈值 告警

... ... @@ -2,6 +2,7 @@ package com.monitor.zabbix.constants;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.monitor.zabbix.model.ErrorAlarm;
import com.monitor.zabbix.model.ZabbixItemInfo;
import java.util.HashMap;
... ... @@ -9,6 +10,7 @@ import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.atomic.AtomicInteger;
/**
* Created by yoho on 2016/10/11.
... ... @@ -38,4 +40,6 @@ public interface Constants {
Map<Integer, String> ZABBIXALARMIDMPA = new ConcurrentHashMap<>();
Map<Integer, CopyOnWriteArrayList<ZabbixItemInfo>> ZABBIXALARMITEMMAP = new ConcurrentHashMap<>();
Map<String, ErrorAlarm> ERRORMAP = new ConcurrentHashMap<>();
}
... ...
package com.monitor.zabbix.model;
import lombok.Data;
/**
* Per-host record of recent threshold violations, kept in {@code Constants.ERRORMAP}
* and used to decide when repeated violations should raise an alarm.
*
* Accessors (getTime/setTime/getCount/setCount) are generated by Lombok's {@code @Data}.
*
* Created by yoho on 2016/11/3.
*/
@Data
public class ErrorAlarm {
// Timestamp of the most recent recorded violation; callers store System.currentTimeMillis() here.
Long time;
// Number of consecutive violations recorded so far; a new record starts at 1.
Integer count = 1;
}
... ...
... ... @@ -16,10 +16,7 @@ import com.monitor.zabbix.impl.PointBuilder;
import com.monitor.zabbix.mapper.ZabbixHistoryMapper;
import com.monitor.zabbix.mapper.ZabbixHostMapper;
import com.monitor.zabbix.mapper.ZabbixItemMapper;
import com.monitor.zabbix.model.ZabbixDHistoryInfo;
import com.monitor.zabbix.model.ZabbixHostInfo;
import com.monitor.zabbix.model.ZabbixItemInfo;
import com.monitor.zabbix.model.ZabbixUHistoryInfo;
import com.monitor.zabbix.model.*;
import org.apache.commons.lang.StringUtils;
import org.joda.time.DateTime;
import org.slf4j.Logger;
... ... @@ -44,30 +41,30 @@ import java.util.concurrent.ThreadFactory;
@EnableScheduling
public class ZabbixAlarm {
private static String ALARMTEMPLATE = "时间:%s,ip:%s,标签:%s,CPU使用率:%.2f%%,可用内存/总内存(MB):%s,输入带宽:%.2fMbps,输出带宽:%.2fMbps";
private static String ALARMTEMPLATE = "时间:%s,IP:%s,标签:%s CPU使用率:%.2f%%,可用内存/总内存(MB):%s,输入/输出带宽(Mbps):%.2f / %.2f";
public static final Logger DEBUG = LoggerFactory.getLogger(ZabbixAlarm.class);
@Autowired
ZabbixHostMapper hostMapper;
private ZabbixHostMapper hostMapper;
@Autowired
ZabbixItemMapper itemMapper;
private ZabbixItemMapper itemMapper;
@Autowired
ZabbixHistoryMapper historyMapper;
private ZabbixHistoryMapper historyMapper;
@Value("${zabbix_cpu_alarm}")
Double zabbixCpuAlarm;
private Double zabbixCpuAlarm;
@Value("${zabbix_mem_alarm}")
Double zabbixMemAlarm;
private Double zabbixMemAlarm;
@Value("${zabbix_net_alarm}")
Double zabbixNetAlarm;
private Double zabbixNetAlarm;
@Autowired
IHostInfoService iHostInfoService;
private IHostInfoService iHostInfoService;
@Autowired
private AlarmMsgService alarmMsgService;
... ... @@ -75,7 +72,6 @@ public class ZabbixAlarm {
@Autowired
private SnsMobileConfig snsMobileConfig;
private final class ZabbixTask implements Runnable {
List<Integer> hostIdList;
... ... @@ -275,15 +271,75 @@ public class ZabbixAlarm {
//|| zabbixMemAlarm > memPer
if (zabbixCpuAlarm > idleCpu || zabbixNetAlarm < inNet || zabbixNetAlarm < outNet) {
if ((zabbixCpuAlarm > idleCpu || zabbixNetAlarm < inNet || zabbixNetAlarm < outNet) && queryErrorMap(ip)) {
String nowTime = DateTime.now().toString("yyyy-MM-dd HH:mm:ss");
String nowString = DateTime.now().toString("yyyy-MM-dd HH:mm:ss");
String alarmInfo = String.format(ALARMTEMPLATE, nowTime, ip, tags, 100 - idleCpu, String.valueOf(avMem) + " / " + String.valueOf(toMem), inNet, outNet);
String alarmInfo = String.format(ALARMTEMPLATE, nowString, ip, tags, 100 - idleCpu, String.valueOf(avMem) + " / " + String.valueOf(toMem), inNet, outNet);
DEBUG.info("3m 3times ,send alarm vm info {}", alarmInfo);
DEBUG.info("Alarm vm info {}", alarmInfo);
Constants.ERRORMAP.remove(ip);
alarmMsgService.sendSms("服务器性能告警", alarmInfo, snsMobileConfig.getBaseMobile());
}
}
/**
 * Records a threshold violation for the given host and reports whether it is the
 * third consecutive one, i.e. whether an alarm SMS should be sent now.
 *
 * <p>State is kept per ip in {@code Constants.ERRORMAP}:
 * <ul>
 *   <li>no record yet: a record with count 1 is created, no alarm;</li>
 *   <li>the previous violation is more than 90s old: the streak is considered broken,
 *       the record is dropped, no alarm;</li>
 *   <li>otherwise the streak grows by one; on reaching three the record is dropped
 *       and {@code true} is returned, else the record is updated and no alarm is sent.</li>
 * </ul>
 *
 * @param ip host ip used as the key into {@code Constants.ERRORMAP}
 * @return {@code true} only when this call is the third consecutive violation
 */
private boolean queryErrorMap(String ip) {
    // Reports are expected about once a minute; the window is widened to 90s
    // to tolerate zabbix reporting delay.
    final long maxGapMillis = 90 * 1000L;
    // Number of consecutive violations required before an alarm is sent.
    final int alarmThreshold = 3;

    long nowTimeStamp = System.currentTimeMillis();
    ErrorAlarm errorAlarm = Constants.ERRORMAP.get(ip);

    // No recent violation for this host: start a new streak.
    if (null == errorAlarm) {
        errorAlarm = new ErrorAlarm();
        errorAlarm.setCount(1);
        errorAlarm.setTime(nowTimeStamp);
        Constants.ERRORMAP.put(ip, errorAlarm);
        return false;
    }

    // Previous violation is too old to be consecutive: clear the record, do not alarm.
    if (maxGapMillis < (nowTimeStamp - errorAlarm.getTime())) {
        Constants.ERRORMAP.remove(ip);
        return false;
    }

    // Count the current violation on top of the stored streak.
    int consecutive = errorAlarm.getCount() + 1;
    if (alarmThreshold <= consecutive) {
        Constants.ERRORMAP.remove(ip);
        return true;
    }

    // Streak still too short: remember the new time and the incremented count.
    // (The previous code passed lastCount++ here, which stored the old value,
    // so the count never advanced and the alarm could never fire.)
    errorAlarm.setTime(nowTimeStamp);
    errorAlarm.setCount(consecutive);
    return false;
}
}
... ...