Authored by Xu

大手术,配置文件不全,从ops里面都拷贝过来了

上报错误总数超过1W,才发短信
@@ -9,22 +9,24 @@ public class ReportCount { @@ -9,22 +9,24 @@ public class ReportCount {
9 9
10 private static final AtomicInteger atomicInteger = new AtomicInteger(1); 10 private static final AtomicInteger atomicInteger = new AtomicInteger(1);
11 11
12 - // private static int last = 0;  
13 -  
14 public static int getAndIncrement() { 12 public static int getAndIncrement() {
15 return atomicInteger.getAndIncrement(); 13 return atomicInteger.getAndIncrement();
16 } 14 }
17 15
18 public static int getIncrement() { 16 public static int getIncrement() {
19 return atomicInteger.get(); 17 return atomicInteger.get();
  18 + }
20 19
21 -// int count = atomicInteger.get();  
22 -// if (count == 0) {  
23 -// return 0;  
24 -// }  
25 -// int increate = count - last;  
26 -// last = count;  
27 -// return increate; 20 + /**
  21 + * 错误总次数
  22 + */
  23 + private static final AtomicInteger errorSum = new AtomicInteger(1);
  24 +
  25 + public static int getErrorSum() {
  26 + return errorSum.getAndIncrement();
28 } 27 }
29 28
  29 + public static void setErrorSum(int i) {
  30 + errorSum.set(i);
  31 + }
30 } 32 }
@@ -58,8 +58,9 @@ public class ClientReportErrorServiceImpl implements ClientReportErrorService { @@ -58,8 +58,9 @@ public class ClientReportErrorServiceImpl implements ClientReportErrorService {
58 int count = ReportCount.getIncrement(); 58 int count = ReportCount.getIncrement();
59 int increate = count - alarmlast; 59 int increate = count - alarmlast;
60 alarmlast = count; 60 alarmlast = count;
61 - if (lastNum != 0 && increate != 0 && increate > flag && increate > 2 * lastNum) { 61 + if (lastNum != 0 && increate != 0 && increate > flag && increate > 2 * lastNum && ReportCount.getErrorSum() > 10000) {
62 alarm(InfluxDBName.AWS, increate, minute); 62 alarm(InfluxDBName.AWS, increate, minute);
  63 + ReportCount.setErrorSum(0);
63 } 64 }
64 lastNum = increate; 65 lastNum = increate;
65 66
@@ -69,8 +70,9 @@ public class ClientReportErrorServiceImpl implements ClientReportErrorService { @@ -69,8 +70,9 @@ public class ClientReportErrorServiceImpl implements ClientReportErrorService {
69 if (yestodayCount == 0) { 70 if (yestodayCount == 0) {
70 return; 71 return;
71 } 72 }
72 - if (increate > yestodayCount * 3) { 73 + if ((increate > yestodayCount * 3) && (ReportCount.getErrorSum() > 10000)) {
73 alarm(increate, minute, yestodayCount); 74 alarm(increate, minute, yestodayCount);
  75 + ReportCount.setErrorSum(0);
74 } 76 }
75 } 77 }
76 78
@@ -3,6 +3,16 @@ system.envi=product @@ -3,6 +3,16 @@ system.envi=product
3 bigdata.uid.location.url=http://172.31.6.98:8080/ds/member/getUidLocation/ 3 bigdata.uid.location.url=http://172.31.6.98:8080/ds/member/getUidLocation/
4 dnswrite.task.num=5 4 dnswrite.task.num=5
5 5
  6 +aws.redis.proxy.address=172.31.31.170
  7 +aws.redis.proxy.port=6379
  8 +aws.redis.proxy.auth=
  9 +
  10 +qq.redis.proxy.address=10.66.4.25
  11 +qq.redis.proxy.port=6379
  12 +qq.redis.proxy.auth=
  13 +
6 client.redis.proxy.address=10.66.4.81 14 client.redis.proxy.address=10.66.4.81
7 client.redis.proxy.port=6379 15 client.redis.proxy.port=6379
8 client.redis.proxy.auth= 16 client.redis.proxy.auth=
  17 +
  18 +kafka.servers=172.31.80.214:9092
@@ -58,6 +58,24 @@ @@ -58,6 +58,24 @@
58 </encoder> 58 </encoder>
59 </appender> 59 </appender>
60 60
  61 + <!-- 按照每天生成日志文件 -->
  62 + <appender name="ORDER_INFO_CHECK" class="ch.qos.logback.core.rolling.RollingFileAppender">
  63 + <file>${catalina.home}/logs/orderinfocheck/order_info_check.log</file>
  64 +
  65 + <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
  66 + <!--日志文件输出的文件名-->
  67 + <fileNamePattern>${catalina.home}/logs/orderinfocheck/order_info_check.%d{yyyy-MM-dd}.%i.log.gz</fileNamePattern>
  68 + <MaxHistory>30</MaxHistory>
  69 + <timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP">
  70 + <!-- or whenever the file size reaches ${maxFileSize} -->
  71 + <maxFileSize>${maxFileSize}</maxFileSize>
  72 + </timeBasedFileNamingAndTriggeringPolicy>
  73 + </rollingPolicy>
  74 + <encoder>
  75 + <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} - %msg%n</pattern>
  76 + </encoder>
  77 + </appender>
  78 +
61 <!-- root级别 DEBUG --> 79 <!-- root级别 DEBUG -->
62 <root level="INFO"> 80 <root level="INFO">
63 <!-- 文件输出 --> 81 <!-- 文件输出 -->
@@ -65,4 +83,9 @@ @@ -65,4 +83,9 @@
65 <appender-ref ref="WARN" /> 83 <appender-ref ref="WARN" />
66 </root> 84 </root>
67 85
  86 + <logger name="ordercheckLogger" additivity="false">
  87 + <level value="INFO"/>
  88 + <appender-ref ref="ORDER_INFO_CHECK"/>
  89 + </logger>
  90 +
68 </configuration> 91 </configuration>
1 # ******************** monitor common configs ******************** 1 # ******************** monitor common configs ********************
  2 +
  3 +#--------------------------------定时支付回调监控------------------------------------
  4 +#支付回调监控 每天10:00到21:00 每5分钟
  5 + cron.task.paycallback.alipay=0 0/5 10-20 * * ?
  6 +alram.paycallback.alipay.time_period=5
  7 +
  8 +#支付回调监控 每天9:00到24:00 每30分钟
  9 +cron.task.paycallback.wechat=0 0/30 9-23 * * ?
  10 +alram.paycallback.wechat.time_period=30
  11 +
  12 +#支付回调监控 每天9:00到24:00 整点
  13 +cron.task.paycallback.unionpay=0 0 9-23 * * ?
  14 +alram.paycallback.union.time_period=60
  15 +#--------------------------------定时支付回调监控------------------------------------
  16 +
  17 +#-------------------------------------定时读写任务------------------------------
  18 +#执行一次对于event写入任务的时间间隔
  19 +cron.task.influxdb.dbaccess.event.write=0 0/10 * * * ?
  20 +#进行一次对于event写入任务 时间间隔
  21 +influxdb.dbaccess.event.write.time_period=10
  22 +
  23 +#每15分钟执行一次对于statement写入任务
  24 +cron.task.influxdb.dbaccess.statement.write=0 0/15 * * * ?
  25 +#进行一次对于statement写入任务 时间间隔
  26 +influxdb.dbaccess.statement.write.time_period=15
  27 +
  28 +#每15分钟执行一次对于src写入任务
  29 +cron.task.influxdb.dbaccess.src_service_name.write=0 0/15 * * * ?
  30 +#进行一次对于src_service_name写入任务 时间间隔
  31 +influxdb.dbaccess.src_service_name.write.time_period=15
  32 +
  33 +#每10分钟执行一次对server_access表group by event的写入任务
  34 +cron.task.influxdb.service_access.event.write=0 6/10 * * * ?
  35 +influxdb.service_access.event.write.time_period=10
  36 +
  37 +#每15分钟执行一次对gateway_access表group by event的写入任务
  38 +cron.task.influxdb.gateway_access.event.write=0 3/15 * * * ?
  39 +influxdb.gateway_access.event.write.time_period=15
  40 +#--------------------------------定时读写任务------------------------------------
  41 +
  42 +#--------------------------------用户行为监控------------------------------------
  43 +
  44 +#用户注册异常信息监控时间cron表达式
  45 +alarm.cron.user.register.exception=0 0/15 * * * ?
  46 +
  47 +#用户登录异常信息监控时间cron表达式
  48 +alarm.cron.user.login.exception=0 0/15 * * * ?
  49 +
  50 +#用户无注册信息监控时间cron表达式
  51 +alarm.cron.user.register=0 0/15 9-22 * * ?
  52 +alarm.cron.user.register.qcloud=0 0 9-22 * * ?
  53 +
  54 +#用户无注册时间查询间隔
  55 +alarm.user.register.time_period=15
  56 +alarm.user.register.time_period.qcloud=60
  57 +
  58 +#注册异常数量告警阈值 查询间隔
  59 +alram.user.register.exception.threshold_count=5
  60 +alram.user.register.exception.time_period=15
  61 +
  62 +#登录异常数量告警阈值 查询间隔
  63 +alram.user.login.exception.threshold_count=5
  64 +alram.user.login.exception.time_period=15
  65 +
  66 +#--------------------------------用户行为监控------------------------------------
  67 +
  68 +#---------------------------------数据库监控-----------------------------------
  69 +
  70 +#数据库异常监控 定时任务cron表达式 时间间隔 告警阈值
  71 +alarm.cron.database.exception=0 0/3 * * * ?
  72 +alarm.database.exception.time_period=3
  73 +alarm.database.exception.threshold_count=50
  74 +
  75 +#--------------------------------数据库监控------------------------------------
  76 +
  77 +#--------------------------------服务监控------------------------------------
  78 +
  79 +#服务异常监控 定时任务cron表达式 时间间隔
  80 +alarm.cron.service.exception=0 0/2 * * * ?
  81 +alarm.database.service.exception.time_period=2
  82 +
  83 +#--------------------------------服务监控------------------------------------
  84 +
  85 +#--------------------------------订单模块监控------------------------------------
  86 +
  87 +#订单提交失败定时任务cron表达式 时间间隔
  88 +cron.task.order.submitfailed=0 0/5 * * * ?
  89 +alram.order.submitfailed.time_period=5
  90 +
  91 +#订单支付失败定时任务cron表达式 时间间隔
  92 +cron.task.order.payfailed=0 0/5 * * * ?
  93 +alram.order.payfailed.time_period=5
  94 +
  95 +#--------------------------------订单模块监控-----------------------------------
  96 +
  97 +#----------------------------Monitor告警模块--------------------------------
  98 +cron.task.monitor.exception=0 0/2 * * * ?
  99 +alarm.monitor.exception.time_period=2
  100 +#----------------------------Monitor告警模块--------------------------------
  101 +
  102 +#----------------------------yh_logs告警模块--------------------------------
  103 +#发送短信失败告警
  104 +cron.task.logs.sns_send_failed=0 0/2 * * * ?
  105 +alarm.logs.sns_send_failed.time_period=2
  106 +alarm.logs.sns_send_failed.threshold_count=5
  107 +#----------------------------yh_logs告警模块--------------------------------
  108 +
  109 +#------------jmxtrans----------------
  110 +cron.task.jmxtrans.memory=0 0/5 * * * ?
  111 +jmxtrans.memory.time_period=5
  112 +jmxtrans.memory.flag=3500000000
  113 +cron.task.jmxtrans.thread=0 0/5 * * * ?
  114 +jmxtrans.thread.time_period=5
  115 +jmxtrans.thread.flag=1500
  116 +#------------jmxtrans----------------
  117 +
  118 +#------------orderinfocheck----------------
  119 +cron.task.orderinfocheck=0 0/2 * * * ?
  120 +cron.task.refundAchange=0 0/5 * * * ?
  121 +#------------orderinfocheck----------------
  122 +
  123 +#------------nginx----------------
  124 +cron.nginx.countCheck=0 0/2 * * * ?
  125 +cron.nginx.countCheck_period=2
  126 +cron.nginx.countCheck_number=2000
  127 +#------------nginx----------------
  128 +
2 #------------client---------------- 129 #------------client----------------
3 cron.client.countCheck=0 0/3 * * * ? 130 cron.client.countCheck=0 0/3 * * * ?
4 cron.client.countCheck_period=3 131 cron.client.countCheck_period=3
@@ -8,4 +135,14 @@ cron.task.influxdb.client.hour=0 0 0/1 * * ? @@ -8,4 +135,14 @@ cron.task.influxdb.client.hour=0 0 0/1 * * ?
8 cron.task.influxdb.client.day=0 0 0 0/1 * ? 135 cron.task.influxdb.client.day=0 0 0 0/1 * ?
9 cron.task.influxdb.clientreport.peroid=5 136 cron.task.influxdb.clientreport.peroid=5
10 #------------client---------------- 137 #------------client----------------
  138 +
  139 +#-----logs----
  140 +cron.task.logs=0 0/2 * * * ?
  141 +alarm.logs.time_period=2
  142 +#-----logs----
11 cron.task.client.dns=0 0 0/1 * * ? 143 cron.task.client.dns=0 0 0/1 * * ?
  144 +#-------search
  145 +cron.task.search.exception=0 0/5 * * * ?
  146 +cron.task.search.log=0 0/10 * * * ?
  147 +cron.task.search.log.period=10
  148 +cron.task.search.time_period=5
  1 +order.system.url=http://172.31.70.205:8084/order
  2 +erp.system.url=http://api.erp.yohobuy.com
  1 +rabbit_host=172.31.50.188:5672
  2 +rabbit_user=yoho
  3 +rabbit_password=yoho
  4 +rabbit_vhost=yoho
@@ -7,6 +7,62 @@ sendsms.notice.productid=8 @@ -7,6 +7,62 @@ sendsms.notice.productid=8
7 qcloud_sms_url=https://yun.tim.qq.com/v3/tlssmssvr/sendmultisms2?sdkappid=1400021400&random=124 7 qcloud_sms_url=https://yun.tim.qq.com/v3/tlssmssvr/sendmultisms2?sdkappid=1400021400&random=124
8 qcloud_sms_key=6e56f948f6f1c0a1bc359e23f7acc140 8 qcloud_sms_key=6e56f948f6f1c0a1bc359e23f7acc140
9 9
10 -base_mobile=18751986615,18652008443 10 +base_mobile=18751986615,18652008443,13515100825,18252034289
11 11
12 -clien_error_mobile=18751986615,18652008443,18915961598,18252034289,13515100825  
  12 +#------------------------------------------支付回调手机号------------------------------------------------
  13 +sendsms.alarm.paycallback=18751986615,18652008443,13515100825,18252034289
  14 +#------------------------------------------支付回调手机号------------------------------------------------
  15 +
  16 +#------------------------------------------订单异常手机号------------------------------------------------
  17 +sendsms.alarm.mobile.orderexception=13585196262,13951634768,15950561359,18751986615,18652008443,13515100825,18252034289
  18 +#------------------------------------------订单异常手机号------------------------------------------------
  19 +
  20 +#------------------------------------------用户行为异常手机号------------------------------------------------
  21 +sendsms.alarm.mobile.userbehaviour=18751986615,18652008443,13515100825,18252034289
  22 +#------------------------------------------用户行为异常手机号------------------------------------------------
  23 +
  24 +#------------------------------------------数据库异常手机号------------------------------------------------
  25 +sendsms.alarm.mobile.dbexception=18652008443,18751986615,13811102093,13451938602,13515100825,18252034289
  26 +#------------------------------------------数据库异常手机号------------------------------------------------
  27 +
  28 +#-------------------------------------------服务异常告警手机号----------------------------------------------------
  29 +
  30 +sendsms.alarm.mobile.serviceexception.product=18751986615,18652008443,18001582955,15950561359,13451938602,13515100825,18252034289
  31 +sendsms.alarm.mobile.serviceexception.search=18751986615,18652008443,18001582955,15950561359,13451938602,13515100825,18252034289
  32 +sendsms.alarm.mobile.serviceexception.guang=18751986615,18652008443,18001582955,15950561359,13451938602,13515100825,18252034289
  33 +sendsms.alarm.mobile.serviceexception.users=18751986615,18652008443,18001582955,15950561359,13451938602,13515100825,18252034289
  34 +sendsms.alarm.mobile.serviceexception.order=18751986615,18652008443,18001582955,15950561359,13451938602,13515100825,18252034289
  35 +sendsms.alarm.mobile.serviceexception.sns=18751986615,18652008443,18001582955,15950561359,13451938602,13515100825,18252034289
  36 +sendsms.alarm.mobile.serviceexception.promotion=18751986615,18652008443,18001582955,15950561359,13451938602,13515100825,18252034289
  37 +sendsms.alarm.mobile.serviceexception.message=18751986615,18652008443,18001582955,15950561359,13451938602,13515100825,18252034289
  38 +sendsms.alarm.mobile.serviceexception.resources=18751986615,18652008443,18001582955,15950561359,13451938602,13515100825,18252034289
  39 +sendsms.alarm.mobile.serviceexception.brower=18751986615,18652008443,18001582955,15950561359,13451938602,13515100825,18252034289
  40 +#-------------服务异常告警手机号-------------------
  41 +
  42 +#---------------------Monitor异常数据告警手机号------------------
  43 +sendsms.alarm.mobile.monitor.exception=18751986615,18652008443,13515100825,18252034289
  44 +#---------------------Monitor异常数据告警手机号------------------
  45 +
  46 +#-----------------------------Logs告警模块---------------------------
  47 +#短信发送失败量超过阈值 告警手机号
  48 +sendsms.alarm.mobile.logs.sns_send_failed=18751986615,18652008443,13515100825,18252034289
  49 +#-----------------------------Logs告警模块--------------------------
  50 +
  51 +#-----------------------------监控系统开发人员--------------------------
  52 +sendsms.alarm.mobile.ops_manager.developer=13951685157
  53 +#-----------------------------监控系统开发人员--------------------------
  54 +
  55 +dnspod.mobile=18751986615,18652008443,13515100825,18252034289
  56 +
  57 +jmxtrans.mobile=18751986615,18652008443,13515100825,18252034289
  58 +
  59 +nginx.config.switch.mobile=18751986615,18652008443,13515100825,18252034289
  60 +#订单校验失败
  61 +orderinfocheck.mobile=15950561359,13951882433,18751986615,18652008443,13515100825,18252034289,13552738725,13902496867,18511020499
  62 +
  63 +nginx.error.mobile=18751986615,18652008443,13515100825,18252034289
  64 +
  65 +log.order.error.mobile=18751986615,18652008443,13515100825,18252034289
  66 +search.mobile=18751986615,18652008443,13515100825,18252034289
  67 +
  68 +clien_error_mobile=18751986615,18652008443,13515100825,18252034289
@@ -6,7 +6,7 @@ jdbc.users=yh_test @@ -6,7 +6,7 @@ jdbc.users=yh_test
6 jdbc.passwords=yh_test 6 jdbc.passwords=yh_test
7 7
8 8
9 -local.jdbc.url=jdbc:mysql://172.16.6.246:3306/yh_ops?characterEncoding=utf-8&autoReconnect=true&zeroDateTimeBehavior=convertToNull 9 +local.jdbc.url=jdbc:mysql://172.16.6.61:3306/yh_ops?characterEncoding=utf-8&autoReconnect=true&zeroDateTimeBehavior=convertToNull
10 local.jdbc.user=root 10 local.jdbc.user=root
11 local.jdbc.password=123456 11 local.jdbc.password=123456
12 #---------jdbc config---------- 12 #---------jdbc config----------
@@ -4,5 +4,6 @@ String basePath = request.getScheme()+"://"+request.getServerName()+":"+request. @@ -4,5 +4,6 @@ String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.
4 %> 4 %>
5 <html> 5 <html>
6 <body> 6 <body>
  7 + Hello!
7 </body> 8 </body>
8 </html> 9 </html>