# mysql_slave_failover.py 5.22 KB
# -*- coding: utf8 -*-
import requests
from requests.auth import HTTPBasicAuth
import json
import time
import os

"""
Author: chunhua.zhang@yoho.cn

 1. Called by Zabbix when a MySQL server goes down.
 2. Only acts when the downed MySQL server is a slave/read node.
 3. Changes the read DNS to the corresponding master/write DNS and reloads cobar.
"""
# Deployment settings. Every value can be overridden by an environment variable
# of the same name (the mechanism already used for ansible_password); the
# literals are only fallbacks that keep existing deployments working.
# NOTE(review): the DingTalk access token and the Jenkins token below are
# credentials committed to source control -- rotate them and supply the new
# values through the environment instead of editing this file.

# DingTalk robot webhook that receives the failover notification.
DING = os.environ.get(
    "DING",
    "https://oapi.dingtalk.com/robot/send?access_token=31cf1a526c98862b7945e561c5a0d857a069350dbee0e807074b83b39fa1e1af",
)

# Base URL and credentials of the Ansible API used to read inventories and
# launch job templates.
ansible_url = os.environ.get("ansible_url", "http://ansible.yohops.com/api/v2")
ansible_user = os.environ.get("ansible_user", "chunhua.zhang")
ansible_password = os.environ.get('ansible_password')

# Jenkins job (and its trigger token) that is called after the failover to
# update the ansible variables kept in git.
JENKINS_UPDATE_VAR_URL = os.environ.get(
    "JENKINS_UPDATE_VAR_URL",
    "http://jenkins.yohops.com/job/yoho-ansible-roles/buildWithParameters",
)
JENKINS_TOKEN = os.environ.get("JENKINS_TOKEN", "607f13cd5f7ff5bdf5813f9745d7da49")

def main_handler(event, context):
    """Handle one alarm request: validate it, run the failover, send the alert.

    Args:
        event: invocation payload; ``event['queryString']`` must contain the
            ``auth``, ``host`` and ``alarm`` entries.
        context: not used by this handler.

    Returns:
        A response dict with ``statusCode`` 200 in every case; ``body`` holds
        the outcome text ("Request params failed", "Auth Failed" or the
        success message).
    """
    def reply(text):
        # Same response envelope for every outcome; only the body text differs.
        return {"isBase64Encoded": False, "statusCode": 200, "headers": {}, "body": text}

    query = event['queryString']
    print("start to process mysql alarm: %s" % query)

    # All three parameters are mandatory.
    if any(name not in query for name in ("auth", "host", "alarm")):
        return reply("Request params failed")

    # Shared-secret check.
    if query['auth'] != 'yohomysqlfailover123456887':
        return reply("Auth Failed")

    fail_host = query['host']
    alarm = query['alarm']  # e.g. 'MySQL status on 3306 port is Down (0)'
    ret = ansible_tower_process(fail_host, alarm)
    alert_to_dingding(fail_host, alarm, ret)
    return reply(" Mysql host error Proccess Successfully!")

def ansible_tower_process(down_host, alarm):
    """Point the read entries of a failed MySQL slave at their write hosts.

    For every handled inventory the function:
      1. reads the inventory variables from the Ansible API and collects each
         ``db*_read`` variable whose value equals ``down_host``;
      2. launches job template 36 (DNS update) with those variables set to the
         value of the matching ``db*_write`` variable;
      3. launches job template 35 (cobar reload);
      4. calls the Jenkins job that updates the ansible variables in git.

    The steps are best effort: a job that does not succeed does not stop the
    remaining steps, but it is reflected in the return value.

    Args:
        down_host: host value of the failed MySQL node, as it appears in the
            inventory variables.
        alarm: alarm text; it is only processed when it contains both "mysql"
            and "is down" (case-insensitive).

    Returns:
        0 when every launched job succeeded (also when no variable matched
        ``down_host``); -1 when the request was ignored (empty host or
        unrelated alarm) or when a launched job did not succeed.
    """
    if not down_host:
        return -1
    alarm_text = alarm.lower()
    if "mysql" not in alarm_text or "is down" not in alarm_text:
        print("ignore this alarm :%s" % alarm)
        return -1

    # Inventory id -> environment name passed to the Jenkins job (az1, az2, az3).
    INVENTORY_ENV = {6: "az1", 5: "az2", 4: "az3"}
    read_suffix, write_suffix = "_read", "_write"
    auth = (ansible_user, ansible_password)
    timeout = 30  # seconds; without it a stalled connection blocks forever
    result = 0

    for inventory in [4]:
        r = requests.get(ansible_url + "/inventories/%i/" % inventory,
                         auth=auth, timeout=timeout)
        inventory_vars = json.loads(r.json()['variables'])

        # Find the db read keys whose value is the failed slave node.
        extra_var = {}
        for key, value in inventory_vars.items():
            if not (key.startswith("db") and key.endswith(read_suffix) and value == down_host):
                continue
            # Swap only the trailing "_read"; str.replace would also rewrite
            # a "_read" occurring in the middle of the key.
            write_key = key[:-len(read_suffix)] + write_suffix
            if write_key not in inventory_vars:
                print("can not find variable %s in inventory:%i, skip %s" % (write_key, inventory, key))
                continue
            extra_var[key] = inventory_vars[write_key]  # change read to write host

        if not extra_var:
            print("can not find any variables in inventory:%i with read mysql is %s" % (inventory, down_host))
            continue

        # Captured before 'confirmation' is added: only the db keys go to git.
        keys = list(extra_var)
        values = [extra_var[key] for key in keys]

        # 1. update dns
        extra_var['confirmation'] = 'DNS'
        dns_payload = {'inventory': inventory, 'extra_vars': json.dumps(extra_var)}
        print("start to update dns for inventory: %i , params is: %s" % (inventory, dns_payload))
        r_dns = requests.post(ansible_url + "/job_templates/36/launch/",
                              json=dns_payload, auth=auth, timeout=timeout)

        # 1.1. fetch job status, make sure dns reload success
        if make_sure_job_success(r_dns.json()['job']) != 0:
            result = -1

        # 2. reload cobar
        cobar_payload = {'inventory': inventory}
        print("start to reload cobar for inventory: %i , params is: %s" % (inventory, cobar_payload))
        r_cobar = requests.post(ansible_url + "/job_templates/35/launch/",
                                json=cobar_payload, auth=auth, timeout=timeout)

        # 2.1. fetch job status, make sure job success
        if make_sure_job_success(r_cobar.json()['job']) != 0:
            result = -1

        # 3. update git, once per inventory so that each environment only
        # receives the keys that were found in its own inventory.
        jenkins_param = {
            "token": JENKINS_TOKEN,
            "MODIFY_ENV": INVENTORY_ENV[inventory],
            "keys": ",".join(keys),
            "values": ",".join(values),
        }
        print("start to update ansible git variables : %s" % jenkins_param)
        requests.get(JENKINS_UPDATE_VAR_URL, params=jenkins_param, timeout=timeout)

    return result
    

 # 2. fetch job status, make sure job success
def make_sure_job_success(job_id, max_retry=100, interval=3):
    """Poll an Ansible job until it succeeds, fails, or the retries run out.

    Args:
        job_id: numeric id of the launched job.
        max_retry: maximum number of status checks (default 100).
        interval: seconds to wait between two checks (default 3).

    Returns:
        0 when the job status is 'successful'; -1 when the job ended in a
        terminal non-success state or was still not successful after
        ``max_retry`` checks.
    """
    # Terminal non-success states of a job; polling again cannot change them,
    # so give up immediately instead of waiting for the whole retry budget.
    failed_states = ("failed", "error", "canceled")

    for _ in range(max_retry):
        r_check_job_status = requests.get(ansible_url + "/jobs/%i/" % job_id,
                                          auth=(ansible_user, ansible_password),
                                          timeout=30)
        status = r_check_job_status.json()['status']  # parse the body once
        if status == 'successful':
            print("job: %i is success" % job_id)
            return 0
        print("job:%i  status is : %s" % (job_id, status))
        if status in failed_states:
            return -1
        time.sleep(interval)

    print("can not finish job : %i at %i times" % (job_id, max_retry))
    return -1

def alert_to_dingding(down_host, alarm, ret):
    """Send a markdown message about the failover to the DingTalk webhook.

    Message format reference:
    https://open-doc.dingtalk.com/docs/doc.htm?spm=a219a.7629140.0.0.karFPe&treeId=257&articleId=105735&docType=1

    Args:
        down_host: the failed host, included in the message text.
        alarm: the alarm text, included in the message text.
        ret: integer result of the failover processing (formatted with %i).

    Returns:
        The string "SUCCESS"; the webhook response is only printed, not checked.
    """
    text = "### Mysql Down. Down Host: %s , alarm: %s, ret: %i" % (down_host, alarm, ret)
    ding_req = {
        "msgtype": "markdown",
        "markdown": {
            "title": "Mysql Failover",
            "text": text,
        },
    }

    response = requests.post(url=DING, json=ding_req)
    print("Send DingDing message result:%s, request: %s " % (response, ding_req))
    return "SUCCESS"