Blame view

scripts/tools/sync_bigdata_inventory.py 9.56 KB
txyang authored
1 2 3 4
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# author tiexin.yang@yoho.cn
root authored
5 6 7
import sys
sys.path.append('../')
from lb_manager.qcloud.qcloud_api import QcloudApi
root authored
8
import json
txyang authored
9 10 11 12
import datetime
import argparse
import os,shutil
import time
txyang authored
13 14
import re
root authored
15 16 17 18 19 20
"""
  作用:由于EMR集群不定时会发生伸缩变化,该脚本可以定时维护inventories/bigdata/hosts文件,如果发生变更会同步更新该文件并推送到git.yoho.cn,并执行resolv的playbook同步DNS
  依赖:zabbix.py(注销zabbix host)
  Demo:
        python server_decomission.py 1.2.3.4
"""
txyang authored
21
txyang authored
22 23
class EMRClusterScanner(object):
    def __init__(self,interval,secretId='',secretKey=''):
root authored
24
        self.interval = int(interval) #扫描时间间隔 若为0则只扫描一次 单位:秒 
txyang authored
25
        if not secretId or not secretKey:
ansible authored
26
            self.secretId,self.secretKey = open('/home/ansible/.qcloud_config').read().strip('\n').split('\n')
txyang authored
27 28
        else:
            self.secretId,self.secretKey = secretId,secretKey
txyang authored
29
ansible authored
30 31 32
        self.workDir = '/home/ansible/yoho-ansible-roles/'
        self.inventoryFile = '{0}inventories/bigdata/hosts'.format(self.workDir)
        self.playbook_resolv = '{0}playbooks/resolv.yml'.format(self.workDir)
root authored
33
        self.receivers = ['tiexin.yang@yoho.cn','kun.xiang@yoho.cn','chunhua.zhang@yoho.cn'] #已禁用
root authored
34 35 36
        self.emrNameMap = {
            "emr-rt": "emr-r6bhtb5v",
            "emr-ops": "emr-iaeloyc2",
root authored
37
            "emr-dw": "emr-aadl4hek",
root authored
38 39
            "emr-recom": "emr-ilj72ynu"
        }
txyang authored
40 41

txyang authored
42 43 44
    def EmrDescribeCluster(self,PageNo=1):
        #获取所有EMR集群数据
        #默认获取一页数据,每页最多显示20条集群信息
root authored
45
        client = QcloudApi(self.secretId,self.secretKey)
txyang authored
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
        params = {
            "Action":"EmrDescribeCluster",
            "PageNo": PageNo,
            "PageSize": 20
            }
        result = client.do_query(params,"emr.api.qcloud.com/v2/index.php")
        return result
    
    
    def EmrDescribeClusterNode(self,ClusterId,NodeFlag,PageNo=1):
        #获取单个EMR集群下指定节点名称的节点列表
        client = QcloudApi(self.secretId,self.secretKey)
        params = {
            "Action":"EmrDescribeClusterNode",
            "ClusterId":ClusterId,
            "NodeFlag":NodeFlag, #节点名称, 取值为:master,core,task,common,all
            "PageNo": PageNo,
            "PageSize": 20
            }
        result = client.do_query(params,"emr.api.qcloud.com/v2/index.php")
        return result
    
root authored
68
    def getCurrentNodes(self,clusterId):
txyang authored
69
        #获取当前所有EMR集群下的所有节点(不区分节点名称)
root authored
70
        #clusterList = self.getCurrentEmrs()
txyang authored
71 72 73
        emrNodeIps = []
        nodesCnt = 0
        PageNo = 1
root authored
74 75 76 77 78 79 80 81 82 83 84 85 86
        while True:
            print 'Getting nodes from',clusterId,'page',PageNo
            result = self.EmrDescribeClusterNode(clusterId,"all",PageNo)
            nodeList = result['data']['nodeList']
            for node in nodeList:
                emrNodeIps.append(node['ip'])
            nodesCnt += len(nodeList)
            totalCnt = result['data']['totalCnt']
            if nodesCnt < totalCnt:
                PageNo+=1
                continue
            else:
                break
txyang authored
87 88 89 90 91 92 93 94
        return emrNodeIps
    
    
    def getCurrentEmrs(self):
        #获取当前所有EMR集群的名称列表,如果超出默认页数则遍历所有结果页
        emrClusterList = []
        emrCnt = 0
        PageNo = 1
txyang authored
95
        while True:
txyang authored
96 97 98 99 100
            result = self.EmrDescribeCluster(PageNo)
            clusterList = result['data']['clusterList']
            for cluster in clusterList:
                emrClusterList.append(cluster['clusterId'])
            emrCnt += len(clusterList)
txyang authored
101
            totalCnt = result['data']['totalCnt']
txyang authored
102
            if emrCnt < totalCnt:
txyang authored
103 104 105 106
                PageNo+=1
                continue
            else:
                break
txyang authored
107 108 109
        return emrClusterList
    
    
root authored
110
    def getHostsInFile(self,data,prefix):
txyang authored
111
        #读取当前的Inventory信息,截取emr-recom下的所有ip数据用于和最新采集的ip列表进行匹配
root authored
112 113 114 115 116
        rexContent = re.compile(r'(\[{0}\][\d+\.\d+\.\d+\.\d+\s*]*)'.format(prefix),re.S)
        rexIp = re.compile(r'(\d+\.\d+\.\d+\.\d+)')
        content = rexContent.findall(data)
        hostsEntry = content[0].strip('\n')
        hostsInFile = rexIp.findall(hostsEntry)
txyang authored
117
        return hostsEntry,hostsInFile
txyang authored
118
txyang authored
119
    def gitUpdate(self):
ansible authored
120
        cmd = 'cd {0};git pull;git add {1};git commit -m "Update bigdata inventory file"; git push'.format(self.workDir,self.inventoryFile)
root authored
121 122 123 124
        if os.system(cmd) != 0:
            return False
        else:
            return True
root authored
125 126

    def syncDNS(self,target='emr-recom'):
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
        return True #脚本不再负责resolv文件刷新 交由tower schedule处理
        #if target == 'emr-recom': #目前只负责同步emr-recom的DNS
        #    cmd = 'ansible-playbook -i {0} {1}'.format(self.inventoryFile,self.playbook_resolv)
        #    retry = 3
        #    while retry > 0:
        #        print cmd
        #        output = os.popen(cmd).read()
        #        if 'unreachable=1' in output or 'failed=1' in output:
        #            with open('/var/log/supervisor/ansible.err','a') as f:
        #                f.write(output+'\n\n')
        #                f.close()
        #            retry-=1
        #            print "重新同步DNS...还剩{0}次".format(retry)
        #            time.sleep(300)
        #            continue
        #        else:
        #            with open('/var/log/supervisor/ansible.log','a') as f:
        #                f.write(output+'\n\n')
        #                f.close()
        #            print "DNS 同步成功"
        #            return True
        #    return False
        #else:
        #    return True
root authored
151 152 153 154

    def checkDNS(self):
        check_cmd = 'ansible-playbook -i {0} {1} --check'.format(self.inventoryFile,self.playbook_resolv)
        execute_cmd = 'ansible-playbook -i {0} {1}'.format(self.inventoryFile,self.playbook_resolv)
root authored
155
        print check_cmd
root authored
156 157 158 159 160 161 162 163 164
        output = os.popen(check_cmd).read()
        if 'changed=1' in output:
            print '检测到未变更主机!'
            print '开始执行变更'
            os.popen(execute_cmd)
        else:
            print '主机配置已最新'
        return True
        
txyang authored
165
    
root authored
166
    def updateEMRInventory(self, emrName):
txyang authored
167 168 169
        #比对当前与最新的Inventory ip列表,相同则pass不同则更新
        today = datetime.date.today().strftime('%Y%m%d')
        hostsData = open(self.inventoryFile).read()
root authored
170 171
        hostsEntry,hostsInFile = self.getHostsInFile(hostsData,prefix=emrName)
        currentNodes = self.getCurrentNodes(clusterId=self.emrNameMap[emrName])
txyang authored
172 173
        if set(currentNodes) == set(hostsInFile):
            print 'Inventory already up to date'
root authored
174
            return 'Passed'
txyang authored
175
        else:
root authored
176
            #print '\n'.join(currentNodes)
txyang authored
177 178
            N_toAdd = len(set(currentNodes) - set(hostsInFile))
            N_toDel = len(set(hostsInFile) - set(currentNodes))
root authored
179
            print '{0} to add,{1} to del'.format(N_toAdd,N_toDel)
txyang authored
180 181 182
            print 'Taking backup for current inventory file...'
            shutil.copyfile(self.inventoryFile,self.inventoryFile+'.'+today)
            print 'Replacing inventory file content...'
root authored
183
            newInventoryContent = hostsData.replace(hostsEntry,'[{0}]\n'.format(emrName)+'\n'.join(list(set(currentNodes))))
txyang authored
184 185 186 187
            with open(self.inventoryFile,'w') as f:
                f.write(newInventoryContent)
                f.close()
            print 'Pushing changes to git server...'
root authored
188 189 190 191 192 193 194
            updateResult = self.gitUpdate()
            if updateResult:
                pass
            else:
                print 'Inventory file updated failed'
                return False
            
txyang authored
195
            print 'Inventory file updated success'
root authored
196 197
            if N_toAdd > 0:
                print 'Running playbook to sync DNS...'
root authored
198 199 200 201 202 203
                syncResult = self.syncDNS(emrName)
                if syncResult:
                    print 'DNS synced'
                else:
                    print 'DNS sync failed'
                    return False
root authored
204
txyang authored
205
            return 'Inventory 新增{0}条,删除{1}条'.format(N_toAdd,N_toDel)
txyang authored
206
txyang authored
207 208 209
    def start_daemon(self):
        #以守护进程的状态运行此EMR扫描程序
        while True:
root authored
210
            #self.checkDNS()
root authored
211
            for emrName in self.emrNameMap:
root authored
212
                try:
root authored
213 214
                    result = self.updateEMRInventory(emrName)
                    if result:
root authored
215
                        if result.upper() == 'PASSED':
root authored
216
                            print 'Nothing changed, passed'
root authored
217 218
                            continue
                        else:
root authored
219
                            print 'EMR大数据Inventory [{0}] 已更新\n{1}\n详见 http://git.yoho.cn/ops/yoho-ansible-roles/blob/master/inventories/bigdata/hosts'.format(emrName,result)
root authored
220
                    else:
root authored
221
                        print 'EMR大数据Inventory更新失败\n请检查本地git缓存和ansible连接\n5分钟后重试'
root authored
222 223
                        time.sleep(300)
                        continue
root authored
224 225
                except Exception as e:
                    print e
root authored
226
                    print 'EMR大数据Inventory更新失败\n{0}\n5分钟后重试'.format(str(e))
root authored
227 228
                    time.sleep(300)
                    pass
root authored
229 230 231 232
            if self.interval > 0:
                time.sleep(self.interval)
            else:
                break
txyang authored
233 234 235
    

if __name__ == '__main__':
txyang authored
236
    parser = argparse.ArgumentParser()
root authored
237
    parser.add_argument('--interval',type=int,help='Checks interval in seconds')
txyang authored
238 239
    args = parser.parse_args()
    EMRClusterScanner(args.interval).start_daemon()