卧疾丰暇豫,翰墨时间作。这篇文章主要讲述ansible+python+shell 实现SpringCloud微服务治理相关的知识,希望能为你提供帮助。
前言:
公司目前还是传统方式的微服务集群,使用的是SpringCloud架构。最近晚上经常出现服务注册失败,consul上的服务会下线,导致整个微服务应用不可以使用,出现Down机状态。这个时候到集群节点上通过 ps -ef|grep xxx.jar 发现服务进程还在,然后继续查看相应日志,找到了原因:由于微服务会每隔一段时间去自检mysql、mq的连通性,如果出现一个超时(公司使用aws,难免会出现超时),那么服务就认为自己是异常状态,从而自动退出。鉴于这种情况,博主自己设计了一套服务治理方案,可以在服务出现异常退出(异常是指由于网络延时、连接超时等)时,去优雅地重启服务,减少了人工干预。
环境介绍:
主机名 | 角色 | 操作系统 |
opsServer | 跳板机+ansible+python | ubuntu18.0 |
cloudServer1p | 微服务+consul | ubuntu18.0 |
cloudServer2p | 微服务+consul | ubuntu18.0 |
cloudServer3p | 微服务+consul | ubuntu18.0 |
1. 在 opsServer上操作:
安装ansible:
apt-get update #更新软件源中的所有软件列表
apt-get install ansible #安装ansible
配置ansible:
vim /etc/ansible/ansible.cfg
[defaults]
inventory= /etc/ansible/hosts
forks= 5
remote_port= 22
host_key_checking = False
timeout = 10
log_path = /var/log/ansible.log
[inventory]
[privilege_escalation]
[paramiko_connection]
[ssh_connection]
[persistent_connection]
[accelerate]
[selinux]
[colors]
[diff]
vim /etc/ansible/hosts
[webservers]
cloudServer1p ansible_ssh_user=cloud ansible_ssh_private_key_file=/home/cloud/.ssh/id_rsa
cloudServer2p ansible_ssh_user=cloud ansible_ssh_private_key_file=/home/cloud/.ssh/id_rsa
cloudServer3p ansible_ssh_user=cloud ansible_ssh_private_key_file=/home/cloud/.ssh/id_rsa
解释:
确保opsServer与cloudServer1p ,cloudServer2p ,cloudServer3p的ssh 打通,这里是用密钥进行认证
配置crontab任务:
crontab -l
*/5 * * * * /usr/bin/python /home/cloud/ops/manageCloudService1p.py `curl -s http://cloudServer1p:8530/v1/agent/services` > /home/cloud/ops/cloudServer1p.log
*/5 * * * * /usr/bin/python /home/cloud/ops/manageCloudService2p.py `curl -s http://cloudServer2p:8530/v1/agent/services` > /home/cloud/ops/cloudServer2p.log
*/5 * * * * /usr/bin/python /home/cloud/ops/manageCloudService3p.py `curl -s http://cloudServer3p:8530/v1/agent/services` > /home/cloud/ops/cloudServer3p.log
注释:
curl -s http://cloudServer1p:8530/v1/agent/services 通过调用consul接口获取consul注册的微服务,然后作为参数传递给脚本去处理。
【ansible+python+shell 实现SpringCloud微服务治理】
manageCloudService1p.py内容:
# python manageCloudService1p.py json_text
"""Watchdog for the micro-services that should be registered in consul.

The JSON returned by consul's /v1/agent/services endpoint is passed on the
command line.  Every expected service id that is missing from that JSON is
reported by mail and restarted through the restartService.yml playbook.
"""
import os
import sys
import json
import smtplib
from email.mime.text import MIMEText

# Inventory host the restart playbook is run against.
TARGET_HOST = "cloudServer1p"

# (consul service id, startService.sh function) pairs, in check order.
SERVICES = (
    ("gateway-service-9999", "restartGatewayService"),
    ("order-service-8020", "restartOrderService"),
    ("portal-service-8080", "restartPortalService"),
    ("product-service-8010", "restartProductService"),
)

RESTART_COMMAND = (
    "ansible-playbook /etc/ansible/restartService.yml"
    " -e serverList={host} -e serverName={target}"
)


def sendMail(name):
    """Mail an alert saying that service *name* failed its check.

    Delivery problems are printed and swallowed so that a mail outage can
    never prevent the restart that follows.
    """
    mail_host = 'mail.163.com'
    mail_user = 'alert@163.com'
    # NOTE(review): the password is hard-coded; consider reading it from a
    # protected file or the environment instead.
    mail_pass = 'test123'
    sender = 'alert@163.com'
    receivers = ['alert@163.com']
    message = MIMEText(name + ' server check failure! i will start this server.', 'plain', 'utf-8')
    message['Subject'] = 'cloud server check'
    message['From'] = sender
    message['To'] = receivers[0]
    smtpObj = smtplib.SMTP()
    try:
        smtpObj.connect(mail_host, 25)
        smtpObj.login(mail_user, mail_pass)
        smtpObj.sendmail(sender, receivers, message.as_string())
        smtpObj.quit()
        print('success')
    except (smtplib.SMTPException, IOError, OSError) as e:
        # Socket-level failures (refused, DNS, timeout) are not
        # SMTPException, so they are caught explicitly as well.
        print('error', e)
    finally:
        # close() is safe on an unconnected or already-quit session.
        smtpObj.close()


def findDownServices(jsonstr):
    """Return the expected service ids that are absent from *jsonstr*.

    *jsonstr* must be a JSON object keyed by service id.  Raises ValueError
    when it is not valid JSON or is not a JSON object.
    """
    registered = json.loads(jsonstr)
    if not isinstance(registered, dict):
        raise ValueError("expected a JSON object keyed by service id")
    return [name for name, _ in SERVICES if name not in registered]


def main(argv):
    """Check the consul JSON given in *argv* and restart missing services."""
    if len(argv) <= 1:
        print("argv's length is 1, no json text input.")
        return
    try:
        # An unquoted `curl` substitution may be word-split by the shell,
        # so every argument after the script name is joined back together.
        down = findDownServices(" ".join(argv[1:]))
    except ValueError:
        print("json parse error.")
        return
    for name, target in SERVICES:
        if name not in down:
            print(name + " is up")
            continue
        print(name + " is down")
        sendMail(name)
        os.system(RESTART_COMMAND.format(host=TARGET_HOST, target=target))


if __name__ == "__main__":
    main(sys.argv)
manageCloudService2p.py内容:
# python manageCloudService2p.py json_text
"""Watchdog for the micro-services that should be registered in consul.

The JSON returned by consul's /v1/agent/services endpoint is passed on the
command line.  Every expected service id that is missing from that JSON is
reported by mail and restarted through the restartService.yml playbook.
"""
import os
import sys
import json
import smtplib
from email.mime.text import MIMEText

# Inventory host the restart playbook is run against.
TARGET_HOST = "cloudServer2p"

# (consul service id, startService.sh function) pairs, in check order.
SERVICES = (
    ("gateway-service-9999", "restartGatewayService"),
    ("order-service-8020", "restartOrderService"),
    ("portal-service-8080", "restartPortalService"),
    ("product-service-8010", "restartProductService"),
)

RESTART_COMMAND = (
    "ansible-playbook /etc/ansible/restartService.yml"
    " -e serverList={host} -e serverName={target}"
)


def sendMail(name):
    """Mail an alert saying that service *name* failed its check.

    Delivery problems are printed and swallowed so that a mail outage can
    never prevent the restart that follows.
    """
    mail_host = 'mail.163.com'
    mail_user = 'alert@163.com'
    # NOTE(review): the password is hard-coded; consider reading it from a
    # protected file or the environment instead.
    mail_pass = 'test123'
    sender = 'alert@163.com'
    receivers = ['alert@163.com']
    message = MIMEText(name + ' server check failure! i will start this server.', 'plain', 'utf-8')
    message['Subject'] = 'cloud server check'
    message['From'] = sender
    message['To'] = receivers[0]
    smtpObj = smtplib.SMTP()
    try:
        smtpObj.connect(mail_host, 25)
        smtpObj.login(mail_user, mail_pass)
        smtpObj.sendmail(sender, receivers, message.as_string())
        smtpObj.quit()
        print('success')
    except (smtplib.SMTPException, IOError, OSError) as e:
        # Socket-level failures (refused, DNS, timeout) are not
        # SMTPException, so they are caught explicitly as well.
        print('error', e)
    finally:
        # close() is safe on an unconnected or already-quit session.
        smtpObj.close()


def findDownServices(jsonstr):
    """Return the expected service ids that are absent from *jsonstr*.

    *jsonstr* must be a JSON object keyed by service id.  Raises ValueError
    when it is not valid JSON or is not a JSON object.
    """
    registered = json.loads(jsonstr)
    if not isinstance(registered, dict):
        raise ValueError("expected a JSON object keyed by service id")
    return [name for name, _ in SERVICES if name not in registered]


def main(argv):
    """Check the consul JSON given in *argv* and restart missing services."""
    if len(argv) <= 1:
        print("argv's length is 1, no json text input.")
        return
    try:
        # An unquoted `curl` substitution may be word-split by the shell,
        # so every argument after the script name is joined back together.
        down = findDownServices(" ".join(argv[1:]))
    except ValueError:
        print("json parse error.")
        return
    for name, target in SERVICES:
        if name not in down:
            print(name + " is up")
            continue
        print(name + " is down")
        sendMail(name)
        os.system(RESTART_COMMAND.format(host=TARGET_HOST, target=target))


if __name__ == "__main__":
    main(sys.argv)
manageCloudService3p.py内容:
# python manageCloudService3p.py json_text
"""Watchdog for the micro-services that should be registered in consul.

The JSON returned by consul's /v1/agent/services endpoint is passed on the
command line.  Every expected service id that is missing from that JSON is
reported by mail and restarted through the restartService.yml playbook.
"""
import os
import sys
import json
import smtplib
from email.mime.text import MIMEText

# Inventory host the restart playbook is run against.
TARGET_HOST = "cloudServer3p"

# (consul service id, startService.sh function) pairs, in check order.
SERVICES = (
    ("gateway-service-9999", "restartGatewayService"),
    ("order-service-8020", "restartOrderService"),
    ("portal-service-8080", "restartPortalService"),
    ("product-service-8010", "restartProductService"),
)

RESTART_COMMAND = (
    "ansible-playbook /etc/ansible/restartService.yml"
    " -e serverList={host} -e serverName={target}"
)


def sendMail(name):
    """Mail an alert saying that service *name* failed its check.

    Delivery problems are printed and swallowed so that a mail outage can
    never prevent the restart that follows.
    """
    mail_host = 'mail.163.com'
    mail_user = 'alert@163.com'
    # NOTE(review): the password is hard-coded; consider reading it from a
    # protected file or the environment instead.
    mail_pass = 'test123'
    sender = 'alert@163.com'
    receivers = ['alert@163.com']
    message = MIMEText(name + ' server check failure! i will start this server.', 'plain', 'utf-8')
    message['Subject'] = 'cloud server check'
    message['From'] = sender
    message['To'] = receivers[0]
    smtpObj = smtplib.SMTP()
    try:
        smtpObj.connect(mail_host, 25)
        smtpObj.login(mail_user, mail_pass)
        smtpObj.sendmail(sender, receivers, message.as_string())
        smtpObj.quit()
        print('success')
    except (smtplib.SMTPException, IOError, OSError) as e:
        # Socket-level failures (refused, DNS, timeout) are not
        # SMTPException, so they are caught explicitly as well.
        print('error', e)
    finally:
        # close() is safe on an unconnected or already-quit session.
        smtpObj.close()


def findDownServices(jsonstr):
    """Return the expected service ids that are absent from *jsonstr*.

    *jsonstr* must be a JSON object keyed by service id.  Raises ValueError
    when it is not valid JSON or is not a JSON object.
    """
    registered = json.loads(jsonstr)
    if not isinstance(registered, dict):
        raise ValueError("expected a JSON object keyed by service id")
    return [name for name, _ in SERVICES if name not in registered]


def main(argv):
    """Check the consul JSON given in *argv* and restart missing services."""
    if len(argv) <= 1:
        print("argv's length is 1, no json text input.")
        return
    try:
        # An unquoted `curl` substitution may be word-split by the shell,
        # so every argument after the script name is joined back together.
        down = findDownServices(" ".join(argv[1:]))
    except ValueError:
        print("json parse error.")
        return
    for name, target in SERVICES:
        if name not in down:
            print(name + " is up")
            continue
        print(name + " is down")
        sendMail(name)
        os.system(RESTART_COMMAND.format(host=TARGET_HOST, target=target))


if __name__ == "__main__":
    main(sys.argv)
restartService.yml内容如下:
---
# Runs startService.sh on the selected host(s) and prints its output.
# Both variables are supplied on the command line with -e:
#   serverList - inventory host or group the play targets
#   serverName - argument handed to startService.sh
- hosts: '{{ serverList }}'
  tasks:
    - name: restart server
      command: "/bin/bash /home/cloud/startService.sh {{serverName}}"
      register: result
    - name: show debug info
      debug: var=result.stdout verbosity=0
2. 在微服务节点上操作:
将 startService.sh 这个脚本分别拷贝到三个微服务节点的 /home/cloud/ 目录下面:
startService.sh 内容如下:
#!/bin/bash
# startService.sh - (re)start the Spring Cloud micro-services on this node.
#
# USAGE: bash startService.sh all|restartGatewayService|restartOrderService|
#                             restartPortalService|restartProductService
#
# For each service: if a matching "prod" java process is found it is sent
# SIGTERM and relaunched after 10 seconds; otherwise it is launched at once.

SERVICE_HOME=/home/cloud/services

# launchService NAME PORT
# Start NAME's jar in the background on PORT, appending stdout to NAME.log.
launchService()
{
    /usr/bin/nohup java -jar -Xms512m -Xmx512m -Dserver.port="$2" -Dspring.profiles.active=prod -Dlogging.level.root=info "${SERVICE_HOME}/$1.jar" >> "${SERVICE_HOME}/$1.log" &
}

# restartService NAME PORT
# Shared implementation behind the per-service restart functions.
restartService()
{
    local service_name="$1"
    local service_port="$2"
    local service_pid
    # PIDs of processes whose command line mentions the service and "prod",
    # excluding "task" processes and the grep commands themselves.
    service_pid=`ps -ef | grep "${service_name}" | grep -v task | grep prod | grep -v grep | awk '{print $2}'`
    if [ ! -n "$service_pid" ]; then
        echo "${service_name} is already stopped,i will start ${service_name} now!"
        launchService "${service_name}" "${service_port}"
        sleep 10
    else
        echo "${service_name} is running,i will kill -15 ${service_name} and start it now!"
        # Left unquoted on purpose: several PIDs become separate arguments.
        kill -15 $service_pid
        sleep 10
        launchService "${service_name}" "${service_port}"
    fi
}

restartGatewayService()
{
    restartService "gateway-service" 9999
}

restartOrderService()
{
    restartService "order-service" 8020
}

restartPortalService()
{
    restartService "portal-service" 8080
}

restartProductService()
{
    restartService "product-service" 8010
}

# Restart every service, one after another.
restartAllService()
{
    restartGatewayService
    restartOrderService
    restartPortalService
    restartProductService
}

# List the running JVMs with their main class / jar.
serviceStatus()
{
    echo "service status as:"
    jps -l
}

case $1 in
    "all")
        echo "restart all service!"
        restartAllService
        serviceStatus
        ;;
    "restartGatewayService")
        echo "restart gateway service!"
        restartGatewayService
        serviceStatus
        ;;
    "restartOrderService")
        echo "restart order service!"
        restartOrderService
        serviceStatus
        ;;
    "restartPortalService")
        echo "restart portal service!"
        restartPortalService
        serviceStatus
        ;;
    "restartProductService")
        echo "restart product service!"
        restartProductService
        serviceStatus
        ;;
    *)
        echo "input parametererror! USAGE: bash startService.sh all|one"
        ;;
esac
此方案已经在生产环境开始运行,运行效果测试有效,如有问题请及时联系博主。
推荐阅读
- Python实现发送邮件(实现单发/群发邮件验证码)
- SpringBoot任意位置获取HttpServletRequest对象
- 新版PMBOK|项目管理原则核心内容
- 面试官(MySQL为何选择B+树存储索引())
- 我只用了3步,实现了一个逼真的3D场景渲染
- HTTP负载均衡WEB集群架构搭建-wordpress
- 实锤了!程序员喜提新头衔!!
- 直方图均衡化
- Opencv中几何变换