在日常巡检过程中,无需登录服务器逐一查看,只需通过调用 Kubernetes API 获取所有 Pod 的状态,并将结果推送到 Discord;
然后配置定时任务(例如 cron),在每天 9 点执行本脚本即可。
# Daily Kubernetes pod inspection.
#
# Lists the pods of one namespace through the Kubernetes API, splits them into
# "never restarted" and "restarted" groups, and posts the resulting report to
# a Discord webhook.
#
# Usage: python <this script> <namespace>

from datetime import datetime, timezone
import json
import sys

from kubernetes import client, config
from kubernetes.client.rest import ApiException
import pytz
import requests

# Pods whose name contains any of these substrings are left out of the report.
filtered_keywords = ["mysql", "redis", "memcached", "postgres", "backend"]

# Discord webhook that receives the report; must be filled in before use.
discord_webhook_url = ""

# Timezone used for the "fetched at" timestamp in the report header.
# Defined at module level so it exists even when no pod passes the filter
# (it used to be assigned inside the pod loop, which caused a NameError for
# empty or fully filtered namespaces).
cst_timezone = pytz.timezone("Asia/Shanghai")

# Seconds to wait for Discord before giving up, so a scheduled run cannot hang.
REQUEST_TIMEOUT_SECONDS = 10


def _format_duration(start_time, now):
    """Return ``now - start_time`` as text without fractional seconds.

    Returns "Unknown" when the pod has no creation timestamp.
    """
    if start_time is None:
        return "Unknown"
    return str(now - start_time).split(".")[0]


def _restart_summary(container_statuses):
    """Summarise restarts across all containers of one pod.

    Args:
        container_statuses: ``pod.status.container_statuses``; may be None.

    Returns:
        A ``(reason, restart_count)`` tuple. ``reason`` is "" when no
        container has restarted; otherwise it lists the distinct termination
        reasons of the restarted containers, and ``restart_count`` is the sum
        of their restart counts. For a single-container pod this is exactly
        that container's reason and count.
    """
    reasons = []
    total_restarts = 0
    for container_status in container_statuses or []:
        if container_status.restart_count > 0:
            total_restarts += container_status.restart_count
            # last_state / last_state.terminated are optional in the API
            # object, so guard both instead of assuming they are populated.
            last_state = container_status.last_state
            terminated = last_state.terminated if last_state is not None else None
            reason = (terminated.reason if terminated is not None else None) or "Unknown"
            if reason not in reasons:
                reasons.append(reason)
    return ", ".join(reasons), total_restarts


def collect_pod_rows(api_instance, target_namespaces):
    """Fetch pods and split them into non-restarted and restarted rows.

    Args:
        api_instance: a ``client.CoreV1Api`` used to list the pods.
        target_namespaces: namespaces to inspect.

    Returns:
        ``(containers_without_restart, containers_with_restart)`` where the
        first list holds ``(name, phase, duration)`` tuples and the second
        holds ``(name, phase, duration, restart_reason, restart_count)``.

    Raises:
        ApiException: propagated from the Kubernetes client.
    """
    containers_without_restart = []
    containers_with_restart = []
    # One reference instant for the whole report keeps durations comparable.
    current_time = datetime.now(timezone.utc)

    for target_namespace in target_namespaces:
        pods = api_instance.list_namespaced_pod(namespace=target_namespace).items
        for pod in pods:
            pod_name = pod.metadata.name
            if any(keyword in pod_name for keyword in filtered_keywords):
                continue

            pod_status = pod.status.phase
            pod_duration_str = _format_duration(
                pod.metadata.creation_timestamp, current_time
            )
            pod_restart_reason, restart_count = _restart_summary(
                pod.status.container_statuses
            )

            if pod_restart_reason:
                containers_with_restart.append(
                    (pod_name, pod_status, pod_duration_str, pod_restart_reason, restart_count)
                )
            else:
                containers_without_restart.append(
                    (pod_name, pod_status, pod_duration_str)
                )

    return containers_without_restart, containers_with_restart


def build_message(target_namespaces, containers_without_restart, containers_with_restart):
    """Render the report as a single Discord code-block message.

    The header names every inspected namespace and the fetch time in the
    Asia/Shanghai timezone; non-restarted pods come first, then a separator
    line, then the restarted pods.
    """
    # NOTE(review): Discord is understood to cap `content` at 2000 characters;
    # a namespace with many pods may need the report split into several
    # messages -- confirm against the webhook documentation.
    parts = [
        "环境: {0} 获取时间:{1}\n\n".format(
            ", ".join(target_namespaces),
            datetime.now(cst_timezone).strftime("%Y-%m-%d %H:%M:%S") + " CST",
        )
    ]
    parts.extend(
        "容器名称: {0}  当前状态: {1}  运行时长: {2}\n".format(*row)
        for row in containers_without_restart
    )
    parts.append("------------------------------------------------------\n")
    parts.extend(
        "容器名称: {0}  当前状态: {1}  运行时长: {2}  重启原因: {3} 重启次数:{4}\n".format(*row)
        for row in containers_with_restart
    )
    return "```{0}```".format("".join(parts))


def send_to_discord(message):
    """POST ``message`` to the configured Discord webhook and print the outcome."""
    payload = {"content": message}
    headers = {"Content-Type": "application/json"}
    try:
        response = requests.post(
            discord_webhook_url,
            data=json.dumps(payload),
            headers=headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as e:
        # Covers network failures, timeouts and an unset/invalid webhook URL.
        print(f"Failed to send message to Discord: {e}")
        return

    print(response.content)
    # The script treats HTTP 204 as the success response from the webhook.
    if response.status_code == 204:
        print("Message sent to Discord successfully")
    else:
        print(f"Failed to send message to Discord. Status code: {response.status_code}")


def main():
    """Inspect the namespace given on the command line and report to Discord."""
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <namespace>")
    target_namespaces = [sys.argv[1]]

    config.load_kube_config()
    api_instance = client.CoreV1Api()

    try:
        containers_without_restart, containers_with_restart = collect_pod_rows(
            api_instance, target_namespaces
        )
    except ApiException as e:
        print(f"Exception when calling CoreV1Api: {e}\n")
        return

    send_to_discord(
        build_message(
            target_namespaces, containers_without_restart, containers_with_restart
        )
    )


if __name__ == "__main__":
    main()