在日常巡检过程中,无需登录服务器逐一查看,可以通过调用 Kubernetes API 获取指定命名空间下所有 Pod 的状态,并将汇总结果发送到 Discord。
然后通过定时任务(如 cron)在每天 9 点执行本脚本即可,执行时将目标命名空间作为第一个命令行参数传入。
# Daily pod inspection script.
#
# Lists the pods of one namespace through the Kubernetes API, splits them into
# "restarted" and "not restarted" groups, and posts a plain-text summary to a
# Discord webhook.
#
# Usage: python <this_script> <namespace>
from datetime import datetime, timezone
import json
import sys

from kubernetes import client, config
from kubernetes.client.rest import ApiException
import pytz
import requests

if len(sys.argv) < 2:
    # Without this guard a missing argument surfaces as a bare IndexError.
    sys.exit("usage: {0} <namespace>".format(sys.argv[0]))

config.load_kube_config()
api_instance = client.CoreV1Api()

target_namespaces = [sys.argv[1]]
# Pods whose name contains any of these substrings are left out of the report.
filtered_keywords = ["mysql", "redis", "memcached", "postgres", "backend"]
# Webhook endpoint the report is posted to; empty here and must be filled in.
discord_webhook_url = ""

containers_without_restart = []
containers_with_restart = []

# Time zone used only for the report's timestamp. Defined before the loops so
# it exists even when a namespace has no pods or every pod is filtered out.
cst_timezone = pytz.timezone("Asia/Shanghai")

for target_namespace in target_namespaces:
    try:
        pods = api_instance.list_namespaced_pod(namespace=target_namespace).items
    except ApiException as e:
        print(f"Exception when calling CoreV1Api: {e} \n")
        continue

    # Start every namespace with empty result lists so one report never
    # repeats pods collected for a previous namespace.
    containers_without_restart.clear()
    containers_with_restart.clear()

    current_time = datetime.now(timezone.utc)

    for pod in pods:
        pod_name = pod.metadata.name
        if any(keyword in pod_name for keyword in filtered_keywords):
            continue

        pod_status = pod.status.phase

        # The reported duration is the time since the pod object was created.
        pod_start_time = pod.metadata.creation_timestamp
        if pod_start_time is not None:
            # Drop the fractional seconds from the timedelta's string form.
            pod_duration_str = str(current_time - pod_start_time).split(".")[0]
        else:
            pod_duration_str = "Unknown"

        pod_restart_reason = ""
        pod_restart_count = 0
        # container_statuses is None when no status has been reported yet.
        for container_status in pod.status.container_statuses or []:
            if container_status.restart_count > 0:
                # last_state.terminated may be absent even after a restart;
                # fall back to "Unknown" instead of raising AttributeError.
                last_state = container_status.last_state
                terminated = last_state.terminated if last_state is not None else None
                reason = terminated.reason if terminated is not None else None
                # Keep the reason and the count from the same container so the
                # report never pairs one container's reason with another's count.
                pod_restart_reason = reason or "Unknown"
                pod_restart_count = container_status.restart_count

        if pod_restart_reason:
            containers_with_restart.append(
                (pod_name, pod_status, pod_duration_str, pod_restart_reason, pod_restart_count)
            )
        else:
            containers_without_restart.append((pod_name, pod_status, pod_duration_str))

    # Build the report: header, pods without restarts, separator, pods with restarts.
    report_lines = [
        "环境: {0} 获取时间:{1}\n\n".format(
            target_namespace,
            datetime.now(cst_timezone).strftime("%Y-%m-%d %H:%M:%S") + " CST",
        )
    ]
    for pod_name, pod_status, pod_duration_str in containers_without_restart:
        report_lines.append(
            "容器名称: {0} 当前状态: {1} 运行时长: {2}\n".format(
                pod_name, pod_status, pod_duration_str
            )
        )
    report_lines.append("------------------------------------------------------\n")
    for (
        pod_name,
        pod_status,
        pod_duration_str,
        pod_restart_reason,
        restart_count,
    ) in containers_with_restart:
        report_lines.append(
            "容器名称: {0} 当前状态: {1} 运行时长: {2} 重启原因: {3} 重启次数:{4}\n".format(
                pod_name, pod_status, pod_duration_str, pod_restart_reason, restart_count
            )
        )

    # Wrap the report in a Markdown code fence so Discord renders it monospaced.
    # NOTE(review): Discord limits message content length (2000 characters per
    # its docs, TODO confirm); a large namespace may need the report split up.
    message = "```{0}```".format("".join(report_lines))

    payload = {"content": message}
    headers = {"Content-Type": "application/json"}
    try:
        # A timeout keeps a stalled connection from hanging the scheduled job.
        response = requests.post(
            discord_webhook_url,
            data=json.dumps(payload),
            headers=headers,
            timeout=10,
        )
    except requests.RequestException as e:
        # Also covers an empty or malformed webhook URL.
        print(f"Failed to send message to Discord: {e}")
        continue

    print(response.content)
    # The script treats HTTP 204 as the webhook's success response.
    if response.status_code == 204:
        print("Message sent to Discord successfully")
    else:
        print(f"Failed to send message to Discord. Status code: {response.status_code} ")