Files
Zerto_Exporter/app/python-node-exporter.py
T
justin 0911a4eccc zvma10 wrapper module
Created a python module that has everything needed for comms with ZVMA version 10 in a seperate class. It does not have full api coverage, but is enough to start refactoring the exporter to use the class for ZVMa10
2023-12-08 11:05:50 -05:00

697 lines
48 KiB
Python

import requests
import http.server
import socketserver
import os
import ssl
import logging
from logging.handlers import RotatingFileHandler
import threading
import socket
from pyVim.connect import SmartConnect, Disconnect
from pyVmomi import vim
from time import sleep
from requests.packages.urllib3.exceptions import InsecureRequestWarning
from requests.structures import CaseInsensitiveDict
from tinydb import TinyDB, Query
from tinydbstorage.storage import MemoryStorage
from version import VERSION
from zvma10.vcenter import vcsite
from zvma10.zvma import zvmsite
from posthog import Posthog
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
callhomestats = os.getenv("CALL_HOME_STATS", 'True').lower() in ('false', '0', 'f')
verifySSL = os.getenv("VERIFY_SSL", 'False').lower() in ('true', '1', 't')
zvm_url = os.environ.get('ZVM_HOST', '192.168.50.60')
zvm_port = os.environ.get('ZVM_PORT', '443')
client_id = os.environ.get('CLIENT_ID', 'api-script')
client_secret = os.environ.get('CLIENT_SECRET', 'fcYMFuA5TkIUwp6b3hDUxim0f32z8erk')
scrape_speed = int(os.environ.get('SCRAPE_SPEED', 30))
api_timeout = int(os.environ.get('API_TIMEOUT', 5))
LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO').upper()
version = str(VERSION)
vcenter_host = os.environ.get('VCENTER_HOST', '192.168.50.50')
vcenter_user = os.environ.get('VCENTER_USER', 'administrator@vsphere.local')
vcenter_pwd = os.environ.get('VCENTER_PASSWORD', 'Zertodata987!')
# Get the hostname of the machine
container_id = str(socket.gethostname())
#set log line format including container_id
log_formatter = logging.Formatter("%(asctime)s;%(levelname)s;%(threadName)s;%(message)s", "%Y-%m-%d %H:%M:%S")
log_handler = RotatingFileHandler(filename=f"./logs/Log-Main-{container_id}.log", maxBytes=1024*1024*100, backupCount=5)
log_handler.setFormatter(log_formatter)
log = logging.getLogger("Node-Exporter")
log.setLevel(LOGLEVEL)
log.addHandler(log_handler)
log.info(f"Zerto-Node-Exporter - Version {version}")
log.info(f"Log Level: {LOGLEVEL}")
log.debug("Running with Variables:\nVerify SSL: " + str(verifySSL) + "\nZVM Host: " + zvm_url + "\nZVM Port: " + zvm_port + "\nClient-Id: " + client_id + "\nClient Secret: " + client_secret)
# Global Variables
token = ""
siteId = "NotSet"
siteName = "NotSet"
siteZvmVersion = ""
siteVcVersion = ""
siteZvmMajorVersion = ""
siteZvmMinorVersion = ""
siteZvmUpdateVersion = ""
siteZvmPatchVersion = ""
lastStats = CaseInsensitiveDict()
# Check if vCenter is set, if not disable VRA metrics
is_vcenter_set = True
if vcenter_host == "vcenter.local":
log.error("vCenter Host not set. Please set the environment variable VCENTER_HOST, turning off VRA CPU and Memory metrics")
is_vcenter_set = False
log.debug("vCenter data collection is enabled")
vc_connection = vcsite(vcenter_host, vcenter_user, vcenter_pwd, loglevel="debug")
# Authentication Thread which handles authentication and token refresh for ZVM API
def ZvmAuthHandler():
log.debug("ZVMAuthHandler Thread Started")
expiresIn = 0
global token
global siteId
global siteName
retries = 0
while True:
if expiresIn < 30:
h = CaseInsensitiveDict()
h["Content-Type"] = "application/x-www-form-urlencoded"
d = CaseInsensitiveDict()
d["client_id"] = client_id
d["client_secret"] = client_secret
d["grant_type"] = "client_credentials"
uri = "https://" + zvm_url + ":" + zvm_port + "/auth/realms/zerto/protocol/openid-connect/token"
delay = 0
try:
response = requests.post(url=uri, data=d, headers=h, verify=verifySSL)
response.raise_for_status()
except requests.exceptions.RequestException as e:
retries += 1
delay = 2 ** retries
log.error("Error while sending authentication request: " + str(e) + ". Retrying in " + str(delay) + " seconds")
sleep(delay)
continue
else:
retries = 0
responseJSON = response.json()
if 'access_token' not in responseJSON or 'expires_in' not in responseJSON:
log.error("Authentication response does not contain expected keys")
delay = 2 ** retries
sleep(delay)
retries += 1
continue
token = str(responseJSON.get('access_token'))
expiresIn = int(responseJSON.get('expires_in'))
log.info("Authentication successful. Token expires in " + str(expiresIn) + " seconds")
if response.status_code != 200:
log.error("Authentication request failed with status code " + str(response.status_code))
delay = 2 ** retries
sleep(delay)
retries += 1
continue
# Get Site ID and Name
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/localsite"
delay = 0
try:
log.debug("Getting Site ID and Name")
h2 = CaseInsensitiveDict()
h2["Accept"] = "application/json"
h2["Authorization"] = "Bearer " + token
response = requests.get(url=uri, timeout=3, headers=h2, verify=verifySSL)
response.raise_for_status()
except requests.exceptions.RequestException as e:
retries += 1
delay = 2 ** retries
log.error("Error while sending authentication request: " + str(e) + ". Retrying in " + str(delay) + " seconds")
sleep(delay)
continue
else:
retries = 0
responseJSON = response.json()
log.debug(responseJSON)
if 'SiteIdentifier' not in responseJSON or 'SiteName' not in responseJSON:
log.error("LocalSite API response does not contain expected keys")
delay = 2 ** retries
sleep(delay)
retries += 1
continue
else:
siteId = str(responseJSON.get('SiteIdentifier'))
siteName = str(responseJSON.get('SiteName'))
siteZvmVersion = str(responseJSON.get('Version'))
siteVcVersion = str(responseJSON.get('SiteTypeVersion'))
# Break out ZVM version strings
siteZvmMajorVersion, siteZvmMinorVersion, siteZvmUpdateVersion = siteZvmVersion.split(".")
siteZvmUpdateVersion = siteZvmUpdateVersion[0]
if (len(siteZvmUpdateVersion) > 1):
siteZvmPatchVersion = siteZvmUpdateVersion[1]
else:
siteZvmPatchVersion = "0"
log.info("Site ID: " + siteId + " Site Name: " + siteName)
expiresIn -= 10 + delay
log.debug("Token Expires in " + str(expiresIn) + " seconds")
sleep(10)
'''
# Thread which gets VM level encryption statistics from ZVM API
def GetStatsFunc():
tempdb = TinyDB(storage=MemoryStorage) # ('./db.json') used for storing db on disk for debugging
dbvm = Query()
dbvpg = Query()
while (True) :
global token
global siteId
global siteName
if (token != ""):
log.info("Got Auth Token!")
log.debug("token: " + str(token))
log.debug("Stats Collector Loop Running")
metricsDictionary = {}
h2 = CaseInsensitiveDict()
h2["Accept"] = "application/json"
h2["Authorization"] = "Bearer " + token
## Statistics API
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/statistics/vms/"
statsapi = requests.get(url=uri, timeout=3, headers=h2, verify=verifySSL)
statsapi_json = statsapi.json()
#log.debug(statsapi_json)
for vm in statsapi_json:
oldvmdata = dict()
CurrentIops = 0
CurrentWriteCounterInMBs = 0
CurrentSyncCounterInMBs = 0
CurrentNetworkTrafficCounterInMBs = 0
CurrentEncryptedLBs = 0
CurrentUnencryptedLBs = 0
CurrentTotalLBs = 0
CurrentPercentEncrypted = 0
VMName = "NA"
oldvmdata = tempdb.search(dbvm.VmIdentifier == vm['VmIdentifier'] and dbvpg.VpgIdentifier == vm['VpgIdentifier'])
log.info("Checking TempDB for VM " + vm['VmIdentifier'] + " in VPG " + vm['VpgIdentifier'])
if (oldvmdata):
log.info(vm['VmIdentifier'] + " Record Found, Updating DB")
log.debug(oldvmdata[0])
log.debug(tempdb.update(vm, dbvm.VmIdentifier == vm['VmIdentifier'] and dbvpg.VpgIdentifier == vm['VpgIdentifier']))
log.debug("!@!@!@!@!@ Stats !@!@!@!@!@")
VMName = oldvmdata[0]['VmName']
log.debug("Current VM " + str(VMName))
CurrentIops = abs(vm['IoOperationsCounter'] - oldvmdata[0]['IoOperationsCounter'])
log.debug("CurrentIops " + str(CurrentIops))
CurrentSyncCounterInMBs = abs(vm['SyncCounterInMBs'] - oldvmdata[0]['SyncCounterInMBs'])
log.debug("CurrentSyncCounterInMBs " + str(CurrentSyncCounterInMBs))
CurrentNetworkTrafficCounterInMBs = abs(vm['NetworkTrafficCounterInMBs'] - oldvmdata[0]['NetworkTrafficCounterInMBs'])
log.debug("CurrentNetworkTrafficCounterInMBs " + str(CurrentNetworkTrafficCounterInMBs))
CurrentEncryptedLBs = abs(vm['EncryptionStatistics']['EncryptedDataInLBs'] - oldvmdata[0]['EncryptionStatistics']['EncryptedDataInLBs'])
log.debug("CurrentEncryptedLBs " + str(CurrentEncryptedLBs))
CurrentUnencryptedLBs = abs(vm['EncryptionStatistics']['UnencryptedDataInLBs'] - oldvmdata[0]['EncryptionStatistics']['UnencryptedDataInLBs'])
log.debug("CurrentUnencryptedLBs " + str(CurrentUnencryptedLBs))
CurrentTotalLBs = abs(CurrentEncryptedLBs + CurrentUnencryptedLBs)
log.debug("CurrentTotalLBs " + str(CurrentTotalLBs))
if CurrentTotalLBs != 0:
CurrentPercentEncrypted = ((CurrentEncryptedLBs / CurrentTotalLBs) * 100)
else:
CurrentPercentEncrypted = 0
log.debug("CurrentPercentEncrypted " + str(CurrentPercentEncrypted))
else:
log.info(vm['VmIdentifier'] + " No Record Found, Inserting into DB")
#insert original VM record to tempdb
log.debug(tempdb.insert(vm))
# update database with VM name, for easier display in Grafana Legends
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/vms/" + vm['VmIdentifier'] +"?vpgIdentifier=" + vm['VpgIdentifier']
try:
vapi = requests.get(url=uri, timeout=3, headers=h2, verify=verifySSL)
vapi_json = vapi.json()
except Exception as e:
log.error("Error while sending api request: " + str(e))
VMName = "Unknown"
else:
log.debug("vapi_json: " + str(vapi_json))
tempdb.update({'VmName': vapi_json['VmName']}, dbvm.VmIdentifier == vm['VmIdentifier'])
log.info("Added vm to tempdb " + vm['VmIdentifier'] + " - " + vapi_json['VmName'])
VMName = vapi_json['VmName']
# Store Calculated Metrics
metricsDictionary["vm_IoOperationsCounter{VpgIdentifier=\"" + str(vm['VpgIdentifier']) + "\",VmIdentifier=\"" + str(vm['VmIdentifier']) + "\",VmName=\"" + str(VMName) + "\",SiteIdentifier=\"" + str(siteId) + "\",SiteName=\"" + str(siteName) + "\"}"] = CurrentIops
metricsDictionary["vm_WriteCounterInMBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentWriteCounterInMBs
metricsDictionary["vm_SyncCounterInMBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentSyncCounterInMBs
metricsDictionary["vm_NetworkTrafficCounterInMBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentNetworkTrafficCounterInMBs
metricsDictionary["vm_EncryptedDataInLBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentEncryptedLBs
metricsDictionary["vm_UnencryptedDataInLBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentUnencryptedLBs
metricsDictionary["vm_TotalDataInLBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentTotalLBs
metricsDictionary["vm_PercentEncrypted{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentPercentEncrypted
## Write metrics to a human readable metrics.txt file as well as a metrics file that is easy to get in prometheus
file_object = open('statsmetrics', 'w')
txt_object = open('statsmetrics.txt', 'w')
for item in metricsDictionary :
file_object.write(item)
file_object.write(" ")
file_object.write(str(metricsDictionary[item]))
file_object.write("\n")
txt_object.write(item)
txt_object.write(" ")
txt_object.write(str(metricsDictionary[item]))
txt_object.write("\n")
file_object.close()
txt_object.close()
log.debug("Starting Sleep for " + str(scrape_speed) + " seconds")
sleep(scrape_speed)
else:
log.debug("Waiting 1 second for Auth Token")
sleep(1)
'''
# Function which retrieves stats from various ZVM APIs and stores them in a metrics file
def GetDataFunc():
tempdb = TinyDB(storage=MemoryStorage)
dbvm = Query()
while (True) :
global token
global siteId
global siteName
if (token != ""):
log.debug("Got Auth Token!")
log.debug("token: " + str(token))
log.info("Data Collector Loop Running")
metricsDictionary = {}
h2 = CaseInsensitiveDict()
h2["Accept"] = "application/json"
h2["Authorization"] = "Bearer " + token
### VPGs API
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/vpgs/"
service = requests.get(url=uri, timeout=api_timeout, headers=h2, verify=verifySSL)
vpg_json = service.json()
#log.debug(vpg_json)
for vpg in vpg_json :
metricsDictionary["vpg_storage_used_in_mb{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["UsedStorageInMB"]
metricsDictionary["vpg_actual_rpo{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["ActualRPO"]
metricsDictionary["vpg_throughput_in_mb{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["ThroughputInMB"]
metricsDictionary["vpg_iops{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["IOPs"]
metricsDictionary["vpg_provisioned_storage_in_mb{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["ProvisionedStorageInMB"]
metricsDictionary["vpg_vms_count{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["VmsCount"]
metricsDictionary["vpg_configured_rpo_seconds{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["ConfiguredRpoSeconds"]
metricsDictionary["vpg_actual_history_in_minutes{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["HistoryStatusApi"]["ActualHistoryInMinutes"]
metricsDictionary["vpg_configured_history_in_minutes{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["HistoryStatusApi"]["ConfiguredHistoryInMinutes"]
metricsDictionary["vpg_failsafe_history_in_minutes_actual{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["FailSafeHistory"]["ActualFailSafeHistory"]
metricsDictionary["vpg_failsafe_history_in_minutes_configured{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["FailSafeHistory"]["ConfiguredFailSafeHistory"]
metricsDictionary["vpg_status{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["Status"]
metricsDictionary["vpg_substatus{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["SubStatus"]
metricsDictionary["vpg_alert_status{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["AlertStatus"]
### Datastores APIs
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/datastores/"
service = requests.get(url=uri, timeout=api_timeout, headers=h2, verify=verifySSL)
ds_json = service.json()
#log.debug(ds_json)
for ds in ds_json :
log.debug(f"Processing {ds['DatastoreName']}")
metricsDictionary["datastore_vras{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["NumVRAs"]
metricsDictionary["datastore_incoming_vms{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["NumIncomingVMs"]
metricsDictionary["datastore_outgoing_vms{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["NumOutgoingVMs"]
metricsDictionary["datastore_usage_capacityinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Datastore"]["CapacityInBytes"]
metricsDictionary["datastore_usage_freeinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Datastore"]["FreeInBytes"]
metricsDictionary["datastore_usage_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Datastore"]["UsedInBytes"]
metricsDictionary["datastore_usage_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Datastore"]["ProvisionedInBytes"]
metricsDictionary["datastore_usage_zerto_protected_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Protected"]["UsedInBytes"]
metricsDictionary["datastore_usage_zerto_protected_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Protected"]["ProvisionedInBytes"]
metricsDictionary["datastore_usage_zerto_recovery_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Recovery"]["UsedInBytes"]
metricsDictionary["datastore_usage_zerto_recovery_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Recovery"]["ProvisionedInBytes"]
metricsDictionary["datastore_usage_zerto_journal_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Journal"]["UsedInBytes"]
metricsDictionary["datastore_usage_zerto_journal_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Journal"]["ProvisionedInBytes"]
metricsDictionary["datastore_usage_zerto_scratch_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Scratch"]["UsedInBytes"]
metricsDictionary["datastore_usage_zerto_scratch_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Scratch"]["ProvisionedInBytes"]
metricsDictionary["datastore_usage_zerto_appliances_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Appliances"]["UsedInBytes"]
metricsDictionary["datastore_usage_zerto_appliances_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Appliances"]["ProvisionedInBytes"]
## VMs API
log.debug("Getting VMs API")
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/vms/"
vmapi_json = {}
try:
vmapi = requests.get(url=uri, timeout=3, headers=h2, verify=verifySSL)
vmapi_json = vmapi.json()
except Exception as e:
log.error("Error while sending api request: " + str(e))
VMName = "Unknown"
return
log.debug("Got VMs API")
log.debug(vmapi_json)
for vm in vmapi_json :
log.debug("Processing VM: " + str(vm['VmName']))
log.debug("Checking VM " + vm['VmIdentifier'] + " on Protected Site " + vm['ProtectedSite']['identifier'] + " against " + siteId)
if siteId == vm['ProtectedSite']['identifier']:
log.debug("Found VM " + vm['VmIdentifier'] + " on Protected Site")
if not isinstance(vm["ActualRPO"], int):
vm["ActualRPO"] = -1
metricsDictionary["vm_actualrpo{VmIdentifier=\"" + str(vm['VmIdentifier']) + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + str(siteName) + "\"}"] = vm["ActualRPO"]
metricsDictionary["vm_throughput_in_mb{VmIdentifier=\"" + str(vm['VmIdentifier']) + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + str(siteName) + "\"}"] = vm["ThroughputInMB"]
metricsDictionary["vm_iops{VmIdentifier=\"" + str(vm['VmIdentifier']) + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["IOPs"]
metricsDictionary["vm_journal_hard_limit{VmIdentifier=\"" + str(vm['VmIdentifier']) + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + str(siteName) + "\"}"] = vm["JournalHardLimit"]["LimitValue"]
metricsDictionary["vm_journal_warning_limit{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["JournalWarningThreshold"]["LimitValue"]
metricsDictionary["vm_journal_used_storage_mb{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["JournalUsedStorageMb"]
metricsDictionary["vm_outgoing_bandwidth_in_mbps{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["OutgoingBandWidthInMbps"]
metricsDictionary["vm_used_storage_in_MB{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["UsedStorageInMB"]
metricsDictionary["vm_provisioned_storage_in_MB{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["ProvisionedStorageInMB"]
metricsDictionary["vm_status{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["Status"]
metricsDictionary["vm_substatus{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["SubStatus"]
log.debug("Processed VM: " + str(vm['VmName']))
else:
log.debug("VM " + vm['VmIdentifier'] + " is protected to this site")
## Volumes API for Scratch Volumes
log.debug("Getting Scratch Volumes")
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/volumes?volumeType=scratch"
volapi_json = {}
try:
volapi = requests.get(url=uri, timeout=api_timeout, headers=h2, verify=verifySSL)
volapi_json = volapi.json()
except Exception as e:
log.error("Error while sending api request: " + str(e))
VMName = "Unknown"
return
log.debug("Got Scratch Volumes API")
if(bool(volapi_json)):
for volume in volapi_json :
#metricsDictionary["scratch_volume_provisioned_size_in_bytes{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\"}"] = volume["Size"]["ProvisionedInBytes"]
# Determine the key for a given VM, then see if the key is already in the dictionary, if it is add the next disk to the total. If not, create a new key.
metrickey = "scratch_volume_size_in_bytes{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\",VpgName=\"" + str(volume['Vpg']['Name']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"
if (metrickey in metricsDictionary):
metricsDictionary[metrickey] = metricsDictionary[metrickey] + volume["Size"]["UsedInBytes"]
else:
metricsDictionary[metrickey] = volume["Size"]["UsedInBytes"]
percentage_used = (volume["Size"]["UsedInBytes"] / volume["Size"]["ProvisionedInBytes"] * 100)
percentage_used = round(percentage_used, 1)
#metricsDictionary["scratch_volume_percentage_used{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\"}"] = percentage_used
## Volumes API for Journal Volumes
log.debug("Getting Journal Volumes")
volapi_json = {}
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/volumes?volumeType=journal"
try:
volapi = requests.get(url=uri, timeout=api_timeout, headers=h2, verify=verifySSL)
volapi_json = volapi.json()
except Exception as e:
log.error("Error while sending api request: " + str(e))
VMName = "Unknown"
return
log.debug("Got Journal Volumes API")
if(bool(volapi_json)):
log.debug("Journal Volumes Exist")
for volume in volapi_json :
log.debug("Journal Volume: " + volume['ProtectedVm']['Name'] + " Calculating total size...")
#metricsDictionary["scratch_volume_provisioned_size_in_bytes{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\"}"] = volume["Size"]["ProvisionedInBytes"]
# Determine the key for a given VM, then see if the key is already in the dictionary, if it is add the next disk to the total. If not, create a new key.
metrickey = "vm_journal_volume_size_in_bytes{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\",VpgName=\"" + str(volume['Vpg']['Name']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"
if (metrickey in metricsDictionary):
metricsDictionary[metrickey] = metricsDictionary[metrickey] + volume["Size"]["UsedInBytes"]
else:
metricsDictionary[metrickey] = volume["Size"]["UsedInBytes"]
metrickey = "vm_journal_volume_provisioned_in_bytes{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\",VpgName=\"" + str(volume['Vpg']['Name']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"
if (metrickey in metricsDictionary):
metricsDictionary[metrickey] = metricsDictionary[metrickey] + volume["Size"]["ProvisionedInBytes"]
else:
metricsDictionary[metrickey] = volume["Size"]["ProvisionedInBytes"]
metrickey = "vm_journal_volume_count{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\",VpgName=\"" + str(volume['Vpg']['Name']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"
if (metrickey in metricsDictionary):
metricsDictionary[metrickey] = metricsDictionary[metrickey] + 1
else:
metricsDictionary[metrickey] = 1
## Write metrics to a human readable metrics.txt file as well as a metrics file that is easy to get in prometheus
log.debug("Writing metrics to file")
file_object = open('metrics', 'w')
txt_object = open('metrics.txt', 'w')
for item in metricsDictionary :
file_object.write(item)
file_object.write(" ")
file_object.write(str(metricsDictionary[item]))
file_object.write("\n")
txt_object.write(item)
txt_object.write(" ")
txt_object.write(str(metricsDictionary[item]))
txt_object.write("\n")
file_object.close()
txt_object.close()
log.debug("Metrics written to file")
# This function will get data every 10 seconds
log.debug("Starting Sleep for " + str(scrape_speed) + " seconds")
sleep(scrape_speed)
else:
log.debug("Waiting 1 second for Auth Token")
sleep(1)
# get VRA CPU and memory usage from vCenter Server
def GetVraMetrics():
# set up API endpoint and headers
log.debug("GetVraCpuMemory() called")
metricsDictionary = {}
while True:
vra_names = []
vras = []
global token
global siteId
global siteName
log.debug("Checking Token in VRA CPU MEM Collector")
if (token != ""):
log.debug("Auth Token Valid!")
log.debug("token: " + str(token))
log.info("VRA CPU MEM Collector Running")
h2 = CaseInsensitiveDict()
h2["Accept"] = "application/json"
h2["Authorization"] = "Bearer " + token
### VRA API
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/vras"
# make API call to get list of VRAs
try:
response = requests.get(url=uri, timeout=api_timeout, headers=h2, verify=verifySSL)
except Exception as e:
log.error(f"Error connecting to {uri}: {e}")
return
else:
log.debug("Response from GET /v1/vras: %s", response.text)
# parse JSON response and get the name of each VRA
vras = response.json()
log.debug("VRA names: %s", vras)
log.debug(type(vras))
for vra in vras :
# Gather other VRA Metrics from Zerto API into Metrics Diectionary
metricsDictionary["vra_memory_in_GB{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["MemoryInGB"]
metricsDictionary["vra_vcpu_count{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["NumOfCpus"]
metricsDictionary["vra_protected_vms{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["ProtectedCounters"]["Vms"]
metricsDictionary["vra_protected_vpgs{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["ProtectedCounters"]["Vpgs"]
metricsDictionary["vra_protected_volumes{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["ProtectedCounters"]["Volumes"]
metricsDictionary["vra_recovery_vms{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["RecoveryCounters"]["Vms"]
metricsDictionary["vra_recovery_vpgs{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["RecoveryCounters"]["Vpgs"]
metricsDictionary["vra_recovery_volumes{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["RecoveryCounters"]["Volumes"]
metricsDictionary["vra_self_protected_vpgs{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["SelfProtectedVpgs"]
log.debug("VRA Name: %s", vra['VraName'])
log.info(f"vCenter info: T/F = {is_vcenter_set} Host: {vcenter_host} u: {vcenter_user} p: {vcenter_pwd}")
# get the CPU and memory usage for each VRA
if is_vcenter_set:
log.debug(f"vCenter Info Is Valid... Trying to get CPU and Memory usage for VRAs")
try:
log.debug("Trying to get stats from vc module")
vradata = vc_connection.get_cpu_mem_used(vra['VraName'])
# get the CPU usage and memory usage for the VM
cpu_usage_mhz = vradata[0]
memory_usage_mb = vradata[1]
# print the CPU and memory usage for the VM
log.debug(f"VRA {vra['VraName']}) has CPU usage of {cpu_usage_mhz} MHz and memory usage of {memory_usage_mb} MB")
metricsDictionary["vra_cpu_usage_mhz{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = cpu_usage_mhz
metricsDictionary["vra_memory_usage_mb{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = memory_usage_mb
except:
log.info(f"No VM found with name {vra['VraName']}")
## Write metrics to a human readable metrics.txt file as well as a metrics file that is easy to get in prometheus
file_object = open('vrametrics', 'w')
txt_object = open('vrametrics.txt', 'w')
for item in metricsDictionary :
file_object.write(item)
file_object.write(" ")
file_object.write(str(metricsDictionary[item]))
file_object.write("\n")
txt_object.write(item)
txt_object.write(" ")
txt_object.write(str(metricsDictionary[item]))
txt_object.write("\n")
file_object.close()
txt_object.close()
# This function will get data every 10 seconds
log.debug("Starting Sleep for " + str(int(scrape_speed *2)) + " seconds")
sleep(scrape_speed * 2)
else:
log.debug("Waiting 1 second for Auth Token")
sleep(1)
# function which monitors the threads and restarts them if they die
def ThreadProbe():
global container_id
while True:
log.debug("Thread Probe Started")
metricsDictionary = {}
log.debug("Is Auth Thread Alive")
if auth_thread.is_alive():
metricsDictionary["exporter_thread_status{thread=\"" + "AuthHandler" + "\",ExporterInstance=\"" + container_id + "\"}"] = 1
else:
metricsDictionary["exporter_thread_status{thread=\"" + "AuthHandler" + "\",ExporterInstance=\"" + container_id + "\"}"] = 0
log.debug("Is Data Thread Alive")
if data_thread.is_alive():
metricsDictionary["exporter_thread_status{thread=\"" + "DataStats" + "\",ExporterInstance=\"" + container_id + "\"}"] = 1
else:
metricsDictionary["exporter_thread_status{thread=\"" + "DataStats" + "\",ExporterInstance=\"" + container_id + "\"}"] = 0
#log.debug("Is Stats Thread Alive")
#if stats_thread.is_alive():
# metricsDictionary["exporter_thread_status{thread=\"" + "EncryptionStats" + "\",ExporterInstance=\"" + container_id + "\"}"] = 1
#else:
# metricsDictionary["exporter_thread_status{thread=\"" + "EncryptionStats" + "\",ExporterInstance=\"" + container_id + "\"}"] = 0
log.debug("Is VRA Metrics Thread Alive")
if vra_metrics_thread.is_alive():
metricsDictionary["exporter_thread_status{thread=\"" + "VraMetrics" + "\",ExporterInstance=\"" + container_id + "\"}"] = 1
else:
metricsDictionary["exporter_thread_status{thread=\"" + "VraMetrics" + "\",ExporterInstance=\"" + container_id + "\"}"] = 0
log.debug("Writing Probe data to files")
file_object = open('threads', 'w')
txt_object = open('threads.txt', 'w')
for item in metricsDictionary :
file_object.write(item)
file_object.write(" ")
file_object.write(str(metricsDictionary[item]))
file_object.write("\n")
txt_object.write(item)
txt_object.write(" ")
txt_object.write(str(metricsDictionary[item]))
txt_object.write("\n")
log.debug("Trying to Close probe txt files")
file_object.close()
txt_object.close()
log.debug("Probe Thread Going to Sleep")
sleep(30)
#----------------run http server on port 9999-----------------
def WebServer():
log.info("Web Server Started")
PORT = 9999
Handler = http.server.SimpleHTTPRequestHandler
with socketserver.TCPServer(("", PORT), Handler) as httpd:
log.info(f"Webserver running on port {PORT}")
httpd.serve_forever()
def start_thread(target_func):
# start a new thread
thread = threading.Thread(target=target_func)
thread.start()
# return the thread object
return thread
# start the threads
auth_thread = start_thread(ZvmAuthHandler)
data_thread = start_thread(GetDataFunc)
#stats_thread = start_thread(GetStatsFunc())
vra_metrics_thread = start_thread(GetVraMetrics)
webserver_thread = start_thread(WebServer)
probe_thread = start_thread(ThreadProbe)
# loop indefinitely
while True:
# check if any thread has crashed
sleep(10)
if not probe_thread.is_alive():
# restart the thread
log.error("Probe Thread Died - Restarting")
probe_thread = start_thread(ThreadProbe)
if not auth_thread.is_alive():
# restart the thread
log.error("Authentication Thread Died - Restarting")
auth_thread = start_thread(ZvmAuthHandler)
if not data_thread.is_alive():
# restart the thread
log.error("Data Thread Died - Restarting")
data_thread = start_thread(GetDataFunc)
#if not stats_thread.is_alive():
# # restart the thread
# log.error("Stats Thread Died - Restarting")
# stats_thread = start_thread(GetStatsFunc())
if not vra_metrics_thread.is_alive():
# restart the thread
log.error("VRA Metrics Thread Died - Restarting")
vra_metrics_thread = start_thread(GetVraMetrics)
if not webserver_thread.is_alive():
# restart the thread
log.error("Webserver Thread Died - Restarting")
webserver_thread = start_thread(WebServer)
sleep(api_timeout)