From deae17aca9df7bc8c4023b91dd6fff86a83c4375 Mon Sep 17 00:00:00 2001 From: Justin Paul Date: Tue, 19 Dec 2023 12:18:46 -0500 Subject: [PATCH] updates to modules --- app/exporter.py | 719 ++++++++++++++++++ app/python-node-exporter.py | 3 +- app/vmware/__init__.py | 3 +- .../__pycache__/__init__.cpython-310.pyc | Bin 0 -> 238 bytes .../__pycache__/vcenter.cpython-310.pyc | Bin 0 -> 5233 bytes app/vmware/vcenter.py | 1 - app/zvma10/zvma.py | 115 +-- 7 files changed, 792 insertions(+), 49 deletions(-) create mode 100644 app/exporter.py create mode 100644 app/vmware/__pycache__/__init__.cpython-310.pyc create mode 100644 app/vmware/__pycache__/vcenter.cpython-310.pyc diff --git a/app/exporter.py b/app/exporter.py new file mode 100644 index 0000000..4bfc477 --- /dev/null +++ b/app/exporter.py @@ -0,0 +1,719 @@ +import requests +import http.server +import socketserver +import os +import ssl +import logging +from logging.handlers import RotatingFileHandler +import threading +import socket +from pyVim.connect import SmartConnect, Disconnect +from pyVmomi import vim +from time import sleep +from requests.packages.urllib3.exceptions import InsecureRequestWarning +from requests.structures import CaseInsensitiveDict +from tinydb import TinyDB, Query +from tinydbstorage.storage import MemoryStorage +from version import VERSION +from vmware.vcenter import vcsite +from zvma10.zvma import zvmsite +from posthog import Posthog +requests.packages.urllib3.disable_warnings(InsecureRequestWarning) + +""" +Variables: Normally these are imported from the Docker Container, but alternative values can be modified if running the script manually +""" + +callhomestats = os.getenv("CALL_HOME_STATS", 'True').lower() in ('false', '0', 'f') +verifySSL = os.getenv("VERIFY_SSL", 'False').lower() in ('true', '1', 't') +zvm_url = os.environ.get('ZVM_HOST', '192.168.50.60') +zvm_port = os.environ.get('ZVM_PORT', '443') +client_id = os.environ.get('CLIENT_ID', 'api-script') +client_secret = os.environ.get('CLIENT_SECRET', 'fcYMFuA5TkIUwp6b3hDUxim0f32z8erk') +scrape_speed = int(os.environ.get('SCRAPE_SPEED', 30)) +api_timeout = int(os.environ.get('API_TIMEOUT', 5)) +LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO').upper() +version = str(VERSION) +vcenter_host = os.environ.get('VCENTER_HOST', '192.168.50.50') +vcenter_user = os.environ.get('VCENTER_USER', 'administrator@vsphere.local') +vcenter_pwd = os.environ.get('VCENTER_PASSWORD', 'Zertodata987!') + + +""" +Global Variables used by the program +""" +token = "" +siteId = "NotSet" +siteName = "NotSet" +siteZvmVersion = "" +siteVcVersion = "" +siteZvmMajorVersion = "" +siteZvmMinorVersion = "" +siteZvmUpdateVersion = "" +siteZvmPatchVersion = "" +lastStats = CaseInsensitiveDict() + + +# Authentication Thread which handles authentication and token refresh for ZVM API +def ZvmAuthHandler(): + log.debug("ZVMAuthHandler Thread Started") + expiresIn = 0 + global token + global siteId + global siteName + retries = 0 + while True: + if expiresIn < 30: + h = CaseInsensitiveDict() + h["Content-Type"] = "application/x-www-form-urlencoded" + + d = CaseInsensitiveDict() + d["client_id"] = client_id + d["client_secret"] = client_secret + d["grant_type"] = "client_credentials" + + uri = "https://" + zvm_url + ":" + zvm_port + "/auth/realms/zerto/protocol/openid-connect/token" + delay = 0 + + try: + response = requests.post(url=uri, data=d, headers=h, verify=verifySSL) + response.raise_for_status() + except requests.exceptions.RequestException as e: + retries += 1 + delay = 2 ** retries + log.error("Error while sending authentication request: " + str(e) + ". Retrying in " + str(delay) + " seconds") + sleep(delay) + continue + else: + retries = 0 + + responseJSON = response.json() + if 'access_token' not in responseJSON or 'expires_in' not in responseJSON: + log.error("Authentication response does not contain expected keys") + delay = 2 ** retries + sleep(delay) + retries += 1 + continue + + token = str(responseJSON.get('access_token')) + expiresIn = int(responseJSON.get('expires_in')) + log.info("Authentication successful. Token expires in " + str(expiresIn) + " seconds") + + if response.status_code != 200: + log.error("Authentication request failed with status code " + str(response.status_code)) + delay = 2 ** retries + sleep(delay) + retries += 1 + continue + + # Get Site ID and Name + uri = "https://" + zvm_url + ":" + zvm_port + "/v1/localsite" + delay = 0 + try: + log.debug("Getting Site ID and Name") + h2 = CaseInsensitiveDict() + h2["Accept"] = "application/json" + h2["Authorization"] = "Bearer " + token + response = requests.get(url=uri, timeout=3, headers=h2, verify=verifySSL) + response.raise_for_status() + except requests.exceptions.RequestException as e: + retries += 1 + delay = 2 ** retries + log.error("Error while sending authentication request: " + str(e) + ". Retrying in " + str(delay) + " seconds") + sleep(delay) + continue + else: + retries = 0 + + responseJSON = response.json() + log.debug(responseJSON) + if 'SiteIdentifier' not in responseJSON or 'SiteName' not in responseJSON: + log.error("LocalSite API response does not contain expected keys") + delay = 2 ** retries + sleep(delay) + retries += 1 + continue + else: + siteId = str(responseJSON.get('SiteIdentifier')) + siteName = str(responseJSON.get('SiteName')) + siteZvmVersion = str(responseJSON.get('Version')) + siteVcVersion = str(responseJSON.get('SiteTypeVersion')) + + # Break out ZVM version strings + siteZvmMajorVersion, siteZvmMinorVersion, siteZvmUpdateVersion = siteZvmVersion.split(".") + siteZvmUpdateVersion = siteZvmUpdateVersion[0] + if (len(siteZvmUpdateVersion) > 1): + siteZvmPatchVersion = siteZvmUpdateVersion[1] + else: + siteZvmPatchVersion = "0" + log.info("Site ID: " + siteId + " Site Name: " + siteName) + + expiresIn -= 10 + delay + log.debug("Token Expires in " + str(expiresIn) + " seconds") + sleep(10) + +''' +# Thread which gets VM level encryption statistics from ZVM API + +def GetStatsFunc(): + tempdb = TinyDB(storage=MemoryStorage) # ('./db.json') used for storing db on disk for debugging + dbvm = Query() + dbvpg = Query() + while (True) : + global token + global siteId + global siteName + + if (token != ""): + log.info("Got Auth Token!") + log.debug("token: " + str(token)) + log.debug("Stats Collector Loop Running") + + metricsDictionary = {} + + h2 = CaseInsensitiveDict() + h2["Accept"] = "application/json" + h2["Authorization"] = "Bearer " + token + + ## Statistics API + uri = "https://" + zvm_url + ":" + zvm_port + "/v1/statistics/vms/" + statsapi = requests.get(url=uri, timeout=3, headers=h2, verify=verifySSL) + statsapi_json = statsapi.json() + #log.debug(statsapi_json) + + for vm in statsapi_json: + oldvmdata = dict() + + CurrentIops = 0 + CurrentWriteCounterInMBs = 0 + CurrentSyncCounterInMBs = 0 + CurrentNetworkTrafficCounterInMBs = 0 + CurrentEncryptedLBs = 0 + CurrentUnencryptedLBs = 0 + CurrentTotalLBs = 0 + CurrentPercentEncrypted = 0 + VMName = "NA" + + oldvmdata = tempdb.search(dbvm.VmIdentifier == vm['VmIdentifier'] and dbvpg.VpgIdentifier == vm['VpgIdentifier']) + + log.info("Checking TempDB for VM " + vm['VmIdentifier'] + " in VPG " + vm['VpgIdentifier']) + if (oldvmdata): + log.info(vm['VmIdentifier'] + " Record Found, Updating DB") + log.debug(oldvmdata[0]) + log.debug(tempdb.update(vm, dbvm.VmIdentifier == vm['VmIdentifier'] and dbvpg.VpgIdentifier == vm['VpgIdentifier'])) + + log.debug("!@!@!@!@!@ Stats !@!@!@!@!@") + VMName = oldvmdata[0]['VmName'] + log.debug("Current VM " + str(VMName)) + CurrentIops = abs(vm['IoOperationsCounter'] - oldvmdata[0]['IoOperationsCounter']) + log.debug("CurrentIops " + str(CurrentIops)) + CurrentSyncCounterInMBs = abs(vm['SyncCounterInMBs'] - oldvmdata[0]['SyncCounterInMBs']) + log.debug("CurrentSyncCounterInMBs " + str(CurrentSyncCounterInMBs)) + CurrentNetworkTrafficCounterInMBs = abs(vm['NetworkTrafficCounterInMBs'] - oldvmdata[0]['NetworkTrafficCounterInMBs']) + log.debug("CurrentNetworkTrafficCounterInMBs " + str(CurrentNetworkTrafficCounterInMBs)) + CurrentEncryptedLBs = abs(vm['EncryptionStatistics']['EncryptedDataInLBs'] - oldvmdata[0]['EncryptionStatistics']['EncryptedDataInLBs']) + log.debug("CurrentEncryptedLBs " + str(CurrentEncryptedLBs)) + CurrentUnencryptedLBs = abs(vm['EncryptionStatistics']['UnencryptedDataInLBs'] - oldvmdata[0]['EncryptionStatistics']['UnencryptedDataInLBs']) + log.debug("CurrentUnencryptedLBs " + str(CurrentUnencryptedLBs)) + CurrentTotalLBs = abs(CurrentEncryptedLBs + CurrentUnencryptedLBs) + log.debug("CurrentTotalLBs " + str(CurrentTotalLBs)) + if CurrentTotalLBs != 0: + CurrentPercentEncrypted = ((CurrentEncryptedLBs / CurrentTotalLBs) * 100) + else: + CurrentPercentEncrypted = 0 + log.debug("CurrentPercentEncrypted " + str(CurrentPercentEncrypted)) + + else: + log.info(vm['VmIdentifier'] + " No Record Found, Inserting into DB") + #insert original VM record to tempdb + log.debug(tempdb.insert(vm)) + + # update database with VM name, for easier display in Grafana Legends + uri = "https://" + zvm_url + ":" + zvm_port + "/v1/vms/" + vm['VmIdentifier'] +"?vpgIdentifier=" + vm['VpgIdentifier'] + try: + vapi = requests.get(url=uri, timeout=3, headers=h2, verify=verifySSL) + vapi_json = vapi.json() + except Exception as e: + log.error("Error while sending api request: " + str(e)) + VMName = "Unknown" + else: + log.debug("vapi_json: " + str(vapi_json)) + tempdb.update({'VmName': vapi_json['VmName']}, dbvm.VmIdentifier == vm['VmIdentifier']) + log.info("Added vm to tempdb " + vm['VmIdentifier'] + " - " + vapi_json['VmName']) + VMName = vapi_json['VmName'] + + # Store Calculated Metrics + metricsDictionary["vm_IoOperationsCounter{VpgIdentifier=\"" + str(vm['VpgIdentifier']) + "\",VmIdentifier=\"" + str(vm['VmIdentifier']) + "\",VmName=\"" + str(VMName) + "\",SiteIdentifier=\"" + str(siteId) + "\",SiteName=\"" + str(siteName) + "\"}"] = CurrentIops + metricsDictionary["vm_WriteCounterInMBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentWriteCounterInMBs + metricsDictionary["vm_SyncCounterInMBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentSyncCounterInMBs + metricsDictionary["vm_NetworkTrafficCounterInMBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentNetworkTrafficCounterInMBs + metricsDictionary["vm_EncryptedDataInLBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentEncryptedLBs + metricsDictionary["vm_UnencryptedDataInLBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentUnencryptedLBs + metricsDictionary["vm_TotalDataInLBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentTotalLBs + metricsDictionary["vm_PercentEncrypted{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentPercentEncrypted + + ## Write metrics to a human readable metrics.txt file as well as a metrics file that is easy to get in prometheus + file_object = open('statsmetrics', 'w') + txt_object = open('statsmetrics.txt', 'w') + for item in metricsDictionary : + file_object.write(item) + file_object.write(" ") + file_object.write(str(metricsDictionary[item])) + file_object.write("\n") + txt_object.write(item) + txt_object.write(" ") + txt_object.write(str(metricsDictionary[item])) + txt_object.write("\n") + file_object.close() + txt_object.close() + + log.debug("Starting Sleep for " + str(scrape_speed) + " seconds") + sleep(scrape_speed) + else: + log.debug("Waiting 1 second for Auth Token") + sleep(1) +''' + +# Function which retrieves stats from various ZVM APIs and stores them in a metrics file +def GetDataFunc(): + tempdb = TinyDB(storage=MemoryStorage) + dbvm = Query() + while (True) : + global token + global siteId + global siteName + + if (token != ""): + log.debug("Got Auth Token!") + log.debug("token: " + str(token)) + log.info("Data Collector Loop Running") + + metricsDictionary = {} + + h2 = CaseInsensitiveDict() + h2["Accept"] = "application/json" + h2["Authorization"] = "Bearer " + token + + + ### VPGs API + uri = "https://" + zvm_url + ":" + zvm_port + "/v1/vpgs/" + service = requests.get(url=uri, timeout=api_timeout, headers=h2, verify=verifySSL) + vpg_json = service.json() + #log.debug(vpg_json) + for vpg in vpg_json : + metricsDictionary["vpg_storage_used_in_mb{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["UsedStorageInMB"] + metricsDictionary["vpg_actual_rpo{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["ActualRPO"] + metricsDictionary["vpg_throughput_in_mb{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["ThroughputInMB"] + metricsDictionary["vpg_iops{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["IOPs"] + metricsDictionary["vpg_provisioned_storage_in_mb{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["ProvisionedStorageInMB"] + metricsDictionary["vpg_vms_count{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["VmsCount"] + metricsDictionary["vpg_configured_rpo_seconds{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["ConfiguredRpoSeconds"] + metricsDictionary["vpg_actual_history_in_minutes{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["HistoryStatusApi"]["ActualHistoryInMinutes"] + metricsDictionary["vpg_configured_history_in_minutes{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["HistoryStatusApi"]["ConfiguredHistoryInMinutes"] + metricsDictionary["vpg_failsafe_history_in_minutes_actual{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["FailSafeHistory"]["ActualFailSafeHistory"] + metricsDictionary["vpg_failsafe_history_in_minutes_configured{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["FailSafeHistory"]["ConfiguredFailSafeHistory"] + metricsDictionary["vpg_status{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["Status"] + metricsDictionary["vpg_substatus{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["SubStatus"] + metricsDictionary["vpg_alert_status{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["AlertStatus"] + + ### Datastores APIs + uri = "https://" + zvm_url + ":" + zvm_port + "/v1/datastores/" + service = requests.get(url=uri, timeout=api_timeout, headers=h2, verify=verifySSL) + ds_json = service.json() + #log.debug(ds_json) + for ds in ds_json : + + log.debug(f"Processing {ds['DatastoreName']}") + + metricsDictionary["datastore_vras{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["NumVRAs"] + metricsDictionary["datastore_incoming_vms{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["NumIncomingVMs"] + metricsDictionary["datastore_outgoing_vms{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["NumOutgoingVMs"] + metricsDictionary["datastore_usage_capacityinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Datastore"]["CapacityInBytes"] + metricsDictionary["datastore_usage_freeinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Datastore"]["FreeInBytes"] + metricsDictionary["datastore_usage_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Datastore"]["UsedInBytes"] + metricsDictionary["datastore_usage_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Datastore"]["ProvisionedInBytes"] + metricsDictionary["datastore_usage_zerto_protected_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Protected"]["UsedInBytes"] + metricsDictionary["datastore_usage_zerto_protected_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Protected"]["ProvisionedInBytes"] + metricsDictionary["datastore_usage_zerto_recovery_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Recovery"]["UsedInBytes"] + metricsDictionary["datastore_usage_zerto_recovery_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Recovery"]["ProvisionedInBytes"] + metricsDictionary["datastore_usage_zerto_journal_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Journal"]["UsedInBytes"] + metricsDictionary["datastore_usage_zerto_journal_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Journal"]["ProvisionedInBytes"] + metricsDictionary["datastore_usage_zerto_scratch_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Scratch"]["UsedInBytes"] + metricsDictionary["datastore_usage_zerto_scratch_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Scratch"]["ProvisionedInBytes"] + metricsDictionary["datastore_usage_zerto_appliances_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Appliances"]["UsedInBytes"] + metricsDictionary["datastore_usage_zerto_appliances_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Appliances"]["ProvisionedInBytes"] + + ## VMs API + log.debug("Getting VMs API") + uri = "https://" + zvm_url + ":" + zvm_port + "/v1/vms/" + + vmapi_json = {} + try: + vmapi = requests.get(url=uri, timeout=3, headers=h2, verify=verifySSL) + vmapi_json = vmapi.json() + except Exception as e: + log.error("Error while sending api request: " + str(e)) + VMName = "Unknown" + return + + log.debug("Got VMs API") + log.debug(vmapi_json) + for vm in vmapi_json : + log.debug("Processing VM: " + str(vm['VmName'])) + log.debug("Checking VM " + vm['VmIdentifier'] + " on Protected Site " + vm['ProtectedSite']['identifier'] + " against " + siteId) + + if siteId == vm['ProtectedSite']['identifier']: + log.debug("Found VM " + vm['VmIdentifier'] + " on Protected Site") + + if not isinstance(vm["ActualRPO"], int): + vm["ActualRPO"] = -1 + metricsDictionary["vm_actualrpo{VmIdentifier=\"" + str(vm['VmIdentifier']) + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + str(siteName) + "\"}"] = vm["ActualRPO"] + metricsDictionary["vm_throughput_in_mb{VmIdentifier=\"" + str(vm['VmIdentifier']) + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + str(siteName) + "\"}"] = vm["ThroughputInMB"] + metricsDictionary["vm_iops{VmIdentifier=\"" + str(vm['VmIdentifier']) + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["IOPs"] + metricsDictionary["vm_journal_hard_limit{VmIdentifier=\"" + str(vm['VmIdentifier']) + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + str(siteName) + "\"}"] = vm["JournalHardLimit"]["LimitValue"] + metricsDictionary["vm_journal_warning_limit{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["JournalWarningThreshold"]["LimitValue"] + metricsDictionary["vm_journal_used_storage_mb{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["JournalUsedStorageMb"] + metricsDictionary["vm_outgoing_bandwidth_in_mbps{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["OutgoingBandWidthInMbps"] + metricsDictionary["vm_used_storage_in_MB{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["UsedStorageInMB"] + metricsDictionary["vm_provisioned_storage_in_MB{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["ProvisionedStorageInMB"] + metricsDictionary["vm_status{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["Status"] + metricsDictionary["vm_substatus{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["SubStatus"] + log.debug("Processed VM: " + str(vm['VmName'])) + + else: + log.debug("VM " + vm['VmIdentifier'] + " is protected to this site") + + + ## Volumes API for Scratch Volumes + log.debug("Getting Scratch Volumes") + uri = "https://" + zvm_url + ":" + zvm_port + "/v1/volumes?volumeType=scratch" + + volapi_json = {} + try: + volapi = requests.get(url=uri, timeout=api_timeout, headers=h2, verify=verifySSL) + volapi_json = volapi.json() + except Exception as e: + log.error("Error while sending api request: " + str(e)) + VMName = "Unknown" + return + + log.debug("Got Scratch Volumes API") + if(bool(volapi_json)): + for volume in volapi_json : + #metricsDictionary["scratch_volume_provisioned_size_in_bytes{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\"}"] = volume["Size"]["ProvisionedInBytes"] + # Determine the key for a given VM, then see if the key is already in the dictionary, if it is add the next disk to the total. If not, create a new key. + metrickey = "scratch_volume_size_in_bytes{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\",VpgName=\"" + str(volume['Vpg']['Name']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}" + if (metrickey in metricsDictionary): + metricsDictionary[metrickey] = metricsDictionary[metrickey] + volume["Size"]["UsedInBytes"] + else: + metricsDictionary[metrickey] = volume["Size"]["UsedInBytes"] + percentage_used = (volume["Size"]["UsedInBytes"] / volume["Size"]["ProvisionedInBytes"] * 100) + percentage_used = round(percentage_used, 1) + #metricsDictionary["scratch_volume_percentage_used{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\"}"] = percentage_used + + ## Volumes API for Journal Volumes + log.debug("Getting Journal Volumes") + + volapi_json = {} + uri = "https://" + zvm_url + ":" + zvm_port + "/v1/volumes?volumeType=journal" + try: + volapi = requests.get(url=uri, timeout=api_timeout, headers=h2, verify=verifySSL) + volapi_json = volapi.json() + except Exception as e: + log.error("Error while sending api request: " + str(e)) + VMName = "Unknown" + return + + log.debug("Got Journal Volumes API") + if(bool(volapi_json)): + log.debug("Journal Volumes Exist") + for volume in volapi_json : + log.debug("Journal Volume: " + volume['ProtectedVm']['Name'] + " Calculating total size...") + #metricsDictionary["scratch_volume_provisioned_size_in_bytes{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\"}"] = volume["Size"]["ProvisionedInBytes"] + # Determine the key for a given VM, then see if the key is already in the dictionary, if it is add the next disk to the total. If not, create a new key. + metrickey = "vm_journal_volume_size_in_bytes{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\",VpgName=\"" + str(volume['Vpg']['Name']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}" + if (metrickey in metricsDictionary): + metricsDictionary[metrickey] = metricsDictionary[metrickey] + volume["Size"]["UsedInBytes"] + else: + metricsDictionary[metrickey] = volume["Size"]["UsedInBytes"] + + metrickey = "vm_journal_volume_provisioned_in_bytes{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\",VpgName=\"" + str(volume['Vpg']['Name']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}" + if (metrickey in metricsDictionary): + metricsDictionary[metrickey] = metricsDictionary[metrickey] + volume["Size"]["ProvisionedInBytes"] + else: + metricsDictionary[metrickey] = volume["Size"]["ProvisionedInBytes"] + + metrickey = "vm_journal_volume_count{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\",VpgName=\"" + str(volume['Vpg']['Name']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}" + if (metrickey in metricsDictionary): + metricsDictionary[metrickey] = metricsDictionary[metrickey] + 1 + else: + metricsDictionary[metrickey] = 1 + + ## Write metrics to a human readable metrics.txt file as well as a metrics file that is easy to get in prometheus + log.debug("Writing metrics to file") + file_object = open('metrics', 'w') + txt_object = open('metrics.txt', 'w') + for item in metricsDictionary : + file_object.write(item) + file_object.write(" ") + file_object.write(str(metricsDictionary[item])) + file_object.write("\n") + txt_object.write(item) + txt_object.write(" ") + txt_object.write(str(metricsDictionary[item])) + txt_object.write("\n") + + file_object.close() + txt_object.close() + log.debug("Metrics written to file") + + # This function will get data every 10 seconds + log.debug("Starting Sleep for " + str(scrape_speed) + " seconds") + sleep(scrape_speed) + else: + log.debug("Waiting 1 second for Auth Token") + sleep(1) + +# get VRA CPU and memory usage from vCenter Server +def GetVraMetrics(): + # set up API endpoint and headers + log.debug("GetVraCpuMemory() called") + metricsDictionary = {} + while True: + vra_names = [] + vras = [] + global token + global siteId + global siteName + + log.debug("Checking Token in VRA CPU MEM Collector") + if (token != ""): + log.debug("Auth Token Valid!") + log.debug("token: " + str(token)) + log.info("VRA CPU MEM Collector Running") + + h2 = CaseInsensitiveDict() + h2["Accept"] = "application/json" + h2["Authorization"] = "Bearer " + token + + + ### VRA API + uri = "https://" + zvm_url + ":" + zvm_port + "/v1/vras" + + # make API call to get list of VRAs + try: + response = requests.get(url=uri, timeout=api_timeout, headers=h2, verify=verifySSL) + except Exception as e: + log.error(f"Error connecting to {uri}: {e}") + return + else: + log.debug("Response from GET /v1/vras: %s", response.text) + # parse JSON response and get the name of each VRA + + vras = response.json() + + log.debug("VRA names: %s", vras) + log.debug(type(vras)) + for vra in vras : + # Gather other VRA Metrics from Zerto API into Metrics Diectionary + metricsDictionary["vra_memory_in_GB{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["MemoryInGB"] + metricsDictionary["vra_vcpu_count{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["NumOfCpus"] + metricsDictionary["vra_protected_vms{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["ProtectedCounters"]["Vms"] + metricsDictionary["vra_protected_vpgs{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["ProtectedCounters"]["Vpgs"] + metricsDictionary["vra_protected_volumes{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["ProtectedCounters"]["Volumes"] + metricsDictionary["vra_recovery_vms{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["RecoveryCounters"]["Vms"] + metricsDictionary["vra_recovery_vpgs{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["RecoveryCounters"]["Vpgs"] + metricsDictionary["vra_recovery_volumes{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["RecoveryCounters"]["Volumes"] + metricsDictionary["vra_self_protected_vpgs{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["SelfProtectedVpgs"] + + + log.debug("VRA Name: %s", vra['VraName']) + log.info(f"vCenter info: T/F = {is_vcenter_set} Host: {vcenter_host} u: {vcenter_user} p: {vcenter_pwd}") + + # get the CPU and memory usage for each VRA + if is_vcenter_set: + log.debug(f"vCenter Info Is Valid... Trying to get CPU and Memory usage for VRAs") + try: + log.debug("Trying to get stats from vc module") + vradata = vc_connection.get_cpu_mem_used(vra['VraName']) + + # get the CPU usage and memory usage for the VM + cpu_usage_mhz = vradata[0] + memory_usage_mb = vradata[1] + + # print the CPU and memory usage for the VM + log.debug(f"VRA {vra['VraName']}) has CPU usage of {cpu_usage_mhz} MHz and memory usage of {memory_usage_mb} MB") + metricsDictionary["vra_cpu_usage_mhz{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = cpu_usage_mhz + metricsDictionary["vra_memory_usage_mb{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = memory_usage_mb + except: + log.info(f"No VM found with name {vra['VraName']}") + + ## Write metrics to a human readable metrics.txt file as well as a metrics file that is easy to get in prometheus + file_object = open('vrametrics', 'w') + txt_object = open('vrametrics.txt', 'w') + for item in metricsDictionary : + file_object.write(item) + file_object.write(" ") + file_object.write(str(metricsDictionary[item])) + file_object.write("\n") + txt_object.write(item) + txt_object.write(" ") + txt_object.write(str(metricsDictionary[item])) + txt_object.write("\n") + + file_object.close() + txt_object.close() + + # This function will get data every 10 seconds + log.debug("Starting Sleep for " + str(int(scrape_speed *2)) + " seconds") + sleep(scrape_speed * 2) + else: + log.debug("Waiting 1 second for Auth Token") + sleep(1) + +# function which monitors the threads and restarts them if they die +def ThreadProbe(): + global container_id + while True: + log.debug("Thread Probe Started") + metricsDictionary = {} + + log.debug("Is Auth Thread Alive") + if auth_thread.is_alive(): + metricsDictionary["exporter_thread_status{thread=\"" + "AuthHandler" + "\",ExporterInstance=\"" + container_id + "\"}"] = 1 + else: + metricsDictionary["exporter_thread_status{thread=\"" + "AuthHandler" + "\",ExporterInstance=\"" + container_id + "\"}"] = 0 + + log.debug("Is Data Thread Alive") + if data_thread.is_alive(): + metricsDictionary["exporter_thread_status{thread=\"" + "DataStats" + "\",ExporterInstance=\"" + container_id + "\"}"] = 1 + else: + metricsDictionary["exporter_thread_status{thread=\"" + "DataStats" + "\",ExporterInstance=\"" + container_id + "\"}"] = 0 + + #log.debug("Is Stats Thread Alive") + #if stats_thread.is_alive(): + # metricsDictionary["exporter_thread_status{thread=\"" + "EncryptionStats" + "\",ExporterInstance=\"" + container_id + "\"}"] = 1 + #else: + # metricsDictionary["exporter_thread_status{thread=\"" + "EncryptionStats" + "\",ExporterInstance=\"" + container_id + "\"}"] = 0 + + log.debug("Is VRA Metrics Thread Alive") + if vra_metrics_thread.is_alive(): + metricsDictionary["exporter_thread_status{thread=\"" + "VraMetrics" + "\",ExporterInstance=\"" + container_id + "\"}"] = 1 + else: + metricsDictionary["exporter_thread_status{thread=\"" + "VraMetrics" + "\",ExporterInstance=\"" + container_id + "\"}"] = 0 + + log.debug("Writing Probe data to files") + file_object = open('threads', 'w') + txt_object = open('threads.txt', 'w') + for item in metricsDictionary : + file_object.write(item) + file_object.write(" ") + file_object.write(str(metricsDictionary[item])) + file_object.write("\n") + txt_object.write(item) + txt_object.write(" ") + txt_object.write(str(metricsDictionary[item])) + txt_object.write("\n") + + log.debug("Trying to Close probe txt files") + file_object.close() + txt_object.close() + + log.debug("Probe Thread Going to Sleep") + sleep(30) + +#----------------run http server on port 9999----------------- +def WebServer(): + log.info("Web Server Started") + PORT = 9999 + + Handler = http.server.SimpleHTTPRequestHandler + + with socketserver.TCPServer(("", PORT), Handler) as httpd: + log.info(f"Webserver running on port {PORT}") + httpd.serve_forever() + +def start_thread(target_func): + # start a new thread + thread = threading.Thread(target=target_func) + thread.start() + # return the thread object + return thread + +""" +Main Program Logic +""" + +# Initialize zvmsite instance +zvm_instance = zvmsite( + host=zvm_url, + port=zvm_port, + client_id=client_id, + client_secret=client_secret, + grant_type="client_credentials", + loglevel=LOGLEVEL +) + +# Start the zvmsite authentication thread +zvm_instance.connect() + +# Get the hostname of the machine +container_id = str(socket.gethostname()) + +#set log line format including container_id +log_formatter = logging.Formatter("%(asctime)s;%(levelname)s;%(threadName)s;%(message)s", "%Y-%m-%d %H:%M:%S") +log_handler = RotatingFileHandler(filename=f"./logs/Log-Main-{container_id}.log", maxBytes=1024*1024*100, backupCount=5) +log_handler.setFormatter(log_formatter) +log = logging.getLogger("Node-Exporter") +log.setLevel(LOGLEVEL) +log.addHandler(log_handler) +log.info(f"Zerto-Node-Exporter - Version {version}") +log.info(f"Log Level: {LOGLEVEL}") +log.debug("Running with Variables:\nVerify SSL: " + str(verifySSL) + "\nZVM Host: " + zvm_url + "\nZVM Port: " + zvm_port + "\nClient-Id: " + client_id + "\nClient Secret: " + client_secret) + +# Check if vCenter is set, if not disable VRA metrics +is_vcenter_set = True +if vcenter_host == "vcenter.local": + log.error("vCenter Host not set. Please set the environment variable VCENTER_HOST, turning off VRA CPU and Memory metrics") + is_vcenter_set = False +log.debug("vCenter data collection is enabled") +vc_connection = vcsite(vcenter_host, vcenter_user, vcenter_pwd, loglevel="debug") + + +# start the threads +auth_thread = start_thread(ZvmAuthHandler) +data_thread = start_thread(GetDataFunc) +#stats_thread = start_thread(GetStatsFunc()) +vra_metrics_thread = start_thread(GetVraMetrics) +webserver_thread = start_thread(WebServer) +probe_thread = start_thread(ThreadProbe) + +# loop indefinitely +while True: + # check if any thread has crashed + sleep(10) + if not probe_thread.is_alive(): + # restart the thread + log.error("Probe Thread Died - Restarting") + probe_thread = start_thread(ThreadProbe) + if not auth_thread.is_alive(): + # restart the thread + log.error("Authentication Thread Died - Restarting") + auth_thread = start_thread(ZvmAuthHandler) + if not data_thread.is_alive(): + # restart the thread + log.error("Data Thread Died - Restarting") + data_thread = start_thread(GetDataFunc) + #if not stats_thread.is_alive(): + # # restart the thread + # log.error("Stats Thread Died - Restarting") + # stats_thread = start_thread(GetStatsFunc()) + if not vra_metrics_thread.is_alive(): + # restart the thread + log.error("VRA Metrics Thread Died - Restarting") + vra_metrics_thread = start_thread(GetVraMetrics) + if not webserver_thread.is_alive(): + # restart the thread + log.error("Webserver Thread Died - Restarting") + webserver_thread = start_thread(WebServer) + sleep(api_timeout) \ No newline at end of file diff --git a/app/python-node-exporter.py b/app/python-node-exporter.py index 897365f..a6bc628 100644 --- a/app/python-node-exporter.py +++ b/app/python-node-exporter.py @@ -15,7 +15,7 @@ from requests.structures import CaseInsensitiveDict from tinydb import TinyDB, Query from tinydbstorage.storage import MemoryStorage from version import VERSION -from zvma10.vcenter import vcsite +from vmware.vcenter import vcsite from zvma10.zvma import zvmsite from posthog import Posthog @@ -587,7 +587,6 @@ def GetVraMetrics(): log.debug("Waiting 1 second for Auth Token") sleep(1) - # function which monitors the threads and restarts them if they die def ThreadProbe(): global container_id diff --git a/app/vmware/__init__.py b/app/vmware/__init__.py index bc546f1..a5312ec 100644 --- a/app/vmware/__init__.py +++ b/app/vmware/__init__.py @@ -1,5 +1,4 @@ print("Initializing zvma10 package...") #from .zvma import zvm -from .vcenter import vcsite -from .zvma import zvmsite \ No newline at end of file +from .vcenter import vcsite \ No newline at end of file diff --git a/app/vmware/__pycache__/__init__.cpython-310.pyc b/app/vmware/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab8f405b2c8e63e188ccd6d8c8f3e82ee59323ea GIT binary patch literal 238 zcmd1j<>g`kf{K!g)Mz067{oyaOhAqU5En}TiByIZhGs@ah7`sWrX0pxrYI&Ln>m<4 zlch?|GcU6wGchN#Dl;!#p{gu5(a=DlATc>RF+EjJPwyooP`f7MEw-}c;>?m%KTYOa ztOZ4xc_p{l%aT*`N>Yo8KsK#pC}IUtVB(jRerR!OQL%njX>mzrp1yl#iAQOYepG5v zNq)R*ML~WMP?vsUL4kf*Zh2x+s(yTYCeRJ>@p=W7w>WHa^HWN5Qtd$Y7P9~e4h8{c E0LQ64jsO4v literal 0 HcmV?d00001 diff --git a/app/vmware/__pycache__/vcenter.cpython-310.pyc b/app/vmware/__pycache__/vcenter.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..10f076207ab0100f1e9c9148b15f725749a3bd09 GIT binary patch literal 5233 zcmcgwOLN=S6~>DI2vQXF@N1k&QO9b`X)LvA`iPS{b`;5WN0J>&RvJ!P6yk*>&;&tW zfRcp)JDoba*89w4GD9u>8@lPTzu|7X@~&$y+J5JP)GJOG?M%tRci;D(dvU&V&P6;i z;VAgkZzaJO-&2&oQ{m)KN8uKpWD~#?rgoG_ZK{#h)Kn^K9X&FdMpSB+WLfW+O%wHc z$7)(KwVO6lqf?HYrlTr%6joy9mkKkVY0V1L*Oi9#5_2mJO;pz-pQnp)w;Qxl;d~M% zts;*$eJ_keX)lUdXRV=%nTK)er(t(zIqU>?{VwYSyhYQT{2d9ujM_kHs!VBWY=#-E z^rhO=naQj#m8QXLRz_N44yzzFS(Qy7wOEZ!BDGnaO(89_X$U(j%)M3;roqdI zjC*U#_gmJ9Wy{Ey@GRm<{s_p`T=A6L0IInL)N&oD=Ov)Q6ktiBnVUoFkji#i9y&~A z+MzbEQXCV>%*^Ndz|QSqHMb5`RY9K#h8+6Wa;04xP6BOek)?Vr^Gt!%_FDGd+%-RG zrC}5_k`Lytb%MR1)AdKCbe9J{TN@XnAW8fkbj+saKEFN}U7us_+}#^v&-`f zop>i%Sc!M8uliy4`d%yOrU9QvZ3t~KU()Uyy0EwLZfFT%NB+}~57HnJl`X&ZxZhih z``t8~Sc_S3ed%d0<{00Y62@+vq{5)Gu=`2Ci_twlN%mvTgtHg$aQnbZl8&&DPr|q> zY|kS#@jOxL_j&;r_R9S`D@&V8E5d?EJ5Ul)UXFR>r&xuX5KdH+AU&!Q<((kKZgwzM z#|UAgb%oAHI6h+|?P`>Tkp!J>Uc&^uPB29t_2l1Ct9nfZsx`H)W|M`!&W}g6ptnaqIt`jeD(l~nf$Atjl|iZ6+SkfA+DM&r zrt}rQl^Vm+p)#OWGx>gMG3^=Djp?}xl{MOBqz+RCT6QI^pidRg1fE*1GUGWB0E*w% z+mr43K+pB|)WDFmm_n^GoMs?IyLt%A@)>N)x?(8l%z>{e1K5_VF8jGAm%sFwyF3%31!Cna5`l?#toJ4#|k};?lznZ|(lt zlBkR*0mB$@jix@WpTd$8Vv>@-757s$~ff_MynEXv@F9tnL zg_eX(M-GI}L3lY@4Wtei@Yc9L8EsO=eT4 z{Ui0ih}EE|%43a_;z}Bj`jc{@tjU(kqm~gdhWa6d%p7VT8lZ-jYwglN|4iw^+0lR0 zU#FQqSI}yb(hsd%PwjSj2gEqk2TN6GctmPKk;aK}B zAB<)!v|@t@pIr&;Bh`bV`;6#(|u6~?NxM3oN^J#&sx)#fY19(aJMi_vh3A9E8 z*A8__>EfjPcYOe;QD z?kjf&`WTJ!sJ1MFr_)|7su$yWAwtfmz?Epjd*t4{AaQY`~R`{7HkFCg(D^( z1rS=hDwzy7eS*o8?a5(%#N^5L)F~!Uw5Pv=$up;z{MCrHvy#WN{Hr5ok7~f|^QZef zQ;FG9)(fZEP4b>#cXso?bhxX9d*9&q@W%PC052)f;wuEJ1P=&4C0HleAb3RZ1%c%1 zyQjI@0WG?%ga1fZ{#%UxANlH$0KX&f3APAY1dJdc*e2K|*df5ZSCP*0Tg>BaYI;o2 zA&3aN1TjI6;0d64?lhrBdp*mkOT?+y_WTnjNj?UA7rlr)pqJ@ruc4P?Xm#xxf^DLhB-ozQ7Q~MUB`+* zj0Bfg2L21mT{(ickQ2`F>VHK!p}Q4btPyx&2 zvc331;dmY{YW)u7tDg4+FI*<_9c bool: + # Assuming self.token is the authentication token and it's set upon successful authentication + # and self.__connected__ is a boolean indicating the connection status + return self.token is not None and self.__connected__ + + def setup_logging(self) -> None: container_id = str(socket.gethostname()) log_formatter = logging.Formatter("%(asctime)s;%(levelname)s;%(threadName)s;%(message)s", "%Y-%m-%d %H:%M:%S") log_handler = RotatingFileHandler(filename=f"./logs/Log-{container_id}.log", maxBytes=1024*1024*100, backupCount=5) @@ -127,7 +135,7 @@ class zvmsite: self.log.setLevel(self.LOGLEVEL) self.log.addHandler(log_handler) - def __redact__(self, data): + def __redact__(self, data) -> str: sensitive_keys = ["password", "secret", "token"] # Add any other keys that need redaction redacted_data = {} @@ -139,7 +147,7 @@ class zvmsite: return redacted_data - def load_or_generate_uuid(self): + def load_or_generate_uuid(self) -> uuid.uuid4: uuid_path = 'uuid.txt' if os.path.exists(uuid_path): with open(uuid_path, 'r') as file: @@ -154,26 +162,26 @@ class zvmsite: file.write(new_uuid) return new_uuid - def setup_posthog(self): + def setup_posthog(self) -> None: self.posthog = Posthog(project_api_key='phc_HflqUkx9majhzm8DZva8pTwXFRnOn99onA9xPpK5HaQ', host='https://posthog.jpaul.io') self.posthog.debug = True self.posthog.identify(distinct_id=self.uuid) - def construct_url(self, path="", params=None): + def construct_url(self, path="", params=None) -> str: full_url = f"{self.base_url}/{path}" if params: query_string = urlencode({k: str(v) for k, v in params.items() if v is not None}) full_url = f"{full_url}?{query_string}" return full_url - def deconstruct_url(self, url): + def deconstruct_url(self, url) -> Tuple[str, str]: parsed_url = urlparse(url) base_url = f"{parsed_url.scheme}://{parsed_url.netloc}" path = parsed_url.path return base_url, path - def make_api_request(self, method, url, data=None, json_data=None, headers=None, timeout=3, test=None): + def make_api_request(self, method, url, data=None, json_data=None, headers=None, timeout=3, test=None) -> Optional[Union[Dict[str, Any], str]]: try: headers = headers or {} start_time = time.time() # Record the start time @@ -241,7 +249,7 @@ class zvmsite: self.log.error(f"Response content: {e.response.text}") return None - def connect(self): + def connect(self) -> None: if (self.__auththread__ is None) or (not self.__auththread__.is_alive()): self._running = True self.__auththread__ = threading.Thread(target=self.__authhandler__, daemon=True) @@ -250,13 +258,13 @@ class zvmsite: else: self.log.info("Already connected to the ZVM") - def disconnect(self): + def disconnect(self) -> None: self.log.debug("Disconnecting") self._running = False if self.__auththread__ and self.__auththread__.is_alive(): self.__auththread__.join(timeout=5) - def alert(self, alertidentifier=None): + def alert(self, alertidentifier=None) -> Dict[str, Any]: if alertidentifier is None: self.log.error("Alert identifier is required for get_vpg function.") @@ -268,31 +276,53 @@ class zvmsite: uri = self.construct_url(f"v1/alerts/{alertidentifier}", params) return self.make_api_request("GET", uri, headers=self.apiheader) - def alert_dismiss(self, alertidentifier=None): - + def alert_dismiss(self, alertidentifier=None) -> bool: if alertidentifier is None: - self.log.error("Alert identifier is required for get_vpg function.") + self.log.error("Alert identifier is required for alert_dismiss function.") raise ValueError("Alert identifier is required.") - params = { - } - + params = {} uri = self.construct_url(f"v1/alerts/{alertidentifier}/dismiss", params) - return self.make_api_request("POST", uri, headers=self.apiheader) - - def alert_undismiss(self, alertidentifier=None): - + + try: + response = self.make_api_request("POST", uri, headers=self.apiheader) + # Check if the response status code is 200 (OK) + if response.status_code == 200: + return True + else: + # Log and raise an exception for any non-200 status codes + self.log.error(f"Failed to dismiss alert: {response.status_code}") + response.raise_for_status() + except requests.exceptions.RequestException as e: + self.log.error(f"Error while sending dismiss alert request: {e}") + raise + + return False # Return False if the try block didn't execute successfully + + def alert_undismiss(self, alertidentifier=None) -> bool: if alertidentifier is None: - self.log.error("Alert identifier is required for get_vpg function.") + self.log.error("Alert identifier is required for alert_undismiss function.") raise ValueError("Alert identifier is required.") - params = { - } - + params = {} uri = self.construct_url(f"v1/alerts/{alertidentifier}/undismiss", params) - return self.make_api_request("POST", uri, headers=self.apiheader) - - def alert_levels(self): + + try: + response = self.make_api_request("POST", uri, headers=self.apiheader) + # Check if the response status code is 200 (OK) + if response.status_code == 200: + return True + else: + # Log and raise an exception for any non-200 status codes + self.log.error(f"Failed to undismiss alert: {response.status_code}") + response.raise_for_status() + except requests.exceptions.RequestException as e: + self.log.error(f"Error while sending undismiss alert request: {e}") + raise + + return False # Return False if the try block didn't execute successfully + + def alert_levels(self) -> List[str]: params = { } @@ -300,7 +330,7 @@ class zvmsite: uri = self.construct_url(f"v1/alerts/levels", params) return self.make_api_request("GET", uri, headers=self.apiheader) - def alert_entities(self): + def alert_entities(self) -> List[str]: params = { } @@ -308,7 +338,7 @@ class zvmsite: uri = self.construct_url(f"v1/alerts/entities", params) return self.make_api_request("GET", uri, headers=self.apiheader) - def alert_helpidentifiers(self): + def alert_helpidentifiers(self) -> List[str]: params = { } @@ -317,7 +347,7 @@ class zvmsite: return self.make_api_request("GET", uri, headers=self.apiheader) def alerts(self, startdate=None, enddate=None, vpgid=None, zorgidentifier=None, level=None, - entity=None, helpidentifier=None, isdismissed: bool = None): + entity=None, helpidentifier=None, isdismissed: bool = None) -> List[Dict[str, Any]]: params = { 'startdate': startdate, @@ -345,7 +375,7 @@ class zvmsite: uri = self.construct_url(f"v1/datastores/{datastoreidentifier}", params) return self.make_api_request("GET", uri, headers=self.apiheader) - def datastores(self, datadtoreidentifier=None): + def datastores(self, datadtoreidentifier=None) -> List[Dict[str, Any]]: params = { } @@ -457,7 +487,7 @@ class zvmsite: return self.make_api_request("GET", uri, headers=self.apiheader) def events(self, startdate=None, enddate=None, vpgid=None, sitename=None, zorgidentifier=None, eventtype=None, - entitytype=None, category=None, username=None, alertidentifier=None): + entitytype=None, category=None, username=None, alertidentifier=None) -> List[Dict[str, Any]]: params = { 'startdate': startdate, @@ -553,7 +583,7 @@ class zvmsite: uri = self.construct_url(f"v1/license", params) return self.make_api_request("PUT", uri, json_data=license, headers=self.apiheader) - def peer_sites(self): + def peer_sites(self) -> List[Dict[str, Any]]: params = { } @@ -620,10 +650,7 @@ class zvmsite: uri = self.construct_url(f"v1/peersites/generatetoken", params) return self.make_api_request("POST", uri, headers=self.apiheader) - - - - def tasks(self, startedbeforedate=None, startedafterdate=None, completedbeforedate=None, completedafterdate=None, tasktype=None, status=None): + def tasks(self, startedbeforedate=None, startedafterdate=None, completedbeforedate=None, completedafterdate=None, tasktype=None, status=None) -> List[Dict[str, Any]]: params = { 'startedbeforedate': startedbeforedate, @@ -670,7 +697,7 @@ class zvmsite: def vpgs(self, vpgid=None, vpgname=None, vpgstatus=None, vpgsubstatus=None, protectedsitetype=None, recoverysitetype=None, protectedsiteidentifier=None, recoverysiteidentifier=None, - zorgidentifier=None, priority=None, serviceprofileidentifier=None): + zorgidentifier=None, priority=None, serviceprofileidentifier=None) -> List[Dict[str, Any]]: params = { 'vpgid': vpgid, @@ -708,7 +735,7 @@ class zvmsite: def vms(self, vmidentifier=None, vmname=None, vpgstatus=None, vpgsubstatus=None, protectedsitetype=None, recoverysitetype=None, protectedsiteidentifier=None, recoverysiteidentifier=None, - zorgname=None, priority=None, includebackupvms: bool = None, includemountedvms: bool = None): + zorgname=None, priority=None, includebackupvms: bool = None, includemountedvms: bool = None) -> List[Dict[str, Any]]: params = { 'vmidentifier': vmidentifier, @@ -758,7 +785,7 @@ class zvmsite: uri = self.construct_url(f"v1/vms/{vmidentifier}/pointsintime", params) return self.make_api_request("GET", uri, headers=self.apiheader) - def volumes(self, volumetype=None, vpgidentifier=None, datastoreidentifier=None, protectedvmidentifier=None, owningvmidentifier=None): + def volumes(self, volumetype=None, vpgidentifier=None, datastoreidentifier=None, protectedvmidentifier=None, owningvmidentifier=None) -> List[Dict[str, Any]]: if volumetype: valid_volumetypes = ["scratch", "journal", "recovery", "protected", "appliance"]