mirror of
https://github.com/recklessop/Zerto_Exporter.git
synced 2026-07-02 23:53:13 -04:00
251b23990c
Updated logic to look for VM and VPG ID in encryption detection stats incase VM is protected using one to many
708 lines
48 KiB
Python
708 lines
48 KiB
Python
import requests
|
|
import http.server
|
|
import socketserver
|
|
import os
|
|
import ssl
|
|
import logging
|
|
import threading
|
|
import socket
|
|
from pyVim.connect import SmartConnect, Disconnect
|
|
from pyVmomi import vim
|
|
from time import sleep
|
|
from logging.handlers import RotatingFileHandler
|
|
from requests.packages.urllib3.exceptions import InsecureRequestWarning
|
|
from requests.structures import CaseInsensitiveDict
|
|
from tinydb import TinyDB, Query
|
|
from tinydbstorage.storage import MemoryStorage
|
|
from version import VERSION
|
|
|
|
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
|
|
verifySSL = os.getenv("VERIFY_SSL", 'False').lower() in ('true', '1', 't')
|
|
zvm_url = os.environ.get('ZVM_HOST', '192.168.50.60')
|
|
zvm_port = os.environ.get('ZVM_PORT', '443')
|
|
client_id = os.environ.get('CLIENT_ID', 'api-script')
|
|
client_secret = os.environ.get('CLIENT_SECRET', 'js51tDM8oappYUGRJBhF7bcsedNoHA5j')
|
|
scrape_speed = int(os.environ.get('SCRAPE_SPEED', 30))
|
|
api_timeout = int(os.environ.get('API_TIMEOUT', 5))
|
|
LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO').upper()
|
|
version = str(VERSION)
|
|
vcenter_host = os.environ.get('VCENTER_HOST', 'vcenter.local')
|
|
vcenter_user = os.environ.get('VCENTER_USER', 'administrator@vsphere.local')
|
|
vcenter_pwd = os.environ.get('VCENTER_PASSWORD', 'supersecret')
|
|
|
|
# Get the hostname of the machine
|
|
container_id = str(socket.gethostname())
|
|
|
|
#set log line format including container_id
|
|
log_formatter = logging.Formatter("%(asctime)s;%(levelname)s;%(threadName)s;%(message)s", "%Y-%m-%d %H:%M:%S")
|
|
|
|
log_handler = RotatingFileHandler(filename=f"./logs/Log-Main-{container_id}.log", maxBytes=1024*1024*100, backupCount=5)
|
|
log_handler.setFormatter(log_formatter)
|
|
|
|
log = logging.getLogger("Node-Exporter")
|
|
log.setLevel(LOGLEVEL)
|
|
log.addHandler(log_handler)
|
|
log.info(f"Zerto-Node-Exporter - Version {version}")
|
|
log.info(f"Log Level: {LOGLEVEL}")
|
|
log.debug("Running with Variables:\nVerify SSL: " + str(verifySSL) + "\nZVM Host: " + zvm_url + "\nZVM Port: " + zvm_port + "\nClient-Id: " + client_id + "\nClient Secret: " + client_secret)
|
|
|
|
# Global Variables
|
|
token = ""
|
|
siteId = "NotSet"
|
|
siteName = "NotSet"
|
|
lastStats = CaseInsensitiveDict()
|
|
|
|
# Check if vCenter is set, if not disable VRA metrics
|
|
is_vcenter_set = True
|
|
if vcenter_host == "vcenter.local":
|
|
log.error("vCenter Host not set. Please set the environment variable VCENTER_HOST, turning off VRA CPU and Memory metrics")
|
|
is_vcenter_set = False
|
|
log.debug("vCenter data collection is enabled")
|
|
|
|
# Authentication Thread which handles authentication and token refresh for ZVM API
|
|
def ZvmAuthHandler():
|
|
log.debug("ZVMAuthHandler Thread Started")
|
|
expiresIn = 0
|
|
global token
|
|
global siteId
|
|
global siteName
|
|
retries = 0
|
|
while True:
|
|
if expiresIn < 30:
|
|
h = CaseInsensitiveDict()
|
|
h["Content-Type"] = "application/x-www-form-urlencoded"
|
|
|
|
d = CaseInsensitiveDict()
|
|
d["client_id"] = client_id
|
|
d["client_secret"] = client_secret
|
|
d["grant_type"] = "client_credentials"
|
|
|
|
uri = "https://" + zvm_url + ":" + zvm_port + "/auth/realms/zerto/protocol/openid-connect/token"
|
|
delay = 0
|
|
|
|
try:
|
|
response = requests.post(url=uri, data=d, headers=h, verify=verifySSL)
|
|
response.raise_for_status()
|
|
except requests.exceptions.RequestException as e:
|
|
retries += 1
|
|
delay = 2 ** retries
|
|
log.error("Error while sending authentication request: " + str(e) + ". Retrying in " + str(delay) + " seconds")
|
|
sleep(delay)
|
|
continue
|
|
else:
|
|
retries = 0
|
|
|
|
responseJSON = response.json()
|
|
if 'access_token' not in responseJSON or 'expires_in' not in responseJSON:
|
|
log.error("Authentication response does not contain expected keys")
|
|
delay = 2 ** retries
|
|
sleep(delay)
|
|
retries += 1
|
|
continue
|
|
|
|
token = str(responseJSON.get('access_token'))
|
|
expiresIn = int(responseJSON.get('expires_in'))
|
|
log.info("Authentication successful. Token expires in " + str(expiresIn) + " seconds")
|
|
|
|
if response.status_code != 200:
|
|
log.error("Authentication request failed with status code " + str(response.status_code))
|
|
delay = 2 ** retries
|
|
sleep(delay)
|
|
retries += 1
|
|
continue
|
|
|
|
# Get Site ID and Name
|
|
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/localsite"
|
|
delay = 0
|
|
try:
|
|
log.debug("Getting Site ID and Name")
|
|
h2 = CaseInsensitiveDict()
|
|
h2["Accept"] = "application/json"
|
|
h2["Authorization"] = "Bearer " + token
|
|
response = requests.get(url=uri, timeout=3, headers=h2, verify=verifySSL)
|
|
response.raise_for_status()
|
|
except requests.exceptions.RequestException as e:
|
|
retries += 1
|
|
delay = 2 ** retries
|
|
log.error("Error while sending authentication request: " + str(e) + ". Retrying in " + str(delay) + " seconds")
|
|
sleep(delay)
|
|
continue
|
|
else:
|
|
retries = 0
|
|
|
|
responseJSON = response.json()
|
|
log.debug(responseJSON)
|
|
if 'SiteIdentifier' not in responseJSON or 'SiteName' not in responseJSON:
|
|
log.error("LocalSite API response does not contain expected keys")
|
|
delay = 2 ** retries
|
|
sleep(delay)
|
|
retries += 1
|
|
continue
|
|
else:
|
|
siteId = str(responseJSON.get('SiteIdentifier'))
|
|
siteName = str(responseJSON.get('SiteName'))
|
|
log.info("Site ID: " + siteId + " Site Name: " + siteName)
|
|
|
|
expiresIn -= 10 + delay
|
|
log.debug("Token Expires in " + str(expiresIn) + " seconds")
|
|
sleep(10)
|
|
|
|
|
|
# Thread which gets VM level encryption statistics from ZVM API
|
|
def GetStatsFunc():
|
|
tempdb = TinyDB(storage=MemoryStorage) # ('./db.json') used for storing db on disk for debugging
|
|
dbvm = Query()
|
|
dbvpg = Query()
|
|
while (True) :
|
|
global token
|
|
global siteId
|
|
global siteName
|
|
|
|
if (token != ""):
|
|
log.info("Got Auth Token!")
|
|
log.debug("token: " + str(token))
|
|
log.debug("Stats Collector Loop Running")
|
|
|
|
metricsDictionary = {}
|
|
|
|
h2 = CaseInsensitiveDict()
|
|
h2["Accept"] = "application/json"
|
|
h2["Authorization"] = "Bearer " + token
|
|
|
|
## Statistics API
|
|
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/statistics/vms/"
|
|
statsapi = requests.get(url=uri, timeout=3, headers=h2, verify=verifySSL)
|
|
statsapi_json = statsapi.json()
|
|
#log.debug(statsapi_json)
|
|
|
|
for vm in statsapi_json:
|
|
oldvmdata = dict()
|
|
|
|
CurrentIops = 0
|
|
CurrentWriteCounterInMBs = 0
|
|
CurrentSyncCounterInMBs = 0
|
|
CurrentNetworkTrafficCounterInMBs = 0
|
|
CurrentEncryptedLBs = 0
|
|
CurrentUnencryptedLBs = 0
|
|
CurrentTotalLBs = 0
|
|
CurrentPercentEncrypted = 0
|
|
VMName = "NA"
|
|
|
|
oldvmdata = tempdb.search(dbvm.VmIdentifier == vm['VmIdentifier'] and dbvpg.VpgIdentifier == vm['VpgIdentifier'])
|
|
|
|
log.info("Checking TempDB for VM " + vm['VmIdentifier'] + " in VPG " + vm['VpgIdentifier'])
|
|
if (oldvmdata):
|
|
log.info(vm['VmIdentifier'] + " Record Found, Updating DB")
|
|
log.debug(oldvmdata[0])
|
|
log.debug(tempdb.update(vm, dbvm.VmIdentifier == vm['VmIdentifier'] and dbvpg.VpgIdentifier == vm['VpgIdentifier']))
|
|
|
|
log.debug("!@!@!@!@!@ Stats !@!@!@!@!@")
|
|
VMName = oldvmdata[0]['VmName']
|
|
log.debug("Current VM " + str(VMName))
|
|
CurrentIops = abs(vm['IoOperationsCounter'] - oldvmdata[0]['IoOperationsCounter'])
|
|
log.debug("CurrentIops " + str(CurrentIops))
|
|
CurrentSyncCounterInMBs = abs(vm['SyncCounterInMBs'] - oldvmdata[0]['SyncCounterInMBs'])
|
|
log.debug("CurrentSyncCounterInMBs " + str(CurrentSyncCounterInMBs))
|
|
CurrentNetworkTrafficCounterInMBs = abs(vm['NetworkTrafficCounterInMBs'] - oldvmdata[0]['NetworkTrafficCounterInMBs'])
|
|
log.debug("CurrentNetworkTrafficCounterInMBs " + str(CurrentNetworkTrafficCounterInMBs))
|
|
CurrentEncryptedLBs = abs(vm['EncryptionStatistics']['EncryptedDataInLBs'] - oldvmdata[0]['EncryptionStatistics']['EncryptedDataInLBs'])
|
|
log.debug("CurrentEncryptedLBs " + str(CurrentEncryptedLBs))
|
|
CurrentUnencryptedLBs = abs(vm['EncryptionStatistics']['UnencryptedDataInLBs'] - oldvmdata[0]['EncryptionStatistics']['UnencryptedDataInLBs'])
|
|
log.debug("CurrentUnencryptedLBs " + str(CurrentUnencryptedLBs))
|
|
CurrentTotalLBs = abs(CurrentEncryptedLBs + CurrentUnencryptedLBs)
|
|
log.debug("CurrentTotalLBs " + str(CurrentTotalLBs))
|
|
if CurrentTotalLBs != 0:
|
|
CurrentPercentEncrypted = ((CurrentEncryptedLBs / CurrentTotalLBs) * 100)
|
|
else:
|
|
CurrentPercentEncrypted = 0
|
|
log.debug("CurrentPercentEncrypted " + str(CurrentPercentEncrypted))
|
|
|
|
else:
|
|
log.info(vm['VmIdentifier'] + " No Record Found, Inserting into DB")
|
|
#insert original VM record to tempdb
|
|
log.debug(tempdb.insert(vm))
|
|
|
|
# update database with VM name, for easier display in Grafana Legends
|
|
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/vms/" + vm['VmIdentifier'] +"?vpgIdentifier=" + vm['VpgIdentifier']
|
|
try:
|
|
vapi = requests.get(url=uri, timeout=3, headers=h2, verify=verifySSL)
|
|
vapi_json = vapi.json()
|
|
except Exception as e:
|
|
log.error("Error while sending api request: " + str(e))
|
|
VMName = "Unknown"
|
|
else:
|
|
log.debug("vapi_json: " + str(vapi_json))
|
|
tempdb.update({'VmName': vapi_json['VmName']}, dbvm.VmIdentifier == vm['VmIdentifier'])
|
|
log.info("Added vm to tempdb " + vm['VmIdentifier'] + " - " + vapi_json['VmName'])
|
|
VMName = vapi_json['VmName']
|
|
|
|
# Store Calculated Metrics
|
|
metricsDictionary["vm_IoOperationsCounter{VpgIdentifier=\"" + str(vm['VpgIdentifier']) + "\",VmIdentifier=\"" + str(vm['VmIdentifier']) + "\",VmName=\"" + str(VMName) + "\",SiteIdentifier=\"" + str(siteId) + "\",SiteName=\"" + str(siteName) + "\"}"] = CurrentIops
|
|
metricsDictionary["vm_WriteCounterInMBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentWriteCounterInMBs
|
|
metricsDictionary["vm_SyncCounterInMBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentSyncCounterInMBs
|
|
metricsDictionary["vm_NetworkTrafficCounterInMBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentNetworkTrafficCounterInMBs
|
|
metricsDictionary["vm_EncryptedDataInLBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentEncryptedLBs
|
|
metricsDictionary["vm_UnencryptedDataInLBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentUnencryptedLBs
|
|
metricsDictionary["vm_TotalDataInLBs{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentTotalLBs
|
|
metricsDictionary["vm_PercentEncrypted{VpgIdentifier=\"" + vm['VpgIdentifier'] + "\",VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + VMName + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = CurrentPercentEncrypted
|
|
|
|
## Write metrics to a human readable metrics.txt file as well as a metrics file that is easy to get in prometheus
|
|
file_object = open('statsmetrics', 'w')
|
|
txt_object = open('statsmetrics.txt', 'w')
|
|
for item in metricsDictionary :
|
|
file_object.write(item)
|
|
file_object.write(" ")
|
|
file_object.write(str(metricsDictionary[item]))
|
|
file_object.write("\n")
|
|
txt_object.write(item)
|
|
txt_object.write(" ")
|
|
txt_object.write(str(metricsDictionary[item]))
|
|
txt_object.write("\n")
|
|
file_object.close()
|
|
txt_object.close()
|
|
|
|
log.debug("Starting Sleep for " + str(scrape_speed) + " seconds")
|
|
sleep(scrape_speed)
|
|
else:
|
|
log.debug("Waiting 1 second for Auth Token")
|
|
sleep(1)
|
|
|
|
# Function which retrieves stats from various ZVM APIs and stores them in a metrics file
|
|
def GetDataFunc():
|
|
tempdb = TinyDB(storage=MemoryStorage)
|
|
dbvm = Query()
|
|
while (True) :
|
|
global token
|
|
global siteId
|
|
global siteName
|
|
|
|
if (token != ""):
|
|
log.debug("Got Auth Token!")
|
|
log.debug("token: " + str(token))
|
|
log.info("Data Collector Loop Running")
|
|
|
|
metricsDictionary = {}
|
|
|
|
h2 = CaseInsensitiveDict()
|
|
h2["Accept"] = "application/json"
|
|
h2["Authorization"] = "Bearer " + token
|
|
|
|
|
|
### VPGs API
|
|
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/vpgs/"
|
|
service = requests.get(url=uri, timeout=api_timeout, headers=h2, verify=verifySSL)
|
|
vpg_json = service.json()
|
|
#log.debug(vpg_json)
|
|
for vpg in vpg_json :
|
|
metricsDictionary["vpg_storage_used_in_mb{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["UsedStorageInMB"]
|
|
metricsDictionary["vpg_actual_rpo{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["ActualRPO"]
|
|
metricsDictionary["vpg_throughput_in_mb{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["ThroughputInMB"]
|
|
metricsDictionary["vpg_iops{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["IOPs"]
|
|
metricsDictionary["vpg_provisioned_storage_in_mb{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["ProvisionedStorageInMB"]
|
|
metricsDictionary["vpg_vms_count{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["VmsCount"]
|
|
metricsDictionary["vpg_configured_rpo_seconds{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["ConfiguredRpoSeconds"]
|
|
metricsDictionary["vpg_actual_history_in_minutes{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["HistoryStatusApi"]["ActualHistoryInMinutes"]
|
|
metricsDictionary["vpg_configured_history_in_minutes{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["HistoryStatusApi"]["ConfiguredHistoryInMinutes"]
|
|
metricsDictionary["vpg_failsafe_history_in_minutes_actual{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["FailSafeHistory"]["ActualFailSafeHistory"]
|
|
metricsDictionary["vpg_failsafe_history_in_minutes_configured{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["FailSafeHistory"]["ConfiguredFailSafeHistory"]
|
|
metricsDictionary["vpg_status{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["Status"]
|
|
metricsDictionary["vpg_substatus{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["SubStatus"]
|
|
metricsDictionary["vpg_alert_status{VpgIdentifier=\"" + vpg['VpgIdentifier'] + "\",VpgName=\"" + vpg['VpgName'] + "\",VpgPriority=\"" + str(vpg['Priority']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vpg["AlertStatus"]
|
|
|
|
### Datastores APIs
|
|
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/datastores/"
|
|
service = requests.get(url=uri, timeout=api_timeout, headers=h2, verify=verifySSL)
|
|
ds_json = service.json()
|
|
#log.debug(ds_json)
|
|
for ds in ds_json :
|
|
|
|
log.debug(f"Processing {ds['DatastoreName']}")
|
|
|
|
metricsDictionary["datastore_vras{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["NumVRAs"]
|
|
metricsDictionary["datastore_incoming_vms{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["NumIncomingVMs"]
|
|
metricsDictionary["datastore_outgoing_vms{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["NumOutgoingVMs"]
|
|
metricsDictionary["datastore_usage_capacityinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Datastore"]["CapacityInBytes"]
|
|
metricsDictionary["datastore_usage_freeinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Datastore"]["FreeInBytes"]
|
|
metricsDictionary["datastore_usage_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Datastore"]["UsedInBytes"]
|
|
metricsDictionary["datastore_usage_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Datastore"]["ProvisionedInBytes"]
|
|
metricsDictionary["datastore_usage_zerto_protected_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Protected"]["UsedInBytes"]
|
|
metricsDictionary["datastore_usage_zerto_protected_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Protected"]["ProvisionedInBytes"]
|
|
metricsDictionary["datastore_usage_zerto_recovery_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Recovery"]["UsedInBytes"]
|
|
metricsDictionary["datastore_usage_zerto_recovery_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Recovery"]["ProvisionedInBytes"]
|
|
metricsDictionary["datastore_usage_zerto_journal_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Journal"]["UsedInBytes"]
|
|
metricsDictionary["datastore_usage_zerto_journal_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Journal"]["ProvisionedInBytes"]
|
|
metricsDictionary["datastore_usage_zerto_scratch_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Scratch"]["UsedInBytes"]
|
|
metricsDictionary["datastore_usage_zerto_scratch_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Scratch"]["ProvisionedInBytes"]
|
|
metricsDictionary["datastore_usage_zerto_appliances_usedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Appliances"]["UsedInBytes"]
|
|
metricsDictionary["datastore_usage_zerto_appliances_provisionedinbytes{datastoreIdentifier=\"" + ds['DatastoreIdentifier'] + "\",DatastoreName=\"" + ds['DatastoreName'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = ds["Stats"]["Usage"]["Zerto"]["Appliances"]["ProvisionedInBytes"]
|
|
|
|
## VMs API
|
|
log.debug("Getting VMs API")
|
|
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/vms/"
|
|
|
|
vmapi_json = {}
|
|
try:
|
|
vmapi = requests.get(url=uri, timeout=3, headers=h2, verify=verifySSL)
|
|
vmapi_json = vmapi.json()
|
|
except Exception as e:
|
|
log.error("Error while sending api request: " + str(e))
|
|
VMName = "Unknown"
|
|
return
|
|
|
|
log.debug("Got VMs API")
|
|
log.debug(vmapi_json)
|
|
for vm in vmapi_json :
|
|
log.debug("Processing VM: " + str(vm['VmName']))
|
|
log.debug("Checking VM " + vm['VmIdentifier'] + " on Protected Site " + vm['ProtectedSite']['identifier'] + " against " + siteId)
|
|
|
|
if siteId == vm['ProtectedSite']['identifier']:
|
|
log.debug("Found VM " + vm['VmIdentifier'] + " on Protected Site")
|
|
|
|
if not isinstance(vm["ActualRPO"], int):
|
|
vm["ActualRPO"] = -1
|
|
metricsDictionary["vm_actualrpo{VmIdentifier=\"" + str(vm['VmIdentifier']) + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + str(siteName) + "\"}"] = vm["ActualRPO"]
|
|
metricsDictionary["vm_throughput_in_mb{VmIdentifier=\"" + str(vm['VmIdentifier']) + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + str(siteName) + "\"}"] = vm["ThroughputInMB"]
|
|
metricsDictionary["vm_iops{VmIdentifier=\"" + str(vm['VmIdentifier']) + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["IOPs"]
|
|
metricsDictionary["vm_journal_hard_limit{VmIdentifier=\"" + str(vm['VmIdentifier']) + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + str(siteName) + "\"}"] = vm["JournalHardLimit"]["LimitValue"]
|
|
metricsDictionary["vm_journal_warning_limit{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["JournalWarningThreshold"]["LimitValue"]
|
|
metricsDictionary["vm_journal_used_storage_mb{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["JournalUsedStorageMb"]
|
|
metricsDictionary["vm_outgoing_bandwidth_in_mbps{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["OutgoingBandWidthInMbps"]
|
|
metricsDictionary["vm_used_storage_in_MB{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["UsedStorageInMB"]
|
|
metricsDictionary["vm_provisioned_storage_in_MB{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["ProvisionedStorageInMB"]
|
|
metricsDictionary["vm_status{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["Status"]
|
|
metricsDictionary["vm_substatus{VmIdentifier=\"" + vm['VmIdentifier'] + "\",VmName=\"" + str(vm['VmName']) + "\",VmRecoveryVRA=\"" + str(vm["RecoveryHostName"]) + "\",VmPriority=\"" + str(vm['Priority']) + "\",SiteIdentifier=\"" + str(siteId) + "\",VpgName=\"" + str(vm['VpgName']) + "\",SiteName=\"" + siteName + "\"}"] = vm["SubStatus"]
|
|
log.debug("Processed VM: " + str(vm['VmName']))
|
|
|
|
else:
|
|
log.debug("VM " + vm['VmIdentifier'] + " is protected to this site")
|
|
|
|
|
|
## Volumes API for Scratch Volumes
|
|
log.debug("Getting Scratch Volumes")
|
|
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/volumes?volumeType=scratch"
|
|
|
|
volapi_json = {}
|
|
try:
|
|
volapi = requests.get(url=uri, timeout=api_timeout, headers=h2, verify=verifySSL)
|
|
volapi_json = volapi.json()
|
|
except Exception as e:
|
|
log.error("Error while sending api request: " + str(e))
|
|
VMName = "Unknown"
|
|
return
|
|
|
|
log.debug("Got Scratch Volumes API")
|
|
if(bool(volapi_json)):
|
|
for volume in volapi_json :
|
|
#metricsDictionary["scratch_volume_provisioned_size_in_bytes{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\"}"] = volume["Size"]["ProvisionedInBytes"]
|
|
# Determine the key for a given VM, then see if the key is already in the dictionary, if it is add the next disk to the total. If not, create a new key.
|
|
metrickey = "scratch_volume_size_in_bytes{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\",VpgName=\"" + str(volume['Vpg']['Name']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"
|
|
if (metrickey in metricsDictionary):
|
|
metricsDictionary[metrickey] = metricsDictionary[metrickey] + volume["Size"]["UsedInBytes"]
|
|
else:
|
|
metricsDictionary[metrickey] = volume["Size"]["UsedInBytes"]
|
|
percentage_used = (volume["Size"]["UsedInBytes"] / volume["Size"]["ProvisionedInBytes"] * 100)
|
|
percentage_used = round(percentage_used, 1)
|
|
#metricsDictionary["scratch_volume_percentage_used{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\"}"] = percentage_used
|
|
|
|
## Volumes API for Journal Volumes
|
|
log.debug("Getting Journal Volumes")
|
|
|
|
volapi_json = {}
|
|
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/volumes?volumeType=journal"
|
|
try:
|
|
volapi = requests.get(url=uri, timeout=api_timeout, headers=h2, verify=verifySSL)
|
|
volapi_json = volapi.json()
|
|
except Exception as e:
|
|
log.error("Error while sending api request: " + str(e))
|
|
VMName = "Unknown"
|
|
return
|
|
|
|
log.debug("Got Journal Volumes API")
|
|
if(bool(volapi_json)):
|
|
log.debug("Journal Volumes Exist")
|
|
for volume in volapi_json :
|
|
log.debug("Journal Volume: " + volume['ProtectedVm']['Name'] + " Calculating total size...")
|
|
#metricsDictionary["scratch_volume_provisioned_size_in_bytes{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\"}"] = volume["Size"]["ProvisionedInBytes"]
|
|
# Determine the key for a given VM, then see if the key is already in the dictionary, if it is add the next disk to the total. If not, create a new key.
|
|
metrickey = "vm_journal_volume_size_in_bytes{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\",VpgName=\"" + str(volume['Vpg']['Name']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"
|
|
if (metrickey in metricsDictionary):
|
|
metricsDictionary[metrickey] = metricsDictionary[metrickey] + volume["Size"]["UsedInBytes"]
|
|
else:
|
|
metricsDictionary[metrickey] = volume["Size"]["UsedInBytes"]
|
|
|
|
metrickey = "vm_journal_volume_provisioned_in_bytes{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\",VpgName=\"" + str(volume['Vpg']['Name']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"
|
|
if (metrickey in metricsDictionary):
|
|
metricsDictionary[metrickey] = metricsDictionary[metrickey] + volume["Size"]["ProvisionedInBytes"]
|
|
else:
|
|
metricsDictionary[metrickey] = volume["Size"]["ProvisionedInBytes"]
|
|
|
|
metrickey = "vm_journal_volume_count{ProtectedVm=\"" + volume['ProtectedVm']['Name'] + "\", ProtectedVmIdentifier=\"" + volume['ProtectedVm']['Identifier'] + "\", OwningVRA=\"" + volume['OwningVm']['Name'] + "\",VpgName=\"" + str(volume['Vpg']['Name']) + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"
|
|
if (metrickey in metricsDictionary):
|
|
metricsDictionary[metrickey] = metricsDictionary[metrickey] + 1
|
|
else:
|
|
metricsDictionary[metrickey] = 1
|
|
|
|
## Write metrics to a human readable metrics.txt file as well as a metrics file that is easy to get in prometheus
|
|
log.debug("Writing metrics to file")
|
|
file_object = open('metrics', 'w')
|
|
txt_object = open('metrics.txt', 'w')
|
|
for item in metricsDictionary :
|
|
file_object.write(item)
|
|
file_object.write(" ")
|
|
file_object.write(str(metricsDictionary[item]))
|
|
file_object.write("\n")
|
|
txt_object.write(item)
|
|
txt_object.write(" ")
|
|
txt_object.write(str(metricsDictionary[item]))
|
|
txt_object.write("\n")
|
|
|
|
file_object.close()
|
|
txt_object.close()
|
|
log.debug("Metrics written to file")
|
|
|
|
# This function will get data every 10 seconds
|
|
log.debug("Starting Sleep for " + str(scrape_speed) + " seconds")
|
|
sleep(scrape_speed)
|
|
else:
|
|
log.debug("Waiting 1 second for Auth Token")
|
|
sleep(1)
|
|
|
|
# get VRA CPU and memory usage from vCenter Server
|
|
def GetVraMetrics():
|
|
# set up API endpoint and headers
|
|
log.debug("GetVraCpuMemory() called")
|
|
metricsDictionary = {}
|
|
while True:
|
|
vra_names = []
|
|
vras = []
|
|
global token
|
|
global siteId
|
|
global siteName
|
|
|
|
log.debug("Checking Token in VRA CPU MEM Collector")
|
|
if (token != ""):
|
|
log.debug("Auth Token Valid!")
|
|
log.debug("token: " + str(token))
|
|
log.info("VRA CPU MEM Collector Running")
|
|
|
|
h2 = CaseInsensitiveDict()
|
|
h2["Accept"] = "application/json"
|
|
h2["Authorization"] = "Bearer " + token
|
|
|
|
|
|
### VRA API
|
|
uri = "https://" + zvm_url + ":" + zvm_port + "/v1/vras"
|
|
|
|
# make API call to get list of VRAs
|
|
try:
|
|
response = requests.get(url=uri, timeout=api_timeout, headers=h2, verify=verifySSL)
|
|
except Exception as e:
|
|
log.error(f"Error connecting to {endpoint}: {e}")
|
|
return
|
|
else:
|
|
log.debug("Response from GET /v1/vras: %s", response.text)
|
|
# parse JSON response and get the name of each VRA
|
|
|
|
if is_vcenter_set:
|
|
# Disable SSL certificate verification
|
|
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
|
|
context.verify_mode = ssl.CERT_NONE
|
|
|
|
# connect to vCenter Server
|
|
si = None
|
|
try:
|
|
si = SmartConnect(host=vcenter_host, user=vcenter_user, pwd=vcenter_pwd, sslContext=context)
|
|
log.debug("Connected to vCenter Server %s", vcenter_host)
|
|
except Exception as e:
|
|
log.error(f"Error connecting to vCenter Server: {e}")
|
|
return
|
|
|
|
|
|
# get the root folder of the vCenter Server
|
|
content = si.RetrieveContent()
|
|
root_folder = content.rootFolder
|
|
|
|
# create a view for all VMs on the vCenter Server
|
|
view_manager = content.viewManager
|
|
vm_view = view_manager.CreateContainerView(root_folder, [vim.VirtualMachine], True)
|
|
|
|
|
|
|
|
vras = response.json()
|
|
|
|
log.debug("VRA names: %s", vras)
|
|
log.debug(type(vras))
|
|
for vra in vras :
|
|
#vra_names.append(vra['VraName'])
|
|
|
|
# Gather other VRA Metrics from Zerto API into Metrics Diectionary
|
|
metricsDictionary["vra_memory_in_GB{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["MemoryInGB"]
|
|
metricsDictionary["vra_vcpu_count{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["NumOfCpus"]
|
|
metricsDictionary["vra_protected_vms{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["ProtectedCounters"]["Vms"]
|
|
metricsDictionary["vra_protected_vpgs{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["ProtectedCounters"]["Vpgs"]
|
|
metricsDictionary["vra_protected_volumes{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["ProtectedCounters"]["Volumes"]
|
|
metricsDictionary["vra_recovery_vms{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["RecoveryCounters"]["Vms"]
|
|
metricsDictionary["vra_recovery_vpgs{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["RecoveryCounters"]["Vpgs"]
|
|
metricsDictionary["vra_recovery_volumes{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["RecoveryCounters"]["Volumes"]
|
|
metricsDictionary["vra_self_protected_vpgs{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = vra["SelfProtectedVpgs"]
|
|
|
|
|
|
log.debug("VRA Name: %s", vra['VraName'])
|
|
|
|
|
|
# get the CPU and memory usage for each VRA
|
|
if is_vcenter_set:
|
|
vm = None
|
|
for vm_obj in vm_view.view:
|
|
if vm_obj.name == vra['VraName']:
|
|
vm = vm_obj
|
|
break
|
|
|
|
if vm is not None:
|
|
log.debug("Found VRA VM in vCenter with name %s", vra['VraName'])
|
|
# get the CPU usage and memory usage for the VM
|
|
cpu_usage_mhz = vm.summary.quickStats.overallCpuUsage
|
|
memory_usage_mb = vm.summary.quickStats.guestMemoryUsage
|
|
|
|
# print the CPU and memory usage for the VM
|
|
log.info(f"VM {vm.name} (name: {vra['VraName']}) has CPU usage of {cpu_usage_mhz} MHz and memory usage of {memory_usage_mb} MB")
|
|
metricsDictionary["vra_cpu_usage_mhz{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = cpu_usage_mhz
|
|
metricsDictionary["vra_memory_usage_mb{VraIdentifierStr=\"" + vra['VraIdentifierStr'] + "\",VraName=\"" + vra['VraName'] + "\",VraVersion=\"" + vra['VraVersion'] + "\",HostVersion=\"" + vra['HostVersion'] + "\",SiteIdentifier=\"" + siteId + "\",SiteName=\"" + siteName + "\"}"] = memory_usage_mb
|
|
else:
|
|
log.info(f"No VM found with name {vra['VraName']}")
|
|
|
|
# Disconnect from vCenter
|
|
Disconnect(si)
|
|
|
|
## Write metrics to a human readable metrics.txt file as well as a metrics file that is easy to get in prometheus
|
|
file_object = open('vrametrics', 'w')
|
|
txt_object = open('vrametrics.txt', 'w')
|
|
for item in metricsDictionary :
|
|
file_object.write(item)
|
|
file_object.write(" ")
|
|
file_object.write(str(metricsDictionary[item]))
|
|
file_object.write("\n")
|
|
txt_object.write(item)
|
|
txt_object.write(" ")
|
|
txt_object.write(str(metricsDictionary[item]))
|
|
txt_object.write("\n")
|
|
|
|
file_object.close()
|
|
txt_object.close()
|
|
|
|
# This function will get data every 10 seconds
|
|
log.debug("Starting Sleep for " + str(scrape_speed) + " seconds")
|
|
sleep(scrape_speed * 2)
|
|
else:
|
|
log.debug("Waiting 1 second for Auth Token")
|
|
sleep(1)
|
|
|
|
|
|
# function which monitors the threads and restarts them if they die
|
|
def ThreadProbe():
|
|
global container_id
|
|
while True:
|
|
log.debug("Thread Probe Started")
|
|
metricsDictionary = {}
|
|
|
|
log.debug("Is Auth Thread Alive")
|
|
if auth_thread.is_alive():
|
|
metricsDictionary["exporter_thread_status{thread=\"" + "AuthHandler" + "\",ExporterInstance=\"" + container_id + "\"}"] = 1
|
|
else:
|
|
metricsDictionary["exporter_thread_status{thread=\"" + "AuthHandler" + "\",ExporterInstance=\"" + container_id + "\"}"] = 0
|
|
|
|
log.debug("Is Data Thread Alive")
|
|
if data_thread.is_alive():
|
|
metricsDictionary["exporter_thread_status{thread=\"" + "DataStats" + "\",ExporterInstance=\"" + container_id + "\"}"] = 1
|
|
else:
|
|
metricsDictionary["exporter_thread_status{thread=\"" + "DataStats" + "\",ExporterInstance=\"" + container_id + "\"}"] = 0
|
|
|
|
log.debug("Is Stats Thread Alive")
|
|
if stats_thread.is_alive():
|
|
metricsDictionary["exporter_thread_status{thread=\"" + "EncryptionStats" + "\",ExporterInstance=\"" + container_id + "\"}"] = 1
|
|
else:
|
|
metricsDictionary["exporter_thread_status{thread=\"" + "EncryptionStats" + "\",ExporterInstance=\"" + container_id + "\"}"] = 0
|
|
|
|
log.debug("Is VRA Metrics Thread Alive")
|
|
if vra_metrics_thread.is_alive():
|
|
metricsDictionary["exporter_thread_status{thread=\"" + "VraMetrics" + "\",ExporterInstance=\"" + container_id + "\"}"] = 1
|
|
else:
|
|
metricsDictionary["exporter_thread_status{thread=\"" + "VraMetrics" + "\",ExporterInstance=\"" + container_id + "\"}"] = 0
|
|
|
|
log.debug("Writing Probe data to files")
|
|
file_object = open('threads', 'w')
|
|
txt_object = open('threads.txt', 'w')
|
|
for item in metricsDictionary :
|
|
file_object.write(item)
|
|
file_object.write(" ")
|
|
file_object.write(str(metricsDictionary[item]))
|
|
file_object.write("\n")
|
|
txt_object.write(item)
|
|
txt_object.write(" ")
|
|
txt_object.write(str(metricsDictionary[item]))
|
|
txt_object.write("\n")
|
|
|
|
log.debug("Trying to Close probe txt files")
|
|
file_object.close()
|
|
txt_object.close()
|
|
|
|
log.debug("Probe Thread Going to Sleep")
|
|
sleep(30)
|
|
|
|
|
|
#----------------run http server on port 9999-----------------
|
|
def WebServer():
|
|
log.info("Web Server Started")
|
|
PORT = 9999
|
|
|
|
Handler = http.server.SimpleHTTPRequestHandler
|
|
|
|
with socketserver.TCPServer(("", PORT), Handler) as httpd:
|
|
log.info(f"Webserver running on port {PORT}")
|
|
httpd.serve_forever()
|
|
|
|
def start_thread(target_func):
|
|
# start a new thread
|
|
thread = threading.Thread(target=target_func)
|
|
thread.start()
|
|
# return the thread object
|
|
return thread
|
|
|
|
# start the threads
|
|
|
|
auth_thread = start_thread(ZvmAuthHandler)
|
|
data_thread = start_thread(GetDataFunc)
|
|
stats_thread = start_thread(GetStatsFunc)
|
|
vra_metrics_thread = start_thread(GetVraMetrics)
|
|
webserver_thread = start_thread(WebServer)
|
|
probe_thread = start_thread(ThreadProbe)
|
|
|
|
# loop indefinitely
|
|
while True:
|
|
# check if any thread has crashed
|
|
sleep(10)
|
|
if not probe_thread.is_alive():
|
|
# restart the thread
|
|
log.error("Probe Thread Died - Restarting")
|
|
probe_thread = start_thread(ThreadProbe)
|
|
if not auth_thread.is_alive():
|
|
# restart the thread
|
|
log.error("Authentication Thread Died - Restarting")
|
|
auth_thread = start_thread(ZvmAuthHandler)
|
|
if not data_thread.is_alive():
|
|
# restart the thread
|
|
log.error("Data Thread Died - Restarting")
|
|
data_thread = start_thread(GetDataFunc)
|
|
if not stats_thread.is_alive():
|
|
# restart the thread
|
|
log.error("Stats Thread Died - Restarting")
|
|
stats_thread = start_thread(GetStatsFunc)
|
|
if not vra_metrics_thread.is_alive():
|
|
# restart the thread
|
|
log.error("VRA Metrics Thread Died - Restarting")
|
|
vra_metrics_thread = start_thread(GetVraMetrics)
|
|
if not webserver_thread.is_alive():
|
|
# restart the thread
|
|
log.error("Webserver Thread Died - Restarting")
|
|
webserver_thread = start_thread(WebServer)
|
|
sleep(api_timeout) |