-
-
Save Hemie143/68622837ea9fc7ee389364fb73f19aba to your computer and use it in GitHub Desktop.
ZenFixIt.py A script to fix common Zenoss problems (Created by Zenoss Inc. posted for posterity)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
########################################################################## | |
# | |
# Copyright 2009 Zenoss, Inc. All Rights Reserved. | |
# | |
########################################################################## | |
__doc__ = """zenfixit | |
Apply common remedies for problems, and look for other issues. | |
""" | |
import os | |
import sys | |
import os.path | |
from stat import * | |
import Globals | |
from Products.ZenUtils.ZenScriptBase import ZenScriptBase | |
from transaction import commit | |
# Module-level handle used by every ZenFixIt method below.  It is created at
# import time by asking ZenScriptBase to connect (connect=True) and taking
# its ``dmd`` attribute.
dmd = ZenScriptBase(connect=True).dmd
class ZenFixIt: | |
""" | |
Detects and fixes a variety of potential problems in the ZODB.
""" | |
def run(self):
    """
    Execute every check and repair step, one after another, in a fixed
    order.
    """
    # fixComponentMonitoredState is not part of the sequence (its call was
    # commented out between fixAdminRoles and fixDeviceClassRelation).
    steps = (
        'fixCatalogs',
        'fixRelationships',
        'fixVmwarerelations',
        'fixAdminRoles',
        'fixDeviceClassRelation',
        'fixMonitorRelations',
        'fixUnicodeStrings',
        'fixOsRelations',
        'fixServiceClasses',
        'unbindComponentTemplates',
        'findNoDatapointThresholds',
        'removeInvalidIpAddresses',
        'reindex',
        'checkRootFilePerms',
        'checkZenPacksDirectory',
        'repairRouting',
        'fixProducts',
    )
    for stepName in steps:
        # Look each step up only when it is about to run.
        getattr(self, stepName)()
def checkRootFilePerms(self):
    """
    Check for sane ownership and permissions on root-owned files.

    Looks at $ZENHOME/bin/zensocket: it should be owned by uid 0 and carry
    the setuid bit.  Nothing is changed on disk; the commands the root
    user should run to repair a problem are printed instead.
    """
    bin_items = [
        "zensocket",
    ]
    # .get() rather than os.environ[...]: indexing raises KeyError when the
    # variable is unset, so the explanatory message below was never shown.
    ZENHOME = os.environ.get('ZENHOME')
    if not ZENHOME:
        print("ZENHOME is not defined. Run as the zenoss user to "
              "check file permissions.")
        return

    chown_list = []
    for name in bin_items:
        path = os.path.join(ZENHOME, 'bin', name)
        try:
            file_info = os.stat(path)
        except OSError:
            # A missing or unreadable file is reported instead of aborting
            # the remaining checks.  sys.exc_info() is used so the handler
            # does not depend on version-specific "except" binding syntax.
            print("Unable to check %s because: %s" % (
                path, sys.exc_info()[1]))
            continue

        if file_info.st_uid != 0:
            chown_list.append(path)

        if name == "zensocket" and not (file_info.st_mode & S_ISUID):
            print("Incorrect permission on zensocket. As the root user,"
                  " run the following command:")
            print(" chmod u+s %s" % path)

    if chown_list:
        print("Incorrect ownership of file(s). As the root user"
              " run the following command:")
        print(" chown root %s" % ' '.join(chown_list))
def checkZenPacksDirectory(self): | |
""" | |
Sometimes the zenpack install or remove doesn't get a chance to complete, so the | |
Python include list can get messed up. | |
""" | |
ZENHOME = os.environ['ZENHOME'] | |
if not ZENHOME: | |
print "ZENHOME is not defined. Run as the zenoss user to " \ | |
"check ZenPacks directory contents." | |
return | |
egg_path = os.path.join(ZENHOME, 'ZenPacks') | |
path = os.path.join(egg_path, 'easy-install.pth') | |
try: | |
fd = open(path) | |
easy_items = fd.readlines() | |
fd.close() | |
except Exception, ex: | |
print "Unable to validate %s file because: %s" % ( | |
path,str(ex)) | |
return | |
easy_items = map(lambda x: x.strip(), easy_items[1:-1]) | |
from glob import glob | |
egg_glob = os.path.join(egg_path, 'ZenPacks.*') | |
eggs = map(lambda x: x.replace(egg_path, '.'), | |
filter(lambda x: not x.endswith('.egg-link'), glob(egg_glob))) | |
# Hash of eggs | |
benedict = dict(zip(eggs, eggs)) | |
from sets import Set | |
found = Set() | |
missing = [] | |
for item in easy_items: | |
if item not in benedict: | |
if item.startswith('/'): | |
if not os.path.exists(item): | |
missing.append(item) | |
continue | |
else: | |
missing.append(item) | |
continue | |
found.add(item) | |
if missing: | |
print "The %s refers to the following non-existent ZenPacks\n" % path | |
print '\t', '\n\t'.join(missing), '\n' | |
print "Make a backup copy of the %s and then remove the above items." % path | |
extra_eggs = Set(eggs).difference(found) | |
if extra_eggs: | |
print "The %s doesn't know about these ZenPack directories:\n" % path | |
print '\t', '\n\t'.join(extra_eggs), '\n' | |
print "It is safe to delete these directories." | |
easy_latest = self.determineLatestZenPacks(easy_items) | |
ondisk_latest = self.determineLatestZenPacks(eggs) | |
for egg_name, easy_version in easy_latest.items(): | |
ondisk_version = ondisk_latest.get(egg_name, None) | |
if ondisk_version is None: | |
continue # Already told the user of the issue | |
if ondisk_version > easy_version: | |
print "\nNewer version of ZenPack %s is *NOT* in use -- reinstall it." % ondisk_version | |
def determineLatestZenPacks(self, eggs):
    """
    If there are multiple ZenPacks with different versions, determine the
    latest version of each.

    eggs -- iterable of egg names relative to the ZenPacks directory,
            i.e. strings starting with './ZenPacks.'.  Anything else is
            ignored, as are names without a '-pyX.Y' suffix (ZenPacks in
            development mode).

    Returns a dict mapping the ZenPack name (with the './ZenPacks.' prefix
    removed) to the highest Version seen for that name.
    """
    from Products.ZenUtils.Version import Version, getVersionTupleFromString
    import re

    prefix = './ZenPacks.'
    # Matches the Python-version suffix and everything after it.  Any
    # major.minor is accepted, not only the previously hard-coded 2.4.
    strip_regex = re.compile(r'-py\d+\.\d+.*')

    latest = {}
    for egg in eggs:
        # Don't worry about anything not in the ZenPacks directory
        if not egg.startswith(prefix):
            continue

        # search(), not match(): match() only tests the start of the
        # string, where this suffix can never occur, so every egg used to
        # be skipped and the result was always empty.
        if not strip_regex.search(egg):
            continue # Ignore ZenPacks in development mode

        name_version = strip_regex.sub('', egg.replace(prefix, ''))
        if '-' not in name_version:
            continue # No version component to compare

        (name, version) = name_version.rsplit('-', 1)
        version = Version(name, *getVersionTupleFromString(version))

        prev_version = latest.get(name, None)
        if prev_version is None or version > prev_version:
            latest[name] = version
    return latest
def exitGracefully(self): | |
print "\nExiting." | |
sys.exit(0) | |
def fixVmwarerelations(self): | |
try: | |
from ZenPacks.zenoss.ZenVMware.VMwareHost import VMwareHost | |
for d in dmd.Devices.VMware.getSubDevices_recursive(): | |
# Skip if not a VMwareHost instance | |
if not isinstance(d, VMwareHost): continue | |
cluster = d.cluster() | |
if cluster is not None: | |
if d.id not in [ x.id for x in cluster.hostSystems() ]: | |
print "Host %s has a broken relationship to cluster %s." % ( | |
d.id, cluster.id) | |
d.cluster._remove() | |
for ds in d.datastores(): | |
if d.id not in [ x.id for x in ds.attachedHosts() ]: | |
print "Host %s has a broken relationship to datastores %s." % ( | |
d.id, ds.id) | |
d.datastores._remove(ds) | |
for nic in d.os.interfaces(): | |
for ip in nic.ipaddresses(): | |
if not ip.interface() or nic.id != ip.interface().id: | |
print "NIC %s has a broken IP relationship to %s." % ( | |
nic.id, ip.id) | |
nic.ipaddresses._remove(ip) | |
for g in d.guestDevices(): | |
for ds in g.datastores(): | |
if g.id not in [ x.id for x in ds.attachedVms() ]: | |
print "Guest %s has a broken relationship to datastore %s." % ( | |
g.id, ds.id) | |
g.datastores._remove(ds) | |
rp = g.resourcePool() | |
if rp is not None: | |
if g.id not in [ x.id for x in rp.pooledVms() ]: | |
print "Guest %s has a broken relationship to resource pool %s" % ( | |
g.id, rp.id) | |
g.resourcePool._remove() | |
for d in dmd.Devices.VMware.getSubDevices_recursive(): | |
if d.id not in [ x.id for x in d.perfServer().devices() ]: | |
print "%s has a bad collector relationship." % d.id | |
d.perfServer._remove() | |
d.setPerformanceMonitor('localhost') | |
except (ImportError, AttributeError): | |
pass | |
def fixCatalogs(self): | |
print "Cleaning up invalid entries from catalogs:" | |
# removed dmd.Devices.componentSearch, from catalog list | |
for catalog in ( | |
dmd.Devices.deviceSearch, | |
dmd.Services.serviceSearch, | |
dmd.ZenLinkManager.layer3_catalog, | |
dmd.maintenanceWindowSearch, | |
dmd.zenPackPersistence, | |
dmd.Manufacturers.productSearch, | |
dmd.searchRRDTemplates, | |
getattr(getattr(dmd.Devices, "VMware", None), "vmwareGuestSearch", None),): | |
if catalog is not None: | |
self.fixCatalog(catalog) | |
def fixCatalog(self, catalog): | |
print " * %s" % catalog.id | |
removeCount = 0 | |
problems = True | |
while problems: | |
problems = False | |
brains = catalog() | |
for o in brains: | |
try: | |
bah = o.getObject() | |
except KeyboardInterrupt: | |
self.exitGracefully() | |
except: | |
print " - removing %s." % o.getPath() | |
catalog.uncatalog_object(o.getPath()) | |
removeCount += 1 | |
problems = True | |
commit() | |
if removeCount > 0: print | |
def fixRelationships(self): | |
from Products.ZenRelations.ToManyRelationship import ToManyRelationship | |
if not hasattr(ToManyRelationship, 'checkObjectRelation'): | |
return | |
from Products.ZenRelations.ToOneRelationship import ToOneRelationship | |
if not hasattr(ToOneRelationship, 'checkRelation'): | |
return | |
# Fix condition where a device cannot be found in the primaryAq path | |
# that exists in Locations, Groups or Systems | |
from zExceptions import NotFound | |
for tl in (dmd.Locations, dmd.Groups, dmd.Systems): | |
for org in tl.getSubOrganizers(): | |
for device in org.devices(): | |
try: | |
unused = device.primaryAq() | |
except (KeyError, NotFound): | |
print "Fixing bad link to %s." % device.id | |
org.devices._remove(device) | |
# Fix condition where the device knows about a location but the location | |
# does not know about the device | |
# add the device back into the location | |
dc=dmd.Devices | |
for d in dc.getSubDevices(): | |
location = d.location() | |
if location: | |
if d.id not in [ x.id for x in location.devices() ]: | |
print "Host %s has a broken relationship to location %s." % (d.id,location.id) | |
locations.devices._add(d) | |
commit() | |
for group in d.groups(): | |
if d.id not in [ x.id for x in group.devices() ]: | |
print "Host %s has a broken relationship to group %s." % (d.id,group.id) | |
group.devices._add(d) | |
commit() | |
for system in d.systems(): | |
if d.id not in [ x.id for x in system.devices() ]: | |
print "Host %s has a broken relationship to system %s." % (d.id,system.id) | |
system.devices._add(d) | |
commit() | |
def fixRelation(relation): | |
if isinstance(relation, ToManyRelationship): | |
rname = relation.remoteName() | |
parobj = relation.getPrimaryParent() | |
for obj in relation._objects: | |
relation.checkObjectRelation(obj, rname, parobj, True) | |
elif isinstance(relation, ToOneRelationship): | |
relation.checkRelation(True) | |
print "Cleaning up one-sided relationships:" | |
# VMware-specific relationships. | |
try: | |
from ZenPacks.zenoss.ZenVMware.VMwareCluster \ | |
import VMwareCluster | |
from ZenPacks.zenoss.ZenVMware.VMwareResourcePool \ | |
import VMwareResourcePool | |
from ZenPacks.zenoss.ZenVMware.VMwareDatastore \ | |
import VMwareDatastore | |
from ZenPacks.zenoss.ZenVMware.VMwareHost \ | |
import VMwareHost | |
dc = dmd.Devices.VMware | |
print " * VMware" | |
for d in dc.getSubDevices(): | |
if isinstance(d, VMwareCluster): | |
fixRelation(d.hostSystems) | |
elif isinstance(d, VMwareResourcePool): | |
fixRelation(d.pooledVms) | |
fixRelation(d.childPools) | |
elif isinstance(d, VMwareDatastore): | |
fixRelation(d.attachedHosts) | |
fixRelation(d.attachedVms) | |
elif isinstance(d, VMwareHost): | |
fixRelation(d.guestDevices) | |
fixRelation(d.cluster) | |
fixRelation(d.datastores) | |
for guest in d.guestDevices(): | |
fixRelation(guest.datastores) | |
fixRelation(guest.resourcePool) | |
commit() | |
except KeyboardInterrupt: | |
self.exitGracefully() | |
except (ImportError, AttributeError): | |
pass | |
def fixAdminRoles(self): | |
print "Cleaning up any admin roles associated to invalid users." | |
removeCount = 0 | |
for rootName in ('Devices', 'Locations', 'Systems', 'Groups'): | |
root = dmd.getDmdRoot(rootName) | |
for sub in [root] + root.getSubOrganizers(): | |
for ar in sub.adminRoles(): | |
if ar.userSetting() is None: | |
print " - removing %s from %s." % (ar.id, sub.getOrganizerName()) | |
sub.adminRoles._delObject(ar.id) | |
removeCount += 1 | |
commit() | |
if removeCount > 0: print | |
def fixComponentMonitoredState(self): | |
print "Updating component catalog with monitor state from database." | |
toggleCount = 0 | |
for cc in dmd.Devices.componentSearch(dict(monitored=True)): | |
c = cc.getObject() | |
if not c.monitored(): | |
print " + enabling monitoring for %s." % cc.getPath() | |
c.index_object() | |
toggleCount += 1 | |
commit() | |
if toggleCount > 0: print | |
def fixDeviceClassRelation(self): | |
print "Verifying that all devices have a deviceClass relation." | |
fixCount = 0 | |
for d in dmd.Devices.getSubDevices(): | |
if d.deviceClass() is not None: continue | |
print " + adding deviceClass relation for %s." % d.id | |
dc = d.getPrimaryParent().getPrimaryParent() | |
d.deviceClass._add(dc) | |
fixCount += 1 | |
commit() | |
if fixCount > 0: print | |
def fixMonitorRelations(self): | |
print "Removing invalid devices from collectors." | |
from Products.ZenModel.PerformanceConf import PerformanceConf | |
removeCount = 0 | |
for pmon in dmd.Monitors.Performance.objectValues(): | |
if not isinstance(pmon, PerformanceConf): continue | |
for device in pmon.devices(): | |
try: | |
bah = device.primaryAq() | |
except KeyboardInterrupt: | |
self.exitGracefully() | |
except Exception: | |
print " - removing %s from %s" % (device.id, pmon.id) | |
pmon.devices._remove(device) | |
removeCount += 1 | |
commit() | |
if removeCount > 0: print | |
def fixServiceClasses(self): | |
print "Removing invalid instances from service classes." | |
removeCount = 0 | |
for so in dmd.Services.getSubOrganizers(): | |
for sc in so.serviceclasses(): | |
for inst in sc.instances(): | |
try: | |
bah = inst.primaryAq() | |
except KeyboardInterrupt: | |
self.exitGracefully() | |
except Exception: | |
print " - removing bad instance from %s." % sc.id | |
sc.instances._remove(inst) | |
commit() | |
if removeCount > 0: print | |
def fixUnicodeStrings(self): | |
print "Converting important values from unicode to string." | |
keyZProperties = ('zSnmpCommunity', 'zWinUser', 'zWinPassword') | |
fixCount = 0 | |
for org in [dmd.Devices] + dmd.Devices.getSubOrganizers(): | |
self.fixUnicodeZProperties(org, keyZProperties) | |
for d in org.devices(): | |
fixCount += self.fixUnicodeZProperties(d, keyZProperties) | |
if type(d.manageIp) == unicode: | |
d.manageIp = str(d.manageIp) | |
commit() | |
if fixCount > 0: print | |
def fixUnicodeZProperties(self, o, keyZProperties): | |
fixCount = 0 | |
for zprop in keyZProperties: | |
if o.hasProperty(zprop) and type(o.getZ(zprop)) == unicode: | |
print " - converting %s on %s." % (zprop, o.id) | |
o.setZenProperty(zprop, str(o.getZ(zprop))) | |
fixCount += 1 | |
return fixCount | |
def fixOsRelations(self): | |
print "Repairing Os Process Monitoring Relations" | |
for ospc in dmd.Processes.getSubOSProcessClassesGen(): | |
rel = ospc.instances | |
for i in rel.instances(): | |
try: | |
unused = i.device().getPrimaryPath() | |
except: | |
print "Removing %s." % '/'.join(i.getPhysicalPath()[4:]) | |
rel._remove(i) | |
commit() | |
def fixServiceClasses(self): | |
problem = True | |
while problem: | |
problem = False | |
for so in dmd.Services.getSubOrganizers(): | |
for sc in so.serviceclasses(): | |
for inst in sc.instances(): | |
try: | |
bah = inst.primaryAq() | |
except Exception: | |
print "Removing bad instance from %s." % (sc.id,) | |
problem = True | |
sc.instances._remove(inst) | |
commit() | |
def unbindComponentTemplates(self): | |
print "Unbinding component templates from devices." | |
componentTemplates = ( | |
"FileSystem", "ethernetCsmacd", "IpService", "OSProcess", | |
"HardDisk", "WinService") | |
fixCount = 0 | |
for o in [dmd.Devices] + dmd.Devices.getSubOrganizers() + \ | |
dmd.Devices.getSubDevices(): | |
if not o.hasProperty('zDeviceTemplates'): continue | |
templates = [] | |
for t in o.zDeviceTemplates: | |
if t not in componentTemplates: | |
templates.append(t) | |
else: | |
print " - removing %s template from %s." % (t, o.id) | |
fixCount += 1 | |
o.zDeviceTemplates = templates | |
commit() | |
if fixCount > 0: print | |
def findNoDatapointThresholds(self): | |
noDpThreshes = {} | |
for o in [dmd.Devices] + dmd.Devices.getSubOrganizers() + \ | |
dmd.Devices.getSubDevices(): | |
for template in o.getRRDTemplates(): | |
for threshold in template.thresholds.objectValues(): | |
if not threshold.dsnames: | |
path = threshold.getPrimaryUrlPath() | |
noDpThreshes[path] = threshold.id | |
if noDpThreshes: | |
print "\nThe following thresholds do not have any datapoints associated with them.\n" | |
for threshold in sorted(noDpThreshes.keys()): | |
print "\t", threshold | |
print "\nEither remove the threshold or associate a datapoint with the threshold.\n" | |
def removeInvalidIpAddresses(self): | |
print "Removing IP addresses linked to invalid devices." | |
fixCount = 0 | |
for n in dmd.Networks.getSubOrganizers(): | |
for i in n.ipaddresses(): | |
try: | |
unused = i.getDeviceLink() | |
except KeyboardInterrupt: | |
self.exitGracefully() | |
except: | |
print " - removing %s" % i.id | |
n.ipaddresses._delObject(i.id) | |
fixCount += 1 | |
commit() | |
if fixCount > 0: print | |
def fixProducts(self): | |
""" | |
Cleanup os and hw product relations | |
""" | |
for device in dmd.Devices.getSubDevicesGen(): | |
productClass = device.os.productClass() | |
if productClass is None: continue | |
productClass = productClass.primaryAq() | |
for instance in productClass.instances(): | |
try: | |
if instance.getPrimaryUrlPath() == device.os.getPrimaryUrlPath(): | |
break | |
except KeyboardInterrupt: | |
self.exitGracefully() | |
except: | |
pass | |
else: | |
print "Cleaning up one-way link from %s to %s." % ( | |
device.id, productClass.name) | |
device.os.productClass._remove() | |
commit() | |
for device in dmd.Devices.getSubDevicesGen(): | |
productClass = device.hw.productClass() | |
if productClass is None: continue | |
productClass = productClass.primaryAq() | |
for instance in productClass.instances(): | |
try: | |
if instance.getPrimaryUrlPath() == device.hw.getPrimaryUrlPath(): | |
break | |
except KeyboardInterrupt: | |
self.exitGracefully() | |
except: | |
pass | |
else: | |
print "Cleaning up one-way link from %s to %s." % ( | |
device.id, productClass.name) | |
device.hw.productClass._remove() | |
commit() | |
def repairRouting(self): | |
from Products.ZenRelations.Exceptions import ObjectNotFound | |
for brain in dmd.Networks.ipSearch(): | |
ip = brain.getObject() | |
interface = ip.interface() | |
if interface: | |
try: | |
for if_ip in interface.ipaddresses(): | |
if if_ip.getPrimaryId() == ip.getPrimaryId(): | |
break | |
else: | |
print "Interface has no link back to %s." % ip.getIpAddress() | |
ip.interface._remove() | |
commit() | |
except: | |
print "Warning .. exception occurred .. skipping" | |
pass | |
for brain in dmd.Networks.ipSearch(): | |
ip = brain.getObject() | |
for clientroute in ip.clientroutes(): | |
nexthop = clientroute.nexthop() | |
if not nexthop or nexthop.getPrimaryId() != ip.getPrimaryId(): | |
print "Route nexthop has no link back to %s." % ip.getIpAddress() | |
try: | |
ip.clientroutes._remove(ip) | |
commit() | |
except ObjectNotFound: | |
pass | |
def reindex(self):
    """
    Call reIndex() on the Devices, Services, Events, Manufacturers and
    Networks roots in that order, committing after each one.
    """
    rootNames = ('Devices', 'Services', 'Events', 'Manufacturers',
                 'Networks')
    for rootName in rootNames:
        getattr(dmd, rootName).reIndex()
        commit()
if __name__ == "__main__":
    # Script entry point: build the fixer and run every step.
    ZenFixIt().run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment