russorat · February 20, 2020 17:18
diff --git a/arris-scraper.py b/arris-scraper.py
 #!/usr/bin/env python3
 # -*- coding: UTF-8 -*-
 '''arris-scraper.py (c) 2019 Matthew J Ernisse <[email protected]>
 All Rights Reserved.

 Loads the status page from an Arris/Motorola SB8200 modem, parses
 it and writes the values into an InfluxDB database.

 Redistribution and use in source and binary forms,
 with or without modification, are permitted provided
 that the following conditions are met:

    * Redistributions of source code must retain the
      above copyright notice, this list of conditions
      and the following disclaimer.
    * Redistributions in binary form must reproduce
      the above copyright notice, this list of conditions
      and the following disclaimer in the documentation
      and/or other materials provided with the distribution.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 '''
 import datetime
 import math
 import requests
 import sys
 from bs4 import BeautifulSoup
 from influxdb_client import InfluxDBClient, Point
 from influxdb_client.client.write_api import SYNCHRONOUS

 # InfluxDB connection settings. url, token and org are handed to
 # InfluxDBClient in the __main__ section; bucket and org are passed to
 # every write call in the send*Measurements functions.
 # org, bucket and token are empty here -- presumably they must be filled
 # in with real values before the script can write anything.
 url = "https://us-west-2-1.aws.cloud2.influxdata.com"
 org = ""
 bucket = ""
 token = ""


 def parseDownstream(obj):
 	''' Parse the cells of the downstream channel table.

 	obj is an iterable of cells that expose a .string attribute. A cell
 	reading "Channel ID" starts a header row: it and the 7 cells after
 	it are skipped. Parsing stops at the first unskipped cell reading
 	"Channel" (presumably the header of the following upstream table).
 	Every complete group of 8 cells becomes one dict with the keys
 	id, status, modulation, freq, power, snr, corrected and uncorrected;
 	a trailing incomplete group is dropped. For freq, power and snr only
 	the first whitespace-separated token of the cell is converted.

 	Returns the list of channel dicts in the order they were read.
 	'''
 	rows = []
 	rowCells = []
 	headerCellsLeft = 0

 	for cell in obj:
 		# Still inside a header row: consume the cell and move on.
 		if headerCellsLeft > 0:
 			headerCellsLeft -= 1
 			continue

 		text = cell.string

 		if text == "Channel ID":
 			# First header cell; 7 more header cells follow it.
 			headerCellsLeft = 7
 			continue

 		# We ran past the end of the downstream rows.
 		if text == "Channel":
 			break

 		rowCells.append(text)

 		if len(rowCells) == 8:
 			rows.append(rowCells)
 			rowCells = []

 	# Convert only after all rows are collected, one dict per row.
 	return [
 		{
 			'id': int(row[0]),
 			'status': row[1],
 			'modulation': row[2],
 			'freq': int(row[3].split()[0]),
 			'power': float(row[4].split()[0]),
 			'snr': float(row[5].split()[0]),
 			'corrected': int(row[6]),
 			'uncorrected': int(row[7])
 		}
 		for row in rows
 	]


 def parseUpstream(obj):
 	''' Parse the cells of the upstream channel table.

 	obj is an iterable of cells that expose a .string attribute. A cell
 	reading "Channel" starts a header row: it and the 6 cells after it
 	are skipped. Every complete group of 7 remaining cells becomes one
 	dict with the keys id, channel, status, type, freq, width and power;
 	a trailing incomplete group is dropped. For freq, width and power
 	only the first whitespace-separated token of the cell is converted.

 	Returns the list of channel dicts in the order they were read.
 	'''
 	rows = []
 	rowCells = []
 	headerCellsLeft = 0

 	for cell in obj:
 		# Still inside a header row: consume the cell and move on.
 		if headerCellsLeft > 0:
 			headerCellsLeft -= 1
 			continue

 		text = cell.string

 		if text == "Channel":
 			# First header cell; 6 more header cells follow it.
 			headerCellsLeft = 6
 			continue

 		rowCells.append(text)

 		if len(rowCells) == 7:
 			rows.append(rowCells)
 			rowCells = []

 	# Convert only after all rows are collected, one dict per row.
 	return [
 		{
 			'id': int(row[0]),
 			'channel': int(row[1]),
 			'status': row[2],
 			'type': row[3],
 			'freq': int(row[4].split()[0]),
 			'width': int(row[5].split()[0]),
 			'power': float(row[6].split()[0]),
 		}
 		for row in rows
 	]


 def sendDownstreamMeasurements(db, channels, timeStamp):
 	''' Write the channel dicts from parseDownstream as InfluxDB points.

 	One 'arris_downstream' point is built per channel, stamped with
 	timeStamp, and all points are handed to db.write() in a single
 	call together with the module-level bucket and org.

 	Fields:
 		- power
 		- snr
 		- corrected
 		- uncorrected
 	Tags:
 		- id
 		- status
 		- modulation
 		- freq
 	Any other key in a channel dict is ignored.
 	'''
 	fieldKeys = ['power', 'snr', 'corrected', 'uncorrected']
 	tagKeys = ['id', 'status', 'modulation','freq']

 	points = []
 	for channel in channels:
 		# The two key lists are disjoint, so each key lands in at
 		# most one of the two dicts.
 		pointFields = {
 			key: value for key, value in channel.items()
 			if key in fieldKeys
 		}
 		pointTags = {
 			key: value for key, value in channel.items()
 			if key in tagKeys
 		}

 		points.append({
 			'measurement': 'arris_downstream',
 			'tags': pointTags,
 			'time': timeStamp,
 			'fields': pointFields,
 		})

 	db.write(bucket, org, points)

 def sendUpstreamMeasurements(db, channels, timeStamp):
 	''' Write the channel dicts from parseUpstream as InfluxDB points.

 	One 'arris_upstream' point is built per channel, stamped with
 	timeStamp, and all points are handed to db.write() in a single
 	call together with the module-level bucket and org.

 	Fields:
 		- power
 	Tags:
 		- id
 		- channel
 		- status
 		- type
 		- freq
 	Any other key in a channel dict (e.g. width) is ignored.
 	'''
 	fieldKeys = ['power']
 	tagKeys = ['id', 'channel', 'status', 'type','freq']

 	points = []
 	for channel in channels:
 		# The two key lists are disjoint, so each key lands in at
 		# most one of the two dicts.
 		pointFields = {
 			key: value for key, value in channel.items()
 			if key in fieldKeys
 		}
 		pointTags = {
 			key: value for key, value in channel.items()
 			if key in tagKeys
 		}

 		points.append({
 			'measurement': 'arris_upstream',
 			'tags': pointTags,
 			'time': timeStamp,
 			'fields': pointFields,
 		})

 	db.write(bucket, org, points)

 if __name__ == "__main__":
 	# Script entry point: fetch the modem status page, parse the
 	# downstream and upstream channel tables and write both sets of
 	# measurements to InfluxDB.

 	# One UTC ISO 8601 timestamp shared by every point of this run.
 	timeStamp = datetime.datetime.now(datetime.timezone.utc).isoformat()

 	# We seem to get a different page from the modem if we
 	# use the generic requests User-Agent.
 	headers = {
 		'User-Agent':
 		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:64.0) Gecko/20100101 Firefox/64.0'
 	}

 	# requests never times out by default; bound the call so an
 	# unresponsive modem cannot hang the scraper forever.
 	resp = requests.get(
 		"http://192.168.100.1/cmconnectionstatus.html",
 		headers=headers,
 		timeout=30
 	)
 	resp.raise_for_status()

 	soup = BeautifulSoup(resp.text, 'lxml')
 	infoTables = soup.find_all('table', {'class': 'simpleTable'})

 	# The tables at index 1 and 2 are read below; fail with a clear
 	# message instead of a bare IndexError if the page looks different.
 	if len(infoTables) < 3:
 		raise SystemExit(
 			"arris-scraper: expected at least 3 'simpleTable' tables "
 			"on the status page, found %d" % len(infoTables)
 		)

 	downstream = infoTables[1]
 	upstream = infoTables[2]

 	# find_all_next() returns every later <td> in the document, not only
 	# those inside the table; the parsers handle header cells themselves.
 	downChannels = parseDownstream(downstream.find_all_next('td'))
 	upChannels = parseUpstream(upstream.find_all_next('td'))

 	client = InfluxDBClient(url=url, token=token, org=org)

 	write_api = client.write_api(write_options=SYNCHRONOUS)

 	sendDownstreamMeasurements(write_api, downChannels, timeStamp)
 	sendUpstreamMeasurements(write_api, upChannels, timeStamp)
	#!/usr/bin/env python3
	# -*- coding: UTF-8 -*-
	'''arris-scraper.py (c) 2019 Matthew J Ernisse <[email protected]>
	All Rights Reserved.

	Loads the status page from an Arris/Motorola SB8200 modem, parses
	it and writes the values into an InfluxDB database.

	Redistribution and use in source and binary forms,
	with or without modification, are permitted provided
	that the following conditions are met:

	* Redistributions of source code must retain the
	above copyright notice, this list of conditions
	and the following disclaimer.
	* Redistributions in binary form must reproduce
	the above copyright notice, this list of conditions
	and the following disclaimer in the documentation
	and/or other materials provided with the distribution.

	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
	FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
	COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
	INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
	BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
	OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
	ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
	TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
	USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	'''
	import datetime
	import math
	import requests
	import sys
	from bs4 import BeautifulSoup
	from influxdb_client import InfluxDBClient, Point
	from influxdb_client.client.write_api import SYNCHRONOUS

	# InfluxDB connection settings. url, token and org are handed to
	# InfluxDBClient in the __main__ section; bucket and org are passed to
	# every write call in the send*Measurements functions.
	# org, bucket and token are empty here -- presumably they must be filled
	# in with real values before the script can write anything.
	url = "https://us-west-2-1.aws.cloud2.influxdata.com"
	org = ""
	bucket = ""
	token = ""


	def parseDownstream(obj):
		''' Parse the cells of the downstream channel table.

		obj is an iterable of cells that expose a .string attribute. A cell
		reading "Channel ID" starts a header row: it and the 7 cells after
		it are skipped. Parsing stops at the first unskipped cell reading
		"Channel" (presumably the header of the following upstream table).
		Every complete group of 8 cells becomes one dict with the keys
		id, status, modulation, freq, power, snr, corrected and uncorrected;
		a trailing incomplete group is dropped. For freq, power and snr only
		the first whitespace-separated token of the cell is converted.

		Returns the list of channel dicts in the order they were read.
		'''
		rows = []
		rowCells = []
		headerCellsLeft = 0

		for cell in obj:
			# Still inside a header row: consume the cell and move on.
			if headerCellsLeft > 0:
				headerCellsLeft -= 1
				continue

			text = cell.string

			if text == "Channel ID":
				# First header cell; 7 more header cells follow it.
				headerCellsLeft = 7
				continue

			# We ran past the end of the downstream rows.
			if text == "Channel":
				break

			rowCells.append(text)

			if len(rowCells) == 8:
				rows.append(rowCells)
				rowCells = []

		# Convert only after all rows are collected, one dict per row.
		return [
			{
				'id': int(row[0]),
				'status': row[1],
				'modulation': row[2],
				'freq': int(row[3].split()[0]),
				'power': float(row[4].split()[0]),
				'snr': float(row[5].split()[0]),
				'corrected': int(row[6]),
				'uncorrected': int(row[7])
			}
			for row in rows
		]


	def parseUpstream(obj):
		''' Parse the cells of the upstream channel table.

		obj is an iterable of cells that expose a .string attribute. A cell
		reading "Channel" starts a header row: it and the 6 cells after it
		are skipped. Every complete group of 7 remaining cells becomes one
		dict with the keys id, channel, status, type, freq, width and power;
		a trailing incomplete group is dropped. For freq, width and power
		only the first whitespace-separated token of the cell is converted.

		Returns the list of channel dicts in the order they were read.
		'''
		rows = []
		rowCells = []
		headerCellsLeft = 0

		for cell in obj:
			# Still inside a header row: consume the cell and move on.
			if headerCellsLeft > 0:
				headerCellsLeft -= 1
				continue

			text = cell.string

			if text == "Channel":
				# First header cell; 6 more header cells follow it.
				headerCellsLeft = 6
				continue

			rowCells.append(text)

			if len(rowCells) == 7:
				rows.append(rowCells)
				rowCells = []

		# Convert only after all rows are collected, one dict per row.
		return [
			{
				'id': int(row[0]),
				'channel': int(row[1]),
				'status': row[2],
				'type': row[3],
				'freq': int(row[4].split()[0]),
				'width': int(row[5].split()[0]),
				'power': float(row[6].split()[0]),
			}
			for row in rows
		]


	def sendDownstreamMeasurements(db, channels, timeStamp):
		''' Write the channel dicts from parseDownstream as InfluxDB points.

		One 'arris_downstream' point is built per channel, stamped with
		timeStamp, and all points are handed to db.write() in a single
		call together with the module-level bucket and org.

		Fields:
			- power
			- snr
			- corrected
			- uncorrected
		Tags:
			- id
			- status
			- modulation
			- freq
		Any other key in a channel dict is ignored.
		'''
		fieldKeys = ['power', 'snr', 'corrected', 'uncorrected']
		tagKeys = ['id', 'status', 'modulation','freq']

		points = []
		for channel in channels:
			# The two key lists are disjoint, so each key lands in at
			# most one of the two dicts.
			pointFields = {
				key: value for key, value in channel.items()
				if key in fieldKeys
			}
			pointTags = {
				key: value for key, value in channel.items()
				if key in tagKeys
			}

			points.append({
				'measurement': 'arris_downstream',
				'tags': pointTags,
				'time': timeStamp,
				'fields': pointFields,
			})

		db.write(bucket, org, points)

	def sendUpstreamMeasurements(db, channels, timeStamp):
		''' Write the channel dicts from parseUpstream as InfluxDB points.

		One 'arris_upstream' point is built per channel, stamped with
		timeStamp, and all points are handed to db.write() in a single
		call together with the module-level bucket and org.

		Fields:
			- power
		Tags:
			- id
			- channel
			- status
			- type
			- freq
		Any other key in a channel dict (e.g. width) is ignored.
		'''
		fieldKeys = ['power']
		tagKeys = ['id', 'channel', 'status', 'type','freq']

		points = []
		for channel in channels:
			# The two key lists are disjoint, so each key lands in at
			# most one of the two dicts.
			pointFields = {
				key: value for key, value in channel.items()
				if key in fieldKeys
			}
			pointTags = {
				key: value for key, value in channel.items()
				if key in tagKeys
			}

			points.append({
				'measurement': 'arris_upstream',
				'tags': pointTags,
				'time': timeStamp,
				'fields': pointFields,
			})

		db.write(bucket, org, points)

	if __name__ == "__main__":
		# Script entry point: fetch the modem status page, parse the
		# downstream and upstream channel tables and write both sets of
		# measurements to InfluxDB.

		# One UTC ISO 8601 timestamp shared by every point of this run.
		timeStamp = datetime.datetime.now(datetime.timezone.utc).isoformat()

		# We seem to get a different page from the modem if we
		# use the generic requests User-Agent.
		headers = {
			'User-Agent':
			'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:64.0) Gecko/20100101 Firefox/64.0'
		}

		# requests never times out by default; bound the call so an
		# unresponsive modem cannot hang the scraper forever.
		resp = requests.get(
			"http://192.168.100.1/cmconnectionstatus.html",
			headers=headers,
			timeout=30
		)
		resp.raise_for_status()

		soup = BeautifulSoup(resp.text, 'lxml')
		infoTables = soup.find_all('table', {'class': 'simpleTable'})

		# The tables at index 1 and 2 are read below; fail with a clear
		# message instead of a bare IndexError if the page looks different.
		if len(infoTables) < 3:
			raise SystemExit(
				"arris-scraper: expected at least 3 'simpleTable' tables "
				"on the status page, found %d" % len(infoTables)
			)

		downstream = infoTables[1]
		upstream = infoTables[2]

		# find_all_next() returns every later <td> in the document, not only
		# those inside the table; the parsers handle header cells themselves.
		downChannels = parseDownstream(downstream.find_all_next('td'))
		upChannels = parseUpstream(upstream.find_all_next('td'))

		client = InfluxDBClient(url=url, token=token, org=org)

		write_api = client.write_api(write_options=SYNCHRONOUS)

		sendDownstreamMeasurements(write_api, downChannels, timeStamp)
		sendUpstreamMeasurements(write_api, upChannels, timeStamp)