mohammedi-haroune · October 16, 2018 20:38
diff --git a/djezzy_info.py b/djezzy_info.py
 try:
    from PIL import Image
 except ImportError:
    import Image
 import pytesseract
 from selenium import webdriver
 from selenium import webdriver
 from selenium.common.exceptions import TimeoutException
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.common.by import By
 import base64
 import re

 def get_usage(timeout=5):
     """Scrape the Djezzy internet page and return the data usage figures.

     The consumption gauge is rendered on a ``<canvas>`` element, so the
     canvas is exported as a PNG and its text is read back with Tesseract OCR.

     Args:
         timeout: Seconds to wait for the gauge canvas to be present in the
             page. Defaults to 5.

     Returns:
         dict: ``{'data_usage': <str>, 'data_total': <str>}`` where each value
         is an amount followed by its unit as recognised by the OCR
         (e.g. ``'5.34 Go'`` and ``'90 Go'``).

     Raises:
         TimeoutException: the gauge canvas did not show up within ``timeout``.
         IndexError: the OCR text did not contain both the usage and the total.
     """
     # Local import: only needed to hand the decoded PNG bytes to PIL.
     import io

     # URL of the Djezzy web page that contains information like the phone
     # number, data usage, etc.
     url = 'http://www.internet.djezzy.dz/#richtext_page'

     # A real browser is used because fetching this URL with urllib's urlopen
     # returns a different page (observed by the original author).
     options = webdriver.ChromeOptions()
     # The command-line switch is used instead of the `headless` attribute so
     # the code does not depend on that setter being available.
     options.add_argument('--headless')
     driver = webdriver.Chrome(options=options)
     try:
         driver.get(url)

         # Make sure the element that holds the data consumption is loaded;
         # this raises TimeoutException when it does not appear in time.
         gauge_present = EC.presence_of_element_located(
             (By.CSS_SELECTOR, '#gauge-left canvas'))
         WebDriverWait(driver, timeout).until(gauge_present)

         # Export the canvas as a "data:image/png;base64,<payload>" URL.
         data_url = driver.execute_script(
             """return document.querySelector("#gauge-left canvas").toDataURL('image/png');""")
     finally:
         # Always shut the browser down, otherwise every call (and every
         # failure) leaves a Chrome process behind.
         driver.quit()

     # Keep only the payload that follows the first comma of the data URL.
     _, _, base64_image = data_url.partition(',')
     png_bytes = base64.b64decode(base64_image)

     # Run the OCR straight from memory: no fixed temporary file is needed and
     # the image handle is closed as soon as the text has been extracted.
     with Image.open(io.BytesIO(png_bytes)) as gauge_image:
         text = pytesseract.image_to_string(gauge_image)

     # Extract usage and total from the text, e.g. "5.34 Go \n Sur 90 Go".
     # The fractional part is optional so single-digit values such as "5 Go"
     # are matched as well.
     amounts = re.findall(r'\d+(?:\.\d+)? \w+', text)
     if len(amounts) < 2:
         # Same exception type as indexing the short list would raise, but
         # with the OCR output attached to make the failure diagnosable.
         raise IndexError(
             'could not read usage and total from OCR text: %r' % (text,))

     return {'data_usage': amounts[0], 'data_total': amounts[1]}
	try:
	from PIL import Image
	except ImportError:
	import Image
	import pytesseract
	from selenium import webdriver
	from selenium import webdriver
	from selenium.common.exceptions import TimeoutException
	from selenium.webdriver.support.ui import WebDriverWait
	from selenium.webdriver.support import expected_conditions as EC
	from selenium.webdriver.common.by import By
	import base64
	import re

	def get_usage(timeout=5):
	    """Scrape the Djezzy internet page and return the data usage figures.

	    The consumption gauge is rendered on a ``<canvas>`` element, so the
	    canvas is exported as a PNG and its text is read back with Tesseract OCR.

	    Args:
	        timeout: Seconds to wait for the gauge canvas to be present in the
	            page. Defaults to 5.

	    Returns:
	        dict: ``{'data_usage': <str>, 'data_total': <str>}`` where each value
	        is an amount followed by its unit as recognised by the OCR
	        (e.g. ``'5.34 Go'`` and ``'90 Go'``).

	    Raises:
	        TimeoutException: the gauge canvas did not show up within ``timeout``.
	        IndexError: the OCR text did not contain both the usage and the total.
	    """
	    # Local import: only needed to hand the decoded PNG bytes to PIL.
	    import io

	    # URL of the Djezzy web page that contains information like the phone
	    # number, data usage, etc.
	    url = 'http://www.internet.djezzy.dz/#richtext_page'

	    # A real browser is used because fetching this URL with urllib's urlopen
	    # returns a different page (observed by the original author).
	    options = webdriver.ChromeOptions()
	    # The command-line switch is used instead of the `headless` attribute so
	    # the code does not depend on that setter being available.
	    options.add_argument('--headless')
	    driver = webdriver.Chrome(options=options)
	    try:
	        driver.get(url)

	        # Make sure the element that holds the data consumption is loaded;
	        # this raises TimeoutException when it does not appear in time.
	        gauge_present = EC.presence_of_element_located(
	            (By.CSS_SELECTOR, '#gauge-left canvas'))
	        WebDriverWait(driver, timeout).until(gauge_present)

	        # Export the canvas as a "data:image/png;base64,<payload>" URL.
	        data_url = driver.execute_script(
	            """return document.querySelector("#gauge-left canvas").toDataURL('image/png');""")
	    finally:
	        # Always shut the browser down, otherwise every call (and every
	        # failure) leaves a Chrome process behind.
	        driver.quit()

	    # Keep only the payload that follows the first comma of the data URL.
	    _, _, base64_image = data_url.partition(',')
	    png_bytes = base64.b64decode(base64_image)

	    # Run the OCR straight from memory: no fixed temporary file is needed and
	    # the image handle is closed as soon as the text has been extracted.
	    with Image.open(io.BytesIO(png_bytes)) as gauge_image:
	        text = pytesseract.image_to_string(gauge_image)

	    # Extract usage and total from the text, e.g. "5.34 Go \n Sur 90 Go".
	    # The fractional part is optional so single-digit values such as "5 Go"
	    # are matched as well.
	    amounts = re.findall(r'\d+(?:\.\d+)? \w+', text)
	    if len(amounts) < 2:
	        # Same exception type as indexing the short list would raise, but
	        # with the OCR output attached to make the failure diagnosable.
	        raise IndexError(
	            'could not read usage and total from OCR text: %r' % (text,))

	    return {'data_usage': amounts[0], 'data_total': amounts[1]}