maxmarchuk · January 31, 2017 23:50
diff --git a/CKEditor specialChar description getter b/CKEditor specialChar description getter
 #!/usr/local/bin/python3

 from lxml import html
 import requests

 # Built-in sample used when ./symbols.txt cannot be read.
 DEFAULT_SYMBOLS = 'Α α Β β Γ γ Δ δ Ε ε Ζ ζ Η η Θ θ Ι'
 # Index of the description cell within a matched table row.
 DESCRIPTION_COLUMN = 4
 # Wrap the printed output once a line would exceed this many characters.
 MAX_LINE_LENGTH = 80

 # Load the whitespace-separated symbols to look up. The file is closed by the
 # context manager; only read/decode failures trigger the fallback so that
 # unrelated errors (e.g. KeyboardInterrupt) are no longer swallowed.
 try:
     with open("./symbols.txt", encoding="utf-8") as file:
         file_string = file.read().split()
 except (OSError, UnicodeError):
     file_string = DEFAULT_SYMBOLS.split()

 # get the page and turn it into a tree object
 # The timeout stops the script from hanging forever on an unresponsive host,
 # and raise_for_status() avoids silently parsing an HTTP error page.
 page = requests.get(
     'http://www.dionysia.org/html/entities/symbols.html',
     timeout=30,
 )
 page.raise_for_status()
 tree = html.fromstring(page.content)

 line_length = 0
 final_string = ''

 for symbol in file_string:
     # Find any table cells that have the symbol we're searching for and get
     # the whole row of cells that are its siblings. The symbol is passed as
     # an XPath variable instead of being spliced into the expression, so a
     # symbol such as '"' cannot break the query.
     result = tree.xpath(
         '//tr/td[contains(text(), $symbol)]/../*',
         symbol=symbol,
     )
     if len(result) <= DESCRIPTION_COLUMN:
         continue

     # Get the text of the description column; .text is None when the cell
     # has no leading text, so fall back to an empty string.
     desc = result[DESCRIPTION_COLUMN].text or ''
     # only get the description. I don't care about the unicode value
     desc = desc.split(',')[0]
     # in some cases, there are multiple definitions. Just get the first one
     desc = desc.split('=')[0]
     # remove any remaining whitespace at the start and end of the string
     desc = desc.strip()
     if not desc:
         # Nothing usable to print for this symbol.
         continue

     # Capitalize the first letter of every description
     desc = desc[0].capitalize() + desc[1:]
     current_string = "['{0}', '{1}'], ".format(symbol, desc)

     # Print the full string when you reach a particular character length
     # I am just using 80 since it's the standard line length
     if (line_length + len(current_string)) > MAX_LINE_LENGTH:
         print(final_string)
         line_length = 0
         final_string = ''
     final_string += current_string
     line_length += len(current_string)

 # Flush whatever is left on the last (partial) line.
 print(final_string)



 #TODO: add the ability to use symbols passed in through command line args
 #TODO: Use https://www.w3.org/TR/REC-html40/sgml/entities.html instead, as it's probably more reliable and has more characters. The only downside is that you won't be able to search by symbol, only by code.
	#!/usr/local/bin/python3

	from lxml import html
	import requests

	# Built-in sample used when ./symbols.txt cannot be read.
	DEFAULT_SYMBOLS = 'Α α Β β Γ γ Δ δ Ε ε Ζ ζ Η η Θ θ Ι'
	# Index of the description cell within a matched table row.
	DESCRIPTION_COLUMN = 4
	# Wrap the printed output once a line would exceed this many characters.
	MAX_LINE_LENGTH = 80

	# Load the whitespace-separated symbols to look up. The file is closed by the
	# context manager; only read/decode failures trigger the fallback so that
	# unrelated errors (e.g. KeyboardInterrupt) are no longer swallowed.
	try:
	    with open("./symbols.txt", encoding="utf-8") as file:
	        file_string = file.read().split()
	except (OSError, UnicodeError):
	    file_string = DEFAULT_SYMBOLS.split()

	# get the page and turn it into a tree object
	# The timeout stops the script from hanging forever on an unresponsive host,
	# and raise_for_status() avoids silently parsing an HTTP error page.
	page = requests.get(
	    'http://www.dionysia.org/html/entities/symbols.html',
	    timeout=30,
	)
	page.raise_for_status()
	tree = html.fromstring(page.content)

	line_length = 0
	final_string = ''

	for symbol in file_string:
	    # Find any table cells that have the symbol we're searching for and get
	    # the whole row of cells that are its siblings. The symbol is passed as
	    # an XPath variable instead of being spliced into the expression, so a
	    # symbol such as '"' cannot break the query.
	    result = tree.xpath(
	        '//tr/td[contains(text(), $symbol)]/../*',
	        symbol=symbol,
	    )
	    if len(result) <= DESCRIPTION_COLUMN:
	        continue

	    # Get the text of the description column; .text is None when the cell
	    # has no leading text, so fall back to an empty string.
	    desc = result[DESCRIPTION_COLUMN].text or ''
	    # only get the description. I don't care about the unicode value
	    desc = desc.split(',')[0]
	    # in some cases, there are multiple definitions. Just get the first one
	    desc = desc.split('=')[0]
	    # remove any remaining whitespace at the start and end of the string
	    desc = desc.strip()
	    if not desc:
	        # Nothing usable to print for this symbol.
	        continue

	    # Capitalize the first letter of every description
	    desc = desc[0].capitalize() + desc[1:]
	    current_string = "['{0}', '{1}'], ".format(symbol, desc)

	    # Print the full string when you reach a particular character length
	    # I am just using 80 since it's the standard line length
	    if (line_length + len(current_string)) > MAX_LINE_LENGTH:
	        print(final_string)
	        line_length = 0
	        final_string = ''
	    final_string += current_string
	    line_length += len(current_string)

	# Flush whatever is left on the last (partial) line.
	print(final_string)



	#TODO: add the ability to use symbols passed in through command line args
	#TODO: Use https://www.w3.org/TR/REC-html40/sgml/entities.html instead, as it's probably more reliable and has more characters. The only downside is that you won't be able to search by symbol, only by code.
No results found