tbmreza · May 15, 2019 07:47
diff --git a/gcloudTXTtoArray.py b/gcloudTXTtoArray.py
 from collections import namedtuple
 import re

 ''' TXT structure sample:

 "PROVINSI"(489,185),(652,185),(652,219),(489,219)
 "JAWA"(658,185),(747,185),(747,219),(658,219)
 "BARAT"(758,185),(866,185),(866,219),(758,219)
 "KABUPATEN"(508,220),(704,220),(704,252),(508,252)
 "BEKASI"(724,220),(848,220),(848,252),(724,252)
 "NIK"(187,266),(256,266),(256,303),(187,303)
 ":"(377,260),(391,260),(391,311),(377,311)

 '''

 gcloud_output_clean_path = 'path_to_txt'

 # Read every line of the cleaned OCR output up front. Each useful line is
 # expected to look like: "TEXT"(x,y),(x,y),(x,y),(x,y)
 with open(gcloud_output_clean_path, "r") as the_file:
    lines = the_file.readlines()

 text_pattern = r'"(.+)"' # Detects string between quotes.
 xy_pattern = r'(\d+)' # Detects numbers.

 # Loop-invariant objects are built once instead of once per input line.
 text_re = re.compile(text_pattern)
 xy_re = re.compile(xy_pattern)
 To_namedtuple = namedtuple('Bounding', 'x0 x1 y0 y1')

 # Each entry is [lowercased text, Bounding(x0, x1, y0, y1)].
 pairs_list = []

 for each_line in lines:
    t = text_re.search(each_line)
    if t is None:
        # Blank or malformed line (no quoted text): skip it rather than
        # failing on `None.group(1)`.
        continue
    texts = t.group(1)

    # Delete match (to handle digit between quotes).
    each_line = text_re.sub('', each_line)

    points = xy_re.findall(each_line)
    if len(points) < 6:
        # Indices 0, 1, 4 and 5 are read below; skip lines that lack them.
        continue

    # Raw coordinate data is represented as [(x0,y0),(x1,y0),(x1,y1),(x0,y1)].
    # Possible positions of [x0 x1 y0 y1] are respectively (0 4 1 5)
    points_tuple = To_namedtuple(
        int(points[0]), int(points[4]), int(points[1]), int(points[5])
    )

    element = [texts.lower(), points_tuple]
    if len(texts) > 1: # Omit single character detected text.
        pairs_list.append(element)
	from collections import namedtuple
	import re

	''' TXT structure sample:

	"PROVINSI"(489,185),(652,185),(652,219),(489,219)
	"JAWA"(658,185),(747,185),(747,219),(658,219)
	"BARAT"(758,185),(866,185),(866,219),(758,219)
	"KABUPATEN"(508,220),(704,220),(704,252),(508,252)
	"BEKASI"(724,220),(848,220),(848,252),(724,252)
	"NIK"(187,266),(256,266),(256,303),(187,303)
	":"(377,260),(391,260),(391,311),(377,311)

	'''

	gcloud_output_clean_path = 'path_to_txt'

	# Read every line of the cleaned OCR output up front. Each useful line is
	# expected to look like: "TEXT"(x,y),(x,y),(x,y),(x,y)
	with open(gcloud_output_clean_path, "r") as the_file:
		lines = the_file.readlines()

	text_pattern = r'"(.+)"' # Detects string between quotes.
	xy_pattern = r'(\d+)' # Detects numbers.

	# Loop-invariant objects are built once instead of once per input line.
	text_re = re.compile(text_pattern)
	xy_re = re.compile(xy_pattern)
	To_namedtuple = namedtuple('Bounding', 'x0 x1 y0 y1')

	# Each entry is [lowercased text, Bounding(x0, x1, y0, y1)].
	pairs_list = []

	for each_line in lines:
		t = text_re.search(each_line)
		if t is None:
			# Blank or malformed line (no quoted text): skip it rather than
			# failing on `None.group(1)`.
			continue
		texts = t.group(1)

		# Delete match (to handle digit between quotes).
		each_line = text_re.sub('', each_line)

		points = xy_re.findall(each_line)
		if len(points) < 6:
			# Indices 0, 1, 4 and 5 are read below; skip lines that lack them.
			continue

		# Raw coordinate data is represented as [(x0,y0),(x1,y0),(x1,y1),(x0,y1)].
		# Possible positions of [x0 x1 y0 y1] are respectively (0 4 1 5)
		points_tuple = To_namedtuple(
			int(points[0]), int(points[4]), int(points[1]), int(points[5])
		)

		element = [texts.lower(), points_tuple]
		if len(texts) > 1: # Omit single character detected text.
			pairs_list.append(element)