ilyasahsan123 · January 14, 2019 15:01
diff --git a/main.py b/main.py
 def detect_text(bucket, filename):
     """Extract transfer details from an image and store them in BigQuery.

     Runs Cloud Vision text detection on ``gs://<bucket>/<filename>``, parses
     the recognised text for "m-Transfer" entries and inserts one row per
     entry into the BigQuery table ``DATASET_NAME.TABLE_NAME``.

     Args:
         bucket: Name of the Cloud Storage bucket that holds the image.
         filename: Object name of the image inside ``bucket``.

     Returns:
         None. When no entry is recognised nothing is inserted; rows rejected
         by BigQuery are logged rather than raised.
     """
     import logging
     import re

     from google.cloud import bigquery
     from google.cloud import vision

     # Let Cloud Vision read the image straight from Cloud Storage.
     vision_client = vision.ImageAnnotatorClient()
     response = vision_client.text_detection({
         'source': {'image_uri': "gs://{}/{}".format(bucket, filename)}
     })

     # Flatten the OCR output to a single line so the pattern can span what
     # were separate lines in the image.
     text = response.full_text_annotation.text.replace('\n', ' ')

     # Capture groups, in order: transfer status, transfer date, account
     # number, name, nominal.
     regex = r"m-Transfer m-Transfer\s([a-zA-Z]*?)\s([\s\S]*?)Ke\s(\d*)\s([a-zA-Z][\s\S]*?)Rp.\s(\d{1,3}[\s\S]*?)\s"

     # Collect every match: keeping only the last one dropped earlier entries
     # and left the row undefined when nothing matched.
     rows = []
     for match in re.finditer(regex, text):
         status, date, account_number, name, nominal = match.groups()
         rows.append({
             'transfer_status': status,
             'transfer_date': date,
             'account_number': account_number,
             'name': name,
             # Drop thousands separators, then the last three characters
             # (presumably a ".00" decimal suffix -- TODO confirm).
             'nominal': nominal.replace(',', '')[:-3],
         })

     if not rows:
         logging.warning(
             "No transfer details found in gs://%s/%s", bucket, filename)
         return

     # Store the extracted rows in BigQuery.
     bigquery_client = bigquery.Client()
     dataset_ref = bigquery_client.dataset('DATASET_NAME')
     table_ref = dataset_ref.table('TABLE_NAME')
     table = bigquery_client.get_table(table_ref)

     # insert_rows expects a sequence of rows and returns one mapping per row
     # that failed to insert.
     errors = bigquery_client.insert_rows(table, rows)
     if errors:
         logging.error(
             "BigQuery rejected rows from gs://%s/%s: %s",
             bucket, filename, errors)
	def detect_text(bucket, filename):
		"""Extract transfer details from an image and store them in BigQuery.

		Runs Cloud Vision text detection on ``gs://<bucket>/<filename>``, parses
		the recognised text for "m-Transfer" entries and inserts one row per
		entry into the BigQuery table ``DATASET_NAME.TABLE_NAME``.

		Args:
			bucket: Name of the Cloud Storage bucket that holds the image.
			filename: Object name of the image inside ``bucket``.

		Returns:
			None. When no entry is recognised nothing is inserted; rows rejected
			by BigQuery are logged rather than raised.
		"""
		import logging
		import re

		from google.cloud import bigquery
		from google.cloud import vision

		# Let Cloud Vision read the image straight from Cloud Storage.
		vision_client = vision.ImageAnnotatorClient()
		response = vision_client.text_detection({
			'source': {'image_uri': "gs://{}/{}".format(bucket, filename)}
		})

		# Flatten the OCR output to a single line so the pattern can span what
		# were separate lines in the image.
		text = response.full_text_annotation.text.replace('\n', ' ')

		# Capture groups, in order: transfer status, transfer date, account
		# number, name, nominal. The lazy "*?" quantifiers are required; without
		# them each group could match at most a single character.
		regex = r"m-Transfer m-Transfer\s([a-zA-Z]*?)\s([\s\S]*?)Ke\s(\d*)\s([a-zA-Z][\s\S]*?)Rp.\s(\d{1,3}[\s\S]*?)\s"

		# Collect every match: keeping only the last one dropped earlier entries
		# and left the row undefined when nothing matched.
		rows = []
		for match in re.finditer(regex, text):
			status, date, account_number, name, nominal = match.groups()
			rows.append({
				'transfer_status': status,
				'transfer_date': date,
				'account_number': account_number,
				'name': name,
				# Drop thousands separators, then the last three characters
				# (presumably a ".00" decimal suffix -- TODO confirm).
				'nominal': nominal.replace(',', '')[:-3],
			})

		if not rows:
			logging.warning(
				"No transfer details found in gs://%s/%s", bucket, filename)
			return

		# Store the extracted rows in BigQuery.
		bigquery_client = bigquery.Client()
		dataset_ref = bigquery_client.dataset('DATASET_NAME')
		table_ref = dataset_ref.table('TABLE_NAME')
		table = bigquery_client.get_table(table_ref)

		# insert_rows expects a sequence of rows and returns one mapping per row
		# that failed to insert.
		errors = bigquery_client.insert_rows(table, rows)
		if errors:
			logging.error(
				"BigQuery rejected rows from gs://%s/%s: %s",
				bucket, filename, errors)