Last active
August 7, 2019 06:56
-
-
Save zaltoprofen/7b227b18395d0b143dfe54dece5ed62c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
import time | |
import cv2 | |
import pandas as pd | |
import pytesseract | |
from PIL import Image | |
# Candidate token: exactly nine characters from [0-9a-z] that are not part of
# a longer run of such characters.
_CODE_PATTERN = re.compile('(?<![0-9a-z])[0-9a-z]{9}(?![0-9a-z])')
# Shape check: exactly three lowercase letters, any number of digits around
# and between them.
_CODE_PATTERN2 = re.compile('([0-9]*[a-z]){3}[0-9]*')


def extract_serial_code(s):
    """Return every serial-code-shaped token found in *s*.

    A token qualifies when it is a stand-alone run of nine lowercase
    alphanumeric characters of which exactly three are letters.

    Args:
        s: Text to search (e.g. raw OCR output).

    Returns:
        A list of matching tokens in order of appearance; empty if none.
    """
    found = []
    for match in _CODE_PATTERN.finditer(s):
        token = match.group(0)
        if _CODE_PATTERN2.fullmatch(token) is not None:
            found.append(token)
    return found
class VideoCaptureIterator:
    """Iterator over centre-cropped frames from an OpenCV capture device.

    Each step reads one frame, keeps its middle region (the central half in
    both dimensions), shows that region in a preview window and returns it
    as a PIL image in RGB channel order.

    Args:
        device_id: Device index passed to ``cv2.VideoCapture``.
        window_name: Title of the preview window.
    """

    def __init__(self, device_id=0, window_name='captured'):
        self._window_name = window_name
        # Set before opening the capture so close()/__del__ stay safe even
        # if cv2.VideoCapture() raises.
        self._window_shown = False
        self._cap = cv2.VideoCapture(device_id)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next cropped frame as a ``PIL.Image``.

        Failed reads are retried after a short pause. Iteration ends
        (``StopIteration``) once the capture device is not open, because a
        closed device can never deliver a frame.
        """
        while True:
            if not self._cap.isOpened():
                # Without this check a missing/closed camera would make the
                # retry loop below spin forever.
                raise StopIteration
            retval, img = self._cap.read()
            if not retval:
                time.sleep(0.05)
                continue
            # Only height/width are needed; slicing shape also tolerates
            # 2-D (single-channel) frames.
            h, w = img.shape[:2]
            img = img[h // 4:h // 4 * 3, w // 4:w // 4 * 3]
            cv2.imshow(self._window_name, img)
            self._window_shown = True
            cv2.waitKey(1)  # lets the GUI process events and repaint
            if img.ndim == 3 and img.shape[2] == 3:
                # OpenCV delivers BGR while PIL interprets 3-channel arrays
                # as RGB; convert so colours are not swapped downstream.
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            return Image.fromarray(img)

    def close(self):
        """Release the capture device and close the preview window.

        Safe to call more than once and on a partially constructed instance.
        """
        cap = getattr(self, '_cap', None)
        if cap is not None:
            cap.release()
        if getattr(self, '_window_shown', False):
            # Only destroy a window that was actually created by imshow().
            self._window_shown = False
            cv2.destroyWindow(self._window_name)
            cv2.waitKey(1)

    def __del__(self):
        self.close()
def capture_code(image_iterator, image_dir='imgs'):
    """Run OCR over a stream of images and collect distinct serial codes.

    A frame is used only when exactly one serial code is extracted from its
    OCR text. The first time a code is seen it is printed and the frame is
    saved as ``<image_dir>/<code>.jpg``. A ``KeyboardInterrupt`` (Ctrl-C)
    stops the scan and the codes collected so far are returned.

    Args:
        image_iterator: Iterable of images that are passed to
            ``pytesseract.image_to_string`` and must provide ``save(path)``.
        image_dir: Directory for the saved frames; created if missing.
            Defaults to ``'imgs'``.

    Returns:
        List of distinct codes in order of first appearance.
    """
    collected = []
    seen = set()  # O(1) duplicate check; `collected` keeps the order
    os.makedirs(image_dir, exist_ok=True)
    try:
        for img in image_iterator:
            ocr_text = pytesseract.image_to_string(img)
            codes = extract_serial_code(ocr_text)
            # Frames with zero or several candidates are skipped.
            if len(codes) != 1:
                continue
            code = codes[0]
            if code in seen:
                continue
            print('code:', code)
            img.save(os.path.join(image_dir, f'{code}.jpg'))
            collected.append(code)
            seen.add(code)
    except KeyboardInterrupt:
        print('interrupted')
    return collected
def main(csv_name='serial_codes.csv'):
    """Scan serial codes from the default camera and write them to a CSV.

    Args:
        csv_name: Path of the CSV file to write; it gets a single ``code``
            column. Nothing is written when no code was scanned.
    """
    codes = capture_code(VideoCaptureIterator())
    if not codes:
        print('scanned no code')
        return
    pd.DataFrame({'code': codes}).to_csv(csv_name, index=False)
    print(f'outputted to {csv_name}')
# Start scanning only when run as a script, not when imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment