Skip to content

Instantly share code, notes, and snippets.

@dzakyputra
Created October 7, 2019 07:54
Show Gist options
  • Save dzakyputra/854ea1910f6257fee5ff88df48999a65 to your computer and use it in GitHub Desktop.
Save dzakyputra/854ea1910f6257fee5ff88df48999a65 to your computer and use it in GitHub Desktop.
import pandas as pd
import os
from PIL import Image
import glob
# Build image-details.xlsx: one output row per (user, field) pair listing the
# validity value taken from result.xlsx together with the pixel dimensions
# and file size of the image that field is associated with.
#
# Inputs read by this script:
#   * result.xlsx - column 0 holds the numeric user id; columns 4, 8, 12, 16
#     and 20 hold the validity value of each field (see _FIELDS);
#   * document/<user_id>/jpg/*.jpg|*.jpeg - the user's images, recognised by
#     'KTP', 'TRANSKRIP' or 'IJAZAH' appearing in the file path.

# (column position in result.xlsx, value written to 'type', document key)
_FIELDS = (
    (4, 'name', 'KTP'),
    (8, 'dob', 'KTP'),
    (12, 'nik', 'KTP'),
    (16, 'institution', 'IJAZAH'),
    (20, 'ipk', 'TRANSKRIP'),
)

# Checked in this order; the first key contained in a path decides its kind.
_DOCUMENT_KEYS = ('KTP', 'TRANSKRIP', 'IJAZAH')

_IMAGE_PATTERNS = ('*.jpg', '*.jpeg')


def _image_details(path):
    """Return ``(width, height, size_kb)`` for the image stored at *path*.

    ``size_kb`` is the on-disk size in bytes divided by 1000.
    """
    # Only the dimensions are needed, so close the file right away instead
    # of keeping one open handle per document for the whole run.
    with Image.open(path) as image:
        width, height = image.size
    return width, height, os.path.getsize(path) / 1000


def _find_documents(user_id):
    """Map each document key found for *user_id* to its image details.

    Keys with no matching file are simply absent from the returned dict.
    When several files match the same key, the last one globbed wins.
    """
    folder = os.path.join('document', user_id, 'jpg')
    found = {}
    for pattern in _IMAGE_PATTERNS:
        for path in glob.glob(os.path.join(folder, pattern)):
            for key in _DOCUMENT_KEYS:
                if key in path:
                    found[key] = _image_details(path)
                    break
    return found


df = pd.read_excel('result.xlsx')

out = {
    'user_id': [],
    'type': [],
    'validity': [],
    'height': [],
    'width': [],
    'size(kb)': [],
}

for _, row in df.iterrows():
    # Positional access must go through .iloc: plain row[0] on a
    # label-indexed Series is deprecated in recent pandas releases.
    user_id = str(int(row.iloc[0]))
    documents = _find_documents(user_id)

    for column, field, document in _FIELDS:
        if column >= len(row):
            # The sheet has fewer columns than this field needs; skip it
            # rather than indexing past the end of the row.
            continue

        if document in documents:
            width, height, size = documents[document]
        else:
            print('file not found')
            width, height, size = 0, 0, 0.0

        # Every column receives exactly one value per field, so the lists
        # always stay the same length and pd.DataFrame(out) cannot fail on
        # ragged input, however many columns result.xlsx has.
        out['user_id'].append(user_id)
        out['type'].append(field)
        out['validity'].append(row.iloc[column])
        out['height'].append(height)
        out['width'].append(width)
        out['size(kb)'].append(size)

    print('processing file')

output = pd.DataFrame(out)
output.to_excel('image-details.xlsx', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment