Skip to content

Instantly share code, notes, and snippets.

@DastanIqbal
Created March 10, 2018 11:04
Show Gist options
  • Save DastanIqbal/46983d2aa6c140b70a331b0771ce61fc to your computer and use it in GitHub Desktop.
Save DastanIqbal/46983d2aa6c140b70a331b0771ce61fc to your computer and use it in GitHub Desktop.
import glob, os
import json
import MySQLdb
from bs4 import BeautifulSoup
# Open the MySQL connection and the cursor used by the rest of the script.
# The "<...>" values are placeholders that must be filled in before running.
db_settings = {
    "host": "localhost",
    "user": "root",
    "passwd": "<password>",
    "db": "<dbname>",
}
db = MySQLdb.connect(**db_settings)
cur = db.cursor()

# Root folder of the HTML files, resolved against the directory the script
# is started from; the sub-folder names are appended to it further down.
wdir = "".join([os.getcwd(), "/<folderName>/"])
# Walk every "*.html" file in the "2k" and "1k" sub-folders of ``wdir``,
# scrape two of its tables into dicts and store both as JSON in MySQL.
# A file that cannot be parsed or inserted is reported and skipped, so a
# single malformed page does not abort the whole run.


def _font_text(cell):
    """Return the first text node inside ``cell``'s <font> tag, or None.

    Raises AttributeError when the cell has no <font> tag.
    """
    return cell.find('font').find(text=True)


def _parse_pi_table(rows):
    """Build a dict from the rows of the "PI" table.

    For each row the first cell is the key and the second cell the value.
    The value of the 'Name:' row is read from the <b> tag nested in <font>;
    every other value is the first text node of <font>. Empty values are
    stored as ''.

    Raises IndexError/AttributeError when a row lacks the expected cells
    or tags.
    """
    info = {}
    for row in rows:
        cells = row.find_all('td')
        key = _font_text(cells[0]).strip()
        if key == 'Name:':
            value = cells[1].find('font').find('b').find(text=True).strip()
        else:
            value = _font_text(cells[1])
        info[key] = value.strip() if value else ''
    return info


def _parse_marks_table(rows):
    """Build a dict from the rows of the marks table.

    The first and the last row are ignored; for the remaining rows the
    second cell is the key and the third cell the value ('' when empty).

    Raises ValueError when there are fewer than two rows (nothing to trim),
    and IndexError/AttributeError when a row lacks the expected cells/tags.
    """
    if len(rows) < 2:
        raise ValueError("marks table has fewer than two rows")
    info = {}
    for row in rows[1:-1]:
        cells = row.find_all('td')
        key = _font_text(cells[1]).strip()
        value = _font_text(cells[2])
        info[key] = value.strip() if value else ''
    return info


for fname in ["2k", "1k"]:
    os.chdir(wdir + "/" + fname)
    for html_path in glob.glob("*.html"):
        with open(html_path, 'r') as content_file:
            content = content_file.read()
        tables = BeautifulSoup(content, 'html.parser').find_all('table')

        # PI table: the fifth <table> of the page. The index lookup lives
        # inside the try so a page with fewer tables is skipped, not fatal.
        rows = []
        try:
            rows = tables[4].find_all('tr')
            pi_info = _parse_pi_table(rows)
        except Exception as err:
            print("Error in PI: " + str(rows))
            print("  file: " + html_path + ", cause: " + repr(err))
            continue

        # Marks table: the sixth <table> of the page.
        rows = []
        try:
            rows = tables[5].find_all('tr')
            marks_info = _parse_marks_table(rows)
        except Exception as err:
            print("Error in Marks Info: " + str(rows))
            print("  file: " + html_path + ", cause: " + repr(err))
            continue

        # Resolve the key column before touching the database so that the
        # error report never refers to an unbound or stale value.
        record_key = pi_info.get('extract Key')
        if record_key is None:
            print("Error in DB Insertions: " + html_path
                  + " has no 'extract Key' entry")
            continue

        try:
            cur.execute(
                """Insert into <TableName>(col1,col2,col3) values (%s,%s,%s)""",
                (record_key, json.dumps(pi_info), json.dumps(marks_info)))
            db.commit()
        except Exception as err:
            print("Error in DB Insertions: " + str(record_key))
            print("  file: " + html_path + ", cause: " + repr(err))
            # Discard the failed statement so it cannot leak into the next
            # file's transaction; a failing rollback is reported, not fatal.
            try:
                db.rollback()
            except Exception as rollback_err:
                print("  rollback failed: " + repr(rollback_err))

# All files processed: release the cursor and the connection.
cur.close()
db.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment