Skip to content

Instantly share code, notes, and snippets.

@sagar290
Created January 11, 2021 16:46
Show Gist options
  • Select an option

  • Save sagar290/fc5cf4b9c73eae80d72cdf3a0496956a to your computer and use it in GitHub Desktop.

Select an option

Save sagar290/fc5cf4b9c73eae80d72cdf3a0496956a to your computer and use it in GitHub Desktop.
import pandas as pd
import requests
import json
import sched, time
import base64
from bs4 import BeautifulSoup as BSHTML
# print(df.to_json(orient = 'index', indent = 2))
# print(df.to_json(orient='records', indent = 2))
# url = './admissionerpap.csv'
# df = pd.read_csv(url, sep=',')
# data = df.to_json(orient='records', indent = 2)
def purifyString(item, key):
    """Move inline base64 images of an HTML field out to files on disk.

    Parses ``item[key]`` as HTML. For every ``<img>`` whose ``src`` is a
    base64 ``data:`` URI, the payload is decoded and written to
    ``./img/image_<n>_<question_id>.png`` and ``src`` is rewritten to
    ``/admission/questions/image_<n>_<question_id>.png``. ``<n>`` counts
    only the images saved successfully so far within this field.

    An image that has no ``src``, whose ``src`` is not a decodable base64
    data URI, or whose file cannot be written gets ``src=""`` and is
    skipped (best effort; no exception is raised for it).

    Args:
        item: Mapping that holds the HTML under ``key`` and an identifier
            under ``'question_id'``.
        key: Name of the field in ``item`` that contains the HTML.

    Returns:
        The HTML serialized back to a string, with the rewritten ``src``
        attributes.
    """
    soup = BSHTML(item[key], "html.parser")
    saved = 0
    for img in soup.find_all('img'):
        try:
            # A data URI is "data:<mediatype>;base64,<payload>". Split on the
            # first comma instead of assuming a fixed 22-character prefix, so
            # media types other than image/png are handled as well.
            header, comma, payload = img['src'].partition(',')
            header = header.lower()
            if not comma or not header.startswith('data:') or not header.endswith(';base64'):
                raise ValueError("src is not a base64 data URI")
            # Decode before opening the file so that a bad payload does not
            # leave an empty image file behind.
            raw = base64.b64decode(payload)
            # NOTE: the ".png" extension is kept regardless of the media type
            # because the rewritten URLs below use the same naming scheme.
            filename = "image_{0}_{1}.png".format(saved, item['question_id'])
            with open("./img/" + filename, "wb") as fh:
                fh.write(raw)
        except (KeyError, ValueError, OSError):
            # KeyError: missing src (or question_id); ValueError: not a data
            # URI or invalid base64 (binascii.Error is a ValueError);
            # OSError: the file could not be written.
            img['src'] = ""
            continue
        img['src'] = "/admission/questions/" + filename
        saved += 1
    return str(soup)
# Load the exported question rows. JSON text is UTF-8, so decode it explicitly
# rather than relying on the platform's default encoding.
with open("admissionerpap.json", "r", encoding="utf-8") as read_file:
    data = json.load(read_file)

# Emit one UPDATE statement per question. The output file is opened once, in
# append mode, so statements from earlier runs are preserved as before.
with open("sql.txt", "a", encoding="utf-8") as myfile:
    for item in data:
        string = purifyString(item, 'question')
        # Escape for a single-quoted MySQL string literal. Backslashes must be
        # doubled first; otherwise a backslash already present in the HTML
        # would be read as the start of an escape sequence.
        escaped = string.replace("\\", "\\\\").replace("'", "\\'")
        myfile.write("UPDATE `content_questions` SET `question`='{}' WHERE `question_id`={};\n".format(escaped, item['question_id']))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment