Skip to content

Instantly share code, notes, and snippets.

@alabrashJr
Last active June 30, 2020 09:10
Show Gist options
  • Save alabrashJr/a659786068eba2235a8b4cb2c986e1a4 to your computer and use it in GitHub Desktop.
Save alabrashJr/a659786068eba2235a8b4cb2c986e1a4 to your computer and use it in GitHub Desktop.
import argparse
import pandas as pd
import pymongo
from datetime import datetime
def args(argv=None):
    """Parse the command-line arguments of the script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with the attributes ``path``, ``mongodb``
        and ``mongodb_prname``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("path", help="path to ebay index html")
    # argparse ignores ``default`` on a positional argument unless
    # ``nargs="?"`` makes it optional; without it both arguments below were
    # required and their defaults could never be used.
    parser.add_argument(
        "mongodb",
        nargs="?",
        help="ip to mongodb",
        default="mongodb://localhost:27017/",
    )
    parser.add_argument(
        "mongodb_prname",
        nargs="?",
        help="monogdb project name",
        default="shelock",
    )
    return parser.parse_args(argv)
def _latest_delta_file(index_path):
    """Return the name of the most recently modified delta zip in the index.

    The first table of the HTML page at ``index_path`` is read, and only rows
    whose ``Type`` is ``application/zip``, whose ``Name`` starts with
    ``delta`` and does not contain ``cadde`` are considered.

    Args:
        index_path: Location of the HTML index page, passed to
            ``pd.read_html``.

    Returns:
        The ``Name`` of the matching row with the greatest ``Last Modified``
        value, or ``None`` when no row matches.
    """
    table = pd.read_html(index_path)[0]
    # Drops the row labelled 0 — presumably a non-file row at the top of the
    # listing; TODO confirm against a real index page.
    table.drop(0, inplace=True)
    table["Last Modified"] = table["Last Modified"].astype("datetime64[ns]")

    zips = table[table.Type == "application/zip"]
    # ``na=False`` keeps rows with a missing Name from poisoning the mask.
    is_delta = zips.Name.str.startswith("delta", na=False)
    is_cadde = zips.Name.str.contains("cadde", na=False)
    candidates = zips[is_delta & ~is_cadde].sort_values(by="Last Modified")

    if candidates.empty:
        return None
    return str(candidates.Name.values[-1])


def main():
    """Record the newest delta file in MongoDB and return its name.

    Reads the command-line arguments via ``args()``, finds the newest delta
    zip in the index page, and inserts a ``{"file", "time"}`` document into
    the ``files`` collection of the configured database when no document
    with that file name exists yet.

    Returns:
        The name of the newest delta zip file.

    Raises:
        SystemExit: If the index contains no matching delta zip file.
    """
    arg = args()
    latest_file = _latest_delta_file(arg.path)
    if latest_file is None:
        # The original indexed an empty array here and died with IndexError.
        raise SystemExit(f"No delta zip file found in {arg.path}")

    # The original referenced an undefined ``myclient``; use the one client
    # that is actually created, and close it when done.
    with pymongo.MongoClient(arg.mongodb) as client:
        if arg.mongodb_prname not in client.list_database_names():
            print("The database is not exists.")
        collection = client[arg.mongodb_prname]["files"]

        # Ask the server for the one document we care about instead of
        # loading the whole collection to test membership.
        if collection.find_one({"file": latest_file}) is None:
            stamp = datetime.now().strftime("%d-%m-%H-%M")
            collection.insert_one({"file": latest_file, "time": stamp})

    # NOTE(review): the source's indentation was lost, so it is unclear
    # whether the original ``return`` was meant to run only for a newly
    # recorded file. This version always returns the name — confirm.
    return latest_file


if __name__ == '__main__':
    # ``return`` is a SyntaxError at module level, so the logic lives in
    # main() and the result is surfaced on stdout instead.
    print(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment