Last active
June 30, 2020 09:10
-
-
Save alabrashJr/a659786068eba2235a8b4cb2c986e1a4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import pandas as pd | |
import pymongo | |
from datetime import datetime | |
def args():
    """Parse the command-line arguments of the script.

    Positional arguments:
        path: path to the index HTML file to read.
        mongodb: MongoDB connection string; optional, defaults to
            ``mongodb://localhost:27017/``.
        mongodb_prname: name of the MongoDB database to use; optional,
            defaults to ``shelock``.

    Returns:
        argparse.Namespace with the attributes ``path``, ``mongodb`` and
        ``mongodb_prname``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("path", help="path to ebay index html")
    # A positional argument is mandatory unless nargs="?" is given; without
    # it argparse never falls back to `default`, so the defaults were dead.
    parser.add_argument(
        "mongodb",
        nargs="?",
        help="ip to mongodb",
        default="mongodb://localhost:27017/",
    )
    parser.add_argument(
        "mongodb_prname",
        nargs="?",
        help="monogdb project name",
        default="shelock",
    )
    return parser.parse_args()
if __name__ == '__main__':
    # Script entry point: pick the most recently modified "delta" zip archive
    # listed in the index HTML, record it in the MongoDB "files" collection if
    # it is not there yet, and print its name when it was newly recorded.
    arg = args()

    # Use the first table found in the index page as the file listing.
    listing = pd.read_html(arg.path)[0]
    # Discard the row labelled 0 before parsing dates.
    # NOTE(review): presumably a non-file row (e.g. parent directory) - confirm.
    listing.drop(0, inplace=True)
    listing["Last Modified"] = listing["Last Modified"].astype("datetime64[ns]")

    # Keep only zip archives, oldest first, so the last row is the newest one.
    zips = listing[listing.Type == "application/zip"].sort_values(by="Last Modified")
    wanted = zips.Name.str.startswith("delta") & ~zips.Name.str.contains("cadde")
    latest_files = zips[wanted].iloc[-1:, :].Name.values

    # Indexing an empty result would raise an opaque IndexError.
    if len(latest_files) == 0:
        raise SystemExit("No matching delta archive found in the index.")
    latest_file = str(latest_files[0])

    myc = pymongo.MongoClient(arg.mongodb)
    # The client is bound to `myc`; the previous `myclient` was undefined.
    dblist = myc.list_database_names()
    if arg.mongodb_prname not in dblist:
        # Informational only: execution continues with the named database.
        print("The database is not exists.")
    mycol = myc[arg.mongodb_prname]["files"]

    # Ask the server whether the file is already recorded instead of loading
    # every document of the collection into memory.
    if mycol.find_one({"file": latest_file}) is None:
        stamp = datetime.now().strftime("%d-%m-%H-%M")
        mycol.insert_one({"file": latest_file, "time": stamp})
        # A module-level `return` is a SyntaxError; emit the name on stdout
        # so a calling shell script can capture it.
        # NOTE(review): assumes the result is only wanted for a newly
        # recorded file - confirm against the caller.
        print(latest_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment