Last active
August 24, 2021 05:58
-
-
Save osanseviero/467e57906b4598403af6cee870804f29 to your computer and use it in GitHub Desktop.
Upload CoreNLP models to the Hub
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import shutil | |
from huggingface_hub import Repository, HfApi, HfFolder | |
def get_model_card(lang):
    """Build the README.md (model card) text for one CoreNLP model repository.

    The card consists of a YAML metadata header (tags, library tag, language
    and license) followed by a short Markdown description of CoreNLP.

    Args:
        lang: Language identifier inserted into the ``language`` metadata
            list and into the card title.

    Returns:
        The complete model card as a single newline-terminated string.
    """
    # NOTE(review): "GNU" may not be a license identifier the Hub recognises;
    # confirm against the Hub's model card metadata documentation.
    metadata_lines = [
        "---",
        "tags:",
        "- corenlp",
        "library_tag: corenlp",
        "language:",
        f"- {lang}",
        "license: GNU",
        "---",
    ]
    description_lines = [
        f"# Core NLP model for {lang}",
        (
            "CoreNLP is your one stop shop for natural language processing in"
            " Java! CoreNLP enables users to derive linguistic annotations for"
            " text, including token and sentence boundaries, parts of speech,"
            " named entities, numeric and time values, dependency and"
            " constituency parses, coreference, sentiment, quote attributions,"
            " and relations."
        ),
        (
            "Find more about it in"
            " [our website](https://stanfordnlp.github.io/CoreNLP)"
            " and our"
            " [GitHub repository](https://github.com/stanfordnlp/CoreNLP)."
        ),
    ]
    return "\n".join(metadata_lines + description_lines) + "\n"
# Model names to upload. Each name is used both as the suffix of the Hub
# repository ("corenlp_<name>") and of the local jar file
# ("stanford-corenlp-models-<name>.jar").
MODELS = [
    "arabic",
    "chinese",
    "english-default",
    "english-extra",
    "english-kbp",
    "french",
    "german",
    "spanish",
]
# ISO 639-1 codes keyed by the language prefix of a CoreNLP model name.
# Truncating the English language name to two letters is only right by
# coincidence ("arabic" -> "ar"); it yields "ch" (Chamorro) for Chinese and
# the unassigned codes "ge" and "sp" for German and Spanish.
_LANGUAGE_CODES = {
    "arabic": "ar",
    "chinese": "zh",
    "english": "en",
    "french": "fr",
    "german": "de",
    "spanish": "es",
}


def _language_code(model):
    """Return the language code for a model name such as ``"english-kbp"``."""
    language = model.split("-")[0]
    # Languages missing from the table keep the previous two-letter truncation.
    return _LANGUAGE_CODES.get(language, language[:2])


def push_to_hub(organization=None):
    """Upload every CoreNLP model jar listed in ``MODELS`` to the Hub.

    For each model name this creates (or reuses) the repository
    ``corenlp_<model>``, clones it into ``hub/corenlp_<model>``, tracks
    ``*.jar`` files with Git LFS, copies ``stanford-corenlp-models-<model>.jar``
    from the current working directory into the clone, writes a ``README.md``
    model card and pushes the result.

    Args:
        organization: Value forwarded to ``HfApi.create_repo`` as
            ``organization``. Defaults to ``None``, which is what this script
            always passed before the parameter existed.

    Raises:
        FileNotFoundError: If the jar for a model is not present in the
            current working directory. The check happens before the remote
            repository for that model is created.
    """
    api = HfApi()
    # The stored token does not change between models, so read it once.
    token = HfFolder.get_token()

    for model in MODELS:
        # Fail early: without the jar there is nothing to upload, and creating
        # the remote repository first would leave an empty repo behind.
        jar_name = f"stanford-corenlp-models-{model}.jar"
        if not os.path.isfile(jar_name):
            raise FileNotFoundError(
                f"Model jar not found in the working directory: {jar_name}"
            )

        # Create the repository
        repo_name = "corenlp_" + model
        repo_url = api.create_repo(
            name=repo_name,
            token=token,
            organization=organization,
            exist_ok=True,
        )

        # Clone the repository
        repo_local_path = os.path.join("hub", repo_name)
        repo = Repository(repo_local_path, clone_from=repo_url)
        repo.git_pull(rebase=True)

        # Make sure jar files are tracked with LFS
        repo.lfs_track(["*.jar"])

        # Create a copy of the jar file in the repository
        shutil.copy(jar_name, os.path.join(repo_local_path, jar_name))

        # Create the model card
        readme_path = os.path.join(repo_local_path, "README.md")
        # Explicit encoding keeps the README independent of the platform default.
        with open(readme_path, "w", encoding="utf-8") as f:
            f.write(get_model_card(_language_code(model)))

        # Push the model
        print("Pushing files to the Hub")
        repo.push_to_hub(commit_message="Add model")
        print(f"View your model in {repo_url}")
# Run the upload only when this file is executed as a script.
if __name__ == "__main__":
    push_to_hub()
url
is the same as ‘repo_url’ no?
Yes, you're right!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
url
is the same as ‘repo_url’ no?