Last active
October 20, 2022 02:44
-
-
Save nousr/e834ca7f3bd02ea0c257811036986b13 to your computer and use it in GitHub Desktop.
vit-h-14 embeddings sanity check
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[
    {
        "caption": "View Over Pyrenees Mountains, Spain",
        "url": "https://static1.bigstockphoto.com/thumbs/0/0/1/large2/100000799.jpg"
    },
    {
        "caption": "Mountain landscape with deer and forest at sunset",
        "url": "https://cdn.xxl.thumbs.canstockphoto.com/vector-illustration-of-mountain-landscape-with-deer-and-forest-at-sunset-clip-art-vector_csp56740268.jpg"
    },
    {
        "caption": "cover image of America the Beautiful",
        "url": "https://img1.od-cdn.com/ImageType-400/6627-1/7EE/93B/80/%7B7EE93B80-68C2-47FB-9B75-5C2B4376A6C5%7DImg400.jpg"
    },
    {
        "caption": "Amazing mountain landscape in Georgia on sunny summer day. Alpine green meadow in Caucasus highlands. Idyllic valley in Svaneti mountains. #1264205623",
        "url": "https://image.shutterstock.com/image-photo/image-450w-1264205623.jpg"
    },
    {
        "caption": "Tropical forest, palm trees in sunlight. Dominican Republic - stock photo",
        "url": "https://thumb7.shutterstock.com/image-photo/stock-photo-tropical-forest-palm-trees-in-sunlight-dominican-republic-450w-186450275.jpg"
    },
    {
        "caption": "Captivating scene of the alpine valley in sunlight. Main Caucasus ridge. Stock Footage",
        "url": "https://images.pond5.com/captivating-scene-alpine-valley-sunlight-footage-085837642_iconm.jpeg"
    },
    {
        "caption": "Vector mountain landscape with cloudy sky and silhouette of forest on the foreground",
        "url": "https://image.shutterstock.com/image-vector/vector-mountain-landscape-cloudy-sky-260nw-473199520.jpg"
    },
    {
        "caption": "Sweeping view of the Patagonia region located at the southern end of South America, shared by Argentina and Chile.",
        "url": "https://i.pinimg.com/236x/8f/85/9a/8f859ab7e08822200618af20eaa14c19--places-to-travel-places-to-go.jpg"
    },
    {
        "caption": "Cute Landscape With Mountains",
        "url": "https://images.creativemarket.com/0.1.0/ps/2986663/580/273/m1/fpnw/wm1/mountains_banner2-.jpg?1500318273&s=0abb42df7976eb3e041d2465d4406544"
    },
    {
        "caption": "The Mountains. Fantasy Fiction Natural Backdrop. Concept Art. Realistic Illustration. Video Game Digital CG Artwork. Nature Scenery.",
        "url": "https://as2.ftcdn.net/jpg/02/94/29/05/240_F_294290560_y5HS6RLaCdK6TdloN5VwKQiEGsbiW8cK.jpg"
    }
]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import faiss | |
from clip_retrieval.load_clip import load_open_clip | |
from torch import float32 | |
from open_clip import tokenize | |
import json | |
# Text query whose CLIP embedding is searched against the image-embedding shard.
PROMPT = "A beautiful mountain landscape"
def main(prompt=None, k=10):
    """Sanity-check one image-embedding shard with a text query.

    Loads ``img_emb_0000.npy`` and ``metadata_0000.parquet`` from the current
    working directory, puts the embeddings into an exact inner-product FAISS
    index, encodes the prompt with the ViT-H-14 text encoder, and writes the
    ``caption`` and ``url`` of the nearest images to ``results.json``.

    Args:
        prompt: Text to search for. ``None`` (the default) uses the
            module-level ``PROMPT``.
        k: Maximum number of neighbours to retrieve.
    """
    # Local import: the file itself only imports ``float32`` from torch.
    import torch

    if prompt is None:
        prompt = PROMPT

    # Load the shard; FAISS only accepts float32 input.
    embeddings = np.load("img_emb_0000.npy").astype("float32")
    # Load the parquet metadata.
    # NOTE(review): assumes row i of the metadata describes row i of the
    # shard -- confirm against how the shard was produced.
    metadata = pd.read_parquet("metadata_0000.parquet")

    # Build an exact (brute-force) inner-product index.
    # NOTE(review): inner product only equals cosine similarity if the stored
    # embeddings are L2-normalised -- TODO confirm. The query is left
    # unnormalised, as in the original; scaling a single query by a positive
    # constant does not change the ranking of its inner products.
    index = faiss.IndexFlatIP(embeddings.shape[1])
    index.add(embeddings)

    # Load the CLIP model; the second return value is not needed here.
    clip_model, _ = load_open_clip(clip_model="ViT-H-14", device="cuda", use_jit=False)

    # Inference only: skip building an autograd graph for the text tower.
    with torch.no_grad():
        text_features = clip_model.encode_text(tokenize(prompt).cuda())
    query = np.ascontiguousarray(
        text_features.to(float32).detach().cpu().numpy(), dtype="float32"
    )

    _, neighbours = index.search(query, k)
    hits = neighbours[0]
    # FAISS fills unfilled result slots with -1 when the index holds fewer
    # than k vectors; ``iloc[-1]`` would silently return the last metadata row.
    hits = hits[hits >= 0]
    results = metadata.iloc[hits][["caption", "url"]].to_dict(orient="records")

    # Dump the results to JSON.
    with open("results.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=4)
# Run the sanity check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment