Skip to content

Instantly share code, notes, and snippets.

@nousr
Last active October 20, 2022 02:44
Show Gist options
  • Save nousr/e834ca7f3bd02ea0c257811036986b13 to your computer and use it in GitHub Desktop.
Save nousr/e834ca7f3bd02ea0c257811036986b13 to your computer and use it in GitHub Desktop.
vit-h-14 embeddings sanity check
[
{
"caption": "View Over Pyrenees Mountains, Spain",
"url": "https://static1.bigstockphoto.com/thumbs/0/0/1/large2/100000799.jpg"
},
{
"caption": "Mountain landscape with deer and forest at sunset",
"url": "https://cdn.xxl.thumbs.canstockphoto.com/vector-illustration-of-mountain-landscape-with-deer-and-forest-at-sunset-clip-art-vector_csp56740268.jpg"
},
{
"caption": "cover image of America the Beautiful",
"url": "https://img1.od-cdn.com/ImageType-400/6627-1/7EE/93B/80/%7B7EE93B80-68C2-47FB-9B75-5C2B4376A6C5%7DImg400.jpg"
},
{
"caption": "Amazing mountain landscape in Georgia on sunny summer day. Alpine green meadow in Caucasus highlands. Idyllic valley in Svaneti mountains. #1264205623",
"url": "https://image.shutterstock.com/image-photo/image-450w-1264205623.jpg"
},
{
"caption": "Tropical forest, palm trees in sunlight. Dominican Republic - stock photo",
"url": "https://thumb7.shutterstock.com/image-photo/stock-photo-tropical-forest-palm-trees-in-sunlight-dominican-republic-450w-186450275.jpg"
},
{
"caption": "Captivating scene of the alpine valley in sunlight. Main Caucasus ridge. Stock Footage",
"url": "https://images.pond5.com/captivating-scene-alpine-valley-sunlight-footage-085837642_iconm.jpeg"
},
{
"caption": "Vector mountain landscape with cloudy sky and silhouette of forest on the foreground",
"url": "https://image.shutterstock.com/image-vector/vector-mountain-landscape-cloudy-sky-260nw-473199520.jpg"
},
{
"caption": "Sweeping view of the Patagonia region located at the southern end of South America, shared by Argentina and Chile.",
"url": "https://i.pinimg.com/236x/8f/85/9a/8f859ab7e08822200618af20eaa14c19--places-to-travel-places-to-go.jpg"
},
{
"caption": "Cute Landscape With Mountains",
"url": "https://images.creativemarket.com/0.1.0/ps/2986663/580/273/m1/fpnw/wm1/mountains_banner2-.jpg?1500318273&s=0abb42df7976eb3e041d2465d4406544"
},
{
"caption": "The Mountains. Fantasy Fiction Natural Backdrop. Concept Art. Realistic Illustration. Video Game Digital CG Artwork. Nature Scenery.",
"url": "https://as2.ftcdn.net/jpg/02/94/29/05/240_F_294290560_y5HS6RLaCdK6TdloN5VwKQiEGsbiW8cK.jpg"
}
]
import numpy as np
import pandas as pd
import faiss
from clip_retrieval.load_clip import load_open_clip
from torch import float32
from open_clip import tokenize
import json
# Text query that main() encodes with CLIP and searches for in the image-embedding index.
PROMPT = "A beautiful mountain landscape"
def main(
    prompt=None,
    top_k=10,
    embeddings_path="img_emb_0000.npy",
    metadata_path="metadata_0000.parquet",
    output_path="results.json",
    device="cuda",
):
    """Sanity-check an image-embedding shard with a text-to-image search.

    Loads one embedding shard and its parquet metadata, builds an exact
    inner-product FAISS index over the shard, encodes a text prompt with the
    ViT-H-14 open_clip model, and writes the ``caption``/``url`` of the
    best-matching rows to a JSON file.

    Args:
        prompt: Text to search for. Defaults to the module-level ``PROMPT``.
        top_k: Number of nearest neighbours to request from the index.
        embeddings_path: ``.npy`` file holding the embedding matrix.
        metadata_path: Parquet file whose i-th row describes the i-th
            embedding; must contain ``caption`` and ``url`` columns.
        output_path: Destination of the JSON results.
        device: Torch device used for the CLIP model and the tokens.

    Raises:
        ValueError: If the shard is not a 2-D matrix, or if its row count
            differs from the metadata row count (the positional lookup below
            would then return captions for the wrong images).
    """
    # Local import: the file only pulls ``float32`` out of torch at top level.
    import torch

    if prompt is None:
        prompt = PROMPT

    # load the shard
    shard = np.load(embeddings_path).astype("float32")
    # load the parquet metadata
    metadata = pd.read_parquet(metadata_path)

    if shard.ndim != 2:
        raise ValueError(
            f"expected a 2-D embedding matrix in {embeddings_path!r}, "
            f"got shape {shard.shape}"
        )
    if len(metadata) != shard.shape[0]:
        raise ValueError(
            f"{embeddings_path!r} has {shard.shape[0]} rows but "
            f"{metadata_path!r} has {len(metadata)}; they must be aligned"
        )

    # build an index with IndexFlatIP
    index = faiss.IndexFlatIP(shard.shape[1])
    index.add(shard)

    # load the clip model
    clip_model, _ = load_open_clip(
        clip_model="ViT-H-14", device=device, use_jit=False
    )

    tokens = tokenize(prompt).to(device)
    # Inference only: skip autograd bookkeeping instead of detaching afterwards.
    with torch.no_grad():
        text_features = clip_model.encode_text(tokens)
    # FAISS needs float32 input, so cast explicitly whatever dtype the model
    # returned. The query is not L2-normalised; for a single query that only
    # rescales the (discarded) scores and leaves the ranking unchanged.
    query = text_features.to(float32).cpu().numpy()

    _, neighbors = index.search(query, top_k)
    # FAISS pads missing neighbours with -1 when the index holds fewer than
    # top_k vectors; ``iloc[-1]`` would silently return the last row instead.
    hits = [int(row) for row in neighbors[0] if row >= 0]
    results = metadata.iloc[hits][["caption", "url"]].to_dict(orient="records")

    # dump the results to json
    with open(output_path, "w") as f:
        json.dump(results, f, indent=4)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment