Mentors:
- Morgan Roff
- Sayak Paul
- jaeyounkim
This is a summary of my GSoC 2021 project. In this project, I aimed to produce text embedding modules trained on underrepresented languages, such as Arabic and Swahili, and to publish them on tfhub.dev.
| # Copyright 2021 Google LLC. | |
| # SPDX-License-Identifier: Apache-2.0 | |
| import kfp | |
| import json | |
| import time | |
| from google.cloud import bigquery | |
| from google.cloud.exceptions import NotFound | |
| from kfp.v2.google.client import AIPlatformClient | |
| client = bigquery.Client() |
| # Copyright 2022 Google LLC. | |
| # SPDX-License-Identifier: Apache-2.0 | |
| # Author: Maithra Raghu <[email protected]> | |
| def compute_distance_matrix(patch_size, num_patches, length): | |
| """Helper function to compute distance matrix.""" | |
| distance_matrix = np.zeros((num_patches, num_patches)) |
| import torch | |
| from diffusers import FluxPipeline | |
| from torch import nn | |
| class ModelOffloaderV2: | |
| def __init__(self, model: nn.Module, record_stream: bool = False): | |
| # move model to pinned memory. keep a model copy in CPU pinned memory. | |
| for p in model.parameters(): | |
| p.data = p.data.cpu().pin_memory() |