Skip to content

Instantly share code, notes, and snippets.

@mbklein
Last active January 6, 2022 20:42
Show Gist options
  • Save mbklein/6e0e358f39708bd53a78b046782bf473 to your computer and use it in GitHub Desktop.
AVR Migration Snippets
# Copy file set files for 10 works of each type (10 AUDIO + 10 VIDEO).
# Fixes: the original had `max_concurrency: ,` (missing value — a syntax
# error) and would crash in `Enum.take/2` if a work type was absent from
# the grouped map (`Map.get/2` returns nil); we default to `[]`.
AVR.Migration.list_avr_works()
|> Enum.group_by(& &1.work_type.id)
|> then(fn works_by_type ->
  ~w(AUDIO VIDEO)
  |> Enum.flat_map(fn type -> works_by_type |> Map.get(type, []) |> Enum.take(10) end)
  |> Repo.preload(:file_sets)
  |> Enum.flat_map(& &1.file_sets)
  |> Task.async_stream(&AVR.Migration.FileMover.process_file_set_files/1,
    # one worker per scheduler; the original left this value blank
    max_concurrency: System.schedulers_online(),
    timeout: :infinity
  )
  |> Stream.run()
end)
# Count file sets whose FileSetComplete pipeline action finished with
# outcome "ok" (AVR file sets only, excluding :mods records).
completed_query =
  from(f in FileSet,
    join: s in ActionState,
    on: f.id == s.object_id,
    where: like(f.accession_number, "avr:%"),
    where: not like(f.accession_number, "%:mods"),
    where: s.action == "Meadow.Pipeline.Actions.FileSetComplete",
    where: s.outcome == "ok",
    select: f
  )

Repo.aggregate(completed_query, :count)
# Process all AVR file sets whose playlist derivative is still nil.
# Fixes: `with project <- ...` could never fail, so a missing project
# (nil from get_project_by_title/1) would have been passed straight into
# process_file_set_files/2. Matching `%{} = project` makes the `with`
# halt and return nil instead when the project does not exist.
with %{} = project <- Projects.get_project_by_title("AVR Migration") do
  from(f in FileSet,
    where: like(f.accession_number, "avr:%"),
    where: not like(f.accession_number, "%:mods"),
    # file sets with no 'playlist' key in the derivatives JSON column
    where: fragment("?.derivatives->'playlist'", f) |> is_nil()
  )
  |> Repo.all()
  |> Task.async_stream(&AVR.Migration.FileMover.process_file_set_files(&1, project),
    timeout: :infinity
  )
  |> Stream.run()
end
# Check the migration folder for required checksum tags, printing the key
# of every object that is missing them. Objects under /master_files/ are
# skipped. Fixes consistency: `Meadow.Config` was aliased as `Config` but
# then spelled out in full inside the worker function.
# NOTE(review): assumes `project` is already bound in the session
# (e.g. from the snippet above) — confirm before running standalone.
alias Meadow.Config
alias Meadow.Utils.AWS

ExAws.S3.list_objects_v2(Config.ingest_bucket(), prefix: project.folder <> "/")
|> ExAws.stream!()
|> Stream.reject(&String.match?(&1.key, ~r"/master_files/"))
|> Task.async_stream(
  fn %{key: key} ->
    if not AWS.check_object_tags!(Config.ingest_bucket(), key, Config.required_checksum_tags()),
      do: IO.puts(key)
  end,
  timeout: :infinity,
  max_concurrency: 50
)
|> Stream.run()
# Validate metadata for every AVR work (file sets preloaded first);
# returns the list of per-work validation results.
import AVR.Migration.Metadata

Logger.configure(level: :info)

AVR.Migration.list_avr_works()
|> Repo.preload(:file_sets)
|> Enum.map(&validate_metadata/1)
# Update metadata for every AVR work that is still missing a title,
# with bounded concurrency taken from the app config.
AVR.Migration.list_avr_works()
|> Enum.filter(fn work -> is_nil(work.descriptive_metadata.title) end)
|> Repo.preload(:file_sets)
|> Task.async_stream(&AVR.Migration.Metadata.update_work_metadata/1,
  max_concurrency: Config.concurrency(),
  timeout: :infinity
)
|> Stream.run()
# Create placeholder playlist derivatives: upload a fixture m3u8 to each
# file set's pairtree path in the streaming bucket, printing a dot per
# upload as progress.
alias Meadow.Config
alias Meadow.Utils.Pairtree

content = File.read!("test/fixtures/test-1080.m3u8")

AVR.Migration.list_avr_filesets()
|> Task.async_stream(
  fn file_set ->
    key = Path.join([Pairtree.generate!(file_set.id), "playlist.m3u8"])

    Config.streaming_bucket()
    |> ExAws.S3.put_object(key, content)
    |> ExAws.request()

    IO.write(".")
  end,
  max_concurrency: 50,
  timeout: :infinity
)
|> Stream.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment