Last active
April 2, 2024 20:03
-
-
Save ericboehs/563db3b74d9cda47469f24bc46421747 to your computer and use it in GitHub Desktop.
Sync with Kiwix.org and update zim files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Rebuild the Kiwix library when the set of ZIM files on disk has changed.
#
# The sorted list of *.zim files found under $library is compared with the
# list saved in $xml/library.log by the previous run. When they differ,
# library.xml is regenerated with kiwix-manage and pkgctl-kiwix is restarted.

xml="/volume1/kiwix-share/"
library="/volume1/kiwix-share/"

# Split on newlines only, so file names containing spaces stay in one piece.
log=()
IFS=$'\n' read -d '' -r -a log < <(find "$library" -name '*.zim' | sort)

# Start from an empty list so a missing log file compares as "no files yet".
oldlog=()
if [ -f "$xml/library.log" ]; then
  IFS=$'\n' read -d '' -r -a oldlog < "$xml/library.log"
fi

# Join with newlines before comparing; a space-joined comparison cannot tell
# "a b" (one file) from "a" and "b" (two files).
current=$(printf '%s\n' "${log[@]}")
previous=$(printf '%s\n' "${oldlog[@]}")

if [[ "$current" == "$previous" ]]; then
  echo No change
else
  echo Update library
  rm -f "$xml/library.log"
  rm -f "$xml/library_new.xml"
  for zim in "${log[@]}"; do
    printf '%s\n' "$zim"
    printf '%s\n' "$zim" >> "$xml/library.log"
    kiwix-manage "$xml/library_new.xml" add "$zim"
  done
  # Build into library_new.xml first and swap it in once every file was added.
  mv "$xml/library_new.xml" "$xml/library.xml"
  systemctl restart pkgctl-kiwix
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'nokogiri' | |
require 'json' | |
require 'open3' | |
require 'tempfile' | |
# ZIM files already present in the current directory.
existing_zims = Dir.glob('*.zim')

# Cached copy of the Kiwix content listing and where it is fetched from.
file_path = 'kiwix-content.html'
listing_url = 'https://wiki.kiwix.org/wiki/Content'
max_age = 24 * 60 * 60 # seconds; refresh the cache once a day

# Re-download the listing when the cache is missing or older than max_age.
if !File.exist?(file_path) || File.mtime(file_path) < Time.now - max_age
  puts "Downloading Kiwix listing. This may take a minute."
  # Download to a side file and rename on success, so a failed transfer never
  # replaces a usable cache with an empty or truncated one.
  partial_path = "#{file_path}.part"
  if system('wget', '-O', partial_path, listing_url)
    File.rename(partial_path, file_path)
  else
    File.delete(partial_path) if File.exist?(partial_path)
    abort "Could not download #{listing_url}" unless File.exist?(file_path)
    warn "Download failed; using the existing #{file_path}."
  end
end

# Read the cached HTML and parse it.
html_content = File.read(file_path)
doc = Nokogiri::HTML(html_content)
# Build one hash per English entry of the table with the ID 'zimtable'.
# Entries that have a torrent link also get :filename_on_disk, :full_filename
# and a :status of 'Downloaded', 'Needs-Update' or 'Not-Downloaded'.
rows = []

doc.css('table#zimtable tr').each do |row|
  cells = row.css('td').map { |cell| cell.text.strip }

  # Skip rows with too few cells (e.g. header rows) and non-English entries.
  next if cells.size < 5 || cells[1] != 'en'

  # Anchors without an href return nil, hence the safe navigation.
  torrent_link = row.css('td a').find { |link| link['href']&.include?('.torrent') }
  bittorrent_url = torrent_link && torrent_link['href']
  filename = bittorrent_url && bittorrent_url.split('/').last

  row_data = {
    name: cells[0],
    language: cells[1],
    size: cells[2],
    date_created: cells[3],
    flavor: cells[4],
    bittorrent_url: bittorrent_url,
    filename: filename
  }

  if filename
    # Strip the extensions only where they are suffixes, not anywhere in the name.
    base_name = filename.sub(/\.torrent\z/, '').sub(/\.zim\z/, '')
    date_created = row_data[:date_created]

    # NOTE(review): a prefix match can also pick up a file whose base name
    # merely extends this one — confirm against the real listing names.
    on_disk = existing_zims.find { |zim| zim.start_with?(base_name) }
    full_filename = "#{base_name}_#{date_created}.zim"

    row_data[:filename_on_disk] = on_disk
    row_data[:full_filename] = full_filename
    row_data[:status] =
      if existing_zims.include?(full_filename)
        'Downloaded'
      elsif on_disk && !on_disk.include?(date_created)
        # A local file shares the base name but carries a different date.
        'Needs-Update'
      else
        'Not-Downloaded'
      end
  end

  rows << row_data
end
# Optional filters: uncomment one to narrow the list by status.
# rows = rows.select { |row| row[:status] == 'Needs-Update' }
# rows = rows.select { |row| row[:status] == 'Downloaded' }
# List the rows in fzf, then for each selected row wget its BitTorrent URL
# into the Transmission watch directory (the path is set in the wget call below).
# Let the user pick rows with fzf and fetch the torrent file for each pick.
#
# The directory that receives the .torrent files defaults to the original
# hard-coded path and can be overridden with the KIWIX_WATCH_DIR environment
# variable.
watch_dir = ENV.fetch('KIWIX_WATCH_DIR', '/Volumes/docker/transmission/watch')
row_format = "%-4s\t%-50s %-10s %-15s %-20s %-15s\n"

# Print the header
puts format(row_format, '#', 'Name', 'Size', 'Date Created', 'Flavor', 'Status')

# One formatted line per row, skipping playlist flavors. The leading number is
# the row's 1-based position in `rows`, so a selection maps straight back.
listing = rows.each_with_index.map do |row, i|
  next if row[:flavor].include?('playlist')

  format(row_format, i + 1, row[:name], row[:size], row[:date_created], row[:flavor][0..19], row[:status])
end.compact.join

# Feed the list to fzf on stdin; passing an argument vector avoids building a
# shell command string.
selected, _status = Open3.capture2('fzf', '--multi', "--delimiter=\t", stdin_data: listing)

if selected.empty?
  puts "No selection made."
else
  selected.each_line do |line|
    number = line.split("\t").first.to_i
    # to_i yields 0 for unparsable text; without this check that would become
    # index -1 and silently select the last row.
    row = number >= 1 ? rows[number - 1] : nil
    if row.nil?
      warn "Ignoring unrecognised selection: #{line.chomp}"
      next
    end

    if ["Not-Downloaded", "Needs-Update"].include?(row[:status])
      if system("wget", "-P", watch_dir, row[:bittorrent_url])
        puts "Downloading: #{row[:filename]}"
      else
        warn "Failed to fetch torrent for: #{row[:filename]}"
      end
    else
      puts "Skipping downloaded or unavailable: #{row[:filename]}"
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I use this to download ZIM files from kiwix.org.
I run this from the directory where my ZIM files live (on my NAS). You'll have to set the location for where to put the torrent files.
When run, it'll show you all the English ZIM files you can download, what you have downloaded, and what needs updating. You can press Enter on one and it'll download the torrent file to your watch directory. You can use Tab to select multiple and then Enter to download them all.

On my Synology, I have a script that runs every hour to move ZIM files from my Transmission downloads to my kiwix-share directory. I have another script that runs at 5 past the hour to update and restart Kiwix if needed (kiwix-add.sh).