Created
March 19, 2012 07:37
-
-
Save dvoiss/2101102 to your computer and use it in GitHub Desktop.
Made this for a friend (never quite finished): it generates an XML podcast feed for a directory of audio files. It reads ID3v1 tags (ID3v2 would require external libraries), runs AppleScript to open iTunes and load the feed for testing, and serves the feed over HTTP (iTunes required).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# generate an xml podcast file for a directory of files | |
# | |
# I parse id3v1 tags but only for the artist and track title, | |
# most of the samples did not have reliable meta-data, so the | |
# file-name is used. | |
# | |
# I don't process id3v2 tags because an external library would be needed | |
# such as id3lib. | |
# | |
# requires python or ruby to be installed | |
# USAGE:
# Print the help text to stdout.
# The text lives in a single here-document; a trailing backslash joins the
# next line onto the current one, so each paragraph is emitted as one line.
function print_usage() {
  cat <<EOF

Usage: $0 [options]

Run this script in a directory of mp3s you want to generate \
a podcast feed for. If you have multiple folders run the script at \
the top level to generate a feed for the items in the sub-directories.

Note that certain assumptions are made, such as the file-name \
being used for the track title and artist, id3v1 tags are read \
for these when available. Due to unreliable metadata id3v1 tags \
aren't read to retrieve other attributes, id3v2 tags are not read \
at all as an external dependency such as id3lib would be required.

== Options

Pass the string "tag" to try to use id3v1 tags. \
Depending on the source of the mp3 files, many will not have reliable \
data. Try generating the feed with "tag" and determine whether it \
is any good.
EOF
}

# Show the help text and stop when the first argument is -h or --help.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  print_usage
  exit 0
fi
# Python or Ruby is looked up here; the escape step uses one of them to
# escape HTML entities which can't be in the xml file.
# `command -v` is the shell builtin lookup (no dependency on `which`).
RUBY=$(command -v ruby)
PYTHON=$(command -v python)
if [[ -z "${PYTHON}" && -z "${RUBY}" ]]; then
  # Diagnostics go to stderr and the exit status is non-zero so that callers
  # can tell the feed was not generated.
  echo "ERROR:" >&2
  echo "You need to have Python or Ruby installed to use this script." >&2
  exit 1
fi
# script vars
# ===========
# Name of the feed file that gets written (relative to the working directory).
FILENAME="podcast.xml"
# Base URL prepended to every file name placed in the feed.
URL="http://localhost:8000/"
# Globals shared by the functions below.
artist=''
image=''
# Feed text accumulated piece by piece; it holds literal "\n" / "\t"
# sequences that are expanded only when the file is written.
result=''
id3v1=false
id3v1_extended=false
# rfc-2822 date format: day-of-month comes before the month name
# ("Mon, 19 Mar 2012 07:37:00 -0500"). LC_ALL=C keeps the day and month
# names in English regardless of the user's locale.
date_to_use=$(LC_ALL=C date "+%a, %d %b %Y %H:%M:%S %z")
# do you want to try to read id3v1 tags?
if [[ "${1:-}" == "tag" ]]; then
  use_id3v1=true
else
  use_id3v1=false
fi
# current directory
# Prints the last path component of the working directory (empty at "/").
# printf is used instead of `echo -e` so that backslashes or a leading dash
# in the directory name are printed verbatim.
function get_dir() {
  printf '%s\n' "${PWD##*/}"
}
# the local url, substitutes spaces ' ' => '%20'
# Arguments: $1 - path of the file (only its base name is used)
#            $2 - pass "root" when the file lives in the top-level directory;
#                 anything else inserts the current directory name into the URL
# Outputs:   $URL + [current directory name/] + base name, on stdout
function local_url() {
  local url="$URL"
  if [[ "${2:-}" != "root" ]]; then
    url+="$(get_dir)/"
  fi
  url+=$(basename -- "$1")
  # Only spaces are encoded; the value is printed verbatim otherwise
  # (no backslash interpretation).
  printf '%s\n' "${url// /%20}"
}
# readlink -f emulation (the function keeps its historical name)
# Arguments: $1 - path to a file
# Outputs:   the directory part of $1 resolved via cd/pwd, a slash, and the
#            base name of $1
# Returns:   non-zero (printing nothing) when the directory cannot be entered
function readline_f() {
  local dir
  # The cd happens inside the command substitution, so the caller's working
  # directory is untouched; cd's own output (CDPATH) is discarded.
  dir=$(cd -- "$(dirname -- "$1")" >/dev/null && pwd) || return
  printf '%s/%s\n' "$dir" "$(basename -- "$1")"
}
# id3v1 processing
# Sets the global `id3v1` to true when the first 3 of the last 128 bytes of
# the file are "TAG", and to false otherwise (including unreadable files,
# whose tail error is discarded).
# Arguments: $1 - path to the audio file
function is_id3v1() {
  local marker
  marker=$(tail -c 128 -- "$1" 2>/dev/null | head -c 3)
  if [[ "$marker" == "TAG" ]]; then
    id3v1=true
  else
    id3v1=false
  fi
}
# id3v1-extended, how frequent are these? is this actually needed?
# Sets the global `id3v1_extended` to true when the first 4 of the last 227
# bytes of the file are "TAG+", and to false otherwise.
# NOTE(review): descriptions of the extended tag place its 227-byte block
# *before* the 128-byte id3v1 tag (i.e. starting 355 bytes from the end) -
# confirm against real files before relying on this check; the title/artist
# readers use offsets that match the 227-byte position used here.
# Arguments: $1 - path to the audio file
function is_id3v1_extended() {
  local marker
  marker=$(tail -c 227 -- "$1" 2>/dev/null | head -c 4)
  if [[ "$marker" == "TAG+" ]]; then
    id3v1_extended=true
  else
    id3v1_extended=false
  fi
}
# Filter: strips trailing spaces from every line read on stdin.
# NUL bytes are removed first: fixed-width tag fields may be padded with NULs
# instead of spaces, and a NUL after the padding would otherwise stop the
# trailing-space match from reaching the end of the line.
# LC_ALL=C makes sed treat the input as plain bytes.
function rtrim() {
  tr -d '\000' | LC_ALL=C sed 's/ *$//'
}
# Make a string safe to embed in the XML feed.
# Single and double quotes are removed, then & < > are replaced by
# &amp; &lt; &gt; (ampersand first, so the entities themselves are not
# escaped again).
# This is done with one sed call instead of pasting the text into a Python or
# Ruby one-liner: no interpreter start-up per call, no dependence on the
# Python 2-only `print`/`cgi.escape`, and the text is never evaluated as code.
# The argument is quoted throughout, so whitespace and glob characters in it
# are kept as they are.
# Arguments: $1 - text to escape
# Outputs:   escaped text on stdout
function escape() {
  printf '%s\n' "$1" \
    | sed -e "s/['\"]//g" \
          -e 's/&/\&amp;/g' \
          -e 's/</\&lt;/g' \
          -e 's/>/\&gt;/g'
}
# use just the name of the file (no extension)
# Arguments: $1 - path to a file
# Outputs:   the base name of $1 with its last ".suffix" removed; a name
#            without a dot is printed unchanged
function file_with_stripped_extension() {
  local name
  name=$(basename -- "$1")
  printf '%s\n' "${name%.*}"
}
# get the title for id3v1 or id3v1 extended tags
# Reads the globals `id3v1` / `id3v1_extended` (set by the is_id3v1* checks)
# to decide where to look:
#   id3v1          - 30 bytes starting 125 bytes from the end of the file
#   id3v1 extended - 60 bytes starting 223 bytes from the end of the file
# When neither flag is true, or the field comes back empty, the file name
# without its extension is used instead.
# Arguments: $1 - path to the audio file
# Outputs:   the title on stdout, printed verbatim (no word splitting,
#            globbing or backslash interpretation)
function get_title() {
  local title=''
  if [[ "${id3v1:-false}" == true ]]; then
    title=$(tail -c 125 -- "$1" | head -c 30 | rtrim)
  elif [[ "${id3v1_extended:-false}" == true ]]; then
    title=$(tail -c 223 -- "$1" | head -c 60 | rtrim)
  fi
  if [[ -z "$title" ]]; then
    title=$(file_with_stripped_extension "$1") # use the filename
  fi
  printf '%s\n' "$title"
}
# get the artist for id3v1 or id3v1 extended tags
# Reads the globals `id3v1` / `id3v1_extended` to decide where to look:
#   id3v1          - 30 bytes starting 95 bytes from the end of the file
#   id3v1 extended - 60 bytes starting 163 bytes from the end of the file
# Fallbacks when no tag value is found: the global `artist` if it is set,
# otherwise the name of the current directory.
# Arguments: $1 - path to the audio file
# Outputs:   the artist on stdout, printed verbatim (no word splitting,
#            globbing or backslash interpretation)
function get_artist() {
  local name=''
  if [[ "${id3v1:-false}" == true ]]; then
    name=$(tail -c 95 -- "$1" | head -c 30 | rtrim)
  elif [[ "${id3v1_extended:-false}" == true ]]; then
    name=$(tail -c 163 -- "$1" | head -c 60 | rtrim)
  fi
  # if either of the above failed
  if [[ -z "$name" ]]; then
    if [[ -z "$artist" ]]; then
      # the global $artist is empty, use the current directory name
      name=$(get_dir)
    else
      # otherwise use the globally set artist
      name="$artist"
    fi
  fi
  printf '%s\n' "$name"
}
# Append one <item> element for an audio file to the global `result`.
# The appended text uses literal "\n" / "\t" sequences, like the rest of
# `result`; nothing is printed to stdout.
# Arguments: $1 - path to the audio file
#            $2 - artist to put in <itunes:author>
#            $3 - image file for <itunes:image>, may be empty
#            $4 - "root" when the file lives in the top-level directory
# Globals:   use_id3v1, date_to_use (read); result (appended to);
#            id3v1, id3v1_extended (updated through the is_id3v1* checks)
function make_entry() {
  local title size mime

  # if it's an id3v1 tag, then try to get relevant info,
  # I ignore track #s due to them not being particularly
  # reliable (with the examples I'm using)
  if [[ "$use_id3v1" == true ]]; then
    is_id3v1 "$1"
    is_id3v1_extended "$1"
  fi
  title=$(get_title "$1")

  # size in bytes; the arithmetic expansion drops the padding some wc
  # implementations print around the number
  size=$(( $(wc -c < "$1") ))
  mime=$(file -b --mime-type "$1")

  result+="\n<item>"
  result+="\n\t<title>$(escape "$title")</title>"
  result+="\n\t<itunes:author>$(escape "$2")</itunes:author>"
  # image, if available; it gets the same root flag as the enclosure so both
  # URLs point into the same directory
  if [[ -n "$3" ]]; then
    result+="\n\t<itunes:image href=\"$(escape "$(local_url "$3" "${4:-}")")\"/>"
  fi
  # no date info, just use the current date
  result+="\n\t<pubDate>$date_to_use</pubDate>"
  # url is built by local_url (base URL + optional directory + file name),
  # length is the size in bytes
  result+="\n\t<enclosure url=\"$(escape "$(local_url "$1" "${4:-}")")\" length=\"$size\" type=\"$mime\"/>"
  result+="\n</item>"
}
# Add an entry for every audio file (*.mp3 *.m4a *.mp4) in the current
# working directory.
# Arguments: $1 - directory being processed (shown in the progress message
#                 and handed to get_artist)
#            $2 - "root" for the top-level directory, forwarded to make_entry
# Globals:   image (reset, then set to the first *.png / *.jpg found here)
# Outputs:   one progress line on stdout
function process_directory() {
  local file dir_artist

  printf '%s\n' "PROCESSING DIRECTORY: $1"

  # get image if one exists, just use the first one we find,
  # even if there are multiple images; cleared first so a directory without
  # an image does not inherit the one found in a previous directory
  image=''
  for file in *.png *.jpg; do
    if [[ -f "$file" ]]; then
      image="$file"
      break
    fi
  done

  # use same artist for the directory
  # (kept in a local with its own name so that get_artist still sees the
  # global `artist` while it runs)
  dir_artist=$(get_artist "$1")

  # item info..
  # audio files, in glob order
  # optionally can add the following: *.mov, *.m4v, *.pdf, *.epub
  for file in *.mp3 *.m4a *.mp4; do
    if [[ -f "$file" ]]; then
      make_entry "$file" "$dir_artist" "$image" "${2:-}"
    fi
  done
}
# Append the XML prolog, the opening <rss>/<channel> tags and the
# channel-level fields to the global `result`.
# Globals: artist, image (read); result (appended to)
function write_header() {
  local directory_name

  result+="<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
  result+="\n<rss xmlns:itunes=\"http://www.itunes.com/dtds/podcast-1.0.dtd\" version=\"2.0\">"
  result+="\n<channel>"
  directory_name=$(escape "$(get_dir)")
  # channel info
  # assume the directory name is the name of the podcast
  result+="\n<title>$directory_name</title>"
  # the global artist (escaped) is the channel author
  result+="\n<itunes:author>$(escape "$artist")</itunes:author>"
  # summary and description, use directory name
  result+="\n<itunes:summary>$directory_name</itunes:summary>"
  result+="\n<description>$directory_name</description>"
  if [[ -n "$image" ]]; then
    # Written as an escaped URL, like the per-item image.
    # NOTE(review): assumes the channel image sits in the top-level
    # directory - confirm.
    result+="\n<itunes:image href=\"$(escape "$(local_url "$image" root)")\" />"
  fi
}
# Append the closing </channel> and </rss> tags to the global `result`
# (with the literal "\n" separators used throughout `result`).
function write_footer() {
  result+="\n</channel>\n</rss>"
}
# Build the whole feed in the global `result` and write it to $FILENAME in
# the current (top-level) directory.
# Order: header, entries of the top-level directory, entries of each direct
# sub-directory, footer.
# Globals: artist (set to the current directory name); result (appended to);
#          FILENAME (read)
# Returns: the status of the final write, or non-zero if the top-level
#          directory could not be re-entered
function write_file() {
  local dir

  # set global artist
  artist=$(get_dir)
  write_header
  # top-level-directory
  process_directory "$PWD" 'root'
  # process sub-directories
  for dir in *; do
    if [[ -d "$dir" ]]; then
      # skip a directory we cannot enter instead of processing the wrong
      # place and then climbing out of the top-level directory
      cd -- "$dir" || continue
      process_directory "$dir"
      cd .. || return
    fi
  done
  write_footer
  # %b expands the literal "\n" / "\t" sequences stored in result
  printf '%b\n' "$result" > "$FILENAME"
}
# OUTPUT:
echo
echo "GENERATING PODCAST FEED"
echo "======================="
echo "Writing to: $FILENAME"
# write the actual file; only report success when the write succeeded
if ! write_file; then
  echo "ERROR: could not write $FILENAME" >&2
  exit 1
fi
echo "Finished writing to file: $FILENAME"
# Stop here on purpose: what follows in the file is an unfinished helper
# (opens iTunes, starts a local web server) that is not meant to run.
exit 0
# test feed in iTunes:
# NOTE: not reached while the script exits before this point.
# Opens iTunes, fills the "Subscribe to Podcast" dialog with the feed URL via
# AppleScript, then serves the current directory over HTTP with Python.
if [[ -n "${PYTHON}" ]]; then
  echo
  echo "Attempting to open iTunes to subscribe..."
  # run applescript which opens iTunes and the podcast subscribe window.
  # Each -e statement is kept on one line: a backslash-newline inside single
  # quotes would be passed to osascript literally. The command is run
  # directly (not inside backticks) so its output is not executed.
  osascript \
    -e 'tell application "iTunes" to activate' \
    -e 'tell application "System Events" to click menu item "Subscribe to Podcast…" of menu "Advanced" of menu bar 1 of process "iTunes"' \
    -e 'tell application "System Events" to tell process "iTunes" to tell window "Subscribe to Podcast" to set value of text field 1 to "'"$URL$FILENAME"'"'
  echo
  echo "Starting server..."
  echo "(because the feed cannot be served to iTunes as a file-system path,"
  echo "itunes requires a URL, use $URL$FILENAME)"
  echo
  echo "Press Ctrl-C to stop the server after retrieving episodes in iTunes..."
  # run simple server from directory; the module is named http.server on
  # Python 3 and SimpleHTTPServer on Python 2
  if "${PYTHON}" -c 'import http.server' 2>/dev/null; then
    "${PYTHON}" -m http.server
  else
    "${PYTHON}" -m SimpleHTTPServer
  fi
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment