Skip to content

Instantly share code, notes, and snippets.

@thenets
Last active October 27, 2017 14:28
Show Gist options
  • Save thenets/3aa94d2f84ceda0e1ec185442f180bc9 to your computer and use it in GitHub Desktop.
Save thenets/3aa94d2f84ceda0e1ec185442f180bc9 to your computer and use it in GitHub Desktop.
ckanext-harvest Delete all datasets harvested from source
#!/bin/bash
#
# Build a SQL clean-up script for the datasets produced by ckanext-harvest.
#
# For every harvest source listed by `paster harvester sources` this script:
#   1. runs `harvester clearsource <source id>`,
#   2. runs `harvester run_test <source id>` and stores the 8th
#      space-separated field of each "Creating HarvestObject for" log line
#      (treated as the package name) in $PACKAGES_NAME_FILE,
#   3. writes DELETE statements for each stored name to $SQL_FILE.
#
# The SQL is only generated, never executed: run $SQL_FILE by hand afterwards.
#
# NOTE(review): `clearsource` is presumably destructive for the source's
# harvested data -- confirm against the ckanext-harvest documentation before
# running this on a production instance.

# Settings
CONFIG_FILE='/etc/ckan/default/development.ini'
PACKAGES_NAME_FILE='/tmp/harvest_packages_names'
SQL_FILE='/etc/ckan/clear_harvest_packages.sql'

# DON'T CHANGE THE CONTENT BELOW!!!
# =====================================================================
PASTER='/usr/lib/ckan/default/bin/paster'

# Print an error message on stderr.
err() {
  printf 'ERROR: %s\n' "$*" >&2
}

# Replace the file at $1 with a fresh empty file.
# Returns non-zero when the file cannot be removed or created.
reset_output_file() {
  rm -f -- "$1" && : > "$1"
}

# Print every harvest source id, one per line, without duplicates.
# Only lines of the form "Source id: <id>" are used; other lines that merely
# contain the word "Source" (e.g. a source title) are ignored.
list_source_ids() {
  "$PASTER" --plugin=ckanext-harvest harvester sources -c "$CONFIG_FILE" \
    | sed -n 's/^[[:space:]]*Source id:[[:space:]]*//p' \
    | awk 'NF && !seen[$0]++'
}

# Clear harvest source $1, re-run it, and print the package names found in
# the run's log output (one per line).
collect_package_names() {
  local source_id=$1

  # Output of clearsource is deliberately discarded, as only the log of the
  # following test run is of interest.
  nohup "$PASTER" --plugin=ckanext-harvest harvester clearsource \
    "$source_id" -c "$CONFIG_FILE" >/dev/null 2>&1

  # stderr is merged into stdout so the log line is found on either stream.
  nohup "$PASTER" --plugin=ckanext-harvest harvester run_test \
    "$source_id" -c "$CONFIG_FILE" 2>&1 \
    | grep 'Creating HarvestObject for' \
    | cut -d' ' -f8
}

# Print the DELETE statements that remove the package named $1 together with
# its extras, tags and revisions. Single quotes in the name are doubled so
# the generated SQL string literals stay well-formed.
print_delete_statements() {
  local quote="'"
  local name=${1//$quote/$quote$quote}
  local ids_by_name="SELECT package.id FROM package WHERE name='$name' GROUP BY package.id"
  local table

  for table in package_extra_revision package_extra \
    package_tag_revision package_tag; do
    printf '%s\n' "DELETE FROM $table WHERE package_id IN ($ids_by_name);"
  done
  printf '%s\n' "DELETE FROM package_revision WHERE name='$name';"
  printf '%s\n' "DELETE FROM package WHERE name='$name';"
}

# Entry point. Returns 0 on success, 1 when a prerequisite is missing or an
# output file cannot be written.
main() {
  local -a source_ids=()
  local source_id package_name

  # Fail before touching anything if the tooling is not in place.
  if [[ ! -x "$PASTER" ]]; then
    err "paster not found or not executable: $PASTER"
    return 1
  fi
  if [[ ! -r "$CONFIG_FILE" ]]; then
    err "CKAN config file not readable: $CONFIG_FILE"
    return 1
  fi

  # Reset both output files up front: an unwritable target is reported
  # before any harvest command runs, and no stale SQL survives a failed run.
  if ! reset_output_file "$PACKAGES_NAME_FILE"; then
    err "cannot write $PACKAGES_NAME_FILE"
    return 1
  fi
  if ! reset_output_file "$SQL_FILE"; then
    err "cannot write $SQL_FILE"
    return 1
  fi

  # Get all sources id
  mapfile -t source_ids < <(list_source_ids)
  if (( ${#source_ids[@]} == 0 )); then
    printf 'WARNING: no harvest sources found.\n' >&2
  fi

  # Get all packages id
  for source_id in "${source_ids[@]}"; do
    echo "GETTING PACKAGES IDS FROM SOURCE: $source_id"
    collect_package_names "$source_id" >> "$PACKAGES_NAME_FILE"
    # Short pause between sources, kept from the original script.
    sleep 0.5
  done

  # Create SQL script. Names are read line by line so they are never
  # word-split or glob-expanded; blank lines (log lines with fewer than
  # eight fields) are skipped.
  while IFS= read -r package_name || [[ -n "$package_name" ]]; do
    [[ -n "$package_name" ]] || continue
    print_delete_statements "$package_name"
  done < "$PACKAGES_NAME_FILE" >> "$SQL_FILE"

  echo
  echo 'PACKAGES NAMES CREATED:' "$PACKAGES_NAME_FILE"
  echo 'SQL FIX CREATED:' "$SQL_FILE"
  echo "Now you need to run the $SQL_FILE file."
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment