Last active
October 27, 2017 14:28
-
-
Save thenets/3aa94d2f84ceda0e1ec185442f180bc9 to your computer and use it in GitHub Desktop.
ckanext-harvest Delete all datasets harvested from source
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Generate a SQL script that deletes every dataset referenced by the
# ckanext-harvest sources configured in this CKAN instance.
#
# Steps:
#   1. List all harvest sources through paster and extract their ids.
#   2. For each source, run `clearsource` and then `run_test`, collecting the
#      names logged on "Creating HarvestObject for" lines into
#      $PACKAGES_NAME_FILE.
#   3. Write DELETE statements for each collected name into $SQL_FILE.
#
# The script does NOT execute the SQL; that is left to the operator.

# Settings
CONFIG_FILE='/etc/ckan/default/development.ini'
PACKAGES_NAME_FILE='/tmp/harvest_packages_names'
SQL_FILE='/etc/ckan/clear_harvest_packages.sql'

# DON'T CHANGE THE CONTENT BELOW!!!
# =====================================================================
PASTER='/usr/lib/ckan/default/bin/paster'

# Fail loudly on typos in variable names instead of expanding to "".
set -u

# Start from an empty package-name file.
rm -f -- "$PACKAGES_NAME_FILE"
touch -- "$PACKAGES_NAME_FILE" || {
  echo "ERROR: cannot create $PACKAGES_NAME_FILE" >&2
  exit 1
}

# Get all source ids.
# The listing is captured first so that a paster failure is detected instead
# of silently producing an empty SQL file.
if ! SOURCES_OUTPUT=$("$PASTER" --plugin=ckanext-harvest harvester sources -c "$CONFIG_FILE"); then
  echo "ERROR: could not list harvest sources using $PASTER" >&2
  exit 1
fi

# `sed -n ... p` emits only the lines that really carry a "Source id: "
# prefix; other lines that merely contain the word "Source" are dropped.
SOURCE_IDS=$(printf '%s\n' "$SOURCES_OUTPUT" | sed -n 's/^[[:space:]]*Source id: //p')

# Get all package names.
# The id list is fed through fd 3 so that paster cannot consume it via stdin.
while IFS= read -r SOURCE_ID <&3; do
  [[ -n "$SOURCE_ID" ]] || continue

  echo "GETTING PACKAGES IDS FROM SOURCE: $SOURCE_ID"

  # Output of clearsource is intentionally discarded, as in the original
  # workflow; only the run_test log below is of interest.
  nohup "$PASTER" --plugin=ckanext-harvest harvester clearsource "$SOURCE_ID" \
    -c "$CONFIG_FILE" >/dev/null 2>&1

  # Field 8 (space-separated) of the matching log lines is taken as the
  # package name. NOTE(review): this depends on the harvester's log line
  # layout -- confirm against the installed ckanext-harvest version.
  nohup "$PASTER" --plugin=ckanext-harvest harvester run_test "$SOURCE_ID" \
    -c "$CONFIG_FILE" 2>&1 \
    | grep 'Creating HarvestObject for' \
    | cut -d' ' -f8 >>"$PACKAGES_NAME_FILE"

  sleep 0.5
done 3<<<"$SOURCE_IDS"

# Create SQL script.
rm -f -- "$SQL_FILE"
SINGLE_QUOTE="'"
while IFS= read -r PACKAGES_NAME; do
  # Lines with fewer than 8 fields yield an empty name; skip them.
  [[ -n "$PACKAGES_NAME" ]] || continue

  # Double any single quote so the name cannot terminate the SQL literal.
  SAFE_NAME=${PACKAGES_NAME//$SINGLE_QUOTE/$SINGLE_QUOTE$SINGLE_QUOTE}

  # Dependent rows first, then the package revisions, then the package itself.
  for TABLE in package_extra_revision package_extra package_tag_revision package_tag; do
    printf '%s\n' "DELETE FROM $TABLE WHERE package_id IN (SELECT package.id FROM package WHERE name='$SAFE_NAME' GROUP BY package.id);"
  done
  printf '%s\n' "DELETE FROM package_revision WHERE name='$SAFE_NAME';"
  printf '%s\n' "DELETE FROM package WHERE name='$SAFE_NAME';"
done <"$PACKAGES_NAME_FILE" >"$SQL_FILE" || {
  echo "ERROR: cannot write $SQL_FILE" >&2
  exit 1
}

echo
echo 'PACKAGES NAMES CREATED:' "$PACKAGES_NAME_FILE"
echo 'SQL FIX CREATED:' "$SQL_FILE"
echo "Now you need to run the $SQL_FILE file."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment