Created
April 18, 2012 07:19
-
-
Save reedobrien/2411709 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/cjp/cjp/scripts/runScrapers.py b/cjp/cjp/scripts/runScrapers.py | |
index e79ccf1..6c989c4 100755 | |
--- a/cjp/cjp/scripts/runScrapers.py | |
+++ b/cjp/cjp/scripts/runScrapers.py | |
@@ -28,7 +28,7 @@ for s in scrapers: | |
# remove original HTML for articles older than two weeks | |
now = datetime.now() | |
keepdate = now - timedelta(days=14) | |
-old_articles = Article.objects.filter(created__lte = keepdate).exclude(orig_html = '') | |
+old_articles = Article.objects.filter(created__lte = keepdate).exclude(orig_html = '').iterator() | |
for article in old_articles: | |
article.orig_html = '' | |
article.save() | |
@@ -37,8 +37,14 @@ for article in old_articles: | |
# Only keep 30 days worth of logs. Should be sufficient for any | |
# debugging purposes. | |
keepdate = now - timedelta(days=30) | |
-FeedLog.objects.filter(created__lte = keepdate).delete() | |
- | |
+number_of_objects = 100 | |
+count = FeedLog.objects.filter(created__lte=keepdate).count() | |
+# Re-fetch the first batch each pass; deleting rows shifts later offsets. | |
+for i in xrange(0, count, number_of_objects): | |
+    smaller_queryset = FeedLog.objects.filter( | |
+        created__lte=keepdate)[:number_of_objects] | |
+    for feed_log in smaller_queryset: | |
+        feed_log.delete() | |
# clean up the tables | |
for table in (Article, FeedLog): |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment