@reedobrien
Created April 18, 2012 07:19
diff --git a/cjp/cjp/scripts/runScrapers.py b/cjp/cjp/scripts/runScrapers.py
index e79ccf1..6c989c4 100755
--- a/cjp/cjp/scripts/runScrapers.py
+++ b/cjp/cjp/scripts/runScrapers.py
@@ -28,7 +28,7 @@ for s in scrapers:
# remove original HTML for articles older than two weeks
now = datetime.now()
keepdate = now - timedelta(days=14)
-old_articles = Article.objects.filter(created__lte = keepdate).exclude(orig_html = '')
+old_articles = Article.objects.filter(created__lte = keepdate).exclude(orig_html = '').iterator()
for article in old_articles:
    article.orig_html = ''
    article.save()
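
The first hunk only appends .iterator(). Iterating a plain queryset fills Django's result cache with every matching row, which is what exhausts memory on a large backlog; .iterator() streams rows from the database cursor and discards each one after the loop body runs. A minimal sketch of the difference, assuming the Article model from the patch (the import path is illustrative, not taken from the patch):

from datetime import datetime, timedelta

from cjp.models import Article  # illustrative import path, not part of the patch

keepdate = datetime.now() - timedelta(days=14)
stale = Article.objects.filter(created__lte=keepdate).exclude(orig_html='')

# "for a in stale:" would cache every matching row in the queryset's result
# cache; .iterator() keeps only the current row in memory.
for article in stale.iterator():
    article.orig_html = ''
    article.save()
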
@@ -37,8 +37,14 @@ for article in old_articles:
# Only keep 30 days worth of logs. Should be sufficient for any
# debugging purposes.
keepdate = now - timedelta(days=30)
-FeedLog.objects.filter(created__lte = keepdate).delete()
-
+number_of_objects = 100
+count = FeedLog.objects.filter(created__lte=keepdate).count()
+for i in xrange(0, count, number_of_objects):
+    smaller_queryset = FeedLog.objects.filter(
+        created__lte=keepdate)[:number_of_objects]
+    for feed_log in smaller_queryset:
+        feed_log.delete()
+# FeedLog.objects.filter(created__lte = keepdate).delete()
# clean up the tables
for table in (Article, FeedLog):
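
The second hunk swaps the single bulk delete for fixed-size batches: QuerySet.delete() can load the matching rows (to handle cascades and delete signals) before issuing the SQL, so deleting a month of logs in one call can spike memory the same way. A reusable sketch of the batching idea, with the helper name and batch size as illustrative choices rather than part of the patch; deleting by primary key from the head of the queryset each pass avoids any offset bookkeeping while rows disappear:

def delete_in_batches(queryset, batch_size=100):
    """Delete everything matching `queryset`, batch_size rows at a time."""
    while True:
        # Fetch primary keys for the next chunk; an empty chunk means we're done.
        pks = list(queryset.values_list('pk', flat=True)[:batch_size])
        if not pks:
            break
        queryset.model.objects.filter(pk__in=pks).delete()

# Usage mirroring the patch:
# delete_in_batches(FeedLog.objects.filter(created__lte=keepdate))
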