Skip to content

Instantly share code, notes, and snippets.

@kyl191
Created February 26, 2015 13:49
Show Gist options
  • Save kyl191/586c7665e96148c233d5 to your computer and use it in GitHub Desktop.
Save kyl191/586c7665e96148c233d5 to your computer and use it in GitHub Desktop.
Wordpress post comparison
# Compares 2 WP post tables to find published posts with the same name but different content
# Used after merging an old WP install into an existing one
import csv, difflib, hashlib, sys
# Column positions used when reading each CSV row of a WP post table export.
CONTENT_COLUMN = 4   # text that is compared between the two exports
TITLE_COLUMN = 5     # name used to pair up posts across the two exports
FILTER_COLUMN = 17   # a row is only considered when this column equals FILTER_VALUE
# NOTE(review): presumably column 17 is post_parent in a standard wp_posts
# export, so '0' keeps top-level posts and drops revisions -- confirm against
# the column order of the actual export.
FILTER_VALUE = '0'


def load_posts(path):
    """Read one exported post table and return a ``{title: content}`` dict.

    Args:
        path: Path of a UTF-8 encoded CSV file.

    Returns:
        A dict mapping ``row[TITLE_COLUMN]`` to ``row[CONTENT_COLUMN]`` for
        every row whose ``row[FILTER_COLUMN]`` equals ``FILTER_VALUE``. When
        several rows share a title, the last one wins. Blank rows and rows
        too short to contain the filter column are skipped.
    """
    posts = {}
    # newline='' lets the csv module handle newlines embedded in quoted
    # fields itself; the context manager guarantees the file is closed.
    with open(path, encoding='utf-8', newline='') as handle:
        for row in csv.reader(handle):
            if len(row) > FILTER_COLUMN and row[FILTER_COLUMN] == FILTER_VALUE:
                posts[row[TITLE_COLUMN]] = row[CONTENT_COLUMN]
    return posts


def compare_posts(main_posts, xen_posts):
    """Split the titles found in both dicts into differing and identical ones.

    Args:
        main_posts: ``{title: content}`` of the existing install.
        xen_posts: ``{title: content}`` of the install that was merged in.

    Returns:
        A ``(mismatch, match)`` tuple of title lists, in ``xen_posts`` order.
        Titles that exist in only one of the dicts are ignored: there is no
        counterpart to diff them against.
    """
    mismatch = []
    match = []
    for title, xen_content in xen_posts.items():
        if title not in main_posts:
            continue
        if main_posts[title] != xen_content:
            mismatch.append(title)
        else:
            match.append(title)
    return mismatch, match


def changed_lines(main_content, xen_content):
    """Return the ndiff output lines describing how two post bodies differ.

    Unchanged lines (those ndiff prefixes with a space) are dropped, so the
    result is empty when both bodies are identical line for line.
    """
    main_lines = ["%s\n" % line for line in main_content.splitlines()]
    xen_lines = ["%s\n" % line for line in xen_content.splitlines()]
    # difflib.Differ().compare(main_lines, xen_lines) would yield the same
    # sequence as difflib.ndiff.
    return [line for line in difflib.ndiff(main_lines, xen_lines)
            if not line.startswith(" ")]


def clear_screen():
    """Clear the terminal using the platform's shell command."""
    import os  # local import: the file's import line does not provide os
    os.system("cls" if os.name == "nt" else "clear")


def run_comparison(argv=None):
    """Show, one post at a time, the diff of every post that differs.

    Args:
        argv: Two CSV paths (main export first, merged-in export second).
            Defaults to ``sys.argv[1:]``.

    After each diff the function waits for Enter before moving on, because
    the screen is cleared before the next post is shown.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 2:
        sys.exit("usage: compare_posts.py MAIN_POSTS.csv OTHER_POSTS.csv")

    main_posts = load_posts(args[0])
    xen_posts = load_posts(args[1])
    mismatch, _match = compare_posts(main_posts, xen_posts)

    for post in mismatch:
        clear_screen()
        print("%s: " % post)
        sys.stdout.writelines(changed_lines(main_posts[post], xen_posts[post]))
        input()


if __name__ == "__main__":
    run_comparison()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment