Last active
July 25, 2021 21:13
-
-
Save chriskyfung/e4c863699223206c23a05eebb89faa13 to your computer and use it in GitHub Desktop.
Get Posts from Facebook Pages and Convert them to WordPress XML Import File using Python with Facebook Scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# source file: https://gist.github.com/chriskyfung/e4c863699223206c23a05eebb89faa13
# last update at 2021-07-17T19:36:05+00:00
import urllib
import urllib.parse
from datetime import datetime, timezone
from xml.sax.saxutils import escape, quoteattr

from facebook_scraper import get_posts
# Parameters
# Name (ID) of the target Facebook page, as it appears in its URL.
FBPAGE_ID = 'medium'
# Passed to get_posts() as `pages=`: this is a number of result pages to
# request, not a number of posts.
NUMOFPAGES = 3
# Output file name, e.g. FB_PAGE_NAME-fb-page-export.xml
EXPORTFILE = '{}-fb-page-export.xml'.format(FBPAGE_ID)
# Maximum number of characters copied from the post text into the post title.
TITLELEN = 16
# WordPress category assigned to every imported post.
CATEGORY = 'Imported from Fb Page'
# URL-quoted form of CATEGORY, written as the category "nicename" attribute.
CATEGORY_SLUG = urllib.parse.quote_plus(CATEGORY)
# Path of the cookies file handed to get_posts().
COOKIES = '.cookies.json'
def _cdata(text):
    """Wrap *text* in an XML CDATA section.

    A literal ``]]>`` inside the text would end the section early, so it is
    split across two adjacent CDATA sections.
    """
    return '<![CDATA[{}]]>'.format(str(text).replace(']]>', ']]]]><![CDATA[>'))


def _post_title(text, limit):
    """Return the first *limit* characters of *text*, plus '...' if it was cut."""
    return '{}...'.format(text[:limit]) if len(text) > limit else text


print(f'Start scraping https://www.facebook.com/{FBPAGE_ID}\n')
# NOTE(review): get_posts presumably returns a lazy iterator, in which case the
# real scraping happens while the loop below consumes it - confirm.
fb_posts = get_posts(FBPAGE_ID, pages=NUMOFPAGES, timeout=30, cookies=COOKIES)
print('\nFinish Facebook scrapping\n')
print(f'\nStart convert and output to {EXPORTFILE}\n')

# The XML declaration says UTF-8, so the file must be written as UTF-8 no
# matter what the platform default encoding is. `with` closes the file even if
# scraping fails part-way through.
with open(EXPORTFILE, 'w', encoding='utf-8') as x:
    # Write the file header
    x.write('<?xml version="1.0" encoding="UTF-8" ?>')
    x.write(
        '<rss version="2.0"'
        ' xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"'
        ' xmlns:content="http://purl.org/rss/1.0/modules/content/"'
        ' xmlns:wfw="http://wellformedweb.org/CommentAPI/"'
        ' xmlns:dc="http://purl.org/dc/elements/1.1/"'
        ' xmlns:wp="http://wordpress.org/export/1.2/">'
    )
    x.write('<channel><wp:wxr_version>1.2</wp:wxr_version>')

    # Convert each scraped post into one WXR <item>
    for idx, post in enumerate(fb_posts):
        # Treat a missing text / image list as empty instead of crashing.
        text = post['text'] or ''
        images = post['images'] or []
        # Copy the first TITLELEN characters as the WP post title.
        title = _post_title(text, TITLELEN)
        # Use the FB post time; fall back to the scrape time (UTC) if absent.
        post_date = post['time'] or datetime.now(timezone.utc).strftime(
            '%Y-%m-%d %H:%M:%S')

        # Print essential values to screen
        print('#{}'.format(idx))
        print('title: {}'.format(title))
        print('fb_url: {}'.format(post['post_url']))
        if post['image']:
            print('image: {}'.format(post['image']))
        if images:
            print('images: {}'.format(images))
        print(text)
        print('wp:post_name: {}'.format(post['post_id']))
        print('wp:post_date_gmt: {}'.format(post_date))
        print('wp:status: draft')

        # One <img> paragraph per image, placed before the text content.
        # quoteattr() adds the surrounding quotes and escapes &, <, > and quotes.
        img_tags = ''.join(
            '<p><img src={}></p>'.format(quoteattr(str(img))) for img in images)
        # "--" is not allowed inside an XML comment; percent-encode the second
        # hyphen so the URL stays equivalent.
        fb_url = str(post['post_url']).replace('--', '-%2D')

        x.write(''.join([
            '<!-- ITEM #{} -->'.format(idx),
            '<item>',
            # Title is plain element text, so XML-escape &, < and >.
            '<title>{}</title>'.format(escape(title)),
            '<content:encoded>{}</content:encoded>'.format(
                _cdata('{}<p>{}</p>'.format(img_tags, text))),
            # Copy the FB post ID as the WP post name
            '<wp:post_name>{}</wp:post_name>'.format(
                escape(str(post['post_id']))),
            '<wp:post_date_gmt>{}</wp:post_date_gmt>'.format(_cdata(post_date)),
            # Import as a draft, with comments open, as a regular post
            '<wp:status><![CDATA[draft]]></wp:status>',
            '<wp:comment_status><![CDATA[open]]></wp:comment_status>',
            '<wp:post_type><![CDATA[post]]></wp:post_type>',
            # Assign the category in WordPress
            '<category domain="category" nicename="{}">{}</category>'.format(
                CATEGORY_SLUG, _cdata(CATEGORY)),
            # Keep the original FB post URL as an XML comment
            '<!-- orig_fb_post_url="{}" -->'.format(fb_url),
            '</item>',
        ]))

    # Write the file footer
    x.write('</channel>')
    x.write('</rss>')

print('\nComplete!')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8" ?><rss version="2.0" xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:wp="http://wordpress.org/export/1.2/"><channel><wp:wxr_version>1.2</wp:wxr_version><!-- ITEM #0 --><item><title>The key to succe...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/1*7ZWFAD12i7xHcbE1FGySoA.jpeg"></p><p>The key to success is found in being persistently generous, Mike Thompson writes. | |
FORGE.MEDIUM.COM | |
How to Stand Out Without Showing Off</p>]]></content:encoded><wp:post_name>4474666145878509</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 22:51:29]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4474666145878509" --></item><!-- ITEM #1 --><item><title>"The consistency...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/0*JqCG2d_E6UxOyJOy"></p><p>"The consistency with which I have been racially profiled while wearing sweatpants is so predictable, that I sometimes even bet money with my white friends that I‘ll get stopped. Guess what? I’ve won every single bet." —Rebecca Stevens A. | |
MEDIUM.COM | |
I Envy My White Friends Who Wear Sweatpants To Travel</p>]]></content:encoded><wp:post_name>4474558489222608</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 21:51:30]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4474558489222608" --></item><!-- ITEM #2 --><item><title>Naomi Osaka, Ste...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/0*690_67v6Gf0eeetZ"></p><p>Naomi Osaka, Steph Curry, and Roger Federer have set a precedent for professional athletes to speak up about the need to take a break as part of their healing, placing their health above the push to perform. | |
Physician Dr. Amitha Kalaichandran on how healing requires rest: | |
ELEMENTAL.MEDIUM.COM | |
When Recovery Requires Rest</p>]]></content:encoded><wp:post_name>4474441485900975</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 20:51:31]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4474441485900975" --></item><!-- ITEM #3 --><item><title>"The victor will...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/1*aVnABr6yNfJZDFknJNLsoQ.png"></p><p>"The victor will write the history of January 6—and likely lead us after Biden. And the loser’s story will never be told again." —Julio Vincent Gambuto | |
JULIOVINCENT.MEDIUM.COM | |
The Great Story War Is Raging</p>]]></content:encoded><wp:post_name>4474310852580705</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 19:51:32]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4474310852580705" --></item><!-- ITEM #4 --><item><title>➡️ Fast is bette...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/0*Xiwy-2eHyIMj83rZ"></p><p>➡️ Fast is better than good | |
➡️ Unlearn what you know about technical debt | |
➡️ There aren’t stupid questions | |
SVPINO.COM | |
Lessons learned from the smartest Software Engineer I’ve met</p>]]></content:encoded><wp:post_name>4474179275927196</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 18:51:33]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4474179275927196" --></item><!-- ITEM #5 --><item><title>Writer Susan Orl...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/1*[email protected]"></p><p>Writer Susan Orlean has realized that her dog is now older than she is—in dog years. | |
"Soon she won’t just be a little older than me; she will be much older than me, moving past our mutual middle age and into something more fragile. I miss her already." | |
SUSANORLEAN.MEDIUM.COM | |
Lessons from an Old Dog About Creaky Bones and Graying Hair</p>]]></content:encoded><wp:post_name>4474039199274537</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 17:51:34]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4474039199274537" --></item><!-- ITEM #6 --><item><title>Yoga asks us to ...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/0*_mueb5znFVvESZ1q"></p><p>Yoga asks us to stay still, which can mimic a defensive state—explaining why survivors find yoga to be triggering. | |
But strength training and/or a trauma-sensitive yoga practice *can* likely help those living with trauma, Laura Khoudari writes: | |
ELEMENTAL.MEDIUM.COM | |
Why Yoga Can Be Triggering for Trauma Survivors</p>]]></content:encoded><wp:post_name>4473881829290274</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 16:51:35]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4473881829290274" --></item><!-- ITEM #7 --><item><title>Having a time cr...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/1*8Jl_l9AXtnaOfWl4Lep7SA.jpeg"></p><p>Having a time crunch can actually make your writing better, Sadie Hoagland writes. | |
MEDIUM.COM | |
On Writing Without Solitude</p>]]></content:encoded><wp:post_name>4473731309305326</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 14:51:36]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4473731309305326" --></item><!-- ITEM #8 --><item><title>"Identify one sk...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/1*z0aDFyiQ-QppR_TMdt3IQQ.jpeg"></p><p>"Identify one skill you’d like to improve, carve out a few hours a day, and do it until you’re no longer bad at it." —Mike Thompson | |
MEDIUM.COM | |
If You Want to Stand Out, Put Your Head Down</p>]]></content:encoded><wp:post_name>4473569789321478</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 13:51:36]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4473569789321478" --></item><!-- ITEM #9 --><item><title>"The advent of g...</title><content:encoded><![CDATA[<p><img src="https://miro.medium.com/max/1200/0*v624MIXwjDL_0Yq_"></p><p>"The advent of global positioning systems (GPS) means we’re getting worse at learning routes. It’s been suggested that using navigation systems when driving creates ‘inattention blindness,' a failure to 'see' elements in the environment.’" —Steph 丨凌姿 | |
STEPHSTEPHWONG.MEDIUM.COM | |
Have we lost our way? Navigation and wayfinding in the 21st century</p>]]></content:encoded><wp:post_name>4473422929336164</wp:post_name><wp:post_date_gmt><![CDATA[2021-07-17 12:51:37]]></wp:post_date_gmt><wp:status><![CDATA[draft]]></wp:status><wp:comment_status><![CDATA[open]]></wp:comment_status><wp:post_type><![CDATA[post]]></wp:post_type><category domain="category" nicename="Imported+from+Fb+Page"><![CDATA[Imported from Fb Page]]></category><!-- orig_fb_post_url="https://facebook.com/medium/posts/4473422929336164" --></item></channel></rss> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
📃Blog Post: 如何匯入Facebook專頁貼文至WordPress (2021) | by Chris K.Y. Fung | 數碼文明推廣教室 | Medium
🧪Playground: https://replit.com/@chriskyfung/FbPage2WordPressXMLImport