Skip to content

Instantly share code, notes, and snippets.

@antonizoon
Last active December 20, 2024 23:26
Show Gist options
  • Save antonizoon/d8890e1fd6c9f9b524f7b8071b1d501e to your computer and use it in GitHub Desktop.
Save antonizoon/d8890e1fd6c9f9b524f7b8071b1d501e to your computer and use it in GitHub Desktop.
Export from Wallabag Android App v2.4.2 to Wallabag v2 JSON Export format
#!/usr/bin/python3
# Wallabag Android SQLite DB to Wallabag Export format
# tested to work on Wallabag Android app version 2.4.2 as of 2021-07-08
# Preparation: To export data from the app go to "Settings - Miscellaneous - Database location" and select "External storage", after that the DB should be available as three wallabag* files in /sdcard/Android/data/fr.gaulupeau.apps.InThePoche/files/ , copy that over to your desktop.
# Usage: After that, ensure python is installed, open a command prompt or terminal
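# Make sure to run this script within the copied Android/data/fr.gaulupeau.apps.InThePoche/files/ directory, because the database is opened via the relative path in SQLITE_DB. Otherwise change SQLITE_DB to the full path of the `wallabag` database file (OUTPUT_PATH only controls where the JSON files are written).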
# python wallabag-android-export.py
# This will create the following files:
# articles-android.json - The full metadata from the SQLite database in JSON format.
# articles.json - The wallabag v2 compatible export format, that can be imported into another wallabag v2 or higher instance
# This work is free. You can redistribute it and/or modify it under the terms of the Do What The Fuck You Want To Public License, Version 2, as published by Sam Hocevar.
"""
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
Version 2, December 2004
Copyright (C) 2004 Sam Hocevar <[email protected]>
Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. You just DO WHAT THE FUCK YOU WANT TO.
"""
import os
import sys
import json
import sqlite3
# OrderedDict is only needed on Python versions before 3.7; from 3.7 on, plain dicts preserve insertion order.
#from collections import OrderedDict
OUTPUT_PATH = './'
# sqlite db filename by default
SQLITE_DB = 'wallabag'
def mkdirs(path):
    """Make directory (and any missing parent directories), if it doesn't exist.

    ``exist_ok=True`` is used instead of an ``os.path.exists`` pre-check so
    there is no window between the check and the creation in which another
    process could create the directory and make ``os.makedirs`` fail.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If *path* already exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)
# Create the folder to output to up front, so the JSON files written later
# (which are placed under OUTPUT_PATH) have an existing directory to land in.
mkdirs(OUTPUT_PATH)
# generate a dict from a SQL query
def dict_factory(cursor, row):
    """Build a ``{column_name: value}`` dict for one result row.

    Intended for use as ``sqlite3.Connection.row_factory``.

    Args:
        cursor: The cursor that produced the row; its ``description`` supplies
            the column names (first item of each description entry).
        row: The raw row as a sequence of column values.

    Returns:
        A dict in column order. Columns whose value is ``None`` (SQL NULL) are
        left out entirely, so callers must not assume every column is present.
    """
    # A plain dict preserves insertion order on Python 3.7+, so OrderedDict is
    # not needed here. `is not None` keeps falsy-but-real values such as 0 or ''.
    return {
        column[0]: value
        for column, value in zip(cursor.description, row)
        if value is not None
    }
# connect to the SQLite DB with the dict connector
# sqlite3.connect() silently creates a new, empty database when the file does
# not exist, which would only surface later as a confusing "no such table"
# error. Fail early with an actionable message instead.
if not os.path.isfile(SQLITE_DB):
    sys.exit(
        "SQLite database %r not found. Run this script from the directory "
        "containing the wallabag database files, or change SQLITE_DB."
        % SQLITE_DB
    )
conn = sqlite3.connect(SQLITE_DB)
# every fetched row becomes a dict keyed by column name (see dict_factory)
conn.row_factory = dict_factory
# module-level cursor shared by query()
cursor = conn.cursor()
def query(query):
    """Run the SQL statement *query* on the module-level cursor.

    Returns the list of all resulting rows, each shaped by the connection's
    row factory.
    """
    # Cursor.execute() returns the cursor itself, so the calls can be chained.
    return cursor.execute(query).fetchall()
def write_json(data, fname, sort_keys=False):
    """Serialize *data* as JSON to ``<fname>.json`` inside OUTPUT_PATH.

    Args:
        data: JSON-serializable object to dump.
        fname: Base file name, without the ``.json`` extension.
        sort_keys: Passed through to ``json.dump``.
    """
    file_name = '%s.json' % fname
    destination = os.path.join(OUTPUT_PATH, file_name)
    with open(destination, 'w') as out_file:
        json.dump(data, out_file, sort_keys=sort_keys)
# Write a JSON dump `articles-android.json` of the article table joined with
# its content, keeping the android sqlite column names, which honestly make
# quite a lot more sense.
# Note: USING (_id) is an inner join, so only articles that have a matching
# article_content row are selected, and the row factory leaves out columns
# whose value is NULL.
articles_android = query('SELECT * FROM article JOIN article_content USING (_id)')
write_json(articles_android, 'articles-android')
# optionally you could also join the tags from the table "article_tags_join", but I didn't have much of those and the export format didn't have it in the example so I didn't bother.
"""
-- format of Wallabag Android DB as of 2021-07-08
-- Describe ARTICLE
CREATE TABLE "ARTICLE" ("_id" INTEGER PRIMARY KEY ,"ARTICLE_ID" INTEGER UNIQUE ,"TITLE" TEXT,"DOMAIN" TEXT,"URL" TEXT,"GIVEN_URL" TEXT,"ORIGIN_URL" TEXT,"ESTIMATED_READING_TIME" INTEGER NOT NULL ,"LANGUAGE" TEXT,"PREVIEW_PICTURE_URL" TEXT,"AUTHORS" TEXT,"FAVORITE" INTEGER,"ARCHIVE" INTEGER,"CREATION_DATE" INTEGER,"UPDATE_DATE" INTEGER,"PUBLISHED_AT" INTEGER,"STARRED_AT" INTEGER,"IS_PUBLIC" INTEGER,"PUBLIC_UID" TEXT,"ARTICLE_PROGRESS" REAL,"IMAGES_DOWNLOADED" INTEGER)
-- Describe ARTICLE_CONTENT
CREATE TABLE "ARTICLE_CONTENT" ("_id" INTEGER PRIMARY KEY ,"CONTENT" TEXT)
"""
def _to_export_entry(article):
    """Convert one Android DB row (dict keyed by column name) to an export entry.

    The row dicts omit columns whose value was NULL, so every column is read
    with ``.get()``; a missing column is exported as ``None`` (JSON ``null``)
    instead of aborting the whole export with a ``KeyError``.

    Each value is emitted under both its field name and a numeric string key.
    NOTE(review): no "5" key is emitted for the content; the reference format
    presumably pairs "content" with "5" -- confirm before adding it.
    """
    # 0. _id, local primary key
    entry_id = article.get("_id")
    # 1. title extracted for display
    title = article.get("TITLE")
    # url of the site
    url = article.get("URL")
    archived = article.get("ARCHIVE")
    favorite = article.get("FAVORITE")
    return {
        "id": entry_id,
        "0": entry_id,
        "title": title,
        "1": title,
        "url": url,
        "2": url,
        "is_read": archived,
        "3": archived,
        "is_fav": favorite,
        "4": favorite,
        "content": article.get("CONTENT"),
        "user_id": "1",
        "6": "1",
    }


# one export entry per row read from the Android database, in the same order
articles = [_to_export_entry(article) for article in articles_android]
# format based on the example here: https://gist.github.com/tcitworld/c7b7e963b579a27240b2
# now write the converted entries (the wallabag v2 export formatted json) to
# disk as `articles.json` inside OUTPUT_PATH
write_json(articles, 'articles')
# using the wallabag v2 export format, you can also use this ruby script to convert it into HTML Bookmark format
# https://github.com/KillianKemps/wallabag-to-html/blob/master/json_to_html_bookmarks.rb
@antonizoon
Copy link
Author

antonizoon commented Jul 9, 2021

I had forgotten to generate an export file before framabag.org shut down. Since I still had all my articles in the Wallabag Android App, I created a script to export from the Wallabag Android App's sqlite format to a Wallabag v2 compatible JSON export file.

This can be useful for others in case they lost their self hosted or cloud hosted Wallabag instance, but still have the Android app disconnected but intact.

It is crucial to generate a Wallabag v2 export JSON from the Android app before switching to a new instance, as connecting to a new instance will wipe the current Wallabag app sqlite.

@NotFluffy
Copy link

Just to say this worked for app version 2.5.3 to wallabag version 2.6.10

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment