Skip to content

Instantly share code, notes, and snippets.

@feihong
Last active August 29, 2015 14:22
Show Gist options
  • Save feihong/a48769c8aee7819d3b34 to your computer and use it in GitHub Desktop.
Save feihong/a48769c8aee7819d3b34 to your computer and use it in GitHub Desktop.
Extract text chunks from a given Crowdin page
"""
Extract source and translation texts from a given Crowdin page. The output file
generated by this script is suitable for doing translation work on a mobile
device that is occasionally offline.
This script assumes that you've created a custom Firefox profile that is
already logged into your Crowdin account, and that the profile's directory name
contains 'selenium' so that get_profile() below can find it.
"""
# Crowdin translation page to extract from. main() opens this URL in the
# browser and also writes it as the first line of the output file.
url = 'https://crowdin.com/translate/django-girls-tutorial/53/en-zhcn'
import codecs
import os
import os.path as op
import sys
from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
# WebDriver instance shared by the functions below: main() assigns it and
# get_text_pair() reads it. Stays None until main() runs.
browser = None
def main():
    """
    Collect every (source, translation) pair from the Crowdin page at `url`
    and write them to text.txt in the current directory.

    Starts Firefox with the profile returned by get_profile(), loads `url`,
    then repeatedly reads the current pair and clicks the element with id
    'next_translation'. Collection stops when the pair just read equals the
    previously collected one. The browser is always closed, even if a Selenium
    call fails part-way through.

    Output format (UTF-8): the URL, a blank line, then for each pair the source
    text on one line and the translation on the next ('-' if there is no
    translation), followed by a blank line.
    """
    global browser
    text_pairs = []
    browser = webdriver.Firefox(firefox_profile=get_profile())
    try:
        browser.get(url)
        while True:
            pair = get_text_pair()
            # A pair identical to the last one collected is treated as the end
            # of the list.
            # NOTE(review): presumably the "next" button stops advancing on the
            # final string; this would also stop early on two identical
            # consecutive strings -- confirm against the live page.
            if text_pairs and pair == text_pairs[-1]:
                break
            text_pairs.append(pair)
            browser.find_element_by_id('next_translation').click()
            print(pair)
    finally:
        # Without this, an exception during scraping left Firefox running.
        browser.quit()

    with codecs.open('text.txt', 'w', 'utf-8') as fp:
        fp.write(url + '\n\n')
        for source, translated in text_pairs:
            fp.write(source + '\n')
            # Untranslated strings get a '-' placeholder line.
            fp.write((translated or '-') + '\n\n')

    print('\nWrote results to text.txt')
def get_text_pair():
    """
    Read the string currently displayed in the browser.

    Returns a (source_text, translated_text) tuple, where source_text is the
    text of the '#source_phrase_container' element and translated_text is the
    'value' attribute of the '#translation' element. Both are looked up through
    the module-level `browser`.
    """
    source = browser.find_element_by_css_selector(
        '#source_phrase_container').text
    translation = browser.find_element_by_css_selector(
        '#translation').get_attribute('value')
    return source, translation
def get_profile():
    """
    Locate the custom Firefox profile used for scraping.

    Lists the Firefox profiles directory under $HOME (the macOS location when
    sys.platform is 'darwin', otherwise ~/.mozilla/firefox) and returns a
    FirefoxProfile for the first entry yielded by os.listdir whose name
    contains 'selenium'.

    Returns None when no entry matches. Raises KeyError if HOME is not set and
    OSError if the profiles directory cannot be listed.
    """
    home = os.environ['HOME']
    if sys.platform == 'darwin':
        profile_dir = op.join(
            home, 'Library', 'Application Support', 'Firefox', 'Profiles')
    else:
        # This branch used to be the literal string '%/.mozilla/firefox' (the
        # format spec and `% home` were missing), so the listing below always
        # failed on non-macOS platforms.
        profile_dir = op.join(home, '.mozilla', 'firefox')

    for profile in os.listdir(profile_dir):
        if 'selenium' in profile:
            return FirefoxProfile(op.join(profile_dir, profile))
    return None
# Run the extraction only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment