Last active
August 29, 2015 14:22
-
-
Save feihong/a48769c8aee7819d3b34 to your computer and use it in GitHub Desktop.
Extract text chunks from a given Crowdin page
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Extract source and translation texts from a given Crowdin page. The output file
generated by this script is suitable for doing translation work on a mobile
device that is occasionally offline.

This script assumes that you've created a custom Firefox profile that is
already logged into your Crowdin account, and that the profile's directory
name contains 'selenium' so that get_profile() below can locate it.
""" | |
# Crowdin translation page to scrape; main() opens this URL and also writes it
# as the first line of text.txt.
url = 'https://crowdin.com/translate/django-girls-tutorial/53/en-zhcn'
import codecs | |
import os | |
import os.path as op | |
import sys | |
from selenium import webdriver | |
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile | |
# Shared WebDriver handle: assigned in main() and read by get_text_pair().
browser = None
def main():
    """
    Step through the strings on the Crowdin page at `url` and save every
    (source, translation) pair to text.txt.

    A Firefox instance is started with the profile returned by get_profile()
    and stored in the module-level `browser` so that get_text_pair() can use
    it. Each pair is echoed to stdout as it is collected. The output file is
    UTF-8 and starts with the page URL, followed by one block per pair: the
    source text, then the translation (or '-' when there is none), then a
    blank line.

    The browser is always closed, even when a WebDriver call raises; in that
    case the exception propagates and text.txt is not written.
    """
    global browser

    text_pairs = []
    browser = webdriver.Firefox(firefox_profile=get_profile())
    try:
        browser.get(url)
        while True:
            pair = get_text_pair()
            # A pair identical to the previous one is taken to mean that
            # clicking "next" no longer advanced to a new string.
            # NOTE(review): two consecutive strings with identical source and
            # translation would also end the loop early.
            if text_pairs and pair == text_pairs[-1]:
                break
            text_pairs.append(pair)
            browser.find_element_by_id('next_translation').click()
            # Single-argument parenthesised form prints the same on Python 2
            # and Python 3.
            print(pair)
    finally:
        # Do not leave a stray Firefox process behind if scraping fails.
        browser.quit()

    with codecs.open('text.txt', 'w', 'utf-8') as fp:
        fp.write(url + '\n\n')
        for source, translated in text_pairs:
            if not translated:
                # Placeholder so untranslated strings still occupy a line.
                translated = '-'
            fp.write(source + '\n')
            fp.write(translated + '\n\n')
    print('\nWrote results to text.txt')
def get_text_pair():
    """
    Read the string pair currently displayed in the page loaded in `browser`.

    Returns a (source_text, translated_text) tuple: the visible text of the
    '#source_phrase_container' element and the 'value' attribute of the
    '#translation' element.
    """
    source = browser.find_element_by_css_selector(
        '#source_phrase_container').text
    translation = browser.find_element_by_css_selector(
        '#translation').get_attribute('value')
    return source, translation
def get_profile():
    """
    Return the FirefoxProfile object for a profile that has the name
    'selenium' in it.

    The profiles directory is looked up under $HOME:
    'Library/Application Support/Firefox/Profiles' when sys.platform is
    'darwin', and '.mozilla/firefox' on every other platform. The first entry
    (in os.listdir order) whose name contains 'selenium' is used.

    Returns None when no entry matches.

    Raises KeyError if the HOME environment variable is not set, and OSError
    if the profiles directory cannot be listed.
    """
    home = os.environ['HOME']
    if sys.platform == 'darwin':
        profile_dir = op.join(
            home, 'Library', 'Application Support', 'Firefox', 'Profiles')
    else:
        # Must be anchored at the home directory; a path that is not joined
        # with `home` would be resolved relative to the current directory.
        profile_dir = op.join(home, '.mozilla', 'firefox')

    for profile in os.listdir(profile_dir):
        if 'selenium' in profile:
            return FirefoxProfile(op.join(profile_dir, profile))
    return None
# Run the extraction only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment