Skip to content

Instantly share code, notes, and snippets.

@komeda-shinji
Created January 30, 2015 09:50
Show Gist options
  • Save komeda-shinji/508f35d1282b3b5d239f to your computer and use it in GitHub Desktop.
Save komeda-shinji/508f35d1282b3b5d239f to your computer and use it in GitHub Desktop.
twill: HTTP のレスポンスヘッダーでエンコーディングが示されていないとき、エンコーディングを強制する拡張モジュール
"""
Extension functions for manipulating the browser charset encoding.
Commands:
getbrowserencoding -- get encoding from HTTP header or HTML meta tag; an encoding found in a meta tag is set to browser.
setbrowserencoding <encoding> -- set browser encoding as <encoding>.
"""
import re
import twill.commands
def getbrowserencoding():
    """
    >> getbrowserencoding

    Get current page encoding from HTTP header or HTML meta tag.

    If a Content-Type header carries a charset parameter, the browser's
    current encoding is printed and returned.  Otherwise the page is
    searched for a <meta charset="..."> tag and then for a
    <meta http-equiv="Content-Type" content="...; charset=..."> tag; an
    encoding found there is printed, set as the browser encoding with
    setbrowserencoding, and returned.

    Returns None when no document is being viewed (a notice is printed),
    and a false value when no encoding could be determined.
    """
    browser = twill.get_browser()
    if browser.result is None:
        # Single-argument parenthesised print behaves the same as the
        # print statement under Python 2.
        print("not viewing any document")
        return None

    encoding = None
    response = browser._browser._response
    split_header_words = twill.commands.mechanize._headersutil.split_header_words
    for ct in response.info().getheaders("content-type"):
        groups = split_header_words([ct])
        # Guard the [0] lookup: skip a header value that parsed to no
        # parameter groups instead of raising IndexError.
        if groups and any(k == "charset" for k, _ in groups[0]):
            encoding = browser._browser.encoding()
            break
    if encoding:
        print('encoding: %s (HTTP)' % encoding)
        return encoding

    # No charset in the HTTP headers: fall back to the document's meta tags.
    page = browser.get_html()
    if page:
        page = page.decode(browser._browser.encoding(), 'ignore')
        m = re.search(r'<meta\s+charset="([^"]+)"/?>', page, re.IGNORECASE)
        if not m:
            # The page text must be passed as the second argument; without
            # it re.IGNORECASE is taken as the string to search and the
            # call raises TypeError.
            m = re.search(
                r'<meta\s+http-equiv="Content-Type"\s+content="\S+;\s*charset=(\S+)"/?>',
                page,
                re.IGNORECASE,
            )
        if m:
            encoding = m.group(1)
    if encoding:
        print('encoding: %s (PAGE)' % encoding)
        setbrowserencoding(encoding)
    return encoding
def setbrowserencoding(encoding):
    """
    >> setbrowserencoding <encoding>

    Set browser encoding as <encoding>.

    This is useful for fixing a mismatch between the encoding the browser
    recognized and the charset the HTML actually uses.  The value is
    assigned to the encoding attribute of the underlying browser's
    factory object.
    """
    browser = twill.get_browser()
    browser._browser._factory.encoding = encoding
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment