Created
February 7, 2013 15:46
-
-
Save Kwpolska/4731763 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
In [7]: lxml.html.fromstring(text).xpath("//*[@class='box']//form/input")
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-7-0792823b10b8> in <module>()
----> 1 lxml.html.fromstring(text).xpath("//*[@class='box']//form/input")
/usr/lib/python3.3/site-packages/lxml/html/__init__.py in fromstring(html, base_url, parser, **kw)
663 return document_fromstring(html, parser=parser, base_url=base_url, **kw)
664 # otherwise, lets parse it out...
--> 665 doc = document_fromstring(html, parser=parser, base_url=base_url, **kw)
666 bodies = doc.findall('body')
667 if not bodies:
/usr/lib/python3.3/site-packages/lxml/html/__init__.py in document_fromstring(html, parser, **kw)
561 if parser is None:
562 parser = html_parser
--> 563 value = etree.fromstring(html, parser, **kw)
564 if value is None:
565 raise etree.ParserError(
/usr/lib/python3.3/site-packages/lxml/etree.cpython-33m.so in lxml.etree.fromstring (src/lxml/lxml.etree.c:61729)()
/usr/lib/python3.3/site-packages/lxml/etree.cpython-33m.so in lxml.etree._parseMemoryDocument (src/lxml/lxml.etree.c:91131)()
ValueError: Unicode strings with encoding declaration are not supported.
In [8]: magic = lxml.html.fromstring(text.decode('utf-8')).xpath("//*[@class='box']//form/input")
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-8-c5f755dd3176> in <module>()
----> 1 magic = lxml.html.fromstring(text.decode('utf-8')).xpath("//*[@class='box']//form/input")
AttributeError: 'str' object has no attribute 'decode'
In [9]: magic = lxml.html.fromstring(text.encode('utf-8')).xpath("//*[@class='box']//form/input")
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-9-f05c660c51b0> in <module>()
----> 1 magic = lxml.html.fromstring(text.encode('utf-8')).xpath("//*[@class='box']//form/input")
/usr/lib/python3.3/site-packages/lxml/html/__init__.py in fromstring(html, base_url, parser, **kw)
659 parser = html_parser
660 start = html[:10].lstrip().lower()
--> 661 if start.startswith('<html') or start.startswith('<!doctype'):
662 # Looks like a full HTML document
663 return document_fromstring(html, parser=parser, base_url=base_url, **kw)
TypeError: startswith first arg must be bytes or a tuple of bytes, not str
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"
xml:lang="en" lang="en">
<head>
<title>AUR (en) - Accounts</title>
<link rel='stylesheet' type='text/css' href='/css/archweb.css' />
<link rel='stylesheet' type='text/css' href='/css/aur.css' />
<link rel='shortcut icon' href='/images/favicon.ico' />
<link rel='alternate' type='application/rss+xml' title='Newest Packages RSS' href='/rss/' />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body>
<!-- Start of main content -->
<div class="box">
<h2>Accounts</h2>
<form id="edit-profile-form" action="/account/Kwpolska/update/" method="post">
<fieldset>
<input type="hidden" name="Action" value="UpdateAccount" />
<input type="hidden" name="ID" value="25198" />
<input type="hidden" name="token" value="38032b9c5226060835bc7bbe520bf5a2" />
</fieldset>
<fieldset>
<p>
<label for="id_username">Username:</label>
<input type="text" size="30" maxlength="64" name="U" id="id_username" value="Kwpolska" /> (required)
</p>
<p>
<label for="id_email">Email Address:</label>
<input type="text" size="30" maxlength="64" name="E" id="id_email" value="[email protected]" /> (required)
</p>
<p>
<label for="id_passwd1">Password:</label>
<input type="password" size="30" name="P" id="id_passwd1" value="" />
</p>
<p>
<label for="id_passwd2">Re-type password:</label>
<input type="password" size="30" name="C" id="id_passwd2" value="" />
</p>
<p>
<label for="id_realname">Real Name:</label>
<input type="text" size="30" maxlength="32" name="R" id="id_realname" value="Chris Warrick" />
</p>
<p>
<label for="id_irc">IRC Nick:</label>
<input type="text" size="30" maxlength="32" name="I" id="id_irc" value="Kwpolska" />
</p>
<p>
<label for="id_pgp">PGP Key Fingerprint:</label>
<input type="text" size="30" maxlength="50" name="K" id="id_pgp" value="DF32 C99C FF84 282C F9D4 CF71 7102 3DBD 5EAA EA16 " />
</p>
<p>
<label for="id_language">Language:</label>
<select name="L" id="id_language">
<option value="ca"> Català</option>
<option value="cs"> česky</option>
<option value="da"> Dansk</option>
<option value="de"> Deutsch</option>
<option value="en" selected="selected"> English</option>
<option value="el"> Ελληνικά</option>
<option value="es"> Español</option>
<option value="fi"> Finnish</option>
<option value="fr"> Français</option>
<option value="he"> עברית</option>
<option value="hr"> Hrvatski</option>
<option value="hu"> Magyar</option>
<option value="it"> Italiano</option>
<option value="nb"> Norsk</option>
<option value="nl"> Dutch</option>
<option value="pl"> Polski</option>
<option value="pt_BR"> Português (Brasil)</option>
<option value="pt_PT"> Português (Portugal)</option>
<option value="ro"> Română</option>
<option value="ru"> Русский</option>
<option value="sr"> Srpski</option>
<option value="tr"> Türkçe</option>
<option value="uk"> Українська</option>
<option value="zh_CN"> 简体中文</option>
</select>
</p>
<p>
<label></label>
<input type="submit" class="button" value="Update" />
<input type="reset" class="button" value="Reset" />
</p>
</fieldset>
</form>
</div>
<!-- End of main content -->
<div id="footer">
<p>AUR v2.0.1</p>
<p>Copyright © 2004-2012 AUR Development Team.</p>
<p>Unsupported packages are user produced content. Any use of the provided files is at your own risk.</p>
</div>
</div>
</body>
</html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment