Skip to content

Instantly share code, notes, and snippets.

@Kwpolska
Created February 7, 2013 15:46
Show Gist options
  • Save Kwpolska/4731763 to your computer and use it in GitHub Desktop.
Save Kwpolska/4731763 to your computer and use it in GitHub Desktop.
In [7]: lxml.html.fromstring(text).xpath("//*[@class='box']//form/input")
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-7-0792823b10b8> in <module>()
----> 1 lxml.html.fromstring(text).xpath("//*[@class='box']//form/input")
/usr/lib/python3.3/site-packages/lxml/html/__init__.py in fromstring(html, base_url, parser, **kw)
663 return document_fromstring(html, parser=parser, base_url=base_url, **kw)
664 # otherwise, lets parse it out...
--> 665 doc = document_fromstring(html, parser=parser, base_url=base_url, **kw)
666 bodies = doc.findall('body')
667 if not bodies:
/usr/lib/python3.3/site-packages/lxml/html/__init__.py in document_fromstring(html, parser, **kw)
561 if parser is None:
562 parser = html_parser
--> 563 value = etree.fromstring(html, parser, **kw)
564 if value is None:
565 raise etree.ParserError(
/usr/lib/python3.3/site-packages/lxml/etree.cpython-33m.so in lxml.etree.fromstring (src/lxml/lxml.etree.c:61729)()
/usr/lib/python3.3/site-packages/lxml/etree.cpython-33m.so in lxml.etree._parseMemoryDocument (src/lxml/lxml.etree.c:91131)()
ValueError: Unicode strings with encoding declaration are not supported.
In [8]: magic = lxml.html.fromstring(text.decode('utf-8')).xpath("//*[@class='box']//form/input")
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-8-c5f755dd3176> in <module>()
----> 1 magic = lxml.html.fromstring(text.decode('utf-8')).xpath("//*[@class='box']//form/input")
AttributeError: 'str' object has no attribute 'decode'
In [9]: magic = lxml.html.fromstring(text.encode('utf-8')).xpath("//*[@class='box']//form/input")
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-9-f05c660c51b0> in <module>()
----> 1 magic = lxml.html.fromstring(text.encode('utf-8')).xpath("//*[@class='box']//form/input")
/usr/lib/python3.3/site-packages/lxml/html/__init__.py in fromstring(html, base_url, parser, **kw)
659 parser = html_parser
660 start = html[:10].lstrip().lower()
--> 661 if start.startswith('<html') or start.startswith('<!doctype'):
662 # Looks like a full HTML document
663 return document_fromstring(html, parser=parser, base_url=base_url, **kw)
TypeError: startswith first arg must be bytes or a tuple of bytes, not str
<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"
xml:lang="en" lang="en">
<head>
<title>AUR (en) - Accounts</title>
<link rel='stylesheet' type='text/css' href='/css/archweb.css' />
<link rel='stylesheet' type='text/css' href='/css/aur.css' />
<link rel='shortcut icon' href='/images/favicon.ico' />
<link rel='alternate' type='application/rss+xml' title='Newest Packages RSS' href='/rss/' />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body>
<!-- Start of main content -->
<div class="box">
<h2>Accounts</h2>
<form id="edit-profile-form" action="/account/Kwpolska/update/" method="post">
<fieldset>
<input type="hidden" name="Action" value="UpdateAccount" />
<input type="hidden" name="ID" value="25198" />
<input type="hidden" name="token" value="38032b9c5226060835bc7bbe520bf5a2" />
</fieldset>
<fieldset>
<p>
<label for="id_username">Username:</label>
<input type="text" size="30" maxlength="64" name="U" id="id_username" value="Kwpolska" /> (required)
</p>
<p>
<label for="id_email">Email Address:</label>
<input type="text" size="30" maxlength="64" name="E" id="id_email" value="[email protected]" /> (required)
</p>
<p>
<label for="id_passwd1">Password:</label>
<input type="password" size="30" name="P" id="id_passwd1" value="" />
</p>
<p>
<label for="id_passwd2">Re-type password:</label>
<input type="password" size="30" name="C" id="id_passwd2" value="" />
</p>
<p>
<label for="id_realname">Real Name:</label>
<input type="text" size="30" maxlength="32" name="R" id="id_realname" value="Chris Warrick" />
</p>
<p>
<label for="id_irc">IRC Nick:</label>
<input type="text" size="30" maxlength="32" name="I" id="id_irc" value="Kwpolska" />
</p>
<p>
<label for="id_pgp">PGP Key Fingerprint:</label>
<input type="text" size="30" maxlength="50" name="K" id="id_pgp" value="DF32 C99C FF84 282C F9D4 CF71 7102 3DBD 5EAA EA16 " />
</p>
<p>
<label for="id_language">Language:</label>
<select name="L" id="id_language">
<option value="ca"> Català</option>
<option value="cs"> česky</option>
<option value="da"> Dansk</option>
<option value="de"> Deutsch</option>
<option value="en" selected="selected"> English</option>
<option value="el"> Ελληνικά</option>
<option value="es"> Español</option>
<option value="fi"> Finnish</option>
<option value="fr"> Français</option>
<option value="he"> עברית</option>
<option value="hr"> Hrvatski</option>
<option value="hu"> Magyar</option>
<option value="it"> Italiano</option>
<option value="nb"> Norsk</option>
<option value="nl"> Dutch</option>
<option value="pl"> Polski</option>
<option value="pt_BR"> Português (Brasil)</option>
<option value="pt_PT"> Português (Portugal)</option>
<option value="ro"> Română</option>
<option value="ru"> Русский</option>
<option value="sr"> Srpski</option>
<option value="tr"> Türkçe</option>
<option value="uk"> Українська</option>
<option value="zh_CN"> 简体中文</option>
</select>
</p>
<p>
<label></label>
<input type="submit" class="button" value="Update" /> &nbsp;
<input type="reset" class="button" value="Reset" />
</p>
</fieldset>
</form>
</div>
<!-- End of main content -->
<div id="footer">
<p>AUR v2.0.1</p>
<p>Copyright &copy; 2004-2012 AUR Development Team.</p>
<p>Unsupported packages are user produced content. Any use of the provided files is at your own risk.</p>
</div>
</div>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment