Skip to content

Instantly share code, notes, and snippets.

@lancejohnson
lancejohnson / cprofile.txt
Last active January 1, 2020 22:19
Performance of JSON parsing from HTML
14865934 function calls (14863074 primitive calls) in 19.497 seconds
Ordered by: internal time
ncalls tottime percall cumtime percall filename:lineno(function)
1 9.239 9.239 9.239 9.239 {built-in method _pickle.load}
1 1.265 1.265 19.497 19.497 bizbuysell_fetch.py:514(parse_listings_from_pkl)
891102 0.776 0.000 0.776 0.000 {method 'match' of 're.Pattern' objects}
120136 0.688 0.000 4.795 0.000 parser.py:301(parse_starttag)
245180 0.601 0.000 1.973 0.000 __init__.py:438(endData)
curl -X GET \
'https://www.truepeoplesearch.com/results?streetaddress=12604%20STILLWATER%20TERRACE%20DRIVE&citystatezip=TAMPA%2C%20FL&rid=l2x0' \
-H 'Cache-Control: no-cache' \
-H 'Connection: keep-alive' \
-H 'Host: www.truepeoplesearch.com' \
-H 'Postman-Token: 2e493c21-b3bb-4fe9-b1be-ab56e54561bf,924aa67f-1df1-4a8e-aeee-14a0a28c8d48' \
-H 'accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3' \
-H 'accept-encoding: gzip, deflate, br' \
-H 'accept-language: en-US,en;q=0.9' \
-H 'authority: www.truepeoplesearch.com' \
import httpx
url = "https://www.zillow.com/graphql/"
querystring = {"zpid": "162029003", "operationName": "PriceTaxQuery"}
data_payload = "{\"query\":\"query PriceTaxQuery($zpid: ID!) {\\n property(zpid: $zpid) {\\n zpid\\n livingArea\\n countyFIPS\\n parcelId\\n taxHistory {\\n time\\n taxPaid\\n taxIncreaseRate\\n value\\n valueIncreaseRate\\n }\\n priceHistory {\\n time\\n price\\n priceChangeRate\\n event\\n source\\n buyerAgent {\\n photo {\\n url\\n }\\n profileUrl\\n name\\n }\\n sellerAgent {\\n photo {\\n url\\n }\\n profileUrl\\n name\\n }\\n showCountyLink\\n postingIsRental\\n }\\n currency\\n country\\n }\\n}\\n\",\"operationName\":\"PriceTaxQuery\",\"variables\":{\"zpid\":162029003},\"clientVersion\":\"home-details/5.48.1.0.0.hotfix-11-14-2019.6ee5cd9\"}"
headers = {
'user-agent': "Mozilla/5.0 (Macintosh;
{"data":{"property":{"zpid":43177261,"livingArea":3941,"countyFIPS":"12011","parcelId":"504118080650","taxHistory":[{"time":1542547336138,"taxPaid":11312.76,"taxIncreaseRate":0,"value":581750,"valueIncreaseRate":0.8397002},{"time":1511011336138,"taxPaid":11312.76,"taxIncreaseRate":0.89639586,"value":316220,"valueIncreaseRate":0.020986699},{"time":1479475336138,"taxPaid":5965.4,"taxIncreaseRate":0.0075787334,"value":309720,"valueIncreaseRate":0.0069902786},{"time":1447852936138,"taxPaid":5920.53,"taxIncreaseRate":-0.032613702,"value":307570,"valueIncreaseRate":0.007996592},{"time":1416316936138,"taxPaid":6120.13,"taxIncreaseRate":0,"value":305130,"valueIncreaseRate":-0.3250531},{"time":1384780936138,"taxPaid":6120.13,"taxIncreaseRate":0.040228043,"value":452080,"valueIncreaseRate":0},{"time":1353244936138,"taxPaid":5883.45,"taxIncreaseRate":0.03234546,"value":452080,"valueIncreaseRate":0.03557439},{"time":1321622536138,"taxPaid":5699.11,"taxIncreaseRate":-0.0153151285,"value":436550,"valueIncreaseRate":0.54388
# I know this doesn't work, but here's what I want to do
async def gather_object_blocks(list_of_objects):
object_blocks = [list_of_objects[i:i + concurrency_limit]
for i in range(0, len(list_of_objects), concurrency_limit)]
async with aiohttp.ClientSession():
for sub_block in object_blocks:
await asyncio.gather(
for the_object in sub_block:
try:
from pprint import pprint
json_url_params = {'filterState': {}}
price_bin_jsons = []
counter = 0
for i in range(0, 2):
print('Start', counter)
pprint(price_bin_jsons)
json_url_params['filterState']['price'] = {
import psycopg2
import psycopg2.extras
from psycopg2.extensions import AsIs
import os
import datetime
def write_listings(listing_dicts):
try:
connection = psycopg2.connect(
✘ work@Lances-MacBook-Pro ~/Dropbox/Projects/pyland [serpErrors ●] $ python serp.py
https://www.landwatch.com/default.aspx?ct=R&type=268,6843;5,25;6,742;13,12&pg=9 <class 'str'>
https://www.landwatch.com/default.aspx?ct=R&type=268,6843;5,25;6,742;13,12&pg=10 <class 'str'>
https://www.landwatch.com/default.aspx?ct=R&type=268,6843;5,25;6,742;13,12&pg=7 <class 'str'>
https://www.landwatch.com/default.aspx?ct=R&type=268,6843;5,25;6,742;13,12&pg=8 <class 'str'>
https://www.landwatch.com/default.aspx?ct=R&type=268,6843;5,25;6,742;13,12&pg=5 <class 'str'>
https://www.landwatch.com/default.aspx?ct=R&type=268,6843;5,25;6,742;13,12&pg=11 <class 'str'>
https://www.landwatch.com/default.aspx?ct=R&type=268,6843;5,25;6,742;13,12&pg=4 <class 'str'>
https://www.landwatch.com/default.aspx?ct=R&type=268,6843;5,25;6,742;13,12&pg=3 <class 'str'>
https://www.landwatch.com/default.aspx?ct=R&type=268,6843;5,25;6,742;13,12&pg=6 <class 'str'>
# Code from http://dangoldin.com/2018/11/16/python-3-and-aiohttp/
import aiohttp
import asyncio
import datetime
timeout = aiohttp.ClientTimeout(total=10)
URLS = [
'http://httpbin.org/delay/1?1',
Traceback (most recent call last):
File "land.py", line 72, in <module>
main()
File "land.py", line 64, in main
soups = p.map(get_soups, paginated_urls)
File "/Users/work/.pyenv/versions/3.7.3/lib/python3.7/multiprocessing/pool.py", line 268, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/Users/work/.pyenv/versions/3.7.3/lib/python3.7/multiprocessing/pool.py", line 657, in get
raise self._value
multiprocessing.pool.MaybeEncodingError: Error sending result: '[<!DOCTYPE html>