Skip to content

Instantly share code, notes, and snippets.

@jmcarp
jmcarp / scrapy_schema.py
Created November 8, 2015 05:12
scraping for humans?
"""
Scrapy includes an `ItemLoader` class and associated helpers to abstract
data extraction from `Response` objects. But this API is verbose and can easily
result in more boilerplate, not less. The following is a quick sketch of
a possible interface for using marshmallow, with a few custom fields, to
pull data from Scrapy responses.
"""
class PersonSchema(Schema):
name = fields.XPath('//title/text()', fields.Str)
@jmcarp
jmcarp / summarize_aggregates.sql
Last active October 9, 2015 14:36
sum aggregates by election
with cycles as (
select
agg.*,
link.election_year
from ofec_sched_a_aggregate_state agg
join ofec_name_linkage_mv link on
agg.cmte_id = link.committee_id and
agg.cycle <= link.election_year and
agg.cycle > link.election_year -
case link.committee_type
@jmcarp
jmcarp / .about.yml
Created July 8, 2015 17:54
about-openfec
shortName: OpenFEC
fullName: OpenFEC
stage: alpha
testable: yes
team:
- githubUser: arowla
hubUser: arowla
- githubUser: lindsayyoung
hubUser: lindsayyoung
services:
@jmcarp
jmcarp / schema.json
Last active August 29, 2015 14:24
about-schema
{
"$schema": "http://json-schema.org/schema#",
"title": ".about.yml",
"type": "object",
"properties": {
"shortName": {
"type": "string",
"description": ""
},
"fullName": {
@jmcarp
jmcarp / incremental.sql
Created June 25, 2015 14:38
incremental-aggregate-update
drop table if exists ofec_sched_a_aggregate_zip;
create table ofec_sched_a_aggregate_zip as
select
cmte_id,
rpt_yr + rpt_yr % 2 as cycle,
contbr_zip as zip,
sum(contb_receipt_amt) as total
from sched_a
where rpt_yr >= 2011
group by cmte_id, cycle, zip
@jmcarp
jmcarp / schedule_a_benchmark.py
Created June 18, 2015 17:10
schedule_a_benchmark
import time
import random
from webservices import rest
from webservices.common import models
rest.app.app_context().push()
@jmcarp
jmcarp / postgres_count_estimate.py
Last active July 13, 2023 19:24
Approximate query count with PostgreSQL and SQLAlchemy
"""Approximate query count based on ANALYZE output for PostgreSQL and SQLAlchemy.
Count logic borrowed from https://wiki.postgresql.org/wiki/Count_estimate
ANALYZE borrowed from https://bitbucket.org/zzzeek/sqlalchemy/wiki/UsageRecipes/Explain
"""
import re
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.expression import Executable, ClauseElement, _literal_as_text
@jmcarp
jmcarp / marshmodel.py
Last active March 5, 2018 01:45
marshmallow-models
import six
import inflection
import marshmallow as ma
class Model(object):
def __init__(self, **kwargs):
self._schema = self.Schema()
self.load(**kwargs)
@jmcarp
jmcarp / cron.py
Created April 9, 2015 13:53
forever-cron
#!/usr/bin/env python
# encoding: utf-8
"""
Examples ::
$ ./cron.py
$ forever -c `which python` cron.py
"""
import datetime
@jmcarp
jmcarp / annotate.py
Last active August 29, 2015 14:16
Workshop RSVP annotations
"""Utilities for annotating workshop RSVP data."""
import re
import logging
from dateutil.parser import parse as parse_date
from modularodm import Q
from modularodm.exceptions import ModularOdmException