Skip to content

Instantly share code, notes, and snippets.

@gregology
Last active August 29, 2015 14:14
Show Gist options
  • Save gregology/3888cd27402175988376 to your computer and use it in GitHub Desktop.
Save gregology/3888cd27402175988376 to your computer and use it in GitHub Desktop.
test_bug.py
from __future__ import unicode_literals
import pytest
from starscream.records import Record
class TestBug:
    """Checks the union of two event RDDs derived from one source RDD."""

    def create_field_1_based_rdd(self, foo_rdd):
        """Turn every record of ``foo_rdd`` into a 'field 1 event' record.

        Each record gets ``rename_keys(id='int field')`` applied and the
        result is merged with the event name plus the record's
        'date field1' value stored under 'foo event at'.
        """

        def to_field_1_event(record):
            # rename_keys runs before the merge payload is built, exactly
            # as in a single chained expression.
            renamed = record.rename_keys(id='int field')
            return renamed.merge({
                'foo event': 'field 1 event',
                'foo event at': record['date field1'],
            })

        return foo_rdd.map(to_field_1_event)

    def create_field_2_based_rdd(self, foo_rdd):
        """Turn records that have a 'date field2' into 'field 2 event' records.

        Records whose 'date field2' is None are filtered out first; the
        remaining ones get ``rename_keys(id='int field')`` applied and are
        merged with the event name plus their 'date field2' value stored
        under 'foo event at'.
        """

        def has_date_field_2(record):
            return record['date field2'] is not None

        def to_field_2_event(record):
            # Same ordering as above: rename first, then build the payload.
            renamed = record.rename_keys(id='int field')
            return renamed.merge({
                'foo event': 'field 2 event',
                'foo event at': record['date field2'],
            })

        with_date_field_2 = foo_rdd.filter(has_date_field_2)
        return with_date_field_2.map(to_field_2_event)

    def test_bug(self, sc):
        """Combine both derived RDDs with ``+`` and compare the sorted result.

        ``sc`` is supplied by the test environment (not defined in this
        file); it is used here only through ``sc.records(...)``.
        """
        source_rows = [{
            'int field': 1,
            'date field1': '2000-01-01T00:00:00.000Z',
            'date field2': None,
        }, {
            'int field': 2,
            'date field1': '2001-01-01T00:00:00.000Z',
            'date field2': '2002-01-01T00:00:00.000Z',
        }]
        foo_rdd = sc.records(source_rows)

        # Both event RDDs are derived from the very same source RDD.
        field_1_events = self.create_field_1_based_rdd(foo_rdd)
        field_2_events = self.create_field_2_based_rdd(foo_rdd)
        combined_events = field_1_events + field_2_events

        expected_records = [Record({
            'date field2': None,
            'date field1': '2000-01-01T00:00:00.000Z',
            'foo event at': '2000-01-01T00:00:00.000Z',
            'int field': 1,
            'foo event': 'field 1 event',
        }), Record({
            'date field2': '2002-01-01T00:00:00.000Z',
            'date field1': '2001-01-01T00:00:00.000Z',
            'foo event at': '2001-01-01T00:00:00.000Z',
            'int field': 2,
            'foo event': 'field 1 event',
        }), Record({
            'date field2': '2002-01-01T00:00:00.000Z',
            'date field1': '2001-01-01T00:00:00.000Z',
            'foo event at': '2002-01-01T00:00:00.000Z',
            'int field': 2,
            'foo event': 'field 2 event',
        })]

        # Not used by the assertion below; kept for reference only.
        # NOTE(review): judging by its name, this is the output actually
        # seen when running against the test context -- confirm.
        observed_on_test_sc = [Record({
            "foo event": "field 2 event",
            "foo event at": "2002-01-01T00:00:00.000Z",
        }), Record({
            "date field2": None,
            "date field1": "2000-01-01T00:00:00.000Z",
            "foo event at": "2000-01-01T00:00:00.000Z",
            "int field": 1,
            "foo event": "field 1 event",
        }), Record({
            "date field2": "2002-01-01T00:00:00.000Z",
            "date field1": "2001-01-01T00:00:00.000Z",
            "foo event at": "2001-01-01T00:00:00.000Z",
            "int field": 2,
            "foo event": "field 1 event",
        })]

        collected_sorted = sorted(combined_events.collect())
        assert collected_sorted == expected_records
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment