Skip to content

Instantly share code, notes, and snippets.

View BryanCutler's full-sized avatar

Bryan Cutler BryanCutler

View GitHub Profile
@BryanCutler
BryanCutler / tep_extending_pandas_blog1_5_0.py
Last active May 4, 2021 22:31
Text Extensions for Pandas: Tips and Techniques for Extending Pandas, Part 1 Blog
# Addition of `Span` with Series of `SpanDtype` produces another Series
df["span"].iloc[1] + df["span"].iloc[3:5]
# 3 [0, 16): 'Monty Python and'
# 4 [0, 20): 'Monty Python and the'
# Name: span, dtype: SpanDtype
@BryanCutler
BryanCutler / tep_extending_pandas_blog1_7.py
Last active May 3, 2021 22:10
Text Extensions for Pandas: Tips and Techniques for Extending Pandas, Part 1 Blog
@pytest.fixture
def dtype():
    """Provide a new ``TensorDtype`` instance as the extension dtype under test."""
    extension_dtype = TensorDtype()
    return extension_dtype
 
@pytest.fixture
def data(dtype):
    """Provide the extension array that the tests operate on.

    Builds a NumPy array of 100 rows, each row holding a single integer
    (0 through 99), and wraps it in a pandas array using the ``dtype``
    fixture.
    """
    # One single-element row per integer: shape (100, 1).
    rows = np.arange(100).reshape(-1, 1)
    return pd.array(rows, dtype=dtype)
@BryanCutler
BryanCutler / tep_extending_pandas_blog1_6.py
Last active May 4, 2021 21:55
Text Extensions for Pandas: Tips and Techniques for Extending Pandas, Part 1 Blog
# Reduce the `SpanArray` to a single `Span` covering tokens 2 through 4 (the slice end, 5, is exclusive).
df["span"].iloc[2:5].sum()
# [6, 20): 'Python and the'
class SpanOpMixin:
def __add__(self, other) -> Union["Span", "SpanArray"]:
if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
# Rely on pandas to unbox and dispatch to us.
return NotImplemented
if isinstance(self, Span) and isinstance(other, Span):
# Span + *Span = Span
return Span(self.target_text,
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@BryanCutler
BryanCutler / tep_extending_pandas_blog1_3.ipynb
Created May 3, 2021 20:13
Text Extensions for Pandas: Tips and Techniques for Extending Pandas, Part 1 Blog
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@BryanCutler
BryanCutler / tep_extending_pandas_blog1_2.ipynb
Last active May 3, 2021 17:11
Text Extensions for Pandas: Tips and Techniques for Extending Pandas, Part 1 Blog
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@BryanCutler
BryanCutler / tep_extending_pandas_blog1_1.ipynb
Last active May 3, 2021 16:57
Text Extensions for Pandas: Tips and Techniques for Extending Pandas, Part 1 Blog
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@BryanCutler
BryanCutler / tf_arrow_blog_p11.py
Last active February 25, 2020 18:23
TensorFlow Arrow Blog Part 11 - Model Training Remote Dataset
def make_remote_dataset(endpoint):
"""Make a TensorFlow Arrow Dataset that reads from a remote Arrow stream."""
# Create the Arrow Dataset from a remote host serving a stream
ds = arrow_io.ArrowStreamDataset(
[endpoint],
columns=(0, 1, 2),
output_types=(tf.int64, tf.float64, tf.float64),
output_shapes=(tf.TensorShape([]), tf.TensorShape([]), tf.TensorShape([])),
batch_mode='auto')
@BryanCutler
BryanCutler / tf_arrow_blog_pt10.py
Last active August 5, 2019 17:36
TensorFlow Arrow Blog Part 10 - Serve CSV Data
def serve_csv_data(ip_addr, port_num, directory):
"""
Create a socket and serve Arrow record batches as a stream read from the
given directory containing CSV files.
"""
# Create the socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.bind((ip_addr, port_num))
sock.listen(1)