Last active
May 3, 2021 17:11
-
-
Save BryanCutler/88f41aaea0655fb521c39b99ca7829db to your computer and use it in GitHub Desktop.
Text Extensions for Pandas: Tips and Techniques for Extending Pandas, Part 1 Blog
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'text_extensions_for_pandas.array.span.SpanArray'>\n", | |
"<class 'text_extensions_for_pandas.array.tensor.TensorArray'>\n" | |
] | |
} | |
], | |
"source": [ | |
"# Extract the `SpanArray` and `TensorArray` objects backing the \"span\" and \"embedding\" columns.\n", | |
"spans = df[\"span\"].array\n", | |
"embeddings = df[\"embedding\"].array\n", | |
"print(f\"{type(spans)}\\n{type(embeddings)}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Column 'embedding'\n", | |
"length: 9\n", | |
"array shape: (9, 768)\n", | |
"tensor shape: (768,)\n" | |
] | |
} | |
], | |
"source": [ | |
"# Print the length, overall array shape, and per-row tensor shape of the \"embedding\" column.\n", | |
"length = len(embeddings)\n", | |
"tensor_array_shape = embeddings.numpy_shape\n", | |
"tensor_shape = embeddings[0].to_numpy().shape\n", | |
"print(f\"Column 'embedding'\\nlength: {length}\\narray shape: {tensor_array_shape}\\ntensor shape: {tensor_shape}\")" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment