Skip to content

Instantly share code, notes, and snippets.

@alonsosilvaallende
Last active May 21, 2022 18:10
Show Gist options
  • Save alonsosilvaallende/61e29fb558a4ff309040562f6cdd54d2 to your computer and use it in GitHub Desktop.
Save alonsosilvaallende/61e29fb558a4ff309040562f6cdd54d2 to your computer and use it in GitHub Desktop.
Untitled1.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"ExecuteTime": {
"start_time": "2022-05-21T18:11:00.157685Z",
"end_time": "2022-05-21T18:11:00.225531Z"
},
"trusted": true
},
"id": "7d2bd7d3",
"cell_type": "code",
"source": "%load_ext autoreload\n%autoreload 2",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2022-05-21T18:11:00.234415Z",
"end_time": "2022-05-21T18:11:04.401052Z"
},
"trusted": true
},
"id": "bf8e0c91",
"cell_type": "code",
"source": "from spacy.lang.es.examples import sentences\n\nsentences[1].lower()",
"execution_count": 2,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 2,
"data": {
"text/plain": "'los coches autónomos delegan la responsabilidad del seguro en sus fabricantes.'"
},
"metadata": {}
}
]
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2022-05-21T18:11:04.415042Z",
"end_time": "2022-05-21T18:11:05.478399Z"
},
"trusted": true
},
"id": "1f3ffad0",
"cell_type": "code",
"source": "import spacy\n\nnlp = spacy.load(\"es_core_news_sm\")\ndoc = nlp(sentences[1].lower())",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2022-05-21T18:11:05.488575Z",
"end_time": "2022-05-21T18:11:05.558617Z"
},
"trusted": true
},
"id": "b8928bc7",
"cell_type": "code",
"source": "# Print every token with its part-of-speech tag and its morphological features as a dict.\nfor token in doc:\n    print(token.text, token.pos_, token.morph.to_dict())",
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"text": "los DET {'Definite': 'Def', 'Gender': 'Masc', 'Number': 'Plur', 'PronType': 'Art'}\ncoches NOUN {'Gender': 'Masc', 'Number': 'Plur'}\nautónomos ADJ {'Gender': 'Masc', 'Number': 'Plur'}\ndelegan VERB {'Mood': 'Ind', 'Number': 'Plur', 'Person': '3', 'Tense': 'Pres', 'VerbForm': 'Fin'}\nla DET {'Definite': 'Def', 'Gender': 'Fem', 'Number': 'Sing', 'PronType': 'Art'}\nresponsabilidad NOUN {'Gender': 'Fem', 'Number': 'Sing'}\ndel ADP {'Definite': 'Def', 'Gender': 'Masc', 'Number': 'Sing', 'PronType': 'Art'}\nseguro NOUN {'Gender': 'Masc', 'Number': 'Sing'}\nen ADP {}\nsus DET {'Number': 'Plur', 'Person': '3', 'Poss': 'Yes', 'PronType': 'Prs'}\nfabricantes NOUN {'Number': 'Plur'}\n. PUNCT {'PunctType': 'Peri'}\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true,
"ExecuteTime": {
"start_time": "2022-05-21T18:11:05.583355Z",
"end_time": "2022-05-21T18:11:05.662552Z"
}
},
"id": "9580ff0b",
"cell_type": "code",
"source": "# Keep only the tokens whose part-of-speech tag is NOUN, then display them.\nnouns = [tok for tok in doc if tok.pos_ == 'NOUN']\nnouns",
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 5,
"data": {
"text/plain": "[coches, responsabilidad, seguro, fabricantes]"
},
"metadata": {}
}
]
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2022-05-21T18:11:05.691098Z",
"end_time": "2022-05-21T18:11:05.825684Z"
},
"trusted": true
},
"cell_type": "code",
"source": "# One morphological-feature dict per noun, in the same order as `nouns`.\nnouns_dict = [noun.morph.to_dict() for noun in nouns]",
"execution_count": 6,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2022-05-21T18:11:05.833655Z",
"end_time": "2022-05-21T18:11:05.947749Z"
},
"trusted": true
},
"cell_type": "code",
"source": "nouns_dict",
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 7,
"data": {
"text/plain": "[{'Gender': 'Masc', 'Number': 'Plur'},\n {'Gender': 'Fem', 'Number': 'Sing'},\n {'Gender': 'Masc', 'Number': 'Sing'},\n {'Number': 'Plur'}]"
},
"metadata": {}
}
]
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2022-05-21T18:11:05.962569Z",
"end_time": "2022-05-21T18:11:06.040909Z"
},
"trusted": true
},
"cell_type": "code",
"source": "# Collect the 'Gender' value of every noun's feature dict.\n# Dicts without a 'Gender' key (e.g. {'Number': 'Plur'}) are skipped rather than raising KeyError.\ngenders = [features['Gender'] for features in nouns_dict if 'Gender' in features]",
"execution_count": 8,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2022-05-21T18:11:06.044794Z",
"end_time": "2022-05-21T18:11:06.146692Z"
},
"trusted": true
},
"cell_type": "code",
"source": "# Tally the nouns as a (masculine, feminine) pair.\ntuple(genders.count(label) for label in ('Masc', 'Fem'))",
"execution_count": 9,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 9,
"data": {
"text/plain": "(2, 1)"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"_draft": {
"nbviewer_url": "https://gist.github.com/61e29fb558a4ff309040562f6cdd54d2"
},
"gist": {
"id": "61e29fb558a4ff309040562f6cdd54d2",
"data": {
"description": "Untitled1.ipynb",
"public": true
}
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3 (ipykernel)",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.10.4",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment