Created
April 28, 2023 17:39
-
-
Save mikegerber/72e57c847486163f46de94a71987ef5c to your computer and use it in GitHub Desktop.
digisam - How many pages in the year 1666?.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "d8aa77b1", | |
"cell_type": "code", | |
"source": "import pandas as pd\n\npd.set_option(\"display.max_rows\", None)", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "95a0d080", | |
"cell_type": "code", | |
"source": "mods_info_df = pd.read_csv(\"/home/mike/devel/qurator-data/digisam/mods_info/mods_info_df_all.2023-01-23.csv\",\n index_col=0)", | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": "/tmp/ipykernel_35645/4200959452.py:1: DtypeWarning: Columns (29,32,73,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,210,211,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436) have mixed types. Specify dtype option on import or set low_memory=False.\n mods_info_df = pd.read_csv(\"/home/mike/devel/qurator-data/digisam/mods_info/mods_info_df_all.2023-01-23.csv\",\n" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "09f08dc8", | |
"cell_type": "code", | |
"source": "# Select works from 1666 (created or published)\nmods_info_df = mods_info_df[\n mods_info_df[\"originInfo-production0_dateCreated\"].astype(\"string\").str.startswith(\"1666\") |\n mods_info_df[\"originInfo-publication0_dateIssued\"].astype(\"string\").str.startswith(\"1666\")\n]", | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "1bd72d63", | |
"cell_type": "code", | |
"source": "# Page count could be done by looking at the count of files in the PRESENTATION METS file group.\n# This is only slightly incorrect as it also contains e.g. the color checker, but probably suffices in this case.\n#\n# (Just printing a few of the >300 values here)\nmods_info_df[\"mets_fileSec_fileGrp-PRESENTATION-count\"].head()", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "PPN607617047 33.0\nPPN848333268 20.0\nPPN873829409 17.0\nPPN664526918 12.0\nPPN71528990X 21.0\nName: mets_fileSec_fileGrp-PRESENTATION-count, dtype: float64" | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": false | |
}, | |
"id": "6e4cc65e", | |
"cell_type": "code", | |
"source": "# Summing it up:\n\nmods_info_df[\"mets_fileSec_fileGrp-PRESENTATION-count\"].sum()", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "55309.0" | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
} | |
], | |
"metadata": { | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "digisam - How many pages in the year 1666?.ipynb", | |
"public": false | |
} | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.9.13", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"toc": { | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"base_numbering": 1, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": true | |
}, | |
"hide_input": false | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment