Created
August 5, 2022 13:18
-
-
Save whym/0f548b883bceb3d435955e3ae89ef5ba to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"id": "d9acaa1b", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import os.path\n", | |
"\n", | |
"df = pd.read_csv(os.path.expanduser('~/Downloads/quarry-66237-mobile-upload-metadata-from-logging-depending-on-change-tag-run655164.csv'))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"id": "61243d8d", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>log_id</th>\n", | |
" <th>log_timestamp</th>\n", | |
" <th>log_actor</th>\n", | |
" <th>log_namespace</th>\n", | |
" <th>log_page</th>\n", | |
" <th>page_title</th>\n", | |
" <th>img_metadata</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>272446438</td>\n", | |
" <td>20180628193446</td>\n", | |
" <td>7120933.0</td>\n", | |
" <td>6</td>\n", | |
" <td>70328570</td>\n", | |
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n", | |
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>272446448</td>\n", | |
" <td>20180628193511</td>\n", | |
" <td>7120933.0</td>\n", | |
" <td>6</td>\n", | |
" <td>70328570</td>\n", | |
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n", | |
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>272446451</td>\n", | |
" <td>20180628193527</td>\n", | |
" <td>7120933.0</td>\n", | |
" <td>6</td>\n", | |
" <td>70328570</td>\n", | |
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n", | |
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>272446458</td>\n", | |
" <td>20180628193546</td>\n", | |
" <td>7120933.0</td>\n", | |
" <td>6</td>\n", | |
" <td>70328570</td>\n", | |
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n", | |
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>272446465</td>\n", | |
" <td>20180628193601</td>\n", | |
" <td>7120933.0</td>\n", | |
" <td>6</td>\n", | |
" <td>70328570</td>\n", | |
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n", | |
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9995</th>\n", | |
" <td>275395285</td>\n", | |
" <td>20180928230512</td>\n", | |
" <td>13772.0</td>\n", | |
" <td>6</td>\n", | |
" <td>73177585</td>\n", | |
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n", | |
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9996</th>\n", | |
" <td>275395291</td>\n", | |
" <td>20180928230534</td>\n", | |
" <td>13772.0</td>\n", | |
" <td>6</td>\n", | |
" <td>73177591</td>\n", | |
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n", | |
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9997</th>\n", | |
" <td>275395293</td>\n", | |
" <td>20180928230554</td>\n", | |
" <td>13772.0</td>\n", | |
" <td>6</td>\n", | |
" <td>73177593</td>\n", | |
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n", | |
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9998</th>\n", | |
" <td>275395295</td>\n", | |
" <td>20180928230614</td>\n", | |
" <td>13772.0</td>\n", | |
" <td>6</td>\n", | |
" <td>73177596</td>\n", | |
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n", | |
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9999</th>\n", | |
" <td>275395298</td>\n", | |
" <td>20180928230635</td>\n", | |
" <td>13772.0</td>\n", | |
" <td>6</td>\n", | |
" <td>73177599</td>\n", | |
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n", | |
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>10000 rows × 7 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" log_id log_timestamp log_actor log_namespace log_page \\\n", | |
"0 272446438 20180628193446 7120933.0 6 70328570 \n", | |
"1 272446448 20180628193511 7120933.0 6 70328570 \n", | |
"2 272446451 20180628193527 7120933.0 6 70328570 \n", | |
"3 272446458 20180628193546 7120933.0 6 70328570 \n", | |
"4 272446465 20180628193601 7120933.0 6 70328570 \n", | |
"... ... ... ... ... ... \n", | |
"9995 275395285 20180928230512 13772.0 6 73177585 \n", | |
"9996 275395291 20180928230534 13772.0 6 73177591 \n", | |
"9997 275395293 20180928230554 13772.0 6 73177593 \n", | |
"9998 275395295 20180928230614 13772.0 6 73177596 \n", | |
"9999 275395298 20180928230635 13772.0 6 73177599 \n", | |
"\n", | |
" page_title \\\n", | |
"0 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n", | |
"1 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n", | |
"2 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n", | |
"3 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n", | |
"4 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n", | |
"... ... \n", | |
"9995 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n", | |
"9996 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n", | |
"9997 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n", | |
"9998 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n", | |
"9999 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n", | |
"\n", | |
" img_metadata \n", | |
"0 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... \n", | |
"1 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... \n", | |
"2 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... \n", | |
"3 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... \n", | |
"4 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... \n", | |
"... ... \n", | |
"9995 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... \n", | |
"9996 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... \n", | |
"9997 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... \n", | |
"9998 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... \n", | |
"9999 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... \n", | |
"\n", | |
"[10000 rows x 7 columns]" | |
] | |
}, | |
"execution_count": 38, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "dc999d3f", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# find duplicates (overwritten files)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"id": "93f5fec2", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from phpserialize import unserialize\n", | |
"import chardet\n", | |
"import re\n", | |
"\n", | |
"def regex_find(field, s, default):\n", | |
" m = re.search('\"\"' + field + '\"\";s:\\d+:\"\"(.*?)\"\";', s)\n", | |
" if m:\n", | |
" return m[1]\n", | |
" m = re.search('\"' + field + '\";s:\\d+:\"(.*?)\";', s)\n", | |
" if m:\n", | |
" return m[1]\n", | |
" return default\n", | |
"\n", | |
"def charset(bb):\n", | |
" d = chardet.detect(bb)\n", | |
" if d['confidence'] > 0.7:\n", | |
" return d['encoding']\n", | |
" return None\n", | |
"\n", | |
"def unser(field, s):\n", | |
" bb = s.encode()\n", | |
" ch = charset(bb)\n", | |
" if ch is None:\n", | |
" return regex_find(field.decode('utf-8'), s, 'error: no charset')\n", | |
" try:\n", | |
" u = unserialize(bb)\n", | |
" if field in u:\n", | |
" return u[field].decode(ch)\n", | |
" return None\n", | |
" except ValueError:\n", | |
" return regex_find(field.decode('utf-8'), s[0:300], 'error: unser')\n", | |
"\n", | |
"df['d_mod'] = df.img_metadata.apply(lambda x: unser(b'DateTime', x))\n", | |
"df['d_cre'] = df.img_metadata.apply(lambda x: unser(b'DateTimeOriginal', x))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"id": "31221918", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>log_id</th>\n", | |
" <th>log_timestamp</th>\n", | |
" <th>log_actor</th>\n", | |
" <th>log_namespace</th>\n", | |
" <th>log_page</th>\n", | |
" <th>page_title</th>\n", | |
" <th>img_metadata</th>\n", | |
" <th>d_mod</th>\n", | |
" <th>d_cre</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>426</th>\n", | |
" <td>272604965</td>\n", | |
" <td>20180702184734</td>\n", | |
" <td>21472.0</td>\n", | |
" <td>6</td>\n", | |
" <td>70482003</td>\n", | |
" <td>Sien-weg.jpg</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":4000,\"ImageLength\":3000,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>469</th>\n", | |
" <td>272642307</td>\n", | |
" <td>20180703122333</td>\n", | |
" <td>7240467.0</td>\n", | |
" <td>6</td>\n", | |
" <td>70518397</td>\n", | |
" <td>Sierra_Norte_de_Sevilla.jpg</td>\n", | |
" <td>{\"data\":{\"MEDIAWIKI_EXIF_VERSION\":2}}</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2356</th>\n", | |
" <td>273156179</td>\n", | |
" <td>20180720154501</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71007032</td>\n", | |
" <td>Grenchenberg_-_Hooibeestje_(Coenonympha_pamphi...</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":2348,\"ImageLength\":3229,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2361</th>\n", | |
" <td>273156275</td>\n", | |
" <td>20180720154744</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71007120</td>\n", | |
" <td>Grenchenberg_-_Zodeklokje_(Campanula_cochleari...</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2368</th>\n", | |
" <td>273156408</td>\n", | |
" <td>20180720155424</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71007268</td>\n", | |
" <td>Grenchenberg_-_Knautia_(flower).jpg</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2369</th>\n", | |
" <td>273156412</td>\n", | |
" <td>20180720155447</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71007273</td>\n", | |
" <td>Grenchenberg_-_Knautia.jpg</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2379</th>\n", | |
" <td>273157697</td>\n", | |
" <td>20180720164939</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71008474</td>\n", | |
" <td>Grenchenberg_-_Groene_Bergsprinkhaan_(Miramell...</td>\n", | |
" <td>{\"data\":{\"MEDIAWIKI_EXIF_VERSION\":2}}</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2414</th>\n", | |
" <td>273163592</td>\n", | |
" <td>20180720201447</td>\n", | |
" <td>9853.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71014066</td>\n", | |
" <td>Raymond_as_Umaru_Doma_at_Comic-Con_Internation...</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":2268,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2543</th>\n", | |
" <td>273179983</td>\n", | |
" <td>20180721141441</td>\n", | |
" <td>91101.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71028016</td>\n", | |
" <td>Sangagiri_new_bus_stand_1.jpg</td>\n", | |
" <td>{\"data\":{\"Make\":\"OnePlus\",\"Model\":\"ONEPLUS A30...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2561</th>\n", | |
" <td>273184202</td>\n", | |
" <td>20180721171318</td>\n", | |
" <td>9853.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71031982</td>\n", | |
" <td>April_O'Neil_cosplayer_at_Comic-Con_Internatio...</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":2268,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2579</th>\n", | |
" <td>273184643</td>\n", | |
" <td>20180721173207</td>\n", | |
" <td>9853.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71032436</td>\n", | |
" <td>Sinon_cosplayer_at_Comic-Con_International_201...</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":2268,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2635</th>\n", | |
" <td>273187234</td>\n", | |
" <td>20180721192622</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71035051</td>\n", | |
" <td>Grenchen_-_Gewone_Tuinslak_(Cepaea_nemoralis).jpg</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2636</th>\n", | |
" <td>273187285</td>\n", | |
" <td>20180721192710</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71035102</td>\n", | |
" <td>Grenchen_-_Gewone_Tuinslak_(Cepaea_nemoralis)_...</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2639</th>\n", | |
" <td>273187355</td>\n", | |
" <td>20180721193022</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71035181</td>\n", | |
" <td>Grenchen_-_Bosrank_(Clematis_vitalba).jpg</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2754</th>\n", | |
" <td>273201527</td>\n", | |
" <td>20180722141339</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71050341</td>\n", | |
" <td>Grenchen_-_Robertskruid_(Geranium_robertianum)...</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2782</th>\n", | |
" <td>273203256</td>\n", | |
" <td>20180722155257</td>\n", | |
" <td>91101.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71052217</td>\n", | |
" <td>Sangagiri_hill_seen_from_national_highway.jpg</td>\n", | |
" <td>{\"data\":{\"Make\":\"OnePlus\",\"Model\":\"ONEPLUS A30...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2835</th>\n", | |
" <td>273211644</td>\n", | |
" <td>20180722222420</td>\n", | |
" <td>9853.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71060261</td>\n", | |
" <td>Cosplayer_of_Kanna_Kamui_standing_at_Comic-Con...</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":2268,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2866</th>\n", | |
" <td>273221479</td>\n", | |
" <td>20180723125154</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71070339</td>\n", | |
" <td>Grenchen_-_Verbascum_phlomoides.jpg</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2868</th>\n", | |
" <td>273221494</td>\n", | |
" <td>20180723125331</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71070358</td>\n", | |
" <td>Grenchen_-_Gele_Kamille_(Cota_tinctoria).jpg</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2887</th>\n", | |
" <td>273224378</td>\n", | |
" <td>20180723153327</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71073048</td>\n", | |
" <td>Grenchen_-_2_x_Bruingemarmerde_Schildwants_(Ha...</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2889</th>\n", | |
" <td>273224394</td>\n", | |
" <td>20180723153447</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71073066</td>\n", | |
" <td>Grenchen_-_Europese_Honingbij_(Apis_mellifera)...</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2896</th>\n", | |
" <td>273227143</td>\n", | |
" <td>20180723181237</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71075743</td>\n", | |
" <td>Grenchen_-_Gewone_Hooiwagen_(Phalangium_opilio...</td>\n", | |
" <td>{\"data\":{\"MEDIAWIKI_EXIF_VERSION\":2}}</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2937</th>\n", | |
" <td>273230521</td>\n", | |
" <td>20180723213707</td>\n", | |
" <td>98647.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71079316</td>\n", | |
" <td>Alkmaar_Ansichten_-_Gewelfde_Stenenbrug.jpg</td>\n", | |
" <td>{\"data\":{\"Make\":\"Motorola\",\"Model\":\"Moto G (4)...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2944</th>\n", | |
" <td>273230748</td>\n", | |
" <td>20180723220621</td>\n", | |
" <td>98647.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71079632</td>\n", | |
" <td>Alkmaar_Ansichten_-_Appelsteegbrug.jpg</td>\n", | |
" <td>{\"data\":{\"Make\":\"Motorola\",\"Model\":\"Moto G (4)...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3043</th>\n", | |
" <td>273242880</td>\n", | |
" <td>20180724153446</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71090845</td>\n", | |
" <td>Walperswil_-_Tower.jpg</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3177</th>\n", | |
" <td>273258739</td>\n", | |
" <td>20180725135753</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71107445</td>\n", | |
" <td>Grenchen_-_Huisvlieg_(Musca_domestica).jpg</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3264</th>\n", | |
" <td>273280450</td>\n", | |
" <td>20180726173539</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71129960</td>\n", | |
" <td>Biel_-_Kruisspin_(Araneus_diadematus).jpg</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3357</th>\n", | |
" <td>273292615</td>\n", | |
" <td>20180727125521</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71141931</td>\n", | |
" <td>Grenchen_-_Cerceris.jpg</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3646</th>\n", | |
" <td>273316860</td>\n", | |
" <td>20180728170250</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71167089</td>\n", | |
" <td>Grenchen_-_Hibiscus.jpg</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3649</th>\n", | |
" <td>273316897</td>\n", | |
" <td>20180728170512</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71167126</td>\n", | |
" <td>Grenchen_-_Holcostethus_sphacelatus.jpg</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3761</th>\n", | |
" <td>273333194</td>\n", | |
" <td>20180729151800</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71183367</td>\n", | |
" <td>Grenchen_-_Bruingemarmerde_Schildwants_(Halyom...</td>\n", | |
" <td>{\"data\":{\"MEDIAWIKI_EXIF_VERSION\":2}}</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3875</th>\n", | |
" <td>273344497</td>\n", | |
" <td>20180730063908</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71194072</td>\n", | |
" <td>Grenchen_-_Grote_Langlijf_(Sphaerophoria_scrip...</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":3024,\"Model\":\"Nexus 5X\",...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3883</th>\n", | |
" <td>273347896</td>\n", | |
" <td>20180730100427</td>\n", | |
" <td>2444.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71197784</td>\n", | |
" <td>Grenchen_-_Drietandvlakjesmot_(Catoptria_false...</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":1803,\"ImageLength\":2496,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5421</th>\n", | |
" <td>273703699</td>\n", | |
" <td>20180812134147</td>\n", | |
" <td>7268929.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71552514</td>\n", | |
" <td>Une_vieille_charrette_abandonnée.jpg</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":5312,\"ImageLength\":2988,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5603</th>\n", | |
" <td>273768940</td>\n", | |
" <td>20180814182000</td>\n", | |
" <td>22097.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71616122</td>\n", | |
" <td>Runsten_Frustuna_35-1.jpg</td>\n", | |
" <td>a:45:{s:5:\"Model\";s:8:\"SM-G800F\";s:10:\"ImageWi...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5604</th>\n", | |
" <td>273769099</td>\n", | |
" <td>20180814182434</td>\n", | |
" <td>22097.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71616285</td>\n", | |
" <td>Runsten_Frustuna_35-1_Information.jpg</td>\n", | |
" <td>a:45:{s:5:\"Model\";s:8:\"SM-G800F\";s:10:\"ImageWi...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5613</th>\n", | |
" <td>273782307</td>\n", | |
" <td>20180815035327</td>\n", | |
" <td>115107.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71629055</td>\n", | |
" <td>Usable_water_tank.jpg</td>\n", | |
" <td>{\"data\":{\"Make\":\"OPPO\",\"Model\":\"1206\",\"XResolu...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8028</th>\n", | |
" <td>274735256</td>\n", | |
" <td>20180908150048</td>\n", | |
" <td>20694.0</td>\n", | |
" <td>6</td>\n", | |
" <td>72557335</td>\n", | |
" <td>Simit_seller,_Istanbul.jpg</td>\n", | |
" <td>{\"data\":{\"Make\":\"Canon\",\"Model\":\"Canon EOS 550...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8709</th>\n", | |
" <td>274989950</td>\n", | |
" <td>20180916181154</td>\n", | |
" <td>38096.0</td>\n", | |
" <td>6</td>\n", | |
" <td>72790476</td>\n", | |
" <td>The_Tank_exterior,_September_2018.jpg</td>\n", | |
" <td>a:42:{s:10:\"ImageWidth\";i:4032;s:11:\"ImageLeng...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8964</th>\n", | |
" <td>275064700</td>\n", | |
" <td>20180919102121</td>\n", | |
" <td>7377458.0</td>\n", | |
" <td>6</td>\n", | |
" <td>72864560</td>\n", | |
" <td>Water_fountain_in_the_Public_Park_of_Yangon_2.jpg</td>\n", | |
" <td>{\"data\":{\"Make\":\"Canon\",\"Model\":\"Canon EOS 700...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9147</th>\n", | |
" <td>275093545</td>\n", | |
" <td>20180920132547</td>\n", | |
" <td>7381757.0</td>\n", | |
" <td>6</td>\n", | |
" <td>72890791</td>\n", | |
" <td>Malva_(209113498).jpg</td>\n", | |
" <td>{\"data\":{\"DateTime\":\"2018:09:19 21:56:24\",\"Mod...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9275</th>\n", | |
" <td>275111560</td>\n", | |
" <td>20180921041135</td>\n", | |
" <td>9853.0</td>\n", | |
" <td>6</td>\n", | |
" <td>72907467</td>\n", | |
" <td>University_Avenue_Hillcrest_neighborhood_sign_...</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9276</th>\n", | |
" <td>275111561</td>\n", | |
" <td>20180921041148</td>\n", | |
" <td>9853.0</td>\n", | |
" <td>6</td>\n", | |
" <td>72907469</td>\n", | |
" <td>University_Avenue_Hillcrest_neighborhood_sign_...</td>\n", | |
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9349</th>\n", | |
" <td>275135369</td>\n", | |
" <td>20180921234950</td>\n", | |
" <td>38096.0</td>\n", | |
" <td>6</td>\n", | |
" <td>72930726</td>\n", | |
" <td>Sheepshead_Bay_Library,_September_2018.jpg</td>\n", | |
" <td>a:42:{s:10:\"ImageWidth\";i:4032;s:11:\"ImageLeng...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9779</th>\n", | |
" <td>275298512</td>\n", | |
" <td>20180926154025</td>\n", | |
" <td>7080362.0</td>\n", | |
" <td>6</td>\n", | |
" <td>73089560</td>\n", | |
" <td>Halifax-Dartmouth_Ferry.jpg</td>\n", | |
" <td>a:4:{s:8:\"Software\";s:6:\"Google\";s:11:\"ExifVer...</td>\n", | |
" <td>error: unser</td>\n", | |
" <td>error: unser</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" log_id log_timestamp log_actor log_namespace log_page \\\n", | |
"426 272604965 20180702184734 21472.0 6 70482003 \n", | |
"469 272642307 20180703122333 7240467.0 6 70518397 \n", | |
"2356 273156179 20180720154501 2444.0 6 71007032 \n", | |
"2361 273156275 20180720154744 2444.0 6 71007120 \n", | |
"2368 273156408 20180720155424 2444.0 6 71007268 \n", | |
"2369 273156412 20180720155447 2444.0 6 71007273 \n", | |
"2379 273157697 20180720164939 2444.0 6 71008474 \n", | |
"2414 273163592 20180720201447 9853.0 6 71014066 \n", | |
"2543 273179983 20180721141441 91101.0 6 71028016 \n", | |
"2561 273184202 20180721171318 9853.0 6 71031982 \n", | |
"2579 273184643 20180721173207 9853.0 6 71032436 \n", | |
"2635 273187234 20180721192622 2444.0 6 71035051 \n", | |
"2636 273187285 20180721192710 2444.0 6 71035102 \n", | |
"2639 273187355 20180721193022 2444.0 6 71035181 \n", | |
"2754 273201527 20180722141339 2444.0 6 71050341 \n", | |
"2782 273203256 20180722155257 91101.0 6 71052217 \n", | |
"2835 273211644 20180722222420 9853.0 6 71060261 \n", | |
"2866 273221479 20180723125154 2444.0 6 71070339 \n", | |
"2868 273221494 20180723125331 2444.0 6 71070358 \n", | |
"2887 273224378 20180723153327 2444.0 6 71073048 \n", | |
"2889 273224394 20180723153447 2444.0 6 71073066 \n", | |
"2896 273227143 20180723181237 2444.0 6 71075743 \n", | |
"2937 273230521 20180723213707 98647.0 6 71079316 \n", | |
"2944 273230748 20180723220621 98647.0 6 71079632 \n", | |
"3043 273242880 20180724153446 2444.0 6 71090845 \n", | |
"3177 273258739 20180725135753 2444.0 6 71107445 \n", | |
"3264 273280450 20180726173539 2444.0 6 71129960 \n", | |
"3357 273292615 20180727125521 2444.0 6 71141931 \n", | |
"3646 273316860 20180728170250 2444.0 6 71167089 \n", | |
"3649 273316897 20180728170512 2444.0 6 71167126 \n", | |
"3761 273333194 20180729151800 2444.0 6 71183367 \n", | |
"3875 273344497 20180730063908 2444.0 6 71194072 \n", | |
"3883 273347896 20180730100427 2444.0 6 71197784 \n", | |
"5421 273703699 20180812134147 7268929.0 6 71552514 \n", | |
"5603 273768940 20180814182000 22097.0 6 71616122 \n", | |
"5604 273769099 20180814182434 22097.0 6 71616285 \n", | |
"5613 273782307 20180815035327 115107.0 6 71629055 \n", | |
"8028 274735256 20180908150048 20694.0 6 72557335 \n", | |
"8709 274989950 20180916181154 38096.0 6 72790476 \n", | |
"8964 275064700 20180919102121 7377458.0 6 72864560 \n", | |
"9147 275093545 20180920132547 7381757.0 6 72890791 \n", | |
"9275 275111560 20180921041135 9853.0 6 72907467 \n", | |
"9276 275111561 20180921041148 9853.0 6 72907469 \n", | |
"9349 275135369 20180921234950 38096.0 6 72930726 \n", | |
"9779 275298512 20180926154025 7080362.0 6 73089560 \n", | |
"\n", | |
" page_title \\\n", | |
"426 Sien-weg.jpg \n", | |
"469 Sierra_Norte_de_Sevilla.jpg \n", | |
"2356 Grenchenberg_-_Hooibeestje_(Coenonympha_pamphi... \n", | |
"2361 Grenchenberg_-_Zodeklokje_(Campanula_cochleari... \n", | |
"2368 Grenchenberg_-_Knautia_(flower).jpg \n", | |
"2369 Grenchenberg_-_Knautia.jpg \n", | |
"2379 Grenchenberg_-_Groene_Bergsprinkhaan_(Miramell... \n", | |
"2414 Raymond_as_Umaru_Doma_at_Comic-Con_Internation... \n", | |
"2543 Sangagiri_new_bus_stand_1.jpg \n", | |
"2561 April_O'Neil_cosplayer_at_Comic-Con_Internatio... \n", | |
"2579 Sinon_cosplayer_at_Comic-Con_International_201... \n", | |
"2635 Grenchen_-_Gewone_Tuinslak_(Cepaea_nemoralis).jpg \n", | |
"2636 Grenchen_-_Gewone_Tuinslak_(Cepaea_nemoralis)_... \n", | |
"2639 Grenchen_-_Bosrank_(Clematis_vitalba).jpg \n", | |
"2754 Grenchen_-_Robertskruid_(Geranium_robertianum)... \n", | |
"2782 Sangagiri_hill_seen_from_national_highway.jpg \n", | |
"2835 Cosplayer_of_Kanna_Kamui_standing_at_Comic-Con... \n", | |
"2866 Grenchen_-_Verbascum_phlomoides.jpg \n", | |
"2868 Grenchen_-_Gele_Kamille_(Cota_tinctoria).jpg \n", | |
"2887 Grenchen_-_2_x_Bruingemarmerde_Schildwants_(Ha... \n", | |
"2889 Grenchen_-_Europese_Honingbij_(Apis_mellifera)... \n", | |
"2896 Grenchen_-_Gewone_Hooiwagen_(Phalangium_opilio... \n", | |
"2937 Alkmaar_Ansichten_-_Gewelfde_Stenenbrug.jpg \n", | |
"2944 Alkmaar_Ansichten_-_Appelsteegbrug.jpg \n", | |
"3043 Walperswil_-_Tower.jpg \n", | |
"3177 Grenchen_-_Huisvlieg_(Musca_domestica).jpg \n", | |
"3264 Biel_-_Kruisspin_(Araneus_diadematus).jpg \n", | |
"3357 Grenchen_-_Cerceris.jpg \n", | |
"3646 Grenchen_-_Hibiscus.jpg \n", | |
"3649 Grenchen_-_Holcostethus_sphacelatus.jpg \n", | |
"3761 Grenchen_-_Bruingemarmerde_Schildwants_(Halyom... \n", | |
"3875 Grenchen_-_Grote_Langlijf_(Sphaerophoria_scrip... \n", | |
"3883 Grenchen_-_Drietandvlakjesmot_(Catoptria_false... \n", | |
"5421 Une_vieille_charrette_abandonnée.jpg \n", | |
"5603 Runsten_Frustuna_35-1.jpg \n", | |
"5604 Runsten_Frustuna_35-1_Information.jpg \n", | |
"5613 Usable_water_tank.jpg \n", | |
"8028 Simit_seller,_Istanbul.jpg \n", | |
"8709 The_Tank_exterior,_September_2018.jpg \n", | |
"8964 Water_fountain_in_the_Public_Park_of_Yangon_2.jpg \n", | |
"9147 Malva_(209113498).jpg \n", | |
"9275 University_Avenue_Hillcrest_neighborhood_sign_... \n", | |
"9276 University_Avenue_Hillcrest_neighborhood_sign_... \n", | |
"9349 Sheepshead_Bay_Library,_September_2018.jpg \n", | |
"9779 Halifax-Dartmouth_Ferry.jpg \n", | |
"\n", | |
" img_metadata d_mod \\\n", | |
"426 {\"data\":{\"ImageWidth\":4000,\"ImageLength\":3000,... error: unser \n", | |
"469 {\"data\":{\"MEDIAWIKI_EXIF_VERSION\":2}} error: unser \n", | |
"2356 {\"data\":{\"ImageWidth\":2348,\"ImageLength\":3229,... error: unser \n", | |
"2361 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,... error: unser \n", | |
"2368 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n", | |
"2369 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n", | |
"2379 {\"data\":{\"MEDIAWIKI_EXIF_VERSION\":2}} error: unser \n", | |
"2414 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":2268,... error: unser \n", | |
"2543 {\"data\":{\"Make\":\"OnePlus\",\"Model\":\"ONEPLUS A30... error: unser \n", | |
"2561 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":2268,... error: unser \n", | |
"2579 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":2268,... error: unser \n", | |
"2635 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n", | |
"2636 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n", | |
"2639 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n", | |
"2754 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n", | |
"2782 {\"data\":{\"Make\":\"OnePlus\",\"Model\":\"ONEPLUS A30... error: unser \n", | |
"2835 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":2268,... error: unser \n", | |
"2866 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,... error: unser \n", | |
"2868 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,... error: unser \n", | |
"2887 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n", | |
"2889 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n", | |
"2896 {\"data\":{\"MEDIAWIKI_EXIF_VERSION\":2}} error: unser \n", | |
"2937 {\"data\":{\"Make\":\"Motorola\",\"Model\":\"Moto G (4)... error: unser \n", | |
"2944 {\"data\":{\"Make\":\"Motorola\",\"Model\":\"Moto G (4)... error: unser \n", | |
"3043 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,... error: unser \n", | |
"3177 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n", | |
"3264 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n", | |
"3357 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n", | |
"3646 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n", | |
"3649 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,... error: unser \n", | |
"3761 {\"data\":{\"MEDIAWIKI_EXIF_VERSION\":2}} error: unser \n", | |
"3875 {\"data\":{\"ImageWidth\":3024,\"Model\":\"Nexus 5X\",... error: unser \n", | |
"3883 {\"data\":{\"ImageWidth\":1803,\"ImageLength\":2496,... error: unser \n", | |
"5421 {\"data\":{\"ImageWidth\":5312,\"ImageLength\":2988,... error: unser \n", | |
"5603 a:45:{s:5:\"Model\";s:8:\"SM-G800F\";s:10:\"ImageWi... error: unser \n", | |
"5604 a:45:{s:5:\"Model\";s:8:\"SM-G800F\";s:10:\"ImageWi... error: unser \n", | |
"5613 {\"data\":{\"Make\":\"OPPO\",\"Model\":\"1206\",\"XResolu... error: unser \n", | |
"8028 {\"data\":{\"Make\":\"Canon\",\"Model\":\"Canon EOS 550... error: unser \n", | |
"8709 a:42:{s:10:\"ImageWidth\";i:4032;s:11:\"ImageLeng... error: unser \n", | |
"8964 {\"data\":{\"Make\":\"Canon\",\"Model\":\"Canon EOS 700... error: unser \n", | |
"9147 {\"data\":{\"DateTime\":\"2018:09:19 21:56:24\",\"Mod... error: unser \n", | |
"9275 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,... error: unser \n", | |
"9276 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,... error: unser \n", | |
"9349 a:42:{s:10:\"ImageWidth\";i:4032;s:11:\"ImageLeng... error: unser \n", | |
"9779 a:4:{s:8:\"Software\";s:6:\"Google\";s:11:\"ExifVer... error: unser \n", | |
"\n", | |
" d_cre \n", | |
"426 error: unser \n", | |
"469 error: unser \n", | |
"2356 error: unser \n", | |
"2361 error: unser \n", | |
"2368 error: unser \n", | |
"2369 error: unser \n", | |
"2379 error: unser \n", | |
"2414 error: unser \n", | |
"2543 error: unser \n", | |
"2561 error: unser \n", | |
"2579 error: unser \n", | |
"2635 error: unser \n", | |
"2636 error: unser \n", | |
"2639 error: unser \n", | |
"2754 error: unser \n", | |
"2782 error: unser \n", | |
"2835 error: unser \n", | |
"2866 error: unser \n", | |
"2868 error: unser \n", | |
"2887 error: unser \n", | |
"2889 error: unser \n", | |
"2896 error: unser \n", | |
"2937 error: unser \n", | |
"2944 error: unser \n", | |
"3043 error: unser \n", | |
"3177 error: unser \n", | |
"3264 error: unser \n", | |
"3357 error: unser \n", | |
"3646 error: unser \n", | |
"3649 error: unser \n", | |
"3761 error: unser \n", | |
"3875 error: unser \n", | |
"3883 error: unser \n", | |
"5421 error: unser \n", | |
"5603 error: unser \n", | |
"5604 error: unser \n", | |
"5613 error: unser \n", | |
"8028 error: unser \n", | |
"8709 error: unser \n", | |
"8964 error: unser \n", | |
"9147 error: unser \n", | |
"9275 error: unser \n", | |
"9276 error: unser \n", | |
"9349 error: unser \n", | |
"9779 error: unser " | |
] | |
}, | |
"execution_count": 40, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# check for parse errors\n", | |
"df[df.d_mod.str.contains(\"error:\") == True]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"id": "6545539c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>log_id</th>\n", | |
" <th>log_timestamp</th>\n", | |
" <th>log_actor</th>\n", | |
" <th>log_namespace</th>\n", | |
" <th>log_page</th>\n", | |
" <th>page_title</th>\n", | |
" <th>img_metadata</th>\n", | |
" <th>d_mod</th>\n", | |
" <th>d_cre</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>272446438</td>\n", | |
" <td>20180628193446</td>\n", | |
" <td>7120933.0</td>\n", | |
" <td>6</td>\n", | |
" <td>70328570</td>\n", | |
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n", | |
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n", | |
" <td>2018:06:28 15:55:08</td>\n", | |
" <td>2018:02:26 08:31:44</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>272446448</td>\n", | |
" <td>20180628193511</td>\n", | |
" <td>7120933.0</td>\n", | |
" <td>6</td>\n", | |
" <td>70328570</td>\n", | |
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n", | |
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n", | |
" <td>2018:06:28 15:55:08</td>\n", | |
" <td>2018:02:26 08:31:44</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>272446451</td>\n", | |
" <td>20180628193527</td>\n", | |
" <td>7120933.0</td>\n", | |
" <td>6</td>\n", | |
" <td>70328570</td>\n", | |
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n", | |
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n", | |
" <td>2018:06:28 15:55:08</td>\n", | |
" <td>2018:02:26 08:31:44</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>272446458</td>\n", | |
" <td>20180628193546</td>\n", | |
" <td>7120933.0</td>\n", | |
" <td>6</td>\n", | |
" <td>70328570</td>\n", | |
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n", | |
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n", | |
" <td>2018:06:28 15:55:08</td>\n", | |
" <td>2018:02:26 08:31:44</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>272446465</td>\n", | |
" <td>20180628193601</td>\n", | |
" <td>7120933.0</td>\n", | |
" <td>6</td>\n", | |
" <td>70328570</td>\n", | |
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n", | |
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n", | |
" <td>2018:06:28 15:55:08</td>\n", | |
" <td>2018:02:26 08:31:44</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9995</th>\n", | |
" <td>275395285</td>\n", | |
" <td>20180928230512</td>\n", | |
" <td>13772.0</td>\n", | |
" <td>6</td>\n", | |
" <td>73177585</td>\n", | |
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n", | |
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n", | |
" <td>2018:09:28 15:37:55</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9996</th>\n", | |
" <td>275395291</td>\n", | |
" <td>20180928230534</td>\n", | |
" <td>13772.0</td>\n", | |
" <td>6</td>\n", | |
" <td>73177591</td>\n", | |
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n", | |
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n", | |
" <td>2018:09:28 15:38:32</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9997</th>\n", | |
" <td>275395293</td>\n", | |
" <td>20180928230554</td>\n", | |
" <td>13772.0</td>\n", | |
" <td>6</td>\n", | |
" <td>73177593</td>\n", | |
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n", | |
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n", | |
" <td>2018:09:28 15:37:13</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9998</th>\n", | |
" <td>275395295</td>\n", | |
" <td>20180928230614</td>\n", | |
" <td>13772.0</td>\n", | |
" <td>6</td>\n", | |
" <td>73177596</td>\n", | |
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n", | |
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n", | |
" <td>2018:09:28 15:36:41</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9999</th>\n", | |
" <td>275395298</td>\n", | |
" <td>20180928230635</td>\n", | |
" <td>13772.0</td>\n", | |
" <td>6</td>\n", | |
" <td>73177599</td>\n", | |
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n", | |
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n", | |
" <td>2018:09:28 15:36:20</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>2179 rows × 9 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" log_id log_timestamp log_actor log_namespace log_page \\\n", | |
"0 272446438 20180628193446 7120933.0 6 70328570 \n", | |
"1 272446448 20180628193511 7120933.0 6 70328570 \n", | |
"2 272446451 20180628193527 7120933.0 6 70328570 \n", | |
"3 272446458 20180628193546 7120933.0 6 70328570 \n", | |
"4 272446465 20180628193601 7120933.0 6 70328570 \n", | |
"... ... ... ... ... ... \n", | |
"9995 275395285 20180928230512 13772.0 6 73177585 \n", | |
"9996 275395291 20180928230534 13772.0 6 73177591 \n", | |
"9997 275395293 20180928230554 13772.0 6 73177593 \n", | |
"9998 275395295 20180928230614 13772.0 6 73177596 \n", | |
"9999 275395298 20180928230635 13772.0 6 73177599 \n", | |
"\n", | |
" page_title \\\n", | |
"0 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n", | |
"1 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n", | |
"2 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n", | |
"3 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n", | |
"4 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n", | |
"... ... \n", | |
"9995 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n", | |
"9996 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n", | |
"9997 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n", | |
"9998 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n", | |
"9999 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n", | |
"\n", | |
" img_metadata d_mod \\\n", | |
"0 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... 2018:06:28 15:55:08 \n", | |
"1 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... 2018:06:28 15:55:08 \n", | |
"2 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... 2018:06:28 15:55:08 \n", | |
"3 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... 2018:06:28 15:55:08 \n", | |
"4 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... 2018:06:28 15:55:08 \n", | |
"... ... ... \n", | |
"9995 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... 2018:09:28 15:37:55 \n", | |
"9996 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... 2018:09:28 15:38:32 \n", | |
"9997 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... 2018:09:28 15:37:13 \n", | |
"9998 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... 2018:09:28 15:36:41 \n", | |
"9999 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... 2018:09:28 15:36:20 \n", | |
"\n", | |
" d_cre \n", | |
"0 2018:02:26 08:31:44 \n", | |
"1 2018:02:26 08:31:44 \n", | |
"2 2018:02:26 08:31:44 \n", | |
"3 2018:02:26 08:31:44 \n", | |
"4 2018:02:26 08:31:44 \n", | |
"... ... \n", | |
"9995 None \n", | |
"9996 None \n", | |
"9997 None \n", | |
"9998 None \n", | |
"9999 None \n", | |
"\n", | |
"[2179 rows x 9 columns]" | |
] | |
}, | |
"execution_count": 41, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# list candidates file pages to be fixed\n", | |
"df.loc[df.d_mod.str.contains(\"error:\") == True, 'd_mod'] = None\n", | |
"df.loc[df.d_cre.str.contains(\"error:\") == True, 'd_cre'] = None\n", | |
"df_to_fix = df[(df.d_mod != df.d_cre) & (df.d_mod.notnull())]\n", | |
"df_to_fix" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"id": "ee09a13a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>log_id</th>\n", | |
" <th>log_timestamp</th>\n", | |
" <th>log_actor</th>\n", | |
" <th>log_namespace</th>\n", | |
" <th>log_page</th>\n", | |
" <th>page_title</th>\n", | |
" <th>img_metadata</th>\n", | |
" <th>d_mod</th>\n", | |
" <th>d_cre</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>5238</th>\n", | |
" <td>273667246</td>\n", | |
" <td>20180811022148</td>\n", | |
" <td>23537.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71517161</td>\n", | |
" <td>Penguin_incubation,_penguins_beach_Cape_Town_.jpg</td>\n", | |
" <td>a:36:{s:10:\"ImageWidth\";i:2576;s:11:\"ImageLeng...</td>\n", | |
" <td>2018:07:18 16:16:37</td>\n", | |
" <td>2018:07:18 16:16:37</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6859</th>\n", | |
" <td>274203234</td>\n", | |
" <td>20180826233019</td>\n", | |
" <td>7268929.0</td>\n", | |
" <td>6</td>\n", | |
" <td>72048177</td>\n", | |
" <td>3_donkeys_saying_hello.jpg</td>\n", | |
" <td>a:1:{s:22:\"MEDIAWIKI_EXIF_VERSION\";i:2;}</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8788</th>\n", | |
" <td>275015090</td>\n", | |
" <td>20180917141853</td>\n", | |
" <td>957.0</td>\n", | |
" <td>6</td>\n", | |
" <td>72815806</td>\n", | |
" <td>Menhir_Hackpfüffel_-_3.jpg</td>\n", | |
" <td>a:42:{s:4:\"Make\";s:7:\"samsung\";s:5:\"Model\";s:9...</td>\n", | |
" <td>2018:09:16 16:57:27</td>\n", | |
" <td>2018:09:16 16:57:27</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5162</th>\n", | |
" <td>273650797</td>\n", | |
" <td>20180810120809</td>\n", | |
" <td>7268129.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71500539</td>\n", | |
" <td>Tomba_Amerigo_2.jpg</td>\n", | |
" <td>a:1:{s:22:\"MEDIAWIKI_EXIF_VERSION\";i:2;}</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7571</th>\n", | |
" <td>274520386</td>\n", | |
" <td>20180903234215</td>\n", | |
" <td>7360763.0</td>\n", | |
" <td>6</td>\n", | |
" <td>72332397</td>\n", | |
" <td>In_west_Mogok.jpg</td>\n", | |
" <td>a:45:{s:4:\"Make\";s:17:\"NIKON CORPORATION\";s:5:...</td>\n", | |
" <td>2018:03:29 10:13:50</td>\n", | |
" <td>2018:03:29 10:13:50</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5627</th>\n", | |
" <td>273787443</td>\n", | |
" <td>20180815085448</td>\n", | |
" <td>4887272.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71633870</td>\n", | |
" <td>Melmuri_water_flood_2018.jpg</td>\n", | |
" <td>a:32:{s:4:\"Make\";s:4:\"LAVA\";s:5:\"Model\";s:2:\"R...</td>\n", | |
" <td>2018:08:15 14:25:42</td>\n", | |
" <td>2018:08:15 14:25:42</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4387</th>\n", | |
" <td>273477636</td>\n", | |
" <td>20180804075231</td>\n", | |
" <td>8589.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71329345</td>\n", | |
" <td>Wittenberger_straße_dresden_2018-08-04_-_7.jpg</td>\n", | |
" <td>a:55:{s:10:\"ImageWidth\";i:4160;s:11:\"ImageLeng...</td>\n", | |
" <td>2018:08:04 08:40:23</td>\n", | |
" <td>2018:08:04 08:40:23</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4855</th>\n", | |
" <td>273594028</td>\n", | |
" <td>20180808062154</td>\n", | |
" <td>31637.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71445302</td>\n", | |
" <td>Curious_building_near_有栖川公園.jpg</td>\n", | |
" <td>a:46:{s:4:\"Make\";s:17:\"NIKON CORPORATION\";s:5:...</td>\n", | |
" <td>2018:07:29 15:21:19</td>\n", | |
" <td>2018:07:29 15:21:19</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5486</th>\n", | |
" <td>273718990</td>\n", | |
" <td>20180813033611</td>\n", | |
" <td>5869.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71567068</td>\n", | |
" <td>Maa_Vindhyavasini_temple,_Vindhyachal.jpg</td>\n", | |
" <td>a:37:{s:4:\"Make\";s:9:\"PANASONIC\";s:5:\"Model\";s...</td>\n", | |
" <td>2018:06:22 08:53:37</td>\n", | |
" <td>2018:06:22 08:53:37</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3257</th>\n", | |
" <td>273279853</td>\n", | |
" <td>20180726171238</td>\n", | |
" <td>1366.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71129400</td>\n", | |
" <td>Visitor_centre_for_Ferniehirst_Castle.jpg</td>\n", | |
" <td>a:32:{s:4:\"Make\";s:5:\"CUBOT\";s:5:\"Model\";s:9:\"...</td>\n", | |
" <td>2018:07:26 14:38:33</td>\n", | |
" <td>2018:07:26 14:38:33</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>7693 rows × 9 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" log_id log_timestamp log_actor log_namespace log_page \\\n", | |
"5238 273667246 20180811022148 23537.0 6 71517161 \n", | |
"6859 274203234 20180826233019 7268929.0 6 72048177 \n", | |
"8788 275015090 20180917141853 957.0 6 72815806 \n", | |
"5162 273650797 20180810120809 7268129.0 6 71500539 \n", | |
"7571 274520386 20180903234215 7360763.0 6 72332397 \n", | |
"... ... ... ... ... ... \n", | |
"5627 273787443 20180815085448 4887272.0 6 71633870 \n", | |
"4387 273477636 20180804075231 8589.0 6 71329345 \n", | |
"4855 273594028 20180808062154 31637.0 6 71445302 \n", | |
"5486 273718990 20180813033611 5869.0 6 71567068 \n", | |
"3257 273279853 20180726171238 1366.0 6 71129400 \n", | |
"\n", | |
" page_title \\\n", | |
"5238 Penguin_incubation,_penguins_beach_Cape_Town_.jpg \n", | |
"6859 3_donkeys_saying_hello.jpg \n", | |
"8788 Menhir_Hackpfüffel_-_3.jpg \n", | |
"5162 Tomba_Amerigo_2.jpg \n", | |
"7571 In_west_Mogok.jpg \n", | |
"... ... \n", | |
"5627 Melmuri_water_flood_2018.jpg \n", | |
"4387 Wittenberger_straße_dresden_2018-08-04_-_7.jpg \n", | |
"4855 Curious_building_near_有栖川公園.jpg \n", | |
"5486 Maa_Vindhyavasini_temple,_Vindhyachal.jpg \n", | |
"3257 Visitor_centre_for_Ferniehirst_Castle.jpg \n", | |
"\n", | |
" img_metadata d_mod \\\n", | |
"5238 a:36:{s:10:\"ImageWidth\";i:2576;s:11:\"ImageLeng... 2018:07:18 16:16:37 \n", | |
"6859 a:1:{s:22:\"MEDIAWIKI_EXIF_VERSION\";i:2;} None \n", | |
"8788 a:42:{s:4:\"Make\";s:7:\"samsung\";s:5:\"Model\";s:9... 2018:09:16 16:57:27 \n", | |
"5162 a:1:{s:22:\"MEDIAWIKI_EXIF_VERSION\";i:2;} None \n", | |
"7571 a:45:{s:4:\"Make\";s:17:\"NIKON CORPORATION\";s:5:... 2018:03:29 10:13:50 \n", | |
"... ... ... \n", | |
"5627 a:32:{s:4:\"Make\";s:4:\"LAVA\";s:5:\"Model\";s:2:\"R... 2018:08:15 14:25:42 \n", | |
"4387 a:55:{s:10:\"ImageWidth\";i:4160;s:11:\"ImageLeng... 2018:08:04 08:40:23 \n", | |
"4855 a:46:{s:4:\"Make\";s:17:\"NIKON CORPORATION\";s:5:... 2018:07:29 15:21:19 \n", | |
"5486 a:37:{s:4:\"Make\";s:9:\"PANASONIC\";s:5:\"Model\";s... 2018:06:22 08:53:37 \n", | |
"3257 a:32:{s:4:\"Make\";s:5:\"CUBOT\";s:5:\"Model\";s:9:\"... 2018:07:26 14:38:33 \n", | |
"\n", | |
" d_cre \n", | |
"5238 2018:07:18 16:16:37 \n", | |
"6859 None \n", | |
"8788 2018:09:16 16:57:27 \n", | |
"5162 None \n", | |
"7571 2018:03:29 10:13:50 \n", | |
"... ... \n", | |
"5627 2018:08:15 14:25:42 \n", | |
"4387 2018:08:04 08:40:23 \n", | |
"4855 2018:07:29 15:21:19 \n", | |
"5486 2018:06:22 08:53:37 \n", | |
"3257 2018:07:26 14:38:33 \n", | |
"\n", | |
"[7693 rows x 9 columns]" | |
] | |
}, | |
"execution_count": 54, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# list candidates file pages NOT to be fixed\n", | |
"df_not_to_fix = df[(df.d_mod == df.d_cre) | (df.d_mod.isnull() & df.d_cre.isnull())]\n", | |
"df_not_to_fix.sample(frac=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"id": "d8764e5a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>log_id</th>\n", | |
" <th>log_timestamp</th>\n", | |
" <th>log_actor</th>\n", | |
" <th>log_namespace</th>\n", | |
" <th>log_page</th>\n", | |
" <th>page_title</th>\n", | |
" <th>img_metadata</th>\n", | |
" <th>d_mod</th>\n", | |
" <th>d_cre</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>8442</th>\n", | |
" <td>274882044</td>\n", | |
" <td>20180912185800</td>\n", | |
" <td>7404411.0</td>\n", | |
" <td>6</td>\n", | |
" <td>72684628</td>\n", | |
" <td>The_alone_flower.jpg</td>\n", | |
" <td>a:26:{s:5:\"Model\";s:4:\"A33f\";s:16:\"YCbCrPositi...</td>\n", | |
" <td>None</td>\n", | |
" <td>2017:01:24 06:36:13</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8840</th>\n", | |
" <td>275034405</td>\n", | |
" <td>20180918063725</td>\n", | |
" <td>7415301.0</td>\n", | |
" <td>6</td>\n", | |
" <td>72834653</td>\n", | |
" <td>Cannon_4.jpg</td>\n", | |
" <td>a:24:{s:4:\"Make\";s:4:\"vivo\";s:5:\"Model\";s:7:\"v...</td>\n", | |
" <td>None</td>\n", | |
" <td>2017:12:18 16:05:58</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2927</th>\n", | |
" <td>273230220</td>\n", | |
" <td>20180723210526</td>\n", | |
" <td>7322156.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71078928</td>\n", | |
" <td>Bexbacher_Hindenburgturm.jpg</td>\n", | |
" <td>a:28:{s:4:\"Make\";s:5:\"Nokia\";s:5:\"Model\";s:9:\"...</td>\n", | |
" <td>None</td>\n", | |
" <td>2017:06:08 16:23:16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3972</th>\n", | |
" <td>273367517</td>\n", | |
" <td>20180731083019</td>\n", | |
" <td>121788.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71218647</td>\n", | |
" <td>Clouds_6.jpg</td>\n", | |
" <td>a:10:{s:4:\"Make\";s:6:\"Xiaomi\";s:5:\"Model\";s:5:...</td>\n", | |
" <td>None</td>\n", | |
" <td>2018:07:06 10:26:05</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9658</th>\n", | |
" <td>275271055</td>\n", | |
" <td>20180925214941</td>\n", | |
" <td>246422.0</td>\n", | |
" <td>6</td>\n", | |
" <td>73062910</td>\n", | |
" <td>Plaça_de_la_Palmera_de_Sant_Martí_17.jpg</td>\n", | |
" <td>a:13:{s:4:\"Make\";s:8:\"motorola\";s:5:\"Model\";s:...</td>\n", | |
" <td>None</td>\n", | |
" <td>2018:09:25 15:33:31</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2898</th>\n", | |
" <td>273227276</td>\n", | |
" <td>20180723182249</td>\n", | |
" <td>157819.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71075869</td>\n", | |
" <td>Banja_lake,_Gramsh,_Albania_-_6.jpg</td>\n", | |
" <td>a:26:{s:11:\"Orientation\";i:1;s:5:\"Model\";s:9:\"...</td>\n", | |
" <td>None</td>\n", | |
" <td>2018:07:08 10:27:15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9825</th>\n", | |
" <td>275332475</td>\n", | |
" <td>20180927070920</td>\n", | |
" <td>46042.0</td>\n", | |
" <td>6</td>\n", | |
" <td>73117677</td>\n", | |
" <td>Passiflora_foetida,_Love-in-a-mist,_Stinking_p...</td>\n", | |
" <td>a:13:{s:4:\"Make\";s:4:\"SONY\";s:5:\"Model\";s:10:\"...</td>\n", | |
" <td>None</td>\n", | |
" <td>2018:09:25 08:27:00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7167</th>\n", | |
" <td>274314346</td>\n", | |
" <td>20180830121916</td>\n", | |
" <td>20979.0</td>\n", | |
" <td>6</td>\n", | |
" <td>72159293</td>\n", | |
" <td>地铁吉祥村站.jpg</td>\n", | |
" <td>a:23:{s:4:\"Make\";s:3:\"ZTE\";s:5:\"Model\";s:4:\"A8...</td>\n", | |
" <td>None</td>\n", | |
" <td>2018:08:06 19:03:54</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2905</th>\n", | |
" <td>273227327</td>\n", | |
" <td>20180723182746</td>\n", | |
" <td>157819.0</td>\n", | |
" <td>6</td>\n", | |
" <td>71075924</td>\n", | |
" <td>Banja_lake,_Gramsh,_Albania_-_3.jpeg</td>\n", | |
" <td>a:26:{s:11:\"Orientation\";i:1;s:5:\"Model\";s:9:\"...</td>\n", | |
" <td>None</td>\n", | |
" <td>2018:07:08 10:26:50</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1134</th>\n", | |
" <td>272818528</td>\n", | |
" <td>20180709123345</td>\n", | |
" <td>7298796.0</td>\n", | |
" <td>6</td>\n", | |
" <td>70688877</td>\n", | |
" <td>Friendship_Holding_hands..jpg</td>\n", | |
" <td>a:38:{s:4:\"Make\";s:8:\"Motorola\";s:5:\"Model\";s:...</td>\n", | |
" <td>None</td>\n", | |
" <td>2018:07:07 11:06:44</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>128 rows × 9 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" log_id log_timestamp log_actor log_namespace log_page \\\n", | |
"8442 274882044 20180912185800 7404411.0 6 72684628 \n", | |
"8840 275034405 20180918063725 7415301.0 6 72834653 \n", | |
"2927 273230220 20180723210526 7322156.0 6 71078928 \n", | |
"3972 273367517 20180731083019 121788.0 6 71218647 \n", | |
"9658 275271055 20180925214941 246422.0 6 73062910 \n", | |
"... ... ... ... ... ... \n", | |
"2898 273227276 20180723182249 157819.0 6 71075869 \n", | |
"9825 275332475 20180927070920 46042.0 6 73117677 \n", | |
"7167 274314346 20180830121916 20979.0 6 72159293 \n", | |
"2905 273227327 20180723182746 157819.0 6 71075924 \n", | |
"1134 272818528 20180709123345 7298796.0 6 70688877 \n", | |
"\n", | |
" page_title \\\n", | |
"8442 The_alone_flower.jpg \n", | |
"8840 Cannon_4.jpg \n", | |
"2927 Bexbacher_Hindenburgturm.jpg \n", | |
"3972 Clouds_6.jpg \n", | |
"9658 Plaça_de_la_Palmera_de_Sant_Martí_17.jpg \n", | |
"... ... \n", | |
"2898 Banja_lake,_Gramsh,_Albania_-_6.jpg \n", | |
"9825 Passiflora_foetida,_Love-in-a-mist,_Stinking_p... \n", | |
"7167 地铁吉祥村站.jpg \n", | |
"2905 Banja_lake,_Gramsh,_Albania_-_3.jpeg \n", | |
"1134 Friendship_Holding_hands..jpg \n", | |
"\n", | |
" img_metadata d_mod \\\n", | |
"8442 a:26:{s:5:\"Model\";s:4:\"A33f\";s:16:\"YCbCrPositi... None \n", | |
"8840 a:24:{s:4:\"Make\";s:4:\"vivo\";s:5:\"Model\";s:7:\"v... None \n", | |
"2927 a:28:{s:4:\"Make\";s:5:\"Nokia\";s:5:\"Model\";s:9:\"... None \n", | |
"3972 a:10:{s:4:\"Make\";s:6:\"Xiaomi\";s:5:\"Model\";s:5:... None \n", | |
"9658 a:13:{s:4:\"Make\";s:8:\"motorola\";s:5:\"Model\";s:... None \n", | |
"... ... ... \n", | |
"2898 a:26:{s:11:\"Orientation\";i:1;s:5:\"Model\";s:9:\"... None \n", | |
"9825 a:13:{s:4:\"Make\";s:4:\"SONY\";s:5:\"Model\";s:10:\"... None \n", | |
"7167 a:23:{s:4:\"Make\";s:3:\"ZTE\";s:5:\"Model\";s:4:\"A8... None \n", | |
"2905 a:26:{s:11:\"Orientation\";i:1;s:5:\"Model\";s:9:\"... None \n", | |
"1134 a:38:{s:4:\"Make\";s:8:\"Motorola\";s:5:\"Model\";s:... None \n", | |
"\n", | |
" d_cre \n", | |
"8442 2017:01:24 06:36:13 \n", | |
"8840 2017:12:18 16:05:58 \n", | |
"2927 2017:06:08 16:23:16 \n", | |
"3972 2018:07:06 10:26:05 \n", | |
"9658 2018:09:25 15:33:31 \n", | |
"... ... \n", | |
"2898 2018:07:08 10:27:15 \n", | |
"9825 2018:09:25 08:27:00 \n", | |
"7167 2018:08:06 19:03:54 \n", | |
"2905 2018:07:08 10:26:50 \n", | |
"1134 2018:07:07 11:06:44 \n", | |
"\n", | |
"[128 rows x 9 columns]" | |
] | |
}, | |
"execution_count": 52, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# list file pages for manual inspection\n", | |
"df.drop(df_to_fix.index.union(df_not_to_fix.index), axis=0).sample(frac=1)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment