Skip to content

Instantly share code, notes, and snippets.

@whym
Created August 5, 2022 13:18
Show Gist options
  • Save whym/0f548b883bceb3d435955e3ae89ef5ba to your computer and use it in GitHub Desktop.
Save whym/0f548b883bceb3d435955e3ae89ef5ba to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 37,
"id": "d9acaa1b",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os.path\n",
"\n",
"# Resolve the exported CSV under the user's home directory, then load it into df.\n",
"csv_path = os.path.expanduser('~/Downloads/quarry-66237-mobile-upload-metadata-from-logging-depending-on-change-tag-run655164.csv')\n",
"df = pd.read_csv(csv_path)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "61243d8d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>log_id</th>\n",
" <th>log_timestamp</th>\n",
" <th>log_actor</th>\n",
" <th>log_namespace</th>\n",
" <th>log_page</th>\n",
" <th>page_title</th>\n",
" <th>img_metadata</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>272446438</td>\n",
" <td>20180628193446</td>\n",
" <td>7120933.0</td>\n",
" <td>6</td>\n",
" <td>70328570</td>\n",
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n",
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>272446448</td>\n",
" <td>20180628193511</td>\n",
" <td>7120933.0</td>\n",
" <td>6</td>\n",
" <td>70328570</td>\n",
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n",
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>272446451</td>\n",
" <td>20180628193527</td>\n",
" <td>7120933.0</td>\n",
" <td>6</td>\n",
" <td>70328570</td>\n",
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n",
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>272446458</td>\n",
" <td>20180628193546</td>\n",
" <td>7120933.0</td>\n",
" <td>6</td>\n",
" <td>70328570</td>\n",
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n",
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>272446465</td>\n",
" <td>20180628193601</td>\n",
" <td>7120933.0</td>\n",
" <td>6</td>\n",
" <td>70328570</td>\n",
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n",
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9995</th>\n",
" <td>275395285</td>\n",
" <td>20180928230512</td>\n",
" <td>13772.0</td>\n",
" <td>6</td>\n",
" <td>73177585</td>\n",
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n",
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9996</th>\n",
" <td>275395291</td>\n",
" <td>20180928230534</td>\n",
" <td>13772.0</td>\n",
" <td>6</td>\n",
" <td>73177591</td>\n",
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n",
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9997</th>\n",
" <td>275395293</td>\n",
" <td>20180928230554</td>\n",
" <td>13772.0</td>\n",
" <td>6</td>\n",
" <td>73177593</td>\n",
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n",
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9998</th>\n",
" <td>275395295</td>\n",
" <td>20180928230614</td>\n",
" <td>13772.0</td>\n",
" <td>6</td>\n",
" <td>73177596</td>\n",
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n",
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9999</th>\n",
" <td>275395298</td>\n",
" <td>20180928230635</td>\n",
" <td>13772.0</td>\n",
" <td>6</td>\n",
" <td>73177599</td>\n",
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n",
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10000 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" log_id log_timestamp log_actor log_namespace log_page \\\n",
"0 272446438 20180628193446 7120933.0 6 70328570 \n",
"1 272446448 20180628193511 7120933.0 6 70328570 \n",
"2 272446451 20180628193527 7120933.0 6 70328570 \n",
"3 272446458 20180628193546 7120933.0 6 70328570 \n",
"4 272446465 20180628193601 7120933.0 6 70328570 \n",
"... ... ... ... ... ... \n",
"9995 275395285 20180928230512 13772.0 6 73177585 \n",
"9996 275395291 20180928230534 13772.0 6 73177591 \n",
"9997 275395293 20180928230554 13772.0 6 73177593 \n",
"9998 275395295 20180928230614 13772.0 6 73177596 \n",
"9999 275395298 20180928230635 13772.0 6 73177599 \n",
"\n",
" page_title \\\n",
"0 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n",
"1 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n",
"2 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n",
"3 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n",
"4 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n",
"... ... \n",
"9995 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n",
"9996 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n",
"9997 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n",
"9998 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n",
"9999 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n",
"\n",
" img_metadata \n",
"0 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... \n",
"1 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... \n",
"2 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... \n",
"3 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... \n",
"4 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... \n",
"... ... \n",
"9995 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... \n",
"9996 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... \n",
"9997 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... \n",
"9998 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... \n",
"9999 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... \n",
"\n",
"[10000 rows x 7 columns]"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc999d3f",
"metadata": {},
"outputs": [],
"source": [
"# find duplicates (overwritten files)\n"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "93f5fec2",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import re\n",
"\n",
"import chardet\n",
"from phpserialize import unserialize\n",
"\n",
"def regex_find(field, s, default):\n",
"    \"\"\"Regex fallback: pull the PHP-serialized string value of `field` out of `s`.\n",
"\n",
"    Tries the doubled-quote spelling (\"\"field\"\";s:N:\"\"value\"\";) first, then the\n",
"    plain one (\"field\";s:N:\"value\";). Returns the first captured value, or\n",
"    `default` when neither pattern matches.\n",
"    \"\"\"\n",
"    # re.escape keeps any metacharacters in `field` literal; the raw string\n",
"    # avoids the invalid-escape warning that '\\d' raises in a plain literal.\n",
"    name = re.escape(field)\n",
"    for quote in ('\"\"', '\"'):\n",
"        m = re.search(quote + name + quote + r';s:\\d+:' + quote + '(.*?)' + quote + ';', s)\n",
"        if m:\n",
"            return m[1]\n",
"    return default\n",
"\n",
"def charset(bb):\n",
"    \"\"\"Return chardet's encoding guess for `bb`, or None unless confidence > 0.7.\"\"\"\n",
"    d = chardet.detect(bb)\n",
"    if d['confidence'] > 0.7:\n",
"        return d['encoding']\n",
"    return None\n",
"\n",
"def unser(field, s):\n",
"    \"\"\"Extract one metadata field from an img_metadata string.\n",
"\n",
"    field -- tag name as bytes, e.g. b'DateTime'\n",
"    s -- metadata text: PHP-serialized, or JSON shaped like {\"data\": {...}}\n",
"\n",
"    Returns the value as str, None when the field is absent or `s` is not a\n",
"    string, or an 'error: ...' marker when `s` cannot be parsed.\n",
"    \"\"\"\n",
"    if not isinstance(s, str):\n",
"        # e.g. NaN for an empty CSV cell: nothing to parse\n",
"        return None\n",
"    key = field.decode('utf-8')\n",
"    if s.lstrip().startswith('{'):\n",
"        # JSON-style metadata, which the PHP unserializer rejects\n",
"        try:\n",
"            parsed = json.loads(s)\n",
"        except ValueError:\n",
"            parsed = None  # not valid JSON after all; fall through to the PHP path\n",
"        if isinstance(parsed, dict):\n",
"            data = parsed.get('data', parsed)\n",
"            value = data.get(key) if isinstance(data, dict) else None\n",
"            return value if value is None or isinstance(value, str) else str(value)\n",
"    bb = s.encode()\n",
"    ch = charset(bb)\n",
"    if ch is None:\n",
"        return regex_find(key, s, 'error: no charset')\n",
"    try:\n",
"        u = unserialize(bb)\n",
"        if field in u:\n",
"            value = u[field]\n",
"            # non-bytes values (e.g. ints) have no .decode\n",
"            return value.decode(ch) if isinstance(value, bytes) else str(value)\n",
"        return None\n",
"    except ValueError:\n",
"        # covers unserialize failures and UnicodeDecodeError (a ValueError subclass)\n",
"        return regex_find(key, s[0:300], 'error: unser')\n",
"\n",
"# d_mod <- DateTime tag, d_cre <- DateTimeOriginal tag\n",
"df['d_mod'] = df.img_metadata.apply(lambda x: unser(b'DateTime', x))\n",
"df['d_cre'] = df.img_metadata.apply(lambda x: unser(b'DateTimeOriginal', x))"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "31221918",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>log_id</th>\n",
" <th>log_timestamp</th>\n",
" <th>log_actor</th>\n",
" <th>log_namespace</th>\n",
" <th>log_page</th>\n",
" <th>page_title</th>\n",
" <th>img_metadata</th>\n",
" <th>d_mod</th>\n",
" <th>d_cre</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>426</th>\n",
" <td>272604965</td>\n",
" <td>20180702184734</td>\n",
" <td>21472.0</td>\n",
" <td>6</td>\n",
" <td>70482003</td>\n",
" <td>Sien-weg.jpg</td>\n",
" <td>{\"data\":{\"ImageWidth\":4000,\"ImageLength\":3000,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>469</th>\n",
" <td>272642307</td>\n",
" <td>20180703122333</td>\n",
" <td>7240467.0</td>\n",
" <td>6</td>\n",
" <td>70518397</td>\n",
" <td>Sierra_Norte_de_Sevilla.jpg</td>\n",
" <td>{\"data\":{\"MEDIAWIKI_EXIF_VERSION\":2}}</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2356</th>\n",
" <td>273156179</td>\n",
" <td>20180720154501</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71007032</td>\n",
" <td>Grenchenberg_-_Hooibeestje_(Coenonympha_pamphi...</td>\n",
" <td>{\"data\":{\"ImageWidth\":2348,\"ImageLength\":3229,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2361</th>\n",
" <td>273156275</td>\n",
" <td>20180720154744</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71007120</td>\n",
" <td>Grenchenberg_-_Zodeklokje_(Campanula_cochleari...</td>\n",
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2368</th>\n",
" <td>273156408</td>\n",
" <td>20180720155424</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71007268</td>\n",
" <td>Grenchenberg_-_Knautia_(flower).jpg</td>\n",
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2369</th>\n",
" <td>273156412</td>\n",
" <td>20180720155447</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71007273</td>\n",
" <td>Grenchenberg_-_Knautia.jpg</td>\n",
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2379</th>\n",
" <td>273157697</td>\n",
" <td>20180720164939</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71008474</td>\n",
" <td>Grenchenberg_-_Groene_Bergsprinkhaan_(Miramell...</td>\n",
" <td>{\"data\":{\"MEDIAWIKI_EXIF_VERSION\":2}}</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2414</th>\n",
" <td>273163592</td>\n",
" <td>20180720201447</td>\n",
" <td>9853.0</td>\n",
" <td>6</td>\n",
" <td>71014066</td>\n",
" <td>Raymond_as_Umaru_Doma_at_Comic-Con_Internation...</td>\n",
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":2268,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2543</th>\n",
" <td>273179983</td>\n",
" <td>20180721141441</td>\n",
" <td>91101.0</td>\n",
" <td>6</td>\n",
" <td>71028016</td>\n",
" <td>Sangagiri_new_bus_stand_1.jpg</td>\n",
" <td>{\"data\":{\"Make\":\"OnePlus\",\"Model\":\"ONEPLUS A30...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2561</th>\n",
" <td>273184202</td>\n",
" <td>20180721171318</td>\n",
" <td>9853.0</td>\n",
" <td>6</td>\n",
" <td>71031982</td>\n",
" <td>April_O'Neil_cosplayer_at_Comic-Con_Internatio...</td>\n",
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":2268,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2579</th>\n",
" <td>273184643</td>\n",
" <td>20180721173207</td>\n",
" <td>9853.0</td>\n",
" <td>6</td>\n",
" <td>71032436</td>\n",
" <td>Sinon_cosplayer_at_Comic-Con_International_201...</td>\n",
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":2268,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2635</th>\n",
" <td>273187234</td>\n",
" <td>20180721192622</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71035051</td>\n",
" <td>Grenchen_-_Gewone_Tuinslak_(Cepaea_nemoralis).jpg</td>\n",
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2636</th>\n",
" <td>273187285</td>\n",
" <td>20180721192710</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71035102</td>\n",
" <td>Grenchen_-_Gewone_Tuinslak_(Cepaea_nemoralis)_...</td>\n",
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2639</th>\n",
" <td>273187355</td>\n",
" <td>20180721193022</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71035181</td>\n",
" <td>Grenchen_-_Bosrank_(Clematis_vitalba).jpg</td>\n",
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2754</th>\n",
" <td>273201527</td>\n",
" <td>20180722141339</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71050341</td>\n",
" <td>Grenchen_-_Robertskruid_(Geranium_robertianum)...</td>\n",
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2782</th>\n",
" <td>273203256</td>\n",
" <td>20180722155257</td>\n",
" <td>91101.0</td>\n",
" <td>6</td>\n",
" <td>71052217</td>\n",
" <td>Sangagiri_hill_seen_from_national_highway.jpg</td>\n",
" <td>{\"data\":{\"Make\":\"OnePlus\",\"Model\":\"ONEPLUS A30...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2835</th>\n",
" <td>273211644</td>\n",
" <td>20180722222420</td>\n",
" <td>9853.0</td>\n",
" <td>6</td>\n",
" <td>71060261</td>\n",
" <td>Cosplayer_of_Kanna_Kamui_standing_at_Comic-Con...</td>\n",
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":2268,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2866</th>\n",
" <td>273221479</td>\n",
" <td>20180723125154</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71070339</td>\n",
" <td>Grenchen_-_Verbascum_phlomoides.jpg</td>\n",
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2868</th>\n",
" <td>273221494</td>\n",
" <td>20180723125331</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71070358</td>\n",
" <td>Grenchen_-_Gele_Kamille_(Cota_tinctoria).jpg</td>\n",
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2887</th>\n",
" <td>273224378</td>\n",
" <td>20180723153327</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71073048</td>\n",
" <td>Grenchen_-_2_x_Bruingemarmerde_Schildwants_(Ha...</td>\n",
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2889</th>\n",
" <td>273224394</td>\n",
" <td>20180723153447</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71073066</td>\n",
" <td>Grenchen_-_Europese_Honingbij_(Apis_mellifera)...</td>\n",
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2896</th>\n",
" <td>273227143</td>\n",
" <td>20180723181237</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71075743</td>\n",
" <td>Grenchen_-_Gewone_Hooiwagen_(Phalangium_opilio...</td>\n",
" <td>{\"data\":{\"MEDIAWIKI_EXIF_VERSION\":2}}</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2937</th>\n",
" <td>273230521</td>\n",
" <td>20180723213707</td>\n",
" <td>98647.0</td>\n",
" <td>6</td>\n",
" <td>71079316</td>\n",
" <td>Alkmaar_Ansichten_-_Gewelfde_Stenenbrug.jpg</td>\n",
" <td>{\"data\":{\"Make\":\"Motorola\",\"Model\":\"Moto G (4)...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2944</th>\n",
" <td>273230748</td>\n",
" <td>20180723220621</td>\n",
" <td>98647.0</td>\n",
" <td>6</td>\n",
" <td>71079632</td>\n",
" <td>Alkmaar_Ansichten_-_Appelsteegbrug.jpg</td>\n",
" <td>{\"data\":{\"Make\":\"Motorola\",\"Model\":\"Moto G (4)...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3043</th>\n",
" <td>273242880</td>\n",
" <td>20180724153446</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71090845</td>\n",
" <td>Walperswil_-_Tower.jpg</td>\n",
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3177</th>\n",
" <td>273258739</td>\n",
" <td>20180725135753</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71107445</td>\n",
" <td>Grenchen_-_Huisvlieg_(Musca_domestica).jpg</td>\n",
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3264</th>\n",
" <td>273280450</td>\n",
" <td>20180726173539</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71129960</td>\n",
" <td>Biel_-_Kruisspin_(Araneus_diadematus).jpg</td>\n",
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3357</th>\n",
" <td>273292615</td>\n",
" <td>20180727125521</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71141931</td>\n",
" <td>Grenchen_-_Cerceris.jpg</td>\n",
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3646</th>\n",
" <td>273316860</td>\n",
" <td>20180728170250</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71167089</td>\n",
" <td>Grenchen_-_Hibiscus.jpg</td>\n",
" <td>{\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3649</th>\n",
" <td>273316897</td>\n",
" <td>20180728170512</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71167126</td>\n",
" <td>Grenchen_-_Holcostethus_sphacelatus.jpg</td>\n",
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3761</th>\n",
" <td>273333194</td>\n",
" <td>20180729151800</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71183367</td>\n",
" <td>Grenchen_-_Bruingemarmerde_Schildwants_(Halyom...</td>\n",
" <td>{\"data\":{\"MEDIAWIKI_EXIF_VERSION\":2}}</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3875</th>\n",
" <td>273344497</td>\n",
" <td>20180730063908</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71194072</td>\n",
" <td>Grenchen_-_Grote_Langlijf_(Sphaerophoria_scrip...</td>\n",
" <td>{\"data\":{\"ImageWidth\":3024,\"Model\":\"Nexus 5X\",...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3883</th>\n",
" <td>273347896</td>\n",
" <td>20180730100427</td>\n",
" <td>2444.0</td>\n",
" <td>6</td>\n",
" <td>71197784</td>\n",
" <td>Grenchen_-_Drietandvlakjesmot_(Catoptria_false...</td>\n",
" <td>{\"data\":{\"ImageWidth\":1803,\"ImageLength\":2496,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5421</th>\n",
" <td>273703699</td>\n",
" <td>20180812134147</td>\n",
" <td>7268929.0</td>\n",
" <td>6</td>\n",
" <td>71552514</td>\n",
" <td>Une_vieille_charrette_abandonnée.jpg</td>\n",
" <td>{\"data\":{\"ImageWidth\":5312,\"ImageLength\":2988,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5603</th>\n",
" <td>273768940</td>\n",
" <td>20180814182000</td>\n",
" <td>22097.0</td>\n",
" <td>6</td>\n",
" <td>71616122</td>\n",
" <td>Runsten_Frustuna_35-1.jpg</td>\n",
" <td>a:45:{s:5:\"Model\";s:8:\"SM-G800F\";s:10:\"ImageWi...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5604</th>\n",
" <td>273769099</td>\n",
" <td>20180814182434</td>\n",
" <td>22097.0</td>\n",
" <td>6</td>\n",
" <td>71616285</td>\n",
" <td>Runsten_Frustuna_35-1_Information.jpg</td>\n",
" <td>a:45:{s:5:\"Model\";s:8:\"SM-G800F\";s:10:\"ImageWi...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5613</th>\n",
" <td>273782307</td>\n",
" <td>20180815035327</td>\n",
" <td>115107.0</td>\n",
" <td>6</td>\n",
" <td>71629055</td>\n",
" <td>Usable_water_tank.jpg</td>\n",
" <td>{\"data\":{\"Make\":\"OPPO\",\"Model\":\"1206\",\"XResolu...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8028</th>\n",
" <td>274735256</td>\n",
" <td>20180908150048</td>\n",
" <td>20694.0</td>\n",
" <td>6</td>\n",
" <td>72557335</td>\n",
" <td>Simit_seller,_Istanbul.jpg</td>\n",
" <td>{\"data\":{\"Make\":\"Canon\",\"Model\":\"Canon EOS 550...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8709</th>\n",
" <td>274989950</td>\n",
" <td>20180916181154</td>\n",
" <td>38096.0</td>\n",
" <td>6</td>\n",
" <td>72790476</td>\n",
" <td>The_Tank_exterior,_September_2018.jpg</td>\n",
" <td>a:42:{s:10:\"ImageWidth\";i:4032;s:11:\"ImageLeng...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8964</th>\n",
" <td>275064700</td>\n",
" <td>20180919102121</td>\n",
" <td>7377458.0</td>\n",
" <td>6</td>\n",
" <td>72864560</td>\n",
" <td>Water_fountain_in_the_Public_Park_of_Yangon_2.jpg</td>\n",
" <td>{\"data\":{\"Make\":\"Canon\",\"Model\":\"Canon EOS 700...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9147</th>\n",
" <td>275093545</td>\n",
" <td>20180920132547</td>\n",
" <td>7381757.0</td>\n",
" <td>6</td>\n",
" <td>72890791</td>\n",
" <td>Malva_(209113498).jpg</td>\n",
" <td>{\"data\":{\"DateTime\":\"2018:09:19 21:56:24\",\"Mod...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9275</th>\n",
" <td>275111560</td>\n",
" <td>20180921041135</td>\n",
" <td>9853.0</td>\n",
" <td>6</td>\n",
" <td>72907467</td>\n",
" <td>University_Avenue_Hillcrest_neighborhood_sign_...</td>\n",
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9276</th>\n",
" <td>275111561</td>\n",
" <td>20180921041148</td>\n",
" <td>9853.0</td>\n",
" <td>6</td>\n",
" <td>72907469</td>\n",
" <td>University_Avenue_Hillcrest_neighborhood_sign_...</td>\n",
" <td>{\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9349</th>\n",
" <td>275135369</td>\n",
" <td>20180921234950</td>\n",
" <td>38096.0</td>\n",
" <td>6</td>\n",
" <td>72930726</td>\n",
" <td>Sheepshead_Bay_Library,_September_2018.jpg</td>\n",
" <td>a:42:{s:10:\"ImageWidth\";i:4032;s:11:\"ImageLeng...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9779</th>\n",
" <td>275298512</td>\n",
" <td>20180926154025</td>\n",
" <td>7080362.0</td>\n",
" <td>6</td>\n",
" <td>73089560</td>\n",
" <td>Halifax-Dartmouth_Ferry.jpg</td>\n",
" <td>a:4:{s:8:\"Software\";s:6:\"Google\";s:11:\"ExifVer...</td>\n",
" <td>error: unser</td>\n",
" <td>error: unser</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" log_id log_timestamp log_actor log_namespace log_page \\\n",
"426 272604965 20180702184734 21472.0 6 70482003 \n",
"469 272642307 20180703122333 7240467.0 6 70518397 \n",
"2356 273156179 20180720154501 2444.0 6 71007032 \n",
"2361 273156275 20180720154744 2444.0 6 71007120 \n",
"2368 273156408 20180720155424 2444.0 6 71007268 \n",
"2369 273156412 20180720155447 2444.0 6 71007273 \n",
"2379 273157697 20180720164939 2444.0 6 71008474 \n",
"2414 273163592 20180720201447 9853.0 6 71014066 \n",
"2543 273179983 20180721141441 91101.0 6 71028016 \n",
"2561 273184202 20180721171318 9853.0 6 71031982 \n",
"2579 273184643 20180721173207 9853.0 6 71032436 \n",
"2635 273187234 20180721192622 2444.0 6 71035051 \n",
"2636 273187285 20180721192710 2444.0 6 71035102 \n",
"2639 273187355 20180721193022 2444.0 6 71035181 \n",
"2754 273201527 20180722141339 2444.0 6 71050341 \n",
"2782 273203256 20180722155257 91101.0 6 71052217 \n",
"2835 273211644 20180722222420 9853.0 6 71060261 \n",
"2866 273221479 20180723125154 2444.0 6 71070339 \n",
"2868 273221494 20180723125331 2444.0 6 71070358 \n",
"2887 273224378 20180723153327 2444.0 6 71073048 \n",
"2889 273224394 20180723153447 2444.0 6 71073066 \n",
"2896 273227143 20180723181237 2444.0 6 71075743 \n",
"2937 273230521 20180723213707 98647.0 6 71079316 \n",
"2944 273230748 20180723220621 98647.0 6 71079632 \n",
"3043 273242880 20180724153446 2444.0 6 71090845 \n",
"3177 273258739 20180725135753 2444.0 6 71107445 \n",
"3264 273280450 20180726173539 2444.0 6 71129960 \n",
"3357 273292615 20180727125521 2444.0 6 71141931 \n",
"3646 273316860 20180728170250 2444.0 6 71167089 \n",
"3649 273316897 20180728170512 2444.0 6 71167126 \n",
"3761 273333194 20180729151800 2444.0 6 71183367 \n",
"3875 273344497 20180730063908 2444.0 6 71194072 \n",
"3883 273347896 20180730100427 2444.0 6 71197784 \n",
"5421 273703699 20180812134147 7268929.0 6 71552514 \n",
"5603 273768940 20180814182000 22097.0 6 71616122 \n",
"5604 273769099 20180814182434 22097.0 6 71616285 \n",
"5613 273782307 20180815035327 115107.0 6 71629055 \n",
"8028 274735256 20180908150048 20694.0 6 72557335 \n",
"8709 274989950 20180916181154 38096.0 6 72790476 \n",
"8964 275064700 20180919102121 7377458.0 6 72864560 \n",
"9147 275093545 20180920132547 7381757.0 6 72890791 \n",
"9275 275111560 20180921041135 9853.0 6 72907467 \n",
"9276 275111561 20180921041148 9853.0 6 72907469 \n",
"9349 275135369 20180921234950 38096.0 6 72930726 \n",
"9779 275298512 20180926154025 7080362.0 6 73089560 \n",
"\n",
" page_title \\\n",
"426 Sien-weg.jpg \n",
"469 Sierra_Norte_de_Sevilla.jpg \n",
"2356 Grenchenberg_-_Hooibeestje_(Coenonympha_pamphi... \n",
"2361 Grenchenberg_-_Zodeklokje_(Campanula_cochleari... \n",
"2368 Grenchenberg_-_Knautia_(flower).jpg \n",
"2369 Grenchenberg_-_Knautia.jpg \n",
"2379 Grenchenberg_-_Groene_Bergsprinkhaan_(Miramell... \n",
"2414 Raymond_as_Umaru_Doma_at_Comic-Con_Internation... \n",
"2543 Sangagiri_new_bus_stand_1.jpg \n",
"2561 April_O'Neil_cosplayer_at_Comic-Con_Internatio... \n",
"2579 Sinon_cosplayer_at_Comic-Con_International_201... \n",
"2635 Grenchen_-_Gewone_Tuinslak_(Cepaea_nemoralis).jpg \n",
"2636 Grenchen_-_Gewone_Tuinslak_(Cepaea_nemoralis)_... \n",
"2639 Grenchen_-_Bosrank_(Clematis_vitalba).jpg \n",
"2754 Grenchen_-_Robertskruid_(Geranium_robertianum)... \n",
"2782 Sangagiri_hill_seen_from_national_highway.jpg \n",
"2835 Cosplayer_of_Kanna_Kamui_standing_at_Comic-Con... \n",
"2866 Grenchen_-_Verbascum_phlomoides.jpg \n",
"2868 Grenchen_-_Gele_Kamille_(Cota_tinctoria).jpg \n",
"2887 Grenchen_-_2_x_Bruingemarmerde_Schildwants_(Ha... \n",
"2889 Grenchen_-_Europese_Honingbij_(Apis_mellifera)... \n",
"2896 Grenchen_-_Gewone_Hooiwagen_(Phalangium_opilio... \n",
"2937 Alkmaar_Ansichten_-_Gewelfde_Stenenbrug.jpg \n",
"2944 Alkmaar_Ansichten_-_Appelsteegbrug.jpg \n",
"3043 Walperswil_-_Tower.jpg \n",
"3177 Grenchen_-_Huisvlieg_(Musca_domestica).jpg \n",
"3264 Biel_-_Kruisspin_(Araneus_diadematus).jpg \n",
"3357 Grenchen_-_Cerceris.jpg \n",
"3646 Grenchen_-_Hibiscus.jpg \n",
"3649 Grenchen_-_Holcostethus_sphacelatus.jpg \n",
"3761 Grenchen_-_Bruingemarmerde_Schildwants_(Halyom... \n",
"3875 Grenchen_-_Grote_Langlijf_(Sphaerophoria_scrip... \n",
"3883 Grenchen_-_Drietandvlakjesmot_(Catoptria_false... \n",
"5421 Une_vieille_charrette_abandonnée.jpg \n",
"5603 Runsten_Frustuna_35-1.jpg \n",
"5604 Runsten_Frustuna_35-1_Information.jpg \n",
"5613 Usable_water_tank.jpg \n",
"8028 Simit_seller,_Istanbul.jpg \n",
"8709 The_Tank_exterior,_September_2018.jpg \n",
"8964 Water_fountain_in_the_Public_Park_of_Yangon_2.jpg \n",
"9147 Malva_(209113498).jpg \n",
"9275 University_Avenue_Hillcrest_neighborhood_sign_... \n",
"9276 University_Avenue_Hillcrest_neighborhood_sign_... \n",
"9349 Sheepshead_Bay_Library,_September_2018.jpg \n",
"9779 Halifax-Dartmouth_Ferry.jpg \n",
"\n",
" img_metadata d_mod \\\n",
"426 {\"data\":{\"ImageWidth\":4000,\"ImageLength\":3000,... error: unser \n",
"469 {\"data\":{\"MEDIAWIKI_EXIF_VERSION\":2}} error: unser \n",
"2356 {\"data\":{\"ImageWidth\":2348,\"ImageLength\":3229,... error: unser \n",
"2361 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,... error: unser \n",
"2368 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n",
"2369 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n",
"2379 {\"data\":{\"MEDIAWIKI_EXIF_VERSION\":2}} error: unser \n",
"2414 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":2268,... error: unser \n",
"2543 {\"data\":{\"Make\":\"OnePlus\",\"Model\":\"ONEPLUS A30... error: unser \n",
"2561 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":2268,... error: unser \n",
"2579 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":2268,... error: unser \n",
"2635 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n",
"2636 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n",
"2639 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n",
"2754 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n",
"2782 {\"data\":{\"Make\":\"OnePlus\",\"Model\":\"ONEPLUS A30... error: unser \n",
"2835 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":2268,... error: unser \n",
"2866 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,... error: unser \n",
"2868 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,... error: unser \n",
"2887 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n",
"2889 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n",
"2896 {\"data\":{\"MEDIAWIKI_EXIF_VERSION\":2}} error: unser \n",
"2937 {\"data\":{\"Make\":\"Motorola\",\"Model\":\"Moto G (4)... error: unser \n",
"2944 {\"data\":{\"Make\":\"Motorola\",\"Model\":\"Moto G (4)... error: unser \n",
"3043 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,... error: unser \n",
"3177 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n",
"3264 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n",
"3357 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n",
"3646 {\"data\":{\"ImageWidth\":3024,\"ImageLength\":4032,... error: unser \n",
"3649 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,... error: unser \n",
"3761 {\"data\":{\"MEDIAWIKI_EXIF_VERSION\":2}} error: unser \n",
"3875 {\"data\":{\"ImageWidth\":3024,\"Model\":\"Nexus 5X\",... error: unser \n",
"3883 {\"data\":{\"ImageWidth\":1803,\"ImageLength\":2496,... error: unser \n",
"5421 {\"data\":{\"ImageWidth\":5312,\"ImageLength\":2988,... error: unser \n",
"5603 a:45:{s:5:\"Model\";s:8:\"SM-G800F\";s:10:\"ImageWi... error: unser \n",
"5604 a:45:{s:5:\"Model\";s:8:\"SM-G800F\";s:10:\"ImageWi... error: unser \n",
"5613 {\"data\":{\"Make\":\"OPPO\",\"Model\":\"1206\",\"XResolu... error: unser \n",
"8028 {\"data\":{\"Make\":\"Canon\",\"Model\":\"Canon EOS 550... error: unser \n",
"8709 a:42:{s:10:\"ImageWidth\";i:4032;s:11:\"ImageLeng... error: unser \n",
"8964 {\"data\":{\"Make\":\"Canon\",\"Model\":\"Canon EOS 700... error: unser \n",
"9147 {\"data\":{\"DateTime\":\"2018:09:19 21:56:24\",\"Mod... error: unser \n",
"9275 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,... error: unser \n",
"9276 {\"data\":{\"ImageWidth\":4032,\"ImageLength\":3024,... error: unser \n",
"9349 a:42:{s:10:\"ImageWidth\";i:4032;s:11:\"ImageLeng... error: unser \n",
"9779 a:4:{s:8:\"Software\";s:6:\"Google\";s:11:\"ExifVer... error: unser \n",
"\n",
" d_cre \n",
"426 error: unser \n",
"469 error: unser \n",
"2356 error: unser \n",
"2361 error: unser \n",
"2368 error: unser \n",
"2369 error: unser \n",
"2379 error: unser \n",
"2414 error: unser \n",
"2543 error: unser \n",
"2561 error: unser \n",
"2579 error: unser \n",
"2635 error: unser \n",
"2636 error: unser \n",
"2639 error: unser \n",
"2754 error: unser \n",
"2782 error: unser \n",
"2835 error: unser \n",
"2866 error: unser \n",
"2868 error: unser \n",
"2887 error: unser \n",
"2889 error: unser \n",
"2896 error: unser \n",
"2937 error: unser \n",
"2944 error: unser \n",
"3043 error: unser \n",
"3177 error: unser \n",
"3264 error: unser \n",
"3357 error: unser \n",
"3646 error: unser \n",
"3649 error: unser \n",
"3761 error: unser \n",
"3875 error: unser \n",
"3883 error: unser \n",
"5421 error: unser \n",
"5603 error: unser \n",
"5604 error: unser \n",
"5613 error: unser \n",
"8028 error: unser \n",
"8709 error: unser \n",
"8964 error: unser \n",
"9147 error: unser \n",
"9275 error: unser \n",
"9276 error: unser \n",
"9349 error: unser \n",
"9779 error: unser "
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check for parse errors: show every row whose d_mod contains the \"error:\" marker.\n",
"# na=False makes missing values count as non-matches.\n",
"df[df.d_mod.str.contains(\"error:\", na=False)]"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "6545539c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>log_id</th>\n",
" <th>log_timestamp</th>\n",
" <th>log_actor</th>\n",
" <th>log_namespace</th>\n",
" <th>log_page</th>\n",
" <th>page_title</th>\n",
" <th>img_metadata</th>\n",
" <th>d_mod</th>\n",
" <th>d_cre</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>272446438</td>\n",
" <td>20180628193446</td>\n",
" <td>7120933.0</td>\n",
" <td>6</td>\n",
" <td>70328570</td>\n",
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n",
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n",
" <td>2018:06:28 15:55:08</td>\n",
" <td>2018:02:26 08:31:44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>272446448</td>\n",
" <td>20180628193511</td>\n",
" <td>7120933.0</td>\n",
" <td>6</td>\n",
" <td>70328570</td>\n",
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n",
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n",
" <td>2018:06:28 15:55:08</td>\n",
" <td>2018:02:26 08:31:44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>272446451</td>\n",
" <td>20180628193527</td>\n",
" <td>7120933.0</td>\n",
" <td>6</td>\n",
" <td>70328570</td>\n",
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n",
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n",
" <td>2018:06:28 15:55:08</td>\n",
" <td>2018:02:26 08:31:44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>272446458</td>\n",
" <td>20180628193546</td>\n",
" <td>7120933.0</td>\n",
" <td>6</td>\n",
" <td>70328570</td>\n",
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n",
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n",
" <td>2018:06:28 15:55:08</td>\n",
" <td>2018:02:26 08:31:44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>272446465</td>\n",
" <td>20180628193601</td>\n",
" <td>7120933.0</td>\n",
" <td>6</td>\n",
" <td>70328570</td>\n",
" <td>Red_rumped_swallow_(Scientific_name-_Cecropis_...</td>\n",
" <td>a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:...</td>\n",
" <td>2018:06:28 15:55:08</td>\n",
" <td>2018:02:26 08:31:44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9995</th>\n",
" <td>275395285</td>\n",
" <td>20180928230512</td>\n",
" <td>13772.0</td>\n",
" <td>6</td>\n",
" <td>73177585</td>\n",
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n",
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n",
" <td>2018:09:28 15:37:55</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9996</th>\n",
" <td>275395291</td>\n",
" <td>20180928230534</td>\n",
" <td>13772.0</td>\n",
" <td>6</td>\n",
" <td>73177591</td>\n",
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n",
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n",
" <td>2018:09:28 15:38:32</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9997</th>\n",
" <td>275395293</td>\n",
" <td>20180928230554</td>\n",
" <td>13772.0</td>\n",
" <td>6</td>\n",
" <td>73177593</td>\n",
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n",
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n",
" <td>2018:09:28 15:37:13</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9998</th>\n",
" <td>275395295</td>\n",
" <td>20180928230614</td>\n",
" <td>13772.0</td>\n",
" <td>6</td>\n",
" <td>73177596</td>\n",
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n",
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n",
" <td>2018:09:28 15:36:41</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9999</th>\n",
" <td>275395298</td>\n",
" <td>20180928230635</td>\n",
" <td>13772.0</td>\n",
" <td>6</td>\n",
" <td>73177599</td>\n",
" <td>Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_...</td>\n",
" <td>a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng...</td>\n",
" <td>2018:09:28 15:36:20</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2179 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
" log_id log_timestamp log_actor log_namespace log_page \\\n",
"0 272446438 20180628193446 7120933.0 6 70328570 \n",
"1 272446448 20180628193511 7120933.0 6 70328570 \n",
"2 272446451 20180628193527 7120933.0 6 70328570 \n",
"3 272446458 20180628193546 7120933.0 6 70328570 \n",
"4 272446465 20180628193601 7120933.0 6 70328570 \n",
"... ... ... ... ... ... \n",
"9995 275395285 20180928230512 13772.0 6 73177585 \n",
"9996 275395291 20180928230534 13772.0 6 73177591 \n",
"9997 275395293 20180928230554 13772.0 6 73177593 \n",
"9998 275395295 20180928230614 13772.0 6 73177596 \n",
"9999 275395298 20180928230635 13772.0 6 73177599 \n",
"\n",
" page_title \\\n",
"0 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n",
"1 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n",
"2 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n",
"3 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n",
"4 Red_rumped_swallow_(Scientific_name-_Cecropis_... \n",
"... ... \n",
"9995 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n",
"9996 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n",
"9997 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n",
"9998 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n",
"9999 Praha,_Betlémská_kaple,_stavby_z_dob_Karla_IV_... \n",
"\n",
" img_metadata d_mod \\\n",
"0 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... 2018:06:28 15:55:08 \n",
"1 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... 2018:06:28 15:55:08 \n",
"2 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... 2018:06:28 15:55:08 \n",
"3 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... 2018:06:28 15:55:08 \n",
"4 a:40:{s:4:\"Make\";s:5:\"NIKON\";s:5:\"Model\";s:12:... 2018:06:28 15:55:08 \n",
"... ... ... \n",
"9995 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... 2018:09:28 15:37:55 \n",
"9996 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... 2018:09:28 15:38:32 \n",
"9997 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... 2018:09:28 15:37:13 \n",
"9998 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... 2018:09:28 15:36:41 \n",
"9999 a:45:{s:10:\"ImageWidth\";i:4128;s:11:\"ImageLeng... 2018:09:28 15:36:20 \n",
"\n",
" d_cre \n",
"0 2018:02:26 08:31:44 \n",
"1 2018:02:26 08:31:44 \n",
"2 2018:02:26 08:31:44 \n",
"3 2018:02:26 08:31:44 \n",
"4 2018:02:26 08:31:44 \n",
"... ... \n",
"9995 None \n",
"9996 None \n",
"9997 None \n",
"9998 None \n",
"9999 None \n",
"\n",
"[2179 rows x 9 columns]"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# List candidate file pages to be fixed.\n",
"# First blank out values carrying the \"error:\" marker so they are treated as missing\n",
"# (this overwrites the columns of df in place; later cells rely on it),\n",
"# then keep the rows whose d_mod is present and differs from d_cre.\n",
"for col in ('d_mod', 'd_cre'):\n",
"    df.loc[df[col].str.contains(\"error:\", na=False), col] = None\n",
"df_to_fix = df[df.d_mod.notnull() & (df.d_mod != df.d_cre)]\n",
"df_to_fix"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "ee09a13a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>log_id</th>\n",
" <th>log_timestamp</th>\n",
" <th>log_actor</th>\n",
" <th>log_namespace</th>\n",
" <th>log_page</th>\n",
" <th>page_title</th>\n",
" <th>img_metadata</th>\n",
" <th>d_mod</th>\n",
" <th>d_cre</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5238</th>\n",
" <td>273667246</td>\n",
" <td>20180811022148</td>\n",
" <td>23537.0</td>\n",
" <td>6</td>\n",
" <td>71517161</td>\n",
" <td>Penguin_incubation,_penguins_beach_Cape_Town_.jpg</td>\n",
" <td>a:36:{s:10:\"ImageWidth\";i:2576;s:11:\"ImageLeng...</td>\n",
" <td>2018:07:18 16:16:37</td>\n",
" <td>2018:07:18 16:16:37</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6859</th>\n",
" <td>274203234</td>\n",
" <td>20180826233019</td>\n",
" <td>7268929.0</td>\n",
" <td>6</td>\n",
" <td>72048177</td>\n",
" <td>3_donkeys_saying_hello.jpg</td>\n",
" <td>a:1:{s:22:\"MEDIAWIKI_EXIF_VERSION\";i:2;}</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8788</th>\n",
" <td>275015090</td>\n",
" <td>20180917141853</td>\n",
" <td>957.0</td>\n",
" <td>6</td>\n",
" <td>72815806</td>\n",
" <td>Menhir_Hackpfüffel_-_3.jpg</td>\n",
" <td>a:42:{s:4:\"Make\";s:7:\"samsung\";s:5:\"Model\";s:9...</td>\n",
" <td>2018:09:16 16:57:27</td>\n",
" <td>2018:09:16 16:57:27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5162</th>\n",
" <td>273650797</td>\n",
" <td>20180810120809</td>\n",
" <td>7268129.0</td>\n",
" <td>6</td>\n",
" <td>71500539</td>\n",
" <td>Tomba_Amerigo_2.jpg</td>\n",
" <td>a:1:{s:22:\"MEDIAWIKI_EXIF_VERSION\";i:2;}</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7571</th>\n",
" <td>274520386</td>\n",
" <td>20180903234215</td>\n",
" <td>7360763.0</td>\n",
" <td>6</td>\n",
" <td>72332397</td>\n",
" <td>In_west_Mogok.jpg</td>\n",
" <td>a:45:{s:4:\"Make\";s:17:\"NIKON CORPORATION\";s:5:...</td>\n",
" <td>2018:03:29 10:13:50</td>\n",
" <td>2018:03:29 10:13:50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5627</th>\n",
" <td>273787443</td>\n",
" <td>20180815085448</td>\n",
" <td>4887272.0</td>\n",
" <td>6</td>\n",
" <td>71633870</td>\n",
" <td>Melmuri_water_flood_2018.jpg</td>\n",
" <td>a:32:{s:4:\"Make\";s:4:\"LAVA\";s:5:\"Model\";s:2:\"R...</td>\n",
" <td>2018:08:15 14:25:42</td>\n",
" <td>2018:08:15 14:25:42</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4387</th>\n",
" <td>273477636</td>\n",
" <td>20180804075231</td>\n",
" <td>8589.0</td>\n",
" <td>6</td>\n",
" <td>71329345</td>\n",
" <td>Wittenberger_straße_dresden_2018-08-04_-_7.jpg</td>\n",
" <td>a:55:{s:10:\"ImageWidth\";i:4160;s:11:\"ImageLeng...</td>\n",
" <td>2018:08:04 08:40:23</td>\n",
" <td>2018:08:04 08:40:23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4855</th>\n",
" <td>273594028</td>\n",
" <td>20180808062154</td>\n",
" <td>31637.0</td>\n",
" <td>6</td>\n",
" <td>71445302</td>\n",
" <td>Curious_building_near_有栖川公園.jpg</td>\n",
" <td>a:46:{s:4:\"Make\";s:17:\"NIKON CORPORATION\";s:5:...</td>\n",
" <td>2018:07:29 15:21:19</td>\n",
" <td>2018:07:29 15:21:19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5486</th>\n",
" <td>273718990</td>\n",
" <td>20180813033611</td>\n",
" <td>5869.0</td>\n",
" <td>6</td>\n",
" <td>71567068</td>\n",
" <td>Maa_Vindhyavasini_temple,_Vindhyachal.jpg</td>\n",
" <td>a:37:{s:4:\"Make\";s:9:\"PANASONIC\";s:5:\"Model\";s...</td>\n",
" <td>2018:06:22 08:53:37</td>\n",
" <td>2018:06:22 08:53:37</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3257</th>\n",
" <td>273279853</td>\n",
" <td>20180726171238</td>\n",
" <td>1366.0</td>\n",
" <td>6</td>\n",
" <td>71129400</td>\n",
" <td>Visitor_centre_for_Ferniehirst_Castle.jpg</td>\n",
" <td>a:32:{s:4:\"Make\";s:5:\"CUBOT\";s:5:\"Model\";s:9:\"...</td>\n",
" <td>2018:07:26 14:38:33</td>\n",
" <td>2018:07:26 14:38:33</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>7693 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
" log_id log_timestamp log_actor log_namespace log_page \\\n",
"5238 273667246 20180811022148 23537.0 6 71517161 \n",
"6859 274203234 20180826233019 7268929.0 6 72048177 \n",
"8788 275015090 20180917141853 957.0 6 72815806 \n",
"5162 273650797 20180810120809 7268129.0 6 71500539 \n",
"7571 274520386 20180903234215 7360763.0 6 72332397 \n",
"... ... ... ... ... ... \n",
"5627 273787443 20180815085448 4887272.0 6 71633870 \n",
"4387 273477636 20180804075231 8589.0 6 71329345 \n",
"4855 273594028 20180808062154 31637.0 6 71445302 \n",
"5486 273718990 20180813033611 5869.0 6 71567068 \n",
"3257 273279853 20180726171238 1366.0 6 71129400 \n",
"\n",
" page_title \\\n",
"5238 Penguin_incubation,_penguins_beach_Cape_Town_.jpg \n",
"6859 3_donkeys_saying_hello.jpg \n",
"8788 Menhir_Hackpfüffel_-_3.jpg \n",
"5162 Tomba_Amerigo_2.jpg \n",
"7571 In_west_Mogok.jpg \n",
"... ... \n",
"5627 Melmuri_water_flood_2018.jpg \n",
"4387 Wittenberger_straße_dresden_2018-08-04_-_7.jpg \n",
"4855 Curious_building_near_有栖川公園.jpg \n",
"5486 Maa_Vindhyavasini_temple,_Vindhyachal.jpg \n",
"3257 Visitor_centre_for_Ferniehirst_Castle.jpg \n",
"\n",
" img_metadata d_mod \\\n",
"5238 a:36:{s:10:\"ImageWidth\";i:2576;s:11:\"ImageLeng... 2018:07:18 16:16:37 \n",
"6859 a:1:{s:22:\"MEDIAWIKI_EXIF_VERSION\";i:2;} None \n",
"8788 a:42:{s:4:\"Make\";s:7:\"samsung\";s:5:\"Model\";s:9... 2018:09:16 16:57:27 \n",
"5162 a:1:{s:22:\"MEDIAWIKI_EXIF_VERSION\";i:2;} None \n",
"7571 a:45:{s:4:\"Make\";s:17:\"NIKON CORPORATION\";s:5:... 2018:03:29 10:13:50 \n",
"... ... ... \n",
"5627 a:32:{s:4:\"Make\";s:4:\"LAVA\";s:5:\"Model\";s:2:\"R... 2018:08:15 14:25:42 \n",
"4387 a:55:{s:10:\"ImageWidth\";i:4160;s:11:\"ImageLeng... 2018:08:04 08:40:23 \n",
"4855 a:46:{s:4:\"Make\";s:17:\"NIKON CORPORATION\";s:5:... 2018:07:29 15:21:19 \n",
"5486 a:37:{s:4:\"Make\";s:9:\"PANASONIC\";s:5:\"Model\";s... 2018:06:22 08:53:37 \n",
"3257 a:32:{s:4:\"Make\";s:5:\"CUBOT\";s:5:\"Model\";s:9:\"... 2018:07:26 14:38:33 \n",
"\n",
" d_cre \n",
"5238 2018:07:18 16:16:37 \n",
"6859 None \n",
"8788 2018:09:16 16:57:27 \n",
"5162 None \n",
"7571 2018:03:29 10:13:50 \n",
"... ... \n",
"5627 2018:08:15 14:25:42 \n",
"4387 2018:08:04 08:40:23 \n",
"4855 2018:07:29 15:21:19 \n",
"5486 2018:06:22 08:53:37 \n",
"3257 2018:07:26 14:38:33 \n",
"\n",
"[7693 rows x 9 columns]"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# List candidate file pages NOT to be fixed: d_mod equals d_cre, or both are missing.\n",
"# The explicit isnull() clause is needed because two missing values do not compare equal with ==.\n",
"df_not_to_fix = df[(df.d_mod == df.d_cre) | (df.d_mod.isnull() & df.d_cre.isnull())]\n",
"# Shuffle the rows for spot-checking; the fixed seed keeps the displayed order\n",
"# reproducible across kernel restarts.\n",
"df_not_to_fix.sample(frac=1, random_state=0)"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "d8764e5a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>log_id</th>\n",
" <th>log_timestamp</th>\n",
" <th>log_actor</th>\n",
" <th>log_namespace</th>\n",
" <th>log_page</th>\n",
" <th>page_title</th>\n",
" <th>img_metadata</th>\n",
" <th>d_mod</th>\n",
" <th>d_cre</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8442</th>\n",
" <td>274882044</td>\n",
" <td>20180912185800</td>\n",
" <td>7404411.0</td>\n",
" <td>6</td>\n",
" <td>72684628</td>\n",
" <td>The_alone_flower.jpg</td>\n",
" <td>a:26:{s:5:\"Model\";s:4:\"A33f\";s:16:\"YCbCrPositi...</td>\n",
" <td>None</td>\n",
" <td>2017:01:24 06:36:13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8840</th>\n",
" <td>275034405</td>\n",
" <td>20180918063725</td>\n",
" <td>7415301.0</td>\n",
" <td>6</td>\n",
" <td>72834653</td>\n",
" <td>Cannon_4.jpg</td>\n",
" <td>a:24:{s:4:\"Make\";s:4:\"vivo\";s:5:\"Model\";s:7:\"v...</td>\n",
" <td>None</td>\n",
" <td>2017:12:18 16:05:58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2927</th>\n",
" <td>273230220</td>\n",
" <td>20180723210526</td>\n",
" <td>7322156.0</td>\n",
" <td>6</td>\n",
" <td>71078928</td>\n",
" <td>Bexbacher_Hindenburgturm.jpg</td>\n",
" <td>a:28:{s:4:\"Make\";s:5:\"Nokia\";s:5:\"Model\";s:9:\"...</td>\n",
" <td>None</td>\n",
" <td>2017:06:08 16:23:16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3972</th>\n",
" <td>273367517</td>\n",
" <td>20180731083019</td>\n",
" <td>121788.0</td>\n",
" <td>6</td>\n",
" <td>71218647</td>\n",
" <td>Clouds_6.jpg</td>\n",
" <td>a:10:{s:4:\"Make\";s:6:\"Xiaomi\";s:5:\"Model\";s:5:...</td>\n",
" <td>None</td>\n",
" <td>2018:07:06 10:26:05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9658</th>\n",
" <td>275271055</td>\n",
" <td>20180925214941</td>\n",
" <td>246422.0</td>\n",
" <td>6</td>\n",
" <td>73062910</td>\n",
" <td>Plaça_de_la_Palmera_de_Sant_Martí_17.jpg</td>\n",
" <td>a:13:{s:4:\"Make\";s:8:\"motorola\";s:5:\"Model\";s:...</td>\n",
" <td>None</td>\n",
" <td>2018:09:25 15:33:31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2898</th>\n",
" <td>273227276</td>\n",
" <td>20180723182249</td>\n",
" <td>157819.0</td>\n",
" <td>6</td>\n",
" <td>71075869</td>\n",
" <td>Banja_lake,_Gramsh,_Albania_-_6.jpg</td>\n",
" <td>a:26:{s:11:\"Orientation\";i:1;s:5:\"Model\";s:9:\"...</td>\n",
" <td>None</td>\n",
" <td>2018:07:08 10:27:15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9825</th>\n",
" <td>275332475</td>\n",
" <td>20180927070920</td>\n",
" <td>46042.0</td>\n",
" <td>6</td>\n",
" <td>73117677</td>\n",
" <td>Passiflora_foetida,_Love-in-a-mist,_Stinking_p...</td>\n",
" <td>a:13:{s:4:\"Make\";s:4:\"SONY\";s:5:\"Model\";s:10:\"...</td>\n",
" <td>None</td>\n",
" <td>2018:09:25 08:27:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7167</th>\n",
" <td>274314346</td>\n",
" <td>20180830121916</td>\n",
" <td>20979.0</td>\n",
" <td>6</td>\n",
" <td>72159293</td>\n",
" <td>地铁吉祥村站.jpg</td>\n",
" <td>a:23:{s:4:\"Make\";s:3:\"ZTE\";s:5:\"Model\";s:4:\"A8...</td>\n",
" <td>None</td>\n",
" <td>2018:08:06 19:03:54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2905</th>\n",
" <td>273227327</td>\n",
" <td>20180723182746</td>\n",
" <td>157819.0</td>\n",
" <td>6</td>\n",
" <td>71075924</td>\n",
" <td>Banja_lake,_Gramsh,_Albania_-_3.jpeg</td>\n",
" <td>a:26:{s:11:\"Orientation\";i:1;s:5:\"Model\";s:9:\"...</td>\n",
" <td>None</td>\n",
" <td>2018:07:08 10:26:50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1134</th>\n",
" <td>272818528</td>\n",
" <td>20180709123345</td>\n",
" <td>7298796.0</td>\n",
" <td>6</td>\n",
" <td>70688877</td>\n",
" <td>Friendship_Holding_hands..jpg</td>\n",
" <td>a:38:{s:4:\"Make\";s:8:\"Motorola\";s:5:\"Model\";s:...</td>\n",
" <td>None</td>\n",
" <td>2018:07:07 11:06:44</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>128 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
" log_id log_timestamp log_actor log_namespace log_page \\\n",
"8442 274882044 20180912185800 7404411.0 6 72684628 \n",
"8840 275034405 20180918063725 7415301.0 6 72834653 \n",
"2927 273230220 20180723210526 7322156.0 6 71078928 \n",
"3972 273367517 20180731083019 121788.0 6 71218647 \n",
"9658 275271055 20180925214941 246422.0 6 73062910 \n",
"... ... ... ... ... ... \n",
"2898 273227276 20180723182249 157819.0 6 71075869 \n",
"9825 275332475 20180927070920 46042.0 6 73117677 \n",
"7167 274314346 20180830121916 20979.0 6 72159293 \n",
"2905 273227327 20180723182746 157819.0 6 71075924 \n",
"1134 272818528 20180709123345 7298796.0 6 70688877 \n",
"\n",
" page_title \\\n",
"8442 The_alone_flower.jpg \n",
"8840 Cannon_4.jpg \n",
"2927 Bexbacher_Hindenburgturm.jpg \n",
"3972 Clouds_6.jpg \n",
"9658 Plaça_de_la_Palmera_de_Sant_Martí_17.jpg \n",
"... ... \n",
"2898 Banja_lake,_Gramsh,_Albania_-_6.jpg \n",
"9825 Passiflora_foetida,_Love-in-a-mist,_Stinking_p... \n",
"7167 地铁吉祥村站.jpg \n",
"2905 Banja_lake,_Gramsh,_Albania_-_3.jpeg \n",
"1134 Friendship_Holding_hands..jpg \n",
"\n",
" img_metadata d_mod \\\n",
"8442 a:26:{s:5:\"Model\";s:4:\"A33f\";s:16:\"YCbCrPositi... None \n",
"8840 a:24:{s:4:\"Make\";s:4:\"vivo\";s:5:\"Model\";s:7:\"v... None \n",
"2927 a:28:{s:4:\"Make\";s:5:\"Nokia\";s:5:\"Model\";s:9:\"... None \n",
"3972 a:10:{s:4:\"Make\";s:6:\"Xiaomi\";s:5:\"Model\";s:5:... None \n",
"9658 a:13:{s:4:\"Make\";s:8:\"motorola\";s:5:\"Model\";s:... None \n",
"... ... ... \n",
"2898 a:26:{s:11:\"Orientation\";i:1;s:5:\"Model\";s:9:\"... None \n",
"9825 a:13:{s:4:\"Make\";s:4:\"SONY\";s:5:\"Model\";s:10:\"... None \n",
"7167 a:23:{s:4:\"Make\";s:3:\"ZTE\";s:5:\"Model\";s:4:\"A8... None \n",
"2905 a:26:{s:11:\"Orientation\";i:1;s:5:\"Model\";s:9:\"... None \n",
"1134 a:38:{s:4:\"Make\";s:8:\"Motorola\";s:5:\"Model\";s:... None \n",
"\n",
" d_cre \n",
"8442 2017:01:24 06:36:13 \n",
"8840 2017:12:18 16:05:58 \n",
"2927 2017:06:08 16:23:16 \n",
"3972 2018:07:06 10:26:05 \n",
"9658 2018:09:25 15:33:31 \n",
"... ... \n",
"2898 2018:07:08 10:27:15 \n",
"9825 2018:09:25 08:27:00 \n",
"7167 2018:08:06 19:03:54 \n",
"2905 2018:07:08 10:26:50 \n",
"1134 2018:07:07 11:06:44 \n",
"\n",
"[128 rows x 9 columns]"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# List file pages for manual inspection: every row of df that is in neither\n",
"# df_to_fix nor df_not_to_fix.\n",
"# Shuffled with a fixed seed so the displayed order is reproducible across kernel restarts.\n",
"df.drop(df_to_fix.index.union(df_not_to_fix.index), axis=0).sample(frac=1, random_state=0)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment