Export video playback information from the VRCX database, grouped by instance, to a JSON file
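For reference, the script below writes output.json as a JSON array of instance entries. A rough sketch of one entry follows; the field names come from the dictionaries built in the script, while every value shown here is just a placeholder:

{
  "world_name": "...",
  "world_id": "wrld_...",
  "location": "...",
  "location_join_at": "...",
  "location_left_at": "...",
  "duration_seconds": 5400,
  "videos": [
    {
      "video_url": "https://...",
      "video_name": "...",
      "video_count": 2,
      "first_play": "...",
      "last_play": "...",
      "plays": [
        {"video_created_at": "...", "video_finished_at": "...", "duration_seconds": 210}
      ]
    }
  ]
}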
import json
import os
import sqlite3

import pandas as pd

# VRCX keeps its SQLite database under %APPDATA%\VRCX on Windows
appdata_path = os.getenv('APPDATA')
dbpath = os.path.join(appdata_path, 'VRCX', 'VRCX.sqlite3')
print("DB Path: " + dbpath)

conn = sqlite3.connect(dbpath)
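# Columns this script relies on (an assumption read off the queries below, not the full schema):
#   gamelog_video_play: id, created_at, location, video_url, video_name, ...
#   gamelog_location:   id, created_at, location, world_id, world_name, ...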
# Video play log: one row per playback event
df_videos = pd.read_sql_query('SELECT * FROM gamelog_video_play', conn)
df_videos = df_videos.rename(columns={
    'id': 'video_id',
    'created_at': 'video_created_at',
})
print("Video Log Count: " + str(len(df_videos)))
# Instance (location) log: one row per instance join
df_locations = pd.read_sql_query('SELECT * FROM gamelog_location', conn)
df_locations = df_locations.rename(columns={
    'id': 'location_id',
    'created_at': 'location_created_at',
})
# Treat the next join time as the leave time of the current instance
# (the most recent instance therefore has no finish time)
df_locations["location_finish_at"] = df_locations["location_created_at"].shift(-1)
print("Location Log Count: " + str(len(df_locations)))
# Join the two logs on the instance id (location)
# Note: this matches on the location string alone, so if the same instance was
# joined more than once, each video row gets attached to every one of those visits.
df = pd.merge(df_videos, df_locations, on='location')
# To estimate each play's duration, take the start time of the next playback event
df['video_finished_at'] = df['video_created_at'].shift(-1)
# Compute the play duration in seconds (e.g. 12:00:00 -> 12:03:30 gives 210)
df['video_created_at_df'] = pd.to_datetime(df['video_created_at'])
df['video_finished_at_df'] = pd.to_datetime(df['video_finished_at'])
df['duration_seconds'] = (df['video_finished_at_df'] - df['video_created_at_df']).dt.total_seconds()
df['duration_seconds'] = df['duration_seconds'].fillna(0).astype(int)
df = df.drop(columns=['video_created_at_df', 'video_finished_at_df'])
# Aggregate per instance
instances = []
df_grouped = df.groupby('location')
for name, group in df_grouped:
    world_name = group['world_name'].values[0]
    world_id = group['world_id'].values[0]
    location = group['location'].values[0]
    location_created_at_min = group['location_created_at'].min()
    location_created_at_max = group['location_finish_at'].max()
    video_urls = group['video_url'].values
    video_urls = list(set(video_urls))
    videos = []
    for video_url in video_urls:
        video_names = df_videos[(df_videos['video_url'] == video_url) & (df_videos['video_name'] != '')]['video_name'].values
        video_name = video_names[0] if len(video_names) > 0 else ''
        # Per-play information for this URL within this instance
        df_plays = df_grouped.get_group(location).groupby('video_url').get_group(video_url)
        df_plays = df_plays.sort_values('video_created_at', ascending=False)
        videos.append({
            'video_url': video_url,  # video URL
            'video_name': video_name,  # video title
            'video_count': len(df_videos[df_videos['video_url'] == video_url]),  # play count (across the whole log)
            "first_play": df_plays['video_created_at'].min(),  # first play time within this instance
            "last_play": df_plays['video_created_at'].max(),  # last play time within this instance
            'plays': df_plays[['video_created_at', 'video_finished_at', 'duration_seconds']].to_dict(orient='records'),  # each play's start/end and duration
        })
    # Sort videos by their first play time
    videos = sorted(videos, key=lambda x: x['first_play'])
    # Time spent in the instance (seconds, int).
    # The most recent instance has no recorded leave time (shift(-1) leaves it NaN),
    # so fall back to null / 0 instead of failing on int(nan).
    if pd.isna(location_created_at_max):
        location_created_at_max = None
        duration_seconds = 0
    else:
        duration_seconds = int((pd.to_datetime(location_created_at_max) - pd.to_datetime(location_created_at_min)).total_seconds())
    instances.append({
        'world_name': world_name,  # world name
        'world_id': world_id,  # world ID
        'location': location,  # instance ID
        'location_join_at': location_created_at_min,  # time the instance was joined
        'location_left_at': location_created_at_max,  # time the instance was left
        'duration_seconds': duration_seconds,  # time spent in the instance
        'videos': videos,  # videos played in this instance
    })
# Sort instances by join time, newest first
instances = sorted(instances, key=lambda x: x['location_join_at'], reverse=True)
with open('output.json', 'w', encoding='utf-8') as f:
    json.dump(instances, f, indent=2, ensure_ascii=False)
conn.close()
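To run it, Python 3 with pandas installed is enough (pandas is the only non-standard dependency); executing the script on the machine where VRCX is installed prints the database path and row counts, then writes output.json to the current directory. If VRCX is running at the same time, SQLite may report the database as locked; copying VRCX.sqlite3 somewhere else and pointing dbpath at the copy is a simple workaround.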