Skip to content

Instantly share code, notes, and snippets.

@book000
Last active November 18, 2024 20:27
Show Gist options
  • Save book000/6766197c9ab30f21b32f4fa3ce7fea38 to your computer and use it in GitHub Desktop.
VRCX のデータベースから、インスタンスごとの動画情報をまとめてJSONに書き出す
import json
import sqlite3
import pandas as pd
import os
# VRCX keeps its SQLite database under %APPDATA%\VRCX on Windows.
appdata_path = os.getenv('APPDATA')
if appdata_path is None:
    # Without this guard, os.path.join(None, ...) raises a confusing
    # TypeError on non-Windows systems; fail with an explicit message.
    raise RuntimeError('APPDATA environment variable is not set; this script expects a Windows environment with VRCX installed.')
dbpath = os.path.join(appdata_path, 'VRCX', 'VRCX.sqlite3')
print("DB Path: " + dbpath)
conn = sqlite3.connect(dbpath)
# Video playback log: rename the generic columns so they survive the
# later merge with the location log without clashing.
df_videos = pd.read_sql_query('SELECT * FROM gamelog_video_play', conn).rename(
    columns={
        'id': 'video_id',
        'created_at': 'video_created_at',
    }
)
print("Video Log Count: " + str(len(df_videos)))
# Instance (location) log: rename columns to avoid clashes after the merge.
df_locations = pd.read_sql_query('SELECT * FROM gamelog_location', conn).rename(
    columns={
        'id': 'location_id',
        'created_at': 'location_created_at',
    }
)
# A stay at a location ends when the next location entry begins
# (the final row has no successor and therefore gets NaN).
df_locations['location_finish_at'] = df_locations['location_created_at'].shift(-1)
print("Location Log Count: " + str(len(df_locations)))
# Attach each video play to the instance it happened in, keyed on `location`.
df = pd.merge(df_videos, df_locations, on='location')
# A play is assumed to end when the next play in the SAME instance starts.
# BUGFIX: the original used a plain df[...].shift(-1) over the whole merged
# frame, so the last play of each instance borrowed its end time from the
# first play of the NEXT instance. Shifting within each location group keeps
# the boundary correct; the final play of an instance gets NaN instead.
df['video_finished_at'] = df.groupby('location')['video_created_at'].shift(-1)
# Play duration in whole seconds; plays with no recorded end count as 0.
_start = pd.to_datetime(df['video_created_at'])
_end = pd.to_datetime(df['video_finished_at'])
df['duration_seconds'] = (_end - _start).dt.total_seconds().fillna(0).astype(int)
# Summarize plays per instance: one dict per unique `location` value.
instances = []
for location, group in df.groupby('location'):
    world_name = group['world_name'].values[0]
    world_id = group['world_id'].values[0]
    # Instance lifetime: earliest join to the computed leave time.
    location_created_at_min = group['location_created_at'].min()
    location_created_at_max = group['location_finish_at'].max()
    videos = []
    for video_url in set(group['video_url'].values):
        # Prefer any non-empty title ever recorded for this URL in the log.
        video_names = df_videos[(df_videos['video_url'] == video_url) & (df_videos['video_name'] != '')]['video_name'].values
        video_name = video_names[0] if len(video_names) > 0 else ''
        # Plays of this URL within this instance, newest first.
        # (The original re-derived this via
        # df_grouped.get_group(location).groupby('video_url').get_group(video_url);
        # `group` is already the per-location frame, so filter it directly.)
        df_plays = group[group['video_url'] == video_url]
        df_plays = df_plays.sort_values('video_created_at', ascending=False)
        videos.append({
            'video_url': video_url,  # video URL
            'video_name': video_name,  # video title
            # NOTE(review): this counts plays of the URL across ALL
            # instances, not just this one — looks like a deliberate
            # "total play count", but confirm against consumers.
            'video_count': len(df_videos[df_videos['video_url'] == video_url]),
            "first_play": df_plays['video_created_at'].min(),  # first play in this instance
            "last_play": df_plays['video_created_at'].max(),  # last play in this instance
            'plays': df_plays[['video_created_at', 'video_finished_at', 'duration_seconds']].to_dict(orient='records'),  # each play's timestamps and duration
        })
    # Order the instance's videos by when they were first played.
    videos = sorted(videos, key=lambda x: x['first_play'])
    # Total stay in the instance, in whole seconds.
    duration_seconds = int((pd.to_datetime(location_created_at_max) - pd.to_datetime(location_created_at_min)).total_seconds())
    instances.append({
        'world_name': world_name,  # world name
        'world_id': world_id,  # world ID
        'location': location,  # instance ID
        'location_join_at': location_created_at_min,  # time the instance was joined
        'location_left_at': location_created_at_max,  # time the instance was left
        'duration_seconds': duration_seconds,  # stay duration in the instance
        'videos': videos,  # videos played in the instance
    })
# Newest instances first.
instances = sorted(instances, key=lambda x: x['location_join_at'], reverse=True)
# Serialize the summary; ensure_ascii=False keeps Japanese text readable.
serialized = json.dumps(instances, indent=2, ensure_ascii=False)
with open('output.json', 'w', encoding='utf-8') as f:
    f.write(serialized)
conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment