Skip to content

Instantly share code, notes, and snippets.

@book000
Last active November 18, 2024 20:27
Show Gist options
  • Save book000/6766197c9ab30f21b32f4fa3ce7fea38 to your computer and use it in GitHub Desktop.
VRCX のデータベースから、インスタンスごとの動画情報をまとめてJSONに書き出す
import json
import sqlite3
import pandas as pd
import os
# VRCX keeps its SQLite database under %APPDATA%\VRCX on Windows.
appdata_path = os.getenv('APPDATA')
if appdata_path is None:
    # Without this guard, os.path.join(None, ...) raises a confusing
    # TypeError on non-Windows systems; fail with an explicit message.
    raise RuntimeError('APPDATA environment variable is not set; this script expects a Windows environment with VRCX installed.')
dbpath = os.path.join(appdata_path, 'VRCX', 'VRCX.sqlite3')
print("DB Path: " + dbpath)
conn = sqlite3.connect(dbpath)
# Video playback log: rename the generic columns so they survive the
# later merge with the location log without clashing.
df_videos = pd.read_sql_query('SELECT * FROM gamelog_video_play', conn).rename(
    columns={
        'id': 'video_id',
        'created_at': 'video_created_at',
    }
)
print("Video Log Count: " + str(len(df_videos)))
# Instance (location) log: rename columns to avoid clashes after the merge.
df_locations = pd.read_sql_query('SELECT * FROM gamelog_location', conn).rename(
    columns={
        'id': 'location_id',
        'created_at': 'location_created_at',
    }
)
# A stay at a location ends when the next location entry begins
# (the final row has no successor and therefore gets NaN).
df_locations['location_finish_at'] = df_locations['location_created_at'].shift(-1)
print("Location Log Count: " + str(len(df_locations)))
# Attach each video play to the instance it happened in, keyed on `location`.
df = pd.merge(df_videos, df_locations, on='location')
# A play is assumed to end when the next play in the SAME instance starts.
# BUGFIX: the original used a plain df[...].shift(-1) over the whole merged
# frame, so the last play of each instance borrowed its end time from the
# first play of the NEXT instance. Shifting within each location group keeps
# the boundary correct; the final play of an instance gets NaN instead.
df['video_finished_at'] = df.groupby('location')['video_created_at'].shift(-1)
# Play duration in whole seconds; plays with no recorded end count as 0.
_start = pd.to_datetime(df['video_created_at'])
_end = pd.to_datetime(df['video_finished_at'])
df['duration_seconds'] = (_end - _start).dt.total_seconds().fillna(0).astype(int)
# Summarize plays per instance: one dict per unique `location` value.
instances = []
for location, group in df.groupby('location'):
    world_name = group['world_name'].values[0]
    world_id = group['world_id'].values[0]
    # Instance lifetime: earliest join to the computed leave time.
    location_created_at_min = group['location_created_at'].min()
    location_created_at_max = group['location_finish_at'].max()
    videos = []
    for video_url in set(group['video_url'].values):
        # Prefer any non-empty title ever recorded for this URL in the log.
        video_names = df_videos[(df_videos['video_url'] == video_url) & (df_videos['video_name'] != '')]['video_name'].values
        video_name = video_names[0] if len(video_names) > 0 else ''
        # Plays of this URL within this instance, newest first.
        # (The original re-derived this via
        # df_grouped.get_group(location).groupby('video_url').get_group(video_url);
        # `group` is already the per-location frame, so filter it directly.)
        df_plays = group[group['video_url'] == video_url]
        df_plays = df_plays.sort_values('video_created_at', ascending=False)
        videos.append({
            'video_url': video_url,  # video URL
            'video_name': video_name,  # video title
            # NOTE(review): this counts plays of the URL across ALL
            # instances, not just this one — looks like a deliberate
            # "total play count", but confirm against consumers.
            'video_count': len(df_videos[df_videos['video_url'] == video_url]),
            "first_play": df_plays['video_created_at'].min(),  # first play in this instance
            "last_play": df_plays['video_created_at'].max(),  # last play in this instance
            'plays': df_plays[['video_created_at', 'video_finished_at', 'duration_seconds']].to_dict(orient='records'),  # each play's timestamps and duration
        })
    # Order the instance's videos by when they were first played.
    videos = sorted(videos, key=lambda x: x['first_play'])
    # Total stay in the instance, in whole seconds.
    duration_seconds = int((pd.to_datetime(location_created_at_max) - pd.to_datetime(location_created_at_min)).total_seconds())
    instances.append({
        'world_name': world_name,  # world name
        'world_id': world_id,  # world ID
        'location': location,  # instance ID
        'location_join_at': location_created_at_min,  # time the instance was joined
        'location_left_at': location_created_at_max,  # time the instance was left
        'duration_seconds': duration_seconds,  # stay duration in the instance
        'videos': videos,  # videos played in the instance
    })
# Newest instances first.
instances = sorted(instances, key=lambda x: x['location_join_at'], reverse=True)
# Serialize the summary; ensure_ascii=False keeps Japanese text readable.
serialized = json.dumps(instances, indent=2, ensure_ascii=False)
with open('output.json', 'w', encoding='utf-8') as f:
    f.write(serialized)
conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment