HenryYang · September 26, 2024 07:31 · HenryYang · Sep 26, 2024
diff --git a/appintheair_convert.py b/appintheair_convert.py
 from datetime import datetime, timedelta

 # Input and output file locations (relative to the working directory).
 input_file_path = './data.txt'  # export file to convert
 output_file_path = './data_final_sorted_utc8.txt'  # final, sorted result

 # Lines starting with any of these are section/trip headers, not flight rows.
 SKIPPED_PREFIXES = (
     'hotels:',
     'rental cars:',
     'expenses:',
     'flights:',
     'Ownership.MINE',
 )


 def drop_not_mine_blocks(lines):
     """Return *lines* without the blocks introduced by 'Ownership.NOT_MINE'.

     A block starts at a line beginning with 'Ownership.NOT_MINE' and extends
     up to and including the next blank (whitespace-only) line. Everything
     outside such blocks is returned unchanged and in the original order.
     """
     kept = []
     skipping = False  # True while inside a NOT_MINE block
     for line in lines:
         if skipping:
             # The blank line that terminates the block is dropped as well.
             if line.strip() == "":
                 skipping = False
             continue
         if line.startswith('Ownership.NOT_MINE'):
             skipping = True
             continue
         kept.append(line)
     return kept


 def extract_flights(lines):
     """Return ``(flight, date)`` pairs taken from *lines*, sorted by date.

     Header lines (see ``SKIPPED_PREFIXES``) are ignored. Each remaining line
     is split on ';'. Only rows with more than 13 fields are used: fields 8
     and 9 (indices 7 and 8) are concatenated into the flight identifier, and
     field 13 (index 12) is parsed as ``%Y-%m-%dT%H:%M:%S``, shifted by +8
     hours and reduced to a ``YYYY-MM-DD`` date. Rows whose timestamp does
     not parse are skipped.

     The pairs are kept as tuples instead of being joined into a string and
     re-split on whitespace, so an empty flight identifier no longer raises
     IndexError and an identifier containing whitespace no longer displaces
     the date.
     """
     flights = []
     for line in lines:
         if line.startswith(SKIPPED_PREFIXES):
             continue
         parts = line.split(';')
         # Guard kept from the original: at least 14 fields are required even
         # though index 12 is the highest one read.
         if len(parts) <= 13:
             continue
         try:
             utc_time = datetime.strptime(parts[12], "%Y-%m-%dT%H:%M:%S")
         except ValueError:
             continue  # malformed timestamp: skip this row
         # The stored time is treated as UTC; add 8 hours to get UTC+8.
         local_date = (utc_time + timedelta(hours=8)).strftime("%Y-%m-%d")
         # strip() mirrors the whitespace trimming the old re-split performed.
         flights.append(((parts[7] + parts[8]).strip(), local_date))
     # ISO dates sort chronologically as strings; the sort is stable, so rows
     # sharing a date keep their file order.
     return sorted(flights, key=lambda pair: pair[1])


 def main():
     """Read the export, extract the flights and write the sorted result."""
     # Explicit encoding so the result does not depend on the platform's
     # default. NOTE(review): assumes the export is UTF-8 -- confirm.
     with open(input_file_path, 'r', encoding='utf-8') as file:
         lines = file.readlines()

     flights = extract_flights(drop_not_mine_blocks(lines))
     sorted_output = "\n".join(f"{flight} {date}" for flight, date in flights)

     with open(output_file_path, 'w', encoding='utf-8') as sorted_file:
         sorted_file.write(sorted_output)

     print(f"已清理、轉換並排序的檔案儲存至: {output_file_path}")


 if __name__ == "__main__":
     main()
	from datetime import datetime, timedelta

	# Input and output file locations (relative to the working directory).
	input_file_path = './data.txt'  # export file to convert
	output_file_path = './data_final_sorted_utc8.txt'  # final, sorted result

	# Lines starting with any of these are section/trip headers, not flight rows.
	SKIPPED_PREFIXES = (
	    'hotels:',
	    'rental cars:',
	    'expenses:',
	    'flights:',
	    'Ownership.MINE',
	)


	def drop_not_mine_blocks(lines):
	    """Return *lines* without the blocks introduced by 'Ownership.NOT_MINE'.

	    A block starts at a line beginning with 'Ownership.NOT_MINE' and extends
	    up to and including the next blank (whitespace-only) line. Everything
	    outside such blocks is returned unchanged and in the original order.
	    """
	    kept = []
	    skipping = False  # True while inside a NOT_MINE block
	    for line in lines:
	        if skipping:
	            # The blank line that terminates the block is dropped as well.
	            if line.strip() == "":
	                skipping = False
	            continue
	        if line.startswith('Ownership.NOT_MINE'):
	            skipping = True
	            continue
	        kept.append(line)
	    return kept


	def extract_flights(lines):
	    """Return ``(flight, date)`` pairs taken from *lines*, sorted by date.

	    Header lines (see ``SKIPPED_PREFIXES``) are ignored. Each remaining line
	    is split on ';'. Only rows with more than 13 fields are used: fields 8
	    and 9 (indices 7 and 8) are concatenated into the flight identifier, and
	    field 13 (index 12) is parsed as ``%Y-%m-%dT%H:%M:%S``, shifted by +8
	    hours and reduced to a ``YYYY-MM-DD`` date. Rows whose timestamp does
	    not parse are skipped.

	    The pairs are kept as tuples instead of being joined into a string and
	    re-split on whitespace, so an empty flight identifier no longer raises
	    IndexError and an identifier containing whitespace no longer displaces
	    the date.
	    """
	    flights = []
	    for line in lines:
	        if line.startswith(SKIPPED_PREFIXES):
	            continue
	        parts = line.split(';')
	        # Guard kept from the original: at least 14 fields are required even
	        # though index 12 is the highest one read.
	        if len(parts) <= 13:
	            continue
	        try:
	            utc_time = datetime.strptime(parts[12], "%Y-%m-%dT%H:%M:%S")
	        except ValueError:
	            continue  # malformed timestamp: skip this row
	        # The stored time is treated as UTC; add 8 hours to get UTC+8.
	        local_date = (utc_time + timedelta(hours=8)).strftime("%Y-%m-%d")
	        # strip() mirrors the whitespace trimming the old re-split performed.
	        flights.append(((parts[7] + parts[8]).strip(), local_date))
	    # ISO dates sort chronologically as strings; the sort is stable, so rows
	    # sharing a date keep their file order.
	    return sorted(flights, key=lambda pair: pair[1])


	def main():
	    """Read the export, extract the flights and write the sorted result."""
	    # Explicit encoding so the result does not depend on the platform's
	    # default. NOTE(review): assumes the export is UTF-8 -- confirm.
	    with open(input_file_path, 'r', encoding='utf-8') as file:
	        lines = file.readlines()

	    flights = extract_flights(drop_not_mine_blocks(lines))
	    sorted_output = "\n".join(f"{flight} {date}" for flight, date in flights)

	    with open(output_file_path, 'w', encoding='utf-8') as sorted_file:
	        sorted_file.write(sorted_output)

	    print(f"已清理、轉換並排序的檔案儲存至: {output_file_path}")


	if __name__ == "__main__":
	    main()
No results found