Last active
September 26, 2024 07:31
-
-
Save HenryYang/c7e45a013cc1639fea06aed82945c6f7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Turn an App in the Air export into a date-sorted flight list in UTC+8.

The script reads ``input_file_path``, drops every section that starts with
``Ownership.NOT_MINE`` (up to the next blank line), ignores section-header
and ``Ownership.MINE`` lines, and for each remaining semicolon-separated
row writes ``<field 7><field 8> <date>`` to ``output_file_path``, where the
date is field 12 shifted by +8 hours. Rows are sorted by that date.
"""
from datetime import datetime, timedelta

# Input and output file paths.
input_file_path = './data.txt'  # already-cleaned export file
output_file_path = './data_final_sorted_utc8.txt'  # final output file

# Lines starting with any of these prefixes are never treated as data rows.
SKIPPED_PREFIXES = (
    'hotels:',
    'rental cars:',
    'expenses:',
    'flights:',
    'Ownership.MINE',
)

# The timestamps are shifted by a fixed +8 hours before the date is taken.
UTC8_OFFSET = timedelta(hours=8)


def drop_not_mine_sections(lines):
    """Return *lines* without the ``Ownership.NOT_MINE`` sections.

    A section starts at a line beginning with ``Ownership.NOT_MINE`` and
    runs up to and including the next blank line; all of it is discarded.
    """
    kept = []
    skipping = False
    for line in lines:
        if skipping:
            # A blank line ends the skipped section (and is itself dropped).
            if line.strip() == "":
                skipping = False
            continue
        if line.startswith('Ownership.NOT_MINE'):
            skipping = True
            continue
        kept.append(line)
    return kept


def extract_flights(lines):
    """Return ``(flight, date)`` tuples for every usable row in *lines*.

    ``flight`` is field 7 concatenated with field 8 (0-based, split on
    ``;``) and ``date`` is field 12 parsed as ``%Y-%m-%dT%H:%M:%S``, shifted
    by +8 hours and formatted as ``%Y-%m-%d``. Header lines, rows with fewer
    than 14 fields and rows whose timestamp does not parse are skipped.
    """
    flights = []
    for line in lines:
        if line.startswith(SKIPPED_PREFIXES):
            continue
        parts = line.split(';')
        # NOTE(review): only indices 7, 8 and 12 are read, but the original
        # guard required more than 13 fields; kept unchanged.
        if len(parts) <= 13:
            continue
        try:
            utc_time = datetime.strptime(parts[12], "%Y-%m-%dT%H:%M:%S")
        except ValueError:
            continue  # malformed timestamp: skip this row
        local_date = (utc_time + UTC8_OFFSET).strftime("%Y-%m-%d")
        # Keep the pair as a tuple. Joining into one string and re-splitting
        # on whitespace breaks when the flight text is empty or has spaces.
        flights.append((parts[7] + parts[8], local_date))
    return flights


def build_report(lines):
    """Return the final report text for the raw export *lines*.

    One ``"<flight> <date>"`` entry per line, ordered by date (rows sharing
    a date keep their input order), with no trailing newline.
    """
    flights = extract_flights(drop_not_mine_sections(lines))
    flights.sort(key=lambda item: item[1])
    return "\n".join(f"{flight} {date}" for flight, date in flights)


def main():
    """Read the export, build the sorted report and write it to disk."""
    # Explicit UTF-8 so the result does not depend on the platform locale.
    with open(input_file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    report = build_report(lines)

    with open(output_file_path, 'w', encoding='utf-8') as sorted_file:
        sorted_file.write(report)
    print(f"已清理、轉換並排序的檔案儲存至: {output_file_path}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
使用 ChatGPT 產生的程式,經測試可以把 App in the Air 匯出的檔案轉換成一目了然的版本