Skip to content

Instantly share code, notes, and snippets.

@HenryYang
Last active September 26, 2024 07:31
Show Gist options
  • Save HenryYang/c7e45a013cc1639fea06aed82945c6f7 to your computer and use it in GitHub Desktop.
Save HenryYang/c7e45a013cc1639fea06aed82945c6f7 to your computer and use it in GitHub Desktop.
from datetime import datetime, timedelta
# Input and output file paths.
input_file_path = './data.txt'  # path of the already-cleaned export file
output_file_path = './data_final_sorted_utc8.txt'  # path of the final output file

# Lines starting with any of these prefixes are section headers or ownership
# markers, not records, and are dropped before field extraction.
_DROPPED_PREFIXES = (
    'hotels:',
    'rental cars:',
    'expenses:',
    'flights:',
    'Ownership.MINE',
)

# Timestamp layout expected in the 13th semicolon-separated field.
_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S"

# Offset applied to the parsed timestamp to obtain the UTC+8 date.
# NOTE(review): this assumes the exported timestamps are UTC -- confirm.
_UTC8_OFFSET = timedelta(hours=8)


def _drop_not_mine_sections(lines):
    """Return *lines* without the sections introduced by 'Ownership.NOT_MINE'.

    A line starting with 'Ownership.NOT_MINE' turns skipping on; every line
    up to and including the next blank line is discarded, after which normal
    processing resumes.
    """
    kept = []
    skipping = False
    for line in lines:
        if skipping:
            if line.strip() == "":  # a blank line ends the skipped section
                skipping = False
            continue
        if line.startswith('Ownership.NOT_MINE'):
            skipping = True
            continue
        kept.append(line)
    return kept


def _extract_record(line):
    """Return a ``(label, date)`` tuple for one record line, or ``None``.

    ``label`` is the concatenation of the 8th and 9th semicolon-separated
    fields; ``date`` is the 13th field parsed as a timestamp, shifted by
    eight hours and formatted as ``YYYY-MM-DD``.  ``None`` is returned when
    the line has fewer than 14 fields or the timestamp cannot be parsed.
    """
    parts = line.split(';')
    if len(parts) <= 13:  # same threshold as the original: at least 14 fields
        return None
    try:
        shifted = datetime.strptime(parts[12], _TIME_FORMAT) + _UTC8_OFFSET
    except (ValueError, OverflowError):
        # Malformed (or out-of-range) timestamp: skip the line.
        return None
    return parts[7] + parts[8], shifted.strftime("%Y-%m-%d")


def build_sorted_output(lines):
    """Convert raw export lines into the date-sorted report text.

    Args:
        lines: iterable of raw lines (with or without trailing newlines).

    Returns:
        One ``"<label> <date>"`` entry per usable record, joined with
        newlines and ordered by date (ties keep their input order).  An
        empty string is returned when no record qualifies.
    """
    records = []
    for line in _drop_not_mine_sections(lines):
        if line.startswith(_DROPPED_PREFIXES):
            continue
        record = _extract_record(line)
        if record is not None:
            records.append(record)
    # Records stay as (label, date) tuples all the way through.  Re-splitting
    # a joined "label date" string on whitespace would truncate labels that
    # contain spaces and raise IndexError for empty labels.
    records.sort(key=lambda record: record[1])
    return "\n".join(f"{label} {date}" for label, date in records)


def main():
    """Read the input file, build the sorted report and write it out."""
    # The encoding is explicit so the result does not depend on the platform
    # locale.  NOTE(review): assumes the export is UTF-8 -- confirm.
    with open(input_file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()
    sorted_output = build_sorted_output(lines)
    with open(output_file_path, 'w', encoding='utf-8') as sorted_file:
        sorted_file.write(sorted_output)
    print(f"已清理、轉換並排序的檔案儲存至: {output_file_path}")


if __name__ == "__main__":
    main()
@HenryYang
Copy link
Author

使用 ChatGPT 產生的，經測試可以把 App in the Air 匯出的檔案變成可一眼看出來的版本

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment