Last active
January 2, 2024 18:52
-
-
Save hlb/084b6c491ae2ea7c6606e041e015e4d9 to your computer and use it in GitHub Desktop.
文章匯入 Notion,以及彙整 Notion AI 摘要的程式
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests # 引入 requests 模組用於發送 HTTP 請求 | |
import json # 引入 json 模組用於處理 JSON 資料 | |
import sys # 引入 sys 模組用於處理系統相關的參數和功能 | |
import configparser # 引入 configparser 模組用於讀取配置文件 | |
# The path of the article JSON file must be given as the first command-line argument.
if not sys.argv[1:]:
    print("Usage: python import_notion.py path/to/json")
    sys.exit(1)

# Path of the JSON file to import, taken from the command line.
json_file_path = sys.argv[1]

# NOTION_TOKEN, DATABASE_ID and SERIES_DATABASE_ID are read from the [NOTION]
# section of config.ini.
config = configparser.ConfigParser()
config.read("config.ini")

try:
    NOTION_TOKEN, DATABASE_ID, SERIES_DATABASE_ID = (
        config["NOTION"][option]
        for option in ("NOTION_TOKEN", "DATABASE_ID", "SERIES_DATABASE_ID")
    )
except KeyError:
    # A missing section or a missing option both surface as KeyError.
    print("Error: Missing NOTION_TOKEN, DATABASE_ID or SERIES_DATABASE_ID in the config file.")
    sys.exit(1)

# Notion API endpoint for pages.
NOTION_ENDPOINT = "https://api.notion.com/v1/pages"
# Notion API endpoint for querying the series database.
SEARCH_ENDPOINT = "https://api.notion.com/v1/databases/{}/query".format(SERIES_DATABASE_ID)

# Headers sent with every Notion API request.
headers = {
    "Authorization": "Bearer " + NOTION_TOKEN,
    "Content-Type": "application/json",
    "Notion-Version": "2022-06-28",
}
def find_page_by_url(url):
    """Return the ID of the first page in the main database whose "URL" equals *url*.

    Args:
        url: The article URL to look up.

    Returns:
        The ID of the first matching page, or None when the query matched nothing.

    Raises:
        RuntimeError: If Notion answers the query with a non-200 status.
    """
    search_payload = {
        "filter": {
            "property": "URL",
            "url": {"equals": url},
        }
    }
    response = requests.post(
        f"https://api.notion.com/v1/databases/{DATABASE_ID}/query",
        headers=headers,
        json=search_payload,
        timeout=30,
    )
    # A failed query must not be mistaken for "no such page": the caller would
    # then skip removing the old page and end up with a duplicate.
    if response.status_code != 200:
        raise RuntimeError("Error querying page by URL: {}".format(response.text))

    results = response.json().get("results")
    return results[0]["id"] if results else None
def get_or_create_series_page_id(series_name):
    """Return the ID of the series page named *series_name*, creating it if needed.

    The series database is queried for a page whose "Series Name" equals
    *series_name*. When no page matches, a new page with that name is created
    in the series database.

    Args:
        series_name: The series name to look up or create.

    Returns:
        The ID of the existing or newly created series page.

    Raises:
        RuntimeError: If either the search or the creation request does not
            come back with status 200.
    """
    search_payload = {
        "filter": {
            "property": "Series Name",
            "rich_text": {"equals": series_name},
        }
    }
    response = requests.post(SEARCH_ENDPOINT, headers=headers, json=search_payload, timeout=30)
    # Do not fall through to "create" when the search itself failed; that
    # would add a duplicate series page.
    if response.status_code != 200:
        raise RuntimeError("Error searching series page: {}".format(response.text))

    results = response.json().get("results")
    if results:
        return results[0]["id"]

    # No page with this series name exists yet, so create one.
    series_payload = {
        "parent": {"database_id": SERIES_DATABASE_ID},
        "properties": {
            "Series Name": {"title": [{"text": {"content": series_name}}]},
        },
    }
    response = requests.post(NOTION_ENDPOINT, headers=headers, json=series_payload, timeout=30)
    if response.status_code != 200:
        raise RuntimeError("Error creating series page: {}".format(response.text))
    return response.json().get("id")
# Load the article data from the JSON file given on the command line.
try:
    # Decode explicitly as UTF-8 so non-ASCII article text is read the same way
    # regardless of the platform's default encoding.
    with open(json_file_path, "r", encoding="utf-8") as file:
        data = json.load(file)
except (OSError, ValueError) as e:
    # OSError covers a missing or unreadable file; ValueError covers both
    # invalid JSON (json.JSONDecodeError) and invalid UTF-8 (UnicodeDecodeError).
    print(f"Error reading the JSON file: {e}")
    sys.exit(1)  # Reading the JSON file failed: report the error and exit.
def split_content_by_newline(content, limit=1950):
    """Split *content* into chunks of at most *limit* characters.

    Lines are packed into a chunk, joined by newlines, until the next line
    would push it past *limit*. A single line longer than *limit* is cut into
    *limit*-sized pieces so that no chunk ever exceeds the limit. Leading and
    trailing whitespace is stripped from each chunk, and chunks that are empty
    after stripping are dropped.

    Args:
        content: The text to split.
        limit: Maximum number of characters per chunk; must be at least 1.

    Returns:
        A list of non-empty chunk strings, in their original order.

    Raises:
        ValueError: If *limit* is smaller than 1.
    """
    if limit < 1:
        raise ValueError("limit must be a positive integer")

    chunks = []

    def add_chunk(text):
        # Never emit an empty chunk (e.g. when the buffer is flushed before
        # anything was collected).
        text = text.strip()
        if text:
            chunks.append(text)

    current_chunk = ""
    for line in content.split("\n"):
        if len(current_chunk) + len(line) + 1 > limit:
            add_chunk(current_chunk)
            # A line that alone exceeds the limit is cut into fixed-size
            # pieces; the remainder starts the next chunk.
            while len(line) > limit:
                add_chunk(line[:limit])
                line = line[limit:]
            current_chunk = line + "\n"
        else:
            current_chunk += line + "\n"

    add_chunk(current_chunk)
    return chunks
# Turn the article body into one paragraph block per chunk.
chunks = split_content_by_newline(data["Article Content"])
children_blocks = [
    {
        "object": "block",
        "type": "paragraph",
        "paragraph": {
            "rich_text": [{"type": "text", "text": {"content": chunk}}],
        },
    }
    for chunk in chunks
]

series_page_id = get_or_create_series_page_id(data["Series Name"])

# Request body for the new article page.
body = {
    "parent": {"database_id": DATABASE_ID},
    "properties": {
        "Article Title": {"title": [{"text": {"content": data["Article Title"]}}]},
        "Series Name": {"relation": [{"id": series_page_id}]},  # Link to the series page.
        "Author Name": {"rich_text": [{"text": {"content": data["Author Name"]}}]},
        "Day": {"number": data["Day"]},
        "URL": {"url": data["URL"]},
    },
    "children": children_blocks,
}

# Check whether a page with the same URL already exists.
existing_page_id = find_page_by_url(data["URL"])

# If it does, remove it before creating the replacement.
if existing_page_id:
    # The pages endpoint has no DELETE method; a page is removed by archiving
    # it through PATCH with "archived": true.
    response = requests.patch(
        f"{NOTION_ENDPOINT}/{existing_page_id}",
        headers=headers,
        json={"archived": True},
        timeout=30,
    )
    if response.status_code != 200:
        # Stop here rather than create a second page for the same URL.
        print(f"Error: {sys.argv[1]} - {response.status_code} - {response.text}")
        sys.exit(1)
    print(f"Remove Old: {sys.argv[1]}")

# Create the Notion page.
response = requests.post(NOTION_ENDPOINT, headers=headers, json=body, timeout=30)
if response.status_code == 200:  # Notion reports a successful creation with 200.
    print(f"Success: {sys.argv[1]}")
else:
    print(f"Error: {sys.argv[1]} - {response.status_code} - {response.text}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests # 匯入 requests 套件,用於發送 HTTP 請求 | |
import sys # 匯入 sys 套件,用於處理系統相關的操作,例如退出程式 | |
import configparser # 匯入 configparser 套件,用於讀取設定檔案 | |
# Load NOTION_TOKEN and DATABASE_ID from the [NOTION] section of config.ini.
config = configparser.ConfigParser()
config.read("config.ini")

try:
    _notion_section = config["NOTION"]
    NOTION_TOKEN = _notion_section["NOTION_TOKEN"]
    DATABASE_ID = _notion_section["DATABASE_ID"]
except KeyError:
    # A missing section or a missing option both surface as KeyError.
    print("Error: Missing NOTION_TOKEN, DATABASE_ID in the config file.")
    sys.exit(1)  # Configuration is incomplete: report the error and stop.

# Headers sent with every Notion API request.
HEADERS = {
    "Authorization": "Bearer " + NOTION_TOKEN,
    "Content-Type": "application/json",
    "Notion-Version": "2022-06-28",
}
def get_related_page_ids(page_id):
    """Return the IDs of all pages referenced by the page's 'Articles' relation.

    The page object only carries the first part of a long relation and marks
    it with ``has_more``. When that flag is set, the complete list is read
    from the paginated page-property endpoint instead.

    Args:
        page_id: ID of the Notion page whose 'Articles' property is read.

    Returns:
        A list of related page IDs; empty when the page has no 'Articles'
        property or the relation is empty.

    Raises:
        requests.HTTPError: If Notion answers a request with an error status.
    """
    response = requests.get(
        f'https://api.notion.com/v1/pages/{page_id}', headers=HEADERS, timeout=30
    )
    response.raise_for_status()
    articles_property = response.json()['properties'].get('Articles', {})

    if not articles_property.get('has_more'):
        return [relation['id'] for relation in articles_property.get('relation', [])]

    # The relation was truncated in the page object: page through the
    # property-item endpoint to collect every reference.
    property_url = (
        f"https://api.notion.com/v1/pages/{page_id}/properties/{articles_property['id']}"
    )
    related_page_ids = []
    params = {}
    while True:
        response = requests.get(property_url, headers=HEADERS, params=params, timeout=30)
        response.raise_for_status()
        payload = response.json()
        related_page_ids.extend(
            item['relation']['id'] for item in payload.get('results', [])
        )
        if not payload.get('has_more'):
            return related_page_ids
        params = {'start_cursor': payload['next_cursor']}
def get_related_pages(related_page_ids):
    """Fetch title and AI summary of each related page, ordered by 'Day'.

    Each page is retrieved from the Notion API and its day, title and summary
    are printed as they are read. Pages without any 'AI Summary' text are
    reported and skipped.

    Args:
        related_page_ids: Iterable of Notion page IDs to fetch.

    Returns:
        A list of (title, summary) tuples sorted by the pages' 'Day' number;
        a missing or empty 'Day' counts as 0.

    Raises:
        requests.HTTPError: If Notion answers a request with an error status.
        KeyError: If a page has no 'Article Title' property.
    """
    related_pages = []
    for page_id in related_page_ids:
        url = f'https://api.notion.com/v1/pages/{page_id}'
        response = requests.get(url, headers=HEADERS, timeout=30)
        response.raise_for_status()
        properties = response.json()['properties']

        title = _join_plain_text(properties['Article Title']['title'])
        # An existing but empty number property yields None, which cannot be
        # sorted against integers; treat it like a missing 'Day'.
        day = properties.get('Day', {}).get('number') or 0
        print(day)
        print(title)

        summary = _join_plain_text(properties.get('AI Summary', {}).get('rich_text', []))
        if not summary:
            print(f"Error processing 'AI summary' for page with title: {title}. Skipping this page.")
            continue
        print(summary)
        related_pages.append((title, summary, day))  # Collect title, summary and day.

    # Order the pages by their 'Day' value.
    related_pages.sort(key=lambda entry: entry[2])
    # Return only the (title, summary) pairs, in sorted order.
    return [(title, summary) for title, summary, _day in related_pages]


def _join_plain_text(segments):
    """Concatenate the 'plain_text' of every rich-text segment ('' for an empty list)."""
    return "".join(segment.get('plain_text', '') for segment in segments)
def update_page_content(page_id, related_pages):
    """Append a heading and a summary paragraph per related page to a Notion page.

    For every (title, summary) pair a ``heading_2`` block with the title and a
    ``paragraph`` block with the summary are appended to the page's children.
    Blocks are sent in batches to stay within Notion's per-request limit, and
    sending stops at the first failed batch so later blocks are not appended
    out of order. No request is sent when *related_pages* is empty.

    Args:
        page_id: ID of the Notion page to append to.
        related_pages: Iterable of (title, summary) tuples.

    Returns:
        None. A failure is reported by printing Notion's error response.
    """
    url = f'https://api.notion.com/v1/blocks/{page_id}/children'
    max_text_length = 2000  # Notion's limit for one text object's content.
    max_children = 100  # Notion's limit for block children in one request.

    def as_rich_text(text):
        # Split over-long text across several text objects; an empty string
        # still yields one (empty) text object.
        return [
            {"type": "text", "text": {"content": text[start:start + max_text_length]}}
            for start in range(0, max(len(text), 1), max_text_length)
        ]

    # Build one heading block and one paragraph block per (title, summary) pair.
    children_blocks = []
    for title, summary in related_pages:
        children_blocks.append({
            "object": "block",
            "type": "heading_2",
            "heading_2": {"rich_text": as_rich_text(title)},
        })
        children_blocks.append({
            "object": "block",
            "type": "paragraph",
            "paragraph": {"rich_text": as_rich_text(summary)},
        })

    for start in range(0, len(children_blocks), max_children):
        body = {'children': children_blocks[start:start + max_children]}
        response = requests.patch(url, headers=HEADERS, json=body, timeout=30)
        if response.status_code != 200:
            print(f"Failed to update content. Error: {response.text}")
            return
def get_page_title(page_id):
    """Return the 'Series Name' title of the Notion page with the given ID.

    Args:
        page_id: ID of the Notion page to read.

    Returns:
        The text content of the first title segment, or None (after printing
        an error) when the response has no such property or the title is empty.
    """
    url = f'https://api.notion.com/v1/pages/{page_id}'
    response = requests.get(url, headers=HEADERS, timeout=30)
    data = response.json()
    try:
        return data['properties']['Series Name']['title'][0]['text']['content']
    except (KeyError, IndexError):
        # KeyError: property or field missing (e.g. an error response);
        # IndexError: the title exists but has no segments.
        print(f"Error processing 'title' for page with ID: {page_id}.")
        return None
def main():
    """Append the summaries of a series page's related articles to that page.

    The Notion page ID is taken from the first command-line argument; the
    program exits with status 1 when it is missing.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        # No page ID supplied: report the problem and stop.
        print("Please provide a Notion page ID as an argument.")
        sys.exit(1)

    page_id = cli_args[0]

    series_title = get_page_title(page_id)
    if series_title:
        print(f"Title: {series_title}")

    # Collect the related articles, then write their summaries to the page.
    article_ids = get_related_page_ids(page_id)
    update_page_content(page_id, get_related_pages(article_ids))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment