KuRRe8 · June 22, 2025 09:09 · KuRRe8 · May 8, 2025
diff --git a/_intro.md b/_intro.md
diff --git a/app_deploy_tutorial.ipynb b/app_deploy_tutorial.ipynb
diff --git a/asyncio_tutorial.ipynb b/asyncio_tutorial.ipynb
diff --git a/boosting_libs_tutorial.ipynb b/boosting_libs_tutorial.ipynb
diff --git a/c_extensions_tutorial.ipynb b/c_extensions_tutorial.ipynb
diff --git a/concurrency_tutorial.ipynb b/concurrency_tutorial.ipynb
diff --git a/context_managers_tutorial.ipynb b/context_managers_tutorial.ipynb
diff --git a/descriptor.md b/descriptor.md
diff --git a/design_patterns_tutorial.ipynb b/design_patterns_tutorial.ipynb
diff --git a/dunder_methods_tutorial.ipynb b/dunder_methods_tutorial.ipynb
diff --git a/experiment_tracking_tutorial.ipynb b/experiment_tracking_tutorial.ipynb
diff --git a/faiss_tutorial.ipynb b/faiss_tutorial.ipynb
diff --git a/generics_tutorial.ipynb b/generics_tutorial.ipynb
diff --git a/gymnasium_tutorial.ipynb b/gymnasium_tutorial.ipynb
diff --git a/hpo_tutorial.ipynb b/hpo_tutorial.ipynb
diff --git a/iterators_generators_tutorial.ipynb b/iterators_generators_tutorial.ipynb
diff --git a/langchain_tutorial.ipynb b/langchain_tutorial.ipynb
diff --git a/llamaindex_tutorial.ipynb b/llamaindex_tutorial.ipynb
diff --git a/matplotlib_tutorial.ipynb b/matplotlib_tutorial.ipynb
diff --git a/metaprogramming_tutorial.ipynb b/metaprogramming_tutorial.ipynb
diff --git a/numpy_tutorial.ipynb b/numpy_tutorial.ipynb
diff --git a/opencv_tutorial.ipynb b/opencv_tutorial.ipynb
diff --git a/pandas_tutorial.ipynb b/pandas_tutorial.ipynb
diff --git a/pattern_matching_tutorial.ipynb b/pattern_matching_tutorial.ipynb
diff --git a/python_internals_performance_tutorial.ipynb b/python_internals_performance_tutorial.ipynb
diff --git a/pytorch_tutorial.ipynb b/pytorch_tutorial.ipynb
diff --git a/scikit_learn_tutorial.ipynb b/scikit_learn_tutorial.ipynb
diff --git a/seaborn_tutorial.ipynb b/seaborn_tutorial.ipynb
diff --git a/shap_tutorial.ipynb b/shap_tutorial.ipynb
diff --git a/sigh.md b/sigh.md
diff --git a/stable_baselines3_tutorial.ipynb b/stable_baselines3_tutorial.ipynb
diff --git a/standard_library_tutorial.ipynb b/standard_library_tutorial.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Python 标准库常用模块基础教程\n",
    "\n",
    "Python 的标准库非常强大，提供了大量预置模块，用于处理各种常见任务，无需额外安装。本教程将介绍一些最常用标准库模块的基础用法，并提供简单的代码示例。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. 内置函数与类型 (Built-in Functions and Types)\n",
    "\n",
    "Python 自带许多可以直接使用的函数和类型，无需导入。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 类型转换\n",
    "print(f\"int('10'): {int('10')}\")\n",
    "print(f\"float('3.14'): {float('3.14')}\")\n",
    "print(f\"str(123): {str(123)}\")\n",
    "print(f\"list((1, 2, 3)): {list((1, 2, 3))}\")\n",
    "print(f\"tuple([1, 2, 3]): {tuple([1, 2, 3])}\")\n",
    "print(f\"dict(a=1, b=2): {dict(a=1, b=2)}\")\n",
    "print(f\"set([1, 2, 2, 3]): {set([1, 2, 2, 3])}\")\n",
    "\n",
    "# 数学相关\n",
    "print(f\"\\nabs(-5): {abs(-5)}\")\n",
    "print(f\"round(3.14159, 2): {round(3.14159, 2)}\")\n",
    "print(f\"pow(2, 3): {pow(2, 3)}\")\n",
    "print(f\"sum([1, 2, 3, 4]): {sum([1, 2, 3, 4])}\")\n",
    "print(f\"min(5, 1, 9): {min(5, 1, 9)}\")\n",
    "print(f\"max(5, 1, 9): {max(5, 1, 9)}\")\n",
    "\n",
    "# 序列操作\n",
    "my_list = [10, 20, 30, 40]\n",
    "print(f\"\\nlen(my_list): {len(my_list)}\")\n",
    "print(f\"sorted([3, 1, 2]): {sorted([3, 1, 2])}\")\n",
    "print(\"Enumerating my_list:\")\n",
    "for i, val in enumerate(my_list):\n",
    "    print(f\"  Index {i}: {val}\")\n",
    "\n",
    "# 输入输出 (在Jupyter中，input()会显示一个输入框)\n",
    "# name = input(\"Enter your name: \") \n",
    "# print(f\"Hello, {name}\")\n",
    "\n",
    "# 其他\n",
    "print(f\"\\ntype(my_list): {type(my_list)}\")\n",
    "print(f\"isinstance(my_list, list): {isinstance(my_list, list)}\")\n",
    "# help(len) # 取消注释以查看帮助文档"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. `math` - 数学函数\n",
    "\n",
    "提供标准 C 库中定义的数学函数。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "\n",
    "print(f\"math.sqrt(16): {math.sqrt(16)}\")      # 平方根\n",
    "print(f\"math.pow(2, 3): {math.pow(2, 3)}\")     # 幂运算\n",
    "print(f\"math.pi: {math.pi}\")            # 圆周率\n",
    "print(f\"math.e: {math.e}\")             # 自然常数\n",
    "print(f\"math.sin(math.pi/2): {math.sin(math.pi/2)}\")# 正弦 (参数为弧度)\n",
    "print(f\"math.cos(0): {math.cos(0)}\")        # 余弦\n",
    "print(f\"math.log(100, 10): {math.log(100, 10)}\")  # 对数\n",
    "print(f\"math.floor(3.7): {math.floor(3.7)}\")    # 向下取整\n",
    "print(f\"math.ceil(3.1): {math.ceil(3.1)}\")     # 向上取整\n",
    "print(f\"math.factorial(5): {math.factorial(5)}\")  # 阶乘"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. `random` - 生成伪随机数\n",
    "\n",
    "用于生成各种分布的伪随机数。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "\n",
    "print(f\"random.random(): {random.random()}\") # [0.0, 1.0) 之间的随机浮点数\n",
    "print(f\"random.randint(1, 10): {random.randint(1, 10)}\") # [1, 10] 之间的随机整数\n",
    "print(f\"random.choice(['apple', 'banana', 'cherry']): {random.choice(['apple', 'banana', 'cherry'])}\")\n",
    "\n",
    "my_numbers = [1, 2, 3, 4, 5]\n",
    "random.shuffle(my_numbers)    # 原地打乱序列顺序\n",
    "print(f\"Shuffled my_numbers: {my_numbers}\")\n",
    "\n",
    "print(f\"random.sample(range(100), 5): {random.sample(range(100), 5)}\") # 无放回抽样"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. `datetime` - 日期和时间处理\n",
    "\n",
    "提供用于处理日期和时间的类。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime, date, timedelta\n",
    "\n",
    "# 当前日期和时间\n",
    "now = datetime.now()\n",
    "print(f\"Now: {now}\")\n",
    "\n",
    "# 当前日期\n",
    "today = date.today()\n",
    "print(f\"Today: {today}\")\n",
    "\n",
    "# 创建特定日期时间\n",
    "dt = datetime(2024, 1, 1, 10, 30, 0)\n",
    "print(f\"Specific datetime: {dt}\")\n",
    "\n",
    "# 日期时间格式化 (strftime)\n",
    "print(f\"Formatted now: {now.strftime('%Y-%m-%d %H:%M:%S')}\")\n",
    "print(f\"Formatted date: {today.strftime('%A, %B %d, %Y')}\")\n",
    "\n",
    "# 从字符串解析日期时间 (strptime)\n",
    "date_str = \"2023-11-15 14:45:00\"\n",
    "parsed_datetime = datetime.strptime(date_str, \"%Y-%m-%d %H:%M:%S\")\n",
    "print(f\"Parsed datetime: {parsed_datetime}\")\n",
    "\n",
    "# 时间差 (timedelta)\n",
    "one_week = timedelta(weeks=1)\n",
    "last_week = today - one_week\n",
    "print(f\"Last week: {last_week}\")\n",
    "\n",
    "future_time = now + timedelta(hours=2, minutes=30)\n",
    "print(f\"Future time (now + 2h30m): {future_time}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. `time` - 时间相关函数\n",
    "\n",
    "提供各种时间相关的函数，更偏底层。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "print(f\"Current timestamp (seconds since epoch): {time.time()}\")\n",
    "print(f\"Readable local time: {time.ctime()}\") # 等同于 time.asctime(time.localtime())\n",
    "print(f\"UTC time struct: {time.gmtime()}\")\n",
    "print(f\"Local time struct: {time.localtime()}\")\n",
    "\n",
    "print(\"\\nSleeping for 0.5 second...\")\n",
    "time.sleep(0.5) # 暂停执行\n",
    "print(\"Awake!\")\n",
    "\n",
    "start_perf = time.perf_counter() # 高精度计时器\n",
    "sum(i for i in range(100000)) # 一些操作\n",
    "end_perf = time.perf_counter()\n",
    "print(f\"Operation took (perf_counter): {end_perf - start_perf:.6f} seconds\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. `os` - 操作系统接口\n",
    "\n",
    "提供了一种使用操作系统相关功能（如读写文件、操作目录、获取环境变量等）的便携方式。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "print(f\"Current working directory: {os.getcwd()}\")\n",
    "\n",
    "# 创建目录 (如果已存在会报错，除非使用 exist_ok=True)\n",
    "test_dir_name_os = \"my_test_dir_os_std_lib\"\n",
    "if not os.path.exists(test_dir_name_os):\n",
    "    os.mkdir(test_dir_name_os)\n",
    "    print(f\"Directory '{test_dir_name_os}' created.\")\n",
    "else:\n",
    "    print(f\"Directory '{test_dir_name_os}' already exists.\")\n",
    "\n",
    "print(f\"List directory contents (current): {os.listdir('.')[:5]} ... (first 5)\")\n",
    "\n",
    "# 路径操作\n",
    "file_path_os = os.path.join(test_dir_name_os, \"test_file_os.txt\")\n",
    "print(f\"Constructed file path: {file_path_os}\")\n",
    "print(f\"Is '{file_path_os}' a file? {os.path.isfile(file_path_os)}\")\n",
    "print(f\"Does '{file_path_os}' exist? {os.path.exists(file_path_os)}\")\n",
    "\n",
    "# 获取环境变量\n",
    "print(f\"User's PATH (example env var): {os.getenv('PATH', 'PATH Not Set')[:30]} ...\")\n",
    "\n",
    "# 清理\n",
    "if os.path.exists(file_path_os):\n",
    "    os.remove(file_path_os)\n",
    "    print(f\"File '{file_path_os}' removed.\")\n",
    "if os.path.exists(test_dir_name_os):\n",
    "    os.rmdir(test_dir_name_os)\n",
    "    print(f\"Directory '{test_dir_name_os}' removed.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. `sys` - 系统相关的参数和函数\n",
    "\n",
    "提供对解释器使用或维护的变量的访问，以及与解释器强烈交互的函数。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "print(f\"Python version: {sys.version[:40]}...\")\n",
    "print(f\"Platform: {sys.platform}\")\n",
    "print(f\"Command line arguments (sys.argv): {sys.argv}\") # 在Jupyter中，这通常是启动kernel的参数\n",
    "print(f\"Python path (sys.path): {sys.path[0]} ... (first entry)\")\n",
    "\n",
    "# sys.exit(\"Exiting with a message\") # 退出程序 (在Jupyter中会导致kernel重启)\n",
    "# print(\"This line won't be reached if sys.exit is called.\")\n",
    "\n",
    "print(f\"Standard output (stdout) is: {type(sys.stdout)}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. `json` - JSON 编码和解码\n",
    "\n",
    "用于处理 JSON (JavaScript Object Notation) 数据格式。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os # For file cleanup\n",
    "\n",
    "# Python 字典转 JSON 字符串 (序列化/编码)\n",
    "data_dict = {\"name\": \"Alice\", \"age\": 30, \"city\": \"New York\", \"isStudent\": False, \"grades\": None}\n",
    "json_string = json.dumps(data_dict, indent=4) # indent 用于美化输出\n",
    "print(\"JSON string:\")\n",
    "print(json_string)\n",
    "\n",
    "# JSON 字符串转 Python 字典 (反序列化/解码)\n",
    "json_data_to_parse = '{\"id\": 101, \"product\": \"Laptop\", \"price\": 1200.50}'\n",
    "parsed_dict = json.loads(json_data_to_parse)\n",
    "print(f\"\\nParsed dictionary: {parsed_dict}\")\n",
    "print(f\"Product name: {parsed_dict['product']}\")\n",
    "\n",
    "# 读写 JSON 文件\n",
    "json_file_path = \"data_std_lib.json\"\n",
    "with open(json_file_path, 'w') as f_write:\n",
    "    json.dump(data_dict, f_write, indent=4) # 直接写入文件\n",
    "print(f\"\\nData written to {json_file_path}\")\n",
    "\n",
    "with open(json_file_path, 'r') as f_read:\n",
    "    loaded_data = json.load(f_read) # 从文件读取并解析\n",
    "print(f\"Data loaded from {json_file_path}: {loaded_data}\")\n",
    "\n",
    "# 清理\n",
    "if os.path.exists(json_file_path):\n",
    "    os.remove(json_file_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 9. `re` - 正则表达式操作\n",
    "\n",
    "提供对 Perl 风格正则表达式模式的支持。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "\n",
    "text = \"The rain in Spain falls mainly on the plain. Phone: 123-456-7890. Email: [email protected]\"\n",
    "\n",
    "# 查找所有匹配项\n",
    "matches_ai = re.findall(r\"\\b\\w*ai\\w*\\b\", text) # 查找包含 \"ai\" 的单词\n",
    "print(f\"Words with 'ai': {matches_ai}\")\n",
    "\n",
    "# 搜索第一个匹配项\n",
    "match_phone = re.search(r\"\\d{3}-\\d{3}-\\d{4}\", text)\n",
    "if match_phone:\n",
    "    print(f\"Phone number found: {match_phone.group(0)}\") # group(0) 是整个匹配\n",
    "else:\n",
    "    print(\"No phone number found.\")\n",
    "\n",
    "# 替换匹配项\n",
    "replaced_text = re.sub(r\"Spain\", \"Portugal\", text)\n",
    "print(f\"Replaced text: {replaced_text[:30]}...\")\n",
    "\n",
    "# 分割字符串\n",
    "parts = re.split(r\"\\.\\s*\", text) # 按句号和可选空格分割\n",
    "print(f\"Split parts (first 2): {parts[:2]}\")\n",
    "\n",
    "# 编译正则表达式以提高效率 (如果多次使用)\n",
    "email_pattern = re.compile(r\"[\\w\\.-]+@[\\w\\.-]+\\.\\w+\")\n",
    "match_email = email_pattern.search(text)\n",
    "if match_email:\n",
    "    print(f\"Email found: {match_email.group(0)}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 10. `collections` - 容器数据类型\n",
    "\n",
    "提供了标准内置容器 `dict`, `list`, `set`, 和 `tuple` 的替代品，以及一些专门的容器类型。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import Counter, defaultdict, deque, namedtuple\n",
    "\n",
    "# Counter: 用于计算可哈希对象的频率\n",
    "word_list = [\"apple\", \"banana\", \"apple\", \"orange\", \"banana\", \"apple\"]\n",
    "word_counts = Counter(word_list)\n",
    "print(f\"Word counts: {word_counts}\")\n",
    "print(f\"Most common: {word_counts.most_common(1)}\")\n",
    "\n",
    "# defaultdict: 当访问不存在的键时，提供一个默认值\n",
    "name_dd = defaultdict(lambda: \"Unknown\") # 默认值工厂函数\n",
    "name_dd['Alice'] = 'Engineer'\n",
    "print(f\"Name Alice: {name_dd['Alice']}\")\n",
    "print(f\"Name Bob (not set): {name_dd['Bob']}\") # 会返回 'Unknown'\n",
    "\n",
    "city_list_dd = defaultdict(list) # 默认值为空列表\n",
    "city_list_dd['USA'].append('New York')\n",
    "city_list_dd['USA'].append('Los Angeles')\n",
    "city_list_dd['Canada'].append('Toronto')\n",
    "print(f\"Cities: {dict(city_list_dd)}\") # Convert to dict for cleaner print\n",
    "\n",
    "# deque: 双端队列，支持从两端高效添加和删除元素\n",
    "d = deque([1, 2, 3])\n",
    "d.append(4)       # 从右端添加\n",
    "d.appendleft(0)   # 从左端添加\n",
    "print(f\"Deque: {d}\")\n",
    "print(f\"Popped from right: {d.pop()}\")\n",
    "print(f\"Popped from left: {d.popleft()}\")\n",
    "print(f\"Deque after pops: {d}\")\n",
    "\n",
    "# namedtuple: 创建带有命名字段的元组子类\n",
    "Point = namedtuple('Point', ['x', 'y', 'z'])\n",
    "p1 = Point(10, 20, 30)\n",
    "print(f\"Named tuple Point: {p1}\")\n",
    "print(f\"p1.x: {p1.x}, p1.y: {p1.y}\")\n",
    "print(f\"p1[0]: {p1[0]}\") # 也可以通过索引访问"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 11. `itertools` - 高效迭代的函数\n",
    "\n",
    "包含一系列用于创建高效迭代器的函数。在“生成器与迭代器协议”教程中已有详细介绍，这里仅作简单回顾。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import itertools\n",
    "\n",
    "# count: 无限计数器\n",
    "counter = itertools.count(start=5, step=2)\n",
    "print(\"First 3 from count(5, 2):\", next(counter), next(counter), next(counter))\n",
    "\n",
    "# cycle: 无限循环可迭代对象\n",
    "cycler = itertools.cycle(\"AB\")\n",
    "print(\"First 5 from cycle('AB'):\", next(cycler), next(cycler), next(cycler), next(cycler), next(cycler))\n",
    "\n",
    "# chain: 连接多个可迭代对象\n",
    "chained_iter = itertools.chain([1, 2], ('a', 'b'), 'CD')\n",
    "print(f\"Chained list: {list(chained_iter)}\")\n",
    "\n",
    "# combinations: 生成组合\n",
    "elements = ['X', 'Y', 'Z']\n",
    "combs = itertools.combinations(elements, 2)\n",
    "print(f\"Combinations of {elements} taken 2 at a time: {list(combs)}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 12. `functools` - 高阶函数和可调用对象的操作\n",
    "\n",
    "提供用于处理函数和可调用对象的工具。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import functools\n",
    "\n",
    "# partial: 固定函数的部分参数，返回一个新的可调用对象\n",
    "def power(base, exponent):\n",
    "    return base ** exponent\n",
    "\n",
    "square = functools.partial(power, exponent=2)\n",
    "cube = functools.partial(power, exponent=3)\n",
    "print(f\"square(5): {square(5)}\") # 25\n",
    "print(f\"cube(3): {cube(3)}\")   # 27\n",
    "\n",
    "# lru_cache: 为函数结果提供最近最少使用 (LRU) 缓存 (装饰器)\n",
    "@functools.lru_cache(maxsize=128) # 缓存最多128个结果\n",
    "def fibonacci(n):\n",
    "    if n < 2:\n",
    "        return n\n",
    "    # print(f\"Calculating fibonacci({n})\") # 只在未缓存时打印 (Jupyter中多次运行cell会重置缓存)\n",
    "    return fibonacci(n-1) + fibonacci(n-2)\n",
    "\n",
    "print(f\"\\nCalculating Fibonacci numbers with LRU cache:\")\n",
    "print(f\"fibonacci(10): {fibonacci(10)}\")\n",
    "print(\"Calling fibonacci(10) again (should be cached):\")\n",
    "print(f\"fibonacci(10): {fibonacci(10)}\")\n",
    "print(f\"Cache info for fibonacci: {fibonacci.cache_info()}\")\n",
    "fibonacci.cache_clear() # 清除缓存\n",
    "print(f\"Cache info after clear: {fibonacci.cache_info()}\")\n",
    "\n",
    "# wraps: 用于编写装饰器时，保留被装饰函数的元信息\n",
    "def my_decorator(func):\n",
    "    @functools.wraps(func) # 重要！\n",
    "    def wrapper(*args, **kwargs):\n",
    "        # print(\"Something is happening before the function is called.\")\n",
    "        result = func(*args, **kwargs)\n",
    "        # print(\"Something is happening after the function is called.\")\n",
    "        return result\n",
    "    return wrapper\n",
    "\n",
    "@my_decorator\n",
    "def say_hello(name):\n",
    "    \"\"\"A simple greeting function.\"\"\"\n",
    "    print(f\"Hello, {name}!\")\n",
    "\n",
    "say_hello(\"World\")\n",
    "print(f\"say_hello name: {say_hello.__name__}\")\n",
    "print(f\"say_hello doc: {say_hello.__doc__}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 13. `pathlib` - 面向对象的文件系统路径 (Python 3.4+)\n",
    "\n",
    "提供了一种面向对象的方式来处理文件和目录路径，通常比 `os.path` 更易用和直观。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "import os # For cleanup\n",
    "\n",
    "# 创建 Path 对象\n",
    "current_dir = Path.cwd() # 当前工作目录\n",
    "home_dir = Path.home()   # 用户主目录\n",
    "print(f\"Current directory: {current_dir}\")\n",
    "print(f\"Home directory: {home_dir}\")\n",
    "\n",
    "# 路径拼接 (使用 / 操作符)\n",
    "test_dir_pathlib_name = \"my_pathlib_dir_std_lib\"\n",
    "test_path = current_dir / test_dir_pathlib_name / \"test_file_pathlib.txt\"\n",
    "print(f\"Constructed path: {test_path}\")\n",
    "\n",
    "# 获取路径的各个部分\n",
    "print(f\"Parent directory: {test_path.parent}\")\n",
    "print(f\"File name: {test_path.name}\")\n",
    "print(f\"File stem (name without suffix): {test_path.stem}\")\n",
    "print(f\"File suffix: {test_path.suffix}\")\n",
    "\n",
    "# 检查路径属性\n",
    "print(f\"Does '{test_path}' exist? {test_path.exists()}\")\n",
    "print(f\"Is it a file? {test_path.is_file()}\")\n",
    "print(f\"Is it a directory? {test_path.is_dir()}\")\n",
    "\n",
    "# 创建目录和文件 (示例)\n",
    "test_dir_pathlib = current_dir / test_dir_pathlib_name\n",
    "test_dir_pathlib.mkdir(parents=True, exist_ok=True) # 创建目录，包括父目录，如果已存在则忽略\n",
    "print(f\"Directory '{test_dir_pathlib}' created or already exists.\")\n",
    "\n",
    "file_in_pathlib_dir = test_dir_pathlib / \"another_file_pathlib.txt\"\n",
    "file_in_pathlib_dir.write_text(\"Hello from pathlib!\\nThis is a test.\")\n",
    "print(f\"Content written to '{file_in_pathlib_dir}'\")\n",
    "print(f\"Content read: '{file_in_pathlib_dir.read_text().strip()}'\")\n",
    "\n",
    "# 遍历目录\n",
    "print(\"\\nFiles in current directory (glob *.ipynb for example):\")\n",
    "count = 0\n",
    "for py_file in current_dir.glob('*.ipynb'): # 查找当前目录下所有 .ipynb 文件\n",
    "    print(f\"  - {py_file.name}\")\n",
    "    count += 1\n",
    "    if count >=3: break # Limit output\n",
    "\n",
    "# 清理\n",
    "if file_in_pathlib_dir.exists():\n",
    "    file_in_pathlib_dir.unlink() # 删除文件\n",
    "if test_dir_pathlib.exists():\n",
    "    # pathlib.Path.rmdir() can only remove empty directories.\n",
    "    # For non-empty, you'd use shutil.rmtree or os.rmdir after emptying.\n",
    "    try:\n",
    "        test_dir_pathlib.rmdir() # 删除空目录\n",
    "    except OSError as e:\n",
    "        print(f\"Could not remove {test_dir_pathlib}: {e} (may not be empty or other issue)\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 14. `urllib.request` - 打开和读取 URL\n",
    "\n",
    "用于获取 URL (例如 HTTP, FTP)。对于更复杂的 HTTP 请求 (如 POST, headers, cookies)，通常推荐使用第三方库如 `requests`。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import urllib.request\n",
    "import urllib.error\n",
    "import json # For parsing the example API response\n",
    "\n",
    "url_to_fetch = \"https://jsonplaceholder.typicode.com/todos/1\" # 一个公共的测试API\n",
    "\n",
    "print(f\"--- Fetching URL: {url_to_fetch} ---\")\n",
    "try:\n",
    "    with urllib.request.urlopen(url_to_fetch, timeout=10) as response: # Added timeout\n",
    "        print(f\"Status code: {response.status}\")\n",
    "        print(f\"Headers (Content-Type): {response.getheader('Content-Type')}\")\n",
    "        \n",
    "        content_bytes = response.read()\n",
    "        content_str = content_bytes.decode('utf-8')\n",
    "        print(f\"\\nContent (first 100 chars):\\n{content_str[:100]}...\")\n",
    "        \n",
    "        todo_item = json.loads(content_str)\n",
    "        print(f\"\\nParsed JSON title: {todo_item.get('title')}\")\n",
    "\n",
    "except urllib.error.URLError as e:\n",
    "    print(f\"Error fetching URL {url_to_fetch}: {e.reason}\")\n",
    "except urllib.error.HTTPError as e:\n",
    "    print(f\"HTTP Error for {url_to_fetch}: {e.code} {e.reason}\")\n",
    "except Exception as e:\n",
    "    print(f\"An unexpected error occurred: {e}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 15. `http.server` - 简单的 HTTP 服务器\n",
    "\n",
    "提供了一个基本的 HTTP 服务器实现，主要用于测试或简单的本地文件共享。\n",
    "**注意**：通常在脚本中运行，而不是直接在 Jupyter 单元格中长时间运行，因为它会阻塞。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import http.server\n",
    "import socketserver\n",
    "import threading\n",
    "import time\n",
    "\n",
    "PORT = 8002 # Changed port to avoid conflict if previous cell was run recently\n",
    "\n",
    "def run_simple_server_briefly():\n",
    "    Handler = http.server.SimpleHTTPRequestHandler\n",
    "    httpd = None\n",
    "    server_thread = None\n",
    "    try:\n",
    "        httpd = socketserver.TCPServer((\"localhost\", PORT), Handler)\n",
    "        print(f\"Serving HTTP on localhost port {PORT}...\")\n",
    "        \n",
    "        server_thread = threading.Thread(target=httpd.serve_forever)\n",
    "        server_thread.daemon = True \n",
    "        server_thread.start()\n",
    "        \n",
    "        print(\"Server started in a thread. Will run for ~3 seconds.\")\n",
    "        time.sleep(3) # Let server run for a short time for demo\n",
    "\n",
    "    except OSError as e:\n",
    "        print(f\"Could not start server on port {PORT}: {e}. Port might be in use.\")\n",
    "    finally:\n",
    "        if httpd:\n",
    "            print(\"Shutting down the server...\")\n",
    "            httpd.shutdown()\n",
    "            httpd.server_close()\n",
    "            if server_thread and server_thread.is_alive():\n",
    "                 server_thread.join(timeout=1)\n",
    "        print(\"Server shut down attempt complete.\")\n",
    "\n",
    "print(\"--- Simple HTTP Server Example (runs briefly) ---\")\n",
    "# run_simple_server_briefly() # Commented out by default\n",
    "print(\"http.server example is commented out. Uncomment 'run_simple_server_briefly()' to try.\")\n",
    "print(f\"If you run it, access http://localhost:{PORT}/ in your browser within 3 seconds.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 16. `subprocess` - 子进程管理\n",
    "\n",
    "允许你创建新的子进程，连接到它们的输入/输出/错误管道，并获取它们的返回码。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import subprocess\n",
    "import os\n",
    "import sys\n",
    "\n",
    "print(\"--- Running an OS command via subprocess --- \")\n",
    "try:\n",
    "    command = ['dir'] if os.name == 'nt' else ['ls', '-l', '.']\n",
    "    result = subprocess.run(command, capture_output=True, text=True, check=False, timeout=5)\n",
    "\n",
    "    print(f\"Command: {' '.join(command)}\")\n",
    "    print(f\"Return code: {result.returncode}\")\n",
    "    if result.stdout:\n",
    "        print(f\"Stdout (first 150 chars):\\n{result.stdout[:150].strip()}...\")\n",
    "    if result.stderr:\n",
    "        print(f\"Stderr:\\n{result.stderr.strip()}\")\n",
    "\n",
    "except FileNotFoundError:\n",
    "    print(f\"Error: Command '{command[0]}' not found.\")\n",
    "except subprocess.TimeoutExpired:\n",
    "    print(f\"Error: Command '{' '.join(command)}' timed out.\")\n",
    "except Exception as e:\n",
    "    print(f\"An unexpected error occurred with subprocess: {e}\")\n",
    "\n",
    "python_executable = sys.executable\n",
    "script_content = \"import sys; print(f'Hello from subprocess script! Python: {sys.version_info.major}.{sys.version_info.minor}')\"\n",
    "print(\"\\n--- Running a Python one-liner via subprocess ---\")\n",
    "try:\n",
    "    py_result = subprocess.run([python_executable, \"-c\", script_content], \n",
    "                               capture_output=True, text=True, check=True, timeout=5)\n",
    "    print(f\"Python script stdout:\\n{py_result.stdout.strip()}\")\n",
    "except Exception as e:\n",
    "    print(f\"Error running python script via subprocess: {e}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 17. `csv` - CSV 文件读写\n",
    "\n",
    "用于处理逗号分隔值 (CSV) 文件。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import os # For cleanup\n",
    "\n",
    "csv_file_path = \"example_std_lib.csv\"\n",
    "data_to_write = [\n",
    "    ['Name', 'Age', 'City'],\n",
    "    ['Alice', 30, 'New York'],\n",
    "    ['Bob', 24, 'Los Angeles'],\n",
    "    ['Charlie', 35, 'Chicago']\n",
    "]\n",
    "\n",
    "try:\n",
    "    with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:\n",
    "        csv_writer = csv.writer(csvfile)\n",
    "        for row in data_to_write:\n",
    "            csv_writer.writerow(row)\n",
    "    print(f\"Data written to {csv_file_path}\")\n",
    "\n",
    "    print(\"\\nReading data from CSV:\")\n",
    "    with open(csv_file_path, 'r', newline='', encoding='utf-8') as csvfile:\n",
    "        csv_reader = csv.reader(csvfile)\n",
    "        header = next(csv_reader)\n",
    "        print(f\"Header: {header}\")\n",
    "        for row in csv_reader:\n",
    "            print(f\"  Row: {row}\")\n",
    "\n",
    "    print(\"\\nReading data using DictReader:\")\n",
    "    with open(csv_file_path, 'r', newline='', encoding='utf-8') as csvfile:\n",
    "        dict_reader = csv.DictReader(csvfile)\n",
    "        for row_dict in dict_reader:\n",
    "            print(f\"  DictRow: Name={row_dict['Name']}, Age={row_dict['Age']}\")\n",
    "finally:\n",
    "    if os.path.exists(csv_file_path):\n",
    "        os.remove(csv_file_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 18. `sqlite3` - SQLite 数据库接口\n",
    "\n",
    "提供了与 SQLite 数据库文件交互的 DB-API 2.0 兼容接口。SQLite 是一个轻量级的、基于文件的数据库。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sqlite3\n",
    "import os # For cleanup\n",
    "\n",
    "db_file = \"mydatabase_std_lib.db\"\n",
    "conn = None\n",
    "\n",
    "try:\n",
    "    conn = sqlite3.connect(db_file)\n",
    "    cursor = conn.cursor()\n",
    "    print(f\"Connected to SQLite database: {db_file}\")\n",
    "\n",
    "    cursor.execute('''CREATE TABLE IF NOT EXISTS employees\n",
    "                      (id INTEGER PRIMARY KEY, name TEXT, department TEXT)''')\n",
    "    conn.commit()\n",
    "    print(\"Table 'employees' created or exists.\")\n",
    "\n",
    "    employees_data = [\n",
    "        (1, 'Eve', 'Engineering'),\n",
    "        (2, 'Frank', 'Sales')\n",
    "    ]\n",
    "    # Insert, ignoring if ID already exists (for reruns)\n",
    "    cursor.executemany(\"INSERT OR IGNORE INTO employees VALUES (?,?,?)\", employees_data)\n",
    "    conn.commit()\n",
    "    print(f\"{cursor.rowcount} new rows inserted (or 0 if already present).\")\n",
    "\n",
    "    print(\"\\nQuerying all employees:\")\n",
    "    for row in cursor.execute(\"SELECT * FROM employees\"):\n",
    "        print(f\"  {row}\")\n",
    "\n",
    "except sqlite3.Error as e:\n",
    "    print(f\"SQLite error: {e}\")\n",
    "finally:\n",
    "    if conn:\n",
    "        conn.close()\n",
    "        print(\"\\nSQLite connection closed.\")\n",
    "    if os.path.exists(db_file):\n",
    "        os.remove(db_file)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 19. `logging` - 日志工具\n",
    "\n",
    "提供了一个灵活的事件日志系统。应用程序和库可以使用它来记录事件。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "\n",
    "# Get a logger instance\n",
    "logger = logging.getLogger('StdLibTutorialLogger')\n",
    "\n",
    "# Configure logger (important for Jupyter, do it only once or clear handlers)\n",
    "if not logger.handlers:\n",
    "    logger.setLevel(logging.DEBUG) # Set level for this specific logger\n",
    "    ch = logging.StreamHandler() # Console handler\n",
    "    ch.setLevel(logging.DEBUG)\n",
    "    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')\n",
    "    ch.setFormatter(formatter)\n",
    "    logger.addHandler(ch)\n",
    "else: # If re-running cell, ensure level is still set\n",
    "    logger.setLevel(logging.DEBUG)\n",
    "    for handler in logger.handlers:\n",
    "        handler.setLevel(logging.DEBUG)\n",
    "\n",
    "print(\"--- Logging Example (output to console/stderr) ---\")\n",
    "logger.debug(\"This is a debug message for the tutorial.\")\n",
    "logger.info(\"Informational message here.\")\n",
    "logger.warning(\"A warning occurred.\")\n",
    "logger.error(\"An error has happened.\")\n",
    "logger.critical(\"Critical failure!\")\n",
    "\n",
    "try:\n",
    "    x = 1 / 0\n",
    "except ZeroDivisionError:\n",
    "    logger.exception(\"ZeroDivisionError caught and logged with stack trace\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 20. `argparse` - 命令行参数解析\n",
    "\n",
    "用于编写用户友好的命令行接口。它解析 `sys.argv` 中的参数。\n",
    "**注意**：通常在独立的 Python 脚本中使用，而不是直接在 Jupyter Notebook 中，因为 Jupyter 的参数传递方式不同。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import argparse\n",
    "\n",
    "simulated_args_list = ['my_script.py', 'input.txt', '--verbose', '-n', '10']\n",
    "\n",
    "def main_argparse_demo(args_to_parse=None):\n",
    "    parser = argparse.ArgumentParser(description=\"Argparse demo for std lib tutorial.\")\n",
    "    parser.add_argument(\"filename\", help=\"The input filename\")\n",
    "    parser.add_argument(\"-n\", \"--count\", type=int, default=1, help=\"Number of times to process\")\n",
    "    parser.add_argument(\"-v\", \"--verbose\", action=\"store_true\", help=\"Enable verbose output\")\n",
    "\n",
    "    actual_args_to_pass = None\n",
    "    if args_to_parse:\n",
    "        actual_args_to_pass = args_to_parse[1:] # Exclude script name for parse_args\n",
    "    \n",
    "    try:\n",
    "        # In a script, you'd use: args = parser.parse_args()\n",
    "        args = parser.parse_args(actual_args_to_pass) \n",
    "        print(f\"\\nParsed arguments:\")\n",
    "        print(f\"  Filename: {args.filename}\")\n",
    "        print(f\"  Count: {args.count}\")\n",
    "        print(f\"  Verbose: {args.verbose}\")\n",
    "        if args.verbose:\n",
    "            print(\"Verbose mode enabled.\")\n",
    "    except SystemExit as e:\n",
    "        # argparse calls sys.exit() on errors or when --help is used.\n",
    "        # In Jupyter, this might just print help/error and not kill the kernel.\n",
    "        print(f\"argparse exited with code: {e.code} (Likely printed help or an error message)\")\n",
    "    except Exception as e:\n",
    "        print(f\"Error during argparse: {e}\")\n",
    "\n",
    "print(\"--- Argparse Example (simulating command line args) ---\")\n",
    "main_argparse_demo(simulated_args_list)\n",
    "\n",
    "print(\"\\n--- Argparse Example (simulating --help) ---\")\n",
    "main_argparse_demo(['my_script.py', '--help'])\n",
    "\n",
    "print(\"\\n--- Argparse Example (simulating missing required arg) ---\")\n",
    "main_argparse_demo(['my_script.py']) # Missing 'filename'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 总结\n",
    "\n",
    "Python 标准库是其强大功能和广泛适用性的重要组成部分。熟悉这些常用模块可以极大地提高你的开发效率。\n",
    "\n",
    "**进一步学习：**\n",
    "\n",
    "*   **官方文档**：Python 官方文档是学习标准库最权威、最全面的资源。\n",
    "*   **实践**：尝试在你的项目中使用这些模块来解决实际问题。\n",
    "*   **探索更多模块**：标准库中还有许多其他有用的模块，例如 `glob` (文件名模式匹配), `shutil` (高级文件操作), `pickle` (对象序列化), `gzip`/`zipfile` (压缩), `threading`/`multiprocessing`/`asyncio` (并发) 等等。\n",
    "\n",
    "祝你使用 Python 标准库愉快！"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
diff --git a/transformers_tutorial.ipynb b/transformers_tutorial.ipynb