Last active
November 15, 2023 14:10
-
-
Save Hosuke/cffac46ee8034ce6f65a3729ac421d11 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pathlib import Path

import pandas as pd
import yaml
# Split a combined DEX base-trades seed CSV into one CSV per
# (project, blockchain) pair and generate the matching dbt seed schema file.

SOURCE_CSV = 'dex_base_trades_seed.csv'
SCHEMA_FILE = 'schema.yml'

# Column types declared for every generated seed in schema.yml.
SEED_COLUMN_TYPES = {
    "blockchain": "varchar",
    "project": "varchar",
    "version": "varchar",
    "tx_hash": "varbinary",
    "evt_index": "uint256",
    "block_number": "uint256",
    "token_bought_address": "varbinary",
    "token_sold_address": "varbinary",
    "token_bought_amount_raw": "uint256",
    "token_sold_amount_raw": "uint256",
    "block_date": "timestamp",
}


def load_trades(csv_source):
    """Read the combined seed CSV with every column kept as text.

    Reading with ``dtype=str`` stops pandas from inferring numeric types.
    Inference would rewrite values when the rows are written back out
    (e.g. a version of ``1.10`` becoming ``1.1``, or integer columns with
    an empty cell turning into floats), and the seed data must survive the
    split unchanged.

    Args:
        csv_source: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        A DataFrame whose non-missing cells are all strings.
    """
    return pd.read_csv(csv_source, dtype=str)


def seed_name_for(project, blockchain):
    """Return the seed name for a (project, blockchain) pair.

    Only the first whitespace-separated word of ``project`` is used.

    NOTE: two projects that share a first word on the same blockchain map
    to the same seed name; the later group then overwrites the earlier CSV
    and a duplicate entry is added to the schema.
    """
    project_name = project.split()[0]
    return f"{project_name}_{blockchain}_base_trades_seed"


def build_seed_entry(seed_name):
    """Return the schema entry (name + column types) for one seed."""
    return {
        "name": seed_name,
        "config": {
            # Copy the mapping: if every seed shared one dict object,
            # yaml.dump would emit anchors/aliases instead of plain maps.
            "column_types": dict(SEED_COLUMN_TYPES),
        },
    }


def split_trades(df, output_dir="."):
    """Write one CSV per (project, blockchain) group and build the schema.

    Args:
        df: DataFrame containing at least 'project' and 'blockchain' columns.
        output_dir: Directory the per-group CSV files are written to
            (defaults to the current directory).

    Returns:
        The schema dictionary: ``{"version": 2, "seeds": [...]}`` with one
        entry per written CSV, in groupby order.
    """
    schema = {
        "version": 2,
        "seeds": [],
    }
    for (project, blockchain), group in df.groupby(['project', 'blockchain']):
        seed_name = seed_name_for(project, blockchain)
        group.to_csv(Path(output_dir) / f"{seed_name}.csv", index=False)
        schema["seeds"].append(build_seed_entry(seed_name))
    return schema


def write_schema(schema, schema_path=SCHEMA_FILE):
    """Serialize ``schema`` to YAML (keys kept in insertion order) and save it."""
    schema_yaml = yaml.dump(schema, sort_keys=False)
    # Explicit encoding so the output does not depend on the platform default.
    with open(schema_path, 'w', encoding='utf-8') as file:
        file.write(schema_yaml)


def main():
    """Split the combined seed CSV and write schema.yml next to the outputs."""
    df = load_trades(SOURCE_CSV)
    schema = split_trades(df)
    write_schema(schema)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment