python3 gen-user.py 1000000
mv guser.csv csv_files
docker exec -it mydb /bin/bash
cd /var/lib/mysql_files
mysql -u honux -p honuxdb --local-infile
myqsl> set global local_infile=ON
mysql> drop table if exists... (테이블 생성)
mysql> load data local infile 'guser.csv' into table user fields terminated by ',' ignore 1 lines;
Last active
October 23, 2025 07:27
-
-
Save honux77/40d78662448796e8a3d5cf439e7ba62e to your computer and use it in GitHub Desktop.
Generate Random User data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| def generateMoney(numPlayers): | |
| # global variables and Constants | |
| MAX_MONEY = 9_999_900 | |
| MIN_MONEY = 0 | |
| STEP = 100 | |
| # 정규분포 설정 | |
| mean = (MAX_MONEY - MIN_MONEY) / 2 # 중앙값을 평균으로 | |
| std_dev = mean / 6 # 대략 ±3σ가 전체 범위가 되도록 설정 | |
| # 정규분포로 난수 생성 | |
| money_values = np.random.normal(loc=mean, scale=std_dev, size=numPlayers) | |
| # 범위를 벗어나는 값들을 잘라내기 (클리핑) | |
| money_values = np.clip(money_values, MIN_MONEY, MAX_MONEY) | |
| # 100원 단위로 반올림 | |
| money_values = (np.round(money_values / STEP) * STEP).astype(int) | |
| return money_values | |
| def generateRank(numPlayers): | |
| # game rank, B, S, G, P, D, C. B가 가장 빈도가 높고, C가 가장 낮음 | |
| # B = 50%, S = 25%, G = 15%, P = 5%, D = 3%, C = 2% | |
| ranks = ["B", "S", "G", "P", "D", "C"] | |
| rank_values = np.random.choice(ranks, size=numPlayers, p=[0.5, 0.25, 0.15, 0.05, 0.03, 0.02]) | |
| return rank_values | |
| def generateStartDate(numPlayers): | |
| # start_date: 최근 1년 이내의 날짜, 정규분포, yyyy-mm-dd 포맷 | |
| from datetime import datetime, timedelta | |
| days = np.random.normal(loc=180, scale=30, size=numPlayers) | |
| start_date_values = [datetime.now() - timedelta(days=float(d)) for d in days] | |
| return [d.strftime("%Y-%m-%d") for d in start_date_values] | |
| def generateLastVisitDate(numPlayers): | |
| ''' | |
| last_visit: 최근 3개월 이내의 날짜, 감마분포, 최근으로 올수록 빈도가 높아야 함, yyyy-mm-dd 포맷 | |
| ''' | |
| from datetime import datetime, timedelta | |
| MAX_DAYS = 90 | |
| shape = 2.0 # 감마 분포의 모양 파라미터 | |
| scale = 15.0 # 감마 분포의 스케일 파라미터 | |
| days = np.random.gamma(shape, scale, numPlayers) | |
| last_visit_values = [datetime.now() - timedelta(days=float(d)) for d in days] | |
| return [d.strftime("%Y-%m-%d %H:%M:%S") for d in last_visit_values] | |
| if __name__ == "__main__": | |
| # 테스트 | |
| print(generateMoney(10)) | |
| print(generateRank(10)) | |
| print(generateStartDate(10)) | |
| print(generateLastVisitDate(10)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| version: "3.9" | |
| services: | |
| mysql: | |
| image: mysql:8.4 | |
| container_name: mydb | |
| restart: always | |
| environment: | |
| MYSQL_ROOT_PASSWORD: db1004 # 루트 비밀번호 | |
| MYSQL_DATABASE: honuxdb # 초기 생성할 DB명 | |
| MYSQL_USER: honux | |
| MYSQL_PASSWORD: bandi1004 | |
| ports: | |
| - "3306:3306" | |
| volumes: | |
| # 1️⃣ MySQL 데이터 파일 (DB 데이터) | |
| - ./mysql_data:/var/lib/mysql | |
| # 2️⃣ CSV 파일 (LOAD DATA INFILE용) | |
| - ./csv_files:/var/lib/mysql-files | |
| command: | |
| --local-infile=1 | |
| --secure-file-priv=/var/lib/mysql-files |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import sys | |
| import numpy as np | |
| import random | |
| ''' | |
| create table trade ( | |
| id int primary key auto_increment, | |
| seller int, | |
| item_name varchar(255), | |
| price dec(7, 0), | |
| trade_date date, | |
| foreign key (seller) references guser(id) | |
| ) character set utf8mb4; | |
| ''' | |
| from ninckname import generate_items | |
| if len(sys.argv) < 3: | |
| print("Usage: {} numItem numUser".format(sys.argv[0])) | |
| exit(1) | |
| size = int(sys.argv[1]) | |
| usize = int(sys.argv[2]) | |
| def genUid(): | |
| return random.randint(1, usize) | |
| def genPrice(): | |
| return int(random.random()* 1000) * 100 | |
| def genTradeDate(): | |
| from datetime import datetime, timedelta | |
| days_ago = random.randint(0, 365) | |
| trade_date = datetime.now() - timedelta(days=days_ago) | |
| return trade_date.strftime("%Y-%m-%d") | |
| if __name__ == "__main__": | |
| import pandas as pd | |
| ids = np.arange(1, size + 1) | |
| items = generate_items(size) | |
| uids = [genUid() for _ in range(size)] | |
| prices = [genPrice() for _ in range(size)] | |
| trade_dates = [genTradeDate() for _ in range(size)] | |
| df = pd.DataFrame({ | |
| "id": ids, | |
| "seller": uids, | |
| "item_name": items, | |
| "price": prices, | |
| "trade_date": trade_dates | |
| }) | |
| print(df.head()) | |
| df.to_csv("trade.csv", index=False) | |
| print(f"Generated {size} trade records to trade.csv") | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import sys | |
| import numpy as np | |
| import random | |
| from namae import generate_names | |
| from ninckname import generate_usernames | |
| from amoosoo import generateMoney, generateRank, generateStartDate, generateLastVisitDate | |
| """ | |
| Generate player data for testing | |
| # Player(id, name, nickname, rank, money, start_date, last_login) | |
| # id: int pk auto_increment | |
| # name: 한글 2-5글자 | |
| # nickname: 영문 32글자 이내, unique | |
| # grank: 1 - 10, 정규분포로 1 쪽으로 갈수록 빈도가 높을 것 | |
| # money: $ 0.00- 99,999,999.99, 정규분포 | |
| # start_date: 최근 1년 이내의 날짜, 정규분포의 절반, 오래된 플레이어가 더 많을 것, yyyy-mm-dd 포맷 | |
| # last_login: start_date 보다 클 것, 최근 3개월 이내, 정규분포의 절반, 최근 플레이어가 더 많을 것, NUNLL 허용, yyyy-mm-dd hh:mm:ss 포맷 | |
| """ | |
| np.random.seed(0) | |
| if __name__ == "__main__": | |
| if len(sys.argv) < 2: | |
| print("Usage: python gen-player.py [numPlayers]") | |
| exit(1) | |
| else: | |
| import pandas as pd | |
| numPlayers = int(sys.argv[1]) | |
| # 만든 함수를 기반으로 레코드 생성, csv로 출력 | |
| names = generate_names(numPlayers, include_compound_surname=True) | |
| nicknames = generate_usernames(numPlayers, style="underscore", add_number=True, number_range=(0, 9999), max_length=32) | |
| ranks = generateRank(numPlayers) | |
| moneys = generateMoney(numPlayers) | |
| start_dates = generateStartDate(numPlayers) | |
| last_visit_dates = generateLastVisitDate(numPlayers) | |
| # DataFrame 생성 | |
| df = pd.DataFrame({ | |
| "id": np.arange(1, numPlayers + 1), | |
| "name": names, | |
| "nickname": nicknames, | |
| "rank": ranks, | |
| "money": moneys, | |
| "start_date": start_dates, | |
| "last_login": last_visit_dates | |
| }) | |
| # 데이터 샘플 출력 | |
| print(df.head()) | |
| # CSV로 저장 | |
| df.to_csv("guser.csv", index=False) | |
| print(f"Generated {numPlayers} player records to guser.csv") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # sudo mysql -u root tigerdb --local-infile=1 < insert.sql | |
| set | |
| global local_infile = 1; | |
| alter table | |
| guser disable keys; | |
| set | |
| foreign_key_checks = 0; | |
| load data local infile 'guser.csv' into table guser fields terminated by ',' ignore 1 lines; | |
| load data local infile 'trade.csv' into table trade fields terminated by ',' ignore 1 lines; | |
| alter table | |
| guser enable keys; | |
| set | |
| foreign_key_checks = 1; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import random | |
| from typing import List, Literal, Optional | |
| # 상위 빈도 성씨 (통계청 상위권 + 자주 쓰는 성씨) | |
| COMMON_SURNAMES = [ | |
| "김","이","박","최","정","강","조","윤","장","임", | |
| "오","한","신","서","권","황","안","송","류","홍", | |
| "전","고","문","양","손","배","백","허","노","하", | |
| "곽","성","차","주","우","구","민","유","진","지", | |
| "엄","채","원","천","방","공","현","변" | |
| ] | |
| # 복성(두 글자 성) — 필요 시만 포함 | |
| RARE_COMPOUND_SURNAMES = ["남궁","제갈","선우","서문","독고","황보","사공","동방","공손","탁"] | |
| # 성별/중성 이름 음절(자주 쓰이는 한글 음절; 한자음 의미는 생략) | |
| # 가중치는 뒤쪽 섹션에서 부여 | |
| SYL_MASC = ["민","준","현","우","준","호","도","윤","진","혁","태","성","재","석","훈","기","범","환","건","원"] | |
| SYL_FEM = ["서","지","수","은","연","아","예","유","린","슬","솔","하","소","채","미","영","희","진","나","현"] | |
| SYL_NEUT = ["은","윤","진","지","현","유","민","연","하","수","성","태","아","예","우","채","서","소","원","도"] | |
| # 간단한 가중치(출현 빈도 느낌만 반영; 합리적/임의 값) | |
| def _weights_for(syllables: List[str]) -> List[int]: | |
| base = { # 자주 쓰이는 음절은 가중치↑ | |
| "민":9,"준":10,"현":10,"서":10,"지":10,"수":9,"윤":9,"진":8,"아":8,"예":8, | |
| "우":8,"채":7,"연":7,"은":8,"하":8,"성":6,"태":7,"영":6,"희":5,"유":7, | |
| "소":6,"슬":4,"솔":3,"도":4,"재":6,"석":4,"훈":5,"기":5,"범":3,"환":4,"건":4,"원":5,"나":5,"린":6 | |
| } | |
| return [base.get(s, 3) for s in syllables] # 기본 3 | |
| # 성씨도 아주 대략적인 가중치(김/이/박/최/정 비중↑) | |
| def _surname_weights(surnames: List[str]) -> List[int]: | |
| boost = {"김":20,"이":18,"박":13,"최":8,"정":7} | |
| return [boost.get(s, 3) for s in surnames] | |
| def _pick_surname(include_compound: bool) -> str: | |
| pool = COMMON_SURNAMES[:] | |
| if include_compound: | |
| pool = pool + RARE_COMPOUND_SURNAMES | |
| weights = _surname_weights(pool) | |
| return random.choices(pool, weights=weights, k=1)[0] | |
| def _pick_syllable(gender: Literal["male","female","any"], position: Literal["first","second"]) -> str: | |
| # position은 '이름'의 1·2번째 음절 위치. 간단히 같은 풀에서 뽑되 성별로 가중치 달리. | |
| if gender == "male": | |
| pool = SYL_MASC + SYL_NEUT | |
| elif gender == "female": | |
| pool = SYL_FEM + SYL_NEUT | |
| else: | |
| pool = SYL_NEUT + SYL_MASC + SYL_FEM | |
| weights = _weights_for(pool) | |
| return random.choices(pool, weights=weights, k=1)[0] | |
| def make_korean_name( | |
| gender: Literal["male","female","any"]="any", | |
| include_compound_surname: bool=False, | |
| two_syllable_given: bool=True, | |
| separator: str="", # "" (공백 없음) 또는 " " (성/이름 사이 공백) | |
| ) -> str: | |
| """ | |
| 한국식 이름 생성기 (기본: 성 1글자 + 이름 2글자 = 3글자, 공백 없음) | |
| """ | |
| surname = _pick_surname(include_compound_surname) | |
| if two_syllable_given: | |
| given = _pick_syllable(gender, "first") + _pick_syllable(gender, "second") | |
| else: | |
| # 드물지만 1음절 이름 옵션 | |
| given = _pick_syllable(gender, "first") | |
| if separator: | |
| return f"{surname}{separator}{given}" | |
| return f"{surname}{given}" | |
| def generate_names( | |
| n: int, | |
| gender: Literal["male","female","any"]="any", | |
| include_compound_surname: bool=False, | |
| two_syllable_given: bool=True, | |
| separator: str="", | |
| unique: bool=False, | |
| seed: Optional[int]=None, | |
| ) -> List[str]: | |
| """ | |
| n개의 이름을 생성. | |
| - unique=True: 중복 제거(필요 시 더 뽑아 채움 — 충돌이 많으면 시간이 늘 수 있음) | |
| - seed: 난수 고정 | |
| """ | |
| if seed is not None: | |
| random.seed(seed) | |
| names: List[str] = [] | |
| if not unique: | |
| for _ in range(n): | |
| names.append(make_korean_name(gender, include_compound_surname, two_syllable_given, separator)) | |
| return names | |
| seen = set() | |
| # 최대 시도 제한을 넉넉히 둠 (풀 사이즈가 작은 편이라 충돌 가능성 존재) | |
| attempts, limit = 0, n * 20 | |
| while len(names) < n and attempts < limit: | |
| attempts += 1 | |
| name = make_korean_name(gender, include_compound_surname, two_syllable_given, separator) | |
| if name not in seen: | |
| seen.add(name) | |
| names.append(name) | |
| if len(names) < n: | |
| # 유일성 보장 실패 시 알려주고 현재까지 반환 | |
| # (필요하면 음절 풀을 더 늘리면 해결) | |
| print(f"[warn] 충돌이 많아 {n}개 중 {len(names)}개만 생성했습니다. 음절 풀을 늘리거나 unique=False를 사용하세요.") | |
| return names | |
| # 데모 | |
| if __name__ == "__main__": | |
| print("아무 성별 5개:", generate_names(5, gender="any", seed=42)) | |
| print("남성 스타일 5개:", generate_names(5, gender="male", seed=43)) | |
| print("여성 스타일 5개(복성 허용, 공백 포함):", generate_names(5, gender="female", include_compound_surname=True, separator=" ", seed=44)) | |
| print("유일 20개:", generate_names(20, unique=True, seed=45)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import random | |
| import numpy as np | |
| ADJECTIVES = [ | |
| "happy", "sleepy", "brave", "curious", "gentle", "wild", "clever", | |
| "calm", "shiny", "fuzzy", "lazy", "swift", "lucky", "bold", "silent", | |
| "tiny", "stormy", "crispy", "cozy", "sunny", "snowy", "blue", "green", | |
| "golden", "silver", "mystic", "cosmic", "retro", "neon", "pixel" | |
| ] | |
| NOUNS = [ | |
| "cat", "dog", "otter", "fox", "panda", "bear", "wolf", "lion", "tiger", | |
| "owl", "whale", "eagle", "koala", "penguin", "seal", "rabbit", "monkey", | |
| "sun", "moon", "star", "sky", "forest", "mountain", "river", "ocean", | |
| "storm", "rain", "cloud", "fire", "shadow", "wind" | |
| ] | |
| ITEMS = [ | |
| "sword", "shield", "potion", "armor", "ring", "amulet", "boots", | |
| "helmet", "gloves", "bow", "arrow", "staff", "wand", "dagger", | |
| "cloak", "belt", "scroll", "gem", "map", "key" | |
| ] | |
| def make_username( | |
| style: str = "underscore", # "underscore", "dash", "camel" | |
| add_number: bool = True, | |
| number_range: tuple = (0, 9999), | |
| max_length: int = 30, | |
| seed: int | None = None | |
| ) -> str: | |
| """ | |
| 적당히 자연스러운 인터넷 아이디 생성기 | |
| """ | |
| if seed is not None: | |
| random.seed(seed) | |
| adj = random.choice(ADJECTIVES) | |
| noun = random.choice(NOUNS) | |
| # 결합 스타일 | |
| if style == "underscore": | |
| name = f"{adj}_{noun}" | |
| elif style == "dash": | |
| name = f"{adj}-{noun}" | |
| elif style == "camel": | |
| name = adj + noun.capitalize() | |
| else: | |
| raise ValueError("style must be one of: underscore, dash, camel") | |
| # 숫자 추가 | |
| if add_number and random.random() < 0.7: # 70% 확률로 숫자 붙이기 | |
| num = str(random.randint(*number_range)) | |
| name = f"{name}.{num}" | |
| # 길이 제한 (길면 중간 자름) | |
| if len(name) > max_length: | |
| name = name[:max_length] | |
| return name | |
| def generate_items( | |
| numItems: int, | |
| max_length: int = 32, | |
| seed: int | None = None | |
| ) -> np.ndarray: | |
| ''' | |
| numItems 수만큼 무작위 아이템을 생성한다. | |
| ''' | |
| if seed is not None: | |
| random.seed(seed) | |
| items = [ | |
| make_item() | |
| for _ in range(numItems) | |
| ] | |
| return np.array(items) | |
| def make_item() -> str: | |
| ''' | |
| 무작위 아이템 이름 생성 | |
| ''' | |
| adjective = random.choice(ADJECTIVES) | |
| item = random.choice(ITEMS) | |
| return f"{adjective}-{item}" | |
| def generate_usernames( | |
| numUsernames: int, | |
| style: str = "underscore", # "underscore", "dash", "camel" | |
| add_number: bool = True, | |
| number_range: tuple = (0, 9999), | |
| max_length: int = 15, | |
| seed: int | None = None | |
| ) -> np.ndarray: | |
| """ | |
| numUsernames 수만큼 유저네임을 생성한다. | |
| """ | |
| return np.array([ | |
| make_username( | |
| style=style, | |
| add_number=add_number, | |
| number_range=number_range, | |
| max_length=max_length, | |
| seed=seed | |
| ) | |
| for _ in range(numUsernames) | |
| ]) | |
| # ✅ 예시 | |
| if __name__ == "__main__": | |
| for name in generate_usernames(numUsernames = 10): | |
| print(name) | |
| print("\n-----\n") | |
| for item in generate_items(numItems = 10): | |
| print(item) | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Player(id, nickname, rank, money, start_date, last_login) | |
| # id: int pk auto_increment | |
| # name: 한글 2- 5글자 | |
| # nickname: 영문 8 - 32글자, unique | |
| # grank: B, S, G, P, D, C | |
| # money: 0 - 9999900 (원) | |
| # start_date as date: 최근 1년 이내의 날짜 | |
| # last_visit as datetime: start_date 보다 클 것, 최근 3개월 이내, NULL 가능 | |
| drop table if exists trade; | |
| drop table if exists guser; | |
| create table guser ( | |
| id int primary key auto_increment, | |
| name char(4), | |
| nickname varchar(64) unique, | |
| grank char(1), | |
| money dec(7, 0), | |
| start_date date, | |
| last_visit datetime | |
| ) character set utf8mb4; | |
| create table trade ( | |
| id int primary key auto_increment, | |
| seller int, | |
| item_name varchar(255), | |
| price dec(7, 0), | |
| trade_date date, | |
| foreign key (seller) references guser(id) | |
| ) character set utf8mb4; |
Bulk insert 시에 사용하는 일반적인 성능향상 팁들
ALTER TABLE your_table DISABLE KEYS;
-- 데이터 로드
ALTER TABLE your_table ENABLE KEYS;SET foreign_key_checks = 0;
-- 데이터 로드
SET foreign_key_checks = 1;SET autocommit = 0;
-- 데이터 로드
COMMIT;LOCK TABLES your_table WRITE;
-- 데이터 로드
UNLOCK TABLES;SET GLOBAL bulk_insert_buffer_size = 512 * 1024 * 1024; -- 예: 512MB
Author
Author
8.0 부터 server에서 root로 로그인후
set global local_infile=ON
필요합니다.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
실행하기