Skip to content

Instantly share code, notes, and snippets.

@honux77
Last active September 30, 2024 08:51
Show Gist options
  • Save honux77/40d78662448796e8a3d5cf439e7ba62e to your computer and use it in GitHub Desktop.
Save honux77/40d78662448796e8a3d5cf439e7ba62e to your computer and use it in GitHub Desktop.
Generate Random User data

스크립트 예시

python3 gen-user.py 1000000 > guser.csv
docker cp.user.csv mydb:/root
docker exec -it mydb /bin/bash
cd /root
mysql -u tiger -p tigerdb --local-infile

myqsl> set global local_infile=ON
mysql> drop table if exists... (테이블 생성)
mysql> load data local infile 'guser.csv' into table user fields terminated by ',';
import sys
import numpy as np
import random
# setup
words = []
if len(sys.argv) < 3:
print("Usage: {} numItem numUser".format(sys.argv[0]))
exit(1)
size = int(sys.argv[1])
usize = int(sys.argv[2])
with open("word.txt") as f:
for item in f:
words.append(item[:-1])
def genItem():
return "{}-{}".format(random.choice(words), random.randint(100, 20000))
def genUid():
return random.randint(1, usize)
def genPrice():
return random.random() * 5000
def printData(id):
print("{},{},{},{:.2f}".format(id, genUid(), genItem(), genPrice()))
for i in range(1, size + 1):
printData(i)
import sys
import numpy as np
import random
"""
Generate player data for testing
# Player(id, name, nickname, rank, money, start_date, last_login)
# id: int pk auto_increment
# name: 한글 2-5글자
# nickname: 영문 32글자 이내, unique
# grank: 1 - 10, 정규분포로 1 쪽으로 갈수록 빈도가 높을 것
# money: $ 0.00- 99,999,999.99, 정규분포
# start_date: 최근 1년 이내의 날짜, 정규분포의 절반, 오래된 플레이어가 더 많을 것
# last_login: start_date 보다 클 것, 최근 3개월 이내, 정규분포의 절반, 최근 플레이어가 더 많을 것, NUNLL 허용
"""
if len(sys.argv) < 2:
print("Usage: python gen-player.py [numPlayers]")
exit(1)
else:
numPlayers = int(sys.argv[1])
# global variables and Constants
MAX_MONEY = 99999999.99
# oldest login month
# LAST_START = current - 1 year
# LAST_LOGIN = current - 91 days
np.random.seed(0)
# read words
with open("word.txt") as f:
words = [line.strip() for line in f]
moneyDisribution = np.random.normal(0.5, 0.1, numPlayers) * MAX_MONEY
rankDistribution = np.random.exponential(scale=1, size=numPlayers)
rankDistribution = np.round(rankDistribution / np.max(rankDistribution) * 10).astype(
int
)
startDateDistribution = np.random.exponential(scale=1, size=numPlayers)
maxStartMinutes = 365 * 24 * 60
startDateDistribution = maxStartMinutes - abs(
startDateDistribution / np.max(startDateDistribution) * maxStartMinutes
)
maxLastVisitMinutes = 91 * 24 * 60
lastVisitDistribution = np.random.exponential(scale=1, size=numPlayers)
lastVisitDistribution = maxLastVisitMinutes - abs(
lastVisitDistribution / np.max(lastVisitDistribution) * 91
)
def genString(length):
az = "abcdefghijklmnopqrstuvwxyz"
ret = []
for i in range(length):
ret.append(random.choice(az))
return "".join(ret)
def generateName():
fisrt = "김이박최정강조윤장임오한신권고"
others = "은지현승수영진석성준희우호동철주태연동해물과백두산이마르고닳도록하느님이보우하사우리나라만세"
length = random.randint(1, 4)
name = random.choice(fisrt)
for i in range(length):
name += random.choice(others)
return name
def generateNickname(id):
global words
global numPlayers
return "{}-{}-{}".format(
random.choice(words), genString(5), random.randint(1, numPlayers * 100)
)
def generateRank(id):
return rankDistribution[id - 1]
# 돈은 정규분포로
# dec(10, 2) 포맷으로 리턴
def generateMoney(id):
return "{:.2f}".format(moneyDisribution[id - 1])
# 현재 시각에서 30일 이전 시각을 임의로 생성
def generateStartDate(id):
import datetime
d = datetime.datetime.now() - datetime.timedelta(
minutes=int(startDateDistribution[id - 1])
)
return d.strftime("%Y-%m-%d %H:%M:%S")
def generateLastVisitDate(id):
import datetime
d = datetime.datetime.now() - datetime.timedelta(
minutes=lastVisitDistribution[id - 1]
)
return d.strftime("%Y-%m-%d %H:%M:%S")
def generateRecord(id):
# Player(id, nickname, rank, money, start_date, last_login)
# id is auto increment for MySQL
return f"{id},{generateName()},{generateNickname(id)},{generateRank(id)},{generateMoney(id)},{generateStartDate(id)},{generateLastVisitDate(id)}"
def main(numPlayers):
for i in range(numPlayers):
print(generateRecord(i + 1))
if __name__ == "__main__":
main(numPlayers)
# sudo mysql -u root tigerdb --local-infile=1 < insert.sql
set
global local_infile = 1;
alter table
guser disable keys;
set
foreign_key_checks = 0;
load data local infile 'guser.csv' into table guser fields terminated by ',';
load data local infile 'trade.csv' into table trade fields terminated by ',';
alter table
guser enable keys;
set
foreign_key_checks = 1;
# Player(id, nickname, rank, money, start_date, last_login)
# id: int pk auto_increment
# name: 한글 2- 5글자
# nickname: 영문 8 - 32글자, unique
# grank: 1 - 10
# money: $ 0.00 - 99,999,999.99
# start_date: 최근 1년 이내의 날짜
# last_login: start_date 보다 클 것, 최근 3개월 이내, NULL 가능
drop table if exists trade;
drop table if exists guser;
create table guser (
id int primary key auto_increment,
name varchar(5),
nickname varchar(64) unique,
grank tinyint,
money dec(10, 2),
start_date datetime,
last_visit datetime
) character set utf8mb4;
create table trade (
id int primary key auto_increment,
seller int,
item_name varchar(255),
price dec(10, 2),
foreign key (seller) references guser(id)
) character set utf8mb4;

Bulk insert 시에 사용하는 일반적인 성능향상 팁들

인덱스 비활성화

ALTER TABLE your_table DISABLE KEYS;
-- 데이터 로드
ALTER TABLE your_table ENABLE KEYS;

제약 조건 비활성화

SET foreign_key_checks = 0;
-- 데이터 로드
SET foreign_key_checks = 1;

자동 커밋 끄기

SET autocommit = 0;
-- 데이터 로드
COMMIT;

테이블 잠금

LOCK TABLES your_table WRITE;
-- 데이터 로드
UNLOCK TABLES;

쓰기 버퍼 크기 조정

SET GLOBAL bulk_insert_buffer_size = 512 * 1024 * 1024;  -- 예: 512MB
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
ample
achieve
concentration
enfix
owl
attachment
hypothesize
catch
gene
scheme
start
manufacture
hunter
separation
possibility
revenge
volume
analyst
lonely
condition
design
night
secretion
reference
crusade
quality
jump
weapon
pit
aquarium
stroll
hierarchy
layout
bathtub
exact
teenager
button
translate
suburb
fun
plastic
analysis
note
deer
scan
know
snatch
relinquish
cook
ground
guide
annual
Venus
qualify
aunt
slab
line
benefit
global
grounds
belly
progress
flawed
oil
video
strict
horizon
map
pluck
thin
parameter
electronics
betray
ankle
drama
director
appetite
earthwax
mind
exclude
parade
crystal
quotation
vegetable
clerk
finance
traffic
literacy
sun
adventure
musical
car
recover
decrease
conflict
moral
fish
bow
presidency
guarantee
@honux77
Copy link
Author

honux77 commented Jan 29, 2021

실행하기

python3 gen-user.py 1000000 > user.csv
docker cp.user.csv mydb:/root
docker exec -it mydb /bin/bash
cd /root
mysql -u tiger -p tigerdb --local-infile
(# input password 패스워드 입력)
mysql> drop table if exists... (테이블 생성)
mysql> load data local infile 'root.csv' into table user fields terminated by ',';

@honux77
Copy link
Author

honux77 commented Jan 25, 2022

8.0 부터 server에서 root로 로그인후

set global local_infile=ON

필요합니다.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment