python3 gen-user.py 1000000 > guser.csv
docker cp.user.csv mydb:/root
docker exec -it mydb /bin/bash
cd /root
mysql -u tiger -p tigerdb --local-infile
myqsl> set global local_infile=ON
mysql> drop table if exists... (테이블 생성)
mysql> load data local infile 'guser.csv' into table user fields terminated by ',';
Last active
September 30, 2024 08:51
-
-
Save honux77/40d78662448796e8a3d5cf439e7ba62e to your computer and use it in GitHub Desktop.
Generate Random User data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import numpy as np | |
import random | |
# setup | |
words = [] | |
if len(sys.argv) < 3: | |
print("Usage: {} numItem numUser".format(sys.argv[0])) | |
exit(1) | |
size = int(sys.argv[1]) | |
usize = int(sys.argv[2]) | |
with open("word.txt") as f: | |
for item in f: | |
words.append(item[:-1]) | |
def genItem(): | |
return "{}-{}".format(random.choice(words), random.randint(100, 20000)) | |
def genUid(): | |
return random.randint(1, usize) | |
def genPrice(): | |
return random.random() * 5000 | |
def printData(id): | |
print("{},{},{},{:.2f}".format(id, genUid(), genItem(), genPrice())) | |
for i in range(1, size + 1): | |
printData(i) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import numpy as np | |
import random | |
""" | |
Generate player data for testing | |
# Player(id, name, nickname, rank, money, start_date, last_login) | |
# id: int pk auto_increment | |
# name: 한글 2-5글자 | |
# nickname: 영문 32글자 이내, unique | |
# grank: 1 - 10, 정규분포로 1 쪽으로 갈수록 빈도가 높을 것 | |
# money: $ 0.00- 99,999,999.99, 정규분포 | |
# start_date: 최근 1년 이내의 날짜, 정규분포의 절반, 오래된 플레이어가 더 많을 것 | |
# last_login: start_date 보다 클 것, 최근 3개월 이내, 정규분포의 절반, 최근 플레이어가 더 많을 것, NUNLL 허용 | |
""" | |
if len(sys.argv) < 2: | |
print("Usage: python gen-player.py [numPlayers]") | |
exit(1) | |
else: | |
numPlayers = int(sys.argv[1]) | |
# global variables and Constants | |
MAX_MONEY = 99999999.99 | |
# oldest login month | |
# LAST_START = current - 1 year | |
# LAST_LOGIN = current - 91 days | |
np.random.seed(0) | |
# read words | |
with open("word.txt") as f: | |
words = [line.strip() for line in f] | |
moneyDisribution = np.random.normal(0.5, 0.1, numPlayers) * MAX_MONEY | |
rankDistribution = np.random.exponential(scale=1, size=numPlayers) | |
rankDistribution = np.round(rankDistribution / np.max(rankDistribution) * 10).astype( | |
int | |
) | |
startDateDistribution = np.random.exponential(scale=1, size=numPlayers) | |
maxStartMinutes = 365 * 24 * 60 | |
startDateDistribution = maxStartMinutes - abs( | |
startDateDistribution / np.max(startDateDistribution) * maxStartMinutes | |
) | |
maxLastVisitMinutes = 91 * 24 * 60 | |
lastVisitDistribution = np.random.exponential(scale=1, size=numPlayers) | |
lastVisitDistribution = maxLastVisitMinutes - abs( | |
lastVisitDistribution / np.max(lastVisitDistribution) * 91 | |
) | |
def genString(length): | |
az = "abcdefghijklmnopqrstuvwxyz" | |
ret = [] | |
for i in range(length): | |
ret.append(random.choice(az)) | |
return "".join(ret) | |
def generateName(): | |
fisrt = "김이박최정강조윤장임오한신권고" | |
others = "은지현승수영진석성준희우호동철주태연동해물과백두산이마르고닳도록하느님이보우하사우리나라만세" | |
length = random.randint(1, 4) | |
name = random.choice(fisrt) | |
for i in range(length): | |
name += random.choice(others) | |
return name | |
def generateNickname(id): | |
global words | |
global numPlayers | |
return "{}-{}-{}".format( | |
random.choice(words), genString(5), random.randint(1, numPlayers * 100) | |
) | |
def generateRank(id): | |
return rankDistribution[id - 1] | |
# 돈은 정규분포로 | |
# dec(10, 2) 포맷으로 리턴 | |
def generateMoney(id): | |
return "{:.2f}".format(moneyDisribution[id - 1]) | |
# 현재 시각에서 30일 이전 시각을 임의로 생성 | |
def generateStartDate(id): | |
import datetime | |
d = datetime.datetime.now() - datetime.timedelta( | |
minutes=int(startDateDistribution[id - 1]) | |
) | |
return d.strftime("%Y-%m-%d %H:%M:%S") | |
def generateLastVisitDate(id): | |
import datetime | |
d = datetime.datetime.now() - datetime.timedelta( | |
minutes=lastVisitDistribution[id - 1] | |
) | |
return d.strftime("%Y-%m-%d %H:%M:%S") | |
def generateRecord(id): | |
# Player(id, nickname, rank, money, start_date, last_login) | |
# id is auto increment for MySQL | |
return f"{id},{generateName()},{generateNickname(id)},{generateRank(id)},{generateMoney(id)},{generateStartDate(id)},{generateLastVisitDate(id)}" | |
def main(numPlayers): | |
for i in range(numPlayers): | |
print(generateRecord(i + 1)) | |
if __name__ == "__main__": | |
main(numPlayers) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# sudo mysql -u root tigerdb --local-infile=1 < insert.sql | |
set | |
global local_infile = 1; | |
alter table | |
guser disable keys; | |
set | |
foreign_key_checks = 0; | |
load data local infile 'guser.csv' into table guser fields terminated by ','; | |
load data local infile 'trade.csv' into table trade fields terminated by ','; | |
alter table | |
guser enable keys; | |
set | |
foreign_key_checks = 1; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Player(id, nickname, rank, money, start_date, last_login) | |
# id: int pk auto_increment | |
# name: 한글 2- 5글자 | |
# nickname: 영문 8 - 32글자, unique | |
# grank: 1 - 10 | |
# money: $ 0.00 - 99,999,999.99 | |
# start_date: 최근 1년 이내의 날짜 | |
# last_login: start_date 보다 클 것, 최근 3개월 이내, NULL 가능 | |
drop table if exists trade; | |
drop table if exists guser; | |
create table guser ( | |
id int primary key auto_increment, | |
name varchar(5), | |
nickname varchar(64) unique, | |
grank tinyint, | |
money dec(10, 2), | |
start_date datetime, | |
last_visit datetime | |
) character set utf8mb4; | |
create table trade ( | |
id int primary key auto_increment, | |
seller int, | |
item_name varchar(255), | |
price dec(10, 2), | |
foreign key (seller) references guser(id) | |
) character set utf8mb4; |
Bulk insert 시에 사용하는 일반적인 성능향상 팁들
ALTER TABLE your_table DISABLE KEYS;
-- 데이터 로드
ALTER TABLE your_table ENABLE KEYS;
SET foreign_key_checks = 0;
-- 데이터 로드
SET foreign_key_checks = 1;
SET autocommit = 0;
-- 데이터 로드
COMMIT;
LOCK TABLES your_table WRITE;
-- 데이터 로드
UNLOCK TABLES;
SET GLOBAL bulk_insert_buffer_size = 512 * 1024 * 1024; -- 예: 512MB
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ample | |
achieve | |
concentration | |
enfix | |
owl | |
attachment | |
hypothesize | |
catch | |
gene | |
scheme | |
start | |
manufacture | |
hunter | |
separation | |
possibility | |
revenge | |
volume | |
analyst | |
lonely | |
condition | |
design | |
night | |
secretion | |
reference | |
crusade | |
quality | |
jump | |
weapon | |
pit | |
aquarium | |
stroll | |
hierarchy | |
layout | |
bathtub | |
exact | |
teenager | |
button | |
translate | |
suburb | |
fun | |
plastic | |
analysis | |
note | |
deer | |
scan | |
know | |
snatch | |
relinquish | |
cook | |
ground | |
guide | |
annual | |
Venus | |
qualify | |
aunt | |
slab | |
line | |
benefit | |
global | |
grounds | |
belly | |
progress | |
flawed | |
oil | |
video | |
strict | |
horizon | |
map | |
pluck | |
thin | |
parameter | |
electronics | |
betray | |
ankle | |
drama | |
director | |
appetite | |
earthwax | |
mind | |
exclude | |
parade | |
crystal | |
quotation | |
vegetable | |
clerk | |
finance | |
traffic | |
literacy | |
sun | |
adventure | |
musical | |
car | |
recover | |
decrease | |
conflict | |
moral | |
fish | |
bow | |
presidency | |
guarantee |
8.0 부터 server에서 root로 로그인후
set global local_infile=ON
필요합니다.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
실행하기