Created
March 21, 2023 21:24
-
-
Save KevinGutowski/035f4a2ec5f9fff64fe54fbdbdaa4175 to your computer and use it in GitHub Desktop.
Parse a Japanese Pokémon card
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Site root; prepended to the relative href/src values scraped from card pages.
baseURL = "https://www.pokemon-card.com"
def parseSoup(soup):
    """Parse one card-detail page into a flat dict of card attributes.

    Args:
        soup: Parsed page exposing BeautifulSoup-style ``find``/``find_all``
            (presumably a ``BeautifulSoup`` document; the import is not
            visible in this file view). The page must contain an ``h1``,
            otherwise the name lookup raises ``AttributeError``.

    Returns:
        dict: The ``atr`` structure initialised below, filled from the page's
        ``h2`` sections and from the ``LeftBox``/``RightBox``/``TopInfo``
        elements.
    """
    topHeadings = soup.find_all('h2')
    leftBox = soup.find(class_='LeftBox')
    rightBox = soup.find(class_='RightBox')
    topInfo = soup.find(class_='TopInfo')
    atr = {
        'name': '',
        'cardClass': '',  # "ポケモン, Trainer, エネルギー"
        'cardSubclasses': [],  # [str], Single Strike Pokemon, Basic Pokemon, Goods, Tool, etc...
        'cardTypes': [],
        'hp': None,
        'effect': '',
        'moves': [],  # [{energyCost:[str],convertedEnergyCost:int,title:str,damage:str,description:str}]
        'abilities': [],  # [{title:str,description:str,type:str,subtypes:[str]}] types are like Pokébody, Ability
        'weaknesses': [],  # [{type:str,value:str}]
        'resistances': [],  # [{type:str,value:str}]
        'retreatCost': [],  # [str]
        'rules': [],  # [{type:str,description:str}]
        'printedNumberText': '0 / 0',
        'numerator': 0,
        'ancientTrait': None,  # {title:str,description:str}
        'denominator': 0,
        'illustrator': '',
        'illustratorURL': '',
        'regulationMark': '',
        'regulationMarkURL': '',
        'rarityImageURL': '',
        'evolution': [],  # [{'evolvesInto':[str],'evolvesFrom':[str],'relatedEvolution':[str],
                          # 'tree':{'stage0':[{'name':str,'current':bool}]},{'stage1':[{...}]]}]
        'dexInfo': None  # {number:int,species:str,height:str,weight:str,description:str}
    }
    atr['name'] = soup.find('h1').text.strip()

    topInfoDetails = parseTopInfo(topInfo)
    atr['hp'] = topInfoDetails['hp']
    atr['cardTypes'] = topInfoDetails['cardTypes']

    # Every <h2> introduces one section of the card; merge what each yields.
    for heading in topHeadings:
        parsedHeading = parseHeading(heading)
        atr['rules'] += parsedHeading['rules']
        if parsedHeading['cardClass'] != '':
            atr['cardClass'] = parsedHeading['cardClass']
        atr['cardSubclasses'] += parsedHeading['cardSubclasses']
        atr['moves'] += parsedHeading['moves']
        atr['abilities'] += parsedHeading['abilities']
        atr['effect'] += parsedHeading['effect']
        atr['evolution'] += parsedHeading['evolution']
        if parsedHeading['ancientTrait']:
            atr['ancientTrait'] = parsedHeading['ancientTrait']

    # Text of the `.type` element that marks a Pokémon card; the same text is
    # recorded verbatim as a subclass.
    pokemonKinds = (
        "2 進化", "1 進化", 'たね', 'V進化', 'V-UNION', 'M進化', 'BREAK進化', '復元',
    )
    try:
        kind = rightBox.find(class_='type').text.strip()
    except AttributeError:
        # No RightBox, or no `.type` element inside it.
        kind = None
    if kind in pokemonKinds:
        atr['cardClass'] = 'ポケモン'
        atr['cardSubclasses'] = atr['cardSubclasses'] + [kind]
    elif kind is not None:
        print('unhandled card type')

    info = getIllustratorInfo(leftBox)
    atr['illustrator'] = info['name']
    atr['illustratorURL'] = info['link']

    subTextInfo = parseSubText(leftBox)
    atr['printedNumberText'] = subTextInfo['printedNumberText']
    atr['regulationMarkURL'] = subTextInfo['regulationMarkURL']
    atr['regulationMark'] = subTextInfo['regulationMark']
    atr['numerator'] = subTextInfo['numerator']
    atr['denominator'] = subTextInfo['denominator']
    atr['rarityImageURL'] = subTextInfo['rarityImageURL']

    atr['dexInfo'] = parseDexInfo(leftBox)

    parseTable = parseWeaknessResistanceRetreat(rightBox)
    atr['weaknesses'] = parseTable['weaknesses']
    atr['resistances'] = parseTable['resistances']
    atr['retreatCost'] = parseTable['retreatCost']

    if atr['cardClass'] == '':
        print("Warning: No class set!")
    return atr
def parseHeading(heading):
    """Dispatch on an <h2>'s text and parse the section it introduces.

    Args:
        heading: The ``h2`` element; only its ``text`` is inspected here, the
            element itself is handed on to the section-specific parser.

    Returns:
        dict: ``rules``, ``cardClass``, ``cardSubclasses``, ``effect``,
        ``moves``, ``abilities``, ``evolution`` and ``ancientTrait``; keys the
        section does not provide keep their empty defaults. An unrecognised
        heading is reported on stdout and yields all defaults.
    """
    info = {
        'rules': [],
        'cardClass': '',
        'cardSubclasses': [],
        'effect': '',
        'moves': [],
        'abilities': [],
        'evolution': [],
        'ancientTrait': None
    }

    def asTrainer(*subclasses):
        # Shared by every trainer-type section.
        info['cardClass'] = 'トレーナーズ'
        info['cardSubclasses'] = list(subclasses)

    text = heading.text

    if text == 'サポート':
        info['cardClass'] = 'サポート'
        parse = parseSupporter(heading)
        info['rules'] = list(parse['rules'])
        info['effect'] = parse['effect']
    elif text == 'グッズ':
        asTrainer('グッズ')
        parse = parseGoods(heading)
        info['rules'] = list(parse['rules'])
        info['effect'] = parse['effect']
    elif text == '特殊エネルギー':
        asTrainer('特殊エネルギー')
        info['effect'] = parseSpecialEnergy(heading)['effect']
    elif text == 'スタジアム':
        parse = parseStadium(heading)
        asTrainer('スタジアム')
        info['rules'] = list(parse['rules'])
        info['effect'] = parse['effect']
    elif text == 'ポケモンのどうぐ':
        parse = parseTool(heading)
        info['rules'] = parse['rules']
        asTrainer('グッズ', 'ポケモンのどうぐ')
        info['effect'] = parse['effect']
    elif text == '特別なルール':
        parse = getSpecialRules(heading)
        info['rules'] = list(parse['rules'])
        info['ancientTrait'] = parse['ancientTrait']
    elif text == 'ワザ':
        info['moves'] = list(getMoves(heading))
    elif text in ('特性', 'ポケボディー', 'ポケパワー'):
        info['abilities'] = [getAbility(heading)]
    elif text == 'VSTARパワー':
        parse = parseVSTARPower(heading)
        info['moves'] = list(parse['moves'])
        info['abilities'] = list(parse['abilities'])
    elif text == '進化':
        info['evolution'] = parseEvolutions(heading)['evolution']
    elif text == '基本エネルギー':
        info['cardClass'] = 'エネルギー'
    elif text == 'トレーナー':
        parse = parseTrainer(heading)
        asTrainer('グッズ')
        info['effect'] = parse['effect']
        info['rules'] = list(parse['rules'])
    elif text == 'GXワザ':
        info['moves'] = list(getMoves(heading))
        info['cardSubclasses'] = ['GX']
    elif text == '古代能力':
        info['ancientTrait'] = parseAncientTrait(heading)['ancientTrait']
    elif text in ['どうぐ', 'きのみ']:
        asTrainer('グッズ')
        info['abilities'] = list(parseOldTool(heading)['abilities'])
    elif text == 'ワザマシン':
        asTrainer('グッズ')
        info['abilities'] = list(parseTM(heading)['abilities'])
    else:
        print('unhandled heading')
        print(text)
    return info
def parseTM(heading):
    """Parse a 'ワザマシン' section into a single pseudo-ability.

    Args:
        heading: The section's ``h2`` element.

    Returns:
        dict: ``{'abilities': [ability]}`` where ``ability`` has title and type
        'ワザマシン' and, when the element right after the heading is a ``p``,
        that paragraph's text as description. Otherwise the description stays
        empty and the unexpected sibling (possibly ``None``) is printed.
    """
    ability = {
        'title': 'ワザマシン',
        'description': '',
        'type': 'ワザマシン'
    }
    nextSibling = heading.find_next_sibling()
    # find_next_sibling() yields None when the heading is the last element.
    if nextSibling is not None and nextSibling.name == 'p':
        ability['description'] = nextSibling.text
    else:
        print('Unhandled TM')
        print(nextSibling)
    return {'abilities': [ability]}
def parseOldTool(heading):
    """Parse a 'どうぐ' / 'きのみ' section into a single pseudo-ability.

    Args:
        heading: The section's ``h2`` element; its text becomes the ability
            ``type``.

    Returns:
        dict: ``{'abilities': [ability]}``. An ``h4`` directly after the
        heading supplies the title; the following ``p`` (or the first sibling
        itself when there is no ``h4``) supplies the description, with energy
        icons rendered by ``getEnergyText``. Missing pieces stay empty.
    """
    ability = {
        'title': '',
        'description': '',
        'type': heading.text
    }
    nextSibling = heading.find_next_sibling()
    # Either sibling may be absent (None) on a truncated section.
    if nextSibling is not None and nextSibling.name == 'h4':
        ability['title'] = nextSibling.text
        nextSibling = nextSibling.find_next_sibling()
    if nextSibling is not None and nextSibling.name == 'p':
        ability['description'] = getEnergyText(nextSibling)
    return {'abilities': [ability]}
def parseAncientTrait(heading):
    """Identify the ancient trait described under a '古代能力' heading.

    The trait is recognised from the exact description text, not from the
    optional ``h4`` that may precede it (that element is skipped).

    Args:
        heading: The section's ``h2`` element.

    Returns:
        dict: ``{'ancientTrait': {'title', 'description'}}`` for a known
        description, else ``{'ancientTrait': None}`` after printing a
        diagnostic.
    """
    # Exact description text -> trait title.
    titlesByDescription = {
        'このカードは、最初の自分の番や出したばかりのポケモンからでも、手札から進化できる。': 'Δ進化',
        'このポケモンは、「ポケモンのどうぐ」を2枚までつけられる。': 'Θダブル',
        'このポケモンは、相手のポケモンからの特性の効果を受けない。': 'Θストップ',
        'このポケモンに、手札からエネルギーをつけるとき、同時に2枚までつけられる。(ワザ・特性・トレーナーズでつける場合はのぞく。)': 'α(アルファ)グロウ',
        'このポケモンは、相手が使うトレーナーズの効果を受けない。(ポケモンのどうぐ・スタジアムはのぞく。)': 'Ω(オメガ)バリア',
        'このポケモンは、ワザを2回連続で使える。(1回目で相手のバトルポケモンがきぜつしたなら、次のポケモンが出た後、2回目を使う。)': 'Ω(オメガ)連打',
        'このポケモンが、ワザのダメージで相手のポケモンをきぜつさせたなら、サイドを1枚多くとる。': 'Δ(デルタ)プラス',
        'このポケモンになったとき、このポケモンのHPをすべて回復する。': 'Θ(シータ)マックス',
        'このポケモンが、相手のポケモンから受けるワザのダメージは、「-20」される。': 'Δ(デルタ)ワイルド',
        'このポケモンのHPが回復するとき、その回復する量は2倍になる。': 'α(アルファ)回復',
    }
    info = {
        'ancientTrait': None
    }
    nextSibling = heading.find_next_sibling()
    if nextSibling is not None and nextSibling.name == 'h4':
        nextSibling = nextSibling.find_next_sibling()

    # A missing sibling (None) is reported the same way as a non-<p> one.
    if nextSibling is None or nextSibling.name != 'p':
        print('Unhandled Ancient Trait')
        return info

    description = nextSibling.text
    title = titlesByDescription.get(description)
    if title is None:
        print(description)
        print('Unhandled Ancient Trait')
    else:
        info['ancientTrait'] = {
            'title': title,
            'description': description
        }
    return info
def parseTrainer(heading):
    """Parse a 'トレーナー' section.

    Args:
        heading: The section's ``h2`` element.

    Returns:
        dict: ``effect`` is the text of the ``p`` directly after the heading
        (empty when the next element is missing or not a ``p``); ``rules`` is
        always ``commonRulings['goodsRules']``.
    """
    info = {
        'effect': '',
        'rules': commonRulings['goodsRules']
    }
    nextSibling = heading.find_next_sibling()
    # None when the heading is the last element of its parent.
    if nextSibling is not None and nextSibling.name == 'p':
        info['effect'] = nextSibling.text
    return info
def parseWeaknessResistanceRetreat(rightBox):
    """Read weaknesses, resistances and retreat cost from the right box table.

    The values are taken from the first three ``td`` cells of the table's
    second ``tr``.

    Args:
        rightBox: Element expected to contain the table, or ``None``.

    Returns:
        dict: ``weaknesses``, ``resistances`` and ``retreatCost`` lists. Parsing
        is best effort: whatever was read before a structural mismatch is kept
        and the remaining keys stay empty (presumably the normal case for
        non-Pokémon cards — confirm against real pages).
    """
    info = {
        'weaknesses': [],
        'resistances': [],
        'retreatCost': []
    }
    try:
        tableNode = rightBox.find('table')
        tds = tableNode.find_all('tr')[1].find_all('td')
        info['weaknesses'] = parseWeaknessOrResistance(tds[0])
        info['resistances'] = parseWeaknessOrResistance(tds[1])
        info['retreatCost'] = parseRetreat(tds[2])
    except (AttributeError, IndexError, KeyError, TypeError):
        # Missing box/table/row/cell or an icon without a class attribute.
        pass
    return info
def parseRetreat(td):
    """Return the retreat cost as a list of energy names, one per icon in *td*.

    Each element with class ``icon`` inside the cell is translated through
    ``parseEnergyClass`` using the first entry of its ``class`` attribute.
    """
    icons = td.find_all(class_='icon')
    return [parseEnergyClass(icon['class'][0]) for icon in icons]
def parseWeaknessOrResistance(td):
    """Parse one weakness or resistance table cell.

    Args:
        td: The table cell element.

    Returns:
        list: ``[{'type': str, 'value': str}, ...]`` with one entry per energy
        icon, each carrying the cell's stripped text as ``value``. A cell whose
        text is ``'--'`` (and which does not hold exactly one child element)
        yields ``[]``; a cell with no children and any other text is reported
        on stdout and also yields ``[]``.
    """
    childCount = len(td.findChildren())
    value = td.text.strip()

    if childCount == 1:
        onlyIcon = td.find(class_='icon')
        return [{'type': parseEnergyClass(onlyIcon['class'][0]), 'value': value}]

    if value == '--':
        return []

    if childCount > 1:
        return [
            {'type': parseEnergyClass(icon['class'][0]), 'value': value}
            for icon in td.find_all(class_='icon')
        ]

    print('Unhandled Weakness')
    return []
def parseDexInfo(leftBox):
    """Extract the Pokédex blurb from the ``card`` element of the left box.

    Args:
        leftBox: Element expected to contain a child with class ``card``, or
            ``None``.

    Returns:
        ``None`` when there is no left box or dex element, or when the dex
        heading is exactly '炎' (a known bad page). Otherwise a dict:

        * ``{number, species, height, weight, description}`` in the normal
          case (``number`` is omitted when it could not be parsed);
        * ``{number, species}`` when the element has no paragraphs;
        * ``{description: <raw element text>}`` when the layout does not match
          at all.
    """
    # {number:int,species:str,height:str,weight:str,description:str}
    if leftBox is None:
        return None
    dexNode = leftBox.find(class_='card')
    if not dexNode:
        return None

    try:
        headingText = dexNode.find('h4').text
        # handle random incorrect dex info https://www.pokemon-card.com/card-search/details.php/card/39976/regu/all
        if headingText.strip() == '炎':
            return None

        # NOTE(review): the separators below are an ASCII space and colon as
        # they appear in this file; confirm the site does not use full-width
        # variants.
        try:
            dexNumber = int(headingText.split(' ')[0].split('.')[1])
            speciesText = headingText.split(' ')[1].strip()
        except (ValueError, IndexError):
            # Heading is not of the "<prefix>.<number> <species>" form.
            dexNumber = None
            speciesText = headingText.strip()

        dexNodeParagraphs = dexNode.find_all('p')
        if not dexNodeParagraphs:
            return {
                'number': dexNumber,
                'species': speciesText
            }

        firstText = dexNodeParagraphs[0].text
        if '高さ' in firstText.strip():
            # First paragraph is "height:<h> weight:<w>"; the blurb follows.
            height = firstText.split(' ')[0].split(':')[1].strip()
            weight = firstText.split(' ')[1].split(':')[1].strip()
            if len(dexNodeParagraphs) > 1:
                description = dexNodeParagraphs[1].text.strip()
            else:
                description = ''
        else:
            height = ''
            weight = ''
            description = firstText.strip()

        info = {}
        if dexNumber:
            info['number'] = dexNumber
        info['species'] = speciesText
        info['height'] = height
        info['weight'] = weight
        info['description'] = description
        return info
    except (AttributeError, IndexError):
        # No <h4>, or the height/weight line is not shaped as expected.
        return {'description': dexNode.text}
def parseEvolutions(heading):
    """Collect the evolution line shown under a '進化' heading.

    All elements with class ``evolution`` under the heading's parent are
    visited in reverse document order, and that reversed position becomes the
    ``stage``. When such an element holds ``in-box`` children, each child is
    one entry; otherwise the element itself is the entry. Entries carrying
    neither ``ev_on`` nor ``ev_off`` are ignored.

    Returns:
        dict: ``{'evolution': [{'name': str, 'stage': int, 'current': bool}]}``
        where ``current`` is True for ``ev_on`` and False for ``ev_off``.
    """
    entries = []  # { name:str,stage:int,current:bool}
    stageNodes = heading.parent.find_all(class_='evolution')
    for stage, stageNode in enumerate(reversed(stageNodes)):
        # Fall back to the stage element itself when it has no in-box children.
        candidates = stageNode.find_all(class_='in-box') or [stageNode]
        for candidate in candidates:
            classes = candidate['class']
            if 'ev_off' in classes:
                isCurrent = False
            elif 'ev_on' in classes:
                isCurrent = True
            else:
                continue
            entries.append({'name': candidate.text.strip(), 'stage': stage, 'current': isCurrent})
    return {'evolution': entries}
# Standard rule texts attached to trainer-type cards, keyed by the parser that
# uses them. Each rule is {'type': str, 'description': str}.
commonRulings = {
    'supporterRules': [
        dict(
            type='サポート',
            description='サポートは、自分の番に1枚しか使えない。',
        ),
    ],
    'toolRules': [
        dict(
            type='ポケモンのどうぐ',
            description='ポケモンのどうぐは、自分のポケモンにつけて使う。ポケモン1匹につき1枚だけつけられ、つけたままにする。',
        ),
        dict(
            type='グッズ',
            description='グッズは、自分の番に何枚でも使える。',
        ),
    ],
    'stadiumRules': [
        dict(
            type='スタジアム',
            description='スタジアムは、自分の番に1枚だけ、バトル場の横に出せる。別の名前のスタジアムが場に出たなら、このカードをトラッシュする。',
        ),
    ],
    'goodsRules': [
        dict(
            type='グッズ',
            description='グッズは、自分の番に何枚でも使える。',
        ),
    ],
}
def parseSupporter(heading):
    """Parse a 'サポート' section.

    Args:
        heading: The section's ``h2`` element.

    Returns:
        dict: ``rules`` is ``commonRulings['supporterRules']``; ``effect`` is
        the ``p`` directly after the heading rendered by ``getEnergyText``, or
        '' when that element is missing or of another kind (a diagnostic is
        printed in that case).
    """
    info = {
        'rules': commonRulings['supporterRules'],
        'effect': ''
    }
    try:
        nextSibling = heading.find_next_sibling()
        if nextSibling.name == 'p':
            info['effect'] = getEnergyText(nextSibling)
        else:
            print('Supporter Ruling: Found unexpected next element')
            print(nextSibling)
    except Exception:
        # Best effort per card: e.g. no sibling at all (None) or an icon that
        # getEnergyText cannot render. Unlike a bare except this lets
        # KeyboardInterrupt/SystemExit through.
        print('Unhandled Supporter description')
    return info
def parseStadium(heading):
    """Parse a 'スタジアム' section.

    Args:
        heading: The section's ``h2`` element.

    Returns:
        dict: ``rules`` is ``commonRulings['stadiumRules']``; ``effect`` is the
        text of the ``p`` directly after the heading, or '' when that element
        is missing or of another kind (a diagnostic is printed in that case).
    """
    info = {
        'rules': commonRulings['stadiumRules'],
        'effect': ''
    }
    try:
        nextSibling = heading.find_next_sibling()
        if nextSibling.name == 'p':
            info['effect'] = nextSibling.text
        else:
            print('Stadium Ruling: Found unexpected next element')
            print(nextSibling)
    except AttributeError:
        # heading or its next sibling is None.
        print('unhandled stadium card description')
    return info
def parseSpecialEnergy(heading):
    """Parse a '特殊エネルギー' section.

    Args:
        heading: The section's ``h2`` element.

    Returns:
        dict: ``{'effect': str}``. The effect is the text of the ``p`` directly
        after the heading; when the next element instead reads '特別なルール',
        the text of the element after that one is used. Anything else leaves
        the effect empty and prints a diagnostic.
    """
    info = {
        'effect': ''
    }
    try:
        nextSibling = heading.find_next_sibling()
        if nextSibling.name == 'p':
            info['effect'] = nextSibling.text
        elif nextSibling.text == '特別なルール':
            info['effect'] = nextSibling.find_next_sibling().text
        else:
            print('Special Energy Ruling: Found unexpected next element')
            print(nextSibling)
    except AttributeError:
        # heading, its next sibling, or the element after the rules heading is
        # None.
        print('Unhandled Special Energy description')
    return info
def parseTool(heading):
    """Parse a 'ポケモンのどうぐ' section.

    Walks the ``p`` elements following the heading, skipping those whose text
    is one of the standard tool rule descriptions, and takes the first other
    paragraph as the card's effect.

    Args:
        heading: The section's ``h2`` element.

    Returns:
        dict: ``rules`` is ``commonRulings['toolRules']``; ``effect`` is the
        first non-rule paragraph, or '' when none is found. Running off the
        end of the siblings, or skipping more than 10 rule paragraphs, prints
        'Unhandled Tool description'.
    """
    info = {
        'rules': commonRulings['toolRules'],
        'effect': ''
    }
    textsToCheck = [rule['description'] for rule in commonRulings['toolRules']]
    counter = 0
    try:
        nextSibling = heading.find_next_sibling()
        while nextSibling.name == 'p':
            if nextSibling.text not in textsToCheck:
                info['effect'] = nextSibling.text
                break
            counter = counter + 1
            if counter > 10:
                # Safety valve against pathological pages.
                print('WARNING Infinite loop triggered trying to find tool text')
                print('Unhandled Tool description')
                break
            nextSibling = nextSibling.find_next_sibling()
    except AttributeError:
        # Ran out of siblings (None) before finding an effect paragraph.
        print('Unhandled Tool description')
    return info
def parseGoods(heading):
    """Parse a 'グッズ' section.

    Args:
        heading: The section's ``h2`` element.

    Returns:
        dict: ``rules`` is ``commonRulings['goodsRules']``; ``effect`` is the
        text of the ``p`` directly after the heading, or '' when that element
        is missing or of another kind (a diagnostic is printed in that case).
    """
    info = {
        'rules': commonRulings['goodsRules'],
        'effect': ''
    }
    try:
        nextSibling = heading.find_next_sibling()
        if nextSibling.name == 'p':
            info['effect'] = nextSibling.text
        else:
            print('GoodsRuling: Found unexpected next element')
            print(nextSibling)
    except AttributeError:
        # heading or its next sibling is None.
        print('unhandled trainer card description')
    return info
def parseTopInfo(topInfo):
    """Read HP and card types from the ``TopInfo`` element.

    Args:
        topInfo: Element expected to contain a ``td-r`` child holding an
            ``hp-num`` element and energy icons, or ``None``.

    Returns:
        dict: ``hp`` (stripped text of ``hp-num``, or ``None``) and
        ``cardTypes`` (one energy name per icon). Parsing is best effort: on a
        structural mismatch the values read so far are kept and the rest stay
        at their defaults.
    """
    info = {
        'cardTypes': [],
        'hp': None,
    }
    try:
        rightTopInfo = topInfo.find(class_='td-r')
        info['hp'] = rightTopInfo.find(class_='hp-num').text.strip()
        info['cardTypes'] = [
            parseEnergyClass(icon['class'][0])
            for icon in rightTopInfo.find_all(class_='icon')
        ]
    except (AttributeError, KeyError, IndexError):
        # Missing element somewhere along the chain, or an icon without a
        # usable class attribute.
        pass
    return info
def parseSubText(leftBox): | |
info = { | |
'regulationMark':'', | |
'regulationMarkURL':'', | |
'printedNumberText':'', | |
'numerator':0, | |
'denominator':0, | |
'rarityImageURL':'' | |
} | |
try: | |
subText = leftBox.find(class_='subtext') | |
info['printedNumberText'] = subText.text.strip().replace('\xa0','') | |
info['numerator'] = int(info['printedNumberText'].split('/')[0]) | |
info['denominator'] = int(info['printedNumberText'].split('/')[1]) | |
try: | |
regulationATag = subText.find(class_='img-regulation', alt=True) | |
info['regulationMark'] = regulationATag['alt'] | |
info['regulationMarkURL'] = (baseURL + regulationATag['src']) | |
except: | |
pass | |
try: | |
rarityImage = subText.find("img", {"width": "24"}) | |
info['rarityImageURL'] = baseURL+rarityImage['src'] | |
except: | |
pass | |
except: | |
pass | |
# print(info) | |
return info | |
def getIllustratorInfo(leftBox):
    """Return the illustrator's name and profile link from the left box.

    Args:
        leftBox: Element expected to contain an ``author`` child wrapping an
            ``a`` tag, or ``None``.

    Returns:
        dict: ``{'name': str, 'link': str}`` where ``link`` is ``baseURL`` plus
        the anchor's ``href``; both are '' when the anchor cannot be found or
        has no ``href``.
    """
    try:
        author = leftBox.find(class_='author').find('a')
        return {'name': author.text, 'link': baseURL + author['href']}
    except (AttributeError, KeyError, TypeError):
        # Missing box / author element / anchor, or anchor without href.
        return {'name': '', 'link': ''}
def getCardText(topHeadings):
    """Return the text of every paragraph in the first heading's parent.

    Args:
        topHeadings: Non-empty sequence of heading elements; only the first is
            used.

    Returns:
        str: The paragraphs' texts joined by a single newline, with no
        trailing newline ('' when there are no paragraphs).
    """
    paragraphs = topHeadings[0].parent.find_all('p')
    # Joining avoids the trailing separator altogether, so no real character
    # has to be sliced off afterwards.
    return '\n'.join(p.text for p in paragraphs)
def getMoves(heading):
    """Parse the attacks listed under a moves heading.

    The ``h4``/``p`` elements that directly follow *heading* belong to the
    section; scanning stops at the first sibling of any other kind (or at the
    end of the siblings). Every ``h4`` is one attack.

    Args:
        heading: The element that introduces the moves.

    Returns:
        list: One dict per attack with ``energyCost`` (energy names from the
        ``span.icon`` elements), ``convertedEnergyCost`` (their count),
        ``title`` (the h4's first direct text node, stripped), ``damage`` (text
        of ``span.f_right``, '' if absent) and ``description`` (the ``p`` right
        after the h4 rendered by ``getEnergyText``, '' if absent).
    """
    # Get h4s but ensure that it is within the current moves h2
    moveHeadings = []
    sibling = heading.find_next_sibling()
    while sibling is not None and sibling.name in ('h4', 'p'):
        if sibling.name == 'h4':
            moveHeadings.append(sibling)
        sibling = sibling.find_next_sibling()

    moves = []
    for moveHeading in moveHeadings:
        titleNode = moveHeading.find(text=True, recursive=False)
        energyArray = moveHeading.find_all("span", {"class": "icon"})
        damageNode = moveHeading.find("span", {"class": "f_right"})
        attack = {
            'energyCost': [parseEnergyClass(energy['class'][0]) for energy in energyArray],
            'convertedEnergyCost': len(energyArray),
            # An h4 made only of child elements has no direct text node.
            'title': titleNode.strip() if titleNode is not None else '',
            'damage': damageNode.text.strip() if damageNode is not None else '',
            'description': ''
        }
        nextSibling = moveHeading.find_next_sibling()
        if nextSibling is not None and nextSibling.name == 'p':
            attack['description'] = getEnergyText(nextSibling)
        moves.append(attack)
    return moves
def getSpecialRules(heading):
    """Parse every rule paragraph under a '特別なルール' heading.

    The paragraphs are the ``p`` descendants of the element that directly
    follows *heading*; each is classified by ``getSpecialRule``.

    Args:
        heading: The section's ``h2`` element.

    Returns:
        dict: ``rules`` (list of ``{'type', 'description'}`` for the recognised
        rules, in page order) and ``ancientTrait`` (the last ancient trait
        found, or ``None``). Paragraphs that produce no rule are skipped, and a
        paragraph without a trait never clears one found earlier.
    """
    info = {
        'rules': [],
        'ancientTrait': None
    }
    container = heading.find_next_sibling()
    if container is None:
        # Heading is the last element of its parent: nothing to parse.
        return info

    for paragraph in container.find_all('p'):
        specialRule = getSpecialRule(paragraph)
        if specialRule['ancientTrait'] is not None:
            info['ancientTrait'] = specialRule['ancientTrait']
        if specialRule['rule'] is not None:
            info['rules'].append(specialRule['rule'])
    return info
def getSpecialRule(paragraph):
    """Classify one special-rule paragraph by its text.

    Matching order: exact rule texts first, then the one exact ancient-trait
    text, then substring patterns in the order listed. This gives the same
    result as testing them interleaved because no exact text contains any of
    the substring patterns.

    Args:
        paragraph: Element whose ``text`` is the rule description.

    Returns:
        dict: ``{'rule': {'type', 'description'} | None,
        'ancientTrait': {'title', 'description'} | None}``. At most one of the
        two is set; both stay ``None`` (with a diagnostic printed) for an
        unknown text or a paragraph without usable text.
    """
    # Exact description -> rule type.
    exactRuleTypes = {
        'ポケモンVSTARがきぜつしたとき、相手はサイドを2枚とる。': 'VSTAR',
        'ポケモンVがきぜつしたとき、相手はサイドを2枚とる。': 'V',
        'かがやくポケモンは、デッキに1枚しか入れられない。': 'かがやくポケモン',
        'ポケモンVMAXがきぜつしたとき、相手はサイドを3枚とる。': 'VMAX',
        'サポートは、自分の番に1枚しか使えない。': 'サポート',
        'ポケモンのどうぐは、自分のポケモンにつけて使う。ポケモン1匹につき1枚だけつけられ、つけたままにする。': 'ポケモンのどうぐ',
        'ポケモン【V】がきぜつしたとき、相手はサイドを2枚とる。': 'V',
        'TAG TEAMがきぜつしたとき、相手はサイドを3枚とる。': 'TAG TEAM',
        'ポケモンEXがきぜつしたとき、相手はサイドを2枚とる。': 'EX',
        'ポケモンGXがきぜつしたとき、相手はサイドを2枚とる。': 'GX',
        'ACE SPECカードは、1つのデッキにつき1枚しか入れられない。': 'ACE SPEC',
        'M進化ポケモンになったとき、自分の番は終わる。': 'M進化',
        '同じ名前の(プリズムスター)のカードは、デッキに1枚しか入れられない。トラッシュには行かず、ロストゾーンに置く。': 'プリズムスター',
        'スタジアムは、自分の番に1枚だけ、バトル場の横に出せる。別の名前のスタジアムが場に出たなら、このカードをトラッシュする。': 'スタジアム',
        'グッズは、自分の番に何枚でも使える。': 'グッズ',
        'こちらのカードのレギュレーションは[C]として扱います。': 'レギュレーション',
        'ゲンシカイオーガEXになったとき、自分の番は終わる。': 'ゲンシカイキ',
        'ゲンシグラードンEXになったとき、自分の番は終わる。': 'ゲンシカイキ',
        '(このカードは、公式大会では使えない。)': 'レギュレーション',
        'このカードは、エネルギー2個ぶんとしてはたらく。': '特殊エネルギー',
        'このポケモンがきぜつしたら、相手はサイドを2枚とる。': 'LEGENDを組み合わせて',
        'このカードはエネルギー2個ぶんとしてはたらく。': '特殊エネルギー',
        'ポケモンカードゲーム公式大会入賞カード': 'ポケモンカードゲーム公式大会入賞カード',
        'ポケモンexがきぜつしたとき、相手プレイヤーはサイドを2枚とります。': 'ポケモンex',
        'ポケモン☆は、デッキに1枚しか入れることができません。': 'ポケモン☆',
        'ポケモンexがきぜつしたとき、相手はサイドを2枚とる。': 'ポケモンex',
        'ポケモンのどうぐは、自分の番に何枚でも、自分のポケモンにつけられる。ポケモン1匹につき1枚だけつけられ、つけたままにする。': 'ポケモンのどうぐ',
    }
    # Exact description -> ancient trait title.
    # NOTE(review): parseAncientTrait titles this same text 'Δ(デルタ)プラス';
    # confirm which spelling is intended.
    exactAncientTraits = {
        'このポケモンが、ワザのダメージで相手のポケモンをきぜつさせたなら、サイドを1枚多くとる。': 'Δプラス',
    }
    # (substring, rule type), tested in this order.
    containsRuleTypes = (
        ('V-UNION', 'V-UNION'),
        ('BREAK進化する前', 'BREAK進化'),
        ('サチコEX', 'MサチコEX'),
        ('エネルギー1個ぶんとしてはたらく', '特殊エネルギー'),
        ('LEGENDを組み合わせて、ベンチに出す。', 'LEGENDを組み合わせて'),
        ('レベルアップ前のワザ・ポケパワーも使うことができ、ポケボディーもはたらく。', 'LV. X'),
        ('バトルロード サマー★2007', 'バトルロード サマー★2007'),
    )

    info = {
        'rule': None,
        'ancientTrait': None
    }
    try:
        description = paragraph.text

        ruleType = exactRuleTypes.get(description)
        if ruleType is None and description in exactAncientTraits:
            info['ancientTrait'] = {
                'title': exactAncientTraits[description],
                'description': description
            }
            return info
        if ruleType is None:
            for needle, candidateType in containsRuleTypes:
                if needle in description:
                    ruleType = candidateType
                    break

        if ruleType is None:
            print('Unhandled special rule typing')
            print(description)
        else:
            info['rule'] = {
                'type': ruleType,
                'description': description
            }
    except (AttributeError, TypeError):
        # paragraph is None / has no text, or its text is not a string.
        print('Possible Error: unhandled or no text description of special rule')
    return info
def parseVSTARPower(heading):
    """Parse a 'VSTARパワー' section, which holds either an ability or a move.

    The ``h4`` directly after *heading* says which: '特性' means an ability
    (parsed by ``getAbility`` and re-typed as 'VSTARパワー'), 'ワザ' means moves
    (parsed by ``getMoves``).

    Args:
        heading: The section's ``h2`` element.

    Returns:
        dict: ``{'abilities': list, 'moves': list}``; both are empty when the
        section cannot be parsed, in which case a diagnostic is printed.
    """
    info = {
        'abilities': [],
        'moves': []
    }
    try:
        nextSibling = heading.find_next_sibling()
        if nextSibling is None or nextSibling.name != 'h4':
            print("VSTAR Power Error: Unknown next element")
        elif nextSibling.text == '特性':
            parse = getAbility(nextSibling)
            info['abilities'] = [{
                'title': parse['title'],
                'description': parse['description'],
                'type': 'VSTARパワー'
            }]
        elif nextSibling.text == 'ワザ':
            info['moves'] = getMoves(nextSibling)
        else:
            print('Unhandled VSTAR Power')
    except Exception:
        # Best effort per card: report instead of silently dropping the
        # section, and let KeyboardInterrupt/SystemExit through.
        print('Unhandled VSTAR Power')
    return info
def getAbility(node):
    """Parse the ability introduced by *node*.

    Args:
        node: The element naming the ability kind; its text becomes ``type``.
            The next sibling is expected to be an ``h4`` with the title and
            the one after that a ``p`` with the description.

    Returns:
        dict: ``{'title', 'description', 'type'}``. Pieces that are missing or
        of an unexpected kind stay '' and a diagnostic is printed.
    """
    ability = {
        'title': '',
        'description': '',
        'type': ''
    }
    # TODO: find card with different ability types
    ability['type'] = node.text
    try:
        nextSibling = node.find_next_sibling()
        if nextSibling.name == 'h4':
            ability['title'] = nextSibling.text
        else:
            print('Error: Unhandled ability title')
        siblingAfter = nextSibling.find_next_sibling()
        if siblingAfter.name == 'p':
            ability['description'] = getEnergyText(siblingAfter)
        else:
            print('Error: Unhandled ability description')
    except Exception:
        # Best effort per card: e.g. a missing sibling (None) or an icon that
        # getEnergyText cannot render. Unlike a bare except this lets
        # KeyboardInterrupt/SystemExit through.
        print(ability)
        print('Error: Unhandled ability')
    return ability
def parseEnergyClass(iconClass):
    """Translate an icon CSS class into an energy/type name.

    Args:
        iconClass: Class string such as ``'icon-fire'``.

    Returns:
        The matching name (``'icon-none'`` maps to ``'colorless'`` and
        ``'icon-void'`` to ``'empty'``), or ``None`` after printing a
        diagnostic when the class is not recognised.
    """
    energyNames = {
        'icon-none': 'colorless',
        'icon-grass': 'grass',
        'icon-fire': 'fire',
        'icon-dark': 'darkness',
        'icon-psychic': 'psychic',
        'icon-electric': 'electric',
        'icon-water': 'water',
        'icon-fighting': 'fighting',  # was misspelled 'fightning'
        'icon-void': 'empty',
        'icon-steel': 'steel',
        'icon-dragon': 'dragon',
        'icon-fairy': 'fairy',
        'icon-plus': 'plus',
    }
    if iconClass in energyNames:
        return energyNames[iconClass]
    print(iconClass)
    print('unhandled typing')
    return None
def getEnergyText(node):
    """Render *node*'s direct children as text with energy icons spelled out.

    A child named ``span`` becomes ``{<energy>}``, where the energy name comes
    from ``parseEnergyClass`` applied to the first entry of its ``class``
    attribute; any other child contributes its ``text``. The pieces are joined
    with single spaces.
    """
    def render(child):
        if child.name == 'span':
            return "{" + parseEnergyClass(child['class'][0]) + "}"
        return child.text

    pieces = [render(child) for child in node.contents]
    return ' '.join(pieces)
# Driver: parse every fetched page and dump the results to CSV.
# `soupURLs`, `pd` and `fileName` are defined outside this view; each entry of
# soupURLs is read for 'Details URL', 'soup', 'Sets' and 'Card_ID'.
parsedCards = []
for item in soupURLs:
    detailsURL = item['Details URL']
    print(detailsURL)
    card = parseSoup(item['soup'])
    # Carry the listing metadata alongside the parsed attributes.
    card.update({
        'Sets': item['Sets'],
        'Card_ID': item['Card_ID'],
        'Details URL': detailsURL,
    })
    parsedCards.append(card)

df2 = pd.DataFrame(parsedCards)
df2.to_csv(fileName, index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment