Skip to content

Instantly share code, notes, and snippets.

@KevinGutowski
Created March 21, 2023 21:24
Show Gist options
  • Save KevinGutowski/035f4a2ec5f9fff64fe54fbdbdaa4175 to your computer and use it in GitHub Desktop.
Save KevinGutowski/035f4a2ec5f9fff64fe54fbdbdaa4175 to your computer and use it in GitHub Desktop.
Parse a Japanese Pokémon card
# Site root; prepended to the relative `href`/`src` values scraped from a card
# page to build absolute illustrator, regulation-mark and rarity-image URLs.
baseURL = 'https://www.pokemon-card.com'
def parseSoup(soup):
    """Parse one card-detail page into a flat attribute dictionary.

    `soup` is the parsed page. Every `<h2>` heading is handed to
    `parseHeading` and its partial result merged into the record; the
    `TopInfo`, `LeftBox` and `RightBox` containers supply HP/types,
    illustrator/number/dex data and weakness/resistance/retreat data.

    Returns the attribute dict (see the inline comments for each field).
    Prints a warning when no card class could be determined.
    """
    topHeadings = soup.find_all('h2')
    leftBox = soup.find(class_='LeftBox')
    rightBox = soup.find(class_='RightBox')
    topInfo = soup.find(class_='TopInfo')
    atr = {
        'name': '',
        'cardClass': '',  # "ポケモン, Trainer, エネルギー"
        'cardSubclasses': [],  # [str], Single Strike Pokemon, Basic Pokemon, Goods, Tool, etc...
        'cardTypes': [],
        'hp': None,
        'effect': '',
        'moves': [],  # [{energyCost:[str],convertedEnergyCost:int,title:str,damage:str,description:str}]
        'abilities': [],  # [{title:str,description:str,type:str,subtypes:[str]}] types are like Pokébody, Ability
        'weaknesses': [],  # [{type:str,value:str}]
        'resistances': [],  # [{type:str,value:str}]
        'retreatCost': [],  # [str]
        'rules': [],  # [{type:str,description:str}]
        'printedNumberText': '0 / 0',
        'numerator': 0,
        'ancientTrait': None,  # {title:str,description:str}
        'denominator': 0,
        'illustrator': '',
        'illustratorURL': '',
        'regulationMark': '',
        'regulationMarkURL': '',
        'rarityImageURL': '',
        'evolution': [],  # [{name:str,stage:int,current:bool}]
        'dexInfo': None  # {number:int,species:str,height:str,weight:str,description:str}
    }
    atr['name'] = soup.find('h1').text.strip()

    topInfoDetails = parseTopInfo(topInfo)
    atr['hp'] = topInfoDetails['hp']
    atr['cardTypes'] = topInfoDetails['cardTypes']

    # Merge the partial result of every <h2> section into the record.
    for heading in topHeadings:
        parsedHeading = parseHeading(heading)
        for listKey in ('rules', 'cardSubclasses', 'moves', 'abilities', 'evolution'):
            atr[listKey] = atr[listKey] + parsedHeading[listKey]
        if parsedHeading['cardClass'] != '':
            atr['cardClass'] = parsedHeading['cardClass']
        if parsedHeading['effect'] != '':
            atr['effect'] = atr['effect'] + parsedHeading['effect']
        if parsedHeading['ancientTrait']:
            atr['ancientTrait'] = parsedHeading['ancientTrait']

    # The "type" label in the right box marks the card as a Pokémon and names
    # its stage. Pages without that label (or without a right box) are
    # skipped silently, as before.
    pokemonKinds = (
        "2 進化", "1 進化", 'たね', 'V進化', 'V-UNION', 'M進化', 'BREAK進化', '復元',
    )
    try:
        kind = rightBox.find(class_='type').text.strip()
    except AttributeError:
        kind = None
    if kind is not None:
        if kind in pokemonKinds:
            atr['cardClass'] = 'ポケモン'
            atr['cardSubclasses'] = atr['cardSubclasses'] + [kind]
        else:
            print('unhandled card type')

    info = getIllustratorInfo(leftBox)
    atr['illustrator'] = info['name']
    atr['illustratorURL'] = info['link']

    subTextInfo = parseSubText(leftBox)
    for subTextKey in ('printedNumberText', 'regulationMarkURL', 'regulationMark',
                       'numerator', 'denominator', 'rarityImageURL'):
        atr[subTextKey] = subTextInfo[subTextKey]

    atr['dexInfo'] = parseDexInfo(leftBox)

    parseTable = parseWeaknessResistanceRetreat(rightBox)
    atr['weaknesses'] = parseTable['weaknesses']
    atr['resistances'] = parseTable['resistances']
    atr['retreatCost'] = parseTable['retreatCost']

    if atr['cardClass'] == '':
        print("Warning: No class set!")
    return atr
def parseHeading(heading):
    """Dispatch on the text of an `<h2>` heading and parse its section.

    Returns a partial card record with the keys `rules`, `cardClass`,
    `cardSubclasses`, `effect`, `moves`, `abilities`, `evolution` and
    `ancientTrait`; only the keys relevant to the heading are filled in.
    Unknown headings are reported on stdout and yield the empty record.
    """
    info = {
        'rules': [],
        'cardClass': '',
        'cardSubclasses': [],
        'effect': '',
        'moves': [],
        'abilities': [],
        'evolution': [],
        'ancientTrait': None
    }
    text = heading.text

    if text == 'サポート':
        info['cardClass'] = 'サポート'
        parsed = parseSupporter(heading)
        info['rules'].extend(parsed['rules'])
        info['effect'] = parsed['effect']
    elif text == 'グッズ':
        info['cardClass'] = 'トレーナーズ'
        info['cardSubclasses'].append('グッズ')
        parsed = parseGoods(heading)
        info['rules'].extend(parsed['rules'])
        info['effect'] = parsed['effect']
    elif text == '特殊エネルギー':
        info['cardClass'] = 'トレーナーズ'
        info['cardSubclasses'].append('特殊エネルギー')
        info['effect'] = parseSpecialEnergy(heading)['effect']
    elif text == 'スタジアム':
        parsed = parseStadium(heading)
        info['cardClass'] = 'トレーナーズ'
        info['cardSubclasses'].append('スタジアム')
        info['rules'].extend(parsed['rules'])
        info['effect'] = parsed['effect']
    elif text == 'ポケモンのどうぐ':
        parsed = parseTool(heading)
        info['rules'] = parsed['rules']
        info['cardClass'] = 'トレーナーズ'
        info['cardSubclasses'].extend(['グッズ', 'ポケモンのどうぐ'])
        info['effect'] = parsed['effect']
    elif text == '特別なルール':
        parsed = getSpecialRules(heading)
        info['rules'].extend(parsed['rules'])
        info['ancientTrait'] = parsed['ancientTrait']
    elif text == 'ワザ':
        info['moves'].extend(getMoves(heading))
    elif text in ('特性', 'ポケボディー', 'ポケパワー'):
        # All three ability flavours share the same page layout.
        info['abilities'].append(getAbility(heading))
    elif text == 'VSTARパワー':
        parsed = parseVSTARPower(heading)
        info['moves'].extend(parsed['moves'])
        info['abilities'].extend(parsed['abilities'])
    elif text == '進化':
        info['evolution'] = parseEvolutions(heading)['evolution']
    elif text == '基本エネルギー':
        info['cardClass'] = 'エネルギー'
    elif text == 'トレーナー':
        parsed = parseTrainer(heading)
        info['cardClass'] = 'トレーナーズ'
        info['cardSubclasses'].append('グッズ')
        info['effect'] = parsed['effect']
        info['rules'].extend(parsed['rules'])
    elif text == 'GXワザ':
        info['moves'].extend(getMoves(heading))
        info['cardSubclasses'].append('GX')
    elif text == '古代能力':
        info['ancientTrait'] = parseAncientTrait(heading)['ancientTrait']
    elif text in ['どうぐ', 'きのみ']:
        info['cardClass'] = 'トレーナーズ'
        info['cardSubclasses'].append('グッズ')
        info['abilities'].extend(parseOldTool(heading)['abilities'])
    elif text == 'ワザマシン':
        info['cardClass'] = 'トレーナーズ'
        info['cardSubclasses'].append('グッズ')
        info['abilities'].extend(parseTM(heading)['abilities'])
    else:
        print('unhandled heading')
        print(text)
    return info
def parseTM(heading):
    """Parse a ワザマシン (Technical Machine) section.

    The element following `heading` is expected to be a `<p>` holding the
    card text; it becomes the description of a single pseudo-ability.
    Anything else, including a missing sibling, is reported on stdout and
    leaves the description empty.

    Returns `{'abilities': [ability]}`.
    """
    ability = {
        'title': 'ワザマシン',
        'description': '',
        'type': 'ワザマシン'
    }
    nextSibling = heading.find_next_sibling()
    # find_next_sibling() yields None when the heading is the last element.
    if nextSibling is not None and nextSibling.name == 'p':
        ability['description'] = nextSibling.text
    else:
        print('Unhandled TM')
        print(nextSibling)
    return {'abilities': [ability]}
def parseOldTool(heading):
    """Parse an old-style どうぐ / きのみ section into a pseudo-ability.

    An optional `<h4>` right after `heading` supplies the title and the
    following `<p>` supplies the description; the heading text itself is
    stored as the ability type. Missing elements leave the corresponding
    field empty.

    Returns `{'abilities': [ability]}`.
    """
    ability = {
        'title': '',
        'description': '',
        'type': heading.text
    }
    nextSibling = heading.find_next_sibling()
    # Every step guards against None: find_next_sibling() returns None once
    # the end of the container is reached.
    if nextSibling is not None and nextSibling.name == 'h4':
        ability['title'] = nextSibling.text
        nextSibling = nextSibling.find_next_sibling()
    if nextSibling is not None and nextSibling.name == 'p':
        ability['description'] = getEnergyText(nextSibling)
    return {'abilities': [ability]}
# Ancient-trait rule text -> trait title. The page only carries the rule text,
# so the title is recovered by exact match.
_ANCIENT_TRAIT_TITLES = {
    'このカードは、最初の自分の番や出したばかりのポケモンからでも、手札から進化できる。': 'Δ進化',
    'このポケモンは、「ポケモンのどうぐ」を2枚までつけられる。': 'Θダブル',
    'このポケモンは、相手のポケモンからの特性の効果を受けない。': 'Θストップ',
    'このポケモンに、手札からエネルギーをつけるとき、同時に2枚までつけられる。(ワザ・特性・トレーナーズでつける場合はのぞく。)': 'α(アルファ)グロウ',
    'このポケモンは、相手が使うトレーナーズの効果を受けない。(ポケモンのどうぐ・スタジアムはのぞく。)': 'Ω(オメガ)バリア',
    'このポケモンは、ワザを2回連続で使える。(1回目で相手のバトルポケモンがきぜつしたなら、次のポケモンが出た後、2回目を使う。)': 'Ω(オメガ)連打',
    'このポケモンが、ワザのダメージで相手のポケモンをきぜつさせたなら、サイドを1枚多くとる。': 'Δ(デルタ)プラス',
    'このポケモンになったとき、このポケモンのHPをすべて回復する。': 'Θ(シータ)マックス',
    'このポケモンが、相手のポケモンから受けるワザのダメージは、「-20」される。': 'Δ(デルタ)ワイルド',
    'このポケモンのHPが回復するとき、その回復する量は2倍になる。': 'α(アルファ)回復',
}


def parseAncientTrait(heading):
    """Parse a 古代能力 (ancient trait) section.

    An optional `<h4>` after `heading` is skipped; the following `<p>` holds
    the rule text, which is mapped to its title via the lookup table.
    Unknown text, an unexpected element or a missing sibling is reported on
    stdout.

    Returns `{'ancientTrait': {'title', 'description'}}`, or
    `{'ancientTrait': None}` when nothing was recognised.
    """
    info = {
        'ancientTrait': None
    }
    nextSibling = heading.find_next_sibling()
    if nextSibling is not None and nextSibling.name == 'h4':
        nextSibling = nextSibling.find_next_sibling()

    # A missing sibling is treated like any other unexpected element.
    if nextSibling is None or nextSibling.name != 'p':
        print('Unhandled Ancient Trait')
        return info

    description = nextSibling.text
    title = _ANCIENT_TRAIT_TITLES.get(description)
    if title is None:
        print(description)
        print('Unhandled Ancient Trait')
    else:
        info['ancientTrait'] = {
            'title': title,
            'description': description
        }
    return info
def parseTrainer(heading):
    """Parse a トレーナー section.

    The `<p>` directly after `heading`, when present, is the card effect.
    The common グッズ rules are always attached.

    Returns `{'effect': str, 'rules': [rule]}`.
    """
    info = {
        'effect': '',
        # Copy so callers cannot mutate the shared module-level rule list.
        'rules': list(commonRulings['goodsRules'])
    }
    nextSibling = heading.find_next_sibling()
    # find_next_sibling() returns None when the heading is the last element.
    if nextSibling is not None and nextSibling.name == 'p':
        info['effect'] = nextSibling.text
    return info
def parseWeaknessResistanceRetreat(rightBox):
    """Read weakness, resistance and retreat cost from the right-box table.

    The second table row is expected to hold three cells in that order.
    Parsing is best effort: cells parsed before a failure are kept and the
    rest stay empty. A missing box or table yields all-empty lists.

    Returns `{'weaknesses': [...], 'resistances': [...], 'retreatCost': [...]}`.
    """
    info = {
        'weaknesses': [],
        'resistances': [],
        'retreatCost': []
    }
    try:
        tableNode = rightBox.find('table')
        trs = tableNode.find_all('tr')
        tds = trs[1].find_all('td')
        info['weaknesses'] = parseWeaknessOrResistance(tds[0])
        info['resistances'] = parseWeaknessOrResistance(tds[1])
        info['retreatCost'] = parseRetreat(tds[2])
    except (AttributeError, IndexError, KeyError, TypeError):
        # Missing box/table/row/cell or an icon without the expected class;
        # cards without this table (e.g. trainers) are expected here.
        pass
    return info
def parseRetreat(td):
    """Return the energy name of every icon in the retreat-cost cell."""
    return [
        parseEnergyClass(icon['class'][0])
        for icon in td.find_all(class_='icon')
    ]
def parseWeaknessOrResistance(td):
    """Parse one weakness/resistance cell into `[{type:str, value:str}, ...]`.

    A cell with exactly one child element yields one entry; a cell whose
    text is '--' yields none; a cell with several children yields one entry
    per icon, all sharing the cell text as value. Anything else is reported
    on stdout and yields an empty list.
    """
    childCount = len(td.findChildren())
    cellText = td.text.strip()

    if childCount == 1:
        iconClass = td.find(class_='icon')['class'][0]
        return [{'type': parseEnergyClass(iconClass), 'value': cellText}]
    if cellText == '--':
        return []
    if childCount > 1:
        entries = []
        for icon in td.find_all(class_='icon'):
            entries.append({'type': parseEnergyClass(icon['class'][0]), 'value': cellText})
        return entries

    print('Unhandled Weakness')
    return []
def parseDexInfo(leftBox):
    """Extract the Pokédex blurb from the left box.

    Returns None when there is no left box, no `card` node, or the heading
    is the known bad value '炎'. Otherwise returns a dict shaped like
    {number:int, species:str, height:str, weight:str, description:str}:
    `number` is omitted when it could not be parsed and paragraphs exist;
    only `number`/`species` are present when there are no paragraphs; and
    only `description` (the raw node text) is present when parsing failed.
    """
    info = None
    # Other left-box helpers tolerate a missing box; do the same here.
    dexNode = leftBox.find(class_='card') if leftBox is not None else None
    if dexNode:
        try:
            dexNodeHeading = dexNode.find('h4')
            # handle random incorrect dex info https://www.pokemon-card.com/card-search/details.php/card/39976/regu/all
            if dexNodeHeading.text.strip() != '炎':
                try:
                    dexNumber = int(dexNodeHeading.text.split(' ')[0].split('.')[1])
                    speciesText = dexNodeHeading.text.split(' ')[1].strip()
                except (ValueError, IndexError):
                    # Heading without a parseable "<prefix>.<number> <species>" shape.
                    dexNumber = None
                    speciesText = dexNodeHeading.text.strip()
                dexNodeParagraphs = dexNode.find_all('p')
                if dexNodeParagraphs:
                    if '高さ' in dexNodeParagraphs[0].text.strip():
                        height = dexNodeParagraphs[0].text.split('  ')[0].split(':')[1].strip()
                        weight = dexNodeParagraphs[0].text.split('  ')[1].split(':')[1].strip()
                        try:
                            description = dexNodeParagraphs[1].text.strip()
                        except IndexError:
                            description = ''
                    else:
                        height = ''
                        weight = ''
                        description = dexNodeParagraphs[0].text.strip()
                    if dexNumber:
                        info = {
                            'number': dexNumber,
                            'species': speciesText,
                            'height': height,
                            'weight': weight,
                            'description': description
                        }
                    else:
                        info = {
                            'species': speciesText,
                            'height': height,
                            'weight': weight,
                            'description': description
                        }
                else:
                    info = {
                        'number': dexNumber,
                        'species': speciesText
                    }
        except Exception:
            # Best effort: keep the raw text when the layout is unexpected.
            info = {'description': dexNode.text}
    return info
def parseEvolutions(heading):
    """Collect the evolution chain shown in the 進化 section.

    The `evolution` nodes under the heading's parent are walked in reverse
    document order; the position in that reversed list becomes `stage`.
    A node's `in-box` children are used when present, otherwise the node
    itself. Only elements carrying the `ev_on` or `ev_off` class are
    recorded; `ev_on` marks the card being viewed.

    Returns `{'evolution': [{name:str, stage:int, current:bool}, ...]}`.
    """
    chain = []
    stageNodes = heading.parent.find_all(class_='evolution')
    for stage, stageNode in enumerate(reversed(stageNodes)):
        # Fall back to the stage node itself when it has no in-box children.
        candidates = stageNode.find_all(class_='in-box') or [stageNode]
        for candidate in candidates:
            cssClasses = candidate['class']
            if 'ev_off' in cssClasses:
                isCurrent = False
            elif 'ev_on' in cssClasses:
                isCurrent = True
            else:
                continue
            chain.append({'name': candidate.text.strip(), 'stage': stage, 'current': isCurrent})
    return {'evolution': chain}
# Boilerplate rule texts attached to every card of a given trainer category,
# keyed by the category whose parser uses them.
commonRulings = dict(
    supporterRules=[
        dict(
            type='サポート',
            description='サポートは、自分の番に1枚しか使えない。',
        ),
    ],
    toolRules=[
        dict(
            type='ポケモンのどうぐ',
            description='ポケモンのどうぐは、自分のポケモンにつけて使う。ポケモン1匹につき1枚だけつけられ、つけたままにする。',
        ),
        dict(
            type='グッズ',
            description='グッズは、自分の番に何枚でも使える。',
        ),
    ],
    stadiumRules=[
        dict(
            type='スタジアム',
            description='スタジアムは、自分の番に1枚だけ、バトル場の横に出せる。別の名前のスタジアムが場に出たなら、このカードをトラッシュする。',
        ),
    ],
    goodsRules=[
        dict(
            type='グッズ',
            description='グッズは、自分の番に何枚でも使える。',
        ),
    ],
)
def parseSupporter(heading):
    """Parse a サポート section.

    The `<p>` directly after `heading` is the effect text, rendered through
    `getEnergyText`. Any other element, or a failure while reading it, is
    reported on stdout and leaves the effect empty. The common supporter
    rules are always attached.

    Returns `{'rules': [rule], 'effect': str}`.
    """
    info = {
        # Copy so callers cannot mutate the shared module-level rule list.
        'rules': list(commonRulings['supporterRules']),
        'effect': ''
    }
    try:
        nextSibling = heading.find_next_sibling()
        if nextSibling.name == 'p':
            info['effect'] = getEnergyText(nextSibling)
        else:
            print('Supporter Ruling: Found unexpected next element')
            print(nextSibling)
    except (AttributeError, KeyError, IndexError, TypeError):
        # No sibling at all, or an energy icon that could not be decoded.
        print('Unhandled Supporter description')
    return info
def parseStadium(heading):
    """Parse a スタジアム section.

    The `<p>` directly after `heading` is the effect text. Any other
    element, or a missing sibling, is reported on stdout and leaves the
    effect empty. The common stadium rules are always attached.

    Returns `{'rules': [rule], 'effect': str}`.
    """
    info = {
        # Copy so callers cannot mutate the shared module-level rule list.
        'rules': list(commonRulings['stadiumRules']),
        'effect': ''
    }
    try:
        nextSibling = heading.find_next_sibling()
        if nextSibling.name == 'p':
            info['effect'] = nextSibling.text
        else:
            print('Stadium Ruling: Found unexpected next element')
            print(nextSibling)
    except AttributeError:
        # find_next_sibling() returned None (or heading itself is None).
        print('unhandled stadium card description')
    return info
def parseSpecialEnergy(heading):
    """Parse a 特殊エネルギー section.

    The effect is the `<p>` directly after `heading`; when the next element
    is instead a 特別なルール heading, the element after that one is used.
    Anything else is reported on stdout and leaves the effect empty.

    Returns `{'effect': str}`.
    """
    info = {
        'effect': ''
    }
    try:
        nextSibling = heading.find_next_sibling()
        if nextSibling.name == 'p':
            info['effect'] = nextSibling.text
        elif nextSibling.text == '特別なルール':
            info['effect'] = nextSibling.find_next_sibling().text
        else:
            print('Special Energy Ruling: Found unexpected next element')
            print(nextSibling)
    except AttributeError:
        # One of the sibling lookups returned None.
        print('Unhandled Special Energy description')
    return info
def parseTool(heading):
    """Parse a ポケモンのどうぐ section.

    Walks the `<p>` siblings after `heading`, skipping paragraphs that only
    repeat the common tool rules; the first other paragraph is the effect.
    The walk gives up after more than ten skipped paragraphs. A missing
    sibling or hitting that limit is reported on stdout.

    Returns `{'rules': [rule], 'effect': str}`.
    """
    info = {
        # Copy so callers cannot mutate the shared module-level rule list.
        'rules': list(commonRulings['toolRules']),
        'effect': ''
    }
    textsToCheck = [rule['description'] for rule in commonRulings['toolRules']]
    counter = 0
    try:
        nextSibling = heading.find_next_sibling()
        while nextSibling.name == 'p':
            if nextSibling.text not in textsToCheck:
                info['effect'] = nextSibling.text
                break
            counter = counter + 1
            if counter > 10:
                # Runaway guard: stop instead of raising just to be caught.
                print('WARNING Infinite loop triggered trying to find tool text')
                print('Unhandled Tool description')
                break
            nextSibling = nextSibling.find_next_sibling()
    except AttributeError:
        # Ran off the end of the container (sibling is None).
        print('Unhandled Tool description')
    return info
def parseGoods(heading):
    """Parse a グッズ section.

    The `<p>` directly after `heading` is the effect text. Any other
    element, or a missing sibling, is reported on stdout and leaves the
    effect empty. The common グッズ rules are always attached.

    Returns `{'rules': [rule], 'effect': str}`.
    """
    info = {
        # Copy so callers cannot mutate the shared module-level rule list.
        'rules': list(commonRulings['goodsRules']),
        'effect': ''
    }
    try:
        nextSibling = heading.find_next_sibling()
        if nextSibling.name == 'p':
            info['effect'] = nextSibling.text
        else:
            print('GoodsRuling: Found unexpected next element')
            print(nextSibling)
    except AttributeError:
        # find_next_sibling() returned None (or heading itself is None).
        print('unhandled trainer card description')
    return info
def parseTopInfo(topInfo):
    """Read HP and energy types from the page's top-info block.

    Best effort: when the block, its `td-r` cell or the `hp-num` element is
    missing, the defaults are returned unchanged.

    Returns `{'cardTypes': [str], 'hp': str | None}`; HP is kept as the
    stripped page text.
    """
    info = {
        'cardTypes': [],
        'hp': None,
    }
    try:
        rightTopInfo = topInfo.find(class_='td-r')
        info['hp'] = rightTopInfo.find(class_='hp-num').text.strip()
        typeIcons = rightTopInfo.find_all(class_='icon')
        info['cardTypes'] = [parseEnergyClass(icon['class'][0]) for icon in typeIcons]
    except (AttributeError, KeyError, IndexError):
        # Pages without an HP block are expected here, so stay quiet.
        pass
    return info
def parseSubText(leftBox):
info = {
'regulationMark':'',
'regulationMarkURL':'',
'printedNumberText':'',
'numerator':0,
'denominator':0,
'rarityImageURL':''
}
try:
subText = leftBox.find(class_='subtext')
info['printedNumberText'] = subText.text.strip().replace('\xa0','')
info['numerator'] = int(info['printedNumberText'].split('/')[0])
info['denominator'] = int(info['printedNumberText'].split('/')[1])
try:
regulationATag = subText.find(class_='img-regulation', alt=True)
info['regulationMark'] = regulationATag['alt']
info['regulationMarkURL'] = (baseURL + regulationATag['src'])
except:
pass
try:
rarityImage = subText.find("img", {"width": "24"})
info['rarityImageURL'] = baseURL+rarityImage['src']
except:
pass
except:
pass
# print(info)
return info
def getIllustratorInfo(leftBox):
    """Return the illustrator's name and absolute profile link.

    Reads the `<a>` inside the left box's `author` element. When any of
    those pieces is missing, `{'name': '', 'link': ''}` is returned.
    """
    info = {'name': '', 'link': ''}
    try:
        author = leftBox.find(class_='author').find('a')
        info = {'name': author.text, 'link': baseURL + author['href']}
    except (AttributeError, KeyError):
        # Missing box / author / anchor, or an anchor without href.
        pass
    return info
def getCardText(topHeadings):
    """Join the text of every `<p>` under the first heading's parent.

    Paragraphs are separated by a single newline with no trailing newline.
    The previous implementation sliced two characters off the end while
    only one newline had been appended, which dropped the last character of
    the text.

    Returns '' when `topHeadings` is empty.
    """
    if not topHeadings:
        return ''
    paragraphs = topHeadings[0].parent.find_all('p')
    return '\n'.join(p.text for p in paragraphs)
def getMoves(heading):
    """Parse the attacks listed after a ワザ / GXワザ heading.

    Siblings after `heading` are scanned while they are `<h4>` or `<p>`
    elements; each `<h4>` is one attack. For every attack the direct text of
    the `<h4>` is the title, its `span.icon` children are the energy cost,
    the optional `span.f_right` is the damage, and a `<p>` directly after
    the `<h4>` is the description.

    Returns a list of
    {energyCost:[str], convertedEnergyCost:int, title:str, damage:str, description:str}.
    """
    # Collect the attack headings that belong to this section only: stop at
    # the first sibling that is neither an attack heading nor a paragraph,
    # or when the container ends (sibling is None).
    moveHeadings = []
    sibling = heading.find_next_sibling()
    while sibling is not None and sibling.name in ['h4', 'p']:
        if sibling.name == 'h4':
            moveHeadings.append(sibling)
        sibling = sibling.find_next_sibling()

    moves = []
    for moveHeading in moveHeadings:
        attack = {
            'energyCost': [],
            'convertedEnergyCost': 0,
            'title': '',
            'damage': '',
            'description': ''
        }
        # Direct text only, so icon and damage spans are not part of the title.
        titleNode = moveHeading.find(text=True, recursive=False)
        if titleNode is not None:
            attack['title'] = titleNode.strip()

        energyArray = moveHeading.find_all("span", {"class": "icon"})
        attack['convertedEnergyCost'] = len(energyArray)
        attack['energyCost'] = [parseEnergyClass(energy['class'][0]) for energy in energyArray]

        damageNode = moveHeading.find("span", {"class": "f_right"})
        if damageNode is not None:
            attack['damage'] = damageNode.text.strip()

        # The last attack may have no following element at all.
        descriptionNode = moveHeading.find_next_sibling()
        if descriptionNode is not None and descriptionNode.name == 'p':
            attack['description'] = getEnergyText(descriptionNode)
        moves.append(attack)
    return moves
def getSpecialRules(heading):
    """Parse a 特別なルール section.

    Every `<p>` nested inside the element following `heading` is classified
    by `getSpecialRule`. Recognised rules are collected in order; a
    recognised ancient trait is kept even when further paragraphs follow.
    Paragraphs that yield neither contribute nothing.

    Returns `{'rules': [{type, description}], 'ancientTrait': dict | None}`.
    """
    info = {
        'rules': [],
        'ancientTrait': None
    }
    container = heading.find_next_sibling()
    if container is None:
        # Heading is the last element: there is nothing to parse.
        return info

    for paragraph in container.find_all('p'):
        specialRule = getSpecialRule(paragraph)
        # Only overwrite when something was found, so a later plain rule
        # paragraph cannot reset a trait found earlier.
        if specialRule['ancientTrait'] is not None:
            info['ancientTrait'] = specialRule['ancientTrait']
        if specialRule['rule'] is not None:
            info['rules'].append(specialRule['rule'])
    return info
# Rule text that is really an ancient trait rather than a card rule.
_SPECIAL_RULE_DELTA_PLUS = 'このポケモンが、ワザのダメージで相手のポケモンをきぜつさせたなら、サイドを1枚多くとる。'

# Exact rule text -> rule type.
_SPECIAL_RULE_EXACT = {
    'ポケモンVSTARがきぜつしたとき、相手はサイドを2枚とる。': 'VSTAR',
    'ポケモンVがきぜつしたとき、相手はサイドを2枚とる。': 'V',
    'かがやくポケモンは、デッキに1枚しか入れられない。': 'かがやくポケモン',
    'ポケモンVMAXがきぜつしたとき、相手はサイドを3枚とる。': 'VMAX',
    'サポートは、自分の番に1枚しか使えない。': 'サポート',
    'ポケモンのどうぐは、自分のポケモンにつけて使う。ポケモン1匹につき1枚だけつけられ、つけたままにする。': 'ポケモンのどうぐ',
    'ポケモン【V】がきぜつしたとき、相手はサイドを2枚とる。': 'V',
    'TAG TEAMがきぜつしたとき、相手はサイドを3枚とる。': 'TAG TEAM',
    'ポケモンEXがきぜつしたとき、相手はサイドを2枚とる。': 'EX',
    'ポケモンGXがきぜつしたとき、相手はサイドを2枚とる。': 'GX',
    'ACE SPECカードは、1つのデッキにつき1枚しか入れられない。': 'ACE SPEC',
    'M進化ポケモンになったとき、自分の番は終わる。': 'M進化',
    '同じ名前の(プリズムスター)のカードは、デッキに1枚しか入れられない。トラッシュには行かず、ロストゾーンに置く。': 'プリズムスター',
    'スタジアムは、自分の番に1枚だけ、バトル場の横に出せる。別の名前のスタジアムが場に出たなら、このカードをトラッシュする。': 'スタジアム',
    'グッズは、自分の番に何枚でも使える。': 'グッズ',
    'こちらのカードのレギュレーションは[C]として扱います。': 'レギュレーション',
    'ゲンシカイオーガEXになったとき、自分の番は終わる。': 'ゲンシカイキ',
    'ゲンシグラードンEXになったとき、自分の番は終わる。': 'ゲンシカイキ',
    '(このカードは、公式大会では使えない。)': 'レギュレーション',
    'このカードは、エネルギー2個ぶんとしてはたらく。': '特殊エネルギー',
    'このポケモンがきぜつしたら、相手はサイドを2枚とる。': 'LEGENDを組み合わせて',
    'このカードはエネルギー2個ぶんとしてはたらく。': '特殊エネルギー',
    'ポケモンカードゲーム公式大会入賞カード': 'ポケモンカードゲーム公式大会入賞カード',
    'ポケモンexがきぜつしたとき、相手プレイヤーはサイドを2枚とります。': 'ポケモンex',
    'ポケモン☆は、デッキに1枚しか入れることができません。': 'ポケモン☆',
    'ポケモンexがきぜつしたとき、相手はサイドを2枚とる。': 'ポケモンex',
    'ポケモンのどうぐは、自分の番に何枚でも、自分のポケモンにつけられる。ポケモン1匹につき1枚だけつけられ、つけたままにする。': 'ポケモンのどうぐ',
}

# (substring, rule type) pairs, tried in order when no exact text matches.
# None of the exact texts above contains any of these substrings, so trying
# the exact table first gives the same result as one ordered chain.
_SPECIAL_RULE_CONTAINS = (
    ('V-UNION', 'V-UNION'),
    ('BREAK進化する前', 'BREAK進化'),
    ('サチコEX', 'MサチコEX'),
    ('エネルギー1個ぶんとしてはたらく', '特殊エネルギー'),
    ('LEGENDを組み合わせて、ベンチに出す。', 'LEGENDを組み合わせて'),
    ('レベルアップ前のワザ・ポケパワーも使うことができ、ポケボディーもはたらく。', 'LV. X'),
    ('バトルロード サマー★2007', 'バトルロード サマー★2007'),
)


def getSpecialRule(paragraph):
    """Classify one special-rule paragraph by its text.

    Returns `{'rule': {'type', 'description'} | None, 'ancientTrait':
    {'title', 'description'} | None}`. At most one of the two is set; both
    stay None for unrecognised text (reported on stdout) or when the
    paragraph has no usable text.
    """
    info = {
        'rule': None,
        'ancientTrait': None
    }
    try:
        description = paragraph.text
        if description == _SPECIAL_RULE_DELTA_PLUS:
            info['ancientTrait'] = {
                'title': 'Δプラス',
                'description': description
            }
            return info

        ruleType = _SPECIAL_RULE_EXACT.get(description)
        if ruleType is None:
            for needle, candidateType in _SPECIAL_RULE_CONTAINS:
                if needle in description:
                    ruleType = candidateType
                    break

        if ruleType is None:
            print('Unhandled special rule typing')
            print(description)
        else:
            info['rule'] = {
                'type': ruleType,
                'description': description
            }
    except (AttributeError, TypeError):
        # paragraph is None or its text is not a string.
        print('Possible Error: unhandled or no text description of special rule')
    return info
def parseVSTARPower(heading):
    """Parse a VSTARパワー section, which holds either an ability or a move.

    The `<h4>` after `heading` names the kind: 特性 is parsed with
    `getAbility` and stored as an ability of type 'VSTARパワー'; ワザ is
    parsed with `getMoves`. Other layouts are reported on stdout.

    Returns `{'abilities': [...], 'moves': [...]}`.
    """
    info = {
        'abilities': [],
        'moves': []
    }
    try:
        nextSibling = heading.find_next_sibling()
        if nextSibling.name != 'h4':
            print("VSTAR Power Error: Unknown next element")
        elif nextSibling.text == '特性':
            parse = getAbility(nextSibling)
            info['abilities'] = [{
                'title': parse['title'],
                'description': parse['description'],
                'type': 'VSTARパワー'
            }]
        elif nextSibling.text == 'ワザ':
            info['moves'] = getMoves(nextSibling)
        else:
            print('Unhandled VSTAR Power')
    except (AttributeError, KeyError, IndexError, TypeError):
        # Missing sibling or malformed move markup: keep the empty result.
        pass
    return info
def getAbility(node):
    """Parse the ability that follows `node`.

    `node.text` becomes the ability type, the `<h4>` after it the title and
    the element after that (expected to be a `<p>`) the description,
    rendered through `getEnergyText`. Deviations are reported on stdout and
    leave the affected field empty.

    Returns `{'title': str, 'description': str, 'type': str}`.
    """
    ability = {
        'title': '',
        'description': '',
        'type': ''
    }
    # todo find card with different ability types
    ability['type'] = node.text
    try:
        nextSibling = node.find_next_sibling()
        if nextSibling.name == 'h4':
            ability['title'] = nextSibling.text
        else:
            print('Error: Unhandled ability title')
        siblingAfter = nextSibling.find_next_sibling()
        if siblingAfter.name == 'p':
            ability['description'] = getEnergyText(siblingAfter)
        else:
            print('Error: Unhandled ability description')
    except (AttributeError, KeyError, IndexError, TypeError):
        # Ran out of siblings, or an energy icon could not be decoded.
        print(ability)
        print('Error: Unhandled ability')
    return ability
# CSS icon class -> energy name used in the parsed output.
_ENERGY_BY_ICON_CLASS = {
    'icon-none': 'colorless',
    'icon-grass': 'grass',
    'icon-fire': 'fire',
    'icon-dark': 'darkness',
    'icon-psychic': 'psychic',
    'icon-electric': 'electric',
    'icon-water': 'water',
    'icon-fighting': 'fighting',  # was misspelled 'fightning'
    'icon-void': 'empty',
    'icon-steel': 'steel',
    'icon-dragon': 'dragon',
    'icon-fairy': 'fairy',
    'icon-plus': 'plus',
}


def parseEnergyClass(iconClass):
    """Translate an energy icon's CSS class into an energy name.

    Returns the energy name, or None for an unknown class after reporting
    it on stdout.
    """
    energy = _ENERGY_BY_ICON_CLASS.get(iconClass)
    if energy is None:
        print(iconClass)
        print('unhandled typing')
    return energy
def getEnergyText(node):
    """Render a node's direct children as text with inline energy markers.

    Each `<span>` child is replaced by `{energy}`, where the energy name
    comes from the span's first CSS class; every other child contributes
    its text. The pieces are joined with single spaces.
    """
    pieces = []
    for child in node.contents:
        if child.name != 'span':
            pieces.append(child.text)
            continue
        pieces.append("{" + parseEnergyClass(child['class'][0]) + "}")
    return ' '.join(pieces)
# Parse every fetched page, tag each record with the metadata of the page it
# came from, and write the combined table to CSV.
parsedCards = []
for item in soupURLs:
    print(item['Details URL'])
    card = parseSoup(item['soup'])
    card.update({
        'Sets': item['Sets'],
        'Card_ID': item['Card_ID'],
        'Details URL': item['Details URL'],
    })
    parsedCards.append(card)

df2 = pd.DataFrame(parsedCards)
df2.to_csv(fileName, index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment