Last active
November 23, 2023 22:02
-
-
Save tkfu/bc5dc2c6cee4d1e582a3d369c3077bb5 to your computer and use it in GitHub Desktop.
Script to scrape SRD-licensed monster data from DnD Beyond. Brittle, simple, but it works as of the posting of this gist.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'json' | |
require 'nokogiri' | |
require 'open-uri' | |
require 'rails-html-sanitizer' | |
# Collects the "more-info" URL of every monster on the filtered D&D Beyond
# monster listing (filter-source=1).
#
# Fetches the first listing page, reads the page count from its pagination
# control, then walks every listing page and gathers the monster links.
# Progress is reported on stdout.
#
# @return [Array<String>] absolute ".../more-info" URLs, in listing order
def get_monster_list
  listing_url = 'https://www.dndbeyond.com/monsters?filter-search=&filter-source=1&filter-type=0&page='

  # URI.open instead of Kernel#open: since Ruby 3.0, Kernel#open no longer
  # fetches URLs. The block form closes the downloaded stream after parsing.
  first_page = URI.open("#{listing_url}1") { |io| Nokogiri::HTML(io) }

  # The second-to-last pagination item is read as the last page number
  # (the final item is presumably a "Next" link -- TODO confirm).
  pagination_items = first_page.xpath('//li[@class="b-pagination-item"]').to_a
  last_page_item = pagination_items[pagination_items.length - 2]
  # With no usable pagination control, treat the listing as a single page
  # instead of crashing on nil or silently scraping nothing.
  num_pages = [last_page_item ? last_page_item.text.to_i : 0, 1].max
  puts "Found #{num_pages} pages..."

  (1..num_pages).flat_map do |page_number|
    puts "Downloading page #{page_number}..."
    # Page 1 is already in memory; do not request it a second time.
    page = if page_number == 1
             first_page
           else
             URI.open("#{listing_url}#{page_number}") { |io| Nokogiri::HTML(io) }
           end
    page.xpath('//span[@class="name"]/a/@href').map do |href|
      "https://www.dndbeyond.com#{href.value}/more-info"
    end
  end
end
# Downloads a monster page and parses it with Nokogiri.
#
# @param monster_url [String] URL of the page to fetch
# @return [Object] the document returned by Nokogiri::HTML
def load_monster(monster_url)
  # URI.open instead of Kernel#open: since Ruby 3.0, Kernel#open no longer
  # fetches URLs. The block form closes the downloaded stream after parsing.
  URI.open(monster_url) { |io| Nokogiri::HTML(io) }
end
# Scrapes one monster's stat block from its D&D Beyond "more-info" page.
#
# The page at +monster_url+ is downloaded and parsed here (the argument is a
# URL, not an already-parsed document). The monster's main page -- the URL
# with everything from "/more-info" onwards removed -- is fetched as well to
# find the monster image. A progress line is printed to stdout.
#
# @param monster_url [String] URL of the monster's ".../more-info" page
# @return [Hash] "name", "meta", one entry per attribute row, per ability
#   score (plus "<ability>_mod"), per tidbit and per description block, and
#   "img_url" ("" when the main page has no image)
def get_monster_stats(monster_url)
  # URI.open instead of Kernel#open: since Ruby 3.0, Kernel#open no longer
  # fetches URLs. The block form closes the downloaded stream after parsing.
  monster = URI.open(monster_url) { |io| Nokogiri::HTML(io) }

  stat_block = {
    'name' => monster.at_css('.mon-stat-block__name-link').text,
    'meta' => monster.at_css('.mon-stat-block__meta').text
  }
  # Merged in page order; a later section overwrites an earlier key of the
  # same name.
  stat_block.merge!(monster_attribute_stats(monster))
  stat_block.merge!(monster_ability_stats(monster))
  stat_block.merge!(monster_tidbit_stats(monster))
  stat_block.merge!(monster_description_stats(monster))
  stat_block['img_url'] = monster_image_url(monster_url)

  puts "Added #{stat_block['name']}..."
  stat_block
end

# Reads the attribute rows into { label => "value extra" }.
def monster_attribute_stats(monster)
  rows = monster.at_css('.mon-stat-block__attributes').element_children
  rows.each_with_object({}) do |row, stats|
    label = row.at_css('.mon-stat-block__attribute-label').text.strip
    value = row.at_css('.mon-stat-block__attribute-data-value')&.text&.strip
    extra = row.at_css('.mon-stat-block__attribute-data-extra')&.text&.strip
    # Value and extra are always joined by one space, even when either is
    # missing; this keeps the output format unchanged.
    stats[label] = "#{value} #{extra}"
  end
end

# Reads the ability block into { ability => score, "<ability>_mod" => modifier }.
def monster_ability_stats(monster)
  abilities = monster.at_css('.ability-block').element_children
  abilities.each_with_object({}) do |ability_node, stats|
    ability = ability_node.at_css('.ability-block__heading').text.strip
    stats[ability] = ability_node.at_css('.ability-block__score')&.text&.strip
    stats["#{ability}_mod"] = ability_node.at_css('.ability-block__modifier')&.text&.strip
  end
end

# Reads the tidbit rows into { label => data }.
def monster_tidbit_stats(monster)
  tidbits = monster.at_css('.mon-stat-block__tidbits').element_children
  tidbits.each_with_object({}) do |tidbit, stats|
    label = tidbit.at_css('.mon-stat-block__tidbit-label').text.strip
    stats[label] = tidbit.at_css('.mon-stat-block__tidbit-data')&.text&.strip
  end
end

# Reads the description blocks into { heading => link-sanitized HTML }.
def monster_description_stats(monster)
  link_sanitizer = Rails::Html::LinkSanitizer.new
  blocks = monster.at_css('.mon-stat-block__description-blocks').element_children
  blocks.each_with_object({}) do |block, stats|
    heading = block.at_css('.mon-stat-block__description-block-heading')
    # A block without a heading is stored under "Traits".
    key = heading ? heading.text.strip : 'Traits'
    content = block.at_css('.mon-stat-block__description-block-content').element_children
    stats[key] = link_sanitizer.sanitize(content.to_s.tr("\n", ''))
  end
end

# Fetches the monster's main page and returns its image URL ("" if none).
def monster_image_url(monster_url)
  main_url = monster_url.split('/more-info')[0]
  main_page = URI.open(main_url) { |io| Nokogiri::HTML(io) }
  # `&.` guards only the attribute lookup; nil.to_s then yields "".
  image_src = main_page.at_css('.monster-image')&.attribute('src').to_s
  # Protocol-relative sources ("//host/...") are made absolute.
  image_src.start_with?('//') ? "https:#{image_src}" : image_src
end
# Placeholder for scraping a monster's descriptive text; not implemented.
#
# @return [nil] always, until an implementation is written
def get_monster_desc
  # TODO: implement. The previous body was a bare `pass`, which is not a Ruby
  # keyword and raised NameError when the method was called.
  nil
end
# Looks up a monster's image source on its main page.
#
# The main page URL is +monster_url+ with everything from "/more-info"
# onwards removed.
#
# @param monster_url [String] URL of the monster's ".../more-info" page
# @return [String] the `src` of the `.monster-image` element, or "" when the
#   page has no such element
def get_monster_img(monster_url)
  main_url = monster_url.split('/more-info')[0]
  # URI.open instead of Kernel#open: since Ruby 3.0, Kernel#open no longer
  # fetches URLs. The block form closes the downloaded stream after parsing.
  monster_main = URI.open(main_url) { |io| Nokogiri::HTML(io) }
  # The original queried an undefined `img` variable here, so the method
  # always raised NameError; the parsed page is what must be searched.
  # `&.` guards only the attribute lookup; nil.to_s then yields "".
  monster_main.at_css('.monster-image')&.attribute('src').to_s
end
# Script entry point: scrape every listed monster, then dump all collected
# stat blocks to a single JSON file in the current directory.
urls = get_monster_list
all_monsters = urls.map { |monster_url| get_monster_stats(monster_url) }
File.open("beyond_monsters.json", "w") do |output|
  output.write(all_monsters.to_json)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment