Skip to content

Instantly share code, notes, and snippets.

@tkfu
Last active November 23, 2023 22:02
Show Gist options
  • Save tkfu/bc5dc2c6cee4d1e582a3d369c3077bb5 to your computer and use it in GitHub Desktop.
Save tkfu/bc5dc2c6cee4d1e582a3d369c3077bb5 to your computer and use it in GitHub Desktop.
Script to scrape SRD-licensed monster data from DnD Beyond. Brittle, simple, but it works as of the posting of this gist.
require 'json'
require 'nokogiri'
require 'open-uri'
require 'rails-html-sanitizer'
# Collects the "/more-info" URL of every monster on the D&D Beyond monster
# listing selected by the query string below (presumably the SRD source
# filter -- TODO confirm against the site).
#
# Pages are fetched with URI.open: since Ruby 3.0, open-uri no longer hooks
# Kernel#open, so a bare open("https://...") is treated as a local file path.
#
# Prints progress to stdout.
#
# @return [Array<String>] absolute URLs, in listing order
def get_monster_list
  site_root = 'https://www.dndbeyond.com'
  listing_url = "#{site_root}/monsters?filter-search=&filter-source=1&filter-type=0&page="

  first_page = Nokogiri::HTML(URI.open("#{listing_url}1"))

  # The second-to-last pagination item is read as the highest page number
  # (presumably the last item is a "Next" link -- verify against live markup).
  # With fewer than two items, or a non-numeric label, assume a single page.
  pagination_items = first_page.xpath('//li[@class="b-pagination-item"]').to_a
  last_page_item = pagination_items[-2]
  num_pages = last_page_item ? [last_page_item.text.to_i, 1].max : 1
  puts "Found #{num_pages} pages..."

  (1..num_pages).flat_map do |page_number|
    puts "Downloading page #{page_number}..."
    # Page 1 is already in memory; do not request it a second time.
    page = page_number == 1 ? first_page : Nokogiri::HTML(URI.open("#{listing_url}#{page_number}"))
    page.xpath('//span[@class="name"]/a/@href').map do |href|
      "#{site_root}#{href.value}/more-info"
    end
  end
end
# Downloads a monster page and parses it with Nokogiri.
#
# Uses URI.open rather than Kernel#open: since Ruby 3.0, open-uri no longer
# lets Kernel#open fetch URLs.
#
# @param monster_url [String] URL of the page to fetch
# @return the value of Nokogiri::HTML for the downloaded page
def load_monster(monster_url)
  Nokogiri::HTML(URI.open(monster_url))
end
# Scrapes one monster's stat block into a flat Hash with String keys.
#
# Two pages are downloaded: the "/more-info" page (stat block) and the page
# at the same URL with "/more-info" removed (monster image). Both go through
# URI.open, because since Ruby 3.0 open-uri no longer lets Kernel#open fetch
# URLs.
#
# Keys, in insertion order:
# * "name", "meta"             - text of the name link and meta line, unstripped
# * one key per attribute row  - label => "value extra" (extra omitted if absent)
# * per ability                - heading => score, "<heading>_mod" => modifier
# * one key per tidbit row     - label => data
# * one key per description block - heading => inner HTML with newlines removed
#   and passed through Rails::Html::LinkSanitizer; a block without a heading
#   is stored under "Traits"
# * "img_url"                  - image src, "" when the page has no image
#
# Prints "Added <name>..." to stdout.
#
# @param monster_url [String] URL of the monster's "/more-info" page
# @return [Hash{String => String, nil}] the scraped stat block
# @raise [NoMethodError] if a required stat-block section or row label is
#   missing from the page
def get_monster_stats(monster_url)
  monster = Nokogiri::HTML(URI.open(monster_url))
  link_sanitizer = Rails::Html::LinkSanitizer.new

  stat_block = {
    'name' => monster.at_css('.mon-stat-block__name-link').text,
    'meta' => monster.at_css('.mon-stat-block__meta').text
  }

  monster.at_css('.mon-stat-block__attributes').element_children.each do |row|
    label = row.at_css('.mon-stat-block__attribute-label').text.strip
    value = row.at_css('.mon-stat-block__attribute-data-value')&.text&.strip
    extra = row.at_css('.mon-stat-block__attribute-data-extra')&.text&.strip
    # Join only the parts that exist so a row without "extra" text does not
    # end in a stray space.
    stat_block[label] = [value, extra].compact.reject(&:empty?).join(' ')
  end

  monster.at_css('.ability-block').element_children.each do |stat|
    ability = stat.at_css('.ability-block__heading').text.strip
    stat_block[ability] = stat.at_css('.ability-block__score')&.text&.strip
    stat_block["#{ability}_mod"] = stat.at_css('.ability-block__modifier')&.text&.strip
  end

  monster.at_css('.mon-stat-block__tidbits').element_children.each do |tidbit|
    label = tidbit.at_css('.mon-stat-block__tidbit-label').text.strip
    stat_block[label] = tidbit.at_css('.mon-stat-block__tidbit-data')&.text&.strip
  end

  monster.at_css('.mon-stat-block__description-blocks').element_children.each do |block|
    heading = block.at_css('.mon-stat-block__description-block-heading')
    key = heading ? heading.text.strip : 'Traits'
    content_html = block.at_css('.mon-stat-block__description-block-content').element_children.to_s
    stat_block[key] = link_sanitizer.sanitize(content_html.delete("\n"))
  end

  # The image is read from the monster's main page, not the /more-info page.
  main_page = Nokogiri::HTML(URI.open(monster_url.split('/more-info')[0]))
  monster_img = main_page.at_css('.monster-image')&.attribute('src').to_s
  # Turn a protocol-relative URL ("//host/path") into an absolute https URL.
  monster_img = "https:#{monster_img}" if monster_img.start_with?('//')
  stat_block['img_url'] = monster_img

  puts "Added #{stat_block['name']}..."
  stat_block
end
# Placeholder for scraping a monster's descriptive text; not implemented yet.
#
# The previous body was the bare word `pass` (a Python idiom). Ruby has no
# such keyword, so calling the method raised NameError. The stub now returns
# nil explicitly until a real implementation is written.
#
# @return [nil]
def get_monster_desc
  # TODO: implement description scraping.
  nil
end
# Looks up a monster's image URL on its main page (the given URL with the
# "/more-info" suffix removed).
#
# Fixes over the previous version:
# * the image element was looked up on an undefined local (`img`) instead of
#   the parsed page, so every call raised NameError;
# * the page is fetched with URI.open, because since Ruby 3.0 open-uri no
#   longer lets Kernel#open fetch URLs;
# * protocol-relative URLs ("//host/path") are prefixed with "https:", the
#   same normalisation get_monster_stats applies to "img_url".
#
# @param monster_url [String] URL of the monster's "/more-info" page
# @return [String] the image URL, or "" when the page has no '.monster-image'
#   element
def get_monster_img(monster_url)
  main_page_url = monster_url.split('/more-info')[0]
  monster_main = Nokogiri::HTML(URI.open(main_page_url))
  image_url = monster_main.at_css('.monster-image')&.attribute('src').to_s
  image_url.start_with?('//') ? "https:#{image_url}" : image_url
end
# Script entry point: gather every monster URL from the listing, scrape each
# monster's stat block in listing order, and write the collected records to
# beyond_monsters.json as a single JSON array.
urls = get_monster_list
all_monsters = urls.map { |monster_url| get_monster_stats(monster_url) }

File.open("beyond_monsters.json", "w") do |output|
  output.write(all_monsters.to_json)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment