Skip to content

Instantly share code, notes, and snippets.

@Ruin0x11
Created September 20, 2019 02:16
Show Gist options
  • Save Ruin0x11/e3fb433d1f91ee0a6daa6db8164105fb to your computer and use it in GitHub Desktop.
Save Ruin0x11/e3fb433d1f91ee0a6daa6db8164105fb to your computer and use it in GitHub Desktop.
Scrape Reaktor User Library
require 'mechanize'
require 'nokogiri'
require 'json'
require 'pp'
class String
  # Returns the text enclosed by the first two occurrences of +marker+.
  #
  # The marker is matched literally (regexp metacharacters are escaped), the
  # match is non-greedy, and it may span several lines. Returns nil when the
  # marker does not occur at least twice.
  def between(marker)
    delimiter = Regexp.escape(marker)
    found = match(/#{delimiter}(.*?)#{delimiter}/m)
    found && found[1]
  end
end
# Scrapes the Native Instruments Reaktor User Library.
#
# After logging in, every listing page is visited; each listed item is
# downloaded into the current directory and a "<file>.json" sidecar with the
# item's metadata and comments is written next to it.
class Reaktor
  BASE = "https://www.native-instruments.com".freeze
  LOGIN = "#{BASE}/typo3conf/ext/ni_account/login.php?api_path=auth/token".freeze
  URL = "#{BASE}/en/reaktor-community/reaktor-user-library".freeze
  # Account credentials. Read from the environment when set; otherwise they
  # default to the empty string and can be filled in here.
  USERNAME = ENV.fetch("REAKTOR_USERNAME", "")
  PASSWORD = ENV.fetch("REAKTOR_PASSWORD", "")
  # Number of items assumed per listing page when computing the page count.
  PER_PAGE = 15
  # Upper bound on attempts for a single item before it is skipped.
  MAX_ATTEMPTS = 100

  def initialize
    @agent = Mechanize.new
    @agent.user_agent_alias = 'Mac Safari'
  end

  # Builds the URL of listing page +id+ (1-based), sorted by "latest".
  def page_url(id)
    "#{URL}/all/all/all/all/all/latest/#{id}/all/"
  end

  # Posts the credentials as JSON and stores the returned token in the
  # agent's cookie jar as the "access-token" cookie.
  #
  # Raises RuntimeError when the response carries no token.
  def login
    payload = { username: USERNAME, password: PASSWORD }
    response = @agent.post(LOGIN, payload.to_json, { 'Content-Type' => 'application/json' })
    token = JSON.parse(response.body).dig("response_body", "access", "token")
    raise "Login failed: no access token in response" if token.nil?

    cookie = Mechanize::Cookie.new(
      domain: '.native-instruments.com',
      name: 'access-token',
      value: token,
      path: '/',
      expires: (Date.today + 1).to_s
    )
    @agent.cookie_jar << cookie
    puts "Logged in."
  end

  # Entry point: logs in, reads the total result count from the library
  # front page and downloads every listing page in turn.
  def go
    login
    front = @agent.get(URL)
    # Non-bang strip: strip! returns nil when there is nothing to strip,
    # which would break the call chain.
    results = Integer(front.at(".info-result").text.strip.split(" ").first)
    page_count(results).times do |index|
      id = index + 1
      listing = @agent.get(page_url(id))
      puts "===== Page #{id} ====="
      download(listing)
    end
  end

  # Downloads every item found on the listing +page+.
  #
  # Each item is attempted up to MAX_ATTEMPTS times; an item that still fails
  # is reported on stderr and skipped so the remaining items are processed.
  def download(page)
    page.search(".//li[contains(@class, 'item-box')]").each do |item|
      attempts = 0
      begin
        download_item(item)
      rescue StandardError => e
        attempts += 1
        retry if attempts < MAX_ATTEMPTS
        warn "Giving up after #{attempts} attempts: #{e.class}: #{e.message}"
      end
    end
  end

  # Fetches the detail page linked from listing item +i+ and returns its
  # metadata (and comments) as a Hash with symbol keys.
  def get_details(i)
    link = BASE + i.at_css('div.description-title a')['href']
    detail = @agent.get(link)
    {
      link: link,
      name: detail.at("div.detail-headline h2").text,
      headline: detail.at("div.detail-headline h3").text,
      description: detail.at("div.detail-description p").text.strip,
      rating: detail.at('div#rating')["data-average"],
      rating_count: detail.at('span#vote-number').text,
      downloads: detail.at("span.download-count").text.strip,
      author: get_detail(detail, "Author"),
      version: get_detail(detail, "Version"),
      created: get_detail(detail, "Created"),
      made_with: get_detail(detail, "Made with"),
      category: get_array_detail(detail, "Category"),
      tags: get_array_detail(detail, "Tags"),
      comments: get_comments(detail)
    }
  end

  # Returns the value following the "<name>:" label inside the detail-info
  # section of +detail+, or "" when the label is absent or has no value.
  def get_detail(detail, name)
    label = detail.search(".//div[contains(@class, 'detail-info')]").at("label:contains('#{name}:')")
    return "" if label.nil?

    # Limit the split to 2 so values that themselves contain ":" stay intact;
    # to_s covers a label with nothing after the colon.
    label.parent.text.strip.split(":", 2)[1].to_s.strip
  end

  # Like get_detail, but splits a multi-line value into an Array of the
  # non-empty, stripped lines.
  def get_array_detail(detail, name)
    get_detail(detail, name).split("\n").map(&:strip).reject(&:empty?)
  end

  # Extracts the comments on +detail+ as an Array of
  # { author:, body:, time: } hashes.
  def get_comments(detail)
    comments = detail.css("div.comment.row").map do |c|
      {
        author: c.at("div.author").text.strip,
        # The body is taken from the second sibling after the "clear" div.
        body: c.at("div.clear").next_sibling.next_sibling.text.strip,
        time: c.at("span.time").text.strip
      }
    end
    # The last matching row is always discarded.
    # NOTE(review): presumably it is not a real comment (e.g. a reply form) - confirm.
    comments.pop
    comments
  end

  private

  # Number of listing pages needed for +results+ items (ceiling division).
  def page_count(results)
    (results + PER_PAGE - 1) / PER_PAGE
  end

  # Downloads one listing item and writes its JSON sidecar.
  #
  # Raises when an "index.html" page is returned instead of a download; the
  # caller's retry then runs after the fresh login performed here.
  def download_item(item)
    path = item.at("script").text.between("'")
    file = @agent.get(BASE + path)
    if file.filename == "index.html"
      login
      raise "Got index.html instead of a download; logged in again"
    end
    puts "> #{file.filename}"

    already_saved = File.file?(file.filename)
    # Skip only when both the file and its sidecar exist, so a failed details
    # fetch is recovered on the next attempt.
    return if already_saved && File.file?("#{file.filename}.json")

    filename = already_saved ? file.filename : file.save
    File.write("#{filename}.json", JSON.pretty_generate(get_details(item)), encoding: 'UTF-8')
  end
end
# Script entry point: build a scraper and run the full download.
Reaktor.new.go
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment