Skip to content

Instantly share code, notes, and snippets.

@itolosa
Created October 23, 2015 14:30
Show Gist options
  • Save itolosa/59f5d6eca810f9b34434 to your computer and use it in GitHub Desktop.
Save itolosa/59f5d6eca810f9b34434 to your computer and use it in GitHub Desktop.
require 'open-uri'
require 'nokogiri'
# Small HTML scraper for the macneed.com catalogue pages.
#
# Relies on `open-uri` (for URI.open) and `nokogiri` being required by the
# file that defines this class. Every public method performs a network fetch
# and returns plain Ruby hashes/arrays built from the parsed page.
class AppkedApi
  # Page listing every topic link.
  TOPICS_URL = 'http://www.macneed.com/topics/'.freeze
  # Page listing every article, grouped by month.
  ARTICLES_URL = 'http://www.macneed.com/articles/'.freeze

  # Scrapes the post entries found on +url+.
  #
  # @param url [String] page to fetch
  # @param entry [Boolean] when true (default) only the first post is
  #   returned, with the page's download link added under :download_url;
  #   when false every post on the page is returned
  # @return [Hash, Array<Hash>] a single Hash in entry mode (or an empty
  #   Array when the page has no posts), otherwise an Array with one Hash
  #   per post
  # @raise [ArgumentError] if a post's date cannot be parsed by Time.parse
  def content(url, entry = true)
    doc = fetch_document(url)
    posts = doc.css('.post.clearfix .entry')

    # Listing mode: no download link is looked up, so pages without one work.
    return posts.map { |post| entry_data(post) } unless entry

    first_post = posts.first
    # An empty Array is what callers historically received for an empty page.
    return [] unless first_post

    download_link = doc.css('.article.clearfix .text .appdl a').first
    # :download_url is nil (rather than an exception) when the link is absent.
    entry_data(first_post).merge(download_url: download_link && download_link['href'])
  end

  # Fetches the topics page and maps each topic name to its link.
  #
  # @return [Hash{String => String}] link text => href
  def topics
    links_to_h(fetch_document(TOPICS_URL).xpath('//div[@id="page"]/p[3]/a'))
  end

  # Fetches the article index and groups its links by derived application name.
  #
  # @return [Hash{String => Array<Array(String, String)>}] name (as computed
  #   by #get_name) => list of [link text, href] pairs
  def apps
    fetch_document(ARTICLES_URL)
      .css('ul.car-monthlisting li a')
      .map { |a| [a.text, a['href']] }
      .group_by { |title, _href| get_name(title) }
  end

  # Scrapes a download page.
  #
  # @param url [String] page to fetch
  # @return [Hash] :url and :title (from the title link's attributes),
  #   :download_links (link text => href) and :date (Time)
  # @raise [ArgumentError] if the date text cannot be parsed by Time.parse
  def download(url)
    page = fetch_document(url).css('#content')
    date = Time.parse(page.css('span.appked_date').text)
    title_link = page.css('p.appked_title a').first
    {
      url: title_link['href'],
      title: title_link['title'],
      download_links: links_to_h(page.css('.downloadlink a')),
      date: date
    }
  end

  private

  # Downloads +url+ and parses it as HTML. URI.open is used because bare
  # Kernel#open no longer opens URLs on Ruby 3.0+; the block form closes the
  # handle once parsing is finished.
  def fetch_document(url)
    URI.open(url) { |io| Nokogiri::HTML(io) }
  end

  # Turns a collection of anchor nodes into a { text => href } Hash.
  def links_to_h(anchors)
    anchors.map { |a| [a.text, a['href']] }.to_h
  end

  # Builds the Hash describing one post node.
  def entry_data(post)
    title_link = post.css('h2 a').first
    desc = post.css('.desc')
    {
      title: title_link.text,
      url: title_link['href'],
      categories: links_to_h(desc.css('a[rel="category tag"]')),
      # The date is the last "/"-separated segment of the description text.
      date: Time.parse(desc.text.split('/').map(&:strip).last),
      tags: links_to_h(post.css('.tag a')),
      views: post.css('.view a').text
    }
  end

  # Derives a grouping name from a link title: keeps the text before the
  # first en dash, cuts it at the first dotted version number, then drops a
  # leading "[...]" tag and anything from the first remaining digit run on.
  # An empty title yields an empty String.
  def get_name(text)
    # split returns [] for an empty String, hence the to_s guard.
    head = text.split('–').first.to_s
    base = /(.*?)(?:(?:(?:\d+[.])+\d+)|$)/.match(head)[1]
    /(\[.*?\])?[ ]?([^\[]*?)(?:\d+|$)/.match(base.strip)[2]
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment