Skip to content

Instantly share code, notes, and snippets.

@0m3r
Last active August 29, 2015 14:16
Show Gist options
  • Save 0m3r/db4c1e1f73b1cc555791 to your computer and use it in GitHub Desktop.
Save 0m3r/db4c1e1f73b1cc555791 to your computer and use it in GitHub Desktop.
osint psb4ukr
#!/usr/bin/ruby
require 'rubygems'
require 'nokogiri'
require 'open-uri'
require 'json'
# Scrapes one profile page and collects its fields into a Hash.
#
# Usage:
#   record = Psb4ukrorgParser.new(profile_url).parse
class Psb4ukrorgParser
  # Any link pointing at vk.com.
  VK_LINK = %r{^https?://vk\.com}
  # vk.com links that are dropped from the 'vk' list: search, write.php,
  # groups, club and away.php URLs.
  VK_EXCLUDED = %r{^https?://vk\.com/(?:search\?|write\.php\?email|groups\?id|club|away\.php\?)}
  # Any link pointing at ok.ru.
  OK_LINK = %r{^https?://ok\.ru}

  # The Hash produced by the most recent #parse call (nil before that).
  attr_accessor :data

  # @param url [String] absolute http(s) URL of the page to scrape
  def initialize(url)
    @url = url
  end

  # Downloads the page and extracts its fields.
  #
  # @return [Hash] string-keyed record; also stored in #data. The
  #   'country', 'address' and 'phone' values are nil when the table does
  #   not have the corresponding cell.
  # @raise [URI::InvalidURIError] if the URL cannot be parsed
  # @raise [ArgumentError] if the URL is not http or https
  def parse
    uri = http_uri(@url)
    # Block form closes the underlying handle as soon as the DOM is built.
    dom_html = uri.open { |io| Nokogiri::HTML(io) }
    dom_table = dom_html.css('table.criminal')
    cells = dom_table.css('tr td')
    links = dom_html.css('.criminal-description a').map { |a| a['href'] }

    @data = {
      'name' => dom_html.css('.post-body .postheader a').text,
      'url' => @url,
      'country' => cell_text(cells, 1),
      'address' => cell_text(cells, 2),
      'phone' => cell_text(cells, 3),
      'description' => dom_table.css('.criminal-description').text,
      'images' => dom_table.css('img').map { |img| img['src'] },
      'ok' => links.select { |u| u =~ OK_LINK },
      'vk' => links.select { |u| u =~ VK_LINK && u !~ VK_EXCLUDED }
    }
  end

  private

  # Parses +url+ and insists on http(s). Kernel#open must not be used here:
  # it treats a leading "|" as a shell command and opens local paths, and the
  # URLs handed to this class are scraped from remote pages.
  def http_uri(url)
    uri = URI.parse(url.to_s)
    return uri if %w[http https].include?(uri.scheme)

    raise ArgumentError, "unsupported URL: #{url.inspect}"
  end

  # Text of the cell at +index+, or nil when that cell is missing.
  def cell_text(cells, index)
    cell = cells[index]
    cell ? cell.text : nil
  end
end
# Walks the paginated listing and parses every profile linked from it.
# All work happens in the constructor; results are available via #data.
class Psb4ukrorgParserList
  # Listing URL prefix; the page number and a trailing slash are appended.
  LIST_URL = 'https://psb4ukr.org/criminal/page/'
  # Pages crawled by default. The original script noted (2..1903) as the
  # alternative range.
  DEFAULT_PAGES = (2..3)

  # Array of the Hashes returned by Psb4ukrorgParser#parse, in crawl order.
  attr_accessor :data

  # @param pages [Enumerable<Integer>] listing page numbers to crawl
  def initialize(pages: DEFAULT_PAGES)
    @data = []
    pages.each do |number|
      url = "#{LIST_URL}#{number}/"
      # URI#open instead of Kernel#open: open-uri stopped patching
      # Kernel#open in Ruby 3.0. The block form closes the handle.
      page = URI.parse(url).open { |io| Nokogiri::HTML(io) }
      page.css('div.hentry .postheader a').each do |a|
        @data.push(Psb4ukrorgParser.new(a['href']).parse)
      end
    end
  end
end
# Crawl the listing, then dump every collected record as one JSON document.
parser = Psb4ukrorgParserList.new
filename = "Psb4ukrorgList.json"
File.open(filename, "w") { |out| out.write(parser.data.to_json) }
# puts parser.data.to_json
# separ = Psb4ukrorgParser.new("https://psb4ukr.org/criminal/artyomenko-aleksej/")
# sdata = separ.parse
# puts sdata['vk']
# puts sdata.to_json
@0m3r
Copy link
Author

0m3r commented Feb 27, 2015

# Count how often each value occurs, then keep the values seen more than
# six times (ordered by ascending count).
a = [1, 2, 3, 4, 2, 4, 1, 4, 3, 4, 4, 3] + [1, 2, 1]
wf = a.each_with_object(Hash.new(0)) { |n, counts| counts[n] += 1 }
frequent = wf.sort_by { |_, count| count }.select { |_, count| count > 6 }
Hash[frequent]
frequent.map { |value, _| value }

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment