Skip to content

Instantly share code, notes, and snippets.

@devton
devton / url_parser.rb
Created February 10, 2015 23:10
Crawler::UrlParser
module Crawler
class UrlParser
# Convenience entry point: builds a parser for +url+ and runs it right away.
# Returns whatever the instance's #parse! returns (per the original author's
# note, a hash with the URL's attributes).
def self.parse(url)
  new(url).parse!
end
def initialize url
@devton
devton / url_parser_spec.rb
Created February 10, 2015 22:59
Crawler::UrlParser spec
require "rails_helper"
RSpec.describe Crawler::UrlParser, :type => :service do
describe ".parse" do
subject { Crawler::UrlParser.parse url }
context "normal url" do
let(:url) { 'www.my-example.url.com' }
let(:url_attributes) {
{
@devton
devton / created_crawled_urls.rb
Created February 10, 2015 22:20
CrawledUrlsTable
class CreateCrawledUrls < ActiveRecord::Migration
def change
create_table :crawled_urls do |t|
t.string :url_scheme, null: false
t.text :host, null: false
t.text :path, null: false
t.text :fragment
t.text :query_strings
t.datetime :last_parsed_at
t.datetime :last_check_at
@devton
devton / negative_expression.rb
Created February 9, 2015 01:48
NegativeExpression model
class NegativeExpression < ActiveRecord::Base
# Scope that looks up the negative expressions registered for the given
# domain (host): keeps the rows where the lower-cased host matches, via the
# `~*` operator, any entry of the `domains` array column.
scope :expressions_for, lambda { |host|
  where("lower(?) ~* ANY(domains)", host)
}
# Scope that runs a regex match against the expressions field,
# checking the given path and returning
# every expression that matched
@devton
devton / negative_expression_spec.rb
Created February 9, 2015 01:30
NegativeExpression spec
require 'rails_helper'
RSpec.describe NegativeExpression, :type => :model do
describe ".expressions_for" do
subject { NegativeExpression.expressions_for 'http://example.com' }
context "should get expressions" do
context "when have a domain expression" do
before do
@devton
devton / negative_model_migration.rb
Created February 9, 2015 00:14
Arquivo de migração gerado pelo: rails g model NegativeExpression
# Arquivo de migração gerado pelo: rails g model NegativeExpression
class CreateNegativeExpressions < ActiveRecord::Migration
def change
create_table :negative_expressions do |t|
t.text :domains, array: true, null: false
t.text :expressions, array: true, null: false
t.timestamps null: false
end
@devton
devton / web.rb
Last active August 29, 2015 14:15
app/services/crawler/web.rb
module Crawler
class Web
attr_reader :valid_urls
# Class method that collects and returns a unique array
# with all the links visited within the site
def self.collect_links_from url
_ = new url
_.collect_links!
_.valid_urls.uniq
@devton
devton / web_spec.rb
Created February 8, 2015 06:39
app/services/crawler/web_spec.rb
require "rails_helper"
RSpec.describe Crawler::Web, :type => :service do
describe ".collect_links_from" do
let(:collected_links) {
[
'http://www.example.com/page_link_1.html',
'http://www.example.com/page_link_2.html',
'http://www.example.com/page_link_3.html',
'http://www.example.com/page_link_4.html',
@devton
devton / perf.rake
Created February 8, 2015 04:39
Basic benchmark for MetaInspector and direct Nokogiri
# user system total real
# MetaInspector: 1.150000 0.040000 1.190000 (1.241086)
# Nokogiri #xpath: 0.010000 0.000000 0.010000 (0.011218)
# Nokogiri #css 0.010000 0.000000 0.010000 (0.010822)
# Nokogiri #search 0.010000 0.000000 0.010000 (0.010091)
require 'open-uri'
namespace :benchmarks do
desc 'MetaInspector vs Nokogiri'
@devton
devton / 0_reuse_code.js
Created May 22, 2014 14:45
Here are some things you can do with Gists in GistBox.
// Use Gists to store code you would like to remember later on
console.log(window); // log the "window" object to the console