-
-
Save devdatta/973951 to your computer and use it in GitHub Desktop.
Import your Gmail messages into ElasticSearch and search them with a simple web application.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.DS_Store | |
*.log | |
Gemfile.lock |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
source 'http://rubygems.org' | |
gem 'tire' | |
gem 'mime' | |
gem 'gmail' | |
gem 'sinatra' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ======================================================= | |
# Importing Gmail messages into ElasticSearch | |
# ======================================================= | |
# | |
# Import your Gmail messages into ElasticSearch and search them with a simple web application. | |
# | |
# Requirements: | |
# ------------- | |
# | |
# * ElasticSearch 0.16.x | |
# * Ruby 1.8.x | |
# * Rubygems | |
# * Bundler gem | |
# | |
# Usage: | |
# ------ | |
# | |
# Install the required gems: | |
# | |
# $ bundle install | |
# | |
# Run this script to import your e-mail into ElasticSearch: | |
# | |
#     $ ruby gmail-import.rb user@gmail.com yourpassword
# | |
# Note that messages are fetched one by one, so the speed of the import depends on your connection.
# You may abort the process at any time and search the messages stored so far.
# | |
# Then launch the web application: | |
# | |
#     $ INDEX=user@gmail.com ruby gmail-server.rb
# | |
# Open <http://localhost:4567/> in your browser. | |
# | |
# | |
require 'rubygems'
require 'benchmark'
require 'time'
require 'iconv'
require 'tire'
require 'mime'
require 'gmail'
# Flush progress output immediately instead of buffering it
#
STDOUT.sync = true

# Gmail credentials are read from the first two command-line arguments;
# the account name doubles as the ElasticSearch index name.
#
USERNAME, PASSWORD = ARGV

unless USERNAME && PASSWORD
  puts "[ERROR] Please provide your Gmail credentials:", "",
       "  #{__FILE__} user@gmail.com password", ""
  exit(1)
end

# Helper variables
#
@done   = 0   # number of messages stored in the index so far
@total  = 0   # number of messages in the inbox
@errors = []  # messages which could not be stored
# Helper method to display elapsed time
#
# elapsed - duration in seconds (Integer or Float). `nil` is treated as zero,
#           because the interrupt handler may ask for a report before the
#           elapsed time has been measured.
#
# Returns a String such as "1.50000 seconds", "2 minutes and 5 seconds",
# "1 hours and 2 minutes" or "55 hours".
#
def elapsed_to_human(elapsed)
  minute  = 60
  hour    = 60 * minute
  day     = 24 * hour
  seconds = elapsed.to_f
  whole   = seconds.to_i

  # Plain comparisons instead of integer ranges: a fractional value such as
  # 59.5 sits between `0..59` and `60..3599` and would match neither.
  if seconds < minute
    "#{sprintf("%1.5f", seconds)} seconds"
  elsif seconds < hour
    "#{whole / minute} minutes and #{whole % minute} seconds"
  elsif seconds <= day
    # The remainder is in seconds; convert it to minutes before printing.
    "#{whole / hour} hours and #{(whole % hour) / minute} minutes"
  else
    "#{whole / hour} hours"
  end
end
# Display import statistics
#
# Builds (but does not print) a short summary: how many messages were
# stored, a search URL for the index, the elapsed time and the error count.
# The summary is wrapped in leading and trailing empty lines.
#
# Note: @elapsed is assigned only once the import has finished, so a report
# requested from the interrupt handler is built before it has been set.
#
def report
  summary = "Imported #{@done} messages into index: " \
            "<http://localhost:9200/#{USERNAME}/_search?q=*> "
  timing  = "in #{elapsed_to_human(@elapsed)}. " \
            "There were #{@errors.size} errors."

  ["", summary, timing, ""].join("\n")
end
# Clean exit on interrupt
#
# On Ctrl-C, print the statistics gathered so far and leave with a non-zero
# status when at least one message failed to import.
#
trap(:INT) do
  puts "\r\nExiting...\n"
  puts report
  exit(@errors.empty? ? 0 : 1)
end
# Set up ElasticSearch index with the same name as your account
#
# Field definitions for the `message` document type. Sender and subject
# carry a boost; `from` is additionally indexed verbatim as `from.exact`.
#
message_properties = {
  :id      => { :type => 'string', :index => 'not_analyzed', :store => true },
  :subject => { :type => 'string', :analyzer => 'snowball', :boost => 10 },
  :from    => {
    :type   => 'multi_field',
    :fields => {
      :from  => { :type => 'string', :analyzer => 'snowball', :boost => 100 },
      :exact => { :type => 'string', :index => 'not_analyzed', :store => true }
    }
  },
  :to      => { :type => 'string', :analyzer => 'keyword' },
  :date    => { :type => 'date' },
  :body    => { :type => 'string', :analyzer => 'snowball' }
}

index = Tire.index USERNAME do
  # Remove the index first when the FORCE environment variable is set
  #
  delete if ENV['FORCE']

  # Create the index for messages with proper mapping
  #
  create :mappings => { :message => { :properties => message_properties } }
end
# Helper method to strip non-UTF-8 characters
#
# A space is appended before the conversion and dropped again afterwards
# (`[0..-2]`) -- presumably a workaround for Iconv mishandling an invalid
# byte at the very end of the input; TODO confirm.
#
def force_utf(s)
  Iconv.conv('UTF-8//IGNORE', 'UTF-8', s + ' ')[0..-2]
end

# Helper method to format a list of addresses as "Name <mailbox@host>" strings
#
def format_addresses(addresses)
  Array(addresses).map { |a| "#{a.name} <#{a.mailbox}@#{a.host}>" }
end

# Run the whole import inside Benchmark.realtime, so that `report` can
# display how long it took.
#
@elapsed = Benchmark.realtime do

  puts '-'*80, "Connecting to Gmail account '#{USERNAME}'...", '-'*80

  # Connect to Gmail account
  #
  Gmail.new(USERNAME, PASSWORD) do |gmail|

    @total = gmail.inbox.count

    puts "Importing #{@total} messages, press Ctrl-C to abort...", '-'*80

    # Process inbox messages one by one
    #
    gmail.inbox.emails.each do |email|
      begin
        # Defensively define message properties (clean IDs, force UTF, etc).
        # This happens inside the `begin` block, so that a single malformed
        # message is recorded as an error instead of aborting the import.
        #
        document = {}
        document[:id]      = email.message_id.to_s.tr('<>', '').tr('/', '-')
        document[:subject] = force_utf(email.subject.to_s)
        document[:from]    = format_addresses(email.from)
        document[:to]      = format_addresses(email.to)
        # `to_s` lets Time.parse accept a date which is not already a String
        document[:date]    = (Time.parse(email.date.to_s).strftime('%Y-%m-%dT%H:%M:%S%z') rescue nil)
        document[:body]    = force_utf( (email.body.parts.first.body.to_s rescue email.body.to_s) )

        # Store the message in the index
        #
        index.store :message, document
        @done += 1
        puts "\e[32m#{@done.to_s.ljust(4)}\e[0m #{email.subject} <#{email.from_addrs.join(', ')}>"

      # Rescue StandardError only: `rescue Exception` would also swallow the
      # SystemExit raised by `exit` in the interrupt handler, so that Ctrl-C
      # would be logged as a failed message and the import would carry on.
      #
      rescue StandardError => e
        # Display failure message
        #
        puts "\e[31m[!]\e[0m #{email.subject} <#{email.from_addrs.join(', ')}>"
        puts "    #{e.inspect}"
        @errors << email
      end
    end
  end
end

puts report
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ======================================================= | |
# Simple web application to search your Gmail messages | |
# ======================================================= | |
# | |
# Usage: | |
# ------ | |
# | |
# First, import your messages with the `gmail-import.rb` script. | |
# | |
# Then, launch this application: | |
# | |
#     $ INDEX=user@gmail.com ruby gmail-server.rb
# | |
# | |
require 'rubygems' | |
require 'tire' | |
require 'sinatra' | |
# Refuse to start without the name of the index to search
#
unless ENV['INDEX']
  puts "[ERROR] Please set the index name with the INDEX environment variable:", "",
       "  $ INDEX=user@gmail.com ruby #{__FILE__}", ""
  exit(1)
end

# Application settings
#
configure do
  set :views,    File.dirname(__FILE__)  # templates are looked up next to this script
  set :per_page, 25                      # number of results on one page
end
helpers do | |
# Wrap plain text in <p> elements.
#
# text - the text to format; `nil` is treated as an empty string.
#
# Returns a new String. The argument itself is left untouched, so frozen
# strings and values owned by the caller are safe to pass in.
#
# The text is NOT HTML-escaped here, because callers pass in search
# highlights which already contain markup.
#
def simple_format(text)
  html = text.to_s.gsub(/\r\n?/, "\n")     # \r\n and \r -> \n
  # NOTE(review): this collapses a run of exactly three newlines into one,
  # so such a run does not become a paragraph break below -- confirm intent.
  html = html.gsub(/\n\n{2}/, "\n")
  html = html.gsub(/\n\n+/, "</p>\n<p>")   # two or more newlines -> paragraph break
  "<p>" + html + "</p>"
end
# Render `name` as a link to `url` when `condition` is truthy, and as plain
# text otherwise. (Despite its name, the helper links *if* the condition
# holds; the name is kept because the template calls it.)
#
# The URL is HTML-escaped, so that `&` and quotes inside it cannot break
# out of the href attribute.
#
def link_to_unless(condition, name, url)
  condition ? %Q|<a href="#{Rack::Utils.escape_html(url)}">#{name}</a>| : name.to_s
end
# Render a search tip: a link which runs `query`, followed by `legend`.
#
# The query is URL-encoded for the link target (tips contain spaces, `^`
# and brackets), and both visible texts are HTML-escaped.
#
def link_to_tip(query, legend)
  href  = "/?q=#{Rack::Utils.escape(query)}"
  label = Rack::Utils.escape_html(query)
  %Q|<p class="tip"><a href="#{href}">#{label}</a><span>#{Rack::Utils.escape_html(legend)}</span></p>|
end
end | |
# Search page.
#
# Parameters:
#   q - query string; a blank query matches everything (`*`)
#   s - set to `date` to sort by date, newest first
#   p - zero-based page number
#
get '/' do
  q    = params[:q].to_s !~ /\S/ ? '*' : params[:q].to_s
  s    = params[:s] == 'date'
  # Clamp the page number, so a negative `p` cannot yield a negative offset
  page = [params[:p].to_i, 0].max
  f    = page * settings.per_page

  @s = Tire.search( ENV['INDEX'] ) do |search|
    search.query { |query| query.string q }
    search.highlight :subject => {:number_of_fragments => 0},
                     :body    => {:number_of_fragments => 0},
                     :options => { :tag => '<em class="highlight">' }
    search.sort { date :desc } if s
    search.size settings.per_page
    search.from f
  end

  # puts @s.to_curl

  erb :results
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<html> | |
<head> | |
<title>Search your Gmail (<%= ENV['INDEX'] %>)</title> | |
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
<script src="http://code.jquery.com/jquery-1.6.1.min.js"></script> | |
<style> | |
body | |
{ color: #222; background: #fff; | |
font-size: 76%; | |
font-family: Helvetica, sans-serif; | |
padding: 2em 6em; } | |
a { color: #2f3b4c; text-decoration: none !important; } | |
h1 | |
{ color: #999; | |
font-size: 120%; | |
padding: 0.5em 0.8em 0 0; | |
margin: 0; | |
float: left; | |
position: relative; } | |
h1 a { color: #999; } | |
#search-form | |
{ border-bottom: 2px solid #ccc; | |
padding: 0.5em 0 0.5em 0; | |
clear: both; } | |
#search-form input[type='text'] | |
{ color: #222; | |
font-size: 110%; | |
padding: 0.25em; | |
width: 50em; } | |
#search-form #tools | |
{ color: #34383e; | |
margin: 0 0 0 11.6em; } | |
#search-form #tools a | |
{ color: #2f3b4c; text-decoration: underline !important; } | |
#search-form #tools .dim | |
{ color: #878787; } | |
#toggle-tips | |
{ font-size: 10px; | |
font-weight: normal; | |
text-decoration: underline !important; | |
position: absolute; | |
top: 0 bottom: 0; } | |
#search-form #tips | |
{ background-color: #eff0f1; | |
padding: 1em 2em; | |
margin: 0 0 0 11.6em; | |
position: relative; | |
-moz-border-radius: 0.5em; | |
-webkit-border-radius: 0.5em; | |
border-radius: 0.5em; } | |
#search-form #tips p | |
{ padding: 0.5em 0 0.5em 0; | |
margin: 0; } | |
#search-form #tips a | |
{ background: #B9D4FA; | |
padding: 0.25em 0.5em 0.1em 0.5em; | |
-moz-border-radius: 0.25em; | |
-webkit-border-radius: 0.25em; | |
border-radius: 0.25em; } | |
#search-form #tips a:hover | |
{ color: #dde4ed; | |
background: #444e5d; } | |
#search-form #tips span | |
{ color: #878787; | |
font-size: 95%; | |
margin-left: 1em; } | |
.message | |
{ line-height: 125%; | |
padding: 1em 0; | |
border-bottom: 1px solid #ccc; | |
position: relative; } | |
.message p | |
{ margin: 0 0 0.5em 0; } | |
.message .from | |
{ color: #34383e; | |
font-weight: bold; | |
float: left; } | |
.message .from small | |
{ color: #5f646b; | |
font-weight: normal; } | |
.message .date | |
{ color: #5976a1; | |
float: right; } | |
.message .subject | |
{ color: #34383e; | |
clear: both; } | |
.message .body | |
{ color: #87858f; | |
font-size: 95%; | |
height: 1.25em; | |
overflow: hidden; } | |
.message .body p | |
{ display: inline; } | |
.message.expanded .body | |
{ height: auto; } | |
.message.expanded .body p | |
{ display: block; } | |
.message:hover | |
{ background: #f5f5f8; } | |
/* Search hits are wrapped in <em class="highlight">. `normal` is not a valid
   font-size value, so the original declaration was ignored; resetting the
   italic style of <em> is the most likely intent. */
.highlight {
  font-style: normal;
  background-color: #fef4c1;
  padding: 0.25em 0.25em;
  -moz-border-radius: 0.25em;
  -webkit-border-radius: 0.25em;
  border-radius: 0.25em;
}
</style> | |
<script> | |
$(function() { | |
$('#tips').hide(); | |
$('.message .body'). | |
hover(function() { $(this).css({ cursor : 'pointer' }); }). | |
click(function() { $(this).parent().toggleClass('expanded'); return false; }); | |
$('#toggle-tips'). | |
click(function() { $('#tips').toggle('fast'); return false; }); | |
}); | |
</script> | |
</head> | |
<body> | |
<div id="search-form"> | |
<h1> | |
<a href="/">Search your Gmail</a><br> | |
<a id="toggle-tips" href="#">Toggle tips</a> | |
</h1> | |
<form action="/" method="get" accept-charset="utf-8">
  <%# Request parameters are escaped before being echoed into attributes (reflected XSS) %>
  <input type="hidden" name="s" value="<%= Rack::Utils.escape_html(params[:s].to_s) %>">
  <input type="text" name="q" value="<%= Rack::Utils.escape_html(params[:q].to_s) %>">
  <input type="submit" value="Search">
</form>
<div id="tools">
  <p>
    <%# The query is URL-encoded, so spaces, `&` or quotes in it cannot break the sort links %>
    <span class="dim">Sort by:</span>
    <%= link_to_unless params[:s] =~ /\S/, 'relevance', "/?q=#{Rack::Utils.escape(params[:q].to_s)}" %> <span class="dim">or</span>
    <%= link_to_unless params[:s] !~ /\S/, 'date', "/?q=#{Rack::Utils.escape(params[:q].to_s)}&s=date" %>
    <span class="dim">. Showing <%= @s.results.size %> of <%= @s.results.total %> total results.</span>
  </p>
</div>
<div id="tips"> | |
<%= link_to_tip('git*', 'Messages beginning with “git”') %> | |
<%= link_to_tip('from:github.com', 'Messages from Github') %> | |
<%= link_to_tip('apple OR linux^100', 'Messages about Apple or Linux, with a boost for Linux') %> | |
<%= link_to_tip("date:[#{(Time.now-7*24*60*60).strftime('%Y-%m-%d')} TO #{Time.now.strftime('%Y-%m-%d')}]", 'Messages from last week') %> | |
</div> | |
</div> | |
<% @s.results.each do |m| %>
<div class="message">
  <p class="from">
    <%# Senders are stored as "Name <mailbox@host>"; unescaped, the browser would parse the address as a tag %>
    <%= Array(m.from).map { |a| Rack::Utils.escape_html(a.to_s) }.join(', ') %>
    <% if m._score && m._score != 1.0 %>
    <small title="score"><%= m._score.inspect %></small>
    <% end %>
  </p>
  <%# The importer stores a nil date when it cannot parse one; show nothing instead of failing %>
  <p class="date"><%= (Time.parse(m.date.to_s).strftime('%Y/%m/%d %H:%M') rescue '') %></p>
  <% body    = (m.highlight && m.highlight.body)    ? m.highlight.body.first    : m.body %>
  <% subject = (m.highlight && m.highlight.subject) ? m.highlight.subject.first : m.subject %>
  <%# NOTE(review): subject and body are printed unescaped because highlights carry <em> markup; message content is untrusted, so this still needs a proper fix %>
  <p class="subject"><%= subject %></p>
  <div class="body"><%= simple_format(body.to_s) %></div>
</div>
<% end %>
<%# Link to the next page while there are more results; parameters are URL-encoded for the href %>
<% if @s.results.total > (params[:p].to_i+1)*settings.per_page %>
<p><a href="/?q=<%= Rack::Utils.escape(params[:q].to_s) %>&amp;s=<%= Rack::Utils.escape(params[:s].to_s) %>&amp;p=<%= params[:p].to_i+1 %>">Next »</a></p>
<% end %>
<% if @s.results.empty? %> | |
<p>No results.</p> | |
<% end %> | |
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment