Skip to content

Instantly share code, notes, and snippets.

@devdatta
Forked from karmi/.gitignore
Created May 16, 2011 04:48
Show Gist options
  • Save devdatta/973951 to your computer and use it in GitHub Desktop.
Save devdatta/973951 to your computer and use it in GitHub Desktop.
Import your Gmail messages into ElasticSearch and search them with a simple web application.
.DS_Store
*.log
Gemfile.lock
# Gems needed by both the import script and the web application.
# They are fetched over HTTPS so that downloads cannot be tampered with in transit.
source 'https://rubygems.org'

gem 'tire'
gem 'mime'
gem 'gmail'
gem 'sinatra'
# =======================================================
# Importing Gmail messages into ElasticSearch
# =======================================================
#
# Import your Gmail messages into ElasticSearch and search them with a simple web application.
#
# Requirements:
# -------------
#
# * ElasticSearch 0.16.x
# * Ruby 1.8.x
# * Rubygems
# * Bundler gem
#
# Usage:
# ------
#
# Install the required gems:
#
# $ bundle install
#
# Run this script to import your e-mail into ElasticSearch:
#
# $ ruby gmail-import.rb you@gmail.com yourpassword
#
# Note that messages are fetched one by one, so the speed of the import depends on your connection.
# You may abort the process at any time and search the messages stored so far.
#
# Then launch the web application:
#
# $ INDEX=you@gmail.com ruby gmail-server.rb
#
# Open <http://localhost:4567/> in your browser.
#
#
require 'rubygems'
require 'time'
require 'iconv'
require 'tire'
require 'mime'
require 'gmail'
# Write progress output immediately instead of buffering it
#
STDOUT.sync = true

# Gmail credentials are taken from the first two command line arguments
#
USERNAME, PASSWORD = ARGV

unless USERNAME && PASSWORD
  puts "[ERROR] Please provide your Gmail credentials:", "",
       " #{__FILE__} you@gmail.com password", ""
  exit(1)
end

# Helper variables
#
@done   = 0   # number of messages stored in the index so far
@total  = 0   # set from the inbox message count once connected
@errors = []  # e-mails which could not be stored
# Helper method to display elapsed time
#
# elapsed - duration in seconds (Integer or Float); nil is treated as zero.
#
# Returns a String:
#   * under a minute:  seconds with five decimal places
#   * under an hour:   whole minutes and seconds
#   * up to one day:   whole hours and minutes
#   * more than a day: whole hours only
#
# Thresholds are compared numerically rather than with integer ranges,
# so fractional values such as 59.5 land in the correct bucket.
#
def elapsed_to_human(elapsed)
  minute = 60
  hour   = 60 * minute
  day    = 24 * hour

  seconds = elapsed.to_f
  whole   = seconds.to_i

  if seconds < minute
    "#{sprintf("%1.5f", seconds)} seconds"
  elsif seconds < hour
    "#{whole / minute} minutes and #{whole % minute} seconds"
  elsif seconds <= day
    # The remainder within the hour is in seconds; convert it to minutes
    "#{whole / hour} hours and #{(whole % hour) / minute} minutes"
  else
    "#{whole / hour} hours"
  end
end
# Build the import statistics
#
# Returns a String surrounded by newlines: the number of stored messages,
# a search URL for the index named after the account, the elapsed time and
# the number of failed messages.
#
def report
  index_url = "<http://localhost:9200/#{USERNAME}/_search?q=*> "
  summary   = "Imported #{@done} messages into index: #{index_url}"
  timing    = "in #{elapsed_to_human(@elapsed)}. There were #{@errors.size} errors."

  "\n#{summary}\n#{timing}\n"
end
# On Ctrl-C, print the statistics gathered so far and terminate.
# The exit status is 1 when any message failed to import, 0 otherwise.
#
trap(:INT) do
  puts "\r\nExiting...\n"
  puts report
  status = @errors.empty? ? 0 : 1
  exit(status)
end
# Field definitions for the `message` document type
#
message_properties = {
  :id      => { :type => 'string', :index => 'not_analyzed', :store => true },
  :subject => { :type => 'string', :analyzer => 'snowball', :boost => 10 },
  :from    => { :type   => 'multi_field',
                :fields => {
                  :from  => { :type => 'string', :analyzer => 'snowball', :boost => 100 },
                  :exact => { :type => 'string', :index => 'not_analyzed', :store => true }
                } },
  :to      => { :type => 'string', :analyzer => 'keyword' },
  :date    => { :type => 'date' },
  :body    => { :type => 'string', :analyzer => 'snowball' }
}

# Set up ElasticSearch index with the same name as your account
#
index = Tire.index USERNAME do
  # Remove the index first when the FORCE environment variable is set
  #
  delete if ENV['FORCE']

  # Create the index for messages with proper mapping
  #
  create :mappings => { :message => { :properties => message_properties } }
end
# Benchmark.realtime is used below; load it explicitly rather than relying
# on another library having required it.
require 'benchmark'

# Helper method to strip non-UTF-8 characters
#
# Runs the string through a UTF-8 to UTF-8 Iconv conversion with //IGNORE.
# A space is appended before the conversion and cut off afterwards
# (presumably so that an invalid byte at the very end is dropped as well;
# verify against the Iconv documentation).
#
def force_utf(s)
  Iconv.conv('UTF-8//IGNORE', 'UTF-8', s + ' ')[0..-2]
end

# Helper method to format address objects as "Name <mailbox@host>" strings
#
# Accepts nil, a single address or a list of addresses; always returns an Array.
#
def format_addresses(addresses)
  Array(addresses).map { |a| "#{a.name} <#{a.mailbox}@#{a.host}>" }
end

@elapsed = Benchmark.realtime do

  puts '-'*80, "Connecting to Gmail account '#{USERNAME}'...", '-'*80

  # Connect to Gmail account
  #
  Gmail.new(USERNAME, PASSWORD) do |gmail|

    @total = gmail.inbox.count
    puts "Importing #{@total} messages, press Ctrl-C to abort...", '-'*80

    # Process inbox messages one by one
    #
    gmail.inbox.emails.each do |email|
      begin
        # Defensively define message properties (clean IDs, force UTF, etc).
        # This happens inside the `begin` so that a single malformed message
        # is recorded as an error instead of aborting the whole import.
        #
        document = {}
        document[:id]      = email.message_id.to_s.tr('<>', '').tr('/', '-')
        document[:subject] = force_utf(email.subject.to_s)
        document[:from]    = format_addresses(email.from)
        document[:to]      = format_addresses(email.to)
        # `to_s` lets Time.parse accept a String as well as a date/time object.
        # NOTE(review): the type returned by `email.date` is not visible here.
        # Unparseable or missing dates are stored as nil.
        document[:date]    = (Time.parse(email.date.to_s).strftime('%Y-%m-%dT%H:%M:%S%z') rescue nil)
        document[:body]    = force_utf( (email.body.parts.first.body.to_s rescue email.body.to_s) )

        # Store the message in the index
        #
        index.store :message, document
        @done += 1
        puts "\e[32m#{@done.to_s.ljust(4)}\e[0m #{email.subject} <#{email.from_addrs.join(', ')}>"
      rescue SystemExit
        # Raised by `exit` in the INT trap handler. It must propagate,
        # otherwise Ctrl-C during a store would print the report but
        # the import would carry on.
        raise
      rescue Exception => e
        # Deliberately broad: the import is best-effort, so any other failure
        # for a single message is reported and the loop continues.
        #
        puts "\e[31m[!]\e[0m #{email.subject} <#{email.from_addrs.join(', ')}>"
        puts "    #{e.inspect}"
        @errors << email
      end
    end

  end

end

puts report
# =======================================================
# Simple web application to search your Gmail messages
# =======================================================
#
# Usage:
# ------
#
# First, import your messages with the `gmail-import.rb` script.
#
# Then, launch this application:
#
# $ INDEX=you@gmail.com ruby gmail-server.rb
#
#
require 'rubygems'
require 'erb'
require 'tire'
require 'sinatra'
# Refuse to start without the name of the index to search
#
unless ENV['INDEX']
  puts "[ERROR] Please set the index name with the INDEX environment variable:", "",
       " $ INDEX=you@gmail.com ruby #{__FILE__}", ""
  exit(1)
end

# Application settings: templates are looked up in the directory
# of this file; result pages hold 25 messages.
#
configure do
  set :views, File.dirname(__FILE__)
  set :per_page, 25
end
helpers do
# Wrap plain text in HTML paragraphs.
#
# text - the text to format; nil is treated as an empty string.
#
# Returns a new String. The argument is never modified, so frozen strings
# and strings still referenced elsewhere are safe to pass in.
#
# NOTE(review): the text is inserted as-is, without HTML escaping. Callers
# pass e-mail content, so escaping may be needed here or at the call site.
#
def simple_format(text)
  html = text.to_s.gsub(/\r\n?/, "\n")     # \r\n and \r -> \n
  # A run of exactly three newlines collapses into one.
  # NOTE(review): the original comment read "\n\n -> \n"; the pattern is kept as found.
  html = html.gsub(/\n\n{2}/, "\n")
  html = html.gsub(/\n\n+/, "</p>\n<p>")   # two or more newlines -> paragraph break
  "<p>#{html}</p>"
end
# Render `name` as a link to `url` when `condition` is truthy,
# or as plain text when it is false or nil.
#
# Note that, despite the name, a truthy condition produces the link.
#
def link_to_unless(condition, name, url)
  return "#{name}" unless condition

  %Q|<a href="#{url}">#{name}</a>|
end
# Render a search tip: a link which runs `query`, followed by a legend.
#
# query  - the query string. It is URL-encoded inside the link target
#          (queries contain spaces, `^`, `[`, `]` and `:`) and HTML-escaped
#          where it is displayed.
# legend - explanatory text, inserted as-is.
#
# Returns a String with the HTML paragraph.
#
def link_to_tip(query, legend)
  href  = "/?q=#{ERB::Util.url_encode(query)}"
  label = ERB::Util.html_escape(query)

  %Q|<p class="tip"><a href="#{href}">#{label}</a><span>#{legend}</span></p>|
end
end
# Search page.
#
# Parameters:
#   q - query string; a missing or blank value matches everything ('*')
#   s - 'date' sorts by date, newest first; anything else keeps the default order
#   p - zero-based page number; negative or non-numeric values mean the first page
#
# Stores the search in @s and renders the `results` template.
#
get '/' do
  query_string = params[:q].to_s
  query_string = '*' unless query_string =~ /\S/

  sort_by_date = params[:s] == 'date'

  # Clamp the page so that `?p=-1` cannot produce a negative offset
  page   = [params[:p].to_i, 0].max
  offset = page * settings.per_page

  @s = Tire.search( ENV['INDEX'] ) do |search|
    search.query { |query| query.string query_string }
    search.highlight :subject => {:number_of_fragments => 0},
                     :body    => {:number_of_fragments => 0},
                     :options => { :tag => '<em class="highlight">' }
    search.sort { date :desc } if sort_by_date
    search.size settings.per_page
    search.from offset
  end

  # puts @s.to_curl
  erb :results
end
<!DOCTYPE html>
<html>
<head>
<title>Search your Gmail (<%= ENV['INDEX'] %>)</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<script src="http://code.jquery.com/jquery-1.6.1.min.js"></script>
<style>
body
{ color: #222; background: #fff;
font-size: 76%;
font-family: Helvetica, sans-serif;
padding: 2em 6em; }
a { color: #2f3b4c; text-decoration: none !important; }
h1
{ color: #999;
font-size: 120%;
padding: 0.5em 0.8em 0 0;
margin: 0;
float: left;
position: relative; }
h1 a { color: #999; }
#search-form
{ border-bottom: 2px solid #ccc;
padding: 0.5em 0 0.5em 0;
clear: both; }
#search-form input[type='text']
{ color: #222;
font-size: 110%;
padding: 0.25em;
width: 50em; }
#search-form #tools
{ color: #34383e;
margin: 0 0 0 11.6em; }
#search-form #tools a
{ color: #2f3b4c; text-decoration: underline !important; }
#search-form #tools .dim
{ color: #878787; }
/* Small "Toggle tips" link, absolutely positioned. */
#toggle-tips
{ font-size: 10px;
font-weight: normal;
text-decoration: underline !important;
position: absolute;
/* NOTE(review): there is no ';' between `top: 0` and `bottom: 0`, which makes
   this declaration invalid; confirm the intended offsets before fixing it. */
top: 0 bottom: 0; }
#search-form #tips
{ background-color: #eff0f1;
padding: 1em 2em;
margin: 0 0 0 11.6em;
position: relative;
-moz-border-radius: 0.5em;
-webkit-border-radius: 0.5em;
border-radius: 0.5em; }
#search-form #tips p
{ padding: 0.5em 0 0.5em 0;
margin: 0; }
#search-form #tips a
{ background: #B9D4FA;
padding: 0.25em 0.5em 0.1em 0.5em;
-moz-border-radius: 0.25em;
-webkit-border-radius: 0.25em;
border-radius: 0.25em; }
#search-form #tips a:hover
{ color: #dde4ed;
background: #444e5d; }
#search-form #tips span
{ color: #878787;
font-size: 95%;
margin-left: 1em; }
.message
{ line-height: 125%;
padding: 1em 0;
border-bottom: 1px solid #ccc;
position: relative; }
.message p
{ margin: 0 0 0.5em 0; }
.message .from
{ color: #34383e;
font-weight: bold;
float: left; }
.message .from small
{ color: #5f646b;
font-weight: normal; }
.message .date
{ color: #5976a1;
float: right; }
.message .subject
{ color: #34383e;
clear: both; }
.message .body
{ color: #87858f;
font-size: 95%;
height: 1.25em;
overflow: hidden; }
.message .body p
{ display: inline; }
.message.expanded .body
{ height: auto; }
.message.expanded .body p
{ display: block; }
.message:hover
{ background: #f5f5f8; }
/* Search term highlights. The highlight tag is an em element, so its
   default italics are reset; `font-size: normal` was not a valid value. */
.highlight {
  font-style: normal;
  background-color: #fef4c1;
  padding: 0.25em 0.25em;
  -moz-border-radius: 0.25em;
  -webkit-border-radius: 0.25em;
  border-radius: 0.25em;
}
</style>
<script>
$(function() {
  // The tips panel starts hidden
  $('#tips').hide();

  // Message bodies show a pointer cursor and toggle the `expanded`
  // class on their parent element when clicked
  var showPointer    = function() { $(this).css({ cursor : 'pointer' }); };
  var toggleExpanded = function() { $(this).parent().toggleClass('expanded'); return false; };
  $('.message .body').hover(showPointer).click(toggleExpanded);

  // The "Toggle tips" link shows or hides the tips panel
  var toggleTips = function() { $('#tips').toggle('fast'); return false; };
  $('#toggle-tips').click(toggleTips);
});
</script>
</head>
<body>
  <%# Request parameters are user input: they are HTML-escaped inside markup and URL-escaped inside links. %>
  <% html_q = Rack::Utils.escape_html(params[:q].to_s) %>
  <% html_s = Rack::Utils.escape_html(params[:s].to_s) %>
  <% url_q  = Rack::Utils.escape(params[:q].to_s) %>
  <% url_s  = Rack::Utils.escape(params[:s].to_s) %>
  <%# Zero-based page number; negative values count as the first page. %>
  <% page = [params[:p].to_i, 0].max %>
  <div id="search-form">
    <h1>
      <a href="/">Search your Gmail</a><br>
      <a id="toggle-tips" href="#">Toggle tips</a>
    </h1>
    <form action="/" method="get" accept-charset="utf-8">
      <input type="hidden" name="s" value="<%= html_s %>">
      <input type="text" name="q" value="<%= html_q %>">
      <input type="submit" value="Search">
    </form>
    <div id="tools">
      <p>
        <span class="dim">Sort by:</span>
        <%# The sort order currently in effect is shown as plain text, the other one as a link. %>
        <%= link_to_unless params[:s] =~ /\S/, 'relevance', "/?q=#{url_q}" %> <span class="dim">or</span>
        <%= link_to_unless params[:s] !~ /\S/, 'date', "/?q=#{url_q}&amp;s=date" %>
        <span class="dim">. Showing <%= @s.results.size %> of <%= @s.results.total %> total results.</span>
      </p>
    </div>
    <div id="tips">
      <%= link_to_tip('git*', 'Messages beginning with “git”') %>
      <%= link_to_tip('from:github.com', 'Messages from Github') %>
      <%= link_to_tip('apple OR linux^100', 'Messages about Apple or Linux, with a boost for Linux') %>
      <%= link_to_tip("date:[#{(Time.now-7*24*60*60).strftime('%Y-%m-%d')} TO #{Time.now.strftime('%Y-%m-%d')}]", 'Messages from last week') %>
    </div>
  </div>
  <% @s.results.each do |m| %>
    <div class="message">
      <p class="from">
        <%# Senders look like "Name <mailbox@host>"; unescaped, the address would be parsed as an HTML tag. %>
        <%= Rack::Utils.escape_html(Array(m.from).join(', ')) %>
        <% if m._score && m._score != 1.0 %>
          <small title="score"><%= m._score.inspect %></small>
        <% end %>
      </p>
      <%# Messages may have been stored without a date. %>
      <p class="date"><%= Time.parse(m.date).strftime('%Y/%m/%d %H:%M') if m.date %></p>
      <%# NOTE(review): subject and body are printed unescaped because highlighted fragments contain the highlight markup; they still carry raw e-mail content. %>
      <% body    = (m.highlight && m.highlight.body)    ? m.highlight.body.first    : m.body %>
      <% subject = (m.highlight && m.highlight.subject) ? m.highlight.subject.first : m.subject %>
      <p class="subject"><%= subject %></p>
      <div class="body"><%= simple_format(body.to_s) %></div>
    </div>
  <% end %>
  <% if @s.results.total > (page+1)*settings.per_page %>
    <p><a href="/?q=<%= url_q %>&amp;s=<%= url_s %>&amp;p=<%= page+1 %>">Next &raquo;</a></p>
  <% end %>
  <% if @s.results.empty? %>
    <p>No results.</p>
  <% end %>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment