Import your Gmail messages into ElasticSearch and search them with a simple web application.
# Gems are fetched from the public RubyGems index
source 'https://rubygems.org'

gem 'tire'
gem 'mime'
gem 'gmail'
gem 'sinatra'
# =======================================================
# Importing Gmail messages into ElasticSearch
# =======================================================
# Import your Gmail messages into ElasticSearch and search them with a simple web application.
# Requirements:
# -------------
# * ElasticSearch 0.16.x
# * Ruby 1.8.x
# * Rubygems
# * Bundler gem
# Usage:
# ------
# Install the required gems:
# $ bundle install
# Run this script to import your e-mail into ElasticSearch:
# $ ruby gmail-import.rb user@gmail.com yourpassword
# Note that messages are fetched one by one, so the duration of the import depends on your connection.
# You may abort the process at any time and search the messages already stored.
# Then launch the web application:
# $ INDEX=user@gmail.com ruby gmail-server.rb
# Open <http://localhost:4567/> in your browser.
require 'rubygems'
require 'time'
require 'iconv'
require 'tire'
require 'mime'
require 'gmail'
# Flush output immediately so that progress lines appear as they are printed
STDOUT.sync = true

# Both the account name and the password must be given on the command line
unless ARGV.size > 1
  puts "[ERROR] Please provide your Gmail credentials:", "",
       " #{__FILE__} user@gmail.com password", ""
  exit(1)
end

# The account name is also used as the name of the ElasticSearch index
USERNAME, PASSWORD = ARGV[0], ARGV[1]

# Helper variables
@done = 0
@total = 0
@errors = []
# Helper method to display elapsed time
#
# elapsed - duration in seconds (Integer or Float); nil is treated as zero
#
# Returns a String such as "12.50000 seconds", "1 minutes and 30 seconds",
# "1 hours and 1 minutes" or, beyond one day, "55 hours".
def elapsed_to_human(elapsed)
  minute  = 60
  hour    = 60 * minute
  day     = 24 * hour
  elapsed = elapsed.to_f
  seconds = elapsed.to_i

  case elapsed
  # Exclusive upper bounds, so fractional values such as 59.5 do not
  # fall into a gap between two integer ranges
  when 0...minute
    "#{sprintf("%1.5f", elapsed)} seconds"
  when minute...hour
    "#{seconds / minute} minutes and #{seconds % minute} seconds"
  when hour..day
    # The remainder is in seconds and must be converted to minutes
    "#{seconds / hour} hours and #{(seconds % hour) / minute} minutes"
  else
    "#{seconds / hour} hours"
  end
end
# Display import statistics
#
# Returns an Array of lines, suitable for `puts`, stating how many messages
# were imported into which index, how long it took and how many errors occurred.
def report
  [
    "Imported #{@done} messages into index: " +
      "<http://localhost:9200/#{USERNAME}/_search?q=*> ",
    "in #{elapsed_to_human(@elapsed)}. " +
      "There were #{@errors.size} errors."
  ]
end
# Clean exit on interrupt: print the statistics gathered so far and use a
# non-zero exit status when at least one message failed to import
trap(:INT) do
  puts "\r\nExiting...\n"
  puts report
  exit(@errors.empty? ? 0 : 1)
end
# Set up ElasticSearch index with the same name as your account
index = Tire.index USERNAME do
  # Remove the index first if the FORCE environment variable is set
  delete if ENV['FORCE']

  # Create the index for messages with proper mapping
  create :mappings => {
    :message => {
      :properties => {
        :id      => { :type => 'string', :index => 'not_analyzed', :store => true },
        :subject => { :type => 'string', :analyzer => 'snowball', :boost => 10 },
        # Indexed twice: analyzed as `from`, and as a whole value as `from.exact`
        :from    => { :type   => 'multi_field',
                      :fields => { :from  => { :type => 'string', :analyzer => 'snowball', :boost => 100 },
                                   :exact => { :type => 'string', :index => 'not_analyzed', :store => true } } },
        :to      => { :type => 'string', :analyzer => 'keyword' },
        :date    => { :type => 'date' },
        :body    => { :type => 'string', :analyzer => 'snowball' }
      }
    }
  }
end
# Measure how long the import takes; the duration is stored in @elapsed,
# which `report` passes to `elapsed_to_human`
# NOTE(review): no `require 'benchmark'` and no closing `end` for this block
# are visible here — confirm both exist in the complete file
@elapsed = Benchmark.realtime do
# Helper method to strip non-UTF-8 characters
#
# s - String that may contain byte sequences which are invalid in UTF-8
#
# Returns a String with the invalid bytes removed.
def force_utf(s)
  if s.respond_to?(:scrub)
    # Ruby 2.1+: Iconv is no longer part of the standard library there
    s.dup.force_encoding('UTF-8').scrub('')
  else
    # Presumably the padding space lets Iconv drop an invalid byte at the
    # very end of the input; it is cut off again afterwards
    Iconv.conv('UTF-8//IGNORE', 'UTF-8', s + ' ')[0..-2]
  end
end
puts '-'*80, "Connecting to Gmail account '#{USERNAME}'...", '-'*80
# Connect to Gmail account
# NOTE(review): the statement opening the `|gmail|` block is damaged (only the
# fragment below is left); `gmail` is undefined until it is restored
#, PASSWORD) do |gmail|
@total = gmail.inbox.count
puts "Importing #{@total} messages, press Ctrl-C to abort...", '-'*80
# Process inbox messages one by one
gmail.inbox.emails.each do |email|
# Defensively define message properties (clean IDs, force UTF, etc)
document = {}
# NOTE(review): the receiver of the first call is missing on the next line; what
# remains shows '/' being replaced by '-' in the ID
document[:id] ='<>', '').tr('/', '-')
document[:subject] = force_utf(email.subject.to_s)
# NOTE(review): the `#{}` interpolations in :from and :to are empty, and the
# argument of `Array(` in :to is missing — confirm against the original
document[:from] = Array(email.from).map { |a| "#{} <#{a.mailbox}@#{}>" }
document[:to] = Array( { |a| "#{} <#{a.mailbox}@#{}>" }
# NOTE(review): as written, the format string itself is parsed as a time; that
# raises and the `rescue` modifier turns the date into nil
document[:date] = (Time.parse('%Y-%m-%dT%H:%M:%S%z') rescue nil)
# NOTE(review): the expression guarded by the `rescue` modifier is missing
document[:body] = force_utf( ( rescue email.body.to_s) )
# Store the message in the index
# NOTE(review): the call storing `document` is damaged; only its arguments remain
# :message, document
@done += 1
puts "\e[32m#{@done.to_s.ljust(4)}\e[0m #{email.subject} <#{email.from_addrs.join(', ')}>"
# NOTE(review): no `begin` is visible for this `rescue`. Rescuing Exception also
# catches the SystemExit raised by `exit` in the INT handler, so Ctrl-C may be
# recorded as a failed message and not stop the import — consider StandardError
rescue Exception => e
# Display failure message
puts "\e[31m[!]\e[0m #{email.subject} <#{email.from_addrs.join(', ')}>"
puts " #{e.inspect}"
@errors << email
# NOTE(review): the `end`s closing the rescue, both loops and the timing block
# are missing before the final report
puts report
# =======================================================
# Simple web application to search your Gmail messages
# =======================================================
# Usage:
# ------
# First, import your messages with the `gmail-import.rb` script.
# Then, launch this application:
# $ INDEX=user@gmail.com ruby gmail-server.rb
require 'rubygems'
require 'tire'
require 'sinatra'
# The name of the index to search comes from the environment;
# refuse to start without it
unless ENV['INDEX']
  puts "[ERROR] Please set the index name with the INDEX environment variable:", "",
       " $ INDEX=user@gmail.com ruby #{__FILE__}", ""
  exit(1)
end

configure do
  # Templates are looked up in the directory of this script
  set :views, File.dirname(__FILE__)
  # Number of results displayed on one page
  set :per_page, 25
end
helpers do
# Turn plain text into simple HTML paragraphs.
#
# text - the String to format; nil is treated as an empty string.
#        The argument itself is not modified.
#
# Returns a new String wrapped in <p>...</p>. The text is not HTML-escaped:
# the template passes in highlighted fragments that carry markup.
def simple_format(text)
  unix_newlines = text.to_s.gsub(/\r\n?/, "\n")             # \r\n and \r -> \n
  collapsed     = unix_newlines.gsub(/\n\n{2}/, "\n")       # three newlines in a row -> \n
  paragraphs    = collapsed.gsub(/\n\n+/, "</p>\n<p>")      # two or more newlines -> paragraph break
  "<p>" + paragraphs + "</p>"
end
# Render +name+ as a link to +url+ when +condition+ is truthy, and as plain
# text otherwise.
#
# NOTE(review): despite the "unless" in the name, the link is produced when the
# condition HOLDS; the calls in the results template rely on exactly that.
def link_to_unless(condition, name, url)
  return "#{name}" unless condition

  %Q|<a href="#{url}">#{name}</a>|
end
require 'cgi'

# Render one search tip: a link that runs +query+, followed by its +legend+.
#
# query  - query string; URL-encoded inside the link target and HTML-escaped
#          where it is displayed, so spaces, '^', '[' or '&' cannot break the link
# legend - explanatory text, inserted as given
#
# Returns a String with the HTML of the tip.
def link_to_tip(query, legend)
  href  = "/?q=#{CGI.escape(query.to_s)}"
  label = CGI.escapeHTML(query.to_s)
  %Q|<p class="tip"><a href="#{href}">#{label}</a><span>#{legend}</span></p>|
end
# Search the index named by the INDEX environment variable and render the results.
#
# Query parameters:
#   q - query string; a blank value searches for '*'
#   s - 'date' sorts by date, descending; any other value applies no explicit sort
#   p - zero-based page number
get '/' do
  q = params[:q].to_s !~ /\S/ ? '*' : params[:q].to_s
  s = params[:s] == 'date'
  # A negative page number must not turn into a negative offset
  f = [params[:p].to_i, 0].max * settings.per_page

  @s = Tire.search( ENV['INDEX'] ) do |search|
    search.query { |query| query.string q }
    search.highlight :subject => {:number_of_fragments => 0},
                     :body => {:number_of_fragments => 0},
                     :options => { :tag => '<em class="highlight">' }
    search.sort { date :desc } if s
    search.size settings.per_page
    search.from f
  end
  # puts @s.to_curl

  erb :results
end
<!DOCTYPE html>
<title>Search your Gmail (<%= ENV['INDEX'] %>)</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<!-- jQuery is needed by the inline script (hover/click/toggleClass/toggle) -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.12.4/jquery.min.js"></script>
/* Page-wide defaults */
body
{ color: #222; background: #fff;
font-size: 76%;
font-family: Helvetica, sans-serif;
padding: 2em 6em; }
a { color: #2f3b4c; text-decoration: none !important; }
/* Page heading; positioned so that children can be placed absolutely inside it */
h1
{ color: #999;
font-size: 120%;
padding: 0.5em 0.8em 0 0;
margin: 0;
float: left;
position: relative; }
h1 a { color: #999; }
/* Container of the heading, the search form, the tools and the tips */
#search-form
{ border-bottom: 2px solid #ccc;
padding: 0.5em 0 0.5em 0;
clear: both; }
#search-form input[type='text']
{ color: #222;
font-size: 110%;
padding: 0.25em;
width: 50em; }
#search-form #tools
{ color: #34383e;
margin: 0 0 0 11.6em; }
#search-form #tools a
{ color: #2f3b4c; text-decoration: underline !important; }
#search-form #tools .dim
{ color: #878787; }
/* NOTE(review): the selector of this rule is missing. The declarations look like
   they style the "Toggle tips" link (#toggle-tips) — confirm against the original.
   `top: 0 bottom: 0` has no `;` between the two properties, so it is not a
   valid declaration; decide which offsets were intended. */
{ font-size: 10px;
font-weight: normal;
text-decoration: underline !important;
position: absolute;
top: 0 bottom: 0; }
#search-form #tips
{ background-color: #eff0f1;
padding: 1em 2em;
margin: 0 0 0 11.6em;
position: relative;
-moz-border-radius: 0.5em;
-webkit-border-radius: 0.5em;
border-radius: 0.5em; }
#search-form #tips p
{ padding: 0.5em 0 0.5em 0;
margin: 0; }
#search-form #tips a
{ background: #B9D4FA;
padding: 0.25em 0.5em 0.1em 0.5em;
-moz-border-radius: 0.25em;
-webkit-border-radius: 0.25em;
border-radius: 0.25em; }
#search-form #tips a:hover
{ color: #dde4ed;
background: #444e5d; }
#search-form #tips span
{ color: #878787;
font-size: 95%;
margin-left: 1em; }
/* One search result */
.message
{ line-height: 125%;
padding: 1em 0;
border-bottom: 1px solid #ccc;
position: relative; }
.message p
{ margin: 0 0 0.5em 0; }
.message .from
{ color: #34383e;
font-weight: bold;
float: left; }
.message .from small
{ color: #5f646b;
font-weight: normal; }
.message .date
{ color: #5976a1;
float: right; }
.message .subject
{ color: #34383e;
clear: both; }
.message .body
{ color: #87858f;
font-size: 95%;
height: 1.25em;
overflow: hidden; }
.message .body p
{ display: inline; }
.message.expanded .body
{ height: auto; }
.message.expanded .body p
{ display: block; }
/* NOTE(review): the selector of this rule is missing; it presumably targets a
   state of `.message` (hovered or expanded) — confirm against the original. */
{ background: #f5f5f8; }
/* Search hits; the server wraps them in <em class="highlight"> */
.highlight {
font-style: normal; /* `font-size: normal` is not a valid value; resets the italics of <em> */
background-color: #fef4c1;
padding: 0.25em 0.25em;
-moz-border-radius: 0.25em;
-webkit-border-radius: 0.25em;
border-radius: 0.25em;
}
$(function() {
  // Clicking a message body expands or collapses the whole message
  $('.message .body').
    hover(function() { $(this).css({ cursor : 'pointer' }); }).
    click(function() { $(this).parent().toggleClass('expanded'); return false; });

  // The "Toggle tips" link shows or hides the list of example queries
  $('#toggle-tips').
    click(function() { $('#tips').toggle('fast'); return false; });
});
<div id="search-form">
<a href="/">Search your Gmail</a><br>
<a id="toggle-tips" href="#">Toggle tips</a>
<form action="/" method="get" accept-charset="utf-8">
<%# Request parameters are escaped so that quotes or markup in them cannot break out of the attribute %>
<input type="hidden" name="s" value="<%= Rack::Utils.escape_html(params[:s].to_s) %>">
<input type="text" name="q" value="<%= Rack::Utils.escape_html(params[:q].to_s) %>">
<input type="submit" value="Search">
<div id="tools">
<span class="dim">Sort by:</span>
<%# The query is URL-encoded before it is placed into the link targets %>
<%= link_to_unless params[:s] =~ /\S/, 'relevance', "/?q=#{Rack::Utils.escape(params[:q].to_s)}" %> <span class="dim">or</span>
<%= link_to_unless params[:s] !~ /\S/, 'date', "/?q=#{Rack::Utils.escape(params[:q].to_s)}&amp;s=date" %>
<span class="dim">. Showing <%= @s.results.size %> of <%= @s.results.total %> total results.</span>
</div>
<div id="tips">
<%= link_to_tip('git*', 'Messages beginning with “git”') %>
<%= link_to_tip('from:github', 'Messages from Github') %>
<%= link_to_tip('apple OR linux^100', 'Messages about Apple or Linux, with a boost for Linux') %>
<%# Date range covering the last seven days, up to today %>
<%= link_to_tip("date:[#{(Time.now-7*24*60*60).strftime('%Y-%m-%d')} TO #{Time.now.strftime('%Y-%m-%d')}]", 'Messages from last week') %>
</div>
<% @s.results.each do |m| %>
<div class="message">
<p class="from">
<%# Addresses contain angle brackets and must be escaped to be visible %>
<%= Rack::Utils.escape_html(Array(m.from).join(', ')) %>
<% if m._score && m._score != 1.0 %>
<small title="score"><%= m._score.inspect %></small>
<% end %>
</p>
<%# The import stores nil when a message date could not be parsed %>
<p class="date"><%= m.date ? Time.parse(m.date).strftime('%Y/%m/%d %H:%M') : '' %></p>
<%# Prefer the highlighted fragment when the search produced one %>
<% body = (m.highlight && m.highlight.body) ? m.highlight.body.first : m.body %>
<% subject = (m.highlight && m.highlight.subject) ? m.highlight.subject.first : m.subject %>
<p class="subject"><%= subject %></p>
<div class="body"><%= simple_format(body) %></div>
</div>
<% end %>
<%# Offer the next page only while there are results beyond the current one %>
<% if @s.results.total > (params[:p].to_i+1)*settings.per_page %>
<p><a href="/?q=<%= Rack::Utils.escape(params[:q].to_s) %>&amp;s=<%= Rack::Utils.escape(params[:s].to_s) %>&amp;p=<%= params[:p].to_i+1 %>">Next &raquo;</a></p>
<% end %>
<% if @s.results.empty? %>
<p>No results.</p>
<% end %>
