mayblue9 · August 1, 2016 04:16
diff --git a/README.md b/README.md
diff --git a/algorithm.py b/algorithm.py
 # Copyright (C) 2013 Oliver Mader <[email protected]>

 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:

 # The above copyright notice and this permission notice shall be included in
 # all copies or substantial portions of the Software.

 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.

 # -*- coding: utf-8 -*-

 import naive_algorithm

 from collections import defaultdict
 from itertools import product, islice

 from suffix_tree import SuffixTree

 __all__ = [
    'maximal_matching_pairs',
    'repetition_regions',
    'essential_matching_pairs'
 ]


 def maximal_matching_pairs(tree):
    """
    Find all substring pairs fulfilling the properties specified in
    definition 1, namely _identical_, _non-overlapping_, _consecutive_ and
    _maximal_.

    Args:
        tree (SuffixTree): A suffix tree, build from the string to be searched.

    Returns:
        generator. A generator of tuples, each describing one matching pair as
                   composed of the start of the first and the second substring,
                   as well as the length of the substring.
    """
    s = tree.string
    substrings = defaultdict(set)

    # get all substring starts repeated at least once
    for leaf in tree.leaves:
        node = leaf.parent
        end = leaf.start
        while node is not None and \
              (node.pathLabel not in substrings or end - len(node.pathLabel) \
                      not in substrings[node.pathLabel]) and \
              node.edgeLabel != '':
            for i in range(len(node.pathLabel)):
                substrings[node.pathLabel[:i+1]].add(end - len(node.pathLabel))
            end -= len(node.edgeLabel)
            node = node.parent

    def contained(x, y, l, largers):
        for large in largers:
            starts = substrings[large]
            idx = list(find_all(large, sub))
            for i,j in product(idx, idx):
                if (x - i) + len(large) <= (y - j) and (x - i) in starts and \
                   (y - j) in starts:
                    return True
        return False

    # apply constraints and yield legit pairs
    for sub in sorted(substrings, key=len, reverse=True):
        starts = sorted(substrings[sub])
        l = len(sub)
        cs = [k for k in substrings if len(k) > len(sub) and
              k.startswith(sub) and k.endswith(sub)]

        for x, y in zip(starts, islice(starts, 1, None)):
            # overlapping
            if x + l > y:
                continue

            # non maximal: left and right expansion
            if (x > 0 and x + l < y and s[x-1] == s[y-1]) or \
               (y + l < len(s) and x + l < y and s[x+l] == s[y+l]):
                continue

            # not maximal: inner and outer expansion
            if contained(x, y, l, cs):
                continue

            yield x, y, l


 def repetition_regions(tree):
    """
    Find all repetition regions as specified in definition 2 and the following
    further limiting rules:

    2.1) _Minimal_: There does not exist any other repetition region R'
         containing R, with the fundamental substring P' containing P.
    2.2) _Leftmost_: There doesn't exist another repetition region R',
         originating from R shifted one character to the left, with equal
         length of the region and of the fundamental substring.

    Args:
        tree (SuffixTree): A suffix tree, build from the string to be searched.

    Returns:
        list. A list of tuples, each describing one repetition region build
              from multiple matching pairs. The tuples contain the start of
              the region, the end of the region not inclusive and the length
              of the fundamental substring, respectively.
    """
    return naive_algorithm.repetition_regions(tree.string)

    s = tree.string
    substrings = defaultdict(set)

    for leaf in tree.leaves:
        node = leaf.parent
        end = leaf.start
        while node is not None and \
              (node.pathLabel not in substrings or end - len(node.pathLabel) \
                      not in substrings[node.pathLabel]) and \
              node.edgeLabel != '':
            for i in range(len(node.pathLabel)):
                substrings[node.pathLabel[:i+1]].add(end - len(node.pathLabel))
            end -= len(node.edgeLabel)
            node = node.parent

    regions = []

    for sub in sorted(substrings, key=len):
        starts = sorted(substrings[sub])
        l = len(sub)

        a = 0
        while a < len(starts) - 1:
            f = starts[a]
            e = f + l
            b = a + 1
            while b < len(starts) and starts[b] - starts[b - 1] == l:
                e += l
                b += 1

            if b - a > 1:
                regions.append((f, e, l))

            a = b

    return regions


 def essential_matching_pairs(string):
    """
    Find all essential matching pairs as specified in definition 3. These
    pairs might be used to build arc diagrams from.

    Args:
        string (str): The string to be searched.

    Returns:
        generator. Yields tuples, each describing one matching pair as composed
                   of the start of the first and the second substring, as well
                   as the length of the substring.
    """
    tree = SuffixTree(string)

    regions = repetition_regions(tree)

    # definition 3.1 and 3.2
    for x,y,l in maximal_matching_pairs(tree):
        if not any(x >= r and y + l <= e for r,e,_ in regions) or \
           any(int((x - r)/f) == int((y + l - r - 1)/f) for r,_,f in regions):
            yield x, y, l

    # definition 3.3
    for r,e,l in regions:
        for x in range(r, e - l, l):
            yield x, x + l, l


 def children(node):
    """ Returns a generator to iterate all direct node children. """
    c = node.firstChild
    while c is not None:
        yield c
        c = c.next


 def find_all(a_str, sub):
    """ Yield all occurences of a substring. """
    start = 0
    while True:
        start = a_str.find(sub, start)
        if start == -1: return
        yield start
        start += 1

 # vim: set expandtab shiftwidth=4 softtabstop=4 textwidth=79:
diff --git a/algorithm.pyc b/algorithm.pyc
diff --git a/index.coffee b/index.coffee
 svg = d3.select('svg')
 width = svg.node().getBoundingClientRect().width
 height = svg.node().getBoundingClientRect().height

 # translate the viewBox to have (0,0) at the center of the vis
 svg
 .attr
   viewBox: "#{-width/2} #{-height+60} #{width} #{height}"
  
 width -= 60

 redraw = () ->
  sequence = d3.select('#sequence_input').node().value
    
  x = d3.scale.ordinal()
    .domain([0..sequence.length])
    .rangeBands([-width/2,width/2])

  d3.json "main.php?sequence=#{sequence}", (matches) ->
    svg.selectAll('.character').remove()
    svg.selectAll('.arc').remove()
    svg.select('.subsequence').remove()
    
    characters = svg.selectAll('.character')
      .data(sequence)

    characters.enter().append('text')
      .text (d) -> d
      .attr
        class: 'character'
        x: (d,i) -> x(i) + x.rangeBand()/2
        dy: '1em'

    arcs = svg.selectAll('.arc')
      .data(matches.filter (m) -> m.length > 1)

    enter_arcs = arcs.enter().append('path')
      .attr
        class: 'arc'
        d: (match) ->
          start_left = x(match.source)
          start_right = x(match.source + match.length)
          end_left = x(match.target)
          end_right = x(match.target + match.length)
          big_radius = (end_right-start_left)/2
          small_radius = (end_left-start_right)/2
          return "M#{start_left} 0 A#{big_radius} #{big_radius} 0 1 1 #{end_right} 0 L#{end_left} 0 A#{small_radius} #{small_radius} 0 1 0 #{start_right} 0  z"

    # subsequence selection
    subsequence = svg.append 'text'
      .attr
        class: 'subsequence'
        y: -350

    enter_arcs.on 'click', (d1) ->
      subsequence
        .text d1.subsequence

      svg.selectAll('.arc')
        .classed 'selected', (d2) -> d1.subsequence is d2.subsequence
        .classed 'dimmed', (d2) -> d1.subsequence isnt d2.subsequence

      d3.event.stopPropagation()

    svg.on 'click', () ->
      subsequence
        .text ''

      svg.selectAll('.arc')
        .classed 'selected', false
        .classed 'dimmed', false

 redraw()

 d3.select('#sequence_input').on 'change', () ->
  console.log 'change'
  redraw()
  
diff --git a/index.css b/index.css
 body, html {
  padding: 0;
  margin: 0;
  height: 100%;
  font-family: sans-serif;
  font-size: 12px;
 }
 svg {
  width: 100%;
  height: 100%;
  background: white;
 }
 .character {
  text-anchor: middle;
  font-size: 16px;
  font-family: Georgia;
 }
 .arc {
  opacity: 0.2;
  cursor: pointer;
  stroke: white
 }
 .arc.selected {
  fill: steelblue;
  opacity: 0.4;
 }
 .arc.dimmed {
  opacity: 0.07;
 }

 .subsequence {
  fill: steelblue;
  text-anchor: middle;
  font-size: 40px;
  font-family: Georgia;
  text-transform: uppercase;
 }
 #input_box {
  position: absolute;
  top: 6px;
  left: 6px;
  width: 100%
 }
 #sequence_input {
  width: 50%;
 }
diff --git a/index.html b/index.html
 <!DOCTYPE html>
 <html>
 <head>
  <meta charset="utf-8">
  <title>Arc Diagram III</title>
  <link type="text/css" href="index.css" rel="stylesheet"/>
  <script src="http://d3js.org/d3.v3.min.js"></script>
 </head>
 <body>
  <svg></svg>
  <div id="input_box">
    <label for="sequence_input">Sequence to be analyzed (press ENTER to redraw):</label>
    <input id="sequence_input" type="text" value="314159265358979323846264338327950288419716939937510582097494459230781640628620899862803482534">
  </div>
  <script src="index.js"></script>
 </body>
 </html>
diff --git a/index.js b/index.js
 // Generated by CoffeeScript 1.10.0
 (function() {
  var height, redraw, svg, width;

  svg = d3.select('svg');

  width = svg.node().getBoundingClientRect().width;

  height = svg.node().getBoundingClientRect().height;

  svg.attr({
    viewBox: (-width / 2) + " " + (-height + 60) + " " + width + " " + height
  });

  width -= 60;

  redraw = function() {
    var j, ref, results, sequence, x;
    sequence = d3.select('#sequence_input').node().value;
    x = d3.scale.ordinal().domain((function() {
      results = [];
      for (var j = 0, ref = sequence.length; 0 <= ref ? j <= ref : j >= ref; 0 <= ref ? j++ : j--){ results.push(j); }
      return results;
    }).apply(this)).rangeBands([-width / 2, width / 2]);
    return d3.json("main.php?sequence=" + sequence, function(matches) {
      var arcs, characters, enter_arcs, subsequence;
      svg.selectAll('.character').remove();
      svg.selectAll('.arc').remove();
      svg.select('.subsequence').remove();
      characters = svg.selectAll('.character').data(sequence);
      characters.enter().append('text').text(function(d) {
        return d;
      }).attr({
        "class": 'character',
        x: function(d, i) {
          return x(i) + x.rangeBand() / 2;
        },
        dy: '1em'
      });
      arcs = svg.selectAll('.arc').data(matches.filter(function(m) {
        return m.length > 1;
      }));
      enter_arcs = arcs.enter().append('path').attr({
        "class": 'arc',
        d: function(match) {
          var big_radius, end_left, end_right, small_radius, start_left, start_right;
          start_left = x(match.source);
          start_right = x(match.source + match.length);
          end_left = x(match.target);
          end_right = x(match.target + match.length);
          big_radius = (end_right - start_left) / 2;
          small_radius = (end_left - start_right) / 2;
          return "M" + start_left + " 0 A" + big_radius + " " + big_radius + " 0 1 1 " + end_right + " 0 L" + end_left + " 0 A" + small_radius + " " + small_radius + " 0 1 0 " + start_right + " 0  z";
        }
      });
      subsequence = svg.append('text').attr({
        "class": 'subsequence',
        y: -350
      });
      enter_arcs.on('click', function(d1) {
        subsequence.text(d1.subsequence);
        svg.selectAll('.arc').classed('selected', function(d2) {
          return d1.subsequence === d2.subsequence;
        }).classed('dimmed', function(d2) {
          return d1.subsequence !== d2.subsequence;
        });
        return d3.event.stopPropagation();
      });
      return svg.on('click', function() {
        subsequence.text('');
        return svg.selectAll('.arc').classed('selected', false).classed('dimmed', false);
      });
    });
  };

  redraw();

  d3.select('#sequence_input').on('change', function() {
    console.log('change');
    return redraw();
  });

 }).call(this);
diff --git a/main.php b/main.php
 <?php
  echo shell_exec('python main.py ' . $_GET['sequence']);
 ?>
diff --git a/main.py b/main.py
 import sys
 import algorithm as arc
 import json

 sequence = sys.argv[1]

 matches = arc.essential_matching_pairs(sequence)

 result = [{'source': match[0], 'target': match[1], 'length': match[2], 'subsequence': sequence[match[0]:match[0]+match[2]]} for match in matches if match[2] > 1]

 print json.dumps(result)
diff --git a/naive_algorithm.py b/naive_algorithm.py
 # Copyright (C) 2013 Oliver Mader <[email protected]>

 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:

 # The above copyright notice and this permission notice shall be included in
 # all copies or substantial portions of the Software.

 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.

 # -*- coding: utf-8 -*-

 def maximal_matching_pairs(string):
    """
    Find all substring pairs fulfilling the properties specified in
    definition 1, namely _identical_, _non-overlapping_, _consecutive_ and
    _maximal_.

    Args:
        string (str): The string to be searched.

    Returns:
        generator. A generator of tuples, each describing one matching pair as
                   composed of the start of the first and the second substring,
                   as well as the length of the substring.
    """
    n = len(string)
    for x in range(0, n - 1):
        for l in range(int((n - x)/2) + 1, 0, -1):
            c = string[x:x+l]
            y = string.find(c, x + 1)

            # not found or not consecutive
            if y == -1 or y < x + l:
                continue

            # not maximal: left or right expansion
            if (y + l < n and x + l < y and string[x+l] == string[y+l]) or \
               (x - 1 >= 0 and x + l < y and string[x-1] == string[y-1]):
                continue

            # not maximal: inner expansion
            if any(string[x:x+l+i] == string[y-i:y+l]
                   for i in range(1, (y - x - l)/2 + 1)):
                continue

            # not maximal: outer expansion
            if any(string[x-i:x+l] == string[y:y+l+i]
                   for i in range(1, max(x, n - y - l) + 1)):
                continue

            yield x, y, l


 def repetition_regions(string):
    """
    Find all repetition regions as specified in definition 2 and the following
    further limiting rules:

    2.1) _Minimal_: There doesn't exist any other repetition region R'
         containing R, with an equal or smaller fundamental substring P',
         contained in P.
    2.2) _Leftmost_: There doesn't exist another repetition region R',
         originating from R shifted one character to the left, with equal
         length of the region and of the fundamental substring.

    Args:
        string (str): The string to be searched.

    Returns:
        list. A list of tuples, each describing one repetition region build
              from multiple matching pairs. The tuples contain the start of
              the region, the end of the region not inclusive and the length
              of the fundamental substring, respectively.
    """
    n = len(string)
    s = 0
    regions = []
    while s < n - 1:
        for l in range(1, (n - s)/2 + 1):
            candidate = string[s:s+l]

            end = s + l
            while string.find(candidate, end, end + l) == end:
                end += l

            # not enough pairs
            if end == s + l:
                continue

            # not left most
            if (s-1, end-1, len(candidate)) in regions:
                continue

            # not minimal
            if any(r[0] <= s and r[1] >= end and r[2] <= len(candidate)
                   for r in regions):
                continue

            regions.append((s, end, len(candidate)))
        s += 1
    return regions


 def essential_matching_pairs(string):
    """
    Find all essential matching pairs as specified in definition 3. These
    pairs might be used to build arc diagrams from.

    Args:
        string (str): The string to be searched.

    Returns:
        generator. Yields tuples, each describing one matching pair as composed
                   of the start of the first and the second substring, as well
                   as the length of the substring.
    """
    regions = repetition_regions(string)

    # definition 3.1 and 3.2
    for x,y,l in maximal_matching_pairs(string):
        if not any(x >= r and y + l <= e for r,e,_ in regions) or \
           any(int((x - r)/f) == int((y + l - r - 1)/f) for r,_,f in regions):
            yield (x, y, l)

    # definition 3.3
    for r,e,l in regions:
        for x in range(r, e - l, l):
            yield (x, x + l, l)

 # vim: set expandtab shiftwidth=4 softtabstop=4 textwidth=79:
diff --git a/naive_algorithm.pyc b/naive_algorithm.pyc
diff --git a/thumbnail.png b/thumbnail.png
	# Copyright (C) 2013 Oliver Mader <[email protected]>

	# Permission is hereby granted, free of charge, to any person obtaining a copy
	# of this software and associated documentation files (the "Software"), to deal
	# in the Software without restriction, including without limitation the rights
	# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	# copies of the Software, and to permit persons to whom the Software is
	# furnished to do so, subject to the following conditions:

	# The above copyright notice and this permission notice shall be included in
	# all copies or substantial portions of the Software.

	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	# THE SOFTWARE.

	# -- coding: utf-8 --

	import naive_algorithm

	from collections import defaultdict
	from itertools import product, islice

	from suffix_tree import SuffixTree

	__all__ = [
	'maximal_matching_pairs',
	'repetition_regions',
	'essential_matching_pairs'
	]


	def maximal_matching_pairs(tree):
	"""
	Find all substring pairs fulfilling the properties specified in
	definition 1, namely _identical_, _non-overlapping_, _consecutive_ and
	_maximal_.

	Args:
	tree (SuffixTree): A suffix tree, build from the string to be searched.

	Returns:
	generator. A generator of tuples, each describing one matching pair as
	composed of the start of the first and the second substring,
	as well as the length of the substring.
	"""
	s = tree.string
	substrings = defaultdict(set)

	# get all substring starts repeated at least once
	for leaf in tree.leaves:
	node = leaf.parent
	end = leaf.start
	while node is not None and \
	(node.pathLabel not in substrings or end - len(node.pathLabel) \
	not in substrings[node.pathLabel]) and \
	node.edgeLabel != '':
	for i in range(len(node.pathLabel)):
	substrings[node.pathLabel[:i+1]].add(end - len(node.pathLabel))
	end -= len(node.edgeLabel)
	node = node.parent

	def contained(x, y, l, largers):
	for large in largers:
	starts = substrings[large]
	idx = list(find_all(large, sub))
	for i,j in product(idx, idx):
	if (x - i) + len(large) <= (y - j) and (x - i) in starts and \
	(y - j) in starts:
	return True
	return False

	# apply constraints and yield legit pairs
	for sub in sorted(substrings, key=len, reverse=True):
	starts = sorted(substrings[sub])
	l = len(sub)
	cs = [k for k in substrings if len(k) > len(sub) and
	k.startswith(sub) and k.endswith(sub)]

	for x, y in zip(starts, islice(starts, 1, None)):
	# overlapping
	if x + l > y:
	continue

	# non maximal: left and right expansion
	if (x > 0 and x + l < y and s[x-1] == s[y-1]) or \
	(y + l < len(s) and x + l < y and s[x+l] == s[y+l]):
	continue

	# not maximal: inner and outer expansion
	if contained(x, y, l, cs):
	continue

	yield x, y, l


	def repetition_regions(tree):
	"""
	Find all repetition regions as specified in definition 2 and the following
	further limiting rules:

	2.1) _Minimal_: There does not exist any other repetition region R'
	containing R, with the fundamental substring P' containing P.
	2.2) _Leftmost_: There doesn't exist another repetition region R',
	originating from R shifted one character to the left, with equal
	length of the region and of the fundamental substring.

	Args:
	tree (SuffixTree): A suffix tree, build from the string to be searched.

	Returns:
	list. A list of tuples, each describing one repetition region build
	from multiple matching pairs. The tuples contain the start of
	the region, the end of the region not inclusive and the length
	of the fundamental substring, respectively.
	"""
	return naive_algorithm.repetition_regions(tree.string)

	s = tree.string
	substrings = defaultdict(set)

	for leaf in tree.leaves:
	node = leaf.parent
	end = leaf.start
	while node is not None and \
	(node.pathLabel not in substrings or end - len(node.pathLabel) \
	not in substrings[node.pathLabel]) and \
	node.edgeLabel != '':
	for i in range(len(node.pathLabel)):
	substrings[node.pathLabel[:i+1]].add(end - len(node.pathLabel))
	end -= len(node.edgeLabel)
	node = node.parent

	regions = []

	for sub in sorted(substrings, key=len):
	starts = sorted(substrings[sub])
	l = len(sub)

	a = 0
	while a < len(starts) - 1:
	f = starts[a]
	e = f + l
	b = a + 1
	while b < len(starts) and starts[b] - starts[b - 1] == l:
	e += l
	b += 1

	if b - a > 1:
	regions.append((f, e, l))

	a = b

	return regions


	def essential_matching_pairs(string):
	"""
	Find all essential matching pairs as specified in definition 3. These
	pairs might be used to build arc diagrams from.

	Args:
	string (str): The string to be searched.

	Returns:
	generator. Yields tuples, each describing one matching pair as composed
	of the start of the first and the second substring, as well
	as the length of the substring.
	"""
	tree = SuffixTree(string)

	regions = repetition_regions(tree)

	# definition 3.1 and 3.2
	for x,y,l in maximal_matching_pairs(tree):
	if not any(x >= r and y + l <= e for r,e,_ in regions) or \
	any(int((x - r)/f) == int((y + l - r - 1)/f) for r,_,f in regions):
	yield x, y, l

	# definition 3.3
	for r,e,l in regions:
	for x in range(r, e - l, l):
	yield x, x + l, l


	def children(node):
	""" Returns a generator to iterate all direct node children. """
	c = node.firstChild
	while c is not None:
	yield c
	c = c.next


	def find_all(a_str, sub):
	""" Yield all occurences of a substring. """
	start = 0
	while True:
	start = a_str.find(sub, start)
	if start == -1: return
	yield start
	start += 1

	# vim: set expandtab shiftwidth=4 softtabstop=4 textwidth=79:
	svg = d3.select('svg')
	width = svg.node().getBoundingClientRect().width
	height = svg.node().getBoundingClientRect().height

	# translate the viewBox to have (0,0) at the center of the vis
	svg
	.attr
	viewBox: "#{-width/2} #{-height+60} #{width} #{height}"

	width -= 60

	redraw = () ->
	sequence = d3.select('#sequence_input').node().value

	x = d3.scale.ordinal()
	.domain([0..sequence.length])
	.rangeBands([-width/2,width/2])

	d3.json "main.php?sequence=#{sequence}", (matches) ->
	svg.selectAll('.character').remove()
	svg.selectAll('.arc').remove()
	svg.select('.subsequence').remove()

	characters = svg.selectAll('.character')
	.data(sequence)

	characters.enter().append('text')
	.text (d) -> d
	.attr
	class: 'character'
	x: (d,i) -> x(i) + x.rangeBand()/2
	dy: '1em'

	arcs = svg.selectAll('.arc')
	.data(matches.filter (m) -> m.length > 1)

	enter_arcs = arcs.enter().append('path')
	.attr
	class: 'arc'
	d: (match) ->
	start_left = x(match.source)
	start_right = x(match.source + match.length)
	end_left = x(match.target)
	end_right = x(match.target + match.length)
	big_radius = (end_right-start_left)/2
	small_radius = (end_left-start_right)/2
	return "M#{start_left} 0 A#{big_radius} #{big_radius} 0 1 1 #{end_right} 0 L#{end_left} 0 A#{small_radius} #{small_radius} 0 1 0 #{start_right} 0 z"

	# subsequence selection
	subsequence = svg.append 'text'
	.attr
	class: 'subsequence'
	y: -350

	enter_arcs.on 'click', (d1) ->
	subsequence
	.text d1.subsequence

	svg.selectAll('.arc')
	.classed 'selected', (d2) -> d1.subsequence is d2.subsequence
	.classed 'dimmed', (d2) -> d1.subsequence isnt d2.subsequence

	d3.event.stopPropagation()

	svg.on 'click', () ->
	subsequence
	.text ''

	svg.selectAll('.arc')
	.classed 'selected', false
	.classed 'dimmed', false

	redraw()

	d3.select('#sequence_input').on 'change', () ->
	console.log 'change'
	redraw()
	body, html {
	padding: 0;
	margin: 0;
	height: 100%;
	font-family: sans-serif;
	font-size: 12px;
	}
	svg {
	width: 100%;
	height: 100%;
	background: white;
	}
	.character {
	text-anchor: middle;
	font-size: 16px;
	font-family: Georgia;
	}
	.arc {
	opacity: 0.2;
	cursor: pointer;
	stroke: white
	}
	.arc.selected {
	fill: steelblue;
	opacity: 0.4;
	}
	.arc.dimmed {
	opacity: 0.07;
	}

	.subsequence {
	fill: steelblue;
	text-anchor: middle;
	font-size: 40px;
	font-family: Georgia;
	text-transform: uppercase;
	}
	#input_box {
	position: absolute;
	top: 6px;
	left: 6px;
	width: 100%
	}
	#sequence_input {
	width: 50%;
	}
	<!DOCTYPE html>
	<html>
	<head>
	<meta charset="utf-8">
	<title>Arc Diagram III</title>
	<link type="text/css" href="index.css" rel="stylesheet"/>
	<script src="http://d3js.org/d3.v3.min.js"></script>
	</head>
	<body>
	<svg></svg>
	<div id="input_box">
	<label for="sequence_input">Sequence to be analyzed (press ENTER to redraw):</label>
	<input id="sequence_input" type="text" value="314159265358979323846264338327950288419716939937510582097494459230781640628620899862803482534">
	</div>
	<script src="index.js"></script>
	</body>
	</html>
	// Generated by CoffeeScript 1.10.0
	(function() {
	var height, redraw, svg, width;

	svg = d3.select('svg');

	width = svg.node().getBoundingClientRect().width;

	height = svg.node().getBoundingClientRect().height;

	svg.attr({
	viewBox: (-width / 2) + " " + (-height + 60) + " " + width + " " + height
	});

	width -= 60;

	redraw = function() {
	var j, ref, results, sequence, x;
	sequence = d3.select('#sequence_input').node().value;
	x = d3.scale.ordinal().domain((function() {
	results = [];
	for (var j = 0, ref = sequence.length; 0 <= ref ? j <= ref : j >= ref; 0 <= ref ? j++ : j--){ results.push(j); }
	return results;
	}).apply(this)).rangeBands([-width / 2, width / 2]);
	return d3.json("main.php?sequence=" + sequence, function(matches) {
	var arcs, characters, enter_arcs, subsequence;
	svg.selectAll('.character').remove();
	svg.selectAll('.arc').remove();
	svg.select('.subsequence').remove();
	characters = svg.selectAll('.character').data(sequence);
	characters.enter().append('text').text(function(d) {
	return d;
	}).attr({
	"class": 'character',
	x: function(d, i) {
	return x(i) + x.rangeBand() / 2;
	},
	dy: '1em'
	});
	arcs = svg.selectAll('.arc').data(matches.filter(function(m) {
	return m.length > 1;
	}));
	enter_arcs = arcs.enter().append('path').attr({
	"class": 'arc',
	d: function(match) {
	var big_radius, end_left, end_right, small_radius, start_left, start_right;
	start_left = x(match.source);
	start_right = x(match.source + match.length);
	end_left = x(match.target);
	end_right = x(match.target + match.length);
	big_radius = (end_right - start_left) / 2;
	small_radius = (end_left - start_right) / 2;
	return "M" + start_left + " 0 A" + big_radius + " " + big_radius + " 0 1 1 " + end_right + " 0 L" + end_left + " 0 A" + small_radius + " " + small_radius + " 0 1 0 " + start_right + " 0 z";
	}
	});
	subsequence = svg.append('text').attr({
	"class": 'subsequence',
	y: -350
	});
	enter_arcs.on('click', function(d1) {
	subsequence.text(d1.subsequence);
	svg.selectAll('.arc').classed('selected', function(d2) {
	return d1.subsequence === d2.subsequence;
	}).classed('dimmed', function(d2) {
	return d1.subsequence !== d2.subsequence;
	});
	return d3.event.stopPropagation();
	});
	return svg.on('click', function() {
	subsequence.text('');
	return svg.selectAll('.arc').classed('selected', false).classed('dimmed', false);
	});
	});
	};

	redraw();

	d3.select('#sequence_input').on('change', function() {
	console.log('change');
	return redraw();
	});

	}).call(this);
	<?php
	echo shell_exec('python main.py ' . $_GET['sequence']);
	?>
	import sys
	import algorithm as arc
	import json

	sequence = sys.argv[1]

	matches = arc.essential_matching_pairs(sequence)

	result = [{'source': match[0], 'target': match[1], 'length': match[2], 'subsequence': sequence[match[0]:match[0]+match[2]]} for match in matches if match[2] > 1]

	print json.dumps(result)