The following gist is an extract from the article "Building a simple crawler". It allows crawling from a URL for a given number of bounces.
from crawler import Crawler
crawler = Crawler()
crawler.crawl('http://techcrunch.com/')
from datetime import datetime | |
def time_average(time_1, time_2): | |
""" | |
return the average time between time_1 and time_2 | |
as a string in the same format as the input | |
""" | |
FMT = '%M:%S:%f' |
# -*- coding: utf-8 -*- | |
# <codecell> | |
# term frequency | |
from math import log | |
# XXX: Enter in a query term from the corpus variable | |
# QUERY_TERMS = ['mr.', 'green'] | |
echo "This script will: | |
1) install all modules need to run web2py on Ubuntu/Debian | |
2) install web2py in /home/www-data/ | |
3) create a self signed sll certificate | |
4) setup web2py with mod_wsgi | |
5) overwrite /etc/apache2/sites-available/default | |
6) restart apache. | |
You may want to read this script before running it. |
#! /bin/sh | |
### BEGIN INIT INFO | |
# Provides: elasticsearch | |
# Required-Start: $all | |
# Required-Stop: $all | |
# Default-Start: 2 3 4 5 | |
# Default-Stop: 0 1 6 | |
# Short-Description: Starts elasticsearch | |
# Description: Starts elasticsearch using start-stop-daemon | |
### END INIT INFO |
#!/bin/bash | |
# Elasticsearch Start and Stop Script | |
ES_HOME="/opt/elsearch/elasticsearch" | |
ES_USER="esearch" | |
PID=$(ps ax | grep elasticsearch | grep $ES_HOME | grep -v grep | awk '{print $1}') | |
#echo $PID |
The following gist is an extract from the article "Building a simple crawler". It allows crawling from a URL for a given number of bounces.
from crawler import Crawler
crawler = Crawler()
crawler.crawl('http://techcrunch.com/')
curl https://raw.githubusercontent.com/pypa/pip/master/contrib/get-pip.py > get-pip.py; | |
python get-pip.py; | |
rm -f get-pip.py; | |
# change directory here. Go in your project home dir. | |
# cd /opt/uuid_resolver/; | |
pip install virtualenv; | |
virtualenv venv; | |
# activate the virtualenv | |
source venv/bin/activate | |
# change here your requirements.txt location |
pip freeze --local | grep -v '^\-e' | cut -d = -f 1 | xargs pip install -U |
# -*- coding: utf-8 -*- | |
import unittest | |
index = {} | |
class tree(object): |
#!/usr/bin/env | |
# -*- coding: utf-8 -*- | |
import unittest | |
""" Quicksort implementation """ | |
def quicksort(arr): | |
""" Quicksort a list |