Skip to content

Instantly share code, notes, and snippets.

@akkunchoi
Created December 4, 2013 14:55
Show Gist options
  • Select an option

  • Save akkunchoi/7788829 to your computer and use it in GitHub Desktop.

Select an option

Save akkunchoi/7788829 to your computer and use it in GitHub Desktop.
<?php
// Debug-friendly configuration: print diagnostics into the response and
// report every category of notice, warning and error.
ini_set('display_errors', 'On');
error_reporting(E_ALL);
/**
 * Print the start page of the crawler: a GET form that asks for the start
 * URL ("url") and the step count ("step", pre-filled with 2).
 *
 * The markup contains Japanese text, so the encoding is declared both as an
 * HTTP header (when headers can still be sent) and as a <meta> tag; without
 * either, browsers have to guess the charset and may show garbled labels.
 *
 * Writes directly to the output; returns nothing.
 */
function index(){
    if (!headers_sent()){
        header('Content-Type: text/html; charset=utf-8');
    }
?><!doctype html>
<html>
<head>
<meta charset="utf-8" />
<style type="text/css">
</style>
</head>
<body>
<h1>ファイルクローラー</h1>
<form action="">
開始URL: <input type="text" size="80" name="url" /><br />
ステップ: <input type="text" size="2" name="step" value="2" /> <br />
<input type="submit" name="submit" value="実行" />
</form>
</body></html><?php
}
/**
 * Translate a crawl id into the path of the CSV file written for that crawl.
 *
 * Crawl ids are produced with date("YmdHis"), i.e. exactly 14 digits. Anything
 * else (path separators, "..", quotes, line breaks, non-string values) is
 * rejected so that request data can never select an arbitrary file or leak
 * into a response header.
 *
 * @param string $outPrefix Output location prefix, e.g. "/path/to/out/".
 * @param mixed  $id        Raw id as received from the request.
 * @return string|null      "<prefix>_<id>.csv", or null when the id is malformed.
 */
function crawler_csv_path($outPrefix, $id){
    if (!is_string($id) || !preg_match('/\A[0-9]{14}\z/', $id)){
        return null;
    }
    return $outPrefix . '_' . $id . '.csv';
}

// Directory of this script, and the output prefix shared by the download
// handler and the crawler invocation further down.
$base = dirname(__FILE__);
$filename = $base . '/out/';

// ?download=<id>: send a previously generated CSV as an attachment and stop.
if (isset($_GET['download'])){
    $path = crawler_csv_path($filename, $_GET['download']);
    if ($path === null || !is_file($path)){
        die('file not found');
    }
    // Safe to reuse in a header: the helper only accepts 14 digits.
    $name = $_GET['download'];
    header('Content-Type: text/comma-separated-values');
    // Offer the file with its .csv extension so it opens in the right program.
    header('Content-Disposition: attachment; filename="' . $name . '.csv"');
    header('Content-Length: '.filesize($path));
    readfile($path);
    exit;
}
// --- Crawl request: validate the parameters and build the shell command ---

// Start URL. Only a string beginning with http:// or https:// is accepted:
// anything else (missing, empty, an array, or text starting with "-" that the
// crawler could read as an option) falls back to showing the input form.
$url = '';
if (isset($_GET['url']) && is_string($_GET['url'])){
    // escapeshellarg() cannot represent NUL bytes, so drop them up front.
    $url = trim(str_replace("\0", '', $_GET['url']));
}
if (!preg_match('#\Ahttps?://#i', $url)){
    index();
    exit;
}
// The "directory" part of the start URL is handed to the crawler right before
// the URL itself.
// NOTE(review): dirname() is a filesystem helper; for a URL without a path it
// may collapse to little more than the scheme -- confirm that is intended.
$accept = dirname($url);

// A crawl can take arbitrarily long; lift PHP's execution time limit.
set_time_limit(0);

// Step count, passed to the crawler as -m<step>. Only plain digits are
// accepted; any other value keeps the default of 2.
$step = 2;
if (isset($_GET['step']) && is_string($_GET['step']) && preg_match('/\A[0-9]+\z/', $_GET['step'])){
    $step = (int)$_GET['step'];
}

$reject = '';
$python = '/usr/local/bin/python';
// Timestamp id of this crawl; the result file is looked up later as
// "<output prefix>_<baseid>.csv".
$baseid = date("YmdHis");

// Every value is quoted as one shell word with escapeshellarg(). Stripping
// ";" alone (as was done before) neither stops "&", "|", backticks or "$()"
// from being interpreted by the shell, nor keeps ordinary URLs containing "&"
// intact.
$args = array(
    escapeshellarg($python),
    escapeshellarg($base . '/webstemmer/textcrawler.py'),
    '-m' . $step,
    '-L',
    escapeshellarg($filename),
    '-b' . $baseid,
    escapeshellarg($accept),
);
// An empty reject value used to vanish between two spaces; keep it omitted
// rather than passing an empty argument.
if ($reject !== ''){
    $args[] = escapeshellarg($reject);
}
$args[] = escapeshellarg($url);
// Merge stderr into stdout so the crawler's messages reach the page.
$exec = implode(' ', $args) . ' 2>&1';
// --- Run the crawler and stream its output to the browser ---

// Do not send "Content-Encoding: chunked" / "Transfer-Encoding: chunked" by
// hand: specifying them caused an error, and the response was already
// delivered chunked without them.
header("Content-Type: text/html; charset=utf-8");
header("Connection: keep-alive");

// Push pending output to the client. PHP's own output buffer has to be
// flushed before the system-level flush(), and ob_flush() raises a notice
// when no buffer is active, hence the guard.
$flushOutput = function (){
    if (ob_get_level() > 0){
        ob_flush();
    }
    flush();
};
$flushOutput();
?><!doctype html>
<html>
<head>
<style type="text/css">
</style>
</head>
<body><pre class="out"><?php
$handle = popen($exec, 'r');
if ($handle === false){
    // Without this check the read loop below would never terminate.
    echo "Failed to start the crawler.\n";
}else{
    // Read line by line so multibyte characters are not cut in the middle of
    // a fixed-size chunk, and escape the text: it is derived from crawled
    // pages and must not be interpreted as markup.
    while (($line = fgets($handle)) !== false){
        echo htmlspecialchars($line, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        $flushOutput();
    }
    pclose($handle);
    echo "\nDone!\n";
}
?></pre>
<?php
// The run counts as successful when the CSV for this crawl id exists.
$path = $filename . '_' . $baseid . '.csv';
if (file_exists($path)){
?><a href="?download=<?php echo $baseid ?>">ダウンロード</a><?php
}else{
?>失敗しました<?php
}
?>
</body></html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment