Created
November 15, 2012 16:03
-
-
Save misodengaku/4079391 to your computer and use it in GitHub Desktop.
トトリちゃん画像収集用のアレ
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Net; | |
using System.Text; | |
using System.Threading.Tasks; | |
namespace TotoriDownloader | |
{ | |
/// <summary>
/// Console tool that walks Google image-search result pages for a fixed query
/// and saves every linked jpg/png/bmp/gif image into <see cref="savePath"/>.
/// </summary>
class Program
{
    /// <summary>Directory the downloaded images are written to.</summary>
    static string savePath = @"i:\totori_cs2\";

    /// <summary>Number of search-result pages requested by <see cref="Main"/>.</summary>
    const int PageCount = 100;

    /// <summary>Step applied to the "start" query parameter for each page.</summary>
    const int ResultsPerPage = 20;

    /// <summary>
    /// Search URL; the result offset is appended to the trailing "start=" parameter.
    /// </summary>
    // Alternative query that additionally excludes two terms (decoded: "トトリ -焼肉 -刑事"):
    // "https://www.google.co.jp/search?q=%E3%83%88%E3%83%88%E3%83%AA+-%E7%84%BC%E8%82%89+-%E5%88%91%E4%BA%8B&hl=ja&safe=off&sout=1&biw=1920&tbm=isch&sa=N"
    const string SearchUrl = "https://www.google.co.jp/search?q=%E3%83%88%E3%83%88%E3%83%AA&hl=ja&safe=off&sout=1&biw=1920&tbm=isch&sa=N&start=";

    /// <summary>
    /// Entry point: processes pages 0..PageCount-1 in parallel and prints start/end timestamps.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        Console.WriteLine("Start: " + DateTime.Now.ToString());
        Parallel.For(0, PageCount, GetTotori);
        Console.WriteLine("Completed!: " + DateTime.Now.ToString());
    }

    /// <summary>
    /// Downloads one search-result page, extracts the image URLs it links to and
    /// saves each image into <see cref="savePath"/>, then prints success/failure counts.
    /// </summary>
    /// <param name="_page">Zero-based page index; multiplied by ResultsPerPage to form the result offset.</param>
    static void GetTotori(int _page)
    {
        var offset = _page * ResultsPerPage;
        int done = 0, fail = 0;
        Console.WriteLine("Page " + _page + " GET Start");

        byte[] data;
        try
        {
            // WebClient is IDisposable; release it as soon as the page is fetched.
            using (var pageClient = new WebClient())
            {
                data = pageClient.DownloadData(SearchUrl + offset);
            }
        }
        catch (WebException e)
        {
            // A failed page must not escape into Parallel.For, where it would
            // surface as an AggregateException and abort the remaining pages.
            Console.Error.WriteLine("Page " + _page + " GET Failed: " + e.Message);
            return;
        }

        // The response bytes are decoded as Shift_JIS, as in the original implementation.
        string html = Encoding.GetEncoding("Shift_JIS").GetString(data);
        List<string> totori = ExtractImageUrls(html);

        // NOTE(review): URLs that share a file name map to the same target path,
        // so they overwrite one another (or fail when written concurrently).
        Parallel.ForEach(totori, u =>
        {
            try
            {
                var url = new Uri(u);
                var filePath = Path.Combine(savePath, Path.GetFileName(url.LocalPath));
                using (var imageClient = new WebClient())
                {
                    imageClient.DownloadFile(url, filePath);
                }
                // The counters are shared by all worker threads; a plain ++ would lose updates.
                System.Threading.Interlocked.Increment(ref done);
            }
            catch (Exception)
            {
                // Best effort: any failure (bad URI, network, file system) is only counted.
                System.Threading.Interlocked.Increment(ref fail);
            }
        });

        Console.WriteLine("Page " + _page + " End");
        Console.WriteLine("Done: " + done + " Error: " + fail);
    }

    /// <summary>
    /// Splits the page HTML on the image-result link markers and keeps the fragments
    /// that start with "http" and end with a supported image extension.
    /// </summary>
    /// <param name="html">Decoded HTML of one search-result page.</param>
    /// <returns>The candidate image URLs, in the order they appear in the page.</returns>
    private static List<string> ExtractImageUrls(string html)
    {
        var separators = new string[] { "http://www.google.co.jp/imgres?imgurl=", "&imgrefurl=" };
        var fragments = html.Split(separators, StringSplitOptions.RemoveEmptyEntries);

        var urls = new List<string>();
        foreach (var fragment in fragments)
        {
            // Ordinal comparison: URLs are not linguistic text.
            if (!fragment.StartsWith("http", StringComparison.Ordinal))
            {
                continue;
            }

            if (fragment.EndsWith("jpg", StringComparison.Ordinal)
                || fragment.EndsWith("png", StringComparison.Ordinal)
                || fragment.EndsWith("bmp", StringComparison.Ordinal)
                || fragment.EndsWith("gif", StringComparison.Ordinal))
            {
                urls.Add(fragment);
            }
        }

        return urls;
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
細かいことは知らぬ