Created
February 27, 2022 14:46
-
-
Save scilganon/dadf7386a43ac3f2c092e4acbbd2865c to your computer and use it in GitHub Desktop.
пошук ресурсів за доменом `*.gov.ru`
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
id | url | |
---|---|---|
1 | rosim.gov.ru | |
3 | morflot.gov.ru | |
4 | minpromtorg.gov.ru | |
5 | favt.gov.ru | |
6 | rospatent.gov.ru | |
7 | regulation.gov.ru | |
8 | fssp.gov.ru | |
9 | roszdravnadzor.gov.ru | |
10 | roskachestvo.gov.ru | |
11 | orv.gov.ru | |
13 | rospn.gov.ru | |
14 | rosavtodor.gov.ru | |
15 | aiss.gov.ru | |
17 | alania.gov.ru | |
18 | mvd.gov.ru | |
19 | minvr.gov.ru | |
20 | minobrnauki.gov.ru | |
21 | rkn.gov.ru | |
22 | minenergo.gov.ru | |
23 | obrnadzor.gov.ru | |
25 | pfr.gov.ru | |
26 | saratov.gov.ru | |
27 | minstroyrf.gov.ru | |
28 | minzdrav.gov.ru | |
29 | ach.gov.ru | |
30 | council.gov.ru | |
31 | pravo.gov.ru | |
33 | proverki.gov.ru | |
37 | r21.fssp.gov.ru | |
38 | 70.mchs.gov.ru | |
39 | 21.mchs.gov.ru | |
40 | special.favt.gov.ru | |
41 | 76.mchs.gov.ru | |
42 | 38.mchs.gov.ru | |
43 | minzdrav.sakha.gov.ru | |
44 | r40.fssp.gov.ru | |
45 | r44.fssp.gov.ru | |
46 | minobr.saratov.gov.ru | |
48 | r64.fssp.gov.ru | |
49 | r01.fssp.gov.ru | |
50 | economy.council.gov.ru | |
52 | reglament.council.gov.ru | |
54 | defence.council.gov.ru | |
56 | 74.mchs.gov.ru | |
57 | r23.fssp.gov.ru | |
58 | mcx.gov.ru | |
59 | rosstat.gov.ru | |
60 | r61.fssp.gov.ru | |
61 | r22.fssp.gov.ru | |
62 | r08.fssp.gov.ru | |
63 | edu.rs.gov.ru | |
64 | 75.mchs.gov.ru | |
65 | 35.mchs.gov.ru | |
66 | r63.fssp.gov.ru | |
67 | 86.mchs.gov.ru | |
68 | 78.mchs.gov.ru | |
69 | 15.mchs.gov.ru | |
70 | 36.mchs.gov.ru | |
71 | r35.fssp.gov.ru | |
72 | r51.fssp.gov.ru | |
73 | 17.mchs.gov.ru | |
74 | r77.fssp.gov.ru | |
75 | 26.mchs.gov.ru | |
76 | r43.fssp.gov.ru | |
77 | r18.fssp.gov.ru | |
78 | open.edu.gov.ru | |
79 | 72.mchs.gov.ru | |
80 | r66.fssp.gov.ru | |
81 | r41.fssp.gov.ru | |
82 | r69.fssp.gov.ru | |
83 | edu.gov.ru | |
84 | r02.fssp.gov.ru | |
85 | international.council.gov.ru | |
87 | 18.mchs.gov.ru | |
88 | r32.fssp.gov.ru | |
89 | tatishevo.saratov.gov.ru | |
91 | knd.ac.gov.ru | |
92 | programs.gov.ru | |
93 | digital.gov.ru | |
94 | budget.gov.ru | |
96 | archives.gov.ru | |
97 | dszn.tomsk.gov.ru | |
98 | moscow.mchs.gov.ru | |
99 | 25.mchs.gov.ru | |
100 | r26.fssp.gov.ru | |
101 | minpriroda.sakha.gov.ru | |
102 | atlas.mchs.gov.ru | |
103 | r42.fssp.gov.ru | |
104 | fsvts.gov.ru | |
105 | rk.gov.ru | |
106 | rosreestr.gov.ru | |
107 | tourism.gov.ru | |
108 | szfo.gov.ru | |
111 | knd.gov.ru | |
112 | sirius.gov.ru | |
113 | skfo.gov.ru | |
115 | nic.gov.ru | |
116 | gossluzhba.gov.ru | |
117 | science.gov.ru | |
119 | fadm.gov.ru | |
120 | culture.gov.ru | |
121 | customs.gov.ru | |
122 | rs.gov.ru | |
123 | mintrud.gov.ru | |
124 | government.gov.ru | |
126 | duma.gov.ru | |
128 | svr.gov.ru | |
130 | sfo.gov.ru | |
132 | minfin.gov.ru | |
133 | www.mnr.gov.ru | |
134 | r25.fssp.gov.ru | |
135 | vak.minobrnauki.gov.ru | |
144 | extreme.mchs.gov.ru | |
147 | www.sakha.gov.ru | |
150 | www.nalog.gov.ru | |
151 | cittu.customs.gov.ru | |
153 | en.mchs.gov.ru | |
154 | moscow.roskazna.gov.ru | |
155 | mtrud.rk.gov.ru | |
156 | fsa.gov.ru | |
157 | www.fso.gov.ru | |
159 | perm.roskazna.gov.ru | |
160 | www.minsport.gov.ru | |
161 | www.ved.gov.ru | |
164 | www.economy.gov.ru | |
165 | digital.ac.gov.ru | |
166 | www.scrf.gov.ru | |
168 | www.fsb.gov.ru | |
170 | meteorf.gov.ru | |
172 | torgi.gov.ru | |
173 | priemnaya.duma.gov.ru | |
174 | blocklist.rkn.gov.ru | |
175 | lk.rpn.gov.ru | |
176 | www.tambov.gov.ru | |
178 | fsvps.gov.ru | |
179 | publication.pravo.gov.ru | |
180 | www.ais.fadm.gov.ru | |
181 | www.government.gov.ru | |
184 | zakupki.gov.ru | |
185 | gkz.rk.gov.ru | |
186 | ais.fadm.gov.ru | |
187 | zdrav.khv.gov.ru | |
188 | feo.rk.gov.ru | |
190 | sozd.duma.gov.ru |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import puppeteer from 'puppeteer'; | |
import { createConnection } from "mariadb" | |
import _ from "lodash"; | |
const { uniq, random } = _; | |
/**
 * Pause for the given number of seconds.
 *
 * @param {number} timeout - Delay in seconds (multiplied by 1000 for setTimeout).
 * @returns {Promise<void>} Promise that resolves with no value once the delay has elapsed.
 */
const sleep = (timeout) =>
  new Promise((resolve) => {
    setTimeout(resolve, timeout * 1000);
  });
/**
 * Pick one element of `list` at a random index.
 *
 * The index comes from lodash `random(0, list.length - 1)`, whose bounds are inclusive.
 *
 * @param {Array} list - Candidate values.
 * @returns {*} The element at the chosen index.
 */
const randomOfList = (list) => {
  const lastIndex = list.length - 1;
  return list[random(0, lastIndex)];
};
// Pool of User-Agent header values; one is picked at random for each page request.
const userAgents = [
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4451.0 Safari/537.36",
  "Mozilla/5.0 (X11; Linux i686 (x86_64)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4451.0 Safari/537.36",
  "Mozilla/5.0 (Linux; arm_64; Android 10; ONEPLUS A6013) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 YaApp_Android/21.21 YaSearchBrowser/21.21 BroPP/1.0 Mobile Safari/537.36",
  "Mozilla/5.0 (Linux; Android 10; Joy 4 Build/QKQ1.200603.001; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/89.0.4389.90 Mobile Safari/537.36",
  "Mozilla/5.0 (Linux; Android 10; BMH-AN10 Build/HUAWEIBMH-N19; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/78.0.3904.108 Mobile Safari/537.36 com.yandex.zen/4.7.3.2704 (HUAWEI BMH-AN10; Android 10) ZenKit/1.40.7.5-internalNewdesign-Zen.8861",
  "Mozilla/5.0 (Linux; Android 9; itel L5002) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Mobile Safari/537.36",
  "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.2; ASU2JS)",
  "Mozilla/5.0 (Linux; U; Android 9; en-US; Lenovo L78051 Build/PKQ1.190110.001) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/78.0.3904.108 UCBrowser/13.3.8.1305 Mobile Safari/537.36",
  "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7b) Gecko/20040421",
  "Mozilla/5.0 (Linux; Android 8.0.0; HTC_U-1u Build/OPR1.170623.032) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.86 Mobile Safari/537.36",
];
/**
 * Script entry point.
 *
 * Opens a MariaDB connection and a Puppeteer browser, then walks result pages
 * starting at page index 30, inserting every URL returned by getUrlsByPage()
 * into the `resources` table. Duplicate-key errors are ignored; any other
 * error aborts the run. The browser and the DB connection are always released.
 */
(async () => {
  // NOTE(review): connection settings (including credentials) are hard-coded;
  // consider moving them to configuration before sharing or deploying this script.
  const db = await createConnection({
    host: 'localhost',
    user: 'root',
    password: 'root',
    database: 'web_res',
  });

  try {
    // headless: false opens a visible browser window.
    const browser = await puppeteer.launch({
      headless: false,
    });

    try {
      const page = await browser.newPage();

      for (let i = 30; i < 10 ** 6; i++) {
        console.log("CURRENT_PAGE: ", i);

        const urls = await getUrlsByPage(i, page);

        for (const url of urls) {
          try {
            await db.query("INSERT INTO resources (url) VALUES (?)", [url]);
            // Only newly inserted URLs reach this line.
            console.log(url);
          } catch (e) {
            // A duplicate-key error means the URL is already stored
            // (presumably `resources.url` has a unique index - verify schema).
            if (e.code !== 'ER_DUP_ENTRY') {
              throw e;
            }
          }
        }

        // Random 1-3 second pause between page requests.
        await sleep(random(1, 3));
      }
    } finally {
      // Close the browser even when the loop throws.
      await browser.close();
    }
  } finally {
    // Release the DB connection on every exit path.
    await db.end();
  }
})().catch((err) => {
  // Report the failure and signal it through the exit code instead of
  // leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
/**
 * Extract every `gov.ru` hostname (the apex or any subdomain) mentioned in a link.
 *
 * The link is percent-decoded when possible, lower-cased, and split on every
 * character that cannot appear in a hostname; tokens that are `gov.ru` or end
 * in `.gov.ru` are kept. Links containing "webcache" are ignored entirely.
 *
 * @param {string} href - Absolute link taken from a result page.
 * @returns {string[]} Matching hostnames in order of appearance (may be empty).
 */
function extractGovRuHosts(href) {
  if (typeof href !== 'string' || /webcache/i.test(href)) {
    return [];
  }

  let text = href;
  try {
    // Redirect-style links carry the target URL percent-encoded in the query.
    text = decodeURIComponent(href);
  } catch (err) {
    if (!(err instanceof URIError)) {
      throw err;
    }
    // Malformed escape sequence: fall back to scanning the raw link.
  }

  return text
    .toLowerCase()
    .split(/[^a-z0-9.-]+/)
    // Drop stray leading/trailing dots or hyphens left over from the split.
    .map((token) => token.replace(/^[.-]+|[.-]+$/g, ''))
    .filter((token) => token === 'gov.ru' || token.endsWith('.gov.ru'));
}

/**
 * Load one search-result page and collect the `gov.ru` hostnames it links to.
 *
 * @param {number} [currentPage=1] - Page index; the request uses `start = currentPage * 10`.
 * @param {object} page - Puppeteer page used for navigation and DOM queries.
 * @returns {Promise<string[]>} De-duplicated hostnames found under `#search`.
 */
async function getUrlsByPage(currentPage = 1, page) {
  const ua = randomOfList(userAgents);
  await page.setUserAgent(ua);

  await page.goto(`https://www.google.com/search?q=site:*.gov.ru&start=${currentPage*10}`, {
    waitUntil: 'networkidle2',
  });

  // When a captcha form is present, wait longer before reading the page
  // (presumably to let an operator deal with it in the visible browser - confirm).
  const captchaForms = await page.$$('#captcha-form');
  if (captchaForms.length > 0) {
    await sleep(20);
  } else {
    await sleep(1);
  }

  // Read all hrefs in a single round-trip instead of one JSHandle per anchor.
  const hrefs = await page.$$eval('#search a', (anchors) => anchors.map((a) => a.href));

  return uniq(hrefs.flatMap((href) => extractGovRuHosts(href)));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Good evening. Please inform me if I need to request special permission from the Russian Federation for Internet Privacy and Supervision to document my findings while I attempt to defend myself, as many people, including entire governments, attempt to pin their crimes on an innocent, unsuspecting laborer. I will be happy to work with you in any way to end all of this immediately. Thank you for your consideration in working peacefully together to resolve an enormous issue.