-
-
Save siayi/a31ea383ca78b3a6116886258af0f2f4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#IDM | |
#!/bin/python3 | |
import urllib3 | |
from bs4 import BeautifulSoup | |
from time import sleep | |
import csv | |
import sys | |
import json | |
import ssl | |
import hashlib | |
from tqdm import tqdm | |
def ambilJson(url):
    """Fetch ``url`` with a browser-like User-Agent and return the parsed JSON.

    Args:
        url: Absolute URL of an endpoint that answers with a JSON document.

    Returns:
        Whatever ``json.loads`` produces from the response body.
    """
    http = urllib3.PoolManager()
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2403.107 Safari/537.36'}
    # ``headers`` has to be passed by keyword: the third positional parameter
    # of ``PoolManager.request`` is not the header mapping, so a positional
    # dict would be sent as request data instead of as HTTP headers.
    req = http.request('GET', url, headers=headers)
    return json.loads(req.data)
def ambil(url):
    """Fetch ``url`` with a browser-like User-Agent and return it parsed as HTML.

    Args:
        url: Absolute URL of the page to download.

    Returns:
        A ``BeautifulSoup`` tree built from the raw response body.
    """
    http = urllib3.PoolManager()
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'}
    # ``headers`` has to be passed by keyword: the third positional parameter
    # of ``PoolManager.request`` is not the header mapping, so a positional
    # dict would be sent as request data instead of as HTTP headers.
    req = http.request('GET', url, headers=headers)
    # NOTE(review): no parser is named, so bs4 picks whichever one is
    # installed; the callers index into the tree by position, so pin a parser
    # once the one used to build the existing data is known.
    dsoup = BeautifulSoup(req.data)
    return dsoup
root = 'http://idm.kemendesa.go.id'
# Detail page of one fixed village (query values are hard-coded).
url = root+'/idm_data?id_prov=33&id_kabupaten=3321&id_kecamatan=332110&id_desa=3321102014&tahun=2019'


def _list_idm_query():
    """Return the URL-encoded query string for the ``/users/list_idm`` endpoint.

    The parameters are generated instead of being kept as one very long
    literal; the encoded result is the same query string (``[``/``]`` become
    ``%5B``/``%5D``, empty values stay as ``key=``).
    """
    from urllib.parse import urlencode

    column_names = [
        'tahun', 'id_prov', 'id_kabupaten', 'id_kecamatan', 'id_desa',
        'iks_2019', 'ike_2019', 'ikl_2019', 'idm_2019', 'idm_status_2019',
        'detail',
    ]
    params = [('draw', '1')]
    for index, name in enumerate(column_names):
        prefix = 'columns[%d]' % index
        params.extend([
            (prefix + '[data]', name),
            (prefix + '[name]', ''),
            (prefix + '[searchable]', 'true'),
            # Only the ``detail`` column is flagged as not orderable.
            (prefix + '[orderable]', 'false' if name == 'detail' else 'true'),
            (prefix + '[search][value]', ''),
            (prefix + '[search][regex]', 'false'),
        ])
    params.extend([
        ('order[0][column]', '0'),
        ('order[0][dir]', 'asc'),
        ('start', '0'),
        ('length', '0'),
        ('search[value]', ''),
        ('search[regex]', 'false'),
        ('_', '1568821375769'),
    ])
    return urlencode(params)


jurl = root + '/users/list_idm?' + _list_idm_query()
# Rows of the listing as returned under the ``data`` key of the JSON reply.
y = ambilJson(jurl)['data']
# Pull the IDM detail table for every village of one kabupaten.
idkab = '3319'
# Names given to the cells of one row of the detail table.
key0 = ['Indeks_komposit','Dimensi','skor','Nilai','Perangkat_Indikator','skor_1','Nilai_1','Indikator_peritem','skor_2','kosong','eksisting_dari_indikator_umum','Kewenangan_pusat','Kewenangan_provinsi','Kewenangan_kabupaten','Kewenangan_desa','Kewenangan_sosial_alami']
for u in tqdm(idmy.find({'id_kabupaten': idkab}, {'_id': 0})):
    # Skip villages whose detail link is already stored in ``idm``.
    if idm.find_one({'detail': u['detail']}) is not None:
        continue
    # The detail URL is the first double-quoted part of the ``detail`` field.
    url0 = root + u['detail'].split('"')[1]
    soup = ambil(url0)
    prefix = list(u.values())
    rows = []
    try:
        for tr in soup.findAll('div', {'class': 'row'})[1].findAll('tr')[2:]:
            cells = [td.text.replace('\n', '').strip() for td in tr.findAll('td')]
            if len(cells) < len(key0):
                # Rows with fewer cells than columns: take the missing leading
                # cells from the previously stored row. The offsets come from
                # the record and table widths instead of fixed 16/32, which
                # were only right for 16-field records.
                missing = len(key0) - len(cells)
                cells = rows[-1][len(prefix):len(prefix) + missing] + cells
            # The last five cells are replaced by the row's image file names
            # (without the ``.png`` extension).
            icons = [img['src'].split('/')[-1].replace('.png', '') for img in tr.findAll('img')]
            rows.append(prefix + cells[:-5] + icons)
    except Exception:
        # Best effort: if the page cannot be parsed, still store one row with
        # the listing fields only. ``Exception`` (rather than a bare except)
        # keeps Ctrl-C able to stop the run.
        rows.append(list(prefix))
    key1 = list(u.keys()) + key0  # listing keys followed by the table keys
    for row in rows:
        # Insert each row once; the original inserted every row twice into
        # the same collection, which looks like a leftover line.
        idm.insert_one(dict(zip(key1, row)))
# Download the detail table of every village that has no rows in ``idm`` yet.
ik = {doc['id_desa'] for doc in idm.find({})}
# Names given to the cells of one row of the detail table.
key0 = ['Indeks_komposit','Dimensi','skor','Nilai','Perangkat_Indikator','skor_1','Nilai_1','Indikator_peritem','skor_2','kosong','eksisting_dari_indikator_umum','Kewenangan_pusat','Kewenangan_provinsi','Kewenangan_kabupaten','Kewenangan_desa','Kewenangan_sosial_alami']
for u in idmy.find({}, {'_id': 0}):  # NOTE: no sort is applied to this query
    if u['id_desa'] in ik:
        print('data id %s sudah tersedia' % u['id_desa'])
        continue
    print('mengunduh %s' % u['detail'])
    # The detail URL is the first double-quoted part of the ``detail`` field.
    url0 = root + u['detail'].split('"')[1]
    # Fetch this village's own page; the original passed the fixed sample
    # ``url`` here, so every village received the same table.
    soup = ambil(url0)
    prefix = list(u.values())
    rows = []
    try:
        for tr in soup.findAll('div', {'class': 'row'})[1].findAll('tr')[2:]:
            cells = [td.text.replace('\n', '').strip() for td in tr.findAll('td')]
            if len(cells) < len(key0):
                # Rows with fewer cells than columns: take the missing leading
                # cells from the previously stored row. The offsets come from
                # the record and table widths instead of fixed 16/32, which
                # were only right for 16-field records.
                missing = len(key0) - len(cells)
                cells = rows[-1][len(prefix):len(prefix) + missing] + cells
            # The last five cells are replaced by the row's image file names
            # (without the ``.png`` extension).
            icons = [img['src'].split('/')[-1].replace('.png', '') for img in tr.findAll('img')]
            rows.append(prefix + cells[:-5] + icons)
    except Exception:
        # Best effort: if the page cannot be parsed, still store one row with
        # the listing fields only. ``Exception`` (rather than a bare except)
        # keeps Ctrl-C able to stop the run.
        rows.append(list(prefix))
    key1 = list(u.keys()) + key0  # listing keys followed by the table keys
    for row in rows:
        idm.insert_one(dict(zip(key1, row)))
    if rows:
        # Every inserted row carries this village's id, so remember it here
        # instead of re-reading the whole collection after each village.
        ik.add(u['id_desa'])
# debug | |
import requests | |
def ambil(url):
    """Download ``url`` with ``requests`` and return the body parsed as HTML.

    Certificate verification is switched off for the request
    (``verify=False``). No parser is named, so bs4 chooses one itself.
    """
    response = requests.get(url, verify=False)
    return BeautifulSoup(response.content)
# Names given to the cells of one row of the detail table.
key0 = ['Indeks_komposit','Dimensi','skor','Nilai','Perangkat_Indikator','skor_1','Nilai_1','Indikator_peritem','skor_2','kosong','eksisting_dari_indikator_umum','Kewenangan_pusat','Kewenangan_provinsi','Kewenangan_kabupaten','Kewenangan_desa','Kewenangan_sosial_alami']
# Pull the detail table for every village of province "11".
for u in tqdm(idmy.find({'id_prov': "11"}, {'_id': 0})):
    # Skip villages whose detail link is already stored in ``idm11``.
    if idm11.find_one({'detail': u['detail']}) is not None:
        continue
    # The detail URL is the first double-quoted part of the ``detail`` field.
    url0 = root + u['detail'].split('"')[1]
    soup = ambil(url0)
    prefix = list(u.values())
    rows = []
    try:
        for tr in soup.findAll('div', {'class': 'row'})[1].findAll('tr')[2:]:
            cells = [td.text.replace('\n', '').strip() for td in tr.findAll('td')]
            if len(cells) < len(key0):
                # Rows with fewer cells than columns: take the missing leading
                # cells from the previously stored row. The offsets come from
                # the record and table widths instead of fixed 16/32, which
                # were only right for 16-field records.
                missing = len(key0) - len(cells)
                cells = rows[-1][len(prefix):len(prefix) + missing] + cells
            # The last five cells are replaced by the row's image file names
            # (without the ``.png`` extension).
            icons = [img['src'].split('/')[-1].replace('.png', '') for img in tr.findAll('img')]
            rows.append(prefix + cells[:-5] + icons)
    except Exception:
        # Best effort: if the page cannot be parsed, still store one row with
        # the listing fields only. ``Exception`` (rather than a bare except)
        # keeps Ctrl-C able to stop the run.
        rows.append(list(prefix))
    key1 = list(u.keys()) + key0  # listing keys followed by the table keys
    for row in rows:
        # Each collection gets its own dict: ``insert_one`` adds an ``_id`` to
        # the document it is given, so one dict must not be inserted twice.
        idm11.insert_one(dict(zip(key1, row)))
        idmmod.insert_one(dict(zip(key1, row)))
# Split the "<code> | <name>" values of the four region fields into a bare
# code (kept under the original key) and a separate name field.
for u in tqdm(idmy.find({},{'_id':1,'id_kabupaten':1,'id_prov':1,'id_desa':1,'id_kecamatan':1,'detail':1})):
    region_fields = (
        ('id_prov', 'prov'),
        ('id_kabupaten', 'kabupaten'),
        ('id_kecamatan', 'kecamatan'),
        ('id_desa', 'desa'),
    )
    try:
        codes = {}
        names = {}
        for code_key, name_key in region_fields:
            parts = u[code_key].split('|')
            codes[code_key] = parts[0].strip()
            names[name_key] = parts[1].strip()
    except (KeyError, IndexError, AttributeError):
        # Missing field, value without a "|" (for example one that was
        # already split), or a non-string value: leave the document as is.
        continue
    # The update runs outside the ``try`` so database errors are no longer
    # silently discarded. Codes first, then names, as in the original $set.
    idmy.update_one({'_id': u['_id']}, {'$set': {**codes, **names}})
# Set ``_flag`` to 0 on every document of ``idmy`` in a single bulk update
# instead of issuing one ``update_one`` call per document.
idmy.update_many({}, {'$set': {'_flag': 0}})
# Download the detail table of every village that has no rows in ``idm`` yet.
ik = set(idm.find({}).distinct('id_desa'))
# Names given to the cells of one row of the detail table.
key0 = ['Indeks_komposit','Dimensi','skor','Nilai','Perangkat_Indikator','skor_1','Nilai_1','Indikator_peritem','skor_2','kosong','eksisting_dari_indikator_umum','Kewenangan_pusat','Kewenangan_provinsi','Kewenangan_kabupaten','Kewenangan_desa','Kewenangan_sosial_alami']
for u in idmy.find({}, {'_id': 0}):  # NOTE: no sort is applied to this query
    if u['id_desa'] in ik:
        print('data id %s sudah tersedia' % u['id_desa'])
        continue
    print('mengunduh %s' % u['detail'])
    # The detail URL is the first double-quoted part of the ``detail`` field.
    url0 = root + u['detail'].split('"')[1]
    # Fetch this village's own page; the original passed the fixed sample
    # ``url`` here, so every village received the same table.
    soup = ambil(url0)
    prefix = list(u.values())
    rows = []
    try:
        for tr in soup.findAll('div', {'class': 'row'})[1].findAll('tr')[2:]:
            cells = [td.text.replace('\n', '').strip() for td in tr.findAll('td')]
            if len(cells) < len(key0):
                # Rows with fewer cells than columns: take the missing leading
                # cells from the previously stored row. The offsets come from
                # the record and table widths instead of fixed 16/32, which
                # were only right for 16-field records.
                missing = len(key0) - len(cells)
                cells = rows[-1][len(prefix):len(prefix) + missing] + cells
            # The last five cells are replaced by the row's image file names
            # (without the ``.png`` extension).
            icons = [img['src'].split('/')[-1].replace('.png', '') for img in tr.findAll('img')]
            rows.append(prefix + cells[:-5] + icons)
    except Exception:
        # Best effort: if the page cannot be parsed, still store one row with
        # the listing fields only. ``Exception`` (rather than a bare except)
        # keeps Ctrl-C able to stop the run.
        rows.append(list(prefix))
    key1 = list(u.keys()) + key0  # listing keys followed by the table keys
    for row in rows:
        idm.insert_one(dict(zip(key1, row)))
    if rows:
        # Every inserted row carries this village's id, so remember it here
        # instead of re-running ``distinct`` after each village.
        ik.add(u['id_desa'])
# x = ambilJson(jurl)['data'] | |
# [ i['detail'].split('"')[1] for i in x] | |
# [i.text for i in n.findAll('div',{'class':'row'})[1].findAll('tr')[4].findAll('td')] | |
# n.findAll('div',{'class':'row'})[1].findAll('tr')[3].findAll('img')[0]['src'].split('/')[-1] | |
# for i in n.findAll('div',{'class':'row'})[1].findAll('tr')[2:]: | |
# [j['src'].split('/')[-1] for j in i.findAll('img')] | |
# [ i for j in kkx[0] for i in j.split('|')] #### split item didalam list | |
# | |
# kkx = [] | |
# s=0 | |
# for u in tqdm(y): | |
# url0 = root+u['detail'].split('"')[1] | |
# n=ambil(url) | |
# kkk=[] | |
# try: | |
# for i in n.findAll('div',{'class':'row'})[1].findAll('tr')[2:]: | |
# kk = [j.text for j in i.findAll('td')] | |
# if len(kk) <16: | |
# kk = kkk[-1][11:11+(16-len(kk))] + kk | |
# kk = kk[:-5] + [j['src'].split('/')[-1].replace('.png','') for j in i.findAll('img')] | |
# kk = list(u.values())+kk | |
# kkk.append(kk) | |
# except: | |
# kk = list(u.values()) | |
# # kk.insert() | |
# kkk.append(kk) | |
# kkx = kkx + kkk | |
# for l,k in enumerate(kkx): | |
# kkx[l] = [ i for j in k for i in j.split('|')] | |
# idm = dbrem.idm | |
# keys = [str(i) for i in range(1,33)] | |
# for i in tqdm(kkx): | |
# idm.insert_one(dict(zip(keys,i))) | |
# with open('dmk.csv','w') as f: | |
# wr = csv.writer(f) | |
# for row in kkx: | |
# wr.writerow(row) | |
Content-Type: application/x-www-form-urlencoded | |
Origin: https://caribdt.dinsos.jatengprov.go.id | |
Referer: https://caribdt.dinsos.jatengprov.go.id/public/dashboard | |
Upgrade-Insecure-Requests: 1 | |
User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36 | |
POST /public/rekap-desa-proses HTTP/1.1 | |
Host: caribdt.dinsos.jatengprov.go.id | |
Connection: keep-alive | |
Content-Length: 97 | |
Cache-Control: max-age=0 | |
Origin: https://caribdt.dinsos.jatengprov.go.id | |
Upgrade-Insecure-Requests: 1 | |
Content-Type: application/x-www-form-urlencoded | |
User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36 | |
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8 | |
Referer: https://caribdt.dinsos.jatengprov.go.id/public/dashboard | |
Accept-Encoding: gzip, deflate, br | |
Accept-Language: en-US,en;q=0.9 | |
Cookie: _ga=GA1.3.947129702.1583481586; _gid=GA1.3.289665751.1583481586; XSRF-TOKEN=eyJpdiI6IkNMSGVnV1BXVDFQaW8xYmMwSSt5c0E9PSIsInZhbHVlIjoiaVRFbUJIb1ZLRjhBckQxaXYwZnZ4ODJCVjlaZXdCaXF3WnJnbW1aaTZVVkp0TkVIOWtTdXhJQXBcL0hxS1M5SSsiLCJtYWMiOiI1ODU4Y2JmZTFhZDBhMmI0ZjkwOTNlNWIxZGZmNjk0OTU2NGM1NzFkMWYxNTAyY2I3MDVkNWZhYjZlMGE1MzFlIn0%3D; caribdt_session=eyJpdiI6IjJzb2hlZXA4QkZnSWo3akE4a3lSeWc9PSIsInZhbHVlIjoibk5YUUMxRTVhcGR2YzBoXC94Nm9BUDFtK3p3eWJOeWJvbDFmSXdIN21XSm1zMlMxVnhRR0xUUVJLQ1QxdXlZM1wvIiwibWFjIjoiMGUxYzk5ZWJmNGYyZTg3ZjZmZmNiNTU2ZGJlOWE2MzQ4NzUwOGY4YjNmMDhjMTk3NTk1ZjI2NGFkMzRiZDE3NSJ9 | |
import requests | |
import json | |
# POST the village recap form of the caribdt dashboard.
url = 'https://caribdt.dinsos.jatengprov.go.id/public/rekap-desa-proses'
# Browser-like request headers. ``Content-Length`` is intentionally not set:
# requests computes it from the actual body, and a fixed value would not match.
h = {
    'Host': 'caribdt.dinsos.jatengprov.go.id',
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'Referer': 'https://caribdt.dinsos.jatengprov.go.id/public/dashboard',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Origin': 'https://caribdt.dinsos.jatengprov.go.id',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Cache-Control': 'max-age=0, no-cache',
    'Pragma': 'no-cache',
}
# Form fields. The trailing "&" that followed the token was dropped: it looks
# like the field separator of a copied form body, not part of the token.
# NOTE(review): confirm against a fresh capture.
p = {
    '_token': 'u7ER5WkjR6E2exdsvy17jENmrEYJPTitS3s0qiWk',
    'kdkab': '08',
    'nmkec': 'KAJORAN',
    'nmdesa': 'BANJARETNO',
}
# NOTE(review): session cookies and the token above are hard-coded values
# copied from a browser session; they should be obtained at run time and not
# kept in source.
c = {
    'XSRF-TOKEN': 'eyJpdiI6ImRPUm1ZVDJhWXZSNnBhSGJBUkE5d3c9PSIsInZhbHVlIjoiNmsxRkJINmNUSTFYQitteEZFK1FcL1ZENHpPUWRBSzh4QzVzb21JaVwvdXl5QXAzXC9uM29QUXRqSTMzTW5IelZKUyIsIm1hYyI6IjhlMTk4MWE1OGMzYTIxNjMxOGRjYmNlZjNlZjAxNDUxYmY5M2NiMmQzNzVmNWUyNjgxZjJhODM0NTdlMzNhYWYifQ%3D%3D',
    'caribdt_session': 'eyJpdiI6Ijg1SW5hMFBYdiswdklOM0FmbU9IZnc9PSIsInZhbHVlIjoicHI5bWxXT1hEeWFcL1BEaDhvUlZ6ZzJvWmhzWVlRd09aUHJ1SlFoajZMYVlxUlN2Q01jdERwVTd6cDcyaHdGRkUiLCJtYWMiOiIzZTAwNjRhY2FiMGI3NGI2M2NhN2RkYjJkMTE5M2JlMWU0ZDAzZWU2MjZiMGEwNDYyMWIzZjMzMDk4YTYxMzc1In0%3D',
    '_ga': 'GA1.3.876487108.1583484300',
    '_gid': 'GA1.3.487186037.1583484300',
}
# Pass the dict itself so requests form-encodes it, matching the declared
# ``application/x-www-form-urlencoded`` content type; ``json.dumps(p)`` sent a
# JSON string under that content type.
r = requests.post(url, data=p, headers=h, cookies=c)
# Add slug URLs to every item of ``res`` and clean its ``desc``/``detail``
# text in place. A plain loop replaces the list comprehension that was used
# only for its side effects, and the regex is now a raw string.
for item in res:
    title_slug = re.sub(r'\W+', ' ', item['title']).strip().lower().replace(' ', '-')
    rubrik_slug = item['namarubrik'].lower().replace(' ', '-')
    item.update({
        'slug-url': base + 'detail/%s/%s' % (item['id'], title_slug),
        'desc': re.sub(clean, ' ', item['desc']),
        'slug-rubrik': base + 'rubrik/%s/%s' % (item['rubrik'], rubrik_slug),
        # Also drops literal backslash-r-backslash-n sequences left in the text.
        'detail': re.sub(clean, '', item['detail']).replace('\\r\\n', ''),
    })
import cv2 | |
import matplotlib.pyplot as plt | |
import cvlib as cv | |
from cvlib.object_detection import draw_bbox | |
# Detect common objects in one image, show the annotated result and print how
# many of the detections are labelled 'car'.
im = cv2.imread('cars2.jpg')
if im is None:
    # ``cv2.imread`` returns None instead of raising when the file cannot be
    # read; fail here with a clear message rather than inside the detector.
    raise FileNotFoundError("could not read image 'cars2.jpg'")
bbox, label, conf = cv.detect_common_objects(im)
output_image = draw_bbox(im, bbox, label, conf)
# OpenCV images are in BGR channel order while matplotlib expects RGB;
# convert so the displayed colours are correct.
plt.imshow(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB))
plt.show()
print(str(label.count('car')))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment