Created
July 22, 2020 04:16
-
-
Save kkprakasa/5275ec52a63b58e04353762824b7dd30 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#IDM | |
#!/bin/python3 | |
import urllib3 | |
from bs4 import BeautifulSoup | |
from time import sleep | |
import csv | |
import sys | |
import json | |
import ssl | |
import hashlib | |
from tqdm import tqdm | |
def ambilJson(url):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    Args:
        url: Absolute URL of an endpoint that answers with JSON.

    Returns:
        The object produced by ``json.loads`` for the response body.

    Raises:
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    http = urllib3.PoolManager()
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2403.107 Safari/537.36'}
    # Pass the headers by keyword: the third positional parameter of
    # request() is not ``headers`` (it is ``fields`` in urllib3 1.x and
    # ``body`` in 2.x), so the positional form never sent the User-Agent
    # as a header.
    req = http.request('GET', url, headers=headers)
    return json.loads(req.data)
def ambil(url):
    """Fetch ``url`` with an HTTP GET and return it parsed by BeautifulSoup.

    Args:
        url: Absolute URL of the page to download.

    Returns:
        A ``BeautifulSoup`` object built from the raw response body.
    """
    http = urllib3.PoolManager()
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'}
    # Pass the headers by keyword: the third positional parameter of
    # request() is not ``headers`` (it is ``fields`` in urllib3 1.x and
    # ``body`` in 2.x), so the positional form never sent the User-Agent
    # as a header.
    req = http.request('GET', url, headers=headers)
    # NOTE(review): no parser is named here, exactly as in the original
    # call; naming one explicitly would make parsing reproducible across
    # machines -- confirm which parser the existing data was built with.
    dsoup = BeautifulSoup(req.data)
    return dsoup
from urllib.parse import urlencode

# Base address every request in this script is built from.
root = 'http://idm.kemendesa.go.id'

# Hard-coded /idm_data query for one specific id_desa and year.
url = root+'/idm_data?id_prov=33&id_kabupaten=3321&id_kecamatan=332110&id_desa=3321102014&tahun=2019'

# Column names requested from /users/list_idm, in request order.
_jurl_columns = [
    'tahun',
    'id_prov',
    'id_kabupaten',
    'id_kecamatan',
    'id_desa',
    'iks_2019',
    'ike_2019',
    'ikl_2019',
    'idm_2019',
    'idm_status_2019',
    'detail',
]

# The query string is assembled from structured parameters instead of a
# single hand-pasted literal; urlencode() percent-encodes the square
# brackets, which yields the same text as the original string.
_jurl_params = [('draw', '1')]
for _idx, _name in enumerate(_jurl_columns):
    _prefix = 'columns[%d]' % _idx
    _jurl_params += [
        (_prefix + '[data]', _name),
        (_prefix + '[name]', ''),
        (_prefix + '[searchable]', 'true'),
        # Only the "detail" column is flagged as not orderable.
        (_prefix + '[orderable]', 'false' if _name == 'detail' else 'true'),
        (_prefix + '[search][value]', ''),
        (_prefix + '[search][regex]', 'false'),
    ]
_jurl_params += [
    ('order[0][column]', '0'),
    ('order[0][dir]', 'asc'),
    ('start', '0'),
    ('length', '0'),
    ('search[value]', ''),
    ('search[regex]', 'false'),
    ('_', '1568821375769'),
]

# Listing endpoint consumed by ambilJson().
jurl = root + '/users/list_idm?' + urlencode(_jurl_params)
# | |
# Download the listing rows from the JSON endpoint.
y = ambilJson(jurl)['data']

# Pull the IDM detail data for a single kabupaten.
# NOTE(review): ``idmy`` and ``idm`` are not defined in the visible part of
# the file; they are used like MongoDB collections -- confirm where they are
# created.
idkab = '3319'
# [ i for i in idmy.find_one({},{'_id':0,'id_kabupaten':1,'detail':1})['id_kabupaten'].split('|')[0].strip() == '3319' ]

# Names given to the 16 columns of one row of the detail table.
key0 = ['Indeks_komposit','Dimensi','skor','Nilai','Perangkat_Indikator','skor_1','Nilai_1','Indikator_peritem','skor_2','kosong','eksisting_dari_indikator_umum','Kewenangan_pusat','Kewenangan_provinsi','Kewenangan_kabupaten','Kewenangan_desa','Kewenangan_sosial_alami']

for u in tqdm(idmy.find({'id_kabupaten': idkab}, {'_id': 0})):
    # Skip listing rows whose detail page is already stored in ``idm``.
    if idm.find_one({'detail': u['detail']}) is not None:
        continue

    # The detail link is the text between the first pair of double quotes.
    url0 = root + u['detail'].split('"')[1]
    n = ambil(url0)
    kkk = []
    try:
        for i in n.findAll('div', {'class': 'row'})[1].findAll('tr')[2:]:
            kk = [j.text.replace('\n', '').strip() for j in i.findAll('td')]
            if len(kk) < 16:
                # Rows with fewer cells than the table has columns borrow the
                # missing leading cells from the previously stored row.
                # NOTE(review): the offset 16 presumably equals the number of
                # fields in ``u`` that prefix each stored row -- confirm.
                kk = kkk[-1][16:(32 - len(kk))] + kk
            # The last five cells are replaced by the file names (without
            # ".png") of the images found in the row.
            kk = kk[:-5] + [j['src'].split('/')[-1].replace('.png', '') for j in i.findAll('img')]
            kk = list(u.values()) + kk
            kkk.append(kk)
    except Exception:
        # Best-effort fallback kept from the original: when the page cannot
        # be parsed, store the listing fields alone. ``Exception`` replaces a
        # bare ``except`` so Ctrl-C / SystemExit are no longer swallowed.
        kk = list(u.values())
        kkk.append(kk)

    # Combine the listing keys and the table column names into one key list.
    key1 = list(u.keys()) + key0
    for row in kkk:
        # Turn the row into a dictionary and insert it into MongoDB once.
        # The original issued a second, identical insert right after this
        # one, which stored every row twice.
        data = dict(zip(key1, row))
        idm.insert_one(data)
# Download every IDM detail page that is not stored yet.
# NOTE(review): ``idmy`` and ``idm`` are not defined in the visible part of
# the file; they are used like MongoDB collections -- confirm where they are
# created.

# id_desa values already present in ``idm``; a set keeps the membership test
# below cheap.
ik = {i['id_desa'] for i in idm.find({})}

# Names given to the 16 columns of one row of the detail table.
key0 = ['Indeks_komposit','Dimensi','skor','Nilai','Perangkat_Indikator','skor_1','Nilai_1','Indikator_peritem','skor_2','kosong','eksisting_dari_indikator_umum','Kewenangan_pusat','Kewenangan_provinsi','Kewenangan_kabupaten','Kewenangan_desa','Kewenangan_sosial_alami']

# NOTE: the original comment here said "sort descending", but no sort is
# applied to this cursor.
for u in idmy.find({}, {'_id': 0}):
    if u['id_desa'] in ik:
        print('data id %s sudah tersedia' % u['id_desa'])
        continue

    print('mengunduh %s' % u['detail'])
    # The detail link is the text between the first pair of double quotes.
    url0 = root + u['detail'].split('"')[1]
    # Fetch the page of THIS village. The original passed the fixed
    # module-level ``url`` here, so the same page was downloaded every time.
    n = ambil(url0)
    kkk = []
    try:
        for i in n.findAll('div', {'class': 'row'})[1].findAll('tr')[2:]:
            kk = [j.text.replace('\n', '').strip() for j in i.findAll('td')]
            if len(kk) < 16:
                # Rows with fewer cells than the table has columns borrow the
                # missing leading cells from the previously stored row.
                # NOTE(review): the offset 16 presumably equals the number of
                # fields in ``u`` that prefix each stored row -- confirm.
                kk = kkk[-1][16:(32 - len(kk))] + kk
            # The last five cells are replaced by the file names (without
            # ".png") of the images found in the row.
            kk = kk[:-5] + [j['src'].split('/')[-1].replace('.png', '') for j in i.findAll('img')]
            kk = list(u.values()) + kk
            kkk.append(kk)
    except Exception:
        # Best-effort fallback kept from the original: when the page cannot
        # be parsed, store the listing fields alone. ``Exception`` replaces a
        # bare ``except`` so Ctrl-C / SystemExit are no longer swallowed.
        kk = list(u.values())
        kkk.append(kk)

    # Combine the listing keys and the table column names into one key list.
    key1 = list(u.keys()) + key0
    for row in kkk:
        data = dict(zip(key1, row))
        # Plain insert: wrapping the insert result in tqdm() only created an
        # idle progress bar.
        idm.insert_one(data)

    # Every stored row carries this village's id_desa, so remembering it
    # locally replaces re-reading the whole collection after each village.
    if kkk:
        ik.add(u['id_desa'])
# debug | |
import requests | |
def ambil(url):
    """Download ``url`` with ``requests`` and hand the body to BeautifulSoup.

    TLS certificate verification is switched off (``verify=False``).

    Args:
        url: Address of the page to download.

    Returns:
        A ``BeautifulSoup`` object built from the raw response content.
    """
    response = requests.get(url, verify=False)
    return BeautifulSoup(response.content)
# Names given to the 16 columns of one row of the detail table.
key0 = ['Indeks_komposit','Dimensi','skor','Nilai','Perangkat_Indikator','skor_1','Nilai_1','Indikator_peritem','skor_2','kosong','eksisting_dari_indikator_umum','Kewenangan_pusat','Kewenangan_provinsi','Kewenangan_kabupaten','Kewenangan_desa','Kewenangan_sosial_alami']

# Scrape the detail pages of province "11" into ``idm11`` and ``idmmod``.
# NOTE(review): ``idmy``, ``idm11`` and ``idmmod`` are not defined in the
# visible part of the file; they are used like MongoDB collections -- confirm
# where they are created.
for u in tqdm(idmy.find({'id_prov': "11"}, {'_id': 0})):
    # Skip listing rows whose detail page is already stored in ``idm11``.
    if idm11.find_one({'detail': u['detail']}) is not None:
        continue

    # The detail link is the text between the first pair of double quotes.
    url0 = root + u['detail'].split('"')[1]
    n = ambil(url0)
    kkk = []
    try:
        for i in n.findAll('div', {'class': 'row'})[1].findAll('tr')[2:]:
            kk = [j.text.replace('\n', '').strip() for j in i.findAll('td')]
            if len(kk) < 16:
                # Rows with fewer cells than the table has columns borrow the
                # missing leading cells from the previously stored row.
                # NOTE(review): the offset 16 presumably equals the number of
                # fields in ``u`` that prefix each stored row -- confirm.
                kk = kkk[-1][16:(32 - len(kk))] + kk
            # The last five cells are replaced by the file names (without
            # ".png") of the images found in the row.
            kk = kk[:-5] + [j['src'].split('/')[-1].replace('.png', '') for j in i.findAll('img')]
            kk = list(u.values()) + kk
            kkk.append(kk)
    except Exception:
        # Best-effort fallback kept from the original: when the page cannot
        # be parsed, store the listing fields alone. ``Exception`` replaces a
        # bare ``except`` so Ctrl-C / SystemExit are no longer swallowed.
        kk = list(u.values())
        kkk.append(kk)

    # Combine the listing keys and the table column names into one key list.
    key1 = list(u.keys()) + key0
    for row in kkk:
        data = dict(zip(key1, row))
        idm11.insert_one(data)
        # As in the original, the second collection receives its own freshly
        # built dictionary rather than the object just inserted above.
        idmmod.insert_one(dict(zip(key1, row)))
# Split every region field of the form "<code> | <name>" into a code field
# (id_prov, id_kabupaten, id_kecamatan, id_desa) and a name field (prov,
# kabupaten, kecamatan, desa), then write both back to the document.
# NOTE(review): ``idmy`` is not defined in the visible part of the file; it
# is used like a MongoDB collection -- confirm where it is created.
_region_levels = ('prov', 'kabupaten', 'kecamatan', 'desa')

for u in tqdm(idmy.find({}, {'_id': 1, 'id_kabupaten': 1, 'id_prov': 1, 'id_desa': 1, 'id_kecamatan': 1, 'detail': 1})):
    try:
        # Split each field once; index 0 is the code, index 1 the name.
        pieces = {level: u['id_' + level].split('|') for level in _region_levels}
        codes = {'id_' + level: pieces[level][0].strip() for level in _region_levels}
        names = {level: pieces[level][1].strip() for level in _region_levels}
    except (KeyError, IndexError, AttributeError):
        # Documents that lack a field, hold a non-string value, or contain no
        # "|" separator (for example because they were already split) are
        # skipped, as before. Only these parse failures are ignored; errors
        # raised by the database call below are no longer hidden.
        continue

    # Same field order as the original update: all codes first, then names.
    idmy.update_one({'_id': u['_id']}, {'$set': {**codes, **names}})
# Reset ``_flag`` to 0 on every document of ``idmy``. A single update_many
# call with an empty filter replaces one update_one round trip per document.
# NOTE(review): assumes ``idmy`` is a PyMongo collection (it is used with
# find / update_one elsewhere in this file) -- confirm.
idmy.update_many({}, {'$set': {'_flag': 0}})
# Download every IDM detail page whose id_desa is not stored in ``idm`` yet.
# NOTE(review): ``idmy`` and ``idm`` are not defined in the visible part of
# the file; they are used like MongoDB collections -- confirm where they are
# created.

# id_desa values already present in ``idm``; a set keeps the membership test
# below cheap.
ik = set(idm.find({}).distinct('id_desa'))

# Names given to the 16 columns of one row of the detail table.
key0 = ['Indeks_komposit','Dimensi','skor','Nilai','Perangkat_Indikator','skor_1','Nilai_1','Indikator_peritem','skor_2','kosong','eksisting_dari_indikator_umum','Kewenangan_pusat','Kewenangan_provinsi','Kewenangan_kabupaten','Kewenangan_desa','Kewenangan_sosial_alami']

# NOTE: the original comment here said "sort descending", but no sort is
# applied to this cursor.
for u in idmy.find({}, {'_id': 0}):
    if u['id_desa'] in ik:
        print('data id %s sudah tersedia' % u['id_desa'])
        continue

    print('mengunduh %s' % u['detail'])
    # The detail link is the text between the first pair of double quotes.
    url0 = root + u['detail'].split('"')[1]
    # Fetch the page of THIS village. The original passed the fixed
    # module-level ``url`` here, so the same page was downloaded every time.
    n = ambil(url0)
    kkk = []
    try:
        for i in n.findAll('div', {'class': 'row'})[1].findAll('tr')[2:]:
            kk = [j.text.replace('\n', '').strip() for j in i.findAll('td')]
            if len(kk) < 16:
                # Rows with fewer cells than the table has columns borrow the
                # missing leading cells from the previously stored row.
                # NOTE(review): the offset 16 presumably equals the number of
                # fields in ``u`` that prefix each stored row -- confirm.
                kk = kkk[-1][16:(32 - len(kk))] + kk
            # The last five cells are replaced by the file names (without
            # ".png") of the images found in the row.
            kk = kk[:-5] + [j['src'].split('/')[-1].replace('.png', '') for j in i.findAll('img')]
            kk = list(u.values()) + kk
            kkk.append(kk)
    except Exception:
        # Best-effort fallback kept from the original: when the page cannot
        # be parsed, store the listing fields alone. ``Exception`` replaces a
        # bare ``except`` so Ctrl-C / SystemExit are no longer swallowed.
        kk = list(u.values())
        kkk.append(kk)

    # Combine the listing keys and the table column names into one key list.
    key1 = list(u.keys()) + key0
    for row in kkk:
        data = dict(zip(key1, row))
        # Plain insert: wrapping the insert result in tqdm() only created an
        # idle progress bar.
        idm.insert_one(data)

    # Every stored row carries this village's id_desa, so remembering it
    # locally replaces re-running distinct() on the collection each time.
    if kkk:
        ik.add(u['id_desa'])
# x = ambilJson(jurl)['data'] | |
# [ i['detail'].split('"')[1] for i in x] | |
# [i.text for i in n.findAll('div',{'class':'row'})[1].findAll('tr')[4].findAll('td')] | |
# n.findAll('div',{'class':'row'})[1].findAll('tr')[3].findAll('img')[0]['src'].split('/')[-1] | |
# for i in n.findAll('div',{'class':'row'})[1].findAll('tr')[2:]: | |
# [j['src'].split('/')[-1] for j in i.findAll('img')] | |
# [ i for j in kkx[0] for i in j.split('|')] #### split item didalam list | |
# | |
# kkx = [] | |
# s=0 | |
# for u in tqdm(y): | |
# url0 = root+u['detail'].split('"')[1] | |
# n=ambil(url) | |
# kkk=[] | |
# try: | |
# for i in n.findAll('div',{'class':'row'})[1].findAll('tr')[2:]: | |
# kk = [j.text for j in i.findAll('td')] | |
# if len(kk) <16: | |
# kk = kkk[-1][11:11+(16-len(kk))] + kk | |
# kk = kk[:-5] + [j['src'].split('/')[-1].replace('.png','') for j in i.findAll('img')] | |
# kk = list(u.values())+kk | |
# kkk.append(kk) | |
# except: | |
# kk = list(u.values()) | |
# # kk.insert() | |
# kkk.append(kk) | |
# kkx = kkx + kkk | |
# for l,k in enumerate(kkx): | |
# kkx[l] = [ i for j in k for i in j.split('|')] | |
# idm = dbrem.idm | |
# keys = [str(i) for i in range(1,33)] | |
# for i in tqdm(kkx): | |
# idm.insert_one(dict(zip(keys,i))) | |
# with open('dmk.csv','w') as f: | |
# wr = csv.writer(f) | |
# for row in kkx: | |
# wr.writerow(row) | |
Content-Type: application/x-www-form-urlencoded | |
Origin: https://caribdt.dinsos.jatengprov.go.id | |
Referer: https://caribdt.dinsos.jatengprov.go.id/public/dashboard | |
Upgrade-Insecure-Requests: 1 | |
User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36 | |
POST /public/rekap-desa-proses HTTP/1.1 | |
Host: caribdt.dinsos.jatengprov.go.id | |
Connection: keep-alive | |
Content-Length: 97 | |
Cache-Control: max-age=0 | |
Origin: https://caribdt.dinsos.jatengprov.go.id | |
Upgrade-Insecure-Requests: 1 | |
Content-Type: application/x-www-form-urlencoded | |
User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36 | |
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8 | |
Referer: https://caribdt.dinsos.jatengprov.go.id/public/dashboard | |
Accept-Encoding: gzip, deflate, br | |
Accept-Language: en-US,en;q=0.9 | |
Cookie: _ga=GA1.3.947129702.1583481586; _gid=GA1.3.289665751.1583481586; XSRF-TOKEN=eyJpdiI6IkNMSGVnV1BXVDFQaW8xYmMwSSt5c0E9PSIsInZhbHVlIjoiaVRFbUJIb1ZLRjhBckQxaXYwZnZ4ODJCVjlaZXdCaXF3WnJnbW1aaTZVVkp0TkVIOWtTdXhJQXBcL0hxS1M5SSsiLCJtYWMiOiI1ODU4Y2JmZTFhZDBhMmI0ZjkwOTNlNWIxZGZmNjk0OTU2NGM1NzFkMWYxNTAyY2I3MDVkNWZhYjZlMGE1MzFlIn0%3D; caribdt_session=eyJpdiI6IjJzb2hlZXA4QkZnSWo3akE4a3lSeWc9PSIsInZhbHVlIjoibk5YUUMxRTVhcGR2YzBoXC94Nm9BUDFtK3p3eWJOeWJvbDFmSXdIN21XSm1zMlMxVnhRR0xUUVJLQ1QxdXlZM1wvIiwibWFjIjoiMGUxYzk5ZWJmNGYyZTg3ZjZmZmNiNTU2ZGJlOWE2MzQ4NzUwOGY4YjNmMDhjMTk3NTk1ZjI2NGFkMzRiZDE3NSJ9 | |
import requests | |
import json | |
# POST one village lookup to the caribdt "rekap-desa-proses" form endpoint.
url = 'https://caribdt.dinsos.jatengprov.go.id/public/rekap-desa-proses'

# Request headers. 'Content-Length' is intentionally not listed: requests
# derives it from the body it actually sends.
h = {
    'Host': 'caribdt.dinsos.jatengprov.go.id',
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'Referer': 'https://caribdt.dinsos.jatengprov.go.id/public/dashboard',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Origin': 'https://caribdt.dinsos.jatengprov.go.id',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Cache-Control': 'max-age=0, no-cache',
    'Pragma': 'no-cache',
}

# Form fields.
# NOTE(review): the '_token' value ends with '&', which looks like a
# separator copied from a raw request body -- confirm and strip if so.
p = {'_token': 'u7ER5WkjR6E2exdsvy17jENmrEYJPTitS3s0qiWk&', 'kdkab': '08', 'nmkec': 'KAJORAN', 'nmdesa': 'BANJARETNO'}

# NOTE(review): these are hard-coded session cookies captured from a browser;
# they should not live in source control and presumably expire -- confirm.
c = {
    'XSRF-TOKEN': 'eyJpdiI6ImRPUm1ZVDJhWXZSNnBhSGJBUkE5d3c9PSIsInZhbHVlIjoiNmsxRkJINmNUSTFYQitteEZFK1FcL1ZENHpPUWRBSzh4QzVzb21JaVwvdXl5QXAzXC9uM29QUXRqSTMzTW5IelZKUyIsIm1hYyI6IjhlMTk4MWE1OGMzYTIxNjMxOGRjYmNlZjNlZjAxNDUxYmY5M2NiMmQzNzVmNWUyNjgxZjJhODM0NTdlMzNhYWYifQ%3D%3D',
    'caribdt_session': 'eyJpdiI6Ijg1SW5hMFBYdiswdklOM0FmbU9IZnc9PSIsInZhbHVlIjoicHI5bWxXT1hEeWFcL1BEaDhvUlZ6ZzJvWmhzWVlRd09aUHJ1SlFoajZMYVlxUlN2Q01jdERwVTd6cDcyaHdGRkUiLCJtYWMiOiIzZTAwNjRhY2FiMGI3NGI2M2NhN2RkYjJkMTE5M2JlMWU0ZDAzZWU2MjZiMGEwNDYyMWIzZjMzMDk4YTYxMzc1In0%3D',
    '_ga': 'GA1.3.876487108.1583484300',
    '_gid': 'GA1.3.487186037.1583484300',
}

# Send the dict itself so requests form-encodes it, matching the declared
# 'application/x-www-form-urlencoded' Content-Type. The original sent
# json.dumps(p), i.e. a JSON body under a form-encoded content type.
r = requests.post(url, data=p, headers=h, cookies=c)
import re

# For every item in ``res``: add slug URLs and replace 'desc' / 'detail' with
# versions that have the ``clean`` pattern removed.
# NOTE(review): ``res``, ``base`` and ``clean`` are not defined in the visible
# part of the file -- confirm where they come from.
# A plain loop replaces the list comprehension that was used only for its
# side effects, and the regex is now a raw string so '\W' is not treated as
# a string escape.
for i in res:
    # Title with every run of non-word characters collapsed, lower-cased and
    # joined with '-'.
    title_slug = re.sub(r'\W+', ' ', i['title']).strip().lower().replace(' ', '-')
    rubrik_slug = i['namarubrik'].lower().replace(' ', '-')
    i.update({
        'slug-url': base + 'detail/%s/%s' % (i['id'], title_slug),
        'desc': re.sub(clean, ' ', i['desc']),
        'slug-rubrik': base + 'rubrik/%s/%s' % (i['rubrik'], rubrik_slug),
        # Also drops literal backslash-r-backslash-n sequences from the text.
        'detail': re.sub(clean, '', i['detail']).replace('\\r\\n', ''),
    })
import cv2 | |
import matplotlib.pyplot as plt | |
import cvlib as cv | |
from cvlib.object_detection import draw_bbox | |
# Detect common objects in cars2.jpg, show the annotated image and print how
# many detections are labelled 'car'.
im = cv2.imread('cars2.jpg')
if im is None:
    # cv2.imread() returns None instead of raising when the file is missing
    # or unreadable; fail here with a clear message rather than deeper inside
    # the detector.
    raise FileNotFoundError("cars2.jpg could not be read")

bbox, label, conf = cv.detect_common_objects(im)
output_image = draw_bbox(im, bbox, label, conf)

# OpenCV images are in BGR channel order while matplotlib expects RGB;
# convert before display so the colours are not swapped.
plt.imshow(cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB))
plt.show()

print(str(label.count('car')))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment