Skip to content

Instantly share code, notes, and snippets.

@philshem
Created August 17, 2020 14:51
Show Gist options
  • Select an option

  • Save philshem/b911d9e90bcab700575cc6c432a5ece6 to your computer and use it in GitHub Desktop.

Select an option

Save philshem/b911d9e90bcab700575cc6c432a5ece6 to your computer and use it in GitHub Desktop.
scraper for vehicle counts from autoscout.ch https://twitter.com/philshem/status/1295371670321070080
import requests
from bs4 import BeautifulSoup
import json
from collections import defaultdict
from random import randint
from time import sleep
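'''
Scraper for counts of each make of car listed on autoscout24.ch.
https://twitter.com/philshem/status/1295371670321070080
When it prints ERROR the script stops; you have to run it again and again,
many times, until every make has been processed. Makes already written to
results.csv are skipped on each re-run.
(This is needed because I'm too lazy to write a proper sleep routine.)
'''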
def _load_done_ids(path):
    """Return the set of make ids already recorded in the CSV at *path*.

    A missing file (first run) yields an empty set. Lines whose first
    comma-separated field is not an integer (blank lines, a header row)
    are ignored instead of aborting the run.
    """
    try:
        fp = open(path, 'r')
    except FileNotFoundError:
        return set()
    done = set()
    with fp:
        for line in fp:
            try:
                done.add(int(line.split(',')[0]))
            except ValueError:
                continue
    return done


def _collect_makes(data):
    """Map each make id to ``{'make': text, 'pop': isPopular}``.

    Reads the options of the search item whose ``id`` is ``'make'`` from
    the page state dict; options with a missing or empty ``value`` are
    skipped. Missing intermediate keys produce an empty mapping.
    """
    makes = {}
    search = data.get('search') or {}
    for item in search.get('items') or []:
        if item.get('id', '') != 'make':
            continue
        for option in item.get('options') or []:
            value = option.get('value', None)
            if value in (None, ''):
                continue
            makes[int(value)] = {
                'make': option.get('text', None),
                'pop': option.get('isPopular', False),
            }
    return makes


def main():
    """Scrape the listing count of every car make and append it to results.csv.

    One request is made first to read the list of make ids from the page's
    embedded state. Each make not yet present in ``results.csv`` is then
    requested in turn and a row ``id,make,count,popular`` is appended.
    The file is opened in append mode per row, so an interrupted run can
    be resumed simply by running the script again.

    Raises:
        SystemExit: when a make page contains no parseable state JSON
            (an ``ERROR:`` line is printed first).
    """
    url_base = 'https://www.autoscout24.ch/de/?make={}'
    results_path = 'results.csv'

    s = requests.Session()
    s.headers.update({'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'})

    # First request (with a fixed make id) is only used to read the full
    # list of make ids from the search form options.
    r = s.get(url_base.format(391))
    makes = _collect_makes(get_data(r))
    print('INFO:', len(makes), 'auto makes found')

    # Ids already written by previous runs; a set keeps the lookup O(1).
    done = _load_done_ids(results_path)

    for j, info in makes.items():
        make = info.get('make', None)
        if j in done:
            print('skipping:', j, make)
            continue
        print('running:', j, make)
        if not make:
            continue

        r = s.get(url_base.format(j))
        try:
            data = get_data(r)
        except (IndexError, ValueError):
            # No state JSON found in the HTML; stop so the run can be
            # repeated later and resume from results.csv.
            print('ERROR:', j, info)
            # NOTE(review): exit status 0 is kept from the original code;
            # consider a non-zero status so shell retry loops can detect it.
            raise SystemExit(0)

        stats = (data.get('search', None) or {}).get('stats', None)
        if not stats:
            continue
        count = stats.get('count', 0)
        popular = info.get('pop')

        # str() on every field: count may be an int (the default 0 is),
        # and str.join() rejects non-string items.
        row = ','.join((str(j), str(make), str(count), str(popular)))
        with open(results_path, 'a') as fp:
            fp.write(row + '\n')

        # Random pause of 3-13 seconds between requests.
        sleep(randint(3, 13))
def get_data(t):
    """Extract the ``window.INITIAL_STATE`` JSON object from a page response.

    Args:
        t: response object whose ``.content`` holds the page HTML
            (``main`` passes the result of ``requests.Session.get``).

    Returns:
        The decoded JSON value assigned to ``window.INITIAL_STATE``.

    Raises:
        ValueError: if the page has no ``<script id="initial-state">``
            element with text content, or its payload is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    soup = BeautifulSoup(t.content, features='html5lib')

    # Look the script up by its id rather than by position in the page,
    # so added or removed <script> tags do not break the lookup.
    script = soup.find('script', id='initial-state')
    if script is None or script.string is None:
        raise ValueError('no <script id="initial-state"> payload found')

    # Work on the tag's text content and strip the JS assignment wrapper:
    #   window.INITIAL_STATE = {...};
    payload = script.string.strip()
    prefix = 'window.INITIAL_STATE = '
    if payload.startswith(prefix):
        payload = payload[len(prefix):]
    if payload.endswith(';'):
        payload = payload[:-1]
    return json.loads(payload)
# Script entry point: start scraping only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
idx make count popular
209 AC 14 False
205 ACURA 1 False
245 ADLER 0 False
456 ADR 0 False
909 AERO 0 False
438 AGM 2 False
1 AIXAM 5 False
2 ALFA ROMEO 1824 True
530 ALLARD 2 False
882 ALPINE 83 True
233 ALVIS 3 False
239 AMC 5 False
969 AMERICAN LAFRANCE 0 False
3 AMERICAN MOTORS 1 False
572 AMILCAR 1 False
819 AMPHICAR 0 False
897 APAL 0 False
476 ARIEL 0 False
249 ARMSTRONG SIDDELEY 0 False
451 ARTEGA 2 False
4 ASTON MARTIN 369 True
965 ATS 0 False
576 AUBURN 1 False
5 AUDI 10146 True
6 AUSTIN 12 False
221 AUSTIN-HEALEY 27 True
231 AUTOBIANCHI 6 False
253 AVANTI 0 False
7 BENTLEY 395 True
435 BERKELEY 0 False
404 BERLIET 0 False
243 BERNA 0 False
8 BERTONE 0 False
398 BINZ 0 False
218 BITTER 2 False
745 BIZZARRINI 0 False
9 BMW 13446 True
10 BMW-ALPINA 124 True
519 BOATTAIL 0 False
322 BONNET 0 False
235 BORGWARD 0 False
537 BRABHAM 0 False
659 BRASIER 0 False
529 BRISTOL 1 False
11 BUGATTI 3 False
210 BUGGY 0 False
12 BUICK 35 True
13 CADILLAC 340 True
589 CAMPAGNA 0 False
649 CARBODIE 0 False
279 CARVER 0 False
14 CATERHAM 28 True
15 CHEVROLET 931 True
315 CHEVRON 0 False
16 CHRYSLER 285 True
17 CITROEN 2748 True
681 CITYEL 0 False
309 CLASSIC PLUS 0 False
626 CLEMENT 0 False
234 CLENET 1 False
839 CMC 0 False
525 CORD 0 False
849 CROCO 0 False
564 CROSSLE 0 False
18 DACIA 909 True
19 DAEWOO 49 True
242 DAF 1 False
20 DAIHATSU 186 True
21 DAIMLER 49 True
989 DALLARA 1 False
484 DAREN 0 False
721 DARRACQ 0 False
214 DATSUN 4 False
461 DAX 0 False
459 DE DION-BOUTON 0 False
22 DE TOMASO 9 False
437 DELAGE 0 False
810 DELAHAYE 0 False
310 DELARION 0 False
903 DELAUNAY BELLEVILLE 0 False
313 DeLorean 4 False
501 DESOTO 2 False
624 DESTINY 0 False
823 DETROIT ELECTRIC 0 False
1042 DEVINCI 2 False
385 DFM 2 False
591 DFSK 0 False
251 DIAVOLINO 1 False
219 DKW 3 False
23 DODGE 670 True
24 DONKERVOORT 4 False
734 DONNET 0 False
874 DS AUTOMOBILES 576 True
679 DURANT 1 False
506 EDSEL 2 False
750 EHP 0 False
821 ENZMANN 1 False
680 ESSEX 0 False
1007 ESTRIMA 5 False
206 EXCALIBUR 4 False
500 FACEL VEGA 0 False
25 FERRARI 1069 True
26 FIAT 4267 True
454 FISKER 4 False
27 FORD 6763 True
628 FRAMO 0 False
896 FRANKLIN 1 False
460 FREEWIEL 0 False
673 FULDAMOBIL 0 False
893 FULU 0 False
875 FUNYO 0 False
629 GAC GONOW 0 False
268 GAZ 0 False
490 GEORGES 0 False
392 GIBBS 0 False
432 GINETTA 0 False
450 GLAS 2 False
204 GMC 22 True
549 GOGGOMOBIL 0 False
920 GREAT WALL 0 False
378 GUMPERT 0 False
1040 Gurgel 1 False
622 HANOMAG 1 False
1009 HANROAD 0 False
418 HEALEY 0 False
430 HEINKEL 0 False
390 HILLMAN 0 False
884 HINDUSTAN 0 False
888 HITEC 13 False
747 HOLDEN 0 False
29 HONDA 1922 True
786 HORCH 0 False
383 HOTCHKISS 0 False
701 HRG 0 False
30 HS 1 False
230 HUDSON 2 False
31 HUMMER 30 True
250 HWM 0 False
32 HYUNDAI 3269 True
426 IMPERIA 0 False
33 INFINITI 206 True
34 INNOCENTI 5 False
551 INTERMECCANICA 0 False
198 INTERNATIONAL 2 False
436 INVICTA 0 False
535 IRMSCHER 2 False
483 ISO 1 False
35 ISUZU 71 True
36 ISUZU (J) 0 False
37 IVECO 2 False
960 JAC 43 True
38 JAGUAR 1364 True
39 JEEP 2141 True
216 JENSEN 6 False
142 JORDAN 0 False
248 JOWETT 0 False
427 KAISER 4 False
395 KAMOO 0 False
246 KARMANN 0 False
40 KIA 2153 True
780 KLEINSCHNITTGER 0 False
397 KOENIGSEGG 0 False
581 KOUGAR 0 False
41 KTM 6 False
834 KV 0 False
42 LADA 11 False
308 LAGONDA 3 False
43 LAMBORGHINI 352 True
44 LANCIA 255 True
45 LAND ROVER 2570 True
971 LE ZEBRE 2 False
597 LEA-FRANCIS 1 False
642 LEGENDS CAR 0 False
46 LEXUS 634 True
862 LEYLAND 0 False
47 LIGIER 3 False
207 LINCOLN 29 True
457 LOCOMOBILE 0 False
658 LOLA 1 False
752 LOMBARDI 0 False
889 LORENZ & RANKL 0 False
48 LOTUS 137 True
393 LTI 0 False
565 MAHINDRA 1 False
452 MALLOCK 0 False
330 MAN 0 False
907 MANTA CARS 0 False
570 MARCH 0 False
232 MARCOS 1 False
751 MARTIN 0 False
49 MASERATI 699 True
523 MATHIS 1 False
458 MATRA 1 False
674 MAXIMAG 0 False
226 MAYBACH 9 False
50 MAZDA 3633 True
474 McLAREN 110 True
85 MEGA 2 False
51 MERCEDES-BENZ 13458 True
215 MERCURY 8 False
238 MESSERSCHMITT 2 False
545 MEV HUMMER 0 False
52 MG 145 True
833 MIA 0 False
861 MICROCAR 1 False
53 MINELLI 0 False
817 MINERVA 0 False
54 MINI 2135 True
55 MITSUBISHI 1919 True
225 MONTEVERDI 4 False
56 MORGAN 32 True
211 MORRIS 12 False
522 MORS 0 False
725 MOSKWITSCH 0 False
716 MOSLER 1 False
217 MOWAG 1 False
667 MVS 0 False
325 NASH 1 False
57 NISSAN 3183 True
227 NSU 10 False
886 OAKLAND 0 False
381 OBERMAIER 0 False
58 OLDSMOBILE 22 True
707 OM 0 False
59 OPEL 6882 True
623 OVERLAND 0 False
201 PACKARD 1 False
371 PAGANI 2 False
603 PANHARD 0 False
316 PANTHER 0 False
60 PEUGEOT 4745 True
167 PGO 1 False
87 PIAGGIO 8 False
485 PIERCE ARROW 0 False
244 PININFARINA 0 False
203 PLYMOUTH 23 True
998 POLESTAR 2 False
61 PONTIAC 71 True
62 PORSCHE 3550 True
1050 PRC 1 False
63 PUCH 25 True
372 PUMA 0 False
64 QVALE 0 False
321 RADICAL 1 False
865 Rally 0 False
802 RAYTON FISSORE 0 False
787 RCB 0 False
938 RCH-AUTOMOTIVE 0 False
65 RELIANT 3 False
66 RENAULT 5177 True
425 REVA 0 False
241 RILEY 3 False
1004 RIMAC 0 False
67 ROLLS-ROYCE 133 True
455 ROSSION 0 False
68 ROVER 44 True
475 RUF 3 False
396 RUSKA 0 False
69 SAAB 251 True
1005 SABRA 0 False
739 SAKER 0 False
384 SALMSON 0 False
394 SAM 0 False
202 SANTANA 0 False
449 SAPOROSHEZ 0 False
914 SAUBER 0 False
199 SAURER 0 False
573 SBARRO 0 False
70 SEAT 3371 True
176 SECMA 1 False
685 SENECHAL 0 False
536 SEVENTY SEVEN 0 False
1041 SIATA 1 False
213 SIMCA 11 False
247 SINGER 0 False
71 SKODA 5478 True
72 SMART 755 True
453 SMILE 0 False
400 SOKON 0 False
809 SPECTRE 1 False
928 SPIRE 0 False
236 SPYKER 1 False
73 SSANGYONG 459 True
719 STANDARD 1 False
200 STEPHENS 1 False
222 STEYR 5 False
223 STUDEBAKER 2 False
625 STUTZ 1 False
74 SUBARU 2841 True
212 SUNBEAM 7 False
75 SUZUKI 2736 True
974 SWALLOW 1 False
420 SYLVA 0 False
76 TALBOT 7 False
77 TATA 4 False
240 TATRA 0 False
961 TATUUS 0 False
389 TAZZARI 1 False
763 TECNO 0 False
391 TESLA 190 True
434 THINK 0 False
78 TOYOTA 5106 True
224 TRABANT 1 False
185 TRIUMPH 57 True
79 TVR 11 False
428 UAZ 0 False
568 ULTIMA 0 False
687 UMM 0 False
237 UNIMOG 1 False
555 VALE 0 False
229 VAUXHALL 4 False
80 VENTURI 0 False
557 VESPA 0 False
546 VOLTEIS 0 False
81 VOLVO 4472 True
82 VW 12891 True
869 WARTBURG 1 False
996 WELTMEISTER 0 False
252 WESTFIELD 2 False
83 WIESMANN 8 False
208 WILLYS 16 False
327 WOLSELEY 1 False
326 YES! 2 False
84 ZAGATO 0 False
556 ZARP 0 False
711 ZASTAVA 0 False
228 ZBR 0 False
533 ZIMMER 0 False
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment