Created
August 17, 2020 14:51
-
-
Save philshem/b911d9e90bcab700575cc6c432a5ece6 to your computer and use it in GitHub Desktop.
Scraper for vehicle counts per make from autoscout24.ch https://twitter.com/philshem/status/1295371670321070080
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import requests | |
| from bs4 import BeautifulSoup | |
| import json | |
| from collections import defaultdict | |
| from random import randint | |
| from time import sleep | |
| ''' | |
| scraper for counts of each make of car found at autoscout24.ch | |
| https://twitter.com/philshem/status/1295371670321070080 | |
| when it prints ERROR, you have to run it again and again, many times | |
| (because I'm too lazy to write a proper sleep routine) | |
| ''' | |
def _read_done_ids(path):
    """Return the set of make ids already recorded in the CSV at *path*.

    A missing file yields an empty set (first run). Rows whose first column
    is not an integer -- e.g. a header or a blank line -- are skipped.
    """
    done = set()
    try:
        with open(path, 'r') as fp:
            for line in fp:
                try:
                    done.add(int(line.split(',')[0]))
                except ValueError:
                    # header / blank / malformed row: nothing to resume from
                    continue
    except FileNotFoundError:
        # nothing scraped yet
        pass
    return done


def main():
    """Scrape the number of listings per car make and append them to results.csv.

    Steps:
      1. Request one search page and read the list of make ids/names from
         the JSON state embedded in it (via get_data).
      2. Read results.csv (if present) to find make ids already written.
      3. For every remaining make, request its page, read
         ``search.stats.count`` and append ``id,make,count,popular`` to
         results.csv, then sleep a random 3-13 seconds.

    If the embedded JSON cannot be extracted for a make, ``ERROR:`` is
    printed and the process exits with status 0; because finished makes are
    skipped on the next run, the script can simply be started again.
    """
    url_base = 'https://www.autoscout24.ch/de/?make={}'
    results_path = 'results.csv'

    s = requests.Session()
    s.headers.update({'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'})

    # make first request to get all possible make ids
    # (391 is one existing make id; presumably any valid id would do)
    r = s.get(url_base.format(str(391)))
    data = get_data(r)

    # make id -> {'make': display name, 'pop': isPopular flag}
    makes = {}
    search = data.get('search') or {}
    for item in search.get('items') or []:
        if item.get('id', '') != 'make':
            continue
        for option in item.get('options') or []:
            value = option.get('value', None)
            if value in [None, '']:
                continue
            makes[int(value)] = {
                'make': option.get('text', None),
                'pop': option.get('isPopular', False),
            }
    print('INFO:', len(makes), 'auto makes found')

    done = _read_done_ids(results_path)

    # loop over all make ids
    for j, info in makes.items():
        make = info.get('make', None)

        # exclude already found
        if j in done:
            print('skipping:', j, make)
            continue

        print('running:', j, make)
        if not make:
            continue

        r = s.get(url_base.format(str(j)))
        try:
            data = get_data(r)
        except Exception:
            # no json found in html; stop here so the run can be restarted.
            # ``except Exception`` (not a bare except) lets Ctrl-C through.
            print('ERROR:', j, info)
            raise SystemExit(0)

        # dive into dict
        stats = (data.get('search', None) or {}).get('stats', None)
        if not stats:
            continue

        count = stats.get('count', 0)
        popular = info.get('pop')

        # append to one csv; every field is stringified because count is
        # numeric in the JSON and str.join only accepts strings
        with open(results_path, 'a') as fp:
            fp.write(','.join((str(j), str(make), str(count), str(popular))) + '\n')

        # be polite to the server between requests
        sleep(randint(3, 13))
def get_data(t):
    """Extract the ``window.INITIAL_STATE`` JSON embedded in a fetched page.

    Args:
        t: response object whose ``content`` attribute holds the page HTML.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: if the page has no non-empty
            ``<script id="initial-state">`` element, or its payload is not
            valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
    """
    soup = BeautifulSoup(t.content, features='html5lib')

    # look the tag up by id instead of by position, so the result does not
    # depend on how many <script> tags precede it
    tag = soup.find('script', id='initial-state')
    if tag is None or not tag.string:
        raise ValueError('no <script id="initial-state"> element found in page')

    # the script body is a JS assignment: ``window.INITIAL_STATE = {...};``
    payload = tag.string.strip()
    prefix = 'window.INITIAL_STATE = '
    if payload.startswith(prefix):
        payload = payload[len(prefix):]
    payload = payload.rstrip(';')

    return json.loads(payload)
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| idx | make | count | popular | |
|---|---|---|---|---|
| 209 | AC | 14 | False | |
| 205 | ACURA | 1 | False | |
| 245 | ADLER | 0 | False | |
| 456 | ADR | 0 | False | |
| 909 | AERO | 0 | False | |
| 438 | AGM | 2 | False | |
| 1 | AIXAM | 5 | False | |
| 2 | ALFA ROMEO | 1824 | True | |
| 530 | ALLARD | 2 | False | |
| 882 | ALPINE | 83 | True | |
| 233 | ALVIS | 3 | False | |
| 239 | AMC | 5 | False | |
| 969 | AMERICAN LAFRANCE | 0 | False | |
| 3 | AMERICAN MOTORS | 1 | False | |
| 572 | AMILCAR | 1 | False | |
| 819 | AMPHICAR | 0 | False | |
| 897 | APAL | 0 | False | |
| 476 | ARIEL | 0 | False | |
| 249 | ARMSTRONG SIDDELEY | 0 | False | |
| 451 | ARTEGA | 2 | False | |
| 4 | ASTON MARTIN | 369 | True | |
| 965 | ATS | 0 | False | |
| 576 | AUBURN | 1 | False | |
| 5 | AUDI | 10146 | True | |
| 6 | AUSTIN | 12 | False | |
| 221 | AUSTIN-HEALEY | 27 | True | |
| 231 | AUTOBIANCHI | 6 | False | |
| 253 | AVANTI | 0 | False | |
| 7 | BENTLEY | 395 | True | |
| 435 | BERKELEY | 0 | False | |
| 404 | BERLIET | 0 | False | |
| 243 | BERNA | 0 | False | |
| 8 | BERTONE | 0 | False | |
| 398 | BINZ | 0 | False | |
| 218 | BITTER | 2 | False | |
| 745 | BIZZARRINI | 0 | False | |
| 9 | BMW | 13446 | True | |
| 10 | BMW-ALPINA | 124 | True | |
| 519 | BOATTAIL | 0 | False | |
| 322 | BONNET | 0 | False | |
| 235 | BORGWARD | 0 | False | |
| 537 | BRABHAM | 0 | False | |
| 659 | BRASIER | 0 | False | |
| 529 | BRISTOL | 1 | False | |
| 11 | BUGATTI | 3 | False | |
| 210 | BUGGY | 0 | False | |
| 12 | BUICK | 35 | True | |
| 13 | CADILLAC | 340 | True | |
| 589 | CAMPAGNA | 0 | False | |
| 649 | CARBODIE | 0 | False | |
| 279 | CARVER | 0 | False | |
| 14 | CATERHAM | 28 | True | |
| 15 | CHEVROLET | 931 | True | |
| 315 | CHEVRON | 0 | False | |
| 16 | CHRYSLER | 285 | True | |
| 17 | CITROEN | 2748 | True | |
| 681 | CITYEL | 0 | False | |
| 309 | CLASSIC PLUS | 0 | False | |
| 626 | CLEMENT | 0 | False | |
| 234 | CLENET | 1 | False | |
| 839 | CMC | 0 | False | |
| 525 | CORD | 0 | False | |
| 849 | CROCO | 0 | False | |
| 564 | CROSSLE | 0 | False | |
| 18 | DACIA | 909 | True | |
| 19 | DAEWOO | 49 | True | |
| 242 | DAF | 1 | False | |
| 20 | DAIHATSU | 186 | True | |
| 21 | DAIMLER | 49 | True | |
| 989 | DALLARA | 1 | False | |
| 484 | DAREN | 0 | False | |
| 721 | DARRACQ | 0 | False | |
| 214 | DATSUN | 4 | False | |
| 461 | DAX | 0 | False | |
| 459 | DE DION-BOUTON | 0 | False | |
| 22 | DE TOMASO | 9 | False | |
| 437 | DELAGE | 0 | False | |
| 810 | DELAHAYE | 0 | False | |
| 310 | DELARION | 0 | False | |
| 903 | DELAUNAY BELLEVILLE | 0 | False | |
| 313 | DeLorean | 4 | False | |
| 501 | DESOTO | 2 | False | |
| 624 | DESTINY | 0 | False | |
| 823 | DETROIT ELECTRIC | 0 | False | |
| 1042 | DEVINCI | 2 | False | |
| 385 | DFM | 2 | False | |
| 591 | DFSK | 0 | False | |
| 251 | DIAVOLINO | 1 | False | |
| 219 | DKW | 3 | False | |
| 23 | DODGE | 670 | True | |
| 24 | DONKERVOORT | 4 | False | |
| 734 | DONNET | 0 | False | |
| 874 | DS AUTOMOBILES | 576 | True | |
| 679 | DURANT | 1 | False | |
| 506 | EDSEL | 2 | False | |
| 750 | EHP | 0 | False | |
| 821 | ENZMANN | 1 | False | |
| 680 | ESSEX | 0 | False | |
| 1007 | ESTRIMA | 5 | False | |
| 206 | EXCALIBUR | 4 | False | |
| 500 | FACEL VEGA | 0 | False | |
| 25 | FERRARI | 1069 | True | |
| 26 | FIAT | 4267 | True | |
| 454 | FISKER | 4 | False | |
| 27 | FORD | 6763 | True | |
| 628 | FRAMO | 0 | False | |
| 896 | FRANKLIN | 1 | False | |
| 460 | FREEWIEL | 0 | False | |
| 673 | FULDAMOBIL | 0 | False | |
| 893 | FULU | 0 | False | |
| 875 | FUNYO | 0 | False | |
| 629 | GAC GONOW | 0 | False | |
| 268 | GAZ | 0 | False | |
| 490 | GEORGES | 0 | False | |
| 392 | GIBBS | 0 | False | |
| 432 | GINETTA | 0 | False | |
| 450 | GLAS | 2 | False | |
| 204 | GMC | 22 | True | |
| 549 | GOGGOMOBIL | 0 | False | |
| 920 | GREAT WALL | 0 | False | |
| 378 | GUMPERT | 0 | False | |
| 1040 | Gurgel | 1 | False | |
| 622 | HANOMAG | 1 | False | |
| 1009 | HANROAD | 0 | False | |
| 418 | HEALEY | 0 | False | |
| 430 | HEINKEL | 0 | False | |
| 390 | HILLMAN | 0 | False | |
| 884 | HINDUSTAN | 0 | False | |
| 888 | HITEC | 13 | False | |
| 747 | HOLDEN | 0 | False | |
| 29 | HONDA | 1922 | True | |
| 786 | HORCH | 0 | False | |
| 383 | HOTCHKISS | 0 | False | |
| 701 | HRG | 0 | False | |
| 30 | HS | 1 | False | |
| 230 | HUDSON | 2 | False | |
| 31 | HUMMER | 30 | True | |
| 250 | HWM | 0 | False | |
| 32 | HYUNDAI | 3269 | True | |
| 426 | IMPERIA | 0 | False | |
| 33 | INFINITI | 206 | True | |
| 34 | INNOCENTI | 5 | False | |
| 551 | INTERMECCANICA | 0 | False | |
| 198 | INTERNATIONAL | 2 | False | |
| 436 | INVICTA | 0 | False | |
| 535 | IRMSCHER | 2 | False | |
| 483 | ISO | 1 | False | |
| 35 | ISUZU | 71 | True | |
| 36 | ISUZU (J) | 0 | False | |
| 37 | IVECO | 2 | False | |
| 960 | JAC | 43 | True | |
| 38 | JAGUAR | 1364 | True | |
| 39 | JEEP | 2141 | True | |
| 216 | JENSEN | 6 | False | |
| 142 | JORDAN | 0 | False | |
| 248 | JOWETT | 0 | False | |
| 427 | KAISER | 4 | False | |
| 395 | KAMOO | 0 | False | |
| 246 | KARMANN | 0 | False | |
| 40 | KIA | 2153 | True | |
| 780 | KLEINSCHNITTGER | 0 | False | |
| 397 | KOENIGSEGG | 0 | False | |
| 581 | KOUGAR | 0 | False | |
| 41 | KTM | 6 | False | |
| 834 | KV | 0 | False | |
| 42 | LADA | 11 | False | |
| 308 | LAGONDA | 3 | False | |
| 43 | LAMBORGHINI | 352 | True | |
| 44 | LANCIA | 255 | True | |
| 45 | LAND ROVER | 2570 | True | |
| 971 | LE ZEBRE | 2 | False | |
| 597 | LEA-FRANCIS | 1 | False | |
| 642 | LEGENDS CAR | 0 | False | |
| 46 | LEXUS | 634 | True | |
| 862 | LEYLAND | 0 | False | |
| 47 | LIGIER | 3 | False | |
| 207 | LINCOLN | 29 | True | |
| 457 | LOCOMOBILE | 0 | False | |
| 658 | LOLA | 1 | False | |
| 752 | LOMBARDI | 0 | False | |
| 889 | LORENZ & RANKL | 0 | False | |
| 48 | LOTUS | 137 | True | |
| 393 | LTI | 0 | False | |
| 565 | MAHINDRA | 1 | False | |
| 452 | MALLOCK | 0 | False | |
| 330 | MAN | 0 | False | |
| 907 | MANTA CARS | 0 | False | |
| 570 | MARCH | 0 | False | |
| 232 | MARCOS | 1 | False | |
| 751 | MARTIN | 0 | False | |
| 49 | MASERATI | 699 | True | |
| 523 | MATHIS | 1 | False | |
| 458 | MATRA | 1 | False | |
| 674 | MAXIMAG | 0 | False | |
| 226 | MAYBACH | 9 | False | |
| 50 | MAZDA | 3633 | True | |
| 474 | McLAREN | 110 | True | |
| 85 | MEGA | 2 | False | |
| 51 | MERCEDES-BENZ | 13458 | True | |
| 215 | MERCURY | 8 | False | |
| 238 | MESSERSCHMITT | 2 | False | |
| 545 | MEV HUMMER | 0 | False | |
| 52 | MG | 145 | True | |
| 833 | MIA | 0 | False | |
| 861 | MICROCAR | 1 | False | |
| 53 | MINELLI | 0 | False | |
| 817 | MINERVA | 0 | False | |
| 54 | MINI | 2135 | True | |
| 55 | MITSUBISHI | 1919 | True | |
| 225 | MONTEVERDI | 4 | False | |
| 56 | MORGAN | 32 | True | |
| 211 | MORRIS | 12 | False | |
| 522 | MORS | 0 | False | |
| 725 | MOSKWITSCH | 0 | False | |
| 716 | MOSLER | 1 | False | |
| 217 | MOWAG | 1 | False | |
| 667 | MVS | 0 | False | |
| 325 | NASH | 1 | False | |
| 57 | NISSAN | 3183 | True | |
| 227 | NSU | 10 | False | |
| 886 | OAKLAND | 0 | False | |
| 381 | OBERMAIER | 0 | False | |
| 58 | OLDSMOBILE | 22 | True | |
| 707 | OM | 0 | False | |
| 59 | OPEL | 6882 | True | |
| 623 | OVERLAND | 0 | False | |
| 201 | PACKARD | 1 | False | |
| 371 | PAGANI | 2 | False | |
| 603 | PANHARD | 0 | False | |
| 316 | PANTHER | 0 | False | |
| 60 | PEUGEOT | 4745 | True | |
| 167 | PGO | 1 | False | |
| 87 | PIAGGIO | 8 | False | |
| 485 | PIERCE ARROW | 0 | False | |
| 244 | PININFARINA | 0 | False | |
| 203 | PLYMOUTH | 23 | True | |
| 998 | POLESTAR | 2 | False | |
| 61 | PONTIAC | 71 | True | |
| 62 | PORSCHE | 3550 | True | |
| 1050 | PRC | 1 | False | |
| 63 | PUCH | 25 | True | |
| 372 | PUMA | 0 | False | |
| 64 | QVALE | 0 | False | |
| 321 | RADICAL | 1 | False | |
| 865 | Rally | 0 | False | |
| 802 | RAYTON FISSORE | 0 | False | |
| 787 | RCB | 0 | False | |
| 938 | RCH-AUTOMOTIVE | 0 | False | |
| 65 | RELIANT | 3 | False | |
| 66 | RENAULT | 5177 | True | |
| 425 | REVA | 0 | False | |
| 241 | RILEY | 3 | False | |
| 1004 | RIMAC | 0 | False | |
| 67 | ROLLS-ROYCE | 133 | True | |
| 455 | ROSSION | 0 | False | |
| 68 | ROVER | 44 | True | |
| 475 | RUF | 3 | False | |
| 396 | RUSKA | 0 | False | |
| 69 | SAAB | 251 | True | |
| 1005 | SABRA | 0 | False | |
| 739 | SAKER | 0 | False | |
| 384 | SALMSON | 0 | False | |
| 394 | SAM | 0 | False | |
| 202 | SANTANA | 0 | False | |
| 449 | SAPOROSHEZ | 0 | False | |
| 914 | SAUBER | 0 | False | |
| 199 | SAURER | 0 | False | |
| 573 | SBARRO | 0 | False | |
| 70 | SEAT | 3371 | True | |
| 176 | SECMA | 1 | False | |
| 685 | SENECHAL | 0 | False | |
| 536 | SEVENTY SEVEN | 0 | False | |
| 1041 | SIATA | 1 | False | |
| 213 | SIMCA | 11 | False | |
| 247 | SINGER | 0 | False | |
| 71 | SKODA | 5478 | True | |
| 72 | SMART | 755 | True | |
| 453 | SMILE | 0 | False | |
| 400 | SOKON | 0 | False | |
| 809 | SPECTRE | 1 | False | |
| 928 | SPIRE | 0 | False | |
| 236 | SPYKER | 1 | False | |
| 73 | SSANGYONG | 459 | True | |
| 719 | STANDARD | 1 | False | |
| 200 | STEPHENS | 1 | False | |
| 222 | STEYR | 5 | False | |
| 223 | STUDEBAKER | 2 | False | |
| 625 | STUTZ | 1 | False | |
| 74 | SUBARU | 2841 | True | |
| 212 | SUNBEAM | 7 | False | |
| 75 | SUZUKI | 2736 | True | |
| 974 | SWALLOW | 1 | False | |
| 420 | SYLVA | 0 | False | |
| 76 | TALBOT | 7 | False | |
| 77 | TATA | 4 | False | |
| 240 | TATRA | 0 | False | |
| 961 | TATUUS | 0 | False | |
| 389 | TAZZARI | 1 | False | |
| 763 | TECNO | 0 | False | |
| 391 | TESLA | 190 | True | |
| 434 | THINK | 0 | False | |
| 78 | TOYOTA | 5106 | True | |
| 224 | TRABANT | 1 | False | |
| 185 | TRIUMPH | 57 | True | |
| 79 | TVR | 11 | False | |
| 428 | UAZ | 0 | False | |
| 568 | ULTIMA | 0 | False | |
| 687 | UMM | 0 | False | |
| 237 | UNIMOG | 1 | False | |
| 555 | VALE | 0 | False | |
| 229 | VAUXHALL | 4 | False | |
| 80 | VENTURI | 0 | False | |
| 557 | VESPA | 0 | False | |
| 546 | VOLTEIS | 0 | False | |
| 81 | VOLVO | 4472 | True | |
| 82 | VW | 12891 | True | |
| 869 | WARTBURG | 1 | False | |
| 996 | WELTMEISTER | 0 | False | |
| 252 | WESTFIELD | 2 | False | |
| 83 | WIESMANN | 8 | False | |
| 208 | WILLYS | 16 | False | |
| 327 | WOLSELEY | 1 | False | |
| 326 | YES! | 2 | False | |
| 84 | ZAGATO | 0 | False | |
| 556 | ZARP | 0 | False | |
| 711 | ZASTAVA | 0 | False | |
| 228 | ZBR | 0 | False | |
| 533 | ZIMMER | 0 | False |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment