julienvaslet · May 25, 2018 12:47
diff --git a/water.py b/water.py
 # Query orobnat.sante.gouv.fr and print the result-table rows whose first
 # cell mentions one of the requested parameters.
 from bs4 import BeautifulSoup
 import requests
 import re

 # Parameter names to print. Only the first one is used to detect that a
 # results page has been reached.
 requested_data = ["Calcium"]

 # Form fields posted to the search endpoint; "posPLV" is bumped on every try.
 post_data = {
     "methode": "rechercher",
     "idRegion": "73",
     "usd": "AEP",
     "posPLV": "0",
     "departement": "031",
     "communeDepartement": "555",
     "reseau": "000006_031",
 }

 # Upper bound on the number of result pages requested before giving up, so
 # the script cannot poll the server forever. Adjust if more pages are needed.
 MAX_TRIES = 100


 def _first_text_contains(cell, needle):
     """Return True when the first child of *cell* is text containing *needle*.

     A missing cell, an empty cell, or a cell whose first child is a nested
     tag never matches.
     """
     if cell is None or not cell.contents:
         return False

     first = cell.contents[0]
     # Text nodes are str subclasses; a nested tag is not, and calling
     # .find() on it would be a tag search rather than a substring search.
     return isinstance(first, str) and needle in first


 http_session = requests.session()

 first_url = "https://orobnat.sante.gouv.fr/orobnat/afficherPage.do?methode=menu&idRegion={region}&dpt={dpt}&usd={usd}&comDpt={comdpt}".format(
     region=post_data["idRegion"],
     dpt=post_data["departement"],
     usd=post_data["usd"],
     comdpt=post_data["communeDepartement"],
 )

 # The menu page is loaded first with the same session that is then used for
 # the search requests.
 print("Loading:", first_url)
 result = http_session.get(first_url)

 if result.status_code == 200:
     failure = False
     pos_plv = 0
     source_url = "https://orobnat.sante.gouv.fr/orobnat/rechercherResultatQualite.do"

     # The requested names are literal text, so regex metacharacters are escaped.
     pattern = re.compile(re.escape(requested_data[0]))

     # Request successive "posPLV" positions until a page mentions the parameter.
     while pattern.search(result.text) is None:
         if pos_plv >= MAX_TRIES:
             # Every page answered 200 but none mentioned the parameter.
             failure = True
             break

         print("Try #{0}".format(pos_plv))
         print("Loading:", source_url)
         result = http_session.post(source_url, post_data)

         if result.status_code != 200:
             print("Unable to get page:", result.status_code, result.reason)
             failure = True
             break

         pos_plv += 1
         post_data["posPLV"] = str(pos_plv)

     if not failure:
         print("Data found.")

         # Name the parser explicitly so the parse does not depend on which
         # optional parser libraries happen to be installed.
         soup = BeautifulSoup(result.text, "html.parser")

         # All page tables have the same "id"...
         for table in soup.find_all(id="tableau"):
             tds = table.find_all("td", {"class": "gras"})

             # The wanted table is the one with a bold cell starting with "Valeur".
             if not any(_first_text_contains(td, "Valeur") for td in tds):
                 continue

             for tr in table.find_all("tr"):
                 # Rows without any <td> yield None here and simply never match.
                 td = tr.find("td")

                 if any(_first_text_contains(td, value) for value in requested_data):
                     print(" ".join(tr.stripped_strings))

     else:
         print("Unable to find criteria")

 else:
     print("Can not get main page:", result.status_code, result.reason)
	# Query orobnat.sante.gouv.fr and print the result-table rows whose first
	# cell mentions one of the requested parameters.
	from bs4 import BeautifulSoup
	import requests
	import re

	# Parameter names to print. Only the first one is used to detect that a
	# results page has been reached.
	requested_data = ["Calcium"]

	# Form fields posted to the search endpoint; "posPLV" is bumped on every try.
	post_data = {
	    "methode": "rechercher",
	    "idRegion": "73",
	    "usd": "AEP",
	    "posPLV": "0",
	    "departement": "031",
	    "communeDepartement": "555",
	    "reseau": "000006_031",
	}

	# Upper bound on the number of result pages requested before giving up, so
	# the script cannot poll the server forever. Adjust if more pages are needed.
	MAX_TRIES = 100


	def _first_text_contains(cell, needle):
	    """Return True when the first child of *cell* is text containing *needle*.

	    A missing cell, an empty cell, or a cell whose first child is a nested
	    tag never matches.
	    """
	    if cell is None or not cell.contents:
	        return False

	    first = cell.contents[0]
	    # Text nodes are str subclasses; a nested tag is not, and calling
	    # .find() on it would be a tag search rather than a substring search.
	    return isinstance(first, str) and needle in first


	http_session = requests.session()

	first_url = "https://orobnat.sante.gouv.fr/orobnat/afficherPage.do?methode=menu&idRegion={region}&dpt={dpt}&usd={usd}&comDpt={comdpt}".format(
	    region=post_data["idRegion"],
	    dpt=post_data["departement"],
	    usd=post_data["usd"],
	    comdpt=post_data["communeDepartement"],
	)

	# The menu page is loaded first with the same session that is then used for
	# the search requests.
	print("Loading:", first_url)
	result = http_session.get(first_url)

	if result.status_code == 200:
	    failure = False
	    pos_plv = 0
	    source_url = "https://orobnat.sante.gouv.fr/orobnat/rechercherResultatQualite.do"

	    # The requested names are literal text, so regex metacharacters are escaped.
	    pattern = re.compile(re.escape(requested_data[0]))

	    # Request successive "posPLV" positions until a page mentions the parameter.
	    while pattern.search(result.text) is None:
	        if pos_plv >= MAX_TRIES:
	            # Every page answered 200 but none mentioned the parameter.
	            failure = True
	            break

	        print("Try #{0}".format(pos_plv))
	        print("Loading:", source_url)
	        result = http_session.post(source_url, post_data)

	        if result.status_code != 200:
	            print("Unable to get page:", result.status_code, result.reason)
	            failure = True
	            break

	        pos_plv += 1
	        post_data["posPLV"] = str(pos_plv)

	    if not failure:
	        print("Data found.")

	        # Name the parser explicitly so the parse does not depend on which
	        # optional parser libraries happen to be installed.
	        soup = BeautifulSoup(result.text, "html.parser")

	        # All page tables have the same "id"...
	        for table in soup.find_all(id="tableau"):
	            tds = table.find_all("td", {"class": "gras"})

	            # The wanted table is the one with a bold cell starting with "Valeur".
	            if not any(_first_text_contains(td, "Valeur") for td in tds):
	                continue

	            for tr in table.find_all("tr"):
	                # Rows without any <td> yield None here and simply never match.
	                td = tr.find("td")

	                if any(_first_text_contains(td, value) for value in requested_data):
	                    print(" ".join(tr.stripped_strings))

	    else:
	        print("Unable to find criteria")

	else:
	    print("Can not get main page:", result.status_code, result.reason)