pankaj28843 · October 6, 2014 15:36
diff --git a/get_price_from_amazon.py b/get_price_from_amazon.py
 import re

 import requests
 from lxml import etree

 # Matches a decimal number with a fractional part (e.g. "12.50", ".5") or,
 # failing that, a plain run of digits.
 REGEX_FIND_NUMBER = re.compile(r'\d*\.\d+|\d+')
 # Matches a comma together with any whitespace around it.
 REGEX_FIND_COMMA = re.compile(r'\s*,\s*')


 def get_price_from_text(text):
     """Return the first number found in ``text`` as a float.

     Surrounding whitespace is stripped and every comma (along with any
     whitespace next to it) is removed before searching, so "1,299.00"
     is read as 1299.0. Raises IndexError when the text holds no number.
     """
     without_commas = REGEX_FIND_COMMA.sub('', text.strip())
     # The price is taken to be the first numeric token in the text.
     first_number = REGEX_FIND_NUMBER.findall(without_commas)[0]
     return float(first_number)


 def get_text_for_etree_node(node):
     """Collect the text of ``node``, its descendants, and its tail.

     The pieces are joined in document order: the node's own leading text,
     then the recursively gathered text of each child, then the tail text
     stored on the node itself. Missing text or tail counts as ''.
     """
     pieces = [node.text or '']
     # Iterating an element yields its direct children.
     pieces.extend(get_text_for_etree_node(child) for child in node)
     pieces.append(node.tail or '')
     return ''.join(pieces)


 def get_price_from_amazon(url, timeout=10):
     """Fetch the page at ``url`` and return the price shown on it.

     The page is downloaded, parsed as HTML, and the text of the
     ``<span id="priceblock_ourprice">`` element is converted to a float.

     ``timeout`` is the number of seconds handed to ``requests.get``;
     without a timeout a stalled connection could block indefinitely.

     Raises ValueError when the page has no ``priceblock_ourprice`` span.
     """
     response = requests.get(url, timeout=timeout)
     root = etree.HTML(response.text)
     target_element = root.find(".//span[@id='priceblock_ourprice']")
     if target_element is None:
         # find() returns None on a miss; fail with a clear message instead
         # of an AttributeError raised inside the text extraction helper.
         raise ValueError(
             "No 'priceblock_ourprice' element found at {}".format(url))
     target_element_text = get_text_for_etree_node(target_element)
     return get_price_from_text(target_element_text)


 if __name__ == "__main__":
     # Demo run: look up one product page and print its price.
     url = "http://www.amazon.in/gp/product/B00MMKAVR8/"
     price = get_price_from_amazon(url)
     # Parenthesised print is valid on both Python 2 and Python 3; the
     # bare print statement is a SyntaxError on Python 3.
     print("Price is : {}\n".format(price))
	import re

	import requests
	from lxml import etree

	# Matches a decimal number with a fractional part (e.g. "12.50", ".5") or,
	# failing that, a plain run of digits. The alternation bar must stay
	# unescaped: written as \| it only matches a literal "|" character.
	REGEX_FIND_NUMBER = re.compile(r'\d*\.\d+|\d+')
	# Matches a comma plus any optional whitespace around it, so thousands
	# separators with no surrounding spaces (e.g. "1,299") are matched too.
	REGEX_FIND_COMMA = re.compile(r'\s*,\s*')


	def get_price_from_text(text):
	    """Return the first number found in ``text`` as a float.

	    Surrounding whitespace is stripped and commas are removed with
	    REGEX_FIND_COMMA before REGEX_FIND_NUMBER searches the text.
	    Raises IndexError when the text holds no number.
	    """
	    # strip the text
	    text = text.strip()
	    # remove comma
	    text = REGEX_FIND_COMMA.sub('', text)
	    # find decimal string
	    decimals = REGEX_FIND_NUMBER.findall(text)
	    # price string should be first one
	    price_string = decimals[0]
	    # convert to float and return
	    return float(price_string)


	def get_text_for_etree_node(node):
	    """Collect the text of ``node``, its descendants, and its tail.

	    The pieces are joined in document order: the node's own leading
	    text, the recursively gathered text of each child, then the tail
	    text stored on the node itself. Missing text or tail counts as ''.
	    """
	    # get text for current node
	    text = node.text or ''
	    # get tail for current node
	    tail = node.tail or ''
	    # get text for all children (iterating an element yields its children)
	    children_text = ''.join(map(get_text_for_etree_node, node))

	    return text + children_text + tail


	def get_price_from_amazon(url, timeout=10):
	    """Fetch the page at ``url`` and return the price shown on it.

	    The page is downloaded, parsed as HTML, and the text of the
	    ``<span id="priceblock_ourprice">`` element is converted to a float.

	    ``timeout`` is the number of seconds handed to ``requests.get``;
	    without a timeout a stalled connection could block indefinitely.

	    Raises ValueError when the page has no ``priceblock_ourprice`` span.
	    """
	    req = requests.get(url, timeout=timeout)
	    root = etree.HTML(req.text)
	    target_element = root.find(".//span[@id='priceblock_ourprice']")
	    if target_element is None:
	        # find() returns None on a miss; fail with a clear message
	        # instead of an AttributeError inside the text extraction helper.
	        raise ValueError(
	            "No 'priceblock_ourprice' element found at {}".format(url))
	    target_element_text = get_text_for_etree_node(target_element)
	    price = get_price_from_text(target_element_text)
	    return price


	if __name__ == "__main__":
	    # Demo run: look up one product page and print its price.
	    url = "http://www.amazon.in/gp/product/B00MMKAVR8/"
	    price = get_price_from_amazon(url)
	    # Parenthesised print is valid on both Python 2 and Python 3; the
	    # bare print statement is a SyntaxError on Python 3.
	    print("Price is : {}\n".format(price))
No results found