odeke-em · April 22, 2018 03:39
diff --git a/main.py b/main.py
 #!/usr/bin/env python3

 """
 Copyright 2018, OpenCensus Authors

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 """

 from hashlib import md5
 import os
 import requests
 import time

 from pymemcache import client as memclient

 from opencensus.trace import execution_context
 from opencensus.trace.exporters.stackdriver_exporter import StackdriverExporter
 from opencensus.trace import trace_options
 from opencensus.trace.samplers import always_on
 from opencensus.trace.tracer import Tracer
 from opencensus.trace.exporters.transports.background_thread import BackgroundThreadTransport
 from opencensus.trace import config_integration

 # Turn on the bundled `requests` integration so outgoing HTTP calls are
 # traced in addition to the spans created by hand below.
 config_integration.trace_integrations(['requests'])

 # The Stackdriver project comes from OPENCENSUS_PROJECTID, falling back to
 # 'census-demos'; the exporter is handed BackgroundThreadTransport.
 sdexporter = StackdriverExporter(
     project_id=os.environ.get('OPENCENSUS_PROJECTID', 'census-demos'),
     transport=BackgroundThreadTransport,
 )

 # Module-wide tracer using AlwaysOnSampler, registered in the execution
 # context.
 tracer = Tracer(exporter=sdexporter, sampler=always_on.AlwaysOnSampler())
 execution_context.set_opencensus_tracer(tracer)

 def main():
    """Crawl a fixed list of URLs three times each, tracing every fetch.

    Each fetch goes through an ``Indexer`` backed by a pymemcache client
    pointed at ``localhost:11211``. The body of every fetch is written to
    ``<md5 hex digest of the URL>.txt`` in the current working directory and
    a timing line is printed. Once all URLs have been crawled, their cache
    entries are deleted.
    """
    urls = [
        'https://opencensus.io',
        'https://github.com/census-instrumentation',
        'https://yahoo.com',
        'https://time.com',
    ]
    mc = memclient.Client(('localhost', 11211))
    ind = Indexer(mc)
    for url in urls:
        # Fetch the same URL repeatedly so repeat fetches can be answered
        # from the cache that a successful first fetch populates.
        for _ in range(3):
            # perf_counter() is monotonic, so the measured duration cannot be
            # skewed by wall-clock adjustments.
            started = time.perf_counter()
            with tracer.span(name='Crawling') as span:
                span.add_annotation('Fetching page', url=url)
                data, already_cached = ind.fetch(url)
                # Indexer.fetch can hand back text (str) for non-2xx
                # responses; the file is opened in binary mode, so normalise
                # to bytes instead of failing with TypeError.
                if isinstance(data, str):
                    data = data.encode('utf-8')
                name = md5(url.encode('utf-8')).hexdigest()
                with open(name + '.txt', 'wb') as f:
                    f.write(data)
                # No explicit span.finish(): leaving the with-block ends the
                # span.

            elapsed = time.perf_counter() - started
            print('URL: %s TimeSpent: %.3fs alreadyCached: %s' % (url, elapsed, already_cached))

    # Remove the entries this run added to the cache.
    for url in urls:
        ind.clearCache(url)

 class Indexer(object):
    """Fetch pages over HTTP, memoizing successful responses in a cache.

    ``cache`` is any object exposing ``get(key)``, ``set(key, value)`` and
    ``delete(key)``.
    """

    def __init__(self, cache):
        """Store the cache client used by ``fetch`` and ``clearCache``."""
        self.cache = cache

    def fetch(self, url):
        """Return ``(body, already_cached)`` for ``url``.

        The cache is consulted first; on a hit the cached value is returned
        with ``True``. On a miss the URL is requested with ``requests.get``
        and the response text, encoded as UTF-8 bytes, is returned with
        ``False``. Only responses with a 2xx status code are stored in the
        cache.
        """
        with tracer.span(name='Indexer.fetch') as span:
            # Firstly check if we've cached it before
            span.add_annotation('Checking cache if already memoized', url=url)
            memoized = self.cache.get(url)
            span.add_annotation('Done checking cache')
            # Compare against None rather than truthiness so that a cached
            # empty body still counts as a hit instead of being refetched.
            if memoized is not None:
                span.add_annotation('Cache hit', url=url)
                return memoized, True

            # Cache miss
            span.add_annotation('Cache miss, now fetching URL', url=url)
            res = requests.get(url)
            sc = res.status_code
            span.add_annotation('Results back', code=sc, url=url)
            # Always hand back bytes, whatever the status code, so callers
            # get one type from both the hit and the miss path.
            data = res.text.encode('utf-8')
            if 200 <= sc <= 299:
                # Only successful responses are worth caching
                span.add_annotation('Now caching results', code=sc, url=url)
                self.cache.set(url, data)

            return data, False

    def clearCache(self, url):
        """Delete the cache entry stored under ``url``."""
        self.cache.delete(url)

 # Start the crawl only when this file is executed as a script.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3

	"""
	Copyright 2018, OpenCensus Authors

	Licensed under the Apache License, Version 2.0 (the "License");
	you may not use this file except in compliance with the License.
	You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
	"""

	from hashlib import md5
	import os
	import requests
	import time

	from pymemcache import client as memclient

	from opencensus.trace import execution_context
	from opencensus.trace.exporters.stackdriver_exporter import StackdriverExporter
	from opencensus.trace import trace_options
	from opencensus.trace.samplers import always_on
	from opencensus.trace.tracer import Tracer
	from opencensus.trace.exporters.transports.background_thread import BackgroundThreadTransport
	from opencensus.trace import config_integration

	# Turn on the bundled `requests` integration so outgoing HTTP calls are
	# traced in addition to the spans created by hand below.
	config_integration.trace_integrations(['requests'])

	# The Stackdriver project comes from OPENCENSUS_PROJECTID, falling back to
	# 'census-demos'; the exporter is handed BackgroundThreadTransport.
	sdexporter = StackdriverExporter(
	    project_id=os.environ.get('OPENCENSUS_PROJECTID', 'census-demos'),
	    transport=BackgroundThreadTransport,
	)

	# Module-wide tracer using AlwaysOnSampler, registered in the execution
	# context.
	tracer = Tracer(exporter=sdexporter, sampler=always_on.AlwaysOnSampler())
	execution_context.set_opencensus_tracer(tracer)

	def main():
	    """Crawl a fixed list of URLs three times each, tracing every fetch.

	    Each fetch goes through an ``Indexer`` backed by a pymemcache client
	    pointed at ``localhost:11211``. The body of every fetch is written to
	    ``<md5 hex digest of the URL>.txt`` in the current working directory
	    and a timing line is printed. Once all URLs have been crawled, their
	    cache entries are deleted.
	    """
	    urls = [
	        'https://opencensus.io',
	        'https://github.com/census-instrumentation',
	        'https://yahoo.com',
	        'https://time.com',
	    ]
	    mc = memclient.Client(('localhost', 11211))
	    ind = Indexer(mc)
	    for url in urls:
	        # Fetch the same URL repeatedly so repeat fetches can be answered
	        # from the cache that a successful first fetch populates.
	        for _ in range(3):
	            # perf_counter() is monotonic, so the measured duration cannot
	            # be skewed by wall-clock adjustments.
	            started = time.perf_counter()
	            with tracer.span(name='Crawling') as span:
	                span.add_annotation('Fetching page', url=url)
	                data, already_cached = ind.fetch(url)
	                # Indexer.fetch can hand back text (str) for non-2xx
	                # responses; the file is opened in binary mode, so
	                # normalise to bytes instead of failing with TypeError.
	                if isinstance(data, str):
	                    data = data.encode('utf-8')
	                name = md5(url.encode('utf-8')).hexdigest()
	                with open(name + '.txt', 'wb') as f:
	                    f.write(data)
	                # No explicit span.finish(): leaving the with-block ends
	                # the span.

	            elapsed = time.perf_counter() - started
	            print('URL: %s TimeSpent: %.3fs alreadyCached: %s' % (url, elapsed, already_cached))

	    # Remove the entries this run added to the cache.
	    for url in urls:
	        ind.clearCache(url)

	class Indexer(object):
	    """Fetch pages over HTTP, memoizing successful responses in a cache.

	    ``cache`` is any object exposing ``get(key)``, ``set(key, value)`` and
	    ``delete(key)``.
	    """

	    def __init__(self, cache):
	        """Store the cache client used by ``fetch`` and ``clearCache``."""
	        self.cache = cache

	    def fetch(self, url):
	        """Return ``(body, already_cached)`` for ``url``.

	        The cache is consulted first; on a hit the cached value is returned
	        with ``True``. On a miss the URL is requested with ``requests.get``
	        and the response text, encoded as UTF-8 bytes, is returned with
	        ``False``. Only responses with a 2xx status code are stored in the
	        cache.
	        """
	        with tracer.span(name='Indexer.fetch') as span:
	            # Firstly check if we've cached it before
	            span.add_annotation('Checking cache if already memoized', url=url)
	            memoized = self.cache.get(url)
	            span.add_annotation('Done checking cache')
	            # Compare against None rather than truthiness so that a cached
	            # empty body still counts as a hit instead of being refetched.
	            if memoized is not None:
	                span.add_annotation('Cache hit', url=url)
	                return memoized, True

	            # Cache miss
	            span.add_annotation('Cache miss, now fetching URL', url=url)
	            res = requests.get(url)
	            sc = res.status_code
	            span.add_annotation('Results back', code=sc, url=url)
	            # Always hand back bytes, whatever the status code, so callers
	            # get one type from both the hit and the miss path.
	            data = res.text.encode('utf-8')
	            if 200 <= sc <= 299:
	                # Only successful responses are worth caching
	                span.add_annotation('Now caching results', code=sc, url=url)
	                self.cache.set(url, data)

	            return data, False

	    def clearCache(self, url):
	        """Delete the cache entry stored under ``url``."""
	        self.cache.delete(url)

	# Start the crawl only when this file is executed as a script.
	if __name__ == "__main__":
	    main()