Skip to content

Instantly share code, notes, and snippets.

@anirbanroydas
Last active December 6, 2016 19:49
Show Gist options
  • Save anirbanroydas/3a298cfadf7cc18fda8387421a1904bf to your computer and use it in GitHub Desktop.
Save anirbanroydas/3a298cfadf7cc18fda8387421a1904bf to your computer and use it in GitHub Desktop.
invite_friends: sends invitations to the friends in a friend list who are within distance D of a given source S. D is in km; S is a (latitude, longitude) pair expressed in decimal degrees. It also logs data to both a file and the I/O stream. It is also configurable. It is also scalable. It is also fault tolerant.
{"latitude": "12.986375", "user_id": 12, "name": "Chris", "longitude": "77.043701"}
{"latitude": "11.92893", "user_id": 1, "name": "Alice", "longitude": "78.27699"}
{"latitude": "11.8856167", "user_id": 2, "name": "Ian", "longitude": "78.4240911"}
{"latitude": "12.3191841", "user_id": 3, "name": "Jack", "longitude": "78.5072391"}
{"latitude": "13.807778", "user_id": 28, "name": "Charlie", "longitude": "76.714444"}
{"latitude": "13.4692815", "user_id": 7, "name": "Frank", "longitude": "-9.436036"}
{"latitude": "14.0894797", "user_id": 8, "name": "Eoin", "longitude": "77.18671"}
{"latitude": "13.038056", "user_id": 26, "name": "Stephen", "longitude": "76.613889"}
{"latitude": "14.1225", "user_id": 27, "name": "Enid", "longitude": "78.143333"}
{"latitude": "13.1229599", "user_id": 6, "name": "Theresa", "longitude": "77.2701202"}
{"latitude": "12.2559432", "user_id": 9, "name": "Jack", "longitude": "76.1048927"}
{"latitude": "12.240382", "user_id": 10, "name": "Georgina", "longitude": "77.972413"}
{"latitude": "13.2411022", "user_id": 4, "name": "Ian", "longitude": "77.238335"}
{"latitude": "13.1302756", "user_id": 5, "name": "Nora", "longitude": "77.2397222"}
{"latitude": "13.008769", "user_id": 11, "name": "Richard", "longitude": "77.1056711"}
{"latitude": "13.1489345", "user_id": 31, "name": "Alan", "longitude": "77.8422408"}
{"latitude": "13", "user_id": 13, "name": "Olive", "longitude": "76"}
{"latitude": "11.999447", "user_id": 14, "name": "Helen", "longitude": "-9.742744"}
{"latitude": "12.966", "user_id": 15, "name": "Michael", "longitude": "77.463"}
{"latitude": "12.366037", "user_id": 16, "name": "Ian", "longitude": "78.179118"}
{"latitude": "14.180238", "user_id": 17, "name": "Patricia", "longitude": "-5.920898"}
{"latitude": "13.0033946", "user_id": 39, "name": "Lisa", "longitude": "77.3877505"}
{"latitude": "12.228056", "user_id": 18, "name": "Bob", "longitude": "76.915833"}
{"latitude": "14.133333", "user_id": 24, "name": "Rose", "longitude": "77.433333"}
{"latitude": "55.033", "user_id": 19, "name": "Enid", "longitude": "78.112"}
{"latitude": "13.121111", "user_id": 20, "name": "Enid", "longitude": "-9.831111"}
{"latitude": "11.802", "user_id": 21, "name": "David", "longitude": "-9.442"}
{"latitude": "14.374208", "user_id": 22, "name": "Charlie", "longitude": "78.371639"}
{"latitude": "13.74412", "user_id": 29, "name": "Oliver", "longitude": "76.11167"}
{"latitude": "13.761389", "user_id": 30, "name": "Nick", "longitude": "76.2875"}
{"latitude": "14.080556", "user_id": 23, "name": "Eoin", "longitude": "77.361944"}
{"latitude": "12.833502", "user_id": 25, "name": "David", "longitude": "78.122366"}
{"latitude": "12.240382", "user_id": "10", "name": "Georgina", "longitude": "77.972413"}
[INFO]: [08-08-16 12:28:27] [__main__] [invite_friends:293] - Input Friend List file size is small : size in MB = 0
[ERROR]: [08-08-16 12:28:27] [utils] [utils:127] - Invalid Latitude coordinate: latitude should be in degrees - Latitude: 13
[ERROR]: [08-08-16 12:28:27] [utils] [utils:179] - Bad Data: Data not according to required structure: Name should be of type str or unicode and id of type int. Given data : type(data[name]) : <type 'unicode'> and type(data[id]) : <type 'unicode'>
[INFO]: [08-08-16 12:28:27] [__main__] [invite_friends:451] - Invitatoin Generation Time: 0.003044 seconds
[ERROR]: [08-08-16 12:28:27] [__main__] [invite_friends:388] - Found 2 bad_inputs
[INFO]: [08-08-16 12:28:27] [__main__] [invite_friends:419] - Found 11 qualified friends
[INFO]: [08-08-16 12:29:13] [__main__] [invite_friends:307] - Input Friend List file size is large : size in MB = 98.3882408142
[INFO]: [08-08-16 12:30:33] [__main__] [invite_friends:451] - Invitatoin Generation Time: 79.884979 seconds
[INFO]: [08-08-16 12:30:33] [__main__] [invite_friends:419] - Found 37 qualified friends
"""This module is the main module which actually does the processing of the data and prints the result, i.e. the list of
friends to send invitations to.
The module contains functions which read data from the data source and process the data synchronously or asynchronously depending
on the async setting taken from the configuration file. Their functionality varies a lot depending on the configuration settings.
This module also has several constants which can be configured or modified to change the processing and output of the application.
Configurable Constants:
1. SMALL_FILE_SIZE
2. FILE_INPUT_BUFFER_SIZE
3. SHOULD_VALIDATE_INDIVIDUAL_DATA
4. DISTANCE_RANGE
"""
# import logging
from concurrent.futures import ProcessPoolExecutor
from functools import partial
from operator import itemgetter
import json
import os.path
import math
import time
import invite_friends_config as config
from utils import is_valid_location, is_valid_friends_data, is_valid_config, LOCATION, SOURCE_LOCATION
from log_conf import get_logger
LOGGER = get_logger(__name__)
# Mean Earth radius in km; multiplied by the central angle to obtain a distance in km
RADIUS = 6371
# Size of file in MB up to which the input is considered small and is read in one go
SMALL_FILE_SIZE = 8
# Size hint in bytes passed to readlines() when a large input file is read in batches
FILE_INPUT_BUFFER_SIZE = SMALL_FILE_SIZE * 1024 * 1024
# Boolean flag: when True every record from the input source is checked with
# is_valid_friends_data before it is processed; when False the data is assumed correct
SHOULD_VALIDATE_INDIVIDUAL_DATA = True
# Distance range in km; a friend strictly closer than this gets an invitation
DISTANCE_RANGE = 100
# The global result list, holding the records of the qualifying friends
GLOBAL_RESULT = []
# The bad inputs, i.e. records from the input source that were rejected
bad_inputs = []
def calculate_distance(location_destination, location_source=SOURCE_LOCATION):
    """Return the spherical distance in km between a source and a destination location.

    Both locations are (latitude, longitude) pairs expressed in degrees. The
    destination is always checked with is_valid_location; the source is checked
    only when it differs from the default SOURCE_LOCATION.

    The distance is computed with the spherical law of cosines:

        d = acos(sin(phi1) * sin(phi2) + cos(phi1) * cos(phi2) * cos(delta_lambda)) * R

    where phi1 and phi2 are the latitudes in radians, delta_lambda is the
    longitude difference in radians and R is RADIUS (km).

    :param location_destination: the destination location
    :type location_destination: LOCATION, a namedtuple of latitude and longitude
    :param location_source: the source location
    :type location_source: LOCATION, a namedtuple of latitude and longitude
    :return: the spherical distance in km
    :rtype: float
    :raises ValueError: if a checked location is rejected by is_valid_location
    """
    if location_source != SOURCE_LOCATION and not is_valid_location(location_source):
        raise ValueError('Invalid source location: %r' % (location_source,))
    if not is_valid_location(location_destination):
        raise ValueError('Invalid destination location: %r' % (location_destination,))

    latitude_source = float(location_source.latitude)
    longitude_source = float(location_source.longitude)
    latitude_destination = float(location_destination.latitude)
    longitude_destination = float(location_destination.longitude)

    phi_1 = math.radians(latitude_source)
    phi_2 = math.radians(latitude_destination)
    delta_lambda = math.radians(longitude_destination - longitude_source)

    cos_delta_sigma = ((math.sin(phi_1) * math.sin(phi_2)) +
                       (math.cos(phi_1) * math.cos(phi_2) * math.cos(delta_lambda)))
    # Floating-point rounding can push the value marginally outside [-1, 1]
    # (e.g. for two identical points), which would make math.acos raise a
    # "math domain error"; clamp it back into the valid domain.
    cos_delta_sigma = max(-1.0, min(1.0, cos_delta_sigma))

    return RADIUS * math.acos(cos_delta_sigma)
# Worker which processes a batch of raw records and prepares the final result
def process_friends_data_worker(data, testing=False):
    """Sort every raw friend record in ``data`` into qualifying, non-qualifying or bad.

    Every item of ``data`` is decoded with json.loads. A record counts as bad
    input when it is not valid JSON, when SHOULD_VALIDATE_INDIVIDUAL_DATA is set
    and is_valid_friends_data rejects it, or when its distance cannot be
    calculated. A good record qualifies when its distance, as returned by
    calculate_distance, is below DISTANCE_RANGE.

    When ``testing`` is False, qualifying records are appended to GLOBAL_RESULT
    and bad records to bad_inputs. When ``testing`` is True the module-level
    lists are left untouched and local lists are used instead.

    :param data: the raw input records, one JSON document per item
    :type data: list
    :param testing: the condition to use when testing this function using pytest
    :type testing: bool
    :return: in testing mode, True when the classified records add up to
        len(data), else False; None when not testing
    :rtype: bool or None
    """
    bad_data_testing = []
    good_result_testing = []
    good_data_testing = []

    # Pick the destination lists once instead of branching on every record.
    bad_sink = bad_data_testing if testing else bad_inputs
    good_sink = good_result_testing if testing else GLOBAL_RESULT

    for friend_item in data:
        try:
            friend = json.loads(friend_item)
        except ValueError:
            # A malformed or blank line must not abort the whole batch; keep
            # the raw text so it shows up in the bad-input report.
            LOGGER.debug('Not valid JSON : data = %s ', friend_item)
            bad_sink.append(friend_item)
            continue

        if SHOULD_VALIDATE_INDIVIDUAL_DATA and not is_valid_friends_data(friend):
            LOGGER.debug('Not Valid Friends Data : data = %s ', friend)
            bad_sink.append(friend)
            continue

        try:
            # The key lookups live inside the try so a record without
            # coordinates is reported as bad when validation is switched off.
            destination = LOCATION(friend['latitude'], friend['longitude'])
            distance = calculate_distance(location_destination=destination)
        except Exception:
            LOGGER.debug('Unable to calculate distance : data = %s ', friend)
            bad_sink.append(friend)
            continue

        if distance < DISTANCE_RANGE:
            LOGGER.debug('distance qualified : %f for data = %s ', distance, friend)
            good_sink.append(friend)
        elif testing:
            # Only the testing bookkeeping needs the non-qualifying records.
            good_data_testing.append(friend)

    LOGGER.debug('GOOD_RESULT TILL NOW length : %d ', len(GLOBAL_RESULT))
    LOGGER.debug('bad_inputs TIll Now length : %d ', len(bad_inputs))

    if testing:
        classified = len(bad_data_testing) + len(good_result_testing) + len(good_data_testing)
        return classified == len(data)
# private function to check for testing data
def _dummy_process_data_worker_function(data):
"""This function is a dummy function which tries to mock the actual process_friends_data_worker when testing the
read_friend_list function using pytest.
It takes the input and checks for the lenth and existence and returns True or False.
:param data: the input data list taken fomr input file
:type data: list
:return: True or False or raises Exception if doesn't satisfy checks.
:rtype: bool or Exception
"""
if data is not None and len(data) > 0:
return True
raise Exception
def _collect_batch_in_pool_process(data):
    """Process one batch inside a pool process and return what it collected.

    Module-level lists mutated in a pool process are private to that process,
    so the results are handed back to the parent explicitly. Must only be run
    through ProcessPoolExecutor: it empties this process' result lists.

    :param data: one batch of raw input lines
    :type data: list
    :return: (qualifying records, bad records) found in this batch
    :rtype: tuple of two lists
    """
    # A pool process is reused for several batches, so start from empty lists
    # to return only what belongs to this batch.
    del GLOBAL_RESULT[:]
    del bad_inputs[:]
    process_friends_data_worker(data)
    return list(GLOBAL_RESULT), list(bad_inputs)


def read_friend_list(source_path, asynchronous, testing=False):
    """Read the friend list from ``source_path`` and hand it over for processing.

    Three approaches are used depending on the file size and ``asynchronous``:

    1. The file is at most SMALL_FILE_SIZE MB: it is read in one go and passed
       to process_friends_data_worker.
    2. The file is larger and ``asynchronous`` is not True: it is read in
       batches of roughly FILE_INPUT_BUFFER_SIZE bytes, each batch being passed
       to process_friends_data_worker in turn.
    3. The file is larger and ``asynchronous`` is True: the batches are spread
       over the processes of a concurrent.futures ProcessPoolExecutor, and the
       records they collect are merged into GLOBAL_RESULT and bad_inputs.

    When ``testing`` is True no batch is passed to the real worker.

    :param source_path: the source path of the input file
    :type source_path: str
    :param asynchronous: whether to process the file asynchronously, using multiple processes
    :type asynchronous: bool
    :param testing: the condition to use when testing this function using pytest
    :type testing: bool
    :return: True when ``testing`` is set and the small-file or synchronous
        path was taken; None otherwise (always None for the asynchronous path)
    :rtype: bool or None
    :raises OSError: if the size of ``source_path`` cannot be determined
    """
    try:
        # file size in MB
        file_size = os.path.getsize(source_path) / float(1024 * 1024)
    except OSError:
        LOGGER.error('File does not exist or permission is denied at path: %s ' % source_path)
        raise

    if file_size <= SMALL_FILE_SIZE:
        LOGGER.info('Input Friend List file size is small : size in MB = %d ' % file_size)
        # Small enough to be taken at once, hence no need for multiple processes.
        with open(source_path, 'rb') as input_file:
            friends_data = input_file.readlines()
        if not testing:
            process_friends_data_worker(friends_data)
        else:
            LOGGER.info('Testing read_friend_list functionality, hence not passing the opened file for processing')
    else:
        LOGGER.info('Input Friend List file size is large : size in MB = %s ' % str(file_size))
        with open(source_path, 'rb') as input_file:
            # Each call reads roughly FILE_INPUT_BUFFER_SIZE bytes of whole
            # lines; iteration stops at the first empty batch (end of file).
            batches = iter(partial(input_file.readlines, FILE_INPUT_BUFFER_SIZE), [])

            if asynchronous is True:
                LOGGER.debug('asynchronous is True')
                with ProcessPoolExecutor() as executor:
                    try:
                        if testing:
                            # Consume the results so a failing batch is noticed.
                            for _ in executor.map(_dummy_process_data_worker_function, batches, chunksize=1):
                                pass
                        else:
                            for good, bad in executor.map(_collect_batch_in_pool_process, batches, chunksize=1):
                                GLOBAL_RESULT.extend(good)
                                bad_inputs.extend(bad)
                    except Exception:
                        # Best effort, as before: report the failure and give up.
                        LOGGER.exception('Asynchronous processing failed for path: %s ' % source_path)
                        return None
                # The asynchronous path has never produced a return value.
                return None

            LOGGER.debug('asynchronous is False')
            for friends_data in batches:
                # sample item in friends_data:
                # {"latitude": "12.986375", "user_id": 12, "name": "Chris", "longitude": "77.043701"}
                if not testing:
                    process_friends_data_worker(friends_data)

    if testing:
        # Signals to the tests that the file was read to the end.
        return True
def print_bad_inputs(inputs=None):
"""This function is prints the bad inputs.
:param inputs: the list of bad inputs
:type inputs: list
"""
if inputs is None:
inputs = bad_inputs
count = len(inputs)
if count > 0:
LOGGER.error('Found %d bad_inputs ' % count)
print 'Bad Inputs : \n'
for i in xrange(count):
print str(i + 1) + '. ', inputs[i]
else:
print 'No Bad Inputs\n'
def print_invitation_list(invitation_list=None):
"""This function is prints the final good qualifying results.
:param invitation_list: the list of qualifying friends data
:type invitation_list: list
"""
if invitation_list is None:
invitation_list = GLOBAL_RESULT
if invitation_list == []:
LOGGER.info('Found 0 friends nearby to send invitations')
print 'Found 0 friends nearby to send invitations'
else:
invitation_list.sort(key=itemgetter('user_id'))
count = len(invitation_list)
LOGGER.info('Found %d qualified friends ' % count)
print 'Invitations in sorted order by id : \n'
for i in xrange(count):
print str(i + 1) + '. ' + 'Id: ', invitation_list[i]['user_id'], ' Name: ' + invitation_list[i]['name']
def invite_friends(friend_list_source, source_path=None, asynchronous=False):
    """Read the friend list, process it and print the bad inputs and the invitations.

    Only the 'file' source is implemented. If reading or processing the file
    fails, the error is logged and the function returns without printing
    any result.

    :param friend_list_source: the source of friend list, like file or redis or mongodb, etc.
    :type friend_list_source: str
    :param source_path: the path to source (None if source is not a file)
    :type source_path: str or None
    :param asynchronous: whether to process the data asynchronously or not
    :type asynchronous: bool
    :raises NotImplementedError: if ``friend_list_source`` is anything but 'file'
    """
    if friend_list_source != 'file':
        # Other types of sources like redis, mongodb, mysql are not supported yet.
        print('\nThese advanced features will be implemented soon\nKeep following\n\n')
        raise NotImplementedError

    start_time = time.time()
    try:
        read_friend_list(source_path, asynchronous)
    except Exception:
        # Still best effort, but leave a trace instead of failing silently;
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        LOGGER.exception('Unable to process friend list at path: %s ' % source_path)
        return

    LOGGER.debug('GOOD_RESULT FINAL length : %d ' % len(GLOBAL_RESULT))
    LOGGER.debug('bad_inputs FINAL length : %d ' % len(bad_inputs))
    reading_time = time.time() - start_time
    LOGGER.info('Invitatoin Generation Time: %f seconds ' % reading_time)
    print('Invitatoin Generation Time: %f seconds ' % reading_time)

    print_bad_inputs()
    print_invitation_list()
def main():
    """Run the application with the settings from the configuration module.

    Nothing is done when is_valid_config rejects the settings.
    """
    settings = config.inputs
    if is_valid_config(settings):
        invite_friends(settings['friend_list_source'],
                       settings['source_path'],
                       settings['async'])


if __name__ == "__main__":
    main()
"""This is the configuration file, which can be used to change the settings of the input and
change settings of how the application will run.
The configurable settings gives the power of customizing the tests, or the application as a whole.
This also makes the app much more development friendly.
This also makes the app user friendly.
"""
inputs = {
    # The source is configurable; it can also be mongodb, mysql, redis, etc.
    # The default is file.
    'friend_list_source': 'file',
    # If the source is a file, source_path is a full OS path including the file name,
    # like /usr/local/etc/invite_friends/friends.json, or any other path; a bare file
    # name denotes a file in the current working directory.
    # If the source is a db like mongodb, redis, etc., then make source_path = None.
    'source_path': 'friends.json',
    # This value denotes whether to use multiple processes to process the friend list.
    # If the friends data is huge, async=True would benefit from multiple processes.
    # There is no benefit with multithreading: Python has the infamous GIL and the
    # processing of the friends' data is mostly CPU bound, so multithreading would not
    # release the GIL and would in fact increase the overall running time. Hence async,
    # which is short for asynchronous, uses multiprocessing to speed up the execution
    # of the program.
    # Make it False if you don't want multiprocessing; under the hood it uses the
    # concurrent.futures package.
    'async': False
}
"""This module defines the base loggers that can be used to log any kinds of data be it, debug, info, error, warning, critical,
both in a log file and also in the I/O stream.
To change the name of log file, or log level for file and also for stream, just change the constants at the top level.
This is configurable as per your requirements.
"""
# Initate logging loggers, and handlers
import logging
# Layout of a log record: level, timestamp, logger name, module:line number and message
DEFAULT_FORMAT = '[%(levelname)s]: [%(asctime)s] [%(name)s] [%(module)s:%(lineno)d] - %(message)s'
# Layout of the %(asctime)s timestamp: day-month-year hour:minute:second
DEFAULT_DATE_FORMAT = '%d-%m-%y %H:%M:%S'
# Name of the log file passed to logging.basicConfig by get_logger
DEFAULT_LOG_FILE = 'invite_friends.log'
# Log level passed to logging.basicConfig for the file logging
DEFAULT_FILE_LOGLEVEL = logging.INFO
# Log level of the stream (console) handler
DEFAULT_STREAM_LOGLEVEL = logging.ERROR
# Log level set on every logger returned by get_logger
DEFAULT_LOGGER_LOGLEVEL = logging.INFO
def get_logger(module_name):
    """Return the logger for a module of the application.

    File logging is set up through logging.basicConfig and a stream handler
    with level DEFAULT_STREAM_LOGLEVEL is attached to the named logger.
    Calling the function again with the same name returns the same logger
    without attaching a second stream handler.

    :param module_name: the module name where the logger is called from
    :type module_name: str
    :return: The logger object
    :rtype: logging.Logger
    """
    # File logging; basicConfig does nothing once the root logger has handlers,
    # so repeated calls are harmless.
    logging.basicConfig(level=DEFAULT_FILE_LOGLEVEL, filename=DEFAULT_LOG_FILE,
                        format=DEFAULT_FORMAT, datefmt=DEFAULT_DATE_FORMAT)

    stream_logger = logging.getLogger(module_name)
    stream_logger.setLevel(DEFAULT_LOGGER_LOGLEVEL)

    # logging.getLogger returns the same object for the same name, so adding a
    # handler on every call would duplicate each console message. FileHandler
    # subclasses StreamHandler and is therefore excluded from the check.
    has_stream_handler = any(
        isinstance(handler, logging.StreamHandler) and not isinstance(handler, logging.FileHandler)
        for handler in stream_logger.handlers)
    if not has_stream_handler:
        stream_handler = logging.StreamHandler()
        stream_handler.setLevel(DEFAULT_STREAM_LOGLEVEL)
        stream_handler.setFormatter(logging.Formatter(DEFAULT_FORMAT, DEFAULT_DATE_FORMAT))
        stream_logger.addHandler(stream_handler)

    return stream_logger
python >=2.7, <3.0
concurrent.futures
pytest
================================================================================ test session starts ================================================================================
platform darwin -- Python 2.7.11, pytest-2.9.2, py-1.4.31, pluggy-0.3.1 -- /usr/local/opt/python/bin/python2.7
cachedir: .cache
rootdir: /Users/Roy/Documents/Github/sources/private/adwyze-interview/round1/invite_friends, inifile:
collected 9 items
test_invite_friends.py::test_calculate_distance_with_valid_inputs PASSED
test_invite_friends.py::test_calculate_distance_with_invalid_inputs PASSED
test_invite_friends.py::test_read_friend_list_with_valid_source_path PASSED
test_invite_friends.py::test_read_friend_list_with_invalid_source_path PASSED
test_invite_friends.py::test_process_friends_data_worker_with_valid_data PASSED
test_invite_friends.py::test_print_bad_inputs PASSED
test_invite_friends.py::test_print_invitation_list PASSED
test_invite_friends.py::test_invite_friends_with_source_as_file PASSED
test_invite_friends.py::test_invite_friends_with_source_not_as_file PASSED
============================================================================= 9 passed in 3.57 seconds ==============================================================================
================================================================================ test session starts ================================================================================
platform darwin -- Python 2.7.11, pytest-2.9.2, py-1.4.31, pluggy-0.3.1 -- /usr/local/opt/python/bin/python2.7
cachedir: .cache
rootdir: /Users/Roy/Documents/Github/sources/private/adwyze-interview/round1/invite_friends, inifile:
collected 10 items
test_utils.py::test_is_valid_coordinate_format_with_valid_coordinate PASSED
test_utils.py::test_is_valid_coordinate_format_with_Invalid_coordinate PASSED
test_utils.py::test_is_valid_coordinates_value_with_valid_coordinates PASSED
test_utils.py::test_is_valid_coordinates_value_with_Invalid_coordinates PASSED
test_utils.py::test_is_valid_location_with_valid_location PASSED
test_utils.py::test_is_valid_location_with_Invalid_location PASSED
test_utils.py::test_is_valid_friends_data_with_valid_data PASSED
test_utils.py::test_is_valid_friends_data_with_Invalid_data PASSED
test_utils.py::test_is_valid_config_with_good_configurations PASSED
test_utils.py::test_is_valid_config_with_bad_configurations PASSED
============================================================================= 10 passed in 0.21 seconds =============================================================================
""" This module does the testing of the module invite_friends.
NOTE: Comment or uncomment the 4 lines in the setup_module() function as needed.
Those 4 lines create a large file of 1 million entries.
You can leave them uncommented for the first run and comment them out afterwards; otherwise those 4 lines
run on every test run, which is time consuming because they create 1 million entries worth of data.
To run the test:
$ py.test test_invite_friends.py -v
"""
import pytest
import random
import json
import time
from invite_friends import calculate_distance, process_friends_data_worker, read_friend_list,\
print_bad_inputs, print_invitation_list, invite_friends
from utils import LOCATION
def setup_module(module):
    """Announce the module and create the large input file used by the tests.

    A failure to create the file is reported but does not stop the test run.
    """
    print("\nsetup_module module: %s \n\n" % module.__name__)
    try:
        create_large_input_file('friends_large.json', 1000000)
    except Exception as error:
        # Report the cause; a bare except would also swallow Ctrl-C during the
        # lengthy file generation.
        print('unable to create large input file: %s' % error)
def teardown_module(module):
    """Announce that the tests of ``module`` are finished."""
    message = "\n\nteardown_module module: %s " % module.__name__
    print(message)
# def setup_function(function):
# print ("setup_function function: %s " % function.__name__)
# def teardown_function(function):
# print ("teardown_function function: %s " % function.__name__)
def test_calculate_distance_with_valid_inputs():
    """calculate_distance returns the expected distance in km for valid destinations."""
    print
    expected_distances = [
        (LOCATION('12.986375', '77.043701'), 64.26480291995638),
        (LOCATION('80.123456', '-175.12345'), 8909.980105033259),
        (LOCATION('-85.654321', '175.654321'), 11511.948749243722),
    ]
    for destination, expected in expected_distances:
        # Compare with a tolerance: the last bits of a trigonometric result
        # may differ between platforms, so exact float equality is brittle.
        assert abs(calculate_distance(destination) - expected) < 1e-6
def test_calculate_distance_with_invalid_inputs():
    """calculate_distance raises for out-of-range, malformed or missing locations."""
    print
    invalid_coordinates = [
        ('112.986375', '77.043701'),
        ('80.1324242', '-181.000000'),
        ('12,134134', '77.123456'),
        (12.987654, '77.123456'),
        ('12.975310', '77.8ac765'),
        ('a12.457984', '77.87654b'),
    ]
    for latitude, longitude in invalid_coordinates:
        with pytest.raises(Exception):
            assert calculate_distance(LOCATION(latitude, longitude))
    # a single tuple instead of two separate fields
    with pytest.raises(Exception):
        assert calculate_distance(LOCATION(('12.986375', '77.043701')))
    # no destination at all
    with pytest.raises(Exception):
        assert calculate_distance()
def create_large_input_file(filename, num_of_entries):
    """Write ``num_of_entries`` random friend records to ``filename``, one JSON document per line.

    Each record has a random latitude and longitude formatted with 6 decimals,
    a random lowercase name and the entry index as user_id.
    """
    letters = 'abcdefgjhijklmnopqrstuvwzyz'

    def _coordinate(limit):
        return "{:.6f}".format(random.uniform(-limit, limit))

    def _name_part():
        return ''.join([random.choice(letters) for _ in xrange(random.randrange(5, 15))])

    def _record(user_id):
        # a name is made of two independently drawn parts
        return {'latitude': _coordinate(90), 'user_id': user_id,
                'name': _name_part() + _name_part(), 'longitude': _coordinate(180)}

    with open(filename, 'w') as output_file:
        for user_id in xrange(num_of_entries):
            output_file.write(json.dumps(_record(user_id)) + '\n')
def test_read_friend_list_with_valid_source_path():
print
s = time.time()
assert read_friend_list(source_path='friends.json', asynchronous=False, testing=True) is True
print 'Done'
print 'Reading took : %s seconds' % str(time.time() - s)
s = time.time()
assert read_friend_list(source_path='friends.json', asynchronous=True, testing=True) is True
print 'Done'
print 'Reading took : %s seconds' % str(time.time() - s)
s = time.time()
assert read_friend_list(source_path='friends_large.json',
asynchronous=False, testing=True) is True
print 'Done'
print 'Reading took : %s seconds' % str(time.time() - s)
s = time.time()
assert read_friend_list(source_path='friends_large.json',
asynchronous=True, testing=True) is None
print 'Done'
print 'Reading took : %s seconds' % str(time.time() - s)
def test_read_friend_list_with_invalid_source_path():
    """read_friend_list raises when the source path cannot be read."""
    print
    missing_sources = [
        ('friens.json', False),
        ('/usr/local/etc/friends.json', True),
    ]
    for path, use_async in missing_sources:
        with pytest.raises(Exception):
            assert read_friend_list(source_path=path, asynchronous=use_async, testing=True)
def test_process_friends_data_worker_with_valid_data():
    """The worker accounts for every record of the first batch of friends.json."""
    print
    with open('friends.json', 'rb') as friends_file:
        batch = friends_file.readlines(8192)
    outcome = process_friends_data_worker(batch, testing=True)
    assert outcome is True
def test_print_bad_inputs():
    """print_bad_inputs accepts an explicit list of records and returns None."""
    string_id_record = {"latitude": "12.240382", "user_id": "10", "name": "Georgina", "longitude": "77.972413"}
    float_longitude_record = {"latitude": "12.240382", "user_id": 10, "name": "Georgina", "longitude": 77.972413}
    print
    outcome = print_bad_inputs([string_id_record, float_longitude_record])
    assert outcome is None
def test_print_invitation_list():
    """print_invitation_list accepts an explicit list of friends and returns None."""
    print
    georgina = {"latitude": "12.240382", "user_id": 10, "name": "Georgina", "longitude": "77.972413"}
    ian = {"latitude": "13.2411022", "user_id": 4, "name": "Ian", "longitude": "77.238335"}
    nora = {"latitude": "13.1302756", "user_id": 5, "name": "Nora", "longitude": "77.2397222"}
    outcome = print_invitation_list([georgina, ian, nora])
    assert outcome is None
def test_invite_friends_with_source_as_file():
    """invite_friends returns None for the file source."""
    print
    outcome = invite_friends(friend_list_source='file', source_path='friends.json', asynchronous=False)
    assert outcome is None
def test_invite_friends_with_source_not_as_file():
    """invite_friends raises NotImplementedError for a source other than file."""
    print
    unsupported = dict(friend_list_source='mongodb', source_path=None, asynchronous=False)
    with pytest.raises(NotImplementedError):
        assert invite_friends(**unsupported)
"""This is the testing module for testing the utils module.
To run the test:
$ py.test test_utils.py -v
"""
from utils import is_valid_coordinate_format, is_valid_coordinates_value, is_valid_location,\
is_valid_friends_data, is_valid_config, LOCATION
def setup_module(module):
    """Announce that the tests of ``module`` are about to start."""
    message = "\nsetup_module module: %s \n\n" % module.__name__
    print(message)
def teardown_module(module):
    """Announce that the tests of ``module`` are finished."""
    message = "\n\nteardown_module module: %s " % module.__name__
    print(message)
# def setup_function(function):
# print ("setup_function function: %s " % function.__name__)
# def teardown_function(function):
# print ("teardown_function function: %s " % function.__name__)
def test_is_valid_coordinate_format_with_valid_coordinate():
    """Well-formed coordinates are accepted, with or without surrounding spaces."""
    print
    well_formed = (' 12.986375', '-152.986375 ', ' -152.986375 ')
    for coordinate in well_formed:
        assert is_valid_coordinate_format(coordinate) is True
def test_is_valid_coordinate_format_with_Invalid_coordinate():
    """Malformed coordinate strings, and a call with no argument, are rejected."""
    print
    malformed = (
        'a12.986375',   # letter before the digits
        '-152986375',   # no decimal point
        '12.986375a',   # letter after the digits
        '-1529 86375',  # space instead of a decimal point
        '15,86375',     # comma instead of a decimal point
        'ab.cdefghi',   # no digits at all
    )
    for candidate in malformed:
        assert is_valid_coordinate_format(candidate) is False
    # No argument at all falls back to the default
    assert is_valid_coordinate_format() is False
def test_is_valid_coordinates_value_with_valid_coordinates():
    """In-range (latitude, longitude) string pairs are accepted."""
    print
    in_range_pairs = (
        ('12.986375', '77.043701'),
        ('80.123456', '-175.12345'),
        ('-85.654321', '175.654321'),
    )
    for lat_text, lon_text in in_range_pairs:
        assert is_valid_coordinates_value(lat_text, lon_text) is True
def test_is_valid_coordinates_value_with_Invalid_coordinates():
    """Out-of-range pairs and calls with missing arguments are rejected."""
    print
    out_of_range_pairs = (
        ('112.986375', '77.043701'),     # latitude above 90
        ('80.1324242', '-181.000000'),   # longitude below -180
        ('-112.134134', '77.123456'),    # latitude below -90
    )
    for lat_text, lon_text in out_of_range_pairs:
        assert is_valid_coordinates_value(lat_text, lon_text) is False
    # Longitude omitted, then both omitted
    assert is_valid_coordinates_value('77.123456') is False
    assert is_valid_coordinates_value() is False
def test_is_valid_location_with_valid_location():
    """``LOCATION`` tuples holding in-range decimal strings are accepted."""
    print
    coordinate_pairs = (
        ('12.986375', '77.043701'),
        ('80.123456', '-175.12345'),
        ('-85.654321', '175.654321'),
    )
    for lat_text, lon_text in coordinate_pairs:
        place = LOCATION(lat_text, lon_text)
        assert is_valid_location(place) is True
def test_is_valid_location_with_Invalid_location():
    """A plain tuple, malformed ``LOCATION`` values and a missing argument are rejected."""
    print
    # A plain tuple is not a LOCATION instance
    plain_tuple = ('12.986375', '77.043701')
    assert is_valid_location(plain_tuple) is False

    rejected_fields = (
        ('80.1324242', '-181.000000'),  # longitude below -180
        ('12,134134', '77.123456'),     # comma instead of a decimal point
        (12.987654, '77.123456'),       # latitude is a float, not a string
        ('12.975310', '77.8ac765'),     # letters inside the longitude
        ('a12.457984', '77.87654b'),    # letters in both values
    )
    for lat_value, lon_value in rejected_fields:
        place = LOCATION(lat_value, lon_value)
        assert is_valid_location(place) is False

    # No argument at all falls back to the default
    assert is_valid_location() is False
def test_is_valid_friends_data_with_valid_data():
    """A record with all four keys and the expected value types is accepted."""
    print
    well_formed_record = {
        "latitude": "12.986375",
        "user_id": 12,
        "name": "Chris",
        "longitude": "77.043701",
    }
    assert is_valid_friends_data(well_formed_record) is True
def test_is_valid_friends_data_with_Invalid_data():
    """Check that ``is_valid_friends_data`` rejects malformed friend records.

    Each record below differs from a well-formed record in exactly one
    respect. Several of the earlier fixtures also carried a stray misspelled
    key (``latitud`` / ``id``), so they were rejected for the missing key
    rather than for the fault the case was written for.
    """
    print
    # A list wrapping a record is not a dict
    data = [{"latitud": "12.986375", "user_id": 12, "name": "Chris", "longitude": "77.043701"}]
    assert is_valid_friends_data(data) is False
    # 'latitude' key misspelled
    data = data[0]
    assert is_valid_friends_data(data) is False
    # latitude given as a float instead of a string
    data = {"latitude": 12.986375, "user_id": 12, "name": "Chris", "longitude": "77.043701"}
    assert is_valid_friends_data(data) is False
    # 'user_id' key missing ('id' supplied instead)
    data = {"latitude": "12.986375", "id": 12, "name": "Chris", "longitude": "77.043701"}
    assert is_valid_friends_data(data) is False
    # user_id given as a string instead of an int
    data = {"latitude": "12.986375", "user_id": "12afd3", "name": "Chris", "longitude": "77.043701"}
    assert is_valid_friends_data(data) is False
    # 'name' key missing ('username' supplied instead)
    data = {"latitude": "12.986375", "user_id": 12, "username": "Chris", "longitude": "77.043701"}
    assert is_valid_friends_data(data) is False
    # name given as a list instead of a string
    data = {"latitude": "12.986375", "user_id": 12, "name": ["Chris", "harris"], "longitude": "77.043701"}
    assert is_valid_friends_data(data) is False
    # 'longitude' key missing ('lon' supplied instead)
    data = {"latitude": "12.986375", "user_id": 12, "name": "Chris", "lon": "77.043701"}
    assert is_valid_friends_data(data) is False
    # longitude given as a float instead of a string
    data = {"latitude": "12.986375", "user_id": 12, "name": "Chris", "longitude": 77.043701}
    assert is_valid_friends_data(data) is False
    # No argument at all falls back to the default
    assert is_valid_friends_data() is False
def test_is_valid_config_with_good_configurations():
    """A config with a file source, the ``friends.json`` path and a bool ``async`` is accepted."""
    print
    well_formed_config = dict([
        ('friend_list_source', 'file'),
        ('source_path', 'friends.json'),
        ('async', True),
    ])
    assert is_valid_config(well_formed_config) is True
def test_is_valid_config_with_bad_configurations():
    """Step one config through a series of faults; every step must be rejected.

    The same dict is mutated in place from one assertion to the next, so the
    order of the steps matters.
    """
    print
    # A list wrapping the config is not a dict
    wrapped = [{'source': 'file', 'source_path': 'friends.json', 'async': True}]
    assert is_valid_config(wrapped) is False

    # 'source' is used where 'friend_list_source' is required
    config = wrapped[0]
    assert is_valid_config(config) is False

    # Correct key name, but the value is not a string
    config.pop('source')
    config.update({'friend_list_source': 1})
    assert is_valid_config(config) is False

    # A string that is not one of the supported sources
    config.update({'friend_list_source': 'invalid_source'})
    assert is_valid_config(config) is False

    # Supported source with a different source path
    config.update({'friend_list_source': 'file', 'source_path': 'path/to/source'})
    assert is_valid_config(config) is False

    # 'source_path' key removed
    config.pop('source_path')
    assert is_valid_config(config) is False

    # Path restored, but 'async' is an int rather than a bool
    config.update({'source_path': 'friends.json', 'async': 1})
    assert is_valid_config(config) is False

    # 'async' key removed
    config.pop('async')
    assert is_valid_config(config) is False

    # 'asynchronous' is supplied while 'async' is still absent
    config.update({'asynchronous': True})
    assert is_valid_config(config) is False

    # No argument at all falls back to the default
    assert is_valid_config() is False
"""Utility functions shared across the main program for recurring sub-problems,
such as validating coordinates, locations, friend records and configuration.
This module also defines constants that you can change to configure it to your requirements.
Configurable Constants:
1. SOURCE_LOCATION
2. FRIEND_LIST_SOURCES
"""
# import logging
import os.path
from collections import namedtuple
from log_conf import get_logger
# Module-level logger obtained from log_conf.get_logger; used by the validators below to report errors.
LOGGER = get_logger(__name__)
# Named (latitude, longitude) pair; is_valid_location only accepts instances of this type
# and requires both fields to be strings.
LOCATION = namedtuple('LOCATION', 'latitude, longitude')
# main source location, this is a constant location
SOURCE_LOCATION = LOCATION('12.9611159', '77.6362214')
# it can be further extended to include ('file', 'mongodb', 'redis', 'mysql')
FRIEND_LIST_SOURCES = ('file',)
def is_valid_coordinate_format(coordinate_point=None):
    """Check that a coordinate is written as a signed decimal number.

    After surrounding whitespace is stripped, the value must contain exactly
    one ``.``; the part before it (ignoring a single leading ``-``) and the
    part after it must each consist only of digits, e.g. ``'13.343335'`` or
    ``' -152.986375 '``.

    :param coordinate_point: the coordinate point to check the validity for.
    :type coordinate_point: str
    :return: True when the format is valid; False otherwise, including for
        missing, empty or non-string input.
    :rtype: bool
    """
    if not coordinate_point:
        return False
    try:
        text = coordinate_point.strip()
    except AttributeError:
        # Not string-like (e.g. a float): report a bad format instead of crashing
        return False
    if text.count('.') != 1:
        return False
    whole_part, fraction_part = text.split('.')
    # Only one leading minus sign is tolerated; '--1.5' still fails isdigit()
    if whole_part.startswith('-'):
        whole_part = whole_part[1:]
    return whole_part.isdigit() and fraction_part.isdigit()
def is_valid_coordinates_value(latitude=None, longitude=None):
    """Check that a latitude/longitude pair lies within the valid ranges.

    Latitude must satisfy ``-90 <= latitude <= 90`` and longitude
    ``-180 <= longitude <= 180`` (both bounds inclusive), in degrees.

    :param latitude: the latitude of the location.
    :type latitude: str
    :param longitude: the longitude of the location.
    :type longitude: str
    :return: True when both values are in range; False otherwise, including
        when a value is missing, cannot be converted to a number, or is NaN.
    :rtype: bool
    """
    if not latitude or not longitude:
        return False
    try:
        latitude_degrees = float(latitude)
        longitude_degrees = float(longitude)
    except (TypeError, ValueError):
        # Unparseable input is invalid rather than an error for the caller
        return False
    # Written as positive range checks so that NaN, for which every
    # comparison is False, is rejected instead of slipping through.
    return -90 <= latitude_degrees <= 90 and -180 <= longitude_degrees <= 180
def is_valid_location(location=None):
    """Validate a location's type, field types, coordinate format and value range.

    The checks run in that order; the first one that fails is logged through
    ``LOGGER`` and ends the validation.

    :param location: the location to check validity for.
    :type location: LOCATION, i.e the namedtuple type defined in the utils module
    :return: True when every check passes, False otherwise (including when
        no location is given).
    :rtype: bool
    """
    if not location:
        return False

    if not isinstance(location, LOCATION):
        type_message = 'Invalid Location type: location should be of type LOCATION, given location is of type : %s ' % str(type(location))
        LOGGER.error(type_message)
        return False

    latitude, longitude = location.latitude, location.longitude

    # Both fields must be text (str or unicode) before they can be parsed
    if not (isinstance(latitude, str) or isinstance(latitude, unicode)) or \
            not (isinstance(longitude, str) or isinstance(longitude, unicode)):
        field_types = (str(type(latitude)), str(type(longitude)))
        LOGGER.error('Invalid Location: latitude and longitude should both be of type str or unicode, given latitude type = %s and longitude type = %s ' % field_types)
        return False

    if not is_valid_coordinate_format(latitude):
        latitude_message = 'Invalid Latitude coordinate: latitude should be in degrees - Latitude: %s ' % str(latitude)
        LOGGER.error(latitude_message)
        return False

    if not is_valid_coordinate_format(longitude):
        longitude_message = 'Invalid Longitude coordinate: longitude should be in degrees - Longitude: %s ' % str(longitude)
        LOGGER.error(longitude_message)
        return False

    if is_valid_coordinates_value(latitude, longitude):
        return True

    LOGGER.error(
        'Invalid coordinate values: Should be -90<=latitude<=90 and -180<=longitude<=180 Given are : %s, %s ' % (latitude, longitude))
    return False
def is_valid_friends_data(data=None):
    """Validate the structure of one friend record read from the input file.

    A record is valid when it is a dict containing the keys ``latitude``,
    ``longitude``, ``name`` and ``user_id``, where ``name``, ``latitude`` and
    ``longitude`` are str or unicode and ``user_id`` is an int (booleans are
    not accepted). Only keys and value types are checked here; the
    coordinate values themselves are not parsed or range-checked.

    :param data: the data item in a single line read from the input file.
    :type data: dict (after decoding via json.loads)
    :return: True when the record is valid, False otherwise (the reason is
        logged through ``LOGGER``).
    :rtype: bool
    """
    if not data:
        return False

    if not isinstance(data, dict):
        # logs the error regarding invalid friend data
        LOGGER.error('Bad Data: Data not according to required structure: Data should be in dictionary form')
        return False

    required_keys = ('latitude', 'longitude', 'name', 'user_id')
    if any(key not in data for key in required_keys):
        # logs the error regarding invalid inputs keys
        LOGGER.error(
            'Invalid Data keys. Data should have all the 4 keys, [longitude], [latitude], [name] and [user_id]')
        return False

    name = data['name']
    user_id = data['user_id']
    name_is_text = isinstance(name, str) or isinstance(name, unicode)
    # bool is a subclass of int, so a JSON true/false would otherwise pass as an id
    user_id_is_int = isinstance(user_id, int) and not isinstance(user_id, bool)
    if not name_is_text or not user_id_is_int:
        # logs the error regarding invalid friend data value types
        LOGGER.error(
            'Bad Data: Data not according to required structure: '
            'Name should be of type str or unicode and user_id of type int. '
            'Given data : type(data[name]) : %s and type(data[user_id]) : %s '
            % (str(type(name)), str(type(user_id))))
        return False

    latitude = data['latitude']
    longitude = data['longitude']
    if not (isinstance(latitude, str) or isinstance(latitude, unicode)) or \
            not (isinstance(longitude, str) or isinstance(longitude, unicode)):
        # logs the error regarding invalid friend data value types
        LOGGER.error(
            'Bad Data: Data not according to required structure: '
            'Latitude and Longitude should be of type str or unicode. '
            'Given data : type(data[latitude]) : %s and type(data[longitude]) : %s '
            % (str(type(latitude)), str(type(longitude))))
        return False

    return True
def is_valid_config(config_data=None):
    """Check whether the config taken from the configuration file is valid.

    The config must be a dict with the keys ``friend_list_source``,
    ``source_path`` and ``async``. ``friend_list_source`` must be a str listed
    in ``FRIEND_LIST_SOURCES``; ``source_path`` must be a str naming an
    existing path when the source is ``'file'`` and ``None`` otherwise;
    ``async`` must be a bool. ``SOURCE_LOCATION`` must also be a valid
    location.

    :param config_data: the config from the configuration file
    :type config_data: dict
    :return: True if the config is valid, False otherwise (the reason is
        logged through ``LOGGER``).
    :rtype: bool
    """
    if not config_data:
        return False

    if not isinstance(config_data, dict):
        # logs the error regarding invalid inputs data type
        LOGGER.error('Inputs is not valid: Inputs in settings should be a dictionary object')
        return False

    required_keys = ('friend_list_source', 'source_path', 'async')
    if any(key not in config_data for key in required_keys):
        # logs the error regarding invalid inputs keys
        LOGGER.error(
            'Invalid Inputs keys. Settings\' inputs dictionary should have all the 3 keys, [friend_list_source], [source_path] and [async]')
        return False

    friend_list_source = config_data['friend_list_source']
    source_path = config_data['source_path']
    asynchronous = config_data['async']

    if not isinstance(friend_list_source, str):
        # logs the error regarding invalid inputs type
        LOGGER.error('Invalid source type. Source should be of type str.')
        return False

    if friend_list_source not in FRIEND_LIST_SOURCES:
        # logs the error involving bad source value
        LOGGER.error('Invalid source value. Source Value should be from %s ' % str(FRIEND_LIST_SOURCES))
        # This return used to be missing: an unsupported source combined with
        # source_path=None fell through every later check and was accepted.
        return False

    if (not isinstance(source_path, str) and friend_list_source == 'file') or\
            (source_path is not None and friend_list_source != 'file'):
        # logs the error regarding invalid inputs type
        LOGGER.error(
            'Invalid Source Path type. Source Path should be of type str when source if file or None if source is not file. ')
        return False

    if not isinstance(asynchronous, bool):
        # logs the error regarding invalid inputs type
        LOGGER.error('Invalid async type. Async should of type bool, i.e, eiher True or False.')
        return False

    if friend_list_source == 'file' and not os.path.exists(source_path):
        # logs the error regarding invalid source path value
        LOGGER.error('Invalid source path value. source path should a proper directory path')
        return False

    # The configured source location is validated as part of the config check
    if not is_valid_location(SOURCE_LOCATION):
        return False

    return True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment