Created
          October 2, 2012 02:39 
        
      - 
      
 - 
        
Save ernestom/3815865 to your computer and use it in GitHub Desktop.  
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
# CSV column headers of the report (0-based positions are used as indexes below):
# URL,Ok Volume (MB),Error Volume (MB),Ok Hits,0xx,200,206,2xx,302,304,3xx,404,4xx,Offloaded Hits,Origin Hits,Origin OK Volume (MB) ,Origin Error Volume (MB)
def parse_file(file):
    """Read a comma-separated report and return its data rows.

    Lines starting with ``#`` (the header comments) and blank lines are
    skipped. Every remaining line has its line ending removed and is split
    on commas.

    :param file: path of the report to read.
    :returns: a list of rows, each row being a list of string fields.
    """
    rows = []
    # The context manager guarantees the handle is closed, even on error.
    with open(file) as handle:
        for raw in handle:
            if raw.startswith('#'):
                continue
            record = raw.rstrip('\r\n')
            # A blank line would produce a one-field row that breaks the
            # index-based accessors used on the rows, so drop it here.
            if not record.strip():
                continue
            rows.append(record.split(','))
    return rows
def is_cached(line):
    """Return True when the row reports at least one offloaded hit.

    :param line: a parsed report row; field 13 holds the offloaded hits.
    """
    offloaded_hits = int(line[13])
    return offloaded_hits > 0
def is_web_page_with_traffic(line):
    """Return True when the row is a web page that served some OK volume.

    A URL counts as a web page when it ends with ``.jsp``, ``.htm``,
    ``.html`` or a trailing slash. The previous check compared the last
    four characters against ``'html'`` (no dot), so any URL merely ending
    in the letters "html" was misclassified as a page.

    :param line: a parsed report row; field 0 is the URL and field 1 the
        OK volume.
    :returns: True only if the URL looks like a page and the volume is > 0.
    """
    url = line[0]
    # str.endswith accepts a tuple, so one call covers every suffix.
    is_page = url.endswith(('.jsp', '.htm', '.html', '/'))
    # Parsed unconditionally so a malformed volume still raises, as before.
    has_traffic = float(line[1]) > 0
    return is_page and has_traffic
def get_cached_objects_percentage(lines):
    """Return the share of rows that are cached, formatted as a percentage.

    A row is cached when ``is_cached`` says so.

    :param lines: list of parsed report rows.
    :returns: a string such as ``'42.50 %'``; ``'0.00 %'`` when there are
        no rows at all (previously this raised ZeroDivisionError).
    """
    if not lines:
        return '0.00 %'
    cached = sum(1 for l in lines if is_cached(l))
    return '%.2f %%' % (100.0 / len(lines) * cached)
def get_avg_size_if(lines, callback):
    """Return the mean per-hit size of the rows accepted by ``callback``.

    For each accepted row the size is field 1 (OK volume, in MB according
    to the report header) divided by field 3 (OK hits). Rows with zero hits
    contribute a size of 0. The result is the unweighted mean of those
    per-row sizes, converted from MB to KB.

    :param lines: list of parsed report rows.
    :param callback: predicate taking a row; only rows for which it returns
        a true value are included.
    :returns: a string such as ``'12.34 KB'``; ``'0.00 KB'`` when no row is
        accepted (previously this raised ZeroDivisionError).
    """
    sizes = []
    for line in lines:
        if not callback(line):
            continue
        try:
            size = float(line[1]) / float(line[3])
        except ZeroDivisionError:
            # No hits recorded for this row: treat its size as zero.
            size = 0
        sizes.append(size)
    if not sizes:
        # Nothing matched; avoid dividing by an empty count below.
        return '0.00 KB'
    return '%.2f KB' % (sum(sizes) / len(sizes) * 1024)
def get_avg_cached_object_size(lines):
    """Return the formatted average size of the rows ``is_cached`` accepts.

    :param lines: list of parsed report rows.
    """
    # The predicate is passed directly; wrapping it in a lambda adds nothing.
    return get_avg_size_if(lines, is_cached)
def get_avg_web_page_size(lines):
    """Return the formatted average size of web-page rows with traffic.

    Rows are selected with ``is_web_page_with_traffic``.

    :param lines: list of parsed report rows.
    """
    # The predicate is passed directly; wrapping it in a lambda adds nothing.
    return get_avg_size_if(lines, is_web_page_with_traffic)
# Report analysed when no path is supplied.
DEFAULT_REPORT_PATH = (
    '/Users/ernesto/Downloads/'
    'dsa_urls_volume_164077_10-01-2012-07-55-50.csv'
)


def main(path=DEFAULT_REPORT_PATH):
    """Parse a report and print the three summary figures.

    Prints, in order: the percentage of cached objects, the average cached
    object size, and the average web page size.

    :param path: report file to analyse; defaults to the original
        hard-coded location so a bare ``main()`` behaves as before.
    """
    lines = parse_file(path)
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(get_cached_objects_percentage(lines))
    print(get_avg_cached_object_size(lines))
    print(get_avg_web_page_size(lines))


if __name__ == '__main__':
    import sys

    # An optional first command-line argument overrides the default path;
    # the slice is empty when no argument is given, so main() uses its default.
    main(*sys.argv[1:2])
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment