Last active
August 29, 2015 14:16
-
-
Save paitonic/3ac62ff79f4cda199f58 to your computer and use it in GitHub Desktop.
This script parses a Radio-T chat log and prints the starting time of each subject.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
import math
import sys
from datetime import datetime

import lxml.html
# Nickname whose chat messages are inspected for subject announcements.
radiot_nick = 'jc-radio-t'

# URL of a podcast chat log; {number} is the episode number.
podcast_url_template = 'http://chat.radio-t.com/logs/radio-t-{number}.html'

# Episode used when no number is given on the command line.
default_podcast_number = 422

# A message must contain this marker to be treated as a subject.
subject_marker = '-->'

# Messages containing this text are never treated as subjects.
# NOTE(review): presumably the show's general tagline rather than a real
# topic -- confirm against an actual chat log.
excluded_tagline = u'Подкаст выходного дня - импровизации на темы высоких технологий'

# Format of the time stamps attached to chat messages (no date part).
time_format = '%H:%M:%S'


def podcast_url(number):
    """Return the chat log URL for podcast episode *number*."""
    return podcast_url_template.format(number=number)


def fetch_chat_messages(url):
    """Download and parse the chat log at *url*.

    Returns the elements of the page that carry the ``message`` class.
    """
    root = lxml.html.parse(url).getroot()
    return root.find_class('message')


def _first_text(element, css_class):
    """Return the text of the first child with *css_class*, or None."""
    matches = element.find_class(css_class)
    if not matches:
        return None
    return matches[0].text_content()


def is_subject_message(nick, text):
    """Tell whether a chat message announces a subject.

    A message qualifies when it was posted by ``radiot_nick``, contains
    ``subject_marker`` and does not contain ``excluded_tagline``.
    """
    return (nick == radiot_nick
            and subject_marker in text
            and excluded_tagline not in text)


def extract_subjects(chat_messages):
    """Collect the subject announcements among *chat_messages*.

    Each element is expected to expose ``find_class``; the nickname, text
    and date are read from its ``nickname``, ``text`` and ``date`` children.
    Messages missing any of the three are skipped instead of raising
    IndexError.

    Returns a list of ``{'subject': ..., 'date': ...}`` dicts in chat order.
    """
    subjects = []
    for message in chat_messages:
        nick = _first_text(message, 'nickname')
        text = _first_text(message, 'text')
        date = _first_text(message, 'date')
        if nick is None or text is None or date is None:
            continue
        if is_subject_message(nick, text):
            subjects.append({'subject': text, 'date': date})
    return subjects


def compute_start_minutes(timestamps):
    """Return each subject's start offset in whole minutes.

    *timestamps* are ``HH:MM:SS`` strings in chronological order.  The first
    one is taken as offset 0.  Gaps between consecutive stamps are summed in
    seconds and the running total is rounded up to a minute once per
    subject, so rounding errors do not pile up from subject to subject.

    ``timedelta.seconds`` is always non-negative, so a gap that crosses
    midnight (e.g. 23:59:30 -> 00:00:30) still counts as 60 seconds.

    Raises ValueError if a stamp does not match ``time_format``.
    """
    starts = []
    elapsed_seconds = 0
    previous = None
    for stamp in timestamps:
        current = datetime.strptime(stamp, time_format)
        if previous is not None:
            elapsed_seconds += (current - previous).seconds
        starts.append(int(math.ceil(elapsed_seconds / 60.0)))
        previous = current
    return starts


def format_subject(subject):
    """Return the output line for a subject dict with 'start' and 'subject'."""
    return u"{stime}:00 - {subject}".format(stime=subject['start'],
                                            subject=subject['subject'])


def main(argv=None):
    """Print every subject of a podcast together with its starting time.

    *argv* is the list of command-line arguments (defaults to
    ``sys.argv[1:]``); its optional first item is the episode number.
    """
    args = sys.argv[1:] if argv is None else argv
    number = int(args[0]) if args else default_podcast_number

    radiot_subjects = extract_subjects(fetch_chat_messages(podcast_url(number)))
    starts = compute_start_minutes([s['date'] for s in radiot_subjects])
    for subject, start in zip(radiot_subjects, starts):
        subject['start'] = start
        # Single-argument print() works on both Python 2 and Python 3.
        print(format_subject(subject))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment