Skip to content

Instantly share code, notes, and snippets.

@paitonic
Last active August 29, 2015 14:16
Show Gist options
  • Save paitonic/3ac62ff79f4cda199f58 to your computer and use it in GitHub Desktop.
Save paitonic/3ac62ff79f4cda199f58 to your computer and use it in GitHub Desktop.
This script parses a Radio-T chat log and prints the starting time of each subject.
# -*- coding: utf-8 -*-
import lxml.html
import math
from datetime import datetime
# Nickname of the bot whose messages are inspected for subject announcements.
radiot_nick = 'jc-radio-t'
# URL of the chat log that is parsed when the script is run directly.
podcast = 'http://chat.radio-t.com/logs/radio-t-{number}.html'.format(number=422)

# Substring that marks a subject announcement in a bot message.
_SUBJECT_MARKER = '-->'
# A bot message containing this text is never treated as a subject, even
# though it may contain the subject marker.
_EXCLUDED_TEXT = u'Подкаст выходного дня - импровизации на темы высоких технологий'
# Format of the per-message timestamps in the chat log (time of day only).
_TIME_FORMAT = '%H:%M:%S'


def is_subject(nick, text, bot_nick=radiot_nick):
    """Return True if a chat message is a subject announcement.

    A message qualifies when it was written by ``bot_nick``, contains the
    ``-->`` marker and does not contain the excluded podcast tagline.
    """
    return (nick == bot_nick and
            _SUBJECT_MARKER in text and
            _EXCLUDED_TEXT not in text)


def compute_start_minutes(timestamps):
    """Return the start offset, in whole minutes, of each timestamp.

    ``timestamps`` is a sequence of ``HH:MM:SS`` strings in chat order. The
    first entry starts at minute 0; every later entry starts at the elapsed
    time since the first one, rounded up to a whole minute.

    Elapsed time is accumulated in exact seconds and rounded only when the
    offset is produced, so rounding error does not build up from one subject
    to the next. An empty input yields an empty list.
    """
    starts = []
    elapsed_seconds = 0
    previous = None
    for stamp in timestamps:
        current = datetime.strptime(stamp, _TIME_FORMAT)
        if previous is not None:
            # The timestamps carry no date, so a log that crosses midnight
            # produces a negative difference; timedelta normalizes that to
            # days=-1 plus a positive ``seconds`` part, which is the real gap.
            elapsed_seconds += (current - previous).seconds
        starts.append(int(math.ceil(elapsed_seconds / 60.0)))
        previous = current
    return starts


def collect_subjects(url=podcast, bot_nick=radiot_nick):
    """Fetch the chat log at ``url`` and return its subject announcements.

    Each returned dict has the keys ``'subject'`` (message text) and
    ``'date'`` (the message's timestamp text), in the order the messages
    appear in the log.
    """
    root = lxml.html.parse(url).getroot()
    subjects = []
    for message in root.find_class('message'):
        nick_nodes = message.find_class('nickname')
        text_nodes = message.find_class('text')
        date_nodes = message.find_class('date')
        # Skip messages missing any of the three parts instead of failing
        # with IndexError on the first incomplete element.
        if not (nick_nodes and text_nodes and date_nodes):
            continue
        text = text_nodes[0].text_content()
        if is_subject(nick_nodes[0].text_content(), text, bot_nick):
            subjects.append({'subject': text,
                             'date': date_nodes[0].text_content()})
    return subjects


def main():
    """Print every subject of the configured podcast with its start time."""
    subjects = collect_subjects()
    starts = compute_start_minutes([entry['date'] for entry in subjects])
    for entry, start in zip(subjects, starts):
        entry['start'] = start
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(u"{stime}:00 - {subject}".format(stime=entry['start'],
                                               subject=entry['subject']))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment