Skip to content

Instantly share code, notes, and snippets.

@iwouldnot
Last active December 1, 2017 11:06
Show Gist options
  • Save iwouldnot/2667f3ae1cb0c2d388abab737d7c36af to your computer and use it in GitHub Desktop.
Save iwouldnot/2667f3ae1cb0c2d388abab737d7c36af to your computer and use it in GitHub Desktop.
import urllib
from queue import Queue
from threading import Thread
from time import time
import lxml.html
import pandas as pd
from tqdm import tqdm
# Song table loaded once at import time from the file 'no_lyrics'.
# It is a module-level global: the worker threads below read each row's
# 'artist' and 'title' from it and write the fetched text into 'lyrics'.
df = pd.read_csv('no_lyrics')
class LyricGetter(Thread):
    """Worker thread that fetches lyrics for row ids pulled from a queue.

    Every job is an index label of the module-level ``df``. The worker reads
    that row's ``artist`` and ``title``, calls ``Song(artist, title).lyricwikia()``
    and stores the result in the row's ``lyrics`` column.

    NOTE(review): ``Song`` is neither defined nor imported in the visible
    source; it has to be provided elsewhere for a lookup to succeed.
    """

    def __init__(self, queue):
        """Initialise the thread.

        Args:
            queue: Queue the worker takes row ids from.
        """
        super().__init__()
        self.queue = queue

    def run(self):
        """Process row ids from the queue in an endless loop.

        A failing lookup is reported and skipped: the job is still marked as
        done and the worker keeps serving the queue.
        """
        while True:
            row_id = self.queue.get()
            try:
                # Single .loc[row, column] lookup instead of chained indexing.
                artist = df.loc[row_id, 'artist']
                title = df.loc[row_id, 'title']
                song = Song(artist, title)
                df.loc[row_id, 'lyrics'] = song.lyricwikia()
            except Exception as error:
                # Worker-loop boundary: one bad row must not kill the thread,
                # otherwise the rows still waiting in the queue lose a worker.
                print('failed ', row_id, repr(error))
            finally:
                # Always acknowledge the job; an unacknowledged item makes
                # queue.join() block forever.
                self.queue.task_done()
def main(num_workers=8, limit=100):
    """Fetch lyrics for the leading rows of ``df`` with a pool of worker threads.

    Args:
        num_workers: Number of ``LyricGetter`` threads to start.
        limit: Number of leading rows of ``df`` to enqueue; ``None`` enqueues
            every row.

    NOTE(review): the lyrics end up only in the in-memory ``df``; nothing in
    the visible source writes the frame back to disk.
    """
    queue = Queue()
    for _ in range(num_workers):
        getter = LyricGetter(queue)
        # Daemon threads: the workers loop forever, so they must not keep the
        # process alive once main() has returned.
        getter.daemon = True
        getter.start()
    # Hand the workers the row ids to look up (the progress bar tracks
    # enqueueing, not completed downloads).
    for row_id in tqdm(df.index[:limit]):
        print('go ', row_id)
        queue.put(row_id)
    # Block until every queued row id has been marked as done.
    queue.join()
# Start the download only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment