Skip to content

Instantly share code, notes, and snippets.

@douo
Created December 23, 2012 04:38
Show Gist options
  • Save douo/4361991 to your computer and use it in GitHub Desktop.
Save douo/4361991 to your computer and use it in GitHub Desktop.
Lyrics Grabber2 的千千静听歌词抓取脚本
# -*- coding: utf-8 -*-
import encodings.utf_8
import math
import random
import re
import sys
import traceback
import urllib
import urllib2
from xml.dom import minidom

from LevenshteinDistance import LevenshteinDistance
from grabber import LyricProviderBase
class TTPlayerCNC(LyricProviderBase):
    """Lyric provider that downloads LRC lyrics from the TTPlayer server.

    For every track the provider searches the server by artist and title,
    picks the candidate whose artist/title are closest (Levenshtein
    distance) to the track's tags, computes the verification code the
    server asks for and downloads the lyric text.

    This is Python 2 code (``urllib2``, ``unicode``).
    """

    _SEARCH_URL = ("http://ttlrcct2.qianqian.com/dll/lyricsvr.dll"
                   "?sh?Artist=%s&Title=%s&Flags=0")
    _DOWNLOAD_URL = ("http://ttlrcct2.qianqian.com/dll/lyricsvr.dll"
                     "?dl?Id=%s&Code=%d")
    _DOWNLOAD_HEADERS = {
        'User-agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
    }
    _MASK32 = 0x00000000FFFFFFFF

    # Bracketed segments: (), [], {} and full-width parentheses.  The last
    # alternative is U+FF08 ... U+FF09 spelled as UTF-8 bytes, because the
    # text is still a byte string when this pattern is applied.  The original
    # pattern ended in "(.*?)", which only ever matches the empty string, so
    # full-width parenthesised text was never removed.
    _BRACKETED_RE = re.compile(
        r'\(.*?\)|\[.*?\]|\{.*?\}|\xef\xbc\x88.*?\xef\xbc\x89')
    # Runs of ASCII space and punctuation.
    _ASCII_SYMBOLS_RE = re.compile(r'[ -/:-@[-`{-~]+')
    # Runs of full-width / CJK punctuation.
    _FULLWIDTH_SYMBOLS_RE = re.compile(
        u'[\u2014\u2018\u201c\u2026\u3001\u3002\u300a\u300b\u300e\u300f'
        u'\u3010\u3011\u30fb\uff01\uff08\uff09\uff0c\uff1a\uff1b\uff1f'
        u'\uff5e\uffe5]+')

    def GetName(self):
        """Return the provider's display name."""
        return "千千静听(LRC)"

    def GetDescription(self):
        """Return a one-line description of the provider."""
        return "从千千静听服务器下载歌词(LRC)"

    def GetURL(self):
        """Return the home page of the lyric service."""
        return "http://www.ttplayer.com"

    def GetVersion(self):
        """Return the version string of this provider script."""
        return "0.3"

    def Query(self, handles, status, abort):
        """Fetch lyrics for every track in *handles*.

        Args:
            handles: iterable of track handles; each must provide
                ``Format(pattern)``.
            status: progress object; ``Advance()`` is called once per track.
            abort: cancellation object; ``Aborting()`` is polled once per
                track.

        Returns:
            A list with one entry per processed track: the lyric text, or
            ``''`` when nothing was found or the lookup failed.  If the
            operation is aborted, the list holds only the tracks processed
            so far.
        """
        result = []
        for handle in handles:
            status.Advance()
            if abort.Aborting():
                return result
            try:
                result.append(self._FetchLyric(handle))
            except Exception:
                # Best effort: report the failure and move on to the next
                # track instead of aborting the whole batch.
                traceback.print_exc(file=sys.stdout)
                result.append('')
        return result

    def _FetchLyric(self, handle):
        """Return the lyric text for one track, or '' when none is found."""
        artist = handle.Format("[%artist%]")
        title = handle.Format("[%title%]")

        search_url = self._SEARCH_URL % (
            self.ToQianQianHexString(artist),
            self.ToQianQianHexString(title))
        response = urllib.urlopen(search_url)
        try:
            search_xml = response.read()  # the reply is UTF-8 encoded
        finally:
            response.close()
        doc = minidom.parseString(search_xml)

        # Keep the first candidate with the smallest combined edit distance.
        # The attributes are encoded to UTF-8 so that both sides of the
        # comparison use the same encoding.
        best = None
        best_distance = None
        for entry in doc.getElementsByTagName("lrc"):
            found_artist = entry.getAttribute("artist")
            found_title = entry.getAttribute("title")
            distance = (
                LevenshteinDistance(artist, found_artist.encode("utf-8")) +
                LevenshteinDistance(title, found_title.encode("utf-8")))
            if best_distance is None or distance < best_distance:
                best_distance = distance
                best = entry.getAttribute("id"), found_artist, found_title
        if best is None:
            return ''

        lrc_id, found_artist, found_title = best
        code = self.CreateQianQianCode(lrc_id, found_artist, found_title)
        request = urllib2.Request(
            self._DOWNLOAD_URL % (lrc_id, code), None, self._DOWNLOAD_HEADERS)
        response = urllib2.urlopen(request)
        try:
            lyric = response.read()
        finally:
            response.close()

        if "Search ID or Code error!" in lyric:
            return ''
        return lyric

    def QianQianStringFilter(self, string):
        """Normalise an artist or title string for the search request.

        Lower-cases the text, drops bracketed segments and punctuation,
        converts Traditional Chinese to Simplified via ``translate`` (a
        network call) and returns the result as ``unicode``.

        NOTE(review): *string* is assumed to be a UTF-8 byte string, as the
        later ``unicode(s, 'utf_8')`` implies -- confirm against the host.
        """
        s = string.lower()
        # Drop bracketed segments, including full-width parentheses.
        s = self._BRACKETED_RE.sub('', s)
        # Drop half-width symbols: spaces, commas, etc.
        s = self._ASCII_SYMBOLS_RE.sub('', s)
        # Traditional -> Simplified Chinese.  Not perfect: some characters
        # the server expects converted are left unchanged.
        s = translate(s, 'zh-tw', 'zh-cn')
        s = unicode(s, 'utf_8')
        # Drop full-width symbols.
        return self._FULLWIDTH_SYMBOLS_RE.sub('', s)

    def ToHexStringUnicode(self, string):
        """Return each character of *string* as hex: low byte, then high byte.

        NOTE: for code points above U+FFFF the "high byte" part is wider
        than two hex digits.
        """
        return ''.join(
            "%02X%02X" % (ord(ch) & 0xff, ord(ch) >> 8) for ch in string)

    def ToHexString(self, string):
        """Return each character of *string* as upper-case hex of ``ord``."""
        return ''.join("%02X" % ord(ch) for ch in string)

    def ToQianQianHexString(self, string, RequireUnicode=True):
        """Hex-encode *string* for use in a request.

        With ``RequireUnicode`` true the string is first normalised with
        ``QianQianStringFilter`` and encoded two bytes per character;
        otherwise it is encoded as-is, one hex pair per character.
        """
        if RequireUnicode:
            return self.ToHexStringUnicode(self.QianQianStringFilter(string))
        return self.ToHexString(string)

    def Conv(self, i):
        """Reduce *i* modulo 2**32, then shift by 2**32 depending on its sign.

        ``r = i % 2**32`` is never negative in Python.  For ``i >= 0`` a
        remainder above 2**31 is moved down by 2**32; for ``i < 0`` a
        remainder below 2**31 is moved up by 2**32.  The result is always
        congruent to *i* modulo 2**32.
        """
        r = i % 4294967296
        if i >= 0 and r > 2147483648:
            r = r - 4294967296
        elif i < 0 and r < 2147483648:
            r = r + 4294967296
        return r

    def CreateQianQianCode(self, lrcId, artist, title):
        """Compute the ``Code`` value for the lyric download request.

        Args:
            lrcId: lyric id from the search result (anything ``int()``
                accepts).
            artist: artist exactly as returned by the search.
            title: title exactly as returned by the search.

        Returns:
            The verification code as an integer.
        """
        lrcId = int(lrcId)
        mask = self._MASK32

        # UTF-8 bytes of artist+title, reinterpreted as signed 8-bit values.
        song = [b - 0x100 if b >= 0x80 else b
                for b in bytearray((artist + title).encode("utf-8"))]
        length = len(song)

        # t3 is a 32-bit word assembled from the bytes of lrcId; a zero
        # byte is replaced by a complemented one.
        id_byte1 = (lrcId & 0x0000FF00) >> 8
        if (lrcId & 0x00FF0000) == 0:
            t3 = 0x000000FF & ~id_byte1
        else:
            t3 = 0x000000FF & ((lrcId & 0x00FF0000) >> 16)
        t3 |= (0x000000FF & lrcId) << 8
        t3 <<= 8
        t3 |= 0x000000FF & id_byte1
        t3 <<= 8
        if (lrcId & 0xFF000000) == 0:
            t3 |= 0x000000FF & (~lrcId)
        else:
            t3 |= 0x000000FF & (lrcId >> 24)

        # Backward pass over the bytes -> t2 (shift by 4 or 5).
        t2 = 0
        for j in reversed(range(length)):
            carry = (song[j] + t2) & mask
            t2 = (t2 << (j % 2 + 4)) & mask
            t2 = (carry + t2) & mask

        # Forward pass over the bytes -> t1 (shift by 3 or 4).
        t1 = 0
        for j in range(length):
            carry = (song[j] + t1) & mask
            t1 = (t1 << (j % 2 + 3)) & mask
            t1 = (t1 + carry) & mask

        t5 = self.Conv(t2 ^ t3)
        t5 = self.Conv(t5 + (t1 | lrcId))
        t5 = self.Conv(t5 * (t1 | t3))
        t5 = self.Conv(t5 * (t2 ^ lrcId))
        # Conv can leave a value above 2**31; move it down by 2**32.
        if t5 > 2147483648:
            t5 = t5 - 4294967296
        return t5
def translate(text, lang_from, lang_to):
    """Translate *text* with the Microsoft Translator Ajax endpoint.

    History: this used the Google Translate v1 Ajax API until it was
    retired (v2 is a paid service); it now calls the Bing/Microsoft
    endpoint.

    Args:
        text: text to translate; passed through ``urllib.quote``.
        lang_from: source language code, e.g. ``'zh-tw'``.
        lang_to: target language code, e.g. ``'zh-cn'``.

    Returns:
        The first double-quoted string found in the service reply.  *text*
        is returned unchanged when it is empty or when the reply contains
        no quoted string.
    """
    # Nothing to translate, so skip the network round trip.
    if not text:
        return text

    url = ('http://api.microsofttranslator.com/V2/Ajax.svc/Translate?' +
           'appId=DE2A1CAA235EB52E611BC1243F16E4D301BB600E' +
           '&from=' + lang_from + '&to=' + lang_to +
           '&text=' + urllib.quote(text))
    response = urllib.urlopen(url)
    try:
        reply = response.read()
    finally:
        response.close()

    # The Bing reply is matched as a bare quoted string.
    match = re.search('"(.+?)"', reply)
    if match is None:
        # Unexpected reply: fall back to the untranslated text instead of
        # failing with AttributeError on None.
        return text
    return match.group(1)
if __name__ == "__main__":
    # Create the provider and bind it to the module-level name
    # LyricProviderInstance.
    # NOTE(review): presumably Lyrics Grabber2 runs this script as __main__
    # and reads this variable to find the provider -- confirm against the host.
    LyricProviderInstance = TTPlayerCNC()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment