Skip to content

Commit

Permalink
lyrics: more python 3 porting
Browse files Browse the repository at this point in the history
  • Loading branch information
Jonathan Matthew committed Apr 21, 2013
1 parent 82aa9a7 commit 71c8d7b
Show file tree
Hide file tree
Showing 9 changed files with 101 additions and 111 deletions.
9 changes: 5 additions & 4 deletions plugins/lyrics/AstrawebParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

import urllib
import urllib.parse
import re
import rb

Expand All @@ -39,8 +39,8 @@ def __init__(self, artist, title):
self.title = title

def search(self, callback, *data):
wartist = re.sub('%20', '+', urllib.quote(self.artist))
wtitle = re.sub('%20', '+', urllib.quote(self.title))
wartist = urllib.parse.quote_plus(self.artist)
wtitle = urllib.parse.quote_plus(self.title)

wurl = 'http://search.lyrics.astraweb.com/?word=%s+%s' % (wartist, wtitle)

Expand All @@ -52,6 +52,7 @@ def got_results (self, result, callback, *data):
callback (None, *data)
return

result = result.decode('iso-8859-1') # no indication of anything else..
results = re.sub('\n', '', re.sub('\r', '', result))

if re.search('(<tr><td bgcolor="#BBBBBB".*)(More Songs &gt)', results) is not None:
Expand Down Expand Up @@ -85,6 +86,7 @@ def parse_lyrics(self, result, callback, *data):
callback (None, *data)
return

result = result.decode('iso-8859-1')
result = re.sub('\n', '', re.sub('\r', '', result))

artist_title = re.split('(<title>Lyrics: )([^<]*)', result)[2]
Expand All @@ -98,4 +100,3 @@ def parse_lyrics(self, result, callback, *data):
lyrics += "\n\nLyrics provided by lyrics.astraweb.com"

callback (lyrics, *data)

6 changes: 4 additions & 2 deletions plugins/lyrics/DarkLyricsParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ def search_artist(self, artist_page, callback, *data):
if artist_page is None:
callback (None, *data)
return
link_section = re.split ('tban.js',
artist_page, 1)[1]
artist_page = artist_page.decode('iso-8859-1')
link_section = re.split ('tban.js', artist_page, 1)[1]
pattern_link = '<a href="'
pattern_artist = '([^"]*)">*([^<]*)<'
links = re.split (pattern_link, link_section.lower())
Expand Down Expand Up @@ -108,6 +108,7 @@ def search_song (self, songlist, callback, *data):
if songlist is None:
callback (None, *data)
return
songlist = songlist.decode('iso-8859-1')
# Search for all the <a> tags,
# filter for those that have the artist name string_match
# and for those whose content is the artist string_match
Expand Down Expand Up @@ -147,6 +148,7 @@ def parse_lyrics (self, album, callback, *data):
if album is None:
callback (None, *data)
return
album = album.decode('iso-8859-1')
titleline = '<a name="%s">%s. %s(.*?)</a>' % \
(self.titlenumber, self.titlenumber, re.escape(self.title.title()))
lyricmatch = re.split (titleline, album)
Expand Down
72 changes: 37 additions & 35 deletions plugins/lyrics/JetlyricsParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,51 +25,53 @@
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

import rb
import urllib
import urllib.parse
import re

class JetlyricsParser (object):
def __init__ (self, artist, title):
self.artist = artist
self.title = title
def __init__ (self, artist, title):
self.artist = artist
self.title = title

def search (self, callback, *data):
artist = re.sub('%20', '+', urllib.quote(self.artist))
title = re.sub('%20', '+', urllib.quote(self.title))
q = title + ' - ' + artist
url = 'http://www.jetlyrics.com/search.php?q=%s' % (q)
loader = rb.Loader()
loader.get_url (url, self.got_results, callback, *data)
def search (self, callback, *data):
artist = urllib.parse.quote_plus(self.artist)
title = urllib.parse.quote_plus(self.title)
q = title + ' - ' + artist
url = 'http://www.jetlyrics.com/search.php?q=%s' % (q)
loader = rb.Loader()
loader.get_url (url, self.got_results, callback, *data)

def got_results (self, result, callback, *data):
if result is None:
callback (None, *data)
return
def got_results (self, result, callback, *data):
if result is None:
callback (None, *data)
return
result = result.decode('utf-8')

m = re.search('<a href=\'(http://jetlyrics\.com/viewlyrics\.php\?id=[0-9]*)\'>', result)
if m is None:
callback (None, *data)
return
m = re.search('<a href=\'(http://jetlyrics\.com/viewlyrics\.php\?id=[0-9]*)\'>', result)
if m is None:
callback (None, *data)
return

loader = rb.Loader()
loader.get_url (m.group(1), self.parse_lyrics, callback, *data)
loader = rb.Loader()
loader.get_url (m.group(1), self.parse_lyrics, callback, *data)

def parse_lyrics (self, result, callback, *data):
if result is None:
callback (None, *data)
return
def parse_lyrics (self, result, callback, *data):
if result is None:
callback (None, *data)
return
result = result.decode('utf-8')

lyrics = re.split ('<div id=lyricsText>', result)[1]
lyrics = re.split ('</div>', lyrics)[0]
lyrics = re.split ('<div id=lyricsText>', result)[1]
lyrics = re.split ('</div>', lyrics)[0]

lyrics = re.sub('<br/>', '\n', lyrics)
lyrics = re.sub('<br />', '\n', lyrics)
lyrics = re.sub('<br>', '\n', lyrics)
lyrics = re.sub('<noscript><a href=\'http://jetlyrics.com\'>Lyrics</a></noscript>', '', lyrics)
lyrics = re.sub('<a href=\'http://jetlyrics.com\'>Jet Lyrics</a>', '', lyrics)
lyrics = re.sub('<br/>', '\n', lyrics)
lyrics = re.sub('<br />', '\n', lyrics)
lyrics = re.sub('<br>', '\n', lyrics)
lyrics = re.sub('<noscript><a href=\'http://jetlyrics.com\'>Lyrics</a></noscript>', '', lyrics)
lyrics = re.sub('<a href=\'http://jetlyrics.com\'>Jet Lyrics</a>', '', lyrics)

lyrics = self.title + "\n\n" + lyrics
lyrics += "\n\nLyrics provided by jetlyrics.com"
lyrics = self.title + "\n\n" + lyrics
lyrics += "\n\nLyrics provided by jetlyrics.com"

callback (lyrics, *data)
callback (lyrics, *data)

63 changes: 32 additions & 31 deletions plugins/lyrics/JlyricParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,45 +25,46 @@
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

import rb
import urllib
import urllib.parse
import re

class JlyricParser (object):
def __init__ (self, artist, title):
self.artist = artist
self.title = title
def __init__ (self, artist, title):
self.artist = artist
self.title = title

def search (self, callback, *data):
artist = re.sub('%20', '+', urllib.quote(self.artist))
title = re.sub('%20', '+', urllib.quote(self.title))
url = 'http://j-lyric.net/index.php?kt=%s&ka=%s' % (title, artist)
loader = rb.Loader()
loader.get_url (url, self.got_results, callback, *data)
def search (self, callback, *data):
artist = urllib.parse.quote_plus(self.artist)
title = urllib.parse.quote_plus(self.title)
url = 'http://j-lyric.net/index.php?kt=%s&ka=%s' % (title, artist)
loader = rb.Loader()
loader.get_url (url, self.got_results, callback, *data)

def got_results (self, result, callback, *data):
if result is None:
callback (None, *data)
return
def got_results (self, result, callback, *data):
if result is None:
callback (None, *data)
return

m = re.search('<div class=\'title\'><a href=\'(/artist/[^\.]*\.html)\'>', result)
if m is None:
callback (None, *data)
return
result = result.decode('utf-8')
m = re.search('<div class=\'title\'><a href=\'(/artist/[^\.]*\.html)\'>', result)
if m is None:
callback (None, *data)
return

loader = rb.Loader()
loader.get_url ('http://j-lyric.net' + m.group(1), self.parse_lyrics, callback, *data)

def parse_lyrics (self, result, callback, *data):
if result is None:
callback (None, *data)
return
loader = rb.Loader()
loader.get_url ('http://j-lyric.net' + m.group(1), self.parse_lyrics, callback, *data)

lyrics = re.split ('<p id=\'lyricBody\'>', result)[1]
lyrics = re.split ('</p>', lyrics)[0]
def parse_lyrics (self, result, callback, *data):
if result is None:
callback (None, *data)
return

lyrics = re.sub('<br />', '', lyrics)
lyrics = self.title + "\n\n" + lyrics
lyrics += "\n\nLyrics provided by j-lyric.net"
result = result.decode('utf-8')
lyrics = re.split ('<p id=\'lyricBody\'>', result)[1]
lyrics = re.split ('</p>', lyrics)[0]

callback (lyrics, *data)
lyrics = re.sub('<br />', '', lyrics)
lyrics = self.title + "\n\n" + lyrics
lyrics += "\n\nLyrics provided by j-lyric.net"

callback (lyrics, *data)
8 changes: 4 additions & 4 deletions plugins/lyrics/LyricWikiParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.


import urllib
import urllib.parse
import rb
from xml.dom import minidom

Expand All @@ -35,8 +35,8 @@ def __init__(self, artist, title):
self.title = title

def search(self, callback, *data):
artist = urllib.quote(self.artist.replace(' ', '_'))
title = urllib.quote(self.title.replace(' ', '_'))
artist = urllib.parse.quote(self.artist.replace(' ', '_'))
title = urllib.parse.quote(self.title.replace(' ', '_'))

htstring = 'http://lyricwiki.org/api.php?artist=%s&song=%s&fmt=text' % (artist, title)

Expand All @@ -48,7 +48,7 @@ def got_lyrics(self, result, callback, *data):
callback (None, *data)
return

result = result.decode('iso-8859-1')
result += "\n\nLyrics provided by lyricwiki.org"

callback (result, *data)

3 changes: 0 additions & 3 deletions plugins/lyrics/LyricsParse.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

import urllib
import re

import rb
from gi.repository import GObject, Gio

Expand Down
21 changes: 10 additions & 11 deletions plugins/lyrics/TerraParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.


import urllib
import urllib.parse
import rb
import re
import sys

# Deal with html entitys and utf-8
# Deal with html entities and utf-8
# code taken from django/utils/text.py

from html.entities import name2codepoint
Expand All @@ -47,12 +47,12 @@ def _replace_entity(match):
c = int(text[1:], 16)
else:
c = int(text)
return unichr(c)
return chr(c)
except ValueError:
return match.group(0)
else:
try:
return unichr(name2codepoint[text])
return chr(name2codepoint[text])
except (ValueError, KeyError):
return match.group(0)

Expand All @@ -65,11 +65,11 @@ def __init__(self, artist, title):
self.title = title

def search(self, callback, *data):
path = 'http://letras.terra.com.br/'
path = 'http://letras.mus.br/'

artist = urllib.quote(self.artist)
title = urllib.quote(self.title)
join = urllib.quote(' - ')
artist = urllib.parse.quote(self.artist)
title = urllib.parse.quote(self.title)
join = urllib.parse.quote(' - ')

wurl = 'winamp.php?t=%s%s%s' % (artist, join, title)
print("search URL: " + wurl)
Expand All @@ -83,7 +83,7 @@ def got_lyrics(self, result, callback, *data):
return

if result is not None:
result = result.decode('iso-8859-1').encode('UTF-8')
result = result.decode('utf-8')
if re.search('M&uacute;sica n&atilde;o encontrada', result):
print("not found")
callback (None, *data)
Expand All @@ -107,6 +107,5 @@ def parse_lyrics(self, source):
lyrics = re.sub('<[Bb][Rr]/>', '', lyrics)

lyrics = unescape_entities(artistitle) + "\n" + unescape_entities(lyrics)
lyrics += "\n\nEsta letra foi disponibilizada pelo site\nhttp://letras.terra.com.br"

lyrics += "\n\nEsta letra foi disponibilizada pelo site\nhttp://letras.mus.br"
return lyrics
24 changes: 6 additions & 18 deletions plugins/lyrics/WinampcnParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,11 @@
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

import sys
import urllib
import urllib.parse
import re
import rb
from xml.dom import minidom

def detect_charset(s):
charsets = ('iso-8859-1', 'gbk', 'utf-8')
for charset in charsets:
try:
return unicode(unicode(s, 'utf-8').encode(charset), 'gbk')
except:
continue
return s

class WinampcnParser(object):
def __init__(self, artist, title):
self.artist = artist
Expand All @@ -47,8 +38,8 @@ def __init__(self, artist, title):
def search(self, callback, *data):

# encode search string
title_encode = urllib.quote(detect_charset(self.title).encode('gbk').replace(' ', ''))
artist_encode = urllib.quote(detect_charset(self.artist).encode('gbk').replace(' ',''))
title_encode = urllib.parse.quote(self.title.replace(' ', '').encode('gbk'))
artist_encode = urllib.parse.quote(self.artist.replace(' ', '').encode('gbk'))
url = 'http://www.winampcn.com/lyrictransfer/get.aspx?song=%s&artist=%s&lsong=%s&Datetime=20060601' % (title_encode, artist_encode, title_encode)

loader = rb.Loader()
Expand All @@ -60,9 +51,9 @@ def got_lyrics(self, xmltext, callback, *data):
print("no response")
callback (None, *data)
return
xmltext = xmltext.decode('gbk')

try:
xmltext = xmltext.decode('gbk').encode('UTF-8')
xmltext = xmltext.replace('encoding="gb2312"', 'encoding="UTF-8"')
xmldoc = minidom.parseString(xmltext)
root = xmldoc.documentElement
Expand Down Expand Up @@ -91,16 +82,13 @@ def parse_lyrics(self, lyrics, callback, *data):
return

# transform it into plain text
lrcplaintext = lyrics
lrcplaintext = lyrics.decode('gbk')
try:
lrcplaintext = re.sub('\[.*?\]', '', lrcplaintext)
lrcplaintext = lrcplaintext.decode('gbk').encode('UTF-8')
except:
print("unable to decode lyrics")
callback (lrcplaintext, *data)
callback (None, *data)
return

# callback and show
lrcplaintext += "\n\nLyrics provided by winampcn.com"
callback(lrcplaintext, *data)

Loading

0 comments on commit 71c8d7b

Please sign in to comment.