lyrics: add support for lyricstime.com, metrolyrics.com and lyricsmania.com

This commit is contained in:
Andrzej Rybczak
2010-08-10 19:53:44 +02:00
parent 3a724626de
commit b387d22663
9 changed files with 190 additions and 39 deletions

View File

@@ -93,6 +93,14 @@ namespace
}
}
// Re-encodes s in place from the `from` character set to the `to` character set.
//
// A heap copy of the text is handed to charset_convert(), which receives the
// pointer, the original length and `true` as its fourth argument; whatever the
// pointer refers to afterwards is copied back into s and then freed.
// NOTE(review): charset_convert() is not visible here - presumably it swaps the
// buffer for a newly allocated, converted one and releases the old one; confirm.
void iconv_convert_from_to(const char *from, const char *to, std::string &s)
{
	const std::string::size_type input_len = s.length();
	const char *buffer = strdup(s.c_str());
	charset_convert(from, to, buffer, true, input_len);
	s.assign(buffer);
	free(const_cast<char *>(buffer));
}
void utf_to_locale(std::string &s)
{
if (s.empty() || Config.system_encoding.empty() || !has_non_ascii_chars(s.c_str()))

View File

@@ -29,6 +29,8 @@
#include <string>
// Converts s in place from the `from` encoding to the `to` encoding.
void iconv_convert_from_to(const char *from, const char *to, std::string &s);
// NOTE(review): presumably converts s in place from UTF-8 to the configured
// system encoding - the body is not fully visible here; confirm.
void utf_to_locale(std::string &);
// NOTE(review): presumably the inverse of utf_to_locale(); confirm.
void locale_to_utf(std::string &);
@@ -40,6 +42,8 @@ void locale_to_utf(const char *&, bool);
#else
// Fallback branch of the conditional above: the conversion helpers become
// function-like macros whose expansion is a lone ';', so every call compiles
// to an empty statement and its arguments are never evaluated.
#define iconv_convert_from_to(x, y, z);
#define utf_to_locale(x);
#define locale_to_utf(x);

View File

@@ -206,6 +206,35 @@ void EscapeUnallowedChars(std::string &s)
}
}
// Decodes numeric HTML character references in data into UTF-8.
//
// Both decimal ("&#233;") and hexadecimal ("&#xE9;") references are
// recognised, and every Unicode scalar value up to U+10FFFF is encoded
// using one to four bytes.
//
// A sequence that merely starts with "&#" but is not a well-formed reference
// (no digits, a stray character before ';', no terminating ';', the value 0,
// a surrogate, or a value above U+10FFFF) is copied to the output unchanged
// rather than swallowing all text up to the next ';'.
//
// data: text that may contain numeric character references.
// Returns the decoded copy; named entities such as "&amp;" are left untouched.
std::string unescapeHtmlUtf8(const std::string &data)
{
	const size_t len = data.length();
	std::string result;
	result.reserve(len);
	for (size_t i = 0; i < len; ++i)
	{
		if (data[i] != '&' || i+1 >= len || data[i+1] != '#')
		{
			result += data[i];
			continue;
		}

		// optional 'x'/'X' selects hexadecimal notation
		size_t j = i+2;
		unsigned base = 10;
		if (j < len && (data[j] == 'x' || data[j] == 'X'))
		{
			base = 16;
			++j;
		}

		// accumulate digits up to the terminating ';'
		const size_t first_digit = j;
		unsigned long n = 0;
		bool ok = true;
		for (; j < len && data[j] != ';'; ++j)
		{
			const char c = data[j];
			unsigned digit = 0;
			if (c >= '0' && c <= '9')
				digit = c - '0';
			else if (base == 16 && c >= 'a' && c <= 'f')
				digit = c - 'a' + 10;
			else if (base == 16 && c >= 'A' && c <= 'F')
				digit = c - 'A' + 10;
			else
			{
				ok = false;
				break;
			}
			n = n * base + digit;
			if (n > 0x10FFFF) // beyond Unicode; also keeps n from overflowing
			{
				ok = false;
				break;
			}
		}
		ok = ok && j < len && j > first_digit
		  && n != 0 && (n < 0xD800 || n > 0xDFFF);
		if (!ok)
		{
			// not a reference after all: keep the '&' and let the
			// following characters be copied literally
			result += data[i];
			continue;
		}

		if (n >= 0x10000)
		{
			result += static_cast<char>(0xf0 | ((n >> 18) & 0x07));
			result += static_cast<char>(0x80 | ((n >> 12) & 0x3f));
			result += static_cast<char>(0x80 | ((n >> 6) & 0x3f));
			result += static_cast<char>(0x80 | (n & 0x3f));
		}
		else if (n >= 0x800)
		{
			result += static_cast<char>(0xe0 | ((n >> 12) & 0x0f));
			result += static_cast<char>(0x80 | ((n >> 6) & 0x3f));
			result += static_cast<char>(0x80 | (n & 0x3f));
		}
		else if (n >= 0x80)
		{
			result += static_cast<char>(0xc0 | ((n >> 6) & 0x1f));
			result += static_cast<char>(0x80 | (n & 0x3f));
		}
		else
			result += static_cast<char>(n);

		i = j; // resume after the ';'
	}
	return result;
}
void StripHtmlTags(std::string &s)
{
bool erase = 0;

View File

@@ -60,6 +60,8 @@ std::string Shorten(const std::basic_string<my_char_t> &s, size_t max_length);
void EscapeUnallowedChars(std::string &);
// Returns a copy of data in which numeric character references of the form
// "&#<number>;" are replaced by the UTF-8 encoding of that number.
std::string unescapeHtmlUtf8(const std::string &data);
void StripHtmlTags(std::string &s);
void Trim(std::string &s);

View File

@@ -34,7 +34,7 @@ namespace
}
}
CURLcode Curl::perform(const std::string &URL, std::string &data, unsigned timeout)
CURLcode Curl::perform(std::string &data, const std::string &URL, const std::string &referer, unsigned timeout)
{
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_lock(&lock);
@@ -45,6 +45,9 @@ CURLcode Curl::perform(const std::string &URL, std::string &data, unsigned timeo
curl_easy_setopt(c, CURLOPT_WRITEDATA, &data);
curl_easy_setopt(c, CURLOPT_CONNECTTIMEOUT, timeout);
curl_easy_setopt(c, CURLOPT_NOSIGNAL, 1);
curl_easy_setopt(c, CURLOPT_USERAGENT, "ncmpcpp " VERSION);
if (!referer.empty())
curl_easy_setopt(c, CURLOPT_REFERER, referer.c_str());
result = curl_easy_perform(c);
curl_easy_cleanup(c);
pthread_mutex_unlock(&lock);

View File

@@ -32,7 +32,7 @@
namespace Curl
{
CURLcode perform(const std::string &URL, std::string &data, unsigned timeout = 10);
// Runs a libcurl transfer. `data` is handed to libcurl as the write target
// (CURLOPT_WRITEDATA), `referer` is sent as CURLOPT_REFERER only when it is
// non-empty, and `timeout` is used as CURLOPT_CONNECTTIMEOUT.
// NOTE(review): presumably returns the code produced by curl_easy_perform();
// the return statement is not visible here - confirm.
CURLcode perform(std::string &data, const std::string &URL, const std::string &referer = "", unsigned timeout = 10);
std::string escape(const std::string &s);
}

View File

@@ -229,7 +229,7 @@ void *Info::PrepareArtist(void *screen_void_ptr)
url += "&api_key=d94e5b6e26469a2d1ffae8ef20131b79";
std::string result;
CURLcode code = Curl::perform(url, result);
CURLcode code = Curl::perform(result, url);
if (code != CURLE_OK)
{

View File

@@ -24,17 +24,23 @@
#include <cstdlib>
#include "charset.h"
#include "conv.h"
#include "lyrics_fetcher.h"
// Table of the available lyrics fetchers; the trailing 0 marks the end of
// the array.
// NOTE(review): the order presumably decides which site is tried first -
// confirm against the code that walks this table.
LyricsFetcher *lyricsPlugins[] =
{
new LyricwikiFetcher(),
new LyricsmaniaFetcher(),
new LyricstimeFetcher(),
new MetrolyricsFetcher(),
new LyrcComArFetcher(),
new LyricsflyFetcher(),
0
};
// Message the fetchers store in result.second when no usable lyrics were found.
const char LyricsFetcher::msgNotFound[] = "Not found";
LyricsFetcher::Result LyricsFetcher::fetch(const std::string &artist, const std::string &title)
{
Result result;
@@ -45,7 +51,7 @@ LyricsFetcher::Result LyricsFetcher::fetch(const std::string &artist, const std:
Replace(url, "%title%", title.c_str());
std::string data;
CURLcode code = Curl::perform(url, data);
CURLcode code = Curl::perform(data, url);
if (code != CURLE_OK)
{
@@ -57,7 +63,7 @@ LyricsFetcher::Result LyricsFetcher::fetch(const std::string &artist, const std:
if (!parse_ok || notLyrics(data))
{
result.second = "Not found";
result.second = msgNotFound;
return result;
}
@@ -100,7 +106,7 @@ LyricsFetcher::Result LyricwikiFetcher::fetch(const std::string &artist, const s
result.first = false;
std::string data;
CURLcode code = Curl::perform(result.second, data);
CURLcode code = Curl::perform(data, result.second);
if (code != CURLE_OK)
{
@@ -112,7 +118,7 @@ LyricsFetcher::Result LyricwikiFetcher::fetch(const std::string &artist, const s
if (!parse_ok)
{
result.second = "Not found";
result.second = msgNotFound;
return result;
}
@@ -131,35 +137,6 @@ bool LyricwikiFetcher::notLyrics(const std::string &data)
return data.find("action=edit") != std::string::npos;
}
// Decodes numeric HTML character references in data into UTF-8.
//
// Both decimal ("&#233;") and hexadecimal ("&#xE9;") references are
// recognised, and every Unicode scalar value up to U+10FFFF is encoded
// using one to four bytes.
//
// A sequence that merely starts with "&#" but is not a well-formed reference
// (no digits, a stray character before ';', no terminating ';', the value 0,
// a surrogate, or a value above U+10FFFF) is copied to the output unchanged
// rather than swallowing all text up to the next ';'.
//
// data: text that may contain numeric character references.
// Returns the decoded copy; named entities such as "&amp;" are left untouched.
std::string LyricwikiFetcher::unescapeHtmlUtf8(const std::string &data)
{
	const size_t len = data.length();
	std::string result;
	result.reserve(len);
	for (size_t i = 0; i < len; ++i)
	{
		if (data[i] != '&' || i+1 >= len || data[i+1] != '#')
		{
			result += data[i];
			continue;
		}

		// optional 'x'/'X' selects hexadecimal notation
		size_t j = i+2;
		unsigned base = 10;
		if (j < len && (data[j] == 'x' || data[j] == 'X'))
		{
			base = 16;
			++j;
		}

		// accumulate digits up to the terminating ';'
		const size_t first_digit = j;
		unsigned long n = 0;
		bool ok = true;
		for (; j < len && data[j] != ';'; ++j)
		{
			const char c = data[j];
			unsigned digit = 0;
			if (c >= '0' && c <= '9')
				digit = c - '0';
			else if (base == 16 && c >= 'a' && c <= 'f')
				digit = c - 'a' + 10;
			else if (base == 16 && c >= 'A' && c <= 'F')
				digit = c - 'A' + 10;
			else
			{
				ok = false;
				break;
			}
			n = n * base + digit;
			if (n > 0x10FFFF) // beyond Unicode; also keeps n from overflowing
			{
				ok = false;
				break;
			}
		}
		ok = ok && j < len && j > first_digit
		  && n != 0 && (n < 0xD800 || n > 0xDFFF);
		if (!ok)
		{
			// not a reference after all: keep the '&' and let the
			// following characters be copied literally
			result += data[i];
			continue;
		}

		if (n >= 0x10000)
		{
			result += static_cast<char>(0xf0 | ((n >> 18) & 0x07));
			result += static_cast<char>(0x80 | ((n >> 12) & 0x3f));
			result += static_cast<char>(0x80 | ((n >> 6) & 0x3f));
			result += static_cast<char>(0x80 | (n & 0x3f));
		}
		else if (n >= 0x800)
		{
			result += static_cast<char>(0xe0 | ((n >> 12) & 0x0f));
			result += static_cast<char>(0x80 | ((n >> 6) & 0x3f));
			result += static_cast<char>(0x80 | (n & 0x3f));
		}
		else if (n >= 0x80)
		{
			result += static_cast<char>(0xc0 | ((n >> 6) & 0x1f));
			result += static_cast<char>(0x80 | (n & 0x3f));
		}
		else
			result += static_cast<char>(n);

		i = j; // resume after the ';'
	}
	return result;
}
/***********************************************************************/
void LyricsflyFetcher::postProcess(std::string &data)
@@ -168,5 +145,81 @@ void LyricsflyFetcher::postProcess(std::string &data)
Trim(data);
}
/**********************************************************************/
// Looks up the lyrics page through a Google "I'm Feeling Lucky" query and
// then delegates the actual download to LyricsFetcher::fetch().
//
// The query is `"<artist>" <title> <site keyword>`. The address is taken from
// the `<A HREF="...">here</A>` link of the response and must pass isURLOk().
//
// artist, title: inserted into the query string verbatim.
// NOTE(review): presumably already URL-escaped by the caller; confirm.
// Returns the result of LyricsFetcher::fetch() on success. Otherwise `first`
// is false and `second` holds the curl error text or msgNotFound.
LyricsFetcher::Result GoogleLyricsFetcher::fetch(const std::string &artist, const std::string &title)
{
	Result outcome;
	outcome.first = false;

	std::string query("%22");
	query.append(artist).append("%22+").append(title).append("+").append(getSiteKeyword());

	std::string lucky_url("http://www.google.com/search?hl=en&ie=UTF-8&oe=UTF-8&q=");
	lucky_url.append(query).append("&btnI=I%27m+Feeling+Lucky");

	// the search address doubles as the referer of the request
	std::string page;
	const CURLcode status = Curl::perform(page, lucky_url, lucky_url);
	if (status != CURLE_OK)
	{
		outcome.second = curl_easy_strerror(status);
		return outcome;
	}

	// getContent() reduces `page` to the target address
	if (getContent("<A HREF=\"", "\">here</A>", page) && isURLOk(page))
	{
		URL = page.c_str();
		return LyricsFetcher::fetch("", "");
	}

	outcome.second = msgNotFound;
	return outcome;
}
// Accepts url only if it contains the site keyword of the concrete fetcher.
bool GoogleLyricsFetcher::isURLOk(const std::string &url)
{
	const char *keyword = getSiteKeyword();
	const std::string::size_type where = url.find(keyword);
	return where != std::string::npos;
}
/**********************************************************************/
// Accepts url if it passes the generic keyword check and is longer than
// 25 characters.
bool LyricstimeFetcher::isURLOk(const std::string &url)
{
	if (!GoogleLyricsFetcher::isURLOk(url))
		return false;
	// The search sometimes returns the list of all artists starting with
	// a given letter, e.g. www.lyricstime.com/A.html. That address is
	// 25 characters long, so only longer ones are accepted.
	return url.length() > 25;
}
// Post-processes text taken from lyricstime.com: re-encodes it first and
// then applies the generic LyricsFetcher post-processing.
void LyricstimeFetcher::postProcess(std::string &data)
{
// lyricstime.com serves its pages as iso-8859-1,
// so the obtained lyrics have to be converted to utf-8
// NOTE(review): charset.h also defines iconv_convert_from_to as a no-op
// macro in its #else branch - presumably for builds without iconv; confirm.
iconv_convert_from_to("iso-8859-1", "utf-8", data);
LyricsFetcher::postProcess(data);
}
/**********************************************************************/
// Post-processes text taken from metrolyrics.com: numeric character
// references are decoded to UTF-8 first, then the generic LyricsFetcher
// post-processing runs on the decoded text.
void MetrolyricsFetcher::postProcess(std::string &data)
{
	std::string decoded = unescapeHtmlUtf8(data);
	data.swap(decoded);
	LyricsFetcher::postProcess(data);
}
// Accepts url if it passes the generic keyword check and does not point
// at sitemap.xml.
bool MetrolyricsFetcher::isURLOk(const std::string &url)
{
	if (!GoogleLyricsFetcher::isURLOk(url))
		return false;
	// the search sometimes returns a link to sitemap.xml, which is huge,
	// so it has to be discarded
	return url.find("sitemap.xml") == std::string::npos;
}
#endif // HAVE_CURL_CURL_H

View File

@@ -45,6 +45,8 @@ struct LyricsFetcher
virtual void postProcess(std::string &data);
bool getContent(const char *open_tag, const char *close_tag, std::string &data);
static const char msgNotFound[];
};
struct LyrcComArFetcher : public LyricsFetcher
@@ -68,9 +70,6 @@ struct LyricwikiFetcher : public LyricsFetcher
virtual const char *getCloseTag() { return "</url>"; }
virtual bool notLyrics(const std::string &data);
private:
std::string unescapeHtmlUtf8(const std::string &data);
};
struct LyricsflyFetcher : public LyricsFetcher
@@ -85,6 +84,59 @@ struct LyricsflyFetcher : public LyricsFetcher
virtual void postProcess(std::string &data);
};
/**********************************************************************/
// Base for fetchers that find the lyrics page through a Google search
// narrowed down by a site keyword. Derived classes supply the keyword and
// may tighten isURLOk() to reject unwanted search hits.
struct GoogleLyricsFetcher : public LyricsFetcher
{
	// Searches for the page, stores its address and lets LyricsFetcher::fetch()
	// retrieve it.
	virtual Result fetch(const std::string &artist, const std::string &title);

	protected:
		// Word that is appended to the search query and must occur in the
		// address that was found.
		virtual const char *getSiteKeyword() = 0;

		// Address found by the last successful search. The pointer refers to
		// storage owned by this object; it is an empty string before any
		// successful search.
		virtual const char *getURL() { return URL.c_str(); }

		// Default check: the address has to contain the site keyword.
		virtual bool isURLOk(const std::string &url);

	private:
		// Owning copy of the address. fetch() assigns it from the c_str() of
		// a string that is local to fetch(), so a plain `const char *` would
		// be left dangling as soon as fetch() returns.
		std::string URL;
};
// Fetcher for lyricstime.com, located through the Google-based search of
// GoogleLyricsFetcher.
struct LyricstimeFetcher : public GoogleLyricsFetcher
{
// Name under which this fetcher is reported.
virtual const char *name() { return "lyricstime.com"; }
protected:
// Keyword used by the GoogleLyricsFetcher search and URL check.
virtual const char *getSiteKeyword() { return "lyricstime"; }
// NOTE(review): presumably the markers between which the lyrics are cut
// out of the downloaded page - the code using them is not visible; confirm.
virtual const char *getOpenTag() { return "<div id=\"songlyrics\" >"; }
virtual const char *getCloseTag() { return "</div>"; }
// Site-specific overrides of the URL check and of the post-processing.
virtual bool isURLOk(const std::string &url);
virtual void postProcess(std::string &data);
};
// Fetcher for metrolyrics.com, located through the Google-based search of
// GoogleLyricsFetcher.
struct MetrolyricsFetcher : public GoogleLyricsFetcher
{
// Name under which this fetcher is reported.
virtual const char *name() { return "metrolyrics.com"; }
protected:
// Keyword used by the GoogleLyricsFetcher search and URL check.
virtual const char *getSiteKeyword() { return "metrolyrics"; }
// NOTE(review): presumably the markers between which the lyrics are cut
// out of the downloaded page - the code using them is not visible; confirm.
virtual const char *getOpenTag() { return "<div id=\"lyrics\">"; }
virtual const char *getCloseTag() { return "</div>"; }
// Site-specific overrides of the URL check and of the post-processing.
virtual bool isURLOk(const std::string &url);
virtual void postProcess(std::string &data);
};
// Fetcher for lyricsmania.com, located through the Google-based search of
// GoogleLyricsFetcher; it declares no isURLOk() or postProcess() overrides
// of its own.
struct LyricsmaniaFetcher : public GoogleLyricsFetcher
{
// Name under which this fetcher is reported.
virtual const char *name() { return "lyricsmania.com"; }
protected:
// Keyword used by the GoogleLyricsFetcher search and URL check.
virtual const char *getSiteKeyword() { return "lyricsmania"; }
// NOTE(review): presumably the markers between which the lyrics are cut
// out of the downloaded page - the code using them is not visible; confirm.
virtual const char *getOpenTag() { return "</strong> :<br />"; }
virtual const char *getCloseTag() { return "&#91; <a"; }
};
extern LyricsFetcher *lyricsPlugins[];
#endif // HAVE_CURL_CURL_H