diff --git a/src/charset.cpp b/src/charset.cpp index 84b97971..f1b5bc7d 100644 --- a/src/charset.cpp +++ b/src/charset.cpp @@ -93,6 +93,14 @@ namespace } } +void iconv_convert_from_to(const char *from, const char *to, std::string &s) +{ + const char *tmp = strdup(s.c_str()); + charset_convert(from, to, tmp, true, s.length()); + s = tmp; + free(const_cast(tmp)); +} + void utf_to_locale(std::string &s) { if (s.empty() || Config.system_encoding.empty() || !has_non_ascii_chars(s.c_str())) diff --git a/src/charset.h b/src/charset.h index 37ba1c6e..01612e39 100644 --- a/src/charset.h +++ b/src/charset.h @@ -29,6 +29,8 @@ #include +void iconv_convert_from_to(const char *from, const char *to, std::string &s); + void utf_to_locale(std::string &); void locale_to_utf(std::string &); @@ -40,6 +42,8 @@ void locale_to_utf(const char *&, bool); #else +#define iconv_convert_from_to(x, y, z); + #define utf_to_locale(x); #define locale_to_utf(x); diff --git a/src/conv.cpp b/src/conv.cpp index c5b94d37..b7da7d1f 100644 --- a/src/conv.cpp +++ b/src/conv.cpp @@ -206,6 +206,35 @@ void EscapeUnallowedChars(std::string &s) } } +std::string unescapeHtmlUtf8(const std::string &data) +{ + std::string result; + for (size_t i = 0, j; i < data.length(); ++i) + { + if (data[i] == '&' && data[i+1] == '#' && (j = data.find(';', i)) != std::string::npos) + { + int n = atoi(&data.c_str()[i+2]); + if (n >= 0x800) + { + result += (0xe0 | ((n >> 12) & 0x0f)); + result += (0x80 | ((n >> 6) & 0x3f)); + result += (0x80 | (n & 0x3f)); + } + else if (n >= 0x80) + { + result += (0xc0 | ((n >> 6) & 0x1f)); + result += (0x80 | (n & 0x3f)); + } + else + result += n; + i = j; + } + else + result += data[i]; + } + return result; +} + void StripHtmlTags(std::string &s) { bool erase = 0; diff --git a/src/conv.h b/src/conv.h index b4608895..fba3f977 100644 --- a/src/conv.h +++ b/src/conv.h @@ -60,6 +60,8 @@ std::string Shorten(const std::basic_string &s, size_t max_length); void EscapeUnallowedChars(std::string &); +std::string unescapeHtmlUtf8(const std::string &data); + void StripHtmlTags(std::string &s); void Trim(std::string &s); diff --git a/src/curl_handle.cpp b/src/curl_handle.cpp index 46f4225f..26e758f4 100644 --- a/src/curl_handle.cpp +++ b/src/curl_handle.cpp @@ -34,7 +34,7 @@ namespace } } -CURLcode Curl::perform(const std::string &URL, std::string &data, unsigned timeout) +CURLcode Curl::perform(std::string &data, const std::string &URL, const std::string &referer, unsigned timeout) { static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_lock(&lock); @@ -45,6 +45,9 @@ CURLcode Curl::perform(const std::string &URL, std::string &data, unsigned timeo curl_easy_setopt(c, CURLOPT_WRITEDATA, &data); curl_easy_setopt(c, CURLOPT_CONNECTTIMEOUT, timeout); curl_easy_setopt(c, CURLOPT_NOSIGNAL, 1); + curl_easy_setopt(c, CURLOPT_USERAGENT, "ncmpcpp " VERSION); + if (!referer.empty()) + curl_easy_setopt(c, CURLOPT_REFERER, referer.c_str()); result = curl_easy_perform(c); curl_easy_cleanup(c); pthread_mutex_unlock(&lock); diff --git a/src/curl_handle.h b/src/curl_handle.h index 8a12eb8e..a9148b09 100644 --- a/src/curl_handle.h +++ b/src/curl_handle.h @@ -32,7 +32,7 @@ namespace Curl { - CURLcode perform(const std::string &URL, std::string &data, unsigned timeout = 10); + CURLcode perform(std::string &data, const std::string &URL, const std::string &referer = "", unsigned timeout = 10); std::string escape(const std::string &s); } diff --git a/src/info.cpp b/src/info.cpp index fa58cacd..3f7086b6 100644 --- a/src/info.cpp +++ b/src/info.cpp @@ -229,7 +229,7 @@ void *Info::PrepareArtist(void *screen_void_ptr) url += "&api_key=d94e5b6e26469a2d1ffae8ef20131b79"; std::string result; - CURLcode code = Curl::perform(url, result); + CURLcode code = Curl::perform(result, url); if (code != CURLE_OK) { diff --git a/src/lyrics_fetcher.cpp b/src/lyrics_fetcher.cpp index d02f361e..7dfe12ec 100644 --- a/src/lyrics_fetcher.cpp +++ b/src/lyrics_fetcher.cpp @@ -24,17 +24,23 @@ #include +#include "charset.h" #include "conv.h" #include "lyrics_fetcher.h" LyricsFetcher *lyricsPlugins[] = { new LyricwikiFetcher(), + new LyricsmaniaFetcher(), + new LyricstimeFetcher(), + new MetrolyricsFetcher(), new LyrcComArFetcher(), new LyricsflyFetcher(), 0 }; +const char LyricsFetcher::msgNotFound[] = "Not found"; + LyricsFetcher::Result LyricsFetcher::fetch(const std::string &artist, const std::string &title) { Result result; @@ -45,7 +51,7 @@ LyricsFetcher::Result LyricsFetcher::fetch(const std::string &artist, const std: Replace(url, "%title%", title.c_str()); std::string data; - CURLcode code = Curl::perform(url, data); + CURLcode code = Curl::perform(data, url); if (code != CURLE_OK) { @@ -57,7 +63,7 @@ LyricsFetcher::Result LyricsFetcher::fetch(const std::string &artist, const std: if (!parse_ok || notLyrics(data)) { - result.second = "Not found"; + result.second = msgNotFound; return result; } @@ -100,7 +106,7 @@ LyricsFetcher::Result LyricwikiFetcher::fetch(const std::string &artist, const s result.first = false; std::string data; - CURLcode code = Curl::perform(result.second, data); + CURLcode code = Curl::perform(data, result.second); if (code != CURLE_OK) { @@ -112,7 +118,7 @@ LyricsFetcher::Result LyricwikiFetcher::fetch(const std::string &artist, const s if (!parse_ok) { - result.second = "Not found"; + result.second = msgNotFound; return result; } @@ -131,35 +137,6 @@ bool LyricwikiFetcher::notLyrics(const std::string &data) return data.find("action=edit") != std::string::npos; } -std::string LyricwikiFetcher::unescapeHtmlUtf8(const std::string &data) -{ - std::string result; - for (size_t i = 0, j; i < data.length(); ++i) - { - if (data[i] == '&' && data[i+1] == '#' && (j = data.find(';', i)) != std::string::npos) - { - int n = atoi(&data.c_str()[i+2]); - if (n >= 0x800) - { - result += (0xe0 | ((n >> 12) & 0x0f)); - result += (0x80 | ((n >> 6) & 0x3f)); - result += (0x80 | (n & 0x3f)); - } - else if (n >= 0x80) - { - result += (0xc0 | ((n >> 6) & 0x1f)); - result += (0x80 | (n & 0x3f)); - } - else - result += n; - i = j; - } - else - result += data[i]; - } - return result; -} - /***********************************************************************/ void LyricsflyFetcher::postProcess(std::string &data) @@ -168,5 +145,81 @@ void LyricsflyFetcher::postProcess(std::string &data) Trim(data); } +/**********************************************************************/ + +LyricsFetcher::Result GoogleLyricsFetcher::fetch(const std::string &artist, const std::string &title) +{ + Result result; + result.first = false; + + std::string search_str = "%22"; + search_str += artist; + search_str += "%22+"; + search_str += title; + search_str += "+"; + search_str += getSiteKeyword(); + + std::string google_url = "http://www.google.com/search?hl=en&ie=UTF-8&oe=UTF-8&q="; + google_url += search_str; + google_url += "&btnI=I%27m+Feeling+Lucky"; + + std::string data; + CURLcode code = Curl::perform(data, google_url, google_url); + + if (code != CURLE_OK) + { + result.second = curl_easy_strerror(code); + return result; + } + + bool found_url = getContent("here", data); + + if (!found_url || !isURLOk(data)) + { + result.second = msgNotFound; + return result; + } + + URL = data.c_str(); + return LyricsFetcher::fetch("", ""); +} + +bool GoogleLyricsFetcher::isURLOk(const std::string &url) +{ + return url.find(getSiteKeyword()) != std::string::npos; +} + +/**********************************************************************/ + +bool LyricstimeFetcher::isURLOk(const std::string &url) +{ + // it sometimes returns list of all artists that begin + // with a given letter, e.g. www.lyricstime.com/A.html, which + // is 25 chars long, so we want longer. + return GoogleLyricsFetcher::isURLOk(url) && url.length() > 25; +} + +void LyricstimeFetcher::postProcess(std::string &data) +{ + // lyricstime.com uses iso-8859-1 as the encoding + // so we need to convert obtained lyrics to utf-8 + iconv_convert_from_to("iso-8859-1", "utf-8", data); + LyricsFetcher::postProcess(data); +} + +/**********************************************************************/ + +void MetrolyricsFetcher::postProcess(std::string &data) +{ + data = unescapeHtmlUtf8(data); + LyricsFetcher::postProcess(data); +} + +bool MetrolyricsFetcher::isURLOk(const std::string &url) +{ + // it sometimes return link to sitemap.xml, which is huge so we need to discard it + return GoogleLyricsFetcher::isURLOk(url) && url.find("sitemap.xml") == std::string::npos; +} + #endif // HAVE_CURL_CURL_H diff --git a/src/lyrics_fetcher.h b/src/lyrics_fetcher.h index a19da58e..624bf54f 100644 --- a/src/lyrics_fetcher.h +++ b/src/lyrics_fetcher.h @@ -45,6 +45,8 @@ struct LyricsFetcher virtual void postProcess(std::string &data); bool getContent(const char *open_tag, const char *close_tag, std::string &data); + + static const char msgNotFound[]; }; struct LyrcComArFetcher : public LyricsFetcher @@ -68,9 +70,6 @@ struct LyricwikiFetcher : public LyricsFetcher virtual const char *getCloseTag() { return ""; } virtual bool notLyrics(const std::string &data); - - private: - std::string unescapeHtmlUtf8(const std::string &data); }; struct LyricsflyFetcher : public LyricsFetcher @@ -85,6 +84,59 @@ struct LyricsflyFetcher : public LyricsFetcher virtual void postProcess(std::string &data); }; +/**********************************************************************/ + +struct GoogleLyricsFetcher : public LyricsFetcher +{ + virtual Result fetch(const std::string &artist, const std::string &title); + + protected: + virtual const char *getSiteKeyword() = 0; + virtual const char *getURL() { return URL; } + + virtual bool isURLOk(const std::string &url); + private: + const char *URL; +}; + +struct LyricstimeFetcher : public GoogleLyricsFetcher +{ + virtual const char *name() { return "lyricstime.com"; } + + protected: + virtual const char *getSiteKeyword() { return "lyricstime"; } + virtual const char *getOpenTag() { return "
"; } + virtual const char *getCloseTag() { return "
"; } + + virtual bool isURLOk(const std::string &url); + + virtual void postProcess(std::string &data); +}; + +struct MetrolyricsFetcher : public GoogleLyricsFetcher +{ + virtual const char *name() { return "metrolyrics.com"; } + + protected: + virtual const char *getSiteKeyword() { return "metrolyrics"; } + virtual const char *getOpenTag() { return "
"; } + virtual const char *getCloseTag() { return "
"; } + + virtual bool isURLOk(const std::string &url); + + virtual void postProcess(std::string &data); +}; + +struct LyricsmaniaFetcher : public GoogleLyricsFetcher +{ + virtual const char *name() { return "lyricsmania.com"; } + + protected: + virtual const char *getSiteKeyword() { return "lyricsmania"; } + virtual const char *getOpenTag() { return " :
"; } + virtual const char *getCloseTag() { return "[