lastfm service: fix artist info fetching

This commit is contained in:
Andrzej Rybczak
2015-09-12 23:51:10 +02:00
parent bf3df86142
commit 95c6366596
4 changed files with 22 additions and 10 deletions

1
NEWS
View File

@@ -1,4 +1,5 @@
ncmpcpp-0.6.7 (????-??-??) ncmpcpp-0.6.7 (????-??-??)
* Fetching artist info from last.fm was fixed.
ncmpcpp-0.6.6 (2015-09-07) ncmpcpp-0.6.6 (2015-09-07)
* A typo in the example configuration file was fixed. * A typo in the example configuration file was fixed.

View File

@@ -116,9 +116,11 @@ Service::Result ArtistInfo::processData(const std::string &data)
rx.assign("<link rel=\"original\" href=\"(.*?)\""); rx.assign("<link rel=\"original\" href=\"(.*?)\"");
if (boost::regex_search(data, what, rx)) if (boost::regex_search(data, what, rx))
{ {
std::string url = what[1], wiki;
// unescape &amp;s
unescapeHtmlEntities(url);
// ...try to get the content of it... // ...try to get the content of it...
std::string wiki; CURLcode code = Curl::perform(wiki, url, "", true);
CURLcode code = Curl::perform(wiki, what[1]);
if (code != CURLE_OK) if (code != CURLE_OK)
{ {
@@ -128,7 +130,7 @@ Service::Result ArtistInfo::processData(const std::string &data)
else else
{ {
// ...and filter it to get the whole description. // ...and filter it to get the whole description.
rx.assign("<div id=\"wiki\">(.*?)</div>"); rx.assign("<div class=\"wiki\">(.*?)</div>");
if (boost::regex_search(wiki, what, rx)) if (boost::regex_search(wiki, what, rx))
desc = unescapeHtmlUtf8(what[1]); desc = unescapeHtmlUtf8(what[1]);
} }

View File

@@ -20,7 +20,6 @@
#include <boost/algorithm/string/replace.hpp> #include <boost/algorithm/string/replace.hpp>
#include "utility/html.h" #include "utility/html.h"
//#include "utility/string.h"
std::string unescapeHtmlUtf8(const std::string &data) std::string unescapeHtmlUtf8(const std::string &data)
{ {
@@ -51,18 +50,28 @@ std::string unescapeHtmlUtf8(const std::string &data)
return result; return result;
} }
/// Decode a small set of HTML character references in place.
///
/// Recognised references: &amp; &gt; &lt; &nbsp; &quot; &#039;
/// (&nbsp; is turned into a plain ASCII space). Anything else, including
/// a bare '&', is left untouched.
///
/// @param s string to decode; modified in place.
void unescapeHtmlEntities(std::string &s)
{
	// well, at least some of them.
	struct Entity
	{
		const char *name;
		char value;
	};
	static const Entity entities[] = {
		{ "&amp;",  '&'  },
		{ "&gt;",   '>'  },
		{ "&lt;",   '<'  },
		{ "&nbsp;", ' '  },
		{ "&quot;", '"'  },
		{ "&#039;", '\'' },
	};
	const size_t entity_count = sizeof(entities)/sizeof(entities[0]);

	// Decode in a single left-to-right pass. Replacing "&amp;" globally
	// before the other references would let the freshly produced '&' be
	// re-interpreted, so that "&amp;lt;" wrongly ended up as "<" instead
	// of "&lt;". Resuming the search at i+1 skips the character that was
	// just written, so decoded output is never decoded a second time.
	for (size_t i = s.find('&'); i != std::string::npos; i = s.find('&', i+1))
	{
		for (size_t k = 0; k < entity_count; ++k)
		{
			const size_t len = std::char_traits<char>::length(entities[k].name);
			if (s.compare(i, len, entities[k].name) == 0)
			{
				s.replace(i, len, 1, entities[k].value);
				break;
			}
		}
	}
}
void stripHtmlTags(std::string &s) void stripHtmlTags(std::string &s)
{ {
bool erase = 0; bool erase = 0;
for (size_t i = s.find("<"); i != std::string::npos; i = s.find("<")) for (size_t i = s.find("<"); i != std::string::npos; i = s.find("<"))
{ {
size_t j = s.find(">", i)+1; size_t j = s.find(">", i)+1;
s.replace(i, j-i, ""); if (s.compare(i, j-i, "<p>") == 0 || s.compare(i, j-i, "</p>") == 0)
s.replace(i, j-i, "\n");
else
s.replace(i, j-i, "");
} }
boost::replace_all(s, "&#039;", "'"); unescapeHtmlEntities(s);
boost::replace_all(s, "&amp;", "&");
boost::replace_all(s, "&quot;", "\"");
boost::replace_all(s, "&nbsp;", " ");
for (size_t i = 0; i < s.length(); ++i) for (size_t i = 0; i < s.length(); ++i)
{ {
if (erase) if (erase)

View File

@@ -24,7 +24,7 @@
#include <string> #include <string>
std::string unescapeHtmlUtf8(const std::string &s); std::string unescapeHtmlUtf8(const std::string &s);
void unescapeHtmlEntities(std::string &s);
void stripHtmlTags(std::string &s); void stripHtmlTags(std::string &s);
#endif // NCMPCPP_UTILITY_HTML_H #endif // NCMPCPP_UTILITY_HTML_H