lyrics fetcher: fix fetchers and improve formatting of lyrics

This commit is contained in:
Andrzej Rybczak
2016-10-30 23:25:51 +01:00
parent a53c574edd
commit 888b6bc1c9
4 changed files with 26 additions and 4 deletions

1
NEWS
View File

@@ -5,6 +5,7 @@ ncmpcpp-0.7.6 (????-??-??)
* Opening playlist editor when there is no MPD playlists directory no longer freezes the application.
* Added info about behavior of MPD_HOST and MPD_PORT environment variables to man page.
* Tilde will now be expanded to home directory in visualizer_fifo_path, execute_on_song_change and external_editor configuration variables.
* Fixed lyricwiki and justsomelyrics fetchers.
ncmpcpp-0.7.5 (2016-08-17)
* Action chains can now be used for seeking.

View File

@@ -25,7 +25,9 @@
#include <cstdlib>
#include <cstring>
#include <boost/algorithm/string/join.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <boost/regex.hpp>
@@ -105,6 +107,15 @@ std::vector<std::string> LyricsFetcher::getContent(const char *regex_, const std
// Normalises fetched lyrics text in place.
//
// The text is first passed through stripHtmlTags(), then split into lines on
// '\r' and '\n'. Every line is trimmed on both ends, each run of consecutive
// empty lines is reduced to a single empty line, the lines are re-joined with
// '\n' and the resulting text is trimmed once more.
//
// data: lyrics text to clean up; overwritten with the result.
void LyricsFetcher::postProcess(std::string &data) const
{
	stripHtmlTags(data);
	// Remove indentation from each line and collapse multiple newlines into one.
	std::vector<std::string> lines;
	boost::split(lines, data, boost::is_any_of("\r\n"));
	for (auto &line : lines)
		boost::trim(line);
	// std::unique only moves the surviving elements to the front and returns the
	// new logical end; the elements past it hold unspecified values and must be
	// erased, otherwise they would still be joined into the output below.
	auto new_end = std::unique(lines.begin(), lines.end(), [](const std::string &a, const std::string &b) {
		return a.empty() && b.empty();
	});
	lines.erase(new_end, lines.end());
	data = boost::algorithm::join(lines, "\n");
	boost::trim(data);
}
@@ -126,7 +137,7 @@ LyricsFetcher::Result LyricwikiFetcher::fetch(const std::string &artist, const s
return result;
}
auto lyrics = getContent("<div class='lyricbox'>(.*?)<!--", data);
auto lyrics = getContent("<div class='lyricbox'>(.*?)</div>", data);
if (lyrics.empty())
{
@@ -224,6 +235,14 @@ void Sing365Fetcher::postProcess(std::string &data) const
/**********************************************************************/
// Post-processing for this fetcher: replace `data` with the result of
// unescapeHtmlUtf8() and then run the base-class LyricsFetcher::postProcess()
// on it.
void JustSomeLyricsFetcher::postProcess(std::string &data) const
{
	std::string unescaped = unescapeHtmlUtf8(data);
	data.swap(unescaped);
	LyricsFetcher::postProcess(data);
}
/**********************************************************************/
void MetrolyricsFetcher::postProcess(std::string &data) const
{
// some of lyrics have both \n chars and <br />, html tags

View File

@@ -108,7 +108,9 @@ struct JustSomeLyricsFetcher : public GoogleLyricsFetcher
virtual const char *name() const OVERRIDE { return "justsomelyrics.com"; }
protected:
virtual const char *regex() const OVERRIDE { return "<div class=\"content.*?</div>(.*?)</div>"; }
virtual const char *regex() const OVERRIDE { return "<div class=\"content.*?</div>\\s*</div>(.*?)<div"; }
virtual void postProcess(std::string &data) const OVERRIDE;
};
struct AzLyricsFetcher : public GoogleLyricsFetcher

View File

@@ -66,7 +66,7 @@ void stripHtmlTags(std::string &s)
for (size_t i = s.find("<"); i != std::string::npos; i = s.find("<"))
{
size_t j = s.find(">", i)+1;
if (s.compare(i, j-i, "<p>") == 0 || s.compare(i, j-i, "</p>") == 0)
if (s.compare(i, std::min(3ul, j-i), "<p ") == 0 || s.compare(i, j-i, "</p>") == 0)
s.replace(i, j-i, "\n");
else
s.replace(i, j-i, "");
@@ -87,4 +87,4 @@ void stripHtmlTags(std::string &s)
else if (s[i] == '\t')
s[i] = ' ';
}
}
}