lyricwiki fetcher: deal with UTF-8 characters properly
This commit is contained in:
@@ -120,7 +120,7 @@ LyricsFetcher::Result LyricwikiFetcher::fetch(const std::string &artist, const s
|
|||||||
|
|
||||||
Replace(data, "<br />", "\n");
|
Replace(data, "<br />", "\n");
|
||||||
|
|
||||||
result.second = unescape(data);
|
result.second = unescapeHtmlUtf8(data);
|
||||||
result.first = true;
|
result.first = true;
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
@@ -131,7 +131,7 @@ bool LyricwikiFetcher::notLyrics(const std::string &data)
|
|||||||
return data.find("action=edit") != std::string::npos;
|
return data.find("action=edit") != std::string::npos;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string LyricwikiFetcher::unescape(const std::string &data)
|
std::string LyricwikiFetcher::unescapeHtmlUtf8(const std::string &data)
|
||||||
{
|
{
|
||||||
std::string result;
|
std::string result;
|
||||||
for (size_t i = 0, j; i < data.length(); ++i)
|
for (size_t i = 0, j; i < data.length(); ++i)
|
||||||
@@ -139,7 +139,19 @@ std::string LyricwikiFetcher::unescape(const std::string &data)
|
|||||||
if (data[i] == '&' && data[i+1] == '#' && (j = data.find(';', i)) != std::string::npos)
|
if (data[i] == '&' && data[i+1] == '#' && (j = data.find(';', i)) != std::string::npos)
|
||||||
{
|
{
|
||||||
int n = atoi(&data.c_str()[i+2]);
|
int n = atoi(&data.c_str()[i+2]);
|
||||||
result += char(n);
|
if (n >= 0x800)
|
||||||
|
{
|
||||||
|
result += (0xe0 | ((n >> 12) & 0x0f));
|
||||||
|
result += (0x80 | ((n >> 6) & 0x3f));
|
||||||
|
result += (0x80 | (n & 0x3f));
|
||||||
|
}
|
||||||
|
else if (n >= 0x80)
|
||||||
|
{
|
||||||
|
result += (0xc0 | ((n >> 6) & 0x1f));
|
||||||
|
result += (0x80 | (n & 0x3f));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
result += n;
|
||||||
i = j;
|
i = j;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ struct LyricwikiFetcher : public LyricsFetcher
|
|||||||
virtual bool notLyrics(const std::string &data);
|
virtual bool notLyrics(const std::string &data);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::string unescape(const std::string &data);
|
std::string unescapeHtmlUtf8(const std::string &data);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct LyricsflyFetcher : public LyricsFetcher
|
struct LyricsflyFetcher : public LyricsFetcher
|
||||||
|
|||||||
Reference in New Issue
Block a user