Add support for ignoring diacritics while searching and filtering lists
This commit is contained in:
@@ -25,6 +25,8 @@
|
||||
|
||||
#ifdef BOOST_REGEX_ICU
|
||||
# include <boost/regex/icu.hpp>
|
||||
# include <unicode/errorcode.h>
|
||||
# include <unicode/translit.h>
|
||||
#else
|
||||
# include <boost/regex.hpp>
|
||||
#endif // BOOST_REGEX_ICU
|
||||
@@ -32,6 +34,39 @@
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
|
||||
#include "utility/functional.h"
|
||||
|
||||
namespace {
|
||||
|
||||
#ifdef BOOST_REGEX_ICU
|
||||
|
||||
struct StripDiacritics
|
||||
{
|
||||
static void convert(UnicodeString &s)
|
||||
{
|
||||
if (m_converter == nullptr)
|
||||
{
|
||||
ErrorCode result;
|
||||
m_converter = Transliterator::createInstance(
|
||||
"NFD; [:M:] Remove; NFC", UTRANS_FORWARD, result);
|
||||
if (result.isFailure())
|
||||
throw std::runtime_error(
|
||||
"instantiation of transliterator instance failed with "
|
||||
+ std::string(result.errorName()));
|
||||
}
|
||||
m_converter->transliterate(s);
|
||||
}
|
||||
|
||||
private:
|
||||
static Transliterator *m_converter;
|
||||
};
|
||||
|
||||
Transliterator *StripDiacritics::m_converter;
|
||||
|
||||
#endif // BOOST_REGEX_ICU
|
||||
|
||||
}
|
||||
|
||||
namespace Regex {
|
||||
|
||||
typedef
|
||||
@@ -43,31 +78,44 @@ typedef
|
||||
Regex;
|
||||
|
||||
// Builds a Regex from the pattern `s` using the given syntax flags.
// When BOOST_REGEX_ICU is defined the pattern is passed to
// boost::make_u32regex, otherwise to the boost::regex constructor; in both
// cases `s` is forwarded with std::forward.
// NOTE(review): this span is a rendered diff hunk. The signature and the
// preprocessor lines each appear twice (one-line vs. wrapped signature,
// "# ifdef" vs. "#ifdef"); confirm which of each pair is in the current file
// before changing any code here.
template <typename StringT>
|
||||
inline Regex make(StringT &&s, boost::regex_constants::syntax_option_type flags)
|
||||
inline Regex make(StringT &&s,
|
||||
boost::regex_constants::syntax_option_type flags)
|
||||
{
|
||||
return
|
||||
# ifdef BOOST_REGEX_ICU
|
||||
#ifdef BOOST_REGEX_ICU
|
||||
boost::make_u32regex
|
||||
# else
|
||||
#else
|
||||
boost::regex
|
||||
# endif // BOOST_REGEX_ICU
|
||||
#endif // BOOST_REGEX_ICU
|
||||
(std::forward<StringT>(s), flags);
|
||||
}
|
||||
|
||||
// Searches `s` with the regular expression `rx` and returns the result of
// the underlying Boost search call.
//
// With BOOST_REGEX_ICU defined and `ignore_diacritics` set, `s` is first
// converted to a UnicodeString (through convertString<char, CharT> and
// UnicodeString::fromUTF8), passed through StripDiacritics::convert, and the
// stripped copy is searched with boost::u32regex_search. Otherwise `s` is
// searched directly (boost::u32regex_search with ICU, boost::regex_search
// without).
//
// std::out_of_range is caught: the error is written to std::cerr and false
// is returned.
//
// NOTE(review): this span is a rendered diff hunk. The old forwarding
// overload `search(StringT &&, const Regex &)` and the new
// `search(const std::basic_string<CharT> &, const Regex &, bool)` are
// interleaved, and the std::cerr statement appears in both its old one-line
// and new wrapped form; confirm which lines are in the current file.
// NOTE(review): UnicodeString and StringPiece are unqualified here; ICU >= 61
// presumably needs an icu:: prefix unless U_USING_ICU_NAMESPACE is enabled —
// verify against the build configuration.
// NOTE(review): StripDiacritics::convert can throw std::runtime_error, which
// the handler below does not catch — confirm that callers expect this.
template <typename StringT>
|
||||
inline bool search(StringT &&s, const Regex &rx)
|
||||
template <typename CharT>
|
||||
inline bool search(const std::basic_string<CharT> &s,
|
||||
const Regex &rx,
|
||||
bool ignore_diacritics)
|
||||
{
|
||||
try {
|
||||
return
|
||||
# ifdef BOOST_REGEX_ICU
|
||||
boost::u32regex_search
|
||||
# else
|
||||
boost::regex_search
|
||||
# endif // BOOST_REGEX_ICU
|
||||
(std::forward<StringT>(s), rx);
|
||||
#ifdef BOOST_REGEX_ICU
|
||||
if (ignore_diacritics)
|
||||
{
|
||||
auto us = UnicodeString::fromUTF8(
|
||||
StringPiece(convertString<char, CharT>::apply(s)));
|
||||
StripDiacritics::convert(us);
|
||||
return boost::u32regex_search(us, rx);
|
||||
}
|
||||
else
|
||||
return boost::u32regex_search(s, rx);
|
||||
#else
|
||||
return boost::regex_search(s, rx);
|
||||
#endif // BOOST_REGEX_ICU
|
||||
} catch (std::out_of_range &e) {
|
||||
// Invalid UTF-8 sequence, ignore the string.
|
||||
std::cerr << "Regex::search: error while processing \"" << s << "\": " << e.what() << "\n";
|
||||
std::cerr << "Regex::search: error while processing \""
|
||||
<< s
|
||||
<< "\": "
|
||||
<< e.what()
|
||||
<< "\n";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user