Force UTF-8 encoding when reading configs and data

Apparently, Python on Windows defaults to the locale's preferred
encoding (typically CP1252) when reading text files unless otherwise
specified, as opposed to the UTF-8 default usually in effect on POSIX
platforms. Since our configuration and data files are expected to
always use UTF-8 encoding, pass encoding="utf-8" explicitly in the
ConfigParser.read() calls. We only do this under Python 3.x, as that
method doesn't have an encoding parameter in 2.7.

Thanks to Lance Bermudez for reporting this.
This commit is contained in:
Jeremy Stanley
2021-12-17 16:29:38 +00:00
parent 257f9f9a0b
commit 455afdc07d

View File

@@ -608,7 +608,11 @@ def get_config():
"weatherrc"
]
for rcfile in rcfiles:
if os.access(rcfile, os.R_OK): config.read(rcfile)
if os.access(rcfile, os.R_OK):
if pyversion("3"):
config.read(rcfile, encoding="utf-8")
else:
config.read(rcfile)
for section in config.sections():
if section != section.lower():
if config.has_section(section.lower()):
@@ -644,6 +648,9 @@ def integrate_search_cache(config, cachedir, setpath):
pass
return config
scache = configparser.ConfigParser()
if pyversion("3"):
scache.read(scache_fn, encoding="utf-8")
else:
scache.read(scache_fn)
for section in scache.sections():
if not config.has_section(section):
@@ -741,6 +748,9 @@ def guess(
stations.read_string(
gzip.open(datafile).read().decode("utf-8") )
else: stations.readfp( gzip.open(datafile) )
else:
if pyversion("3"):
stations.read(datafile, encoding="utf-8")
else:
stations.read(datafile)
else:
@@ -759,6 +769,9 @@ def guess(
if pyversion("3"):
zones.read_string( gzip.open(datafile).read().decode("utf-8") )
else: zones.readfp( gzip.open(datafile) )
else:
if pyversion("3"):
zones.read(datafile, encoding="utf-8")
else:
zones.read(datafile)
else:
@@ -786,6 +799,9 @@ def guess(
airports.read_string(
gzip.open(datafile).read().decode("utf-8") )
else: airports.readfp( gzip.open(datafile) )
else:
if pyversion("3"):
airports.read(datafile, encoding="utf-8")
else:
airports.read(datafile)
else:
@@ -875,6 +891,9 @@ def guess(
zctas.read_string(
gzip.open(datafile).read().decode("utf-8") )
else: zctas.readfp( gzip.open(datafile) )
else:
if pyversion("3"):
zctas.read(datafile, encoding="utf-8")
else:
zctas.read(datafile)
else:
@@ -933,6 +952,9 @@ def guess(
places.read_string(
gzip.open(datafile).read().decode("utf-8") )
else: places.readfp( gzip.open(datafile) )
else:
if pyversion("3"):
places.read(datafile, encoding="utf-8")
else:
places.read(datafile)
else:
@@ -1151,6 +1173,9 @@ def guess(
)
try:
scache_existing = configparser.ConfigParser()
if pyversion("3"):
scache_existing.read(scache_fn, encoding="utf-8")
else:
scache_existing.read(scache_fn)
if not scache_existing.has_section(search[0]):
scache_fd = codecs.open(scache_fn, "a", "utf-8")
@@ -2003,14 +2028,29 @@ def correlate():
sys.stdout.write(message)
sys.stdout.flush()
airports = configparser.ConfigParser()
if pyversion("3"):
airports.read(airports_fn, encoding="utf-8")
else:
airports.read(airports_fn)
places = configparser.ConfigParser()
if pyversion("3"):
places.read(places_fn, encoding="utf-8")
else:
places.read(places_fn)
stations = configparser.ConfigParser()
if pyversion("3"):
stations.read(stations_fn, encoding="utf-8")
else:
stations.read(stations_fn)
zctas = configparser.ConfigParser()
if pyversion("3"):
zctas.read(zctas_fn, encoding="utf-8")
else:
zctas.read(zctas_fn)
zones = configparser.ConfigParser()
if pyversion("3"):
zones.read(zones_fn, encoding="utf-8")
else:
zones.read(zones_fn)
qalog = []
places_nocentroid = 0