Force UTF-8 encoding when reading configs and data

Python on Windows defaults to the locale's preferred encoding
(commonly CP1252) when reading text files unless an encoding is
specified, as opposed to the UTF-8 default typical on POSIX
platforms. Since our configuration and data files are expected to
always use UTF-8 encoding, pass encoding="utf-8" explicitly in the
ConfigParser.read() calls. We only do this under Python 3.x, as that
method doesn't have an encoding parameter in 2.7.

Thanks to Lance Bermudez for reporting this.
This commit is contained in:
Jeremy Stanley
2021-12-17 16:29:38 +00:00
parent 257f9f9a0b
commit 455afdc07d

View File

@@ -608,7 +608,11 @@ def get_config():
"weatherrc" "weatherrc"
] ]
for rcfile in rcfiles: for rcfile in rcfiles:
if os.access(rcfile, os.R_OK): config.read(rcfile) if os.access(rcfile, os.R_OK):
if pyversion("3"):
config.read(rcfile, encoding="utf-8")
else:
config.read(rcfile)
for section in config.sections(): for section in config.sections():
if section != section.lower(): if section != section.lower():
if config.has_section(section.lower()): if config.has_section(section.lower()):
@@ -644,7 +648,10 @@ def integrate_search_cache(config, cachedir, setpath):
pass pass
return config return config
scache = configparser.ConfigParser() scache = configparser.ConfigParser()
scache.read(scache_fn) if pyversion("3"):
scache.read(scache_fn, encoding="utf-8")
else:
scache.read(scache_fn)
for section in scache.sections(): for section in scache.sections():
if not config.has_section(section): if not config.has_section(section):
config.add_section(section) config.add_section(section)
@@ -742,7 +749,10 @@ def guess(
gzip.open(datafile).read().decode("utf-8") ) gzip.open(datafile).read().decode("utf-8") )
else: stations.readfp( gzip.open(datafile) ) else: stations.readfp( gzip.open(datafile) )
else: else:
stations.read(datafile) if pyversion("3"):
stations.read(datafile, encoding="utf-8")
else:
stations.read(datafile)
else: else:
message = "%s error: can't find \"%s\" data file\n" % ( message = "%s error: can't find \"%s\" data file\n" % (
os.path.basename( sys.argv[0] ), os.path.basename( sys.argv[0] ),
@@ -760,7 +770,10 @@ def guess(
zones.read_string( gzip.open(datafile).read().decode("utf-8") ) zones.read_string( gzip.open(datafile).read().decode("utf-8") )
else: zones.readfp( gzip.open(datafile) ) else: zones.readfp( gzip.open(datafile) )
else: else:
zones.read(datafile) if pyversion("3"):
zones.read(datafile, encoding="utf-8")
else:
zones.read(datafile)
else: else:
message = "%s error: can't find \"%s\" data file\n" % ( message = "%s error: can't find \"%s\" data file\n" % (
os.path.basename( sys.argv[0] ), os.path.basename( sys.argv[0] ),
@@ -787,7 +800,10 @@ def guess(
gzip.open(datafile).read().decode("utf-8") ) gzip.open(datafile).read().decode("utf-8") )
else: airports.readfp( gzip.open(datafile) ) else: airports.readfp( gzip.open(datafile) )
else: else:
airports.read(datafile) if pyversion("3"):
airports.read(datafile, encoding="utf-8")
else:
airports.read(datafile)
else: else:
message = "%s error: can't find \"%s\" data file\n" % ( message = "%s error: can't find \"%s\" data file\n" % (
os.path.basename( sys.argv[0] ), os.path.basename( sys.argv[0] ),
@@ -876,7 +892,10 @@ def guess(
gzip.open(datafile).read().decode("utf-8") ) gzip.open(datafile).read().decode("utf-8") )
else: zctas.readfp( gzip.open(datafile) ) else: zctas.readfp( gzip.open(datafile) )
else: else:
zctas.read(datafile) if pyversion("3"):
zctas.read(datafile, encoding="utf-8")
else:
zctas.read(datafile)
else: else:
message = "%s error: can't find \"%s\" data file\n" % ( message = "%s error: can't find \"%s\" data file\n" % (
os.path.basename( sys.argv[0] ), os.path.basename( sys.argv[0] ),
@@ -934,7 +953,10 @@ def guess(
gzip.open(datafile).read().decode("utf-8") ) gzip.open(datafile).read().decode("utf-8") )
else: places.readfp( gzip.open(datafile) ) else: places.readfp( gzip.open(datafile) )
else: else:
places.read(datafile) if pyversion("3"):
places.read(datafile, encoding="utf-8")
else:
places.read(datafile)
else: else:
message = "%s error: can't find \"%s\" data file\n" % ( message = "%s error: can't find \"%s\" data file\n" % (
os.path.basename( sys.argv[0] ), os.path.basename( sys.argv[0] ),
@@ -1151,7 +1173,10 @@ def guess(
) )
try: try:
scache_existing = configparser.ConfigParser() scache_existing = configparser.ConfigParser()
scache_existing.read(scache_fn) if pyversion("3"):
scache_existing.read(scache_fn, encoding="utf-8")
else:
scache_existing.read(scache_fn)
if not scache_existing.has_section(search[0]): if not scache_existing.has_section(search[0]):
scache_fd = codecs.open(scache_fn, "a", "utf-8") scache_fd = codecs.open(scache_fn, "a", "utf-8")
scache_fd.writelines(search_cache) scache_fd.writelines(search_cache)
@@ -2003,15 +2028,30 @@ def correlate():
sys.stdout.write(message) sys.stdout.write(message)
sys.stdout.flush() sys.stdout.flush()
airports = configparser.ConfigParser() airports = configparser.ConfigParser()
airports.read(airports_fn) if pyversion("3"):
airports.read(airports_fn, encoding="utf-8")
else:
airports.read(airports_fn)
places = configparser.ConfigParser() places = configparser.ConfigParser()
places.read(places_fn) if pyversion("3"):
places.read(places_fn, encoding="utf-8")
else:
places.read(places_fn)
stations = configparser.ConfigParser() stations = configparser.ConfigParser()
stations.read(stations_fn) if pyversion("3"):
stations.read(stations_fn, encoding="utf-8")
else:
stations.read(stations_fn)
zctas = configparser.ConfigParser() zctas = configparser.ConfigParser()
zctas.read(zctas_fn) if pyversion("3"):
zctas.read(zctas_fn, encoding="utf-8")
else:
zctas.read(zctas_fn)
zones = configparser.ConfigParser() zones = configparser.ConfigParser()
zones.read(zones_fn) if pyversion("3"):
zones.read(zones_fn, encoding="utf-8")
else:
zones.read(zones_fn)
qalog = [] qalog = []
places_nocentroid = 0 places_nocentroid = 0
places_nodescription = 0 places_nodescription = 0