Update correlation sources

Remove the stale metar.tbl and zonecatalog.curr.tar, which the USA
NWS hasn't been updating for many years, and add the public domain
airports.csv file from the amazing ourairports.com community. Also
update to latest (2019) USA Census Bureau location data, March 2020
WX zone information, cooperative sites list from 2018 (latest), and
regenerated active station and zone lists. Loss of the zonecatalog
necessitates directly applying various forecast and alert URL
patterns, though some which appeared unused by NWS for many years
were not included.

Clear out all old overrides, since the vast majority are obsoleted
by refreshed data, and build fresh correlation sets from the above
sources. Basically all sites have switched from HTTP to HTTPS, so
update URLs for this too.
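
The rebuilt correlation sets remain configparser-style files (the diff
still writes one [icao] section per station), so reading them back works
as before. A minimal sketch, assuming the generated stations file name
and the lowercase section/key names used in correlate():

    try:
        import configparser                      # Python 3
    except ImportError:
        import ConfigParser as configparser      # Python 2

    config = configparser.ConfigParser()
    config.read("stations")                      # file name assumed
    # with the refreshed data this should now be an HTTPS URL, e.g.
    # https://tgftp.nws.noaa.gov/data/observations/metar/decoded/KJFK.TXT
    print(config.get("kjfk", "metar"))
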
Author: Jeremy Stanley
Date:   2020-05-23 18:09:39 +00:00
Parent: 1ec2848c20
Commit: 8a37eddc06

13 changed files with 186295 additions and 203172 deletions

@@ -1,12 +1,12 @@
"""Contains various object definitions needed by the weather utility."""
weather_copyright = """\
# Copyright (c) 2006-2016 Jeremy Stanley <fungi@yuggoth.org>. Permission to
# Copyright (c) 2006-2020 Jeremy Stanley <fungi@yuggoth.org>. Permission to
# use, copy, modify, and distribute this software is granted under terms
# provided in the LICENSE file distributed with this software.
#"""
weather_version = "2.3"
weather_version = "2.4"
radian_to_km = 6372.795484
radian_to_mi = 3959.871528
@@ -1209,25 +1209,26 @@ def gecos(formatted):
return tuple(coordinates)
def correlate():
import codecs, datetime, hashlib, os, re, sys, tarfile, time, zipfile
import codecs, csv, datetime, hashlib, os, re, sys, tarfile, time, zipfile
if pyversion("3"): import configparser
else: import ConfigParser as configparser
gcounties_an = "2015_Gaz_counties_national.zip"
gcounties_fn = "2015_Gaz_counties_national.txt"
gcousubs_an = "2015_Gaz_cousubs_national.zip"
gcousubs_fn = "2015_Gaz_cousubs_national.txt"
gplace_an = "2015_Gaz_place_national.zip"
gplace_fn = "2015_Gaz_place_national.txt"
gzcta_an = "2015_Gaz_zcta_national.zip"
gzcta_fn = "2015_Gaz_zcta_national.txt"
for filename in os.listdir("."):
if re.match("bp[0-9][0-9][a-z][a-z][0-9][0-9].dbx$", filename):
if re.match("[0-9]{4}_Gaz_counties_national.zip$", filename):
gcounties_an = filename
gcounties_fn = filename[:-4] + ".txt"
elif re.match("[0-9]{4}_Gaz_cousubs_national.zip$", filename):
gcousubs_an = filename
gcousubs_fn = filename[:-4] + ".txt"
elif re.match("[0-9]{4}_Gaz_place_national.zip$", filename):
gplace_an = filename
gplace_fn = filename[:-4] + ".txt"
elif re.match("[0-9]{4}_Gaz_zcta_national.zip$", filename):
gzcta_an = filename
gzcta_fn = filename[:-4] + ".txt"
elif re.match("bp[0-9]{2}[a-z]{2}[0-9]{2}.dbx$", filename):
cpfzcf_fn = filename
break
nsdcccc_fn = "nsd_cccc.txt"
zcatalog_an = "zonecatalog.curr.tar"
metartbl_fn = "metar.tbl"
coopstn_fn = "coop-stations.txt"
ourairports_fn = "airports.csv"
overrides_fn = "overrides.conf"
overrideslog_fn = "overrides.log"
slist_fn = "slist"
@@ -1242,25 +1243,19 @@ def correlate():
%s
# generated by %s on %s from these public domain sources:
#
# http://www.census.gov/geo/maps-data/data/gazetteer2015.html
# https://www.census.gov/geographies/reference-files/time-series/geo/gazetteer-files.html
# %s %s %s
# %s %s %s
# %s %s %s
# %s %s %s
#
# http://www.weather.gov/geodata/catalog/wsom/html/cntyzone.htm
# https://www.weather.gov/gis/ZoneCounty/
# %s %s %s
#
# http://tgftp.nws.noaa.gov/data/nsd_cccc.txt
# https://tgftp.nws.noaa.gov/data/
# %s %s %s
#
# http://tgftp.nws.noaa.gov/data/zonecatalog.curr.tar
# %s %s %s
#
# http://www.nco.ncep.noaa.gov/pmb/codes/nwprod/dictionaries/metar.tbl
# %s %s %s
#
# http://www.ncdc.noaa.gov/homr/reports
# https://ourairports.com/data/
# %s %s %s
#
# ...and these manually-generated or hand-compiled adjustments:
@@ -1303,21 +1298,11 @@ def correlate():
datetime.datetime.fromtimestamp( os.path.getmtime(nsdcccc_fn) )
),
nsdcccc_fn,
hashlib.md5( open(zcatalog_an, "rb").read() ).hexdigest(),
hashlib.md5( open(ourairports_fn, "rb").read() ).hexdigest(),
datetime.date.isoformat(
datetime.datetime.fromtimestamp( os.path.getmtime(zcatalog_an) )
datetime.datetime.fromtimestamp( os.path.getmtime(ourairports_fn) )
),
zcatalog_an,
hashlib.md5( open(metartbl_fn, "rb").read() ).hexdigest(),
datetime.date.isoformat(
datetime.datetime.fromtimestamp( os.path.getmtime(metartbl_fn) )
),
metartbl_fn,
hashlib.md5( open(coopstn_fn, "rb").read() ).hexdigest(),
datetime.date.isoformat(
datetime.datetime.fromtimestamp( os.path.getmtime(coopstn_fn) )
),
coopstn_fn,
ourairports_fn,
hashlib.md5( open(overrides_fn, "rb").read() ).hexdigest(),
datetime.date.isoformat(
datetime.datetime.fromtimestamp( os.path.getmtime(overrides_fn) )
@@ -1344,9 +1329,9 @@ def correlate():
sys.stdout.flush()
count = 0
gcounties = zipfile.ZipFile(gcounties_an).open(gcounties_fn, "rU")
columns = gcounties.readline().decode("latin1").strip().split("\t")
columns = gcounties.readline().decode("utf-8").strip().split("\t")
for line in gcounties:
fields = line.decode("latin1").strip().split("\t")
fields = line.decode("utf-8").strip().split("\t")
f_geoid = fields[ columns.index("GEOID") ].strip()
f_name = fields[ columns.index("NAME") ].strip()
f_usps = fields[ columns.index("USPS") ].strip()
@@ -1367,9 +1352,9 @@ def correlate():
sys.stdout.flush()
count = 0
gcousubs = zipfile.ZipFile(gcousubs_an).open(gcousubs_fn, "rU")
columns = gcousubs.readline().decode("latin1").strip().split("\t")
columns = gcousubs.readline().decode("utf-8").strip().split("\t")
for line in gcousubs:
fields = line.decode("latin1").strip().split("\t")
fields = line.decode("utf-8").strip().split("\t")
f_geoid = fields[ columns.index("GEOID") ].strip()
f_name = fields[ columns.index("NAME") ].strip()
f_usps = fields[ columns.index("USPS") ].strip()
@@ -1390,9 +1375,9 @@ def correlate():
sys.stdout.flush()
count = 0
gplace = zipfile.ZipFile(gplace_an).open(gplace_fn, "rU")
columns = gplace.readline().decode("latin1").strip().split("\t")
columns = gplace.readline().decode("utf-8").strip().split("\t")
for line in gplace:
fields = line.decode("latin1").strip().split("\t")
fields = line.decode("utf-8").strip().split("\t")
f_geoid = fields[ columns.index("GEOID") ].strip()
f_name = fields[ columns.index("NAME") ].strip()
f_usps = fields[ columns.index("USPS") ].strip()
@@ -1412,51 +1397,22 @@ def correlate():
sys.stdout.write(message)
sys.stdout.flush()
count = 0
slist = codecs.open(slist_fn, "rU")
slist = codecs.open(slist_fn, "rU", "utf-8")
for line in slist:
icao = line.split("#")[0].strip()
if icao:
stations[icao] = {
"metar": "http://tgftp.nws.noaa.gov/data/observations/"\
"metar": "https://tgftp.nws.noaa.gov/data/observations/"\
+ "metar/decoded/%s.TXT" % icao.upper()
}
count += 1
slist.close()
print("done (%s lines)." % count)
message = "Reading %s..." % metartbl_fn
sys.stdout.write(message)
sys.stdout.flush()
count = 0
metartbl = codecs.open(metartbl_fn, "rU")
for line in metartbl:
icao = line[:4].strip().lower()
if icao in stations:
description = []
name = " ".join(
line[16:48].replace("_", " ").strip().title().split()
)
if name: description.append(name)
st = line[49:51].strip()
if st: description.append(st)
cn = line[52:54].strip()
if cn: description.append(cn)
if description:
stations[icao]["description"] = ", ".join(description)
lat = line[55:60].strip()
if lat:
lat = int(lat)/100.0
lon = line[61:67].strip()
if lon:
lon = int(lon)/100.0
stations[icao]["location"] = gecos( "%s,%s" % (lat, lon) )
count += 1
metartbl.close()
print("done (%s lines)." % count)
message = "Reading %s..." % nsdcccc_fn
sys.stdout.write(message)
sys.stdout.flush()
count = 0
nsdcccc = codecs.open(nsdcccc_fn, "rU", "latin1")
nsdcccc = codecs.open(nsdcccc_fn, "rU", "utf-8")
for line in nsdcccc:
line = str(line)
fields = line.split(";")
@@ -1481,44 +1437,49 @@ def correlate():
count += 1
nsdcccc.close()
print("done (%s lines)." % count)
message = "Reading %s..." % coopstn_fn
message = "Reading %s..." % ourairports_fn
sys.stdout.write(message)
sys.stdout.flush()
count = 0
coopstn = open(coopstn_fn)
for line in coopstn:
icao = line[33:37].strip().lower()
ourairports = open(ourairports_fn, "rU")
for row in csv.reader(ourairports):
icao = row[12].decode('utf-8').lower()
if icao in stations:
iata = line[22:26].strip().lower()
iata = row[13].decode('utf-8').lower()
if len(iata) == 3: airports[iata] = { "station": icao }
if "description" not in stations[icao]:
description = []
name = " ".join( line[99:129].strip().title().split() )
name = row[3].decode('utf-8')
if name: description.append(name)
st = line[59:61].strip()
if st: description.append(st)
country = " ".join( line[38:58].strip().title().split() )
if country: description.append(country)
municipality = row[10].decode('utf-8')
if municipality: description.append(municipality)
region = row[9].decode('utf-8')
country = row[8].decode('utf-8')
if region:
if "-" in region:
c,r = region.split("-", 1)
if c == country: region = r
description.append(region)
if country:
description.append(country)
if description:
stations[icao]["description"] = ", ".join(description)
if "location" not in stations[icao]:
lat = line[130:139].strip()
lat = row[4].decode('utf-8')
if lat:
lat = lat.replace(" ", "-")
lon = line[140:150].strip()
lon = row[5].decode('utf-8')
if lon:
lon = lon.replace(" ", "-")
stations[icao]["location"] = gecos(
"%s,%s" % (lat, lon)
)
count += 1
coopstn.close()
ourairports.close()
print("done (%s lines)." % count)
message = "Reading %s..." % zlist_fn
sys.stdout.write(message)
sys.stdout.flush()
count = 0
zlist = codecs.open(zlist_fn, "rU")
zlist = codecs.open(zlist_fn, "rU", "utf-8")
for line in zlist:
line = line.split("#")[0].strip()
if line:
@@ -1526,69 +1487,76 @@ def correlate():
count += 1
zlist.close()
print("done (%s lines)." % count)
message = "Reading %s:*..." % zcatalog_an
sys.stdout.write(message)
sys.stdout.flush()
count = 0
zcatalog = tarfile.open(zcatalog_an)
for entry in zcatalog.getmembers():
if entry.isfile():
fnmatch = re.match(
r"([a-z]+z[0-9]+)\.txt$",
os.path.basename(entry.name)
)
if fnmatch:
zone = fnmatch.group(1)
if zone in zones:
data = zcatalog.extractfile(entry).readlines()
description = data[0].decode("ascii").strip()
zones[zone]["description"] = description
for line in data[1:]:
line = line.decode("latin1").strip()
urimatch = re.match("/webdocs/pub/(.+):(.+) for ",
line)
if urimatch:
uritype = urimatch.group(2).lower().replace(" ","_")
zones[zone][uritype] = (
"http://tgftp.nws.noaa.gov/%s"
% urimatch.group(1))
count += 1
zcatalog.close()
print("done (%s files)." % count)
message = "Reading %s..." % cpfzcf_fn
sys.stdout.write(message)
sys.stdout.flush()
count = 0
cpfz = {}
cpfzcf = open(cpfzcf_fn)
cpfzcf = codecs.open(cpfzcf_fn, "rU", "utf-8")
for line in cpfzcf:
fields = line.strip().split("|")
if len(fields) == 11 \
and fields[0] and fields[1] and fields[9] and fields[10]:
zone = "z".join( fields[:2] ).lower()
if zone in zones:
zones[zone]["centroid"] = gecos( ",".join( fields[9:11] ) )
elif fields[6]:
state = fields[0]
description = fields[3]
county = fields[5]
if state:
zones[zone]["coastal_flood_statement"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"flood/coastal/%s/%s.txt" % (state.lower(), zone))
zones[zone]["flash_flood_statement"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"flash_flood/statement/%s/%s.txt"
% (state.lower(), zone))
zones[zone]["flash_flood_warning"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"flash_flood/warning/%s/%s.txt"
% (state.lower(), zone))
zones[zone]["flash_flood_watch"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"flash_flood/watch/%s/%s.txt" % (state.lower(), zone))
zones[zone]["flood_statement"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"flood/statement/%s/%s.txt" % (state.lower(), zone))
zones[zone]["flood_warning"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"flood/warning/%s/%s.txt" % (state.lower(), zone))
zones[zone]["severe_thunderstorm_warning"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"thunderstorm/%s/%s.txt" % (state.lower(), zone))
zones[zone]["severe_weather_statement"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"severe_weather_stmt/%s/%s.txt"
% (state.lower(), zone))
zones[zone]["short_term_forecast"] = (
"https://tgftp.nws.noaa.gov/data/forecasts/nowcast/"
"%s/%s.txt" % (state.lower(), zone))
zones[zone]["special_weather_statement"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"special_weather_stmt/%s/%s.txt"
% (state.lower(), zone))
zones[zone]["state_forecast"] = (
"https://tgftp.nws.noaa.gov/data/forecasts/state/"
"%s/%s.txt" % (state.lower(), zone))
zones[zone]["urgent_weather_message"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"non_precip/%s/%s.txt" % (state.lower(), zone))
zones[zone]["zone_forecast"] = (
"https://tgftp.nws.noaa.gov/data/forecasts/zone/"
"%s/%s.txt" % (state.lower(), zone))
description = fields[3].strip()
fips = "fips%s"%fields[6]
possible = [
"%s, %s" % (county, state),
"%s County, %s" % (county, state),
]
if description.endswith(" Counties"):
description = description[:-9]
for addition in description.split(" and "):
possible.append( "%s, %s" % (addition, state) )
possible.append( "%s County, %s" % (addition, state) )
if fips in places and "centroid" in places[fips]:
for candidate in zones:
if "centroid" not in zones[candidate] and \
"description" in zones[candidate] and \
zones[candidate]["description"] in possible:
zones[candidate]["centroid"] = \
places[fips]["centroid"]
county = fields[5]
if county:
if description.endswith(county):
description += " County"
else:
description += ", %s County" % county
description += ", %s, US" % state
zones[zone]["description"] = description
zones[zone]["centroid"] = gecos( ",".join( fields[9:11] ) )
if fips in places and not zones[zone]["centroid"]:
zones[zone]["centroid"] = places[fips]["centroid"]
count += 1
cpfzcf.close()
print("done (%s lines)." % count)
@@ -1597,9 +1565,9 @@ def correlate():
sys.stdout.flush()
count = 0
gzcta = zipfile.ZipFile(gzcta_an).open(gzcta_fn, "rU")
columns = gzcta.readline().decode("latin1").strip().split("\t")
columns = gzcta.readline().decode("utf-8").strip().split("\t")
for line in gzcta:
fields = line.decode("latin1").strip().split("\t")
fields = line.decode("utf-8").strip().split("\t")
f_geoid = fields[ columns.index("GEOID") ].strip()
f_intptlat = fields[ columns.index("INTPTLAT") ].strip()
f_intptlong = fields[ columns.index("INTPTLONG") ].strip()
@@ -1967,7 +1935,7 @@ def correlate():
count = 0
if os.path.exists(stations_fn):
os.rename(stations_fn, "%s_old"%stations_fn)
stations_fd = codecs.open(stations_fn, "w", "utf8")
stations_fd = codecs.open(stations_fn, "w", "utf-8")
stations_fd.write(header)
for station in sorted( stations.keys() ):
stations_fd.write("\n\n[%s]" % station)