Update correlation sources

Remove the stale metar.tbl and zonecatalog.curr.tar, which the USA
NWS hasn't been updating for many years, and add the public domain
airports.csv file from the amazing ourairports.com community. Also
update to latest (2019) USA Census Bureau location data, March 2020
WX zone information, cooperative sites list from 2018 (latest), and
regenerated active station and zone lists. Loss of the zonecatalog
necessitates directly applying various forecast and alert URL
patterns, though some which appeared unused by NWS for many years
were not included.

Clear out all old overrides, since the vast majority are obsoleted
by refreshed data, and build fresh correlation sets from the above
sources. Basically all sites have switched from HTTP to HTTPS, so
update URLs for this too.
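
The rebuilt correlation sets remain configparser-style files (the diff
still writes one [icao] section per station), so reading them back works
as before. A minimal sketch, assuming the generated stations file name
and the lowercase section/key names used in correlate():

    try:
        import configparser                      # Python 3
    except ImportError:
        import ConfigParser as configparser      # Python 2

    config = configparser.ConfigParser()
    config.read("stations")                      # file name assumed
    # with the refreshed data this should now be an HTTPS URL, e.g.
    # https://tgftp.nws.noaa.gov/data/observations/metar/decoded/KJFK.TXT
    print(config.get("kjfk", "metar"))
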
Author: Jeremy Stanley
Date:   2020-05-23 18:09:39 +00:00
Parent: 1ec2848c20
Commit: 8a37eddc06

13 changed files with 186295 additions and 203172 deletions

@@ -1,12 +1,12 @@
"""Contains various object definitions needed by the weather utility."""
weather_copyright = """\
# Copyright (c) 2006-2016 Jeremy Stanley <fungi@yuggoth.org>. Permission to
# Copyright (c) 2006-2020 Jeremy Stanley <fungi@yuggoth.org>. Permission to
# use, copy, modify, and distribute this software is granted under terms
# provided in the LICENSE file distributed with this software.
#"""
weather_version = "2.3"
weather_version = "2.4"
radian_to_km = 6372.795484
radian_to_mi = 3959.871528
@@ -1209,25 +1209,26 @@ def gecos(formatted):
return tuple(coordinates)
def correlate():
import codecs, datetime, hashlib, os, re, sys, tarfile, time, zipfile
import codecs, csv, datetime, hashlib, os, re, sys, tarfile, time, zipfile
if pyversion("3"): import configparser
else: import ConfigParser as configparser
gcounties_an = "2015_Gaz_counties_national.zip"
gcounties_fn = "2015_Gaz_counties_national.txt"
gcousubs_an = "2015_Gaz_cousubs_national.zip"
gcousubs_fn = "2015_Gaz_cousubs_national.txt"
gplace_an = "2015_Gaz_place_national.zip"
gplace_fn = "2015_Gaz_place_national.txt"
gzcta_an = "2015_Gaz_zcta_national.zip"
gzcta_fn = "2015_Gaz_zcta_national.txt"
for filename in os.listdir("."):
if re.match("bp[0-9][0-9][a-z][a-z][0-9][0-9].dbx$", filename):
if re.match("[0-9]{4}_Gaz_counties_national.zip$", filename):
gcounties_an = filename
gcounties_fn = filename[:-4] + ".txt"
elif re.match("[0-9]{4}_Gaz_cousubs_national.zip$", filename):
gcousubs_an = filename
gcousubs_fn = filename[:-4] + ".txt"
elif re.match("[0-9]{4}_Gaz_place_national.zip$", filename):
gplace_an = filename
gplace_fn = filename[:-4] + ".txt"
elif re.match("[0-9]{4}_Gaz_zcta_national.zip$", filename):
gzcta_an = filename
gzcta_fn = filename[:-4] + ".txt"
elif re.match("bp[0-9]{2}[a-z]{2}[0-9]{2}.dbx$", filename):
cpfzcf_fn = filename
break
nsdcccc_fn = "nsd_cccc.txt"
zcatalog_an = "zonecatalog.curr.tar"
metartbl_fn = "metar.tbl"
coopstn_fn = "coop-stations.txt"
ourairports_fn = "airports.csv"
overrides_fn = "overrides.conf"
overrideslog_fn = "overrides.log"
slist_fn = "slist"
@@ -1242,25 +1243,19 @@ def correlate():
%s
# generated by %s on %s from these public domain sources:
#
# http://www.census.gov/geo/maps-data/data/gazetteer2015.html
# https://www.census.gov/geographies/reference-files/time-series/geo/gazetteer-files.html
# %s %s %s
# %s %s %s
# %s %s %s
# %s %s %s
#
# http://www.weather.gov/geodata/catalog/wsom/html/cntyzone.htm
# https://www.weather.gov/gis/ZoneCounty/
# %s %s %s
#
# http://tgftp.nws.noaa.gov/data/nsd_cccc.txt
# https://tgftp.nws.noaa.gov/data/
# %s %s %s
#
# http://tgftp.nws.noaa.gov/data/zonecatalog.curr.tar
# %s %s %s
#
# http://www.nco.ncep.noaa.gov/pmb/codes/nwprod/dictionaries/metar.tbl
# %s %s %s
#
# http://www.ncdc.noaa.gov/homr/reports
# https://ourairports.com/data/
# %s %s %s
#
# ...and these manually-generated or hand-compiled adjustments:
@@ -1303,21 +1298,11 @@ def correlate():
datetime.datetime.fromtimestamp( os.path.getmtime(nsdcccc_fn) )
),
nsdcccc_fn,
hashlib.md5( open(zcatalog_an, "rb").read() ).hexdigest(),
hashlib.md5( open(ourairports_fn, "rb").read() ).hexdigest(),
datetime.date.isoformat(
datetime.datetime.fromtimestamp( os.path.getmtime(zcatalog_an) )
datetime.datetime.fromtimestamp( os.path.getmtime(ourairports_fn) )
),
zcatalog_an,
hashlib.md5( open(metartbl_fn, "rb").read() ).hexdigest(),
datetime.date.isoformat(
datetime.datetime.fromtimestamp( os.path.getmtime(metartbl_fn) )
),
metartbl_fn,
hashlib.md5( open(coopstn_fn, "rb").read() ).hexdigest(),
datetime.date.isoformat(
datetime.datetime.fromtimestamp( os.path.getmtime(coopstn_fn) )
),
coopstn_fn,
ourairports_fn,
hashlib.md5( open(overrides_fn, "rb").read() ).hexdigest(),
datetime.date.isoformat(
datetime.datetime.fromtimestamp( os.path.getmtime(overrides_fn) )
@@ -1344,9 +1329,9 @@ def correlate():
sys.stdout.flush()
count = 0
gcounties = zipfile.ZipFile(gcounties_an).open(gcounties_fn, "rU")
columns = gcounties.readline().decode("latin1").strip().split("\t")
columns = gcounties.readline().decode("utf-8").strip().split("\t")
for line in gcounties:
fields = line.decode("latin1").strip().split("\t")
fields = line.decode("utf-8").strip().split("\t")
f_geoid = fields[ columns.index("GEOID") ].strip()
f_name = fields[ columns.index("NAME") ].strip()
f_usps = fields[ columns.index("USPS") ].strip()
@@ -1367,9 +1352,9 @@ def correlate():
sys.stdout.flush()
count = 0
gcousubs = zipfile.ZipFile(gcousubs_an).open(gcousubs_fn, "rU")
columns = gcousubs.readline().decode("latin1").strip().split("\t")
columns = gcousubs.readline().decode("utf-8").strip().split("\t")
for line in gcousubs:
fields = line.decode("latin1").strip().split("\t")
fields = line.decode("utf-8").strip().split("\t")
f_geoid = fields[ columns.index("GEOID") ].strip()
f_name = fields[ columns.index("NAME") ].strip()
f_usps = fields[ columns.index("USPS") ].strip()
@@ -1390,9 +1375,9 @@ def correlate():
sys.stdout.flush()
count = 0
gplace = zipfile.ZipFile(gplace_an).open(gplace_fn, "rU")
columns = gplace.readline().decode("latin1").strip().split("\t")
columns = gplace.readline().decode("utf-8").strip().split("\t")
for line in gplace:
fields = line.decode("latin1").strip().split("\t")
fields = line.decode("utf-8").strip().split("\t")
f_geoid = fields[ columns.index("GEOID") ].strip()
f_name = fields[ columns.index("NAME") ].strip()
f_usps = fields[ columns.index("USPS") ].strip()
@@ -1412,51 +1397,22 @@ def correlate():
sys.stdout.write(message)
sys.stdout.flush()
count = 0
slist = codecs.open(slist_fn, "rU")
slist = codecs.open(slist_fn, "rU", "utf-8")
for line in slist:
icao = line.split("#")[0].strip()
if icao:
stations[icao] = {
"metar": "http://tgftp.nws.noaa.gov/data/observations/"\
"metar": "https://tgftp.nws.noaa.gov/data/observations/"\
+ "metar/decoded/%s.TXT" % icao.upper()
}
count += 1
slist.close()
print("done (%s lines)." % count)
message = "Reading %s..." % metartbl_fn
sys.stdout.write(message)
sys.stdout.flush()
count = 0
metartbl = codecs.open(metartbl_fn, "rU")
for line in metartbl:
icao = line[:4].strip().lower()
if icao in stations:
description = []
name = " ".join(
line[16:48].replace("_", " ").strip().title().split()
)
if name: description.append(name)
st = line[49:51].strip()
if st: description.append(st)
cn = line[52:54].strip()
if cn: description.append(cn)
if description:
stations[icao]["description"] = ", ".join(description)
lat = line[55:60].strip()
if lat:
lat = int(lat)/100.0
lon = line[61:67].strip()
if lon:
lon = int(lon)/100.0
stations[icao]["location"] = gecos( "%s,%s" % (lat, lon) )
count += 1
metartbl.close()
print("done (%s lines)." % count)
message = "Reading %s..." % nsdcccc_fn
sys.stdout.write(message)
sys.stdout.flush()
count = 0
nsdcccc = codecs.open(nsdcccc_fn, "rU", "latin1")
nsdcccc = codecs.open(nsdcccc_fn, "rU", "utf-8")
for line in nsdcccc:
line = str(line)
fields = line.split(";")
@@ -1481,44 +1437,49 @@ def correlate():
count += 1
nsdcccc.close()
print("done (%s lines)." % count)
message = "Reading %s..." % coopstn_fn
message = "Reading %s..." % ourairports_fn
sys.stdout.write(message)
sys.stdout.flush()
count = 0
coopstn = open(coopstn_fn)
for line in coopstn:
icao = line[33:37].strip().lower()
ourairports = open(ourairports_fn, "rU")
for row in csv.reader(ourairports):
icao = row[12].decode('utf-8').lower()
if icao in stations:
iata = line[22:26].strip().lower()
iata = row[13].decode('utf-8').lower()
if len(iata) == 3: airports[iata] = { "station": icao }
if "description" not in stations[icao]:
description = []
name = " ".join( line[99:129].strip().title().split() )
name = row[3].decode('utf-8')
if name: description.append(name)
st = line[59:61].strip()
if st: description.append(st)
country = " ".join( line[38:58].strip().title().split() )
if country: description.append(country)
municipality = row[10].decode('utf-8')
if municipality: description.append(municipality)
region = row[9].decode('utf-8')
country = row[8].decode('utf-8')
if region:
if "-" in region:
c,r = region.split("-", 1)
if c == country: region = r
description.append(region)
if country:
description.append(country)
if description:
stations[icao]["description"] = ", ".join(description)
if "location" not in stations[icao]:
lat = line[130:139].strip()
lat = row[4].decode('utf-8')
if lat:
lat = lat.replace(" ", "-")
lon = line[140:150].strip()
lon = row[5].decode('utf-8')
if lon:
lon = lon.replace(" ", "-")
stations[icao]["location"] = gecos(
"%s,%s" % (lat, lon)
)
count += 1
coopstn.close()
ourairports.close()
print("done (%s lines)." % count)
message = "Reading %s..." % zlist_fn
sys.stdout.write(message)
sys.stdout.flush()
count = 0
zlist = codecs.open(zlist_fn, "rU")
zlist = codecs.open(zlist_fn, "rU", "utf-8")
for line in zlist:
line = line.split("#")[0].strip()
if line:
@@ -1526,69 +1487,76 @@ def correlate():
count += 1
zlist.close()
print("done (%s lines)." % count)
message = "Reading %s:*..." % zcatalog_an
sys.stdout.write(message)
sys.stdout.flush()
count = 0
zcatalog = tarfile.open(zcatalog_an)
for entry in zcatalog.getmembers():
if entry.isfile():
fnmatch = re.match(
r"([a-z]+z[0-9]+)\.txt$",
os.path.basename(entry.name)
)
if fnmatch:
zone = fnmatch.group(1)
if zone in zones:
data = zcatalog.extractfile(entry).readlines()
description = data[0].decode("ascii").strip()
zones[zone]["description"] = description
for line in data[1:]:
line = line.decode("latin1").strip()
urimatch = re.match("/webdocs/pub/(.+):(.+) for ",
line)
if urimatch:
uritype = urimatch.group(2).lower().replace(" ","_")
zones[zone][uritype] = (
"http://tgftp.nws.noaa.gov/%s"
% urimatch.group(1))
count += 1
zcatalog.close()
print("done (%s files)." % count)
message = "Reading %s..." % cpfzcf_fn
sys.stdout.write(message)
sys.stdout.flush()
count = 0
cpfz = {}
cpfzcf = open(cpfzcf_fn)
cpfzcf = codecs.open(cpfzcf_fn, "rU", "utf-8")
for line in cpfzcf:
fields = line.strip().split("|")
if len(fields) == 11 \
and fields[0] and fields[1] and fields[9] and fields[10]:
zone = "z".join( fields[:2] ).lower()
if zone in zones:
zones[zone]["centroid"] = gecos( ",".join( fields[9:11] ) )
elif fields[6]:
state = fields[0]
description = fields[3]
county = fields[5]
if state:
zones[zone]["coastal_flood_statement"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"flood/coastal/%s/%s.txt" % (state.lower(), zone))
zones[zone]["flash_flood_statement"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"flash_flood/statement/%s/%s.txt"
% (state.lower(), zone))
zones[zone]["flash_flood_warning"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"flash_flood/warning/%s/%s.txt"
% (state.lower(), zone))
zones[zone]["flash_flood_watch"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"flash_flood/watch/%s/%s.txt" % (state.lower(), zone))
zones[zone]["flood_statement"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"flood/statement/%s/%s.txt" % (state.lower(), zone))
zones[zone]["flood_warning"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"flood/warning/%s/%s.txt" % (state.lower(), zone))
zones[zone]["severe_thunderstorm_warning"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"thunderstorm/%s/%s.txt" % (state.lower(), zone))
zones[zone]["severe_weather_statement"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"severe_weather_stmt/%s/%s.txt"
% (state.lower(), zone))
zones[zone]["short_term_forecast"] = (
"https://tgftp.nws.noaa.gov/data/forecasts/nowcast/"
"%s/%s.txt" % (state.lower(), zone))
zones[zone]["special_weather_statement"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"special_weather_stmt/%s/%s.txt"
% (state.lower(), zone))
zones[zone]["state_forecast"] = (
"https://tgftp.nws.noaa.gov/data/forecasts/state/"
"%s/%s.txt" % (state.lower(), zone))
zones[zone]["urgent_weather_message"] = (
"https://tgftp.nws.noaa.gov/data/watches_warnings/"
"non_precip/%s/%s.txt" % (state.lower(), zone))
zones[zone]["zone_forecast"] = (
"https://tgftp.nws.noaa.gov/data/forecasts/zone/"
"%s/%s.txt" % (state.lower(), zone))
description = fields[3].strip()
fips = "fips%s"%fields[6]
possible = [
"%s, %s" % (county, state),
"%s County, %s" % (county, state),
]
if description.endswith(" Counties"):
description = description[:-9]
for addition in description.split(" and "):
possible.append( "%s, %s" % (addition, state) )
possible.append( "%s County, %s" % (addition, state) )
if fips in places and "centroid" in places[fips]:
for candidate in zones:
if "centroid" not in zones[candidate] and \
"description" in zones[candidate] and \
zones[candidate]["description"] in possible:
zones[candidate]["centroid"] = \
places[fips]["centroid"]
county = fields[5]
if county:
if description.endswith(county):
description += " County"
else:
description += ", %s County" % county
description += ", %s, US" % state
zones[zone]["description"] = description
zones[zone]["centroid"] = gecos( ",".join( fields[9:11] ) )
if fips in places and not zones[zone]["centroid"]:
zones[zone]["centroid"] = places[fips]["centroid"]
count += 1
cpfzcf.close()
print("done (%s lines)." % count)
@@ -1597,9 +1565,9 @@ def correlate():
sys.stdout.flush()
count = 0
gzcta = zipfile.ZipFile(gzcta_an).open(gzcta_fn, "rU")
columns = gzcta.readline().decode("latin1").strip().split("\t")
columns = gzcta.readline().decode("utf-8").strip().split("\t")
for line in gzcta:
fields = line.decode("latin1").strip().split("\t")
fields = line.decode("utf-8").strip().split("\t")
f_geoid = fields[ columns.index("GEOID") ].strip()
f_intptlat = fields[ columns.index("INTPTLAT") ].strip()
f_intptlong = fields[ columns.index("INTPTLONG") ].strip()
@@ -1967,7 +1935,7 @@ def correlate():
count = 0
if os.path.exists(stations_fn):
os.rename(stations_fn, "%s_old"%stations_fn)
stations_fd = codecs.open(stations_fn, "w", "utf8")
stations_fd = codecs.open(stations_fn, "w", "utf-8")
stations_fd.write(header)
for station in sorted( stations.keys() ):
stations_fd.write("\n\n[%s]" % station)