Support more recent data sources

Add support for the 2014 Census Bureau gazetteer data files and for the
newer NWS COOP stations file (coop-stations.txt, replacing COOP-ACT.TXT).
This commit is contained in:
Jeremy Stanley
2014-10-30 00:23:01 +00:00
parent 49a6ebe760
commit 562fc1c1df
2 changed files with 28 additions and 27 deletions

3
.gitignore vendored
View File

@@ -1,7 +1,8 @@
*_old *_old
*Gaz_*_national.zip
COOP-ACT.TXT COOP-ACT.TXT
Gaz_*_national.zip
bp??????.dbx bp??????.dbx
coop-stations.txt
metar.tbl metar.tbl
nsd_cccc.txt nsd_cccc.txt
*.pyc *.pyc

View File

@@ -1196,14 +1196,14 @@ def correlate():
import codecs, datetime, hashlib, os, re, sys, tarfile, time, zipfile import codecs, datetime, hashlib, os, re, sys, tarfile, time, zipfile
if pyversion("3"): import configparser if pyversion("3"): import configparser
else: import ConfigParser as configparser else: import ConfigParser as configparser
gcounties_an = "Gaz_counties_national.zip" gcounties_an = "2014_Gaz_counties_national.zip"
gcounties_fn = "Gaz_counties_national.txt" gcounties_fn = "2014_Gaz_counties_national.txt"
gcousubs_an = "Gaz_cousubs_national.zip" gcousubs_an = "2014_Gaz_cousubs_national.zip"
gcousubs_fn = "Gaz_cousubs_national.txt" gcousubs_fn = "2014_Gaz_cousubs_national.txt"
gplaces_an = "Gaz_places_national.zip" gplace_an = "2014_Gaz_place_national.zip"
gplaces_fn = "Gaz_places_national.txt" gplace_fn = "2014_Gaz_place_national.txt"
gzcta_an = "Gaz_zcta_national.zip" gzcta_an = "2014_Gaz_zcta_national.zip"
gzcta_fn = "Gaz_zcta_national.txt" gzcta_fn = "2014_Gaz_zcta_national.txt"
for filename in os.listdir("."): for filename in os.listdir("."):
if re.match("bp[0-9][0-9][a-z][a-z][0-9][0-9].dbx$", filename): if re.match("bp[0-9][0-9][a-z][a-z][0-9][0-9].dbx$", filename):
cpfzcf_fn = filename cpfzcf_fn = filename
@@ -1211,7 +1211,7 @@ def correlate():
nsdcccc_fn = "nsd_cccc.txt" nsdcccc_fn = "nsd_cccc.txt"
zcatalog_an = "zonecatalog.curr.tar" zcatalog_an = "zonecatalog.curr.tar"
metartbl_fn = "metar.tbl" metartbl_fn = "metar.tbl"
coopact_fn = "COOP-ACT.TXT" coopstn_fn = "coop-stations.txt"
overrides_fn = "overrides.conf" overrides_fn = "overrides.conf"
overrideslog_fn = "overrides.log" overrideslog_fn = "overrides.log"
slist_fn = "slist" slist_fn = "slist"
@@ -1226,7 +1226,7 @@ def correlate():
%s %s
# generated by %s on %s from these public domain sources: # generated by %s on %s from these public domain sources:
# #
# http://www.census.gov/geo/maps-data/data/gazetteer2010.html # http://www.census.gov/geo/maps-data/data/gazetteer2014.html
# %s %s %s # %s %s %s
# %s %s %s # %s %s %s
# %s %s %s # %s %s %s
@@ -1244,7 +1244,7 @@ def correlate():
# http://www.nco.ncep.noaa.gov/pmb/codes/nwprod/dictionaries/metar.tbl # http://www.nco.ncep.noaa.gov/pmb/codes/nwprod/dictionaries/metar.tbl
# %s %s %s # %s %s %s
# #
# ftp://ftp.ncdc.noaa.gov/pub/data/inventories/COOP-ACT.TXT # http://www.ncdc.noaa.gov/homr/reports
# %s %s %s # %s %s %s
# #
# ...and these manually-generated or hand-compiled adjustments: # ...and these manually-generated or hand-compiled adjustments:
@@ -1267,11 +1267,11 @@ def correlate():
datetime.datetime.fromtimestamp( os.path.getmtime(gcousubs_an) ) datetime.datetime.fromtimestamp( os.path.getmtime(gcousubs_an) )
), ),
gcousubs_an, gcousubs_an,
hashlib.md5( open(gplaces_an, "rb").read() ).hexdigest(), hashlib.md5( open(gplace_an, "rb").read() ).hexdigest(),
datetime.date.isoformat( datetime.date.isoformat(
datetime.datetime.fromtimestamp( os.path.getmtime(gplaces_an) ) datetime.datetime.fromtimestamp( os.path.getmtime(gplace_an) )
), ),
gplaces_an, gplace_an,
hashlib.md5( open(gzcta_an, "rb").read() ).hexdigest(), hashlib.md5( open(gzcta_an, "rb").read() ).hexdigest(),
datetime.date.isoformat( datetime.date.isoformat(
datetime.datetime.fromtimestamp( os.path.getmtime(gzcta_an) ) datetime.datetime.fromtimestamp( os.path.getmtime(gzcta_an) )
@@ -1297,11 +1297,11 @@ def correlate():
datetime.datetime.fromtimestamp( os.path.getmtime(metartbl_fn) ) datetime.datetime.fromtimestamp( os.path.getmtime(metartbl_fn) )
), ),
metartbl_fn, metartbl_fn,
hashlib.md5( open(coopact_fn, "rb").read() ).hexdigest(), hashlib.md5( open(coopstn_fn, "rb").read() ).hexdigest(),
datetime.date.isoformat( datetime.date.isoformat(
datetime.datetime.fromtimestamp( os.path.getmtime(coopact_fn) ) datetime.datetime.fromtimestamp( os.path.getmtime(coopstn_fn) )
), ),
coopact_fn, coopstn_fn,
hashlib.md5( open(overrides_fn, "rb").read() ).hexdigest(), hashlib.md5( open(overrides_fn, "rb").read() ).hexdigest(),
datetime.date.isoformat( datetime.date.isoformat(
datetime.datetime.fromtimestamp( os.path.getmtime(overrides_fn) ) datetime.datetime.fromtimestamp( os.path.getmtime(overrides_fn) )
@@ -1369,13 +1369,13 @@ def correlate():
count += 1 count += 1
gcousubs.close() gcousubs.close()
print("done (%s lines)." % count) print("done (%s lines)." % count)
message = "Reading %s:%s..." % (gplaces_an, gplaces_fn) message = "Reading %s:%s..." % (gplace_an, gplace_fn)
sys.stdout.write(message) sys.stdout.write(message)
sys.stdout.flush() sys.stdout.flush()
count = 0 count = 0
gplaces = zipfile.ZipFile(gplaces_an).open(gplaces_fn, "rU") gplace = zipfile.ZipFile(gplace_an).open(gplace_fn, "rU")
columns = gplaces.readline().decode("latin1").strip().split("\t") columns = gplace.readline().decode("latin1").strip().split("\t")
for line in gplaces: for line in gplace:
fields = line.decode("latin1").strip().split("\t") fields = line.decode("latin1").strip().split("\t")
f_geoid = fields[ columns.index("GEOID") ].strip() f_geoid = fields[ columns.index("GEOID") ].strip()
f_name = fields[ columns.index("NAME") ].strip() f_name = fields[ columns.index("NAME") ].strip()
@@ -1390,7 +1390,7 @@ def correlate():
) )
places[fips]["description"] = "%s, %s" % (f_name, f_usps) places[fips]["description"] = "%s, %s" % (f_name, f_usps)
count += 1 count += 1
gplaces.close() gplace.close()
print("done (%s lines)." % count) print("done (%s lines)." % count)
message = "Reading %s..." % slist_fn message = "Reading %s..." % slist_fn
sys.stdout.write(message) sys.stdout.write(message)
@@ -1465,12 +1465,12 @@ def correlate():
count += 1 count += 1
nsdcccc.close() nsdcccc.close()
print("done (%s lines)." % count) print("done (%s lines)." % count)
message = "Reading %s..." % coopact_fn message = "Reading %s..." % coopstn_fn
sys.stdout.write(message) sys.stdout.write(message)
sys.stdout.flush() sys.stdout.flush()
count = 0 count = 0
coopact = open(coopact_fn) coopstn = open(coopstn_fn)
for line in coopact: for line in coopstn:
icao = line[33:37].strip().lower() icao = line[33:37].strip().lower()
if icao in stations: if icao in stations:
iata = line[22:26].strip().lower() iata = line[22:26].strip().lower()
@@ -1496,7 +1496,7 @@ def correlate():
"%s,%s" % (lat, lon) "%s,%s" % (lat, lon)
) )
count += 1 count += 1
coopact.close() coopstn.close()
print("done (%s lines)." % count) print("done (%s lines)." % count)
message = "Reading %s..." % zlist_fn message = "Reading %s..." % zlist_fn
sys.stdout.write(message) sys.stdout.write(message)