X-Git-Url: http://git.shiar.net/sheet.git/blobdiff_plain/c133e6ad3027e095811931f3e0d21b818865e0e8..3f38a7192491a73a1527179cdd68218f023aed61:/tools/mkcountries-geonames?ds=inline diff --git a/tools/mkcountries-geonames b/tools/mkcountries-geonames index b7fb27e..13f6e65 100755 --- a/tools/mkcountries-geonames +++ b/tools/mkcountries-geonames @@ -1,7 +1,9 @@ #!/usr/bin/env perl -use 5.012; +use 5.014; use warnings; +our $VERSION = '1.04'; + my %cc; # map of country code to info array %cc = ( # transitional reservations @@ -16,15 +18,16 @@ my %cc; # map of country code to info array # exceptional reservations ac => ["exceptionally reserved: Ascension Island", "c-oc Xr", "Ascension Island", 'sh'], cp => ["exceptionally reserved: Clipperton Island", "c-na Xr", "Clipperton Island"], + cq => ["exceptionally reserved by gb: Island of Sark", "c-eu Xr", "Island of Sark", 'gg'], dg => ["exceptionally reserved: Diego Garcia", "c-as Xr", "Diego Garcia", 'io'], ea => ["exceptionally reserved: Ceuta and Melilla", "c-af Xr", "Ceuta and Melilla"], eu => ["exceptionally reserved: European Union", "c-eu Xr", "European Union"], ez => ["exceptionally reserved: European OTC derivatives", "c-eu Xr", "Eurozone"], - fx => ["exceptionally reserved: Metropolitan France", "c-eu Xr", "Metropolitan France", 'fr'], + fx => ["exceptionally reserved by fr: Metropolitan France", "c-eu Xr", "Metropolitan France", 'fr'], ic => ["exceptionally reserved: Canary Islands", "c-af Xr", "Canary Islands"], su => ["exceptionally reserved: former USSR", "c-eu Xr", "USSR"], ta => ["exceptionally reserved: Tristan da Cunha", "c-oc Xr", "Tristan da Cunha", 'sh'], - uk => ["exceptionally reserved: United Kingdom", "c-eu Xr", "(United Kingdom)", 'gb'], + uk => ["exceptionally reserved by gb: United Kingdom", "c-eu Xr", "(United Kingdom)", 'gb'], un => ["exceptionally reserved: United Nations", "Xi Xr", "United Nations"], # indeterminate reservations @@ -61,10 +64,20 @@ my %cc; # map of country code to info array oa => ["not used: African Intellectual Property Organization", "Xi", "OAPI"], # c-af wo => ["not used: World Intellectual Property Organization", "Xi", "WIPO"], - # unicode identifiers + # common user-assigned + xa => ["user-assigned by Russia: Abkhazia", "c-az Co Xi", "Abkhazia", 'ge'], # also IC alias by CH + xo => ["user-assigned by Russia: South Ossetia", "c-az Co Xi", "South Ossetia", 'ge'], + xi => ["user-assigned for UK/EU trade: Northern Ireland", "c-eu Co Xi", "Northern Ireland", 'gb'], + xn => ["user-assigned by WIPO: Nordic Patent Institute", "c-eu Co Xi", "Nordic", 'dk is no se'], + xu => ["user-assigned by WIPO: international org UPOV", "Co Xi", "UPOV"], + xv => ["user-assigned by WIPO: Visegrad Patent Institute", "c-eu Co Xi", "Visegrad", 'cz hu pl sk'], + xx => ["user-assigned by WIPO: unknown state or other entity", "Co Xi", "other"], + xz => ["UN/LOCODE semantics: international waters", "Co Xi", "international"], + qm => ["user-assigned by ISRC: United States alternative", "c-na Co Xi", "USA"], qo => ["Unicode semantics: Outlying Oceania", "c-oc Co Xi", "Oceania"], qu => ["Unicode semantics: European Union deprecated reserve", "c-eu Co Xi", "EU", 'eu'], zz => ["Unicode semantics: unknown or invalid territory", "Co Xi","unknown"], + oo => ["escape to indicate additional code", "Co Xi", "escape"], ); while (<>) { @@ -85,15 +98,17 @@ for (values %cc) { s/(?<=.)\(.*\)\s*//; s/ republic\b//gi; s/ islands?\b//gi; - s/\bthe //g; + s/\bthe //gi; s/ and / & /g and s/(?<=.)[a-z ]+//g; s/ of / /g; s/\bsa?int /st /gi; - s/United /Un. /gi; + s/Un\Kited /. /gi; s/South(?:ern)? /S-/g; s/North(?:ern)? /N-/g; + s/West(?:ern)? /W-/g; + s/East(?:ern)? /E-/g; s/New /n./g; - s/(\S)(\S+)-/$1-/g; # strip most chars preceding dash + s/(\w)(\w+)-/$1-/g; # strip most chars preceding dash s/(\S{4}[b-df-hj-np-tv-xz])((?