Add missing locode details using overrides #21
7 changed files with 21578 additions and 23 deletions
6
Makefile
6
Makefile
|
@ -41,13 +41,17 @@ in/unlocode-SubdivisionCodes.csv: $(DIRS)
|
||||||
in/unlocode-CodeList.csv: $(DIRS)
|
in/unlocode-CodeList.csv: $(DIRS)
|
||||||
zcat data/unlocode-CodeList.csv.gz > in/unlocode-CodeList.csv
|
zcat data/unlocode-CodeList.csv.gz > in/unlocode-CodeList.csv
|
||||||
|
|
||||||
|
in/unlocode-CodeList_overrides.csv: $(DIRS)
|
||||||
|
cp data/unlocode-CodeList_overrides.csv in/unlocode-CodeList_overrides.csv
|
||||||
|
|
||||||
# Generate locode_db BoltDB file
|
# Generate locode_db BoltDB file
|
||||||
locode_db: in/unlocode-CodeList.csv in/unlocode-SubdivisionCodes.csv in/continents.geojson in/airports.dat in/countries.dat $(FROST_LOCODE)
|
locode_db: in/unlocode-CodeList.csv in/unlocode-SubdivisionCodes.csv in/continents.geojson in/airports.dat in/countries.dat in/unlocode-CodeList_overrides.csv $(FROST_LOCODE)
|
||||||
$(FROST_LOCODE) generate \
|
$(FROST_LOCODE) generate \
|
||||||
--airports in/airports.dat \
|
--airports in/airports.dat \
|
||||||
--continents in/continents.geojson \
|
--continents in/continents.geojson \
|
||||||
--countries in/countries.dat \
|
--countries in/countries.dat \
|
||||||
--in in/unlocode-CodeList.csv \
|
--in in/unlocode-CodeList.csv \
|
||||||
|
--overrides in/unlocode-CodeList_overrides.csv \
|
||||||
--subdiv in/unlocode-SubdivisionCodes.csv \
|
--subdiv in/unlocode-SubdivisionCodes.csv \
|
||||||
--out locode_db
|
--out locode_db
|
||||||
chmod 644 locode_db
|
chmod 644 locode_db
|
||||||
|
|
|
@ -16,6 +16,8 @@ This repository tools generate UN/LOCODE database for FrostFS using data from
|
||||||
following sources:
|
following sources:
|
||||||
- [UN/LOCODE](https://unece.org/trade/cefact/UNLOCODE-Download) database in CSV
|
- [UN/LOCODE](https://unece.org/trade/cefact/UNLOCODE-Download) database in CSV
|
||||||
format, licensed under the [ODC Public Domain Dedication and Licence (PDDL)](http://opendatacommons.org/licenses/pddl/1-0/)
|
format, licensed under the [ODC Public Domain Dedication and Licence (PDDL)](http://opendatacommons.org/licenses/pddl/1-0/)
|
||||||
|
- [Improved UN/LOCODE](https://raw.githubusercontent.com/cristan/improved-un-locodes/86b2e96f17289ddb62f52dcfcfac520bc1ca002b/data/code-list-improved.csv) database
|
||||||
|
that contains more details about LOCODEs (such as coordinates) that may be missing from the UN/LOCODE database.
|
||||||
- [OpenFlight Airports and
|
- [OpenFlight Airports and
|
||||||
Countries](https://raw.githubusercontent.com/jpatokal/openflights/master/data/)
|
Countries](https://raw.githubusercontent.com/jpatokal/openflights/master/data/)
|
||||||
databases, licensed under the [GNU AGPL-3.0
|
databases, licensed under the [GNU AGPL-3.0
|
||||||
|
|
21460
data/unlocode-CodeList_overrides.csv
Normal file
21460
data/unlocode-CodeList_overrides.csv
Normal file
File diff suppressed because it is too large
Load diff
|
@ -16,6 +16,7 @@ type namesDB struct {
|
||||||
|
|
||||||
const (
|
const (
|
||||||
locodeGenerateInputFlag = "in"
|
locodeGenerateInputFlag = "in"
|
||||||
|
locodeGenerateOverridesFlag = "overrides"
|
||||||
locodeGenerateSubDivFlag = "subdiv"
|
locodeGenerateSubDivFlag = "subdiv"
|
||||||
locodeGenerateAirportsFlag = "airports"
|
locodeGenerateAirportsFlag = "airports"
|
||||||
locodeGenerateCountriesFlag = "countries"
|
locodeGenerateCountriesFlag = "countries"
|
||||||
|
@ -25,6 +26,7 @@ const (
|
||||||
|
|
||||||
var (
|
var (
|
||||||
locodeGenerateInPaths []string
|
locodeGenerateInPaths []string
|
||||||
|
locodeGenerateOverridesPath string
|
||||||
locodeGenerateSubDivPath string
|
locodeGenerateSubDivPath string
|
||||||
locodeGenerateAirportsPath string
|
locodeGenerateAirportsPath string
|
||||||
locodeGenerateCountriesPath string
|
locodeGenerateCountriesPath string
|
||||||
|
@ -35,11 +37,11 @@ var (
|
||||||
Use: "generate",
|
Use: "generate",
|
||||||
Short: "Generate UN/LOCODE database for FrostFS",
|
Short: "Generate UN/LOCODE database for FrostFS",
|
||||||
Run: func(cmd *cobra.Command, _ []string) {
|
Run: func(cmd *cobra.Command, _ []string) {
|
||||||
|
|
||||||
locodeDB := csvlocode.New(
|
locodeDB := csvlocode.New(
|
||||||
csvlocode.Prm{
|
csvlocode.Prm{
|
||||||
Path: locodeGenerateInPaths[0],
|
Path: locodeGenerateInPaths[0],
|
||||||
SubDivPath: locodeGenerateSubDivPath,
|
SubDivPath: locodeGenerateSubDivPath,
|
||||||
|
OverridesPath: locodeGenerateOverridesPath,
|
||||||
},
|
},
|
||||||
csvlocode.WithExtraPaths(locodeGenerateInPaths[1:]...),
|
csvlocode.WithExtraPaths(locodeGenerateInPaths[1:]...),
|
||||||
)
|
)
|
||||||
|
@ -67,8 +69,10 @@ var (
|
||||||
Table: locodeDB,
|
Table: locodeDB,
|
||||||
}
|
}
|
||||||
|
|
||||||
err = locodedb.FillDatabase(locodeDB, airportDB, continentsDB, names, targetDB)
|
res, err := locodedb.FillDatabase(locodeDB, airportDB, continentsDB, names, targetDB)
|
||||||
ExitOnErr(cmd, "", err)
|
ExitOnErr(cmd, "", err)
|
||||||
|
cmd.Printf("Records added to locode db: %d\n", res.AddedRecordCount)
|
||||||
|
cmd.Printf("Records ignored: %d\n", res.IgnoredRecordCount)
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
@ -79,6 +83,8 @@ func initUtilLocodeGenerateCmd() {
|
||||||
flags.StringSliceVar(&locodeGenerateInPaths, locodeGenerateInputFlag, nil, "List of paths to UN/LOCODE tables (csv)")
|
flags.StringSliceVar(&locodeGenerateInPaths, locodeGenerateInputFlag, nil, "List of paths to UN/LOCODE tables (csv)")
|
||||||
_ = locodeGenerateCmd.MarkFlagRequired(locodeGenerateInputFlag)
|
_ = locodeGenerateCmd.MarkFlagRequired(locodeGenerateInputFlag)
|
||||||
|
|
||||||
|
flags.StringVar(&locodeGenerateOverridesPath, locodeGenerateOverridesFlag, "", "Path to UN/LOCODE override tables (csv)")
|
||||||
|
|
||||||
flags.StringVar(&locodeGenerateSubDivPath, locodeGenerateSubDivFlag, "", "Path to UN/LOCODE subdivision database (csv)")
|
flags.StringVar(&locodeGenerateSubDivPath, locodeGenerateSubDivFlag, "", "Path to UN/LOCODE subdivision database (csv)")
|
||||||
_ = locodeGenerateCmd.MarkFlagRequired(locodeGenerateSubDivFlag)
|
_ = locodeGenerateCmd.MarkFlagRequired(locodeGenerateSubDivFlag)
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"sync/atomic"
|
||||||
|
|
||||||
"git.frostfs.info/TrueCloudLab/frostfs-locode-db/pkg/locode"
|
"git.frostfs.info/TrueCloudLab/frostfs-locode-db/pkg/locode"
|
||||||
"golang.org/x/sync/errgroup"
|
"golang.org/x/sync/errgroup"
|
||||||
|
@ -75,38 +76,58 @@ type NamesDB interface {
|
||||||
SubDivName(*CountryCode, string) (string, error)
|
SubDivName(*CountryCode, string) (string, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type FillDatabaseResult struct {
|
||||||
|
AddedRecordCount int
|
||||||
|
IgnoredRecordCount int
|
||||||
|
}
|
||||||
|
|
||||||
// FillDatabase generates the FrostFS location database based on the UN/LOCODE table.
|
// FillDatabase generates the FrostFS location database based on the UN/LOCODE table.
|
||||||
func FillDatabase(table SourceTable, airports AirportDB, continents ContinentsDB, names NamesDB, db DB) error {
|
func FillDatabase(table SourceTable, airports AirportDB, continents ContinentsDB, names NamesDB, db DB) (FillDatabaseResult, error) {
|
||||||
var errG errgroup.Group
|
var errG errgroup.Group
|
||||||
|
|
||||||
|
var added, ignored atomic.Int32
|
||||||
|
|
||||||
// Pick some sane default, after this the performance stopped increasing.
|
// Pick some sane default, after this the performance stopped increasing.
|
||||||
errG.SetLimit(runtime.NumCPU() * 16)
|
errG.SetLimit(runtime.NumCPU() * 16)
|
||||||
_ = table.IterateAll(func(tableRecord locode.Record) error {
|
_ = table.IterateAll(func(tableRecord locode.Record) error {
|
||||||
errG.Go(func() error {
|
errG.Go(func() error {
|
||||||
return processTableRecord(tableRecord, airports, continents, names, db)
|
wasAdded, err := processTableRecord(tableRecord, airports, continents, names, db)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if wasAdded {
|
||||||
|
added.Add(1)
|
||||||
|
} else {
|
||||||
|
ignored.Add(1)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
})
|
})
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
return errG.Wait()
|
|
||||||
|
return FillDatabaseResult{
|
||||||
|
AddedRecordCount: int(added.Load()),
|
||||||
|
IgnoredRecordCount: int(ignored.Load()),
|
||||||
|
}, errG.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
func processTableRecord(tableRecord locode.Record, airports AirportDB, continents ContinentsDB, names NamesDB, db DB) error {
|
func processTableRecord(tableRecord locode.Record, airports AirportDB, continents ContinentsDB, names NamesDB, db DB) (bool, error) {
|
||||||
if tableRecord.LOCODE.LocationCode() == "" {
|
if tableRecord.LOCODE.LocationCode() == "" {
|
||||||
return nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
dbKey, err := NewKey(tableRecord.LOCODE)
|
dbKey, err := NewKey(tableRecord.LOCODE)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return false, err
|
||||||
}
|
}
|
||||||
|
|
||||||
dbRecord, err := NewRecord(tableRecord)
|
dbRecord, err := NewRecord(tableRecord)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.Is(err, errParseCoordinates) {
|
if errors.Is(err, errParseCoordinates) {
|
||||||
return nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return err
|
return false, err
|
||||||
}
|
}
|
||||||
|
|
||||||
geoPoint := dbRecord.GeoPoint()
|
geoPoint := dbRecord.GeoPoint()
|
||||||
|
@ -116,10 +137,10 @@ func processTableRecord(tableRecord locode.Record, airports AirportDB, continent
|
||||||
airportRecord, err := airports.Get(tableRecord)
|
airportRecord, err := airports.Get(tableRecord)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.Is(err, ErrAirportNotFound) {
|
if errors.Is(err, ErrAirportNotFound) {
|
||||||
return nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return err
|
return false, err
|
||||||
}
|
}
|
||||||
|
|
||||||
geoPoint = airportRecord.Point
|
geoPoint = airportRecord.Point
|
||||||
|
@ -132,10 +153,10 @@ func processTableRecord(tableRecord locode.Record, airports AirportDB, continent
|
||||||
countryName, err = names.CountryName(dbKey.CountryCode())
|
countryName, err = names.CountryName(dbKey.CountryCode())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.Is(err, ErrCountryNotFound) {
|
if errors.Is(err, ErrCountryNotFound) {
|
||||||
return nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return err
|
return false, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -145,10 +166,10 @@ func processTableRecord(tableRecord locode.Record, airports AirportDB, continent
|
||||||
subDivName, err := names.SubDivName(dbKey.CountryCode(), subDivCode)
|
subDivName, err := names.SubDivName(dbKey.CountryCode(), subDivCode)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.Is(err, ErrSubDivNotFound) {
|
if errors.Is(err, ErrSubDivNotFound) {
|
||||||
return nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return err
|
return false, err
|
||||||
}
|
}
|
||||||
|
|
||||||
dbRecord.SetSubDivName(subDivName)
|
dbRecord.SetSubDivName(subDivName)
|
||||||
|
@ -156,14 +177,14 @@ func processTableRecord(tableRecord locode.Record, airports AirportDB, continent
|
||||||
|
|
||||||
continent, err := continents.PointContinent(geoPoint)
|
continent, err := continents.PointContinent(geoPoint)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("could not calculate continent geo point: %w", err)
|
return false, fmt.Errorf("could not calculate continent geo point: %w", err)
|
||||||
} else if continent.Is(ContinentUnknown) {
|
} else if continent.Is(ContinentUnknown) {
|
||||||
return nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
dbRecord.SetContinent(continent)
|
dbRecord.SetContinent(continent)
|
||||||
|
|
||||||
return db.Put(*dbKey, *dbRecord)
|
return true, db.Put(*dbKey, *dbRecord)
|
||||||
}
|
}
|
||||||
|
|
||||||
// LocodeRecord returns the record from the FrostFS location database
|
// LocodeRecord returns the record from the FrostFS location database
|
||||||
|
|
|
@ -3,6 +3,7 @@ package csvlocode
|
||||||
import (
|
import (
|
||||||
"encoding/csv"
|
"encoding/csv"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -40,6 +41,10 @@ func (t *Table) IterateAll(f func(locode.Record) error) error {
|
||||||
Remarks: words[11],
|
Remarks: words[11],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := t.Override(&record); err != nil {
|
||||||
|
return fmt.Errorf("override: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
return f(record)
|
return f(record)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -84,6 +89,14 @@ func (t *Table) SubDivName(countryCode *locodedb.CountryCode, code string) (stri
|
||||||
return rec.name, nil
|
return rec.name, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t *Table) Override(record *locode.Record) error {
|
||||||
|
override, found := t.overrides[record.LOCODE]
|
||||||
|
if found {
|
||||||
|
*record = override
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (t *Table) initSubDiv() (err error) {
|
func (t *Table) initSubDiv() (err error) {
|
||||||
t.subDivOnce.Do(func() {
|
t.subDivOnce.Do(func() {
|
||||||
t.mSubDiv = make(map[subDivKey]subDivRecord)
|
t.mSubDiv = make(map[subDivKey]subDivRecord)
|
||||||
|
@ -103,6 +116,40 @@ func (t *Table) initSubDiv() (err error) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t *Table) initOverrides(overridesPath string) error {
|
||||||
|
const wordsPerRecord = 12
|
||||||
|
|
||||||
|
t.overrides = make(map[locode.LOCODE]locode.Record)
|
||||||
|
|
||||||
|
if overridesPath == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return t.scanWords([]string{overridesPath}, wordsPerRecord, func(words []string) error {
|
||||||
|
lc, err := locode.FromString(strings.Join(words[1:3], " "))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
record := locode.Record{
|
||||||
|
Ch: words[0],
|
||||||
|
LOCODE: *lc,
|
||||||
|
Name: words[3],
|
||||||
|
NameWoDiacritics: words[4],
|
||||||
|
SubDiv: words[5],
|
||||||
|
Function: words[6],
|
||||||
|
Status: words[7],
|
||||||
|
Date: words[8],
|
||||||
|
IATA: words[9],
|
||||||
|
Coordinates: words[10],
|
||||||
|
Remarks: words[11],
|
||||||
|
}
|
||||||
|
|
||||||
|
t.overrides[record.LOCODE] = record
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
var errScanInt = errors.New("interrupt scan")
|
var errScanInt = errors.New("interrupt scan")
|
||||||
|
|
||||||
func (t *Table) scanWords(paths []string, fpr int, wordsHandler func([]string) error) error {
|
func (t *Table) scanWords(paths []string, fpr int, wordsHandler func([]string) error) error {
|
||||||
|
|
|
@ -4,6 +4,8 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/fs"
|
"io/fs"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"git.frostfs.info/TrueCloudLab/frostfs-locode-db/pkg/locode"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Prm groups the required parameters of the Table's constructor.
|
// Prm groups the required parameters of the Table's constructor.
|
||||||
|
@ -21,6 +23,11 @@ type Prm struct {
|
||||||
//
|
//
|
||||||
// Must not be empty.
|
// Must not be empty.
|
||||||
SubDivPath string
|
SubDivPath string
|
||||||
|
|
||||||
|
// Path to a csv table with UN/LOCODE overrides.
|
||||||
|
//
|
||||||
|
// Optional.
|
||||||
|
OverridesPath string
|
||||||
}
|
}
|
||||||
|
|
||||||
// Table is a descriptor of the UN/LOCODE table in csv format.
|
// Table is a descriptor of the UN/LOCODE table in csv format.
|
||||||
|
@ -39,6 +46,8 @@ type Table struct {
|
||||||
subDivOnce sync.Once
|
subDivOnce sync.Once
|
||||||
|
|
||||||
mSubDiv map[subDivKey]subDivRecord
|
mSubDiv map[subDivKey]subDivRecord
|
||||||
|
|
||||||
|
overrides map[locode.LOCODE]locode.Record
|
||||||
}
|
}
|
||||||
|
|
||||||
const invalidPrmValFmt = "invalid parameter %s (%T):%v"
|
const invalidPrmValFmt = "invalid parameter %s (%T):%v"
|
||||||
|
@ -67,9 +76,15 @@ func New(prm Prm, opts ...Option) *Table {
|
||||||
opts[i](o)
|
opts[i](o)
|
||||||
}
|
}
|
||||||
|
|
||||||
return &Table{
|
t := &Table{
|
||||||
paths: append(o.extraPaths, prm.Path),
|
paths: append(o.extraPaths, prm.Path),
|
||||||
mode: o.mode,
|
mode: o.mode,
|
||||||
subDivPath: prm.SubDivPath,
|
subDivPath: prm.SubDivPath,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := t.initOverrides(prm.OverridesPath); err != nil {
|
||||||
|
panic(fmt.Errorf("init overrides: %w", err))
|
||||||
|
}
|
||||||
|
|
||||||
|
return t
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue