Compare commits

...
Sign in to create a new pull request.

6 commits

Author SHA1 Message Date
1c14038948 [#13] locode: Add command to list all locodes
Signed-off-by: Anton Nikiforov <an.nikiforov@yadro.com>
2025-03-12 10:21:02 +03:00
c7cb68f1f7 [#13] locode: Use stdout as default output
Signed-off-by: Anton Nikiforov <an.nikiforov@yadro.com>
2025-03-12 10:15:11 +03:00
f35b1c62e8 [#21] locode: Count ignored and added records to result locode db
Signed-off-by: Airat Arifullin <a.arifullin@yadro.com>
2025-01-31 15:45:23 +03:00
f2113a9c02 [#21] locode: Introduce locode overriding
Signed-off-by: Airat Arifullin <a.arifullin@yadro.com>
2025-01-31 15:40:12 +03:00
16ff77e35c [#21] Makefile: Introduce UN/LOCODE overrides and copy it by the recipe
* Introduce `unlocode-CodeList_overrides.csv` that contains missing details
 for locodes from the main `unlocode-CodeList.csv` file;
* Fix `Makefile`;
* Add info to `README.md`.

Signed-off-by: Airat Arifullin <a.arifullin@yadro.com>
2025-01-22 22:10:36 +03:00
c0812d5ad6 [#20] Use selfhosted actions mirror
TrueCloudLab/frostfs-infra#152

Signed-off-by: Vitaliy Potyarkin <v.potyarkin@yadro.com>
2024-12-12 11:54:11 +03:00
12 changed files with 21645 additions and 28 deletions

View file

@ -5,7 +5,7 @@ jobs:
checkupdates:
runs-on: docker
steps:
- uses: https://code.forgejo.org/actions/checkout@v3
- uses: actions/checkout@v3
- run: make update
- run: |
git config user.name "Snegurochka"

View file

@ -41,13 +41,17 @@ in/unlocode-SubdivisionCodes.csv: $(DIRS)
in/unlocode-CodeList.csv: $(DIRS)
zcat data/unlocode-CodeList.csv.gz > in/unlocode-CodeList.csv
in/unlocode-CodeList_overrides.csv: $(DIRS)
cp data/unlocode-CodeList_overrides.csv in/unlocode-CodeList_overrides.csv
# Generate locode_db BoltDB file
locode_db: in/unlocode-CodeList.csv in/unlocode-SubdivisionCodes.csv in/continents.geojson in/airports.dat in/countries.dat $(FROST_LOCODE)
locode_db: in/unlocode-CodeList.csv in/unlocode-SubdivisionCodes.csv in/continents.geojson in/airports.dat in/countries.dat in/unlocode-CodeList_overrides.csv $(FROST_LOCODE)
$(FROST_LOCODE) generate \
--airports in/airports.dat \
--continents in/continents.geojson \
--countries in/countries.dat \
--in in/unlocode-CodeList.csv \
--overrides in/unlocode-CodeList_overrides.csv \
--subdiv in/unlocode-SubdivisionCodes.csv \
--out locode_db
chmod 644 locode_db

View file

@ -16,6 +16,8 @@ This repository tools generate UN/LOCODE database for FrostFS using data from
following sources:
- [UN/LOCODE](https://unece.org/trade/cefact/UNLOCODE-Download) database in CSV
format, licensed under the [ODC Public Domain Dedication and Licence (PDDL)](http://opendatacommons.org/licenses/pddl/1-0/)
- [Improved UN/LOCODE](https://raw.githubusercontent.com/cristan/improved-un-locodes/86b2e96f17289ddb62f52dcfcfac520bc1ca002b/data/code-list-improved.csv) database
that contains more details about LOCODEs (such as coordinates) which may be missing from the UN/LOCODE database.
- [OpenFlight Airports and
Countries](https://raw.githubusercontent.com/jpatokal/openflights/master/data/)
databases, licensed under the [GNU AGPL-3.0

File diff suppressed because it is too large Load diff

View file

@ -16,6 +16,7 @@ type namesDB struct {
const (
locodeGenerateInputFlag = "in"
locodeGenerateOverridesFlag = "overrides"
locodeGenerateSubDivFlag = "subdiv"
locodeGenerateAirportsFlag = "airports"
locodeGenerateCountriesFlag = "countries"
@ -25,6 +26,7 @@ const (
var (
locodeGenerateInPaths []string
locodeGenerateOverridesPath string
locodeGenerateSubDivPath string
locodeGenerateAirportsPath string
locodeGenerateCountriesPath string
@ -35,11 +37,11 @@ var (
Use: "generate",
Short: "Generate UN/LOCODE database for FrostFS",
Run: func(cmd *cobra.Command, _ []string) {
locodeDB := csvlocode.New(
csvlocode.Prm{
Path: locodeGenerateInPaths[0],
SubDivPath: locodeGenerateSubDivPath,
Path: locodeGenerateInPaths[0],
SubDivPath: locodeGenerateSubDivPath,
OverridesPath: locodeGenerateOverridesPath,
},
csvlocode.WithExtraPaths(locodeGenerateInPaths[1:]...),
)
@ -67,8 +69,10 @@ var (
Table: locodeDB,
}
err = locodedb.FillDatabase(locodeDB, airportDB, continentsDB, names, targetDB)
res, err := locodedb.FillDatabase(locodeDB, airportDB, continentsDB, names, targetDB)
ExitOnErr(cmd, "", err)
cmd.Printf("Records added to locode db: %d\n", res.AddedRecordCount)
cmd.Printf("Records ignored: %d\n", res.IgnoredRecordCount)
},
}
)
@ -79,6 +83,8 @@ func initUtilLocodeGenerateCmd() {
flags.StringSliceVar(&locodeGenerateInPaths, locodeGenerateInputFlag, nil, "List of paths to UN/LOCODE tables (csv)")
_ = locodeGenerateCmd.MarkFlagRequired(locodeGenerateInputFlag)
flags.StringVar(&locodeGenerateOverridesPath, locodeGenerateOverridesFlag, "", "Path to UN/LOCODE override tables (csv)")
flags.StringVar(&locodeGenerateSubDivPath, locodeGenerateSubDivFlag, "", "Path to UN/LOCODE subdivision database (csv)")
_ = locodeGenerateCmd.MarkFlagRequired(locodeGenerateSubDivFlag)

View file

@ -7,8 +7,9 @@ import (
)
const (
locodeInfoDBFlag = "db"
locodeInfoCodeFlag = "locode"
locodeInfoDBFlag = "db"
locodeInfoDBFlagDesc = "Path to FrostFS UN/LOCODE database"
locodeInfoCodeFlag = "locode"
)
var (
@ -47,7 +48,7 @@ var (
func initUtilLocodeInfoCmd() {
flags := locodeInfoCmd.Flags()
flags.StringVar(&locodeInfoDBPath, locodeInfoDBFlag, "", "Path to FrostFS UN/LOCODE database")
flags.StringVar(&locodeInfoDBPath, locodeInfoDBFlag, "", locodeInfoDBFlagDesc)
_ = locodeInfoCmd.MarkFlagRequired(locodeInfoDBFlag)
flags.StringVar(&locodeInfoCode, locodeInfoCodeFlag, "", "UN/LOCODE")

37
locode_list.go Normal file
View file

@ -0,0 +1,37 @@
package main
import (
locodedb "git.frostfs.info/TrueCloudLab/frostfs-locode-db/pkg/locode/db"
locodebolt "git.frostfs.info/TrueCloudLab/frostfs-locode-db/pkg/locode/db/boltdb"
"github.com/spf13/cobra"
)
var (
	// locodeListCmd is the "list" sub-command: it opens the FrostFS UN/LOCODE
	// database located at locodeInfoDBPath in read-only mode and prints one
	// line per stored locode with the latitude and longitude of its geo point.
	locodeListCmd = &cobra.Command{
		Use:   "list",
		Short: "Print all locodes from FrostFS database",
		Run: func(cmd *cobra.Command, _ []string) {
			dbPrm := locodebolt.Prm{
				Path: locodeInfoDBPath,
			}
			boltDB := locodebolt.New(dbPrm, locodebolt.ReadOnly())

			ExitOnErr(cmd, "", boltDB.Open())
			// NOTE(review): if ExitOnErr terminates the process, this deferred
			// Close is skipped on the failure path below — confirm that is acceptable.
			defer boltDB.Close()

			// printLocode writes a single "<locode>\t <lat> <lon>" line to the
			// command's output.
			printLocode := func(code string, point locodedb.Point) {
				cmd.Printf("%s\t %0.2f %0.2f\n", code, point.Latitude(), point.Longitude())
			}

			ExitOnErr(cmd, "", boltDB.IterateOverLocodes(printLocode))
		},
	}
)
// initUtilLocodeListCmd registers the command-line flags of the "list"
// command. The database path flag shares its name, description and target
// variable with the "info" command and is marked as required.
func initUtilLocodeListCmd() {
	listFlags := locodeListCmd.Flags()
	listFlags.StringVar(&locodeInfoDBPath, locodeInfoDBFlag, "", locodeInfoDBFlagDesc)

	// The returned error is deliberately dropped, as in the other init functions.
	_ = locodeListCmd.MarkFlagRequired(locodeInfoDBFlag)
}

View file

@ -35,9 +35,13 @@ func ExitOnErr(cmd *cobra.Command, errFmt string, err error) {
}
func main() {
// use stdout as default output for cmd.Print()
rootCmd.SetOut(os.Stdout)
initUtilLocodeGenerateCmd()
initUtilLocodeInfoCmd()
rootCmd.AddCommand(locodeGenerateCmd, locodeInfoCmd)
initUtilLocodeListCmd()
rootCmd.AddCommand(locodeGenerateCmd, locodeInfoCmd, locodeListCmd)
err := rootCmd.Execute()
if err != nil {
ExitOnErr(rootCmd, "", err)

View file

@ -164,3 +164,23 @@ func (db *DB) Get(key locodedb.Key) (rec *locodedb.Record, err error) {
return
}
// IterateOverLocodes calls f for every record stored in the database.
//
// The first argument passed to f is the locode formatted as
// "<bucket name> <record key>", the second one is the record's geo point.
//
// Iteration stops and the error is returned as soon as a stored value
// cannot be decoded by recordFromValue.
//
// Must not be called before successful Open call.
func (db *DB) IterateOverLocodes(f func(string, locodedb.Point)) error {
	// visitBucket reports every key/value pair of a single bucket to f.
	visitBucket := func(bucketName []byte, bucket *bbolt.Bucket) error {
		return bucket.ForEach(func(key, value []byte) error {
			record, err := recordFromValue(value)
			if err != nil {
				return err
			}

			code := fmt.Sprintf("%s %s", bucketName, key)
			f(code, *record.GeoPoint())

			return nil
		})
	}

	return db.bolt.View(func(tx *bbolt.Tx) error {
		return tx.ForEach(visitBucket)
	})
}

View file

@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"runtime"
"sync/atomic"
"git.frostfs.info/TrueCloudLab/frostfs-locode-db/pkg/locode"
"golang.org/x/sync/errgroup"
@ -75,38 +76,58 @@ type NamesDB interface {
SubDivName(*CountryCode, string) (string, error)
}
// FillDatabaseResult carries the record counters returned by FillDatabase.
type FillDatabaseResult struct {
// AddedRecordCount is the number of source records for which
// processTableRecord reported that the record was added.
AddedRecordCount int
// IgnoredRecordCount is the number of source records that were processed
// without an error but were not reported as added.
IgnoredRecordCount int
}
// FillDatabase generates the FrostFS location database based on the UN/LOCODE table.
func FillDatabase(table SourceTable, airports AirportDB, continents ContinentsDB, names NamesDB, db DB) error {
func FillDatabase(table SourceTable, airports AirportDB, continents ContinentsDB, names NamesDB, db DB) (FillDatabaseResult, error) {
var errG errgroup.Group
var added, ignored atomic.Int32
// Pick some sane default, after this the performance stopped increasing.
errG.SetLimit(runtime.NumCPU() * 16)
_ = table.IterateAll(func(tableRecord locode.Record) error {
errG.Go(func() error {
return processTableRecord(tableRecord, airports, continents, names, db)
wasAdded, err := processTableRecord(tableRecord, airports, continents, names, db)
if err != nil {
return err
}
if wasAdded {
added.Add(1)
} else {
ignored.Add(1)
}
return nil
})
return nil
})
return errG.Wait()
return FillDatabaseResult{
AddedRecordCount: int(added.Load()),
IgnoredRecordCount: int(ignored.Load()),
}, errG.Wait()
}
func processTableRecord(tableRecord locode.Record, airports AirportDB, continents ContinentsDB, names NamesDB, db DB) error {
func processTableRecord(tableRecord locode.Record, airports AirportDB, continents ContinentsDB, names NamesDB, db DB) (bool, error) {
if tableRecord.LOCODE.LocationCode() == "" {
return nil
return false, nil
}
dbKey, err := NewKey(tableRecord.LOCODE)
if err != nil {
return err
return false, err
}
dbRecord, err := NewRecord(tableRecord)
if err != nil {
if errors.Is(err, errParseCoordinates) {
return nil
return false, nil
}
return err
return false, err
}
geoPoint := dbRecord.GeoPoint()
@ -116,10 +137,10 @@ func processTableRecord(tableRecord locode.Record, airports AirportDB, continent
airportRecord, err := airports.Get(tableRecord)
if err != nil {
if errors.Is(err, ErrAirportNotFound) {
return nil
return false, nil
}
return err
return false, err
}
geoPoint = airportRecord.Point
@ -132,10 +153,10 @@ func processTableRecord(tableRecord locode.Record, airports AirportDB, continent
countryName, err = names.CountryName(dbKey.CountryCode())
if err != nil {
if errors.Is(err, ErrCountryNotFound) {
return nil
return false, nil
}
return err
return false, err
}
}
@ -145,10 +166,10 @@ func processTableRecord(tableRecord locode.Record, airports AirportDB, continent
subDivName, err := names.SubDivName(dbKey.CountryCode(), subDivCode)
if err != nil {
if errors.Is(err, ErrSubDivNotFound) {
return nil
return false, nil
}
return err
return false, err
}
dbRecord.SetSubDivName(subDivName)
@ -156,14 +177,14 @@ func processTableRecord(tableRecord locode.Record, airports AirportDB, continent
continent, err := continents.PointContinent(geoPoint)
if err != nil {
return fmt.Errorf("could not calculate continent geo point: %w", err)
return false, fmt.Errorf("could not calculate continent geo point: %w", err)
} else if continent.Is(ContinentUnknown) {
return nil
return false, nil
}
dbRecord.SetContinent(continent)
return db.Put(*dbKey, *dbRecord)
return true, db.Put(*dbKey, *dbRecord)
}
// LocodeRecord returns the record from the FrostFS location database

View file

@ -3,6 +3,7 @@ package csvlocode
import (
"encoding/csv"
"errors"
"fmt"
"io"
"os"
"strings"
@ -40,6 +41,10 @@ func (t *Table) IterateAll(f func(locode.Record) error) error {
Remarks: words[11],
}
if err := t.Override(&record); err != nil {
return fmt.Errorf("override: %w", err)
}
return f(record)
})
}
@ -84,6 +89,14 @@ func (t *Table) SubDivName(countryCode *locodedb.CountryCode, code string) (stri
return rec.name, nil
}
// Override replaces *record with the entry stored in the overrides table
// under record.LOCODE. A record without a matching entry is left untouched.
//
// The current implementation always returns nil.
func (t *Table) Override(record *locode.Record) error {
	if replacement, ok := t.overrides[record.LOCODE]; ok {
		*record = replacement
	}

	return nil
}
func (t *Table) initSubDiv() (err error) {
t.subDivOnce.Do(func() {
t.mSubDiv = make(map[subDivKey]subDivRecord)
@ -103,6 +116,40 @@ func (t *Table) initSubDiv() (err error) {
return
}
// initOverrides (re)creates the overrides table and, when overridesPath is
// not empty, fills it from the csv file at that path via scanWords.
//
// Each scanned row is turned into a locode.Record keyed by its LOCODE, which
// is parsed from the second and third columns joined by a space. A later row
// with the same LOCODE replaces an earlier one.
//
// Returns any error produced by scanWords or by locode.FromString.
func (t *Table) initOverrides(overridesPath string) error {
	// Number of columns passed to scanWords as the per-record field count.
	// NOTE(review): the indexing below presumes scanWords only hands over rows
	// with exactly this many fields — confirm against scanWords.
	const wordsPerRecord = 12

	t.overrides = make(map[locode.LOCODE]locode.Record)

	// Overrides are optional: keep the empty table when no path is given.
	if overridesPath == "" {
		return nil
	}

	storeOverride := func(fields []string) error {
		code, err := locode.FromString(strings.Join(fields[1:3], " "))
		if err != nil {
			return err
		}

		t.overrides[*code] = locode.Record{
			Ch:               fields[0],
			LOCODE:           *code,
			Name:             fields[3],
			NameWoDiacritics: fields[4],
			SubDiv:           fields[5],
			Function:         fields[6],
			Status:           fields[7],
			Date:             fields[8],
			IATA:             fields[9],
			Coordinates:      fields[10],
			Remarks:          fields[11],
		}

		return nil
	}

	return t.scanWords([]string{overridesPath}, wordsPerRecord, storeOverride)
}
var errScanInt = errors.New("interrupt scan")
func (t *Table) scanWords(paths []string, fpr int, wordsHandler func([]string) error) error {

View file

@ -4,6 +4,8 @@ import (
"fmt"
"io/fs"
"sync"
"git.frostfs.info/TrueCloudLab/frostfs-locode-db/pkg/locode"
)
// Prm groups the required parameters of the Table's constructor.
@ -21,6 +23,11 @@ type Prm struct {
//
// Must not be empty.
SubDivPath string
// Path to a csv table with UN/LOCODE overrides.
//
// Optional.
OverridesPath string
}
// Table is a descriptor of the UN/LOCODE table in csv format.
@ -39,6 +46,8 @@ type Table struct {
subDivOnce sync.Once
mSubDiv map[subDivKey]subDivRecord
overrides map[locode.LOCODE]locode.Record
}
const invalidPrmValFmt = "invalid parameter %s (%T):%v"
@ -67,9 +76,15 @@ func New(prm Prm, opts ...Option) *Table {
opts[i](o)
}
return &Table{
t := &Table{
paths: append(o.extraPaths, prm.Path),
mode: o.mode,
subDivPath: prm.SubDivPath,
}
if err := t.initOverrides(prm.OverridesPath); err != nil {
panic(fmt.Errorf("init overrides: %w", err))
}
return t
}