Compare commits

..

9 commits

Author SHA1 Message Date
f35b1c62e8 [#21] locode: Count ignored and added records to result locode db
Signed-off-by: Airat Arifullin <a.arifullin@yadro.com>
2025-01-31 15:45:23 +03:00
f2113a9c02 [#21] locode: Introduce locode overriding
Signed-off-by: Airat Arifullin <a.arifullin@yadro.com>
2025-01-31 15:40:12 +03:00
16ff77e35c [#21] Makefile: Introduce UN/LOCODE overrides and copy it by the recipe
* Introduce `unlocode-CodeList_overrides.csv` that contains missing details
 for locodes from the main `unlocode-CodeList.csv` file;
* Fix `Makefile`;
* Add info to `README.md`.

Signed-off-by: Airat Arifullin <a.arifullin@yadro.com>
2025-01-22 22:10:36 +03:00
c0812d5ad6 [#20] Use selfhosted actions mirror
TrueCloudLab/frostfs-infra#152

Signed-off-by: Vitaliy Potyarkin <v.potyarkin@yadro.com>
2024-12-12 11:54:11 +03:00
3a73e1c89c [#18] Stop using obsolete .github directory
This commit is a part of multi-repo cleanup effort:
TrueCloudLab/frostfs-infra#136

Signed-off-by: Vitaliy Potyarkin <v.potyarkin@yadro.com>
2024-11-06 15:25:12 +03:00
e3d2c661e1 [#14] Add locode_generate benchmark
Signed-off-by: George Bartolomey <george@bh4.ru>
2024-10-02 07:31:37 +00:00
f99cc90185 [#14] locode_generate: Increase number of CPUs
Signed-off-by: George Bartolomey <george@bh4.ru>
2024-10-02 07:31:37 +00:00
59516714d1 [#14] Use quadtree to find continent
Signed-off-by: George Bartolomey <george@bh4.ru>
2024-10-02 07:31:37 +00:00
abf6f2ab75 Release v0.5.0
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
2024-09-30 13:57:14 +03:00
20 changed files with 21697 additions and 62 deletions

View file

Before

Width:  |  Height:  |  Size: 5.5 KiB

After

Width:  |  Height:  |  Size: 5.5 KiB

View file

@ -0,0 +1,26 @@
# yamllint disable rule:truthy
name: DCO check
on:
pull_request:
jobs:
dco:
name: DCO
runs-on: docker
container:
image: node:22-bookworm
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Setup Go
uses: actions/setup-go@v3
with:
go-version: '1.22'
- name: Run commit format checker
uses: https://git.frostfs.info/TrueCloudLab/dco-go@v3
with:
from: 'origin/${{ github.event.pull_request.base.ref }}'

View file

@ -5,7 +5,7 @@ jobs:
checkupdates:
runs-on: docker
steps:
- uses: https://code.forgejo.org/actions/checkout@v3
- uses: actions/checkout@v3
- run: make update
- run: |
git config user.name "Snegurochka"

1
.github/CODEOWNERS vendored
View file

@ -1 +0,0 @@
* @alexvanin @realloc @fyrchik @anatoly-bogatyrev

View file

@ -1,21 +0,0 @@
name: DCO check
on:
pull_request:
branches:
- master
jobs:
commits_check_job:
runs-on: ubuntu-latest
name: Commits Check
steps:
- name: Get PR Commits
id: 'get-pr-commits'
uses: tim-actions/get-pr-commits@master
with:
token: ${{ secrets.GITHUB_TOKEN }}
- name: DCO Check
uses: tim-actions/dco@master
with:
commits: ${{ steps.get-pr-commits.outputs.commits }}

View file

@ -1,6 +1,12 @@
# Changelog
Changelog for FrostFS LOCODE database
## 0.5.0 - 2024-09-30
The package now contains a tool to generate the database used
by FrostFS inner ring nodes. The dependency on `frostfs-cli`
has been removed from the Debian package.
## 0.4.0 - 2023-04-24
Now we can build UN/LOCODE DB without Internet connection and with a

1
CODEOWNERS Normal file
View file

@ -0,0 +1 @@
.* @alexvanin @realloc @fyrchik @a.bogatyrev

View file

@ -41,13 +41,17 @@ in/unlocode-SubdivisionCodes.csv: $(DIRS)
in/unlocode-CodeList.csv: $(DIRS)
zcat data/unlocode-CodeList.csv.gz > in/unlocode-CodeList.csv
in/unlocode-CodeList_overrides.csv: $(DIRS)
cp data/unlocode-CodeList_overrides.csv in/unlocode-CodeList_overrides.csv
# Generate locode_db BoltDB file
locode_db: in/unlocode-CodeList.csv in/unlocode-SubdivisionCodes.csv in/continents.geojson in/airports.dat in/countries.dat $(FROST_LOCODE)
locode_db: in/unlocode-CodeList.csv in/unlocode-SubdivisionCodes.csv in/continents.geojson in/airports.dat in/countries.dat in/unlocode-CodeList_overrides.csv $(FROST_LOCODE)
$(FROST_LOCODE) generate \
--airports in/airports.dat \
--continents in/continents.geojson \
--countries in/countries.dat \
--in in/unlocode-CodeList.csv \
--overrides in/unlocode-CodeList_overrides.csv \
--subdiv in/unlocode-SubdivisionCodes.csv \
--out locode_db
chmod 644 locode_db

View file

@ -1,5 +1,5 @@
<p align="center">
<img src="./.github/logo.svg" width="500px" alt="NeoFS">
<img src="./.forgejo/logo.svg" width="500px" alt="FrostFS logo">
</p>
<p align="center">
UN/LOCODE database for <a href="https://frostfs.info">FrostFS</a>
@ -16,6 +16,8 @@ This repository tools generate UN/LOCODE database for FrostFS using data from
following sources:
- [UN/LOCODE](https://unece.org/trade/cefact/UNLOCODE-Download) database in CSV
format, licensed under the [ODC Public Domain Dedication and Licence (PDDL)](http://opendatacommons.org/licenses/pddl/1-0/)
- [Improved UN/LOCODE](https://raw.githubusercontent.com/cristan/improved-un-locodes/86b2e96f17289ddb62f52dcfcfac520bc1ca002b/data/code-list-improved.csv) database
that contains more details about LOCODEs (such as coordinates) that may be missing from the UN/LOCODE database.
- [OpenFlight Airports and
Countries](https://raw.githubusercontent.com/jpatokal/openflights/master/data/)
databases, licensed under the [GNU AGPL-3.0

File diff suppressed because it is too large Load diff

View file

@ -16,6 +16,7 @@ type namesDB struct {
const (
locodeGenerateInputFlag = "in"
locodeGenerateOverridesFlag = "overrides"
locodeGenerateSubDivFlag = "subdiv"
locodeGenerateAirportsFlag = "airports"
locodeGenerateCountriesFlag = "countries"
@ -25,6 +26,7 @@ const (
var (
locodeGenerateInPaths []string
locodeGenerateOverridesPath string
locodeGenerateSubDivPath string
locodeGenerateAirportsPath string
locodeGenerateCountriesPath string
@ -35,11 +37,11 @@ var (
Use: "generate",
Short: "Generate UN/LOCODE database for FrostFS",
Run: func(cmd *cobra.Command, _ []string) {
locodeDB := csvlocode.New(
csvlocode.Prm{
Path: locodeGenerateInPaths[0],
SubDivPath: locodeGenerateSubDivPath,
OverridesPath: locodeGenerateOverridesPath,
},
csvlocode.WithExtraPaths(locodeGenerateInPaths[1:]...),
)
@ -67,8 +69,10 @@ var (
Table: locodeDB,
}
err = locodedb.FillDatabase(locodeDB, airportDB, continentsDB, names, targetDB)
res, err := locodedb.FillDatabase(locodeDB, airportDB, continentsDB, names, targetDB)
ExitOnErr(cmd, "", err)
cmd.Printf("Records added to locode db: %d\n", res.AddedRecordCount)
cmd.Printf("Records ignored: %d\n", res.IgnoredRecordCount)
},
}
)
@ -79,6 +83,8 @@ func initUtilLocodeGenerateCmd() {
flags.StringSliceVar(&locodeGenerateInPaths, locodeGenerateInputFlag, nil, "List of paths to UN/LOCODE tables (csv)")
_ = locodeGenerateCmd.MarkFlagRequired(locodeGenerateInputFlag)
flags.StringVar(&locodeGenerateOverridesPath, locodeGenerateOverridesFlag, "", "Path to UN/LOCODE override tables (csv)")
flags.StringVar(&locodeGenerateSubDivPath, locodeGenerateSubDivFlag, "", "Path to UN/LOCODE subdivision database (csv)")
_ = locodeGenerateCmd.MarkFlagRequired(locodeGenerateSubDivFlag)

30
locode_generate_test.go Normal file
View file

@ -0,0 +1,30 @@
package main
import (
"flag"
"testing"
)
var (
in = flag.String(locodeGenerateInputFlag, "", "List of paths to UN/LOCODE tables (csv)")
subdiv = flag.String(locodeGenerateSubDivFlag, "", "Path to UN/LOCODE subdivision database (csv)")
airports = flag.String(locodeGenerateAirportsFlag, "", "Path to OpenFlights airport database (csv)")
countries = flag.String(locodeGenerateCountriesFlag, "", "Path to OpenFlights country database (csv)")
continents = flag.String(locodeGenerateContinentsFlag, "", "Path to continent polygons (GeoJSON)")
out = flag.String(locodeGenerateOutputFlag, "", "Target path for generated database")
)
// BenchmarkLocodeGenerate measures a full run of the "generate" command.
//
// Input and output locations are taken from the test binary's command-line
// flags declared in this file and copied into the package-level variables
// that the command reads.
func BenchmarkLocodeGenerate(b *testing.B) {
	// Assign instead of appending: the testing framework calls a benchmark
	// function several times with increasing b.N, and appending to the
	// package-level slice would add the same input path again on every call,
	// making later runs process more input than earlier ones.
	locodeGenerateInPaths = []string{*in}
	locodeGenerateSubDivPath = *subdiv
	locodeGenerateAirportsPath = *airports
	locodeGenerateCountriesPath = *countries
	locodeGenerateContinentsPath = *continents
	locodeGenerateOutPath = *out

	// Exclude the setup above from the measured time.
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		locodeGenerateCmd.Run(locodeGenerateCmd, []string{})
	}
}

View file

@ -8,6 +8,7 @@ import (
"github.com/paulmach/orb"
"github.com/paulmach/orb/geojson"
"github.com/paulmach/orb/planar"
"github.com/paulmach/orb/quadtree"
)
const continentProperty = "Continent"
@ -36,24 +37,26 @@ func (db *DB) PointContinent(point *locodedb.Point) (*locodedb.Continent, error)
minDst float64
)
pointer := db.tree.Matching(planarPoint, func(p orb.Pointer) bool {
return planar.PolygonContains(
p.(*geojson.Feature).Geometry.(orb.Polygon),
planarPoint,
)
})
if pointer != nil {
continent = pointer.(*geojson.Feature).Properties.MustString(continentProperty)
}
if continent == "" {
for _, feature := range db.features {
if multiPolygon, ok := feature.Geometry.(orb.MultiPolygon); ok {
if planar.MultiPolygonContains(multiPolygon, planarPoint) {
continent = feature.Properties.MustString(continentProperty)
break
}
} else if polygon, ok := feature.Geometry.(orb.Polygon); ok {
if planar.PolygonContains(polygon, planarPoint) {
continent = feature.Properties.MustString(continentProperty)
break
}
}
distance := planar.DistanceFrom(feature.Geometry, planarPoint)
if minDst == 0 || minDst > distance {
minDst = distance
continent = feature.Properties.MustString(continentProperty)
}
}
}
c := continentFromString(continent)
@ -73,6 +76,39 @@ func (db *DB) init() error {
db.features = features.Features
err = db.buildQuadtree()
if err != nil {
return fmt.Errorf("could not build quadtree: %w", err)
}
return nil
}
// buildQuadtree (re)creates db.tree and fills it with one feature per polygon
// found in db.features.
//
// A Polygon feature contributes itself; a MultiPolygon feature is split so
// that each member polygon becomes a separate tree entry carrying a clone of
// the parent feature's properties. Features with any other geometry
// (including nil) are not added to the tree; they remain in db.features.
//
// Returns the first error reported by the quadtree while adding an entry.
func (db *DB) buildQuadtree() error {
	db.tree = quadtree.New(orb.Bound{
		Min: orb.Point{-180, -180},
		Max: orb.Point{180, 180},
	})

	for _, feature := range db.features {
		var polygons orb.MultiPolygon

		// Use a type switch rather than an unchecked assertion so that an
		// unexpected geometry type does not panic during initialization.
		switch geometry := feature.Geometry.(type) {
		case orb.Polygon:
			polygons = orb.MultiPolygon{geometry}
		case orb.MultiPolygon:
			polygons = geometry
		default:
			continue
		}

		for _, polygon := range polygons {
			entry := geojson.NewFeature(polygon)
			entry.Properties = feature.Properties.Clone()

			if err := db.tree.Add(entry); err != nil {
				return err
			}
		}
	}

	return nil
}

View file

@ -5,6 +5,7 @@ import (
"sync"
"github.com/paulmach/orb/geojson"
"github.com/paulmach/orb/quadtree"
)
// Prm groups the required parameters of the DB's constructor.
@ -31,6 +32,8 @@ type DB struct {
once sync.Once
features []*geojson.Feature
tree *quadtree.Quadtree
}
const invalidPrmValFmt = "invalid parameter %s (%T):%v"

View file

@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"runtime"
"sync/atomic"
"git.frostfs.info/TrueCloudLab/frostfs-locode-db/pkg/locode"
"golang.org/x/sync/errgroup"
@ -75,38 +76,58 @@ type NamesDB interface {
SubDivName(*CountryCode, string) (string, error)
}
// FillDatabase generates the FrostFS location database based on the UN/LOCODE table.
func FillDatabase(table SourceTable, airports AirportDB, continents ContinentsDB, names NamesDB, db DB) error {
var errG errgroup.Group
// Pick some sane default, after this the performance stopped increasing.
errG.SetLimit(runtime.NumCPU() * 4)
_ = table.IterateAll(func(tableRecord locode.Record) error {
errG.Go(func() error {
return processTableRecord(tableRecord, airports, continents, names, db)
})
return nil
})
return errG.Wait()
// FillDatabaseResult summarizes how the source records were handled by
// FillDatabase.
type FillDatabaseResult struct {
// AddedRecordCount is the number of source records that were processed
// without error and reported as added.
AddedRecordCount int
// IgnoredRecordCount is the number of source records that were processed
// without error but were not added.
IgnoredRecordCount int
}
func processTableRecord(tableRecord locode.Record, airports AirportDB, continents ContinentsDB, names NamesDB, db DB) error {
if tableRecord.LOCODE.LocationCode() == "" {
// FillDatabase generates the FrostFS location database based on the UN/LOCODE table.
//
// Every record yielded by table.IterateAll is handed to processTableRecord in
// its own goroutine; the number of goroutines running at once is limited.
// The returned FillDatabaseResult counts records that were added and records
// that were processed without error but not added. The returned error is the
// one reported by the goroutine group, if any; the counters are returned
// alongside it.
func FillDatabase(table SourceTable, airports AirportDB, continents ContinentsDB, names NamesDB, db DB) (FillDatabaseResult, error) {
	var (
		errG           errgroup.Group
		added, ignored atomic.Int32
	)

	// Pick some sane default, after this the performance stopped increasing.
	errG.SetLimit(runtime.NumCPU() * 16)

	// NOTE(review): the error returned by IterateAll is discarded here, as it
	// was before; confirm whether source-table read errors should be surfaced.
	_ = table.IterateAll(func(tableRecord locode.Record) error {
		errG.Go(func() error {
			wasAdded, err := processTableRecord(tableRecord, airports, continents, names, db)
			if err != nil {
				return err
			}

			if wasAdded {
				added.Add(1)
			} else {
				ignored.Add(1)
			}

			return nil
		})

		return nil
	})

	// Wait for all workers BEFORE reading the counters. Function calls in a
	// return statement are evaluated in lexical left-to-right order, so
	// loading the counters inside the return expression ahead of errG.Wait()
	// would read them while goroutines may still be running.
	err := errG.Wait()

	return FillDatabaseResult{
		AddedRecordCount:   int(added.Load()),
		IgnoredRecordCount: int(ignored.Load()),
	}, err
}
func processTableRecord(tableRecord locode.Record, airports AirportDB, continents ContinentsDB, names NamesDB, db DB) (bool, error) {
if tableRecord.LOCODE.LocationCode() == "" {
return false, nil
}
dbKey, err := NewKey(tableRecord.LOCODE)
if err != nil {
return err
return false, err
}
dbRecord, err := NewRecord(tableRecord)
if err != nil {
if errors.Is(err, errParseCoordinates) {
return nil
return false, nil
}
return err
return false, err
}
geoPoint := dbRecord.GeoPoint()
@ -116,10 +137,10 @@ func processTableRecord(tableRecord locode.Record, airports AirportDB, continent
airportRecord, err := airports.Get(tableRecord)
if err != nil {
if errors.Is(err, ErrAirportNotFound) {
return nil
return false, nil
}
return err
return false, err
}
geoPoint = airportRecord.Point
@ -132,10 +153,10 @@ func processTableRecord(tableRecord locode.Record, airports AirportDB, continent
countryName, err = names.CountryName(dbKey.CountryCode())
if err != nil {
if errors.Is(err, ErrCountryNotFound) {
return nil
return false, nil
}
return err
return false, err
}
}
@ -145,10 +166,10 @@ func processTableRecord(tableRecord locode.Record, airports AirportDB, continent
subDivName, err := names.SubDivName(dbKey.CountryCode(), subDivCode)
if err != nil {
if errors.Is(err, ErrSubDivNotFound) {
return nil
return false, nil
}
return err
return false, err
}
dbRecord.SetSubDivName(subDivName)
@ -156,14 +177,14 @@ func processTableRecord(tableRecord locode.Record, airports AirportDB, continent
continent, err := continents.PointContinent(geoPoint)
if err != nil {
return fmt.Errorf("could not calculate continent geo point: %w", err)
return false, fmt.Errorf("could not calculate continent geo point: %w", err)
} else if continent.Is(ContinentUnknown) {
return nil
return false, nil
}
dbRecord.SetContinent(continent)
return db.Put(*dbKey, *dbRecord)
return true, db.Put(*dbKey, *dbRecord)
}
// LocodeRecord returns the record from the FrostFS location database

View file

@ -3,6 +3,7 @@ package csvlocode
import (
"encoding/csv"
"errors"
"fmt"
"io"
"os"
"strings"
@ -40,6 +41,10 @@ func (t *Table) IterateAll(f func(locode.Record) error) error {
Remarks: words[11],
}
if err := t.Override(&record); err != nil {
return fmt.Errorf("override: %w", err)
}
return f(record)
})
}
@ -84,6 +89,14 @@ func (t *Table) SubDivName(countryCode *locodedb.CountryCode, code string) (stri
return rec.name, nil
}
// Override substitutes *record with the override registered for its LOCODE,
// if there is one; otherwise the record is left untouched.
//
// It always returns nil.
func (t *Table) Override(record *locode.Record) error {
	if replacement, ok := t.overrides[record.LOCODE]; ok {
		*record = replacement
	}

	return nil
}
func (t *Table) initSubDiv() (err error) {
t.subDivOnce.Do(func() {
t.mSubDiv = make(map[subDivKey]subDivRecord)
@ -103,6 +116,40 @@ func (t *Table) initSubDiv() (err error) {
return
}
// initOverrides resets t.overrides to an empty map and, when overridesPath is
// non-empty, fills it from the csv table at that path via t.scanWords.
//
// Each row is expected to have 12 fields. Fields 1 and 2 are joined with a
// space and parsed as the LOCODE, which is also the map key; the remaining
// fields populate the stored record. A later row with the same LOCODE
// replaces an earlier one.
//
// Returns the error produced by t.scanWords, including a LOCODE parse error
// returned from the row handler.
func (t *Table) initOverrides(overridesPath string) error {
	const wordsPerRecord = 12

	// The map is created even when no path is given, so lookups always see
	// an initialized (possibly empty) map.
	t.overrides = make(map[locode.LOCODE]locode.Record)

	if overridesPath == "" {
		return nil
	}

	storeRow := func(words []string) error {
		code, err := locode.FromString(strings.Join(words[1:3], " "))
		if err != nil {
			return err
		}

		t.overrides[*code] = locode.Record{
			Ch:               words[0],
			LOCODE:           *code,
			Name:             words[3],
			NameWoDiacritics: words[4],
			SubDiv:           words[5],
			Function:         words[6],
			Status:           words[7],
			Date:             words[8],
			IATA:             words[9],
			Coordinates:      words[10],
			Remarks:          words[11],
		}

		return nil
	}

	return t.scanWords([]string{overridesPath}, wordsPerRecord, storeRow)
}
var errScanInt = errors.New("interrupt scan")
func (t *Table) scanWords(paths []string, fpr int, wordsHandler func([]string) error) error {

View file

@ -4,6 +4,8 @@ import (
"fmt"
"io/fs"
"sync"
"git.frostfs.info/TrueCloudLab/frostfs-locode-db/pkg/locode"
)
// Prm groups the required parameters of the Table's constructor.
@ -21,6 +23,11 @@ type Prm struct {
//
// Must not be empty.
SubDivPath string
// Path to a csv table with UN/LOCODE overrides.
//
// Optional.
OverridesPath string
}
// Table is a descriptor of the UN/LOCODE table in csv format.
@ -39,6 +46,8 @@ type Table struct {
subDivOnce sync.Once
mSubDiv map[subDivKey]subDivRecord
overrides map[locode.LOCODE]locode.Record
}
const invalidPrmValFmt = "invalid parameter %s (%T):%v"
@ -67,9 +76,15 @@ func New(prm Prm, opts ...Option) *Table {
opts[i](o)
}
return &Table{
t := &Table{
paths: append(o.extraPaths, prm.Path),
mode: o.mode,
subDivPath: prm.SubDivPath,
}
if err := t.initOverrides(prm.OverridesPath); err != nil {
panic(fmt.Errorf("init overrides: %w", err))
}
return t
}