locode: Use quadtree to find continent #830

Closed
Nesterfifa wants to merge 6 commits from Nesterfifa/frostfs-node:782-use-quadtree into master
4 changed files with 84 additions and 15 deletions

View file

@ -0,0 +1,30 @@
package util
import (
"flag"
"testing"
)
// Command-line flags for BenchmarkLocodeGenerate. Each flag is registered under
// the corresponding locodeGenerate*Flag name, and the benchmark copies the parsed
// values into the locodeGenerate* package variables before running the command.
var (
// in holds a single string value; the benchmark appends it as one element to
// locodeGenerateInPaths, even though the help text speaks of a list of paths.
in = flag.String(locodeGenerateInputFlag, "", "List of paths to UN/LOCODE tables (csv)")
subdiv = flag.String(locodeGenerateSubDivFlag, "", "Path to UN/LOCODE subdivision database (csv)")
airports = flag.String(locodeGenerateAirportsFlag, "", "Path to OpenFlights airport database (csv)")
countries = flag.String(locodeGenerateCountriesFlag, "", "Path to OpenFlights country database (csv)")
continents = flag.String(locodeGenerateContinentsFlag, "", "Path to continent polygons (GeoJSON)")
out = flag.String(locodeGenerateOutputFlag, "", "Target path for generated database")
)
func BenchmarkLocodeGenerate(b *testing.B) {

Could you also attach benchmark results on master vs this branch?
You can use https://pkg.go.dev/golang.org/x/perf/cmd/benchstat to compare gobench results.

Could you also attach benchmark results on master vs this branch? You can use https://pkg.go.dev/golang.org/x/perf/cmd/benchstat to compare gobench results.
locodeGenerateInPaths = append(locodeGenerateInPaths, *in)
locodeGenerateSubDivPath = *subdiv
locodeGenerateAirportsPath = *airports
locodeGenerateCountriesPath = *countries
locodeGenerateContinentsPath = *continents
locodeGenerateOutPath = *out
b.ResetTimer()
for i := 0; i < b.N; i++ {
locodeGenerateCmd.Run(locodeGenerateCmd, []string{})
}
}

View file

@ -8,6 +8,7 @@ import (
"github.com/paulmach/orb" "github.com/paulmach/orb"
fyrchik marked this conversation as resolved Outdated

Please put all non-stdlib imports in a single group (see paulmach/orb below).

Please put all non-stdlib imports in a single group (see `paulmach/orb` below).

Fixed

Fixed
"github.com/paulmach/orb/geojson" "github.com/paulmach/orb/geojson"
"github.com/paulmach/orb/planar" "github.com/paulmach/orb/planar"
"github.com/paulmach/orb/quadtree"
) )
const continentProperty = "Continent" const continentProperty = "Continent"
@ -36,24 +37,26 @@ func (db *DB) PointContinent(point *locodedb.Point) (*locodedb.Continent, error)
minDst float64 minDst float64
) )
pointer := db.tree.Matching(planarPoint, func(p orb.Pointer) bool {
return planar.PolygonContains(
p.(*geojson.Feature).Geometry.(orb.Polygon),
planarPoint,
)
})
if pointer != nil {
continent = pointer.(*geojson.Feature).Properties.MustString(continentProperty)
}
if continent == "" {
for _, feature := range db.features { for _, feature := range db.features {
if multiPolygon, ok := feature.Geometry.(orb.MultiPolygon); ok {
if planar.MultiPolygonContains(multiPolygon, planarPoint) {
continent = feature.Properties.MustString(continentProperty)
break
}
} else if polygon, ok := feature.Geometry.(orb.Polygon); ok {
if planar.PolygonContains(polygon, planarPoint) {
continent = feature.Properties.MustString(continentProperty)
break
}
}
distance := planar.DistanceFrom(feature.Geometry, planarPoint) distance := planar.DistanceFrom(feature.Geometry, planarPoint)
if minDst == 0 || minDst > distance { if minDst == 0 || minDst > distance {
minDst = distance minDst = distance
continent = feature.Properties.MustString(continentProperty) continent = feature.Properties.MustString(continentProperty)
} }
} }
}
c := continentFromString(continent) c := continentFromString(continent)
@ -73,6 +76,39 @@ func (db *DB) init() error {
db.features = features.Features db.features = features.Features
err = db.buildQuadtree()
if err != nil {
return fmt.Errorf("could not build quadtree: %w", err)
}
return nil
}
func (db *DB) buildQuadtree() error {
db.tree = quadtree.New(orb.Bound{
Min: orb.Point{-180, -180},
Max: orb.Point{180, 180},
})
for _, feature := range db.features {
var multiPolygon orb.MultiPolygon
Is there a way to simplify this code? https://git.frostfs.info/TrueCloudLab/frostfs-node/src/commit/cff78d77c22b788a398a958a9a9d1f02b43b6148/pkg/util/locode/db/continents/geojson/calls.go#L94-L110

Reduced the nesting depth to 2

Reduced the nesting depth to 2
fyrchik marked this conversation as resolved
Review

What about var multiPolygon orb.MultiPolygon? make(_, 0) is almost never needed: nil works well enough as the default value.

What about `var multiPolygon orb.MultiPolygon`? `make(_, 0)` is almost never needed: `nil` works well enough as the default value.
Review

Fixed

Fixed
if polygon, ok := feature.Geometry.(orb.Polygon); ok {
multiPolygon = append(multiPolygon, polygon)
} else {
multiPolygon = feature.Geometry.(orb.MultiPolygon)
}
for _, polygon := range multiPolygon {
newFeature := geojson.NewFeature(polygon)
newFeature.Properties = feature.Properties.Clone()
err := db.tree.Add(newFeature)
if err != nil {
return err
}
}
}
return nil return nil
} }

View file

@ -5,6 +5,7 @@ import (
"sync" "sync"
"github.com/paulmach/orb/geojson" "github.com/paulmach/orb/geojson"
"github.com/paulmach/orb/quadtree"
) )
// Prm groups the required parameters of the DB's constructor. // Prm groups the required parameters of the DB's constructor.
@ -31,6 +32,8 @@ type DB struct {
once sync.Once once sync.Once
features []*geojson.Feature features []*geojson.Feature
tree *quadtree.Quadtree
} }
const invalidPrmValFmt = "invalid parameter %s (%T):%v" const invalidPrmValFmt = "invalid parameter %s (%T):%v"

View file

@ -80,7 +80,7 @@ func FillDatabase(table SourceTable, airports AirportDB, continents ContinentsDB
var errG errgroup.Group var errG errgroup.Group
// Pick some sane default, after this the performance stopped increasing. // Pick some sane default, after this the performance stopped increasing.
errG.SetLimit(runtime.NumCPU() * 4) errG.SetLimit(runtime.NumCPU() * 16)

This change seems unrelated to the commit. Can we move it to a separate commit and describe the WHY?

git rebase and git reset commands might be helpful, especially git reset -p.

This change seems unrelated to the commit. Can we move it to a separate commit and describe the WHY? `git rebase` and `git reset` commands might be helpful, especially `git reset -p`.
_ = table.IterateAll(func(tableRecord locode.Record) error { _ = table.IterateAll(func(tableRecord locode.Record) error {
errG.Go(func() error { errG.Go(func() error {
return processTableRecord(tableRecord, airports, continents, names, db) return processTableRecord(tableRecord, airports, continents, names, db)