From 02c02974b3967d6968b207f13c65ba8adea8f702 Mon Sep 17 00:00:00 2001
From: Evgenii Stratonikov
Date: Wed, 3 May 2023 16:23:58 +0300
Subject: [PATCH] [#309] locode: Parallelize DB generation

For v0.4.0 release:

Before:
```
Executed in  571.64 secs    fish           external
   usr time  283.07 secs  744.00 micros  283.07 secs
   sys time    8.41 secs  179.00 micros    8.41 secs
```

After:
```
Executed in   54.23 secs    fish           external
   usr time  418.65 secs    1.01 millis  418.65 secs
   sys time    0.61 secs    0.25 millis    0.60 secs
```

Signed-off-by: Evgenii Stratonikov
---
 CHANGELOG.md                       |   1 +
 pkg/util/locode/db/boltdb/calls.go |   2 +-
 pkg/util/locode/db/db.go           | 130 ++++++++++++++++-------------
 3 files changed, 74 insertions(+), 59 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 07e7b043..316518e6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ Changelog for FrostFS Node
 - Reload pprof and metrics on SIGHUP for ir (#125)
 
 ### Changed
+- `frostfs-cli util locode generate` is now much faster (#309)
 ### Fixed
 - Take network settings into account during netmap contract update (#100)
 - Read config files from dir even if config file not provided via `--config` for node (#238)
diff --git a/pkg/util/locode/db/boltdb/calls.go b/pkg/util/locode/db/boltdb/calls.go
index 171808af..6a80def3 100644
--- a/pkg/util/locode/db/boltdb/calls.go
+++ b/pkg/util/locode/db/boltdb/calls.go
@@ -103,7 +103,7 @@ func recordFromValue(data []byte) (*locodedb.Record, error) {
 // Must not be called before successful Open call.
 // Must not be called in read-only mode: behavior is undefined.
 func (db *DB) Put(key locodedb.Key, rec locodedb.Record) error {
-	return db.bolt.Update(func(tx *bbolt.Tx) error {
+	return db.bolt.Batch(func(tx *bbolt.Tx) error {
 		countryKey, err := countryBucketKey(key.CountryCode())
 		if err != nil {
 			return err
diff --git a/pkg/util/locode/db/db.go b/pkg/util/locode/db/db.go
index 2a0f2668..8c71ea79 100644
--- a/pkg/util/locode/db/db.go
+++ b/pkg/util/locode/db/db.go
@@ -3,8 +3,10 @@ package locodedb
 import (
 	"errors"
 	"fmt"
+	"runtime"
 
 	"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/locode"
+	"golang.org/x/sync/errgroup"
 )
 
 // SourceTable is an interface of the UN/LOCODE table.
@@ -75,81 +77,93 @@ type NamesDB interface {
 
 // FillDatabase generates the FrostFS location database based on the UN/LOCODE table.
 func FillDatabase(table SourceTable, airports AirportDB, continents ContinentsDB, names NamesDB, db DB) error {
-	return table.IterateAll(func(tableRecord locode.Record) error {
-		if tableRecord.LOCODE.LocationCode() == "" {
+	var errG errgroup.Group
+
+	// Pick some sane default, after this the performance stopped increasing.
+	errG.SetLimit(runtime.NumCPU() * 4)
+	_ = table.IterateAll(func(tableRecord locode.Record) error {
+		errG.Go(func() error {
+			return processTableRecord(tableRecord, airports, continents, names, db)
+		})
+		return nil
+	})
+	return errG.Wait()
+}
+
+func processTableRecord(tableRecord locode.Record, airports AirportDB, continents ContinentsDB, names NamesDB, db DB) error {
+	if tableRecord.LOCODE.LocationCode() == "" {
+		return nil
+	}
+
+	dbKey, err := NewKey(tableRecord.LOCODE)
+	if err != nil {
+		return err
+	}
+
+	dbRecord, err := NewRecord(tableRecord)
+	if err != nil {
+		if errors.Is(err, errParseCoordinates) {
 			return nil
 		}
 
-		dbKey, err := NewKey(tableRecord.LOCODE)
-		if err != nil {
-			return err
-		}
+		return err
+	}
 
-		dbRecord, err := NewRecord(tableRecord)
+	geoPoint := dbRecord.GeoPoint()
+	countryName := ""
+
+	if geoPoint == nil {
+		airportRecord, err := airports.Get(tableRecord)
 		if err != nil {
-			if errors.Is(err, errParseCoordinates) {
+			if errors.Is(err, ErrAirportNotFound) {
 				return nil
 			}
 
 			return err
 		}
 
-		geoPoint := dbRecord.GeoPoint()
-		countryName := ""
+		geoPoint = airportRecord.Point
+		countryName = airportRecord.CountryName
+	}
 
-		if geoPoint == nil {
-			airportRecord, err := airports.Get(tableRecord)
-			if err != nil {
-				if errors.Is(err, ErrAirportNotFound) {
-					return nil
-				}
+	dbRecord.SetGeoPoint(geoPoint)
 
-				return err
-			}
-
-			geoPoint = airportRecord.Point
-			countryName = airportRecord.CountryName
-		}
-
-		dbRecord.SetGeoPoint(geoPoint)
-
-		if countryName == "" {
-			countryName, err = names.CountryName(dbKey.CountryCode())
-			if err != nil {
-				if errors.Is(err, ErrCountryNotFound) {
-					return nil
-				}
-
-				return err
-			}
-		}
-
-		dbRecord.SetCountryName(countryName)
-
-		if subDivCode := dbRecord.SubDivCode(); subDivCode != "" {
-			subDivName, err := names.SubDivName(dbKey.CountryCode(), subDivCode)
-			if err != nil {
-				if errors.Is(err, ErrSubDivNotFound) {
-					return nil
-				}
-
-				return err
-			}
-
-			dbRecord.SetSubDivName(subDivName)
-		}
-
-		continent, err := continents.PointContinent(geoPoint)
+	if countryName == "" {
+		countryName, err = names.CountryName(dbKey.CountryCode())
 		if err != nil {
-			return fmt.Errorf("could not calculate continent geo point: %w", err)
-		} else if continent.Is(ContinentUnknown) {
-			return nil
+			if errors.Is(err, ErrCountryNotFound) {
+				return nil
+			}
+
+			return err
+		}
+	}
+
+	dbRecord.SetCountryName(countryName)
+
+	if subDivCode := dbRecord.SubDivCode(); subDivCode != "" {
+		subDivName, err := names.SubDivName(dbKey.CountryCode(), subDivCode)
+		if err != nil {
+			if errors.Is(err, ErrSubDivNotFound) {
+				return nil
+			}
+
+			return err
 		}
 
-		dbRecord.SetContinent(continent)
+		dbRecord.SetSubDivName(subDivName)
+	}
 
-		return db.Put(*dbKey, *dbRecord)
-	})
+	continent, err := continents.PointContinent(geoPoint)
+	if err != nil {
+		return fmt.Errorf("could not calculate continent geo point: %w", err)
+	} else if continent.Is(ContinentUnknown) {
+		return nil
+	}
+
+	dbRecord.SetContinent(continent)
+
+	return db.Put(*dbKey, *dbRecord)
 }
 
 // LocodeRecord returns the record from the FrostFS location database