[#309] locode: Parallelize DB generation

Timings of the location DB generation for the v0.4.0 release:
Before:
```
Executed in  571.64 secs    fish           external
   usr time  283.07 secs  744.00 micros  283.07 secs
   sys time    8.41 secs  179.00 micros    8.41 secs
```

After:
```
Executed in   54.23 secs    fish           external
   usr time  418.65 secs    1.01 millis  418.65 secs
   sys time    0.61 secs    0.25 millis    0.60 secs
```
Signed-off-by: Evgenii Stratonikov <e.stratonikov@yadro.com>
This commit is contained in:
Evgenii Stratonikov 2023-05-03 16:23:58 +03:00 committed by Evgenii Stratonikov
parent 147ae8728a
commit 02c02974b3
3 changed files with 74 additions and 59 deletions

View file

@ -9,6 +9,7 @@ Changelog for FrostFS Node
- Reload pprof and metrics on SIGHUP for ir (#125) - Reload pprof and metrics on SIGHUP for ir (#125)
### Changed ### Changed
- `frostfs-cli util locode generate` is now much faster (#309)
### Fixed ### Fixed
- Take network settings into account during netmap contract update (#100) - Take network settings into account during netmap contract update (#100)
- Read config files from dir even if config file not provided via `--config` for node (#238) - Read config files from dir even if config file not provided via `--config` for node (#238)

View file

@ -103,7 +103,7 @@ func recordFromValue(data []byte) (*locodedb.Record, error) {
// Must not be called before successful Open call. // Must not be called before successful Open call.
// Must not be called in read-only mode: behavior is undefined. // Must not be called in read-only mode: behavior is undefined.
func (db *DB) Put(key locodedb.Key, rec locodedb.Record) error { func (db *DB) Put(key locodedb.Key, rec locodedb.Record) error {
return db.bolt.Update(func(tx *bbolt.Tx) error { return db.bolt.Batch(func(tx *bbolt.Tx) error {
countryKey, err := countryBucketKey(key.CountryCode()) countryKey, err := countryBucketKey(key.CountryCode())
if err != nil { if err != nil {
return err return err

View file

@ -3,8 +3,10 @@ package locodedb
import ( import (
"errors" "errors"
"fmt" "fmt"
"runtime"
"git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/locode" "git.frostfs.info/TrueCloudLab/frostfs-node/pkg/util/locode"
"golang.org/x/sync/errgroup"
) )
// SourceTable is an interface of the UN/LOCODE table. // SourceTable is an interface of the UN/LOCODE table.
@ -75,81 +77,93 @@ type NamesDB interface {
// FillDatabase generates the FrostFS location database based on the UN/LOCODE table. // FillDatabase generates the FrostFS location database based on the UN/LOCODE table.
func FillDatabase(table SourceTable, airports AirportDB, continents ContinentsDB, names NamesDB, db DB) error { func FillDatabase(table SourceTable, airports AirportDB, continents ContinentsDB, names NamesDB, db DB) error {
return table.IterateAll(func(tableRecord locode.Record) error { var errG errgroup.Group
if tableRecord.LOCODE.LocationCode() == "" {
// Pick some sane default, after this the performance stopped increasing.
errG.SetLimit(runtime.NumCPU() * 4)
_ = table.IterateAll(func(tableRecord locode.Record) error {
errG.Go(func() error {
return processTableRecord(tableRecord, airports, continents, names, db)
})
return nil
})
return errG.Wait()
}
func processTableRecord(tableRecord locode.Record, airports AirportDB, continents ContinentsDB, names NamesDB, db DB) error {
if tableRecord.LOCODE.LocationCode() == "" {
return nil
}
dbKey, err := NewKey(tableRecord.LOCODE)
if err != nil {
return err
}
dbRecord, err := NewRecord(tableRecord)
if err != nil {
if errors.Is(err, errParseCoordinates) {
return nil return nil
} }
dbKey, err := NewKey(tableRecord.LOCODE) return err
if err != nil { }
return err
}
dbRecord, err := NewRecord(tableRecord) geoPoint := dbRecord.GeoPoint()
countryName := ""
if geoPoint == nil {
airportRecord, err := airports.Get(tableRecord)
if err != nil { if err != nil {
if errors.Is(err, errParseCoordinates) { if errors.Is(err, ErrAirportNotFound) {
return nil return nil
} }
return err return err
} }
geoPoint := dbRecord.GeoPoint() geoPoint = airportRecord.Point
countryName := "" countryName = airportRecord.CountryName
}
if geoPoint == nil { dbRecord.SetGeoPoint(geoPoint)
airportRecord, err := airports.Get(tableRecord)
if err != nil {
if errors.Is(err, ErrAirportNotFound) {
return nil
}
return err if countryName == "" {
} countryName, err = names.CountryName(dbKey.CountryCode())
geoPoint = airportRecord.Point
countryName = airportRecord.CountryName
}
dbRecord.SetGeoPoint(geoPoint)
if countryName == "" {
countryName, err = names.CountryName(dbKey.CountryCode())
if err != nil {
if errors.Is(err, ErrCountryNotFound) {
return nil
}
return err
}
}
dbRecord.SetCountryName(countryName)
if subDivCode := dbRecord.SubDivCode(); subDivCode != "" {
subDivName, err := names.SubDivName(dbKey.CountryCode(), subDivCode)
if err != nil {
if errors.Is(err, ErrSubDivNotFound) {
return nil
}
return err
}
dbRecord.SetSubDivName(subDivName)
}
continent, err := continents.PointContinent(geoPoint)
if err != nil { if err != nil {
return fmt.Errorf("could not calculate continent geo point: %w", err) if errors.Is(err, ErrCountryNotFound) {
} else if continent.Is(ContinentUnknown) { return nil
return nil }
return err
}
}
dbRecord.SetCountryName(countryName)
if subDivCode := dbRecord.SubDivCode(); subDivCode != "" {
subDivName, err := names.SubDivName(dbKey.CountryCode(), subDivCode)
if err != nil {
if errors.Is(err, ErrSubDivNotFound) {
return nil
}
return err
} }
dbRecord.SetContinent(continent) dbRecord.SetSubDivName(subDivName)
}
return db.Put(*dbKey, *dbRecord) continent, err := continents.PointContinent(geoPoint)
}) if err != nil {
return fmt.Errorf("could not calculate continent geo point: %w", err)
} else if continent.Is(ContinentUnknown) {
return nil
}
dbRecord.SetContinent(continent)
return db.Put(*dbKey, *dbRecord)
} }
// LocodeRecord returns the record from the FrostFS location database // LocodeRecord returns the record from the FrostFS location database