#6 Add automated database updater

A small parser of the unece.org "Trade code download page" was added to the Makefile
to fetch the URL of the latest database to download. A Forgejo action was also added;
it runs every month to update the databases and send a PR to Forgejo.

Signed-off-by: George Bartolomey <george@bh4.ru>
This commit is contained in:
George Bartolomey 2024-07-03 16:39:00 +03:00
parent 7a76e91bdd
commit 040c0e9d08
Signed by: george.bartolomey
GPG key ID: 35BC54839D73BFAD
2 changed files with 20 additions and 1 deletions

View file

@ -0,0 +1,17 @@
# Scheduled job: rebuild the databases with `make update` and push the result
# for review as an automatically created pull request.
on:
  schedule:
    # minute 2, hour 0, day-of-month 1 -> once a month
    - cron: "2 0 1 * *"
jobs:
  checkupdates:
    runs-on: docker
    steps:
      - uses: https://code.forgejo.org/actions/checkout@v3
      - run: make update
      - run: |
          git config user.name "Snegurochka"
          git config user.email "snegurochka@frostfs.info"
          git switch -c update-dbs
          git add .
          # `git commit` exits non-zero when nothing is staged, which would mark
          # the scheduled run as failed whenever `make update` changed nothing.
          # Finish successfully in that case instead.
          if git diff --cached --quiet; then
            echo "Databases are already up to date, nothing to push"
            exit 0
          fi
          git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@git.frostfs.info/TrueCloudLab/frostfs-locode-db
          # Pushing to refs/for/<branch> with title/topic options opens the pull request.
          git commit -m "Update databases" && \
          git push origin HEAD:refs/for/master -o title="Automatic update databases" -o topic=update-db

View file

@ -63,7 +63,9 @@ data/countries.dat.gz: $(DIRS)
# IGNORE # IGNORE
# See https://unece.org/trade/cefact/UNLOCODE-Download # See https://unece.org/trade/cefact/UNLOCODE-Download
# Download the current UN/LOCODE CSV archive.
# The archive name carries a release number (loc222csv.zip, ...), so the link is
# scraped from the download page with grep+sed rather than hard-coded:
#   1. grep extracts every <a href="..."> opening tag,
#   2. sed strips the tag prefix and the closing quote, leaving the bare URL,
#   3. grep keeps URLs matching loc<digits>csv.zip, head keeps the first one.
# The recipe aborts when no link is found, so an empty or bogus archive is never
# left behind to look up to date. The download goes to a ".part" file that is
# renamed only on success; `wget -c` is deliberately not used, because resuming
# onto an archive from an older release would produce a corrupt zip.
tmp/locode.csv.zip :$(DIRS)
	DOWNLOADURL=$$(wget -O - https://unece.org/trade/cefact/UNLOCODE-Download | grep -io '<a href=['"'"'"][^"'"'"']*['"'"'"]' | sed -e 's/^<a href=["'"'"']//i' -e 's/["'"'"']$$//i' | grep -E 'loc[[:digit:]]+csv\.zip' | head -n 1); \
	if [ -z "$$DOWNLOADURL" ]; then \
		echo "no loc<N>csv.zip link found on the UNECE download page" >&2; \
		exit 1; \
	fi; \
	wget "$$DOWNLOADURL" -O "$@.part" && \
	mv -f "$@.part" "$@"
data/unlocode-SubdivisionCodes.csv.gz: tmp/locode.csv.zip data/unlocode-SubdivisionCodes.csv.gz: tmp/locode.csv.zip
unzip -p tmp/locode.csv.zip "*SubdivisionCodes.csv" | gzip > data/unlocode-SubdivisionCodes.csv.gz unzip -p tmp/locode.csv.zip "*SubdivisionCodes.csv" | gzip > data/unlocode-SubdivisionCodes.csv.gz