[#26] ci: Revive automatic database updates

Database updates were broken for a while (and no one noticed because CI was
not even switched on). This commit:

- Adds some greyhat web scraping (fake User-Agent to avoid HTTP 403)
- Surfaces errors in intermediate recipe steps (`cmd && cmd` instead of
  `cmd; cmd`)
- Works around other possible CI errors in automatic PRs (--signoff)

Signed-off-by: Vitaliy Potyarkin <v.potyarkin@yadro.com>
This commit is contained in:
Vitaliy Potyarkin 2025-03-13 12:47:21 +03:00
parent 1c14038948
commit 1a38db8a2b
2 changed files with 9 additions and 4 deletions

View file

@ -1,17 +1,19 @@
on:
schedule:
- cron: "2 0 1 * *"
workflow_dispatch:
jobs:
checkupdates:
runs-on: docker
steps:
- uses: actions/checkout@v3
- run: make update
- run: make clean_data update
- run: |
git config user.name "Snegurochka"
git config user.email "snegurochka@frostfs.info"
git switch -c update-dbs
git add .
git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@git.frostfs.info/TrueCloudLab/frostfs-locode-db
git commit -m "Automatic database update (UN/LOCODE version $(cat tmp/locode-version.txt))" && \
git commit -sm "Automatic database update (UN/LOCODE version $(cat tmp/locode-version.txt))" && \
git push origin HEAD:refs/for/master -o topic=automatic-database-update

View file

@ -71,8 +71,11 @@ data/countries.dat.gz: $(DIRS)
# See https://unece.org/trade/cefact/UNLOCODE-Download
tmp/locode.csv.zip :$(DIRS)
DOWNLOADURL=$$(wget -O - https://unece.org/trade/cefact/UNLOCODE-Download \
| grep -oP '(?<=href=")\S+loc\d+csv\.zip'); \
echo "$$DOWNLOADURL" | grep -oP '(?<=loc)\d+' > tmp/locode-version.txt; \
--header='User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:136.0) Gecko/20100101 Firefox/136.0' \
| grep -oP '(?<=href=")\S+loc\d+csv\.zip' \
| head -n1) && \
test -n "$$DOWNLOADURL" && \
echo "$$DOWNLOADURL" | grep -oP '(?<=loc)\d+' > tmp/locode-version.txt && \
wget -c "$$DOWNLOADURL" -O tmp/locode.csv.zip
data/unlocode-SubdivisionCodes.csv.gz: tmp/locode.csv.zip