From 0a56a168ff0ec7817d2041781477adfb60e6ea54 Mon Sep 17 00:00:00 2001
From: Nick Craig-Wood <nick@craig-wood.com>
Date: Mon, 25 Feb 2019 20:15:04 +0000
Subject: [PATCH] bin/get-github-release.go: scrape the downloads page to avoid
 the API limit

This should fix build failures on pull requests, which can't use the
GitHub token.
---
 bin/get-github-release.go | 61 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 60 insertions(+), 1 deletion(-)

diff --git a/bin/get-github-release.go b/bin/get-github-release.go
index 0e27625f7..970f545a9 100644
--- a/bin/get-github-release.go
+++ b/bin/get-github-release.go
@@ -17,14 +17,18 @@ import (
 	"io/ioutil"
 	"log"
 	"net/http"
+	"net/url"
 	"os"
 	"os/exec"
+	"path"
 	"path/filepath"
 	"regexp"
 	"runtime"
 	"strings"
 	"time"
 
+	"github.com/ncw/rclone/lib/rest"
+	"golang.org/x/net/html"
 	"golang.org/x/sys/unix"
 )
 
@@ -33,6 +37,7 @@ var (
 	install = flag.Bool("install", false, "Install the downloaded package using sudo dpkg -i.")
 	extract = flag.String("extract", "", "Extract the named executable from the .tar.gz and install into bindir.")
 	bindir  = flag.String("bindir", defaultBinDir(), "Directory to install files downloaded with -extract.")
+	useAPI  = flag.Bool("use-api", false, "Use the API for finding the release instead of scraping the page.")
 	// Globals
 	matchProject = regexp.MustCompile(`^([\w-]+)/([\w-]+)$`)
 	osAliases    = map[string][]string{
@@ -209,6 +214,55 @@ func getAsset(project string, matchName *regexp.Regexp) (string, string) {
 	return "", ""
 }
 
+// getAssetFromReleasesPage finds an asset URL and name by scraping the
+// project's GitHub releases page, returning the first link in document order
+// whose base name matches matchName and our OS/Arch. It doesn't use the API
+// so isn't subject to the API rate limit. Any failure is fatal (log.Fatalf).
+func getAssetFromReleasesPage(project string, matchName *regexp.Regexp) (assetURL string, assetName string) {
+	baseURL := "https://github.com/" + project + "/releases"
+	log.Printf("Fetching asset info for %q from %q", project, baseURL)
+	base, err := url.Parse(baseURL)
+	if err != nil {
+		log.Fatalf("URL Parse failed: %v", err)
+	}
+	resp, err := http.Get(baseURL)
+	if err != nil {
+		log.Fatalf("Failed to fetch release info %q: %v", baseURL, err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		log.Printf("Error: %s", readBody(resp.Body))
+		log.Fatalf("Bad status %d when fetching %q release info: %s", resp.StatusCode, baseURL, resp.Status)
+	}
+	doc, err := html.Parse(resp.Body)
+	if err != nil {
+		log.Fatalf("Failed to parse web page: %v", err)
+	}
+	var walk func(*html.Node)
+	walk = func(n *html.Node) {
+		if n.Type == html.ElementNode && n.Data == "a" {
+			for _, a := range n.Attr {
+				if a.Key == "href" {
+					if name := path.Base(a.Val); matchName.MatchString(name) && isOurOsArch(name) {
+						if u, err := rest.URLJoin(base, a.Val); err == nil {
+							assetName, assetURL = name, u.String() // href is resolved against the page URL
+						}
+					}
+					break
+				}
+			}
+		}
+		for c := n.FirstChild; c != nil && assetName == ""; c = c.NextSibling { // stop at the first match
+			walk(c)
+		}
+	}
+	walk(doc)
+	if assetName == "" || assetURL == "" {
+		log.Fatalf("Didn't find URL in page")
+	}
+	return assetURL, assetName
+}
+
 // isOurOsArch returns true if s contains our OS and our Arch
 func isOurOsArch(s string) bool {
 	s = strings.ToLower(s)
@@ -346,7 +400,12 @@ func main() {
 		log.Fatalf("Invalid regexp for name %q: %v", nameRe, err)
 	}
 
-	assetURL, assetName := getAsset(project, matchName)
+	var assetURL, assetName string
+	if *useAPI {
+		assetURL, assetName = getAsset(project, matchName)
+	} else {
+		assetURL, assetName = getAssetFromReleasesPage(project, matchName)
+	}
 	fileName := filepath.Join(os.TempDir(), assetName)
 	getFile(assetURL, fileName)