Sparse checkout of repos; each repo is downloaded into its own folder to support concurrency (#1779)

This commit is contained in:
Janos Dobronszki 2020-07-02 14:40:40 +02:00 committed by GitHub
parent 174e44b846
commit 3c633e3577
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 186 additions and 210 deletions

1
go.mod
View File

@ -47,6 +47,7 @@ require (
github.com/patrickmn/go-cache v2.1.0+incompatible github.com/patrickmn/go-cache v2.1.0+incompatible
github.com/pkg/errors v0.9.1 github.com/pkg/errors v0.9.1
github.com/stretchr/testify v1.4.0 github.com/stretchr/testify v1.4.0
github.com/teris-io/shortid v0.0.0-20171029131806-771a37caa5cf
github.com/tmc/grpc-websocket-proxy v0.0.0-20200122045848-3419fae592fc // indirect github.com/tmc/grpc-websocket-proxy v0.0.0-20200122045848-3419fae592fc // indirect
go.etcd.io/bbolt v1.3.4 go.etcd.io/bbolt v1.3.4
go.uber.org/zap v1.13.0 go.uber.org/zap v1.13.0

2
go.sum
View File

@ -477,6 +477,8 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P
github.com/syndtr/gocapability v0.0.0-20170704070218-db04d3cc01c8/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/syndtr/gocapability v0.0.0-20170704070218-db04d3cc01c8/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/technoweenie/multipartstreamer v1.0.1 h1:XRztA5MXiR1TIRHxH2uNxXxaIkKQDeX7m2XsSOlQEnM= github.com/technoweenie/multipartstreamer v1.0.1 h1:XRztA5MXiR1TIRHxH2uNxXxaIkKQDeX7m2XsSOlQEnM=
github.com/technoweenie/multipartstreamer v1.0.1/go.mod h1:jNVxdtShOxzAsukZwTSw6MDx5eUJoiEBsSvzDU9uzog= github.com/technoweenie/multipartstreamer v1.0.1/go.mod h1:jNVxdtShOxzAsukZwTSw6MDx5eUJoiEBsSvzDU9uzog=
github.com/teris-io/shortid v0.0.0-20171029131806-771a37caa5cf h1:Z2X3Os7oRzpdJ75iPqWZc0HeJWFYNCvKsfpQwFpRNTA=
github.com/teris-io/shortid v0.0.0-20171029131806-771a37caa5cf/go.mod h1:M8agBzgqHIhgj7wEn9/0hJUZcrvt9VY+Ln+S1I5Mha0=
github.com/timewasted/linode v0.0.0-20160829202747-37e84520dcf7/go.mod h1:imsgLplxEC/etjIhdr3dNzV3JeT27LbVu5pYWm0JCBY= github.com/timewasted/linode v0.0.0-20160829202747-37e84520dcf7/go.mod h1:imsgLplxEC/etjIhdr3dNzV3JeT27LbVu5pYWm0JCBY=
github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/tmc/grpc-websocket-proxy v0.0.0-20200122045848-3419fae592fc h1:yUaosFVTJwnltaHbSNC3i82I92quFs+OFPRl8kNMVwo= github.com/tmc/grpc-websocket-proxy v0.0.0-20200122045848-3419fae592fc h1:yUaosFVTJwnltaHbSNC3i82I92quFs+OFPRl8kNMVwo=

View File

@ -1,8 +1,6 @@
package runtime package runtime
import ( import (
"archive/tar"
"compress/gzip"
"errors" "errors"
"fmt" "fmt"
"io" "io"
@ -58,7 +56,6 @@ func NewRuntime(opts ...Option) Runtime {
} }
} }
// @todo move this to runtime default
func (r *runtime) checkoutSourceIfNeeded(s *Service) error { func (r *runtime) checkoutSourceIfNeeded(s *Service) error {
// Runtime service like config have no source. // Runtime service like config have no source.
// Skip checkout in that case // Skip checkout in that case
@ -77,7 +74,7 @@ func (r *runtime) checkoutSourceIfNeeded(s *Service) error {
if err != nil { if err != nil {
return err return err
} }
err = uncompress(cpath, path) err = git.Uncompress(cpath, path)
if err != nil { if err != nil {
return err return err
} }
@ -98,83 +95,6 @@ func (r *runtime) checkoutSourceIfNeeded(s *Service) error {
return nil return nil
} }
// uncompress extracts the gzip-compressed tar archive at src into the
// directory dst. It is a modified version of:
// https://gist.github.com/mimoo/25fc9716e0f1353791f5908f94d6e726
//
// Only directory and regular-file entries are extracted; every other entry
// type is ignored. Each entry name is checked with validRelPath and the
// first name that fails the check aborts the extraction with an error.
// Existing files are truncated and overwritten, but files left in dst by an
// earlier extraction are not removed.
func uncompress(src string, dst string) error {
	// The archive is only read, so open it read-only. This also avoids
	// creating an empty file when src does not exist.
	file, err := os.Open(src)
	if err != nil {
		return err
	}
	defer file.Close()

	// ungzip
	zr, err := gzip.NewReader(file)
	if err != nil {
		return err
	}
	defer zr.Close()

	// untar
	tr := tar.NewReader(zr)

	// uncompress each element
	for {
		header, err := tr.Next()
		if err == io.EOF {
			break // End of archive
		}
		if err != nil {
			return err
		}

		// validate name against path traversal
		if !validRelPath(header.Name) {
			return fmt.Errorf("tar contained invalid name error %q\n", header.Name)
		}

		// add dst + re-format slashes according to system
		target := filepath.Join(dst, header.Name)

		// check the type
		switch header.Typeflag {
		// if it's a dir and it doesn't exist create it (with 0755 permission)
		case tar.TypeDir:
			if _, err := os.Stat(target); err != nil {
				// @todo think about this:
				// if we don't nuke the folder, we might end up with files from
				// the previous decompress.
				if err := os.MkdirAll(target, 0755); err != nil {
					return err
				}
			}
		// if it's a file create it (with same permission)
		case tar.TypeReg:
			fileToWrite, err := os.OpenFile(target, os.O_TRUNC|os.O_CREATE|os.O_RDWR, os.FileMode(header.Mode))
			if err != nil {
				return err
			}
			// Close right after the copy instead of deferring: a defer would
			// keep every extracted file open until the whole archive is done.
			// Close on the error path too, so the descriptor is never leaked.
			_, copyErr := io.Copy(fileToWrite, tr)
			closeErr := fileToWrite.Close()
			if copyErr != nil {
				return copyErr
			}
			if closeErr != nil {
				return closeErr
			}
		}
	}
	return nil
}
// validRelPath reports whether p is acceptable as an archive entry name: a
// non-empty, relative, forward-slash path that cannot climb out of the
// extraction directory.
//
// It rejects the empty string, any name containing a backslash, any name
// starting with "/", and any name with a ".." path segment. Segments are
// compared exactly, so a trailing ".." (as in ".." or "a/..") is rejected
// while a name that merely contains dots (as in "a..b") is accepted.
func validRelPath(p string) bool {
	if p == "" || strings.Contains(p, `\`) || strings.HasPrefix(p, "/") {
		return false
	}
	for _, segment := range strings.Split(p, "/") {
		if segment == ".." {
			return false
		}
	}
	return true
}
// Init initializes runtime options // Init initializes runtime options
func (r *runtime) Init(opts ...Option) error { func (r *runtime) Init(opts ...Option) error {
r.Lock() r.Lock()

View File

@ -1,157 +1,79 @@
package git package git
import ( import (
"errors" "archive/tar"
"archive/zip"
"compress/gzip"
"fmt" "fmt"
"io"
"net/http"
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"regexp" "regexp"
"strings" "strings"
"github.com/go-git/go-git/v5" "github.com/teris-io/shortid"
"github.com/go-git/go-git/v5/config"
"github.com/go-git/go-git/v5/plumbing"
) )
type Gitter interface { type Gitter interface {
Clone(repo string) error
FetchAll(repo string) error
Checkout(repo, branchOrCommit string) error Checkout(repo, branchOrCommit string) error
RepoDir(repo string) string RepoDir() string
}
// libGitter is the Gitter implementation backed by the go-git library.
// folder is the directory under which each repository gets its own
// sub-directory (see RepoDir).
type libGitter struct {
	folder string
}
// Clone clones repo into its directory under g.folder, writing progress to
// stdout. If that directory already exists the call does nothing and
// returns nil, so it is safe to call repeatedly.
func (g libGitter) Clone(repo string) error {
	target := filepath.Join(g.folder, dirifyRepo(repo))

	switch found, err := pathExists(target); {
	case err != nil:
		return err
	case found:
		// Already cloned: nothing to do.
		return nil
	}

	opts := &git.CloneOptions{
		URL:      repo,
		Progress: os.Stdout,
	}
	_, err := git.PlainClone(target, false, opts)
	return err
}
// FetchAll fetches all refs, plus HEAD, from the first configured remote of
// the local clone of repo, at depth 1, writing progress to stdout.
//
// It returns an error if the clone cannot be opened, if it has no remotes,
// or if the fetch fails. An already up-to-date repository is not an error.
func (g libGitter) FetchAll(repo string) error {
	repos, err := git.PlainOpen(filepath.Join(g.folder, dirifyRepo(repo)))
	if err != nil {
		return err
	}
	remotes, err := repos.Remotes()
	if err != nil {
		return err
	}
	// Guard the index below: a repository without remotes would otherwise
	// panic instead of reporting an error.
	if len(remotes) == 0 {
		return fmt.Errorf("repository %v has no remotes to fetch from", repo)
	}

	err = remotes[0].Fetch(&git.FetchOptions{
		RefSpecs: []config.RefSpec{"refs/*:refs/*", "HEAD:refs/heads/HEAD"},
		Progress: os.Stdout,
		Depth:    1,
	})
	if err != nil && err != git.NoErrAlreadyUpToDate {
		return err
	}
	return nil
}
// Checkout force-checks-out branchOrCommit in the local clone of repo.
// The special value "latest" is treated as the "master" branch. A value
// that looks like a hash is checked out as a commit; anything else is
// checked out as a branch.
func (g libGitter) Checkout(repo, branchOrCommit string) error {
	ref := branchOrCommit
	if ref == "latest" {
		ref = "master"
	}

	repository, err := git.PlainOpen(filepath.Join(g.folder, dirifyRepo(repo)))
	if err != nil {
		return err
	}
	tree, err := repository.Worktree()
	if err != nil {
		return err
	}

	// Both variants force the checkout; only the target differs.
	opts := &git.CheckoutOptions{Force: true}
	if plumbing.IsHash(ref) {
		opts.Hash = plumbing.NewHash(ref)
	} else {
		opts.Branch = plumbing.NewBranchReferenceName(ref)
	}
	return tree.Checkout(opts)
}
func (g libGitter) RepoDir(repo string) string {
return filepath.Join(g.folder, dirifyRepo(repo))
} }
type binaryGitter struct { type binaryGitter struct {
folder string folder string
} }
func (g binaryGitter) Clone(repo string) error { func (g *binaryGitter) Checkout(repo, branchOrCommit string) error {
fold := filepath.Join(g.folder, dirifyRepo(repo), ".git")
exists, err := pathExists(fold)
if err != nil {
return err
}
if exists {
return nil
}
fold = filepath.Join(g.folder, dirifyRepo(repo))
cmd := exec.Command("git", "clone", repo, ".")
err = os.MkdirAll(fold, 0777)
if err != nil {
return err
}
cmd.Dir = fold
_, err = cmd.Output()
if err != nil {
return err
}
return err
}
func (g binaryGitter) FetchAll(repo string) error {
cmd := exec.Command("git", "fetch", "--all")
cmd.Dir = filepath.Join(g.folder, dirifyRepo(repo))
outp, err := cmd.CombinedOutput()
if err != nil {
return errors.New(string(outp))
}
return err
}
func (g binaryGitter) Checkout(repo, branchOrCommit string) error {
if branchOrCommit == "latest" { if branchOrCommit == "latest" {
branchOrCommit = "master" branchOrCommit = "master"
} }
cmd := exec.Command("git", "checkout", "-f", branchOrCommit) // @todo if it's a commit it must not be checked out all the time
cmd.Dir = filepath.Join(g.folder, dirifyRepo(repo)) repoFolder := strings.ReplaceAll(strings.ReplaceAll(repo, "/", "-"), "https://", "")
outp, err := cmd.CombinedOutput() g.folder = filepath.Join(os.TempDir(),
if err != nil { repoFolder+"-"+shortid.MustGenerate())
return errors.New(string(outp))
url := fmt.Sprintf("%v/archive/%v.zip", repo, branchOrCommit)
if !strings.HasPrefix(url, "https://") {
url = "https://" + url
} }
return nil resp, err := http.Get(url)
if err != nil {
return fmt.Errorf("Can't get zip: %v", err)
}
defer resp.Body.Close()
// Github returns 404 for tar.gz files...
// but still gives back a proper file so ignoring status code
// for now.
//if resp.StatusCode != 200 {
// return errors.New("Status code was not 200")
//}
src := g.folder + ".zip"
// Create the file
out, err := os.Create(src)
if err != nil {
return fmt.Errorf("Can't create source file %v src: %v", src, err)
}
defer out.Close()
// Write the body to file
_, err = io.Copy(out, resp.Body)
if err != nil {
return err
}
return unzip(src, g.folder, true)
} }
func (g binaryGitter) RepoDir(repo string) string { func (g *binaryGitter) RepoDir() string {
return filepath.Join(g.folder, dirifyRepo(repo)) return g.folder
} }
func NewGitter(folder string) Gitter { func NewGitter(folder string) Gitter {
if commandExists("git") { return &binaryGitter{folder}
return binaryGitter{folder}
}
return libGitter{folder}
} }
func commandExists(cmd string) bool { func commandExists(cmd string) bool {
@ -324,13 +246,12 @@ func CheckoutSource(folder string, source *Source) error {
if !strings.Contains(repo, "https://") { if !strings.Contains(repo, "https://") {
repo = "https://" + repo repo = "https://" + repo
} }
// Always clone, it's idempotent and only clones if needed err := gitter.Checkout(source.Repo, source.Ref)
err := gitter.Clone(repo)
if err != nil { if err != nil {
return err return err
} }
source.FullPath = filepath.Join(gitter.RepoDir(source.Repo), source.Folder) source.FullPath = filepath.Join(gitter.RepoDir(), source.Folder)
return gitter.Checkout(repo, source.Ref) return nil
} }
// code below is not used yet // code below is not used yet
@ -345,3 +266,135 @@ func extractServiceName(fileContent []byte) string {
hit := string(hits[0]) hit := string(hits[0])
return strings.Split(hit, "\"")[1] return strings.Split(hit, "\"")[1]
} }
// Uncompress extracts the gzip-compressed tar archive at src into the
// directory dst. It is a modified version of:
// https://gist.github.com/mimoo/25fc9716e0f1353791f5908f94d6e726
//
// Only directory and regular-file entries are extracted; every other entry
// type is ignored. Each entry name is checked with validRelPath and the
// first name that fails the check aborts the extraction with an error.
// Existing files are truncated and overwritten, but files left in dst by an
// earlier extraction are not removed.
func Uncompress(src string, dst string) error {
	// The archive is only read, so open it read-only. This also avoids
	// creating an empty file when src does not exist.
	file, err := os.Open(src)
	if err != nil {
		return err
	}
	defer file.Close()

	// ungzip
	zr, err := gzip.NewReader(file)
	if err != nil {
		return err
	}
	defer zr.Close()

	// untar
	tr := tar.NewReader(zr)

	// uncompress each element
	for {
		header, err := tr.Next()
		if err == io.EOF {
			break // End of archive
		}
		if err != nil {
			return err
		}

		// validate name against path traversal
		if !validRelPath(header.Name) {
			return fmt.Errorf("tar contained invalid name error %q\n", header.Name)
		}

		// add dst + re-format slashes according to system
		target := filepath.Join(dst, header.Name)

		// check the type
		switch header.Typeflag {
		// if it's a dir and it doesn't exist create it (with 0755 permission)
		case tar.TypeDir:
			if _, err := os.Stat(target); err != nil {
				// @todo think about this:
				// if we don't nuke the folder, we might end up with files from
				// the previous decompress.
				if err := os.MkdirAll(target, 0755); err != nil {
					return err
				}
			}
		// if it's a file create it (with same permission)
		case tar.TypeReg:
			fileToWrite, err := os.OpenFile(target, os.O_TRUNC|os.O_CREATE|os.O_RDWR, os.FileMode(header.Mode))
			if err != nil {
				return err
			}
			// Close right after the copy instead of deferring: a defer would
			// keep every extracted file open until the whole archive is done.
			// Close on the error path too, so the descriptor is never leaked.
			_, copyErr := io.Copy(fileToWrite, tr)
			closeErr := fileToWrite.Close()
			if copyErr != nil {
				return copyErr
			}
			if closeErr != nil {
				return closeErr
			}
		}
	}
	return nil
}
// validRelPath reports whether p is acceptable as an archive entry name: a
// non-empty, relative, forward-slash path that cannot climb out of the
// extraction directory.
//
// It rejects the empty string, any name containing a backslash, any name
// starting with "/", and any name with a ".." path segment. Segments are
// compared exactly, so a trailing ".." (as in ".." or "a/..") is rejected
// while a name that merely contains dots (as in "a..b") is accepted.
func validRelPath(p string) bool {
	if p == "" || strings.Contains(p, `\`) || strings.HasPrefix(p, "/") {
		return false
	}
	for _, segment := range strings.Split(p, "/") {
		if segment == ".." {
			return false
		}
	}
	return true
}
// unzip extracts the zip archive at src into the directory dest, creating
// dest if it does not exist. Based on:
// https://stackoverflow.com/questions/20357223/easy-way-to-unzip-file-with-golang
//
// When skipTopFolder is true the first path element of every entry name is
// dropped, so the contents of the archive's top-level folder are written
// directly into dest.
//
// Extraction stops at the first error. An entry whose name would place it
// outside dest (for example through ".." segments) is rejected with an
// error rather than written.
func unzip(src, dest string, skipTopFolder bool) error {
	r, err := zip.OpenReader(src)
	if err != nil {
		return err
	}
	defer r.Close()

	if err := os.MkdirAll(dest, 0755); err != nil {
		return err
	}

	// A closure per entry keeps the deferred Close calls from piling up
	// until the whole archive has been processed.
	extractAndWriteFile := func(f *zip.File) error {
		// Zip entry names always use forward slashes, regardless of the
		// host OS, so strip the top folder on "/" and not on the platform
		// separator. The entry itself is left unmodified.
		name := f.Name
		if skipTopFolder {
			if i := strings.Index(name, "/"); i >= 0 {
				name = name[i+1:]
			} else {
				name = ""
			}
		}
		if name == "" {
			if f.FileInfo().IsDir() {
				// The stripped top folder maps onto dest, which already exists.
				return nil
			}
			return fmt.Errorf("zip entry %q has an empty target path", f.Name)
		}

		path := filepath.Join(dest, filepath.FromSlash(name))

		// Guard against "zip slip": refuse entries that resolve outside dest.
		rel, err := filepath.Rel(dest, path)
		if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
			return fmt.Errorf("zip entry %q would be extracted outside %q", f.Name, dest)
		}

		if f.FileInfo().IsDir() {
			return os.MkdirAll(path, f.Mode())
		}

		// Parent directories need the search bit to be usable, so they must
		// not inherit the (typically non-executable) mode of the file.
		if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
			return err
		}

		rc, err := f.Open()
		if err != nil {
			return err
		}
		defer rc.Close()

		out, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
		if err != nil {
			return err
		}
		if _, err := io.Copy(out, rc); err != nil {
			out.Close()
			return err
		}
		// Report a failed Close: it can signal that the data was not written.
		return out.Close()
	}

	for _, f := range r.File {
		if err := extractAndWriteFile(f); err != nil {
			return err
		}
	}
	return nil
}