From 3c633e3577990f77d8c17ea7abbd9948e63f6555 Mon Sep 17 00:00:00 2001 From: Janos Dobronszki Date: Thu, 2 Jul 2020 14:40:40 +0200 Subject: [PATCH] Sparse checkout of repos, all repo downloads happen to different folders for concurrency support (#1779) --- go.mod | 1 + go.sum | 2 + runtime/default.go | 82 +---------- runtime/local/git/git.go | 311 +++++++++++++++++++++++---------------- 4 files changed, 186 insertions(+), 210 deletions(-) diff --git a/go.mod b/go.mod index 2e565563..a669791b 100644 --- a/go.mod +++ b/go.mod @@ -47,6 +47,7 @@ require ( github.com/patrickmn/go-cache v2.1.0+incompatible github.com/pkg/errors v0.9.1 github.com/stretchr/testify v1.4.0 + github.com/teris-io/shortid v0.0.0-20171029131806-771a37caa5cf github.com/tmc/grpc-websocket-proxy v0.0.0-20200122045848-3419fae592fc // indirect go.etcd.io/bbolt v1.3.4 go.uber.org/zap v1.13.0 diff --git a/go.sum b/go.sum index bcb52c9f..1bf6307b 100644 --- a/go.sum +++ b/go.sum @@ -477,6 +477,8 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P github.com/syndtr/gocapability v0.0.0-20170704070218-db04d3cc01c8/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/technoweenie/multipartstreamer v1.0.1 h1:XRztA5MXiR1TIRHxH2uNxXxaIkKQDeX7m2XsSOlQEnM= github.com/technoweenie/multipartstreamer v1.0.1/go.mod h1:jNVxdtShOxzAsukZwTSw6MDx5eUJoiEBsSvzDU9uzog= +github.com/teris-io/shortid v0.0.0-20171029131806-771a37caa5cf h1:Z2X3Os7oRzpdJ75iPqWZc0HeJWFYNCvKsfpQwFpRNTA= +github.com/teris-io/shortid v0.0.0-20171029131806-771a37caa5cf/go.mod h1:M8agBzgqHIhgj7wEn9/0hJUZcrvt9VY+Ln+S1I5Mha0= github.com/timewasted/linode v0.0.0-20160829202747-37e84520dcf7/go.mod h1:imsgLplxEC/etjIhdr3dNzV3JeT27LbVu5pYWm0JCBY= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20200122045848-3419fae592fc h1:yUaosFVTJwnltaHbSNC3i82I92quFs+OFPRl8kNMVwo= diff --git a/runtime/default.go b/runtime/default.go index 28ecc4b8..d86f9290 100644 --- a/runtime/default.go +++ b/runtime/default.go @@ -1,8 +1,6 @@ package runtime import ( - "archive/tar" - "compress/gzip" "errors" "fmt" "io" @@ -58,7 +56,6 @@ func NewRuntime(opts ...Option) Runtime { } } -// @todo move this to runtime default func (r *runtime) checkoutSourceIfNeeded(s *Service) error { // Runtime service like config have no source. // Skip checkout in that case @@ -77,7 +74,7 @@ func (r *runtime) checkoutSourceIfNeeded(s *Service) error { if err != nil { return err } - err = uncompress(cpath, path) + err = git.Uncompress(cpath, path) if err != nil { return err } @@ -98,83 +95,6 @@ func (r *runtime) checkoutSourceIfNeeded(s *Service) error { return nil } -// modified version of: https://gist.github.com/mimoo/25fc9716e0f1353791f5908f94d6e726 -func uncompress(src string, dst string) error { - file, err := os.OpenFile(src, os.O_RDWR|os.O_CREATE, 0666) - defer file.Close() - if err != nil { - return err - } - // ungzip - zr, err := gzip.NewReader(file) - if err != nil { - return err - } - // untar - tr := tar.NewReader(zr) - - // uncompress each element - for { - header, err := tr.Next() - if err == io.EOF { - break // End of archive - } - if err != nil { - return err - } - target := header.Name - - // validate name against path traversal - if !validRelPath(header.Name) { - return fmt.Errorf("tar contained invalid name error %q\n", target) - } - - // add dst + re-format slashes according to system - target = filepath.Join(dst, header.Name) - // if no join is needed, replace with ToSlash: - // target = filepath.ToSlash(header.Name) - - // check the type - switch header.Typeflag { - - // if its a dir and it doesn't exist create it (with 0755 permission) - case tar.TypeDir: - if _, err := os.Stat(target); err != nil { - // @todo think about this: - // if we don't nuke the folder, we might end up with files from - // the previous decompress. - if err := os.MkdirAll(target, 0755); err != nil { - return err - } - } - // if it's a file create it (with same permission) - case tar.TypeReg: - // the truncating is probably unnecessary due to the `RemoveAll` of folders - // above - fileToWrite, err := os.OpenFile(target, os.O_TRUNC|os.O_CREATE|os.O_RDWR, os.FileMode(header.Mode)) - if err != nil { - return err - } - // copy over contents - if _, err := io.Copy(fileToWrite, tr); err != nil { - return err - } - // manually close here after each file operation; defering would cause each file close - // to wait until all operations have completed. - fileToWrite.Close() - } - } - return nil -} - -// check for path traversal and correct forward slashes -func validRelPath(p string) bool { - if p == "" || strings.Contains(p, `\`) || strings.HasPrefix(p, "/") || strings.Contains(p, "../") { - return false - } - return true -} - // Init initializes runtime options func (r *runtime) Init(opts ...Option) error { r.Lock() diff --git a/runtime/local/git/git.go b/runtime/local/git/git.go index 88ec350e..f3957fa0 100644 --- a/runtime/local/git/git.go +++ b/runtime/local/git/git.go @@ -1,157 +1,79 @@ package git import ( - "errors" + "archive/tar" + "archive/zip" + "compress/gzip" "fmt" + "io" + "net/http" "os" "os/exec" "path/filepath" "regexp" "strings" - "github.com/go-git/go-git/v5" - "github.com/go-git/go-git/v5/config" - "github.com/go-git/go-git/v5/plumbing" + "github.com/teris-io/shortid" ) type Gitter interface { - Clone(repo string) error - FetchAll(repo string) error Checkout(repo, branchOrCommit string) error - RepoDir(repo string) string -} - -type libGitter struct { - folder string -} - -func (g libGitter) Clone(repo string) error { - fold := filepath.Join(g.folder, dirifyRepo(repo)) - exists, err := pathExists(fold) - if err != nil { - return err - } - if exists { - return nil - } - _, err = git.PlainClone(fold, false, &git.CloneOptions{ - URL: repo, - Progress: os.Stdout, - }) - return err -} - -func (g libGitter) FetchAll(repo string) error { - repos, err := git.PlainOpen(filepath.Join(g.folder, dirifyRepo(repo))) - if err != nil { - return err - } - remotes, err := repos.Remotes() - if err != nil { - return err - } - - err = remotes[0].Fetch(&git.FetchOptions{ - RefSpecs: []config.RefSpec{"refs/*:refs/*", "HEAD:refs/heads/HEAD"}, - Progress: os.Stdout, - Depth: 1, - }) - if err != nil && err != git.NoErrAlreadyUpToDate { - return err - } - return nil -} - -func (g libGitter) Checkout(repo, branchOrCommit string) error { - if branchOrCommit == "latest" { - branchOrCommit = "master" - } - repos, err := git.PlainOpen(filepath.Join(g.folder, dirifyRepo(repo))) - if err != nil { - return err - } - worktree, err := repos.Worktree() - if err != nil { - return err - } - - if plumbing.IsHash(branchOrCommit) { - return worktree.Checkout(&git.CheckoutOptions{ - Hash: plumbing.NewHash(branchOrCommit), - Force: true, - }) - } - - return worktree.Checkout(&git.CheckoutOptions{ - Branch: plumbing.NewBranchReferenceName(branchOrCommit), - Force: true, - }) -} - -func (g libGitter) RepoDir(repo string) string { - return filepath.Join(g.folder, dirifyRepo(repo)) + RepoDir() string } type binaryGitter struct { folder string } -func (g binaryGitter) Clone(repo string) error { - fold := filepath.Join(g.folder, dirifyRepo(repo), ".git") - exists, err := pathExists(fold) - if err != nil { - return err - } - if exists { - return nil - } - fold = filepath.Join(g.folder, dirifyRepo(repo)) - cmd := exec.Command("git", "clone", repo, ".") - - err = os.MkdirAll(fold, 0777) - if err != nil { - return err - } - cmd.Dir = fold - _, err = cmd.Output() - if err != nil { - return err - } - return err -} - -func (g binaryGitter) FetchAll(repo string) error { - cmd := exec.Command("git", "fetch", "--all") - cmd.Dir = filepath.Join(g.folder, dirifyRepo(repo)) - outp, err := cmd.CombinedOutput() - if err != nil { - return errors.New(string(outp)) - } - return err -} - -func (g binaryGitter) Checkout(repo, branchOrCommit string) error { +func (g *binaryGitter) Checkout(repo, branchOrCommit string) error { if branchOrCommit == "latest" { branchOrCommit = "master" } - cmd := exec.Command("git", "checkout", "-f", branchOrCommit) - cmd.Dir = filepath.Join(g.folder, dirifyRepo(repo)) - outp, err := cmd.CombinedOutput() - if err != nil { - return errors.New(string(outp)) + // @todo if it's a commit it must not be checked out all the time + repoFolder := strings.ReplaceAll(strings.ReplaceAll(repo, "/", "-"), "https://", "") + g.folder = filepath.Join(os.TempDir(), + repoFolder+"-"+shortid.MustGenerate()) + + url := fmt.Sprintf("%v/archive/%v.zip", repo, branchOrCommit) + if !strings.HasPrefix(url, "https://") { + url = "https://" + url } - return nil + resp, err := http.Get(url) + if err != nil { + return fmt.Errorf("Can't get zip: %v", err) + } + + defer resp.Body.Close() + // Github returns 404 for tar.gz files... + // but still gives back a proper file so ignoring status code + // for now. + //if resp.StatusCode != 200 { + // return errors.New("Status code was not 200") + //} + + src := g.folder + ".zip" + // Create the file + out, err := os.Create(src) + if err != nil { + return fmt.Errorf("Can't create source file %v src: %v", src, err) + } + defer out.Close() + + // Write the body to file + _, err = io.Copy(out, resp.Body) + if err != nil { + return err + } + return unzip(src, g.folder, true) } -func (g binaryGitter) RepoDir(repo string) string { - return filepath.Join(g.folder, dirifyRepo(repo)) +func (g *binaryGitter) RepoDir() string { + return g.folder } func NewGitter(folder string) Gitter { - if commandExists("git") { - return binaryGitter{folder} - } - return libGitter{folder} + return &binaryGitter{folder} + } func commandExists(cmd string) bool { @@ -324,13 +246,12 @@ func CheckoutSource(folder string, source *Source) error { if !strings.Contains(repo, "https://") { repo = "https://" + repo } - // Always clone, it's idempotent and only clones if needed - err := gitter.Clone(repo) + err := gitter.Checkout(source.Repo, source.Ref) if err != nil { return err } - source.FullPath = filepath.Join(gitter.RepoDir(source.Repo), source.Folder) - return gitter.Checkout(repo, source.Ref) + source.FullPath = filepath.Join(gitter.RepoDir(), source.Folder) + return nil } // code below is not used yet @@ -345,3 +266,135 @@ func extractServiceName(fileContent []byte) string { hit := string(hits[0]) return strings.Split(hit, "\"")[1] } + +// Uncompress is a modified version of: https://gist.github.com/mimoo/25fc9716e0f1353791f5908f94d6e726 +func Uncompress(src string, dst string) error { + file, err := os.OpenFile(src, os.O_RDWR|os.O_CREATE, 0666) + defer file.Close() + if err != nil { + return err + } + // ungzip + zr, err := gzip.NewReader(file) + if err != nil { + return err + } + // untar + tr := tar.NewReader(zr) + + // uncompress each element + for { + header, err := tr.Next() + if err == io.EOF { + break // End of archive + } + if err != nil { + return err + } + target := header.Name + + // validate name against path traversal + if !validRelPath(header.Name) { + return fmt.Errorf("tar contained invalid name error %q\n", target) + } + + // add dst + re-format slashes according to system + target = filepath.Join(dst, header.Name) + // if no join is needed, replace with ToSlash: + // target = filepath.ToSlash(header.Name) + + // check the type + switch header.Typeflag { + + // if its a dir and it doesn't exist create it (with 0755 permission) + case tar.TypeDir: + if _, err := os.Stat(target); err != nil { + // @todo think about this: + // if we don't nuke the folder, we might end up with files from + // the previous decompress. + if err := os.MkdirAll(target, 0755); err != nil { + return err + } + } + // if it's a file create it (with same permission) + case tar.TypeReg: + // the truncating is probably unnecessary due to the `RemoveAll` of folders + // above + fileToWrite, err := os.OpenFile(target, os.O_TRUNC|os.O_CREATE|os.O_RDWR, os.FileMode(header.Mode)) + if err != nil { + return err + } + // copy over contents + if _, err := io.Copy(fileToWrite, tr); err != nil { + return err + } + // manually close here after each file operation; defering would cause each file close + // to wait until all operations have completed. + fileToWrite.Close() + } + } + return nil +} + +// check for path traversal and correct forward slashes +func validRelPath(p string) bool { + if p == "" || strings.Contains(p, `\`) || strings.HasPrefix(p, "/") || strings.Contains(p, "../") { + return false + } + return true +} + +// taken from https://stackoverflow.com/questions/20357223/easy-way-to-unzip-file-with-golang +func unzip(src, dest string, skipTopFolder bool) error { + r, err := zip.OpenReader(src) + if err != nil { + return err + } + defer func() { + r.Close() + }() + + os.MkdirAll(dest, 0755) + + // Closure to address file descriptors issue with all the deferred .Close() methods + extractAndWriteFile := func(f *zip.File) error { + rc, err := f.Open() + if err != nil { + return err + } + defer func() { + rc.Close() + }() + if skipTopFolder { + f.Name = strings.Join(strings.Split(f.Name, string(filepath.Separator))[1:], string(filepath.Separator)) + } + path := filepath.Join(dest, f.Name) + if f.FileInfo().IsDir() { + os.MkdirAll(path, f.Mode()) + } else { + os.MkdirAll(filepath.Dir(path), f.Mode()) + f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode()) + if err != nil { + return err + } + defer func() { + f.Close() + }() + + _, err = io.Copy(f, rc) + if err != nil { + return err + } + } + return nil + } + + for _, f := range r.File { + err := extractAndWriteFile(f) + if err != nil { + return err + } + } + + return nil +}