initial import

Signed-off-by: Vasiliy Tolstov <v.tolstov@selfip.ru>
2018-03-22 13:35:38 +03:00
commit 83f9fa19ad
73 changed files with 4302 additions and 0 deletions

backend/backend.go

@@ -0,0 +1,87 @@
package backend
import (
"errors"
"fmt"
"io"
"strings"
)
/*
You could model "files" as variable-length blocks, where a file is the list of blocks needed to reconstruct it. To insert data you create a new block and keep the original, but record that you only need part of its data.
A file is a list of blocks. When you insert new data, you create a new block and adjust the start/end of the affected blocks.
This lets you insert data at the same speed as end-of-file writes. You can add lazy cleanup that removes unused parts of blocks and recompresses them, but the important part is that none of that has to happen when the file is updated.
There is only a read overhead when the "start" of a block is >0, and the same holds for "ReadAt": you skip through the blocks until you reach the position you want and start decoding from there.
If blocks are stored under their uncompressed hash, you furthermore get coarse high-level deduplication, though that of course makes it harder to tell when a block is no longer used by any file.
With that, you should get pretty good performance, even with block sizes up to 16-64 MB.
Within each block, variable-sized deduplication with a 4 KB average chunk, deflating the result at level 1, should give roughly 50-60 MB/s per core. If you need higher write throughput, you can always set deflate to level 0 (store) and leave it to a lazy task to compress the data; that should give about 150 MB/s per core. Obviously you can deduplicate/compress several blocks in parallel.
An illustrative sketch of such an insert follows the type definitions below.
*/
/*
type File struct {
Name string
Size uint64
Blocks []Block
Ring *config.Ring
}
type Block struct {
Start uint64
Stop uint64
Size uint64
}
*/
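// Sketch only (not part of the API): one way the insert described above could
// work, assuming the File/Block types from the comment and that Start/Stop
// are offsets into the stored block, so a file's bytes are the concatenation
// of the Stop-Start windows.
/*
func (f *File) InsertAt(off uint64, nb Block) {
	var pos uint64
	for i, b := range f.Blocks {
		blen := b.Stop - b.Start // bytes this block view contributes
		if off < pos+blen {
			// split the covering block into two views of the same stored
			// data and place the new block between them; nothing is rewritten
			delta := off - pos
			left, right := b, b
			left.Stop = b.Start + delta
			right.Start = b.Start + delta
			f.Blocks = append(f.Blocks[:i], append([]Block{left, nb, right}, f.Blocks[i+1:]...)...)
			f.Size += nb.Stop - nb.Start
			return // empty views (delta == 0) are left for the lazy cleanup pass
		}
		pos += blen
	}
	// at or past the end: a plain append, the same cost as an end-of-file write
	f.Blocks = append(f.Blocks, nb)
	f.Size += nb.Stop - nb.Start
}
*/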
var (
ErrIO = errors.New("IO error")
)
var backendTypes map[string]Backend
func init() {
backendTypes = make(map[string]Backend)
}
func RegisterBackend(engine string, backend Backend) {
backendTypes[engine] = backend
}
func New(btype string, cfg interface{}) (Backend, error) {
var err error
backend, ok := backendTypes[btype]
if !ok {
return nil, fmt.Errorf("unknown backend type %s. only %s supported", btype, strings.Join(BackendTypes(), ","))
}
if cfg == nil {
return backend, nil
}
err = backend.Init(cfg)
if err != nil {
return nil, err
}
return backend, nil
}
func BackendTypes() []string {
var btypes []string
for btype := range backendTypes {
btypes = append(btypes, btype)
}
return btypes
}
type Backend interface {
Configure(interface{}) error
Init(interface{}) error
ReaderFrom(string, io.Reader, int64, int64, int, int) (int64, error)
WriterTo(string, io.Writer, int64, int64, int, int) (int64, error)
WriteAt(string, []byte, int64, int, int) (int, error)
ReadAt(string, []byte, int64, int, int) (int, error)
Allocate(string, int64, int, int) error
Remove(string, int, int) error
Exists(string, int, int) (bool, error)
}
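
For reference, a minimal sketch of how this registry is meant to be wired up: a backend registers itself from its package init and callers go through New. The memBackend stub and its no-op methods are invented here purely to illustrate the flow; the real implementations follow in the backend subpackages below.

package main

import (
	"fmt"
	"io"

	"github.com/sdstack/storage/backend"
)

// memBackend is a hypothetical stub satisfying backend.Backend.
type memBackend struct{}

func (memBackend) Configure(interface{}) error { return nil }
func (memBackend) Init(interface{}) error      { return nil }
func (memBackend) ReaderFrom(string, io.Reader, int64, int64, int, int) (int64, error) { return 0, nil }
func (memBackend) WriterTo(string, io.Writer, int64, int64, int, int) (int64, error)   { return 0, nil }
func (memBackend) WriteAt(string, []byte, int64, int, int) (int, error)                { return 0, nil }
func (memBackend) ReadAt(string, []byte, int64, int, int) (int, error)                 { return 0, nil }
func (memBackend) Allocate(string, int64, int, int) error                              { return nil }
func (memBackend) Remove(string, int, int) error                                       { return nil }
func (memBackend) Exists(string, int, int) (bool, error)                               { return false, nil }

func main() {
	// registration normally happens in the backend package's own init()
	backend.RegisterBackend("mem", memBackend{})

	// New looks the engine up by name; a non-nil cfg would be passed to Init
	b, err := backend.New("mem", nil)
	fmt.Println(b, err)
}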

backend/block/block.go

@@ -0,0 +1,100 @@
package block
/*
import (
"fmt"
"os"
"github.com/mitchellh/mapstructure"
"golang.org/x/sys/unix"
"github.com/sdstack/storage/backend"
)
type StoreBlock struct {
store []string
}
type ObjectBlock struct {
fs *StoreBlock
fp *os.File
}
func init() {
backend.RegisterBackend("block", &StoreBlock{})
}
func (s *StoreBlock) Init(data interface{}) error {
var err error
err = mapstructure.Decode(data, &s.store)
if err != nil {
return err
}
fmt.Printf("%#+v\n", s)
return nil
}
func (s *StoreBlock) Configure(data interface{}) error {
var err error
err = mapstructure.Decode(data, &s.store)
if err != nil {
return err
}
fmt.Printf("%#+v\n", s)
return nil
}
func (s *StoreBlock) Open() error {
return nil
}
func (s *StoreBlock) Close() error {
return nil
}
func (o *ObjectBlock) Allocate(mode uint64, offset uint64, length uint64) error {
return unix.Fallocate(int(o.fp.Fd()), uint32(mode), int64(offset), int64(length))
}
func (o *ObjectBlock) Delete(flags uint64) error {
return nil
// return os.Remove(filepath.Join())
}
func (o *ObjectBlock) ReadAt(data []byte, offset uint64, flags uint64) (uint64, error) {
return 0, nil
}
func (o *ObjectBlock) WriteAt(data []byte, offset uint64, flags uint64) (uint64, error) {
return 0, nil
}
func (o *ObjectBlock) Read(data []byte) (int, error) {
return o.fp.Read(data)
}
func (o *ObjectBlock) Write(data []byte) (int, error) {
return o.fp.Write(data)
}
func (o *ObjectBlock) Seek(offset int64, whence int) (int64, error) {
return o.fp.Seek(offset, whence)
}
func (o *StoreBlock) ObjectExists(name string) (bool, error) {
_, err := os.Stat(name)
// os.IsNotExist reports that the file is missing, so the result must be negated
return !os.IsNotExist(err), nil
}
func (o *ObjectBlock) Sync() error {
return o.fp.Sync()
}
func (o *ObjectBlock) Close() error {
return o.fp.Close()
}
func (o *ObjectBlock) Remove() error {
return nil
}
*/

@@ -0,0 +1,348 @@
package filesystem
import (
"fmt"
"hash"
"io"
"os"
"path/filepath"
"sync"
"github.com/mitchellh/mapstructure"
"github.com/valyala/fastrand"
"golang.org/x/sys/unix"
"github.com/sdstack/storage/backend"
"github.com/sdstack/storage/cache"
shash "github.com/sdstack/storage/hash"
"github.com/sdstack/storage/ring"
)
type BackendFilesystem struct {
cfg *config
hash hash.Hash
fdcache cache.Cache
rng fastrand.RNG
ring ring.Ring // assumed exported type returned by ring.New; init() and Init below need this field
weights map[string]int // per-path weights, filled by init() and Init
mu sync.Mutex
}
const (
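// vSize (16 MiB) is the unit Init uses to derive a disk's weight from its capacity.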
vSize = 16 * 1024 * 1024
)
type config struct {
Debug bool
FDCache struct {
Engine string
Size int
} `mapstructure:"fdcache"`
Mode string `mapstructure:"mode"`
Options map[string]interface{}
Shards struct {
Data int
Parity int
}
Ring string
Store []struct {
ID string
Path string
Weight int
}
}
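// An illustrative config for this backend, written as the map that mapstructure
// would decode; keys follow the struct tags above and every value is made up:
//
//	map[string]interface{}{
//		"debug": true,
//		"mode": "copy",
//		"fdcache": map[string]interface{}{"engine": "lru", "size": 128},
//		"shards": map[string]interface{}{"data": 2, "parity": 0},
//		"ring": "hash",
//		"store": []map[string]interface{}{{"id": "d1", "path": "/srv/disk1", "weight": 0}},
//	}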
func init() {
s := &BackendFilesystem{}
s.weights = make(map[string]int)
backend.RegisterBackend("filesystem", s)
}
func (s *BackendFilesystem) Init(data interface{}) error {
var err error
var fi os.FileInfo
var statfs unix.Statfs_t
err = mapstructure.Decode(data, &s.cfg)
if err != nil {
return err
}
for _, it := range s.cfg.Store {
if it.Weight < 0 {
continue
}
if fi, err = os.Stat(it.Path); err != nil || !fi.IsDir() {
continue
}
if it.Weight > 0 {
s.weights[it.Path] = it.Weight
continue
}
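// no explicit weight configured: derive one from the filesystem capacity, in vSize units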
if err = unix.Statfs(it.Path, &statfs); err == nil {
s.weights[it.Path] = int(int64(statfs.Blocks) * statfs.Bsize / vSize)
}
}
if s.cfg.Mode == "copy" && len(s.weights) < s.cfg.Shards.Data {
return fmt.Errorf("data shards is more then available disks")
}
/*
if s.fdcache, err = cache.New(s.cfg.FDCache.Engine, s.cfg.FDCache.Size); err != nil {
return err
}
if err = s.fdcache.OnEvict(s.onCacheEvict); err != nil {
return err
}
*/
if s.ring, err = ring.New(s.cfg.Ring, s.weights); err != nil {
return err
}
if s.hash, err = shash.New("xxhash"); err != nil {
return err
}
return nil
}
/*
func (s *BackendFilesystem) onCacheEvict(k interface{}, v interface{}) {
v.(*os.File).Close()
}
*/
func (s *BackendFilesystem) Configure(data interface{}) error {
var err error
var fi os.FileInfo
err = mapstructure.Decode(data, &s.cfg)
if err != nil {
return err
}
for _, it := range s.cfg.Store {
if fi, err = os.Stat(it.Path); err != nil || !fi.IsDir() {
continue
}
s.weights[it.Path] = it.Weight
}
if s.cfg.Mode == "copy" && len(s.weights) < s.cfg.Shards.Data {
return fmt.Errorf("data shards is more then available disks")
}
if s.ring, err = ring.New(s.cfg.Ring, s.weights); err != nil {
return err
}
// fdcache construction is currently commented out in Init, so it can be nil here
if s.fdcache != nil {
s.fdcache.Purge()
}
return nil
}
func (s *BackendFilesystem) Allocate(name string, size int64, ndata int, nparity int) error {
var err error
var fp *os.File
var items interface{}
var disks []string
if s.cfg.Debug {
fmt.Printf("%T %s\n", s, "allocate")
}
if nparity != 0 || ndata <= 0 {
// parity shards are not implemented; without this guard the nil items assertion below would panic
return fmt.Errorf("unsupported shard configuration: %d data, %d parity", ndata, nparity)
}
if items, err = s.ring.GetItem(name, ndata); err != nil {
return err
}
disks = items.([]string)
if s.cfg.Debug {
fmt.Printf("%T %s %v %s\n", s, "read", disks, name)
}
var errs []error
for _, disk := range disks {
if fp, err = os.OpenFile(filepath.Join(disk, name), os.O_RDWR|os.O_CREATE, os.FileMode(0660)); err != nil {
// TODO: remove from ring
errs = append(errs, err)
continue
}
if err = unix.Fallocate(int(fp.Fd()), 0, 0, size); err != nil {
// TODO: remove from ring
errs = append(errs, err)
}
fp.Close()
}
if len(errs) > 0 {
return errs[0]
}
return nil
}
func (s *BackendFilesystem) ReadAt(name string, buf []byte, offset int64, ndata int, nparity int) (int, error) {
if s.cfg.Debug {
fmt.Printf("%T %s\n", s, "read")
}
//var errs []error
var err error
var n int
var items interface{}
var disks []string
var fp *os.File
if nparity != 0 || ndata <= 0 {
return 0, fmt.Errorf("unsupported shard configuration: %d data, %d parity", ndata, nparity)
}
if items, err = s.ring.GetItem(name, ndata); err != nil {
return 0, err
}
disks = items.([]string)
if s.cfg.Debug {
fmt.Printf("%T %s %v %s %d %d\n", s, "read", disks, name, offset, len(buf))
}
for len(disks) > 0 {
s.mu.Lock()
idx := int(s.rng.Uint32n(uint32(len(disks))))
s.mu.Unlock()
disk := disks[idx]
fname := filepath.Join(disk, name)
// open read-only: a read must not create an empty replica
if fp, err = os.Open(fname); err == nil {
n, err = fp.ReadAt(buf, offset)
fp.Close()
if err == nil || err == io.EOF {
return n, nil
}
}
// drop the failed disk so the loop terminates instead of retrying forever
disks = append(disks[:idx], disks[idx+1:]...)
/*
s.ring.DelItem(disk)
s.ring.SetState(ring.StateDegrade)
*/
}
return 0, backend.ErrIO
}
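// rwres is reserved for the commented-out concurrent write path in WriteAt below.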
type rwres struct {
n int
err error
}
func (s *BackendFilesystem) WriterTo(name string, w io.Writer, offset int64, size int64, ndata int, nparity int) (int64, error) {
buf := make([]byte, size)
n, err := s.ReadAt(name, buf, offset, ndata, nparity)
if err != nil || int64(n) < size {
return 0, backend.ErrIO
}
n, err = w.Write(buf)
return int64(n), err
}
func (s *BackendFilesystem) ReaderFrom(name string, r io.Reader, offset int64, size int64, ndata int, nparity int) (int64, error) {
buf := make([]byte, size)
// io.ReadFull guards against short reads, which a bare Read allows
n, err := io.ReadFull(r, buf)
if err != nil || int64(n) < size {
return 0, backend.ErrIO
}
n, err = s.WriteAt(name, buf, offset, ndata, nparity)
return int64(n), err
}
func (s *BackendFilesystem) WriteAt(name string, buf []byte, offset int64, ndata int, nparity int) (int, error) {
if s.cfg.Debug {
fmt.Printf("%T %s\n", s, "write")
}
var n int
var err error
var items interface{}
var disks []string
var fp *os.File
if nparity != 0 || ndata <= 0 {
return 0, fmt.Errorf("unsupported shard configuration: %d data, %d parity", ndata, nparity)
}
if items, err = s.ring.GetItem(name, ndata); err != nil {
return 0, err
}
disks = items.([]string)
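// hinfo is a placeholder for recording per-write block hashes; nothing fills it yet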
hinfo := struct {
Hashes []uint64
}{}
_ = hinfo
//result := make(chan rwres, len(disks))
for _, disk := range disks {
// go func() {
//var res rwres
fp, err = os.OpenFile(filepath.Join(disk, name), os.O_CREATE|os.O_RDWR, os.FileMode(0660))
if err != nil {
continue
}
// mw := io.MultiWriter(s.hash, fp)
n, err = fp.WriteAt(buf, offset)
/*
if o.fs.cfg.Options.Sync {
if res.err = fp.Sync(); res.err != nil {
result <- res
}
}
*/
fp.Close()
}
if s.ring.Size() < 1 {
s.ring.SetState(ring.StateFail)
return 0, fmt.Errorf("can't write to failed ring")
}
/*
if res.err != nil || res.n < len(buf) {
n = res.n
err = res.err
//delete(o.fps, res.disk)
}
*/
return n, nil
}
func (s *BackendFilesystem) Exists(name string, ndata int, nparity int) (bool, error) {
if s.cfg.Debug {
fmt.Printf("%T %s %s\n", s, "object_exists", name)
}
var err error
var items interface{}
var disks []string
if nparity != 0 || ndata <= 0 {
return false, fmt.Errorf("unsupported shard configuration: %d data, %d parity", ndata, nparity)
}
if items, err = s.ring.GetItem(name, ndata); err != nil {
return false, err
}
disks = items.([]string)
for len(disks) > 0 {
s.mu.Lock()
idx := int(s.rng.Uint32n(uint32(len(disks))))
s.mu.Unlock()
if _, err = os.Stat(filepath.Join(disks[idx], name)); err == nil {
return true, nil
}
// drop the checked disk so the loop terminates once every replica has been tried
disks = append(disks[:idx], disks[idx+1:]...)
}
return false, nil
}
func (s *BackendFilesystem) Remove(name string, ndata int, nparity int) error {
if s.cfg.Debug {
fmt.Printf("%T %s\n", s, "remove")
}
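// TODO: removal of the object from the ring members is not implemented yet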
return nil
}
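
Putting the pieces together, a sketch of how a caller could construct this backend through the registry. The import path for the filesystem package, the "hash" ring engine name, and the store paths are assumptions for illustration only; the paths must exist as directories, or Init skips them and the shard check fails.

package main

import (
	"fmt"
	"log"

	"github.com/sdstack/storage/backend"
	// assumed import path; the package registers itself as "filesystem" in init()
	_ "github.com/sdstack/storage/backend/filesystem"
)

func main() {
	// keys mirror the config struct tags; every value here is made up
	cfg := map[string]interface{}{
		"mode": "copy",
		"ring": "hash", // assumed ring engine name
		"shards": map[string]interface{}{"data": 2, "parity": 0},
		"store": []map[string]interface{}{
			{"id": "d1", "path": "/srv/disk1", "weight": 0}, // weight 0: derived from capacity
			{"id": "d2", "path": "/srv/disk2", "weight": 0},
		},
	}

	b, err := backend.New("filesystem", cfg) // New passes cfg to Init
	if err != nil {
		log.Fatal(err)
	}
	if err := b.Allocate("volume-0001", 16<<20, 2, 0); err != nil {
		log.Fatal(err)
	}
	n, err := b.WriteAt("volume-0001", []byte("hello"), 0, 2, 0)
	fmt.Println(n, err)
}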