87
backend/backend.go
Normal file
87
backend/backend.go
Normal file
@@ -0,0 +1,87 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
/*
|
||||
You could do "files" as variable length blocks, where you have a list of blocks needed to reconstruct. To insert data you create a new block, keep the original, but specify that you only need parts of the data.
|
||||
A file is a list of blocks. When you insert new data, you create a new block, and adjust start/end of affected blocks.
|
||||
This allows you to insert data at the same speed as end-of-file writes. You can add some lazy cleanup, that removes unused parts of blocks and recompresses them, but the important part is that you do not need to do that when the file is updated.
|
||||
There will only be a read overhead when "start" of a block is >0. That will be the same for using "ReadAt". You skip in your blocks until you get to the position you want and start decoding from there.
|
||||
If blocks are stored based on their uncompressed hash, you furthermore get a coarse high-level deduplication, though that of course makes it more difficult to figure out when a block is no longer used by any file.
|
||||
With that, you should get pretty good performance, even with block sizes up to 16-64MB.
|
||||
For each block, I would probably go with variably sized de-duplication, with a 4K average, and deflate the result @level 1. That should give you ~50-60MB/core throughput. If you need higher write throughput, you can always set deflate to level 0 (store), and leave it up to a lazy task to compress the data. That should give you about 150MB/core. Obviously you can de-duplicate/compress several blocks in parallel.
|
||||
*/
|
||||
|
||||
/*
|
||||
type File struct {
|
||||
Name string
|
||||
Size uint64
|
||||
Blocks []Block
|
||||
Ring *config.Ring
|
||||
}
|
||||
|
||||
type Block struct {
|
||||
Start uint64
|
||||
Stop uint64
|
||||
Size uint64
|
||||
}
|
||||
*/
|
||||
|
||||
var (
	// ErrIO is the generic I/O failure sentinel returned by backend
	// implementations when a read or write cannot be satisfied.
	ErrIO = errors.New("IO error")
)
|
||||
|
||||
var backendTypes map[string]Backend
|
||||
|
||||
func init() {
|
||||
backendTypes = make(map[string]Backend)
|
||||
}
|
||||
|
||||
// RegisterBackend makes a backend available to New under the given engine
// name. It is called from backend packages' init() functions; a later
// registration under the same name overwrites an earlier one. The registry
// map is not guarded, so this is not safe for concurrent use.
func RegisterBackend(engine string, backend Backend) {
	backendTypes[engine] = backend
}
|
||||
|
||||
func New(btype string, cfg interface{}) (Backend, error) {
|
||||
var err error
|
||||
|
||||
backend, ok := backendTypes[btype]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown backend type %s. only %s supported", btype, strings.Join(BackendTypes(), ","))
|
||||
}
|
||||
|
||||
if cfg == nil {
|
||||
return backend, nil
|
||||
}
|
||||
|
||||
err = backend.Init(cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return backend, nil
|
||||
}
|
||||
|
||||
func BackendTypes() []string {
|
||||
var btypes []string
|
||||
for btype, _ := range backendTypes {
|
||||
btypes = append(btypes, btype)
|
||||
}
|
||||
return btypes
|
||||
}
|
||||
|
||||
// Backend is the contract a storage engine implements and registers via
// RegisterBackend. String arguments name the object being operated on; the
// trailing pair of int arguments on the I/O methods is (ndata, nparity) —
// the shard layout, judging by the filesystem implementation.
type Backend interface {
	// Configure re-applies configuration to an already-running backend.
	Configure(interface{}) error
	// Init applies first-time configuration; New calls it when cfg != nil.
	Init(interface{}) error
	// ReaderFrom copies size bytes from the reader into the named object at
	// the given offset, returning the number of bytes written.
	ReaderFrom(string, io.Reader, int64, int64, int, int) (int64, error)
	// WriterTo copies size bytes of the named object, starting at the given
	// offset, into the writer, returning the number of bytes written.
	WriterTo(string, io.Writer, int64, int64, int, int) (int64, error)
	// WriteAt writes the buffer into the named object at the given offset.
	WriteAt(string, []byte, int64, int, int) (int, error)
	// ReadAt fills the buffer from the named object at the given offset.
	ReadAt(string, []byte, int64, int, int) (int, error)
	// Allocate reserves storage of the given size for the named object.
	Allocate(string, int64, int, int) error
	// Remove deletes the named object.
	Remove(string, int, int) error
	// Exists reports whether the named object is present.
	Exists(string, int, int) (bool, error)
}
|
100
backend/block/block.go
Normal file
100
backend/block/block.go
Normal file
@@ -0,0 +1,100 @@
|
||||
package block
|
||||
|
||||
/*
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"golang.org/x/sys/unix"
|
||||
"github.com/sdstack/storage/backend"
|
||||
)
|
||||
|
||||
type StoreBlock struct {
|
||||
store []string
|
||||
}
|
||||
|
||||
type ObjectBlock struct {
|
||||
fs *StoreBlock
|
||||
fp *os.File
|
||||
}
|
||||
|
||||
func init() {
|
||||
backend.RegisterBackend("block", &StoreBlock{})
|
||||
}
|
||||
func (s *StoreBlock) Init(data interface{}) error {
|
||||
var err error
|
||||
|
||||
err = mapstructure.Decode(data, &s.store)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Printf("%#+v\n", s)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *StoreBlock) Configure(data interface{}) error {
|
||||
var err error
|
||||
|
||||
err = mapstructure.Decode(data, &s.store)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Printf("%#+v\n", s)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *StoreBlock) Open() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *StoreBlock) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *ObjectBlock) Allocate(mode uint64, offset uint64, length uint64) error {
|
||||
return unix.Fallocate(int(o.fp.Fd()), uint32(mode), int64(offset), int64(length))
|
||||
}
|
||||
|
||||
func (o *ObjectBlock) Delete(flags uint64) error {
|
||||
return nil
|
||||
// return os.Remove(filepath.Join())
|
||||
}
|
||||
|
||||
func (o *ObjectBlock) ReadAt(data []byte, offset uint64, flags uint64) (uint64, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (o *ObjectBlock) WriteAt(data []byte, offset uint64, flags uint64) (uint64, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (o *ObjectBlock) Read(data []byte) (int, error) {
|
||||
return o.fp.Read(data)
|
||||
}
|
||||
|
||||
func (o *ObjectBlock) Write(data []byte) (int, error) {
|
||||
return o.fp.Write(data)
|
||||
}
|
||||
|
||||
func (o *ObjectBlock) Seek(offset int64, whence int) (int64, error) {
|
||||
return o.fp.Seek(offset, whence)
|
||||
}
|
||||
|
||||
func (o *StoreBlock) ObjectExists(name string) (bool, error) {
	_, err := os.Stat(name)
	// exists when the stat error is NOT "does not exist" (note the negation;
	// the earlier draft returned os.IsNotExist(err) and so inverted the result)
	return !os.IsNotExist(err), nil
}
|
||||
|
||||
func (o *ObjectBlock) Sync() error {
|
||||
return o.fp.Sync()
|
||||
}
|
||||
|
||||
func (o *ObjectBlock) Close() error {
|
||||
return o.fp.Close()
|
||||
}
|
||||
|
||||
func (o *ObjectBlock) Remove() error {
|
||||
return nil
|
||||
}
|
||||
*/
|
348
backend/filesystem/filesystem.go
Normal file
348
backend/filesystem/filesystem.go
Normal file
@@ -0,0 +1,348 @@
|
||||
package filesystem
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"github.com/mitchellh/mapstructure"
|
||||
"github.com/valyala/fastrand"
|
||||
"golang.org/x/sys/unix"
|
||||
"github.com/sdstack/storage/backend"
|
||||
"github.com/sdstack/storage/cache"
|
||||
shash "github.com/sdstack/storage/hash"
|
||||
"github.com/sdstack/storage/ring"
|
||||
)
|
||||
|
||||
type BackendFilesystem struct {
|
||||
cfg *config
|
||||
hash hash.Hash
|
||||
fdcache cache.Cache
|
||||
rng fastrand.RNG
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
const (
	// vSize (16 MiB) is the capacity unit used by Init to derive a disk's
	// ring weight from its total size (Statfs blocks * block size / vSize).
	vSize = 16 * 1024 * 1024
)
|
||||
|
||||
// config is the mapstructure-decoded configuration of the filesystem
// backend.
type config struct {
	// Debug enables verbose stdout tracing in every operation.
	Debug bool
	// FDCache selects the file-descriptor cache engine and its capacity.
	FDCache struct {
		Engine string
		Size   int
	} `mapstructure:"fdcache"`
	// Mode selects the placement strategy; "copy" is checked explicitly in
	// Init/Configure.
	Mode string `mapstructure:"mode"`
	// Options carries backend-specific settings (unused in this file).
	Options map[string]interface{}
	// Shards is the requested data/parity split for objects.
	Shards struct {
		Data   int
		Parity int
	}
	// Ring names the ring implementation passed to ring.New.
	Ring string
	// Store lists the local directories used as disks. A Weight of 0 makes
	// Init derive the weight from disk capacity; a negative Weight disables
	// the entry.
	Store []struct {
		ID     string
		Path   string
		Weight int
	}
}
|
||||
|
||||
// init registers the filesystem backend under the "filesystem" engine name
// at program start.
func init() {
	s := &BackendFilesystem{}
	// NOTE(review): weights is not declared on BackendFilesystem in this
	// file — this assignment will not compile until the field is added.
	s.weights = make(map[string]int)
	backend.RegisterBackend("filesystem", s)
}
|
||||
|
||||
func (s *BackendFilesystem) Init(data interface{}) error {
|
||||
var err error
|
||||
var fi os.FileInfo
|
||||
var statfs unix.Statfs_t
|
||||
|
||||
err = mapstructure.Decode(data, &s.cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, it := range s.cfg.Store {
|
||||
if it.Weight < 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if fi, err = os.Stat(it.Path); err != nil || !fi.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
if it.Weight > 0 {
|
||||
s.weights[it.Path] = it.Weight
|
||||
continue
|
||||
}
|
||||
|
||||
if err = unix.Statfs(it.Path, &statfs); err == nil {
|
||||
s.weights[it.Path] = int(statfs.Blocks) * int(statfs.Bsize) / vSize
|
||||
}
|
||||
}
|
||||
|
||||
if s.cfg.Mode == "copy" && len(s.weights) < s.cfg.Shards.Data {
|
||||
return fmt.Errorf("data shards is more then available disks")
|
||||
}
|
||||
/*
|
||||
if s.fdcache, err = cache.New(s.cfg.FDCache.Engine, s.cfg.FDCache.Size); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = s.fdcache.OnEvict(s.onCacheEvict); err != nil {
|
||||
return err
|
||||
}
|
||||
*/
|
||||
if s.ring, err = ring.New(s.cfg.Ring, s.weights); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if s.hash, err = shash.New("xxhash"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
/*
|
||||
func (s *StoreFilesystem) onCacheEvict(k interface{}, v interface{}) {
|
||||
v.(*os.File).Close()
|
||||
}
|
||||
*/
|
||||
|
||||
func (s *BackendFilesystem) Configure(data interface{}) error {
|
||||
var err error
|
||||
var fi os.FileInfo
|
||||
|
||||
err = mapstructure.Decode(data, &s.cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, it := range s.cfg.Store {
|
||||
if fi, err = os.Stat(it.Path); err != nil || !fi.IsDir() {
|
||||
continue
|
||||
}
|
||||
s.weights[it.Path] = it.Weight
|
||||
}
|
||||
|
||||
if s.cfg.Mode == "copy" && len(s.weights) < s.cfg.Shards.Data {
|
||||
return fmt.Errorf("data shards is more then available disks")
|
||||
}
|
||||
|
||||
s.ring, err = ring.New(s.cfg.Ring, s.weights)
|
||||
|
||||
s.fdcache.Purge()
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *BackendFilesystem) Allocate(name string, size int64, ndata int, nparity int) error {
|
||||
var err error
|
||||
var fp *os.File
|
||||
var items interface{}
|
||||
var disks []string
|
||||
|
||||
if s.cfg.Debug {
|
||||
fmt.Printf("%T %s\n", s, "allocate")
|
||||
}
|
||||
|
||||
if nparity == 0 && ndata > 0 {
|
||||
if items, err = s.ring.GetItem(name, ndata); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
disks = items.([]string)
|
||||
if s.cfg.Debug {
|
||||
fmt.Printf("%T %s %v %s\n", s, "read", disks, name)
|
||||
}
|
||||
|
||||
var errs []error
|
||||
for _, disk := range disks {
|
||||
if fp, err = os.OpenFile(filepath.Join(disk, name), os.O_RDWR|os.O_CREATE, os.FileMode(0660)); err != nil {
|
||||
continue
|
||||
// TODO: remove from ring
|
||||
}
|
||||
if err = unix.Fallocate(int(fp.Fd()), 0, 0, int64(size)); err != nil {
|
||||
// TODO: remove from ring
|
||||
fp.Close()
|
||||
}
|
||||
}
|
||||
|
||||
if len(errs) > 0 {
|
||||
return errs[0]
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *BackendFilesystem) ReadAt(name string, buf []byte, offset int64, ndata int, nparity int) (int, error) {
|
||||
if s.cfg.Debug {
|
||||
fmt.Printf("%T %s\n", s, "read")
|
||||
}
|
||||
//var errs []error
|
||||
var err error
|
||||
var n int
|
||||
var items interface{}
|
||||
var disks []string
|
||||
var fp *os.File
|
||||
|
||||
if nparity == 0 && ndata > 0 {
|
||||
if items, err = s.ring.GetItem(name, ndata); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
disks = items.([]string)
|
||||
if s.cfg.Debug {
|
||||
fmt.Printf("%T %s %v %s %d %d\n", s, "read", disks, name, offset, len(buf))
|
||||
}
|
||||
|
||||
for len(disks) > 0 {
|
||||
s.mu.Lock()
|
||||
disk := disks[int(s.rng.Uint32n(uint32(len(disks))))]
|
||||
s.mu.Unlock()
|
||||
fname := filepath.Join(disk, name)
|
||||
if fp, err = os.OpenFile(fname, os.O_CREATE|os.O_RDWR, os.FileMode(0660)); err != nil {
|
||||
continue
|
||||
}
|
||||
n, err = fp.ReadAt(buf, int64(offset))
|
||||
fp.Close()
|
||||
if err == nil || err == io.EOF {
|
||||
//fmt.Printf("ret from read %d %s\n", n, buf)
|
||||
return n, nil
|
||||
}
|
||||
// fmt.Printf("aaaa %v\n", err)
|
||||
/*
|
||||
o.fs.ring.DelItem(disk)
|
||||
o.fs.ring.SetState(ring.StateDegrade)
|
||||
|
||||
copy(disks[rnd:], disks[rnd+1:])
|
||||
// disks[len(disks)-1] = nil
|
||||
disks = disks[:len(disks)-1]
|
||||
*/
|
||||
}
|
||||
|
||||
return 0, backend.ErrIO
|
||||
}
|
||||
|
||||
// rwres carries the result of a single per-disk read/write attempt. It is
// currently only referenced by the commented-out parallel-write draft in
// WriteAt.
type rwres struct {
	n   int
	err error
}
|
||||
|
||||
func (s *BackendFilesystem) WriterTo(name string, w io.Writer, offset int64, size int64, ndata int, nparity int) (int64, error) {
|
||||
|
||||
buf := make([]byte, size)
|
||||
n, err := s.ReadAt(name, buf, offset, ndata, nparity)
|
||||
if err != nil || int64(n) < size {
|
||||
return 0, backend.ErrIO
|
||||
}
|
||||
n, err = w.Write(buf)
|
||||
return int64(n), err
|
||||
}
|
||||
|
||||
func (s *BackendFilesystem) ReaderFrom(name string, r io.Reader, offset int64, size int64, ndata int, nparity int) (int64, error) {
|
||||
buf := make([]byte, size)
|
||||
n, err := r.Read(buf)
|
||||
if err != nil || int64(n) < size {
|
||||
return 0, backend.ErrIO
|
||||
}
|
||||
n, err = s.WriteAt(name, buf, offset, ndata, nparity)
|
||||
return int64(n), err
|
||||
}
|
||||
|
||||
func (s *BackendFilesystem) WriteAt(name string, buf []byte, offset int64, ndata int, nparity int) (int, error) {
|
||||
if s.cfg.Debug {
|
||||
fmt.Printf("%T %s\n", s, "write")
|
||||
}
|
||||
|
||||
var n int
|
||||
var err error
|
||||
var items interface{}
|
||||
var disks []string
|
||||
var fp *os.File
|
||||
if nparity == 0 && ndata > 0 {
|
||||
if items, err = s.ring.GetItem(name, ndata); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
disks = items.([]string)
|
||||
hinfo := struct {
|
||||
Hashes []uint64
|
||||
}{}
|
||||
_ = hinfo
|
||||
|
||||
//result := make(chan rwres, len(disks))
|
||||
for _, disk := range disks {
|
||||
// go func() {
|
||||
//var res rwres
|
||||
|
||||
fp, err = os.OpenFile(filepath.Join(disk, name), os.O_CREATE|os.O_RDWR, os.FileMode(0660))
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
//mw := io.MultiWriter{s.hash, fp
|
||||
n, err = fp.WriteAt(buf, int64(offset))
|
||||
/*
|
||||
if o.fs.cfg.Options.Sync {
|
||||
if res.err = fp.Sync(); res.err != nil {
|
||||
result <- res
|
||||
}
|
||||
}
|
||||
*/
|
||||
fp.Close()
|
||||
}
|
||||
|
||||
if s.ring.Size() < 1 {
|
||||
s.ring.SetState(ring.StateFail)
|
||||
return 0, fmt.Errorf("can't write to failed ring")
|
||||
}
|
||||
/*
|
||||
if res.err != nil || res.n < len(buf) {
|
||||
n = res.n
|
||||
err = res.err
|
||||
//delete(o.fps, res.disk)
|
||||
}
|
||||
*/
|
||||
return n, nil
|
||||
}
|
||||
|
||||
func (s *BackendFilesystem) Exists(name string, ndata int, nparity int) (bool, error) {
|
||||
if s.cfg.Debug {
|
||||
fmt.Printf("%T %s %s\n", s, "object_exists", name)
|
||||
}
|
||||
|
||||
var err error
|
||||
var items interface{}
|
||||
var disks []string
|
||||
if nparity == 0 && ndata > 0 {
|
||||
if items, err = s.ring.GetItem(name, ndata); err != nil {
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
disks = items.([]string)
|
||||
|
||||
for len(disks) > 0 {
|
||||
s.mu.Lock()
|
||||
disk := disks[int(s.rng.Uint32n(uint32(len(disks))))]
|
||||
s.mu.Unlock()
|
||||
if _, err = os.Stat(filepath.Join(disk, name)); err == nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return !os.IsNotExist(err), nil
|
||||
}
|
||||
|
||||
// Remove deletes the object name from the backing disks.
// NOTE(review): unimplemented stub — it only logs in debug mode and returns
// nil, so callers currently believe deletion succeeded.
func (s *BackendFilesystem) Remove(name string, ndata int, nparity int) error {
	if s.cfg.Debug {
		fmt.Printf("%T %s\n", s, "remove")
	}

	return nil
}
|
Reference in New Issue
Block a user