commit 24f641df0d33ee93950024b3af496731d011477a Author: Vasiliy Tolstov Date: Thu Apr 18 23:13:04 2019 +0300 initial rewrite Signed-off-by: Vasiliy Tolstov diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7560965 --- /dev/null +++ b/.gitignore @@ -0,0 +1,16 @@ +# ---> Go +# Binaries for programs and plugins +*.exe +*.dll +*.so +*.dylib + +# Test binary, build with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 +.glide/ + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f913bef --- /dev/null +++ b/LICENSE @@ -0,0 +1,191 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2018 unistack. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..8770272 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# crushkv + +CRUSH key-val store diff --git a/crush/crush.go b/crush/crush.go new file mode 100644 index 0000000..ae9ae97 --- /dev/null +++ b/crush/crush.go @@ -0,0 +1,111 @@ +package crush + +// Select choose nodes +func Select(parent Node, input uint32, count uint32, nodeType uint16, c Comparitor) []Node { + var results []Node + //if len(parent.Children) < count { + // panic("Asked for more node than are available") + //} + var rPrime uint32 + for r := uint32(1); r <= count; r++ { + var failure = uint32(0) + var loopbacks = 0 + var escape = false + var retryOrigin bool + var out Node + for { + retryOrigin = false + var in = parent + var skip = make(map[Node]bool) + var retryNode bool + for { + retryNode = false + rPrime = uint32(r + failure) + out = in.Select(input, rPrime) + if out.GetType() != nodeType { + in = out + retryNode = true + } else { + if contains(results, out) { + if !nodesAvailable(in, results, skip) { + if loopbacks == 150 { + escape = true + break + } + loopbacks++ + retryOrigin = true + } else { + retryNode = true + } + failure++ + + } else if c != nil && !c(out) { + skip[out] = true + if !nodesAvailable(in, results, skip) { + if loopbacks == 150 { + escape = true + break + } + loopbacks++ + retryOrigin = true + } else { + retryNode = true + } + failure++ + } else if isDefunct(out) { + failure++ + if loopbacks == 150 { + escape = true + break + } + loopbacks++ + retryOrigin = true + } else { + break + } + } + if !retryNode { + break + } + } + if !retryOrigin { + break + } + } + if escape { + continue + } + results = append(results, out) + } + return results +} + +func nodesAvailable(parent Node, selected []Node, rejected map[Node]bool) bool { + var children = parent.GetChildren() + for _, child := range children { + if !isDefunct(child) { + if ok := contains(selected, child); !ok { + if _, ok := rejected[child]; !ok { + return true + } + } + } + } + return false +} + +func contains(s []Node, n Node) bool { + for _, a := range s { + if a == n { + return true + } + } + return false +} + +func isDefunct(n Node) bool { + if n.IsLeaf() && n.IsFailed() { + return true + } + return false +} diff --git a/crush/hash_rjenkins.go b/crush/hash_rjenkins.go new file mode 100644 index 0000000..1ad64aa --- /dev/null +++ b/crush/hash_rjenkins.go @@ -0,0 +1,106 @@ +package crush + +const ( + MaxVal = uint32(0xFFFFFFFF) + HashSeed = uint32(1315423911) +) + +func hash1(a uint32) uint32 { + hash := HashSeed ^ a + x := uint32(231232) + y := uint32(1232) + b := a + hashMix(&b, &x, &hash) + hashMix(&y, &a, &hash) + + return hash +} + +func hash2(a, b uint32) uint32 { + hash := HashSeed ^ a ^ b + var x = uint32(231232) + var y = uint32(1232) + hashMix(&a, &b, &hash) + hashMix(&x, &a, &hash) + hashMix(&b, &y, &hash) + + return hash +} + +func hash3(a, b, c uint32) uint32 { + hash := HashSeed ^ a ^ b ^ c + x := uint32(231232) + y := uint32(1232) + hashMix(&a, &b, &hash) + hashMix(&c, &x, &hash) + hashMix(&y, &a, &hash) + hashMix(&b, &x, &hash) + hashMix(&y, &c, &hash) + + return hash +} + +func hash4(a, b, c, d uint32) uint32 { + hash := HashSeed ^ a ^ b ^ c ^ d + x := uint32(231232) + y := uint32(1232) + hashMix(&a, &b, &hash) + hashMix(&c, &d, &hash) + hashMix(&a, &x, &hash) + hashMix(&y, &b, &hash) + hashMix(&c, &x, &hash) + hashMix(&y, &d, &hash) + + return hash +} + +func hash5(a, b, c, d, e uint32) uint32 { + hash := HashSeed ^ a ^ b ^ c ^ d ^ e + x := uint32(231232) + y := uint32(1232) + hashMix(&a, &b, &hash) + hashMix(&c, &d, &hash) + hashMix(&e, &x, &hash) + hashMix(&y, &a, &hash) + hashMix(&b, &x, &hash) + hashMix(&y, &c, &hash) + hashMix(&d, &x, &hash) + hashMix(&y, &e, &hash) + + return hash +} + +/* + * Robert Jenkins' function for mixing 32-bit values + * http://burtleburtle.net/bob/hash/evahash.html + * a, b = random bits, c = input and output + */ +func hashMix(a, b, c *uint32) { + (*a) -= *b + (*a) -= *c + *a = *a ^ (*c >> 13) + *b -= *c + *b -= *a + *b = *b ^ (*a << 8) + *c -= *a + *c -= *b + *c = *c ^ (*b >> 13) + *a -= *b + *a -= *c + *a = *a ^ (*c >> 12) + *b -= *c + *b -= *a + *b = *b ^ (*a << 16) + *c -= *a + *c -= *b + *c = *c ^ (*b >> 5) + *a -= *b + *a -= *c + *a = *a ^ (*c >> 3) + *b -= *c + *b -= *a + *b = *b ^ (*a << 10) + *c -= *a + *c -= *b + *c = *c ^ (*b >> 15) +} diff --git a/crush/ln_score.go b/crush/ln_score.go new file mode 100644 index 0000000..f169d5b --- /dev/null +++ b/crush/ln_score.go @@ -0,0 +1,30 @@ +package crush + +func lnScore(child Node, weight float32, input uint32, round uint32) int64 { + var draw int64 + + if weight > 0 { + hash := hash3(input, Btoi(digestString(child.GetID())), round) + hash = hash & 0xFFFF + ln := int64(crushLn(hash) - 0x1000000000000) + draw = int64(float32(ln) / weight) + } else { + draw = S64_MIN + } + return draw +} + +const ( + U8_MAX uint8 = 255 + S8_MAX int8 = 127 + S8_MIN int8 = (-S8_MAX - 1) + U16_MAX uint16 = 65535 + S16_MAX int16 = 32767 + S16_MIN int16 = (-S16_MAX - 1) + U32_MAX uint32 = 4294967295 + S32_MAX int32 = 2147483647 + S32_MIN int32 = (-S32_MAX - 1) + U64_MAX uint64 = 18446744073709551615 + S64_MAX int64 = 9223372036854775807 + S64_MIN int64 = (-S64_MAX - 1) +) diff --git a/crush/node.go b/crush/node.go new file mode 100644 index 0000000..8a420ca --- /dev/null +++ b/crush/node.go @@ -0,0 +1,75 @@ +package crush + +type Selector interface { + Select(input uint32, round uint32) Node +} + +type Node interface { + GetChildren() []Node + GetType() uint16 + GetWeight() float32 + GetID() string + IsFailed() bool + GetSelector() Selector + SetSelector(Selector) + GetParent() Node + IsLeaf() bool + Select(input uint32, round uint32) Node +} + +type Comparitor func(Node) bool + +type CrushNode struct { + Selector Selector +} + +type TestingNode struct { + Children []Node + CrushNode + Weight float32 + Parent Node + Failed bool + ID string + Type uint16 + Alg string +} + +func (n CrushNode) GetSelector() Selector { + return n.Selector +} + +func (n *CrushNode) SetSelector(Selector Selector) { + n.Selector = Selector +} + +func (n CrushNode) Select(input uint32, round uint32) Node { + return n.GetSelector().Select(input, round) +} + +func (n TestingNode) IsFailed() bool { + return n.Failed +} + +func (n TestingNode) IsLeaf() bool { + return len(n.Children) == 0 +} + +func (n TestingNode) GetParent() Node { + return n.Parent +} + +func (n TestingNode) GetID() string { + return n.ID +} + +func (n TestingNode) GetWeight() float32 { + return n.Weight +} + +func (n TestingNode) GetType() uint16 { + return n.Type +} + +func (n TestingNode) GetChildren() []Node { + return n.Children +} diff --git a/crush/selector_hashing.go b/crush/selector_hashing.go new file mode 100644 index 0000000..ece2a40 --- /dev/null +++ b/crush/selector_hashing.go @@ -0,0 +1,115 @@ +package crush + +import ( + "bytes" + "crypto/sha1" + "encoding/binary" + "sort" +) + +type HashingSelector struct { + tokenList tokenList + tokenMap map[uint32]Node +} + +func NewHashingSelector(n Node) *HashingSelector { + var h = new(HashingSelector) + var nodes = n.GetChildren() + var maxWeight float32 + for _, node := range nodes { + maxWeight = Max64(maxWeight, node.GetWeight()) + } + h.tokenMap = make(map[uint32]Node) + for _, node := range nodes { + count := 500 * node.GetWeight() / maxWeight + var hash []byte + for i := float32(0); i < count; i++ { + var input []byte + if len(hash) == 0 { + input = []byte(node.GetID()) + } else { + input = hash + } + hash = digestBytes(input) + token := Btoi(hash) + if _, ok := h.tokenMap[token]; !ok { + h.tokenMap[token] = node + } + } + } + h.tokenList = make([]uint32, 0, len(h.tokenMap)) + for k := range h.tokenMap { + h.tokenList = append(h.tokenList, k) + } + sort.Sort(h.tokenList) + return h + +} + +type tokenList []uint32 + +func (t tokenList) Len() int { + return len(t) +} +func (t tokenList) Less(i, j int) bool { + return t[i] < t[j] +} + +func (t tokenList) Swap(i, j int) { + t[i], t[j] = t[j], t[i] +} + +func Max64(a float32, b float32) float32 { + if a > b { + return a + } + return b +} + +func digestInt64(input int64) []byte { + buf := new(bytes.Buffer) + binary.Write(buf, binary.LittleEndian, input) + bytes := buf.Bytes() + result := sha1.Sum(bytes) + hash := make([]byte, 20) + copy(hash[:], result[:20]) + return hash +} +func digestBytes(input []byte) []byte { + result := sha1.Sum(input) + hash := make([]byte, 20) + copy(hash[:], result[:20]) + return hash +} + +func digestString(input string) []byte { + result := sha1.Sum([]byte(input)) + hash := make([]byte, 20) + copy(hash[:], result[:20]) + return hash +} + +func Btoi(b []byte) uint32 { + var result uint32 + buf := bytes.NewReader(b) + binary.Read(buf, binary.LittleEndian, &result) + return result +} + +func (s *HashingSelector) Select(input uint32, round uint32) Node { + buf := new(bytes.Buffer) + binary.Write(buf, binary.LittleEndian, input) + binary.Write(buf, binary.LittleEndian, round) + bytes := buf.Bytes() + hash := digestBytes(bytes) + token := Btoi(hash) + return s.tokenMap[s.findToken(token)] +} + +func (s *HashingSelector) findToken(token uint32) uint32 { + i := sort.Search(len(s.tokenList), func(i int) bool { return s.tokenList[i] >= token }) + if i == len(s.tokenList) { + return s.tokenList[i-1] + } + return s.tokenList[i] +} diff --git a/crush/selector_straw2.go b/crush/selector_straw2.go new file mode 100644 index 0000000..3d712b2 --- /dev/null +++ b/crush/selector_straw2.go @@ -0,0 +1,224 @@ +package crush + +//"log" + +type Straw2Selector struct { + ItemWeights map[Node]float32 +} + +func NewStraw2Selector(n Node) *Straw2Selector { + s := new(Straw2Selector) + s.ItemWeights = make(map[Node]float32) + if n.IsLeaf() { + return s + } + + for _, child := range n.GetChildren() { + s.ItemWeights[child] = child.GetWeight() + } + + return s +} + +func (s *Straw2Selector) Select(input uint32, round uint32) Node { + var result Node + var i uint32 + var highDraw int64 + + for child, weight := range s.ItemWeights { + draw := lnScore(child, weight, input, round) + if i == 0 || draw > highDraw { + highDraw = draw + result = child + } + i++ + } + + if result == nil { + panic("Illegal state") + } + return result +} + +//compute 2^44*log2(input+1) +func crushLn(xin uint32) uint64 { + var x, x1 uint32 + var iexpon, index1, index2 int + var RH, LH, LL, xl64, result uint64 + + x = xin + 1 + + /* normalize input */ + iexpon = 15 + for 0 == (x & 0x18000) { + x <<= 1 + iexpon-- + } + + index1 = int((x >> 8) << 1) + /* RH ~ 2^56/index1 */ + RH = __RH_LH_tbl[index1-256] + /* LH ~ 2^48 * log2(index1/256) */ + LH = __RH_LH_tbl[index1+1-256] + + /* RH*x ~ 2^48 * (2^15 + xf), xf<2^8 */ + xl64 = uint64(x) * RH + xl64 >>= 48 + x1 = uint32(xl64) + + result = uint64(iexpon) + result <<= (12 + 32) + + index2 = int(x1 & 0xff) + /* LL ~ 2^48*log2(1.0+index2/2^15) */ + LL = uint64(__LL_tbl[index2]) + + LH = LH + LL + + LH >>= (48 - 12 - 32) + result += LH + + return result +} + +/* + * RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0) + * RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0) + */ +var __RH_LH_tbl []uint64 = []uint64{ + 0x0001000000000000, 0x0000000000000000, 0x0000fe03f80fe040, 0x000002dfca16dde1, + 0x0000fc0fc0fc0fc1, 0x000005b9e5a170b4, 0x0000fa232cf25214, 0x0000088e68ea899a, + 0x0000f83e0f83e0f9, 0x00000b5d69bac77e, 0x0000f6603d980f67, 0x00000e26fd5c8555, + 0x0000f4898d5f85bc, 0x000010eb389fa29f, 0x0000f2b9d6480f2c, 0x000013aa2fdd27f1, + 0x0000f0f0f0f0f0f1, 0x00001663f6fac913, 0x0000ef2eb71fc435, 0x00001918a16e4633, + 0x0000ed7303b5cc0f, 0x00001bc84240adab, 0x0000ebbdb2a5c162, 0x00001e72ec117fa5, + 0x0000ea0ea0ea0ea1, 0x00002118b119b4f3, 0x0000e865ac7b7604, 0x000023b9a32eaa56, + 0x0000e6c2b4481cd9, 0x00002655d3c4f15c, 0x0000e525982af70d, 0x000028ed53f307ee, + 0x0000e38e38e38e39, 0x00002b803473f7ad, 0x0000e1fc780e1fc8, 0x00002e0e85a9de04, + 0x0000e070381c0e08, 0x0000309857a05e07, 0x0000dee95c4ca038, 0x0000331dba0efce1, + 0x0000dd67c8a60dd7, 0x0000359ebc5b69d9, 0x0000dbeb61eed19d, 0x0000381b6d9bb29b, + 0x0000da740da740db, 0x00003a93dc9864b2, 0x0000d901b2036407, 0x00003d0817ce9cd4, + 0x0000d79435e50d7a, 0x00003f782d7204d0, 0x0000d62b80d62b81, 0x000041e42b6ec0c0, + 0x0000d4c77b03531e, 0x0000444c1f6b4c2d, 0x0000d3680d3680d4, 0x000046b016ca47c1, + 0x0000d20d20d20d21, 0x000049101eac381c, 0x0000d0b69fcbd259, 0x00004b6c43f1366a, + 0x0000cf6474a8819f, 0x00004dc4933a9337, 0x0000ce168a772509, 0x0000501918ec6c11, + 0x0000cccccccccccd, 0x00005269e12f346e, 0x0000cb8727c065c4, 0x000054b6f7f1325a, + 0x0000ca4587e6b750, 0x0000570068e7ef5a, 0x0000c907da4e8712, 0x000059463f919dee, + 0x0000c7ce0c7ce0c8, 0x00005b8887367433, 0x0000c6980c6980c7, 0x00005dc74ae9fbec, + 0x0000c565c87b5f9e, 0x00006002958c5871, 0x0000c4372f855d83, 0x0000623a71cb82c8, + 0x0000c30c30c30c31, 0x0000646eea247c5c, 0x0000c1e4bbd595f7, 0x000066a008e4788c, + 0x0000c0c0c0c0c0c1, 0x000068cdd829fd81, 0x0000bfa02fe80bfb, 0x00006af861e5fc7d, + 0x0000be82fa0be830, 0x00006d1fafdce20a, 0x0000bd6910470767, 0x00006f43cba79e40, + 0x0000bc52640bc527, 0x00007164beb4a56d, 0x0000bb3ee721a54e, 0x000073829248e961, + 0x0000ba2e8ba2e8bb, 0x0000759d4f80cba8, 0x0000b92143fa36f6, 0x000077b4ff5108d9, + 0x0000b81702e05c0c, 0x000079c9aa879d53, 0x0000b70fbb5a19bf, 0x00007bdb59cca388, + 0x0000b60b60b60b61, 0x00007dea15a32c1b, 0x0000b509e68a9b95, 0x00007ff5e66a0ffe, + 0x0000b40b40b40b41, 0x000081fed45cbccb, 0x0000b30f63528918, 0x00008404e793fb81, + 0x0000b21642c8590c, 0x000086082806b1d5, 0x0000b11fd3b80b12, 0x000088089d8a9e47, + 0x0000b02c0b02c0b1, 0x00008a064fd50f2a, 0x0000af3addc680b0, 0x00008c01467b94bb, + 0x0000ae4c415c9883, 0x00008df988f4ae80, 0x0000ad602b580ad7, 0x00008fef1e987409, + 0x0000ac7691840ac8, 0x000091e20ea1393e, 0x0000ab8f69e2835a, 0x000093d2602c2e5f, + 0x0000aaaaaaaaaaab, 0x000095c01a39fbd6, 0x0000a9c84a47a080, 0x000097ab43af59f9, + 0x0000a8e83f5717c1, 0x00009993e355a4e5, 0x0000a80a80a80a81, 0x00009b79ffdb6c8b, + 0x0000a72f0539782a, 0x00009d5d9fd5010b, 0x0000a655c4392d7c, 0x00009f3ec9bcfb80, + 0x0000a57eb50295fb, 0x0000a11d83f4c355, 0x0000a4a9cf1d9684, 0x0000a2f9d4c51039, + 0x0000a3d70a3d70a4, 0x0000a4d3c25e68dc, 0x0000a3065e3fae7d, 0x0000a6ab52d99e76, + 0x0000a237c32b16d0, 0x0000a8808c384547, 0x0000a16b312ea8fd, 0x0000aa5374652a1c, + 0x0000a0a0a0a0a0a1, 0x0000ac241134c4e9, 0x00009fd809fd80a0, 0x0000adf26865a8a1, + 0x00009f1165e72549, 0x0000afbe7fa0f04d, 0x00009e4cad23dd60, 0x0000b1885c7aa982, + 0x00009d89d89d89d9, 0x0000b35004723c46, 0x00009cc8e160c3fc, 0x0000b5157cf2d078, + 0x00009c09c09c09c1, 0x0000b6d8cb53b0ca, 0x00009b4c6f9ef03b, 0x0000b899f4d8ab63, + 0x00009a90e7d95bc7, 0x0000ba58feb2703a, 0x000099d722dabde6, 0x0000bc15edfeed32, + 0x0000991f1a515886, 0x0000bdd0c7c9a817, 0x00009868c809868d, 0x0000bf89910c1678, + 0x000097b425ed097c, 0x0000c1404eadf383, 0x000097012e025c05, 0x0000c2f5058593d9, + 0x0000964fda6c0965, 0x0000c4a7ba58377c, 0x000095a02568095b, 0x0000c65871da59dd, + 0x000094f2094f2095, 0x0000c80730b00016, 0x0000944580944581, 0x0000c9b3fb6d0559, + 0x0000939a85c4093a, 0x0000cb5ed69565af, 0x000092f113840498, 0x0000cd07c69d8702, + 0x0000924924924925, 0x0000ceaecfea8085, 0x000091a2b3c4d5e7, 0x0000d053f6d26089, + 0x000090fdbc090fdc, 0x0000d1f73f9c70c0, 0x0000905a38633e07, 0x0000d398ae817906, + 0x00008fb823ee08fc, 0x0000d53847ac00a6, 0x00008f1779d9fdc4, 0x0000d6d60f388e41, + 0x00008e78356d1409, 0x0000d8720935e643, 0x00008dda5202376a, 0x0000da0c39a54804, + 0x00008d3dcb08d3dd, 0x0000dba4a47aa996, 0x00008ca29c046515, 0x0000dd3b4d9cf24b, + 0x00008c08c08c08c1, 0x0000ded038e633f3, 0x00008b70344a139c, 0x0000e0636a23e2ee, + 0x00008ad8f2fba939, 0x0000e1f4e5170d02, 0x00008a42f870566a, 0x0000e384ad748f0e, + 0x000089ae4089ae41, 0x0000e512c6e54998, 0x0000891ac73ae982, 0x0000e69f35065448, + 0x0000888888888889, 0x0000e829fb693044, 0x000087f78087f781, 0x0000e9b31d93f98e, + 0x00008767ab5f34e5, 0x0000eb3a9f019750, 0x000086d905447a35, 0x0000ecc08321eb30, + 0x0000864b8a7de6d2, 0x0000ee44cd59ffab, 0x000085bf37612cef, 0x0000efc781043579, + 0x0000853408534086, 0x0000f148a170700a, 0x000084a9f9c8084b, 0x0000f2c831e44116, + 0x0000842108421085, 0x0000f446359b1353, 0x0000839930523fbf, 0x0000f5c2afc65447, + 0x000083126e978d50, 0x0000f73da38d9d4a, 0x0000828cbfbeb9a1, 0x0000f8b7140edbb1, + 0x0000820820820821, 0x0000fa2f045e7832, 0x000081848da8faf1, 0x0000fba577877d7d, + 0x0000810204081021, 0x0000fd1a708bbe11, 0x0000808080808081, 0x0000fe8df263f957, + 0x0000800000000000, 0x0000ffff00000000, +} + +/* + * LL_tbl[k] = 2^48*log2(1.0+k/2^15) + */ +var __LL_tbl []int64 = []int64{ + 0x0000000000000000, 0x00000002e2a60a00, 0x000000070cb64ec5, 0x00000009ef50ce67, + 0x0000000cd1e588fd, 0x0000000fb4747e9c, 0x0000001296fdaf5e, 0x0000001579811b58, + 0x000000185bfec2a1, 0x0000001b3e76a552, 0x0000001e20e8c380, 0x0000002103551d43, + 0x00000023e5bbb2b2, 0x00000026c81c83e4, 0x00000029aa7790f0, 0x0000002c8cccd9ed, + 0x0000002f6f1c5ef2, 0x0000003251662017, 0x0000003533aa1d71, 0x0000003815e8571a, + 0x0000003af820cd26, 0x0000003dda537fae, 0x00000040bc806ec8, 0x000000439ea79a8c, + 0x0000004680c90310, 0x0000004962e4a86c, 0x0000004c44fa8ab6, 0x0000004f270aaa06, + 0x0000005209150672, 0x00000054eb19a013, 0x00000057cd1876fd, 0x0000005aaf118b4a, + 0x0000005d9104dd0f, 0x0000006072f26c64, 0x0000006354da3960, 0x0000006636bc441a, + 0x0000006918988ca8, 0x0000006bfa6f1322, 0x0000006edc3fd79f, 0x00000071be0ada35, + 0x000000749fd01afd, 0x00000077818f9a0c, 0x0000007a6349577a, 0x0000007d44fd535e, + 0x0000008026ab8dce, 0x00000083085406e3, 0x00000085e9f6beb2, 0x00000088cb93b552, + 0x0000008bad2aeadc, 0x0000008e8ebc5f65, 0x0000009170481305, 0x0000009451ce05d3, + 0x00000097334e37e5, 0x0000009a14c8a953, 0x0000009cf63d5a33, 0x0000009fd7ac4a9d, + 0x000000a2b07f3458, 0x000000a59a78ea6a, 0x000000a87bd699fb, 0x000000ab5d2e8970, + 0x000000ae3e80b8e3, 0x000000b11fcd2869, 0x000000b40113d818, 0x000000b6e254c80a, + 0x000000b9c38ff853, 0x000000bca4c5690c, 0x000000bf85f51a4a, 0x000000c2671f0c26, + 0x000000c548433eb6, 0x000000c82961b211, 0x000000cb0a7a664d, 0x000000cdeb8d5b82, + 0x000000d0cc9a91c8, 0x000000d3ada20933, 0x000000d68ea3c1dd, 0x000000d96f9fbbdb, + 0x000000dc5095f744, 0x000000df31867430, 0x000000e2127132b5, 0x000000e4f35632ea, + 0x000000e7d43574e6, 0x000000eab50ef8c1, 0x000000ed95e2be90, 0x000000f076b0c66c, + 0x000000f35779106a, 0x000000f6383b9ca2, 0x000000f918f86b2a, 0x000000fbf9af7c1a, + 0x000000feda60cf88, 0x00000101bb0c658c, 0x000001049bb23e3c, 0x000001077c5259af, + 0x0000010a5cecb7fc, 0x0000010d3d81593a, 0x000001101e103d7f, 0x00000112fe9964e4, + 0x00000115df1ccf7e, 0x00000118bf9a7d64, 0x0000011ba0126ead, 0x0000011e8084a371, + 0x0000012160f11bc6, 0x000001244157d7c3, 0x0000012721b8d77f, 0x0000012a02141b10, + 0x0000012ce269a28e, 0x0000012fc2b96e0f, 0x00000132a3037daa, 0x000001358347d177, + 0x000001386386698c, 0x0000013b43bf45ff, 0x0000013e23f266e9, 0x00000141041fcc5e, + 0x00000143e4477678, 0x00000146c469654b, 0x00000149a48598f0, 0x0000014c849c117c, + 0x0000014f64accf08, 0x0000015244b7d1a9, 0x0000015524bd1976, 0x0000015804bca687, + 0x0000015ae4b678f2, 0x0000015dc4aa90ce, 0x00000160a498ee31, 0x0000016384819134, + 0x00000166646479ec, 0x000001694441a870, 0x0000016c24191cd7, 0x0000016df6ca19bd, + 0x00000171e3b6d7aa, 0x00000174c37d1e44, 0x00000177a33dab1c, 0x0000017a82f87e49, + 0x0000017d62ad97e2, 0x00000180425cf7fe, 0x00000182b07f3458, 0x0000018601aa8c19, + 0x00000188e148c046, 0x0000018bc0e13b52, 0x0000018ea073fd52, 0x000001918001065d, + 0x000001945f88568b, 0x000001973f09edf2, 0x0000019a1e85ccaa, 0x0000019cfdfbf2c8, + 0x0000019fdd6c6063, 0x000001a2bcd71593, 0x000001a59c3c126e, 0x000001a87b9b570b, + 0x000001ab5af4e380, 0x000001ae3a48b7e5, 0x000001b11996d450, 0x000001b3f8df38d9, + 0x000001b6d821e595, 0x000001b9b75eda9b, 0x000001bc96961803, 0x000001bf75c79de3, + 0x000001c254f36c51, 0x000001c534198365, 0x000001c81339e336, 0x000001caf2548bd9, + 0x000001cdd1697d67, 0x000001d0b078b7f5, 0x000001d38f823b9a, 0x000001d66e86086d, + 0x000001d94d841e86, 0x000001dc2c7c7df9, 0x000001df0b6f26df, 0x000001e1ea5c194e, + 0x000001e4c943555d, 0x000001e7a824db23, 0x000001ea8700aab5, 0x000001ed65d6c42b, + 0x000001f044a7279d, 0x000001f32371d51f, 0x000001f60236ccca, 0x000001f8e0f60eb3, + 0x000001fbbfaf9af3, 0x000001fe9e63719e, 0x000002017d1192cc, 0x000002045bb9fe94, + 0x000002073a5cb50d, 0x00000209c06e6212, 0x0000020cf791026a, 0x0000020fd622997c, + 0x00000212b07f3458, 0x000002159334a8d8, 0x0000021871b52150, 0x0000021b502fe517, + 0x0000021d6a73a78f, 0x000002210d144eee, 0x00000223eb7df52c, 0x00000226c9e1e713, + 0x00000229a84024bb, 0x0000022c23679b4e, 0x0000022f64eb83a8, 0x000002324338a51b, + 0x00000235218012a9, 0x00000237ffc1cc69, 0x0000023a2c3b0ea4, 0x0000023d13ee805b, + 0x0000024035e9221f, 0x00000243788faf25, 0x0000024656b4e735, 0x00000247ed646bfe, + 0x0000024c12ee3d98, 0x0000024ef1025c1a, 0x00000251cf10c799, 0x0000025492644d65, + 0x000002578b1c85ee, 0x0000025a6919d8f0, 0x0000025d13ee805b, 0x0000026025036716, + 0x0000026296453882, 0x00000265e0d62b53, 0x00000268beb701f3, 0x0000026b9c92265e, + 0x0000026d32f798a9, 0x00000271583758eb, 0x000002743601673b, 0x0000027713c5c3b0, + 0x00000279f1846e5f, 0x0000027ccf3d6761, 0x0000027e6580aecb, 0x000002828a9e44b3, + 0x0000028568462932, 0x00000287bdbf5255, 0x0000028b2384de4a, 0x0000028d13ee805b, + 0x0000029035e9221f, 0x0000029296453882, 0x0000029699bdfb61, 0x0000029902a37aab, + 0x0000029c54b864c9, 0x0000029deabd1083, 0x000002a20f9c0bb5, 0x000002a4c7605d61, + 0x000002a7bdbf5255, 0x000002a96056dafc, 0x000002ac3daf14ef, 0x000002af1b019eca, + 0x000002b296453882, 0x000002b5d022d80f, 0x000002b8fa471cb3, 0x000002ba9012e713, + 0x000002bd6d4901cc, 0x000002c04a796cf6, 0x000002c327a428a6, 0x000002c61a5e8f4c, + 0x000002c8e1e891f6, 0x000002cbbf023fc2, 0x000002ce9c163e6e, 0x000002d179248e13, + 0x000002d4562d2ec6, 0x000002d73330209d, 0x000002da102d63b0, 0x000002dced24f814, +} diff --git a/crush/selector_straw2_test.go b/crush/selector_straw2_test.go new file mode 100644 index 0000000..842a482 --- /dev/null +++ b/crush/selector_straw2_test.go @@ -0,0 +1,74 @@ +package crush + +import ( + "fmt" + "io/ioutil" + "testing" + + "github.com/unistack-org/crushkv/crushmap" +) + +func TestCrushStraw2(t *testing.T) { + tree, err := makeStraw2Tree(t, "ssd") + if err != nil { + t.Fatal(err) + } + + /* + fmt.Printf("root %s %f\n", tree.GetID(), tree.GetWeight()) + for _, n := range tree.GetChildren() { + if n.GetChildren() != nil { + for _, k := range n.GetChildren() { + fmt.Printf("children node %s %f\n", k.GetID(), k.GetWeight()) + } + } + fmt.Printf("node %s %f\n", n.GetID(), n.GetWeight()) + } + */ + nodes1 := Select(tree, 15, 2, 11, nil) + for _, node := range nodes1 { + for _, n := range node.GetChildren() { + fmt.Printf("[STRAW2] For key %d got node : %#+v\n", 15, n.GetID()) + } + } + + // nodes2 := Select(tree, 4564564564, 2, 11, nil) + /* + nodes3 := Select(tree, 8789342322, 3, NODE, nil) + */ + /* + for i := 0; i < 100000000; i++ { + for _, node := range nodes1 { + n := node.GetChildren()[0] + if n.GetID() != "root:ssd->disktype:rmosd1_ssd->osd.3" { + t.Fatal(fmt.Sprintf("[STRAW] For key %d got node : %#+v\n", 15, n)) + } + } + } + */ + /* + for _, node := range nodes2 { + n := node.GetChildren()[0] + log.Printf("[STRAW] For key %d got node : %#+v\n", 4564564564, n) + } + /* + for _, node := range nodes3 { + log.Printf("[STRAW] For key %d got node : %s", 8789342322, node.GetID()) + } + */ +} + +func makeStraw2Tree(t *testing.T, pool string) (*TestingNode, error) { + buf, err := ioutil.ReadFile("crushmap/testdata/map.txt2") + if err != nil { + return nil, err + } + + m := crushmap.NewMap() + err = m.DecodeText(buf) + if err != nil { + return nil, err + } + + return makeNode(m, m.GetBucketByName(pool), nil), nil +} diff --git a/crush/selector_test.go b/crush/selector_test.go new file mode 100644 index 0000000..e365e7f --- /dev/null +++ b/crush/selector_test.go @@ -0,0 +1,61 @@ +package crush + +import ( + "strings" + + "github.com/unistack-org/crushkv/crushmap" +) + +func makeNode(m *crushmap.Map, bucket *crushmap.Bucket, parent *TestingNode) *TestingNode { + var child *TestingNode + node := new(TestingNode) + node.ID = bucket.TypeName + ":" + bucket.Name + node.Type = uint16(bucket.TypeID) + node.Weight = bucket.Weight + node.Alg = bucket.Alg + //node.Children = make([]Node, len(bucket.Items)) + node.Parent = parent + if parent != nil { + node.ID = parent.ID + "->" + node.ID + // parent.Weight += node.Weight + } + + for _, item := range bucket.Items { + childBucket := m.GetBucketByName(item.Name) + if childBucket != nil { + child = makeNode(m, childBucket, node) + } else { + idx := strings.Index(item.Name, ".") + child = &TestingNode{ + ID: item.Name, + Type: m.GetTypeIDByName(item.Name[:idx]), + Weight: item.Weight, + Parent: node, + } + } + child.ID = node.ID + "->" + child.ID + if parent != nil { + parent.Weight += child.Weight + } + + switch child.Alg { + case "straw2": + child.Selector = NewStraw2Selector(child) + } + + node.Children = append(node.Children, child) + } + + if node.Weight == 0 { + for _, child := range node.Children { + node.Weight += child.GetWeight() + } + } + + switch bucket.Alg { + case "straw2": + node.Selector = NewStraw2Selector(node) + } + + return node +} diff --git a/crushmap/binary.go b/crushmap/binary.go new file mode 100644 index 0000000..81d9d60 --- /dev/null +++ b/crushmap/binary.go @@ -0,0 +1,208 @@ +package crushmap + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" +) + +const ( + Magic = uint32(0x00010000) +) + +type CrushRuleOpType uint32 + +const ( + CrushRuleNoop CrushRuleOpType = iota + CrushRuleTake + CrushRuleChooseFirstN + CrushRuleChooseIndep + CrushRuleEmit + CrushRuleChooseleafFirstN + CrushRuleChooseleafIndep + + CrushRuleSetChooseTries + CrushRuleSetChooseleafTries + CrushRuleSetChooseLocalTries + CrushRuleSetChooseLocalFallbackTries + CrushRuleSetChooseleafVaryR + CrushRuleSetChooseleafStable +) + +var ( + crushRuleOpTypeStringMap = map[CrushRuleOpType]string{ + CrushRuleNoop: "noop", + CrushRuleTake: "take", + CrushRuleChooseFirstN: "choose firstn", + CrushRuleChooseIndep: "choose indep", + CrushRuleEmit: "emit", + CrushRuleChooseleafFirstN: "choose_leaf firstn", + CrushRuleChooseleafIndep: "choose_leaf indep", + CrushRuleSetChooseTries: "set_choose_tries", + CrushRuleSetChooseleafTries: "set_chooseleaf_tries", + CrushRuleSetChooseLocalTries: "set_choose_local_tries", + CrushRuleSetChooseLocalFallbackTries: "set_choose_local_fallback_tries", + CrushRuleSetChooseleafVaryR: "set_choose_leaf_vary_r", + CrushRuleSetChooseleafStable: "set_choose_leaf_stable", + } + crushRuleOpStringTypeMap = map[string]CrushRuleOpType{ + "noop": CrushRuleNoop, + "take": CrushRuleTake, + "choose firstn": CrushRuleChooseFirstN, + "choose indep": CrushRuleChooseIndep, + "emit": CrushRuleEmit, + "choose_leaf firstn": CrushRuleChooseleafFirstN, + "choose_leaf indep": CrushRuleChooseleafIndep, + "set choose_tries": CrushRuleSetChooseTries, + "set chooseleaf_tries": CrushRuleSetChooseleafTries, + "set choose_local_tries": CrushRuleSetChooseLocalTries, + "set choose_local_fallback_tries": CrushRuleSetChooseLocalFallbackTries, + "set choose_leaf_vary_r": CrushRuleSetChooseleafVaryR, + "set choose_leaf_stable": CrushRuleSetChooseleafStable, + } +) + +func (t CrushRuleOpType) String() string { + op, ok := crushRuleOpTypeStringMap[t] + if !ok { + op = "invalid" + } + return op +} + +type CrushRuleStep struct { + Op CrushRuleOpType + Arg1 int32 + Arg2 int32 +} + +type binaryParser struct { + r io.Reader + w io.Writer +} + +func (cmap *Map) DecodeBinary(data []byte) error { + var err error + + var magic uint32 + p := &binaryParser{r: bytes.NewBuffer(data)} + + err = binary.Read(p.r, binary.LittleEndian, &magic) + if err != nil { + return err + } else if magic != Magic { + return fmt.Errorf("invalid magic: %0x != %0x", magic, Magic) + } + + var ( + maxBuckets int32 + maxRules uint32 + maxDevices int32 + ) + + err = binary.Read(p.r, binary.LittleEndian, &maxBuckets) + if err != nil { + return err + } + err = binary.Read(p.r, binary.LittleEndian, &maxRules) + if err != nil { + return err + } + err = binary.Read(p.r, binary.LittleEndian, &maxDevices) + if err != nil { + return err + } + + for i := int32(0); i < maxBuckets; i++ { + ibucket, err := p.handleBucket() + if err != nil { + return err + } + if ibucket == nil { + continue + } + cmap.Buckets = append(cmap.Buckets, ibucket) + } + + for i := uint32(0); i < maxRules; i++ { + irule, err := p.handleRule() + if err != nil { + return err + } + cmap.Rules = append(cmap.Rules, irule) + } + + itypes, err := p.handleType() + if err != nil { + return err + } + cmap.Types = itypes + + btypes := make(map[int32]string, len(itypes)) + for _, t := range itypes { + btypes[t.ID] = t.Name + } + + bnames := make(map[int32]string) + itypes, err = p.handleType() + if err != nil { + return err + } + for _, t := range itypes { + bnames[t.ID] = t.Name + } + + rnames := make(map[int32]string) + itypes, err = p.handleType() + if err != nil { + return err + } + for _, t := range itypes { + rnames[t.ID] = t.Name + } + + var ok bool + for _, bucket := range cmap.Buckets { + if bucket != nil { + if bucket.TypeName, ok = btypes[int32(bucket.TypeID)]; !ok { + return fmt.Errorf("unknown type id: %d", bucket.TypeID) + } + for _, item := range bucket.Items { + if item.Name, ok = bnames[int32(bucket.ID)]; !ok { + return fmt.Errorf("unknown type id: %d", bucket.ID) + } + } + } + } + + itypes, err = p.handleType() + if err != nil { + return err + } + + for _, rule := range cmap.Rules { + if rule.Name, ok = rnames[int32(rule.Ruleset)]; !ok { + return fmt.Errorf("unknown type id: %d", rule.ID) + } + for _, step := range rule.Steps { + switch step.Op { + default: + case CrushRuleChooseFirstN.String(), CrushRuleChooseIndep.String(), CrushRuleChooseleafFirstN.String(), CrushRuleChooseleafIndep.String(): + if step.ItemType, ok = btypes[step.ItemTypeID]; !ok { + return fmt.Errorf("unknown type id: %d", step.ItemTypeID) + } + } + } + } + + itunables, err := p.handleTunable() + if err != nil { + return err + } + cmap.Tunables = itunables + + cmap.rulesSort() + cmap.bucketsSort() + return nil +} diff --git a/crushmap/binary_bucket.go b/crushmap/binary_bucket.go new file mode 100644 index 0000000..19b7d34 --- /dev/null +++ b/crushmap/binary_bucket.go @@ -0,0 +1,302 @@ +package crushmap + +import ( + "encoding/binary" + "fmt" + "math" +) + +type binaryBucket interface { + BucketID() int32 + BucketType() CrushBucketType + BucketAlg() CrushAlgType + BucketHash() CrushBucketHashType + BucketWeight() float32 + BucketSize() uint32 +} + +func (b *BucketUniform) BucketID() int32 { + return b.ID +} +func (b *BucketUniform) BucketType() CrushBucketType { + return b.Type +} +func (b *BucketUniform) BucketAlg() CrushAlgType { + return b.Alg +} +func (b *BucketUniform) BucketHash() CrushBucketHashType { + return b.Hash +} +func (b *BucketUniform) BucketWeight() float32 { + return b.Weight +} +func (b *BucketUniform) BucketSize() uint32 { + return b.Size +} +func (b *BucketList) BucketID() int32 { + return b.ID +} +func (b *BucketList) BucketType() CrushBucketType { + return b.Type +} +func (b *BucketList) BucketAlg() CrushAlgType { + return b.Alg +} +func (b *BucketList) BucketHash() CrushBucketHashType { + return b.Hash +} +func (b *BucketList) BucketWeight() float32 { + return b.Weight +} +func (b *BucketList) BucketSize() uint32 { + return b.Size +} +func (b *BucketTree) BucketID() int32 { + return b.ID +} +func (b *BucketTree) BucketType() CrushBucketType { + return b.Type +} +func (b *BucketTree) BucketAlg() CrushAlgType { + return b.Alg +} +func (b *BucketTree) BucketHash() CrushBucketHashType { + return b.Hash +} +func (b *BucketTree) BucketWeight() float32 { + return b.Weight +} +func (b *BucketTree) BucketSize() uint32 { + return b.Size +} +func (b *BucketStraw) BucketID() int32 { + return b.ID +} +func (b *BucketStraw) BucketType() CrushBucketType { + return b.Type +} +func (b *BucketStraw) BucketAlg() CrushAlgType { + return b.Alg +} +func (b *BucketStraw) BucketHash() CrushBucketHashType { + return b.Hash +} +func (b *BucketStraw) BucketWeight() float32 { + return b.Weight +} +func (b *BucketStraw) BucketSize() uint32 { + return b.Size +} + +func (b *BucketStraw2) BucketID() int32 { + return b.ID +} +func (b *BucketStraw2) BucketType() CrushBucketType { + return b.Type +} +func (b *BucketStraw2) BucketAlg() CrushAlgType { + return b.Alg +} +func (b *BucketStraw2) BucketHash() CrushBucketHashType { + return b.Hash +} +func (b *BucketStraw2) BucketWeight() float32 { + return b.Weight +} +func (b *BucketStraw2) BucketSize() uint32 { + return b.Size +} + +type binaryBucketHeader struct { + ID int32 + Type CrushBucketType + Alg CrushAlgType + Hash CrushBucketHashType + Weight float32 + Size uint32 +} + +type binaryBucketCommon struct { + binaryBucketHeader + Items []int32 +} + +type BucketUniform struct { + binaryBucketCommon + ItemWeight float32 +} + +type BucketList struct { + binaryBucketCommon + ItemWeights []float32 + SumWeights []float32 +} + +type BucketTree struct { + binaryBucketCommon + NumNodes uint8 + NodeWeights []float32 +} + +type BucketStraw struct { + binaryBucketCommon + ItemWeights []float32 + Straws []uint32 +} + +type BucketStraw2 struct { + binaryBucketCommon + ItemWeights []float32 +} + +func (b *binaryBucketHeader) String() string { + return fmt.Sprintf("id: %d, type: %s, alg: %s, hash: %s, weight: %f, size: %d", + b.ID, b.Type, b.Alg, b.Hash, b.Weight, b.Size) +} + +func (p *binaryParser) handleBucket() (*Bucket, error) { + var err error + ibucket := &Bucket{} + var bucket binaryBucket + + var alg uint32 + err = binary.Read(p.r, binary.LittleEndian, &alg) + if err != nil { + return nil, err + } + + if CrushAlgType(alg) == CrushAlgInvalid { + return nil, nil + } + + bucketHeader := binaryBucketHeader{} + err = binary.Read(p.r, binary.LittleEndian, &bucketHeader.ID) + if err != nil { + return nil, err + } + err = binary.Read(p.r, binary.LittleEndian, &bucketHeader.Type) + if err != nil { + return nil, err + } + err = binary.Read(p.r, binary.LittleEndian, &bucketHeader.Alg) + if err != nil { + return nil, err + } + err = binary.Read(p.r, binary.LittleEndian, &bucketHeader.Hash) + if err != nil { + return nil, err + } + + var weight uint32 + err = binary.Read(p.r, binary.LittleEndian, &weight) + if err != nil { + return nil, err + } + bucketHeader.Weight = math.Float32frombits(weight) + err = binary.Read(p.r, binary.LittleEndian, &bucketHeader.Size) + if err != nil { + return nil, err + } + + bucketCommon := binaryBucketCommon{binaryBucketHeader: bucketHeader} + bucketCommon.Items = make([]int32, bucketHeader.Size) + for i := uint32(0); i < bucketHeader.Size; i++ { + err = binary.Read(p.r, binary.LittleEndian, &bucketCommon.Items[i]) + if err != nil { + return nil, err + } + } + switch bucketHeader.Alg { + case CrushAlgUniform: + bucketUniform := &BucketUniform{ + binaryBucketCommon: bucketCommon, + } + var itemWeight uint32 + err = binary.Read(p.r, binary.LittleEndian, &itemWeight) + if err != nil { + return nil, err + } + bucketUniform.ItemWeight = math.Float32frombits(itemWeight) + bucket = bucketUniform + case CrushAlgList: + itemWeights := make([]uint32, bucketHeader.Size) + sumWeights := make([]uint32, bucketHeader.Size) + bucketList := &BucketList{ + binaryBucketCommon: bucketCommon, + ItemWeights: make([]float32, bucketHeader.Size), + SumWeights: make([]float32, bucketHeader.Size), + } + for i := uint32(0); i <= bucketHeader.Size; i++ { + err = binary.Read(p.r, binary.LittleEndian, &itemWeights[i]) + if err != nil { + return nil, err + } + bucketList.ItemWeights[i] = math.Float32frombits(itemWeights[i]) + err = binary.Read(p.r, binary.LittleEndian, &sumWeights[i]) + if err != nil { + return nil, err + } + bucketList.SumWeights[i] = math.Float32frombits(sumWeights[i]) + } + bucket = bucketList + case CrushAlgTree: + bucketTree := &BucketTree{ + binaryBucketCommon: bucketCommon, + } + err = binary.Read(p.r, binary.LittleEndian, &bucketTree.NumNodes) + if err != nil { + return nil, err + } + nodeWeights := make([]uint32, bucketTree.NumNodes*4) + bucketTree.NodeWeights = make([]float32, bucketTree.NumNodes*4) + err = binary.Read(p.r, binary.LittleEndian, &nodeWeights) + if err != nil { + return nil, err + } + for i := 0; i < int(bucketTree.NumNodes*4); i++ { + bucketTree.NodeWeights[i] = math.Float32frombits(nodeWeights[i]) + } + bucket = bucketTree + case CrushAlgStraw: + itemWeights := make([]uint32, (bucketHeader.Size)*4) + bucketStraw := &BucketStraw{ + binaryBucketCommon: bucketCommon, + Straws: make([]uint32, (bucketHeader.Size)*4), + ItemWeights: make([]float32, (bucketHeader.Size)*4), + } + for i := uint32(0); i < bucketHeader.Size; i++ { + err = binary.Read(p.r, binary.LittleEndian, &itemWeights[i]) + if err != nil { + return nil, err + } + bucketStraw.ItemWeights[i] = math.Float32frombits(itemWeights[i]) + err = binary.Read(p.r, binary.LittleEndian, &bucketStraw.Straws[i]) + if err != nil { + return nil, err + } + } + bucket = bucketStraw + case CrushAlgStraw2: + itemWeights := make([]uint32, (bucketHeader.Size+1)*4) + bucketStraw2 := &BucketStraw2{ + binaryBucketCommon: bucketCommon, + ItemWeights: make([]float32, (bucketHeader.Size+1)*4), + } + err = binary.Read(p.r, binary.LittleEndian, &itemWeights) + if err != nil { + return nil, err + } + for i := uint32(0); i < (bucketHeader.Size+1)*4; i++ { + bucketStraw2.ItemWeights[i] = math.Float32frombits(itemWeights[i]) + } + bucket = bucketStraw2 + } + ibucket.ID = bucketHeader.ID + ibucket.Alg = bucketHeader.Alg.String() + ibucket.Hash = bucketHeader.Hash.String() + ibucket.TypeID = bucketHeader.Type + ibucket.Weight = bucketHeader.Weight + ibucket.Size = bucketHeader.Size + _ = bucket + return ibucket, nil +} diff --git a/crushmap/binary_rule.go b/crushmap/binary_rule.go new file mode 100644 index 0000000..8cb1a4c --- /dev/null +++ b/crushmap/binary_rule.go @@ -0,0 +1,85 @@ +package crushmap + +import ( + "encoding/binary" +) + +type binaryRuleStep struct { + Op CrushRuleOpType + Arg1 int32 + Arg2 int32 +} + +type binaryRuleMask struct { + Ruleset uint8 + Type uint8 + MinSize uint8 + MaxSize uint8 +} + +type binaryRule struct { + Len uint32 + Mask binaryRuleMask + Steps []binaryRuleStep +} + +func (p *binaryParser) handleRule() (*Rule, error) { + var err error + irule := &Rule{} + + var yes uint32 + err = binary.Read(p.r, binary.LittleEndian, &yes) + if err != nil { + return nil, err + } + if yes == 0 { + return nil, nil + } + + var rule binaryRule + err = binary.Read(p.r, binary.LittleEndian, &rule.Len) + if err != nil { + return nil, err + } + err = binary.Read(p.r, binary.LittleEndian, &rule.Mask) + if err != nil { + return nil, err + } + + // rule.Steps = make([]RuleStep, rule.Len) + for i := uint32(0); i < rule.Len; i++ { + var step binaryRuleStep + istep := &Step{} + err = binary.Read(p.r, binary.LittleEndian, &step) + if err != nil { + return nil, err + } + istep.Op = step.Op.String() + switch step.Op { + case CrushRuleChooseFirstN, CrushRuleChooseIndep, CrushRuleChooseleafFirstN, CrushRuleChooseleafIndep: + istep.Num = step.Arg1 + istep.ItemTypeID = step.Arg2 //TYPE!!! + case CrushRuleTake: + istep.ItemTypeID = step.Arg1 + // case CrushRuleEmit: + + // default: + // panic(step.Op.String()) + } + /* + Op string `json:"op"` + Item int `json:"item,omitempty"` + ItemName string `json:"item_name,omitempty"` + ItemClass string `json:"item_class,omitempty"` + Num int `json:"num,omitempty"` + ItemType string `json:"type,omitempty"` + */ + irule.Steps = append(irule.Steps, istep) + } + irule.ID = rule.Mask.Ruleset + irule.Ruleset = rule.Mask.Ruleset + irule.MinSize = rule.Mask.MinSize + irule.MaxSize = rule.Mask.MaxSize + irule.Type = rule.Mask.Type + return irule, nil +} diff --git a/crushmap/binary_test.go b/crushmap/binary_test.go new file mode 100644 index 0000000..96b2268 --- /dev/null +++ b/crushmap/binary_test.go @@ -0,0 +1,20 @@ +package crushmap + +import ( + "io/ioutil" + "testing" +) + +func TestBinary(t *testing.T) { + buf, err := ioutil.ReadFile("testdata/map.bin") + if err != nil { + t.Fatal(err) + } + + m := NewMap() + err = m.DecodeBinary(buf) + if err != nil { + t.Fatal(err) + } + _ = m +} diff --git a/crushmap/binary_tunable.go b/crushmap/binary_tunable.go new file mode 100644 index 0000000..91ef140 --- /dev/null +++ b/crushmap/binary_tunable.go @@ -0,0 +1,103 @@ +package crushmap + +import ( + "encoding/binary" + "io" +) + +type tunables struct { + // new block + ChooseLocalTries uint32 `json:"choose_local_tries,omitempty"` + ChooseLocalFallbackTries uint32 `json:"choose_local_fallback_tries,omitempty"` + ChooseTotalTries uint32 `json:"choose_total_tries,omitempty"` + // new block must be equal 1 + ChooseleafDescendOnce uint32 `json:"chooseleaf_descend_once,omitempty"` + // new block must be equal 1 + ChooseleafVaryR uint8 `json:"chooseleaf_vary_r,omitempty"` + // new block must be equal 1 + StrawCalcVersion uint8 `json:"straw_calc_version,omitempty"` + // new block must be equal ?? + AllowedBucketAlgs uint32 `json:"allowed_bucket_algs,omitempty"` + // new block must be equal 1 + ChooseleafStable uint8 `json:"chooseleaf_stable,omitempty"` +} + +func legacyTunable() tunables { + return tunables{ + ChooseLocalTries: 2, + ChooseLocalFallbackTries: 5, + ChooseTotalTries: 19, + ChooseleafDescendOnce: 0, + ChooseleafVaryR: 0, + ChooseleafStable: 0, + AllowedBucketAlgs: CrushLegacyAllowedBucketAlgs, + StrawCalcVersion: 0, + } +} + +func (p *binaryParser) handleTunable() (map[string]interface{}, error) { + var err error + + itunables := make(map[string]interface{}) + tune := legacyTunable() + + err = binary.Read(p.r, binary.LittleEndian, &tune.ChooseLocalTries) + if err != nil { + return nil, err + } + itunables["choose_local_tries"] = tune.ChooseLocalTries + + err = binary.Read(p.r, binary.LittleEndian, &tune.ChooseTotalTries) + if err != nil { + return nil, err + } + itunables["choose_total_tries"] = tune.ChooseTotalTries + + err = binary.Read(p.r, binary.LittleEndian, &tune.ChooseLocalFallbackTries) + if err != nil { + return nil, err + } + itunables["choose_local_fallback_tries"] = tune.ChooseLocalFallbackTries + + err = binary.Read(p.r, binary.LittleEndian, &tune.ChooseleafDescendOnce) + if err != nil && err == io.EOF { + return itunables, nil + } else if err != nil { + return nil, err + } + itunables["chooseleaf_descend_once"] = tune.ChooseleafDescendOnce + + err = binary.Read(p.r, binary.LittleEndian, &tune.ChooseleafVaryR) + if err != nil && err == io.EOF { + return itunables, nil + } else if err != nil { + return nil, err + } + itunables["chooseleaf_vary_r"] = tune.ChooseleafVaryR + + err = binary.Read(p.r, binary.LittleEndian, &tune.StrawCalcVersion) + if err != nil && err == io.EOF { + return itunables, nil + } else if err != nil { + return nil, err + } + itunables["straw_calc_version"] = tune.StrawCalcVersion + + err = binary.Read(p.r, binary.LittleEndian, &tune.AllowedBucketAlgs) + if err != nil && err == io.EOF { + return itunables, nil + } else if err != nil { + return nil, err + } + itunables["allowed_bucket_algs"] = tune.AllowedBucketAlgs + + err = binary.Read(p.r, binary.LittleEndian, &tune.ChooseleafStable) + if err != nil && err == io.EOF { + return itunables, nil + } else if err != nil { + return nil, err + } + itunables["chooseleaf_stable"] = tune.ChooseleafStable + + return itunables, nil +} diff --git a/crushmap/binary_type.go b/crushmap/binary_type.go new file mode 100644 index 0000000..97523b0 --- /dev/null +++ b/crushmap/binary_type.go @@ -0,0 +1,45 @@ +package crushmap + +import ( + "encoding/binary" +) + +func (p *binaryParser) handleType() ([]*Type, error) { + var err error + var n uint32 + var itypes []*Type + + err = binary.Read(p.r, binary.LittleEndian, &n) + if err != nil { + return nil, err + } + + for i := n; i > 0; i-- { + var key int32 + err = binary.Read(p.r, binary.LittleEndian, &key) + if err != nil { + return nil, err + } + + var l uint32 + err = binary.Read(p.r, binary.LittleEndian, &l) + if err != nil { + return nil, err + } + if l == 0 { + err = binary.Read(p.r, binary.LittleEndian, &l) + if err != nil { + return nil, err + } + } + + val := make([]byte, l) + err = binary.Read(p.r, binary.LittleEndian, &val) + if err != nil { + return nil, err + } + itypes = append(itypes, &Type{ID: key, Name: string(val)}) + } + + return itypes, nil +} diff --git a/crushmap/check.go b/crushmap/check.go new file mode 100644 index 0000000..b3659d6 --- /dev/null +++ b/crushmap/check.go @@ -0,0 +1,8 @@ +package crushmap + +/* +item 'slot1-3' in bucket 'cn01' has pos 3 >= size 3 (pos from 0 to len - 1 items) + + +in rule 'rule-data-ssd' item 'root' not defined (check items in take action step take root class ssd) +*/ diff --git a/crushmap/common.go b/crushmap/common.go new file mode 100644 index 0000000..dfb2c1e --- /dev/null +++ b/crushmap/common.go @@ -0,0 +1,241 @@ +package crushmap + +import ( + "fmt" + "sort" + "strconv" +) + +const ( + ReplicatedPG = 1 + ErasurePG = 3 +) + +type CrushBucketType uint16 +type CrushAlgType uint8 +type CrushBucketHashType uint8 + +const ( + CrushAlgInvalid CrushAlgType = iota + CrushAlgUniform + CrushAlgList + CrushAlgTree + CrushAlgStraw + CrushAlgStraw2 +) + +const ( + CrushLegacyAllowedBucketAlgs = (1 << CrushAlgUniform) | (1 << CrushAlgList) | (1 << CrushAlgStraw) +) + +var ( + crushAlgTypeStringMap = map[CrushAlgType]string{ + CrushAlgUniform: "uniform", + CrushAlgList: "list", + CrushAlgTree: "tree", + CrushAlgStraw: "straw", + CrushAlgStraw2: "straw2", + } + crushAlgStringTypeMap = map[string]CrushAlgType{ + "uniform": CrushAlgUniform, + "list": CrushAlgList, + "tree": CrushAlgTree, + "straw": CrushAlgStraw, + "straw2": CrushAlgStraw2, + } +) + +func (t CrushBucketType) String() string { + return fmt.Sprintf(strconv.Itoa(int(t))) +} + +func (t CrushBucketHashType) String() string { + if t == 0 { + return "rjenkins1" + } + return fmt.Sprintf(strconv.Itoa(int(t))) +} + +func (t CrushAlgType) String() string { + alg, ok := crushAlgTypeStringMap[t] + if !ok { + alg = "invalid" + } + return alg +} + +func CrushAlgFromType(t CrushAlgType) (string, error) { + alg, ok := crushAlgTypeStringMap[t] + if !ok { + return "", fmt.Errorf("unknown crush bucket alg: %d", t) + } + return alg, nil +} + +func CrushAlgFromString(t string) (CrushAlgType, error) { + alg, ok := crushAlgStringTypeMap[t] + if !ok { + return CrushAlgInvalid, fmt.Errorf("unknown crush bucket algo: %s", t) + } + return alg, nil +} + +type Tunables struct { + // new block + ChooseLocalTries uint32 `json:"choose_local_tries,omitempty"` + ChooseLocalFallbackTries uint32 `json:"choose_local_fallback_tries,omitempty"` + ChooseTotalTries uint32 `json:"choose_total_tries,omitempty"` + // new block must be equal 1 + ChooseleafDescendOnce uint32 `json:"chooseleaf_descend_once,omitempty"` + // new block must be equal 1 + ChooseleafVaryR uint8 `json:"chooseleaf_vary_r,omitempty"` + // new block must be equal 1 + StrawCalcVersion uint8 `json:"straw_calc_version,omitempty"` + // new block must be equal ?? + AllowedBucketAlgs uint32 `json:"allowed_bucket_algs,omitempty"` + // new block must be equal 1 + ChooseleafStable uint8 `json:"chooseleaf_stable,omitempty"` + // + + /* + "profile": "firefly", + "optimal_tunables": 0, + "legacy_tunables": 0, + "minimum_required_version": "firefly", + "require_feature_tunables": 1, + "require_feature_tunables2": 1, + "has_v2_rules": 1, + "require_feature_tunables3": 1, + "has_v3_rules": 0, + "has_v4_buckets": 0, + "require_feature_tunables5": 0, + "has_v5_rules": 0 + */ +} + +type Step struct { + Op string `json:"op"` + Item int `json:"item,omitempty"` + ItemName string `json:"item_name,omitempty"` + ItemClass string `json:"item_class,omitempty"` + Num int32 `json:"num,omitempty"` + ItemType string `json:"type,omitempty"` + ItemTypeID int32 `json:"-"` +} + +type Rule struct { + Name string `json:"rule_name"` + ID uint8 `json:"rule_id"` + Ruleset uint8 `json:"ruleset,omitempty"` + Type uint8 `json:"type"` + MinSize uint8 `json:"min_size,omitempty"` + MaxSize uint8 `json:"max_size,omitempty"` + Steps []*Step `json:"steps"` +} + +type Item struct { + ID int32 `json:"id"` + Name string `json:"-,omitempty"` + Weight float32 `json:"weight"` + Pos int `json:"pos"` +} + +type Bucket struct { + Name string `json:"name"` + TypeID CrushBucketType `json:"type_id"` + TypeName string `json:"type_name"` + Weight float32 `json:"weight"` + ID int32 `json:"id"` + IDClass string `json:"id_class,omitempty"` + Alg string `json:"alg"` + Hash string `json:"hash"` + Size uint32 `json:"-"` + Items []*Item `json:"items"` +} + +type Device struct { + ID int32 `json:"id"` + Name string `json:"name"` + Class string `json:"class,omitempty"` +} + +type Type struct { + ID int32 `json:"type_id"` + Name string `json:"name"` +} + +type ChooseArg struct { + BucketID int32 `json:"bucket_id,omitempty"` + WeightSet []float64 `json:"weight_set,omitempty"` + IDs []int `json:"ids,omitempty"` +} + +type Map struct { + Tunables map[string]interface{} `json:"tunables,omitempty"` + Devices []*Device `json:"devices"` + Types []*Type `json:"types"` + Buckets []*Bucket `json:"buckets"` + Rules []*Rule `json:"rules"` + ChooseArgs map[string]ChooseArg `json:"choose_args,omitempty"` +} + +type CrushChild struct { + ID int `json:"id"` + Name string `json:"name"` + Weight float32 `json:"weight"` +} + +type CrushTree struct { + Type string `json:"type"` + Name string `json:"name"` + ID int `json:"id"` + Children []*CrushChild `json:"children"` +} + +type CrushRule struct { + Data [][]string `json:"data"` +} + +type Crushmap struct { + Trees []*CrushTree `json:"trees"` + Rules []*CrushRule `json:"rules"` +} + +func (m *Map) rulesSort() { + sort.Slice(m.Rules, func(i, j int) bool { return m.Rules[i].ID < m.Rules[j].ID }) +} + +func (m *Map) bucketsSort() { + sort.Slice(m.Buckets, func(i, j int) bool { return m.Buckets[i].TypeID > m.Buckets[j].TypeID }) +} + +func (m *Map) GetTypeIDByName(name string) uint16 { + for _, t := range m.Types { + if t.Name == name { + return uint16(t.ID) + } + } + return 0 +} + +func (m *Map) GetBucketByName(name string) *Bucket { + for _, b := range m.Buckets { + if b.Name == name { + return b + } + } + return nil +} + +func (m *Map) GetBucketByID(id int32) *Bucket { + for _, b := range m.Buckets { + if b.ID == id { + return b + } + } + return nil +} + +func NewMap() *Map { + return &Map{Tunables: make(map[string]interface{})} +} diff --git a/crushmap/json.go b/crushmap/json.go new file mode 100644 index 0000000..7e744d0 --- /dev/null +++ b/crushmap/json.go @@ -0,0 +1,17 @@ +package crushmap + +import "encoding/json" + +func (cmap *Map) DecodeJson(data []byte) error { + err := json.Unmarshal(data, &cmap) + if err != nil { + return err + } + cmap.rulesSort() + cmap.bucketsSort() + return nil +} + +func (cmap *Map) EncodeJson() ([]byte, error) { + return json.Marshal(cmap) +} diff --git a/crushmap/json_test.go b/crushmap/json_test.go new file mode 100644 index 0000000..8c8b650 --- /dev/null +++ b/crushmap/json_test.go @@ -0,0 +1,20 @@ +package crushmap + +import ( + "io/ioutil" + "testing" +) + +func TestJson(t *testing.T) { + buf, err := ioutil.ReadFile("testdata/map.json") + if err != nil { + t.Fatal(err) + } + + m := NewMap() + err = m.DecodeJson(buf) + if err != nil { + t.Fatal(err) + } + _ = m +} diff --git a/crushmap/testdata/map.bin b/crushmap/testdata/map.bin new file mode 100644 index 0000000..0d22f59 Binary files /dev/null and b/crushmap/testdata/map.bin differ diff --git a/crushmap/testdata/map.json b/crushmap/testdata/map.json new file mode 100644 index 0000000..5243ad4 --- /dev/null +++ b/crushmap/testdata/map.json @@ -0,0 +1,416 @@ +{ + "devices": [ + { + "id": 0, + "name": "osd.0" + }, + { + "id": 1, + "name": "osd.1" + }, + { + "id": 2, + "name": "osd.2" + }, + { + "id": 3, + "name": "osd.3" + }, + { + "id": 4, + "name": "osd.4" + }, + { + "id": 5, + "name": "osd.5" + } + ], + "types": [ + { + "type_id": 0, + "name": "osd" + }, + { + "type_id": 1, + "name": "host" + }, + { + "type_id": 2, + "name": "chassis" + }, + { + "type_id": 3, + "name": "rack" + }, + { + "type_id": 4, + "name": "row" + }, + { + "type_id": 5, + "name": "pdu" + }, + { + "type_id": 6, + "name": "pod" + }, + { + "type_id": 7, + "name": "room" + }, + { + "type_id": 8, + "name": "datacenter" + }, + { + "type_id": 9, + "name": "region" + }, + { + "type_id": 10, + "name": "root" + }, + { + "type_id": 11, + "name": "disktype" + } + ], + "buckets": [ + { + "id": -1, + "name": "default", + "type_id": 10, + "type_name": "root", + "weight": 109509, + "alg": "straw", + "hash": "rjenkins1", + "items": [ + { + "id": -2, + "weight": 36503, + "pos": 0 + }, + { + "id": -3, + "weight": 36503, + "pos": 1 + }, + { + "id": -4, + "weight": 36503, + "pos": 2 + } + ] + }, + { + "id": -2, + "name": "rmosd1", + "type_id": 1, + "type_name": "host", + "weight": 36569, + "alg": "straw", + "hash": "rjenkins1", + "items": [ + { + "id": -5, + "weight": 32768, + "pos": 0 + }, + { + "id": -6, + "weight": 3801, + "pos": 1 + } + ] + }, + { + "id": -3, + "name": "rmosd2", + "type_id": 1, + "type_name": "host", + "weight": 36569, + "alg": "straw", + "hash": "rjenkins1", + "items": [ + { + "id": -7, + "weight": 32768, + "pos": 0 + }, + { + "id": -8, + "weight": 3801, + "pos": 1 + } + ] + }, + { + "id": -4, + "name": "rmosd3", + "type_id": 1, + "type_name": "host", + "weight": 36569, + "alg": "straw", + "hash": "rjenkins1", + "items": [ + { + "id": -9, + "weight": 32768, + "pos": 0 + }, + { + "id": -10, + "weight": 3801, + "pos": 1 + } + ] + }, + { + "id": -5, + "name": "rmosd1_ssd", + "type_id": 11, + "type_name": "disktype", + "weight": 3801, + "alg": "straw", + "hash": "rjenkins1", + "items": [ + { + "id": 3, + "weight": 3801, + "pos": 0 + } + ] + }, + { + "id": -6, + "name": "rmosd1_spinning", + "type_id": 11, + "type_name": "disktype", + "weight": 32768, + "alg": "straw", + "hash": "rjenkins1", + "items": [ + { + "id": 0, + "weight": 32768, + "pos": 0 + } + ] + }, + { + "id": -7, + "name": "rmosd2_ssd", + "type_id": 11, + "type_name": "disktype", + "weight": 3801, + "alg": "straw", + "hash": "rjenkins1", + "items": [ + { + "id": 4, + "weight": 3801, + "pos": 0 + } + ] + }, + { + "id": -8, + "name": "rmosd2_spinning", + "type_id": 11, + "type_name": "disktype", + "weight": 32768, + "alg": "straw", + "hash": "rjenkins1", + "items": [ + { + "id": 1, + "weight": 32768, + "pos": 0 + } + ] + }, + { + "id": -9, + "name": "rmosd3_ssd", + "type_id": 11, + "type_name": "disktype", + "weight": 3801, + "alg": "straw", + "hash": "rjenkins1", + "items": [ + { + "id": 5, + "weight": 3801, + "pos": 0 + } + ] + }, + { + "id": -10, + "name": "rmosd3_spinning", + "type_id": 11, + "type_name": "disktype", + "weight": 32768, + "alg": "straw", + "hash": "rjenkins1", + "items": [ + { + "id": 2, + "weight": 32768, + "pos": 0 + } + ] + }, + { + "id": -11, + "name": "spinning", + "type_id": 10, + "type_name": "root", + "weight": 98304, + "alg": "straw", + "hash": "rjenkins1", + "items": [ + { + "id": -6, + "weight": 32768, + "pos": 0 + }, + { + "id": -8, + "weight": 32768, + "pos": 1 + }, + { + "id": -10, + "weight": 32768, + "pos": 2 + } + ] + }, + { + "id": -12, + "name": "ssd", + "type_id": 10, + "type_name": "root", + "weight": 11403, + "alg": "straw", + "hash": "rjenkins1", + "items": [ + { + "id": -5, + "weight": 3801, + "pos": 0 + }, + { + "id": -7, + "weight": 3801, + "pos": 1 + }, + { + "id": -9, + "weight": 3801, + "pos": 2 + } + ] + } + ], + "rules": [ + { + "rule_id": 0, + "rule_name": "replicated_ruleset", + "ruleset": 0, + "type": 1, + "min_size": 1, + "max_size": 10, + "steps": [ + { + "op": "take", + "item": -11, + "item_name": "spinning" + }, + { + "op": "chooseleaf_firstn", + "num": 0, + "type": "disktype" + }, + { + "op": "emit" + } + ] + }, + { + "rule_id": 1, + "rule_name": "spinning", + "ruleset": 1, + "type": 3, + "min_size": 3, + "max_size": 20, + "steps": [ + { + "op": "set_chooseleaf_tries", + "num": 5 + }, + { + "op": "take", + "item": -11, + "item_name": "spinning" + }, + { + "op": "chooseleaf_indep", + "num": 0, + "type": "osd" + }, + { + "op": "emit" + } + ] + }, + { + "rule_id": 2, + "rule_name": "ssd", + "ruleset": 2, + "type": 1, + "min_size": 1, + "max_size": 10, + "steps": [ + { + "op": "take", + "item": -12, + "item_name": "ssd" + }, + { + "op": "chooseleaf_firstn", + "num": 0, + "type": "disktype" + }, + { + "op": "emit" + } + ] + } + ], + "tunables": { + "choose_local_tries": 0, + "choose_local_fallback_tries": 0, + "choose_total_tries": 50, + "chooseleaf_descend_once": 1, + "chooseleaf_vary_r": 1, + "chooseleaf_stable": 0, + "straw_calc_version": 1, + "allowed_bucket_algs": 22, + "profile": "firefly", + "optimal_tunables": 0, + "legacy_tunables": 0, + "minimum_required_version": "firefly", + "require_feature_tunables": 1, + "require_feature_tunables2": 1, + "has_v2_rules": 1, + "require_feature_tunables3": 1, + "has_v3_rules": 0, + "has_v4_buckets": 0, + "require_feature_tunables5": 0, + "has_v5_rules": 0 + }, + "choose_args": {} +} + diff --git a/crushmap/testdata/map.txt b/crushmap/testdata/map.txt new file mode 100644 index 0000000..db5a80c --- /dev/null +++ b/crushmap/testdata/map.txt @@ -0,0 +1,151 @@ +# begin crush map +tunable choose_local_tries 0 +tunable choose_local_fallback_tries 0 +tunable choose_total_tries 50 +tunable chooseleaf_descend_once 1 +tunable chooseleaf_vary_r 1 +tunable straw_calc_version 1 +# devices +device 0 osd.0 +device 1 osd.1 +device 2 osd.2 +device 3 osd.3 +device 4 osd.4 +device 5 osd.5 +# types +type 0 osd +type 1 host +type 2 chassis +type 3 rack +type 4 row +type 5 pdu +type 6 pod +type 7 room +type 8 datacenter +type 9 region +type 10 root +type 11 disktype +# buckets +disktype rmosd1_ssd { +id -5 # do not change unnecessarily +# weight 0.058 +alg straw +hash 0 # rjenkins1 +item osd.3 weight 0.058 +} +disktype rmosd1_spinning { +id -6 # do not change unnecessarily +# weight 0.500 +alg straw +hash 0 # rjenkins1 +item osd.0 weight 0.500 +} +host rmosd1 { +id -2 # do not change unnecessarily +# weight 0.557 +alg straw +hash 0 # rjenkins1 +item rmosd1_ssd weight 0.500 +item rmosd1_spinning weight 0.058 +} +disktype rmosd2_ssd { +id -7 # do not change unnecessarily +# weight 0.058 +alg straw +hash 0 # rjenkins1 +item osd.4 weight 0.058 +} +disktype rmosd2_spinning { +id -8 # do not change unnecessarily +# weight 0.500 +alg straw +hash 0 # rjenkins1 +item osd.1 weight 0.500 +} +host rmosd2 { +id -3 # do not change unnecessarily +# weight 0.557 +alg straw +hash 0 # rjenkins1 +item rmosd2_ssd weight 0.500 +item rmosd2_spinning weight 0.058 +} +disktype rmosd3_ssd { +id -9 # do not change unnecessarily +# weight 0.058 +alg straw +hash 0 # rjenkins1 +item osd.5 weight 0.058 +} +disktype rmosd3_spinning { +id -10 # do not change unnecessarily +# weight 0.500 +alg straw +hash 0 # rjenkins1 +item osd.2 weight 0.500 +} +host rmosd3 { +id -4 # do not change unnecessarily +# weight 0.557 +alg straw +hash 0 # rjenkins1 +item rmosd3_ssd weight 0.500 +item rmosd3_spinning weight 0.058 +} +root default { +id -1 # do not change unnecessarily +# weight 1.672 +alg straw +hash 0 # rjenkins1 +item rmosd1 weight 0.557 +item rmosd2 weight 0.557 +item rmosd3 weight 0.557 +} +root spinning { +id -11 # do not change unnecessarily +# weight 1.500 +alg straw +hash 0 # rjenkins1 +item rmosd1_spinning weight 0.500 +item rmosd2_spinning weight 0.500 +item rmosd3_spinning weight 0.500 +} +root ssd { +id -12 # do not change unnecessarily +# weight 0.174 +alg straw +hash 0 # rjenkins1 +item rmosd1_ssd weight 0.058 +item rmosd2_ssd weight 0.058 +item rmosd3_ssd weight 0.058 +} +# rules +rule replicated_ruleset { +ruleset 0 +type replicated +min_size 1 +max_size 10 +step take spinning +step chooseleaf firstn 0 type disktype +step emit +} +rule spinning { +ruleset 1 +type erasure +min_size 3 +max_size 20 +step set_chooseleaf_tries 5 +step take spinning +step chooseleaf indep 0 type osd +step emit +} +rule ssd { +ruleset 2 +type replicated +min_size 1 +max_size 10 +step take ssd +step chooseleaf firstn 0 type disktype +step emit +} +# end crush map diff --git a/crushmap/testdata/map.txt2 b/crushmap/testdata/map.txt2 new file mode 100644 index 0000000..7587f42 --- /dev/null +++ b/crushmap/testdata/map.txt2 @@ -0,0 +1,151 @@ +# begin crush map +tunable choose_local_tries 0 +tunable choose_local_fallback_tries 0 +tunable choose_total_tries 50 +tunable chooseleaf_descend_once 1 +tunable chooseleaf_vary_r 1 +tunable straw_calc_version 1 +# devices +device 0 osd.0 +device 1 osd.1 +device 2 osd.2 +device 3 osd.3 +device 4 osd.4 +device 5 osd.5 +# types +type 0 osd +type 1 host +type 2 chassis +type 3 rack +type 4 row +type 5 pdu +type 6 pod +type 7 room +type 8 datacenter +type 9 region +type 10 root +type 11 disktype +# buckets +disktype rmosd1_ssd { +id -5 # do not change unnecessarily +# weight 0.058 +alg straw2 +hash 0 # rjenkins1 +item osd.3 weight 0.058 +} +disktype rmosd1_spinning { +id -6 # do not change unnecessarily +# weight 0.500 +alg straw2 +hash 0 # rjenkins1 +item osd.0 weight 0.500 +} +host rmosd1 { +id -2 # do not change unnecessarily +# weight 0.557 +alg straw2 +hash 0 # rjenkins1 +item rmosd1_ssd weight 0.500 +item rmosd1_spinning weight 0.058 +} +disktype rmosd2_ssd { +id -7 # do not change unnecessarily +# weight 0.058 +alg straw2 +hash 0 # rjenkins1 +item osd.4 weight 0.058 +} +disktype rmosd2_spinning { +id -8 # do not change unnecessarily +# weight 0.500 +alg straw2 +hash 0 # rjenkins1 +item osd.1 weight 0.500 +} +host rmosd2 { +id -3 # do not change unnecessarily +# weight 0.557 +alg straw2 +hash 0 # rjenkins1 +item rmosd2_ssd weight 0.500 +item rmosd2_spinning weight 0.058 +} +disktype rmosd3_ssd { +id -9 # do not change unnecessarily +# weight 0.058 +alg straw2 +hash 0 # rjenkins1 +item osd.5 weight 0.058 +} +disktype rmosd3_spinning { +id -10 # do not change unnecessarily +# weight 0.500 +alg straw2 +hash 0 # rjenkins1 +item osd.2 weight 0.500 +} +host rmosd3 { +id -4 # do not change unnecessarily +# weight 0.557 +alg straw2 +hash 0 # rjenkins1 +item rmosd3_ssd weight 0.500 +item rmosd3_spinning weight 0.058 +} +root default { +id -1 # do not change unnecessarily +# weight 1.672 +alg straw2 +hash 0 # rjenkins1 +item rmosd1 weight 0.557 +item rmosd2 weight 0.557 +item rmosd3 weight 0.557 +} +root spinning { +id -11 # do not change unnecessarily +# weight 1.500 +alg straw2 +hash 0 # rjenkins1 +item rmosd1_spinning weight 0.500 +item rmosd2_spinning weight 0.500 +item rmosd3_spinning weight 0.500 +} +root ssd { +id -12 # do not change unnecessarily +# weight 0.174 +alg straw2 +hash 0 # rjenkins1 +item rmosd1_ssd weight 0.058 +item rmosd2_ssd weight 0.058 +item rmosd3_ssd weight 0.058 +} +# rules +rule replicated_ruleset { +ruleset 0 +type replicated +min_size 1 +max_size 10 +step take spinning +step chooseleaf firstn 0 type disktype +step emit +} +rule spinning { +ruleset 1 +type erasure +min_size 3 +max_size 20 +step set_chooseleaf_tries 5 +step take spinning +step chooseleaf indep 0 type osd +step emit +} +rule ssd { +ruleset 2 +type replicated +min_size 1 +max_size 10 +step take ssd +step chooseleaf firstn 0 type disktype +step emit +} +# end crush map diff --git a/crushmap/text.go b/crushmap/text.go new file mode 100644 index 0000000..86180b9 --- /dev/null +++ b/crushmap/text.go @@ -0,0 +1,147 @@ +package crushmap + +import ( + "fmt" + "sync" +) + +type textParser struct { + l *lex +} + +func identState(l *lex) stateFn { + +loop: + for { + r := l.lexPeek() + switch r { + case ' ': + break loop + default: + l.lexNext() + } + } + + switch l.lexCurrent() { + case "device": + l.lexIgnore() + l.lexPush(topState) + return deviceState + case "type": + l.lexIgnore() + l.lexPush(topState) + return typeState + case "rule": + l.lexIgnore() + l.lexPush(topState) + return ruleState + case "tunable": + l.lexIgnore() + l.lexPush(topState) + return tunableState + } + + l.lexPush(topState) + return bucketState +} + +func topState(l *lex) stateFn { + for { + r := l.lexPeek() + switch r { + case ' ': + l.lexNext() + l.lexIgnore() + case '\n': + l.lexNext() + l.lexIgnore() + case EOFRune: + l.lexEmit(itemEOF) + return nil + case '#': + l.lexNext() + l.lexIgnore() + l.lexPush(topState) + return commentLineState + default: + return identState + } + } + + return nil +} + +func (cmap *Map) DecodeText(data []byte) error { + var mu sync.Mutex + + mapItems := make(map[string]int32) + p := &textParser{l: lexNew(string(data), topState)} + p.l.lexStartSync() + +loop: + for { + tok, done := p.l.lexNextToken() + if done { + break loop + } + switch tok.itype { + case itemEOF: + break loop + case itemComment: + continue + case itemTunableBeg: + if itunekey, ituneval, err := p.handleTunable(); err != nil { + return err + } else { + cmap.Tunables[itunekey] = ituneval + } + case itemDeviceBeg: + if idevice, err := p.handleDevice(); err != nil { + return err + } else { + mu.Lock() + mapItems[idevice.Name] = idevice.ID + mu.Unlock() + cmap.Devices = append(cmap.Devices, idevice) + } + case itemTypeBeg: + if itype, err := p.handleType(); err != nil { + return err + } else { + mu.Lock() + mapItems[itype.Name] = itype.ID + mu.Unlock() + cmap.Types = append(cmap.Types, itype) + } + case itemRuleBeg: + if irule, err := p.handleRule(); err != nil { + return err + } else { + cmap.Rules = append(cmap.Rules, irule) + } + case itemBucketBeg: + if ibucket, err := p.handleBucket(tok.ivalue); err != nil { + return err + } else { + mu.Lock() + mapItems[ibucket.Name] = ibucket.ID + mu.Unlock() + cmap.Buckets = append(cmap.Buckets, ibucket) + } + default: + return fmt.Errorf("error: %s\n", tok.ivalue) + } + } + + for idx := range cmap.Buckets { + id, ok := mapItems[cmap.Buckets[idx].TypeName] + if !ok { + return fmt.Errorf("invalid bucket type: %s", cmap.Buckets[idx].TypeName) + } + cmap.Buckets[idx].TypeID = CrushBucketType(id) + } + + cmap.rulesSort() + cmap.bucketsSort() + return nil +} diff --git a/crushmap/text_bucket.go b/crushmap/text_bucket.go new file mode 100644 index 0000000..cb7c187 --- /dev/null +++ b/crushmap/text_bucket.go @@ -0,0 +1,337 @@ +package crushmap + +import ( + "errors" + "fmt" + "strconv" +) + +func bucketState(l *lex) stateFn { + l.lexEmit(itemBucketBeg) + return bucketStartState +} + +func bucketStartState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + + r := l.lexPeek() + switch r { + case '{': + l.lexNext() + l.lexIgnore() + return bucketIdentState + case '#': + l.lexErr(fmt.Sprintf("unexpected token %q", r)) + return l.lexPop() + } + + return bucketNameState +} + +func bucketNameState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop: + for { + r := l.lexPeek() + switch r { + case '{': + l.lexNext() + l.lexIgnore() + break loop + case ' ': + break loop + case '\n', '#': + l.lexErr(fmt.Sprintf("unexpected token %q", r)) + return l.lexPop() + default: + l.lexNext() + } + } + + l.lexEmit(itemBucketName) + return bucketIdentState +} + +func bucketIDState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop1: + for { + r := l.lexPeek() + switch r { + case ' ', '\n', '#', '\t': + break loop1 + } + l.lexNext() + } + + l.lexEmit(itemBucketID) + + l.lexTake(" \t") + l.lexIgnore() + +loop2: + for { + r := l.lexPeek() + switch r { + case ' ', '\n', '#', '\t': + break loop2 + default: + l.lexNext() + } + } + + switch l.lexCurrent() { + case "class": + l.lexIgnore() + return bucketIDClassState + } + + return bucketIdentState +} + +func bucketIDClassState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop: + for { + r := l.lexPeek() + switch r { + case ' ', '\n', '#', '\t': + break loop + default: + l.lexNext() + } + } + + l.lexEmit(itemBucketIDClass) + + return bucketIdentState +} + +func bucketHashState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + + l.lexTake("0123456789") + l.lexEmit(itemBucketHash) + + return bucketIdentState +} + +func bucketAlgState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + + for { + r := l.lexPeek() + if r == '\n' || r == ' ' || r == '#' { + break + } + l.lexNext() + } + + l.lexEmit(itemBucketAlg) + return bucketIdentState +} + +func bucketItemState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop: + for { + r := l.lexPeek() + switch r { + case ' ': + break loop + case '\n', '#': + l.lexErr(fmt.Sprintf("unexpected token %q", r)) + return l.lexPop() + } + l.lexNext() + } + + l.lexEmit(itemBucketItemName) + return bucketItemIdentState +} + +func bucketItemIdentState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop: + for { + r := l.lexPeek() + switch r { + case ' ', '\n': + break loop + case '#': + break loop + default: + l.lexNext() + } + } + + switch l.lexCurrent() { + case "weight": + l.lexIgnore() + return bucketItemWeightState + case "pos": + l.lexIgnore() + return bucketItemPosState + } + + l.lexEmit(itemBucketItemEnd) + return bucketIdentState +} + +func bucketItemWeightState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + + l.lexTake(".0123456789") + l.lexEmit(itemBucketItemWeight) + + return bucketItemIdentState +} + +func bucketItemPosState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + + l.lexTake("0123456789") + l.lexEmit(itemBucketItemPos) + + return bucketItemIdentState +} + +func bucketIdentState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop: + for { + r := l.lexPeek() + switch r { + case ' ': + break loop + case '#': + l.lexNext() + l.lexIgnore() + l.lexPush(bucketIdentState) + return commentLineState + case '}': + l.lexNext() + l.lexIgnore() + l.lexEmit(itemBucketEnd) + return l.lexPop() + case '\n': + l.lexNext() + l.lexIgnore() + return bucketIdentState + default: + l.lexNext() + } + } + + switch l.lexCurrent() { + case "id": + l.lexIgnore() + return bucketIDState + case "alg": + l.lexIgnore() + return bucketAlgState + case "hash": + l.lexIgnore() + return bucketHashState + case "item": + l.lexIgnore() + l.lexEmit(itemBucketItemBeg) + return bucketItemState + } + + return l.lexPop() +} + +func (p *textParser) handleBucket(itype string) (*Bucket, error) { + ibucket := &Bucket{TypeName: itype} + +Loop: + for { + tok, done := p.l.lexNextToken() + if done { + break Loop + } + switch tok.itype { + case itemEOF, itemBucketEnd: + break Loop + case itemComment: + continue + case itemBucketName: + ibucket.Name = tok.ivalue + case itemBucketIDClass: + ibucket.IDClass = tok.ivalue + case itemBucketID: + id, err := strconv.Atoi(tok.ivalue) + if err != nil { + return nil, err + } + ibucket.ID = int32(id) + case itemBucketAlg: + ibucket.Alg = tok.ivalue + case itemBucketHash: + if tok.ivalue == "0" { + ibucket.Hash = "rjenkins1" + } else { + return nil, errors.New("invalid bucket hash") + } + case itemBucketItemBeg: + item, err := p.handleBucketItem() + if err != nil { + return nil, err + } + ibucket.Items = append(ibucket.Items, item) + } + } + + return ibucket, nil +} + +func (p *textParser) handleBucketItem() (*Item, error) { + item := &Item{} + +Loop: + for { + tok, done := p.l.lexNextToken() + if done { + break Loop + } + switch tok.itype { + case itemEOF, itemBucketItemEnd: + break Loop + case itemComment: + continue + case itemBucketItemName: + item.Name = tok.ivalue + case itemBucketItemWeight: + id, err := strconv.ParseFloat(tok.ivalue, 32) + if err != nil { + return nil, err + } + item.Weight = float32(id) + case itemBucketItemPos: + id, err := strconv.Atoi(tok.ivalue) + if err != nil { + return nil, err + } + item.Pos = id + } + } + return item, nil +} diff --git a/crushmap/text_comment.go b/crushmap/text_comment.go new file mode 100644 index 0000000..44518f1 --- /dev/null +++ b/crushmap/text_comment.go @@ -0,0 +1,14 @@ +package crushmap + +func commentLineState(l *lex) stateFn { +loop: + for { + if r := l.lexPeek(); r == '\n' { + l.lexNext() + break loop + } + l.lexNext() + } + l.lexEmitTrim(itemComment) + return l.lexPop() +} diff --git a/crushmap/text_device.go b/crushmap/text_device.go new file mode 100644 index 0000000..c3a16ca --- /dev/null +++ b/crushmap/text_device.go @@ -0,0 +1,135 @@ +package crushmap + +import ( + "errors" + "fmt" + "strconv" +) + +func deviceState(l *lex) stateFn { + l.lexIgnore() + l.lexEmit(itemDeviceBeg) + + return deviceIDState +} + +func deviceIDState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop: + for { + r := l.lexPeek() + switch r { + case '\n', '#': + l.lexErr(fmt.Sprintf("unexpected token %q", r)) + return l.lexPop() + case ' ': + break loop + default: + l.lexNext() + } + } + + l.lexEmit(itemDeviceID) + return deviceNameState +} + +func deviceNameState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop: + for { + r := l.lexPeek() + switch r { + case ' ', '\n', '#', '\t': + break loop + default: + l.lexNext() + } + } + + l.lexEmit(itemDeviceName) + return deviceIdentState +} + +func deviceIdentState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop: + for { + r := l.lexPeek() + switch r { + case '\n', '#': + l.lexEmit(itemDeviceEnd) + return l.lexPop() + case ' ': + break loop + default: + l.lexNext() + } + } + + switch l.lexCurrent() { + case "class": + l.lexIgnore() + return deviceClassState + } + + return l.lexPop() +} + +func deviceClassState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop: + for { + r := l.lexPeek() + switch r { + case '\n', '#', ' ': + break loop + default: + l.lexNext() + } + } + + l.lexEmit(itemDeviceClass) + return deviceIdentState +} + +func (p *textParser) handleDevice() (*Device, error) { + idevice := &Device{ID: -1} + +Loop: + for { + tok, done := p.l.lexNextToken() + if done { + break Loop + } + switch tok.itype { + case itemEOF, itemDeviceEnd: + break Loop + case itemComment: + continue + case itemDeviceID: + id, err := strconv.Atoi(tok.ivalue) + if err != nil { + return nil, err + } + idevice.ID = int32(id) + case itemDeviceName: + idevice.Name = tok.ivalue + case itemDeviceClass: + idevice.Class = tok.ivalue + } + } + + if idevice.Name == "" { + return nil, errors.New("invalid device") + } + + return idevice, nil +} diff --git a/crushmap/text_lexer.go b/crushmap/text_lexer.go new file mode 100644 index 0000000..3880e24 --- /dev/null +++ b/crushmap/text_lexer.go @@ -0,0 +1,229 @@ +package crushmap + +import ( + "errors" + "strings" + "unicode/utf8" +) + +type stateFn func(*lex) stateFn + +type tokType int + +const ( + EOFRune rune = -1 +) + +const ( + itemError tokType = iota + + itemEOF + + itemComment + + itemTunableBeg + itemTunableKey + itemTunableVal + itemTunableEnd + + itemDeviceBeg + itemDeviceID + itemDeviceName + itemDeviceClass + itemDeviceEnd + + itemTypeBeg + itemTypeID + itemTypeName + itemTypeEnd + + itemBucketBeg + itemBucketName + itemBucketID + itemBucketIDClass + itemBucketAlg + itemBucketHash + itemBucketItemBeg + itemBucketItemName + itemBucketItemWeight + itemBucketItemPos + itemBucketItemEnd + itemBucketEnd + + itemRuleBeg + itemRuleName + itemRuleID + itemRuleRuleset + itemRuleType + itemRuleMinSize + itemRuleMaxSize + itemRuleStepBeg + itemRuleStepSetChooseleafTries + itemRuleStepSetChooseTries + itemRuleStepTake + itemRuleStepTakeType + itemRuleStepChoose + itemRuleStepTakeClass + itemRuleStepChooseFirstN + itemRuleStepChooseIndep + itemRuleStepChooseType + itemRuleStepEmit + itemRuleStepEnd + itemRuleEnd +) + +type item struct { + itype tokType + ivalue string + iline int +} + +type lex struct { + source string + start int + position int + line int + startState stateFn + err error + items chan item + errHandler func(string) + rewind runeStack + stack []stateFn +} + +func lexNew(src string, start stateFn) *lex { + buffSize := len(src) / 2 + if buffSize <= 0 { + buffSize = 1 + } + + return &lex{ + source: src, + startState: start, + line: 1, + rewind: newRuneStack(), + items: make(chan item, buffSize), + stack: make([]stateFn, 0, 10), + } +} + +func (l *lex) lexStart() { + go l.lexRun() +} + +func (l *lex) lexStartSync() { + l.lexRun() +} + +func lexIsDigit(r rune) bool { + return r >= '0' && r <= '9' +} + +func (l *lex) lexCurrent() string { + return l.source[l.start:l.position] +} + +func (l *lex) lexEmit(t tokType) { + itm := item{ + itype: t, + ivalue: l.lexCurrent(), + } + l.items <- itm + l.start = l.position + l.rewind.clear() +} + +func (l *lex) lexEmitTrim(t tokType) { + itm := item{ + itype: t, + ivalue: strings.TrimSpace(l.lexCurrent()), + } + l.items <- itm + l.start = l.position + l.rewind.clear() +} + +func (l *lex) lexIgnore() { + l.rewind.clear() + l.start = l.position +} + +func (l *lex) lexPeek() rune { + r := l.lexNext() + l.lexRewind() + return r +} + +func (l *lex) lexRewind() { + r := l.rewind.pop() + if r > EOFRune { + size := utf8.RuneLen(r) + l.position -= size + if l.position < l.start { + l.position = l.start + } + } +} + +func (l *lex) lexNext() rune { + var ( + r rune + s int + ) + str := l.source[l.position:] + if len(str) == 0 { + r, s = EOFRune, 0 + } else { + r, s = utf8.DecodeRuneInString(str) + } + l.position += s + l.rewind.push(r) + + return r +} + +func (l *lex) lexPush(state stateFn) { + l.stack = append(l.stack, state) +} + +func (l *lex) lexPop() stateFn { + if len(l.stack) == 0 { + l.lexErr("BUG in lexer: no states to pop") + } + last := l.stack[len(l.stack)-1] + l.stack = l.stack[0 : len(l.stack)-1] + return last +} + +func (l *lex) lexTake(chars string) { + r := l.lexNext() + for strings.ContainsRune(chars, r) { + r = l.lexNext() + } + l.lexRewind() // last next wasn't a match +} + +func (l *lex) lexNextToken() (*item, bool) { + if itm, ok := <-l.items; ok { + return &itm, false + } else { + return nil, true + } +} + +func (l *lex) lexErr(e string) { + if l.errHandler != nil { + l.err = errors.New(e) + l.errHandler(e) + } else { + panic(e) + } +} + +func (l *lex) lexRun() { + state := l.startState + for state != nil { + state = state(l) + } + close(l.items) +} diff --git a/crushmap/text_rule.go b/crushmap/text_rule.go new file mode 100644 index 0000000..b30511d --- /dev/null +++ b/crushmap/text_rule.go @@ -0,0 +1,462 @@ +package crushmap + +import ( + "errors" + "fmt" + "strconv" +) + +func ruleState(l *lex) stateFn { + l.lexEmit(itemRuleBeg) + + l.lexTake(" \t") + l.lexIgnore() + + r := l.lexPeek() + switch r { + case '{': + l.lexNext() + l.lexIgnore() + return ruleIdentState + case '#': + l.lexErr(fmt.Sprintf("unexpected token %q", r)) + return l.lexPop() + } + + return ruleNameState +} + +func ruleIdentState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop: + for { + r := l.lexPeek() + switch r { + case ' ': + break loop + case '#': + l.lexNext() + l.lexIgnore() + l.lexPush(ruleIdentState) + return commentLineState + case '}': + l.lexNext() + l.lexIgnore() + l.lexEmit(itemRuleEnd) + return l.lexPop() + case '\n': + l.lexNext() + l.lexIgnore() + return ruleIdentState + default: + l.lexNext() + } + } + + switch l.lexCurrent() { + case "id": + l.lexIgnore() + return ruleRuleIDState + case "ruleset": + l.lexIgnore() + return ruleRulesetState + case "min_size": + l.lexIgnore() + return ruleMinSizeState + case "max_size": + l.lexIgnore() + return ruleMaxSizeState + case "type": + l.lexIgnore() + return ruleTypeState + case "step": + l.lexIgnore() + l.lexEmit(itemRuleStepBeg) + return ruleStepIdentState + } + return l.lexPop() +} + +func ruleStepIdentState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop: + for { + r := l.lexPeek() + switch r { + case ' ', '\n', '#', '\t': + break loop + default: + l.lexNext() + } + } + + switch l.lexCurrent() { + case "set_chooseleaf_tries": + l.lexIgnore() + return ruleStepSetChooseleafTries + case "set_choose_tries": + l.lexIgnore() + return ruleStepSetChooseTries + case "take": + l.lexIgnore() + l.lexEmit(itemRuleStepTake) + return ruleStepTake + case "chooseleaf", "choose": + l.lexEmit(itemRuleStepChoose) + return ruleStepChoose + case "emit": + l.lexEmit(itemRuleStepEmit) + return ruleStepEmit + } + + return ruleIdentState +} + +func ruleStepSetChooseleafTries(l *lex) stateFn { + l.lexTake("0123456789") + l.lexEmit(itemRuleStepSetChooseleafTries) + return ruleIdentState +} + +func ruleStepSetChooseTries(l *lex) stateFn { + l.lexTake("0123456789") + l.lexEmit(itemRuleStepSetChooseTries) + return ruleIdentState +} + +func ruleStepChoose(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop: + for { + r := l.lexPeek() + switch r { + case ' ', '\n', '#', '\t': + break loop + default: + l.lexNext() + } + } + + switch l.lexCurrent() { + case "firstn": + l.lexIgnore() + return ruleStepChooseFirstN + case "indep": + l.lexIgnore() + return ruleStepChooseIndep + case "type": + l.lexIgnore() + return ruleStepChooseType + } + + l.lexEmit(itemRuleStepEnd) + return ruleIdentState +} + +func ruleStepChooseFirstN(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + l.lexTake("0123456789") + l.lexEmit(itemRuleStepChooseFirstN) + return ruleStepChoose +} + +func ruleStepChooseIndep(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + l.lexTake("0123456789") + l.lexEmit(itemRuleStepChooseIndep) + return ruleStepChoose +} + +func ruleStepChooseType(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop: + for { + r := l.lexPeek() + switch r { + case ' ', '\n', '#', '\t': + break loop + default: + l.lexNext() + } + } + + l.lexEmit(itemRuleStepChooseType) + return ruleStepChoose +} + +func ruleStepEmit(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + + l.lexEmit(itemRuleStepEnd) + return ruleIdentState +} + +func ruleStepTake(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop1: + for { + r := l.lexPeek() + switch r { + case ' ', '\n', '#', '\t': + break loop1 + default: + l.lexNext() + } + } + + l.lexEmit(itemRuleStepTakeType) + + l.lexTake(" \t") + l.lexIgnore() + +loop2: + for { + r := l.lexPeek() + switch r { + case ' ', '\n', '#', '\t': + break loop2 + default: + l.lexNext() + } + } + + switch l.lexCurrent() { + case "class": + l.lexIgnore() + return ruleStepTakeClass + } + + l.lexEmit(itemRuleStepEnd) + return ruleIdentState +} + +func ruleStepTakeClass(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop: + for { + r := l.lexPeek() + switch r { + case ' ', '\n', '#', '\t': + break loop + default: + l.lexNext() + } + } + + l.lexEmit(itemRuleStepTakeClass) + l.lexEmit(itemRuleStepEnd) + return ruleIdentState +} + +func ruleNameState(l *lex) stateFn { +loop: + for { + r := l.lexPeek() + switch r { + case '{': + l.lexNext() + l.lexIgnore() + break loop + case ' ': + break loop + case '\n', '#': + l.lexErr(fmt.Sprintf("unexpected token %q", r)) + return l.lexPop() + default: + l.lexNext() + } + } + l.lexEmit(itemRuleName) + return ruleIdentState +} + +func ruleRulesetState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + for { + r := l.lexPeek() + if r == '\n' || r == ' ' || r == '#' { + break + } + l.lexNext() + } + + l.lexEmit(itemRuleRuleset) + return ruleIdentState +} + +func ruleRuleIDState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + for { + r := l.lexPeek() + if r == '\n' || r == ' ' || r == '#' { + break + } + l.lexNext() + } + + l.lexEmit(itemRuleID) + return ruleIdentState +} + +func ruleMinSizeState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + l.lexTake("0123456789") + l.lexEmit(itemRuleMinSize) + return ruleIdentState +} + +func ruleMaxSizeState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + l.lexTake("0123456789") + l.lexEmit(itemRuleMaxSize) + return ruleIdentState +} + +func ruleTypeState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + + for { + r := l.lexPeek() + if r == '\n' || r == ' ' || r == '#' { + break + } + l.lexNext() + } + l.lexEmit(itemRuleType) + return ruleIdentState +} + +func (p *textParser) handleRule() (*Rule, error) { + irule := &Rule{} + +Loop: + for { + tok, done := p.l.lexNextToken() + if done { + break Loop + } + switch tok.itype { + case itemEOF, itemRuleEnd: + break Loop + case itemComment: + continue + case itemRuleRuleset: + id, err := strconv.Atoi(tok.ivalue) + if err != nil { + return nil, err + } + irule.Ruleset = uint8(id) + case itemRuleMinSize: + id, err := strconv.Atoi(tok.ivalue) + if err != nil { + return nil, err + } + irule.MinSize = uint8(id) + case itemRuleMaxSize: + id, err := strconv.Atoi(tok.ivalue) + if err != nil { + return nil, err + } + irule.MaxSize = uint8(id) + case itemRuleStepBeg: + if istep, err := p.handleRuleStep(); err != nil { + return nil, err + } else { + istep.Num = int32(len(irule.Steps)) + irule.Steps = append(irule.Steps, istep) + } + case itemRuleName: + irule.Name = tok.ivalue + case itemRuleType: + id, err := strconv.Atoi(tok.ivalue) + if err != nil { + switch tok.ivalue { + case "replicated": + irule.Type = ReplicatedPG + case "erasure": + irule.Type = ErasurePG + default: + return nil, errors.New("unknown rule type") + } + } else { + irule.Type = uint8(id) + } + case itemRuleID: + id, err := strconv.Atoi(tok.ivalue) + if err != nil { + return nil, err + } + irule.ID = uint8(id) + } + } + if irule.ID != irule.Ruleset { + irule.Ruleset = irule.ID + } + return irule, nil +} + +func (p *textParser) handleRuleStep() (*Step, error) { + istep := &Step{} + +Loop: + for { + tok, done := p.l.lexNextToken() + if done { + break Loop + } + switch tok.itype { + case itemEOF, itemRuleStepEnd: + break Loop + case itemComment: + continue + case itemRuleStepTake: + istep.Op = "take" + istep.Item = -1 + case itemRuleStepTakeType: + istep.ItemName = tok.ivalue + case itemRuleStepTakeClass: + istep.ItemClass = tok.ivalue + case itemRuleStepChoose: + istep.Op = tok.ivalue + case itemRuleStepChooseIndep: + istep.Op = fmt.Sprintf("%s_%s", istep.Op, "indep") + id, err := strconv.Atoi(tok.ivalue) + if err != nil { + return nil, err + } + istep.Num = int32(id) + case itemRuleStepChooseFirstN: + istep.Op = fmt.Sprintf("%s_%s", istep.Op, "firstn") + id, err := strconv.Atoi(tok.ivalue) + if err != nil { + return nil, err + } + istep.Num = int32(id) + case itemRuleStepChooseType: + istep.ItemType = tok.ivalue + case itemRuleStepEmit: + istep.Op = "emit" + } + } + return istep, nil +} diff --git a/crushmap/text_space.go b/crushmap/text_space.go new file mode 100644 index 0000000..debd3ef --- /dev/null +++ b/crushmap/text_space.go @@ -0,0 +1,16 @@ +package crushmap + +import "fmt" + +func spaceState(l *lex) stateFn { + r := l.lexNext() + + if r != ' ' && r != '\t' && r != '\n' && r != '\r' { + l.lexErr(fmt.Sprintf("unexpected token %q", r)) + return nil + } + + l.lexTake(" \t") + l.lexIgnore() + return l.lexPop() +} diff --git a/crushmap/text_stack.go b/crushmap/text_stack.go new file mode 100644 index 0000000..d8c281a --- /dev/null +++ b/crushmap/text_stack.go @@ -0,0 +1,38 @@ +package crushmap + +type runeNode struct { + r rune + next *runeNode +} + +type runeStack struct { + start *runeNode +} + +func newRuneStack() runeStack { + return runeStack{} +} + +func (s *runeStack) push(r rune) { + node := &runeNode{r: r} + if s.start == nil { + s.start = node + } else { + node.next = s.start + s.start = node + } +} + +func (s *runeStack) pop() rune { + if s.start == nil { + return EOFRune + } else { + n := s.start + s.start = n.next + return n.r + } +} + +func (s *runeStack) clear() { + s.start = nil +} diff --git a/crushmap/text_test.go b/crushmap/text_test.go new file mode 100644 index 0000000..90cf5bc --- /dev/null +++ b/crushmap/text_test.go @@ -0,0 +1,20 @@ +package crushmap + +import ( + "io/ioutil" + "testing" +) + +func TestText(t *testing.T) { + buf, err := ioutil.ReadFile("testdata/map.txt") + if err != nil { + t.Fatal(err) + } + + m := NewMap() + err = m.DecodeText(buf) + if err != nil { + t.Fatal(err) + } + _ = m +} diff --git a/crushmap/text_tunable.go b/crushmap/text_tunable.go new file mode 100644 index 0000000..5908256 --- /dev/null +++ b/crushmap/text_tunable.go @@ -0,0 +1,100 @@ +package crushmap + +import ( + "errors" + "fmt" + "strconv" +) + +func tunableState(l *lex) stateFn { + l.lexIgnore() + + if r := l.lexPeek(); r != ' ' { + l.lexErr(fmt.Sprintf("unexpected token %q", r)) + return l.lexPop() + } + + l.lexEmit(itemTunableBeg) + + return tunableKeyState +} + +func tunableKeyState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop: + for { + r := l.lexPeek() + switch r { + case '\n', '#': + l.lexErr(fmt.Sprintf("unexpected token %q", r)) + return l.lexPop() + case ' ': + break loop + default: + l.lexNext() + } + } + + l.lexEmit(itemTunableKey) + + return tunableValState +} + +func tunableValState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + +loop: + for { + r := l.lexPeek() + switch r { + case '\n', '#', ' ': + break loop + default: + l.lexNext() + } + } + + l.lexEmit(itemTunableVal) + + l.lexEmit(itemTunableEnd) + + return l.lexPop() +} + +func (p *textParser) handleTunable() (string, interface{}, error) { + var key string + var val interface{} + +Loop: + for { + tok, done := p.l.lexNextToken() + if done { + break Loop + } + + switch tok.itype { + case itemEOF, itemTunableEnd: + break Loop + case itemComment: + continue + case itemTunableKey: + key = tok.ivalue + case itemTunableVal: + id, err := strconv.Atoi(tok.ivalue) + if err != nil { + val = tok.ivalue + } else { + val = id + } + } + } + + if key == "" { + return "", nil, errors.New("invalid tunable") + } + + return key, val, nil +} diff --git a/crushmap/text_type.go b/crushmap/text_type.go new file mode 100644 index 0000000..142633f --- /dev/null +++ b/crushmap/text_type.go @@ -0,0 +1,75 @@ +package crushmap + +import ( + "errors" + "strconv" +) + +func typeState(l *lex) stateFn { + l.lexEmit(itemTypeBeg) + return typeIDState +} + +func typeIDState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + for { + r := l.lexPeek() + if r == '\n' || r == ' ' || r == '#' { + break + } + l.lexNext() + } + + l.lexEmit(itemTypeID) + return typeNameState +} + +func typeNameState(l *lex) stateFn { + l.lexTake(" \t") + l.lexIgnore() + + for { + r := l.lexPeek() + if r == '\n' || r == ' ' || r == '#' { + break + } + l.lexNext() + } + + l.lexEmit(itemTypeName) + l.lexEmit(itemTypeEnd) + return l.lexPop() +} + +func (p *textParser) handleType() (*Type, error) { + itype := &Type{ID: -1} + +Loop: + for { + tok, done := p.l.lexNextToken() + if done { + break Loop + } + switch tok.itype { + case itemEOF, itemTypeEnd: + break Loop + case itemComment: + continue + case itemTypeID: + id, err := strconv.Atoi(tok.ivalue) + if err != nil { + return nil, err + } + itype.ID = int32(id) + case itemTypeName: + itype.Name = tok.ivalue + } + } + + if itype.Name == "" { + return nil, errors.New("invalid type") + } + + return itype, nil +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..10616c7 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module github.com/unistack-org/crushkv + +go 1.12