Initial commit. Beginnings of an ndb parsing library. Lexer/parser functional.
What's left is mapping the input to Go values.
This commit is contained in:
commit
751b403e15
155
ndb.go
Normal file
155
ndb.go
Normal file
@ -0,0 +1,155 @@
|
||||
// Package ndb decodes and encodes simple strings of key=value pairs.
|
||||
// The accepted format is based on Plan 9's ndb(6) format found at
|
||||
// http://plan9.bell-labs.com/magic/man2html/6/ndb . Values containing
|
||||
// white space must be quoted in single quotes. Two single quotes escape
|
||||
// a literal single quote. Attributes must not contain white space. A
|
||||
// value may contain any printable Unicode character except for a newline.
|
||||
package ndb
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"bytes"
|
||||
"bufio"
|
||||
"net/textproto"
|
||||
"fmt"
|
||||
"io"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// A SyntaxError contains the data that caused an error and the
// offset of the first byte that caused the syntax error. Message
// describes what was wrong with the input. Data may only be valid
// until the next call to the Decode() method.
type SyntaxError struct {
	Data    []byte
	Offset  int64
	Message string
}

// A TypeError occurs when a Go value is incompatible with the ndb
// string it must store or create.
type TypeError struct {
	Type reflect.Type
}

// Error implements the error interface. A nil receiver or a nil Type
// is reported as "<nil>" rather than dereferencing a nil reflect.Type.
func (e *TypeError) Error() string {
	if e == nil || e.Type == nil {
		return "Invalid type <nil> or nil pointer"
	}
	return fmt.Sprintf("Invalid type %s or nil pointer", e.Type.String())
}

// min returns the smaller of a and b.
func min(a, b int64) int64 {
	if a < b {
		return a
	}
	return b
}

// Error implements the error interface. The result is Message followed
// by an excerpt of Data that begins at Offset and spans roughly ten
// bytes, widened so that it starts and ends on UTF-8 rune boundaries.
//
// Offset is clamped to [0, len(Data)], so an error reported at the very
// end of the input (for example an unterminated quoted string) yields an
// empty excerpt instead of an out-of-range panic.
func (e *SyntaxError) Error() string {
	size := int64(len(e.Data))

	start := e.Offset
	if start < 0 {
		start = 0
	}
	if start > size {
		start = size
	}
	end := min(start+10, size)

	// Back up to the first byte of the rune that contains Offset. The
	// bounds are tested before indexing so start == size is safe.
	for start > 0 && start < size && !utf8.RuneStart(e.Data[start]) {
		start--
	}
	// Extend the end past any continuation bytes so the last rune is not
	// cut in half. Stopping at the next rune start keeps the excerpt
	// short even when Data holds invalid UTF-8.
	for end < size && !utf8.RuneStart(e.Data[end]) {
		end++
	}

	return fmt.Sprintf("%s\n\tat `%s'", e.Message, e.Data[start:end])
}
|
||||
|
||||
// An Encoder wraps an io.Writer and serializes Go values
// into ndb strings. Successive calls to the Encode() method
// append lines to the io.Writer.
type Encoder struct {
	// out buffers writes to the destination. It is held by pointer
	// because bufio.NewWriter returns a *bufio.Writer and the zero
	// bufio.Writer has no destination to write to.
	out *bufio.Writer
}
|
||||
|
||||
// A decoder wraps an io.Reader and decodes successive ndb strings
|
||||
// into Go values using the Decode() function.
|
||||
type Decoder struct {
|
||||
src *textproto.Reader
|
||||
pairbuf []pair
|
||||
}
|
||||
|
||||
// The Parse function reads an entire ndb string and unmarshals it
|
||||
// into the Go value v. Parse will behave differently depending on
|
||||
// the concrete type of v. Value v must be a reference type, either a
|
||||
// pointer, map, or slice.
|
||||
//
|
||||
// * If v is a slice, Parse will decode all lines from the ndb
|
||||
// input into array elements. Otherwise, Parse will decode only
|
||||
// the first line.
|
||||
//
|
||||
// * If v is of the type (map[string] interface{}), Parse will
|
||||
// populate v with key/value pairs, where value is decoded
|
||||
// according to the concrete type of the map's value.
|
||||
//
|
||||
// * If v is a struct, Parse will populate struct fields whose
|
||||
// names match the ndb attribute. Struct fields may be annotated
|
||||
// with a tag of the form `ndb: name`, where name matches the
|
||||
// attribute string in the ndb input.
|
||||
//
|
||||
// Struct fields or map keys that do not match the ndb input are left
|
||||
// unmodified. Ndb attributes that do not match any struct fields are
|
||||
// silently dropped. If an ndb string cannot be converted to the
|
||||
// destination value or a syntax error occurs, an error is returned
|
||||
// and v is left unmodified. Parse can only store to exported (capitalized)
|
||||
// fields of a struct.
|
||||
func Parse(data []byte, v interface{}) error {
|
||||
d := NewDecoder(bytes.NewReader(data))
|
||||
return d.Decode(v)
|
||||
}
|
||||
|
||||
// NewDecoder returns a Decoder with its input pulled from an io.Reader
|
||||
func NewDecoder(r io.Reader) *Decoder {
|
||||
d := new(Decoder)
|
||||
d.src = textproto.NewReader(bufio.NewReader(r))
|
||||
return d
|
||||
}
|
||||
|
||||
// The Decode method follows the same parsing rules as Parse(), but
|
||||
// will read at most one ndb string. As such, slices or arrays are
|
||||
// not valid types for v.
|
||||
func (d *Decoder) Decode(v interface{}) error {
|
||||
val := reflect.ValueOf(v)
|
||||
if val.Kind() != reflect.Ptr || val.IsNil() {
|
||||
return &TypeError{val.Type()}
|
||||
}
|
||||
if p,err := d.getPairs(); err != nil {
|
||||
return err
|
||||
} else {
|
||||
return d.saveData(p, val.Elem())
|
||||
}
|
||||
}
|
||||
|
||||
// Emit encodes the value v as an ndb string.
//
// Intended contract: the String method of each struct field or map
// entry produces the ndb output. A slice or array yields one ndb line
// per element. For structs, the attribute name is the field name, or
// the field's ndb annotation when present. Attributes may not contain
// white space; values may contain white space but not new lines. When
// no valid ndb string can be produced an error is returned.
//
// NOTE(review): the body is still a placeholder. It ignores v and
// returns a nil slice with a nil error.
func Emit(v interface{}) ([]byte, error) {
	var encoded []byte
	return encoded, nil
}
|
||||
|
||||
// The Encode method will write the ndb encoding of the Go value v
|
||||
// to its backend io.Writer. Unlike Decode(), slice or array values
|
||||
// are valid, and will cause multiple ndb lines to be written.
|
||||
// If the value cannot be fully encoded, an error is returned and
|
||||
// no data will be written to the io.Writer.
|
||||
func (e *Encoder) Encode(v interface{}) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// NewEncoder returns an Encoder that writes ndb output to an
|
||||
// io.Writer
|
||||
func NewEncoder(w io.Writer) *Encoder {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Flush forces all outstanding data in an Encoder to be written to
|
||||
// its backend io.Writer.
|
||||
func (e *Encoder) Flush() {
|
||||
}
|
177
parse.go
Normal file
177
parse.go
Normal file
@ -0,0 +1,177 @@
|
||||
package ndb
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"net/textproto"
|
||||
"unicode"
|
||||
"bytes"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// scanner wraps the textproto.Reader that input is pulled from.
// NOTE(review): nothing in the visible code refers to this type.
type scanner struct {
	src *textproto.Reader
}
|
||||
|
||||
// pair is one attribute/value tuple taken from a line of ndb input.
type pair struct {
	attr []byte
	val  []byte
}

// String renders the pair as "attr => val".
func (p pair) String() string {
	return fmt.Sprintf("%s => %s", p.attr, p.val)
}
|
||||
|
||||
func errBadAttr(line []byte, offset int64) error {
|
||||
return &SyntaxError { line, offset, "Invalid attribute name" }
|
||||
}
|
||||
func errUnterminated(line []byte, offset int64) error {
|
||||
return &SyntaxError { line, offset, "Unterminated quoted string" }
|
||||
}
|
||||
func errBadUnicode(line []byte, offset int64) error {
|
||||
return &SyntaxError { line, offset, "Invalid UTF8 input" }
|
||||
}
|
||||
func errNewline(line []byte, offset int64) error {
|
||||
return &SyntaxError { line, offset, "Values may not contain new lines" }
|
||||
}
|
||||
|
||||
func (d *Decoder) getPairs() ([]pair, error) {
|
||||
var tuples [][]byte
|
||||
d.pairbuf = d.pairbuf[0:0]
|
||||
line, err := d.src.ReadContinuedLineBytes()
|
||||
if err != nil {
|
||||
return nil,err
|
||||
}
|
||||
tuples,err = lex(line)
|
||||
if err != nil {
|
||||
return nil,err
|
||||
} else {
|
||||
for _,t := range tuples {
|
||||
d.pairbuf = append(d.pairbuf, parseTuple(t))
|
||||
}
|
||||
}
|
||||
return d.pairbuf, nil
|
||||
}
|
||||
|
||||
func (d *Decoder) saveData(p []pair, val reflect.Value) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseTuple(tuple []byte) pair {
|
||||
var p pair
|
||||
fmt.Printf("Split %s\n", string(tuple))
|
||||
s := bytes.SplitN(tuple, []byte("="), 2)
|
||||
p.attr = s[0]
|
||||
if len(s) > 1 {
|
||||
if len(s[1]) > 1 {
|
||||
if s[1][0] == '\'' && len(s[1]) > 2 && s[1][len(s[1])-1] == '\'' {
|
||||
s[1] = s[1][1:len(s[1])-1]
|
||||
}
|
||||
}
|
||||
p.val = bytes.Replace(s[1], []byte("''"), []byte("'"), -1)
|
||||
}
|
||||
fmt.Println("Made ", p)
|
||||
return p
|
||||
}
|
||||
|
||||
// scanState is the lexer's stack of states; the last element is the
// state currently in effect.
type scanState []int

// push makes n the current state.
func (s *scanState) push(n int) {
	*s = append(*s, n)
}

// top reports the current state without removing it. An empty stack
// reports scanNone.
func (s scanState) top() int {
	if n := len(s); n > 0 {
		return s[n-1]
	}
	return scanNone
}

// pop removes and returns the current state. Popping an empty stack
// leaves it empty and returns scanNone.
func (s *scanState) pop() int {
	stack := *s
	if len(stack) == 0 {
		return scanNone
	}
	last := len(stack) - 1
	*s = stack[:last]
	return stack[last]
}

// Lexer states used by lex.
const (
	scanNone        = iota // outside any tuple; also what an empty stack reports
	scanAttr               // inside an attribute name
	scanValue              // inside an unquoted value
	scanValueStart         // right after '=' or after a closed quote
	scanQuoteStart         // right after an opening single quote
	scanQuoteString        // inside a quoted string
)
|
||||
|
||||
func lex(line []byte) ([][]byte, error) {
|
||||
var offset int64
|
||||
state := make(scanState, 0, 3)
|
||||
tuples := make([][]byte, 0, 10)
|
||||
buf := bytes.NewReader(line)
|
||||
var beg int64
|
||||
|
||||
for r,sz,err := buf.ReadRune(); err == nil; r,sz,err = buf.ReadRune() {
|
||||
fmt.Printf("(%d,%c) %s|%s\n", state.top(), r, line[:offset], line[offset:])
|
||||
if r == 0xFFFD && sz == 1 {
|
||||
return nil, errBadUnicode(line, offset)
|
||||
}
|
||||
switch state.top() {
|
||||
case scanNone:
|
||||
if unicode.IsSpace(r) {
|
||||
// skip
|
||||
} else if unicode.IsLetter(r) || unicode.IsNumber(r) {
|
||||
state.push(scanAttr)
|
||||
beg = offset
|
||||
} else {
|
||||
return nil,errBadAttr(line, offset)
|
||||
}
|
||||
case scanAttr:
|
||||
if unicode.IsSpace(r) {
|
||||
state.pop()
|
||||
tuples = append(tuples, line[beg:offset])
|
||||
fmt.Println("Save", string(line[beg:offset]))
|
||||
} else if r == '=' {
|
||||
state.pop()
|
||||
state.push(scanValueStart)
|
||||
} else if !(unicode.IsLetter(r) || unicode.IsNumber(r)) {
|
||||
return nil,errBadAttr(line, offset)
|
||||
}
|
||||
case scanValueStart:
|
||||
if unicode.IsSpace(r) {
|
||||
state.pop()
|
||||
tuples = append(tuples, line[beg:offset])
|
||||
fmt.Println("Save", string(line[beg:offset]))
|
||||
} else if r == '\'' {
|
||||
state.push(scanQuoteStart)
|
||||
} else {
|
||||
state.pop()
|
||||
state.push(scanValue)
|
||||
}
|
||||
case scanValue:
|
||||
if unicode.IsSpace(r) {
|
||||
state.pop()
|
||||
tuples = append(tuples, line[beg:offset])
|
||||
fmt.Println("Save", string(line[beg:offset]))
|
||||
}
|
||||
case scanQuoteStart:
|
||||
if r == '\'' {
|
||||
state.pop()
|
||||
} else {
|
||||
state.pop()
|
||||
state.push(scanQuoteString)
|
||||
}
|
||||
case scanQuoteString:
|
||||
if r == '\'' {
|
||||
state.pop()
|
||||
} else if r == '\n' {
|
||||
return nil,errNewline(line, offset)
|
||||
}
|
||||
}
|
||||
offset += int64(sz)
|
||||
}
|
||||
switch state.top() {
|
||||
case scanQuoteString, scanQuoteStart:
|
||||
return nil,errUnterminated(line, offset)
|
||||
case scanNone:
|
||||
default:
|
||||
tuples = append(tuples, line[beg:offset])
|
||||
fmt.Println("Save", string(line[beg:offset]))
|
||||
}
|
||||
return tuples,nil
|
||||
}
|
69
parse_test.go
Normal file
69
parse_test.go
Normal file
@ -0,0 +1,69 @@
|
||||
package ndb
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"bytes"
|
||||
)
|
||||
|
||||
var parseTests = []struct {
|
||||
in []byte
|
||||
out []pair
|
||||
}{
|
||||
{
|
||||
in: []byte("key1=val1 key2=val2 key3=val3"),
|
||||
out: []pair {
|
||||
{[]byte("key1"),[]byte("val1")},
|
||||
{[]byte("key2"),[]byte("val2")},
|
||||
{[]byte("key3"),[]byte("val3")}},
|
||||
},
|
||||
{
|
||||
in: []byte("title='Some value with spaces' width=340 height=200"),
|
||||
out: []pair {
|
||||
{[]byte("title"),[]byte("Some value with spaces")},
|
||||
{[]byte("width"),[]byte("340")},
|
||||
{[]byte("height"),[]byte("200")}},
|
||||
},
|
||||
{
|
||||
in: []byte("title='Dave''s pasta' sq=Davis cost=$$"),
|
||||
out: []pair {
|
||||
{[]byte("title"),[]byte("Dave's pasta")},
|
||||
{[]byte("sq"),[]byte("Davis")},
|
||||
{[]byte("cost"),[]byte("$$")}},
|
||||
},
|
||||
{
|
||||
in: []byte("action=''bradley key=jay mod=ctrl+alt+shift"),
|
||||
out: []pair {
|
||||
{[]byte("action"),[]byte("'bradley")},
|
||||
{[]byte("key"),[]byte("jay")},
|
||||
{[]byte("mod"),[]byte("ctrl+alt+shift")}},
|
||||
},
|
||||
{
|
||||
in: []byte("action=reload key='' mod=ctrl+alt+shift"),
|
||||
out: []pair {
|
||||
{[]byte("action"),[]byte("reload")},
|
||||
{[]byte("key"),[]byte("'")},
|
||||
{[]byte("mod"),[]byte("ctrl+alt+shift")}},
|
||||
},
|
||||
}
|
||||
|
||||
func Test_parsing(t *testing.T) {
|
||||
for i,tt := range parseTests {
|
||||
d := NewDecoder(bytes.NewReader(tt.in))
|
||||
p,err := d.getPairs()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
t.FailNow()
|
||||
} else {
|
||||
for j := range tt.out {
|
||||
if j > len(p) || !match(p[j],tt.out[j]) {
|
||||
t.Errorf("%d: getPairs %s => %v, want %v",i, tt.in, p, tt.out)
|
||||
t.FailNow()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func match(p1, p2 pair) bool {
|
||||
return (bytes.Compare(p1.attr, p2.attr) == 0) && (bytes.Compare(p1.val, p2.val) == 0)
|
||||
}
|
Loading…
Reference in New Issue
Block a user