commit 751b403e15e6024f60e285a2fe7f988163d8ba15
Author: David Arroyo <darroyo@constantcontact.com>
Date:   Sat Jul 13 14:05:31 2013 -0400

    Initial commit. Beginnings of an ndb parsing library. Lexer/parser functional.
    What's left is mapping the input to Go values.

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/decode.go b/decode.go
new file mode 100644
index 0000000..faea4ff
--- /dev/null
+++ b/decode.go
@@ -0,0 +1 @@
+package ndb
diff --git a/encode.go b/encode.go
new file mode 100644
index 0000000..faea4ff
--- /dev/null
+++ b/encode.go
@@ -0,0 +1 @@
+package ndb
diff --git a/ndb.go b/ndb.go
new file mode 100644
index 0000000..c1a9701
--- /dev/null
+++ b/ndb.go
@@ -0,0 +1,155 @@
+// Package ndb decodes and encodes simple strings of key=value pairs.
+// The accepted format is based on Plan 9's ndb(6) format, described at
+// http://plan9.bell-labs.com/magic/man2html/6/ndb . Values containing
+// white space must be enclosed in single quotes; two consecutive single
+// quotes escape a literal single quote. Attributes must not contain
+// white space. A value may contain any printable Unicode character
+// except for a newline.
+package ndb
+
+import (
+	"reflect"
+	"bytes"
+	"bufio"
+	"net/textproto"
+	"fmt"
+	"io"
+	"unicode/utf8"
+)
+
+// A SyntaxError contains the data that caused an error and the
+// offset of the first byte that caused the syntax error. Data may
+// only be valid until the next call to the Decode() method.
+type SyntaxError struct {
+	// Data is the input in which the error was found.
+	Data []byte
+	// Offset is the byte offset into Data at which the error was detected.
+	Offset int64
+	// Message is a short description of what is wrong with the input.
+	Message string
+}
+
+// A TypeError occurs when a Go value is incompatible with the ndb
+// string it must store or create.
+type TypeError struct {
+	// Type is the type of the rejected value. It may be nil.
+	Type reflect.Type
+}
+
+// Error implements the error interface. The offending type is formatted
+// with %v rather than by calling Type.String(), so a TypeError whose Type
+// is nil reports "<nil>" instead of panicking.
+func (e *TypeError) Error() string {
+	return fmt.Sprintf("Invalid type %v or nil pointer", e.Type)
+}
+
+// min returns the smaller of a and b.
+func min(a, b int64) int64 {
+	if b <= a {
+		return b
+	}
+	return a
+}
+
+// Error implements the error interface. It returns Message followed by an
+// excerpt of Data that starts at Offset and is about ten bytes long.
+//
+// Offset is clamped to the bounds of Data before Data is indexed. Errors
+// detected at the end of the input (an unterminated quoted string, for
+// example) carry Offset == len(Data), and indexing Data at that position
+// would panic.
+func (e *SyntaxError) Error() string {
+	size := int64(len(e.Data))
+	start := min(e.Offset, size)
+	if start < 0 {
+		start = 0
+	}
+	end := min(start+10, size)
+
+	// Make sure we're on utf8 boundaries: back up to the first byte of the
+	// rune that contains start, then grow the excerpt until it is valid
+	// UTF-8 or the end of Data is reached.
+	for start > 0 && start < size && !utf8.RuneStart(e.Data[start]) {
+		start--
+	}
+	for end < size && !utf8.Valid(e.Data[start:end]) {
+		end++
+	}
+
+	return fmt.Sprintf("%s\n\tat `%s'", e.Message, e.Data[start:end])
+}
+
+// An Encoder wraps an io.Writer and serializes Go values
+// into ndb strings. Successive calls to the Encode() method
+// append lines to the io.Writer.
+type Encoder struct {
+	// out is presumably the buffered destination for encoded output.
+	// NOTE(review): nothing in the visible code initializes or uses it
+	// yet; NewEncoder currently returns nil.
+	out bufio.Writer
+}
+
+// A Decoder wraps an io.Reader and decodes successive ndb strings
+// into Go values using the Decode() method.
+type Decoder struct {
+	// src is the line-oriented reader that input lines are pulled from.
+	src *textproto.Reader
+	// pairbuf holds the pairs of the most recently read line. getPairs
+	// truncates and refills it on every call, so its storage is reused.
+	pairbuf []pair
+}
+
+// Parse reads an ndb string from data and unmarshals it into the Go
+// value v. It is a convenience wrapper that builds a Decoder over data
+// and calls its Decode method once, so v must be a non-nil pointer;
+// any other value is rejected with a *TypeError.
+//
+// The intended mapping depends on the type that v refers to:
+//
+// 	* For a slice, every line of the ndb input is decoded into
+// 	  successive elements. For all other types only the first line
+// 	  is decoded.
+//
+// 	* For a map[string]interface{}, the map is populated with
+// 	  key/value pairs, each value decoded according to the concrete
+// 	  type of the map's value.
+//
+// 	* For a struct, fields whose names match an ndb attribute are
+// 	  populated. A field may carry a tag of the form `ndb: name` to
+// 	  match the attribute called name instead.
+//
+// Struct fields or map keys without a matching attribute are left
+// unmodified, and attributes without a matching field are silently
+// dropped. If the input contains a syntax error or a value cannot be
+// converted to its destination, an error is returned and v is left
+// unmodified. Only exported (capitalized) struct fields can be stored to.
+//
+// NOTE(review): storing into v is not implemented yet. Decode hands the
+// parsed pairs to saveData, which is currently a stub that returns nil,
+// so for now Parse only validates v and the syntax of the first line.
+func Parse(data []byte, v interface{}) error {
+	return NewDecoder(bytes.NewReader(data)).Decode(v)
+}
+
+// NewDecoder returns a Decoder that pulls its input from r. The reader is
+// wrapped in a bufio.Reader and then in a textproto.Reader.
+func NewDecoder(r io.Reader) *Decoder {
+	return &Decoder{src: textproto.NewReader(bufio.NewReader(r))}
+}
+
+// Decode reads at most one ndb string from the Decoder's input and
+// follows the same parsing rules as Parse(). v must be a non-nil pointer;
+// anything else, including slices and arrays passed by value, is rejected
+// with a *TypeError before any input is read.
+//
+// A nil interface value is rejected like any other invalid argument. It
+// carries no dynamic type, and calling Type on the zero reflect.Value
+// panics, so the TypeError for it names the static type interface{}.
+//
+// NOTE(review): saveData, which is meant to store the parsed pairs in the
+// value v points to, is currently a stub; no data reaches v yet.
+func (d *Decoder) Decode(v interface{}) error {
+	val := reflect.ValueOf(v)
+	if !val.IsValid() {
+		// v == nil: report the static type of the parameter instead.
+		return &TypeError{reflect.TypeOf(&v).Elem()}
+	}
+	if val.Kind() != reflect.Ptr || val.IsNil() {
+		return &TypeError{val.Type()}
+	}
+	p, err := d.getPairs()
+	if err != nil {
+		return err
+	}
+	return d.saveData(p, val.Elem())
+}
+
+// Emit encodes a value into an ndb string. Emit will use the String
+// method of each struct field or map entry to produce ndb output.
+// If v is a slice or array, multiple ndb lines will be output, one
+// for each element. For structs, attribute names will be the name of
+// the struct field, or the field's ndb annotation if it exists.
+// Ndb attributes may not contain white space. Ndb values may contain
+// white space but may not contain new lines. If Emit cannot produce
+// valid ndb strings, an error is returned.
+//
+// NOTE(review): the above describes the intended behavior. Emit is
+// currently a stub that ignores v and returns (nil, nil).
+func Emit(v interface{}) ([]byte, error) {
+	// Not implemented yet.
+	return nil,nil
+}
+
+// The Encode method will write the ndb encoding of the Go value v
+// to its backend io.Writer. Unlike Decode(), slice or array values
+// are valid, and will cause multiple ndb lines to be written.
+// If the value cannot be fully encoded, an error is returned and
+// no data will be written to the io.Writer.
+//
+// NOTE(review): the above describes the intended behavior. Encode is
+// currently a stub that ignores v, writes nothing and returns nil.
+func (e *Encoder) Encode(v interface{}) error {
+	// Not implemented yet.
+	return nil
+}
+
+// NewEncoder returns an Encoder that writes ndb output to an
+// io.Writer.
+//
+// NOTE(review): that is the intended behavior. NewEncoder is currently a
+// stub that ignores w and returns a nil *Encoder.
+func NewEncoder(w io.Writer) *Encoder {
+	// Not implemented yet.
+	return nil
+}
+
+// Flush forces all outstanding data in an Encoder to be written to
+// its backend io.Writer.
+//
+// NOTE(review): that is the intended behavior. Flush is currently an
+// empty stub.
+func (e *Encoder) Flush() {
+	// Not implemented yet.
+}
diff --git a/parse.go b/parse.go
new file mode 100644
index 0000000..769ce45
--- /dev/null
+++ b/parse.go
@@ -0,0 +1,177 @@
+package ndb
+
+import (
+	"reflect"
+	"net/textproto"
+	"unicode"
+	"bytes"
+	"fmt"
+)
+
+// A scanner wraps a textproto.Reader.
+// NOTE(review): nothing in the visible code uses this type; reading and
+// lexing go through Decoder.getPairs and lex instead. Confirm whether it
+// is still needed.
+type scanner struct {
+	src *textproto.Reader
+}
+
+// A pair is a single attribute/value tuple taken from a line of ndb input.
+type pair struct {
+	attr []byte // attribute name
+	val  []byte // attribute value
+}
+
+// String formats the pair as "attr => val".
+func (p pair) String() string {
+	return fmt.Sprintf("%s => %s", p.attr, p.val)
+}
+
+// errBadAttr returns the *SyntaxError for an invalid attribute name found
+// at byte offset in line.
+func errBadAttr(line []byte, offset int64) error {
+	return &SyntaxError{Data: line, Offset: offset, Message: "Invalid attribute name"}
+}
+
+// errUnterminated returns the *SyntaxError for a quoted string that is not
+// closed; offset is the position in line at which the problem was noticed.
+func errUnterminated(line []byte, offset int64) error {
+	return &SyntaxError{Data: line, Offset: offset, Message: "Unterminated quoted string"}
+}
+
+// errBadUnicode returns the *SyntaxError for input that is not valid UTF-8
+// at byte offset in line.
+func errBadUnicode(line []byte, offset int64) error {
+	return &SyntaxError{Data: line, Offset: offset, Message: "Invalid UTF8 input"}
+}
+
+// errNewline returns the *SyntaxError for a value that contains a new line
+// at byte offset in line.
+func errNewline(line []byte, offset int64) error {
+	return &SyntaxError{Data: line, Offset: offset, Message: "Values may not contain new lines"}
+}
+
+// getPairs reads the next logical line from the Decoder's input (via
+// ReadContinuedLineBytes), splits it into tuples with lex, and converts
+// each tuple into a pair with parseTuple.
+//
+// The returned slice is d.pairbuf, which is truncated and refilled on
+// every call; it is therefore only valid until the next call to getPairs.
+// On a read or syntax error the result is nil together with that error.
+func (d *Decoder) getPairs() ([]pair, error) {
+	// Truncate before reading so that the pairs of a previous line are
+	// dropped even when this call bails out early.
+	d.pairbuf = d.pairbuf[:0]
+
+	raw, err := d.src.ReadContinuedLineBytes()
+	if err != nil {
+		return nil, err
+	}
+	fields, err := lex(raw)
+	if err != nil {
+		return nil, err
+	}
+	for i := range fields {
+		d.pairbuf = append(d.pairbuf, parseTuple(fields[i]))
+	}
+	return d.pairbuf, nil
+}
+
+// saveData is meant to store the pairs p into val.
+// NOTE(review): it is currently a stub that ignores both arguments and
+// returns nil.
+func (d *Decoder) saveData(p []pair, val reflect.Value) error {
+	return nil
+}
+
+// parseTuple splits a single tuple, as produced by lex, into its attribute
+// and value. The attribute is everything before the first '='. A tuple
+// without '=' yields a pair whose val is nil.
+//
+// If the value is longer than two bytes and both starts and ends with a
+// single quote, that enclosing pair of quotes is removed. Afterwards every
+// doubled single quote ('') is replaced by one literal single quote.
+//
+// parseTuple writes nothing to standard output.
+func parseTuple(tuple []byte) pair {
+	var p pair
+	s := bytes.SplitN(tuple, []byte("="), 2)
+	p.attr = s[0]
+	if len(s) < 2 {
+		return p
+	}
+	v := s[1]
+	// The length test keeps the two-byte value '' from being stripped down
+	// to nothing; it falls through to the replacement below and becomes a
+	// single literal quote.
+	if len(v) > 2 && v[0] == '\'' && v[len(v)-1] == '\'' {
+		v = v[1 : len(v)-1]
+	}
+	p.val = bytes.Replace(v, []byte("''"), []byte("'"), -1)
+	return p
+}
+
+// A scanState is the stack of lexer states maintained by lex. The state on
+// top of the stack decides how the next rune is interpreted.
+type scanState []int
+
+// push puts state n on top of the stack.
+func (s *scanState) push(n int) {
+	*s = append(*s, n)
+}
+
+// top returns the state on top of the stack without removing it. An empty
+// stack reports scanNone.
+func (s scanState) top() int {
+	if n := len(s); n > 0 {
+		return s[n-1]
+	}
+	return scanNone
+}
+
+// pop removes the state on top of the stack and returns it. Popping an
+// empty stack leaves it empty and returns scanNone.
+func (s *scanState) pop() int {
+	stack := *s
+	last := len(stack) - 1
+	if last < 0 {
+		return scanNone
+	}
+	*s = stack[:last]
+	return stack[last]
+}
+
+// States of the lexer in lex.
+const (
+	scanNone        = iota // between tuples; also the top of an empty stack
+	scanAttr               // inside an attribute name
+	scanValue              // inside an unquoted value
+	scanValueStart         // just after '=', or just after a closing quote
+	scanQuoteStart         // just after a single quote seen in scanValueStart
+	scanQuoteString        // inside a quoted string
+)
+
+// lex splits one line of ndb input into tuples. Each returned tuple is a
+// sub-slice of line of the form attr, attr= or attr=value, with any quotes
+// still in place; parseTuple takes the tuples apart. Tuples are separated
+// by white space, and white space inside a quoted string does not end a
+// tuple.
+//
+// lex returns a *SyntaxError if line contains invalid UTF-8, an attribute
+// name with a character that is neither a letter nor a number, a newline
+// inside a quoted string, or a quoted string that is still open at the end
+// of line. lex writes nothing to standard output.
+func lex(line []byte) ([][]byte, error) {
+	var (
+		offset int64 // byte offset of the rune being examined
+		beg    int64 // byte offset at which the current tuple starts
+	)
+	state := make(scanState, 0, 3)
+	tuples := make([][]byte, 0, 10)
+	buf := bytes.NewReader(line)
+
+	for r, sz, err := buf.ReadRune(); err == nil; r, sz, err = buf.ReadRune() {
+		// ReadRune reports a byte that is not valid UTF-8 as U+FFFD with a
+		// size of 1; a U+FFFD that is really in the input is 3 bytes long.
+		if r == 0xFFFD && sz == 1 {
+			return nil, errBadUnicode(line, offset)
+		}
+		switch state.top() {
+		case scanNone:
+			if unicode.IsSpace(r) {
+				// skip white space between tuples
+			} else if unicode.IsLetter(r) || unicode.IsNumber(r) {
+				state.push(scanAttr)
+				beg = offset
+			} else {
+				return nil, errBadAttr(line, offset)
+			}
+		case scanAttr:
+			if unicode.IsSpace(r) {
+				// An attribute without '=' is a tuple of its own.
+				state.pop()
+				tuples = append(tuples, line[beg:offset])
+			} else if r == '=' {
+				state.pop()
+				state.push(scanValueStart)
+			} else if !(unicode.IsLetter(r) || unicode.IsNumber(r)) {
+				return nil, errBadAttr(line, offset)
+			}
+		case scanValueStart:
+			if unicode.IsSpace(r) {
+				state.pop()
+				tuples = append(tuples, line[beg:offset])
+			} else if r == '\'' {
+				// scanValueStart stays on the stack underneath, so the
+				// lexer comes back here once the quote has been closed.
+				state.push(scanQuoteStart)
+			} else {
+				state.pop()
+				state.push(scanValue)
+			}
+		case scanValue:
+			if unicode.IsSpace(r) {
+				state.pop()
+				tuples = append(tuples, line[beg:offset])
+			}
+		case scanQuoteStart:
+			if r == '\'' {
+				// Two quotes in a row: back to scanValueStart.
+				state.pop()
+			} else if r == '\n' {
+				// Same rule as in scanQuoteString; without this check a
+				// newline right after the opening quote was accepted.
+				return nil, errNewline(line, offset)
+			} else {
+				state.pop()
+				state.push(scanQuoteString)
+			}
+		case scanQuoteString:
+			if r == '\'' {
+				state.pop()
+			} else if r == '\n' {
+				return nil, errNewline(line, offset)
+			}
+		}
+		offset += int64(sz)
+	}
+
+	// The line is used up; decide what to do with whatever is pending.
+	switch state.top() {
+	case scanQuoteString, scanQuoteStart:
+		return nil, errUnterminated(line, offset)
+	case scanNone:
+		// nothing pending
+	default:
+		tuples = append(tuples, line[beg:offset])
+	}
+	return tuples, nil
+}
diff --git a/parse_test.go b/parse_test.go
new file mode 100644
index 0000000..c66e25c
--- /dev/null
+++ b/parse_test.go
@@ -0,0 +1,69 @@
+package ndb
+
+import (
+	"testing"
+	"bytes"
+)
+
+// parseTests pairs a line of ndb input with the pairs that getPairs is
+// expected to produce for it, in order.
+var parseTests = []struct {
+	in []byte
+	out []pair
+}{
+	// Plain unquoted values.
+	{
+		in: []byte("key1=val1 key2=val2 key3=val3"),
+		out: []pair {
+			{[]byte("key1"),[]byte("val1")},
+			{[]byte("key2"),[]byte("val2")},
+			{[]byte("key3"),[]byte("val3")}},
+	},
+	// A quoted value containing white space.
+	{
+		in: []byte("title='Some value with spaces' width=340 height=200"),
+		out: []pair {
+			{[]byte("title"),[]byte("Some value with spaces")},
+			{[]byte("width"),[]byte("340")},
+			{[]byte("height"),[]byte("200")}},
+	},
+	// A doubled quote inside a quoted value; punctuation in an unquoted value.
+	{
+		in: []byte("title='Dave''s pasta' sq=Davis cost=$$"),
+		out: []pair {
+			{[]byte("title"),[]byte("Dave's pasta")},
+			{[]byte("sq"),[]byte("Davis")},
+			{[]byte("cost"),[]byte("$$")}},
+	},
+	// A doubled quote at the start of an otherwise unquoted value.
+	{
+		in: []byte("action=''bradley key=jay mod=ctrl+alt+shift"),
+		out: []pair {
+			{[]byte("action"),[]byte("'bradley")},
+			{[]byte("key"),[]byte("jay")},
+			{[]byte("mod"),[]byte("ctrl+alt+shift")}},
+	},
+	// A value that consists of a doubled quote only.
+	{
+		in: []byte("action=reload key='' mod=ctrl+alt+shift"),
+		out: []pair {
+			{[]byte("action"),[]byte("reload")},
+			{[]byte("key"),[]byte("'")},
+			{[]byte("mod"),[]byte("ctrl+alt+shift")}},
+	},
+}
+
+// Test_parsing feeds each input in parseTests through a fresh Decoder and
+// checks that getPairs yields exactly the expected pairs, in order.
+func Test_parsing(t *testing.T) {
+	for i, tt := range parseTests {
+		d := NewDecoder(bytes.NewReader(tt.in))
+		p, err := d.getPairs()
+		if err != nil {
+			t.Fatalf("%d: getPairs %s: %v", i, tt.in, err)
+		}
+		// Compare the lengths first: this catches missing and surplus
+		// pairs alike and makes indexing p below safe.
+		if len(p) != len(tt.out) {
+			t.Fatalf("%d: getPairs %s => %v, want %v", i, tt.in, p, tt.out)
+		}
+		for j := range tt.out {
+			if !match(p[j], tt.out[j]) {
+				t.Fatalf("%d: getPairs %s => %v, want %v", i, tt.in, p, tt.out)
+			}
+		}
+	}
+}
+
+// match reports whether p1 and p2 have byte-for-byte equal attributes and
+// values.
+func match(p1, p2 pair) bool {
+	if !bytes.Equal(p1.attr, p2.attr) {
+		return false
+	}
+	return bytes.Equal(p1.val, p2.val)
+}