Giter VIP home page Giter VIP logo

go_parser's Introduction

go_parser

Parser API in Go

ABOUT

The 'go_parser' package is an API to help you create hand-written lexers and parsers.

The package was inspired by Rob Pikes' video Lexical Scanning In Go and golang's 'template' package.

PARSER INTERFACE

Below is the interface for the main Parser type:

// Parser helps you process lexer tokens
type Parser interface {

	// PeekTokenType allows you to look ahead at tokens without consuming them
	PeekTokenType(int) lexer.TokenType

	// PeekToken allows you to look ahead at tokens without consuming them
	PeekToken(int) *lexer.Token

	// NextToken consumes and returns the next token
	NextToken() *lexer.Token

	// SkipToken consumes the next token without returning it
	SkipToken()

	// SkipTokens consumes the next n tokens without returning them
	SkipTokens(int)

	// BackupToken un-consumes the last token
	BackupToken()

	// BackupTokens un-consumes the last n tokens
	BackupTokens(int)

	// ClearTokens clears all consumed tokens
	ClearTokens()

	// Emit emits an object, consuming matched tokens
	Emit(interface{})

	EOF() bool

	// Next retrieves the next emitted item
	Next() interface{}

	// Marker returns a marker that you can use to reset the parser state later
	Marker() *Marker

	// Reset resets the lexer state to the specified marker
	Reset(*Marker)
}

EXAMPLE

Below is a sample calculator program that uses the parser (and lexer) API:

//
//	calc.go implements a simple calculator using the iNamik lexer and parser api.
//
//	Input is read from STDIN
//
//	The input expression is matched against the following pattern:
//
//	input_exp:
//	( id '=' )? general_exp
//	general_exp:
//		operand ( operator operand )?
//	operand:
//		number | id | '(' general_exp ')'
//	operator:
//		'+' | '-' | '*' | '/'
//	number:
//		digit+ ( '.' digit+ )?
//	digit:
//		['0'..'9']
//	id:
//		alpha ( alpha | digit )*
//	alpha:
//		['a'..'z'] | ['A'..'Z']
//
//	Precedence is as expected, with '*' and '/' have higher precedence
//	than '+' and '-', as follows:
//
//	1 + 2 * 3 - 4 / 5  ==  1 + (2 * 3) - (4 / 5)
//

package main

import (
	"bufio"
	"bytes"
	"fmt"
	"os"
	"strconv"
	"strings"
)
import (
	"github.com/iNamik/go_lexer"
	"github.com/iNamik/go_lexer/rangeutil"
	"github.com/iNamik/go_parser"
)

// We define our lexer tokens starting from the pre-defined EOF token
const (
	T_EOF lexer.TokenType = lexer.TokenTypeEOF
	T_NIL                 = lexer.TokenTypeEOF + iota
	T_ID
	T_NUMBER
	T_PLUS
	T_MINUS
	T_MULTIPLY
	T_DIVIDE
	T_EQUALS
	T_OPEN_PAREN
	T_CLOSE_PAREN
)

// To store variables
var vars = map[string]float64{}

// Single-character tokens
var singleChars = []byte{'+', '-', '*', '/', '=', '(', ')'}

var singleTokens = []lexer.TokenType{T_PLUS, T_MINUS, T_MULTIPLY, T_DIVIDE, T_EQUALS, T_OPEN_PAREN, T_CLOSE_PAREN}

// Multi-character tokens
var bytesWhitespace = []byte{' ', '\t'}

var bytesDigits = rangeutil.RangeToBytes("0-9")

var bytesAlpha = rangeutil.RangeToBytes("a-zA-Z")

var bytesAlphaNum = rangeutil.RangeToBytes("0-9a-zA-Z")

// main
func main() {
	// Create a buffered reader from STDIN
	stdin := bufio.NewReader(os.Stdin)

	for {
		// Read a line of input
		input, _, err := stdin.ReadLine()

		// Error? we're done
		if nil != err {
			break
		}

		// Anything to process?
		if len(input) > 0 {
			// Create a new lexer to turn the input text into tokens
			l := lexer.New(lex, strings.NewReader(string(input)), len(input), 2)

			// Create a new parser that feeds off the lexer and generates expression values
			p := parser.New(parse, l, 2)

			// Loop over parser emits
			for i := p.Next(); nil != i; i = p.Next() {
				fmt.Printf("%v\n", i)
			}
		}
	}
}

// lex is the starting (and only) StateFn for lexing the input into tokens
func lex(l lexer.Lexer) lexer.StateFn {

	// EOF
	if l.MatchEOF() {
		l.EmitEOF()
		return nil // We're done here
	}

	// Single-char token?
	if i := bytes.IndexRune(singleChars, l.PeekRune(0)); i >= 0 {
		l.NextRune()
		l.EmitToken(singleTokens[i])
		return lex
	}

	switch {

	// Skip whitespace
	case l.MatchOneOrMoreBytes(bytesWhitespace):
		l.IgnoreToken()

	// Number
	case l.MatchOneOrMoreBytes(bytesDigits):
		if l.PeekRune(0) == '.' {
			l.NextRune() // skip '.'
			if !l.MatchOneOrMoreBytes(bytesDigits) {
				printError(l.Column(), "Illegal number format - Missing digits after '.'")
				l.IgnoreToken()
				break
			}
		}
		l.EmitTokenWithBytes(T_NUMBER)

	// ID
	case l.MatchOneBytes(bytesAlpha) && l.MatchZeroOrMoreBytes(bytesAlphaNum):
		l.EmitTokenWithBytes(T_ID)

	// Unknown
	default:
		l.NextRune()
		printError(l.Column(), "Unknown Character")
		l.IgnoreToken()
	}

	// See you again soon!
	return lex
}

// parse tries to execute a general expression from the lexed tokens.
// Returns nil - We only take one pass at the input string
func parse(p parser.Parser) parser.StateFn {

	if p.PeekTokenType(0) != T_EOF {
		// Assignment ( id = general_expression )
		if p.PeekTokenType(0) == T_ID && p.PeekTokenType(1) == T_EQUALS {
			tId := p.NextToken()

			p.SkipToken() // skip '='

			val, ok := pGeneralExpression(p)

			if ok {
				t := p.NextToken()
				if t.Type() != T_EOF {
					printError(t.Column(), "Expecting operator")
				} else {
					id := string(tId.Bytes())
					vars[id] = val
				}
			}
			// General expression
		} else {
			val, ok := pGeneralExpression(p)

			if ok {
				t := p.NextToken()
				if t.Type() != T_EOF {
					printError(t.Column(), "Expecting operator")
				} else {
					p.Emit(val)
				}
			}
		}
	}

	p.ClearTokens()
	p.Emit(nil) // We're done - One pass only

	return nil
}

// pGeneralExpression is the starting point for parsing a General Expression.
// It is basically a pass-through to pAdditiveExpression, but it feels cleaner
func pGeneralExpression(p parser.Parser) (f float64, ok bool) {
	return pAdditiveExpression(p)
}

// pAdditiveExpression parses [ expression ( ( '+' | '-' ) expression )? ]
func pAdditiveExpression(p parser.Parser) (f float64, ok bool) {

	f, ok = pMultiplicitiveExpression(p)

	if ok {
		t := p.NextToken()
		switch t.Type() {

		// Add (+)
		case T_PLUS:
			r, ok := pAdditiveExpression(p)
			if ok {
				f += r
			}

		// Subtract (-)
		case T_MINUS:
			r, ok := pAdditiveExpression(p)
			if ok {
				f -= r
			}

		// Unknown - Send it back upstream
		default:
			p.BackupToken()
			ok = true
		}
	}

	return
}

// pMultiplicitiveExpression parses [ expression ( ( '*' | '/' ) expression )? ]
func pMultiplicitiveExpression(p parser.Parser) (f float64, ok bool) {
	f, ok = pOperand(p)

	if ok {
		t := p.NextToken()
		switch t.Type() {

		// Multiply (*)
		case T_MULTIPLY:
			r, ok := pMultiplicitiveExpression(p)
			if ok {
				f *= r
			}

		// Divide (/)
		case T_DIVIDE:
			r, ok := pMultiplicitiveExpression(p)
			if ok {
				f /= r
			}

		// Unknown - Send it back upstream
		default:
			p.BackupToken()
			ok = true
		}
	}

	return
}

// pOperand parses [ id | number | '(' expression ')' ]
func pOperand(p parser.Parser) (f float64, ok bool) {
	var err error

	m := p.Marker()
	t := p.NextToken()

	switch t.Type() {

	// ID
	case T_ID:
		var id = string(t.Bytes())
		f, ok = vars[id]
		if !ok {
			printError(t.Column(), fmt.Sprint("id '", id, "' not defined"))
			f = 0.0
		}

	// Number
	case T_NUMBER:
		f, err = strconv.ParseFloat(string(t.Bytes()), 64)
		ok = nil == err
		if !ok {
			printError(t.Column(), fmt.Sprint("Error reading number: ", err.Error()))
			f = 0.0
		}

	// '(' Expresson ')'
	case T_OPEN_PAREN:
		f, ok = pGeneralExpression(p)
		if ok {
			t2 := p.NextToken()
			if t2.Type() != T_CLOSE_PAREN {
				printError(t.Column(), "Unbalanced Paren")
				ok = false
				f = 0.0
			}
		}

	// EOF
	case T_EOF:
		printError(t.Column(), "Unexpected EOF - Expecting operand")
		ok = false
		f = 0.0

	// Unknown
	default:
		printError(t.Column(), "Expecting operand")
		ok = false
		f = 0.0
	}

	if !ok {
		p.Reset(m)
	}

	return
}

// printError prints an error msg pointing to the specified column of the input.
func printError(col int, msg string) {
	fmt.Print(strings.Repeat(" ", col-1), "^ ", msg, "\n")
}

INSTALL

The package is built using the Go tool. Assuming you have correctly set the $GOPATH variable, you can run the folloing command:

go get github.com/iNamik/go_parser

DEPENDENCIES

go_parser depends on the iNamik go_container queue package and the iNamik go_lexer package:

AUTHORS

  • David Farrell

go_parser's People

Contributors

davidpfarrell avatar

Watchers

James Cloos avatar

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    ๐Ÿ–– Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. ๐Ÿ“Š๐Ÿ“ˆ๐ŸŽ‰

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google โค๏ธ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.