expr/scanner.go

694 lines
18 KiB
Go

// Copyright (c) 2024 Celestino Amoroso (celestino.amoroso@gmail.com).
// All rights reserved.
// expr project scanner.go
package expr
import (
"bufio"
"errors"
"fmt"
"io"
"strconv"
"strings"
)
// scanner holds the state of the lexical analyzer: the byte stream being
// read, the current position in it and a small window of tokens.
type scanner struct {
	// current is the look-ahead token, i.e. the one the next call to Next
	// will return; prev is the token most recently returned by Next; stage
	// temporarily holds the look-ahead token while UnreadToken is in effect.
	current, prev, stage *Token

	// stream is the buffered reader all characters are taken from.
	stream *bufio.Reader

	// row and column are updated by readChar and unreadChar; NewScanner
	// starts both at 1.
	row, column int

	// translations optionally maps a scanned symbol to the symbol that is
	// actually stored in the produced token (see translate).
	translations map[Symbol]Symbol
}
// NewScanner creates a scanner that reads from s and applies the given symbol
// translations (which may be nil) to every token it produces. The position
// starts at row 1, column 1 and the first token is fetched immediately so
// that it is available as the look-ahead token.
func NewScanner(s io.Reader, translations map[Symbol]Symbol) (inst *scanner) {
	inst = new(scanner)
	inst.stream = bufio.NewReader(s)
	inst.row, inst.column = 1, 1
	inst.translations = translations

	// Prime the look-ahead before handing the scanner out.
	inst.current = inst.fetchNextToken()
	return inst
}
// DefaultTranslations returns a freshly allocated table of symbol aliases:
// the alternative spellings of the logical and comparison operators are
// mapped to their canonical symbols.
func DefaultTranslations() map[Symbol]Symbol {
	aliases := make(map[Symbol]Symbol, 6)

	// Logical AND.
	aliases[SymDoubleAmpersand] = SymAnd
	aliases[SymKwAnd] = SymAnd

	// Logical OR.
	aliases[SymDoubleVertBar] = SymOr
	aliases[SymKwOr] = SymOr

	// Logical NOT (the SymTilde -> SymNot alias is currently disabled).
	aliases[SymKwNot] = SymNot

	// Inequality.
	aliases[SymLessGreater] = SymNotEqual

	return aliases
}
// func (self *scanner) Current() *Token {
// return self.current
// }
// readChar consumes one byte from the stream and updates the position: a
// newline increments the row and resets the column to 0, any other byte
// increments the column. On a read error the position is left untouched and
// the error is returned.
func (scanner *scanner) readChar() (ch byte, err error) {
	ch, err = scanner.stream.ReadByte()
	if err != nil {
		return ch, err
	}

	switch ch {
	case '\n':
		scanner.row++
		scanner.column = 0
	default:
		scanner.column++
	}
	return ch, nil
}
// unreadChar pushes the most recently read byte back onto the stream and
// moves the position one column back. When the column reaches 0 the row is
// decremented too and the column is set to 1; if that makes the row 0 an
// error is returned.
//
// NOTE(review): readChar sets the column to 0 after a newline, so un-reading
// a newline drives the column to -1 and leaves the row incremented. The
// position bookkeeping for that case looks wrong - confirm the intent.
func (scanner *scanner) unreadChar() (err error) {
	if err = scanner.stream.UnreadByte(); err != nil {
		return err
	}

	scanner.column--
	if scanner.column != 0 {
		return nil
	}

	// The column dropped to zero: step back to the previous row.
	scanner.row--
	if scanner.row == 0 {
		return errors.New("unread beyond the stream boundary")
	}
	scanner.column = 1
	return nil
}
// UnreadToken steps the scanner one token back: the look-ahead token is
// parked in the staging slot and the previously returned token becomes the
// look-ahead again. Only one token can be staged at a time; a second call
// before the staged token has been consumed by Next returns an error.
func (scanner *scanner) UnreadToken() (err error) {
	if scanner.stage != nil {
		return fmt.Errorf("staging already present, currently one level only of staging is allowed")
	}
	scanner.stage, scanner.current = scanner.current, scanner.prev
	return nil
}
// lastPos returns the row and column recorded in the previously returned
// token, or (0, 0) when no token has been returned yet.
func (scanner *scanner) lastPos() (r, c int) {
	last := scanner.prev
	if last == nil {
		return 0, 0
	}
	return last.row, last.col
}
// Previous returns the token most recently returned by Next, or nil if Next
// has not been called yet.
func (scanner *scanner) Previous() *Token {
return scanner.prev
}
// Next returns the look-ahead token and advances the scanner. The returned
// token is also remembered as the previous one. The new look-ahead is the
// staged token, if UnreadToken left one, otherwise a token freshly read from
// the stream.
func (scanner *scanner) Next() (tk *Token) {
	tk = scanner.current
	scanner.prev = tk

	if staged := scanner.stage; staged != nil {
		scanner.current, scanner.stage = staged, nil
		return tk
	}

	scanner.current = scanner.fetchNextToken()
	return tk
}
// fetchNextToken reads the next token from the stream.
//
// Leading blanks are skipped first; if that fails (end of input included) the
// error is returned inside an error token. Then one character is read and,
// where needed, the following one or two characters are examined to recognize
// multi-character operators, comments, strings, identifiers and numbers. A
// character that starts no known token yields an "unknown symbol" error token.
//
// Look-ahead is always done with peek, which does not consume input and does
// not touch the row/column counters; a character is consumed only once it is
// known to belong to the token.
func (scanner *scanner) fetchNextToken() (tk *Token) {
	var ch byte
	if err := scanner.skipBlanks(); err != nil {
		return scanner.makeErrorToken(err)
	}

	// escape is raised by a backslash and makes the loop read one more
	// character.
	// NOTE(review): only a backslash, a quote, a double quote or the end of
	// input lower the flag again; after a backslash followed by any other
	// character the loop keeps consuming input until one of those shows up.
	// Confirm whether that is intended.
	escape := false
	for {
		ch, _ = scanner.readChar()
		switch ch {
		case '+':
			switch next, _ := scanner.peek(); next {
			case '+':
				tk = scanner.moveOn(SymDoublePlus, ch, next)
			case '=':
				tk = scanner.moveOn(SymPlusEqual, ch, next)
			case '>':
				tk = scanner.moveOn(SymPlusGreater, ch, next)
			default:
				tk = scanner.makeToken(SymPlus, ch)
			}
		case '-':
			switch next, _ := scanner.peek(); next {
			case '-':
				tk = scanner.moveOn(SymDoubleMinus, ch, next)
			case '=':
				tk = scanner.moveOn(SymMinusEqual, ch, next)
			default:
				tk = scanner.makeToken(SymMinus, ch)
			}
		case '*':
			switch next, _ := scanner.peek(); next {
			case '*':
				tk = scanner.moveOn(SymDoubleStar, ch, next)
			case '=':
				tk = scanner.moveOn(SymStarEqual, ch, next)
			default:
				tk = scanner.makeToken(SymStar, ch)
			}
		case '/':
			switch next, _ := scanner.peek(); next {
			case '*':
				scanner.readChar()
				tk = scanner.fetchBlockComment()
			case '=':
				tk = scanner.moveOn(SymSlashEqual, ch, next)
			case '/':
				scanner.readChar()
				tk = scanner.fetchOnLineComment()
			default:
				tk = scanner.makeToken(SymSlash, ch)
			}
		case '\\':
			if escape {
				tk = scanner.makeToken(SymBackSlash, ch)
				escape = false
			} else {
				escape = true
			}
		case '|':
			switch next, _ := scanner.peek(); next {
			case '|':
				tk = scanner.moveOn(SymDoubleVertBar, ch, next)
			case '=':
				tk = scanner.moveOn(SymVertBarEqual, ch, next)
			default:
				tk = scanner.makeToken(SymVertBar, ch)
			}
		case ',':
			tk = scanner.makeToken(SymComma, ch)
		case '^':
			if next, _ := scanner.peek(); next == '=' {
				tk = scanner.moveOn(SymCaretEqual, ch, next)
			} else {
				tk = scanner.makeToken(SymCaret, ch)
			}
		case ':':
			if next, _ := scanner.peek(); next == ':' {
				tk = scanner.moveOn(SymDoubleColon, ch, next)
			} else {
				tk = scanner.makeToken(SymColon, ch)
			}
		case ';':
			tk = scanner.makeToken(SymSemiColon, ch)
		case '.':
			// Numbers starting with a dot are not recognized here.
			switch next, _ := scanner.peek(); next {
			case '/':
				tk = scanner.moveOn(SymDotSlash, ch, next)
			case '.':
				// Consume the second dot first: peeking again without
				// consuming it would return that same dot, every ".."
				// would be taken for "..." and a real "..." would leave a
				// dangling dot behind.
				scanner.readChar()
				if next1, _ := scanner.peek(); next1 == '.' {
					// moveOn consumes the third dot.
					tk = scanner.moveOn(SymTripleDot, ch, next, next1)
				} else {
					tk = scanner.accept(SymDoubleDot, ch, next)
				}
			default:
				tk = scanner.makeToken(SymDot, ch)
			}
		case '\'':
			if escape {
				tk = scanner.makeToken(SymQuote, ch)
				escape = false
			} else {
				tk = scanner.fetchString(ch)
			}
		case '"':
			if escape {
				tk = scanner.makeToken(SymDoubleQuote, ch)
				escape = false
			} else {
				tk = scanner.fetchString(ch)
			}
		case '`':
			tk = scanner.makeToken(SymBackTick, ch)
		case '!':
			if next, _ := scanner.peek(); next == '=' {
				tk = scanner.moveOn(SymNotEqual, ch, next)
			} else {
				tk = scanner.makeToken(SymExclamation, ch)
			}
		case '?':
			switch next, _ := scanner.peek(); next {
			case '?':
				tk = scanner.moveOn(SymDoubleQuestion, ch, next)
			case '=':
				tk = scanner.moveOn(SymQuestionEqual, ch, next)
			case '!':
				tk = scanner.moveOn(SymQuestionExclam, ch, next)
			default:
				tk = scanner.makeToken(SymQuestion, ch)
			}
		case '&':
			switch next, _ := scanner.peek(); next {
			case '&':
				tk = scanner.moveOn(SymDoubleAmpersand, ch, next)
			case '=':
				tk = scanner.moveOn(SymAmpersandEqual, ch, next)
			default:
				tk = scanner.makeToken(SymAmpersand, ch)
			}
		case '%':
			if next, _ := scanner.peek(); next == '=' {
				tk = scanner.moveOn(SymPercEqual, ch, next)
			} else {
				tk = scanner.makeToken(SymPercent, ch)
			}
		case '#':
			tk = scanner.makeToken(SymHash, ch)
		case '@':
			if next, _ := scanner.peek(); (next >= 'a' && next <= 'z') || (next >= 'A' && next <= 'Z') {
				// "@name": scan the identifier and keep the '@' in its source.
				scanner.readChar()
				if tk = scanner.fetchIdentifier(next); tk.Sym == SymIdentifier {
					tk.source = "@" + tk.source
				} else {
					tk = scanner.makeErrorToken(fmt.Errorf("invalid variable reference %q", tk.source))
				}
			} else if next == '@' {
				tk = scanner.moveOn(SymDoubleAt, ch, next)
			} else {
				tk = scanner.makeToken(SymAt, ch)
			}
		case '_':
			tk = scanner.makeToken(SymUndescore, ch)
		case '=':
			if next, _ := scanner.peek(); next == '=' {
				tk = scanner.moveOn(SymDoubleEqual, ch, next)
			} else {
				tk = scanner.makeToken(SymEqual, ch)
			}
		case '<':
			switch next, _ := scanner.peek(); next {
			case '=':
				tk = scanner.moveOn(SymLessOrEqual, ch, next)
			case '<':
				// Consume the second '<' and only peek at what follows. A
				// read followed by an unread would, at end of input, push
				// the second '<' back and, before a newline, leave the
				// row/column counters out of step.
				scanner.readChar()
				if next2, _ := scanner.peek(); next2 == '=' {
					tk = scanner.moveOn(SymDoubleLessEqual, ch, next, next2)
				} else {
					tk = scanner.accept(SymDoubleLess, ch, next)
				}
			case '>':
				tk = scanner.moveOn(SymLessGreater, ch, next)
			case '+':
				tk = scanner.moveOn(SymLessPlus, ch, next)
			default:
				tk = scanner.makeToken(SymLess, ch)
			}
		case '>':
			switch next, _ := scanner.peek(); next {
			case '=':
				tk = scanner.moveOn(SymGreaterOrEqual, ch, next)
			case '>':
				// Same look-ahead scheme as for "<<".
				scanner.readChar()
				if next2, _ := scanner.peek(); next2 == '=' {
					tk = scanner.moveOn(SymDoubleGreaterEqual, ch, next, next2)
				} else {
					tk = scanner.accept(SymDoubleGreater, ch, next)
				}
			default:
				tk = scanner.makeToken(SymGreater, ch)
			}
		case '$':
			switch next, _ := scanner.peek(); next {
			case '(':
				tk = scanner.moveOn(SymDollarRound, ch, next)
				tk.source += ")"
			case '$':
				tk = scanner.moveOn(SymDoubleDollar, ch, next)
			default:
				tk = scanner.makeToken(SymDollar, ch)
			}
		case '(':
			tk = scanner.makeToken(SymOpenRound, ch)
		case ')':
			tk = scanner.makeToken(SymClosedRound, ch)
		case '[':
			tk = scanner.makeToken(SymOpenSquare, ch)
		case ']':
			tk = scanner.makeToken(SymClosedSquare, ch)
		case '{':
			tk = scanner.makeToken(SymOpenBrace, ch)
		case '}':
			tk = scanner.makeToken(SymClosedBrace, ch)
		case '~':
			tk = scanner.makeToken(SymTilde, ch)
		case 0:
			// readChar yields 0 when nothing could be read.
			if escape {
				tk = scanner.makeErrorToken(errors.New("incomplete escape sequence"))
			}
			escape = false
		default:
			if (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') {
				// The function keyword immediately followed by '(' is
				// folded into a single function-definition token.
				if tk = scanner.fetchIdentifier(ch); tk.Sym == SymKwFunc {
					if next, _ := scanner.peek(); next == '(' {
						tk = scanner.moveOn(SymFuncDef, ch, next)
					}
				}
			} else if ch >= '0' && ch <= '9' {
				tk = scanner.parseNumber(ch)
			}
		}
		if !escape {
			break
		}
	}

	if tk == nil {
		tk = NewErrorToken(scanner.row, scanner.column, fmt.Errorf("unknown symbol '%c'", ch))
	}
	return
}
// sync is called after a scanning loop that stopped on the first character
// not belonging to the token. If the loop ended with an error, that error is
// returned unchanged; otherwise the extra character is pushed back and the
// outcome of the unread is returned.
func (scanner *scanner) sync(err error) error {
	if err != nil {
		return err
	}
	return scanner.unreadChar()
}
// isBinaryDigit reports whether ch is '0' or '1'.
func isBinaryDigit(ch byte) bool {
	switch ch {
	case '0', '1':
		return true
	default:
		return false
	}
}
// isOctalDigit reports whether ch is an ASCII digit in the range '0'..'7'.
func isOctalDigit(ch byte) bool {
	return '0' <= ch && ch < '8'
}
// isDecimalDigit reports whether ch is an ASCII digit in the range '0'..'9'.
func isDecimalDigit(ch byte) bool {
	// Byte subtraction wraps around, so anything below '0' becomes a large
	// value and fails the comparison as well.
	return ch-'0' < 10
}
// isHexDigit reports whether ch is an ASCII hexadecimal digit: '0'..'9',
// 'a'..'f' or 'A'..'F'.
func isHexDigit(ch byte) bool {
	switch {
	case '0' <= ch && ch <= '9':
		return true
	case 'a' <= ch && ch <= 'f':
		return true
	case 'A' <= ch && ch <= 'F':
		return true
	}
	return false
}
// initBase determines the numeric base of a literal whose first character,
// currentFirstCh, has already been read (parseNumber calls it when that
// character is '0').
//
// The next character is peeked: 'b'/'B', 'o'/'O' and 'x'/'X' select base 2, 8
// and 16 respectively; the prefix letter is consumed and the character after
// it is read and returned as firstCh. Without a prefix the base stays 10 and
// firstCh is currentFirstCh. digitFunc is the predicate accepting the digits
// of the selected base. An error is returned when firstCh is not a valid
// digit for that base or when reading fails; reaching the end of input while
// peeking is not an error.
func (scanner *scanner) initBase(currentFirstCh byte) (firstCh byte, numBase int, digitFunc func(byte) bool, err error) {
	firstCh, numBase, digitFunc = currentFirstCh, 10, isDecimalDigit
	digitType := "decimal"

	var prefix byte
	if prefix, err = scanner.peek(); err != nil {
		if err == io.EOF {
			// A lone digit at the very end of the input is a plain decimal.
			err = nil
		}
		return firstCh, numBase, digitFunc, err
	}

	hasPrefix := true
	switch prefix {
	case 'b', 'B':
		numBase, digitType, digitFunc = 2, "binary", isBinaryDigit
	case 'o', 'O':
		numBase, digitType, digitFunc = 8, "octal", isOctalDigit
	case 'x', 'X':
		numBase, digitType, digitFunc = 16, "hex", isHexDigit
	default:
		hasPrefix = false
	}

	if hasPrefix {
		scanner.readChar() // drop the base letter that was only peeked so far
		firstCh, err = scanner.readChar()
	}

	if err == nil && !digitFunc(firstCh) {
		err = fmt.Errorf("expected %s digit, got '%c'", digitType, firstCh)
	}
	return firstCh, numBase, digitFunc, err
}
// parseNumber scans a numeric literal whose first character, firstCh, has
// already been consumed, and returns the matching token.
//
// A leading '0' lets initBase pick a different base from the prefix. For
// base 10 the integer part may be followed by a '.' and more digits and by an
// exponent ('e'/'E', optional sign, digits), both of which make the literal a
// SymFloat, or by a parenthesized group of digits, which makes it a
// SymFraction. The collected text is then converted; any scanning or
// conversion error is returned as an error token.
func (scanner *scanner) parseNumber(firstCh byte) (tk *Token) {
	var (
		err     error
		sb      strings.Builder
		sym     Symbol = SymInteger
		base           = 10
		accepts        = isDecimalDigit
	)

	if firstCh == '0' {
		firstCh, base, accepts, err = scanner.initBase(firstCh)
	}

	// Integer part, in the selected base.
	ch := firstCh
	for err == nil && accepts(ch) {
		sb.WriteByte(ch)
		ch, err = scanner.readChar()
	}

	if base == 10 {
		// Optional decimal part.
		if err == nil && ch == '.' {
			sym = SymFloat
			sb.WriteByte(ch)
			ch, err = scanner.readChar()
			for err == nil && isDecimalDigit(ch) {
				sb.WriteByte(ch)
				ch, err = scanner.readChar()
			}
		}

		if err == nil {
			switch ch {
			case 'e', 'E':
				// Exponent: optional sign followed by at least one digit.
				sym = SymFloat
				sb.WriteByte(ch)
				if ch, err = scanner.readChar(); err == nil {
					if ch == '+' || ch == '-' {
						sb.WriteByte(ch)
						ch, err = scanner.readChar()
					}
					if isDecimalDigit(ch) {
						for err == nil && isDecimalDigit(ch) {
							sb.WriteByte(ch)
							ch, err = scanner.readChar()
						}
					} else {
						err = fmt.Errorf("[%d:%d] expected integer exponent, got %c", scanner.row, scanner.column, ch)
					}
				}
			case '(':
				// Parenthesized digit group, closed by ')'.
				sym = SymFraction
				sb.WriteByte(ch)
				ch, err = scanner.readChar()
				for err == nil && isDecimalDigit(ch) {
					sb.WriteByte(ch)
					ch, err = scanner.readChar()
				}
				if err == nil {
					if ch == ')' {
						sb.WriteByte(ch)
						// Read one character past the literal so that the
						// push-back below has something to undo.
						_, err = scanner.readChar()
					} else {
						err = fmt.Errorf("[%d:%d] expected ')', got '%c'", scanner.row, scanner.column, ch)
					}
				}
			}
		}
	}

	if err != nil && err != io.EOF {
		return scanner.makeErrorToken(err)
	}

	// Push back the character that ended the literal (nothing to push back
	// at end of input).
	_ = scanner.sync(err) // TODO: Check this function

	txt := sb.String()
	var value any
	switch sym {
	case SymFloat:
		value, err = strconv.ParseFloat(txt, 64)
	case SymFraction:
		value, err = makeGeneratingFraction(txt)
	default:
		value, err = strconv.ParseInt(txt, base, 64)
	}
	if err != nil {
		return scanner.makeErrorToken(err)
	}
	return scanner.makeValueToken(sym, txt, value)
}
// fetchIdentifier scans a word made of letters, digits and underscores whose
// first character, firstCh, has already been consumed, and classifies it.
//
// The upper-cased word is looked up among the keywords first; TRUE and FALSE
// become boolean value tokens; a word immediately followed by '(' becomes a
// function-call token (the parenthesis is consumed); anything else is a plain
// identifier. A read error other than end of input yields an error token.
func (scanner *scanner) fetchIdentifier(firstCh byte) (tk *Token) {
	var (
		err error
		sb  strings.Builder
	)

	isWordChar := func(c byte) bool {
		switch {
		case c == '_',
			c >= 'a' && c <= 'z',
			c >= 'A' && c <= 'Z',
			c >= '0' && c <= '9':
			return true
		}
		return false
	}

	ch := firstCh
	for err == nil && isWordChar(ch) {
		sb.WriteByte(ch)
		ch, err = scanner.readChar()
	}

	// Push back the character that ended the word, unless reading already
	// failed for a reason other than end of input.
	if err == nil || err == io.EOF {
		err = scanner.sync(err)
	}
	if err != nil && err != io.EOF {
		return scanner.makeErrorToken(err)
	}

	txt := sb.String()
	uptxt := strings.ToUpper(txt)

	if sym, ok := keywords[uptxt]; ok {
		return scanner.makeKeywordToken(sym, uptxt)
	}

	switch uptxt {
	case `TRUE`:
		return scanner.makeValueToken(SymBool, txt, true)
	case `FALSE`:
		return scanner.makeValueToken(SymBool, txt, false)
	}

	if next, _ := scanner.peek(); next == '(' {
		scanner.readChar()
		return scanner.makeValueToken(SymFuncCall, txt+"(", txt)
	}
	return scanner.makeValueToken(SymIdentifier, txt, txt)
}
// fetchBlockComment scans the body of a block comment, up to and including
// the closing "*/" sequence, and returns it as a SymComment token. Reaching
// the end of input before the terminator is not accepted.
func (scanner *scanner) fetchBlockComment() *Token {
return scanner.fetchUntil(SymComment, false, '*', '/')
}
// fetchOnLineComment scans a one-line comment, up to and including the next
// newline, and returns it as a SymComment token. The end of input is accepted
// as a terminator too.
func (scanner *scanner) fetchOnLineComment() *Token {
return scanner.fetchUntil(SymComment, true, '\n')
}
// fetchUntil consumes characters until the byte sequence endings has been
// read and returns a value token of kind sym whose value is the text read,
// without the terminator, and whose source is empty.
//
// If reading stops before the terminator is found, the text collected so far
// is returned as the value when allowEos is true; otherwise the read error is
// returned as an error token.
func (scanner *scanner) fetchUntil(sym Symbol, allowEos bool, endings ...byte) (tk *Token) {
	var (
		sb    strings.Builder
		value string
		found bool
	)

	// tail is fed every character read and compared with the terminator.
	tail := NewByteSlider(len(endings))

	ch, err := scanner.readChar()
	for err == nil {
		sb.WriteByte(ch)
		tail.PushEnd(ch)
		if tail.Equal(endings) {
			value = sb.String()[:sb.Len()-len(endings)]
			found = true
			break
		}
		ch, err = scanner.readChar()
	}

	if !found && !allowEos {
		return scanner.makeErrorToken(err)
	}
	if !found {
		value = sb.String()
	}
	return scanner.makeValueToken(sym, "", value)
}
// fetchString scans a string literal whose opening delimiter, termCh, has
// already been consumed, and returns a SymString token. The token value is
// the decoded text; its source is that text wrapped in double quotes.
//
// A backslash escapes the next character: \n, \r and \t are decoded to
// newline, carriage return and tab, every other escaped character stands for
// itself. If the input ends before the closing delimiter, an error token is
// returned whose message is the missing delimiter.
//
// NOTE(review): that error message consists of the delimiter character alone;
// presumably callers complete it - confirm before rewording.
func (scanner *scanner) fetchString(termCh byte) (tk *Token) {
	var (
		err     error
		sb      strings.Builder
		escaped bool // the previous character was an unescaped backslash
	)

scan:
	for {
		var ch byte
		if ch, err = scanner.readChar(); err != nil {
			break
		}

		switch {
		case escaped:
			switch ch {
			case 'n':
				ch = '\n'
			case 'r':
				ch = '\r'
			case 't':
				ch = '\t'
			}
			sb.WriteByte(ch)
			escaped = false
		case ch == termCh:
			break scan
		case ch == '\\':
			escaped = true
		default:
			sb.WriteByte(ch)
		}
	}

	switch {
	case err == nil:
		txt := sb.String()
		tk = scanner.makeValueToken(SymString, `"`+txt+`"`, txt)
	case err == io.EOF:
		tk = scanner.makeErrorToken(errors.New(string(termCh)))
	default:
		tk = scanner.makeErrorToken(err)
	}
	return tk
}
// peek returns the next byte of the stream without consuming it and without
// touching the position counters. On error the returned byte is 0.
func (scanner *scanner) peek() (next byte, err error) {
	var ahead []byte
	ahead, err = scanner.stream.Peek(1)
	if err != nil {
		return 0, err
	}
	return ahead[0], nil
}
// skipBlanks consumes every upcoming byte whose value is 32 (space) or less.
// It returns nil as soon as a byte above 32 is found, leaving it unread, or
// the error reported by the stream (io.EOF at end of input).
func (scanner *scanner) skipBlanks() (err error) {
	for {
		var ahead []byte
		if ahead, err = scanner.stream.Peek(1); err != nil || ahead[0] > 32 {
			return err
		}
		scanner.readChar()
	}
}
// translate returns the symbol that sym is mapped to in the scanner's
// translation table, or sym itself when there is no table or no entry.
func (scanner *scanner) translate(sym Symbol) Symbol {
	// Looking up a nil map is safe in Go and simply reports "not found".
	if alias, found := scanner.translations[sym]; found {
		return alias
	}
	return sym
}
// moveOn builds a token for a multi-character symbol at the current position,
// using chars as its source text, and then consumes ONE character from the
// stream when more than one character was given. Callers scanning a
// three-character symbol must therefore have consumed the second character
// themselves.
func (scanner *scanner) moveOn(sym Symbol, chars ...byte) (tk *Token) {
	source := string(chars)
	tk = NewToken(scanner.row, scanner.column, scanner.translate(sym), source)
	if len(chars) < 2 {
		return tk
	}
	scanner.readChar()
	return tk
}
// accept builds a token for sym (after translation) at the current position,
// using chars as its source text. It consumes nothing from the stream.
func (scanner *scanner) accept(sym Symbol, chars ...byte) (tk *Token) {
	return NewToken(scanner.row, scanner.column, scanner.translate(sym), string(chars))
}
// makeToken builds a token for sym (after translation) at the current
// position, using chars as its source text.
func (scanner *scanner) makeToken(sym Symbol, chars ...byte) (tk *Token) {
	return NewToken(scanner.row, scanner.column, scanner.translate(sym), string(chars))
}
// makeKeywordToken builds a token for the keyword symbol sym (after
// translation) at the current position; the upper-case spelling of the
// keyword is stored as its source text.
func (scanner *scanner) makeKeywordToken(sym Symbol, upperCaseKeyword string) (tk *Token) {
	return NewToken(scanner.row, scanner.column, scanner.translate(sym), upperCaseKeyword)
}
// makeValueToken builds a token for sym (after translation) at the current
// position, carrying both its source text and an associated value.
func (scanner *scanner) makeValueToken(sym Symbol, source string, value any) (tk *Token) {
	return NewValueToken(scanner.row, scanner.column, scanner.translate(sym), source, value)
}
// makeErrorToken builds an error token carrying err, positioned at the
// scanner's current row and column.
func (scanner *scanner) makeErrorToken(err error) *Token {
return NewErrorToken(scanner.row, scanner.column, err)
}