// Copyright (c) 2024 Celestino Amoroso (celestino.amoroso@gmail.com). // All rights reserved. // expr project scanner.go package expr import ( "bufio" "errors" "fmt" "io" "strconv" "strings" ) type scanner struct { current *Token prev *Token stage *Token stream *bufio.Reader row int column int translations map[Symbol]Symbol } func NewScanner(s io.Reader, translations map[Symbol]Symbol) (inst *scanner) { inst = &scanner{ stream: bufio.NewReader(s), row: 1, column: 1, translations: translations, } inst.current = inst.fetchNextToken() return inst } func DefaultTranslations() map[Symbol]Symbol { return map[Symbol]Symbol{ SymDoubleAmpersand: SymAnd, SymKwAnd: SymAnd, SymDoubleVertBar: SymOr, SymKwOr: SymOr, // SymTilde: SymNot, SymKwNot: SymNot, SymLessGreater: SymNotEqual, } } // func (self *scanner) Current() *Token { // return self.current // } func (scanner *scanner) readChar() (ch byte, err error) { if ch, err = scanner.stream.ReadByte(); err == nil { if ch == '\n' { scanner.row++ scanner.column = 0 } else { scanner.column++ } } return } func (scanner *scanner) unreadChar() (err error) { if err = scanner.stream.UnreadByte(); err == nil { if scanner.column--; scanner.column == 0 { if scanner.row--; scanner.row == 0 { err = errors.New("unread beyond the stream boundary") } else { scanner.column = 1 } } } return } func (scanner *scanner) UnreadToken() (err error) { if scanner.stage == nil { scanner.stage = scanner.current scanner.current = scanner.prev } else { err = fmt.Errorf("staging already present, currently one level only of staging is allowed") } return } func (scanner *scanner) lastPos() (r, c int) { if scanner.prev != nil { r = scanner.prev.row c = scanner.prev.col } return } func (scanner *scanner) Previous() *Token { return scanner.prev } func (scanner *scanner) Next() (tk *Token) { scanner.prev = scanner.current tk = scanner.current if scanner.stage != nil { scanner.current = scanner.stage scanner.stage = nil } else { scanner.current = scanner.fetchNextToken() } return tk } func (scanner *scanner) fetchNextToken() (tk *Token) { var ch byte if err := scanner.skipBlanks(); err != nil { return scanner.makeErrorToken(err) } escape := false for { ch, _ = scanner.readChar() switch ch { case '+': if next, _ := scanner.peek(); next == '+' { tk = scanner.moveOn(SymDoublePlus, ch, next) } else if next == '=' { tk = scanner.moveOn(SymPlusEqual, ch, next) } else { tk = scanner.makeToken(SymPlus, ch) } case '-': if next, _ := scanner.peek(); next == '-' { tk = scanner.moveOn(SymDoubleMinus, ch, next) } else if next == '=' { tk = scanner.moveOn(SymMinusEqual, ch, next) } else { tk = scanner.makeToken(SymMinus, ch) } case '*': if next, _ := scanner.peek(); next == '*' { tk = scanner.moveOn(SymDoubleStar, ch, next) // } else if next == '/' { // tk = self.moveOn(SymClosedComment, ch, next) } else if next, _ = scanner.peek(); next == '=' { tk = scanner.moveOn(SymStarEqual, ch, next) } else { tk = scanner.makeToken(SymStar, ch) } case '/': if next, _ := scanner.peek(); next == '*' { scanner.readChar() tk = scanner.fetchBlockComment() } else if next, _ = scanner.peek(); next == '=' { tk = scanner.moveOn(SymSlashEqual, ch, next) } else if next == '/' { scanner.readChar() tk = scanner.fetchOnLineComment() } else { tk = scanner.makeToken(SymSlash, ch) } case '\\': if escape { tk = scanner.makeToken(SymBackSlash, ch) escape = false } else { escape = true } case '|': if next, _ := scanner.peek(); next == '|' { tk = scanner.moveOn(SymDoubleVertBar, ch, next) } else { tk = scanner.makeToken(SymVertBar, ch) } case ',': tk = scanner.makeToken(SymComma, ch) case '^': tk = scanner.makeToken(SymCaret, ch) case ':': if next, _ := scanner.peek(); next == ':' { tk = scanner.moveOn(SymDoubleColon, ch, next) } else { tk = scanner.makeToken(SymColon, ch) } case ';': tk = scanner.makeToken(SymSemiColon, ch) case '.': //if next, _ := self.peek(); next >= '0' && next <= '9' { // tk = self.parseNumber(ch) //} else if next == '/' { if next, _ := scanner.peek(); next == '/' { tk = scanner.moveOn(SymDotSlash, ch, next) } else if next == '.' { if next1, _ := scanner.peek(); next1 == '.' { tk = scanner.moveOn(SymTripleDot, ch, next, next1) } else { tk = scanner.moveOn(SymDoubleDot, ch, next) } } else { tk = scanner.makeToken(SymDot, ch) } case '\'': if escape { tk = scanner.makeToken(SymQuote, ch) escape = false } else { tk = scanner.fetchString(ch) } case '"': if escape { tk = scanner.makeToken(SymDoubleQuote, ch) escape = false } else { tk = scanner.fetchString(ch) } case '`': tk = scanner.makeToken(SymBackTick, ch) case '!': if next, _ := scanner.peek(); next == '=' { tk = scanner.moveOn(SymNotEqual, ch, next) } else { tk = scanner.makeToken(SymExclamation, ch) } case '?': if next, _ := scanner.peek(); next == '?' { tk = scanner.moveOn(SymDoubleQuestion, ch, next) } else if next == '=' { tk = scanner.moveOn(SymQuestionEqual, ch, next) } else if next == '!' { tk = scanner.moveOn(SymQuestionExclam, ch, next) } else { tk = scanner.makeToken(SymQuestion, ch) } case '&': if next, _ := scanner.peek(); next == '&' { tk = scanner.moveOn(SymDoubleAmpersand, ch, next) } else { tk = scanner.makeToken(SymAmpersand, ch) } case '%': tk = scanner.makeToken(SymPercent, ch) case '#': tk = scanner.makeToken(SymHash, ch) case '@': if next, _ := scanner.peek(); (next >= 'a' && next <= 'z') || (next >= 'A' && next <= 'Z') { scanner.readChar() if tk = scanner.fetchIdentifier(next); tk.Sym == SymIdentifier { //tk.Sym = SymIdRef tk.source = "@" + tk.source } else { tk = scanner.makeErrorToken(fmt.Errorf("invalid variable reference %q", tk.source)) } } else if next == '@' { tk = scanner.moveOn(SymDoubleAt, ch, next) } else { tk = scanner.makeToken(SymAt, ch) } case '_': tk = scanner.makeToken(SymUndescore, ch) case '=': if next, _ := scanner.peek(); next == '=' { tk = scanner.moveOn(SymDoubleEqual, ch, next) } else { tk = scanner.makeToken(SymEqual, ch) } case '<': if next, _ := scanner.peek(); next == '=' { tk = scanner.moveOn(SymLessOrEqual, ch, next) } else if next == '<' { tk = scanner.moveOn(SymAppend, ch, next) } else if next == '>' { tk = scanner.moveOn(SymLessGreater, ch, next) } else { tk = scanner.makeToken(SymLess, ch) } case '>': if next, _ := scanner.peek(); next == '=' { tk = scanner.moveOn(SymGreaterOrEqual, ch, next) } else if next == '>' { tk = scanner.moveOn(SymInsert, ch, next) } else { tk = scanner.makeToken(SymGreater, ch) } case '$': if next, _ := scanner.peek(); next == '(' { tk = scanner.moveOn(SymDollarRound, ch, next) tk.source += ")" } else if next == '$' { tk = scanner.moveOn(SymDoubleDollar, ch, next) } else { tk = scanner.makeToken(SymDollar, ch) } case '(': // if next, _ := scanner.peek(); next == ')' { // tk = scanner.moveOn(SymOpenClosedRound, ch, next) // } else { tk = scanner.makeToken(SymOpenRound, ch) // } case ')': tk = scanner.makeToken(SymClosedRound, ch) case '[': tk = scanner.makeToken(SymOpenSquare, ch) case ']': tk = scanner.makeToken(SymClosedSquare, ch) case '{': tk = scanner.makeToken(SymOpenBrace, ch) case '}': tk = scanner.makeToken(SymClosedBrace, ch) case '~': tk = scanner.makeToken(SymTilde, ch) case 0: if escape { tk = scanner.makeErrorToken(errors.New("incomplete escape sequence")) } escape = false default: if /*ch == '_' ||*/ (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') { if tk = scanner.fetchIdentifier(ch); tk.Sym == SymKwFunc { if next, _ := scanner.peek(); next == '(' { tk = scanner.moveOn(SymFuncDef, ch, next) } } } else if ch >= '0' && ch <= '9' { tk = scanner.parseNumber(ch) } } if !escape { break } } if tk == nil { tk = NewErrorToken(scanner.row, scanner.column, fmt.Errorf("unknown symbol '%c'", ch)) } return } func (scanner *scanner) sync(err error) error { if err == nil { err = scanner.unreadChar() } return err } func isBinaryDigit(ch byte) bool { return ch == '0' || ch == '1' } func isOctalDigit(ch byte) bool { return ch >= '0' && ch <= '7' } func isDecimalDigit(ch byte) bool { return ch >= '0' && ch <= '9' } func isHexDigit(ch byte) bool { return (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F') } func (scanner *scanner) initBase(currentFirstCh byte) (firstCh byte, numBase int, digitFunc func(byte) bool, err error) { var ch byte var digitType string firstCh = currentFirstCh digitFunc = isDecimalDigit numBase = 10 if ch, err = scanner.peek(); err == nil { if ch == 'b' || ch == 'B' { numBase = 2 digitType = "binary" scanner.readChar() digitFunc = isBinaryDigit firstCh, err = scanner.readChar() } else if ch == 'o' || ch == 'O' { numBase = 8 digitType = "octal" scanner.readChar() digitFunc = isOctalDigit firstCh, err = scanner.readChar() } else if ch == 'x' || ch == 'X' { numBase = 16 digitType = "hex" scanner.readChar() digitFunc = isHexDigit firstCh, err = scanner.readChar() } if err == nil && !digitFunc(firstCh) { if len(digitType) == 0 { digitType = "decimal" } err = fmt.Errorf("expected %s digit, got '%c'", digitType, firstCh) } } else if err == io.EOF { err = nil } return } func (scanner *scanner) parseNumber(firstCh byte) (tk *Token) { var err error var ch byte var sym Symbol = SymInteger var sb strings.Builder var isDigit func(byte) bool = isDecimalDigit var numBase = 10 if firstCh == '0' { firstCh, numBase, isDigit, err = scanner.initBase(firstCh) } for ch = firstCh; err == nil && isDigit(ch); ch, err = scanner.readChar() { sb.WriteByte(ch) } if numBase == 10 { if err == nil && ch == '.' { sym = SymFloat sb.WriteByte(ch) ch, err = scanner.readChar() if ch >= '0' && ch <= '9' { for ; err == nil && (ch >= '0' && ch <= '9'); ch, err = scanner.readChar() { sb.WriteByte(ch) } } } if err == nil { if ch == 'e' || ch == 'E' { sym = SymFloat sb.WriteByte(ch) if ch, err = scanner.readChar(); err == nil { if ch == '+' || ch == '-' { sb.WriteByte(ch) ch, err = scanner.readChar() } if ch >= '0' && ch <= '9' { for ; err == nil && (ch >= '0' && ch <= '9'); ch, err = scanner.readChar() { sb.WriteByte(ch) } } else { err = fmt.Errorf("[%d:%d] expected integer exponent, got %c", scanner.row, scanner.column, ch) } } } else if ch == '(' { sym = SymFraction sb.WriteByte(ch) ch, err = scanner.readChar() for ; err == nil && (ch >= '0' && ch <= '9'); ch, err = scanner.readChar() { sb.WriteByte(ch) } if err == nil { if ch != ')' { err = fmt.Errorf("[%d:%d] expected ')', got '%c'", scanner.row, scanner.column, ch) } else { sb.WriteByte(ch) _, err = scanner.readChar() } } } } } if err != nil && err != io.EOF { tk = scanner.makeErrorToken(err) } else { var value any _ = scanner.sync(err) // TODO: Check this function txt := sb.String() if sym == SymFloat { value, err = strconv.ParseFloat(txt, 64) } else if sym == SymFraction { value, err = makeGeneratingFraction(txt) } else { value, err = strconv.ParseInt(txt, numBase, 64) } if err == nil { tk = scanner.makeValueToken(sym, txt, value) } else { tk = scanner.makeErrorToken(err) } } return } func (scanner *scanner) fetchIdentifier(firstCh byte) (tk *Token) { var err error var sb strings.Builder for ch := firstCh; err == nil && (ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9')); ch, err = scanner.readChar() { sb.WriteByte(ch) } if err != nil && err != io.EOF { tk = scanner.makeErrorToken(err) } else if err = scanner.sync(err); err != nil && err != io.EOF { tk = scanner.makeErrorToken(err) } else { txt := sb.String() uptxt := strings.ToUpper(txt) if sym, ok := keywords[uptxt]; ok { tk = scanner.makeKeywordToken(sym, uptxt) } else if uptxt == `TRUE` { tk = scanner.makeValueToken(SymBool, txt, true) } else if uptxt == `FALSE` { tk = scanner.makeValueToken(SymBool, txt, false) } else if ch, _ := scanner.peek(); ch == '(' { scanner.readChar() tk = scanner.makeValueToken(SymFuncCall, txt+"(", txt) } else { tk = scanner.makeValueToken(SymIdentifier, txt, txt) } } // if err != nil && err != io.EOF { // tk = self.makeErrorToken(err) // } else if err = self.sync(err); err != nil && err != io.EOF { // tk = self.makeErrorToken(err) // } else { // txt := sb.String() // uptxt := strings.ToUpper(txt) // if sym, ok := keywords[uptxt]; ok { // tk = self.makeValueToken(sym, txt, "") // } else { // tk = self.makeValueToken(SymIdentifier, txt, txt) // } // } return } func (scanner *scanner) fetchBlockComment() *Token { return scanner.fetchUntil(SymComment, false, '*', '/') } func (scanner *scanner) fetchOnLineComment() *Token { return scanner.fetchUntil(SymComment, true, '\n') } func (scanner *scanner) fetchUntil(sym Symbol, allowEos bool, endings ...byte) (tk *Token) { var err error var ch byte var sb strings.Builder var value string ring := NewByteSlider(len(endings)) endReached := false for ch, err = scanner.readChar(); err == nil && !endReached; { sb.WriteByte(ch) ring.PushEnd(ch) if ring.Equal(endings) { value = sb.String()[0 : sb.Len()-len(endings)] endReached = true } else { ch, err = scanner.readChar() } } if !endReached && allowEos { value = sb.String() endReached = true } if endReached { tk = scanner.makeValueToken(sym, "", value) } else { tk = scanner.makeErrorToken(err) } return } func (scanner *scanner) fetchString(termCh byte) (tk *Token) { var err error var ch, prev byte var sb strings.Builder for ch, err = scanner.readChar(); err == nil; ch, err = scanner.readChar() { if prev == '\\' { switch ch { case '"': sb.WriteByte('"') case 'n': sb.WriteByte('\n') case 'r': sb.WriteByte('\r') case 't': sb.WriteByte('\t') case '\\': sb.WriteByte('\\') default: sb.WriteByte(ch) } prev = 0 } else if ch == termCh { break } else { prev = ch if ch != '\\' { sb.WriteByte(ch) } } } if err != nil { if err == io.EOF { tk = scanner.makeErrorToken(errors.New(string(termCh))) } else { tk = scanner.makeErrorToken(err) } } else { txt := sb.String() tk = scanner.makeValueToken(SymString, `"`+txt+`"`, txt) } return } func (scanner *scanner) peek() (next byte, err error) { var one []byte if one, err = scanner.stream.Peek(1); err == nil { next = one[0] } return } func (scanner *scanner) skipBlanks() (err error) { var one []byte for one, err = scanner.stream.Peek(1); err == nil && one[0] <= 32; one, err = scanner.stream.Peek(1) { scanner.readChar() } return } func (scanner *scanner) translate(sym Symbol) Symbol { if scanner.translations != nil { if translatedSym, ok := scanner.translations[sym]; ok { return translatedSym } } return sym } func (scanner *scanner) moveOn(sym Symbol, chars ...byte) (tk *Token) { tk = NewToken(scanner.row, scanner.column, scanner.translate(sym), string(chars)) for i := 1; i < len(chars); i++ { scanner.readChar() } return } func (scanner *scanner) makeToken(sym Symbol, chars ...byte) (tk *Token) { tk = NewToken(scanner.row, scanner.column, scanner.translate(sym), string(chars)) return } func (scanner *scanner) makeKeywordToken(sym Symbol, upperCaseKeyword string) (tk *Token) { tk = NewToken(scanner.row, scanner.column, scanner.translate(sym), upperCaseKeyword) return } func (scanner *scanner) makeValueToken(sym Symbol, source string, value any) (tk *Token) { tk = NewValueToken(scanner.row, scanner.column, scanner.translate(sym), source, value) return } func (scanner *scanner) makeErrorToken(err error) *Token { return NewErrorToken(scanner.row, scanner.column, err) }