using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;

namespace Jace.Tokenizer
{
    /// <summary>
    /// A token reader that converts the input string into a list of tokens.
    /// </summary>
    public class TokenReader
    {
        private readonly CultureInfo cultureInfo;
        private readonly char decimalSeparator;
        private readonly char argumentSeparator;

        /// <summary>
        /// Create a token reader that uses the current culture of the calling thread.
        /// </summary>
        public TokenReader()
            : this(CultureInfo.CurrentCulture)
        {
        }

        /// <summary>
        /// Create a token reader for the provided culture. The first character of the culture's
        /// number decimal separator is used as decimal separator and the first character of the
        /// culture's list separator is used as argument separator.
        /// </summary>
        /// <param name="cultureInfo">The culture used to recognize and parse numbers.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="cultureInfo"/> is null.</exception>
        public TokenReader(CultureInfo cultureInfo)
        {
            if (cultureInfo == null)
            {
                throw new ArgumentNullException("cultureInfo");
            }

            this.cultureInfo = cultureInfo;
            this.decimalSeparator = cultureInfo.NumberFormat.NumberDecimalSeparator[0];
            this.argumentSeparator = cultureInfo.TextInfo.ListSeparator[0];
        }

        /// <summary>
        /// Read in the provided formula and convert it into a list of tokens that can be processed by the
        /// Abstract Syntax Tree Builder.
        /// </summary>
        /// <param name="formula">The formula that must be converted into a list of tokens.</param>
        /// <returns>The list of tokens for the provided formula.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="formula"/> is null or empty.</exception>
        /// <exception cref="ParseException">
        /// Thrown when an unsupported character, an incomplete two-character operator or a second
        /// exponent marker inside one number is encountered.
        /// </exception>
        public List<Token> Read(string formula)
        {
            if (string.IsNullOrEmpty(formula))
            {
                throw new ArgumentNullException("formula");
            }

            List<Token> tokens = new List<Token>();
            char[] characters = formula.ToCharArray();

            // While true, a '-' at the current position may start a numeric literal
            // (it is handed to the numeric branch instead of the operator switch).
            bool isFormulaSubPart = true;

            for (int i = 0; i < characters.Length; i++)
            {
                if (IsPartOfNumeric(characters[i], true, isFormulaSubPart))
                {
                    int startPosition = i;

                    // Tracked per literal so that a malformed number cannot make a later,
                    // valid exponent look like a duplicate one.
                    bool isScientific = false;

                    StringBuilder buffer = new StringBuilder();
                    buffer.Append(characters[i]);

                    while (++i < characters.Length && IsPartOfNumeric(characters[i], false, isFormulaSubPart))
                    {
                        if (IsScientificNotation(characters[i]))
                        {
                            if (isScientific)
                            {
                                throw CreateInvalidTokenException(characters[i], i);
                            }

                            isScientific = true;

                            // A '-' directly after the exponent marker belongs to the number.
                            // The bounds check protects against a formula ending in 'e' / 'E'.
                            if (IsFollowedBy(characters, i, '-'))
                            {
                                buffer.Append(characters[i++]);
                            }
                        }

                        buffer.Append(characters[i]);
                    }

                    string numericText = buffer.ToString();

                    // Verify if we do not have an int
                    int intValue;
                    double doubleValue;
                    if (int.TryParse(numericText, out intValue))
                    {
                        tokens.Add(new Token() { TokenType = TokenType.Integer, Value = intValue, StartPosition = startPosition, Length = i - startPosition });
                        isFormulaSubPart = false;
                    }
                    else if (double.TryParse(numericText, NumberStyles.Float | NumberStyles.AllowThousands, cultureInfo, out doubleValue))
                    {
                        tokens.Add(new Token() { TokenType = TokenType.FloatingPoint, Value = doubleValue, StartPosition = startPosition, Length = i - startPosition });
                        isFormulaSubPart = false;
                    }
                    else if (numericText == "-")
                    {
                        // Verify if we have a unary minus, we use the token '_' for a unary minus in the AST builder
                        tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '_', StartPosition = startPosition, Length = 1 });
                    }
                    // Else we skip: text that is neither a number nor a lone '-' produces no token.

                    if (i == characters.Length)
                    {
                        // Last character read
                        break;
                    }
                }

                if (IsPartOfVariable(characters[i], true))
                {
                    int startPosition = i;

                    while (++i < characters.Length && IsPartOfVariable(characters[i], false))
                    {
                        // Only advancing; the identifier text is sliced out in one go below.
                    }

                    string name = new string(characters, startPosition, i - startPosition);
                    tokens.Add(new Token() { TokenType = TokenType.Text, Value = name, StartPosition = startPosition, Length = i - startPosition });
                    isFormulaSubPart = false;

                    if (i == characters.Length)
                    {
                        // Last character read
                        break;
                    }
                }

                if (characters[i] == this.argumentSeparator)
                {
                    tokens.Add(new Token() { TokenType = TokenType.ArgumentSeparator, Value = characters[i], StartPosition = i, Length = 1 });
                    isFormulaSubPart = false;
                }
                else
                {
                    switch (characters[i])
                    {
                        case ' ':
                            continue;
                        case '+':
                        case '-':
                        case '*':
                        case '/':
                        case '^':
                        case '%':
                        case '≤':
                        case '≥':
                        case '≠':
                            if (IsUnaryMinus(characters[i], tokens))
                            {
                                // We use the token '_' for a unary minus in the AST builder
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '_', StartPosition = i, Length = 1 });
                            }
                            else
                            {
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = characters[i], StartPosition = i, Length = 1 });
                            }
                            isFormulaSubPart = true;
                            break;
                        case '(':
                            tokens.Add(new Token() { TokenType = TokenType.LeftBracket, Value = characters[i], StartPosition = i, Length = 1 });
                            isFormulaSubPart = true;
                            break;
                        case ')':
                            tokens.Add(new Token() { TokenType = TokenType.RightBracket, Value = characters[i], StartPosition = i, Length = 1 });
                            isFormulaSubPart = false;
                            break;
                        case '<':
                            if (IsFollowedBy(characters, i, '='))
                            {
                                // "<=" is normalized to the single character '≤'
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '≤', StartPosition = i, Length = 2 });
                                i++;
                            }
                            else
                            {
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '<', StartPosition = i, Length = 1 });
                            }
                            isFormulaSubPart = false;
                            break;
                        case '>':
                            if (IsFollowedBy(characters, i, '='))
                            {
                                // ">=" is normalized to the single character '≥'
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '≥', StartPosition = i, Length = 2 });
                                i++;
                            }
                            else
                            {
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '>', StartPosition = i, Length = 1 });
                            }
                            isFormulaSubPart = false;
                            break;
                        case '!':
                            if (!IsFollowedBy(characters, i, '='))
                            {
                                throw CreateInvalidTokenException(characters[i], i);
                            }

                            // "!=" is normalized to the single character '≠'
                            tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '≠', StartPosition = i, Length = 2 });
                            i++;
                            isFormulaSubPart = false;
                            break;
                        case '&':
                            if (!IsFollowedBy(characters, i, '&'))
                            {
                                throw CreateInvalidTokenException(characters[i], i);
                            }

                            // "&&" is emitted as a single '&' operation
                            tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '&', StartPosition = i, Length = 2 });
                            i++;
                            isFormulaSubPart = false;
                            break;
                        case '|':
                            if (!IsFollowedBy(characters, i, '|'))
                            {
                                throw CreateInvalidTokenException(characters[i], i);
                            }

                            // "||" is emitted as a single '|' operation
                            tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '|', StartPosition = i, Length = 2 });
                            i++;
                            isFormulaSubPart = false;
                            break;
                        case '=':
                            if (!IsFollowedBy(characters, i, '='))
                            {
                                throw CreateInvalidTokenException(characters[i], i);
                            }

                            // "==" is emitted as a single '=' operation
                            tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '=', StartPosition = i, Length = 2 });
                            i++;
                            isFormulaSubPart = false;
                            break;
                        default:
                            throw CreateInvalidTokenException(characters[i], i);
                    }
                }
            }

            return tokens;
        }

        /// <summary>
        /// Determine whether the character may be part of a numeric literal: the decimal separator,
        /// a digit, a leading '-' (only where a new sub-formula can start) or a non-leading 'e' / 'E'.
        /// </summary>
        private bool IsPartOfNumeric(char character, bool isFirstCharacter, bool isFormulaSubPart)
        {
            if (character == decimalSeparator || (character >= '0' && character <= '9'))
            {
                return true;
            }

            if (isFirstCharacter)
            {
                return isFormulaSubPart && character == '-';
            }

            return IsScientificNotation(character);
        }

        /// <summary>
        /// Determine whether the character may be part of a variable or function name: an ASCII
        /// letter at any position, or a digit or '_' at any position but the first.
        /// </summary>
        private bool IsPartOfVariable(char character, bool isFirstCharacter)
        {
            bool isLetter = (character >= 'a' && character <= 'z') || (character >= 'A' && character <= 'Z');
            if (isLetter)
            {
                return true;
            }

            return !isFirstCharacter && ((character >= '0' && character <= '9') || character == '_');
        }

        /// <summary>
        /// Determine whether a '-' is a unary minus: it is unary unless the previous token is a
        /// number, a text token or a right bracket. A '-' without any preceding token is unary.
        /// </summary>
        private bool IsUnaryMinus(char currentToken, List<Token> tokens)
        {
            if (currentToken != '-')
            {
                return false;
            }

            if (tokens.Count == 0)
            {
                // Nothing precedes the minus, so it cannot be a binary subtraction.
                return true;
            }

            Token previousToken = tokens[tokens.Count - 1];

            return !(previousToken.TokenType == TokenType.FloatingPoint ||
                     previousToken.TokenType == TokenType.Integer ||
                     previousToken.TokenType == TokenType.Text ||
                     previousToken.TokenType == TokenType.RightBracket);
        }

        /// <summary>
        /// Determine whether the character is an exponent marker of the scientific notation.
        /// </summary>
        private bool IsScientificNotation(char currentToken)
        {
            return currentToken == 'e' || currentToken == 'E';
        }

        /// <summary>
        /// Determine whether the character right after <paramref name="index"/> exists and equals
        /// <paramref name="expected"/>.
        /// </summary>
        private static bool IsFollowedBy(char[] characters, int index, char expected)
        {
            return index + 1 < characters.Length && characters[index + 1] == expected;
        }

        /// <summary>
        /// Build the exception that reports an invalid character and its position in the formula.
        /// </summary>
        private static ParseException CreateInvalidTokenException(char token, int position)
        {
            return new ParseException(string.Format("Invalid token \"{0}\" detected at position {1}.", token, position));
        }
    }
}