using System;
|
using System.Collections.Generic;
|
using System.Globalization;
|
using System.Linq;
|
using System.Text;
|
|
namespace Jace.Tokenizer
{
    /// <summary>
    /// A token reader that converts the input string into a list of tokens.
    /// </summary>
    /// <remarks>
    /// The decimal separator and the argument separator are taken from the first character of
    /// the culture's <c>NumberDecimalSeparator</c> and <c>ListSeparator</c> respectively.
    /// </remarks>
    public class TokenReader
    {
        // Format string shared by every "unexpected character" error raised by the reader.
        private const string InvalidTokenMessage = "Invalid token \"{0}\" detected at position {1}.";

        private readonly CultureInfo cultureInfo;
        private readonly char decimalSeparator;
        private readonly char argumentSeparator;

        /// <summary>
        /// Creates a token reader that uses <see cref="CultureInfo.CurrentCulture"/>.
        /// </summary>
        public TokenReader()
            : this(CultureInfo.CurrentCulture)
        {
        }

        /// <summary>
        /// Creates a token reader that uses the separators of the provided culture.
        /// </summary>
        /// <param name="cultureInfo">The culture that defines the decimal and the argument separator.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="cultureInfo"/> is null.</exception>
        public TokenReader(CultureInfo cultureInfo)
        {
            if (cultureInfo == null)
            {
                throw new ArgumentNullException("cultureInfo");
            }

            this.cultureInfo = cultureInfo;
            this.decimalSeparator = cultureInfo.NumberFormat.NumberDecimalSeparator[0];
            this.argumentSeparator = cultureInfo.TextInfo.ListSeparator[0];
        }

        /// <summary>
        /// Read in the provided formula and convert it into a list of tokens that can be processed by the
        /// Abstract Syntax Tree Builder.
        /// </summary>
        /// <param name="formula">The formula that must be converted into a list of tokens.</param>
        /// <returns>The list of tokens for the provided formula.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="formula"/> is null or empty.</exception>
        /// <exception cref="ParseException">
        /// Thrown when an unsupported character is found, when an operator such as "!", "&amp;", "|" or "="
        /// is not followed by its required second character, or when a numeric literal contains more
        /// than one exponent marker.
        /// </exception>
        public List<Token> Read(string formula)
        {
            if (string.IsNullOrEmpty(formula))
            {
                throw new ArgumentNullException("formula");
            }

            List<Token> tokens = new List<Token>();
            char[] characters = formula.ToCharArray();

            // True while the reader is at a position where a new operand may start
            // (start of the formula, after an arithmetic operator or after '(').
            // Only in that state a '-' is accepted as the first character of a numeric literal.
            bool isFormulaSubPart = true;

            for (int i = 0; i < characters.Length; i++)
            {
                if (IsPartOfNumeric(characters[i], true, isFormulaSubPart))
                {
                    StringBuilder buffer = new StringBuilder();
                    buffer.Append(characters[i]);
                    int startPosition = i;

                    // Tracked per literal so that a previous unparsable literal cannot
                    // make a later, valid exponent look like a duplicate.
                    bool isScientific = false;

                    while (++i < characters.Length && IsPartOfNumeric(characters[i], false, isFormulaSubPart))
                    {
                        if (IsScientificNotation(characters[i]))
                        {
                            // "-e..." can never be a number: stop here so that the '-' becomes a
                            // unary minus and the 'e'/'E' is read as the start of an identifier.
                            if (buffer.Length == 1 && buffer[0] == '-')
                            {
                                break;
                            }

                            if (isScientific)
                            {
                                throw CreateInvalidTokenException(characters[i], i);
                            }

                            isScientific = true;

                            // A negative exponent: consume the marker now, the '-' is appended below.
                            if (i + 1 < characters.Length && characters[i + 1] == '-')
                            {
                                buffer.Append(characters[i++]);
                            }
                        }

                        buffer.Append(characters[i]);
                    }

                    string numericText = buffer.ToString();
                    int intValue;
                    double doubleValue;

                    // Verify if we do not have an int.
                    // NOTE: this overload of int.TryParse uses the current thread culture.
                    if (int.TryParse(numericText, out intValue))
                    {
                        tokens.Add(new Token() { TokenType = TokenType.Integer, Value = intValue, StartPosition = startPosition, Length = i - startPosition });
                        isFormulaSubPart = false;
                    }
                    else if (double.TryParse(numericText, NumberStyles.Float | NumberStyles.AllowThousands, cultureInfo, out doubleValue))
                    {
                        tokens.Add(new Token() { TokenType = TokenType.FloatingPoint, Value = doubleValue, StartPosition = startPosition, Length = i - startPosition });
                        isFormulaSubPart = false;
                    }
                    else if (numericText == "-")
                    {
                        // A lone '-' is a unary minus, we use the token '_' for a unary minus in the AST builder
                        AddOperation(tokens, '_', startPosition, 1);
                    }
                    // Else the unparsable numeric text is skipped without producing a token.

                    if (i == characters.Length)
                    {
                        // Last character read
                        continue;
                    }
                }

                // No "else": the character that ended a numeric literal may start an identifier.
                if (IsPartOfVariable(characters[i], true))
                {
                    int startPosition = i;

                    while (++i < characters.Length && IsPartOfVariable(characters[i], false))
                    {
                        // Only advance; the text is copied in one go below.
                    }

                    string text = new string(characters, startPosition, i - startPosition);
                    tokens.Add(new Token() { TokenType = TokenType.Text, Value = text, StartPosition = startPosition, Length = i - startPosition });
                    isFormulaSubPart = false;

                    if (i == characters.Length)
                    {
                        // Last character read
                        continue;
                    }
                }

                if (characters[i] == this.argumentSeparator)
                {
                    tokens.Add(new Token() { TokenType = TokenType.ArgumentSeparator, Value = characters[i], StartPosition = i, Length = 1 });
                    isFormulaSubPart = false;
                    continue;
                }

                switch (characters[i])
                {
                    case ' ':
                        continue;
                    case '+':
                    case '-':
                    case '*':
                    case '/':
                    case '^':
                    case '%':
                    case '≤':
                    case '≥':
                    case '≠':
                        // We use the token '_' for a unary minus in the AST builder
                        AddOperation(tokens, IsUnaryMinus(characters[i], tokens) ? '_' : characters[i], i, 1);
                        isFormulaSubPart = true;
                        break;
                    case '(':
                        tokens.Add(new Token() { TokenType = TokenType.LeftBracket, Value = characters[i], StartPosition = i, Length = 1 });
                        isFormulaSubPart = true;
                        break;
                    case ')':
                        tokens.Add(new Token() { TokenType = TokenType.RightBracket, Value = characters[i], StartPosition = i, Length = 1 });
                        isFormulaSubPart = false;
                        break;
                    case '<':
                        if (IsFollowedBy(characters, i, '='))
                        {
                            // "<=" is stored as the single character '≤'.
                            AddOperation(tokens, '≤', i, 2);
                            i++;
                        }
                        else
                        {
                            AddOperation(tokens, '<', i, 1);
                        }
                        isFormulaSubPart = false;
                        break;
                    case '>':
                        if (IsFollowedBy(characters, i, '='))
                        {
                            // ">=" is stored as the single character '≥'.
                            AddOperation(tokens, '≥', i, 2);
                            i++;
                        }
                        else
                        {
                            AddOperation(tokens, '>', i, 1);
                        }
                        isFormulaSubPart = false;
                        break;
                    case '!':
                        if (!IsFollowedBy(characters, i, '='))
                        {
                            throw CreateInvalidTokenException(characters[i], i);
                        }

                        // "!=" is stored as the single character '≠'.
                        AddOperation(tokens, '≠', i, 2);
                        i++;
                        isFormulaSubPart = false;
                        break;
                    case '&':
                    case '|':
                    case '=':
                        // "&&", "||" and "==" must be doubled and are stored as their single character.
                        if (!IsFollowedBy(characters, i, characters[i]))
                        {
                            throw CreateInvalidTokenException(characters[i], i);
                        }

                        AddOperation(tokens, characters[i], i, 2);
                        i++;
                        isFormulaSubPart = false;
                        break;
                    default:
                        throw CreateInvalidTokenException(characters[i], i);
                }
            }

            return tokens;
        }

        /// <summary>
        /// Appends an operation token with the provided operator character to the token list.
        /// </summary>
        private static void AddOperation(List<Token> tokens, char operation, int startPosition, int length)
        {
            tokens.Add(new Token() { TokenType = TokenType.Operation, Value = operation, StartPosition = startPosition, Length = length });
        }

        /// <summary>
        /// Checks, without reading past the end of the array, whether the character after
        /// <paramref name="index"/> equals <paramref name="expected"/>.
        /// </summary>
        private static bool IsFollowedBy(char[] characters, int index, char expected)
        {
            return index + 1 < characters.Length && characters[index + 1] == expected;
        }

        /// <summary>
        /// Builds the exception that reports an unexpected character and its position.
        /// </summary>
        private static ParseException CreateInvalidTokenException(char character, int position)
        {
            return new ParseException(string.Format(InvalidTokenMessage, character, position));
        }

        /// <summary>
        /// Checks whether a character can belong to a numeric literal: a digit or the decimal separator,
        /// a '-' as first character when an operand may start, or 'e'/'E' when not the first character.
        /// </summary>
        private bool IsPartOfNumeric(char character, bool isFirstCharacter, bool isFormulaSubPart)
        {
            if (character == decimalSeparator || (character >= '0' && character <= '9'))
            {
                return true;
            }

            if (isFirstCharacter)
            {
                return isFormulaSubPart && character == '-';
            }

            return character == 'e' || character == 'E';
        }

        /// <summary>
        /// Checks whether a character can belong to an identifier: an ASCII letter, or a digit or
        /// underscore when it is not the first character.
        /// </summary>
        private bool IsPartOfVariable(char character, bool isFirstCharacter)
        {
            if ((character >= 'a' && character <= 'z') || (character >= 'A' && character <= 'Z'))
            {
                return true;
            }

            return !isFirstCharacter && ((character >= '0' && character <= '9') || character == '_');
        }

        /// <summary>
        /// Checks whether a '-' is a unary minus: it is unary unless the previous token is a number,
        /// a text token or a right bracket. A '-' with no previous token is unary as well.
        /// </summary>
        private bool IsUnaryMinus(char currentToken, List<Token> tokens)
        {
            if (currentToken != '-')
            {
                return false;
            }

            // Nothing precedes the minus (earlier input produced no token), so it cannot be binary.
            if (tokens.Count == 0)
            {
                return true;
            }

            Token previousToken = tokens[tokens.Count - 1];

            return !(previousToken.TokenType == TokenType.FloatingPoint ||
                     previousToken.TokenType == TokenType.Integer ||
                     previousToken.TokenType == TokenType.Text ||
                     previousToken.TokenType == TokenType.RightBracket);
        }

        /// <summary>
        /// Checks whether the character is the exponent marker of scientific notation.
        /// </summary>
        private bool IsScientificNotation(char currentToken)
        {
            return currentToken == 'e' || currentToken == 'E';
        }
    }
}
|