Project_SG_scripts.git

using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
 
namespace Jace.Tokenizer
{
    /// <summary>
    /// A token reader that converts the input string in a list of tokens.
    /// </summary>
    public class TokenReader
    {
        private readonly CultureInfo cultureInfo;
        private readonly char decimalSeparator;
        private readonly char argumentSeparator;
 
        public TokenReader() 
            : this(CultureInfo.CurrentCulture)
        {
        }
 
        public TokenReader(CultureInfo cultureInfo)
        {
            this.cultureInfo = cultureInfo;
            this.decimalSeparator = cultureInfo.NumberFormat.NumberDecimalSeparator[0];
            this.argumentSeparator = cultureInfo.TextInfo.ListSeparator[0];
        }
 
        /// <summary>
        /// Read in the provided formula and convert it into a list of takens that can be processed by the
        /// Abstract Syntax Tree Builder.
        /// </summary>
        /// <param name="formula">The formula that must be converted into a list of tokens.</param>
        /// <returns>The list of tokens for the provided formula.</returns>
        public List<Token> Read(string formula)
        {
            if (string.IsNullOrEmpty(formula))
                throw new ArgumentNullException("formula");
 
            List<Token> tokens = new List<Token>();
 
            char[] characters = formula.ToCharArray();
 
            bool isFormulaSubPart = true;
            bool isScientific = false;
 
            for(int i = 0; i < characters.Length; i++)
            {
                if (IsPartOfNumeric(characters[i], true, isFormulaSubPart))
                {
                    StringBuilder buffer = new StringBuilder();
                    buffer.Append(characters[i]);
                    //string buffer = "" + characters[i];
                    int startPosition = i;
                                       
 
                    while (++i < characters.Length && IsPartOfNumeric(characters[i], false, isFormulaSubPart))
                    {
                        if (isScientific && IsScientificNotation(characters[i]))
                            throw new ParseException(string.Format("Invalid token \"{0}\" detected at position {1}.", characters[i], i));
 
                        if (IsScientificNotation(characters[i]))
                        {
                            isScientific = IsScientificNotation(characters[i]);
 
                            if (characters[i + 1] == '-')
                            {
                                buffer.Append(characters[i++]);
                            }
                        }
 
                        buffer.Append(characters[i]);
                    }
 
                    // Verify if we do not have an int
                    int intValue;
                    if (int.TryParse(buffer.ToString(), out intValue))
                    {
                        tokens.Add(new Token() { TokenType = TokenType.Integer, Value = intValue, StartPosition = startPosition, Length = i - startPosition });
                        isFormulaSubPart = false;
                    }
                    else
                    {
                        double doubleValue;
                        if (double.TryParse(buffer.ToString(), NumberStyles.Float | NumberStyles.AllowThousands,
                            cultureInfo, out doubleValue))
                        {
                            tokens.Add(new Token() { TokenType = TokenType.FloatingPoint, Value = doubleValue, StartPosition = startPosition, Length = i - startPosition });
                            isScientific = false;
                            isFormulaSubPart = false;
                        }
                        else if (buffer.ToString() == "-")
                        {
                            // Verify if we have a unary minus, we use the token '_' for a unary minus in the AST builder
                            tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '_', StartPosition = startPosition, Length = 1 });
                        }
                        // Else we skip
                    }
 
                    if (i == characters.Length)
                    {
                        // Last character read
                        continue;
                    }
                }
 
                if (IsPartOfVariable(characters[i], true))
                {
                    string buffer = "" + characters[i];
                    int startPosition = i;
 
                    while (++i < characters.Length && IsPartOfVariable(characters[i], false))
                    {
                        buffer += characters[i];
                    }
 
                    tokens.Add(new Token() { TokenType = TokenType.Text, Value = buffer, StartPosition = startPosition, Length = i -startPosition });
                    isFormulaSubPart = false;
 
                    if (i == characters.Length)
                    {
                        // Last character read
                        continue;
                    }
                }
                if (characters[i] == this.argumentSeparator)
                {
                    tokens.Add(new Token() { TokenType = Tokenizer.TokenType.ArgumentSeparator, Value = characters[i], StartPosition = i, Length = 1 });
                    isFormulaSubPart = false;
                }
                else
                {
                    switch (characters[i])
                    { 
                        case ' ':
                            continue;
                        case '+':
                        case '-':
                        case '*':
                        case '/':
                        case '^':
                        case '%':
                        case '≤':
                        case '≥':
                        case '≠':
                            if (IsUnaryMinus(characters[i], tokens))
                            {
                                // We use the token '_' for a unary minus in the AST builder
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '_', StartPosition = i, Length = 1 });
                            }
                            else
                            {
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = characters[i], StartPosition = i, Length = 1 });                            
                            }
                            isFormulaSubPart = true;
                            break;
                        case '(':
                            tokens.Add(new Token() { TokenType = TokenType.LeftBracket, Value = characters[i], StartPosition = i, Length = 1 });
                            isFormulaSubPart = true;
                            break;
                        case ')':
                            tokens.Add(new Token() { TokenType = TokenType.RightBracket, Value = characters[i], StartPosition = i, Length = 1 });
                            isFormulaSubPart = false;
                            break;
                        case '<':
                            if (i + 1 < characters.Length && characters[i + 1] == '=')
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '≤', StartPosition = i++, Length = 2 });
                            else
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '<', StartPosition = i, Length = 1 });
                            isFormulaSubPart = false;
                            break;
                        case '>':
                            if (i + 1 < characters.Length && characters[i + 1] == '=')
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '≥', StartPosition = i++, Length = 2 });
                            else
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '>', StartPosition = i, Length = 1 });
                            isFormulaSubPart = false;
                            break;
                        case '!':
                            if (i + 1 < characters.Length && characters[i + 1] == '=')
                            {
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '≠', StartPosition = i++, Length = 2 });
                                isFormulaSubPart = false;
                            }
                            else
                                throw new ParseException(string.Format("Invalid token \"{0}\" detected at position {1}.", characters[i], i));
                            break;
                        case '&':
                            if (i + 1 < characters.Length && characters[i + 1] == '&')
                            {
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '&', StartPosition = i++, Length = 2 });
                                isFormulaSubPart = false;
                            }
                            else
                                throw new ParseException(string.Format("Invalid token \"{0}\" detected at position {1}.", characters[i], i));
                            break;
                        case '|':
                            if (i + 1 < characters.Length && characters[i + 1] == '|')
                            {
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '|', StartPosition = i++, Length = 2 });
                                isFormulaSubPart = false;
                            }
                            else
                                throw new ParseException(string.Format("Invalid token \"{0}\" detected at position {1}.", characters[i], i));
                            break;
                        case '=':
                            if (i + 1 < characters.Length && characters[i + 1] == '=')
                            {
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '=', StartPosition = i++, Length = 2 });
                                isFormulaSubPart = false;
                            }
                            else
                                throw new ParseException(string.Format("Invalid token \"{0}\" detected at position {1}.", characters[i], i));
                            break;
                        default:
                            throw new ParseException(string.Format("Invalid token \"{0}\" detected at position {1}.", characters[i], i));
                    }
                }
            }
 
            return tokens;
        }
 
        private bool IsPartOfNumeric(char character, bool isFirstCharacter, bool isFormulaSubPart)
        {
            return character == decimalSeparator || (character >= '0' && character <= '9') || (isFormulaSubPart && isFirstCharacter && character == '-') || (!isFirstCharacter && character == 'e') || (!isFirstCharacter && character == 'E');
        }
 
        private bool IsPartOfVariable(char character, bool isFirstCharacter)
        {
            return (character >= 'a' && character <= 'z') || (character >= 'A' && character <= 'Z') || (!isFirstCharacter && character >= '0' && character <= '9') || (!isFirstCharacter && character == '_');
        }
 
        private bool IsUnaryMinus(char currentToken, List<Token> tokens)
        {
            if (currentToken == '-')
            {
                Token previousToken = tokens[tokens.Count - 1];
 
                return !(previousToken.TokenType == TokenType.FloatingPoint ||
                         previousToken.TokenType == TokenType.Integer ||
                         previousToken.TokenType == TokenType.Text ||
                         previousToken.TokenType == TokenType.RightBracket);
            }
            else
                return false;
        }
 
        private bool IsScientificNotation(char currentToken)
        {
            return currentToken == 'e' || currentToken == 'E';
        }
    }
}