using System; 
 | 
using System.Collections.Generic; 
 | 
using System.Globalization; 
 | 
using System.Linq; 
 | 
using System.Text; 
 | 
  
 | 
namespace Jace.Tokenizer 
 | 
{ 
 | 
    /// <summary> 
 | 
    /// A token reader that converts the input string in a list of tokens. 
 | 
    /// </summary> 
 | 
    public class TokenReader 
 | 
    { 
 | 
        private readonly CultureInfo cultureInfo; 
 | 
        private readonly char decimalSeparator; 
 | 
        private readonly char argumentSeparator; 
 | 
  
 | 
        public TokenReader()  
 | 
            : this(CultureInfo.CurrentCulture) 
 | 
        { 
 | 
        } 
 | 
  
 | 
        public TokenReader(CultureInfo cultureInfo) 
 | 
        { 
 | 
            this.cultureInfo = cultureInfo; 
 | 
            this.decimalSeparator = cultureInfo.NumberFormat.NumberDecimalSeparator[0]; 
 | 
            this.argumentSeparator = cultureInfo.TextInfo.ListSeparator[0]; 
 | 
        } 
 | 
  
 | 
        /// <summary> 
 | 
        /// Read in the provided formula and convert it into a list of takens that can be processed by the 
 | 
        /// Abstract Syntax Tree Builder. 
 | 
        /// </summary> 
 | 
        /// <param name="formula">The formula that must be converted into a list of tokens.</param> 
 | 
        /// <returns>The list of tokens for the provided formula.</returns> 
 | 
        public List<Token> Read(string formula) 
 | 
        { 
 | 
            if (string.IsNullOrEmpty(formula)) 
 | 
                throw new ArgumentNullException("formula"); 
 | 
  
 | 
            List<Token> tokens = new List<Token>(); 
 | 
  
 | 
            char[] characters = formula.ToCharArray(); 
 | 
  
 | 
            bool isFormulaSubPart = true; 
 | 
            bool isScientific = false; 
 | 
  
 | 
            for(int i = 0; i < characters.Length; i++) 
 | 
            { 
 | 
                if (IsPartOfNumeric(characters[i], true, isFormulaSubPart)) 
 | 
                { 
 | 
                    StringBuilder buffer = new StringBuilder(); 
 | 
                    buffer.Append(characters[i]); 
 | 
                    //string buffer = "" + characters[i]; 
 | 
                    int startPosition = i; 
 | 
                                        
 | 
  
 | 
                    while (++i < characters.Length && IsPartOfNumeric(characters[i], false, isFormulaSubPart)) 
 | 
                    { 
 | 
                        if (isScientific && IsScientificNotation(characters[i])) 
 | 
                            throw new ParseException(string.Format("Invalid token \"{0}\" detected at position {1}.", characters[i], i)); 
 | 
  
 | 
                        if (IsScientificNotation(characters[i])) 
 | 
                        { 
 | 
                            isScientific = IsScientificNotation(characters[i]); 
 | 
  
 | 
                            if (characters[i + 1] == '-') 
 | 
                            { 
 | 
                                buffer.Append(characters[i++]); 
 | 
                            } 
 | 
                        } 
 | 
  
 | 
                        buffer.Append(characters[i]); 
 | 
                    } 
 | 
  
 | 
                    // Verify if we do not have an int 
 | 
                    int intValue; 
 | 
                    if (int.TryParse(buffer.ToString(), out intValue)) 
 | 
                    { 
 | 
                        tokens.Add(new Token() { TokenType = TokenType.Integer, Value = intValue, StartPosition = startPosition, Length = i - startPosition }); 
 | 
                        isFormulaSubPart = false; 
 | 
                    } 
 | 
                    else 
 | 
                    { 
 | 
                        double doubleValue; 
 | 
                        if (double.TryParse(buffer.ToString(), NumberStyles.Float | NumberStyles.AllowThousands, 
 | 
                            cultureInfo, out doubleValue)) 
 | 
                        { 
 | 
                            tokens.Add(new Token() { TokenType = TokenType.FloatingPoint, Value = doubleValue, StartPosition = startPosition, Length = i - startPosition }); 
 | 
                            isScientific = false; 
 | 
                            isFormulaSubPart = false; 
 | 
                        } 
 | 
                        else if (buffer.ToString() == "-") 
 | 
                        { 
 | 
                            // Verify if we have a unary minus, we use the token '_' for a unary minus in the AST builder 
 | 
                            tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '_', StartPosition = startPosition, Length = 1 }); 
 | 
                        } 
 | 
                        // Else we skip 
 | 
                    } 
 | 
  
 | 
                    if (i == characters.Length) 
 | 
                    { 
 | 
                        // Last character read 
 | 
                        continue; 
 | 
                    } 
 | 
                } 
 | 
  
 | 
                if (IsPartOfVariable(characters[i], true)) 
 | 
                { 
 | 
                    string buffer = "" + characters[i]; 
 | 
                    int startPosition = i; 
 | 
  
 | 
                    while (++i < characters.Length && IsPartOfVariable(characters[i], false)) 
 | 
                    { 
 | 
                        buffer += characters[i]; 
 | 
                    } 
 | 
  
 | 
                    tokens.Add(new Token() { TokenType = TokenType.Text, Value = buffer, StartPosition = startPosition, Length = i -startPosition }); 
 | 
                    isFormulaSubPart = false; 
 | 
  
 | 
                    if (i == characters.Length) 
 | 
                    { 
 | 
                        // Last character read 
 | 
                        continue; 
 | 
                    } 
 | 
                } 
 | 
                if (characters[i] == this.argumentSeparator) 
 | 
                { 
 | 
                    tokens.Add(new Token() { TokenType = Tokenizer.TokenType.ArgumentSeparator, Value = characters[i], StartPosition = i, Length = 1 }); 
 | 
                    isFormulaSubPart = false; 
 | 
                } 
 | 
                else 
 | 
                { 
 | 
                    switch (characters[i]) 
 | 
                    {  
 | 
                        case ' ': 
 | 
                            continue; 
 | 
                        case '+': 
 | 
                        case '-': 
 | 
                        case '*': 
 | 
                        case '/': 
 | 
                        case '^': 
 | 
                        case '%': 
 | 
                        case '≤': 
 | 
                        case '≥': 
 | 
                        case '≠': 
 | 
                            if (IsUnaryMinus(characters[i], tokens)) 
 | 
                            { 
 | 
                                // We use the token '_' for a unary minus in the AST builder 
 | 
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '_', StartPosition = i, Length = 1 }); 
 | 
                            } 
 | 
                            else 
 | 
                            { 
 | 
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = characters[i], StartPosition = i, Length = 1 });                             
 | 
                            } 
 | 
                            isFormulaSubPart = true; 
 | 
                            break; 
 | 
                        case '(': 
 | 
                            tokens.Add(new Token() { TokenType = TokenType.LeftBracket, Value = characters[i], StartPosition = i, Length = 1 }); 
 | 
                            isFormulaSubPart = true; 
 | 
                            break; 
 | 
                        case ')': 
 | 
                            tokens.Add(new Token() { TokenType = TokenType.RightBracket, Value = characters[i], StartPosition = i, Length = 1 }); 
 | 
                            isFormulaSubPart = false; 
 | 
                            break; 
 | 
                        case '<': 
 | 
                            if (i + 1 < characters.Length && characters[i + 1] == '=') 
 | 
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '≤', StartPosition = i++, Length = 2 }); 
 | 
                            else 
 | 
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '<', StartPosition = i, Length = 1 }); 
 | 
                            isFormulaSubPart = false; 
 | 
                            break; 
 | 
                        case '>': 
 | 
                            if (i + 1 < characters.Length && characters[i + 1] == '=') 
 | 
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '≥', StartPosition = i++, Length = 2 }); 
 | 
                            else 
 | 
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '>', StartPosition = i, Length = 1 }); 
 | 
                            isFormulaSubPart = false; 
 | 
                            break; 
 | 
                        case '!': 
 | 
                            if (i + 1 < characters.Length && characters[i + 1] == '=') 
 | 
                            { 
 | 
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '≠', StartPosition = i++, Length = 2 }); 
 | 
                                isFormulaSubPart = false; 
 | 
                            } 
 | 
                            else 
 | 
                                throw new ParseException(string.Format("Invalid token \"{0}\" detected at position {1}.", characters[i], i)); 
 | 
                            break; 
 | 
                        case '&': 
 | 
                            if (i + 1 < characters.Length && characters[i + 1] == '&') 
 | 
                            { 
 | 
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '&', StartPosition = i++, Length = 2 }); 
 | 
                                isFormulaSubPart = false; 
 | 
                            } 
 | 
                            else 
 | 
                                throw new ParseException(string.Format("Invalid token \"{0}\" detected at position {1}.", characters[i], i)); 
 | 
                            break; 
 | 
                        case '|': 
 | 
                            if (i + 1 < characters.Length && characters[i + 1] == '|') 
 | 
                            { 
 | 
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '|', StartPosition = i++, Length = 2 }); 
 | 
                                isFormulaSubPart = false; 
 | 
                            } 
 | 
                            else 
 | 
                                throw new ParseException(string.Format("Invalid token \"{0}\" detected at position {1}.", characters[i], i)); 
 | 
                            break; 
 | 
                        case '=': 
 | 
                            if (i + 1 < characters.Length && characters[i + 1] == '=') 
 | 
                            { 
 | 
                                tokens.Add(new Token() { TokenType = TokenType.Operation, Value = '=', StartPosition = i++, Length = 2 }); 
 | 
                                isFormulaSubPart = false; 
 | 
                            } 
 | 
                            else 
 | 
                                throw new ParseException(string.Format("Invalid token \"{0}\" detected at position {1}.", characters[i], i)); 
 | 
                            break; 
 | 
                        default: 
 | 
                            throw new ParseException(string.Format("Invalid token \"{0}\" detected at position {1}.", characters[i], i)); 
 | 
                    } 
 | 
                } 
 | 
            } 
 | 
  
 | 
            return tokens; 
 | 
        } 
 | 
  
 | 
        private bool IsPartOfNumeric(char character, bool isFirstCharacter, bool isFormulaSubPart) 
 | 
        { 
 | 
            return character == decimalSeparator || (character >= '0' && character <= '9') || (isFormulaSubPart && isFirstCharacter && character == '-') || (!isFirstCharacter && character == 'e') || (!isFirstCharacter && character == 'E'); 
 | 
        } 
 | 
  
 | 
        private bool IsPartOfVariable(char character, bool isFirstCharacter) 
 | 
        { 
 | 
            return (character >= 'a' && character <= 'z') || (character >= 'A' && character <= 'Z') || (!isFirstCharacter && character >= '0' && character <= '9') || (!isFirstCharacter && character == '_'); 
 | 
        } 
 | 
  
 | 
        private bool IsUnaryMinus(char currentToken, List<Token> tokens) 
 | 
        { 
 | 
            if (currentToken == '-') 
 | 
            { 
 | 
                Token previousToken = tokens[tokens.Count - 1]; 
 | 
  
 | 
                return !(previousToken.TokenType == TokenType.FloatingPoint || 
 | 
                         previousToken.TokenType == TokenType.Integer || 
 | 
                         previousToken.TokenType == TokenType.Text || 
 | 
                         previousToken.TokenType == TokenType.RightBracket); 
 | 
            } 
 | 
            else 
 | 
                return false; 
 | 
        } 
 | 
  
 | 
        private bool IsScientificNotation(char currentToken) 
 | 
        { 
 | 
            return currentToken == 'e' || currentToken == 'E'; 
 | 
        } 
 | 
    } 
 | 
} 
 |