CogPar
A versatile parser for mathematical expressions.
 All Classes Functions Variables
Tokenizer.java
1 /*
2  * This software and all files contained in it are distributed under the MIT license.
3  *
4  * Copyright (c) 2013 Cogito Learning Ltd
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 package uk.co.cogitolearning.cogpar;
26 
27 import java.util.LinkedList;
28 import java.util.regex.Matcher;
29 import java.util.regex.Pattern;
30 
40 public class Tokenizer
41 {
45  private class TokenInfo
46  {
48  public final Pattern regex;
50  public final int token;
51 
55  public TokenInfo(Pattern regex, int token)
56  {
57  super();
58  this.regex = regex;
59  this.token = token;
60  }
61  }
62 
/** Registered token patterns; add() appends to this list and tokenize() tries the entries in list order. */
68  private LinkedList<TokenInfo> tokenInfos;
69 
/** Tokens produced by the most recent tokenize() call; cleared at the start of each call and returned as-is by getTokens(). */
71  private LinkedList<Token> tokens;
72 
/** Static Tokenizer reference, initially null; the null-check accessor further down returns this field. */
74  private static Tokenizer expressionTokenizer = null;
75 
/**
 * Creates a tokenizer that has no token patterns registered and whose token
 * list is empty. Patterns are registered afterwards through add().
 */
public Tokenizer()
{
  this.tokens = new LinkedList<Token>();
  this.tokenInfos = new LinkedList<TokenInfo>();
}
85 
/*
 * Body of the accessor for the static expressionTokenizer field: it tests the
 * field for null and returns the field.
 *
 * NOTE(review): this listing omits the method signature (listing line 90) and
 * the statement guarded by the `if` (listing line 93), so as printed the `if`
 * appears to guard the `return`. Presumably the missing statement assigns a
 * newly created tokenizer to expressionTokenizer on first use -- confirm
 * against the original file before relying on this.
 */
91  {
92  if (expressionTokenizer == null)
94  return expressionTokenizer;
95  }
96 
/*
 * Body of a factory that builds a new Tokenizer, registers the patterns for
 * mathematical expressions on it and returns it.
 *
 * NOTE(review): the method signature (listing line 101) is missing from this
 * listing; recover it from the original file.
 *
 * Registration order matters: tokenize() tries the patterns in the order they
 * were added and stops at the first one that matches.
 */
102  {
103  Tokenizer tokenizer = new Tokenizer();
104 
// Operators: additive, multiplicative and exponentiation.
105  tokenizer.add("[+-]", Token.PLUSMINUS);
106  tokenizer.add("[*/]", Token.MULTDIV);
107  tokenizer.add("\\^", Token.RAISED);
108 
// NOTE(review): presumably getAllFunctions() returns the function names joined
// with '|' so they can be used as a regex alternation -- confirm.
109  String funcs = FunctionExpressionNode.getAllFunctions();
// The (?!\w) lookahead rejects a function name that is immediately followed by
// a word character. Registered before VARIABLE so function names are tried first.
110  tokenizer.add("(" + funcs + ")(?!\\w)", Token.FUNCTION);
111 
112  tokenizer.add("\\(", Token.OPEN_BRACKET);
113  tokenizer.add("\\)", Token.CLOSE_BRACKET);
// Number: digits with an optional trailing '.', or a '.' followed by a digit;
// then any further digits and an optional exponent (E or e, optional sign, digits).
114  tokenizer.add("(?:\\d+\\.?|\\.\\d)\\d*(?:[Ee][-+]?\\d+)?", Token.NUMBER);
// Variable: a letter followed by any number of word characters.
115  tokenizer.add("[a-zA-Z]\\w*", Token.VARIABLE);
116 
117  return tokenizer;
118  }
119 
125  public void add(String regex, int token)
126  {
127  tokenInfos.add(new TokenInfo(Pattern.compile("^" + regex), token));
128  }
129 
137  public void tokenize(String str)
138  {
139  String s = str.trim();
140  int totalLength = s.length();
141  tokens.clear();
142  while (!s.equals(""))
143  {
144  int remaining = s.length();
145  boolean match = false;
146  for (TokenInfo info : tokenInfos)
147  {
148  Matcher m = info.regex.matcher(s);
149  if (m.find())
150  {
151  match = true;
152  String tok = m.group().trim();
153  // System.out.println("Success matching " + s + " against " +
154  // info.regex.pattern() + " : " + tok);
155  s = m.replaceFirst("").trim();
156  tokens.add(new Token(info.token, tok, totalLength - remaining));
157  break;
158  }
159  }
160  if (!match)
161  throw new ParserException("Unexpected character in input: " + s);
162  }
163  }
164 
169  public LinkedList<Token> getTokens()
170  {
171  return tokens;
172  }
173 
174 }