001/*******************************************************************************
002 * Copyright (c) 2013, 2016 EclipseSource.
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a copy
005 * of this software and associated documentation files (the "Software"), to deal
006 * in the Software without restriction, including without limitation the rights
007 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
008 * copies of the Software, and to permit persons to whom the Software is
009 * furnished to do so, subject to the following conditions:
010 *
011 * The above copyright notice and this permission notice shall be included in all
012 * copies or substantial portions of the Software.
013 *
014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
017 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
019 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
020 * SOFTWARE.
021 ******************************************************************************/
022package com.restfb.json;
023
024import java.io.IOException;
025import java.io.Reader;
026import java.io.StringReader;
027import java.util.Objects;
028
029
030/**
031 * A streaming parser for JSON text. The parser reports all events to a given handler.
032 */
033public class JsonParser {
034
035  private static final int MAX_NESTING_LEVEL = 1000;
036  private static final int MIN_BUFFER_SIZE = 10;
037  private static final int DEFAULT_BUFFER_SIZE = 1024;
038
039  private final JsonHandler<Object, Object> handler;
040  private Reader reader;
041  private char[] buffer;
042  private int bufferOffset;
043  private int index;
044  private int fill;
045  private int line;
046  private int lineOffset;
047  private int current;
048  private StringBuilder captureBuffer;
049  private int captureStart;
050  private int nestingLevel;
051
052  /*
053   * |                      bufferOffset
054   *                        v
055   * [a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t]        < input
056   *                       [l|m|n|o|p|q|r|s|t|?|?]    < buffer
057   *                          ^               ^
058   *                       |  index           fill
059   */
060
061  /**
062   * Creates a new JsonParser with the given handler. The parser will report all parser events to
063   * this handler.
064   *
065   * @param handler
066   *          the handler to process parser events
067   */
068  @SuppressWarnings("unchecked")
069  public JsonParser(JsonHandler<?, ?> handler) {
070    Objects.requireNonNull(handler, "handler is null");
071    this.handler = (JsonHandler<Object, Object>)handler;
072    handler.parser = this;
073  }
074
075  /**
076   * Parses the given input string. The input must contain a valid JSON value, optionally padded
077   * with whitespace.
078   *
079   * @param string
080   *          the input string, must be valid JSON
081   * @throws ParseException
082   *           if the input is not valid JSON
083   */
084  public void parse(String string) {
085    Objects.requireNonNull(string, "string is null");
086    int bufferSize = Math.max(MIN_BUFFER_SIZE, Math.min(DEFAULT_BUFFER_SIZE, string.length()));
087    try {
088      parse(new StringReader(string), bufferSize);
089    } catch (IOException exception) {
090      // StringReader does not throw IOException
091      throw new RuntimeException(exception);
092    }
093  }
094
095  /**
096   * Reads the entire input from the given reader and parses it as JSON. The input must contain a
097   * valid JSON value, optionally padded with whitespace.
098   * <p>
099   * Characters are read in chunks into a default-sized input buffer. Hence, wrapping a reader in an
100   * additional <code>BufferedReader</code> likely won't improve reading performance.
101   * </p>
102   *
103   * @param reader
104   *          the reader to read the input from
105   * @throws IOException
106   *           if an I/O error occurs in the reader
107   * @throws ParseException
108   *           if the input is not valid JSON
109   */
110  public void parse(Reader reader) throws IOException {
111    parse(reader, DEFAULT_BUFFER_SIZE);
112  }
113
114  /**
115   * Reads the entire input from the given reader and parses it as JSON. The input must contain a
116   * valid JSON value, optionally padded with whitespace.
117   * <p>
118   * Characters are read in chunks into an input buffer of the given size. Hence, wrapping a reader
119   * in an additional <code>BufferedReader</code> likely won't improve reading performance.
120   * </p>
121   *
122   * @param reader
123   *          the reader to read the input from
124   * @param buffersize
125   *          the size of the input buffer in chars
126   * @throws IOException
127   *           if an I/O error occurs in the reader
128   * @throws ParseException
129   *           if the input is not valid JSON
130   */
131  public void parse(Reader reader, int buffersize) throws IOException {
132    Objects.requireNonNull(reader, "reader is null");
133    if (buffersize <= 0) {
134      throw new IllegalArgumentException("buffersize is zero or negative");
135    }
136    this.reader = reader;
137    buffer = new char[buffersize];
138    bufferOffset = 0;
139    index = 0;
140    fill = 0;
141    line = 1;
142    lineOffset = 0;
143    current = 0;
144    captureStart = -1;
145    read();
146    skipWhiteSpace();
147    readValue();
148    skipWhiteSpace();
149    if (!isEndOfText()) {
150      throw error("Unexpected character");
151    }
152  }
153
154  private void readValue() throws IOException {
155    switch (current) {
156      case 'n':
157        readNull();
158        break;
159      case 't':
160        readTrue();
161        break;
162      case 'f':
163        readFalse();
164        break;
165      case '"':
166        readString();
167        break;
168      case '[':
169        readArray();
170        break;
171      case '{':
172        readObject();
173        break;
174      case '-':
175      case '0':
176      case '1':
177      case '2':
178      case '3':
179      case '4':
180      case '5':
181      case '6':
182      case '7':
183      case '8':
184      case '9':
185        readNumber();
186        break;
187      default:
188        throw expected("value");
189    }
190  }
191
192  private void readArray() throws IOException {
193    Object array = handler.startArray();
194    read();
195    if (++nestingLevel > MAX_NESTING_LEVEL) {
196      throw error("Nesting too deep");
197    }
198    skipWhiteSpace();
199    if (readChar(']')) {
200      nestingLevel--;
201      handler.endArray(array);
202      return;
203    }
204    do {
205      skipWhiteSpace();
206      handler.startArrayValue(array);
207      readValue();
208      handler.endArrayValue(array);
209      skipWhiteSpace();
210    } while (readChar(','));
211    if (!readChar(']')) {
212      throw expected("',' or ']'");
213    }
214    nestingLevel--;
215    handler.endArray(array);
216  }
217
218  private void readObject() throws IOException {
219    Object object = handler.startObject();
220    read();
221    if (++nestingLevel > MAX_NESTING_LEVEL) {
222      throw error("Nesting too deep");
223    }
224    skipWhiteSpace();
225    if (readChar('}')) {
226      nestingLevel--;
227      handler.endObject(object);
228      return;
229    }
230    do {
231      skipWhiteSpace();
232      handler.startObjectName(object);
233      String name = readName();
234      handler.endObjectName(object, name);
235      skipWhiteSpace();
236      if (!readChar(':')) {
237        throw expected("':'");
238      }
239      skipWhiteSpace();
240      handler.startObjectValue(object, name);
241      readValue();
242      handler.endObjectValue(object, name);
243      skipWhiteSpace();
244    } while (readChar(','));
245    if (!readChar('}')) {
246      throw expected("',' or '}'");
247    }
248    nestingLevel--;
249    handler.endObject(object);
250  }
251
252  private String readName() throws IOException {
253    if (current != '"') {
254      throw expected("name");
255    }
256    return readStringInternal();
257  }
258
259  private void readNull() throws IOException {
260    handler.startNull();
261    read();
262    readRequiredChar('u');
263    readRequiredChar('l');
264    readRequiredChar('l');
265    handler.endNull();
266  }
267
268  private void readTrue() throws IOException {
269    handler.startBoolean();
270    read();
271    readRequiredChar('r');
272    readRequiredChar('u');
273    readRequiredChar('e');
274    handler.endBoolean(true);
275  }
276
277  private void readFalse() throws IOException {
278    handler.startBoolean();
279    read();
280    readRequiredChar('a');
281    readRequiredChar('l');
282    readRequiredChar('s');
283    readRequiredChar('e');
284    handler.endBoolean(false);
285  }
286
287  private void readRequiredChar(char ch) throws IOException {
288    if (!readChar(ch)) {
289      throw expected("'" + ch + "'");
290    }
291  }
292
293  private void readString() throws IOException {
294    handler.startString();
295    handler.endString(readStringInternal());
296  }
297
298  private String readStringInternal() throws IOException {
299    read();
300    startCapture();
301    while (current != '"') {
302      if (current == '\\') {
303        pauseCapture();
304        readEscape();
305        startCapture();
306      } else if (current < 0x20) {
307        throw expected("valid string character");
308      } else {
309        read();
310      }
311    }
312    String string = endCapture();
313    read();
314    return string;
315  }
316
317  private void readEscape() throws IOException {
318    read();
319    switch (current) {
320    case '"':
321    case '/':
322    case '\\':
323      captureBuffer.append((char) current);
324      break;
325    case 'b':
326      captureBuffer.append('\b');
327      break;
328    case 'f':
329      captureBuffer.append('\f');
330      break;
331    case 'n':
332      captureBuffer.append('\n');
333      break;
334    case 'r':
335      captureBuffer.append('\r');
336      break;
337    case 't':
338      captureBuffer.append('\t');
339      break;
340    case 'u':
341      char[] hexChars = new char[4];
342      for (int i = 0; i < 4; i++) {
343        read();
344        if (!isHexDigit()) {
345          throw expected("hexadecimal digit");
346        }
347        hexChars[i] = (char) current;
348      }
349      captureBuffer.append((char) Integer.parseInt(new String(hexChars), 16));
350      break;
351    default:
352      throw expected("valid escape sequence");
353    }
354    read();
355  }
356
357  private void readNumber() throws IOException {
358    handler.startNumber();
359    startCapture();
360    readChar('-');
361    int firstDigit = current;
362    if (!readDigit()) {
363      throw expected("digit");
364    }
365    if (firstDigit != '0') {
366      while (readDigit()) {
367        // nothing to do here
368      }
369    }
370    readFraction();
371    readExponent();
372    handler.endNumber(endCapture());
373  }
374
375  private boolean readFraction() throws IOException {
376    if (!readChar('.')) {
377      return false;
378    }
379    if (!readDigit()) {
380      throw expected("digit");
381    }
382    while (readDigit()) {
383      // nothing to do here
384    }
385    return true;
386  }
387
388  private boolean readExponent() throws IOException {
389    if (!readChar('e') && !readChar('E')) {
390      return false;
391    }
392    if (!readChar('+')) {
393      readChar('-');
394    }
395    if (!readDigit()) {
396      throw expected("digit");
397    }
398    while (readDigit()) {
399      // nothing to do here
400    }
401    return true;
402  }
403
404  private boolean readChar(char ch) throws IOException {
405    if (current != ch) {
406      return false;
407    }
408    read();
409    return true;
410  }
411
412  private boolean readDigit() throws IOException {
413    if (!isDigit()) {
414      return false;
415    }
416    read();
417    return true;
418  }
419
420  private void skipWhiteSpace() throws IOException {
421    while (isWhiteSpace()) {
422      read();
423    }
424  }
425
426  private void read() throws IOException {
427    if (index == fill) {
428      if (captureStart != -1) {
429        captureBuffer.append(buffer, captureStart, fill - captureStart);
430        captureStart = 0;
431      }
432      bufferOffset += fill;
433      fill = reader.read(buffer, 0, buffer.length);
434      index = 0;
435      if (fill == -1) {
436        current = -1;
437        index++;
438        return;
439      }
440    }
441    if (current == '\n') {
442      line++;
443      lineOffset = bufferOffset + index;
444    }
445    current = buffer[index++];
446  }
447
448  private void startCapture() {
449    if (captureBuffer == null) {
450      captureBuffer = new StringBuilder();
451    }
452    captureStart = index - 1;
453  }
454
455  private void pauseCapture() {
456    int end = current == -1 ? index : index - 1;
457    captureBuffer.append(buffer, captureStart, end - captureStart);
458    captureStart = -1;
459  }
460
461  private String endCapture() {
462    int start = captureStart;
463    int end = index - 1;
464    captureStart = -1;
465    if (captureBuffer.length() > 0) {
466      captureBuffer.append(buffer, start, end - start);
467      String captured = captureBuffer.toString();
468      captureBuffer.setLength(0);
469      return captured;
470    }
471    return new String(buffer, start, end - start);
472  }
473
474  Location getLocation() {
475    int offset = bufferOffset + index - 1;
476    int column = offset - lineOffset + 1;
477    return new Location(offset, line, column);
478  }
479
480  private ParseException expected(String expected) {
481    if (isEndOfText()) {
482      return error("Unexpected end of input");
483    }
484    return error("Expected " + expected);
485  }
486
487  private ParseException error(String message) {
488    return new ParseException(message, getLocation());
489  }
490
491  private boolean isWhiteSpace() {
492    return current == ' ' || current == '\t' || current == '\n' || current == '\r';
493  }
494
495  private boolean isDigit() {
496    return current >= '0' && current <= '9';
497  }
498
499  private boolean isHexDigit() {
500    return current >= '0' && current <= '9' || current >= 'a' && current <= 'f' || current >= 'A' && current <= 'F';
501  }
502
503  private boolean isEndOfText() {
504    return current == -1;
505  }
506
507}