Home Reference Source Repository

lib/lexer.js

  1. import LexerState from './lexer-state';
  2. import Token, { EOF } from './token';
  3. import TokenTypes from './token-types';
  4. /**
  5. * @typedef {{
  6. * line: number,
  7. * column: number,
  8. * }} Position
  9. */
  /**
   * Lexes a source-string into tokens.
   *
   * Token types are registered with {@link Lexer#token} and are matched against
   * the source starting at the current position. Token types flagged as `skip`
   * are matched but never returned.
   *
   * @template T The type used to identify token types (e.g. a string name)
   *
   * @example
   * const lex = new Lexer('...')
   *   .token('ID', /my-id-regex/)
   *   .token('(', /\(/)
   *   .token(')', /\)/)
   *   .token('WS', /\s+/, true); // true means 'skip'
   *
   * let t;
   * while (!(t = lex.next()).isEof()) {
   *   console.log(t);
   * }
   * // alternatively:
   * console.log(lex.toArray());
   */
  class Lexer {
    /**
     * Creates a new Lexer instance
     * @param {string} [source = ''] The source string to operate on.
     */
    constructor(source = '') {
      this._state = new LexerState(source);
      this._tokenTypes = new TokenTypes();
    }

    //
    // Getters/Setters
    //

    /**
     * Gets the current lexer position
     * @return {number} Returns the position (an index into the source string)
     */
    get position() {
      return this._state.position;
    }

    /**
     * Sets the current lexer position
     * @param {number} i The position to move to
     */
    set position(i) {
      this._state.position = i;
    }

    /**
     * Gets the source the lexer is operating on
     * @return {string} Returns the source
     */
    get source() {
      return this._state.source;
    }

    /**
     * Sets the source the lexer is operating on.
     * This replaces the lexer's state with a fresh `LexerState`, so a state
     * previously shared through {@link Lexer#attachTo} is no longer used.
     * @param {string} s The source to set
     */
    set source(s) {
      this._state = new LexerState(s);
    }

    //
    // METHODS
    //

    /**
     * Attaches this lexer to another lexer's state. Both lexers then read and
     * write the very same state object (source and position).
     * @param {Lexer<T>} other The other lexer to attach to
     */
    attachTo(other) {
      this._state = other._state;
    }

    /**
     * Disables a token type
     * @param {T} type The token type to disable
     * @return {Lexer<T>} This lexer, for chaining
     */
    disable(type) {
      this._tokenTypes.disable(type);
      return this;
    }

    /**
     * Enables a token type
     * @param {T} type The token type to enable
     * @param {?boolean} [enabled=true] Whether to enable/disable the specified token type
     *   (forwarded unchanged to the token-type registry)
     * @return {Lexer<T>} This lexer, for chaining
     */
    enable(type, enabled) {
      this._tokenTypes.enable(type, enabled);
      return this;
    }

    /**
     * Like {@link Lexer#next}, but throws an exception if the next token is
     * not of the required type. The token is consumed either way.
     * @param {T} type The token type expected from {@link Lexer#next}
     * @return {Token<T>} Returns the {@link Token} on success
     * @throws {Error} If the next token has a different type, or if the input
     *   cannot be lexed at all (see {@link Lexer#peek})
     */
    expect(type) {
      const t = this.next();
      if (t.type !== type) {
        const { start } = t.strpos();
        throw new Error(`Expected ${type}, got ${t.type} at ${start.line}:${start.column}`);
      }
      return t;
    }

    /**
     * Consumes and returns the next {@link Token} in the source string.
     * If there are no more tokens, it returns a {@link Token} of type `$EOF`
     * @return {Token<T>}
     * @throws {Error} If the input at the current position matches no token
     *   type. The position is then moved past the unexpected input so that
     *   lexing can be resumed by calling `next()` again.
     */
    next() {
      let t;
      try {
        t = this.peek();
      }
      catch (e) {
        // Only errors raised by peek() carry a numeric `end`; any other
        // exception must not overwrite the position with `undefined`.
        if (e && typeof e.end === 'number') {
          this._state.position = e.end;
        }
        throw e;
      }
      this._state.position = t.end;
      return t;
    }

    /**
     * Returns the next {@link Token} in the source string, but does
     * not consume it.
     * If there are no more tokens, it returns a {@link Token} of type `$EOF`
     * @param {number} [position=`this.position`] The position at which to start reading
     * @return {Token<T>}
     * @throws {Error} If no token can be read at `position`. The error has two
     *   extra properties: `unexpected` (the run of source text, starting at
     *   `position`, up to the next index from which a token can be read) and
     *   `end` (the index just past that run).
     */
    peek(position = this._state.position) {
      const { source } = this._state;

      // Reads the first non-skipped token at or after `start`.
      // Returns the EOF token at the end of the source, or null when some
      // input on the way matches no token type.
      const read = (start) => {
        let i = start;
        while (i < source.length) {
          const n = this._tokenTypes.peek(source, i);
          if (!n) {
            return null;
          }
          const length = n.result[0].length;
          if (!n.item.skip) {
            // n.result is handed over as a plain-array copy.
            return new Token(n.item.type, n.result[0], n.result.map(x => x), i, i + length, this);
          }
          if (length === 0) {
            // A skipped empty match would never advance.
            throw new Error(`Skipped token type ${String(n.item.type)} matched an empty string at index ${i}`);
          }
          i += length;
        }
        return EOF(this);
      };

      const t = read(position);
      if (t) {
        return t;
      }

      // We did not find a match: extend the unexpected run one character at a
      // time until a token (or EOF) can be read again. Note that the run
      // always starts at `position`, even when skippable input precedes the
      // first character that actually fails to match.
      let end = position + 1;
      while (!read(end)) {
        end += 1;
      }
      const unexpected = source.substring(position, end);
      const { line, column } = this.strpos(position);
      const e = new Error(`Unexpected input: ${unexpected} at (${line}:${column})`);
      e.unexpected = unexpected;
      e.end = end;
      throw e;
    }

    /**
     * Converts a string-index (relative to the source string) to a line and a column.
     * Both are 1-based; lines are separated by `\n` or `\r\n`.
     * @param {number} i The index to compute
     * @return {Position}
     */
    strpos(i) {
      const lines = this._state.source.substring(0, i).split(/\r?\n/);
      return {
        line: lines.length,
        column: lines[lines.length - 1].length + 1,
      };
    }

    /**
     * Converts the token stream to an array of Tokens, always starting from
     * the beginning of the source. The lexer's own state (including its
     * position) is left untouched, even if lexing fails.
     * @return {Token<T>[]} The array of tokens (not including (EOF))
     * @throws {Error} If the source contains input that matches no token type
     */
    toArray() {
      // Lex on a scratch copy so the live state -- which may be shared with
      // other lexers through attachTo() -- is never moved or replaced.
      const original = this._state;
      const scratch = original.copy();
      scratch.position = 0;
      this._state = scratch;
      try {
        const tkns = [];
        let t;
        while (!(t = this.next()).isEof()) {
          tkns.push(t);
        }
        return tkns;
      }
      finally {
        this._state = original;
      }
    }

    /**
     * Creates a new token type
     * @param {T} type The token type
     * @param {string|RegExp} pattern The pattern to match
     * @param {?boolean} skip Whether this type of token should be skipped
     * @return {Lexer<T>} This lexer, for chaining
     */
    token(type, pattern, skip) {
      this._tokenTypes.token(type, pattern, skip);
      return this;
    }
  }
  200. export default Lexer;
  201. export { EOF, Token, TokenTypes, LexerState };
  202. //# sourceMappingURL=lexer.js.map