// lib/lexer.js
import LexerState from './lexer-state';
import Token, { EOF } from './token';
import TokenTypes from './token-types';
- /**
- * @typedef {{
- * line: number,
- * column: number,
- * }} Position
- */
- /**
- * Lexes a source-string into tokens.
- *
- * @example
- * const lex = perplex('...')
- * .token('ID', /my-id-regex/)
- * .token('(', /\(/)
- * .token(')', /\)/)
- * .token('WS', /\s+/, true) // true means 'skip'
- *
- * while ((let t = lex.next()).type != 'EOF') {
- * console.log(t)
- * }
- * // alternatively:
- * console.log(lex.toArray())
- */
- class Lexer {
- /* tslint:enable */
- /**
- * Creates a new Lexer instance
- * @param {string} [source = ''] The source string to operate on.
- */
- constructor(source = '') {
- this._state = new LexerState(source);
- this._tokenTypes = new TokenTypes();
- }
- //
- // Getters/Setters
- //
- /**
- * Gets the current lexer position
- * @return {number} Returns the position
- */
- get position() {
- return this._state.position;
- }
- /**
- * Sets the current lexer position
- * @param {number} i The position to move to
- */
- set position(i) {
- this._state.position = i;
- }
- /**
- * Gets the source the lexer is operating on
- * @return {string} Returns the source
- */
- get source() {
- return this._state.source;
- }
- /**
- * Sets the source the lexer is operating on
- * @param {string} s The source to set
- */
- set source(s) {
- this._state = new LexerState(s);
- }
- //
- // METHODS
- //
- /**
- * Attaches this lexer to another lexer's state
- * @param {Lexer<T>} other The other lexer to attach to
- */
- attachTo(other) {
- this._state = other._state;
- }
- /**
- * Disables a token type
- * @param {T} type The token type to disable
- * @return {Lexer<T>}
- */
- disable(type) {
- this._tokenTypes.disable(type);
- return this;
- }
- /**
- * Enables a token type
- * @param {T} type The token type to enalbe
- * @param {?boolean} [enabled=true] Whether to enable/disable the specified token type
- * @return {Lexer<T>}
- */
- enable(type, enabled) {
- this._tokenTypes.enable(type, enabled);
- return this;
- }
- /**
- * Like {@link next}, but throws an exception if the next token is
- * not of the required type.
- * @param {T} type The token type expected from {@link next}
- * @return {Token<T>} Returns the {@link Token} on success
- */
- expect(type) {
- const t = this.next();
- if (t.type != type) {
- const pos = t.strpos();
- throw new Error('Expected ' + type + (t ? ', got ' + t.type : '') + ' at ' + pos.start.line + ':' + pos.start.column);
- }
- return t;
- }
- /**
- * Consumes and returns the next {@link Token} in the source string.
- * If there are no more tokens, it returns a {@link Token} of type `$EOF`
- * @return {Token<T>}
- */
- next() {
- try {
- const t = this.peek();
- this._state.position = t.end;
- return t;
- }
- catch (e) {
- this._state.position = e.end;
- throw e;
- }
- }
- /**
- * Returns the next {@link Token} in the source string, but does
- * not consume it.
- * If there are no more tokens, it returns a {@link Token} of type `$EOF`
- * @param {number} [position=`this.position`] The position at which to start reading
- * @return {Token<T>}
- */
- peek(position = this._state.position) {
- const read = (i = position) => {
- if (i >= this._state.source.length)
- return EOF(this);
- const n = this._tokenTypes.peek(this._state.source, i);
- return n
- ? (n.item.skip
- ? read(i + n.result[0].length)
- : new Token(n.item.type, n.result[0], n.result.map(x => x), i, i + n.result[0].length, this))
- : null;
- };
- const t = read();
- if (t)
- return t;
- // we did not find a match
- let unexpected = this._state.source.substring(position, position + 1);
- try {
- this.peek(position + 1);
- }
- catch (e) {
- unexpected += e.unexpected;
- }
- const { line, column } = this.strpos(position);
- const e = new Error(`Unexpected input: ${unexpected} at (${line}:${column})`);
- e.unexpected = unexpected;
- e.end = position + unexpected.length;
- throw e;
- }
- /**
- * Converts a string-index (relative to the source string) to a line and a column.
- * @param {number} i The index to compute
- * @return {Position}
- */
- strpos(i) {
- let lines = this._state.source.substring(0, i).split(/\r?\n/);
- if (!Array.isArray(lines))
- lines = [lines];
- const line = lines.length;
- const column = lines[lines.length - 1].length + 1;
- return { line, column };
- }
- /**
- * Converts the token stream to an array of Tokens
- * @return {Token<T>[]} The array of tokens (not including (EOF))
- */
- toArray() {
- const oldState = this._state.copy();
- this._state.position = 0;
- const tkns = [];
- let t;
- while (!(t = this.next()).isEof())
- tkns.push(t);
- this._state = oldState;
- return tkns;
- }
- /**
- * Creates a new token type
- * @param {T} type The token type
- * @param {string|RegExp} pattern The pattern to match
- * @param {?boolean} skip Whether this type of token should be skipped
- * @return {Lexer<T>}
- */
- token(type, pattern, skip) {
- this._tokenTypes.token(type, pattern, skip);
- return this;
- }
- }
- export default Lexer;
- export { EOF, Token, TokenTypes, LexerState };
//# sourceMappingURL=lexer.js.map