// Home Reference Source Repository
//
// lib/lexer.js

import LexerState from './lexer-state';
import Token, { EOF } from './token';
import TokenTypes from './token-types';
/**
 * @typedef {{
 *   line: number,
 *   column: number,
 * }} Position
 */
/**
 * Lexes a source-string into tokens.
 *
 * @example
 * const lex = perplex('...')
 *   .token('ID', /my-id-regex/)
 *   .token('(', /\(/)
 *   .token(')', /\)/)
 *   .token('WS', /\s+/, true) // true means 'skip'
 *
 * while ((let t = lex.next()).type != 'EOF') {
 *   console.log(t)
 * }
 * // alternatively:
 * console.log(lex.toArray())
 */
class Lexer {
    /* tslint:enable */
    /**
     * Creates a new Lexer instance
     * @param {string} [source = ''] The source string to operate on.
     */
    constructor(source = '') {
        this._state = new LexerState(source);
        this._tokenTypes = new TokenTypes();
    }
    //
    // Getters/Setters
    //
    /**
     * Gets the current lexer position
     * @return {number} Returns the position
     */
    get position() {
        return this._state.position;
    }
    /**
     * Sets the current lexer position
     * @param {number} i The position to move to
     */
    set position(i) {
        this._state.position = i;
    }
    /**
     * Gets the source the lexer is operating on
     * @return {string} Returns the source
     */
    get source() {
        return this._state.source;
    }
    /**
     * Sets the source the lexer is operating on.
     * Replaces the whole lexer state, so the position restarts as well.
     * @param {string} s The source to set
     */
    set source(s) {
        this._state = new LexerState(s);
    }
    //
    // METHODS
    //
    /**
     * Attaches this lexer to another lexer's state, so both lexers
     * share the same source and position from then on.
     * @param {Lexer<T>} other The other lexer to attach to
     */
    attachTo(other) {
        this._state = other._state;
    }
    /**
     * Disables a token type
     * @param {T} type The token type to disable
     * @return {Lexer<T>}
     */
    disable(type) {
        this._tokenTypes.disable(type);
        return this;
    }
    /**
     * Enables a token type
     * @param {T} type The token type to enable
     * @param {?boolean} [enabled=true] Whether to enable/disable the specified token type
     * @return {Lexer<T>}
     */
    enable(type, enabled) {
        this._tokenTypes.enable(type, enabled);
        return this;
    }
    /**
     * Like {@link next}, but throws an exception if the next token is
     * not of the required type.
     * @param {T} type The token type expected from {@link next}
     * @return {Token<T>} Returns the {@link Token} on success
     * @throws {Error} If the next token's type does not match `type`
     */
    expect(type) {
        const t = this.next();
        if (t.type !== type) {
            // `t` is always defined here: next() either returns a token
            // (possibly EOF) or throws, so no null-guard is needed.
            const pos = t.strpos();
            throw new Error(`Expected ${type}, got ${t.type} at ${pos.start.line}:${pos.start.column}`);
        }
        return t;
    }
    /**
     * Consumes and returns the next {@link Token} in the source string.
     * If there are no more tokens, it returns a {@link Token} of type `$EOF`
     * @return {Token<T>}
     * @throws {Error} If no token type matches at the current position
     */
    next() {
        try {
            const t = this.peek();
            this._state.position = t.end;
            return t;
        }
        catch (e) {
            // peek() annotates its error with `end` (the index just past the
            // unexpected run); skip over it so lexing can resume after a catch.
            this._state.position = e.end;
            throw e;
        }
    }
    /**
     * Returns the next {@link Token} in the source string, but does
     * not consume it.
     * If there are no more tokens, it returns a {@link Token} of type `$EOF`
     * @param {number} [position=`this.position`] The position at which to start reading
     * @return {Token<T>}
     * @throws {Error} If no token type matches; the error carries `unexpected`
     *     (the full run of unmatchable input) and `end` (index just past it)
     */
    peek(position = this._state.position) {
        // Recursively skips tokens marked `skip` (e.g. whitespace) until a
        // real token, EOF, or unmatchable input is found.
        const read = (i = position) => {
            if (i >= this._state.source.length)
                return EOF(this);
            const n = this._tokenTypes.peek(this._state.source, i);
            return n
                ? (n.item.skip
                    ? read(i + n.result[0].length)
                    : new Token(n.item.type, n.result[0], n.result.map(x => x), i, i + n.result[0].length, this))
                : null;
        };
        const t = read();
        if (t)
            return t;
        // We did not find a match; greedily extend the unexpected run one
        // character at a time by recursing, so the error reports the whole
        // stretch of bad input rather than a single character.
        let unexpected = this._state.source.substring(position, position + 1);
        try {
            this.peek(position + 1);
        }
        catch (e) {
            unexpected += e.unexpected;
        }
        const { line, column } = this.strpos(position);
        const e = new Error(`Unexpected input: ${unexpected} at (${line}:${column})`);
        e.unexpected = unexpected;
        e.end = position + unexpected.length;
        throw e;
    }
    /**
     * Converts a string-index (relative to the source string) to a line and a column.
     * Both line and column are 1-based.
     * @param {number} i The index to compute
     * @return {Position}
     */
    strpos(i) {
        // split() always returns an array, so no Array.isArray guard is needed.
        const lines = this._state.source.substring(0, i).split(/\r?\n/);
        const line = lines.length;
        const column = lines[lines.length - 1].length + 1;
        return { line, column };
    }
    /**
     * Converts the token stream to an array of Tokens
     * @return {Token<T>[]} The array of tokens (not including (EOF))
     * @throws {Error} If the source contains unmatchable input
     */
    toArray() {
        const oldState = this._state.copy();
        this._state.position = 0;
        const tkns = [];
        try {
            let t;
            while (!(t = this.next()).isEof())
                tkns.push(t);
        }
        finally {
            // Restore the caller's state even if next() threw on unexpected
            // input, so toArray() never leaves the lexer at a mutated position.
            this._state = oldState;
        }
        return tkns;
    }
    /**
     * Creates a new token type
     * @param {T} type The token type
     * @param {string|RegExp} pattern The pattern to match
     * @param {?boolean} skip Whether this type of token should be skipped
     * @return {Lexer<T>}
     */
    token(type, pattern, skip) {
        this._tokenTypes.token(type, pattern, skip);
        return this;
    }
}
export default Lexer;
export { EOF, Token, TokenTypes, LexerState };
//# sourceMappingURL=lexer.js.map