// lib/lexer.js
import LexerState from './lexer-state';
import Token, { EOF } from './token';
import TokenTypes from './token-types';
/**
* @typedef {{
* line: number,
* column: number,
* }} Position
*/
/**
* Lexes a source-string into tokens.
*
* @example
* const lex = perplex('...')
* .token('ID', /my-id-regex/)
* .token('(', /\(/)
* .token(')', /\)/)
* .token('WS', /\s+/, true) // true means 'skip'
*
* let t
* while ((t = lex.next()).type != 'EOF') {
* console.log(t)
* }
* // alternatively:
* console.log(lex.toArray())
*/
class Lexer {
  /**
   * Creates a new Lexer instance.
   * @param {string} [source=''] The source string to operate on.
   */
  constructor(source = '') {
    this._state = new LexerState(source);
    this._tokenTypes = new TokenTypes();
  }
  //
  // Getters/Setters
  //
  /**
   * Gets the current lexer position.
   * @return {number} Returns the position
   */
  get position() {
    return this._state.position;
  }
  /**
   * Sets the current lexer position.
   * @param {number} i The position to move to
   */
  set position(i) {
    this._state.position = i;
  }
  /**
   * Gets the source the lexer is operating on.
   * @return {string} Returns the source
   */
  get source() {
    return this._state.source;
  }
  /**
   * Sets the source the lexer is operating on.
   * Note: this creates a fresh {@link LexerState}, so the position resets.
   * @param {string} s The source to set
   */
  set source(s) {
    this._state = new LexerState(s);
  }
  //
  // METHODS
  //
  /**
   * Attaches this lexer to another lexer's state, so that both lexers
   * share the same source and position from then on.
   * @param {Lexer<T>} other The other lexer to attach to
   */
  attachTo(other) {
    this._state = other._state;
  }
  /**
   * Disables a token type.
   * @param {T} type The token type to disable
   * @return {Lexer<T>} Returns this lexer (chainable)
   */
  disable(type) {
    this._tokenTypes.disable(type);
    return this;
  }
  /**
   * Enables a token type.
   * @param {T} type The token type to enable
   * @param {?boolean} [enabled=true] Whether to enable/disable the specified token type
   * @return {Lexer<T>} Returns this lexer (chainable)
   */
  enable(type, enabled) {
    this._tokenTypes.enable(type, enabled);
    return this;
  }
  /**
   * Like {@link next}, but throws an exception if the next token is
   * not of the required type.
   * @param {T} type The token type expected from {@link next}
   * @return {Token<T>} Returns the {@link Token} on success
   * @throws {Error} If the next token has a different type
   */
  expect(type) {
    // `next()` either returns a token or throws, so `t` is always defined here.
    const t = this.next();
    if (t.type !== type) {
      const pos = t.strpos();
      throw new Error(`Expected ${type}, got ${t.type} at ${pos.start.line}:${pos.start.column}`);
    }
    return t;
  }
  /**
   * Consumes and returns the next {@link Token} in the source string.
   * If there are no more tokens, it returns a {@link Token} of type `$EOF`.
   * On unexpected input the position is advanced past the offending text
   * (so lexing can resume after catching) and the error is rethrown.
   * @return {Token<T>}
   * @throws {Error} If no token type matches the upcoming input
   */
  next() {
    try {
      const t = this.peek();
      this._state.position = t.end;
      return t;
    } catch (e) {
      // Only advance when the error actually carries a lexer `end` position;
      // an unrelated error must not corrupt the position with `undefined`.
      if (typeof e.end === 'number') {
        this._state.position = e.end;
      }
      throw e;
    }
  }
  /**
   * Returns the next {@link Token} in the source string, but does
   * not consume it.
   * If there are no more tokens, it returns a {@link Token} of type `$EOF`.
   * @param {number} [position=`this.position`] The position at which to start reading
   * @return {Token<T>}
   * @throws {Error} If no token type matches; the error carries `unexpected`
   *   (the offending text) and `end` (the index just past it)
   */
  peek(position = this._state.position) {
    // Scan forward iteratively over `skip` tokens (e.g. whitespace) rather
    // than recursing once per skipped token, so long skippable runs cannot
    // grow the call stack.
    const read = (i = position) => {
      for (;;) {
        if (i >= this._state.source.length)
          return EOF(this);
        const n = this._tokenTypes.peek(this._state.source, i);
        if (!n)
          return null; // no token type matched at `i`
        if (n.item.skip) {
          i += n.result[0].length;
          continue;
        }
        return new Token(n.item.type, n.result[0], n.result.map(x => x), i, i + n.result[0].length, this);
      }
    };
    const t = read();
    if (t)
      return t;
    // We did not find a match: gather the whole run of unexpected characters
    // (by peeking one position ahead and accumulating) so the error reports
    // it in a single message.
    let unexpected = this._state.source.substring(position, position + 1);
    try {
      this.peek(position + 1);
    }
    catch (e) {
      unexpected += e.unexpected;
    }
    const { line, column } = this.strpos(position);
    const e = new Error(`Unexpected input: ${unexpected} at (${line}:${column})`);
    e.unexpected = unexpected;
    e.end = position + unexpected.length;
    throw e;
  }
  /**
   * Converts a string-index (relative to the source string) to a line and a column.
   * @param {number} i The index to compute
   * @return {Position} 1-based line and column
   */
  strpos(i) {
    // `String.prototype.split` always returns an array, so no array guard
    // is needed here.
    const lines = this._state.source.substring(0, i).split(/\r?\n/);
    const line = lines.length;
    const column = lines[lines.length - 1].length + 1;
    return { line, column };
  }
  /**
   * Converts the token stream to an array of Tokens.
   * The lexer state is restored afterwards, even if lexing throws.
   * @return {Token<T>[]} The array of tokens (not including EOF)
   */
  toArray() {
    const oldState = this._state.copy();
    this._state.position = 0;
    const tkns = [];
    try {
      let t;
      while (!(t = this.next()).isEof())
        tkns.push(t);
    }
    finally {
      // Restore state unconditionally so a mid-stream error does not leave
      // the lexer at an arbitrary position.
      this._state = oldState;
    }
    return tkns;
  }
  /**
   * Creates a new token type.
   * @param {T} type The token type
   * @param {string|RegExp} pattern The pattern to match
   * @param {?boolean} skip Whether this type of token should be skipped
   * @return {Lexer<T>} Returns this lexer (chainable)
   */
  token(type, pattern, skip) {
    this._tokenTypes.token(type, pattern, skip);
    return this;
  }
}
export default Lexer;
export { EOF, Token, TokenTypes, LexerState };
//# sourceMappingURL=lexer.js.map