sirhall / deno_tokenizer Goto Github PK

View Code? Open in Web Editor NEW

This project forked from denosaurs/tokenizer

0.0 1.0 0.0 75 KB

⚙️ A simple tokenizer for Deno

License: MIT License

TypeScript 100.00%

deno_tokenizer's Introduction

Tokenizer

A simple tokenizer for Deno.

Example

// Usage example: build a Tokenizer over a sample string with a handful of
// rules, then read the resulting tokens in one of three ways.
import { Tokenizer } from "https://deno.land/x/tokenizer/mod.ts";

// Each rule pairs a token `type` with a `pattern`, given either as a literal
// string or as a RegExp.
// NOTE(review): the literal "HELLO" rule is listed before the generic WORD
// rule, which suggests rule order decides precedence — confirm against the library.
const tokenizer = new Tokenizer("abc 123 HELLO [a cool](link)", [
    { type: "HELLO",  pattern: "HELLO" },
    { type: "WORD",   pattern: /[a-zA-Z]+/ },
    // `value` maps the match to the token's value; here "123" becomes the number 123.
    { type: "DIGITS", pattern: /\d+/, value: m => Number.parseInt(m.match) },
    // The two capture groups (link text, link target) show up in the token's `groups`.
    { type: "LINK",   pattern: /\[([^\[]+)\]\(([^\)]+)\)/ },
    { type: "SPACE",  pattern: / /, ignore: true } // Spaces produce no token in the output. Or leave type blank and remove "ignore: true"
]);

// The first option: spread the tokenizer to get every token at once.
// console.log(...tokenizer);
// => { type: "WORD", match: "abc", value: "abc", groups: [], position: { start: 0, end: 3 } },
//    { type: "DIGITS", match: "123", value: 123, groups: [], position: { start: 4, end: 7 } },
//    { type: "HELLO", match: "HELLO", value: "HELLO", groups: [], position: { start: 8, end: 13 } },
//    { type: "LINK", match: "[a cool](link)", value: "[a cool](link)", groups: [ "a cool", "link" ], position: { start: 14, end: 28 } }

// The second option: pull tokens one at a time until the tokenizer reports it is done.
while (!tokenizer.done) {
    console.log(tokenizer.next().value);
}
// => { type: "WORD", match: "abc", value: "abc", groups: [], position: { start: 0, end: 3 } }
// => { type: "DIGITS", match: "123", value: 123, groups: [], position: { start: 4, end: 7 } }
// => { type: "HELLO", match: "HELLO", value: "HELLO", groups: [], position: { start: 8, end: 13 } }
// => { type: "LINK", match: "[a cool](link)", value: "[a cool](link)", groups: [ "a cool", "link" ], position: { start: 14, end: 28 } }

// The third option: collect all tokens into an array.
// console.log(tokenizer.tokenize()); // Pass a string to tokenize() to override the source string
// => [ { type: "WORD", match: "abc", value: "abc", groups: [], position: { start: 0, end: 3 } },
//      { type: "DIGITS", match: "123", value: 123, groups: [], position: { start: 4, end: 7 } },
//      { type: "HELLO", match: "HELLO", value: "HELLO", groups: [], position: { start: 8, end: 13 } },
//      { type: "LINK", match: "[a cool](link)", value: "[a cool](link)", groups: [ "a cool", "link" ], position: { start: 14, end: 28 } } ]