Browser Usage
ES Module Import
In browser environments, you must initialize the WASM module before using any Lindera functions. The default export __wbg_init handles this initialization.
The recommended approach is to load dictionaries from OPFS (the browser's Origin Private File System) rather than embedding them in the WASM binary:
import __wbg_init, { TokenizerBuilder, loadDictionaryFromBytes } from 'lindera-wasm-web';
import { downloadDictionary, loadDictionaryFiles, hasDictionary } from 'lindera-wasm-web/opfs';
/**
 * Tokenizes a sample sentence using a dictionary loaded from OPFS.
 *
 * Steps: initialize the WASM module, download the "ipadic" dictionary when
 * hasDictionary() reports it is not present, load its files, build a
 * tokenizer in "normal" mode, and log each token's surface and details.
 *
 * @returns {Promise<void>} Resolves once all tokens have been logged;
 *   rejects if any of the awaited steps fails.
 */
async function main() {
  // Initialize the WASM module (must be called once before using any API)
  await __wbg_init();

  // Download dictionary if not cached
  if (!(await hasDictionary("ipadic"))) {
    await downloadDictionary(
      "https://github.com/lindera/lindera/releases/download/<version>/lindera-ipadic-<version>.zip",
      "ipadic",
    );
  }

  // Load dictionary from OPFS
  const files = await loadDictionaryFiles("ipadic");
  const dictionary = loadDictionaryFromBytes(
    files.metadata, files.dictDa, files.dictVals, files.dictWordsIdx,
    files.dictWords, files.matrixMtx, files.charDef, files.unk,
  );

  const builder = new TokenizerBuilder();
  builder.setDictionaryInstance(dictionary);
  builder.setMode("normal");
  const tokenizer = builder.build();

  const tokens = tokenizer.tokenize("形態素解析を行います");
  tokens.forEach((token) => {
    console.log(`${token.surface}: ${token.details.join(',')}`);
  });
}

// Surface initialization, download, or tokenization failures instead of
// leaving the returned promise's rejection unhandled.
main().catch((err) => {
  console.error("Lindera example failed:", err);
});
Using Embedded Dictionaries (Advanced)
If you built with an embed-* feature flag, you can use embedded dictionaries instead of OPFS:
import __wbg_init, { TokenizerBuilder } from 'lindera-wasm-web-ipadic';
/**
 * Tokenizes a sample sentence using the dictionary embedded in the build.
 *
 * Initializes the WASM module, selects the "embedded://ipadic" dictionary,
 * builds a tokenizer in "normal" mode, and logs each token's surface and
 * details.
 *
 * @returns {Promise<void>} Resolves once all tokens have been logged;
 *   rejects if initialization or tokenizer construction fails.
 */
async function main() {
  // The WASM module must be initialized before any other API is used.
  await __wbg_init();

  const builder = new TokenizerBuilder();
  builder.setDictionary("embedded://ipadic");
  builder.setMode("normal");
  const tokenizer = builder.build();

  const tokens = tokenizer.tokenize("形態素解析を行います");
  tokens.forEach((token) => {
    console.log(`${token.surface}: ${token.details.join(',')}`);
  });
}

// Surface failures instead of leaving the returned promise's rejection
// unhandled.
main().catch((err) => {
  console.error("Lindera example failed:", err);
});
HTML Example
A minimal HTML page using lindera-wasm with OPFS dictionary loading:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Lindera WASM Demo</title>
</head>
<body>
<textarea id="input" rows="4" cols="50">関西国際空港限定トートバッグ</textarea>
<br>
<button id="tokenize" disabled>Tokenize</button>
<pre id="output">Loading dictionary...</pre>
<script type="module">
import __wbg_init, { TokenizerBuilder, loadDictionaryFromBytes } from './pkg/lindera_wasm.js';
import { downloadDictionary, loadDictionaryFiles, hasDictionary } from './pkg/opfs.js';
let tokenizer;
/**
 * Prepares the shared `tokenizer` and enables the Tokenize button.
 *
 * Initializes the WASM module, downloads the "ipadic" dictionary when
 * hasDictionary() reports it is not present, loads it, and builds a
 * tokenizer in "normal" mode. Progress and the final state are written to
 * the #output element. On failure the error is shown there and the button
 * stays disabled, so the page never hangs on "Loading dictionary...".
 *
 * @returns {Promise<void>} Always resolves; failures are reported in the UI
 *   and on the console.
 */
async function init() {
  const outputEl = document.getElementById('output');
  try {
    await __wbg_init();

    // Download dictionary if not cached
    if (!(await hasDictionary("ipadic"))) {
      outputEl.textContent = 'Downloading dictionary...';
      await downloadDictionary(
        "https://github.com/lindera/lindera/releases/download/<version>/lindera-ipadic-<version>.zip",
        "ipadic",
      );
    }

    // Load dictionary from OPFS
    const files = await loadDictionaryFiles("ipadic");
    const dictionary = loadDictionaryFromBytes(
      files.metadata, files.dictDa, files.dictVals, files.dictWordsIdx,
      files.dictWords, files.matrixMtx, files.charDef, files.unk,
    );

    const builder = new TokenizerBuilder();
    builder.setDictionaryInstance(dictionary);
    builder.setMode("normal");
    tokenizer = builder.build();

    // Only enable the button once a tokenizer actually exists.
    document.getElementById('tokenize').disabled = false;
    outputEl.textContent = 'Ready!';
  } catch (err) {
    // Tell the user what went wrong instead of leaving the status text stale.
    console.error(err);
    const reason = err instanceof Error ? err.message : String(err);
    outputEl.textContent = `Failed to initialize Lindera: ${reason}`;
  }
}
/**
 * Click handler: tokenizes the current textarea contents and writes one
 * "surface<TAB>details" line per token into the #output element.
 */
function renderTokens() {
  const inputEl = document.getElementById('input');
  const rows = [];
  tokenizer.tokenize(inputEl.value).forEach((token) => {
    rows.push(`${token.surface}\t${token.details.join(',')}`);
  });
  document.getElementById('output').textContent = rows.join('\n');
}

document.getElementById('tokenize').addEventListener('click', renderTokens);

// Start loading the dictionary as soon as the module script runs.
init();
</script>
</body>
</html>
Webpack Configuration
When using Webpack 5, enable the asyncWebAssembly experiment:
// webpack.config.js
module.exports = {
experiments: {
asyncWebAssembly: true,
},
module: {
rules: [
{
test: /\.wasm$/,
type: "webassembly/async",
},
],
},
};
Then import using the bundler target build:
import { TokenizerBuilder, loadDictionaryFromBytes } from 'lindera-wasm-bundler';
import { loadDictionaryFiles } from 'lindera-wasm-bundler/opfs';
// Load dictionary from OPFS (see OPFS Dictionary Storage for setup)
const files = await loadDictionaryFiles("ipadic");

// Dictionary parts, in the positional order loadDictionaryFromBytes takes them.
const dictionaryParts = [
  files.metadata,
  files.dictDa,
  files.dictVals,
  files.dictWordsIdx,
  files.dictWords,
  files.matrixMtx,
  files.charDef,
  files.unk,
];
const dictionary = loadDictionaryFromBytes(...dictionaryParts);

// Configure the builder with the loaded dictionary, then build the tokenizer.
const builder = new TokenizerBuilder();
builder.setDictionaryInstance(dictionary);
builder.setMode("normal");

const tokenizer = builder.build();
With the bundler target, the WASM module is initialized automatically by the bundler when it is imported, so you do not need to call __wbg_init() yourself.
Vite / Rollup Setup
Vite supports WASM out of the box with the web target. Place the built pkg/ directory in your project and import directly:
import __wbg_init, { TokenizerBuilder, loadDictionaryFromBytes } from './pkg/lindera_wasm.js';
import { loadDictionaryFiles } from './pkg/opfs.js';
await __wbg_init();
// Load dictionary from OPFS and use TokenizerBuilder as shown above
For the bundler target with Vite, you may need the vite-plugin-wasm plugin:
// vite.config.js
import wasm from 'vite-plugin-wasm';
// Vite configuration that registers the vite-plugin-wasm plugin.
const viteConfig = {
  plugins: [wasm()],
};

export default viteConfig;
Chrome Extension Considerations
Chrome extensions using Manifest V3 restrict WebAssembly.compile and WebAssembly.instantiate by default. To use lindera-wasm in an extension, you need to add wasm-unsafe-eval to your Content Security Policy:
{
"content_security_policy": {
"extension_pages": "script-src 'self' 'wasm-unsafe-eval'; object-src 'self'"
}
}
Note that wasm-unsafe-eval only allows WebAssembly execution and does not permit arbitrary JavaScript eval().
Performance Tips
- Initialize once: Call `__wbg_init()` once at application startup, not on every tokenization request.
- Reuse the tokenizer: Create the `Tokenizer` instance once and reuse it for multiple calls to `tokenize()`.
- Web Workers: For heavy tokenization workloads, consider running Lindera in a Web Worker to avoid blocking the main thread.