# Getting Started import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Steps } from 'fumadocs-ui/components/steps'; import { Files, File, Folder } from 'fumadocs-ui/components/files'; This guide will walk you through installing LocalMode and building your first local-first AI application. ## Installation ### Install packages The minimum setup requires `@localmode/core` and at least one provider: `bash pnpm install @localmode/core @localmode/transformers ` `bash npm install @localmode/core @localmode/transformers ` `bash yarn add @localmode/core @localmode/transformers ` All underlying ML dependencies (like `@huggingface/transformers`) are automatically installed with the provider packages. ### Configure bundler (if needed) For **Next.js**, add to `next.config.js`: ```javascript title="next.config.js" /** @type {import('next').NextConfig} */ const nextConfig = { webpack: (config) => { config.resolve.alias = { ...config.resolve.alias, sharp$: false, 'onnxruntime-node$': false, }; return config; }, experimental: { serverComponentsExternalPackages: ['sharp', 'onnxruntime-node'], }, }; module.exports = nextConfig; ``` For **Vite**, models work out of the box. For workers, you may need: ```typescript title="vite.config.ts" export default defineConfig({ optimizeDeps: { exclude: ['@huggingface/transformers'], }, }); ``` ## Your First Embedding Let's create your first embedding: ```typescript title="first-embedding.ts" import { embed } from '@localmode/core'; import { transformers } from '@localmode/transformers'; // Create embedding model const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); // Generate embedding const { embedding, usage } = await embed({ model, value: 'Hello, world!', }); console.log('Embedding dimensions:', embedding.length); // 384 console.log('Tokens used:', usage.tokens); ``` The first time you use a model, it downloads from HuggingFace Hub and caches in IndexedDB. Subsequent loads are instant. ## Build a Semantic Search App Here's a complete example of building semantic search: ```typescript title="semantic-search.ts" import { createVectorDB, embed, embedMany, semanticSearch } from '@localmode/core'; import { transformers } from '@localmode/transformers'; // 1. Setup const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const db = await createVectorDB({ name: 'my-documents', dimensions: 384, }); // 2. Sample documents const documents = [ 'Machine learning is a subset of artificial intelligence.', 'Neural networks are inspired by biological neurons.', 'Deep learning uses multiple layers of neural networks.', 'Natural language processing handles human language.', 'Computer vision enables machines to interpret images.', ]; // 3. Generate embeddings const { embeddings } = await embedMany({ model, values: documents, }); // 4. Store in vector database await db.addMany( documents.map((text, i) => ({ id: `doc-${i}`, vector: embeddings[i], metadata: { text }, })) ); // 5. Search const results = await semanticSearch({ db, model, query: 'How do neural networks work?', k: 3, }); console.log('Results:'); results.forEach((r, i) => { console.log(`${i + 1}. ${r.metadata.text} (score: ${r.score.toFixed(3)})`); }); ``` Output: ``` Results: 1. Neural networks are inspired by biological neurons. (score: 0.842) 2. Deep learning uses multiple layers of neural networks. (score: 0.756) 3. Machine learning is a subset of artificial intelligence. 
(score: 0.623) ``` ## Add RAG with Chunking For longer documents, use chunking: ```typescript title="rag-example.ts" import { createVectorDB, chunk, ingest, semanticSearch, rerank } from '@localmode/core'; import { transformers } from '@localmode/transformers'; // Setup const embeddingModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const rerankerModel = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2'); const db = await createVectorDB({ name: 'documents', dimensions: 384, }); // Load and chunk a document const documentText = ` Machine learning is revolutionizing how we build software... (your long document here) `; const chunks = chunk(documentText, { strategy: 'recursive', size: 512, overlap: 50, }); // Ingest with automatic embedding await ingest({ db, model: embeddingModel, documents: chunks.map((c) => ({ text: c.text, metadata: { start: c.startIndex, end: c.endIndex }, })), }); // Search and rerank for better accuracy const query = 'What are the applications of machine learning?'; const searchResults = await semanticSearch({ db, model: embeddingModel, query, k: 10, // Get more candidates for reranking }); const reranked = await rerank({ model: rerankerModel, query, documents: searchResults.map((r) => r.metadata.text as string), topK: 3, }); console.log('Top results after reranking:'); reranked.forEach((r, i) => { console.log(`${i + 1}. Score: ${r.score.toFixed(3)}`); console.log(` ${r.document.substring(0, 100)}...`); }); ``` ## Add LLM Generation Combine with WebLLM for complete RAG: ```typescript title="rag-with-llm.ts" import { streamText } from '@localmode/core'; import { webllm } from '@localmode/webllm'; // After getting search results... const context = reranked.map((r) => r.document).join('\n\n'); const llm = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); const stream = await streamText({ model: llm, prompt: `Based on the following context, answer the question. Context: ${context} Question: ${query} Answer:`, }); for await (const chunk of stream) { process.stdout.write(chunk.text); } ``` ## Project Structure A typical LocalMode project might look like: ```typescript title="src/lib/ai.ts" import { transformers } from '@localmode/transformers'; import { webllm } from '@localmode/webllm'; // Singleton instances for reuse export const embeddingModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); export const rerankerModel = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2'); export const llm = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); ``` ```typescript title="src/lib/db.ts" import { createVectorDB } from '@localmode/core'; let dbInstance: Awaited> | null = null; export async function getDB() { if (!dbInstance) { dbInstance = await createVectorDB({ name: 'my-app', dimensions: 384, }); } return dbInstance; } ``` ## Next Steps # Introduction import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Steps } from 'fumadocs-ui/components/steps'; # LocalMode **LocalMode** is a modular, local-first AI engine for the browser. Run embeddings, vector search, RAG pipelines, text classification, speech-to-text, image recognition, and LLM inference - all directly in the browser with zero server dependencies. All processing happens locally. No data ever leaves the user's device. Zero telemetry. Zero tracking. ## Why LocalMode? 
* **πŸ”’ Privacy-First** β€” Data never leaves the device * **⚑ Zero Dependencies** β€” Core package has no external dependencies * **πŸ“± Offline-Ready** β€” Works without network after first model download * **🎯 Type-Safe** β€” Full TypeScript support with comprehensive types * **πŸ”Œ Modular** β€” Use only what you need ## Packages ## Quick Start ### Install packages `bash pnpm install @localmode/core @localmode/transformers ` `bash npm install @localmode/core @localmode/transformers ` `bash yarn add @localmode/core @localmode/transformers ` ### Create embeddings ```typescript import { embed, embedMany } from '@localmode/core'; import { transformers } from '@localmode/transformers'; // Create embedding model const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); // Embed single value const { embedding } = await embed({ model, value: 'Hello, world!', }); // Embed multiple values const { embeddings } = await embedMany({ model, values: ['Hello', 'World', 'AI'], }); ``` ### Create vector database ```typescript import { createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'my-documents', dimensions: 384, // Matches all-MiniLM-L6-v2 }); // Add documents await db.addMany([ { id: 'doc-1', vector: embeddings[0], metadata: { text: 'Hello' } }, { id: 'doc-2', vector: embeddings[1], metadata: { text: 'World' } }, ]); // Search const results = await db.search(embedding, { k: 5 }); ``` ### Build a RAG pipeline ```typescript import { chunk, ingest, semanticSearch } from '@localmode/core'; import { transformers } from '@localmode/transformers'; const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); // Chunk document const chunks = chunk(documentText, { strategy: 'recursive', size: 512, overlap: 50, }); // Ingest into vector DB await ingest({ db, model, documents: chunks.map((c) => ({ text: c.text, metadata: { source: 'my-document.pdf' }, })), }); // Search const results = await semanticSearch({ db, model, query: 'What is machine learning?', k: 5, }); ``` ## Architecture LocalMode follows a **"zero-dependency core, thin provider wrappers"** architecture: ``` +-------------------------------------------------------------+ | Your Application | +-------------------------------------------------------------+ | @localmode/core | | +----------+ +----------+ +----------+ +----------------+ | | | VectorDB | |Embeddings| | RAG | | Storage/Security| | | +----------+ +----------+ +----------+ +----------------+ | +-------------------------------------------------------------+ | Provider Packages (thin wrappers) | | +----------------+ +------------+ +------------------+ | | | @localmode/ | | @localmode/| | @localmode/ | | | | transformers | | webllm | | pdfjs | | | +----------------+ +------------+ +------------------+ | +-------------------------------------------------------------+ | Browser APIs | | IndexedDB β€’ WebGPU β€’ WASM β€’ Web Workers | +-------------------------------------------------------------+ ``` ## Browser Compatibility | Browser | WebGPU | WASM | IndexedDB | Web Workers | | ----------- | ------- | ---- | --------- | ----------- | | Chrome 80+ | 113+ | βœ… | βœ… | βœ… | | Edge 80+ | 113+ | βœ… | βœ… | βœ… | | Firefox 75+ | Nightly | βœ… | βœ… | βœ… | | Safari 14+ | 18+ | βœ… | βœ… | ⚠️ | * **Safari/iOS**: Private browsing blocks IndexedDB - use `MemoryStorage` fallback * **Firefox**: WebGPU only in Nightly - WASM fallback is automatic * **SharedArrayBuffer**: Requires cross-origin isolation for some features ## Next Steps # Capabilities import { 
Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { TypeTable } from 'fumadocs-ui/components/type-table'; LocalMode provides utilities to detect device capabilities and choose appropriate fallbacks. ## Full Capability Report Get a comprehensive report of available features: ```ts import { detectCapabilities } from '@localmode/core'; const capabilities = await detectCapabilities(); ``` ### Capabilities Object ## Individual Feature Checks ### WebGPU ```typescript import { isWebGPUSupported } from '@localmode/core'; if (isWebGPUSupported()) { // Use WebGPU-accelerated models console.log('WebGPU available!'); } else { // Fall back to WASM console.log('Using WASM fallback'); } ``` ### IndexedDB ```typescript import { isIndexedDBSupported } from '@localmode/core'; if (isIndexedDBSupported()) { // Use IndexedDB storage } else { // Use memory storage (Safari private browsing) } ``` ### Web Workers ```typescript import { isWebWorkersSupported } from '@localmode/core'; if (isWebWorkersSupported()) { // Offload to worker const db = await createVectorDBWithWorker({ name: 'db', dimensions: 384 }); } else { // Use main thread const db = await createVectorDB({ name: 'db', dimensions: 384 }); } ``` ### Web Locks ```typescript import { isWebLocksSupported } from '@localmode/core'; if (isWebLocksSupported()) { // Use Web Locks for cross-tab coordination } else { // Use fallback lock manager } ``` ### Crypto ```typescript import { isCryptoSupported } from '@localmode/core'; if (isCryptoSupported()) { // Use Web Crypto API for encryption } else { // Encryption not available } ``` ### Cross-Origin Isolation ```typescript import { isCrossOriginIsolated } from '@localmode/core'; if (isCrossOriginIsolated()) { // SharedArrayBuffer available // Better worker performance } else { // Some features limited } ``` ## Model Support Check Check if a specific model is supported: ```typescript import { checkModelSupport } from '@localmode/core'; const support = await checkModelSupport('Llama-3.2-1B-Instruct-q4f16_1-MLC'); if (support.supported) { console.log('Model can run on this device'); } else { console.log('Issues:', support.issues); // ['Insufficient GPU memory', 'WebGPU not available'] } ``` ## Recommended Fallbacks Get fallback recommendations: ```typescript import { getRecommendedFallbacks } from '@localmode/core'; const fallbacks = await getRecommendedFallbacks(); console.log(fallbacks); // { // embedding: 'Xenova/all-MiniLM-L6-v2', // Smaller model for limited devices // llm: 'SmolLM2-1.7B-Instruct-q4f16_1-MLC', // Compact LLM // storage: 'memory', // If IndexedDB unavailable // compute: 'wasm', // If WebGPU unavailable // } ``` ## Capability-Based Model Selection Choose models based on device capabilities: ```typescript import { detectCapabilities } from '@localmode/core'; import { transformers } from '@localmode/transformers'; import { webllm } from '@localmode/webllm'; const capabilities = await detectCapabilities(); // Choose embedding model const embeddingModel = capabilities.webgpu ? 
transformers.embedding('Xenova/all-MiniLM-L12-v2') // Larger, better : transformers.embedding('Xenova/all-MiniLM-L6-v2'); // Smaller, faster // Choose LLM let llm; if (capabilities.webgpu && capabilities.memory?.available > 2048) { llm = webllm.languageModel('Llama-3.2-3B-Instruct-q4f16_1-MLC'); } else if (capabilities.webgpu) { llm = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); } else { console.warn('WebGPU not available, LLM features disabled'); llm = null; } ``` ## Browser Compatibility | Feature | Chrome | Edge | Firefox | Safari | | ----------------- | ------ | ----- | ------- | ------ | | WebGPU | 113+ | 113+ | Nightly | 18+ | | WASM | 80+ | 80+ | 75+ | 14+ | | IndexedDB | βœ… | βœ… | βœ… | βœ…\* | | Web Workers | βœ… | βœ… | βœ… | ⚠️ | | Web Locks | βœ… | βœ… | βœ… | 15.4+ | | SharedArrayBuffer | βœ…\*\* | βœ…\*\* | βœ…\*\* | βœ…\*\* | * Safari private browsing blocks IndexedDB \*\* Requires cross-origin isolation headers ## Handling Limited Devices Gracefully handle limited capabilities: ```typescript import { detectCapabilities, isWebGPUSupported } from '@localmode/core'; async function initializeAI() { const capabilities = await detectCapabilities(); const features = { embeddings: true, vectorSearch: true, llm: false, persistence: true, }; // Check WebGPU for LLM if (!isWebGPUSupported()) { console.warn('WebGPU not available. LLM features disabled.'); features.llm = false; } else if (capabilities.memory?.available < 1024) { console.warn('Low GPU memory. LLM may be slow.'); } else { features.llm = true; } // Check IndexedDB for persistence if (!capabilities.indexedDB) { console.warn('IndexedDB not available. Data will not persist.'); features.persistence = false; } return features; } // Usage const features = await initializeAI(); if (features.llm) { // Show LLM features in UI } else { // Hide or disable LLM features } ``` ## Device Information Get GPU and memory information: ```typescript import { detectCapabilities } from '@localmode/core'; const { gpu, memory } = await detectCapabilities(); if (gpu) { console.log('GPU Vendor:', gpu.vendor); console.log('GPU Renderer:', gpu.renderer); } if (memory) { console.log('Total Memory:', memory.total, 'MB'); console.log('Available Memory:', memory.available, 'MB'); } ``` ## Best Practices 1. **Check early** - Detect capabilities at app startup 2. **Provide fallbacks** - Always have a fallback for each feature 3. **Inform users** - Show warnings for limited functionality 4. **Test everywhere** - Test on various devices and browsers 5. **Graceful degradation** - Core features should work everywhere ## Next Steps # Coming Soon import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; These features have interfaces defined in `@localmode/core` and are under active development. Provider implementations in `@localmode/transformers` and `@localmode/webllm` are in progress. The features listed below are not yet production-ready. APIs may change before the stable release. 
## Classification & NLP `classify()`, `classifyMany()` β€” Sentiment analysis, emotion detection, topic classification `classifyZeroShot()` β€” Classify text into arbitrary labels without fine-tuning `extractEntities()`, `extractEntitiesMany()` β€” Extract people, organizations, locations, dates ### Interfaces ```ts import type { ClassificationModel, ZeroShotClassificationModel, NERModel } from '@localmode/core'; ``` *** ## Audio Processing `transcribe()` β€” Transcription with Whisper models, word-level timestamps `synthesizeSpeech()` β€” Generate speech audio from text ### Interfaces ```ts import type { SpeechToTextModel, TextToSpeechModel } from '@localmode/core'; ``` *** ## Vision `classifyImage()`, `classifyImageZeroShot()` β€” Categorize images `captionImage()` β€” Generate descriptions with BLIP models `detectObjects()` β€” Detect objects with bounding boxes `segmentImage()` β€” Background removal, semantic segmentation `extractImageFeatures()` β€” Feature vectors for image similarity search `imageToImage()` β€” Super resolution, style transfer ### Interfaces ```ts import type { ImageClassificationModel, ZeroShotImageClassificationModel, ImageCaptionModel, ObjectDetectionModel, SegmentationModel, ImageFeatureModel, ImageToImageModel, } from '@localmode/core'; ``` *** ## Text Processing `translate()` β€” Multi-language translation `summarize()` β€” Extractive and abstractive summarization `fillMask()` β€” BERT-style masked token prediction `answerQuestion()` β€” Extractive QA from context ### Interfaces ```ts import type { TranslationModel, SummarizationModel, FillMaskModel, QuestionAnsweringModel, } from '@localmode/core'; ``` *** ## Document Understanding `extractText()` β€” Optical character recognition from images `askDocument()` β€” Question answering on document images (invoices, forms) `askTable()` β€” Question answering on tabular data ### Interfaces ```ts import type { OCRModel, DocumentQAModel, TableQAModel } from '@localmode/core'; ``` *** ## Advanced RAG `createBM25()` β€” Keyword-based search for hybrid retrieval `hybridSearch()`, `reciprocalRankFusion()` β€” Combine vector and keyword search *** ## Text Generation `generateText()` β€” Non-streaming text generation (streaming via `streamText()` is available now) *** # Embeddings import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { TypeTable } from 'fumadocs-ui/components/type-table'; Embeddings convert text into numerical vectors that capture semantic meaning. Use them for similarity search, clustering, and RAG applications. 
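For a quick intuition check, here is a minimal sketch (the example sentences and score ranges are illustrative) that embeds three texts with the `Xenova/all-MiniLM-L6-v2` model used throughout these docs and compares them with `cosineSimilarity()`; semantically related sentences score noticeably higher than unrelated ones.

```ts
import { embedMany, cosineSimilarity } from '@localmode/core';
import { transformers } from '@localmode/transformers';

const model = transformers.embedding('Xenova/all-MiniLM-L6-v2');

// Two related sentences and one unrelated sentence
const { embeddings } = await embedMany({
  model,
  values: [
    'The cat sat on the mat.',
    'A kitten rested on the rug.',
    'Quarterly revenue grew by 12%.',
  ],
});

console.log(cosineSimilarity(embeddings[0], embeddings[1])); // high (related)
console.log(cosineSimilarity(embeddings[0], embeddings[2])); // noticeably lower (unrelated)
```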
## embed() Generate an embedding for a single value: ```ts {5-8} import { embed } from '@localmode/core'; import { transformers } from '@localmode/transformers'; const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const { embedding, usage, response } = await embed({ model, value: 'Hello, world!', }); console.log('Dimensions:', embedding.length); // 384 console.log('Tokens:', usage.tokens); // 4 console.log('Model:', response.modelId); // 'Xenova/all-MiniLM-L6-v2' ``` ```ts {3,7} const controller = new AbortController(); setTimeout(() => controller.abort(), 5000); // Cancel after 5s const { embedding } = await embed({ model, value: 'Hello, world!', abortSignal: controller.signal, }); ``` ### EmbedOptions ### EmbedResult ## embedMany() Generate embeddings for multiple values efficiently: ```ts {3-6} import { embedMany } from '@localmode/core'; const { embeddings, usage } = await embedMany({ model, values: ['Hello', 'World', 'AI', 'Machine Learning'], }); console.log('Count:', embeddings.length); // 4 console.log('Total tokens:', usage.tokens); // ~8 ``` ```ts {4-6} const { embeddings } = await embedMany({ model, values: largeArrayOfTexts, onProgress: (progress) => { console.log(`Processed ${progress.completed}/${progress.total}`); }, }); ``` ```ts {1,4,10} const controller = new AbortController(); // Cancel after 5 seconds setTimeout(() => controller.abort(), 5000); try { const { embeddings } = await embedMany({ model, values: largeArray, abortSignal: controller.signal, }); } catch (error) { if (error.name === 'AbortError') { console.log('Operation cancelled'); } } ``` ### EmbedManyOptions void', required: false, }, maxRetries: { description: 'Maximum retry attempts on failure', type: 'number', default: '2', }, }} /> ## streamEmbedMany() Stream embeddings as they're generated: ```typescript import { streamEmbedMany } from '@localmode/core'; const stream = streamEmbedMany({ model, values: texts, }); for await (const { index, embedding } of stream) { console.log(`Embedding ${index}:`, embedding.length); } ``` ## semanticSearch() Search for semantically similar documents: ```typescript import { semanticSearch, createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'docs', dimensions: 384 }); // Add documents to the database first... 
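// For example: embed a small (hypothetical) corpus with embedMany()
// (imported from '@localmode/core' like semanticSearch) and store it.
// `model` is the embedding model created as in the earlier examples.
const docs = [
  'Machine learning models learn patterns from data.',
  'Relational databases store rows in tables.',
];
const { embeddings } = await embedMany({ model, values: docs });
await db.addMany(
  docs.map((text, i) => ({ id: `doc-${i}`, vector: embeddings[i], metadata: { text } }))
);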
const results = await semanticSearch({ db, model, query: 'What is machine learning?', k: 5, }); results.forEach((result) => { console.log(`Score: ${result.score.toFixed(3)}`); console.log(`Text: ${result.metadata.text}`); }); ``` ### With Filters ```typescript const results = await semanticSearch({ db, model, query: 'AI applications', k: 5, filter: { category: { $eq: 'technology' }, year: { $gte: 2023 }, }, }); ``` ### Options ```typescript interface SemanticSearchOptions { db: VectorDB; model: EmbeddingModel; query: string; k?: number; filter?: FilterExpression; abortSignal?: AbortSignal; } ``` ## Distance Functions Compare vectors directly: ```typescript import { cosineSimilarity, euclideanDistance, dotProduct } from '@localmode/core'; const similarity = cosineSimilarity(embedding1, embedding2); console.log('Similarity:', similarity); // 0.0 to 1.0 const distance = euclideanDistance(embedding1, embedding2); console.log('Distance:', distance); const dot = dotProduct(embedding1, embedding2); console.log('Dot product:', dot); ``` ## Middleware Wrap embedding models with middleware for caching, logging, etc.: ```typescript import { wrapEmbeddingModel, cachingMiddleware, loggingMiddleware } from '@localmode/core'; const baseModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const model = wrapEmbeddingModel(baseModel, [ cachingMiddleware({ maxSize: 1000 }), loggingMiddleware({ logger: console.log }), ]); // Now all embed calls will be cached and logged const { embedding } = await embed({ model, value: 'Hello' }); ``` See [Middleware](/docs/core/middleware) for more details. ## Implementing Custom Models Create your own embedding model by implementing the `EmbeddingModel` interface: ```typescript import type { EmbeddingModel, DoEmbedOptions } from '@localmode/core'; class MyCustomEmbedder implements EmbeddingModel { readonly modelId = 'custom:my-embedder'; readonly provider = 'custom'; readonly dimensions = 768; readonly maxEmbeddingsPerCall = 100; readonly supportsParallelCalls = true; async doEmbed(options: DoEmbedOptions) { const { values } = options; // Your embedding logic here const embeddings = values.map(() => new Float32Array(768)); return { embeddings, usage: { tokens: values.length * 10 }, }; } } // Use with core functions const model = new MyCustomEmbedder(); const { embedding } = await embed({ model, value: 'Hello' }); ``` ## Best Practices 1. **Batch embeddings** - Use `embedMany()` instead of multiple `embed()` calls 2. **Use caching** - Add `cachingMiddleware()` for repeated queries 3. **Choose the right model** - Smaller models (MiniLM-L6) are faster, larger ones more accurate 4. **Preload models** - Load models during app initialization ### Recommended Models | Model | Dimensions | Size | Use Case | | ---------------------------------------------- | ---------- | ------- | --------------------- | | `Xenova/all-MiniLM-L6-v2` | 384 | \~22MB | General purpose, fast | | `Xenova/all-MiniLM-L12-v2` | 384 | \~33MB | Better accuracy | | `Xenova/paraphrase-multilingual-MiniLM-L12-v2` | 384 | \~117MB | 50+ languages | ## Next Steps # Events import { Callout } from 'fumadocs-ui/components/callout'; import { Steps } from 'fumadocs-ui/components/steps'; import { TypeTable } from 'fumadocs-ui/components/type-table'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; LocalMode provides a type-safe event system for building reactive applications. Subscribe to VectorDB lifecycle events, embedding operations, and custom events for real-time UI updates. 
## Overview The event system enables: * **Reactive UI updates** β€” Re-render components when data changes * **Cross-component communication** β€” Notify different parts of your app * **Debugging & logging** β€” Track all database operations * **Custom integrations** β€” Build workflows on top of database events ## Quick Start ```ts {1,4-6,9} import { createEventEmitter, VectorDBEvents } from '@localmode/core'; // Create an event emitter const events = createEventEmitter(); // Subscribe to events events.on('add', ({ id }) => { console.log('Document added:', id); }); // Emit events events.emit('add', { id: 'doc-1' }); ``` ## Creating Event Emitters ### Typed Event Emitter ```ts import { createEventEmitter, VectorDBEvents } from '@localmode/core'; // Create with built-in VectorDB event types const dbEvents = createEventEmitter(); // Or create a new EventEmitter class directly import { EventEmitter } from '@localmode/core'; const emitter = new EventEmitter(); ``` ### Custom Event Types ```ts import { EventEmitter } from '@localmode/core'; // Define your custom event types interface MyAppEvents { userLogin: { userId: string; timestamp: Date }; searchPerformed: { query: string; resultCount: number }; documentProcessed: { docId: string; chunks: number }; } const appEvents = new EventEmitter(); // Type-safe subscriptions appEvents.on('userLogin', ({ userId, timestamp }) => { console.log(`User ${userId} logged in at ${timestamp}`); }); // Type-safe emissions appEvents.emit('userLogin', { userId: 'user-123', timestamp: new Date(), }); ``` ## VectorDB Events Built-in event types for VectorDB operations: ## Embedding Events Event types for embedding operations: ## Event Methods ### `on(event, callback)` Subscribe to an event. Returns an unsubscribe function. ```ts const unsubscribe = events.on('add', ({ id }) => { console.log('Added:', id); }); // Later: unsubscribe unsubscribe(); ``` ### `once(event, callback)` Subscribe for a single emission only. ```ts events.once('modelLoad', ({ modelId }) => { console.log('Model loaded (first time only):', modelId); }); ``` ### `emit(event, data)` Emit an event synchronously. ```ts events.emit('add', { id: 'doc-1', collection: 'default' }); ``` ### `emitAsync(event, data)` Emit an event and wait for all async handlers to complete. ```ts await events.emitAsync('add', { id: 'doc-1' }); // All handlers (including async ones) have completed ``` ### `off(event?)` Remove listeners. ```ts // Remove all listeners for specific event events.off('add'); // Remove all listeners for all events events.off(); ``` ### Utility Methods ```ts // Get listener count const count = events.listenerCount('add'); // Check if there are any listeners const hasListeners = events.hasListeners('add'); // Get all event names with listeners const eventNames = events.eventNames(); ``` ## Global Event Bus LocalMode provides a global event bus for app-wide events: ```ts import { globalEventBus } from '@localmode/core'; // Subscribe anywhere in your app globalEventBus.on('add', ({ id }) => { console.log('Document added somewhere:', id); }); // Useful for: // - Debugging all database operations // - Syncing state across components // - Global logging ``` The global event bus receives events from all VectorDB instances, making it useful for centralized logging and state management. 
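As a sketch of that centralized-logging use case (the event names and payload fields mirror the VectorDB events shown elsewhere on this page; treat the exact shapes as illustrative), a single setup function can attach debug listeners to `globalEventBus` at startup and return a teardown function:

```ts
import { globalEventBus } from '@localmode/core';

// One place to observe every VectorDB instance in the app
export function enableDebugLogging() {
  const unsubscribers = [
    globalEventBus.on('add', ({ id }) => console.debug('[db] add', id)),
    globalEventBus.on('delete', ({ id }) => console.debug('[db] delete', id)),
    globalEventBus.on('search', ({ resultsCount, durationMs }) =>
      console.debug(`[db] search: ${resultsCount} results in ${durationMs}ms`)
    ),
    globalEventBus.on('error', ({ operation, error }) =>
      console.error(`[db] error in ${operation}`, error)
    ),
  ];

  // Call the returned function to stop logging
  return () => unsubscribers.forEach((off) => off());
}
```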
## Event Middleware Create middleware that emits events for VectorDB operations: ```ts import { wrapVectorDB, createEventEmitter, eventMiddleware } from '@localmode/core'; // Create event emitter const events = createEventEmitter(); // Subscribe to events events.on('add', ({ id }) => console.log('Added:', id)); events.on('delete', ({ id }) => console.log('Deleted:', id)); // Create DB with event middleware const db = wrapVectorDB({ db: baseDb, middleware: eventMiddleware(events), }); // Now all operations emit events automatically await db.add({ id: 'doc-1', vector, metadata }); // Console: "Added: doc-1" ``` ## React Integration ### Custom Hook ```tsx import { useEffect, useState } from 'react'; import { createEventEmitter, VectorDBEvents } from '@localmode/core'; const events = createEventEmitter(); function useVectorDBEvents() { const [documentCount, setDocumentCount] = useState(0); const [lastOperation, setLastOperation] = useState(null); useEffect(() => { const unsubscribeAdd = events.on('add', () => { setDocumentCount((c) => c + 1); setLastOperation('add'); }); const unsubscribeDelete = events.on('delete', () => { setDocumentCount((c) => c - 1); setLastOperation('delete'); }); const unsubscribeClear = events.on('clear', () => { setDocumentCount(0); setLastOperation('clear'); }); return () => { unsubscribeAdd(); unsubscribeDelete(); unsubscribeClear(); }; }, []); return { documentCount, lastOperation }; } ``` ### Search Analytics ```tsx function SearchAnalytics() { const [searches, setSearches] = useState< Array<{ query: string; results: number; duration: number; }> >([]); useEffect(() => { return globalEventBus.on('search', ({ resultsCount, k, durationMs }) => { setSearches((prev) => [ ...prev.slice(-99), // Keep last 100 { query: 'unknown', results: resultsCount, duration: durationMs }, ]); }); }, []); const avgDuration = searches.reduce((sum, s) => sum + s.duration, 0) / searches.length; return (
    <div>
      <div>Total searches: {searches.length}</div>
      <div>Average duration: {avgDuration.toFixed(2)}ms</div>
    </div>
); } ``` ## Full Example ```ts import { createVectorDB, embed, createEventEmitter, VectorDBEvents, EmbeddingEvents, } from '@localmode/core'; import { transformers } from '@localmode/transformers'; // Create event emitters const dbEvents = createEventEmitter(); const embedEvents = createEventEmitter(); // Set up logging dbEvents.on('add', ({ id }) => console.log(`[DB] Added: ${id}`)); dbEvents.on('search', ({ resultsCount, durationMs }) => { console.log(`[DB] Search: ${resultsCount} results in ${durationMs}ms`); }); dbEvents.on('error', ({ operation, error }) => { console.error(`[DB] Error in ${operation}:`, error); }); embedEvents.on('embedStart', ({ valueCount }) => { console.log(`[Embed] Starting ${valueCount} values`); }); embedEvents.on('embedComplete', ({ valueCount, durationMs, tokens }) => { console.log(`[Embed] Completed ${valueCount} values in ${durationMs}ms (${tokens} tokens)`); }); embedEvents.on('modelLoad', ({ modelId, durationMs }) => { console.log(`[Embed] Model ${modelId} loaded in ${durationMs}ms`); }); // Create database const db = await createVectorDB({ name: 'documents', dimensions: 384 }); const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); // Manually emit events (or use middleware) async function addDocument(text: string) { const id = crypto.randomUUID(); embedEvents.emit('embedStart', { valueCount: 1 }); const start = performance.now(); const { embedding, usage } = await embed({ model, value: text }); embedEvents.emit('embedComplete', { valueCount: 1, durationMs: performance.now() - start, tokens: usage.tokens, }); await db.add({ id, vector: embedding, metadata: { text } }); dbEvents.emit('add', { id }); return id; } ``` # Text Generation import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; Generate text using local language models with streaming support. ## streamText() Stream text generation for real-time responses: ```typescript import { streamText } from '@localmode/core'; import { webllm } from '@localmode/webllm'; const model = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); const stream = await streamText({ model, prompt: 'Explain quantum computing in simple terms.', }); for await (const chunk of stream) { process.stdout.write(chunk.text); } ``` ### With System Prompt ```typescript const stream = await streamText({ model, system: 'You are a helpful coding assistant. 
Be concise.', prompt: 'Write a function to reverse a string in TypeScript.', }); ``` ### Options ```typescript interface StreamTextOptions { model: LanguageModel; prompt: string; system?: string; maxTokens?: number; temperature?: number; topP?: number; stopSequences?: string[]; abortSignal?: AbortSignal; } ``` ### Stream Properties ```typescript const stream = await streamText({ model, prompt: 'Hello' }); // Iterate over text chunks for await (const chunk of stream) { console.log(chunk.text); // The generated text piece console.log(chunk.isLast); // Whether this is the last chunk } // Get full text after streaming const fullText = await stream.text; // Get usage statistics const usage = await stream.usage; console.log('Tokens:', usage.totalTokens); ``` ## generateText() Generate complete text without streaming: ```typescript import { generateText } from '@localmode/core'; const { text, usage } = await generateText({ model, prompt: 'Write a haiku about programming.', }); console.log(text); console.log('Tokens used:', usage.totalTokens); ``` ### Options ```typescript interface GenerateTextOptions { model: LanguageModel; prompt: string; system?: string; maxTokens?: number; temperature?: number; topP?: number; stopSequences?: string[]; abortSignal?: AbortSignal; } ``` ### Return Value ```typescript interface GenerateTextResult { text: string; usage: { promptTokens: number; completionTokens: number; totalTokens: number; }; response: { modelId: string; timestamp: Date; }; } ``` ## Cancellation Cancel generation mid-stream: ```typescript const controller = new AbortController(); // Cancel after 5 seconds setTimeout(() => controller.abort(), 5000); try { const stream = await streamText({ model, prompt: 'Write a long essay...', abortSignal: controller.signal, }); for await (const chunk of stream) { process.stdout.write(chunk.text); } } catch (error) { if (error.name === 'AbortError') { console.log('\nGeneration cancelled'); } } ``` ## Temperature & Sampling Control randomness in generation: ```typescript // More deterministic (good for factual responses) const stream = await streamText({ model, prompt: 'What is 2 + 2?', temperature: 0.1, }); // More creative (good for stories, brainstorming) const stream = await streamText({ model, prompt: 'Write a creative story about a robot.', temperature: 0.9, }); // Nucleus sampling const stream = await streamText({ model, prompt: 'Continue this sentence: The future of AI is...', topP: 0.9, // Consider tokens making up 90% of probability }); ``` | Parameter | Description | Range | Default | | ------------- | --------------------- | ------------- | ------------- | | `temperature` | Randomness | 0.0 - 2.0 | 1.0 | | `topP` | Nucleus sampling | 0.0 - 1.0 | 1.0 | | `maxTokens` | Max generation length | 1 - model max | Model default | ## Stop Sequences Stop generation at specific patterns: ```typescript const stream = await streamText({ model, prompt: 'List three fruits:\n1.', stopSequences: ['\n4.', '\n\n'], // Stop before 4th item or double newline }); ``` ## Chat-Style Prompts Build chat applications: ```typescript function buildPrompt(messages: Array<{ role: string; content: string }>) { return messages .map((m) => `${m.role}: ${m.content}`) .join('\n') + '\nassistant:'; } const messages = [ { role: 'user', content: 'Hello!' }, { role: 'assistant', content: 'Hi! How can I help you today?' }, { role: 'user', content: 'What is TypeScript?' 
}, ]; const stream = await streamText({ model, system: 'You are a helpful programming assistant.', prompt: buildPrompt(messages), stopSequences: ['user:', '\n\n'], }); ``` ## RAG Integration Combine with retrieval: ```typescript import { semanticSearch, streamText } from '@localmode/core'; async function ragQuery(question: string) { // Retrieve context const results = await semanticSearch({ db, model: embeddingModel, query: question, k: 3 }); const context = results.map((r) => r.metadata.text).join('\n\n'); // Generate answer const stream = await streamText({ model: llm, system: 'Answer based only on the provided context.', prompt: `Context:\n${context}\n\nQuestion: ${question}\n\nAnswer:`, }); return stream; } ``` ## Implementing Custom Models Create your own language model: ```typescript import type { LanguageModel, GenerateTextOptions, StreamTextOptions } from '@localmode/core'; class MyLanguageModel implements LanguageModel { readonly modelId = 'custom:my-model'; readonly provider = 'custom'; async doGenerateText(options: GenerateTextOptions) { // Your generation logic return { text: 'Generated text...', usage: { promptTokens: 10, completionTokens: 20, totalTokens: 30 }, }; } async doStreamText(options: StreamTextOptions) { // Return an async generator return (async function* () { yield { text: 'Hello', isLast: false }; yield { text: ' world!', isLast: true }; })(); } } ``` ## Best Practices 1. **Stream for UX** β€” Always use `streamText()` for user-facing apps 2. **Set max tokens** β€” Prevent runaway generation 3. **Use system prompts** β€” Guide model behavior consistently 4. **Handle errors** β€” Wrap generation in try-catch 5. **Provide cancellation** β€” Let users abort long generations ## Next Steps # Overview import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Steps, Step } from 'fumadocs-ui/components/steps'; # @localmode/core The core package contains **all functions, interfaces, types, and utilities** for building local-first AI applications. It has **zero external dependencies** β€” everything is implemented using native browser APIs. `@localmode/core` has no `dependencies` in its `package.json`. All functionality uses native Web APIs: IndexedDB, Web Crypto, Web Workers, BroadcastChannel, and more. 
## Installation `bash pnpm install @localmode/core ` `bash npm install @localmode/core ` `bash yarn add @localmode/core ` ## Quick Start ### Install Dependencies ```bash pnpm install @localmode/core @localmode/transformers ``` ### Create an Embedding Model ```ts {3} import { transformers } from '@localmode/transformers'; const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); ``` ### Generate Embeddings ```ts {1,3-6} import { embed } from '@localmode/core'; const { embedding, usage } = await embed({ model, value: 'Hello, world!', }); ``` ### Store and Search ```ts import { createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'docs', dimensions: 384 }); await db.add({ id: 'doc-1', vector: embedding, metadata: { text: 'Hello' } }); const results = await db.search(queryVector, { k: 5 }); ``` ## Features ### βœ… Production Ready These features are stable and ready for production use: | Feature | Description | | ------------------- | ------------------------------------------------------- | | **Vector Database** | HNSW index for fast approximate nearest neighbor search | | **Embeddings** | `embed()`, `embedMany()`, `semanticSearch()` functions | | **Reranking** | `rerank()` for improved RAG accuracy | | **RAG Utilities** | Text chunking (recursive, markdown, code-aware) | | **Text Generation** | `streamText()` with async iteration | | **Storage** | IndexedDB persistence with memory fallback | | **Capabilities** | WebGPU, IndexedDB, Workers detection | | **Security** | Encryption, PII redaction | | **Middleware** | Caching, logging, retry, validation | ### 🚧 Coming Soon | Feature | Description | | ------------------ | ---------------------------------------------- | | **Classification** | `classify()`, `classifyZeroShot()` | | **NER** | `extractEntities()` | | **Audio** | `transcribe()`, `synthesizeSpeech()` | | **Vision** | Image classification, captioning, segmentation | | **Translation** | `translate()` | | **Summarization** | `summarize()` | | **OCR** | `extractText()` | | **Document QA** | `askDocument()`, `askTable()` | ## Architecture LocalMode follows a **function-first API** design: ```typescript // βœ… Top-level functions (correct) const { embedding } = await embed({ model, value: 'Hello' }); // ❌ Class methods (wrong) const embedder = new Embedder(model); await embedder.embed('Hello'); ``` All functions accept a **single options object** and return **structured results**: ```typescript interface EmbedResult { embedding: Float32Array; usage: { tokens: number }; response: { modelId: string; timestamp: Date }; } ``` ## Quick Reference ## Core Exports ### Embeddings ```typescript import { embed, embedMany, streamEmbedMany, semanticSearch, wrapEmbeddingModel, } from '@localmode/core'; ``` ### Vector Database ```typescript import { createVectorDB, createVectorDBWithWorker, HNSWIndex, cosineSimilarity, euclideanDistance, dotProduct, } from '@localmode/core'; ``` ### RAG Utilities ```typescript import { chunk, recursiveChunk, markdownChunk, codeChunk, ingest, createBM25, hybridFuse, reciprocalRankFusion, } from '@localmode/core'; ``` ### Text Generation ```typescript import { streamText, generateText } from '@localmode/core'; ``` ### Classification ```typescript import { classify, classifyMany, classifyZeroShot, extractEntities, extractEntitiesMany, rerank, } from '@localmode/core'; ``` ### Storage ```typescript import { IndexedDBStorage, MemoryStorage, createStorage, getStorageQuota, requestPersistence, cleanup, } from '@localmode/core'; ``` ### 
Capabilities ```typescript import { detectCapabilities, isWebGPUSupported, isIndexedDBSupported, checkModelSupport, getRecommendedFallbacks, } from '@localmode/core'; ``` ### Middleware ```typescript import { wrapEmbeddingModel, wrapVectorDB, cachingMiddleware, loggingMiddleware, retryMiddleware, rateLimitMiddleware, validationMiddleware, piiRedactionMiddleware, encryptionMiddleware, } from '@localmode/core'; ``` ### Security ```typescript import { encrypt, decrypt, deriveKey, isCryptoSupported, redactPII } from '@localmode/core'; ``` ### Cross-Tab Sync ```typescript import { createBroadcaster, createLockManager, isWebLocksSupported } from '@localmode/core'; ``` ### Network ```typescript import { getNetworkStatus, onNetworkChange, isOnline, isOffline, waitForOnline, } from '@localmode/core'; ``` ### Events ```typescript import { createEventEmitter, globalEventBus } from '@localmode/core'; ``` ### Errors ```typescript import { LocalModeError, EmbeddingError, ModelNotFoundError, StorageError, QuotaExceededError, ValidationError, formatErrorForUser, } from '@localmode/core'; ``` ### Testing Utilities ```typescript import { createMockEmbeddingModel, createMockStorage, createMockVectorDB, createTestVector, createSeededRandom, } from '@localmode/core'; ``` ## Type Definitions All interfaces are exported for implementing custom providers: ```typescript import type { // Models EmbeddingModel, ClassificationModel, ZeroShotClassificationModel, NERModel, RerankerModel, LanguageModel, SpeechToTextModel, TextToSpeechModel, ImageClassificationModel, ImageCaptionModel, SegmentationModel, ObjectDetectionModel, TranslationModel, SummarizationModel, FillMaskModel, QuestionAnsweringModel, OCRModel, DocumentQAModel, // Storage Storage, StoredDocument, // Vector DB VectorDB, VectorDBConfig, SearchResult, // Middleware EmbeddingModelMiddleware, VectorDBMiddleware, } from '@localmode/core'; ``` ## Next Steps # Middleware import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Accordions, Accordion } from 'fumadocs-ui/components/accordion'; import { TypeTable } from 'fumadocs-ui/components/type-table'; Middleware lets you extend and modify the behavior of embedding models and vector databases. 
## Embedding Model Middleware Wrap embedding models with middleware: ```ts {5-8} import { wrapEmbeddingModel, cachingMiddleware, loggingMiddleware } from '@localmode/core'; import { transformers } from '@localmode/transformers'; const baseModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const model = wrapEmbeddingModel(baseModel, [ cachingMiddleware({ maxSize: 1000 }), loggingMiddleware({ logger: console.log }), ]); ``` ## Available Middleware Cache embeddings to avoid recomputation: ```ts import { cachingMiddleware } from '@localmode/core'; const model = wrapEmbeddingModel(baseModel, [ cachingMiddleware({ maxSize: 1000, // Maximum cache entries ttl: 60 * 60 * 1000, // Time-to-live: 1 hour storage: 'memory', // 'memory' or 'indexeddb' }), ]); // First call computes embedding const { embedding: e1 } = await embed({ model, value: 'Hello' }); // Second call returns from cache (instant) const { embedding: e2 } = await embed({ model, value: 'Hello' }); ``` **Persistent Cache:** ```ts const model = wrapEmbeddingModel(baseModel, [ cachingMiddleware({ storage: 'indexeddb', dbName: 'embedding-cache', maxSize: 10000, }), ]); // Cache persists across page reloads ``` Log all operations: ```ts import { loggingMiddleware } from '@localmode/core'; const model = wrapEmbeddingModel(baseModel, [ loggingMiddleware({ logger: (event) => { console.log(`[${event.type}] ${event.duration}ms`); console.log(` Inputs: ${event.inputCount}`); console.log(` Tokens: ${event.tokens}`); }, }), ]); ``` **Log to Analytics:** ```ts const model = wrapEmbeddingModel(baseModel, [ loggingMiddleware({ logger: (event) => { analytics.track('embedding', { model: event.modelId, duration: event.duration, tokens: event.tokens, }); }, }), ]); ``` Automatically retry on failures: ```ts import { retryMiddleware } from '@localmode/core'; const model = wrapEmbeddingModel(baseModel, [ retryMiddleware({ maxRetries: 3, delay: 1000, // Initial delay backoff: 'exponential', // 'linear' or 'exponential' maxDelay: 10000, // Maximum delay retryOn: (error) => { // Only retry on transient errors return error.code === 'NETWORK_ERROR'; }, }), ]); ``` boolean', }, }} /> Limit request rate: ```ts import { rateLimitMiddleware } from '@localmode/core'; const model = wrapEmbeddingModel(baseModel, [ rateLimitMiddleware({ maxRequests: 10, // Max requests windowMs: 1000, // Per second onLimit: () => { console.warn('Rate limit hit, waiting...'); }, }), ]); ``` void', }, }} /> Validate inputs: ```ts import { validationMiddleware } from '@localmode/core'; const model = wrapEmbeddingModel(baseModel, [ validationMiddleware({ maxLength: 8192, // Max characters per input maxBatchSize: 100, // Max inputs per batch validateInput: (input) => { if (input.trim().length === 0) { throw new Error('Empty input'); } }, }), ]); ``` void', }, }} /> Redact sensitive information before embedding: ```ts import { piiRedactionMiddleware } from '@localmode/core'; const model = wrapEmbeddingModel(baseModel, [ piiRedactionMiddleware({ patterns: ['email', 'phone', 'ssn', 'creditCard'], replacement: '[REDACTED]', }), ]); // Emails, phone numbers, etc. 
are redacted before embedding const { embedding } = await embed({ model, value: 'Contact john@example.com or call 555-123-4567', }); // Actually embeds: "Contact [REDACTED] or call [REDACTED]" ``` Encrypt embeddings before storage: ```ts import { encryptionMiddleware, deriveKey } from '@localmode/core'; const key = await deriveKey('user-password', 'salt'); const model = wrapEmbeddingModel(baseModel, [ encryptionMiddleware({ key, // Embeddings are encrypted before being returned }), ]); ``` Encryption uses Web Crypto API. Make sure to store the key securelyβ€”if lost, encrypted embeddings cannot be decrypted. ## Combining Middleware Stack multiple middleware: ```typescript const model = wrapEmbeddingModel(baseModel, [ validationMiddleware({ maxLength: 8192 }), piiRedactionMiddleware({ patterns: ['email', 'phone'] }), cachingMiddleware({ maxSize: 1000 }), retryMiddleware({ maxRetries: 3 }), loggingMiddleware({ logger: console.log }), ]); ``` Middleware executes in order. Place validation first, caching before expensive operations, and logging last. ## Vector DB Middleware Wrap vector databases: ```typescript import { wrapVectorDB } from '@localmode/core'; const baseDB = await createVectorDB({ name: 'db', dimensions: 384 }); const db = wrapVectorDB(baseDB, { beforeAdd: async (docs) => { console.log('Adding', docs.length, 'documents'); return docs; }, afterAdd: async (docs) => { console.log('Added', docs.length, 'documents'); }, beforeSearch: async (vector, options) => { console.log('Searching with k =', options.k); return { vector, options }; }, afterSearch: async (results) => { console.log('Found', results.length, 'results'); return results; }, beforeDelete: async (id) => { console.log('Deleting', id); return id; }, afterDelete: async () => { console.log('Deleted'); }, }); ``` ### Vector DB Middleware Interface ```typescript interface VectorDBMiddleware { beforeAdd?: (docs: Document[]) => Promise; afterAdd?: (docs: Document[]) => Promise; beforeSearch?: ( vector: Float32Array, options: SearchOptions ) => Promise<{ vector: Float32Array; options: SearchOptions }>; afterSearch?: (results: SearchResult[]) => Promise; beforeDelete?: (id: string) => Promise; afterDelete?: () => Promise; beforeClear?: () => Promise; afterClear?: () => Promise; } ``` ## Custom Middleware Create your own middleware: ```typescript import type { EmbeddingModelMiddleware } from '@localmode/core'; function myCustomMiddleware(options: { threshold: number }): EmbeddingModelMiddleware { return { transformParams: async ({ values }) => { // Transform input values const filtered = values.filter((v) => v.length > options.threshold); return { values: filtered }; }, wrapEmbed: async ({ doEmbed, values, model }) => { const start = Date.now(); // Call the actual embedding function const result = await doEmbed({ values }); const duration = Date.now() - start; console.log(`Embedded ${values.length} values in ${duration}ms`); return result; }, }; } const model = wrapEmbeddingModel(baseModel, [myCustomMiddleware({ threshold: 10 })]); ``` ## Best Practices 1. **Order matters** - Validation first, caching early, logging last 2. **Keep middleware focused** - One concern per middleware 3. **Handle errors** - Middleware can throw; handle gracefully 4. **Consider performance** - Each middleware adds overhead 5. 
**Use composition** - Stack simple middleware for complex behavior ## Next Steps # Network Logging import { Callout } from 'fumadocs-ui/components/callout'; import { Steps } from 'fumadocs-ui/components/steps'; import { TypeTable } from 'fumadocs-ui/components/type-table'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; LocalMode provides network logging utilities to monitor downloads, track progress, and debug network issues. This is especially useful for tracking model downloads and displaying progress to users. ## Overview The network logging system tracks: * Model downloads with progress * API requests (if any external services are used) * Upload/download byte counts * Request timing and statistics Network logging is opt-in and local-only. No data is sent anywhereβ€”this is purely for local debugging and UI progress indicators. ## Quick Start ```ts {1,4,7-9} import { onNetworkRequest, getNetworkStats } from '@localmode/core'; // Subscribe to network events const unsubscribe = onNetworkRequest((entry) => { if (entry.category === 'model' && entry.state === 'in-progress') { console.log(`Downloading: ${entry.url} (${entry.progress}%)`); } }); // Later: get statistics const stats = getNetworkStats(); console.log(`Downloaded: ${stats.totalDownloadBytes} bytes`); // Clean up when done unsubscribe(); ``` ## Creating a Network Logger ```ts import { createNetworkLogger } from '@localmode/core'; const logger = createNetworkLogger({ maxEntries: 500, // Keep last 500 entries logHeaders: false, // Don't log request headers categories: ['model'], // Only log model downloads }); ``` ### Configuration Options boolean', }, }} /> ## Subscribing to Events ### Real-time Progress Updates ```ts import { onNetworkRequest } from '@localmode/core'; const unsubscribe = onNetworkRequest((entry) => { switch (entry.state) { case 'pending': console.log(`Starting: ${entry.url}`); break; case 'in-progress': console.log(`Progress: ${entry.progress}%`); updateProgressBar(entry.progress); break; case 'completed': console.log(`Completed: ${entry.url} (${entry.duration}ms)`); break; case 'failed': console.error(`Failed: ${entry.url} - ${entry.error}`); break; } }); ``` ### Model Download Progress UI ```ts import { onNetworkRequest } from '@localmode/core'; function ModelDownloadProgress() { const [progress, setProgress] = useState(null); const [downloading, setDownloading] = useState(false); useEffect(() => { const unsubscribe = onNetworkRequest((entry) => { if (entry.category === 'model') { if (entry.state === 'in-progress') { setDownloading(true); setProgress(entry.progress ?? 0); } else if (entry.state === 'completed' || entry.state === 'failed') { setDownloading(false); setProgress(null); } } }); return unsubscribe; }, []); if (!downloading) return null; return (
    <div>{progress}%</div>
); } ``` ## Retrieving Logs ### Get All Logs ```ts import { getNetworkLogs } from '@localmode/core'; const logs = getNetworkLogs(); console.log(`Total requests: ${logs.length}`); ``` ### Filter Logs ```ts import { getNetworkLogs } from '@localmode/core'; // Get model downloads only const modelLogs = getNetworkLogs({ category: 'model', }); // Get failed requests const failedLogs = getNetworkLogs({ state: 'failed', }); // Get recent requests (last hour) const recentLogs = getNetworkLogs({ since: new Date(Date.now() - 60 * 60 * 1000), limit: 50, order: 'desc', }); // Filter by URL pattern const huggingFaceLogs = getNetworkLogs({ urlPattern: /huggingface\.co/, }); ``` ### Filter Options ## Network Statistics ```ts import { getNetworkStats } from '@localmode/core'; const stats = getNetworkStats(); console.log(`Total requests: ${stats.totalRequests}`); console.log(`Completed: ${stats.completedRequests}`); console.log(`Failed: ${stats.failedRequests}`); console.log(`Downloaded: ${(stats.totalDownloadBytes / 1024 / 1024).toFixed(2)} MB`); console.log(`Average speed: ${(stats.averageSpeed / 1024).toFixed(2)} KB/s`); console.log(`Requests/min: ${stats.requestsPerMinute}`); // Stats by category console.log('By category:', stats.byCategory); // Stats by HTTP status console.log('By status:', stats.byStatus); ``` ### Stats Structure ', }, byStatus: { description: 'Request counts grouped by HTTP status code', type: 'Record', }, }} /> ## Clearing Logs ```ts import { clearNetworkLogs } from '@localmode/core'; // Clear all logs clearNetworkLogs(); // Clear logs older than 7 days clearNetworkLogs({ olderThan: '7d' }); // Clear logs older than specific date clearNetworkLogs({ olderThan: new Date('2024-01-01') }); ``` Duration formats: `s` (seconds), `m` (minutes), `h` (hours), `d` (days), `w` (weeks) ## Log Entry Structure | undefined', }, }} /> ## Wrapping Fetch For full request logging, wrap the global fetch: ```ts import { wrapFetchWithLogging, unwrapFetch, isFetchWrapped } from '@localmode/core'; // Wrap global fetch wrapFetchWithLogging({ category: 'api', logHeaders: true, }); // Now all fetch calls are logged await fetch('https://api.example.com/data'); // Check if fetch is wrapped console.log(isFetchWrapped()); // true // Restore original fetch unwrapFetch(); ``` Wrapping fetch affects all requests in your application. Use with care in production environments. # RAG import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Steps, Step } from 'fumadocs-ui/components/steps'; import { Accordions, Accordion } from 'fumadocs-ui/components/accordion'; import { TypeTable } from 'fumadocs-ui/components/type-table'; RAG (Retrieval-Augmented Generation) combines vector search with language models to answer questions from your documents. LocalMode provides all the building blocks: chunking, ingestion, semantic search, reranking, and hybrid search. ## RAG Pipeline Overview ### Chunk Documents Split documents into smaller, semantically meaningful pieces. ```ts import { chunk } from '@localmode/core'; const chunks = chunk(documentText, { strategy: 'recursive', size: 512, overlap: 50, }); ``` ### Generate Embeddings & Store Create embeddings and store in a vector database. 
```ts import { ingest, createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'docs', dimensions: 384 }); await ingest({ db, model: embeddingModel, documents: chunks }); ``` ### Search & Retrieve Find relevant chunks using semantic search. ```ts import { semanticSearch } from '@localmode/core'; const results = await semanticSearch({ db, model: embeddingModel, query: userQuestion, k: 10, }); ``` ### Rerank for Precision Optionally rerank results for better accuracy. ```ts import { rerank } from '@localmode/core'; const reranked = await rerank({ model: rerankerModel, query: userQuestion, documents: results.map((r) => r.metadata.text), topK: 5, }); ``` ### Generate Answer Use an LLM to generate an answer from the context. ```ts import { streamText } from '@localmode/core'; const stream = await streamText({ model: llm, prompt: `Context:\n${context}\n\nQuestion: ${userQuestion}`, }); ``` ## Chunking Split documents into smaller pieces for better retrieval: Best for general text documents. Tries separators in order: paragraphs β†’ lines β†’ sentences β†’ words. ```ts {3-7} import { chunk } from '@localmode/core'; const chunks = chunk(documentText, { strategy: 'recursive', size: 512, // Target chunk size in characters overlap: 50, // Overlap between chunks }); chunks.forEach((c, i) => { console.log(`Chunk ${i}: ${c.text.substring(0, 50)}...`); console.log(` Start: ${c.startIndex}, End: ${c.endIndex}`); }); ``` **Custom Separators:** ```ts const chunks = chunk(text, { strategy: 'recursive', size: 512, separators: ['\n\n', '\n', '. ', ' '], // Try these in order }); ``` Respects markdown structureβ€”keeps headers with their content. ```ts const chunks = chunk(markdownText, { strategy: 'markdown', size: 512, overlap: 50, }); ``` Respects code structureβ€”keeps functions and classes intact. ```ts const chunks = chunk(sourceCode, { strategy: 'code', size: 512, overlap: 50, language: 'typescript', }); ``` Supported languages: `typescript`, `javascript`, `python`, `java`, `go`, `rust`, and more. 
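When documents come from mixed sources, it can help to pick the chunking strategy from the file type. A minimal sketch using only the `chunk()` options shown above (the `chunkByFileType` helper and its extension mapping are illustrative, not part of LocalMode):

```ts
import { chunk } from '@localmode/core';

// Hypothetical helper: choose a chunking strategy from the filename.
// Uses only the strategy/size/overlap/language options documented above.
function chunkByFileType(filename: string, text: string) {
  if (filename.endsWith('.md')) {
    return chunk(text, { strategy: 'markdown', size: 512, overlap: 50 });
  }
  if (filename.endsWith('.ts')) {
    return chunk(text, { strategy: 'code', size: 512, overlap: 50, language: 'typescript' });
  }
  // Default: recursive chunking for plain prose
  return chunk(text, { strategy: 'recursive', size: 512, overlap: 50 });
}

const chunks = chunkByFileType('notes.md', markdownText);
console.log(`Produced ${chunks.length} chunks`);
```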
### ChunkOptions ## Ingestion Ingest documents into a vector database: ```typescript import { createVectorDB, ingest } from '@localmode/core'; import { transformers } from '@localmode/transformers'; const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const db = await createVectorDB({ name: 'docs', dimensions: 384 }); await ingest({ db, model, documents: [ { text: 'First document...', metadata: { source: 'doc1.txt' } }, { text: 'Second document...', metadata: { source: 'doc2.txt' } }, ], }); ``` ### With Automatic Chunking ```typescript await ingest({ db, model, documents: [{ text: longDocument, metadata: { source: 'book.txt' } }], chunkOptions: { strategy: 'recursive', size: 512, overlap: 50, }, }); ``` ### With Progress Tracking ```typescript await ingest({ db, model, documents: largeDocumentArray, onProgress: (progress) => { console.log(`Ingested ${progress.completed}/${progress.total} documents`); }, }); ``` ## Semantic Search Search for relevant chunks: ```typescript import { semanticSearch } from '@localmode/core'; const results = await semanticSearch({ db, model, query: 'What are the benefits of machine learning?', k: 5, }); results.forEach((r) => { console.log(`Score: ${r.score.toFixed(3)}`); console.log(`Text: ${r.metadata.text}`); }); ``` ## Reranking Improve results with cross-encoder reranking: ```typescript import { rerank } from '@localmode/core'; import { transformers } from '@localmode/transformers'; const rerankerModel = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2'); // Get initial results const results = await semanticSearch({ db, model, query, k: 20 }); // Rerank for better accuracy const reranked = await rerank({ model: rerankerModel, query, documents: results.map((r) => r.metadata.text as string), topK: 5, }); reranked.forEach((r) => { console.log(`Score: ${r.score.toFixed(3)}`); console.log(`Text: ${r.document.substring(0, 100)}...`); }); ``` Reranking improves accuracy but adds latency. 
Use it when: - Accuracy is more important than speed * You're building a Q\&A system - Initial results may have false positives ## BM25 Keyword Search For exact keyword matching: ```typescript import { createBM25 } from '@localmode/core'; const bm25 = createBM25(documents.map((d) => d.text)); const keywordResults = bm25.search('machine learning'); keywordResults.forEach((r) => { console.log(`Score: ${r.score.toFixed(3)}, Index: ${r.index}`); }); ``` ## Hybrid Search Combine semantic and keyword search: ```typescript import { semanticSearch, createBM25, hybridFuse } from '@localmode/core'; // Semantic search const semanticResults = await semanticSearch({ db, model, query, k: 20 }); // BM25 keyword search const bm25 = createBM25(documents.map((d) => d.text)); const keywordResults = bm25.search(query); // Combine with fusion const hybridResults = hybridFuse({ semantic: semanticResults.map((r) => ({ id: r.id, score: r.score, })), keyword: keywordResults.map((r) => ({ id: documents[r.index].id, score: r.score, })), k: 10, alpha: 0.7, // Weight for semantic (0.7 = 70% semantic, 30% keyword) }); ``` ### Reciprocal Rank Fusion Alternative fusion method: ```typescript import { reciprocalRankFusion } from '@localmode/core'; const fused = reciprocalRankFusion({ rankings: [semanticResults.map((r) => r.id), keywordResults.map((r) => documents[r.index].id)], k: 10, constant: 60, // RRF constant (default: 60) }); ``` ## Complete RAG Pipeline Here's a complete example: ```typescript import { createVectorDB, chunk, ingest, semanticSearch, rerank, streamText } from '@localmode/core'; import { transformers } from '@localmode/transformers'; import { webllm } from '@localmode/webllm'; // 1. Setup models const embeddingModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const rerankerModel = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2'); const llm = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); // 2. Create database const db = await createVectorDB({ name: 'knowledge-base', dimensions: 384 }); // 3. Ingest documents async function ingestDocuments(documents: Array<{ text: string; source: string }>) { for (const doc of documents) { const chunks = chunk(doc.text, { strategy: 'recursive', size: 512, overlap: 50, }); await ingest({ db, model: embeddingModel, documents: chunks.map((c) => ({ text: c.text, metadata: { source: doc.source, start: c.startIndex, end: c.endIndex, }, })), }); } } // 4. Query function async function query(question: string) { // Retrieve const results = await semanticSearch({ db, model: embeddingModel, query: question, k: 10, }); // Rerank const reranked = await rerank({ model: rerankerModel, query: question, documents: results.map((r) => r.metadata.text as string), topK: 3, }); // Generate const context = reranked.map((r) => r.document).join('\n\n---\n\n'); const stream = await streamText({ model: llm, prompt: `You are a helpful assistant. Answer based only on the context provided. If the answer is not in the context, say "I don't have that information." 
Context: ${context} Question: ${question} Answer:`, }); return stream; } // Usage const stream = await query('What is machine learning?'); for await (const chunk of stream) { process.stdout.write(chunk.text); } ``` ## Document Loaders Load documents from various formats: ```typescript import { TextLoader, JSONLoader, CSVLoader, HTMLLoader } from '@localmode/core'; import { PDFLoader } from '@localmode/pdfjs'; // Text files const textLoader = new TextLoader(); const { documents: textDocs } = await textLoader.load(textBlob); // JSON const jsonLoader = new JSONLoader({ textField: 'content' }); const { documents: jsonDocs } = await jsonLoader.load(jsonBlob); // CSV const csvLoader = new CSVLoader({ textColumn: 'description' }); const { documents: csvDocs } = await csvLoader.load(csvBlob); // HTML const htmlLoader = new HTMLLoader({ selector: 'article' }); const { documents: htmlDocs } = await htmlLoader.load(htmlBlob); // PDF const pdfLoader = new PDFLoader({ splitByPage: true }); const { documents: pdfDocs } = await pdfLoader.load(pdfBlob); ``` ## Best Practices 1. **Chunk size** - 256-512 chars works well for most cases 2. **Overlap** - 10-20% overlap helps maintain context 3. **Reranking** - Always rerank for Q\&A applications 4. **Hybrid search** - Combine semantic + keyword for robust results 5. **Context window** - Don't exceed LLM's context limit ## Next Steps # Reranking import { Callout } from 'fumadocs-ui/components/callout'; import { Steps } from 'fumadocs-ui/components/steps'; import { TypeTable } from 'fumadocs-ui/components/type-table'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; Reranking improves the accuracy of RAG (Retrieval-Augmented Generation) pipelines by re-scoring documents based on their relevance to a query. After initial vector search retrieves candidates, reranking provides more precise ordering. ## Why Rerank? Vector search retrieves documents based on embedding similarity, but rerankers use cross-attention to directly score query-document pairs, often producing more accurate rankings for the final generation step. Typical RAG pipeline: 1. **Retrieve** β€” Get 20-50 candidates via vector search (fast, approximate) 2. **Rerank** β€” Score and reorder candidates (precise, slower) 3. **Generate** β€” Use top 5-10 documents for LLM context ## Basic Usage ```ts {5-9} import { rerank } from '@localmode/core'; import { transformers } from '@localmode/transformers'; // Create reranker model const rerankerModel = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2'); const { results } = await rerank({ model: rerankerModel, query: 'What is machine learning?', documents: [ 'Machine learning is a type of artificial intelligence...', 'Cooking pasta requires boiling water...', 'Deep learning is a subset of machine learning...', ], topK: 2, }); // results: [ // { index: 0, score: 0.95, text: 'Machine learning is a type of...' }, // { index: 2, score: 0.88, text: 'Deep learning is a subset of...' } // ] ``` ## RAG Pipeline Example ### Perform Initial Vector Search Retrieve more candidates than you needβ€”reranking will filter to the best ones. 
```ts import { semanticSearch, createVectorDB, embed } from '@localmode/core'; import { transformers } from '@localmode/transformers'; const embeddingModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); // Get 20 candidates from vector search const { embedding: queryVector } = await embed({ model: embeddingModel, value: 'What is machine learning?', }); const candidates = await db.search(queryVector, { k: 20 }); ``` ### Rerank the Candidates Score each document against the query for precise relevance ranking. ```ts const rerankerModel = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2'); const { results } = await rerank({ model: rerankerModel, query: 'What is machine learning?', documents: candidates.map((c) => c.metadata.text), topK: 5, // Keep only top 5 after reranking }); ``` ### Use Top Results for Generation Pass the reranked documents as context to your LLM. ```ts const context = results.map((r) => r.text).join('\n\n'); const response = await streamText({ model: languageModel, prompt: `Based on the following context, answer the question. Context: ${context} Question: What is machine learning?`, }); ``` ## API Reference ### `rerank(options)` Reranks documents by relevance to a query. ### Return Type: `RerankResult` ### `RankedDocument` ## Supported Models Cross-encoder models score query-document pairs directly: | Model | Size | Speed | Quality | Use Case | | -------------------------------- | ----- | ------ | ------- | --------------- | | `Xenova/ms-marco-MiniLM-L-6-v2` | 23MB | Fast | Good | General purpose | | `Xenova/ms-marco-MiniLM-L-12-v2` | 33MB | Medium | Better | Higher accuracy | | `Xenova/bge-reranker-base` | 110MB | Slower | Best | Maximum quality | Choose based on your needs: * **Speed-critical**: Use `ms-marco-MiniLM-L-6-v2` for fast inference * **Balanced**: Use `ms-marco-MiniLM-L-12-v2` for good accuracy with reasonable speed * **Quality-critical**: Use `bge-reranker-base` when accuracy matters most Start with `ms-marco-MiniLM-L-6-v2`β€”it's a great balance of speed and quality for most applications. ## Cancellation Support All reranking operations support `AbortSignal` for cancellation: ```ts const controller = new AbortController(); // Cancel after 5 seconds setTimeout(() => controller.abort(), 5000); try { const { results } = await rerank({ model: rerankerModel, query: 'What is AI?', documents: largeDocumentSet, abortSignal: controller.signal, }); } catch (error) { if (error.name === 'AbortError') { console.log('Reranking was cancelled'); } } ``` ## Performance Tips **Optimize your reranking pipeline:** 1. **Limit candidates**: Retrieve 20-50 candidates, not hundreds 2. **Use topK**: Only return the documents you need 3. **Batch when possible**: Rerank multiple queries together if your use case allows 4. **Cache results**: Consider caching reranked results for repeated queries ## Custom Reranker Implementation Implement the `RerankerModel` interface to create custom rerankers: ```ts import type { RerankerModel, DoRerankOptions, DoRerankResult } from '@localmode/core'; class MyCustomReranker implements RerankerModel { readonly modelId = 'custom:my-reranker'; readonly provider = 'custom'; async doRerank(options: DoRerankOptions): Promise { const { query, documents, topK } = options; // Your scoring logic here const scored = documents.map((doc, index) => ({ index, score: this.scoreDocument(query, doc), text: doc, })); // Sort by score descending scored.sort((a, b) => b.score - a.score); // Apply topK const results = topK ? 
scored.slice(0, topK) : scored; return { results, usage: { inputTokens: query.length + documents.join('').length, durationMs: 0, }, }; } private scoreDocument(query: string, document: string): number { // Implement your scoring logic return 0.5; } } ``` # Security import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; LocalMode provides built-in security utilities for encryption, key management, and PII redaction. LocalMode has **zero telemetry**. No data ever leaves your device. All processing happens locally in the browser. ## Encryption Encrypt sensitive data using Web Crypto API: ```typescript import { encrypt, decrypt, deriveKey } from '@localmode/core'; // Derive a key from a password const key = await deriveKey('user-password', 'unique-salt'); // Encrypt data const { ciphertext, iv } = await encrypt(key, 'sensitive data'); // Decrypt data const decrypted = await decrypt(key, ciphertext, iv); console.log(decrypted); // 'sensitive data' ``` ### Key Derivation Use PBKDF2 to derive keys from passwords: ```ts {3-6} import { deriveKey } from '@localmode/core'; const key = await deriveKey(password, salt, { iterations: 100000, // Higher = more secure, slower keyLength: 256, // AES-256 }); ``` Always use at least 100,000 iterations for PBKDF2. Lower values make brute-force attacks easier. ### Encryption Options ```ts const { ciphertext, iv } = await encrypt(key, data, { algorithm: 'AES-GCM', // Default, recommended }); ``` AES-GCM provides authenticated encryptionβ€”it protects both confidentiality and integrity. Use AES-CBC only for compatibility with legacy systems. ## Key Management Store keys securely: ```typescript import { KeyStore } from '@localmode/core'; const keyStore = new KeyStore({ name: 'my-app-keys', }); // Store a key await keyStore.set('encryption-key', key); // Retrieve a key const storedKey = await keyStore.get('encryption-key'); // Delete a key await keyStore.delete('encryption-key'); ``` Keys stored in IndexedDB are accessible to JavaScript. For sensitive applications, consider using hardware-backed keys via WebAuthn. 
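Putting these pieces together, here is a minimal sketch of first-time key setup: generate a random salt, derive a key, and keep it in the `KeyStore`. The hex-encoded salt and the `'encryption-key'` name are illustrative assumptions, not a prescribed LocalMode pattern; `userPassword` comes from your own login flow.

```ts
import { deriveKey, KeyStore } from '@localmode/core';

const keyStore = new KeyStore({ name: 'my-app-keys' });

// Assumption: the salt can be any unique string; here we hex-encode 16 random bytes.
const saltBytes = crypto.getRandomValues(new Uint8Array(16));
const salt = Array.from(saltBytes, (b) => b.toString(16).padStart(2, '0')).join('');

// Derive with a high iteration count (see the PBKDF2 note above).
const key = await deriveKey(userPassword, salt, { iterations: 100_000, keyLength: 256 });

// Persist the derived key for later sessions. The salt must be stored too
// (for example alongside your app settings) so the key can be re-derived.
await keyStore.set('encryption-key', key);
```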
## Encrypting Embeddings Encrypt embeddings before storage: ```typescript import { wrapEmbeddingModel, encryptionMiddleware, deriveKey } from '@localmode/core'; const key = await deriveKey('user-password', 'salt'); const model = wrapEmbeddingModel(baseModel, [encryptionMiddleware({ key })]); // Embeddings are automatically encrypted const { embedding } = await embed({ model, value: 'sensitive text' }); ``` ## PII Redaction Remove personally identifiable information before processing: ```typescript import { redactPII } from '@localmode/core'; const text = 'Contact John at john@example.com or call 555-123-4567'; const redacted = redactPII(text, { patterns: ['email', 'phone'], replacement: '[REDACTED]', }); console.log(redacted); // 'Contact John at [REDACTED] or call [REDACTED]' ``` ### Available Patterns | Pattern | Description | Example | | ------------ | ----------------------- | ------------------------------------------- | | `email` | Email addresses | [john@example.com](mailto:john@example.com) | | `phone` | Phone numbers | 555-123-4567 | | `ssn` | Social Security numbers | 123-45-6789 | | `creditCard` | Credit card numbers | 4111-1111-1111-1111 | | `ip` | IP addresses | 192.168.1.1 | | `address` | Street addresses | 123 Main St | ### Custom Patterns ```typescript const redacted = redactPII(text, { patterns: ['email', 'phone'], custom: [ { name: 'employeeId', regex: /EMP-\d{6}/g, }, ], replacement: (match, pattern) => `[${pattern.toUpperCase()}]`, }); ``` ### PII Middleware Automatically redact PII before embedding: ```typescript import { wrapEmbeddingModel, piiRedactionMiddleware } from '@localmode/core'; const model = wrapEmbeddingModel(baseModel, [ piiRedactionMiddleware({ patterns: ['email', 'phone', 'ssn'], replacement: '[REDACTED]', }), ]); // PII is automatically redacted before embedding const { embedding } = await embed({ model, value: 'Email me at john@example.com', }); // Actually embeds: 'Email me at [REDACTED]' ``` ## Feature Detection Check security feature availability: ```typescript import { isCryptoSupported, isCrossOriginIsolated } from '@localmode/core'; if (!isCryptoSupported()) { console.warn('Web Crypto API not available'); } if (!isCrossOriginIsolated()) { console.warn('SharedArrayBuffer not available'); } ``` ## Security Best Practices 1. **Never store passwords** - Use key derivation 2. **Unique salts** - Generate random salts for each key 3. **High iterations** - Use at least 100,000 PBKDF2 iterations 4. **Redact PII** - Always redact before processing user data 5. 
**Zero telemetry** - LocalMode never phones home ### Secure RAG Pipeline ```typescript import { wrapEmbeddingModel, piiRedactionMiddleware, encryptionMiddleware, deriveKey, } from '@localmode/core'; // Setup secure model const key = await deriveKey(userPassword, uniqueSalt); const secureModel = wrapEmbeddingModel(baseModel, [ piiRedactionMiddleware({ patterns: ['email', 'phone', 'ssn', 'creditCard'], }), encryptionMiddleware({ key }), ]); // All embeddings are PII-redacted and encrypted const { embedding } = await embed({ model: secureModel, value: userInput, }); ``` ## Content Security Policy For maximum security, configure CSP headers: ```typescript // next.config.js const securityHeaders = [ { key: 'Content-Security-Policy', value: [ "default-src 'self'", "script-src 'self' 'wasm-unsafe-eval'", // Required for WASM "worker-src 'self' blob:", // Required for workers "connect-src 'self' https://huggingface.co https://cdn-lfs.huggingface.co", ].join('; '), }, ]; ``` ## Cross-Origin Isolation Some features require cross-origin isolation: ```typescript // Check if isolated if (crossOriginIsolated) { // SharedArrayBuffer available // Better performance for workers } // Enable via headers: // Cross-Origin-Opener-Policy: same-origin // Cross-Origin-Embedder-Policy: require-corp ``` ## Audit Logging Log security-relevant events: ```typescript import { wrapEmbeddingModel, loggingMiddleware } from '@localmode/core'; const model = wrapEmbeddingModel(baseModel, [ loggingMiddleware({ logger: (event) => { // Log to secure audit trail auditLog.log({ timestamp: new Date().toISOString(), action: 'embedding', model: event.modelId, inputCount: event.inputCount, // Don't log actual input values! }); }, }), ]); ``` ## Next Steps # Storage import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { TypeTable } from 'fumadocs-ui/components/type-table'; LocalMode provides flexible storage options for persisting vector databases and application data. ## Storage Options The default storage uses IndexedDB for persistence: ```ts {3-6} import { IndexedDBStorage, createVectorDB } from '@localmode/core'; const storage = new IndexedDBStorage({ name: 'my-app', storeName: 'vectors', // Optional, defaults to 'store' }); // Or use default (IndexedDB) automatically: const db = await createVectorDB({ name: 'documents', dimensions: 384, // Uses IndexedDBStorage by default }); ``` Data persists across page reloads and browser restarts. For temporary data or environments without IndexedDB: ```ts {3,4} import { MemoryStorage, createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'temp', dimensions: 384, storage: new MemoryStorage(), }); // ⚠️ Data is lost on page reload ``` Useful for: * Testing and development * Temporary caches * Safari private browsing fallback Safari's private browsing mode blocks IndexedDB. Use `MemoryStorage` as a fallback or detect this condition with `isIndexedDBSupported()`. 
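For example, you can pick the storage backend at startup with feature detection. A minimal sketch combining `isIndexedDBSupported()` (see Feature Detection below) with the two built-in adapters; the automatic `createStorageWithFallback()` helper later on this page covers the same need:

```ts
import {
  createVectorDB,
  IndexedDBStorage,
  MemoryStorage,
  isIndexedDBSupported,
} from '@localmode/core';

// Fall back to in-memory storage when IndexedDB is unavailable
// (e.g. Safari private browsing). Data will not persist in that case.
const storage = isIndexedDBSupported()
  ? new IndexedDBStorage({ name: 'my-app' })
  : new MemoryStorage();

const db = await createVectorDB({
  name: 'documents',
  dimensions: 384,
  storage,
});
```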
## Storage Interface All storage adapters implement this interface: ', }, 'set(key, value)': { description: 'Store a document with the given key', type: 'Promise', }, 'delete(key)': { description: 'Delete a document by key', type: 'Promise', }, 'keys()': { description: 'Get all stored keys', type: 'Promise', }, 'clear()': { description: 'Delete all documents', type: 'Promise', }, 'close()': { description: 'Close the storage connection', type: 'Promise', }, }} /> ### StoredDocument ', }, }} /> ## Third-Party Adapters ### Dexie.js ```typescript import { DexieStorage } from '@localmode/dexie'; import { createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'dexie-db', dimensions: 384, storage: new DexieStorage({ name: 'my-app', version: 1, }), }); ``` ### idb ```typescript import { IDBStorage } from '@localmode/idb'; import { createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'idb-db', dimensions: 384, storage: new IDBStorage({ name: 'my-app', }), }); ``` ### localForage ```typescript import { LocalForageStorage } from '@localmode/localforage'; import { createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'lf-db', dimensions: 384, storage: new LocalForageStorage({ name: 'my-app', driver: 'INDEXEDDB', }), }); ``` ## Custom Storage Implement your own storage adapter: ```typescript import type { Storage, StoredDocument } from '@localmode/core'; class MyCustomStorage implements Storage { private data = new Map(); async get(key: string) { return this.data.get(key); } async set(key: string, value: StoredDocument) { this.data.set(key, value); } async delete(key: string) { this.data.delete(key); } async keys() { return Array.from(this.data.keys()); } async clear() { this.data.clear(); } async close() { // Cleanup if needed } } ``` ## Storage Fallback Automatically fallback when IndexedDB is unavailable: ```typescript import { createStorageWithFallback, IndexedDBStorage, MemoryStorage } from '@localmode/core'; const storage = await createStorageWithFallback({ providers: [() => new IndexedDBStorage({ name: 'app' }), () => new MemoryStorage()], onFallback: (error, index) => { console.warn(`Storage provider ${index} failed:`, error.message); }, }); const db = await createVectorDB({ name: 'robust-db', dimensions: 384, storage, }); ``` ## Quota Management Monitor and manage storage quota: ```typescript import { getStorageQuota, requestPersistence } from '@localmode/core'; // Check available quota const quota = await getStorageQuota(); console.log('Used:', quota.usage); console.log('Available:', quota.quota); console.log('Percent used:', ((quota.usage / quota.quota) * 100).toFixed(1) + '%'); // Request persistent storage (won't be auto-cleared) const isPersisted = await requestPersistence(); if (isPersisted) { console.log('Storage is now persistent'); } ``` ### Quota Warnings ```typescript import { checkQuotaWithWarnings } from '@localmode/core'; const { ok, warning, quota } = await checkQuotaWithWarnings({ warningThreshold: 0.8, // Warn at 80% usage }); if (warning) { console.warn('Storage is almost full!', quota); } ``` ## Cleanup Remove old or unused data: ```typescript import { cleanup } from '@localmode/core'; // Clean up databases older than 30 days await cleanup({ maxAge: 30 * 24 * 60 * 60 * 1000, // 30 days in ms onDelete: (name) => console.log(`Deleted: ${name}`), }); // Clean up to free space await cleanup({ targetFreeSpace: 100 * 1024 * 1024, // 100MB }); ``` ## Cross-Tab Synchronization Keep data in sync across 
browser tabs: ```typescript import { createBroadcaster } from '@localmode/core'; const broadcaster = createBroadcaster('my-app-sync'); // Listen for changes from other tabs broadcaster.subscribe((message) => { if (message.type === 'document-added') { console.log('New document added in another tab:', message.id); // Refresh your UI } }); // Broadcast changes to other tabs await db.add({ id: 'new-doc', vector, metadata }); broadcaster.publish({ type: 'document-added', id: 'new-doc', }); ``` ### Web Locks Prevent concurrent writes: ```typescript import { createLockManager } from '@localmode/core'; const locks = createLockManager(); // Acquire exclusive lock before writing await locks.withLock('db-write', async () => { await db.addMany(documents); }); // Other tabs wait for lock to be released ``` ## Feature Detection Check storage capabilities: ```typescript import { isIndexedDBSupported, isWebLocksSupported } from '@localmode/core'; if (!isIndexedDBSupported()) { console.warn('IndexedDB not available, using memory storage'); } if (!isWebLocksSupported()) { console.warn('Web Locks not available, using fallback'); } ``` ## Best Practices 1. **Always use fallbacks** - Safari private browsing blocks IndexedDB 2. **Request persistence** - Prevent auto-clearing of important data 3. **Monitor quota** - Show warnings before storage is full 4. **Clean up** - Remove old data periodically 5. **Use locks** - Prevent race conditions across tabs ## Next Steps # Cross-Tab Sync import { Callout } from 'fumadocs-ui/components/callout'; import { Steps } from 'fumadocs-ui/components/steps'; import { TypeTable } from 'fumadocs-ui/components/type-table'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Accordion, Accordions } from 'fumadocs-ui/components/accordion'; LocalMode provides cross-tab synchronization to keep VectorDB instances in sync across multiple browser tabs. This prevents data inconsistencies when users have your app open in multiple tabs. ## Overview Cross-tab sync uses two browser APIs: * **Web Locks API** β€” Prevents concurrent writes from corrupting data * **BroadcastChannel API** β€” Notifies other tabs when data changes Both APIs have fallbacks for unsupported browsers. If unavailable, operations proceed without synchronization (safe for single-tab usage). ## Quick Start ### Create a Lock Manager The lock manager ensures only one tab can write at a time. ```ts {3} import { getLockManager } from '@localmode/core'; const locks = getLockManager('my-database'); ``` ### Create a Broadcaster The broadcaster notifies other tabs of changes. ```ts {3} import { createBroadcaster } from '@localmode/core'; const broadcaster = createBroadcaster('my-database'); ``` ### Use Locks for Write Operations Wrap write operations in locks to prevent conflicts. ```ts await locks.withWriteLock('documents', async () => { await db.add({ id: 'doc-1', vector: embedding, metadata: { text: 'Hello world' }, }); // Notify other tabs broadcaster.notifyDocumentAdded('default', 'doc-1'); }); ``` ### Subscribe to Changes React to changes from other tabs. ```ts broadcaster.on('document_added', (message) => { console.log(`Document ${message.documentId} added in another tab`); // Refresh your UI or invalidate cache }); ``` ## Lock Manager API ### `getLockManager(dbName)` Creates or retrieves a lock manager for a database. 
```ts import { getLockManager } from '@localmode/core'; const locks = getLockManager('my-database'); ``` ### Lock Methods ', }, 'withReadLock(resource, callback)': { description: 'Acquire a shared (read) lock', type: 'Promise', }, 'withWriteLock(resource, callback)': { description: 'Acquire an exclusive (write) lock', type: 'Promise', }, 'tryLock(resource, callback, mode?)': { description: 'Try to acquire lock immediately, returns null if unavailable', type: 'Promise', }, 'getLockState()': { description: 'Get current lock state for debugging', type: 'Promise<{ held: string[], pending: string[] }>', }, }} /> ### Lock Options ### Lock Examples Multiple tabs can hold read locks simultaneously: ```ts // Read lock - multiple tabs can read at once const data = await locks.withReadLock('documents', async () => { return await db.search(queryVector, { k: 10 }); }); ``` Write locks are exclusiveβ€”only one tab can hold the lock: ```ts // Write lock - exclusive access await locks.withWriteLock('documents', async () => { await db.add({ id: 'doc-1', vector, metadata }); }); ``` Non-blocking lock attemptβ€”useful for optional optimizations: ```ts // Try to get lock, return null if unavailable const result = await locks.tryLock('documents', async () => { await db.add({ id: 'doc-1', vector, metadata }); return 'success'; }); if (result === null) { console.log('Another tab is writing, try again later'); } ``` Fail if lock isn't acquired within timeout: ```ts try { await locks.withLock( 'documents', async () => { await db.add({ id: 'doc-1', vector, metadata }); }, { timeout: 5000 } // 5 second timeout ); } catch (error) { console.error('Lock timeout - another tab is holding the lock'); } ``` ## Broadcaster API ### `createBroadcaster(dbName)` Creates a broadcaster for cross-tab communication. ```ts import { createBroadcaster } from '@localmode/core'; const broadcaster = createBroadcaster('my-database'); ``` ### Notification Methods ### Event Types Subscribe to specific event types: ```ts type BroadcastMessageType = | 'document_added' | 'document_updated' | 'document_deleted' | 'documents_deleted' | 'collection_cleared' | 'database_cleared' | 'index_updated' | 'leader_elected' | 'leader_ping'; ``` ### Subscription Methods ```ts // Subscribe to specific event const unsubscribe = broadcaster.on('document_added', (message) => { console.log('Document added:', message.documentId); }); // Subscribe to all events const unsubscribeAll = broadcaster.onAny((message) => { console.log('Event:', message.type); }); // Clean up unsubscribe(); unsubscribeAll(); ``` ### Message Structure ## Leader Election For tasks that should only run in one tab (like background sync), use leader election: ```ts const broadcaster = createBroadcaster('my-database'); // Try to become the leader const isLeader = await broadcaster.electLeader(); if (isLeader) { console.log('This tab is the leader'); // Start background sync, cleanup tasks, etc. startBackgroundSync(); } // Check leader status if (broadcaster.getIsLeader()) { // Run leader-only tasks } // Resign leadership (e.g., before tab closes) broadcaster.resignLeadership(); ``` Leader election uses localStorage to coordinate between tabs. The leader sends periodic heartbeatsβ€”if a leader doesn't ping for 10 seconds, another tab can take over. 
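Because leadership can lapse when the leading tab closes or freezes, a follower may want to retry the election periodically. A hedged sketch under that assumption (the 15-second retry interval and `startBackgroundSync` are illustrative; retrying `electLeader()` after the heartbeat times out is an assumption, not documented behaviour):

```ts
import { createBroadcaster } from '@localmode/core';

const broadcaster = createBroadcaster('my-database');

// Assumption: electLeader() can be retried safely. With a ~10s heartbeat
// timeout, a 15s retry gives a surviving tab a chance to take over.
async function ensureLeadership(onElected: () => void) {
  if (await broadcaster.electLeader()) onElected();
  setInterval(async () => {
    if (!broadcaster.getIsLeader() && (await broadcaster.electLeader())) {
      onElected();
    }
  }, 15_000);
}

await ensureLeadership(() => startBackgroundSync());

// Give up leadership promptly when the tab closes.
window.addEventListener('beforeunload', () => broadcaster.resignLeadership());
```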
## Full Integration Example ```ts import { createVectorDB, getLockManager, createBroadcaster, embed } from '@localmode/core'; import { transformers } from '@localmode/transformers'; // Setup const db = await createVectorDB({ name: 'documents', dimensions: 384 }); const locks = getLockManager('documents'); const broadcaster = createBroadcaster('documents'); const embeddingModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); // Subscribe to changes from other tabs broadcaster.on('document_added', async ({ documentId }) => { console.log(`Refresh UI - document ${documentId} added in another tab`); // Optionally refresh your document list or clear caches }); broadcaster.on('database_cleared', () => { console.log('Database was cleared in another tab'); // Reset your UI state }); // Add document with synchronization async function addDocument(text: string) { const { embedding } = await embed({ model: embeddingModel, value: text, }); const id = crypto.randomUUID(); await locks.withWriteLock('documents', async () => { await db.add({ id, vector: embedding, metadata: { text, createdAt: Date.now() }, }); // Notify other tabs broadcaster.notifyDocumentAdded('default', id); }); return id; } // Clean up on page unload window.addEventListener('beforeunload', () => { broadcaster.close(); }); ``` ## Browser Compatibility | Feature | Chrome | Edge | Firefox | Safari | | ---------------- | ------ | ---- | ------- | ------ | | Web Locks API | 69+ | 79+ | 96+ | 15.4+ | | BroadcastChannel | 54+ | 79+ | 38+ | 15.4+ | If these APIs are unavailable (e.g., in older browsers or certain WebView environments), operations proceed without synchronization. This is safe for single-tab usage but may cause issues with multiple tabs. ### Feature Detection ```ts import { LockManager, Broadcaster } from '@localmode/core'; if (LockManager.isSupported()) { console.log('Web Locks API available'); } if (Broadcaster.isSupported()) { console.log('BroadcastChannel API available'); } ``` # Testing Utilities import { Callout } from 'fumadocs-ui/components/callout'; import { Steps } from 'fumadocs-ui/components/steps'; import { TypeTable } from 'fumadocs-ui/components/type-table'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Accordion, Accordions } from 'fumadocs-ui/components/accordion'; LocalMode provides comprehensive mock utilities for testing applications without loading real ML models. These mocks are deterministic, configurable, and match the exact API of real implementations. ## Overview Testing utilities include: * **Mock models** β€” Embedding, classification, NER, vision, audio, LLM, and more * **Mock storage** β€” In-memory storage for unit tests * **Mock VectorDB** β€” Full-featured database mock * **Test helpers** β€” Seeded random generators, test vectors, spies All mocks produce deterministic output when given the same inputs and seed, making tests reproducible. ## Mock Embedding Model The most commonly used mock for testing embedding-related features. 
```ts import { createMockEmbeddingModel, embed } from '@localmode/core'; const model = createMockEmbeddingModel({ dimensions: 384, seed: 42, // Deterministic embeddings }); const { embedding } = await embed({ model, value: 'Hello world', }); // embedding is a Float32Array of length 384 // Same input + seed always produces same output ``` ### Configuration Options void', }, }} /> ### Testing Failure Handling ```ts const failingModel = createMockEmbeddingModel({ failCount: 2, // Fail first 2 attempts failError: new Error('Model load failed'), }); // First two calls will fail, third will succeed try { await embed({ model: failingModel, value: 'test' }); } catch (error) { console.log('First attempt failed'); } ``` ### Tracking Calls ```ts const calls: string[][] = []; const model = createMockEmbeddingModel({ onEmbed: ({ values }) => { calls.push(values); }, }); await embed({ model, value: 'test 1' }); await embed({ model, value: 'test 2' }); console.log(calls); // [['test 1'], ['test 2']] console.log(model.callCount); // 2 // Reset for next test model.resetCallCount(); ``` ## All Mock Models ```ts import { createMockClassificationModel } from '@localmode/core'; const model = createMockClassificationModel({ labels: ['positive', 'negative', 'neutral'], defaultScore: 0.9, }); // Uses simple heuristics: // - "great", "good", "love" β†’ positive // - "bad", "terrible", "hate" β†’ negative // - Other β†’ neutral ``` ```ts import { createMockNERModel } from '@localmode/core'; const model = createMockNERModel({ entityTypes: ['PERSON', 'ORG', 'LOC', 'DATE'], }); // Recognizes common test entities: // PERSON: John, Jane, Bob, Alice, Mike, Sarah // ORG: Microsoft, Google, Apple, Amazon, OpenAI, Meta // LOC: Seattle, New York, London, Paris, Tokyo, Berlin // DATE: Years (2024), dates (1/1/2024), month names ``` ```ts import { createMockSpeechToTextModel } from '@localmode/core'; const model = createMockSpeechToTextModel({ mockText: 'This is the transcription.', languages: ['en', 'es', 'fr'], }); const result = await model.doTranscribe({ audio: audioBlob, returnTimestamps: true, }); // result.text = 'This is the transcription.' // result.segments = [{ start: 0, end: 0.5, text: 'This' }, ...] ``` ```ts import { createMockTextToSpeechModel } from '@localmode/core'; const model = createMockTextToSpeechModel({ sampleRate: 16000, }); const { audio, sampleRate } = await model.doSynthesize({ text: 'Hello world', }); // audio is a Blob with silent audio data ``` ```ts import { createMockLanguageModel } from '@localmode/core'; const model = createMockLanguageModel({ mockResponse: 'This is the generated response.', contextLength: 4096, }); // Non-streaming const result = await model.doGenerate({ prompt: 'Hello' }); // Streaming for await (const chunk of model.doStream({ prompt: 'Hello' })) { console.log(chunk.text); } ``` ```ts import { createMockTranslationModel } from '@localmode/core'; const model = createMockTranslationModel({ translationPrefix: '[translated]', }); const { translations } = await model.doTranslate({ texts: ['Hello', 'World'], }); // translations = ['[translated] Hello', '[translated] World'] ``` ```ts import { createMockSummarizationModel } from '@localmode/core'; const model = createMockSummarizationModel(); const { summaries } = await model.doSummarize({ texts: ['This is a long document. 
It has many sentences.'], }); // summaries = ['This is a long document.'] (first sentence) ``` ```ts import { createMockImageCaptionModel, createMockSegmentationModel, createMockObjectDetectionModel, createMockImageFeatureModel, createMockImageToImageModel, } from '@localmode/core'; // Image captioning const captioner = createMockImageCaptionModel({ mockCaption: 'A photo showing test content.', }); // Object detection const detector = createMockObjectDetectionModel(); // Returns: [{ label: 'person', score: 0.95, box: {...} }, ...] // Segmentation const segmenter = createMockSegmentationModel(); // Returns masks with 'background' and 'object' labels // Image features const featureExtractor = createMockImageFeatureModel({ dimensions: 512, }); // Image-to-image const transformer = createMockImageToImageModel({ taskType: 'upscale', }); ``` ```ts import { createMockQuestionAnsweringModel, createMockDocumentQAModel, createMockFillMaskModel, createMockOCRModel, } from '@localmode/core'; // Question answering const qaModel = createMockQuestionAnsweringModel(); // Document QA (images) const docQA = createMockDocumentQAModel(); // Fill-mask (BERT-style) const fillMask = createMockFillMaskModel({ mockPredictions: [ { token: 'great', score: 0.85 }, { token: 'wonderful', score: 0.1 }, ], }); // OCR const ocrModel = createMockOCRModel({ mockText: 'Extracted text from image.', }); ``` ## Mock Storage In-memory storage that implements the Storage interface: ```ts import { createMockStorage } from '@localmode/core'; const storage = createMockStorage(); await storage.set('key', { id: 'doc-1', vector: new Float32Array(384), metadata: { text: 'Hello' }, }); const doc = await storage.get('key'); const keys = await storage.keys(); // Access internal data for assertions const allData = storage.getData(); // Map console.log(storage.size); // 1 await storage.clear(); ``` ## Mock VectorDB Full-featured VectorDB mock with search support: ```ts import { createMockVectorDB, createTestVector } from '@localmode/core'; const db = createMockVectorDB({ name: 'test-db', dimensions: 384, delay: 0, // Optional delay for async operations }); // Add documents await db.add({ id: 'doc-1', vector: createTestVector(384, 1), metadata: { text: 'Hello world', category: 'greeting' }, }); // Search with filters const results = await db.search(queryVector, { k: 10, threshold: 0.5, filter: { category: 'greeting' }, }); // Filter operators supported: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin await db.search(queryVector, { filter: { score: { $gte: 0.8 }, category: { $in: ['greeting', 'farewell'] }, }, }); ``` ## Test Vectors Create deterministic test vectors: ```ts import { createTestVector, createTestVectors } from '@localmode/core'; // Single vector (same seed = same vector) const vector1 = createTestVector(384, 42); const vector2 = createTestVector(384, 42); // vector1 and vector2 are identical // Multiple vectors const vectors = createTestVectors(100, 384, 0); // 100 vectors with seeds 0-99 // Unnormalized vectors const rawVector = createTestVector(384, 42, false); ``` ## Seeded Random For reproducible random numbers in tests: ```ts import { createSeededRandom } from '@localmode/core'; const rng = createSeededRandom(42); const value1 = rng(); // Always same value for seed 42 const value2 = rng(); // Next value in sequence // Reset by creating new RNG with same seed const rng2 = createSeededRandom(42); // rng2() produces same sequence as rng ``` ## Test Helpers ### Wait for Condition ```ts import { waitFor } from '@localmode/core'; // 
Wait for async condition await waitFor( () => document.querySelector('.loaded') !== null, 5000, // timeout 50 // check interval ); // Wait for async function await waitFor(async () => (await db.count()) > 0); ``` ### Deferred Promises ```ts import { createDeferred } from '@localmode/core'; const { promise, resolve, reject } = createDeferred(); // Later... resolve('success'); // or: reject(new Error('failed')); const result = await promise; ``` ### Spy Functions ```ts import { createSpy } from '@localmode/core'; const spy = createSpy<(x: number, y: number) => void>(); spy(1, 2); spy(3, 4); console.log(spy.callCount); // 2 console.log(spy.calls); // [[1, 2], [3, 4]] spy.reset(); console.log(spy.callCount); // 0 ``` ## Vitest Example ```ts import { describe, it, expect, beforeEach } from 'vitest'; import { createMockEmbeddingModel, createMockVectorDB, createTestVector, embed, } from '@localmode/core'; describe('Semantic Search', () => { let model: ReturnType; let db: ReturnType; beforeEach(() => { model = createMockEmbeddingModel({ dimensions: 384, seed: 42 }); db = createMockVectorDB({ dimensions: 384 }); }); it('should find similar documents', async () => { // Arrange await db.add({ id: 'doc-1', vector: createTestVector(384, 1), metadata: { text: 'Machine learning is AI' }, }); // Act const { embedding } = await embed({ model, value: 'What is ML?' }); const results = await db.search(embedding, { k: 5 }); // Assert expect(results).toHaveLength(1); expect(results[0].id).toBe('doc-1'); expect(results[0].score).toBeGreaterThan(0); }); it('should support AbortSignal', async () => { const controller = new AbortController(); controller.abort(); await expect(embed({ model, value: 'test', abortSignal: controller.signal })).rejects.toThrow(); }); it('should handle failures with retry', async () => { const failingModel = createMockEmbeddingModel({ failCount: 1, failError: new Error('Temporary failure'), }); // First call fails, second succeeds await expect(embed({ model: failingModel, value: 'test', maxRetries: 0 })).rejects.toThrow( 'Temporary failure' ); // With retry, should succeed const result = await embed({ model: failingModel, value: 'test', maxRetries: 2, }); expect(result.embedding).toBeInstanceOf(Float32Array); }); }); ``` # Vector Database import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Accordions, Accordion } from 'fumadocs-ui/components/accordion'; import { TypeTable } from 'fumadocs-ui/components/type-table'; LocalMode includes a high-performance vector database with HNSW (Hierarchical Navigable Small World) indexing for fast approximate nearest neighbor search. 
## Creating a Database ```ts {3-6} import { createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'my-documents', dimensions: 384, // Must match your embedding model }); ``` ### VectorDBConfig ### With Custom Storage ```typescript import { createVectorDB, MemoryStorage } from '@localmode/core'; // Use memory storage (no persistence) const db = await createVectorDB({ name: 'temp-db', dimensions: 384, storage: new MemoryStorage(), }); // Or use a third-party adapter import { DexieStorage } from '@localmode/dexie'; const db = await createVectorDB({ name: 'dexie-db', dimensions: 384, storage: new DexieStorage({ name: 'my-app' }), }); ``` ## Adding Documents ### Single Document ```typescript await db.add({ id: 'doc-1', vector: embedding, // Float32Array metadata: { text: 'Original document text', source: 'file.pdf', page: 1, }, }); ``` ### Multiple Documents ```typescript await db.addMany([ { id: 'doc-1', vector: embeddings[0], metadata: { text: 'First' } }, { id: 'doc-2', vector: embeddings[1], metadata: { text: 'Second' } }, { id: 'doc-3', vector: embeddings[2], metadata: { text: 'Third' } }, ]); ``` The vector dimensions must match the `dimensions` specified when creating the database. Using a different size will throw a `DimensionMismatchError`. ## Searching ### Basic Search ```typescript const results = await db.search(queryVector, { k: 5 }); results.forEach((result) => { console.log(`ID: ${result.id}`); console.log(`Score: ${result.score.toFixed(4)}`); console.log(`Metadata:`, result.metadata); }); ``` ### With Filters Filter results by metadata: ```typescript const results = await db.search(queryVector, { k: 10, filter: { source: { $eq: 'manual.pdf' }, }, }); ``` ### Filter Operators | Operator | Description | Example | | -------- | --------------------- | -------------------------------- | | `$eq` | Equals | `{ status: { $eq: 'active' } }` | | `$ne` | Not equals | `{ status: { $ne: 'deleted' } }` | | `$gt` | Greater than | `{ price: { $gt: 100 } }` | | `$gte` | Greater than or equal | `{ year: { $gte: 2020 } }` | | `$lt` | Less than | `{ count: { $lt: 10 } }` | | `$lte` | Less than or equal | `{ score: { $lte: 0.5 } }` | | Operator | Description | Example | | -------- | --------------------- | ---------------------------------------- | | `$in` | Value is in array | `{ category: { $in: ['tech', 'ai'] } }` | | `$nin` | Value is not in array | `{ tag: { $nin: ['draft', 'hidden'] } }` | | Operator | Description | Example | | ------------- | ------------------ | ------------------------------------ | | `$contains` | String contains | `{ text: { $contains: 'machine' } }` | | `$startsWith` | String starts with | `{ name: { $startsWith: 'doc-' } }` | **AND conditions:** ```ts const results = await db.search(queryVector, { k: 10, filter: { $and: [ { category: { $eq: 'technology' } }, { year: { $gte: 2023 } }, { status: { $ne: 'draft' } }, ], }, }); ``` **OR conditions:** ```ts const results = await db.search(queryVector, { k: 10, filter: { $or: [{ priority: { $eq: 'high' } }, { featured: { $eq: true } }], }, }); ``` ## Updating Documents ```typescript // Update metadata only (vector unchanged) await db.update('doc-1', { metadata: { ...existingMetadata, status: 'reviewed' }, }); // Update vector and metadata await db.update('doc-1', { vector: newEmbedding, metadata: { text: 'Updated text' }, }); ``` ## Deleting Documents ```typescript // Delete single document await db.delete('doc-1'); // Delete multiple documents await db.deleteMany(['doc-1', 'doc-2', 'doc-3']); // 
Clear all documents await db.clear(); ``` ### Delete by Filter Delete documents matching a metadata filter: ```typescript // Delete all documents with a specific documentId const deletedCount = await db.deleteWhere({ documentId: 'doc-123', }); console.log(`Deleted ${deletedCount} documents`); // Delete documents matching multiple criteria const count = await db.deleteWhere({ $and: [ { source: { $eq: 'old-import.pdf' } }, { status: { $eq: 'archived' } }, ], }); ``` Use `deleteWhere()` when you need to remove multiple documents by metadata (e.g., all chunks from a specific file). It's more efficient than deleting documents one by one. ## Getting Documents ```typescript // Get by ID const doc = await db.get('doc-1'); if (doc) { console.log(doc.id, doc.vector, doc.metadata); } // Check if exists const exists = await db.has('doc-1'); // Get all IDs const ids = await db.keys(); // Get count const count = await db.size(); ``` ## Persistence By default, the vector database uses IndexedDB for persistence: ```typescript const db = await createVectorDB({ name: 'persistent-db', dimensions: 384, }); // Add documents await db.addMany(documents); // Data persists across page reloads! // On next load, just create with same name: const db2 = await createVectorDB({ name: 'persistent-db', // Same name dimensions: 384, }); // All documents are still there const count = await db2.size(); ``` ### Memory-Only Mode For temporary data or testing: ```typescript import { MemoryStorage } from '@localmode/core'; const db = await createVectorDB({ name: 'temp', dimensions: 384, storage: new MemoryStorage(), }); // Data lost on page reload ``` ## Web Worker Mode Offload database operations to a Web Worker for better main thread performance: ```typescript import { createVectorDBWithWorker } from '@localmode/core'; const db = await createVectorDBWithWorker({ name: 'worker-db', dimensions: 384, }); // Same API, but operations run in a worker const results = await db.search(queryVector, { k: 5 }); ``` Worker mode prevents blocking the main thread during: - Large batch insertions - Complex searches * Index rebuilding ## HNSW Configuration Tune the HNSW index for your use case: ```typescript const db = await createVectorDB({ name: 'tuned-db', dimensions: 384, hnswConfig: { // More connections = better accuracy, more memory m: 32, // Default: 16 // Higher = better index quality, slower builds efConstruction: 400, // Default: 200 // Higher = better search accuracy, slower searches efSearch: 100, // Default: 50 }, }); ``` ### Configuration Guidelines | Use Case | m | efConstruction | efSearch | | ------------------ | -- | -------------- | -------- | | Fast, low memory | 8 | 100 | 30 | | Balanced (default) | 16 | 200 | 50 | | High accuracy | 32 | 400 | 100 | | Maximum accuracy | 48 | 500 | 200 | ## Middleware Add middleware for logging, encryption, etc.: ```typescript import { wrapVectorDB, loggingMiddleware } from '@localmode/core'; const baseDB = await createVectorDB({ name: 'db', dimensions: 384 }); const db = wrapVectorDB(baseDB, { beforeSearch: async (vector, options) => { console.log('Searching with k =', options.k); return { vector, options }; }, afterSearch: async (results) => { console.log('Found', results.length, 'results'); return results; }, }); ``` ## Type Safety Full TypeScript support for metadata: ```typescript interface MyMetadata { text: string; source: string; page: number; tags: string[]; } const db = await createVectorDB({ name: 'typed-db', dimensions: 384, }); // Type-safe add await db.add({ id: 'doc-1', 
vector: embedding, metadata: { text: 'Hello', source: 'file.pdf', page: 1, tags: ['intro'], }, }); // Type-safe search results const results = await db.search(queryVector, { k: 5 }); results.forEach((r) => { // r.metadata is typed as MyMetadata console.log(r.metadata.text); }); ``` ## Next Steps # Overview import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Steps, Step } from 'fumadocs-ui/components/steps'; import { TypeTable } from 'fumadocs-ui/components/type-table'; # @localmode/pdfjs PDF text extraction using PDF.js for local document processing. Extract text, metadata, and structure from PDFs entirely in the browser. ## Features * πŸ“„ **Full PDF Support** β€” Extract text from any PDF document * πŸ”’ **Password Protected** β€” Handle encrypted PDFs * πŸ“‘ **Page-Level Control** β€” Process specific pages or split by page * πŸ“Š **Metadata Extraction** β€” Get title, author, dates, etc. ## Installation `bash pnpm install @localmode/pdfjs @localmode/core ` `bash npm install @localmode/pdfjs @localmode/core ` `bash yarn add @localmode/pdfjs @localmode/core ` ## Quick Start ```typescript import { extractPDFText } from '@localmode/pdfjs'; // From file input const file = document.getElementById('fileInput').files[0]; const { text, pageCount, metadata } = await extractPDFText(file); console.log(`Extracted ${pageCount} pages`); console.log('Title:', metadata?.title); console.log('Text:', text); ``` ## API Reference ### extractPDFText() Extract text from a PDF file: ```ts {3-8} import { extractPDFText } from '@localmode/pdfjs'; const result = await extractPDFText(pdfBlob, { maxPages: 10, // Limit pages to extract includePageNumbers: true, // Add [Page N] headers pageSeparator: '\n---\n', // Separator between pages password: 'secret', // For encrypted PDFs }); console.log(result.text); // Full extracted text console.log(result.pageCount); // Total number of pages console.log(result.pages); // Array of page texts console.log(result.metadata); // PDF metadata ``` #### Options #### Return Value ### PDFLoader Document loader for integration with LocalMode core: ```typescript import { PDFLoader } from '@localmode/pdfjs'; import { loadDocument } from '@localmode/core'; const loader = new PDFLoader({ splitByPage: false, // Single doc or one per page maxPages: undefined, // All pages includePageNumbers: true, password: undefined, }); const { documents } = await loadDocument(loader, pdfBlob); for (const doc of documents) { console.log(doc.text); console.log(doc.metadata); } ``` ### Split by Page Create separate documents for each page: ```typescript import { PDFLoader } from '@localmode/pdfjs'; const loader = new PDFLoader({ splitByPage: true }); const { documents } = await loadDocument(loader, pdfBlob); console.log(`Loaded ${documents.length} pages`); documents.forEach((doc, i) => { console.log(`Page ${i + 1}: ${doc.text.substring(0, 100)}...`); console.log(` Metadata:`, doc.metadata); }); ``` ### Utility Functions ```typescript import { getPDFPageCount, isPDF } from '@localmode/pdfjs'; // Get page count without full extraction const pageCount = await getPDFPageCount(pdfBlob); console.log(`PDF has ${pageCount} pages`); // Check if file is a PDF if (await isPDF(file)) { // Process as PDF } else { // Handle other file types } ``` ## RAG Pipeline Integration Build a PDF-powered RAG system: ```typescript import { PDFLoader } from '@localmode/pdfjs'; import { createVectorDB, 
chunk, ingest, semanticSearch, streamText } from '@localmode/core'; import { transformers } from '@localmode/transformers'; import { webllm } from '@localmode/webllm'; // Setup const embeddingModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const llm = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); const db = await createVectorDB({ name: 'pdf-docs', dimensions: 384 }); // Load and process PDF async function ingestPDF(file: File) { const loader = new PDFLoader({ splitByPage: true }); const { documents } = await loadDocument(loader, file); // Chunk each page const allChunks = documents.flatMap((doc, pageIndex) => chunk(doc.text, { strategy: 'recursive', size: 512, overlap: 50, }).map((c) => ({ text: c.text, metadata: { filename: file.name, page: pageIndex + 1, start: c.startIndex, end: c.endIndex, }, })) ); // Ingest into vector DB await ingest({ db, model: embeddingModel, documents: allChunks, }); return allChunks.length; } // Query async function queryPDF(question: string) { const results = await semanticSearch({ db, model: embeddingModel, query: question, k: 3, }); const context = results.map((r) => `[Page ${r.metadata.page}]\n${r.metadata.text}`).join('\n\n'); const stream = await streamText({ model: llm, prompt: `Answer based on the PDF content: ${context} Question: ${question} Answer:`, }); return stream; } ``` ## File Upload Component React example: ```typescript import { useState } from 'react'; import { extractPDFText } from '@localmode/pdfjs'; function PDFUploader() { const [text, setText] = useState(''); const [loading, setLoading] = useState(false); async function handleFile(e: React.ChangeEvent) { const file = e.target.files?.[0]; if (!file) return; setLoading(true); try { const { text, pageCount } = await extractPDFText(file); setText(text); console.log(`Extracted ${pageCount} pages`); } catch (error) { console.error('Failed to extract PDF:', error); } finally { setLoading(false); } } return (
    <div>
      {/* Markup sketch: file input plus loading and result states (element tags reconstructed) */}
      <input type="file" accept=".pdf" onChange={handleFile} />
      {loading && (
        <p>Extracting text...</p>
      )}
      {text && (
        <pre>{text}</pre>
      )}
    </div>
); } ``` ## Handling Large PDFs For large PDFs, process in chunks: ```typescript import { extractPDFText, getPDFPageCount } from '@localmode/pdfjs'; async function processLargePDF(file: File, batchSize = 10) { const totalPages = await getPDFPageCount(file); const allText: string[] = []; for (let start = 0; start < totalPages; start += batchSize) { const { pages } = await extractPDFText(file, { startPage: start, maxPages: batchSize, }); allText.push(...pages); console.log( `Processed pages ${start + 1}-${Math.min(start + batchSize, totalPages)} of ${totalPages}` ); } return allText.join('\n\n'); } ``` ## Password-Protected PDFs ```typescript import { extractPDFText } from '@localmode/pdfjs'; try { const { text } = await extractPDFText(encryptedPDF, { password: userProvidedPassword, }); console.log(text); } catch (error) { if (error.message.includes('password')) { // Prompt user for password } } ``` ## Metadata Extraction ```typescript const { metadata } = await extractPDFText(file); if (metadata) { console.log('Title:', metadata.title); console.log('Author:', metadata.author); console.log('Subject:', metadata.subject); console.log('Creator:', metadata.creator); console.log('Creation Date:', metadata.creationDate); console.log('Modification Date:', metadata.modDate); } ``` ## Best Practices 1. **Split by page** - Better for RAG; maintains page context 2. **Use page numbers** - Include in metadata for citations 3. **Handle errors** - Corrupted PDFs, wrong passwords, etc. 4. **Chunk appropriately** - 256-512 chars works well for most PDFs 5. **Check file size** - Large PDFs may need batched processing ## Next Steps # Embeddings import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; Generate dense vector representations of text for semantic search, clustering, and similarity matching. 
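To make "similarity matching" concrete before diving in, here is a minimal sketch using `embed` and the `cosineSimilarity` helper that also appears in the comparison section further down: two sentences with related meaning score higher than an unrelated one.

```typescript
import { embed, cosineSimilarity } from '@localmode/core';
import { transformers } from '@localmode/transformers';

const model = transformers.embedding('Xenova/all-MiniLM-L6-v2');

// Related and unrelated sentences
const { embedding: a } = await embed({ model, value: 'A cat sleeps on the sofa' });
const { embedding: b } = await embed({ model, value: 'A kitten naps on the couch' });
const { embedding: c } = await embed({ model, value: 'Interest rates rose last quarter' });

console.log('related:', cosineSimilarity(a, b).toFixed(3));   // higher
console.log('unrelated:', cosineSimilarity(a, c).toFixed(3)); // lower
```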
## Basic Usage ```typescript import { transformers } from '@localmode/transformers'; import { embed, embedMany } from '@localmode/core'; const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); // Single embedding const { embedding } = await embed({ model, value: 'Machine learning is fascinating', }); console.log('Dimensions:', embedding.length); // 384 // Batch embeddings const { embeddings } = await embedMany({ model, values: ['Hello', 'World', 'AI'], }); ``` ## Recommended Models | Model | Dimensions | Size | Speed | Use Case | | ---------------------------------------------- | ---------- | ----- | ----- | ------------------------ | | `Xenova/all-MiniLM-L6-v2` | 384 | 22MB | ⚑⚑⚑ | General purpose, fastest | | `Xenova/all-MiniLM-L12-v2` | 384 | 33MB | ⚑⚑ | Better accuracy | | `Xenova/all-mpnet-base-v2` | 768 | 110MB | ⚑ | Highest quality | | `Xenova/paraphrase-multilingual-MiniLM-L12-v2` | 384 | 117MB | ⚑⚑ | 50+ languages | | `Xenova/e5-small-v2` | 384 | 33MB | ⚑⚑⚑ | E5 family, fast | | `Xenova/bge-small-en-v1.5` | 384 | 33MB | ⚑⚑⚑ | BGE family | ## With Vector Database ```typescript import { createVectorDB, embed, embedMany, semanticSearch } from '@localmode/core'; import { transformers } from '@localmode/transformers'; const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const db = await createVectorDB({ name: 'docs', dimensions: 384 }); // Index documents const documents = [ 'Machine learning enables computers to learn from data', 'Deep learning uses neural networks with many layers', 'Natural language processing analyzes human language', ]; const { embeddings } = await embedMany({ model, values: documents }); await db.addMany( documents.map((text, i) => ({ id: `doc-${i}`, vector: embeddings[i], metadata: { text }, })) ); // Search const results = await semanticSearch({ db, model, query: 'How do neural networks work?', k: 3, }); ``` ## Progress Tracking ```typescript const { embeddings } = await embedMany({ model, values: largeDocumentArray, onProgress: (progress) => { const percent = (progress.completed / progress.total * 100).toFixed(1); console.log(`Embedding: ${percent}%`); }, }); ``` ## Model Configuration ```typescript const model = transformers.embedding('Xenova/all-MiniLM-L6-v2', { quantized: true, // Use quantized model (default: true) revision: 'main', // Model revision progress: (p) => { console.log(`Loading model: ${(p.progress * 100).toFixed(1)}%`); }, }); ``` ## Multilingual Embeddings For multilingual applications: ```typescript const model = transformers.embedding('Xenova/paraphrase-multilingual-MiniLM-L12-v2'); const { embeddings } = await embedMany({ model, values: [ 'Hello world', // English 'Bonjour le monde', // French 'Hola mundo', // Spanish 'γ“γ‚“γ«γ‘γ―δΈ–η•Œ', // Japanese 'Ω…Ψ±Ψ­Ψ¨Ψ§ Ψ¨Ψ§Ω„ΨΉΨ§Ω„Ω…', // Arabic ], }); // All embeddings are in the same vector space // Cross-lingual similarity works! 
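
// Quick cross-lingual check (sketch, not part of the original example):
// cosineSimilarity comes from '@localmode/core', as imported in the comparison
// section below. Translations of the same sentence land close together in the
// shared vector space.
const enFr = cosineSimilarity(embeddings[0], embeddings[1]); // 'Hello world' vs 'Bonjour le monde'
const enJa = cosineSimilarity(embeddings[0], embeddings[3]); // 'Hello world' vs the Japanese sentence
console.log({ enFr, enJa }); // both should be high, since all values are translations of the same greeting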
``` ## Comparison: Model Quality vs Speed ```typescript import { cosineSimilarity } from '@localmode/core'; // Test sentences const s1 = 'The cat sits on the mat'; const s2 = 'A feline rests on a rug'; const s3 = 'The stock market crashed yesterday'; // Fast model const fastModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const { embeddings: fastEmbeddings } = await embedMany({ model: fastModel, values: [s1, s2, s3], }); // Quality model const qualityModel = transformers.embedding('Xenova/all-mpnet-base-v2'); const { embeddings: qualityEmbeddings } = await embedMany({ model: qualityModel, values: [s1, s2, s3], }); // Compare similarities console.log('Fast model:'); console.log(' s1-s2:', cosineSimilarity(fastEmbeddings[0], fastEmbeddings[1]).toFixed(3)); console.log(' s1-s3:', cosineSimilarity(fastEmbeddings[0], fastEmbeddings[2]).toFixed(3)); console.log('Quality model:'); console.log(' s1-s2:', cosineSimilarity(qualityEmbeddings[0], qualityEmbeddings[1]).toFixed(3)); console.log(' s1-s3:', cosineSimilarity(qualityEmbeddings[0], qualityEmbeddings[2]).toFixed(3)); ``` ## Caching Embeddings Use caching middleware to avoid recomputation: ```typescript import { wrapEmbeddingModel, cachingMiddleware } from '@localmode/core'; const baseModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const model = wrapEmbeddingModel(baseModel, [ cachingMiddleware({ maxSize: 10000, storage: 'indexeddb', dbName: 'embedding-cache', }), ]); // First call computes embedding const { embedding: e1 } = await embed({ model, value: 'Hello' }); // Second call returns from cache (instant) const { embedding: e2 } = await embed({ model, value: 'Hello' }); ``` ## Best Practices 1. **Match dimensions** β€” Ensure your vector DB dimensions match the model 2. **Batch when possible** β€” `embedMany()` is more efficient than multiple `embed()` calls 3. **Cache embeddings** β€” Use caching middleware for repeated queries 4. **Normalize if needed** β€” Some models benefit from L2 normalization 5. **Choose model wisely** β€” Balance quality vs speed for your use case ## Next Steps # Overview import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Accordion, Accordions } from 'fumadocs-ui/components/accordion'; # @localmode/transformers HuggingFace Transformers.js provider for LocalMode. Run ML models locally in the browser with WebGPU/WASM acceleration. 
## Features

* πŸš€ **Browser-Native** β€” Run ML models directly in the browser
* πŸ”’ **Privacy-First** β€” All processing happens locally
* πŸ“¦ **Model Caching** β€” Models cached in IndexedDB for instant subsequent loads
* ⚑ **Optimized** β€” Uses quantized models for smaller size and faster inference

## Installation

`bash pnpm install @localmode/transformers @localmode/core ` `bash npm install @localmode/transformers @localmode/core ` `bash yarn add @localmode/transformers @localmode/core `

## Quick Start

```typescript
import { transformers } from '@localmode/transformers';
import { embed, rerank } from '@localmode/core';

// Text Embeddings
const embeddingModel = transformers.embedding('Xenova/all-MiniLM-L6-v2');
const { embedding } = await embed({ model: embeddingModel, value: 'Hello world' });

// Reranking for RAG
const rerankerModel = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2');
const results = await rerank({
  model: rerankerModel,
  query: 'What is machine learning?',
  documents: ['ML is a subset of AI...', 'Python is a language...'],
  topK: 5,
});
```

## βœ… Live Features

These features are production-ready and fully documented.

| Method                             | Interface        | Description        |
| ---------------------------------- | ---------------- | ------------------ |
| `transformers.embedding(modelId)`  | `EmbeddingModel` | Text embeddings    |
| `transformers.reranker(modelId)`   | `RerankerModel`  | Document reranking |

### Recommended Models

**Embedding models:**

| Model                                          | Dimensions | Size    | Use Case              |
| ---------------------------------------------- | ---------- | ------- | --------------------- |
| `Xenova/all-MiniLM-L6-v2`                      | 384        | \~22MB  | Fast, general-purpose |
| `Xenova/all-MiniLM-L12-v2`                     | 384        | \~33MB  | Better accuracy       |
| `Xenova/paraphrase-multilingual-MiniLM-L12-v2` | 384        | \~117MB | 50+ languages         |

**Reranker models:**

| Model                           | Use Case                   | Size    |
| ------------------------------- | -------------------------- | ------- |
| `Xenova/ms-marco-MiniLM-L-6-v2` | Document reranking for RAG | \~22MB  |
| `Xenova/bge-reranker-base`      | Advanced reranking         | \~109MB |

## 🚧 Coming Soon

These features have interfaces defined and implementations available, but are under active development and testing. Full documentation will be added once they are production-ready.

The features listed below are not yet production-ready. APIs may change before stable release.
### Classification & NLP | Feature | Method | Interface | | ------------------------ | ------------------------------------------ | ----------------------------- | | Text Classification | `transformers.classifier(modelId)` | `ClassificationModel` | | Zero-Shot Classification | `transformers.zeroShotClassifier(modelId)` | `ZeroShotClassificationModel` | | Named Entity Recognition | `transformers.ner(modelId)` | `NERModel` | ### Translation & Text Processing | Feature | Method | Interface | | ------------------ | ----------------------------------------- | ------------------------ | | Translation | `transformers.translator(modelId)` | `TranslationModel` | | Summarization | `transformers.summarizer(modelId)` | `SummarizationModel` | | Fill-Mask | `transformers.fillMask(modelId)` | `FillMaskModel` | | Question Answering | `transformers.questionAnswering(modelId)` | `QuestionAnsweringModel` | ### Audio | Feature | Method | Interface | | -------------- | ------------------------------------ | ------------------- | | Speech-to-Text | `transformers.speechToText(modelId)` | `SpeechToTextModel` | | Text-to-Speech | `transformers.textToSpeech(modelId)` | `TextToSpeechModel` | ### Vision | Feature | Method | Interface | | ------------------------------ | ----------------------------------------------- | ---------------------------------- | | Image Classification | `transformers.imageClassifier(modelId)` | `ImageClassificationModel` | | Zero-Shot Image Classification | `transformers.zeroShotImageClassifier(modelId)` | `ZeroShotImageClassificationModel` | | Image Captioning | `transformers.captioner(modelId)` | `ImageCaptionModel` | | Image Segmentation | `transformers.segmenter(modelId)` | `SegmentationModel` | | Object Detection | `transformers.objectDetector(modelId)` | `ObjectDetectionModel` | | OCR | `transformers.ocr(modelId)` | `OCRModel` | | Document QA | `transformers.documentQA(modelId)` | `DocumentQAModel` | ## Model Options Configure model loading: ```typescript const model = transformers.embedding('Xenova/all-MiniLM-L6-v2', { quantized: true, // Use quantized model (smaller, faster) revision: 'main', // Model revision progress: (p) => { console.log(`Loading: ${(p.progress * 100).toFixed(1)}%`); }, }); ``` ## Model Utilities Manage model loading and caching: ```typescript import { preloadModel, isModelCached, getModelStorageUsage } from '@localmode/transformers'; // Check if model is cached const cached = await isModelCached('Xenova/all-MiniLM-L6-v2'); // Preload model with progress await preloadModel('Xenova/all-MiniLM-L6-v2', { onProgress: (p) => console.log(`${p.progress}% loaded`), }); // Check storage usage const usage = await getModelStorageUsage(); ``` ### WebGPU Detection Detect WebGPU availability for optimal device selection: ```typescript import { isWebGPUAvailable, getOptimalDevice } from '@localmode/transformers'; // Check if WebGPU is available const webgpuAvailable = await isWebGPUAvailable(); if (webgpuAvailable) { console.log('WebGPU available, using GPU acceleration'); } else { console.log('Falling back to WASM'); } // Get optimal device automatically const device = await getOptimalDevice(); // 'webgpu' or 'wasm' const model = transformers.embedding('Xenova/all-MiniLM-L6-v2', { device, // Uses WebGPU if available, otherwise WASM }); ``` ## Browser Compatibility | Browser | WebGPU | WASM | Notes | | ----------- | ------ | ---- | ---------------------------- | | Chrome 113+ | βœ… | βœ… | Best performance with WebGPU | | Edge 113+ | βœ… | βœ… | Same as Chrome | | Firefox | ❌ | 
βœ… | WASM only | | Safari 18+ | βœ… | βœ… | WebGPU available | | iOS Safari | βœ… | βœ… | WebGPU available (iOS 26+) | ## Performance Tips 1. **Use quantized models** - Smaller and faster with minimal quality loss 2. **Preload models** - Load during app init for instant inference 3. **Use WebGPU when available** - 3-5x faster than WASM 4. **Batch operations** - Process multiple inputs together ## Next Steps # Reranking import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; Reranking uses cross-encoder models to improve the relevance of search results. It's particularly useful for RAG pipelines. ## Why Reranking? Bi-encoder (embedding) models are fast but may miss subtle relevance signals. Cross-encoder rerankers consider query-document pairs together for better accuracy. ``` Query: "How does photosynthesis work?" Initial Ranking (embeddings): 1. "Photosynthesis is a process used by plants" βœ“ 2. "The synthesis of proteins requires energy" βœ— 3. "Plants convert sunlight into chemical energy" βœ“ After Reranking: 1. "Plants convert sunlight into chemical energy" βœ“ (more specific) 2. "Photosynthesis is a process used by plants" βœ“ 3. "The synthesis of proteins requires energy" βœ— ``` ## Basic Usage ```typescript import { transformers } from '@localmode/transformers'; import { rerank } from '@localmode/core'; const model = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2'); const results = await rerank({ model, query: 'What is machine learning?', documents: [ 'Machine learning is a subset of artificial intelligence.', 'The weather forecast predicts rain tomorrow.', 'Deep learning uses neural networks to learn patterns.', 'I went to the grocery store yesterday.', ], topK: 2, }); results.forEach((r, i) => { console.log(`${i + 1}. Score: ${r.score.toFixed(3)}`); console.log(` ${r.document}`); }); // Output: // 1. Score: 0.892 // Machine learning is a subset of artificial intelligence. // 2. Score: 0.756 // Deep learning uses neural networks to learn patterns. 
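
// Optional sketch (not from the original example): filter by score before building
// LLM context. Scores are model-dependent; the 0.5 cutoff here is illustrative only.
const confident = results.filter((r) => r.score > 0.5);
console.log(`${confident.length} of ${results.length} documents kept`);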
```

## With Semantic Search

Typical RAG reranking pattern:

```typescript
import { semanticSearch, rerank } from '@localmode/core';

async function searchWithReranking(query: string) {
  // Step 1: Fast semantic search (retrieve many candidates)
  const candidates = await semanticSearch({
    db,
    model: embeddingModel,
    query,
    k: 20, // Get more candidates than needed
  });

  // Step 2: Rerank for accuracy (keep top results)
  const reranked = await rerank({
    model: rerankerModel,
    query,
    documents: candidates.map((c) => c.metadata.text as string),
    topK: 5,
  });

  // Step 3: Map back to original results with metadata
  return reranked.map((r) => ({
    ...candidates[r.originalIndex],
    rerankerScore: r.score,
  }));
}
```

## Rerank Result Structure

```typescript
interface RerankResult {
  document: string;      // The document text
  score: number;         // Relevance score (higher = more relevant)
  originalIndex: number; // Index in the original documents array
}
```

## Recommended Models

| Model                            | Size   | Speed | Quality |
| -------------------------------- | ------ | ----- | ------- |
| `Xenova/ms-marco-MiniLM-L-6-v2`  | \~22MB | ⚑⚑⚑   | Good    |
| `Xenova/ms-marco-MiniLM-L-12-v2` | \~33MB | ⚑⚑    | Better  |

## Complete RAG Example

```typescript
import {
  createVectorDB,
  chunk,
  ingest,
  semanticSearch,
  rerank,
  streamText,
} from '@localmode/core';
import { transformers } from '@localmode/transformers';
import { webllm } from '@localmode/webllm';

// Setup models
const embeddingModel = transformers.embedding('Xenova/all-MiniLM-L6-v2');
const rerankerModel = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2');
const llm = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC');

// Setup database
const db = await createVectorDB({ name: 'docs', dimensions: 384 });

// RAG query function
async function ragQuery(question: string) {
  // 1. Retrieve (fast, approximate)
  const candidates = await semanticSearch({
    db,
    model: embeddingModel,
    query: question,
    k: 15,
  });

  // 2. Rerank (slower, accurate)
  const reranked = await rerank({
    model: rerankerModel,
    query: question,
    documents: candidates.map((c) => c.metadata.text as string),
    topK: 3,
  });

  // 3. Generate answer
  const context = reranked.map((r) => r.document).join('\n\n');

  const stream = await streamText({
    model: llm,
    prompt: `Answer based on the context:

Context:
${context}

Question: ${question}

Answer:`,
  });

  return stream;
}
```

## When to Use Reranking

Use reranking when:

* Building Q\&A or chatbot applications
* Initial search returns many similar results
* Accuracy matters more than latency
* Documents have subtle relevance differences

Skip reranking when:

* Latency is critical (real-time applications)
* Results are clearly distinct
* Simple keyword matching is sufficient
* Processing very large result sets

## Performance Optimization

```typescript
// Balance between accuracy and speed
const reranked = await rerank({
  model: rerankerModel,
  query,
  documents: candidates.slice(0, 10), // Limit candidates
  topK: 3,
});

// For large result sets, rerank in batches
async function rerankLargeResultSet(query: string, documents: string[], topK: number) {
  const batchSize = 50;
  const batches: RerankResult[][] = [];

  for (let i = 0; i < documents.length; i += batchSize) {
    const batch = documents.slice(i, i + batchSize);
    const result = await rerank({
      model: rerankerModel,
      query,
      documents: batch,
      topK: Math.min(topK, batch.length),
    });
    batches.push(result.map((r) => ({
      ...r,
      originalIndex: r.originalIndex + i,
    })));
  }

  // Merge and re-sort
  return batches
    .flat()
    .sort((a, b) => b.score - a.score)
    .slice(0, topK);
}
```

## Best Practices

1.
**Retrieve more, rerank less** β€” Get 3-5x more candidates than needed 2. **Use appropriate topK** β€” 3-5 is usually enough for RAG context 3. **Cache reranker model** β€” Load once, reuse for all queries 4. **Consider latency budget** β€” Reranking adds 50-200ms per query 5. **Test with/without** β€” Measure accuracy improvement for your use case ## Next Steps # Overview import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Accordions, Accordion } from 'fumadocs-ui/components/accordion'; import { TypeTable } from 'fumadocs-ui/components/type-table'; # @localmode/webllm Run large language models locally in the browser using WebGPU. Uses 4-bit quantized models for efficient inference. ## Features * πŸš€ **WebGPU Acceleration** β€” Native GPU performance in the browser * πŸ”’ **Private** β€” Models run entirely on-device * πŸ“¦ **Cached** β€” Models stored in browser cache after first download * ⚑ **Streaming** β€” Real-time token generation ## Installation `bash pnpm install @localmode/webllm @localmode/core ` `bash npm install @localmode/webllm @localmode/core ` `bash yarn add @localmode/webllm @localmode/core ` ## Quick Start ```typescript import { streamText } from '@localmode/core'; import { webllm } from '@localmode/webllm'; const model = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); const stream = await streamText({ model, prompt: 'Explain quantum computing in simple terms.', }); for await (const chunk of stream) { process.stdout.write(chunk.text); } ``` ## Available Models | Model | Size | Context | Best For | | ----------------------------------- | ------- | ------- | ------------------------------------- | | `Llama-3.2-1B-Instruct-q4f16_1-MLC` | \~700MB | 4K | Testing, simple tasks, fast responses | | `Llama-3.2-3B-Instruct-q4f16_1-MLC` | \~1.8GB | 4K | General purpose, production | Llama 3.2 models are the best all-around choice for browser LLM applications. Start with 1B for testing, use 3B in production. | Model | Size | Context | Best For | | ------------------------------------ | ------- | ------- | -------------------------------- | | `Phi-3.5-mini-instruct-q4f16_1-MLC` | \~2.4GB | 4K | Reasoning, coding, complex tasks | | `Phi-3-mini-4k-instruct-q4f16_1-MLC` | \~2.2GB | 4K | Reasoning, coding | Phi models excel at reasoning and code generation, often outperforming larger models on these tasks. | Model | Size | Context | Best For | | ----------------------------------- | ----- | ------- | ----------------------------- | | `Qwen2.5-1.5B-Instruct-q4f16_1-MLC` | \~1GB | 4K | Multilingual, Chinese support | | `Qwen2.5-3B-Instruct-q4f16_1-MLC` | \~2GB | 4K | Better multilingual quality | Qwen models have strong multilingual capabilities, especially for Chinese and Asian languages. | Model | Size | Context | Best For | | ----------------------------------- | ------- | ------- | ----------------------------------- | | `SmolLM2-1.7B-Instruct-q4f16_1-MLC` | \~1.1GB | 2K | Low-memory devices, quick inference | | `SmolLM2-360M-Instruct-q4f16_1-MLC` | \~250MB | 2K | Ultra-fast, minimal memory | SmolLM models are optimized for size, not quality. Use for simple tasks or when resources are very limited. 
| Model | Size | Context | Best For | | --------------------------- | ------- | ------- | -------------------------------- | | `gemma-2-2b-it-q4f16_1-MLC` | \~1.3GB | 8K | Longer context, Google ecosystem | * **Testing**: `Llama-3.2-1B-Instruct` - fastest to download and run * **Production**: `Llama-3.2-3B-Instruct` or `Phi-3.5-mini` - best quality * **Code/Reasoning**: `Phi-3.5-mini` - specialized for these tasks * **Multilingual**: `Qwen2.5-1.5B-Instruct` - 100+ languages * **Low Memory**: `SmolLM2-360M-Instruct` - \~250MB ## Text Generation ### Streaming ```typescript import { streamText } from '@localmode/core'; const stream = await streamText({ model: webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'), prompt: 'Write a haiku about programming.', }); let fullText = ''; for await (const chunk of stream) { fullText += chunk.text; // Update UI with each chunk } // Or get full text at once const text = await stream.text; ``` ### Non-Streaming ```typescript import { generateText } from '@localmode/core'; const { text, usage } = await generateText({ model: webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'), prompt: 'What is the capital of France?', }); console.log(text); console.log('Tokens used:', usage.totalTokens); ``` ## Configuration ### Model Options ```ts {2-6} const model = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC', { systemPrompt: 'You are a helpful coding assistant.', temperature: 0.7, maxTokens: 1024, topP: 0.9, }); ``` ### Custom Provider ```typescript import { createWebLLM } from '@localmode/webllm'; const myWebLLM = createWebLLM({ onProgress: (progress) => { console.log(`Loading: ${(progress.progress * 100).toFixed(1)}%`); console.log(`Status: ${progress.text}`); }, }); const model = myWebLLM.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); ``` ## Model Preloading Preload models during app initialization: ```typescript import { preloadModel, isModelCached } from '@localmode/webllm'; // Check if already cached if (!(await isModelCached('Llama-3.2-1B-Instruct-q4f16_1-MLC'))) { // Show loading UI await preloadModel('Llama-3.2-1B-Instruct-q4f16_1-MLC', { onProgress: (progress) => { updateLoadingBar(progress.progress * 100); }, }); } // Model is ready for instant inference ``` ## Model Management ### Available Models Registry Access model metadata programmatically: ```typescript import { WEBLLM_MODELS, type WebLLMModelId } from '@localmode/webllm'; // Get all available models const modelIds = Object.keys(WEBLLM_MODELS) as WebLLMModelId[]; // Access model info const llama = WEBLLM_MODELS['Llama-3.2-1B-Instruct-q4f16_1-MLC']; console.log(llama.name); // 'Llama 3.2 1B Instruct' console.log(llama.contextLength); // 4096 console.log(llama.size); // '~700MB' console.log(llama.sizeBytes); // 734003200 console.log(llama.description); // 'Fast, lightweight model...' 
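
// Sketch (uses only the documented fields above): pick the largest model that fits
// a rough download budget, e.g. for a model-picker UI.
const budgetBytes = 1_500_000_000; // ~1.5 GB
const underBudget = Object.entries(WEBLLM_MODELS)
  .filter(([, info]) => info.sizeBytes <= budgetBytes)
  .sort(([, a], [, b]) => b.sizeBytes - a.sizeBytes);
console.log('Largest model under budget:', underBudget[0]?.[0]);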
``` ### Model Categorization Categorize models by size for UI display: ```typescript import { getModelCategory, WEBLLM_MODELS, type WebLLMModelId } from '@localmode/webllm'; // Get category based on model size const modelId: WebLLMModelId = 'Llama-3.2-1B-Instruct-q4f16_1-MLC'; const sizeBytes = WEBLLM_MODELS[modelId].sizeBytes; const category = getModelCategory(sizeBytes); console.log(category); // 'small' | 'medium' | 'large' // Use for UI grouping function getModelsByCategory() { const categories = { small: [], medium: [], large: [] }; for (const [id, info] of Object.entries(WEBLLM_MODELS)) { const cat = getModelCategory(info.sizeBytes); categories[cat].push({ id, ...info }); } return categories; } ``` ### Delete Cached Models Remove models from browser cache to free up storage: ```typescript import { deleteModelCache, isModelCached } from '@localmode/webllm'; // Delete a specific model's cache await deleteModelCache('Llama-3.2-1B-Instruct-q4f16_1-MLC'); // Verify deletion const stillCached = await isModelCached('Llama-3.2-1B-Instruct-q4f16_1-MLC'); console.log(stillCached); // false ``` LLM models can be large (700MB - 4GB). Use `deleteModelCache()` to let users free up storage when they no longer need a model. ### Type-Safe Model IDs Use the `WebLLMModelId` type for type-safe model selection: ```typescript import type { WebLLMModelId } from '@localmode/webllm'; // Type-safe function that only accepts valid model IDs function selectModel(modelId: WebLLMModelId) { return webllm.languageModel(modelId); } // βœ… Valid selectModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); // ❌ TypeScript error: invalid model ID selectModel('invalid-model-name'); ``` ## Chat Application ```typescript import { streamText } from '@localmode/core'; interface Message { role: 'user' | 'assistant'; content: string; } async function chat(messages: Message[], userMessage: string) { const model = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC', { systemPrompt: 'You are a helpful assistant.', }); // Build conversation prompt const prompt = messages .map((m) => `${m.role}: ${m.content}`) .concat([`user: ${userMessage}`, 'assistant:']) .join('\n'); const stream = await streamText({ model, prompt, stopSequences: ['user:', '\n\n'], }); let response = ''; for await (const chunk of stream) { response += chunk.text; // Update UI } return response; } ``` ## RAG Integration Combine with retrieval for document-grounded chat: ```typescript import { semanticSearch, rerank, streamText } from '@localmode/core'; async function ragChat(query: string, db: VectorDB) { // 1. Retrieve context const results = await semanticSearch({ db, model: embeddingModel, query, k: 10, }); // 2. Rerank for relevance const reranked = await rerank({ model: rerankerModel, query, documents: results.map((r) => r.metadata.text as string), topK: 3, }); const context = reranked.map((r) => r.document).join('\n\n---\n\n'); // 3. Generate with context const llm = webllm.languageModel('Llama-3.2-3B-Instruct-q4f16_1-MLC'); const stream = await streamText({ model: llm, prompt: `You are a helpful assistant. Answer based only on the provided context. If the answer is not in the context, say "I don't have that information." Context: ${context} Question: ${query} Answer:`, }); return stream; } ``` ## Requirements WebLLM requires WebGPU support. Check availability: ```typescript import { isWebGPUSupported } from '@localmode/core'; if (!isWebGPUSupported()) { console.warn('WebGPU not available. 
LLM features disabled.'); } ``` ### Browser Support | Browser | Support | | ----------- | ---------------- | | Chrome 113+ | βœ… | | Edge 113+ | βœ… | | Firefox | ❌ (Nightly only) | | Safari 18+ | βœ… | | iOS Safari | βœ… (iOS 26+) | ### Hardware Requirements * **GPU**: Any modern GPU with WebGPU support * **VRAM**: Depends on model (1-3GB for 1-3B models) * **RAM**: 4GB minimum, 8GB+ recommended ## Best Practices 1. **Preload models** - Load during app init for instant inference 2. **Start small** - Use 1B models for testing, larger for production 3. **Stream responses** - Better UX than waiting for complete response 4. **Handle errors** - GPU errors, OOM, etc. can occur 5. **Check capabilities** - Verify WebGPU before showing LLM features ## Error Handling ```typescript import { streamText, GenerationError } from '@localmode/core'; try { const stream = await streamText({ model, prompt: 'Hello', }); for await (const chunk of stream) { // ... } } catch (error) { if (error instanceof GenerationError) { if (error.code === 'WEBGPU_NOT_SUPPORTED') { console.error('WebGPU not available'); } else if (error.code === 'MODEL_LOAD_FAILED') { console.error('Failed to load model'); } else if (error.code === 'OUT_OF_MEMORY') { console.error('Not enough GPU memory'); } } } ``` ## Next Steps