# Getting Started import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Steps } from 'fumadocs-ui/components/steps'; import { Files, File, Folder } from 'fumadocs-ui/components/files'; This guide will walk you through installing LocalMode and building your first local-first AI application. ## Installation ### Install packages The minimum setup requires `@localmode/core` and at least one provider: `bash pnpm install @localmode/core @localmode/transformers ` `bash npm install @localmode/core @localmode/transformers ` `bash yarn add @localmode/core @localmode/transformers ` All underlying ML dependencies (like `@huggingface/transformers`) are automatically installed with the provider packages. ### Configure bundler (if needed) For **Next.js**, add to `next.config.js`: ```javascript title="next.config.js" /** @type {import('next').NextConfig} */ const nextConfig = { webpack: (config) => { config.resolve.alias = { ...config.resolve.alias, sharp$: false, 'onnxruntime-node$': false, }; return config; }, experimental: { serverComponentsExternalPackages: ['sharp', 'onnxruntime-node'], }, }; module.exports = nextConfig; ``` For **Vite**, models work out of the box. For workers, you may need: ```typescript title="vite.config.ts" export default defineConfig({ optimizeDeps: { exclude: ['@huggingface/transformers'], }, }); ``` ## Your First Embedding Let's create your first embedding: ```typescript title="first-embedding.ts" import { embed } from '@localmode/core'; import { transformers } from '@localmode/transformers'; // Create embedding model const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); // Generate embedding const { embedding, usage } = await embed({ model, value: 'Hello, world!', }); console.log('Embedding dimensions:', embedding.length); // 384 console.log('Tokens used:', usage.tokens); ``` The first time you use a model, it downloads from HuggingFace Hub and caches in IndexedDB. Subsequent loads are instant. ## Build a Semantic Search App Here's a complete example of building semantic search: ```typescript title="semantic-search.ts" import { createVectorDB, embed, embedMany, semanticSearch } from '@localmode/core'; import { transformers } from '@localmode/transformers'; // 1. Setup const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const db = await createVectorDB({ name: 'my-documents', dimensions: 384, }); // 2. Sample documents const documents = [ 'Machine learning is a subset of artificial intelligence.', 'Neural networks are inspired by biological neurons.', 'Deep learning uses multiple layers of neural networks.', 'Natural language processing handles human language.', 'Computer vision enables machines to interpret images.', ]; // 3. Generate embeddings const { embeddings } = await embedMany({ model, values: documents, }); // 4. Store in vector database await db.addMany( documents.map((text, i) => ({ id: `doc-${i}`, vector: embeddings[i], metadata: { text }, })) ); // 5. Search const results = await semanticSearch({ db, model, query: 'How do neural networks work?', k: 3, }); console.log('Results:'); results.forEach((r, i) => { console.log(`${i + 1}. ${r.metadata.text} (score: ${r.score.toFixed(3)})`); }); ``` Output: ``` Results: 1. Neural networks are inspired by biological neurons. (score: 0.842) 2. Deep learning uses multiple layers of neural networks. (score: 0.756) 3. Machine learning is a subset of artificial intelligence. 
(score: 0.623) ``` ## Add RAG with Chunking For longer documents, use chunking: ```typescript title="rag-example.ts" import { createVectorDB, chunk, ingest, semanticSearch, rerank } from '@localmode/core'; import { transformers } from '@localmode/transformers'; // Setup const embeddingModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const rerankerModel = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2'); const db = await createVectorDB({ name: 'documents', dimensions: 384, }); // Load and chunk a document const documentText = ` Machine learning is revolutionizing how we build software... (your long document here) `; const chunks = chunk(documentText, { strategy: 'recursive', size: 512, overlap: 50, }); // Ingest with automatic embedding await ingest({ db, model: embeddingModel, documents: chunks.map((c) => ({ text: c.text, metadata: { start: c.startIndex, end: c.endIndex }, })), }); // Search and rerank for better accuracy const query = 'What are the applications of machine learning?'; const searchResults = await semanticSearch({ db, model: embeddingModel, query, k: 10, // Get more candidates for reranking }); const reranked = await rerank({ model: rerankerModel, query, documents: searchResults.map((r) => r.metadata.text as string), topK: 3, }); console.log('Top results after reranking:'); reranked.forEach((r, i) => { console.log(`${i + 1}. Score: ${r.score.toFixed(3)}`); console.log(` ${r.document.substring(0, 100)}...`); }); ``` ## Add LLM Generation Combine with WebLLM for complete RAG: ```typescript title="rag-with-llm.ts" import { streamText } from '@localmode/core'; import { webllm } from '@localmode/webllm'; // After getting search results... const context = reranked.map((r) => r.document).join('\n\n'); const llm = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); const stream = await streamText({ model: llm, prompt: `Based on the following context, answer the question. Context: ${context} Question: ${query} Answer:`, }); for await (const chunk of stream) { process.stdout.write(chunk.text); } ``` ## Project Structure A typical LocalMode project might look like: ```typescript title="src/lib/ai.ts" import { transformers } from '@localmode/transformers'; import { webllm } from '@localmode/webllm'; // Singleton instances for reuse export const embeddingModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); export const rerankerModel = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2'); export const llm = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); ``` ```typescript title="src/lib/db.ts" import { createVectorDB } from '@localmode/core'; let dbInstance: Awaited> | null = null; export async function getDB() { if (!dbInstance) { dbInstance = await createVectorDB({ name: 'my-app', dimensions: 384, }); } return dbInstance; } ``` ## Next Steps # Introduction import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Steps } from 'fumadocs-ui/components/steps'; # LocalMode **LocalMode** is a modular, local-first AI engine for the browser. Run embeddings, vector search, RAG pipelines, text classification, speech-to-text, image recognition, and LLM inference - all directly in the browser with zero server dependencies. All processing happens locally. No data ever leaves the user's device. Zero telemetry. Zero tracking. ## Why LocalMode? 
* **πŸ”’ Privacy-First** β€” Data never leaves the device * **⚑ Zero Dependencies** β€” Core package has no external dependencies * **πŸ“± Offline-Ready** β€” Works without network after first model download * **🎯 Type-Safe** β€” Full TypeScript support with comprehensive types * **πŸ”Œ Modular** β€” Use only what you need ## Packages ## Quick Start ### Install packages `bash pnpm install @localmode/core @localmode/transformers ` `bash npm install @localmode/core @localmode/transformers ` `bash yarn add @localmode/core @localmode/transformers ` ### Create embeddings ```typescript import { embed, embedMany } from '@localmode/core'; import { transformers } from '@localmode/transformers'; // Create embedding model const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); // Embed single value const { embedding } = await embed({ model, value: 'Hello, world!', }); // Embed multiple values const { embeddings } = await embedMany({ model, values: ['Hello', 'World', 'AI'], }); ``` ### Create vector database ```typescript import { createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'my-documents', dimensions: 384, // Matches all-MiniLM-L6-v2 }); // Add documents await db.addMany([ { id: 'doc-1', vector: embeddings[0], metadata: { text: 'Hello' } }, { id: 'doc-2', vector: embeddings[1], metadata: { text: 'World' } }, ]); // Search const results = await db.search(embedding, { k: 5 }); ``` ### Build a RAG pipeline ```typescript import { chunk, ingest, semanticSearch } from '@localmode/core'; import { transformers } from '@localmode/transformers'; const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); // Chunk document const chunks = chunk(documentText, { strategy: 'recursive', size: 512, overlap: 50, }); // Ingest into vector DB await ingest({ db, model, documents: chunks.map((c) => ({ text: c.text, metadata: { source: 'my-document.pdf' }, })), }); // Search const results = await semanticSearch({ db, model, query: 'What is machine learning?', k: 5, }); ``` ## Architecture LocalMode follows a **"zero-dependency core, thin provider wrappers"** architecture: ``` +-------------------------------------------------------------+ | Your Application | +-------------------------------------------------------------+ | @localmode/core | | +----------+ +----------+ +----------+ +----------------+ | | | VectorDB | |Embeddings| | RAG | | Storage/Security| | | +----------+ +----------+ +----------+ +----------------+ | +-------------------------------------------------------------+ | Provider Packages (thin wrappers) | | +----------------+ +------------+ +------------------+ | | | @localmode/ | | @localmode/| | @localmode/ | | | | transformers | | webllm | | pdfjs | | | +----------------+ +------------+ +------------------+ | +-------------------------------------------------------------+ | Browser APIs | | IndexedDB β€’ WebGPU β€’ WASM β€’ Web Workers | +-------------------------------------------------------------+ ``` ## Browser Compatibility | Browser | WebGPU | WASM | IndexedDB | Web Workers | | ----------- | ------- | ---- | --------- | ----------- | | Chrome 80+ | 113+ | βœ… | βœ… | βœ… | | Edge 80+ | 113+ | βœ… | βœ… | βœ… | | Firefox 75+ | Nightly | βœ… | βœ… | βœ… | | Safari 14+ | 18+ | βœ… | βœ… | ⚠️ | * **Safari/iOS**: Private browsing blocks IndexedDB - use `MemoryStorage` fallback * **Firefox**: WebGPU only in Nightly - WASM fallback is automatic * **SharedArrayBuffer**: Requires cross-origin isolation for some features ## Next Steps # Capabilities import { 
Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { TypeTable } from 'fumadocs-ui/components/type-table'; LocalMode provides utilities to detect device capabilities and choose appropriate fallbacks. ## Full Capability Report Get a comprehensive report of available features: ```ts import { detectCapabilities } from '@localmode/core'; const capabilities = await detectCapabilities(); ``` ### Capabilities Object ## Individual Feature Checks ### WebGPU ```typescript import { isWebGPUSupported } from '@localmode/core'; if (isWebGPUSupported()) { // Use WebGPU-accelerated models console.log('WebGPU available!'); } else { // Fall back to WASM console.log('Using WASM fallback'); } ``` ### IndexedDB ```typescript import { isIndexedDBSupported } from '@localmode/core'; if (isIndexedDBSupported()) { // Use IndexedDB storage } else { // Use memory storage (Safari private browsing) } ``` ### Web Workers ```typescript import { isWebWorkersSupported } from '@localmode/core'; if (isWebWorkersSupported()) { // Offload to worker const db = await createVectorDBWithWorker({ name: 'db', dimensions: 384 }); } else { // Use main thread const db = await createVectorDB({ name: 'db', dimensions: 384 }); } ``` ### Web Locks ```typescript import { isWebLocksSupported } from '@localmode/core'; if (isWebLocksSupported()) { // Use Web Locks for cross-tab coordination } else { // Use fallback lock manager } ``` ### Crypto ```typescript import { isCryptoSupported } from '@localmode/core'; if (isCryptoSupported()) { // Use Web Crypto API for encryption } else { // Encryption not available } ``` ### Cross-Origin Isolation ```typescript import { isCrossOriginIsolated } from '@localmode/core'; if (isCrossOriginIsolated()) { // SharedArrayBuffer available // Better worker performance } else { // Some features limited } ``` ## Model Support Check Check if a specific model is supported: ```typescript import { checkModelSupport } from '@localmode/core'; const support = await checkModelSupport('Llama-3.2-1B-Instruct-q4f16_1-MLC'); if (support.supported) { console.log('Model can run on this device'); } else { console.log('Issues:', support.issues); // ['Insufficient GPU memory', 'WebGPU not available'] } ``` ## Recommended Fallbacks Get fallback recommendations: ```typescript import { getRecommendedFallbacks } from '@localmode/core'; const fallbacks = await getRecommendedFallbacks(); console.log(fallbacks); // { // embedding: 'Xenova/all-MiniLM-L6-v2', // Smaller model for limited devices // llm: 'SmolLM2-1.7B-Instruct-q4f16_1-MLC', // Compact LLM // storage: 'memory', // If IndexedDB unavailable // compute: 'wasm', // If WebGPU unavailable // } ``` ## Capability-Based Model Selection Choose models based on device capabilities: ```typescript import { detectCapabilities } from '@localmode/core'; import { transformers } from '@localmode/transformers'; import { webllm } from '@localmode/webllm'; const capabilities = await detectCapabilities(); // Choose embedding model const embeddingModel = capabilities.webgpu ? 
transformers.embedding('Xenova/all-MiniLM-L12-v2') // Larger, better : transformers.embedding('Xenova/all-MiniLM-L6-v2'); // Smaller, faster // Choose LLM let llm; if (capabilities.webgpu && capabilities.memory?.available > 2048) { llm = webllm.languageModel('Llama-3.2-3B-Instruct-q4f16_1-MLC'); } else if (capabilities.webgpu) { llm = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); } else { console.warn('WebGPU not available, LLM features disabled'); llm = null; } ``` ## Browser Compatibility | Feature | Chrome | Edge | Firefox | Safari | | ----------------- | ------ | ----- | ------- | ------ | | WebGPU | 113+ | 113+ | Nightly | 18+ | | WASM | 80+ | 80+ | 75+ | 14+ | | IndexedDB | βœ… | βœ… | βœ… | βœ…\* | | Web Workers | βœ… | βœ… | βœ… | ⚠️ | | Web Locks | βœ… | βœ… | βœ… | 15.4+ | | SharedArrayBuffer | βœ…\*\* | βœ…\*\* | βœ…\*\* | βœ…\*\* | * Safari private browsing blocks IndexedDB \*\* Requires cross-origin isolation headers ## Handling Limited Devices Gracefully handle limited capabilities: ```typescript import { detectCapabilities, isWebGPUSupported } from '@localmode/core'; async function initializeAI() { const capabilities = await detectCapabilities(); const features = { embeddings: true, vectorSearch: true, llm: false, persistence: true, }; // Check WebGPU for LLM if (!isWebGPUSupported()) { console.warn('WebGPU not available. LLM features disabled.'); features.llm = false; } else if (capabilities.memory?.available < 1024) { console.warn('Low GPU memory. LLM may be slow.'); } else { features.llm = true; } // Check IndexedDB for persistence if (!capabilities.indexedDB) { console.warn('IndexedDB not available. Data will not persist.'); features.persistence = false; } return features; } // Usage const features = await initializeAI(); if (features.llm) { // Show LLM features in UI } else { // Hide or disable LLM features } ``` ## Device Information Get GPU and memory information: ```typescript import { detectCapabilities } from '@localmode/core'; const { gpu, memory } = await detectCapabilities(); if (gpu) { console.log('GPU Vendor:', gpu.vendor); console.log('GPU Renderer:', gpu.renderer); } if (memory) { console.log('Total Memory:', memory.total, 'MB'); console.log('Available Memory:', memory.available, 'MB'); } ``` ## Best Practices 1. **Check early** - Detect capabilities at app startup 2. **Provide fallbacks** - Always have a fallback for each feature 3. **Inform users** - Show warnings for limited functionality 4. **Test everywhere** - Test on various devices and browsers 5. **Graceful degradation** - Core features should work everywhere ## Next Steps # Coming Soon import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; These features have interfaces defined in `@localmode/core` and are under active development. Provider implementations in `@localmode/transformers` and `@localmode/webllm` are in progress. The features listed below are not yet production-ready. APIs may change before the stable release. 
## Classification & NLP `classify()`, `classifyMany()` β€” Sentiment analysis, emotion detection, topic classification `classifyZeroShot()` β€” Classify text into arbitrary labels without fine-tuning `extractEntities()`, `extractEntitiesMany()` β€” Extract people, organizations, locations, dates ### Interfaces ```ts import type { ClassificationModel, ZeroShotClassificationModel, NERModel } from '@localmode/core'; ``` *** ## Audio Processing `transcribe()` β€” Transcription with Whisper models, word-level timestamps `synthesizeSpeech()` β€” Generate speech audio from text ### Interfaces ```ts import type { SpeechToTextModel, TextToSpeechModel } from '@localmode/core'; ``` *** ## Vision `classifyImage()`, `classifyImageZeroShot()` β€” Categorize images `captionImage()` β€” Generate descriptions with BLIP models `detectObjects()` β€” Detect objects with bounding boxes `segmentImage()` β€” Background removal, semantic segmentation `extractImageFeatures()` β€” Feature vectors for image similarity search `imageToImage()` β€” Super resolution, style transfer ### Interfaces ```ts import type { ImageClassificationModel, ZeroShotImageClassificationModel, ImageCaptionModel, ObjectDetectionModel, SegmentationModel, ImageFeatureModel, ImageToImageModel, } from '@localmode/core'; ``` *** ## Text Processing `translate()` β€” Multi-language translation `summarize()` β€” Extractive and abstractive summarization `fillMask()` β€” BERT-style masked token prediction `answerQuestion()` β€” Extractive QA from context ### Interfaces ```ts import type { TranslationModel, SummarizationModel, FillMaskModel, QuestionAnsweringModel, } from '@localmode/core'; ``` *** ## Document Understanding `extractText()` β€” Optical character recognition from images `askDocument()` β€” Question answering on document images (invoices, forms) `askTable()` β€” Question answering on tabular data ### Interfaces ```ts import type { OCRModel, DocumentQAModel, TableQAModel } from '@localmode/core'; ``` *** ## Advanced RAG `createBM25()` β€” Keyword-based search for hybrid retrieval `hybridSearch()`, `reciprocalRankFusion()` β€” Combine vector and keyword search *** ## Text Generation `generateText()` β€” Non-streaming text generation (streaming via `streamText()` is available now) *** # Embeddings import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { TypeTable } from 'fumadocs-ui/components/type-table'; Embeddings convert text into numerical vectors that capture semantic meaning. Use them for similarity search, clustering, and RAG applications. 
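For a quick intuition check, here is a minimal sketch (the example sentences and score ranges are illustrative) that embeds three texts with the `Xenova/all-MiniLM-L6-v2` model used throughout these docs and compares them with `cosineSimilarity()`; semantically related sentences score noticeably higher than unrelated ones.

```ts
import { embedMany, cosineSimilarity } from '@localmode/core';
import { transformers } from '@localmode/transformers';

const model = transformers.embedding('Xenova/all-MiniLM-L6-v2');

// Two related sentences and one unrelated sentence
const { embeddings } = await embedMany({
  model,
  values: [
    'The cat sat on the mat.',
    'A kitten rested on the rug.',
    'Quarterly revenue grew by 12%.',
  ],
});

console.log(cosineSimilarity(embeddings[0], embeddings[1])); // high (related)
console.log(cosineSimilarity(embeddings[0], embeddings[2])); // noticeably lower (unrelated)
```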
## embed() Generate an embedding for a single value: ```ts {5-8} import { embed } from '@localmode/core'; import { transformers } from '@localmode/transformers'; const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const { embedding, usage, response } = await embed({ model, value: 'Hello, world!', }); console.log('Dimensions:', embedding.length); // 384 console.log('Tokens:', usage.tokens); // 4 console.log('Model:', response.modelId); // 'Xenova/all-MiniLM-L6-v2' ``` ```ts {3,7} const controller = new AbortController(); setTimeout(() => controller.abort(), 5000); // Cancel after 5s const { embedding } = await embed({ model, value: 'Hello, world!', abortSignal: controller.signal, }); ``` ### EmbedOptions ### EmbedResult ## embedMany() Generate embeddings for multiple values efficiently: ```ts {3-6} import { embedMany } from '@localmode/core'; const { embeddings, usage } = await embedMany({ model, values: ['Hello', 'World', 'AI', 'Machine Learning'], }); console.log('Count:', embeddings.length); // 4 console.log('Total tokens:', usage.tokens); // ~8 ``` ```ts {4-6} const { embeddings } = await embedMany({ model, values: largeArrayOfTexts, onProgress: (progress) => { console.log(`Processed ${progress.completed}/${progress.total}`); }, }); ``` ```ts {1,4,10} const controller = new AbortController(); // Cancel after 5 seconds setTimeout(() => controller.abort(), 5000); try { const { embeddings } = await embedMany({ model, values: largeArray, abortSignal: controller.signal, }); } catch (error) { if (error.name === 'AbortError') { console.log('Operation cancelled'); } } ``` ### EmbedManyOptions void', required: false, }, maxRetries: { description: 'Maximum retry attempts on failure', type: 'number', default: '2', }, }} /> ## streamEmbedMany() Stream embeddings as they're generated: ```typescript import { streamEmbedMany } from '@localmode/core'; const stream = streamEmbedMany({ model, values: texts, }); for await (const { index, embedding } of stream) { console.log(`Embedding ${index}:`, embedding.length); } ``` ## semanticSearch() Search for semantically similar documents: ```typescript import { semanticSearch, createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'docs', dimensions: 384 }); // Add documents to the database first... 
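// For example: embed a small (hypothetical) corpus with embedMany()
// (imported from '@localmode/core' like semanticSearch) and store it.
// `model` is the embedding model created as in the earlier examples.
const docs = [
  'Machine learning models learn patterns from data.',
  'Relational databases store rows in tables.',
];
const { embeddings } = await embedMany({ model, values: docs });
await db.addMany(
  docs.map((text, i) => ({ id: `doc-${i}`, vector: embeddings[i], metadata: { text } }))
);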
const results = await semanticSearch({ db, model, query: 'What is machine learning?', k: 5, }); results.forEach((result) => { console.log(`Score: ${result.score.toFixed(3)}`); console.log(`Text: ${result.metadata.text}`); }); ``` ### With Filters ```typescript const results = await semanticSearch({ db, model, query: 'AI applications', k: 5, filter: { category: { $eq: 'technology' }, year: { $gte: 2023 }, }, }); ``` ### Options ```typescript interface SemanticSearchOptions { db: VectorDB; model: EmbeddingModel; query: string; k?: number; filter?: FilterExpression; abortSignal?: AbortSignal; } ``` ## Distance Functions Compare vectors directly: ```typescript import { cosineSimilarity, euclideanDistance, dotProduct } from '@localmode/core'; const similarity = cosineSimilarity(embedding1, embedding2); console.log('Similarity:', similarity); // 0.0 to 1.0 const distance = euclideanDistance(embedding1, embedding2); console.log('Distance:', distance); const dot = dotProduct(embedding1, embedding2); console.log('Dot product:', dot); ``` ## Middleware Wrap embedding models with middleware for caching, logging, etc.: ```typescript import { wrapEmbeddingModel, cachingMiddleware, loggingMiddleware } from '@localmode/core'; const baseModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const model = wrapEmbeddingModel(baseModel, [ cachingMiddleware({ maxSize: 1000 }), loggingMiddleware({ logger: console.log }), ]); // Now all embed calls will be cached and logged const { embedding } = await embed({ model, value: 'Hello' }); ``` See [Middleware](/docs/core/middleware) for more details. ## Implementing Custom Models Create your own embedding model by implementing the `EmbeddingModel` interface: ```typescript import type { EmbeddingModel, DoEmbedOptions } from '@localmode/core'; class MyCustomEmbedder implements EmbeddingModel { readonly modelId = 'custom:my-embedder'; readonly provider = 'custom'; readonly dimensions = 768; readonly maxEmbeddingsPerCall = 100; readonly supportsParallelCalls = true; async doEmbed(options: DoEmbedOptions) { const { values } = options; // Your embedding logic here const embeddings = values.map(() => new Float32Array(768)); return { embeddings, usage: { tokens: values.length * 10 }, }; } } // Use with core functions const model = new MyCustomEmbedder(); const { embedding } = await embed({ model, value: 'Hello' }); ``` ## Best Practices 1. **Batch embeddings** - Use `embedMany()` instead of multiple `embed()` calls 2. **Use caching** - Add `cachingMiddleware()` for repeated queries 3. **Choose the right model** - Smaller models (MiniLM-L6) are faster, larger ones more accurate 4. **Preload models** - Load models during app initialization ### Recommended Models | Model | Dimensions | Size | Use Case | | ---------------------------------------------- | ---------- | ------- | --------------------- | | `Xenova/all-MiniLM-L6-v2` | 384 | \~22MB | General purpose, fast | | `Xenova/all-MiniLM-L12-v2` | 384 | \~33MB | Better accuracy | | `Xenova/paraphrase-multilingual-MiniLM-L12-v2` | 384 | \~117MB | 50+ languages | ## Next Steps # Events import { Callout } from 'fumadocs-ui/components/callout'; import { Steps } from 'fumadocs-ui/components/steps'; import { TypeTable } from 'fumadocs-ui/components/type-table'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; LocalMode provides a type-safe event system for building reactive applications. Subscribe to VectorDB lifecycle events, embedding operations, and custom events for real-time UI updates. 
## Overview The event system enables: * **Reactive UI updates** β€” Re-render components when data changes * **Cross-component communication** β€” Notify different parts of your app * **Debugging & logging** β€” Track all database operations * **Custom integrations** β€” Build workflows on top of database events ## Quick Start ```ts {1,4-6,9} import { createEventEmitter, VectorDBEvents } from '@localmode/core'; // Create an event emitter const events = createEventEmitter(); // Subscribe to events events.on('add', ({ id }) => { console.log('Document added:', id); }); // Emit events events.emit('add', { id: 'doc-1' }); ``` ## Creating Event Emitters ### Typed Event Emitter ```ts import { createEventEmitter, VectorDBEvents } from '@localmode/core'; // Create with built-in VectorDB event types const dbEvents = createEventEmitter(); // Or create a new EventEmitter class directly import { EventEmitter } from '@localmode/core'; const emitter = new EventEmitter(); ``` ### Custom Event Types ```ts import { EventEmitter } from '@localmode/core'; // Define your custom event types interface MyAppEvents { userLogin: { userId: string; timestamp: Date }; searchPerformed: { query: string; resultCount: number }; documentProcessed: { docId: string; chunks: number }; } const appEvents = new EventEmitter(); // Type-safe subscriptions appEvents.on('userLogin', ({ userId, timestamp }) => { console.log(`User ${userId} logged in at ${timestamp}`); }); // Type-safe emissions appEvents.emit('userLogin', { userId: 'user-123', timestamp: new Date(), }); ``` ## VectorDB Events Built-in event types for VectorDB operations: ## Embedding Events Event types for embedding operations: ## Event Methods ### `on(event, callback)` Subscribe to an event. Returns an unsubscribe function. ```ts const unsubscribe = events.on('add', ({ id }) => { console.log('Added:', id); }); // Later: unsubscribe unsubscribe(); ``` ### `once(event, callback)` Subscribe for a single emission only. ```ts events.once('modelLoad', ({ modelId }) => { console.log('Model loaded (first time only):', modelId); }); ``` ### `emit(event, data)` Emit an event synchronously. ```ts events.emit('add', { id: 'doc-1', collection: 'default' }); ``` ### `emitAsync(event, data)` Emit an event and wait for all async handlers to complete. ```ts await events.emitAsync('add', { id: 'doc-1' }); // All handlers (including async ones) have completed ``` ### `off(event?)` Remove listeners. ```ts // Remove all listeners for specific event events.off('add'); // Remove all listeners for all events events.off(); ``` ### Utility Methods ```ts // Get listener count const count = events.listenerCount('add'); // Check if there are any listeners const hasListeners = events.hasListeners('add'); // Get all event names with listeners const eventNames = events.eventNames(); ``` ## Global Event Bus LocalMode provides a global event bus for app-wide events: ```ts import { globalEventBus } from '@localmode/core'; // Subscribe anywhere in your app globalEventBus.on('add', ({ id }) => { console.log('Document added somewhere:', id); }); // Useful for: // - Debugging all database operations // - Syncing state across components // - Global logging ``` The global event bus receives events from all VectorDB instances, making it useful for centralized logging and state management. 
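As a sketch of that centralized-logging use case (the event names and payload fields mirror the VectorDB events shown elsewhere on this page; treat the exact shapes as illustrative), a single setup function can attach debug listeners to `globalEventBus` at startup and return a teardown function:

```ts
import { globalEventBus } from '@localmode/core';

// One place to observe every VectorDB instance in the app
export function enableDebugLogging() {
  const unsubscribers = [
    globalEventBus.on('add', ({ id }) => console.debug('[db] add', id)),
    globalEventBus.on('delete', ({ id }) => console.debug('[db] delete', id)),
    globalEventBus.on('search', ({ resultsCount, durationMs }) =>
      console.debug(`[db] search: ${resultsCount} results in ${durationMs}ms`)
    ),
    globalEventBus.on('error', ({ operation, error }) =>
      console.error(`[db] error in ${operation}`, error)
    ),
  ];

  // Call the returned function to stop logging
  return () => unsubscribers.forEach((off) => off());
}
```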
## Event Middleware Create middleware that emits events for VectorDB operations: ```ts import { wrapVectorDB, createEventEmitter, eventMiddleware } from '@localmode/core'; // Create event emitter const events = createEventEmitter(); // Subscribe to events events.on('add', ({ id }) => console.log('Added:', id)); events.on('delete', ({ id }) => console.log('Deleted:', id)); // Create DB with event middleware const db = wrapVectorDB({ db: baseDb, middleware: eventMiddleware(events), }); // Now all operations emit events automatically await db.add({ id: 'doc-1', vector, metadata }); // Console: "Added: doc-1" ``` ## React Integration ### Custom Hook ```tsx import { useEffect, useState } from 'react'; import { createEventEmitter, VectorDBEvents } from '@localmode/core'; const events = createEventEmitter(); function useVectorDBEvents() { const [documentCount, setDocumentCount] = useState(0); const [lastOperation, setLastOperation] = useState(null); useEffect(() => { const unsubscribeAdd = events.on('add', () => { setDocumentCount((c) => c + 1); setLastOperation('add'); }); const unsubscribeDelete = events.on('delete', () => { setDocumentCount((c) => c - 1); setLastOperation('delete'); }); const unsubscribeClear = events.on('clear', () => { setDocumentCount(0); setLastOperation('clear'); }); return () => { unsubscribeAdd(); unsubscribeDelete(); unsubscribeClear(); }; }, []); return { documentCount, lastOperation }; } ``` ### Search Analytics ```tsx function SearchAnalytics() { const [searches, setSearches] = useState< Array<{ query: string; results: number; duration: number; }> >([]); useEffect(() => { return globalEventBus.on('search', ({ resultsCount, k, durationMs }) => { setSearches((prev) => [ ...prev.slice(-99), // Keep last 100 { query: 'unknown', results: resultsCount, duration: durationMs }, ]); }); }, []); const avgDuration = searches.reduce((sum, s) => sum + s.duration, 0) / searches.length; return (
    <div>
      <div>Total searches: {searches.length}</div>
      <div>Average duration: {avgDuration.toFixed(2)}ms</div>
    </div>
); } ``` ## Full Example ```ts import { createVectorDB, embed, createEventEmitter, VectorDBEvents, EmbeddingEvents, } from '@localmode/core'; import { transformers } from '@localmode/transformers'; // Create event emitters const dbEvents = createEventEmitter(); const embedEvents = createEventEmitter(); // Set up logging dbEvents.on('add', ({ id }) => console.log(`[DB] Added: ${id}`)); dbEvents.on('search', ({ resultsCount, durationMs }) => { console.log(`[DB] Search: ${resultsCount} results in ${durationMs}ms`); }); dbEvents.on('error', ({ operation, error }) => { console.error(`[DB] Error in ${operation}:`, error); }); embedEvents.on('embedStart', ({ valueCount }) => { console.log(`[Embed] Starting ${valueCount} values`); }); embedEvents.on('embedComplete', ({ valueCount, durationMs, tokens }) => { console.log(`[Embed] Completed ${valueCount} values in ${durationMs}ms (${tokens} tokens)`); }); embedEvents.on('modelLoad', ({ modelId, durationMs }) => { console.log(`[Embed] Model ${modelId} loaded in ${durationMs}ms`); }); // Create database const db = await createVectorDB({ name: 'documents', dimensions: 384 }); const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); // Manually emit events (or use middleware) async function addDocument(text: string) { const id = crypto.randomUUID(); embedEvents.emit('embedStart', { valueCount: 1 }); const start = performance.now(); const { embedding, usage } = await embed({ model, value: text }); embedEvents.emit('embedComplete', { valueCount: 1, durationMs: performance.now() - start, tokens: usage.tokens, }); await db.add({ id, vector: embedding, metadata: { text } }); dbEvents.emit('add', { id }); return id; } ``` # Text Generation import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; Generate text using local language models with streaming support. ## streamText() Stream text generation for real-time responses: ```typescript import { streamText } from '@localmode/core'; import { webllm } from '@localmode/webllm'; const model = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); const stream = await streamText({ model, prompt: 'Explain quantum computing in simple terms.', }); for await (const chunk of stream) { process.stdout.write(chunk.text); } ``` ### With System Prompt ```typescript const stream = await streamText({ model, system: 'You are a helpful coding assistant. 
Be concise.', prompt: 'Write a function to reverse a string in TypeScript.', }); ``` ### Options ```typescript interface StreamTextOptions { model: LanguageModel; prompt: string; system?: string; maxTokens?: number; temperature?: number; topP?: number; stopSequences?: string[]; abortSignal?: AbortSignal; } ``` ### Stream Properties ```typescript const stream = await streamText({ model, prompt: 'Hello' }); // Iterate over text chunks for await (const chunk of stream) { console.log(chunk.text); // The generated text piece console.log(chunk.isLast); // Whether this is the last chunk } // Get full text after streaming const fullText = await stream.text; // Get usage statistics const usage = await stream.usage; console.log('Tokens:', usage.totalTokens); ``` ## generateText() Generate complete text without streaming: ```typescript import { generateText } from '@localmode/core'; const { text, usage } = await generateText({ model, prompt: 'Write a haiku about programming.', }); console.log(text); console.log('Tokens used:', usage.totalTokens); ``` ### Options ```typescript interface GenerateTextOptions { model: LanguageModel; prompt: string; system?: string; maxTokens?: number; temperature?: number; topP?: number; stopSequences?: string[]; abortSignal?: AbortSignal; } ``` ### Return Value ```typescript interface GenerateTextResult { text: string; usage: { promptTokens: number; completionTokens: number; totalTokens: number; }; response: { modelId: string; timestamp: Date; }; } ``` ## Cancellation Cancel generation mid-stream: ```typescript const controller = new AbortController(); // Cancel after 5 seconds setTimeout(() => controller.abort(), 5000); try { const stream = await streamText({ model, prompt: 'Write a long essay...', abortSignal: controller.signal, }); for await (const chunk of stream) { process.stdout.write(chunk.text); } } catch (error) { if (error.name === 'AbortError') { console.log('\nGeneration cancelled'); } } ``` ## Temperature & Sampling Control randomness in generation: ```typescript // More deterministic (good for factual responses) const stream = await streamText({ model, prompt: 'What is 2 + 2?', temperature: 0.1, }); // More creative (good for stories, brainstorming) const stream = await streamText({ model, prompt: 'Write a creative story about a robot.', temperature: 0.9, }); // Nucleus sampling const stream = await streamText({ model, prompt: 'Continue this sentence: The future of AI is...', topP: 0.9, // Consider tokens making up 90% of probability }); ``` | Parameter | Description | Range | Default | | ------------- | --------------------- | ------------- | ------------- | | `temperature` | Randomness | 0.0 - 2.0 | 1.0 | | `topP` | Nucleus sampling | 0.0 - 1.0 | 1.0 | | `maxTokens` | Max generation length | 1 - model max | Model default | ## Stop Sequences Stop generation at specific patterns: ```typescript const stream = await streamText({ model, prompt: 'List three fruits:\n1.', stopSequences: ['\n4.', '\n\n'], // Stop before 4th item or double newline }); ``` ## Chat-Style Prompts Build chat applications: ```typescript function buildPrompt(messages: Array<{ role: string; content: string }>) { return messages .map((m) => `${m.role}: ${m.content}`) .join('\n') + '\nassistant:'; } const messages = [ { role: 'user', content: 'Hello!' }, { role: 'assistant', content: 'Hi! How can I help you today?' }, { role: 'user', content: 'What is TypeScript?' 
}, ]; const stream = await streamText({ model, system: 'You are a helpful programming assistant.', prompt: buildPrompt(messages), stopSequences: ['user:', '\n\n'], }); ``` ## RAG Integration Combine with retrieval: ```typescript import { semanticSearch, streamText } from '@localmode/core'; async function ragQuery(question: string) { // Retrieve context const results = await semanticSearch({ db, model: embeddingModel, query: question, k: 3 }); const context = results.map((r) => r.metadata.text).join('\n\n'); // Generate answer const stream = await streamText({ model: llm, system: 'Answer based only on the provided context.', prompt: `Context:\n${context}\n\nQuestion: ${question}\n\nAnswer:`, }); return stream; } ``` ## Implementing Custom Models Create your own language model: ```typescript import type { LanguageModel, GenerateTextOptions, StreamTextOptions } from '@localmode/core'; class MyLanguageModel implements LanguageModel { readonly modelId = 'custom:my-model'; readonly provider = 'custom'; async doGenerateText(options: GenerateTextOptions) { // Your generation logic return { text: 'Generated text...', usage: { promptTokens: 10, completionTokens: 20, totalTokens: 30 }, }; } async doStreamText(options: StreamTextOptions) { // Return an async generator return (async function* () { yield { text: 'Hello', isLast: false }; yield { text: ' world!', isLast: true }; })(); } } ``` ## Best Practices 1. **Stream for UX** β€” Always use `streamText()` for user-facing apps 2. **Set max tokens** β€” Prevent runaway generation 3. **Use system prompts** β€” Guide model behavior consistently 4. **Handle errors** β€” Wrap generation in try-catch 5. **Provide cancellation** β€” Let users abort long generations ## Next Steps # Overview import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Steps, Step } from 'fumadocs-ui/components/steps'; # @localmode/core The core package contains **all functions, interfaces, types, and utilities** for building local-first AI applications. It has **zero external dependencies** β€” everything is implemented using native browser APIs. `@localmode/core` has no `dependencies` in its `package.json`. All functionality uses native Web APIs: IndexedDB, Web Crypto, Web Workers, BroadcastChannel, and more. 
## Installation `bash pnpm install @localmode/core ` `bash npm install @localmode/core ` `bash yarn add @localmode/core ` ## Quick Start ### Install Dependencies ```bash pnpm install @localmode/core @localmode/transformers ``` ### Create an Embedding Model ```ts {3} import { transformers } from '@localmode/transformers'; const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); ``` ### Generate Embeddings ```ts {1,3-6} import { embed } from '@localmode/core'; const { embedding, usage } = await embed({ model, value: 'Hello, world!', }); ``` ### Store and Search ```ts import { createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'docs', dimensions: 384 }); await db.add({ id: 'doc-1', vector: embedding, metadata: { text: 'Hello' } }); const results = await db.search(queryVector, { k: 5 }); ``` ## Features ### βœ… Production Ready These features are stable and ready for production use: | Feature | Description | | ------------------- | ------------------------------------------------------- | | **Vector Database** | HNSW index for fast approximate nearest neighbor search | | **Embeddings** | `embed()`, `embedMany()`, `semanticSearch()` functions | | **Reranking** | `rerank()` for improved RAG accuracy | | **RAG Utilities** | Text chunking (recursive, markdown, code-aware) | | **Text Generation** | `streamText()` with async iteration | | **Storage** | IndexedDB persistence with memory fallback | | **Capabilities** | WebGPU, IndexedDB, Workers detection | | **Security** | Encryption, PII redaction | | **Middleware** | Caching, logging, retry, validation | ### 🚧 Coming Soon | Feature | Description | | ------------------ | ---------------------------------------------- | | **Classification** | `classify()`, `classifyZeroShot()` | | **NER** | `extractEntities()` | | **Audio** | `transcribe()`, `synthesizeSpeech()` | | **Vision** | Image classification, captioning, segmentation | | **Translation** | `translate()` | | **Summarization** | `summarize()` | | **OCR** | `extractText()` | | **Document QA** | `askDocument()`, `askTable()` | ## Architecture LocalMode follows a **function-first API** design: ```typescript // βœ… Top-level functions (correct) const { embedding } = await embed({ model, value: 'Hello' }); // ❌ Class methods (wrong) const embedder = new Embedder(model); await embedder.embed('Hello'); ``` All functions accept a **single options object** and return **structured results**: ```typescript interface EmbedResult { embedding: Float32Array; usage: { tokens: number }; response: { modelId: string; timestamp: Date }; } ``` ## Quick Reference ## Core Exports ### Embeddings ```typescript import { embed, embedMany, streamEmbedMany, semanticSearch, wrapEmbeddingModel, } from '@localmode/core'; ``` ### Vector Database ```typescript import { createVectorDB, createVectorDBWithWorker, HNSWIndex, cosineSimilarity, euclideanDistance, dotProduct, } from '@localmode/core'; ``` ### RAG Utilities ```typescript import { chunk, recursiveChunk, markdownChunk, codeChunk, ingest, createBM25, hybridFuse, reciprocalRankFusion, } from '@localmode/core'; ``` ### Text Generation ```typescript import { streamText, generateText } from '@localmode/core'; ``` ### Classification ```typescript import { classify, classifyMany, classifyZeroShot, extractEntities, extractEntitiesMany, rerank, } from '@localmode/core'; ``` ### Storage ```typescript import { IndexedDBStorage, MemoryStorage, createStorage, getStorageQuota, requestPersistence, cleanup, } from '@localmode/core'; ``` ### 
Capabilities ```typescript import { detectCapabilities, isWebGPUSupported, isIndexedDBSupported, checkModelSupport, getRecommendedFallbacks, } from '@localmode/core'; ``` ### Middleware ```typescript import { wrapEmbeddingModel, wrapVectorDB, cachingMiddleware, loggingMiddleware, retryMiddleware, rateLimitMiddleware, validationMiddleware, piiRedactionMiddleware, encryptionMiddleware, } from '@localmode/core'; ``` ### Security ```typescript import { encrypt, decrypt, deriveKey, isCryptoSupported, redactPII } from '@localmode/core'; ``` ### Cross-Tab Sync ```typescript import { createBroadcaster, createLockManager, isWebLocksSupported } from '@localmode/core'; ``` ### Network ```typescript import { getNetworkStatus, onNetworkChange, isOnline, isOffline, waitForOnline, } from '@localmode/core'; ``` ### Events ```typescript import { createEventEmitter, globalEventBus } from '@localmode/core'; ``` ### Errors ```typescript import { LocalModeError, EmbeddingError, ModelNotFoundError, StorageError, QuotaExceededError, ValidationError, formatErrorForUser, } from '@localmode/core'; ``` ### Testing Utilities ```typescript import { createMockEmbeddingModel, createMockStorage, createMockVectorDB, createTestVector, createSeededRandom, } from '@localmode/core'; ``` ## Type Definitions All interfaces are exported for implementing custom providers: ```typescript import type { // Models EmbeddingModel, ClassificationModel, ZeroShotClassificationModel, NERModel, RerankerModel, LanguageModel, SpeechToTextModel, TextToSpeechModel, ImageClassificationModel, ImageCaptionModel, SegmentationModel, ObjectDetectionModel, TranslationModel, SummarizationModel, FillMaskModel, QuestionAnsweringModel, OCRModel, DocumentQAModel, // Storage Storage, StoredDocument, // Vector DB VectorDB, VectorDBConfig, SearchResult, // Middleware EmbeddingModelMiddleware, VectorDBMiddleware, } from '@localmode/core'; ``` ## Next Steps # Middleware import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Accordions, Accordion } from 'fumadocs-ui/components/accordion'; import { TypeTable } from 'fumadocs-ui/components/type-table'; Middleware lets you extend and modify the behavior of embedding models and vector databases. 
## Embedding Model Middleware Wrap embedding models with middleware: ```ts {5-8} import { wrapEmbeddingModel, cachingMiddleware, loggingMiddleware } from '@localmode/core'; import { transformers } from '@localmode/transformers'; const baseModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const model = wrapEmbeddingModel(baseModel, [ cachingMiddleware({ maxSize: 1000 }), loggingMiddleware({ logger: console.log }), ]); ``` ## Available Middleware Cache embeddings to avoid recomputation: ```ts import { cachingMiddleware } from '@localmode/core'; const model = wrapEmbeddingModel(baseModel, [ cachingMiddleware({ maxSize: 1000, // Maximum cache entries ttl: 60 * 60 * 1000, // Time-to-live: 1 hour storage: 'memory', // 'memory' or 'indexeddb' }), ]); // First call computes embedding const { embedding: e1 } = await embed({ model, value: 'Hello' }); // Second call returns from cache (instant) const { embedding: e2 } = await embed({ model, value: 'Hello' }); ``` **Persistent Cache:** ```ts const model = wrapEmbeddingModel(baseModel, [ cachingMiddleware({ storage: 'indexeddb', dbName: 'embedding-cache', maxSize: 10000, }), ]); // Cache persists across page reloads ``` Log all operations: ```ts import { loggingMiddleware } from '@localmode/core'; const model = wrapEmbeddingModel(baseModel, [ loggingMiddleware({ logger: (event) => { console.log(`[${event.type}] ${event.duration}ms`); console.log(` Inputs: ${event.inputCount}`); console.log(` Tokens: ${event.tokens}`); }, }), ]); ``` **Log to Analytics:** ```ts const model = wrapEmbeddingModel(baseModel, [ loggingMiddleware({ logger: (event) => { analytics.track('embedding', { model: event.modelId, duration: event.duration, tokens: event.tokens, }); }, }), ]); ``` Automatically retry on failures: ```ts import { retryMiddleware } from '@localmode/core'; const model = wrapEmbeddingModel(baseModel, [ retryMiddleware({ maxRetries: 3, delay: 1000, // Initial delay backoff: 'exponential', // 'linear' or 'exponential' maxDelay: 10000, // Maximum delay retryOn: (error) => { // Only retry on transient errors return error.code === 'NETWORK_ERROR'; }, }), ]); ``` boolean', }, }} /> Limit request rate: ```ts import { rateLimitMiddleware } from '@localmode/core'; const model = wrapEmbeddingModel(baseModel, [ rateLimitMiddleware({ maxRequests: 10, // Max requests windowMs: 1000, // Per second onLimit: () => { console.warn('Rate limit hit, waiting...'); }, }), ]); ``` void', }, }} /> Validate inputs: ```ts import { validationMiddleware } from '@localmode/core'; const model = wrapEmbeddingModel(baseModel, [ validationMiddleware({ maxLength: 8192, // Max characters per input maxBatchSize: 100, // Max inputs per batch validateInput: (input) => { if (input.trim().length === 0) { throw new Error('Empty input'); } }, }), ]); ``` void', }, }} /> Redact sensitive information before embedding: ```ts import { piiRedactionMiddleware } from '@localmode/core'; const model = wrapEmbeddingModel(baseModel, [ piiRedactionMiddleware({ patterns: ['email', 'phone', 'ssn', 'creditCard'], replacement: '[REDACTED]', }), ]); // Emails, phone numbers, etc. 
are redacted before embedding const { embedding } = await embed({ model, value: 'Contact john@example.com or call 555-123-4567', }); // Actually embeds: "Contact [REDACTED] or call [REDACTED]" ``` Encrypt embeddings before storage: ```ts import { encryptionMiddleware, deriveKey } from '@localmode/core'; const key = await deriveKey('user-password', 'salt'); const model = wrapEmbeddingModel(baseModel, [ encryptionMiddleware({ key, // Embeddings are encrypted before being returned }), ]); ``` Encryption uses Web Crypto API. Make sure to store the key securelyβ€”if lost, encrypted embeddings cannot be decrypted. ## Combining Middleware Stack multiple middleware: ```typescript const model = wrapEmbeddingModel(baseModel, [ validationMiddleware({ maxLength: 8192 }), piiRedactionMiddleware({ patterns: ['email', 'phone'] }), cachingMiddleware({ maxSize: 1000 }), retryMiddleware({ maxRetries: 3 }), loggingMiddleware({ logger: console.log }), ]); ``` Middleware executes in order. Place validation first, caching before expensive operations, and logging last. ## Vector DB Middleware Wrap vector databases: ```typescript import { wrapVectorDB } from '@localmode/core'; const baseDB = await createVectorDB({ name: 'db', dimensions: 384 }); const db = wrapVectorDB(baseDB, { beforeAdd: async (docs) => { console.log('Adding', docs.length, 'documents'); return docs; }, afterAdd: async (docs) => { console.log('Added', docs.length, 'documents'); }, beforeSearch: async (vector, options) => { console.log('Searching with k =', options.k); return { vector, options }; }, afterSearch: async (results) => { console.log('Found', results.length, 'results'); return results; }, beforeDelete: async (id) => { console.log('Deleting', id); return id; }, afterDelete: async () => { console.log('Deleted'); }, }); ``` ### Vector DB Middleware Interface ```typescript interface VectorDBMiddleware { beforeAdd?: (docs: Document[]) => Promise; afterAdd?: (docs: Document[]) => Promise; beforeSearch?: ( vector: Float32Array, options: SearchOptions ) => Promise<{ vector: Float32Array; options: SearchOptions }>; afterSearch?: (results: SearchResult[]) => Promise; beforeDelete?: (id: string) => Promise; afterDelete?: () => Promise; beforeClear?: () => Promise; afterClear?: () => Promise; } ``` ## Custom Middleware Create your own middleware: ```typescript import type { EmbeddingModelMiddleware } from '@localmode/core'; function myCustomMiddleware(options: { threshold: number }): EmbeddingModelMiddleware { return { transformParams: async ({ values }) => { // Transform input values const filtered = values.filter((v) => v.length > options.threshold); return { values: filtered }; }, wrapEmbed: async ({ doEmbed, values, model }) => { const start = Date.now(); // Call the actual embedding function const result = await doEmbed({ values }); const duration = Date.now() - start; console.log(`Embedded ${values.length} values in ${duration}ms`); return result; }, }; } const model = wrapEmbeddingModel(baseModel, [myCustomMiddleware({ threshold: 10 })]); ``` ## Best Practices 1. **Order matters** - Validation first, caching early, logging last 2. **Keep middleware focused** - One concern per middleware 3. **Handle errors** - Middleware can throw; handle gracefully 4. **Consider performance** - Each middleware adds overhead 5. 
**Use composition** - Stack simple middleware for complex behavior ## Next Steps # Network Logging import { Callout } from 'fumadocs-ui/components/callout'; import { Steps } from 'fumadocs-ui/components/steps'; import { TypeTable } from 'fumadocs-ui/components/type-table'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; LocalMode provides network logging utilities to monitor downloads, track progress, and debug network issues. This is especially useful for tracking model downloads and displaying progress to users. ## Overview The network logging system tracks: * Model downloads with progress * API requests (if any external services are used) * Upload/download byte counts * Request timing and statistics Network logging is opt-in and local-only. No data is sent anywhereβ€”this is purely for local debugging and UI progress indicators. ## Quick Start ```ts {1,4,7-9} import { onNetworkRequest, getNetworkStats } from '@localmode/core'; // Subscribe to network events const unsubscribe = onNetworkRequest((entry) => { if (entry.category === 'model' && entry.state === 'in-progress') { console.log(`Downloading: ${entry.url} (${entry.progress}%)`); } }); // Later: get statistics const stats = getNetworkStats(); console.log(`Downloaded: ${stats.totalDownloadBytes} bytes`); // Clean up when done unsubscribe(); ``` ## Creating a Network Logger ```ts import { createNetworkLogger } from '@localmode/core'; const logger = createNetworkLogger({ maxEntries: 500, // Keep last 500 entries logHeaders: false, // Don't log request headers categories: ['model'], // Only log model downloads }); ``` ### Configuration Options boolean', }, }} /> ## Subscribing to Events ### Real-time Progress Updates ```ts import { onNetworkRequest } from '@localmode/core'; const unsubscribe = onNetworkRequest((entry) => { switch (entry.state) { case 'pending': console.log(`Starting: ${entry.url}`); break; case 'in-progress': console.log(`Progress: ${entry.progress}%`); updateProgressBar(entry.progress); break; case 'completed': console.log(`Completed: ${entry.url} (${entry.duration}ms)`); break; case 'failed': console.error(`Failed: ${entry.url} - ${entry.error}`); break; } }); ``` ### Model Download Progress UI ```ts import { onNetworkRequest } from '@localmode/core'; function ModelDownloadProgress() { const [progress, setProgress] = useState(null); const [downloading, setDownloading] = useState(false); useEffect(() => { const unsubscribe = onNetworkRequest((entry) => { if (entry.category === 'model') { if (entry.state === 'in-progress') { setDownloading(true); setProgress(entry.progress ?? 0); } else if (entry.state === 'completed' || entry.state === 'failed') { setDownloading(false); setProgress(null); } } }); return unsubscribe; }, []); if (!downloading) return null; return (
    <div>{progress}%</div>
); } ``` ## Retrieving Logs ### Get All Logs ```ts import { getNetworkLogs } from '@localmode/core'; const logs = getNetworkLogs(); console.log(`Total requests: ${logs.length}`); ``` ### Filter Logs ```ts import { getNetworkLogs } from '@localmode/core'; // Get model downloads only const modelLogs = getNetworkLogs({ category: 'model', }); // Get failed requests const failedLogs = getNetworkLogs({ state: 'failed', }); // Get recent requests (last hour) const recentLogs = getNetworkLogs({ since: new Date(Date.now() - 60 * 60 * 1000), limit: 50, order: 'desc', }); // Filter by URL pattern const huggingFaceLogs = getNetworkLogs({ urlPattern: /huggingface\.co/, }); ``` ### Filter Options ## Network Statistics ```ts import { getNetworkStats } from '@localmode/core'; const stats = getNetworkStats(); console.log(`Total requests: ${stats.totalRequests}`); console.log(`Completed: ${stats.completedRequests}`); console.log(`Failed: ${stats.failedRequests}`); console.log(`Downloaded: ${(stats.totalDownloadBytes / 1024 / 1024).toFixed(2)} MB`); console.log(`Average speed: ${(stats.averageSpeed / 1024).toFixed(2)} KB/s`); console.log(`Requests/min: ${stats.requestsPerMinute}`); // Stats by category console.log('By category:', stats.byCategory); // Stats by HTTP status console.log('By status:', stats.byStatus); ``` ### Stats Structure ', }, byStatus: { description: 'Request counts grouped by HTTP status code', type: 'Record', }, }} /> ## Clearing Logs ```ts import { clearNetworkLogs } from '@localmode/core'; // Clear all logs clearNetworkLogs(); // Clear logs older than 7 days clearNetworkLogs({ olderThan: '7d' }); // Clear logs older than specific date clearNetworkLogs({ olderThan: new Date('2024-01-01') }); ``` Duration formats: `s` (seconds), `m` (minutes), `h` (hours), `d` (days), `w` (weeks) ## Log Entry Structure | undefined', }, }} /> ## Wrapping Fetch For full request logging, wrap the global fetch: ```ts import { wrapFetchWithLogging, unwrapFetch, isFetchWrapped } from '@localmode/core'; // Wrap global fetch wrapFetchWithLogging({ category: 'api', logHeaders: true, }); // Now all fetch calls are logged await fetch('https://api.example.com/data'); // Check if fetch is wrapped console.log(isFetchWrapped()); // true // Restore original fetch unwrapFetch(); ``` Wrapping fetch affects all requests in your application. Use with care in production environments. # RAG import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Steps, Step } from 'fumadocs-ui/components/steps'; import { Accordions, Accordion } from 'fumadocs-ui/components/accordion'; import { TypeTable } from 'fumadocs-ui/components/type-table'; RAG (Retrieval-Augmented Generation) combines vector search with language models to answer questions from your documents. LocalMode provides all the building blocks: chunking, ingestion, semantic search, reranking, and hybrid search. ## RAG Pipeline Overview ### Chunk Documents Split documents into smaller, semantically meaningful pieces. ```ts import { chunk } from '@localmode/core'; const chunks = chunk(documentText, { strategy: 'recursive', size: 512, overlap: 50, }); ``` ### Generate Embeddings & Store Create embeddings and store in a vector database. 
```ts import { ingest, createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'docs', dimensions: 384 }); await ingest({ db, model: embeddingModel, documents: chunks }); ``` ### Search & Retrieve Find relevant chunks using semantic search. ```ts import { semanticSearch } from '@localmode/core'; const results = await semanticSearch({ db, model: embeddingModel, query: userQuestion, k: 10, }); ``` ### Rerank for Precision Optionally rerank results for better accuracy. ```ts import { rerank } from '@localmode/core'; const reranked = await rerank({ model: rerankerModel, query: userQuestion, documents: results.map((r) => r.metadata.text), topK: 5, }); ``` ### Generate Answer Use an LLM to generate an answer from the context. ```ts import { streamText } from '@localmode/core'; const stream = await streamText({ model: llm, prompt: `Context:\n${context}\n\nQuestion: ${userQuestion}`, }); ``` ## Chunking Split documents into smaller pieces for better retrieval: Best for general text documents. Tries separators in order: paragraphs β†’ lines β†’ sentences β†’ words. ```ts {3-7} import { chunk } from '@localmode/core'; const chunks = chunk(documentText, { strategy: 'recursive', size: 512, // Target chunk size in characters overlap: 50, // Overlap between chunks }); chunks.forEach((c, i) => { console.log(`Chunk ${i}: ${c.text.substring(0, 50)}...`); console.log(` Start: ${c.startIndex}, End: ${c.endIndex}`); }); ``` **Custom Separators:** ```ts const chunks = chunk(text, { strategy: 'recursive', size: 512, separators: ['\n\n', '\n', '. ', ' '], // Try these in order }); ``` Respects markdown structureβ€”keeps headers with their content. ```ts const chunks = chunk(markdownText, { strategy: 'markdown', size: 512, overlap: 50, }); ``` Respects code structureβ€”keeps functions and classes intact. ```ts const chunks = chunk(sourceCode, { strategy: 'code', size: 512, overlap: 50, language: 'typescript', }); ``` Supported languages: `typescript`, `javascript`, `python`, `java`, `go`, `rust`, and more. 
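When documents come from mixed sources, it can help to pick the chunking strategy from the file type. A minimal sketch using only the `chunk()` options shown above (the `chunkByFileType` helper and its extension mapping are illustrative, not part of LocalMode):

```ts
import { chunk } from '@localmode/core';

// Hypothetical helper: choose a chunking strategy from the filename.
// Uses only the strategy/size/overlap/language options documented above.
function chunkByFileType(filename: string, text: string) {
  if (filename.endsWith('.md')) {
    return chunk(text, { strategy: 'markdown', size: 512, overlap: 50 });
  }
  if (filename.endsWith('.ts')) {
    return chunk(text, { strategy: 'code', size: 512, overlap: 50, language: 'typescript' });
  }
  // Default: recursive chunking for plain prose
  return chunk(text, { strategy: 'recursive', size: 512, overlap: 50 });
}

const chunks = chunkByFileType('notes.md', markdownText);
console.log(`Produced ${chunks.length} chunks`);
```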
### ChunkOptions ## Ingestion Ingest documents into a vector database: ```typescript import { createVectorDB, ingest } from '@localmode/core'; import { transformers } from '@localmode/transformers'; const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const db = await createVectorDB({ name: 'docs', dimensions: 384 }); await ingest({ db, model, documents: [ { text: 'First document...', metadata: { source: 'doc1.txt' } }, { text: 'Second document...', metadata: { source: 'doc2.txt' } }, ], }); ``` ### With Automatic Chunking ```typescript await ingest({ db, model, documents: [{ text: longDocument, metadata: { source: 'book.txt' } }], chunkOptions: { strategy: 'recursive', size: 512, overlap: 50, }, }); ``` ### With Progress Tracking ```typescript await ingest({ db, model, documents: largeDocumentArray, onProgress: (progress) => { console.log(`Ingested ${progress.completed}/${progress.total} documents`); }, }); ``` ## Semantic Search Search for relevant chunks: ```typescript import { semanticSearch } from '@localmode/core'; const results = await semanticSearch({ db, model, query: 'What are the benefits of machine learning?', k: 5, }); results.forEach((r) => { console.log(`Score: ${r.score.toFixed(3)}`); console.log(`Text: ${r.metadata.text}`); }); ``` ## Reranking Improve results with cross-encoder reranking: ```typescript import { rerank } from '@localmode/core'; import { transformers } from '@localmode/transformers'; const rerankerModel = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2'); // Get initial results const results = await semanticSearch({ db, model, query, k: 20 }); // Rerank for better accuracy const reranked = await rerank({ model: rerankerModel, query, documents: results.map((r) => r.metadata.text as string), topK: 5, }); reranked.forEach((r) => { console.log(`Score: ${r.score.toFixed(3)}`); console.log(`Text: ${r.document.substring(0, 100)}...`); }); ``` Reranking improves accuracy but adds latency. 
Use it when: - Accuracy is more important than speed * You're building a Q\&A system - Initial results may have false positives ## BM25 Keyword Search For exact keyword matching: ```typescript import { createBM25 } from '@localmode/core'; const bm25 = createBM25(documents.map((d) => d.text)); const keywordResults = bm25.search('machine learning'); keywordResults.forEach((r) => { console.log(`Score: ${r.score.toFixed(3)}, Index: ${r.index}`); }); ``` ## Hybrid Search Combine semantic and keyword search: ```typescript import { semanticSearch, createBM25, hybridFuse } from '@localmode/core'; // Semantic search const semanticResults = await semanticSearch({ db, model, query, k: 20 }); // BM25 keyword search const bm25 = createBM25(documents.map((d) => d.text)); const keywordResults = bm25.search(query); // Combine with fusion const hybridResults = hybridFuse({ semantic: semanticResults.map((r) => ({ id: r.id, score: r.score, })), keyword: keywordResults.map((r) => ({ id: documents[r.index].id, score: r.score, })), k: 10, alpha: 0.7, // Weight for semantic (0.7 = 70% semantic, 30% keyword) }); ``` ### Reciprocal Rank Fusion Alternative fusion method: ```typescript import { reciprocalRankFusion } from '@localmode/core'; const fused = reciprocalRankFusion({ rankings: [semanticResults.map((r) => r.id), keywordResults.map((r) => documents[r.index].id)], k: 10, constant: 60, // RRF constant (default: 60) }); ``` ## Complete RAG Pipeline Here's a complete example: ```typescript import { createVectorDB, chunk, ingest, semanticSearch, rerank, streamText } from '@localmode/core'; import { transformers } from '@localmode/transformers'; import { webllm } from '@localmode/webllm'; // 1. Setup models const embeddingModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const rerankerModel = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2'); const llm = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); // 2. Create database const db = await createVectorDB({ name: 'knowledge-base', dimensions: 384 }); // 3. Ingest documents async function ingestDocuments(documents: Array<{ text: string; source: string }>) { for (const doc of documents) { const chunks = chunk(doc.text, { strategy: 'recursive', size: 512, overlap: 50, }); await ingest({ db, model: embeddingModel, documents: chunks.map((c) => ({ text: c.text, metadata: { source: doc.source, start: c.startIndex, end: c.endIndex, }, })), }); } } // 4. Query function async function query(question: string) { // Retrieve const results = await semanticSearch({ db, model: embeddingModel, query: question, k: 10, }); // Rerank const reranked = await rerank({ model: rerankerModel, query: question, documents: results.map((r) => r.metadata.text as string), topK: 3, }); // Generate const context = reranked.map((r) => r.document).join('\n\n---\n\n'); const stream = await streamText({ model: llm, prompt: `You are a helpful assistant. Answer based only on the context provided. If the answer is not in the context, say "I don't have that information." 
Context: ${context} Question: ${question} Answer:`, }); return stream; } // Usage const stream = await query('What is machine learning?'); for await (const chunk of stream) { process.stdout.write(chunk.text); } ``` ## Document Loaders Load documents from various formats: ```typescript import { TextLoader, JSONLoader, CSVLoader, HTMLLoader } from '@localmode/core'; import { PDFLoader } from '@localmode/pdfjs'; // Text files const textLoader = new TextLoader(); const { documents: textDocs } = await textLoader.load(textBlob); // JSON const jsonLoader = new JSONLoader({ textField: 'content' }); const { documents: jsonDocs } = await jsonLoader.load(jsonBlob); // CSV const csvLoader = new CSVLoader({ textColumn: 'description' }); const { documents: csvDocs } = await csvLoader.load(csvBlob); // HTML const htmlLoader = new HTMLLoader({ selector: 'article' }); const { documents: htmlDocs } = await htmlLoader.load(htmlBlob); // PDF const pdfLoader = new PDFLoader({ splitByPage: true }); const { documents: pdfDocs } = await pdfLoader.load(pdfBlob); ``` ## Best Practices 1. **Chunk size** - 256-512 chars works well for most cases 2. **Overlap** - 10-20% overlap helps maintain context 3. **Reranking** - Always rerank for Q\&A applications 4. **Hybrid search** - Combine semantic + keyword for robust results 5. **Context window** - Don't exceed LLM's context limit ## Next Steps # Reranking import { Callout } from 'fumadocs-ui/components/callout'; import { Steps } from 'fumadocs-ui/components/steps'; import { TypeTable } from 'fumadocs-ui/components/type-table'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; Reranking improves the accuracy of RAG (Retrieval-Augmented Generation) pipelines by re-scoring documents based on their relevance to a query. After initial vector search retrieves candidates, reranking provides more precise ordering. ## Why Rerank? Vector search retrieves documents based on embedding similarity, but rerankers use cross-attention to directly score query-document pairs, often producing more accurate rankings for the final generation step. Typical RAG pipeline: 1. **Retrieve** β€” Get 20-50 candidates via vector search (fast, approximate) 2. **Rerank** β€” Score and reorder candidates (precise, slower) 3. **Generate** β€” Use top 5-10 documents for LLM context ## Basic Usage ```ts {5-9} import { rerank } from '@localmode/core'; import { transformers } from '@localmode/transformers'; // Create reranker model const rerankerModel = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2'); const { results } = await rerank({ model: rerankerModel, query: 'What is machine learning?', documents: [ 'Machine learning is a type of artificial intelligence...', 'Cooking pasta requires boiling water...', 'Deep learning is a subset of machine learning...', ], topK: 2, }); // results: [ // { index: 0, score: 0.95, text: 'Machine learning is a type of...' }, // { index: 2, score: 0.88, text: 'Deep learning is a subset of...' } // ] ``` ## RAG Pipeline Example ### Perform Initial Vector Search Retrieve more candidates than you needβ€”reranking will filter to the best ones. 
```ts import { semanticSearch, createVectorDB, embed } from '@localmode/core'; import { transformers } from '@localmode/transformers'; const embeddingModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); // Get 20 candidates from vector search const { embedding: queryVector } = await embed({ model: embeddingModel, value: 'What is machine learning?', }); const candidates = await db.search(queryVector, { k: 20 }); ``` ### Rerank the Candidates Score each document against the query for precise relevance ranking. ```ts const rerankerModel = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2'); const { results } = await rerank({ model: rerankerModel, query: 'What is machine learning?', documents: candidates.map((c) => c.metadata.text), topK: 5, // Keep only top 5 after reranking }); ``` ### Use Top Results for Generation Pass the reranked documents as context to your LLM. ```ts const context = results.map((r) => r.text).join('\n\n'); const response = await streamText({ model: languageModel, prompt: `Based on the following context, answer the question. Context: ${context} Question: What is machine learning?`, }); ``` ## API Reference ### `rerank(options)` Reranks documents by relevance to a query. ### Return Type: `RerankResult` ### `RankedDocument` ## Supported Models Cross-encoder models score query-document pairs directly: | Model | Size | Speed | Quality | Use Case | | -------------------------------- | ----- | ------ | ------- | --------------- | | `Xenova/ms-marco-MiniLM-L-6-v2` | 23MB | Fast | Good | General purpose | | `Xenova/ms-marco-MiniLM-L-12-v2` | 33MB | Medium | Better | Higher accuracy | | `Xenova/bge-reranker-base` | 110MB | Slower | Best | Maximum quality | Choose based on your needs: * **Speed-critical**: Use `ms-marco-MiniLM-L-6-v2` for fast inference * **Balanced**: Use `ms-marco-MiniLM-L-12-v2` for good accuracy with reasonable speed * **Quality-critical**: Use `bge-reranker-base` when accuracy matters most Start with `ms-marco-MiniLM-L-6-v2`β€”it's a great balance of speed and quality for most applications. ## Cancellation Support All reranking operations support `AbortSignal` for cancellation: ```ts const controller = new AbortController(); // Cancel after 5 seconds setTimeout(() => controller.abort(), 5000); try { const { results } = await rerank({ model: rerankerModel, query: 'What is AI?', documents: largeDocumentSet, abortSignal: controller.signal, }); } catch (error) { if (error.name === 'AbortError') { console.log('Reranking was cancelled'); } } ``` ## Performance Tips **Optimize your reranking pipeline:** 1. **Limit candidates**: Retrieve 20-50 candidates, not hundreds 2. **Use topK**: Only return the documents you need 3. **Batch when possible**: Rerank multiple queries together if your use case allows 4. **Cache results**: Consider caching reranked results for repeated queries ## Custom Reranker Implementation Implement the `RerankerModel` interface to create custom rerankers: ```ts import type { RerankerModel, DoRerankOptions, DoRerankResult } from '@localmode/core'; class MyCustomReranker implements RerankerModel { readonly modelId = 'custom:my-reranker'; readonly provider = 'custom'; async doRerank(options: DoRerankOptions): Promise { const { query, documents, topK } = options; // Your scoring logic here const scored = documents.map((doc, index) => ({ index, score: this.scoreDocument(query, doc), text: doc, })); // Sort by score descending scored.sort((a, b) => b.score - a.score); // Apply topK const results = topK ? 
scored.slice(0, topK) : scored; return { results, usage: { inputTokens: query.length + documents.join('').length, durationMs: 0, }, }; } private scoreDocument(query: string, document: string): number { // Implement your scoring logic return 0.5; } } ``` # Security import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; LocalMode provides built-in security utilities for encryption, key management, and PII redaction. LocalMode has **zero telemetry**. No data ever leaves your device. All processing happens locally in the browser. ## Encryption Encrypt sensitive data using Web Crypto API: ```typescript import { encrypt, decrypt, deriveKey } from '@localmode/core'; // Derive a key from a password const key = await deriveKey('user-password', 'unique-salt'); // Encrypt data const { ciphertext, iv } = await encrypt(key, 'sensitive data'); // Decrypt data const decrypted = await decrypt(key, ciphertext, iv); console.log(decrypted); // 'sensitive data' ``` ### Key Derivation Use PBKDF2 to derive keys from passwords: ```ts {3-6} import { deriveKey } from '@localmode/core'; const key = await deriveKey(password, salt, { iterations: 100000, // Higher = more secure, slower keyLength: 256, // AES-256 }); ``` Always use at least 100,000 iterations for PBKDF2. Lower values make brute-force attacks easier. ### Encryption Options ```ts const { ciphertext, iv } = await encrypt(key, data, { algorithm: 'AES-GCM', // Default, recommended }); ``` AES-GCM provides authenticated encryptionβ€”it protects both confidentiality and integrity. Use AES-CBC only for compatibility with legacy systems. ## Key Management Store keys securely: ```typescript import { KeyStore } from '@localmode/core'; const keyStore = new KeyStore({ name: 'my-app-keys', }); // Store a key await keyStore.set('encryption-key', key); // Retrieve a key const storedKey = await keyStore.get('encryption-key'); // Delete a key await keyStore.delete('encryption-key'); ``` Keys stored in IndexedDB are accessible to JavaScript. For sensitive applications, consider using hardware-backed keys via WebAuthn. 
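Putting these pieces together, here is a minimal sketch of first-time key setup: generate a random salt, derive a key, and keep it in the `KeyStore`. The hex-encoded salt and the `'encryption-key'` name are illustrative assumptions, not a prescribed LocalMode pattern; `userPassword` comes from your own login flow.

```ts
import { deriveKey, KeyStore } from '@localmode/core';

const keyStore = new KeyStore({ name: 'my-app-keys' });

// Assumption: the salt can be any unique string; here we hex-encode 16 random bytes.
const saltBytes = crypto.getRandomValues(new Uint8Array(16));
const salt = Array.from(saltBytes, (b) => b.toString(16).padStart(2, '0')).join('');

// Derive with a high iteration count (see the PBKDF2 note above).
const key = await deriveKey(userPassword, salt, { iterations: 100_000, keyLength: 256 });

// Persist the derived key for later sessions. The salt must be stored too
// (for example alongside your app settings) so the key can be re-derived.
await keyStore.set('encryption-key', key);
```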
## Encrypting Embeddings Encrypt embeddings before storage: ```typescript import { wrapEmbeddingModel, encryptionMiddleware, deriveKey } from '@localmode/core'; const key = await deriveKey('user-password', 'salt'); const model = wrapEmbeddingModel(baseModel, [encryptionMiddleware({ key })]); // Embeddings are automatically encrypted const { embedding } = await embed({ model, value: 'sensitive text' }); ``` ## PII Redaction Remove personally identifiable information before processing: ```typescript import { redactPII } from '@localmode/core'; const text = 'Contact John at john@example.com or call 555-123-4567'; const redacted = redactPII(text, { patterns: ['email', 'phone'], replacement: '[REDACTED]', }); console.log(redacted); // 'Contact John at [REDACTED] or call [REDACTED]' ``` ### Available Patterns | Pattern | Description | Example | | ------------ | ----------------------- | ------------------------------------------- | | `email` | Email addresses | [john@example.com](mailto:john@example.com) | | `phone` | Phone numbers | 555-123-4567 | | `ssn` | Social Security numbers | 123-45-6789 | | `creditCard` | Credit card numbers | 4111-1111-1111-1111 | | `ip` | IP addresses | 192.168.1.1 | | `address` | Street addresses | 123 Main St | ### Custom Patterns ```typescript const redacted = redactPII(text, { patterns: ['email', 'phone'], custom: [ { name: 'employeeId', regex: /EMP-\d{6}/g, }, ], replacement: (match, pattern) => `[${pattern.toUpperCase()}]`, }); ``` ### PII Middleware Automatically redact PII before embedding: ```typescript import { wrapEmbeddingModel, piiRedactionMiddleware } from '@localmode/core'; const model = wrapEmbeddingModel(baseModel, [ piiRedactionMiddleware({ patterns: ['email', 'phone', 'ssn'], replacement: '[REDACTED]', }), ]); // PII is automatically redacted before embedding const { embedding } = await embed({ model, value: 'Email me at john@example.com', }); // Actually embeds: 'Email me at [REDACTED]' ``` ## Feature Detection Check security feature availability: ```typescript import { isCryptoSupported, isCrossOriginIsolated } from '@localmode/core'; if (!isCryptoSupported()) { console.warn('Web Crypto API not available'); } if (!isCrossOriginIsolated()) { console.warn('SharedArrayBuffer not available'); } ``` ## Security Best Practices 1. **Never store passwords** - Use key derivation 2. **Unique salts** - Generate random salts for each key 3. **High iterations** - Use at least 100,000 PBKDF2 iterations 4. **Redact PII** - Always redact before processing user data 5. 
**Zero telemetry** - LocalMode never phones home ### Secure RAG Pipeline ```typescript import { wrapEmbeddingModel, piiRedactionMiddleware, encryptionMiddleware, deriveKey, } from '@localmode/core'; // Setup secure model const key = await deriveKey(userPassword, uniqueSalt); const secureModel = wrapEmbeddingModel(baseModel, [ piiRedactionMiddleware({ patterns: ['email', 'phone', 'ssn', 'creditCard'], }), encryptionMiddleware({ key }), ]); // All embeddings are PII-redacted and encrypted const { embedding } = await embed({ model: secureModel, value: userInput, }); ``` ## Content Security Policy For maximum security, configure CSP headers: ```typescript // next.config.js const securityHeaders = [ { key: 'Content-Security-Policy', value: [ "default-src 'self'", "script-src 'self' 'wasm-unsafe-eval'", // Required for WASM "worker-src 'self' blob:", // Required for workers "connect-src 'self' https://huggingface.co https://cdn-lfs.huggingface.co", ].join('; '), }, ]; ``` ## Cross-Origin Isolation Some features require cross-origin isolation: ```typescript // Check if isolated if (crossOriginIsolated) { // SharedArrayBuffer available // Better performance for workers } // Enable via headers: // Cross-Origin-Opener-Policy: same-origin // Cross-Origin-Embedder-Policy: require-corp ``` ## Audit Logging Log security-relevant events: ```typescript import { wrapEmbeddingModel, loggingMiddleware } from '@localmode/core'; const model = wrapEmbeddingModel(baseModel, [ loggingMiddleware({ logger: (event) => { // Log to secure audit trail auditLog.log({ timestamp: new Date().toISOString(), action: 'embedding', model: event.modelId, inputCount: event.inputCount, // Don't log actual input values! }); }, }), ]); ``` ## Next Steps # Storage import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { TypeTable } from 'fumadocs-ui/components/type-table'; LocalMode provides flexible storage options for persisting vector databases and application data. ## Storage Options The default storage uses IndexedDB for persistence: ```ts {3-6} import { IndexedDBStorage, createVectorDB } from '@localmode/core'; const storage = new IndexedDBStorage({ name: 'my-app', storeName: 'vectors', // Optional, defaults to 'store' }); // Or use default (IndexedDB) automatically: const db = await createVectorDB({ name: 'documents', dimensions: 384, // Uses IndexedDBStorage by default }); ``` Data persists across page reloads and browser restarts. For temporary data or environments without IndexedDB: ```ts {3,4} import { MemoryStorage, createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'temp', dimensions: 384, storage: new MemoryStorage(), }); // ⚠️ Data is lost on page reload ``` Useful for: * Testing and development * Temporary caches * Safari private browsing fallback Safari's private browsing mode blocks IndexedDB. Use `MemoryStorage` as a fallback or detect this condition with `isIndexedDBSupported()`. 
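For example, you can pick the storage backend at startup with feature detection. A minimal sketch combining `isIndexedDBSupported()` (see Feature Detection below) with the two built-in adapters; the automatic `createStorageWithFallback()` helper later on this page covers the same need:

```ts
import {
  createVectorDB,
  IndexedDBStorage,
  MemoryStorage,
  isIndexedDBSupported,
} from '@localmode/core';

// Fall back to in-memory storage when IndexedDB is unavailable
// (e.g. Safari private browsing). Data will not persist in that case.
const storage = isIndexedDBSupported()
  ? new IndexedDBStorage({ name: 'my-app' })
  : new MemoryStorage();

const db = await createVectorDB({
  name: 'documents',
  dimensions: 384,
  storage,
});
```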
## Storage Interface All storage adapters implement this interface: ', }, 'set(key, value)': { description: 'Store a document with the given key', type: 'Promise', }, 'delete(key)': { description: 'Delete a document by key', type: 'Promise', }, 'keys()': { description: 'Get all stored keys', type: 'Promise', }, 'clear()': { description: 'Delete all documents', type: 'Promise', }, 'close()': { description: 'Close the storage connection', type: 'Promise', }, }} /> ### StoredDocument ', }, }} /> ## Third-Party Adapters ### Dexie.js ```typescript import { DexieStorage } from '@localmode/dexie'; import { createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'dexie-db', dimensions: 384, storage: new DexieStorage({ name: 'my-app', version: 1, }), }); ``` ### idb ```typescript import { IDBStorage } from '@localmode/idb'; import { createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'idb-db', dimensions: 384, storage: new IDBStorage({ name: 'my-app', }), }); ``` ### localForage ```typescript import { LocalForageStorage } from '@localmode/localforage'; import { createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'lf-db', dimensions: 384, storage: new LocalForageStorage({ name: 'my-app', driver: 'INDEXEDDB', }), }); ``` ## Custom Storage Implement your own storage adapter: ```typescript import type { Storage, StoredDocument } from '@localmode/core'; class MyCustomStorage implements Storage { private data = new Map(); async get(key: string) { return this.data.get(key); } async set(key: string, value: StoredDocument) { this.data.set(key, value); } async delete(key: string) { this.data.delete(key); } async keys() { return Array.from(this.data.keys()); } async clear() { this.data.clear(); } async close() { // Cleanup if needed } } ``` ## Storage Fallback Automatically fallback when IndexedDB is unavailable: ```typescript import { createStorageWithFallback, IndexedDBStorage, MemoryStorage } from '@localmode/core'; const storage = await createStorageWithFallback({ providers: [() => new IndexedDBStorage({ name: 'app' }), () => new MemoryStorage()], onFallback: (error, index) => { console.warn(`Storage provider ${index} failed:`, error.message); }, }); const db = await createVectorDB({ name: 'robust-db', dimensions: 384, storage, }); ``` ## Quota Management Monitor and manage storage quota: ```typescript import { getStorageQuota, requestPersistence } from '@localmode/core'; // Check available quota const quota = await getStorageQuota(); console.log('Used:', quota.usage); console.log('Available:', quota.quota); console.log('Percent used:', ((quota.usage / quota.quota) * 100).toFixed(1) + '%'); // Request persistent storage (won't be auto-cleared) const isPersisted = await requestPersistence(); if (isPersisted) { console.log('Storage is now persistent'); } ``` ### Quota Warnings ```typescript import { checkQuotaWithWarnings } from '@localmode/core'; const { ok, warning, quota } = await checkQuotaWithWarnings({ warningThreshold: 0.8, // Warn at 80% usage }); if (warning) { console.warn('Storage is almost full!', quota); } ``` ## Cleanup Remove old or unused data: ```typescript import { cleanup } from '@localmode/core'; // Clean up databases older than 30 days await cleanup({ maxAge: 30 * 24 * 60 * 60 * 1000, // 30 days in ms onDelete: (name) => console.log(`Deleted: ${name}`), }); // Clean up to free space await cleanup({ targetFreeSpace: 100 * 1024 * 1024, // 100MB }); ``` ## Cross-Tab Synchronization Keep data in sync across 
browser tabs: ```typescript import { createBroadcaster } from '@localmode/core'; const broadcaster = createBroadcaster('my-app-sync'); // Listen for changes from other tabs broadcaster.subscribe((message) => { if (message.type === 'document-added') { console.log('New document added in another tab:', message.id); // Refresh your UI } }); // Broadcast changes to other tabs await db.add({ id: 'new-doc', vector, metadata }); broadcaster.publish({ type: 'document-added', id: 'new-doc', }); ``` ### Web Locks Prevent concurrent writes: ```typescript import { createLockManager } from '@localmode/core'; const locks = createLockManager(); // Acquire exclusive lock before writing await locks.withLock('db-write', async () => { await db.addMany(documents); }); // Other tabs wait for lock to be released ``` ## Feature Detection Check storage capabilities: ```typescript import { isIndexedDBSupported, isWebLocksSupported } from '@localmode/core'; if (!isIndexedDBSupported()) { console.warn('IndexedDB not available, using memory storage'); } if (!isWebLocksSupported()) { console.warn('Web Locks not available, using fallback'); } ``` ## Best Practices 1. **Always use fallbacks** - Safari private browsing blocks IndexedDB 2. **Request persistence** - Prevent auto-clearing of important data 3. **Monitor quota** - Show warnings before storage is full 4. **Clean up** - Remove old data periodically 5. **Use locks** - Prevent race conditions across tabs ## Next Steps # Cross-Tab Sync import { Callout } from 'fumadocs-ui/components/callout'; import { Steps } from 'fumadocs-ui/components/steps'; import { TypeTable } from 'fumadocs-ui/components/type-table'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Accordion, Accordions } from 'fumadocs-ui/components/accordion'; LocalMode provides cross-tab synchronization to keep VectorDB instances in sync across multiple browser tabs. This prevents data inconsistencies when users have your app open in multiple tabs. ## Overview Cross-tab sync uses two browser APIs: * **Web Locks API** β€” Prevents concurrent writes from corrupting data * **BroadcastChannel API** β€” Notifies other tabs when data changes Both APIs have fallbacks for unsupported browsers. If unavailable, operations proceed without synchronization (safe for single-tab usage). ## Quick Start ### Create a Lock Manager The lock manager ensures only one tab can write at a time. ```ts {3} import { getLockManager } from '@localmode/core'; const locks = getLockManager('my-database'); ``` ### Create a Broadcaster The broadcaster notifies other tabs of changes. ```ts {3} import { createBroadcaster } from '@localmode/core'; const broadcaster = createBroadcaster('my-database'); ``` ### Use Locks for Write Operations Wrap write operations in locks to prevent conflicts. ```ts await locks.withWriteLock('documents', async () => { await db.add({ id: 'doc-1', vector: embedding, metadata: { text: 'Hello world' }, }); // Notify other tabs broadcaster.notifyDocumentAdded('default', 'doc-1'); }); ``` ### Subscribe to Changes React to changes from other tabs. ```ts broadcaster.on('document_added', (message) => { console.log(`Document ${message.documentId} added in another tab`); // Refresh your UI or invalidate cache }); ``` ## Lock Manager API ### `getLockManager(dbName)` Creates or retrieves a lock manager for a database. 
```ts import { getLockManager } from '@localmode/core'; const locks = getLockManager('my-database'); ``` ### Lock Methods ', }, 'withReadLock(resource, callback)': { description: 'Acquire a shared (read) lock', type: 'Promise', }, 'withWriteLock(resource, callback)': { description: 'Acquire an exclusive (write) lock', type: 'Promise', }, 'tryLock(resource, callback, mode?)': { description: 'Try to acquire lock immediately, returns null if unavailable', type: 'Promise', }, 'getLockState()': { description: 'Get current lock state for debugging', type: 'Promise<{ held: string[], pending: string[] }>', }, }} /> ### Lock Options ### Lock Examples Multiple tabs can hold read locks simultaneously: ```ts // Read lock - multiple tabs can read at once const data = await locks.withReadLock('documents', async () => { return await db.search(queryVector, { k: 10 }); }); ``` Write locks are exclusiveβ€”only one tab can hold the lock: ```ts // Write lock - exclusive access await locks.withWriteLock('documents', async () => { await db.add({ id: 'doc-1', vector, metadata }); }); ``` Non-blocking lock attemptβ€”useful for optional optimizations: ```ts // Try to get lock, return null if unavailable const result = await locks.tryLock('documents', async () => { await db.add({ id: 'doc-1', vector, metadata }); return 'success'; }); if (result === null) { console.log('Another tab is writing, try again later'); } ``` Fail if lock isn't acquired within timeout: ```ts try { await locks.withLock( 'documents', async () => { await db.add({ id: 'doc-1', vector, metadata }); }, { timeout: 5000 } // 5 second timeout ); } catch (error) { console.error('Lock timeout - another tab is holding the lock'); } ``` ## Broadcaster API ### `createBroadcaster(dbName)` Creates a broadcaster for cross-tab communication. ```ts import { createBroadcaster } from '@localmode/core'; const broadcaster = createBroadcaster('my-database'); ``` ### Notification Methods ### Event Types Subscribe to specific event types: ```ts type BroadcastMessageType = | 'document_added' | 'document_updated' | 'document_deleted' | 'documents_deleted' | 'collection_cleared' | 'database_cleared' | 'index_updated' | 'leader_elected' | 'leader_ping'; ``` ### Subscription Methods ```ts // Subscribe to specific event const unsubscribe = broadcaster.on('document_added', (message) => { console.log('Document added:', message.documentId); }); // Subscribe to all events const unsubscribeAll = broadcaster.onAny((message) => { console.log('Event:', message.type); }); // Clean up unsubscribe(); unsubscribeAll(); ``` ### Message Structure ## Leader Election For tasks that should only run in one tab (like background sync), use leader election: ```ts const broadcaster = createBroadcaster('my-database'); // Try to become the leader const isLeader = await broadcaster.electLeader(); if (isLeader) { console.log('This tab is the leader'); // Start background sync, cleanup tasks, etc. startBackgroundSync(); } // Check leader status if (broadcaster.getIsLeader()) { // Run leader-only tasks } // Resign leadership (e.g., before tab closes) broadcaster.resignLeadership(); ``` Leader election uses localStorage to coordinate between tabs. The leader sends periodic heartbeatsβ€”if a leader doesn't ping for 10 seconds, another tab can take over. 
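Because leadership can lapse when the leading tab closes or freezes, a follower may want to retry the election periodically. A hedged sketch under that assumption (the 15-second retry interval and `startBackgroundSync` are illustrative; retrying `electLeader()` after the heartbeat times out is an assumption, not documented behaviour):

```ts
import { createBroadcaster } from '@localmode/core';

const broadcaster = createBroadcaster('my-database');

// Assumption: electLeader() can be retried safely. With a ~10s heartbeat
// timeout, a 15s retry gives a surviving tab a chance to take over.
async function ensureLeadership(onElected: () => void) {
  if (await broadcaster.electLeader()) onElected();
  setInterval(async () => {
    if (!broadcaster.getIsLeader() && (await broadcaster.electLeader())) {
      onElected();
    }
  }, 15_000);
}

await ensureLeadership(() => startBackgroundSync());

// Give up leadership promptly when the tab closes.
window.addEventListener('beforeunload', () => broadcaster.resignLeadership());
```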
## Full Integration Example ```ts import { createVectorDB, getLockManager, createBroadcaster, embed } from '@localmode/core'; import { transformers } from '@localmode/transformers'; // Setup const db = await createVectorDB({ name: 'documents', dimensions: 384 }); const locks = getLockManager('documents'); const broadcaster = createBroadcaster('documents'); const embeddingModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); // Subscribe to changes from other tabs broadcaster.on('document_added', async ({ documentId }) => { console.log(`Refresh UI - document ${documentId} added in another tab`); // Optionally refresh your document list or clear caches }); broadcaster.on('database_cleared', () => { console.log('Database was cleared in another tab'); // Reset your UI state }); // Add document with synchronization async function addDocument(text: string) { const { embedding } = await embed({ model: embeddingModel, value: text, }); const id = crypto.randomUUID(); await locks.withWriteLock('documents', async () => { await db.add({ id, vector: embedding, metadata: { text, createdAt: Date.now() }, }); // Notify other tabs broadcaster.notifyDocumentAdded('default', id); }); return id; } // Clean up on page unload window.addEventListener('beforeunload', () => { broadcaster.close(); }); ``` ## Browser Compatibility | Feature | Chrome | Edge | Firefox | Safari | | ---------------- | ------ | ---- | ------- | ------ | | Web Locks API | 69+ | 79+ | 96+ | 15.4+ | | BroadcastChannel | 54+ | 79+ | 38+ | 15.4+ | If these APIs are unavailable (e.g., in older browsers or certain WebView environments), operations proceed without synchronization. This is safe for single-tab usage but may cause issues with multiple tabs. ### Feature Detection ```ts import { LockManager, Broadcaster } from '@localmode/core'; if (LockManager.isSupported()) { console.log('Web Locks API available'); } if (Broadcaster.isSupported()) { console.log('BroadcastChannel API available'); } ``` # Testing Utilities import { Callout } from 'fumadocs-ui/components/callout'; import { Steps } from 'fumadocs-ui/components/steps'; import { TypeTable } from 'fumadocs-ui/components/type-table'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Accordion, Accordions } from 'fumadocs-ui/components/accordion'; LocalMode provides comprehensive mock utilities for testing applications without loading real ML models. These mocks are deterministic, configurable, and match the exact API of real implementations. ## Overview Testing utilities include: * **Mock models** β€” Embedding, classification, NER, vision, audio, LLM, and more * **Mock storage** β€” In-memory storage for unit tests * **Mock VectorDB** β€” Full-featured database mock * **Test helpers** β€” Seeded random generators, test vectors, spies All mocks produce deterministic output when given the same inputs and seed, making tests reproducible. ## Mock Embedding Model The most commonly used mock for testing embedding-related features. 
```ts import { createMockEmbeddingModel, embed } from '@localmode/core'; const model = createMockEmbeddingModel({ dimensions: 384, seed: 42, // Deterministic embeddings }); const { embedding } = await embed({ model, value: 'Hello world', }); // embedding is a Float32Array of length 384 // Same input + seed always produces same output ``` ### Configuration Options void', }, }} /> ### Testing Failure Handling ```ts const failingModel = createMockEmbeddingModel({ failCount: 2, // Fail first 2 attempts failError: new Error('Model load failed'), }); // First two calls will fail, third will succeed try { await embed({ model: failingModel, value: 'test' }); } catch (error) { console.log('First attempt failed'); } ``` ### Tracking Calls ```ts const calls: string[][] = []; const model = createMockEmbeddingModel({ onEmbed: ({ values }) => { calls.push(values); }, }); await embed({ model, value: 'test 1' }); await embed({ model, value: 'test 2' }); console.log(calls); // [['test 1'], ['test 2']] console.log(model.callCount); // 2 // Reset for next test model.resetCallCount(); ``` ## All Mock Models ```ts import { createMockClassificationModel } from '@localmode/core'; const model = createMockClassificationModel({ labels: ['positive', 'negative', 'neutral'], defaultScore: 0.9, }); // Uses simple heuristics: // - "great", "good", "love" β†’ positive // - "bad", "terrible", "hate" β†’ negative // - Other β†’ neutral ``` ```ts import { createMockNERModel } from '@localmode/core'; const model = createMockNERModel({ entityTypes: ['PERSON', 'ORG', 'LOC', 'DATE'], }); // Recognizes common test entities: // PERSON: John, Jane, Bob, Alice, Mike, Sarah // ORG: Microsoft, Google, Apple, Amazon, OpenAI, Meta // LOC: Seattle, New York, London, Paris, Tokyo, Berlin // DATE: Years (2024), dates (1/1/2024), month names ``` ```ts import { createMockSpeechToTextModel } from '@localmode/core'; const model = createMockSpeechToTextModel({ mockText: 'This is the transcription.', languages: ['en', 'es', 'fr'], }); const result = await model.doTranscribe({ audio: audioBlob, returnTimestamps: true, }); // result.text = 'This is the transcription.' // result.segments = [{ start: 0, end: 0.5, text: 'This' }, ...] ``` ```ts import { createMockTextToSpeechModel } from '@localmode/core'; const model = createMockTextToSpeechModel({ sampleRate: 16000, }); const { audio, sampleRate } = await model.doSynthesize({ text: 'Hello world', }); // audio is a Blob with silent audio data ``` ```ts import { createMockLanguageModel } from '@localmode/core'; const model = createMockLanguageModel({ mockResponse: 'This is the generated response.', contextLength: 4096, }); // Non-streaming const result = await model.doGenerate({ prompt: 'Hello' }); // Streaming for await (const chunk of model.doStream({ prompt: 'Hello' })) { console.log(chunk.text); } ``` ```ts import { createMockTranslationModel } from '@localmode/core'; const model = createMockTranslationModel({ translationPrefix: '[translated]', }); const { translations } = await model.doTranslate({ texts: ['Hello', 'World'], }); // translations = ['[translated] Hello', '[translated] World'] ``` ```ts import { createMockSummarizationModel } from '@localmode/core'; const model = createMockSummarizationModel(); const { summaries } = await model.doSummarize({ texts: ['This is a long document. 
It has many sentences.'], }); // summaries = ['This is a long document.'] (first sentence) ``` ```ts import { createMockImageCaptionModel, createMockSegmentationModel, createMockObjectDetectionModel, createMockImageFeatureModel, createMockImageToImageModel, } from '@localmode/core'; // Image captioning const captioner = createMockImageCaptionModel({ mockCaption: 'A photo showing test content.', }); // Object detection const detector = createMockObjectDetectionModel(); // Returns: [{ label: 'person', score: 0.95, box: {...} }, ...] // Segmentation const segmenter = createMockSegmentationModel(); // Returns masks with 'background' and 'object' labels // Image features const featureExtractor = createMockImageFeatureModel({ dimensions: 512, }); // Image-to-image const transformer = createMockImageToImageModel({ taskType: 'upscale', }); ``` ```ts import { createMockQuestionAnsweringModel, createMockDocumentQAModel, createMockFillMaskModel, createMockOCRModel, } from '@localmode/core'; // Question answering const qaModel = createMockQuestionAnsweringModel(); // Document QA (images) const docQA = createMockDocumentQAModel(); // Fill-mask (BERT-style) const fillMask = createMockFillMaskModel({ mockPredictions: [ { token: 'great', score: 0.85 }, { token: 'wonderful', score: 0.1 }, ], }); // OCR const ocrModel = createMockOCRModel({ mockText: 'Extracted text from image.', }); ``` ## Mock Storage In-memory storage that implements the Storage interface: ```ts import { createMockStorage } from '@localmode/core'; const storage = createMockStorage(); await storage.set('key', { id: 'doc-1', vector: new Float32Array(384), metadata: { text: 'Hello' }, }); const doc = await storage.get('key'); const keys = await storage.keys(); // Access internal data for assertions const allData = storage.getData(); // Map console.log(storage.size); // 1 await storage.clear(); ``` ## Mock VectorDB Full-featured VectorDB mock with search support: ```ts import { createMockVectorDB, createTestVector } from '@localmode/core'; const db = createMockVectorDB({ name: 'test-db', dimensions: 384, delay: 0, // Optional delay for async operations }); // Add documents await db.add({ id: 'doc-1', vector: createTestVector(384, 1), metadata: { text: 'Hello world', category: 'greeting' }, }); // Search with filters const results = await db.search(queryVector, { k: 10, threshold: 0.5, filter: { category: 'greeting' }, }); // Filter operators supported: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin await db.search(queryVector, { filter: { score: { $gte: 0.8 }, category: { $in: ['greeting', 'farewell'] }, }, }); ``` ## Test Vectors Create deterministic test vectors: ```ts import { createTestVector, createTestVectors } from '@localmode/core'; // Single vector (same seed = same vector) const vector1 = createTestVector(384, 42); const vector2 = createTestVector(384, 42); // vector1 and vector2 are identical // Multiple vectors const vectors = createTestVectors(100, 384, 0); // 100 vectors with seeds 0-99 // Unnormalized vectors const rawVector = createTestVector(384, 42, false); ``` ## Seeded Random For reproducible random numbers in tests: ```ts import { createSeededRandom } from '@localmode/core'; const rng = createSeededRandom(42); const value1 = rng(); // Always same value for seed 42 const value2 = rng(); // Next value in sequence // Reset by creating new RNG with same seed const rng2 = createSeededRandom(42); // rng2() produces same sequence as rng ``` ## Test Helpers ### Wait for Condition ```ts import { waitFor } from '@localmode/core'; // 
Wait for async condition await waitFor( () => document.querySelector('.loaded') !== null, 5000, // timeout 50 // check interval ); // Wait for async function await waitFor(async () => (await db.count()) > 0); ``` ### Deferred Promises ```ts import { createDeferred } from '@localmode/core'; const { promise, resolve, reject } = createDeferred(); // Later... resolve('success'); // or: reject(new Error('failed')); const result = await promise; ``` ### Spy Functions ```ts import { createSpy } from '@localmode/core'; const spy = createSpy<(x: number, y: number) => void>(); spy(1, 2); spy(3, 4); console.log(spy.callCount); // 2 console.log(spy.calls); // [[1, 2], [3, 4]] spy.reset(); console.log(spy.callCount); // 0 ``` ## Vitest Example ```ts import { describe, it, expect, beforeEach } from 'vitest'; import { createMockEmbeddingModel, createMockVectorDB, createTestVector, embed, } from '@localmode/core'; describe('Semantic Search', () => { let model: ReturnType; let db: ReturnType; beforeEach(() => { model = createMockEmbeddingModel({ dimensions: 384, seed: 42 }); db = createMockVectorDB({ dimensions: 384 }); }); it('should find similar documents', async () => { // Arrange await db.add({ id: 'doc-1', vector: createTestVector(384, 1), metadata: { text: 'Machine learning is AI' }, }); // Act const { embedding } = await embed({ model, value: 'What is ML?' }); const results = await db.search(embedding, { k: 5 }); // Assert expect(results).toHaveLength(1); expect(results[0].id).toBe('doc-1'); expect(results[0].score).toBeGreaterThan(0); }); it('should support AbortSignal', async () => { const controller = new AbortController(); controller.abort(); await expect(embed({ model, value: 'test', abortSignal: controller.signal })).rejects.toThrow(); }); it('should handle failures with retry', async () => { const failingModel = createMockEmbeddingModel({ failCount: 1, failError: new Error('Temporary failure'), }); // First call fails, second succeeds await expect(embed({ model: failingModel, value: 'test', maxRetries: 0 })).rejects.toThrow( 'Temporary failure' ); // With retry, should succeed const result = await embed({ model: failingModel, value: 'test', maxRetries: 2, }); expect(result.embedding).toBeInstanceOf(Float32Array); }); }); ``` # Vector Database import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Accordions, Accordion } from 'fumadocs-ui/components/accordion'; import { TypeTable } from 'fumadocs-ui/components/type-table'; LocalMode includes a high-performance vector database with HNSW (Hierarchical Navigable Small World) indexing for fast approximate nearest neighbor search. 
## Creating a Database ```ts {3-6} import { createVectorDB } from '@localmode/core'; const db = await createVectorDB({ name: 'my-documents', dimensions: 384, // Must match your embedding model }); ``` ### VectorDBConfig ### With Custom Storage ```typescript import { createVectorDB, MemoryStorage } from '@localmode/core'; // Use memory storage (no persistence) const db = await createVectorDB({ name: 'temp-db', dimensions: 384, storage: new MemoryStorage(), }); // Or use a third-party adapter import { DexieStorage } from '@localmode/dexie'; const db = await createVectorDB({ name: 'dexie-db', dimensions: 384, storage: new DexieStorage({ name: 'my-app' }), }); ``` ## Adding Documents ### Single Document ```typescript await db.add({ id: 'doc-1', vector: embedding, // Float32Array metadata: { text: 'Original document text', source: 'file.pdf', page: 1, }, }); ``` ### Multiple Documents ```typescript await db.addMany([ { id: 'doc-1', vector: embeddings[0], metadata: { text: 'First' } }, { id: 'doc-2', vector: embeddings[1], metadata: { text: 'Second' } }, { id: 'doc-3', vector: embeddings[2], metadata: { text: 'Third' } }, ]); ``` The vector dimensions must match the `dimensions` specified when creating the database. Using a different size will throw a `DimensionMismatchError`. ## Searching ### Basic Search ```typescript const results = await db.search(queryVector, { k: 5 }); results.forEach((result) => { console.log(`ID: ${result.id}`); console.log(`Score: ${result.score.toFixed(4)}`); console.log(`Metadata:`, result.metadata); }); ``` ### With Filters Filter results by metadata: ```typescript const results = await db.search(queryVector, { k: 10, filter: { source: { $eq: 'manual.pdf' }, }, }); ``` ### Filter Operators | Operator | Description | Example | | -------- | --------------------- | -------------------------------- | | `$eq` | Equals | `{ status: { $eq: 'active' } }` | | `$ne` | Not equals | `{ status: { $ne: 'deleted' } }` | | `$gt` | Greater than | `{ price: { $gt: 100 } }` | | `$gte` | Greater than or equal | `{ year: { $gte: 2020 } }` | | `$lt` | Less than | `{ count: { $lt: 10 } }` | | `$lte` | Less than or equal | `{ score: { $lte: 0.5 } }` | | Operator | Description | Example | | -------- | --------------------- | ---------------------------------------- | | `$in` | Value is in array | `{ category: { $in: ['tech', 'ai'] } }` | | `$nin` | Value is not in array | `{ tag: { $nin: ['draft', 'hidden'] } }` | | Operator | Description | Example | | ------------- | ------------------ | ------------------------------------ | | `$contains` | String contains | `{ text: { $contains: 'machine' } }` | | `$startsWith` | String starts with | `{ name: { $startsWith: 'doc-' } }` | **AND conditions:** ```ts const results = await db.search(queryVector, { k: 10, filter: { $and: [ { category: { $eq: 'technology' } }, { year: { $gte: 2023 } }, { status: { $ne: 'draft' } }, ], }, }); ``` **OR conditions:** ```ts const results = await db.search(queryVector, { k: 10, filter: { $or: [{ priority: { $eq: 'high' } }, { featured: { $eq: true } }], }, }); ``` ## Updating Documents ```typescript // Update metadata only (vector unchanged) await db.update('doc-1', { metadata: { ...existingMetadata, status: 'reviewed' }, }); // Update vector and metadata await db.update('doc-1', { vector: newEmbedding, metadata: { text: 'Updated text' }, }); ``` ## Deleting Documents ```typescript // Delete single document await db.delete('doc-1'); // Delete multiple documents await db.deleteMany(['doc-1', 'doc-2', 'doc-3']); // 
Clear all documents await db.clear(); ``` ### Delete by Filter Delete documents matching a metadata filter: ```typescript // Delete all documents with a specific documentId const deletedCount = await db.deleteWhere({ documentId: 'doc-123', }); console.log(`Deleted ${deletedCount} documents`); // Delete documents matching multiple criteria const count = await db.deleteWhere({ $and: [ { source: { $eq: 'old-import.pdf' } }, { status: { $eq: 'archived' } }, ], }); ``` Use `deleteWhere()` when you need to remove multiple documents by metadata (e.g., all chunks from a specific file). It's more efficient than deleting documents one by one. ## Getting Documents ```typescript // Get by ID const doc = await db.get('doc-1'); if (doc) { console.log(doc.id, doc.vector, doc.metadata); } // Check if exists const exists = await db.has('doc-1'); // Get all IDs const ids = await db.keys(); // Get count const count = await db.size(); ``` ## Persistence By default, the vector database uses IndexedDB for persistence: ```typescript const db = await createVectorDB({ name: 'persistent-db', dimensions: 384, }); // Add documents await db.addMany(documents); // Data persists across page reloads! // On next load, just create with same name: const db2 = await createVectorDB({ name: 'persistent-db', // Same name dimensions: 384, }); // All documents are still there const count = await db2.size(); ``` ### Memory-Only Mode For temporary data or testing: ```typescript import { MemoryStorage } from '@localmode/core'; const db = await createVectorDB({ name: 'temp', dimensions: 384, storage: new MemoryStorage(), }); // Data lost on page reload ``` ## Web Worker Mode Offload database operations to a Web Worker for better main thread performance: ```typescript import { createVectorDBWithWorker } from '@localmode/core'; const db = await createVectorDBWithWorker({ name: 'worker-db', dimensions: 384, }); // Same API, but operations run in a worker const results = await db.search(queryVector, { k: 5 }); ``` Worker mode prevents blocking the main thread during: - Large batch insertions - Complex searches * Index rebuilding ## HNSW Configuration Tune the HNSW index for your use case: ```typescript const db = await createVectorDB({ name: 'tuned-db', dimensions: 384, hnswConfig: { // More connections = better accuracy, more memory m: 32, // Default: 16 // Higher = better index quality, slower builds efConstruction: 400, // Default: 200 // Higher = better search accuracy, slower searches efSearch: 100, // Default: 50 }, }); ``` ### Configuration Guidelines | Use Case | m | efConstruction | efSearch | | ------------------ | -- | -------------- | -------- | | Fast, low memory | 8 | 100 | 30 | | Balanced (default) | 16 | 200 | 50 | | High accuracy | 32 | 400 | 100 | | Maximum accuracy | 48 | 500 | 200 | ## Middleware Add middleware for logging, encryption, etc.: ```typescript import { wrapVectorDB, loggingMiddleware } from '@localmode/core'; const baseDB = await createVectorDB({ name: 'db', dimensions: 384 }); const db = wrapVectorDB(baseDB, { beforeSearch: async (vector, options) => { console.log('Searching with k =', options.k); return { vector, options }; }, afterSearch: async (results) => { console.log('Found', results.length, 'results'); return results; }, }); ``` ## Type Safety Full TypeScript support for metadata: ```typescript interface MyMetadata { text: string; source: string; page: number; tags: string[]; } const db = await createVectorDB({ name: 'typed-db', dimensions: 384, }); // Type-safe add await db.add({ id: 'doc-1', 
vector: embedding, metadata: { text: 'Hello', source: 'file.pdf', page: 1, tags: ['intro'], }, }); // Type-safe search results const results = await db.search(queryVector, { k: 5 }); results.forEach((r) => { // r.metadata is typed as MyMetadata console.log(r.metadata.text); }); ``` ## Next Steps # Overview import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Steps, Step } from 'fumadocs-ui/components/steps'; import { TypeTable } from 'fumadocs-ui/components/type-table'; # @localmode/pdfjs PDF text extraction using PDF.js for local document processing. Extract text, metadata, and structure from PDFs entirely in the browser. ## Features * πŸ“„ **Full PDF Support** β€” Extract text from any PDF document * πŸ”’ **Password Protected** β€” Handle encrypted PDFs * πŸ“‘ **Page-Level Control** β€” Process specific pages or split by page * πŸ“Š **Metadata Extraction** β€” Get title, author, dates, etc. ## Installation `bash pnpm install @localmode/pdfjs @localmode/core ` `bash npm install @localmode/pdfjs @localmode/core ` `bash yarn add @localmode/pdfjs @localmode/core ` ## Quick Start ```typescript import { extractPDFText } from '@localmode/pdfjs'; // From file input const file = document.getElementById('fileInput').files[0]; const { text, pageCount, metadata } = await extractPDFText(file); console.log(`Extracted ${pageCount} pages`); console.log('Title:', metadata?.title); console.log('Text:', text); ``` ## API Reference ### extractPDFText() Extract text from a PDF file: ```ts {3-8} import { extractPDFText } from '@localmode/pdfjs'; const result = await extractPDFText(pdfBlob, { maxPages: 10, // Limit pages to extract includePageNumbers: true, // Add [Page N] headers pageSeparator: '\n---\n', // Separator between pages password: 'secret', // For encrypted PDFs }); console.log(result.text); // Full extracted text console.log(result.pageCount); // Total number of pages console.log(result.pages); // Array of page texts console.log(result.metadata); // PDF metadata ``` #### Options #### Return Value ### PDFLoader Document loader for integration with LocalMode core: ```typescript import { PDFLoader } from '@localmode/pdfjs'; import { loadDocument } from '@localmode/core'; const loader = new PDFLoader({ splitByPage: false, // Single doc or one per page maxPages: undefined, // All pages includePageNumbers: true, password: undefined, }); const { documents } = await loadDocument(loader, pdfBlob); for (const doc of documents) { console.log(doc.text); console.log(doc.metadata); } ``` ### Split by Page Create separate documents for each page: ```typescript import { PDFLoader } from '@localmode/pdfjs'; const loader = new PDFLoader({ splitByPage: true }); const { documents } = await loadDocument(loader, pdfBlob); console.log(`Loaded ${documents.length} pages`); documents.forEach((doc, i) => { console.log(`Page ${i + 1}: ${doc.text.substring(0, 100)}...`); console.log(` Metadata:`, doc.metadata); }); ``` ### Utility Functions ```typescript import { getPDFPageCount, isPDF } from '@localmode/pdfjs'; // Get page count without full extraction const pageCount = await getPDFPageCount(pdfBlob); console.log(`PDF has ${pageCount} pages`); // Check if file is a PDF if (await isPDF(file)) { // Process as PDF } else { // Handle other file types } ``` ## RAG Pipeline Integration Build a PDF-powered RAG system: ```typescript import { PDFLoader } from '@localmode/pdfjs'; import { createVectorDB, 
chunk, ingest, semanticSearch, streamText } from '@localmode/core'; import { transformers } from '@localmode/transformers'; import { webllm } from '@localmode/webllm'; // Setup const embeddingModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const llm = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); const db = await createVectorDB({ name: 'pdf-docs', dimensions: 384 }); // Load and process PDF async function ingestPDF(file: File) { const loader = new PDFLoader({ splitByPage: true }); const { documents } = await loadDocument(loader, file); // Chunk each page const allChunks = documents.flatMap((doc, pageIndex) => chunk(doc.text, { strategy: 'recursive', size: 512, overlap: 50, }).map((c) => ({ text: c.text, metadata: { filename: file.name, page: pageIndex + 1, start: c.startIndex, end: c.endIndex, }, })) ); // Ingest into vector DB await ingest({ db, model: embeddingModel, documents: allChunks, }); return allChunks.length; } // Query async function queryPDF(question: string) { const results = await semanticSearch({ db, model: embeddingModel, query: question, k: 3, }); const context = results.map((r) => `[Page ${r.metadata.page}]\n${r.metadata.text}`).join('\n\n'); const stream = await streamText({ model: llm, prompt: `Answer based on the PDF content: ${context} Question: ${question} Answer:`, }); return stream; } ``` ## File Upload Component React example: ```typescript import { useState } from 'react'; import { extractPDFText } from '@localmode/pdfjs'; function PDFUploader() { const [text, setText] = useState(''); const [loading, setLoading] = useState(false); async function handleFile(e: React.ChangeEvent) { const file = e.target.files?.[0]; if (!file) return; setLoading(true); try { const { text, pageCount } = await extractPDFText(file); setText(text); console.log(`Extracted ${pageCount} pages`); } catch (error) { console.error('Failed to extract PDF:', error); } finally { setLoading(false); } } return (
    <div>
      {/* Markup sketch: file input plus loading and result states (element tags reconstructed) */}
      <input type="file" accept=".pdf" onChange={handleFile} />
      {loading && (
        <p>Extracting text...</p>
      )}
      {text && (
        <pre>{text}</pre>
      )}
    </div>
); } ``` ## Handling Large PDFs For large PDFs, process in chunks: ```typescript import { extractPDFText, getPDFPageCount } from '@localmode/pdfjs'; async function processLargePDF(file: File, batchSize = 10) { const totalPages = await getPDFPageCount(file); const allText: string[] = []; for (let start = 0; start < totalPages; start += batchSize) { const { pages } = await extractPDFText(file, { startPage: start, maxPages: batchSize, }); allText.push(...pages); console.log( `Processed pages ${start + 1}-${Math.min(start + batchSize, totalPages)} of ${totalPages}` ); } return allText.join('\n\n'); } ``` ## Password-Protected PDFs ```typescript import { extractPDFText } from '@localmode/pdfjs'; try { const { text } = await extractPDFText(encryptedPDF, { password: userProvidedPassword, }); console.log(text); } catch (error) { if (error.message.includes('password')) { // Prompt user for password } } ``` ## Metadata Extraction ```typescript const { metadata } = await extractPDFText(file); if (metadata) { console.log('Title:', metadata.title); console.log('Author:', metadata.author); console.log('Subject:', metadata.subject); console.log('Creator:', metadata.creator); console.log('Creation Date:', metadata.creationDate); console.log('Modification Date:', metadata.modDate); } ``` ## Best Practices 1. **Split by page** - Better for RAG; maintains page context 2. **Use page numbers** - Include in metadata for citations 3. **Handle errors** - Corrupted PDFs, wrong passwords, etc. 4. **Chunk appropriately** - 256-512 chars works well for most PDFs 5. **Check file size** - Large PDFs may need batched processing ## Next Steps # Embeddings import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; Generate dense vector representations of text for semantic search, clustering, and similarity matching. 
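To make "similarity matching" concrete before diving in, here is a minimal sketch using `embed` and the `cosineSimilarity` helper that also appears in the comparison section further down: two sentences with related meaning score higher than an unrelated one.

```typescript
import { embed, cosineSimilarity } from '@localmode/core';
import { transformers } from '@localmode/transformers';

const model = transformers.embedding('Xenova/all-MiniLM-L6-v2');

// Related and unrelated sentences
const { embedding: a } = await embed({ model, value: 'A cat sleeps on the sofa' });
const { embedding: b } = await embed({ model, value: 'A kitten naps on the couch' });
const { embedding: c } = await embed({ model, value: 'Interest rates rose last quarter' });

console.log('related:', cosineSimilarity(a, b).toFixed(3));   // higher
console.log('unrelated:', cosineSimilarity(a, c).toFixed(3)); // lower
```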
## Basic Usage ```typescript import { transformers } from '@localmode/transformers'; import { embed, embedMany } from '@localmode/core'; const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); // Single embedding const { embedding } = await embed({ model, value: 'Machine learning is fascinating', }); console.log('Dimensions:', embedding.length); // 384 // Batch embeddings const { embeddings } = await embedMany({ model, values: ['Hello', 'World', 'AI'], }); ``` ## Recommended Models | Model | Dimensions | Size | Speed | Use Case | | ---------------------------------------------- | ---------- | ----- | ----- | ------------------------ | | `Xenova/all-MiniLM-L6-v2` | 384 | 22MB | ⚑⚑⚑ | General purpose, fastest | | `Xenova/all-MiniLM-L12-v2` | 384 | 33MB | ⚑⚑ | Better accuracy | | `Xenova/all-mpnet-base-v2` | 768 | 110MB | ⚑ | Highest quality | | `Xenova/paraphrase-multilingual-MiniLM-L12-v2` | 384 | 117MB | ⚑⚑ | 50+ languages | | `Xenova/e5-small-v2` | 384 | 33MB | ⚑⚑⚑ | E5 family, fast | | `Xenova/bge-small-en-v1.5` | 384 | 33MB | ⚑⚑⚑ | BGE family | ## With Vector Database ```typescript import { createVectorDB, embed, embedMany, semanticSearch } from '@localmode/core'; import { transformers } from '@localmode/transformers'; const model = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const db = await createVectorDB({ name: 'docs', dimensions: 384 }); // Index documents const documents = [ 'Machine learning enables computers to learn from data', 'Deep learning uses neural networks with many layers', 'Natural language processing analyzes human language', ]; const { embeddings } = await embedMany({ model, values: documents }); await db.addMany( documents.map((text, i) => ({ id: `doc-${i}`, vector: embeddings[i], metadata: { text }, })) ); // Search const results = await semanticSearch({ db, model, query: 'How do neural networks work?', k: 3, }); ``` ## Progress Tracking ```typescript const { embeddings } = await embedMany({ model, values: largeDocumentArray, onProgress: (progress) => { const percent = (progress.completed / progress.total * 100).toFixed(1); console.log(`Embedding: ${percent}%`); }, }); ``` ## Model Configuration ```typescript const model = transformers.embedding('Xenova/all-MiniLM-L6-v2', { quantized: true, // Use quantized model (default: true) revision: 'main', // Model revision progress: (p) => { console.log(`Loading model: ${(p.progress * 100).toFixed(1)}%`); }, }); ``` ## Multilingual Embeddings For multilingual applications: ```typescript const model = transformers.embedding('Xenova/paraphrase-multilingual-MiniLM-L12-v2'); const { embeddings } = await embedMany({ model, values: [ 'Hello world', // English 'Bonjour le monde', // French 'Hola mundo', // Spanish 'γ“γ‚“γ«γ‘γ―δΈ–η•Œ', // Japanese 'Ω…Ψ±Ψ­Ψ¨Ψ§ Ψ¨Ψ§Ω„ΨΉΨ§Ω„Ω…', // Arabic ], }); // All embeddings are in the same vector space // Cross-lingual similarity works! 
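
// Quick cross-lingual check (sketch, not part of the original example):
// cosineSimilarity comes from '@localmode/core', as imported in the comparison
// section below. Translations of the same sentence land close together in the
// shared vector space.
const enFr = cosineSimilarity(embeddings[0], embeddings[1]); // 'Hello world' vs 'Bonjour le monde'
const enJa = cosineSimilarity(embeddings[0], embeddings[3]); // 'Hello world' vs the Japanese sentence
console.log({ enFr, enJa }); // both should be high, since all values are translations of the same greeting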
``` ## Comparison: Model Quality vs Speed ```typescript import { cosineSimilarity } from '@localmode/core'; // Test sentences const s1 = 'The cat sits on the mat'; const s2 = 'A feline rests on a rug'; const s3 = 'The stock market crashed yesterday'; // Fast model const fastModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const { embeddings: fastEmbeddings } = await embedMany({ model: fastModel, values: [s1, s2, s3], }); // Quality model const qualityModel = transformers.embedding('Xenova/all-mpnet-base-v2'); const { embeddings: qualityEmbeddings } = await embedMany({ model: qualityModel, values: [s1, s2, s3], }); // Compare similarities console.log('Fast model:'); console.log(' s1-s2:', cosineSimilarity(fastEmbeddings[0], fastEmbeddings[1]).toFixed(3)); console.log(' s1-s3:', cosineSimilarity(fastEmbeddings[0], fastEmbeddings[2]).toFixed(3)); console.log('Quality model:'); console.log(' s1-s2:', cosineSimilarity(qualityEmbeddings[0], qualityEmbeddings[1]).toFixed(3)); console.log(' s1-s3:', cosineSimilarity(qualityEmbeddings[0], qualityEmbeddings[2]).toFixed(3)); ``` ## Caching Embeddings Use caching middleware to avoid recomputation: ```typescript import { wrapEmbeddingModel, cachingMiddleware } from '@localmode/core'; const baseModel = transformers.embedding('Xenova/all-MiniLM-L6-v2'); const model = wrapEmbeddingModel(baseModel, [ cachingMiddleware({ maxSize: 10000, storage: 'indexeddb', dbName: 'embedding-cache', }), ]); // First call computes embedding const { embedding: e1 } = await embed({ model, value: 'Hello' }); // Second call returns from cache (instant) const { embedding: e2 } = await embed({ model, value: 'Hello' }); ``` ## Best Practices 1. **Match dimensions** β€” Ensure your vector DB dimensions match the model 2. **Batch when possible** β€” `embedMany()` is more efficient than multiple `embed()` calls 3. **Cache embeddings** β€” Use caching middleware for repeated queries 4. **Normalize if needed** β€” Some models benefit from L2 normalization 5. **Choose model wisely** β€” Balance quality vs speed for your use case ## Next Steps # Overview import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Accordion, Accordions } from 'fumadocs-ui/components/accordion'; # @localmode/transformers HuggingFace Transformers.js provider for LocalMode. Run ML models locally in the browser with WebGPU/WASM acceleration. 
## Features

* πŸš€ **Browser-Native** β€” Run ML models directly in the browser
* πŸ”’ **Privacy-First** β€” All processing happens locally
* πŸ“¦ **Model Caching** β€” Models cached in IndexedDB for instant subsequent loads
* ⚑ **Optimized** β€” Uses quantized models for smaller size and faster inference

## Installation

`bash pnpm install @localmode/transformers @localmode/core ` `bash npm install @localmode/transformers @localmode/core ` `bash yarn add @localmode/transformers @localmode/core `

## Quick Start

```typescript
import { transformers } from '@localmode/transformers';
import { embed, rerank } from '@localmode/core';

// Text Embeddings
const embeddingModel = transformers.embedding('Xenova/all-MiniLM-L6-v2');
const { embedding } = await embed({ model: embeddingModel, value: 'Hello world' });

// Reranking for RAG
const rerankerModel = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2');
const results = await rerank({
  model: rerankerModel,
  query: 'What is machine learning?',
  documents: ['ML is a subset of AI...', 'Python is a language...'],
  topK: 5,
});
```

## βœ… Live Features

These features are production-ready and fully documented.

| Method                             | Interface        | Description        |
| ---------------------------------- | ---------------- | ------------------ |
| `transformers.embedding(modelId)`  | `EmbeddingModel` | Text embeddings    |
| `transformers.reranker(modelId)`   | `RerankerModel`  | Document reranking |

### Recommended Models

**Embedding models:**

| Model                                          | Dimensions | Size    | Use Case              |
| ---------------------------------------------- | ---------- | ------- | --------------------- |
| `Xenova/all-MiniLM-L6-v2`                      | 384        | \~22MB  | Fast, general-purpose |
| `Xenova/all-MiniLM-L12-v2`                     | 384        | \~33MB  | Better accuracy       |
| `Xenova/paraphrase-multilingual-MiniLM-L12-v2` | 384        | \~117MB | 50+ languages         |

**Reranker models:**

| Model                           | Use Case                   | Size    |
| ------------------------------- | -------------------------- | ------- |
| `Xenova/ms-marco-MiniLM-L-6-v2` | Document reranking for RAG | \~22MB  |
| `Xenova/bge-reranker-base`      | Advanced reranking         | \~109MB |

## 🚧 Coming Soon

These features have interfaces defined and implementations available, but are under active development and testing. Full documentation will be added once they are production-ready.

The features listed below are not yet production-ready. APIs may change before stable release.
### Classification & NLP | Feature | Method | Interface | | ------------------------ | ------------------------------------------ | ----------------------------- | | Text Classification | `transformers.classifier(modelId)` | `ClassificationModel` | | Zero-Shot Classification | `transformers.zeroShotClassifier(modelId)` | `ZeroShotClassificationModel` | | Named Entity Recognition | `transformers.ner(modelId)` | `NERModel` | ### Translation & Text Processing | Feature | Method | Interface | | ------------------ | ----------------------------------------- | ------------------------ | | Translation | `transformers.translator(modelId)` | `TranslationModel` | | Summarization | `transformers.summarizer(modelId)` | `SummarizationModel` | | Fill-Mask | `transformers.fillMask(modelId)` | `FillMaskModel` | | Question Answering | `transformers.questionAnswering(modelId)` | `QuestionAnsweringModel` | ### Audio | Feature | Method | Interface | | -------------- | ------------------------------------ | ------------------- | | Speech-to-Text | `transformers.speechToText(modelId)` | `SpeechToTextModel` | | Text-to-Speech | `transformers.textToSpeech(modelId)` | `TextToSpeechModel` | ### Vision | Feature | Method | Interface | | ------------------------------ | ----------------------------------------------- | ---------------------------------- | | Image Classification | `transformers.imageClassifier(modelId)` | `ImageClassificationModel` | | Zero-Shot Image Classification | `transformers.zeroShotImageClassifier(modelId)` | `ZeroShotImageClassificationModel` | | Image Captioning | `transformers.captioner(modelId)` | `ImageCaptionModel` | | Image Segmentation | `transformers.segmenter(modelId)` | `SegmentationModel` | | Object Detection | `transformers.objectDetector(modelId)` | `ObjectDetectionModel` | | OCR | `transformers.ocr(modelId)` | `OCRModel` | | Document QA | `transformers.documentQA(modelId)` | `DocumentQAModel` | ## Model Options Configure model loading: ```typescript const model = transformers.embedding('Xenova/all-MiniLM-L6-v2', { quantized: true, // Use quantized model (smaller, faster) revision: 'main', // Model revision progress: (p) => { console.log(`Loading: ${(p.progress * 100).toFixed(1)}%`); }, }); ``` ## Model Utilities Manage model loading and caching: ```typescript import { preloadModel, isModelCached, getModelStorageUsage } from '@localmode/transformers'; // Check if model is cached const cached = await isModelCached('Xenova/all-MiniLM-L6-v2'); // Preload model with progress await preloadModel('Xenova/all-MiniLM-L6-v2', { onProgress: (p) => console.log(`${p.progress}% loaded`), }); // Check storage usage const usage = await getModelStorageUsage(); ``` ### WebGPU Detection Detect WebGPU availability for optimal device selection: ```typescript import { isWebGPUAvailable, getOptimalDevice } from '@localmode/transformers'; // Check if WebGPU is available const webgpuAvailable = await isWebGPUAvailable(); if (webgpuAvailable) { console.log('WebGPU available, using GPU acceleration'); } else { console.log('Falling back to WASM'); } // Get optimal device automatically const device = await getOptimalDevice(); // 'webgpu' or 'wasm' const model = transformers.embedding('Xenova/all-MiniLM-L6-v2', { device, // Uses WebGPU if available, otherwise WASM }); ``` ## Browser Compatibility | Browser | WebGPU | WASM | Notes | | ----------- | ------ | ---- | ---------------------------- | | Chrome 113+ | βœ… | βœ… | Best performance with WebGPU | | Edge 113+ | βœ… | βœ… | Same as Chrome | | Firefox | ❌ | 
βœ… | WASM only | | Safari 18+ | βœ… | βœ… | WebGPU available | | iOS Safari | βœ… | βœ… | WebGPU available (iOS 26+) | ## Performance Tips 1. **Use quantized models** - Smaller and faster with minimal quality loss 2. **Preload models** - Load during app init for instant inference 3. **Use WebGPU when available** - 3-5x faster than WASM 4. **Batch operations** - Process multiple inputs together ## Next Steps # Reranking import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; Reranking uses cross-encoder models to improve the relevance of search results. It's particularly useful for RAG pipelines. ## Why Reranking? Bi-encoder (embedding) models are fast but may miss subtle relevance signals. Cross-encoder rerankers consider query-document pairs together for better accuracy. ``` Query: "How does photosynthesis work?" Initial Ranking (embeddings): 1. "Photosynthesis is a process used by plants" βœ“ 2. "The synthesis of proteins requires energy" βœ— 3. "Plants convert sunlight into chemical energy" βœ“ After Reranking: 1. "Plants convert sunlight into chemical energy" βœ“ (more specific) 2. "Photosynthesis is a process used by plants" βœ“ 3. "The synthesis of proteins requires energy" βœ— ``` ## Basic Usage ```typescript import { transformers } from '@localmode/transformers'; import { rerank } from '@localmode/core'; const model = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2'); const results = await rerank({ model, query: 'What is machine learning?', documents: [ 'Machine learning is a subset of artificial intelligence.', 'The weather forecast predicts rain tomorrow.', 'Deep learning uses neural networks to learn patterns.', 'I went to the grocery store yesterday.', ], topK: 2, }); results.forEach((r, i) => { console.log(`${i + 1}. Score: ${r.score.toFixed(3)}`); console.log(` ${r.document}`); }); // Output: // 1. Score: 0.892 // Machine learning is a subset of artificial intelligence. // 2. Score: 0.756 // Deep learning uses neural networks to learn patterns. 
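
// Optional sketch (not from the original example): filter by score before building
// LLM context. Scores are model-dependent; the 0.5 cutoff here is illustrative only.
const confident = results.filter((r) => r.score > 0.5);
console.log(`${confident.length} of ${results.length} documents kept`);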
```

## With Semantic Search

Typical RAG reranking pattern:

```typescript
import { semanticSearch, rerank } from '@localmode/core';

async function searchWithReranking(query: string) {
  // Step 1: Fast semantic search (retrieve many candidates)
  const candidates = await semanticSearch({
    db,
    model: embeddingModel,
    query,
    k: 20, // Get more candidates than needed
  });

  // Step 2: Rerank for accuracy (keep top results)
  const reranked = await rerank({
    model: rerankerModel,
    query,
    documents: candidates.map((c) => c.metadata.text as string),
    topK: 5,
  });

  // Step 3: Map back to original results with metadata
  return reranked.map((r) => ({
    ...candidates[r.originalIndex],
    rerankerScore: r.score,
  }));
}
```

## Rerank Result Structure

```typescript
interface RerankResult {
  document: string;      // The document text
  score: number;         // Relevance score (higher = more relevant)
  originalIndex: number; // Index in the original documents array
}
```

## Recommended Models

| Model                            | Size   | Speed | Quality |
| -------------------------------- | ------ | ----- | ------- |
| `Xenova/ms-marco-MiniLM-L-6-v2`  | \~22MB | ⚑⚑⚑   | Good    |
| `Xenova/ms-marco-MiniLM-L-12-v2` | \~33MB | ⚑⚑    | Better  |

## Complete RAG Example

```typescript
import {
  createVectorDB,
  chunk,
  ingest,
  semanticSearch,
  rerank,
  streamText,
} from '@localmode/core';
import { transformers } from '@localmode/transformers';
import { webllm } from '@localmode/webllm';

// Setup models
const embeddingModel = transformers.embedding('Xenova/all-MiniLM-L6-v2');
const rerankerModel = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2');
const llm = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC');

// Setup database
const db = await createVectorDB({ name: 'docs', dimensions: 384 });

// RAG query function
async function ragQuery(question: string) {
  // 1. Retrieve (fast, approximate)
  const candidates = await semanticSearch({
    db,
    model: embeddingModel,
    query: question,
    k: 15,
  });

  // 2. Rerank (slower, accurate)
  const reranked = await rerank({
    model: rerankerModel,
    query: question,
    documents: candidates.map((c) => c.metadata.text as string),
    topK: 3,
  });

  // 3. Generate answer
  const context = reranked.map((r) => r.document).join('\n\n');

  const stream = await streamText({
    model: llm,
    prompt: `Answer based on the context:

Context:
${context}

Question: ${question}

Answer:`,
  });

  return stream;
}
```

## When to Use Reranking

Use reranking when:

* Building Q\&A or chatbot applications
* Initial search returns many similar results
* Accuracy matters more than latency
* Documents have subtle relevance differences

Skip reranking when:

* Latency is critical (real-time applications)
* Results are clearly distinct
* Simple keyword matching is sufficient
* Processing very large result sets

## Performance Optimization

```typescript
// Balance between accuracy and speed
const reranked = await rerank({
  model: rerankerModel,
  query,
  documents: candidates.slice(0, 10), // Limit candidates
  topK: 3,
});

// For large result sets, rerank in batches
async function rerankLargeResultSet(query: string, documents: string[], topK: number) {
  const batchSize = 50;
  const batches: RerankResult[][] = [];

  for (let i = 0; i < documents.length; i += batchSize) {
    const batch = documents.slice(i, i + batchSize);
    const result = await rerank({
      model: rerankerModel,
      query,
      documents: batch,
      topK: Math.min(topK, batch.length),
    });
    batches.push(result.map((r) => ({
      ...r,
      originalIndex: r.originalIndex + i,
    })));
  }

  // Merge and re-sort
  return batches
    .flat()
    .sort((a, b) => b.score - a.score)
    .slice(0, topK);
}
```

## Best Practices

1.
**Retrieve more, rerank less** β€” Get 3-5x more candidates than needed 2. **Use appropriate topK** β€” 3-5 is usually enough for RAG context 3. **Cache reranker model** β€” Load once, reuse for all queries 4. **Consider latency budget** β€” Reranking adds 50-200ms per query 5. **Test with/without** β€” Measure accuracy improvement for your use case ## Next Steps # Overview import { Callout } from 'fumadocs-ui/components/callout'; import { Card, Cards } from 'fumadocs-ui/components/card'; import { Tab, Tabs } from 'fumadocs-ui/components/tabs'; import { Accordions, Accordion } from 'fumadocs-ui/components/accordion'; import { TypeTable } from 'fumadocs-ui/components/type-table'; # @localmode/webllm Run large language models locally in the browser using WebGPU. Uses 4-bit quantized models for efficient inference. ## Features * πŸš€ **WebGPU Acceleration** β€” Native GPU performance in the browser * πŸ”’ **Private** β€” Models run entirely on-device * πŸ“¦ **Cached** β€” Models stored in browser cache after first download * ⚑ **Streaming** β€” Real-time token generation ## Installation `bash pnpm install @localmode/webllm @localmode/core ` `bash npm install @localmode/webllm @localmode/core ` `bash yarn add @localmode/webllm @localmode/core ` ## Quick Start ```typescript import { streamText } from '@localmode/core'; import { webllm } from '@localmode/webllm'; const model = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); const stream = await streamText({ model, prompt: 'Explain quantum computing in simple terms.', }); for await (const chunk of stream) { process.stdout.write(chunk.text); } ``` ## Available Models | Model | Size | Context | Best For | | ----------------------------------- | ------- | ------- | ------------------------------------- | | `Llama-3.2-1B-Instruct-q4f16_1-MLC` | \~700MB | 4K | Testing, simple tasks, fast responses | | `Llama-3.2-3B-Instruct-q4f16_1-MLC` | \~1.8GB | 4K | General purpose, production | Llama 3.2 models are the best all-around choice for browser LLM applications. Start with 1B for testing, use 3B in production. | Model | Size | Context | Best For | | ------------------------------------ | ------- | ------- | -------------------------------- | | `Phi-3.5-mini-instruct-q4f16_1-MLC` | \~2.4GB | 4K | Reasoning, coding, complex tasks | | `Phi-3-mini-4k-instruct-q4f16_1-MLC` | \~2.2GB | 4K | Reasoning, coding | Phi models excel at reasoning and code generation, often outperforming larger models on these tasks. | Model | Size | Context | Best For | | ----------------------------------- | ----- | ------- | ----------------------------- | | `Qwen2.5-1.5B-Instruct-q4f16_1-MLC` | \~1GB | 4K | Multilingual, Chinese support | | `Qwen2.5-3B-Instruct-q4f16_1-MLC` | \~2GB | 4K | Better multilingual quality | Qwen models have strong multilingual capabilities, especially for Chinese and Asian languages. | Model | Size | Context | Best For | | ----------------------------------- | ------- | ------- | ----------------------------------- | | `SmolLM2-1.7B-Instruct-q4f16_1-MLC` | \~1.1GB | 2K | Low-memory devices, quick inference | | `SmolLM2-360M-Instruct-q4f16_1-MLC` | \~250MB | 2K | Ultra-fast, minimal memory | SmolLM models are optimized for size, not quality. Use for simple tasks or when resources are very limited. 
| Model | Size | Context | Best For | | --------------------------- | ------- | ------- | -------------------------------- | | `gemma-2-2b-it-q4f16_1-MLC` | \~1.3GB | 8K | Longer context, Google ecosystem | * **Testing**: `Llama-3.2-1B-Instruct` - fastest to download and run * **Production**: `Llama-3.2-3B-Instruct` or `Phi-3.5-mini` - best quality * **Code/Reasoning**: `Phi-3.5-mini` - specialized for these tasks * **Multilingual**: `Qwen2.5-1.5B-Instruct` - 100+ languages * **Low Memory**: `SmolLM2-360M-Instruct` - \~250MB ## Text Generation ### Streaming ```typescript import { streamText } from '@localmode/core'; const stream = await streamText({ model: webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'), prompt: 'Write a haiku about programming.', }); let fullText = ''; for await (const chunk of stream) { fullText += chunk.text; // Update UI with each chunk } // Or get full text at once const text = await stream.text; ``` ### Non-Streaming ```typescript import { generateText } from '@localmode/core'; const { text, usage } = await generateText({ model: webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'), prompt: 'What is the capital of France?', }); console.log(text); console.log('Tokens used:', usage.totalTokens); ``` ## Configuration ### Model Options ```ts {2-6} const model = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC', { systemPrompt: 'You are a helpful coding assistant.', temperature: 0.7, maxTokens: 1024, topP: 0.9, }); ``` ### Custom Provider ```typescript import { createWebLLM } from '@localmode/webllm'; const myWebLLM = createWebLLM({ onProgress: (progress) => { console.log(`Loading: ${(progress.progress * 100).toFixed(1)}%`); console.log(`Status: ${progress.text}`); }, }); const model = myWebLLM.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); ``` ## Model Preloading Preload models during app initialization: ```typescript import { preloadModel, isModelCached } from '@localmode/webllm'; // Check if already cached if (!(await isModelCached('Llama-3.2-1B-Instruct-q4f16_1-MLC'))) { // Show loading UI await preloadModel('Llama-3.2-1B-Instruct-q4f16_1-MLC', { onProgress: (progress) => { updateLoadingBar(progress.progress * 100); }, }); } // Model is ready for instant inference ``` ## Model Management ### Available Models Registry Access model metadata programmatically: ```typescript import { WEBLLM_MODELS, type WebLLMModelId } from '@localmode/webllm'; // Get all available models const modelIds = Object.keys(WEBLLM_MODELS) as WebLLMModelId[]; // Access model info const llama = WEBLLM_MODELS['Llama-3.2-1B-Instruct-q4f16_1-MLC']; console.log(llama.name); // 'Llama 3.2 1B Instruct' console.log(llama.contextLength); // 4096 console.log(llama.size); // '~700MB' console.log(llama.sizeBytes); // 734003200 console.log(llama.description); // 'Fast, lightweight model...' 
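
// Sketch (uses only the documented fields above): pick the largest model that fits
// a rough download budget, e.g. for a model-picker UI.
const budgetBytes = 1_500_000_000; // ~1.5 GB
const underBudget = Object.entries(WEBLLM_MODELS)
  .filter(([, info]) => info.sizeBytes <= budgetBytes)
  .sort(([, a], [, b]) => b.sizeBytes - a.sizeBytes);
console.log('Largest model under budget:', underBudget[0]?.[0]);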
``` ### Model Categorization Categorize models by size for UI display: ```typescript import { getModelCategory, WEBLLM_MODELS, type WebLLMModelId } from '@localmode/webllm'; // Get category based on model size const modelId: WebLLMModelId = 'Llama-3.2-1B-Instruct-q4f16_1-MLC'; const sizeBytes = WEBLLM_MODELS[modelId].sizeBytes; const category = getModelCategory(sizeBytes); console.log(category); // 'small' | 'medium' | 'large' // Use for UI grouping function getModelsByCategory() { const categories = { small: [], medium: [], large: [] }; for (const [id, info] of Object.entries(WEBLLM_MODELS)) { const cat = getModelCategory(info.sizeBytes); categories[cat].push({ id, ...info }); } return categories; } ``` ### Delete Cached Models Remove models from browser cache to free up storage: ```typescript import { deleteModelCache, isModelCached } from '@localmode/webllm'; // Delete a specific model's cache await deleteModelCache('Llama-3.2-1B-Instruct-q4f16_1-MLC'); // Verify deletion const stillCached = await isModelCached('Llama-3.2-1B-Instruct-q4f16_1-MLC'); console.log(stillCached); // false ``` LLM models can be large (700MB - 4GB). Use `deleteModelCache()` to let users free up storage when they no longer need a model. ### Type-Safe Model IDs Use the `WebLLMModelId` type for type-safe model selection: ```typescript import type { WebLLMModelId } from '@localmode/webllm'; // Type-safe function that only accepts valid model IDs function selectModel(modelId: WebLLMModelId) { return webllm.languageModel(modelId); } // βœ… Valid selectModel('Llama-3.2-1B-Instruct-q4f16_1-MLC'); // ❌ TypeScript error: invalid model ID selectModel('invalid-model-name'); ``` ## Chat Application ```typescript import { streamText } from '@localmode/core'; interface Message { role: 'user' | 'assistant'; content: string; } async function chat(messages: Message[], userMessage: string) { const model = webllm.languageModel('Llama-3.2-1B-Instruct-q4f16_1-MLC', { systemPrompt: 'You are a helpful assistant.', }); // Build conversation prompt const prompt = messages .map((m) => `${m.role}: ${m.content}`) .concat([`user: ${userMessage}`, 'assistant:']) .join('\n'); const stream = await streamText({ model, prompt, stopSequences: ['user:', '\n\n'], }); let response = ''; for await (const chunk of stream) { response += chunk.text; // Update UI } return response; } ``` ## RAG Integration Combine with retrieval for document-grounded chat: ```typescript import { semanticSearch, rerank, streamText } from '@localmode/core'; async function ragChat(query: string, db: VectorDB) { // 1. Retrieve context const results = await semanticSearch({ db, model: embeddingModel, query, k: 10, }); // 2. Rerank for relevance const reranked = await rerank({ model: rerankerModel, query, documents: results.map((r) => r.metadata.text as string), topK: 3, }); const context = reranked.map((r) => r.document).join('\n\n---\n\n'); // 3. Generate with context const llm = webllm.languageModel('Llama-3.2-3B-Instruct-q4f16_1-MLC'); const stream = await streamText({ model: llm, prompt: `You are a helpful assistant. Answer based only on the provided context. If the answer is not in the context, say "I don't have that information." Context: ${context} Question: ${query} Answer:`, }); return stream; } ``` ## Requirements WebLLM requires WebGPU support. Check availability: ```typescript import { isWebGPUSupported } from '@localmode/core'; if (!isWebGPUSupported()) { console.warn('WebGPU not available. 
LLM features disabled.'); } ``` ### Browser Support | Browser | Support | | ----------- | ---------------- | | Chrome 113+ | βœ… | | Edge 113+ | βœ… | | Firefox | ❌ (Nightly only) | | Safari 18+ | βœ… | | iOS Safari | βœ… (iOS 26+) | ### Hardware Requirements * **GPU**: Any modern GPU with WebGPU support * **VRAM**: Depends on model (1-3GB for 1-3B models) * **RAM**: 4GB minimum, 8GB+ recommended ## Best Practices 1. **Preload models** - Load during app init for instant inference 2. **Start small** - Use 1B models for testing, larger for production 3. **Stream responses** - Better UX than waiting for complete response 4. **Handle errors** - GPU errors, OOM, etc. can occur 5. **Check capabilities** - Verify WebGPU before showing LLM features ## Error Handling ```typescript import { streamText, GenerationError } from '@localmode/core'; try { const stream = await streamText({ model, prompt: 'Hello', }); for await (const chunk of stream) { // ... } } catch (error) { if (error instanceof GenerationError) { if (error.code === 'WEBGPU_NOT_SUPPORTED') { console.error('WebGPU not available'); } else if (error.code === 'MODEL_LOAD_FAILED') { console.error('Failed to load model'); } else if (error.code === 'OUT_OF_MEMORY') { console.error('Not enough GPU memory'); } } } ``` ## Next Steps