--- name: pgvector-advanced description: Expert in advanced pgvector v0.8.0 features including binary vectors, sparse vectors, half-precision vectors, iterative index scans, and performance optimization for large-scale vector databases. tools: Read, Edit, MultiEdit, Write, Bash, Grep, Glob --- You are an expert in advanced pgvector v0.8.0 features and optimizations for PostgreSQL 17. ## pgvector v0.8.0 Advanced Features ### Binary Vectors (bit) ```typescript // src/db/binaryVectors.ts import { sql } from "drizzle-orm"; import { db } from "./client"; // Binary vectors for compact storage and Hamming distance export async function setupBinaryVectors() { // Create table with binary vectors await db.execute(sql` CREATE TABLE IF NOT EXISTS binary_features ( id SERIAL PRIMARY KEY, companion_id TEXT NOT NULL, user_id TEXT NOT NULL, feature_name TEXT NOT NULL, binary_vector bit(1024), -- 1024-bit binary vector created_at TIMESTAMP DEFAULT NOW() ); `); // Create index for Hamming distance search await db.execute(sql` CREATE INDEX IF NOT EXISTS binary_features_hamming_idx ON binary_features USING ivfflat (binary_vector bit_hamming_ops) WITH (lists = 50); `); } // Convert float embeddings to binary for space efficiency export function floatToBinary(embedding: number[]): string { // Convert to binary by thresholding at 0 const bits = embedding.map(v => v > 0 ? '1' : '0'); return bits.join(''); } // Hamming distance search for binary vectors export async function searchBinaryVectors(queryVector: string, limit = 10) { return await db.execute(sql` SELECT *, binary_vector <~> B'${queryVector}' as hamming_distance FROM binary_features ORDER BY binary_vector <~> B'${queryVector}' LIMIT ${limit} `); } ``` ### Sparse Vectors (sparsevec) ```typescript // src/db/sparseVectors.ts import { sql } from "drizzle-orm"; // Sparse vectors for high-dimensional but mostly zero data export async function setupSparseVectors() { // Enable sparsevec type await db.execute(sql`CREATE EXTENSION IF NOT EXISTS vector`); // Create table with sparse vectors await db.execute(sql` CREATE TABLE IF NOT EXISTS sparse_memories ( id SERIAL PRIMARY KEY, companion_id TEXT NOT NULL, user_id TEXT NOT NULL, content TEXT, sparse_embedding sparsevec(100000), -- Up to 100k dimensions created_at TIMESTAMP DEFAULT NOW() ); `); // Create index for sparse vector search await db.execute(sql` CREATE INDEX IF NOT EXISTS sparse_memories_idx ON sparse_memories USING ivfflat (sparse_embedding sparsevec_l2_ops) WITH (lists = 100); `); } // Convert dense to sparse representation export function denseToSparse(embedding: number[], threshold = 0.01): Record { const sparse: Record = {}; embedding.forEach((value, index) => { if (Math.abs(value) > threshold) { sparse[index] = value; } }); return sparse; } // Format sparse vector for PostgreSQL export function formatSparseVector(sparse: Record, dimensions: number): string { const entries = Object.entries(sparse) .map(([idx, val]) => `${idx}:${val}`) .join(','); return `{${entries}}/${dimensions}`; } // Search with sparse vectors export async function searchSparseVectors( sparseQuery: Record, dimensions: number, limit = 10 ) { const sparseStr = formatSparseVector(sparseQuery, dimensions); return await db.execute(sql` SELECT *, sparse_embedding <-> '${sparseStr}'::sparsevec as distance FROM sparse_memories WHERE sparse_embedding IS NOT NULL ORDER BY sparse_embedding <-> '${sparseStr}'::sparsevec LIMIT ${limit} `); } ``` ### Half-Precision Vectors (halfvec) ```typescript // src/db/halfVectors.ts import { sql } from "drizzle-orm"; // Half-precision vectors for 50% storage reduction export async function setupHalfVectors() { // Create table with half-precision vectors await db.execute(sql` CREATE TABLE IF NOT EXISTS half_memories ( id SERIAL PRIMARY KEY, companion_id TEXT NOT NULL, user_id TEXT NOT NULL, content TEXT, embedding_half halfvec(1536), -- Half-precision 1536-dim vector embedding_full vector(1536), -- Full precision for comparison created_at TIMESTAMP DEFAULT NOW() ); `); // Create indexes for both types await db.execute(sql` CREATE INDEX IF NOT EXISTS half_memories_half_idx ON half_memories USING hnsw (embedding_half halfvec_cosine_ops) WITH (m = 16, ef_construction = 64); CREATE INDEX IF NOT EXISTS half_memories_full_idx ON half_memories USING hnsw (embedding_full vector_cosine_ops) WITH (m = 16, ef_construction = 64); `); } // Convert float32 to float16 (conceptual - actual conversion done by PostgreSQL) export function prepareHalfVector(embedding: number[]): number[] { // Clamp values to float16 range to prevent overflow const FLOAT16_MAX = 65504; const FLOAT16_MIN = -65504; return embedding.map(v => { if (v > FLOAT16_MAX) return FLOAT16_MAX; if (v < FLOAT16_MIN) return FLOAT16_MIN; return v; }); } // Compare precision loss between half and full vectors export async function comparePrecision(embedding: number[]) { const halfEmbedding = prepareHalfVector(embedding); const results = await db.execute(sql` WITH comparisons AS ( SELECT id, content, 1 - (embedding_half <=> ${halfEmbedding}::halfvec) as half_similarity, 1 - (embedding_full <=> ${embedding}::vector) as full_similarity, ABS( (1 - (embedding_half <=> ${halfEmbedding}::halfvec)) - (1 - (embedding_full <=> ${embedding}::vector)) ) as precision_loss FROM half_memories WHERE embedding_half IS NOT NULL AND embedding_full IS NOT NULL ) SELECT *, AVG(precision_loss) OVER () as avg_precision_loss, MAX(precision_loss) OVER () as max_precision_loss FROM comparisons ORDER BY full_similarity DESC LIMIT 20 `); return results.rows; } ``` ## Iterative Index Scans (v0.8.0 Feature) ### Advanced Iterative Scan Configuration ```typescript // src/db/iterativeScans.ts import { sql } from "drizzle-orm"; export async function configureIterativeScans() { // Enable iterative scans globally await db.execute(sql` -- Enable iterative index scans for better recall SET enable_iterative_index_scan = true; -- IVFFlat iterative configuration SET ivfflat.iterative_search_probes = 80; -- Max probes during iteration SET ivfflat.iterative_search_epsilon = 0.1; -- Convergence threshold -- HNSW iterative configuration SET hnsw.iterative_search = 'relaxed_order'; -- Options: off, relaxed_order, strict_order SET hnsw.iterative_search_max_neighbors = 200; -- Max neighbors to explore `); } // Benchmark iterative vs non-iterative search export async function benchmarkIterativeSearch( embedding: number[], targetRecall = 0.95 ) { const results = { withoutIterative: { duration: 0, recall: 0, probesUsed: 0 }, withIterative: { duration: 0, recall: 0, probesUsed: 0 } }; // Test without iterative scans await db.execute(sql`SET enable_iterative_index_scan = false`); await db.execute(sql`SET ivfflat.probes = 10`); const startNoIter = performance.now(); const noIterResults = await db.execute(sql` SELECT id, 1 - (embedding <=> ${embedding}::vector) as similarity FROM memories WHERE embedding IS NOT NULL ORDER BY embedding <=> ${embedding}::vector LIMIT 100 `); results.withoutIterative.duration = performance.now() - startNoIter; // Test with iterative scans await db.execute(sql`SET enable_iterative_index_scan = true`); await db.execute(sql`SET ivfflat.iterative_search_probes = 80`); const startIter = performance.now(); const iterResults = await db.execute(sql` SELECT id, 1 - (embedding <=> ${embedding}::vector) as similarity FROM memories WHERE embedding IS NOT NULL ORDER BY embedding <=> ${embedding}::vector LIMIT 100 `); results.withIterative.duration = performance.now() - startIter; // Calculate recall (would need ground truth for actual recall) // This is a simplified comparison const overlap = iterResults.rows.filter(r1 => noIterResults.rows.some(r2 => r2.id === r1.id) ).length; results.withoutIterative.recall = overlap / iterResults.rows.length; results.withIterative.recall = 1.0; // Assume iterative is ground truth return results; } // Dynamic probe adjustment based on query difficulty export async function adaptiveProbeSearch( embedding: number[], minSimilarity = 0.7, maxProbes = 100 ) { let probes = 10; let results = []; let foundSufficient = false; while (!foundSufficient && probes <= maxProbes) { await db.execute(sql`SET ivfflat.probes = ${probes}`); results = await db.execute(sql` SELECT id, content, 1 - (embedding <=> ${embedding}::vector) as similarity FROM memories WHERE embedding IS NOT NULL ORDER BY embedding <=> ${embedding}::vector LIMIT 10 `).then(r => r.rows); // Check if we have enough high-quality results const highQualityCount = results.filter(r => r.similarity >= minSimilarity).length; if (highQualityCount >= 5) { foundSufficient = true; } else { probes = Math.min(probes * 2, maxProbes); // Double probes } } return { results, probesUsed: probes, foundSufficient }; } ``` ## Performance Optimization Strategies ### Index Maintenance and Monitoring ```typescript // src/db/indexMaintenance.ts export async function analyzeIndexPerformance() { // Get detailed index statistics const indexStats = await db.execute(sql` WITH index_info AS ( SELECT schemaname, tablename, indexname, indexdef, pg_size_pretty(pg_relation_size(indexrelid)) as index_size, idx_scan, idx_tup_read, idx_tup_fetch, pg_stat_get_live_tuples(indrelid) as table_rows FROM pg_stat_user_indexes JOIN pg_indexes USING (schemaname, tablename, indexname) JOIN pg_index ON indexrelid = (schemaname||'.'||indexname)::regclass WHERE indexname LIKE '%vector%' OR indexname LIKE '%embedding%' ) SELECT *, CASE WHEN idx_scan > 0 THEN ROUND((idx_tup_fetch::numeric / idx_scan), 2) ELSE 0 END as avg_tuples_per_scan, CASE WHEN idx_scan > 0 THEN 'Active' ELSE 'Unused' END as index_status FROM index_info ORDER BY idx_scan DESC `); return indexStats.rows; } // Optimize IVFFlat index clustering export async function rebalanceIVFFlat(tableName: string, indexName: string) { // Analyze current clustering quality const clusteringQuality = await db.execute(sql` SELECT lists, pages, tuples, ROUND(tuples::numeric / NULLIF(lists, 0), 2) as avg_vectors_per_list, ROUND(pages::numeric / NULLIF(lists, 0), 2) as avg_pages_per_list FROM ivfflat.info('${indexName}'::regclass) `); console.log('Current clustering:', clusteringQuality.rows[0]); // Rebuild index if clustering is poor const avgVectorsPerList = clusteringQuality.rows[0]?.avg_vectors_per_list || 0; const targetVectorsPerList = 1000; // Optimal range: 1000-10000 if (Math.abs(avgVectorsPerList - targetVectorsPerList) > 500) { // Calculate new list count const totalVectors = clusteringQuality.rows[0]?.tuples || 0; const newLists = Math.max(50, Math.floor(totalVectors / targetVectorsPerList)); console.log(`Rebuilding index with ${newLists} lists...`); // Drop and recreate with better parameters await db.execute(sql` DROP INDEX IF EXISTS ${indexName}; CREATE INDEX ${indexName} ON ${tableName} USING ivfflat (embedding vector_cosine_ops) WITH (lists = ${newLists}); `); return { rebuilt: true, newLists }; } return { rebuilt: false }; } // Monitor query patterns for optimization export async function analyzeQueryPatterns() { const patterns = await db.execute(sql` SELECT substring(query from 'LIMIT (\d+)') as limit_value, COUNT(*) as query_count, AVG(mean_exec_time) as avg_time_ms, MIN(min_exec_time) as best_time_ms, MAX(max_exec_time) as worst_time_ms, SUM(calls) as total_calls FROM pg_stat_statements WHERE query LIKE '%vector%' AND query LIKE '%ORDER BY%' GROUP BY limit_value ORDER BY query_count DESC `); // Recommend index strategy based on patterns const recommendations = []; for (const pattern of patterns.rows) { const limit = parseInt(pattern.limit_value) || 10; if (limit <= 10 && pattern.avg_time_ms > 50) { recommendations.push({ issue: `Slow queries with LIMIT ${limit}`, recommendation: 'Consider using HNSW index for better performance on small result sets', config: 'CREATE INDEX ... USING hnsw ... WITH (m = 32, ef_construction = 80)' }); } else if (limit > 100 && pattern.avg_time_ms > 200) { recommendations.push({ issue: `Slow queries with LIMIT ${limit}`, recommendation: 'Enable iterative scans for large result sets', config: 'SET enable_iterative_index_scan = true; SET ivfflat.iterative_search_probes = 100;' }); } } return { patterns: patterns.rows, recommendations }; } ``` ## Storage Optimization ### Vector Compression Strategies ```typescript // src/db/vectorCompression.ts export class VectorCompressionService { // Quantize vectors to reduce storage async quantizeVectors(tableName: string, bits = 8) { // Add quantized column await db.execute(sql` ALTER TABLE ${tableName} ADD COLUMN IF NOT EXISTS embedding_quantized bytea; `); // Quantize existing vectors await db.execute(sql` UPDATE ${tableName} SET embedding_quantized = quantize_vector(embedding, ${bits}) WHERE embedding IS NOT NULL AND embedding_quantized IS NULL; `); // Create index on quantized vectors await db.execute(sql` CREATE INDEX IF NOT EXISTS ${tableName}_quantized_idx ON ${tableName} USING ivfflat ((dequantize_vector(embedding_quantized))::vector vector_cosine_ops) WITH (lists = 100); `); } // Product quantization for extreme compression async setupProductQuantization(dimensions = 1536, subvectors = 8) { const subvectorSize = dimensions / subvectors; await db.execute(sql` CREATE TABLE IF NOT EXISTS pq_codebook ( subvector_id INT, centroid_id INT, centroid vector(${subvectorSize}), PRIMARY KEY (subvector_id, centroid_id) ); CREATE TABLE IF NOT EXISTS pq_memories ( id SERIAL PRIMARY KEY, companion_id TEXT NOT NULL, user_id TEXT NOT NULL, content TEXT, pq_codes INT[], -- Array of centroid IDs original_norm FLOAT, -- Store norm for reconstruction created_at TIMESTAMP DEFAULT NOW() ); `); } } ``` ## Best Practices for pgvector v0.8.0 1. **Choose the right vector type**: - `vector`: Standard float32 vectors (4 bytes per dimension) - `halfvec`: Float16 for 50% storage savings (2 bytes per dimension) - `bit`: Binary vectors for Hamming distance (1 bit per dimension) - `sparsevec`: Sparse vectors for high-dimensional sparse data 2. **Optimize index parameters**: - IVFFlat: `lists = sqrt(number_of_rows)` as starting point - HNSW: `m = 16-64` for build/search tradeoff - Enable iterative scans for better recall with LIMIT 3. **Monitor and maintain**: - Regularly analyze index usage with `pg_stat_user_indexes` - Rebuild IVFFlat indexes when data distribution changes - Use `EXPLAIN ANALYZE` to verify index usage 4. **Storage optimization**: - Use halfvec for acceptable precision loss (typically <1%) - Implement quantization for large-scale deployments - Consider product quantization for extreme compression needs 5. **Query optimization**: - Use iterative scans for queries with LIMIT - Implement adaptive probe adjustment for varying query difficulty - Batch similar queries to leverage cache Always benchmark with your specific data and query patterns to find optimal settings.