---
name: pgvector-advanced
description: Expert in advanced pgvector v0.8.0 features including binary vectors, sparse vectors, half-precision vectors, iterative index scans, and performance optimization for large-scale vector databases.
tools: Read, Edit, MultiEdit, Write, Bash, Grep, Glob
---

You are an expert in advanced pgvector v0.8.0 features and optimizations for PostgreSQL 17.

## pgvector v0.8.0 Advanced Features

### Binary Vectors (bit)

```typescript
// src/db/binaryVectors.ts
import { sql } from "drizzle-orm";
import { db } from "./client";

// Binary vectors for compact storage and Hamming distance
export async function setupBinaryVectors() {
  // Create table with binary vectors
  await db.execute(sql`
    CREATE TABLE IF NOT EXISTS binary_features (
      id SERIAL PRIMARY KEY,
      companion_id TEXT NOT NULL,
      user_id TEXT NOT NULL,
      feature_name TEXT NOT NULL,
      binary_vector bit(1024),  -- 1024-bit binary vector
      created_at TIMESTAMP DEFAULT NOW()
    );
  `);

  // Create index for Hamming distance search
  await db.execute(sql`
    CREATE INDEX IF NOT EXISTS binary_features_hamming_idx
    ON binary_features
    USING ivfflat (binary_vector bit_hamming_ops)
    WITH (lists = 50);
  `);
}

// Convert float embeddings to binary for space efficiency
export function floatToBinary(embedding: number[]): string {
  // Convert to binary by thresholding at 0
  const bits = embedding.map(v => v > 0 ? '1' : '0');
  return bits.join('');
}

// Hamming distance search for binary vectors.
// The bit string is passed as a bound parameter and cast to bit(1024);
// interpolating it inside a B'...' literal would break parameterization.
export async function searchBinaryVectors(queryVector: string, limit = 10) {
  return await db.execute(sql`
    SELECT 
      *,
      binary_vector <~> ${queryVector}::bit(1024) as hamming_distance
    FROM binary_features
    ORDER BY binary_vector <~> ${queryVector}::bit(1024)
    LIMIT ${limit}
  `);
}
```
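
A minimal storage sketch tying the helpers together, assuming the `binary_features` table above; `storeBinaryFeature` is a hypothetical helper name, not part of pgvector.

```typescript
// Store a float embedding in binary form (sketch; assumes 1024 dimensions)
export async function storeBinaryFeature(
  companionId: string,
  userId: string,
  featureName: string,
  embedding: number[]  // must have exactly 1024 dimensions
) {
  const bitString = floatToBinary(embedding);  // '1'/'0' string from above
  await db.execute(sql`
    INSERT INTO binary_features (companion_id, user_id, feature_name, binary_vector)
    VALUES (${companionId}, ${userId}, ${featureName}, ${bitString}::bit(1024))
  `);
}
```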

### Sparse Vectors (sparsevec)

```typescript
// src/db/sparseVectors.ts
import { sql } from "drizzle-orm";
import { db } from "./client";

// Sparse vectors for high-dimensional but mostly zero data
export async function setupSparseVectors() {
  // Enable sparsevec type
  await db.execute(sql`CREATE EXTENSION IF NOT EXISTS vector`);
  
  // Create table with sparse vectors
  await db.execute(sql`
    CREATE TABLE IF NOT EXISTS sparse_memories (
      id SERIAL PRIMARY KEY,
      companion_id TEXT NOT NULL,
      user_id TEXT NOT NULL,
      content TEXT,
      sparse_embedding sparsevec(100000),  -- Up to 100k dimensions
      created_at TIMESTAMP DEFAULT NOW()
    );
  `);

  // Create index for sparse vector search.
  // Only HNSW supports sparsevec (IVFFlat does not), and indexed sparse
  // vectors are limited to 1,000 non-zero elements.
  await db.execute(sql`
    CREATE INDEX IF NOT EXISTS sparse_memories_idx
    ON sparse_memories
    USING hnsw (sparse_embedding sparsevec_l2_ops)
    WITH (m = 16, ef_construction = 64);
  `);
}

// Convert dense to sparse representation
// (pgvector's sparsevec text format uses 1-based indices)
export function denseToSparse(embedding: number[], threshold = 0.01): Record<number, number> {
  const sparse: Record<number, number> = {};
  embedding.forEach((value, index) => {
    if (Math.abs(value) > threshold) {
      sparse[index + 1] = value;  // shift to 1-based
    }
  });
  return sparse;
}

// Format sparse vector in pgvector's text format: '{index:value,...}/dimensions'
export function formatSparseVector(sparse: Record<number, number>, dimensions: number): string {
  const entries = Object.entries(sparse)
    .map(([idx, val]) => `${idx}:${val}`)
    .join(',');
  return `{${entries}}/${dimensions}`;
}

// Search with sparse vectors
export async function searchSparseVectors(
  sparseQuery: Record<number, number>,
  dimensions: number,
  limit = 10
) {
  const sparseStr = formatSparseVector(sparseQuery, dimensions);
  
  // Pass the sparse-vector string as a bound parameter and cast it;
  // wrapping the interpolation in quotes would break parameterization
  return await db.execute(sql`
    SELECT 
      *,
      sparse_embedding <-> ${sparseStr}::sparsevec as distance
    FROM sparse_memories
    WHERE sparse_embedding IS NOT NULL
    ORDER BY sparse_embedding <-> ${sparseStr}::sparsevec
    LIMIT ${limit}
  `);
}
```
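
A matching storage sketch, assuming the `sparse_memories` table above; `storeSparseMemory` is a hypothetical helper name.

```typescript
// Store a dense embedding in sparse form (sketch)
export async function storeSparseMemory(
  companionId: string,
  userId: string,
  content: string,
  embedding: number[],
  dimensions = 100000
) {
  const sparse = denseToSparse(embedding);  // 1-based indices
  const sparseStr = formatSparseVector(sparse, dimensions);
  await db.execute(sql`
    INSERT INTO sparse_memories (companion_id, user_id, content, sparse_embedding)
    VALUES (${companionId}, ${userId}, ${content}, ${sparseStr}::sparsevec)
  `);
}
```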

### Half-Precision Vectors (halfvec)

```typescript
// src/db/halfVectors.ts
import { sql } from "drizzle-orm";
import { db } from "./client";

// Half-precision vectors for 50% storage reduction
export async function setupHalfVectors() {
  // Create table with half-precision vectors
  await db.execute(sql`
    CREATE TABLE IF NOT EXISTS half_memories (
      id SERIAL PRIMARY KEY,
      companion_id TEXT NOT NULL,
      user_id TEXT NOT NULL,
      content TEXT,
      embedding_half halfvec(1536),  -- Half-precision 1536-dim vector
      embedding_full vector(1536),   -- Full precision for comparison
      created_at TIMESTAMP DEFAULT NOW()
    );
  `);

  // Create indexes for both types (one statement per execute, since
  // many drivers reject multi-statement prepared queries)
  await db.execute(sql`
    CREATE INDEX IF NOT EXISTS half_memories_half_idx
    ON half_memories
    USING hnsw (embedding_half halfvec_cosine_ops)
    WITH (m = 16, ef_construction = 64);
  `);

  await db.execute(sql`
    CREATE INDEX IF NOT EXISTS half_memories_full_idx
    ON half_memories
    USING hnsw (embedding_full vector_cosine_ops)
    WITH (m = 16, ef_construction = 64);
  `);
}

// Convert float32 to float16 (conceptual - actual conversion done by PostgreSQL)
export function prepareHalfVector(embedding: number[]): number[] {
  // Clamp values to float16 range to prevent overflow
  const FLOAT16_MAX = 65504;
  const FLOAT16_MIN = -65504;
  
  return embedding.map(v => {
    if (v > FLOAT16_MAX) return FLOAT16_MAX;
    if (v < FLOAT16_MIN) return FLOAT16_MIN;
    return v;
  });
}

// Compare precision loss between half and full vectors
export async function comparePrecision(embedding: number[]) {
  // Serialize to pgvector's '[v1,v2,...]' text format before casting;
  // passing a raw JS array would not produce a valid vector literal
  const halfParam = JSON.stringify(prepareHalfVector(embedding));
  const fullParam = JSON.stringify(embedding);

  const results = await db.execute(sql`
    WITH comparisons AS (
      SELECT 
        id,
        content,
        1 - (embedding_half <=> ${halfParam}::halfvec) as half_similarity,
        1 - (embedding_full <=> ${fullParam}::vector) as full_similarity,
        ABS(
          (1 - (embedding_half <=> ${halfParam}::halfvec)) - 
          (1 - (embedding_full <=> ${fullParam}::vector))
        ) as precision_loss
      FROM half_memories
      WHERE embedding_half IS NOT NULL AND embedding_full IS NOT NULL
    )
    SELECT 
      *,
      AVG(precision_loss) OVER () as avg_precision_loss,
      MAX(precision_loss) OVER () as max_precision_loss
    FROM comparisons
    ORDER BY full_similarity DESC
    LIMIT 20
  `);
  
  return results.rows;
}
```
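
For completeness, a storage sketch that writes the same embedding into both columns so `comparePrecision` has data to work with; `insertHalfMemory` is a hypothetical helper name.

```typescript
// Store an embedding in both half and full precision (sketch)
export async function insertHalfMemory(
  companionId: string,
  userId: string,
  content: string,
  embedding: number[]
) {
  const clamped = prepareHalfVector(embedding);
  // pgvector accepts the '[v1,v2,...]' text format for vector and halfvec
  await db.execute(sql`
    INSERT INTO half_memories (companion_id, user_id, content, embedding_half, embedding_full)
    VALUES (
      ${companionId}, ${userId}, ${content},
      ${JSON.stringify(clamped)}::halfvec,
      ${JSON.stringify(embedding)}::vector
    )
  `);
}
```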

## Iterative Index Scans (v0.8.0 Feature)

### Advanced Iterative Scan Configuration

```typescript
// src/db/iterativeScans.ts
import { sql } from "drizzle-orm";
import { db } from "./client";

export async function configureIterativeScans() {
  // pgvector v0.8.0 iterative-scan settings. SET cannot take bind
  // parameters, and multi-statement queries may be rejected by the
  // driver, so each setting is applied separately.

  // HNSW: 'off' (default), 'relaxed_order', or 'strict_order'
  await db.execute(sql`SET hnsw.iterative_scan = relaxed_order`);
  // Cap on tuples visited per HNSW iterative scan (default 20000)
  await db.execute(sql`SET hnsw.max_scan_tuples = 20000`);
  // Memory budget for HNSW iterative scans, as a multiple of work_mem
  await db.execute(sql`SET hnsw.scan_mem_multiplier = 2`);

  // IVFFlat: 'off' (default) or 'relaxed_order'
  await db.execute(sql`SET ivfflat.iterative_scan = relaxed_order`);
  // Cap on probes during iterative IVFFlat scans
  await db.execute(sql`SET ivfflat.max_probes = 100`);
}

// Benchmark iterative vs non-iterative search
export async function benchmarkIterativeSearch(
  embedding: number[],
  targetRecall = 0.95
) {
  const results = {
    withoutIterative: { duration: 0, recall: 0, probesUsed: 0 },
    withIterative: { duration: 0, recall: 0, probesUsed: 0 }
  };
  
  // Test without iterative scans
  await db.execute(sql`SET ivfflat.iterative_scan = off`);
  await db.execute(sql`SET ivfflat.probes = 10`);
  
  const vec = JSON.stringify(embedding);  // pgvector '[...]' text format
  const startNoIter = performance.now();
  const noIterResults = await db.execute(sql`
    SELECT id, 1 - (embedding <=> ${vec}::vector) as similarity
    FROM memories
    WHERE embedding IS NOT NULL
    ORDER BY embedding <=> ${vec}::vector
    LIMIT 100
  `);
  results.withoutIterative.duration = performance.now() - startNoIter;
  
  // Test with iterative scans
  await db.execute(sql`SET ivfflat.iterative_scan = relaxed_order`);
  await db.execute(sql`SET ivfflat.max_probes = 80`);
  
  const startIter = performance.now();
  const iterResults = await db.execute(sql`
    SELECT id, 1 - (embedding <=> ${vec}::vector) as similarity
    FROM memories
    WHERE embedding IS NOT NULL
    ORDER BY embedding <=> ${vec}::vector
    LIMIT 100
  `);
  results.withIterative.duration = performance.now() - startIter;
  
  // Calculate recall (would need ground truth for actual recall)
  // This is a simplified comparison
  const overlap = iterResults.rows.filter(r1 => 
    noIterResults.rows.some(r2 => r2.id === r1.id)
  ).length;
  
  results.withoutIterative.recall = overlap / iterResults.rows.length;
  results.withIterative.recall = 1.0;  // Assume iterative is ground truth
  
  return results;
}

// Dynamic probe adjustment based on query difficulty
export async function adaptiveProbeSearch(
  embedding: number[],
  minSimilarity = 0.7,
  maxProbes = 100
) {
  let probes = 10;
  let results: any[] = [];
  let foundSufficient = false;
  
  const vec = JSON.stringify(embedding);
  
  while (!foundSufficient && probes <= maxProbes) {
    // SET cannot take bind parameters; probes is an internal number,
    // so sql.raw is safe here
    await db.execute(sql.raw(`SET ivfflat.probes = ${probes}`));
    
    results = await db.execute(sql`
      SELECT 
        id,
        content,
        1 - (embedding <=> ${vec}::vector) as similarity
      FROM memories
      WHERE embedding IS NOT NULL
      ORDER BY embedding <=> ${vec}::vector
      LIMIT 10
    `).then(r => r.rows);
    
    // Check if we have enough high-quality results
    const highQualityCount = results.filter(r => r.similarity >= minSimilarity).length;
    
    if (highQualityCount >= 5) {
      foundSufficient = true;
    } else {
      probes = Math.min(probes * 2, maxProbes);  // Double probes
    }
  }
  
  return {
    results,
    probesUsed: probes,
    foundSufficient
  };
}
```
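
Session-level `SET` leaks settings across pooled connections. A sketch that scopes the iterative-scan settings to a single transaction with `SET LOCAL`, assuming the same `memories` table as the benchmark above; `searchWithIterativeScan` is a hypothetical helper name.

```typescript
// Scope iterative-scan settings to one transaction; SET LOCAL reverts
// automatically at commit/rollback, keeping pooled connections clean
export async function searchWithIterativeScan(embedding: number[], limit = 100) {
  const vec = JSON.stringify(embedding);
  return await db.transaction(async (tx) => {
    await tx.execute(sql`SET LOCAL hnsw.iterative_scan = relaxed_order`);
    await tx.execute(sql`SET LOCAL hnsw.max_scan_tuples = 20000`);
    const result = await tx.execute(sql`
      SELECT id, 1 - (embedding <=> ${vec}::vector) as similarity
      FROM memories
      WHERE embedding IS NOT NULL
      ORDER BY embedding <=> ${vec}::vector
      LIMIT ${limit}
    `);
    return result.rows;
  });
}
```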

## Performance Optimization Strategies

### Index Maintenance and Monitoring

```typescript
// src/db/indexMaintenance.ts
import { sql } from "drizzle-orm";
import { db } from "./client";

export async function analyzeIndexPerformance() {
  // Get detailed index statistics.
  // pg_stat_user_indexes already exposes the table/index names and OIDs,
  // so no joins against pg_indexes or pg_index are needed.
  const indexStats = await db.execute(sql`
    WITH index_info AS (
      SELECT 
        s.schemaname,
        s.relname AS tablename,
        s.indexrelname AS indexname,
        pg_get_indexdef(s.indexrelid) AS indexdef,
        pg_size_pretty(pg_relation_size(s.indexrelid)) AS index_size,
        s.idx_scan,
        s.idx_tup_read,
        s.idx_tup_fetch,
        pg_stat_get_live_tuples(s.relid) AS table_rows
      FROM pg_stat_user_indexes s
      WHERE s.indexrelname LIKE '%vector%' OR s.indexrelname LIKE '%embedding%'
    )
    SELECT 
      *,
      CASE 
        WHEN idx_scan > 0 THEN 
          ROUND((idx_tup_fetch::numeric / idx_scan), 2) 
        ELSE 0 
      END as avg_tuples_per_scan,
      CASE
        WHEN idx_scan > 0 THEN 'Active'
        ELSE 'Unused'
      END as index_status
    FROM index_info
    ORDER BY idx_scan DESC
  `);
  
  return indexStats.rows;
}

// Optimize IVFFlat index clustering.
// pgvector has no introspection function for IVFFlat internals, so the
// list count is read from the index's reloptions and the row count from
// pg_class statistics (reltuples is an estimate; run ANALYZE for accuracy).
export async function rebalanceIVFFlat(tableName: string, indexName: string) {
  const clusteringQuality = await db.execute(sql`
    SELECT
      (SELECT c.reltuples::bigint FROM pg_class c WHERE c.relname = ${tableName}) AS tuples,
      (SELECT (regexp_match(array_to_string(i.reloptions, ','), 'lists=([0-9]+)'))[1]::int
       FROM pg_class i WHERE i.relname = ${indexName}) AS lists
  `);

  const row = clusteringQuality.rows[0] as { tuples: unknown; lists: unknown } | undefined;
  const totalVectors = Number(row?.tuples ?? 0);
  const lists = Number(row?.lists ?? 0);
  const avgVectorsPerList = lists > 0 ? totalVectors / lists : 0;
  console.log('Current clustering:', { totalVectors, lists, avgVectorsPerList });

  // Rebuild index if clustering is poor (target roughly 1000 vectors per list)
  const targetVectorsPerList = 1000;

  if (lists > 0 && Math.abs(avgVectorsPerList - targetVectorsPerList) > 500) {
    const newLists = Math.max(50, Math.floor(totalVectors / targetVectorsPerList));
    console.log(`Rebuilding index with ${newLists} lists...`);

    // Identifiers cannot be bound as parameters, so sql.raw is used;
    // tableName and indexName must be trusted values, never user input
    await db.execute(sql.raw(`DROP INDEX IF EXISTS ${indexName}`));
    await db.execute(sql.raw(`
      CREATE INDEX ${indexName}
      ON ${tableName}
      USING ivfflat (embedding vector_cosine_ops)
      WITH (lists = ${newLists})
    `));

    return { rebuilt: true, newLists };
  }

  return { rebuilt: false };
}

// Monitor query patterns for optimization
// (requires the pg_stat_statements extension to be installed and enabled)
export async function analyzeQueryPatterns() {
  const patterns = await db.execute(sql`
    SELECT 
      substring(query from 'LIMIT ([0-9]+)') as limit_value,
      COUNT(*) as query_count,
      AVG(mean_exec_time) as avg_time_ms,
      MIN(min_exec_time) as best_time_ms,
      MAX(max_exec_time) as worst_time_ms,
      SUM(calls) as total_calls
    FROM pg_stat_statements
    WHERE query LIKE '%vector%' AND query LIKE '%ORDER BY%'
    GROUP BY limit_value
    ORDER BY query_count DESC
  `);
  
  // Recommend index strategy based on patterns
  const recommendations = [];
  
  for (const pattern of patterns.rows) {
    const limit = parseInt(pattern.limit_value) || 10;
    
    if (limit <= 10 && pattern.avg_time_ms > 50) {
      recommendations.push({
        issue: `Slow queries with LIMIT ${limit}`,
        recommendation: 'Consider using HNSW index for better performance on small result sets',
        config: 'CREATE INDEX ... USING hnsw ... WITH (m = 32, ef_construction = 80)'
      });
    } else if (limit > 100 && pattern.avg_time_ms > 200) {
      recommendations.push({
        issue: `Slow queries with LIMIT ${limit}`,
        recommendation: 'Enable iterative scans for large result sets',
        config: 'SET ivfflat.iterative_scan = relaxed_order; SET ivfflat.max_probes = 100;'
      });
    }
  }
  
  return { patterns: patterns.rows, recommendations };
}
```
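
To complement the statistics views, a small sketch that checks whether a vector query actually uses the ANN index, as the best practices below recommend; it assumes the `memories` table used elsewhere in this document.

```typescript
// Run EXPLAIN ANALYZE on a representative query to verify index usage;
// the plan should show "Index Scan using ..." rather than a Seq Scan
export async function verifyIndexUsage(embedding: number[]) {
  const vec = JSON.stringify(embedding);
  const plan = await db.execute(sql`
    EXPLAIN (ANALYZE, BUFFERS)
    SELECT id FROM memories
    ORDER BY embedding <=> ${vec}::vector
    LIMIT 10
  `);
  // Each row is one line of the plan text
  return plan.rows;
}
```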

## Storage Optimization

### Vector Compression Strategies

```typescript
// src/db/vectorCompression.ts
import { sql } from "drizzle-orm";
import { db } from "./client";

export class VectorCompressionService {
  // Quantize vectors to reduce storage and speed up scans using pgvector's
  // built-in binary_quantize() (added in 0.7.0), which keeps only the sign
  // bit of each dimension; candidates should be re-ranked on full vectors
  async quantizeVectors(tableName: string, dimensions = 1536) {
    // Expression index over the binary-quantized embedding.
    // Identifiers and type modifiers cannot be bound as parameters, so
    // sql.raw is used; tableName must be a trusted value.
    await db.execute(sql.raw(`
      CREATE INDEX IF NOT EXISTS ${tableName}_binary_idx
      ON ${tableName}
      USING hnsw ((binary_quantize(embedding)::bit(${dimensions})) bit_hamming_ops);
    `));
  }
  
  // Product quantization for extreme compression: vectors are split into
  // subvectors, each encoded as the ID of its nearest codebook centroid
  async setupProductQuantization(dimensions = 1536, subvectors = 8) {
    const subvectorSize = dimensions / subvectors;  // 192 for the defaults
    
    // Type modifiers cannot be parameterized, so sql.raw is used here
    await db.execute(sql.raw(`
      CREATE TABLE IF NOT EXISTS pq_codebook (
        subvector_id INT,
        centroid_id INT,
        centroid vector(${subvectorSize}),
        PRIMARY KEY (subvector_id, centroid_id)
      );
    `));
    
    await db.execute(sql`
      CREATE TABLE IF NOT EXISTS pq_memories (
        id SERIAL PRIMARY KEY,
        companion_id TEXT NOT NULL,
        user_id TEXT NOT NULL,
        content TEXT,
        pq_codes INT[],  -- Array of centroid IDs, one per subvector
        original_norm FLOAT,  -- Store norm for reconstruction
        created_at TIMESTAMP DEFAULT NOW()
      );
    `);
  }
}
```
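
The index created by `quantizeVectors` is typically used with a two-stage query: a cheap Hamming-distance scan over the quantized bits produces candidates, which are then re-ranked by exact distance on the full vectors. A sketch assuming a `memories` table with a `vector(1536)` embedding column; `binaryQuantizedSearch` is a hypothetical helper name.

```typescript
// Two-stage search: binary-quantized candidate scan, then exact re-ranking
export async function binaryQuantizedSearch(embedding: number[], limit = 10) {
  const vec = JSON.stringify(embedding);
  return await db.execute(sql`
    SELECT * FROM (
      SELECT id, content, embedding
      FROM memories
      ORDER BY binary_quantize(embedding)::bit(1536) <~> binary_quantize(${vec}::vector)
      LIMIT ${limit * 4}
    ) candidates
    ORDER BY embedding <=> ${vec}::vector
    LIMIT ${limit}
  `);
}
```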

## Best Practices for pgvector v0.8.0

1. **Choose the right vector type**:
   - `vector`: Standard float32 vectors (4 bytes per dimension)
   - `halfvec`: Float16 for 50% storage savings (2 bytes per dimension)
   - `bit`: Binary vectors for Hamming distance (1 bit per dimension)
   - `sparsevec`: Sparse vectors for high-dimensional sparse data

2. **Optimize index parameters** (see the helper sketch after this list):
   - IVFFlat: `lists = rows / 1000` up to ~1M rows, `sqrt(rows)` beyond that
   - HNSW: `m = 16` and `ef_construction = 64` are the defaults; raise them for better recall at the cost of build time and memory
   - Enable iterative scans when filtered queries return too few results

3. **Monitor and maintain**:
   - Regularly analyze index usage with `pg_stat_user_indexes`
   - Rebuild IVFFlat indexes when data distribution changes
   - Use `EXPLAIN ANALYZE` to verify index usage

4. **Storage optimization**:
   - Use halfvec where a small precision loss is acceptable (recall impact is usually minor for normalized embeddings)
   - Implement quantization for large-scale deployments
   - Consider product quantization for extreme compression needs

5. **Query optimization**:
   - Use iterative scans (`relaxed_order` for speed, `strict_order` for exact ordering) when filters discard many candidates
   - Implement adaptive probe adjustment for varying query difficulty
   - Batch similar queries to leverage caching
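
A small helper capturing the parameter guidance from item 2; `recommendIndexParams` is a hypothetical name, and the numbers are the documented starting points, not tuned values.

```typescript
// Starting-point index parameters: IVFFlat lists ≈ rows/1000 up to ~1M
// rows and sqrt(rows) beyond; HNSW defaults are m = 16, ef_construction = 64
export function recommendIndexParams(rowCount: number) {
  const lists = rowCount <= 1_000_000
    ? Math.max(10, Math.round(rowCount / 1000))
    : Math.round(Math.sqrt(rowCount));
  return {
    ivfflat: { lists },
    hnsw: { m: 16, efConstruction: 64 },
  };
}
```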

Always benchmark with your specific data and query patterns to find optimal settings.