Skip to content

Flat Index

Overview

FlatIndex is the simplest and most direct index implementation in GoVector. It performs brute-force search by iterating through all vectors and calculating distances to find the nearest neighbors. While it has O(n) search complexity, it provides exact results and serves as an important baseline for evaluating other indexing algorithms.

Implementation Principles

Core Data Structure

type FlatIndex struct {
    mu      sync.RWMutex
    points  map[string]*PointStruct
    metric  Distance
}

Search Algorithm

func (f *FlatIndex) Search(query []float32, filter *Filter, topK int) ([]ScoredPoint, error) {
    f.mu.RLock()
    defer f.mu.RUnlock()

    candidates := make([]ScoredPoint, 0)
    for _, point := range f.points {
        if filter != nil && !MatchFilter(point.Payload, filter) {
            continue
        }

        score := CalculateDistance(f.metric, query, point.Vector)
        candidates = append(candidates, ScoredPoint{
            ID:      point.ID,
            Version: point.Version,
            Score:   score,
            Payload: point.Payload,
        })
    }

    sort.Slice(candidates, func(i, j int) bool {
        return compareScores(f.metric, candidates[i].Score, candidates[j].Score)
    })

    if len(candidates) > topK {
        candidates = candidates[:topK]
    }
    return candidates, nil
}

Distance Calculation

func CalculateDistance(metric Distance, a, b []float32) float32 {
    switch metric {
    case Cosine:
        return CosineDistance(a, b)
    case Euclidean:
        return EuclideanDistance(a, b)
    case Dot:
        return -DotProduct(a, b) // Negative because higher is better
    default:
        return EuclideanDistance(a, b)
    }
}

func compareScores(metric Distance, a, b float32) bool {
    switch metric {
    case Cosine, Euclidean:
        return a < b // Lower is better
    case Dot:
        return a > b // Higher is better
    default:
        return a < b
    }
}

Filter Matching

func MatchFilter(payload map[string]interface{}, filter *Filter) bool {
    if filter == nil {
        return true
    }

    // Must conditions (all must match)
    for _, cond := range filter.Must {
        if !matchCondition(payload, cond) {
            return false
        }
    }

    // MustNot conditions (none should match)
    for _, cond := range filter.MustNot {
        if matchCondition(payload, cond) {
            return false
        }
    }

    // Should conditions (at least one should match)
    if len(filter.Should) > 0 {
        matched := false
        for _, cond := range filter.Should {
            if matchCondition(payload, cond) {
                matched = true
                break
            }
        }
        if !matched {
            return false
        }
    }

    return true
}

func matchCondition(payload map[string]interface{}, cond *Condition) bool {
    value, exists := payload[cond.Key]
    if !exists {
        return false
    }

    if cond.Exists != nil {
        return cond.Exists.Value == exists
    }

    if cond.Match != nil {
        return reflect.DeepEqual(value, cond.Match.Value)
    }

    if cond.Range != nil {
        num, ok := toFloat64(value)
        if !ok {
            return false
        }

        if cond.Range.Gte != nil && num < *cond.Range.Gte {
            return false
        }
        if cond.Range.Lte != nil && num > *cond.Range.Lte {
            return false
        }
        if cond.Range.Gt != nil && num <= *cond.Range.Gt {
            return false
        }
        if cond.Range.Lt != nil && num >= *cond.Range.Lt {
            return false
        }
    }

    return true
}

Point Management

Adding Points

func (f *FlatIndex) AddPoints(points []*PointStruct) error {
    f.mu.Lock()
    defer f.mu.Unlock()

    for _, p := range points {
        f.points[p.ID] = p
    }
    return nil
}

Deleting Points

func (f *FlatIndex) Delete(pointIDs []string, filter *Filter) (int, error) {
    f.mu.Lock()
    defer f.mu.Unlock()

    deleted := 0
    for _, id := range pointIDs {
        if point, exists := f.points[id]; exists {
            if filter == nil || MatchFilter(point.Payload, filter) {
                delete(f.points, id)
                deleted++
            }
        }
    }
    return deleted, nil
}

Getting Point Count

func (f *FlatIndex) Size() int {
    f.mu.RLock()
    defer f.mu.RUnlock()
    return len(f.points)
}

Performance Characteristics

Time Complexity

Operation Complexity Description
Add O(1) Hash map insertion
Delete O(1) Hash map deletion
Search O(n) Linear scan of all points
Filter O(n) Applied during search

Space Complexity

Component Space Description
Points O(n) Vector storage
Index O(1) Hash map overhead
Total O(n) Linear with point count

Performance Metrics

Tested on 10,000 128-dimensional vectors:

Metric Value
Search (topK=10) ~5ms
Search (topK=100) ~8ms
Memory per vector ~512 bytes
Index build time < 100ms

Usage Scenarios

Suitable For

  • Small datasets: Under 10,000 vectors where O(n) search is acceptable
  • Accuracy requirements: Exact nearest neighbor search without approximation
  • Benchmark baseline: Comparing with approximate methods
  • Simple use cases: No complex indexing requirements

Not Suitable For

  • Large datasets: Performance degrades linearly with data size
  • Low latency requirements: Cannot meet sub-millisecond search demands
  • High throughput scenarios: Limited query per second

Configuration Options

type FlatIndexOptions struct {
    Metric Distance // Distance metric (Cosine, Euclidean, Dot)
}

Example Usage:

index := NewFlatIndex(Cosine)
err := index.AddPoints(points)
results, err := index.Search(query, nil, 10)

Thread Safety

FlatIndex uses sync.RWMutex to ensure thread safety:

  • Multiple concurrent readers are allowed
  • Writers get exclusive access
  • All public methods handle locking internally

Best Practices

  1. Batch Operations: Use AddPoints instead of multiple AddPoint calls
  2. Pre-allocation: If dataset size is known, pre-allocate the map capacity
  3. Filter Early: Apply filters during search to reduce result set
  4. Dimension Matching: Ensure all vectors have consistent dimensions