Skip to content

Data Model System

Overview

GoVector uses a carefully designed data model system to represent vectors, search results, and filter conditions. This section details the core data structures and their serialization mechanisms.

Core Data Structures

PointStruct

PointStruct is the fundamental data structure for storing vector data:

// PointStruct is the stored representation of a single vector point: an
// identifier, a version counter, the vector components, and optional metadata.
type PointStruct struct {
    // ID is the unique identifier for the point.
    ID      string                 `json:"id"`
    // Version is the version number used for optimistic locking.
    Version uint64                 `json:"version"`
    // Vector holds the vector components as 32-bit floats.
    Vector  []float32              `json:"vector"`
    // Payload is optional metadata; it is omitted from JSON when empty.
    Payload map[string]interface{} `json:"payload,omitempty"`
}

Field Description:

Field Type Description
ID string Unique identifier for the point
Version uint64 Version number for optimistic locking
Vector []float32 Vector data, 32-bit floating-point array
Payload map[string]interface{} Optional metadata payload

Usage Example:

// Build a four-component point carrying a payload with string, float, and
// bool values.
point := &PointStruct{
    ID:      "point-001",
    Version: 1,
    Vector:  []float32{0.1, 0.2, 0.3, 0.4},
    Payload: map[string]interface{}{
        "category": "electronics",
        // An untyped float constant stored in an interface{} becomes a float64.
        "price":    299.99,
        "in_stock": true,
    },
}

ScoredPoint

ScoredPoint represents a search result together with its similarity score:

// ScoredPoint is a single search result: the matched point's identity and
// metadata plus the score assigned to it by the search.
type ScoredPoint struct {
    // ID is the identifier of the matched point.
    ID      string                 `json:"id"`
    // Version is the version of the matched point.
    Version uint64                 `json:"version"`
    // Score is the similarity score computed for this result.
    Score   float32                `json:"score"`
    // Payload is the point's metadata; it is omitted from JSON when empty.
    Payload map[string]interface{} `json:"payload,omitempty"`
}

Field Description:

Field Type Description
ID string Point identifier
Version uint64 Point version
Score float32 Similarity score (lower is better for Euclidean distance; higher is better for cosine similarity and dot product)
Payload map[string]interface{} Point metadata

Filter

Filter defines the conditions used to filter search results:

// Filter groups the conditions applied to a search.
//
//   - Must: every listed condition has to be satisfied (AND).
//   - Should: at least one listed condition has to be satisfied (OR).
//   - MustNot: none of the listed conditions may be satisfied (NOT).
//
// Each group is omitted from JSON when it is empty.
type Filter struct {
    Must    []*Condition `json:"must,omitempty"`
    Should  []*Condition `json:"should,omitempty"`
    MustNot []*Condition `json:"must_not,omitempty"`
}

Filtering Logic:

  • Must: All conditions must be satisfied (AND)
  • Should: At least one condition must be satisfied (OR)
  • MustNot: None of the conditions may be satisfied (NOT)

Condition

Condition represents a single filtering condition:

// Condition is a single filtering condition on the payload field named by Key.
// Match, Range, and Exists carry the exact-match, range, and field-existence
// variants respectively; each is omitted from JSON when nil.
type Condition struct {
    Key    string       `json:"key"`
    Match  *MatchValue  `json:"match,omitempty"`
    Range  *RangeValue  `json:"range,omitempty"`
    Exists *ExistsValue `json:"exists,omitempty"`
}

Supported Condition Types:

Type Description Example
Match Exact match {"category": "electronics"}
Range Range query {"price": {"gte": 100, "lte": 500}}
Exists Field existence {"in_stock": {"exists": true}}

Usage Example:

// Select points whose category is "electronics" and whose price lies between
// 100 and 500, while excluding points whose in_stock payload is false.
// NOTE(review): Gte/Lte are presumably inclusive bounds - confirm against RangeValue.
filter := &Filter{
    // Must: every condition in this group has to be satisfied (AND).
    Must: []*Condition{
        {
            Key: "category",
            Match: &MatchValue{
                Value: "electronics",
            },
        },
        {
            Key: "price",
            Range: &RangeValue{
                Gte: 100,
                Lte: 500,
            },
        },
    },
    // MustNot: a point is rejected when a condition in this group matches.
    MustNot: []*Condition{
        {
            Key: "in_stock",
            Match: &MatchValue{
                Value: false,
            },
        },
    },
}

Serialization Mechanism

Protobuf Definition

GoVector uses Protocol Buffers for efficient serialization:

// Point is the wire representation of a stored vector point.
message Point {
    string id = 1;
    uint64 version = 2;
    // Vector components packed as raw bytes; the Go serialization code writes
    // each float32 component as 4 little-endian bytes.
    bytes vector = 3;
    // Metadata payload keyed by field name.
    map<string, Value> payload = 4;
}

// ArrayValue wraps a list of doubles. A oneof cannot contain a repeated field
// directly, so the list is carried in its own message.
message ArrayValue {
    repeated double values = 1;
}

// Value is a single payload value; at most one member of kind is set.
message Value {
    oneof kind {
        string string_value = 1;
        int64 int_value = 2;
        double double_value = 3;
        bool bool_value = 4;
        ArrayValue array_value = 5;
    }
}

Serialization Process

// toProtoPoint converts a PointStruct into its protobuf form.
//
// The vector is packed into a byte slice, 4 bytes per component, holding the
// little-endian IEEE-754 bit pattern of each float32. Every payload entry is
// converted with interfaceToValue; the resulting payload map is always
// non-nil, even when p.Payload is empty.
//
// p must be non-nil: its fields are read unconditionally.
func toProtoPoint(p *PointStruct) *proto.Point {
    const float32Size = 4

    packed := make([]byte, len(p.Vector)*float32Size)
    for idx, component := range p.Vector {
        offset := idx * float32Size
        binary.LittleEndian.PutUint32(packed[offset:offset+float32Size], math.Float32bits(component))
    }

    converted := make(map[string]*proto.Value)
    for key, raw := range p.Payload {
        converted[key] = interfaceToValue(raw)
    }

    return &proto.Point{
        ID:      p.ID,
        Version: p.Version,
        Vector:  packed,
        Payload: converted,
    }
}

// interfaceToValue converts a payload value into a proto.Value.
//
// Supported dynamic types are string, int, int64, float32, float64, bool,
// []float32 and []float64. Integers are stored as int64, floats as float64,
// and []float32 is widened element by element into a new []float64. A
// []float64 is stored as-is, without copying.
//
// Any other dynamic type (including a nil interface) produces a Value whose
// Kind is left unset.
func interfaceToValue(v interface{}) *proto.Value {
    // wrapDoubles builds the array variant around an existing []float64.
    wrapDoubles := func(values []float64) *proto.Value {
        return &proto.Value{
            Kind: &proto.Value_ArrayValue{ArrayValue: &proto.ArrayValue{Values: values}},
        }
    }

    switch typed := v.(type) {
    case string:
        return &proto.Value{Kind: &proto.Value_StringValue{StringValue: typed}}
    case int:
        return &proto.Value{Kind: &proto.Value_IntValue{IntValue: int64(typed)}}
    case int64:
        return &proto.Value{Kind: &proto.Value_IntValue{IntValue: typed}}
    case float64:
        return &proto.Value{Kind: &proto.Value_DoubleValue{DoubleValue: typed}}
    case float32:
        return &proto.Value{Kind: &proto.Value_DoubleValue{DoubleValue: float64(typed)}}
    case bool:
        return &proto.Value{Kind: &proto.Value_BoolValue{BoolValue: typed}}
    case []float64:
        return wrapDoubles(typed)
    case []float32:
        widened := make([]float64, len(typed))
        for idx, component := range typed {
            widened[idx] = float64(component)
        }
        return wrapDoubles(widened)
    default:
        // Unsupported type: return an empty Value with no kind selected.
        return &proto.Value{}
    }
}

Deserialization Process

// fromProtoPoint converts a protobuf Point back into a PointStruct.
//
// The vector bytes are read 4 at a time as little-endian float32 bit
// patterns. If the byte length is not a multiple of 4, the trailing bytes are
// ignored because the component count is len/4 rounded down. Each payload
// entry is converted with valueToInterface; the resulting payload map is
// always non-nil.
//
// pbPoint must be non-nil: its fields are read unconditionally.
func fromProtoPoint(pbPoint *proto.Point) *PointStruct {
    const float32Size = 4

    raw := pbPoint.Vector
    components := make([]float32, len(raw)/float32Size)
    for idx := range components {
        start := idx * float32Size
        components[idx] = math.Float32frombits(binary.LittleEndian.Uint32(raw[start : start+float32Size]))
    }

    meta := make(map[string]interface{})
    for key, pbValue := range pbPoint.Payload {
        meta[key] = valueToInterface(pbValue)
    }

    return &PointStruct{
        ID:      pbPoint.ID,
        Version: pbPoint.Version,
        Vector:  components,
        Payload: meta,
    }
}

Distance Calculation

Supported Metrics

GoVector supports three distance metrics:

// Distance identifies the metric used to compare two vectors.
type Distance int

const (
    // Cosine selects cosine similarity.
    Cosine Distance = iota
    // Euclidean selects Euclidean distance.
    Euclidean
    // Dot selects the dot product.
    Dot
)

Cosine Similarity

// CosineDistance returns the cosine of the angle between a and b, i.e.
// dot(a, b) / (|a| * |b|). Despite its name, the result is a similarity:
// 1 for vectors pointing the same way, 0 for orthogonal vectors, and -1 for
// opposite vectors.
//
// If either vector has zero squared norm (including empty input), the result
// is 0. The loop walks the indices of a, so b must be at least as long as a;
// a shorter b causes an index-out-of-range panic, and extra elements of a
// longer b are ignored.
func CosineDistance(a, b []float32) float32 {
    var dot, sqA, sqB float32
    for i, x := range a {
        y := b[i]
        dot += x * y
        sqA += x * x
        sqB += y * y
    }

    // A zero-length vector has no direction; report 0 rather than dividing by zero.
    if sqA == 0 || sqB == 0 {
        return 0
    }

    magA := float32(math.Sqrt(float64(sqA)))
    magB := float32(math.Sqrt(float64(sqB)))
    return dot / (magA * magB)
}

Euclidean Distance

// EuclideanDistance returns the straight-line (L2) distance between a and b:
// the square root of the sum of squared component differences.
//
// Empty input yields 0. The loop walks the indices of a, so b must be at
// least as long as a; a shorter b causes an index-out-of-range panic, and
// extra elements of a longer b are ignored.
func EuclideanDistance(a, b []float32) float32 {
    var sumSquares float32
    for i, x := range a {
        delta := x - b[i]
        sumSquares += delta * delta
    }
    return float32(math.Sqrt(float64(sumSquares)))
}

Dot Product

// DotProduct returns the sum of the element-wise products of a and b.
//
// Empty input yields 0. The loop walks the indices of a, so b must be at
// least as long as a; a shorter b causes an index-out-of-range panic, and
// extra elements of a longer b are ignored.
func DotProduct(a, b []float32) float32 {
    var total float32
    for i, x := range a {
        total += x * b[i]
    }
    return total
}

Payload Data Types

The Payload supports the following data types:

Type Go Type Storage Format
String string UTF-8 encoded string
Integer int, int64 64-bit signed integer
Float float32, float64 64-bit float
Boolean bool 1 byte (0 or 1)
Array []float32, []float64 Repeated 64-bit float (double) values

Version Management

Each PointStruct carries a version number used for:

  • Optimistic Locking: Prevents concurrent update conflicts
  • Cache Invalidation: Ensures data consistency
  • Change Tracking: Records data modification history

Example of a version-checked upsert:

// upsertWithVersion stores point only if its version is strictly greater than
// the version of any point already stored under the same ID.
//
// On success the caller's point is mutated: its Version is incremented by one
// before it is handed to Upsert. It returns a "version conflict" error when
// the incoming version is not greater than the stored one, or otherwise
// whatever Upsert returns.
func (c *Collection) upsertWithVersion(point *PointStruct) error {
    // NOTE(review): the lookup error is discarded, so a failed read that
    // returns a nil point skips the version check entirely - confirm that
    // GetPoint reports "not found" this way and that this is intended.
    existing, _ := c.storage.GetPoint(c.name, point.ID)
    if existing != nil && point.Version <= existing.Version {
        return errors.New("version conflict: new version must be greater")
    }

    point.Version += 1
    return c.Upsert([]*PointStruct{point})
}