Overview

The vector index API provides fast similarity search over embeddings with:
  • Persistent on-disk storage (memory-mapped files)
  • INT16 quantization for 2x memory savings
  • Batch operations
  • Document metadata storage

cactus_index_init

Create or open a vector index.
cactus_index_t cactus_index_init(
    const char* index_dir,
    size_t embedding_dim
);
  • index_dir (string, required): Directory path for index files (created if it doesn't exist)
  • embedding_dim (size_t, required): Dimensionality of embedding vectors
  • Returns (cactus_index_t): Index handle, or NULL on error
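
A minimal sketch of creating (or reopening) an index at a placeholder path, paired with the matching destroy call:

cactus_index_t index = cactus_index_init("/path/to/index", 768);
if (!index) {
    fprintf(stderr, "cactus_index_init failed\n");  // handle the error before using the index
}
// ... add, query, get ...
cactus_index_destroy(index);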

cactus_index_add

Add documents to index.
int cactus_index_add(
    cactus_index_t index,
    const int* ids,
    const char** documents,
    const char** metadatas,
    const float** embeddings,
    size_t count,
    size_t embedding_dim
);
  • index (cactus_index_t, required): Index handle from cactus_index_init
  • ids (int*, required): Array of unique document IDs
  • documents (char**, required): Array of document text strings
  • metadatas (char**, optional): Array of metadata JSON strings (can be NULL)
  • embeddings (float**, required): Array of embedding vectors
  • count (size_t, required): Number of documents to add
  • embedding_dim (size_t, required): Dimensionality of embeddings
  • Returns (int): 0 on success, -1 on error

cactus_index_query

Find nearest neighbors.
int cactus_index_query(
    cactus_index_t index,
    const float** embeddings,
    size_t embeddings_count,
    size_t embedding_dim,
    const char* options_json,
    int** id_buffers,
    size_t* id_buffer_sizes,
    float** score_buffers,
    size_t* score_buffer_sizes
);
  • index (cactus_index_t, required): Index handle
  • embeddings (float**, required): Array of query embedding vectors
  • embeddings_count (size_t, required): Number of queries
  • embedding_dim (size_t, required): Embedding dimension
  • options_json (string, optional): JSON options, e.g. {"top_k":10,"score_threshold":0.5}
  • id_buffers (int**, required): Output: array of result ID arrays (caller must free)
  • id_buffer_sizes (size_t*, required): Output: sizes of each result array
  • score_buffers (float**, required): Output: array of similarity score arrays (caller must free)
  • score_buffer_sizes (size_t*, required): Output: sizes of each score array
  • Returns (int): 0 on success, -1 on error

Query Options

{
  "top_k": 10,
  "score_threshold": 0.5
}
  • top_k (int, default 10): Maximum number of results per query
  • score_threshold (float, default -1.0): Minimum similarity score (cosine similarity, range [-1, 1]); -1.0 disables filtering
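
When the options are fixed, a string literal works (as in the examples below); if they need to vary at runtime, the JSON can be assembled with snprintf. A small sketch with arbitrary values (requires <stdio.h>):

char options[64];
snprintf(options, sizeof(options),
         "{\"top_k\":%d,\"score_threshold\":%.2f}", 5, 0.3);
// pass `options` as the options_json argument of cactus_index_query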

cactus_index_get

Retrieve documents by ID.
int cactus_index_get(
    cactus_index_t index,
    const int* ids,
    size_t ids_count,
    char** document_buffers,
    size_t* document_buffer_sizes,
    char** metadata_buffers,
    size_t* metadata_buffer_sizes,
    float** embedding_buffers,
    size_t* embedding_buffer_sizes
);
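
The buffer parameters mirror those of cactus_index_query: one output slot per requested ID. A sketch retrieving two documents, assuming the same caller-must-free convention as the query buffers (verify ownership against the header if in doubt):

int get_ids[] = {1, 2};
char* docs[2];      size_t doc_sizes[2];
char* metas[2];     size_t meta_sizes[2];
float* embeds[2];   size_t embed_sizes[2];

if (cactus_index_get(index, get_ids, 2,
                     docs, doc_sizes,
                     metas, meta_sizes,
                     embeds, embed_sizes) == 0) {
    for (size_t i = 0; i < 2; i++) {
        printf("ID %d: %s\n", get_ids[i], docs[i]);
        free(docs[i]);
        if (metas[i]) free(metas[i]);
        free(embeds[i]);
    }
}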

cactus_index_delete

Delete documents by ID.
int cactus_index_delete(
    cactus_index_t index,
    const int* ids,
    size_t ids_count
);
  • ids (int*, required): Array of document IDs to delete
  • ids_count (size_t, required): Number of IDs
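
A short usage sketch, assuming the same 0 on success / -1 on error convention as the other calls:

int stale_ids[] = {2, 3};
if (cactus_index_delete(index, stale_ids, 2) != 0) {
    fprintf(stderr, "cactus_index_delete failed\n");
}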

cactus_index_compact

Remove deleted documents and rebuild index.
int cactus_index_compact(cactus_index_t index);

cactus_index_destroy

Close index and free resources.
void cactus_index_destroy(cactus_index_t index);

Example: Build Index

#include "cactus_ffi.h"
#include <stdio.h>
#include <stdlib.h>

int main() {
    // Load embedding model
    cactus_model_t model = cactus_init("/path/to/nomic-embed", NULL, false);
    if (!model) {
        fprintf(stderr, "Failed to load model\n");
        return 1;
    }
    
    // Create index
    cactus_index_t index = cactus_index_init("/path/to/index", 768);
    if (!index) {
        fprintf(stderr, "Failed to create index\n");
        return 1;
    }
    
    // Prepare documents
    const char* texts[] = {
        "The quick brown fox jumps over the lazy dog",
        "A journey of a thousand miles begins with a single step",
        "To be or not to be, that is the question"
    };
    size_t num_docs = 3;
    
    // Generate embeddings
    float** embeddings = malloc(num_docs * sizeof(float*));
    for (size_t i = 0; i < num_docs; i++) {
        embeddings[i] = malloc(768 * sizeof(float));
        size_t dim = 0;
        cactus_embed(model, texts[i], embeddings[i], 768 * sizeof(float), &dim, true);
    }
    
    // Add to index
    int ids[] = {1, 2, 3};
    cactus_index_add(
        index,
        ids,
        texts,
        NULL,  // no metadata
        (const float**)embeddings,
        num_docs,
        768
    );
    
    printf("Added %zu documents to index\n", num_docs);
    
    // Cleanup
    for (size_t i = 0; i < num_docs; i++) {
        free(embeddings[i]);
    }
    free(embeddings);
    
    cactus_index_destroy(index);
    cactus_destroy(model);
    return 0;
}

Example: Search Index

#include "cactus_ffi.h"
#include <stdio.h>
#include <stdlib.h>

int main() {
    cactus_model_t model = cactus_init("/path/to/nomic-embed", NULL, false);
    cactus_index_t index = cactus_index_init("/path/to/index", 768);
    
    // Generate query embedding
    const char* query = "What is the meaning of life?";
    float query_embed[768];
    size_t dim = 0;
    cactus_embed(model, query, query_embed, sizeof(query_embed), &dim, true);
    
    // Search
    const float* query_embeds[] = {query_embed};
    int* result_ids = NULL;
    float* scores = NULL;
    size_t result_count = 0;
    
    const char* options = "{\"top_k\":5,\"score_threshold\":0.3}";
    int status = cactus_index_query(
        index,
        query_embeds,
        1,  // single query
        768,
        options,
        &result_ids,
        &result_count,
        &scores,
        &result_count
    );
    
    if (status == 0) {
        printf("Found %zu results:\n", result_count);
        for (size_t i = 0; i < result_count; i++) {
            printf("  ID: %d, Score: %.4f\n", result_ids[i], scores[i]);
        }
        
        free(result_ids);
        free(scores);
    }
    
    cactus_index_destroy(index);
    cactus_destroy(model);
    return 0;
}

Example: Batch Queries

// Assumes `model` and `index` are initialized as in the previous examples
const char* queries[] = {
    "What is machine learning?",
    "How do neural networks work?",
    "Explain deep learning"
};
size_t num_queries = 3;

// Generate query embeddings
float** query_embeds = malloc(num_queries * sizeof(float*));
for (size_t i = 0; i < num_queries; i++) {
    query_embeds[i] = malloc(768 * sizeof(float));
    size_t dim = 0;
    cactus_embed(model, queries[i], query_embeds[i], 768 * sizeof(float), &dim, true);
}

// Batch search
int** result_ids = malloc(num_queries * sizeof(int*));
float** scores = malloc(num_queries * sizeof(float*));
size_t* result_counts = malloc(num_queries * sizeof(size_t));

cactus_index_query(
    index,
    (const float**)query_embeds,
    num_queries,
    768,
    "{\"top_k\":3}",
    result_ids,
    result_counts,
    scores,
    result_counts
);

// Process results
for (size_t q = 0; q < num_queries; q++) {
    printf("Query %zu results:\n", q);
    for (size_t i = 0; i < result_counts[q]; i++) {
        printf("  ID: %d, Score: %.4f\n", result_ids[q][i], scores[q][i]);
    }
    free(result_ids[q]);
    free(scores[q]);
    free(query_embeds[q]);
}

free(result_ids);
free(scores);
free(result_counts);
free(query_embeds);

File Format

The index stores two memory-mapped files:
  • index.bin: Quantized embeddings (INT16) + scales
  • data.bin: Document text + metadata
This design enables:
  • Fast startup (no deserialization)
  • Low memory usage (OS manages paging)
  • Incremental updates (append-only writes)
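
The exact on-disk layout is not documented here, but the core idea of INT16 quantization is per-vector symmetric scaling: store a float scale next to each quantized vector and reconstruct values as quantized * scale. An illustrative sketch of that technique (not the library's actual format):

#include <stdint.h>
#include <math.h>
#include <stddef.h>

// Quantize one embedding to int16 and return the per-vector scale
float quantize_int16(const float* v, size_t dim, int16_t* out) {
    float max_abs = 0.0f;
    for (size_t i = 0; i < dim; i++) {
        float a = fabsf(v[i]);
        if (a > max_abs) max_abs = a;
    }
    float scale = (max_abs > 0.0f) ? max_abs / 32767.0f : 1.0f;
    for (size_t i = 0; i < dim; i++) {
        out[i] = (int16_t)lrintf(v[i] / scale);  // dequantize later as out[i] * scale
    }
    return scale;
}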

Performance

Operation                      Throughput
Add (768-dim)                  ~50k docs/sec
Query (768-dim, 10k docs)      ~1,000 queries/sec
Query (768-dim, 100k docs)     ~200 queries/sec
Performance assumes normalized embeddings and cosine similarity. The index uses brute-force search with SIMD acceleration.
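
Why brute force scales this way: with L2-normalized embeddings, cosine similarity is just a dot product, so each query is a linear scan of dim-length dot products over the stored vectors. A scalar reference version of that inner loop (the actual implementation is SIMD-accelerated):

// Cosine similarity of two L2-normalized vectors reduces to a dot product
float dot_similarity(const float* a, const float* b, size_t dim) {
    float sum = 0.0f;
    for (size_t i = 0; i < dim; i++) {
        sum += a[i] * b[i];
    }
    return sum;  // in [-1, 1] for normalized inputs
}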

See Also

  • Embeddings API: Generate embeddings
  • Python SDK: Python vector index API
  • RAG Guide: Build RAG systems
  • C FFI: Complete FFI reference