Overview
The embeddings API generates dense vector representations for:
Text (transformer models)
Images (vision encoders)
Audio (acoustic encoders)
Embeddings enable semantic search, retrieval-augmented generation (RAG), and similarity comparison.
cactus_embed
Generate text embeddings.
/**
 * Generate a text embedding.
 *
 * model             Embedding model handle from cactus_init.
 * text              Input text to embed.
 * embeddings_buffer Output buffer for the embedding vector.
 * buffer_size       Capacity of embeddings_buffer in bytes.
 * embedding_dim     Output parameter: actual embedding dimension.
 * normalize         Whether to L2-normalize the output vector.
 *
 * Returns the number of float values written on success,
 * -1 on error, or -2 if the buffer is too small.
 */
int cactus_embed(cactus_model_t model,
                 const char *text,
                 float *embeddings_buffer,
                 size_t buffer_size,
                 size_t *embedding_dim,
                 bool normalize);
Parameters:
- model — Embedding model handle from cactus_init
- text — Input text to embed
- embeddings_buffer — Output buffer for the embedding vector
- buffer_size — Size of embeddings_buffer in bytes
- embedding_dim — Output parameter: actual embedding dimension
- normalize — Whether to L2-normalize the output vector

Returns: number of float values written on success, -1 on error, -2 if the buffer is too small
cactus_image_embed
Generate image embeddings.
/**
 * Generate an image embedding.
 *
 * model             Vision model handle (e.g., Siglip2).
 * image_path        Path to the image file to embed.
 * embeddings_buffer Output buffer for the embedding vector.
 * buffer_size       Capacity of embeddings_buffer in bytes.
 * embedding_dim     Output parameter: actual embedding dimension.
 *
 * Returns the number of float values written on success,
 * -1 on error, or -2 if the buffer is too small.
 */
int cactus_image_embed(cactus_model_t model,
                       const char *image_path,
                       float *embeddings_buffer,
                       size_t buffer_size,
                       size_t *embedding_dim);
Parameters:
- model — Vision model handle (e.g., Siglip2)
- image_path — Path to the image file to embed
- embeddings_buffer — Output buffer for the embedding vector
- buffer_size — Size of embeddings_buffer in bytes
- embedding_dim — Output parameter: actual embedding dimension

Returns: number of float values written on success, -1 on error, -2 if the buffer is too small
cactus_audio_embed
Generate audio embeddings.
/**
 * Generate an audio embedding.
 *
 * model             ASR model handle (Whisper, Parakeet).
 * audio_path        Path to the audio file to embed.
 * embeddings_buffer Output buffer for the embedding vector.
 * buffer_size       Capacity of embeddings_buffer in bytes.
 * embedding_dim     Output parameter: actual embedding dimension.
 *
 * Returns the number of float values written on success,
 * -1 on error, or -2 if the buffer is too small.
 */
int cactus_audio_embed(cactus_model_t model,
                       const char *audio_path,
                       float *embeddings_buffer,
                       size_t buffer_size,
                       size_t *embedding_dim);
Parameters:
- model — ASR model handle (Whisper, Parakeet)
- audio_path — Path to the audio file to embed
- embeddings_buffer — Output buffer for the embedding vector
- buffer_size — Size of embeddings_buffer in bytes
- embedding_dim — Output parameter: actual embedding dimension

Returns: number of float values written on success, -1 on error, -2 if the buffer is too small
Example: Text Embeddings
#include "cactus_ffi.h"
#include <stdio.h>
#include <stdlib.h>
int main () {
// Initialize embedding model
cactus_model_t model = cactus_init ( "/path/to/nomic-embed" , NULL , false );
if ( ! model) {
fprintf (stderr, "Failed to load model \n " );
return 1 ;
}
// Allocate buffer (typical dimension: 768 or 1024)
float embeddings [ 1024 ];
size_t actual_dim = 0 ;
int result = cactus_embed (
model,
"The quick brown fox jumps over the lazy dog" ,
embeddings,
sizeof (embeddings),
& actual_dim,
true // normalize
);
if (result > 0 ) {
printf ( "Embedding dimension: %zu \n " , actual_dim);
printf ( "First 5 values: " );
for ( size_t i = 0 ; i < 5 && i < actual_dim; i ++ ) {
printf ( " %.4f " , embeddings [i]);
}
printf ( " \n " );
} else if (result == - 2 ) {
printf ( "Buffer too small, need %zu bytes \n " , actual_dim * sizeof ( float ));
} else {
printf ( "Error: %s \n " , cactus_get_last_error ());
}
cactus_destroy (model);
return 0 ;
}
Example: Image Embeddings
#include "cactus_ffi.h"
#include <math.h>
float cosine_similarity ( const float * a , const float * b , size_t dim ) {
float dot = 0.0 f , norm_a = 0.0 f , norm_b = 0.0 f ;
for ( size_t i = 0 ; i < dim; i ++ ) {
dot += a [i] * b [i];
norm_a += a [i] * a [i];
norm_b += b [i] * b [i];
}
return dot / ( sqrtf (norm_a) * sqrtf (norm_b));
}
#include <stdio.h>

/* Compare two images by embedding each and measuring cosine similarity. */
int main(void) {
    cactus_model_t model = cactus_init("/path/to/siglip2", NULL, false);
    if (!model) {
        fprintf(stderr, "Failed to load model\n");
        return 1;
    }

    /* Siglip2 outputs 1152-dim vectors (see the dimensions table);
     * a 768-float buffer would make cactus_image_embed return -2. */
    float embed1[1152], embed2[1152];
    size_t dim = 0;

    /* Bail out if either call fails; otherwise the buffers hold
     * uninitialized garbage and the similarity is meaningless. */
    if (cactus_image_embed(model, "image1.jpg", embed1, sizeof(embed1), &dim) <= 0 ||
        cactus_image_embed(model, "image2.jpg", embed2, sizeof(embed2), &dim) <= 0) {
        fprintf(stderr, "Embedding failed: %s\n", cactus_get_last_error());
        cactus_destroy(model);
        return 1;
    }

    float similarity = cosine_similarity(embed1, embed2, dim);
    printf("Image similarity: %.4f\n", similarity);

    cactus_destroy(model);
    return 0;
}
Example: Audio Embeddings
#include "cactus_ffi.h"
#include <stdio.h> /* required for printf/fprintf — missing in the original */

/* Extract an audio embedding from a WAV file and report its dimension. */
int main(void) {
    cactus_model_t model = cactus_init("/path/to/whisper", NULL, false);
    if (!model) {
        fprintf(stderr, "Failed to load model\n");
        return 1;
    }

    float embeddings[1500]; /* Whisper encoder output */
    size_t dim = 0;

    int result = cactus_audio_embed(model,
                                    "speech.wav",
                                    embeddings,
                                    sizeof(embeddings),
                                    &dim);

    if (result > 0) {
        printf("Audio embedding extracted: %zu dimensions\n", dim);
        /* Use embeddings for similarity search, classification, etc. */
    } else {
        /* Surface the failure instead of exiting silently. */
        fprintf(stderr, "Error: %s\n", cactus_get_last_error());
    }

    cactus_destroy(model);
    return 0;
}
Model-Specific Dimensions
| Model | Embedding Dimension | Notes |
| --- | --- | --- |
| nomic-embed-text | 768 | Text embeddings, normalized |
| Qwen2-1.5B | 1536 | Last hidden state |
| Siglip2 | 1152 | Vision encoder output |
| Whisper-small | 768 | Encoder features |
| Parakeet-TDT | 512 | Acoustic features |
Normalization
For similarity search and RAG, always set normalize=true for text embeddings. With L2-normalized vectors, cosine similarity reduces to a plain dot product:

float similarity = 0.0f;
for (size_t i = 0; i < dim; i++) {
    similarity += embed_a[i] * embed_b[i];
}
Integration with Vector Index
Embeddings are typically stored in a vector index for fast retrieval:
/* Embed two documents (L2-normalized so dot products give cosine
 * similarity). */
float embed1[768], embed2[768];
size_t dim = 0;
cactus_embed(model, "document 1", embed1, sizeof(embed1), &dim, true);
cactus_embed(model, "document 2", embed2, sizeof(embed2), &dim, true);

/* Create an index with matching dimensionality. */
cactus_index_t index = cactus_index_init("/path/to/index", dim);

/* Add both documents under integer ids. */
int ids[] = {1, 2};
const char *docs[] = {"document 1", "document 2"};
const float *embeds[] = {embed1, embed2};
cactus_index_add(index, ids, docs, NULL, embeds, 2, dim);

/* Embed the query and retrieve the top-5 matches. */
float query_embed[768];
cactus_embed(model, "search query", query_embed, sizeof(query_embed), &dim, true);
const float *query_embeds[] = {query_embed};
int *result_ids = NULL;
float *scores = NULL;
size_t result_count = 0;
/* NOTE(review): result_count is passed for both the id count and the score
 * count — confirm against the cactus_index_query signature; also confirm
 * who frees result_ids/scores. */
cactus_index_query(index,
                   query_embeds, 1, dim,
                   "{\"top_k\":5}",
                   &result_ids, &result_count,
                   &scores, &result_count);
See Also
Vector Index Store and query embeddings
Python SDK Python embeddings API
RAG Guide Build RAG systems
Semantic Search Implement semantic search