Vector database selection, indexing strategies, and semantic search optimization.
Provides vector database operations for semantic search, including setup, indexing strategies (HNSW, IVF), hybrid search, and metadata filtering for Chroma, Pinecone, and Weaviate.
```
/plugin marketplace add pluginagentmarketplace/custom-plugin-ai-engineer
/plugin install pluginagentmarketplace-ai-engineer-plugin@pluginagentmarketplace/custom-plugin-ai-engineer
```

This skill inherits all available tools. When active, it can use any tool Claude has access to.
Bundled resources:

- assets/vector_store_config.yaml
- references/VECTOR_DB_COMPARISON.md
- scripts/vector_store_manager.py

Master vector storage and retrieval for AI applications.
```python
import chromadb
from chromadb.utils import embedding_functions

# Initialize client
client = chromadb.Client()  # In-memory
# client = chromadb.PersistentClient(path="./chroma_db")  # Persistent

# Create collection with embedding function
embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2"
)
collection = client.create_collection(
    name="documents",
    embedding_function=embedding_fn
)

# Add documents
collection.add(
    documents=["Document 1 text", "Document 2 text"],
    metadatas=[{"source": "file1"}, {"source": "file2"}],
    ids=["doc1", "doc2"]
)

# Query
results = collection.query(
    query_texts=["search query"],
    n_results=5
)
```
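To keep data across runs, swap in the persistent client and fetch the collection idempotently; a minimal sketch reusing the `embedding_fn` defined above:

```python
import chromadb

# Persistent client stores data on disk at the given path
client = chromadb.PersistentClient(path="./chroma_db")

# get_or_create avoids an error if the collection already exists
collection = client.get_or_create_collection(
    name="documents",
    embedding_function=embedding_fn
)
```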
```python
from pinecone import Pinecone, ServerlessSpec

# Initialize
pc = Pinecone(api_key="YOUR_API_KEY")

# Create index
pc.create_index(
    name="documents",
    dimension=1536,  # OpenAI embedding dimension
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-west-2")
)
index = pc.Index("documents")

# Upsert vectors
index.upsert(vectors=[
    {"id": "doc1", "values": embedding1, "metadata": {"text": "..."}},
    {"id": "doc2", "values": embedding2, "metadata": {"text": "..."}}
])

# Query
results = index.query(
    vector=query_embedding,
    top_k=10,
    include_metadata=True
)
```
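The `embedding1`, `embedding2`, and `query_embedding` values above are assumed to be 1536-dimensional vectors. One way to produce them, assuming the OpenAI Python SDK (`text-embedding-3-small` returns 1536-dim vectors, matching the index):

```python
from openai import OpenAI

openai_client = OpenAI()  # reads OPENAI_API_KEY from the environment

def embed(texts: list[str]) -> list[list[float]]:
    # Each response item carries one embedding, in input order
    response = openai_client.embeddings.create(
        model="text-embedding-3-small",
        input=texts
    )
    return [item.embedding for item in response.data]

embedding1, embedding2 = embed(["Document 1 text", "Document 2 text"])
query_embedding = embed(["search query"])[0]
```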
```python
import weaviate
from weaviate.classes.config import Configure, Property, DataType

# Connect
client = weaviate.connect_to_local()  # or connect_to_wcs()

# Create collection (class)
collection = client.collections.create(
    name="Document",
    vectorizer_config=Configure.Vectorizer.text2vec_openai(),
    properties=[
        Property(name="content", data_type=DataType.TEXT),
        Property(name="source", data_type=DataType.TEXT)
    ]
)

# Add objects
collection.data.insert({
    "content": "Document text here",
    "source": "file.pdf"
})

# Semantic search
response = collection.query.near_text(
    query="search query",
    limit=5
)
```
| Feature | Chroma | Pinecone | Weaviate | Milvus | Qdrant |
|---|---|---|---|---|---|
| Deployment | Local/Cloud | Cloud | Self/Cloud | Self/Cloud | Self/Cloud |
| Ease of Use | ⭐⭐⭐ | ⭐⭐⭐ | ⭐⭐ | ⭐⭐ | ⭐⭐⭐ |
| Scale | Small-Med | Large | Large | Very Large | Large |
| Filtering | Basic | Advanced | GraphQL | Advanced | Advanced |
| Cost | Free | Pay-per-use | Free/Paid | Free/Paid | Free/Paid |
| Best For | Dev/POC | Production | Hybrid Search | Enterprise | Production |
```python
# HNSW: most common for approximate nearest neighbor
# Good balance of speed and accuracy
index_params = {
    "index_type": "HNSW",
    "metric_type": "COSINE",
    "params": {
        "M": 16,               # Max connections per layer
        "efConstruction": 200  # Build-time accuracy
    }
}
search_params = {
    "ef": 100  # Search-time accuracy
}
```
```python
# IVF: good for very large datasets
# Requires a training phase
index_params = {
    "index_type": "IVF_FLAT",
    "metric_type": "L2",
    "params": {
        "nlist": 1024  # Number of clusters
    }
}
search_params = {
    "nprobe": 10  # Clusters to search
}
```
```python
# FLAT: exact search, no approximation
# Use for small datasets (<100K vectors)
index_params = {
    "index_type": "FLAT",
    "metric_type": "COSINE"
}
```
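The size guidance above (FLAT below ~100K vectors, HNSW as the general-purpose default, IVF for very large corpora) can be folded into a small selector; a sketch whose thresholds are rough assumptions, not hard limits:

```python
def choose_index_params(num_vectors: int) -> dict:
    if num_vectors < 100_000:
        # Exact search is fast enough at this scale
        return {"index_type": "FLAT", "metric_type": "COSINE"}
    if num_vectors < 10_000_000:  # assumed cutoff for illustration
        # HNSW: good speed/accuracy balance, higher memory use
        return {
            "index_type": "HNSW",
            "metric_type": "COSINE",
            "params": {"M": 16, "efConstruction": 200},
        }
    # IVF scales further but requires a training pass over the data
    return {
        "index_type": "IVF_FLAT",
        "metric_type": "COSINE",
        "params": {"nlist": 1024},
    }
```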
```python
import numpy as np

# Cosine Similarity - best for text embeddings
cosine_sim = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

# Euclidean Distance (L2) - sensitive to magnitude
l2_dist = np.linalg.norm(a - b)

# Dot Product - equals cosine for normalized vectors
dot_product = np.dot(a, b)

# When to use what:
# - Cosine: text, semantic similarity
# - L2: images, when magnitude matters
# - Dot Product: pre-normalized vectors
```
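To see why dot product equals cosine for pre-normalized vectors, normalize once at index time; a quick self-contained check:

```python
import numpy as np

a = np.random.rand(384)
b = np.random.rand(384)

# L2-normalize so each vector has unit length
a_unit = a / np.linalg.norm(a)
b_unit = b / np.linalg.norm(b)

cosine = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
dot_of_units = np.dot(a_unit, b_unit)

assert np.isclose(cosine, dot_of_units)  # identical up to float error
```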
```python
class HybridSearch:
    def __init__(self, vector_store, bm25_index):
        self.vector_store = vector_store
        self.bm25_index = bm25_index

    def search(self, query: str, k: int = 10, alpha: float = 0.5):
        # Dense retrieval (semantic)
        dense_results = self.vector_store.search(query, k=k * 2)

        # Sparse retrieval (keyword)
        sparse_results = self.bm25_index.search(query, k=k * 2)

        # Weighted Reciprocal Rank Fusion (constant 60 damps rank differences)
        scores = {}
        for rank, doc in enumerate(dense_results):
            scores[doc.id] = scores.get(doc.id, 0) + alpha / (rank + 60)
        for rank, doc in enumerate(sparse_results):
            scores[doc.id] = scores.get(doc.id, 0) + (1 - alpha) / (rank + 60)

        # Sort and return top-k
        sorted_docs = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        return sorted_docs[:k]
```
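The `bm25_index` above is assumed to expose `search(query, k)` returning ranked documents with an `id` attribute; a minimal adapter sketch over the `rank_bm25` package (one possible sparse backend, swap in whatever you use):

```python
from dataclasses import dataclass
from rank_bm25 import BM25Okapi

@dataclass
class Doc:
    id: str
    text: str

class BM25Index:
    def __init__(self, docs: list[Doc]):
        self.docs = docs
        # Naive whitespace tokenization; use a real tokenizer in practice
        self.bm25 = BM25Okapi([d.text.lower().split() for d in docs])

    def search(self, query: str, k: int = 10) -> list[Doc]:
        scores = self.bm25.get_scores(query.lower().split())
        ranked = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
        return [self.docs[i] for i in ranked[:k]]
```

Any dense store exposing the same `search(query, k)` interface can then be fused by `HybridSearch`.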
```python
# Pinecone filtering
results = index.query(
    vector=embedding,
    top_k=10,
    filter={
        "category": {"$eq": "technical"},
        "date": {"$gte": "2024-01-01"},
        "$or": [
            {"author": "John"},
            {"author": "Jane"}
        ]
    }
)
```
```python
# Chroma filtering
results = collection.query(
    query_embeddings=[embedding],
    n_results=10,
    where={
        "$and": [
            {"category": {"$eq": "technical"}},
            {"year": {"$gte": 2024}}
        ]
    }
)
```
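Weaviate supports the same pattern through its v4 `Filter` builder; a sketch against the `Document` collection created earlier:

```python
from weaviate.classes.query import Filter

# Combine semantic search with a property filter
response = collection.query.near_text(
    query="search query",
    limit=10,
    filters=Filter.by_property("source").equal("file.pdf")
)
```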
```python
# Insert in batches for better performance
BATCH_SIZE = 100
for i in range(0, len(documents), BATCH_SIZE):
    batch = documents[i:i + BATCH_SIZE]
    vectors = [(doc.id, doc.embedding, doc.metadata) for doc in batch]
    index.upsert(vectors=vectors)
```
```python
from functools import lru_cache

@lru_cache(maxsize=1000)
def cached_search(query: str):
    # Embed inside the cached function so the query string itself is the cache key
    query_embedding = embed_query(query)  # embed_query: your embedding helper
    return index.query(vector=query_embedding, top_k=10, include_metadata=True)
```
```python
from tenacity import retry, stop_after_attempt, wait_exponential

@retry(stop=stop_after_attempt(3), wait=wait_exponential(min=2, max=30))
def upsert_with_retry(vectors):
    return index.upsert(vectors=vectors)

def batch_upsert(vectors, batch_size=100):
    for i in range(0, len(vectors), batch_size):
        upsert_with_retry(vectors[i:i + batch_size])
```
| Symptom | Cause | Solution |
|---|---|---|
| Slow inserts | No batching | Batch upserts |
| Poor recall | Wrong metric | Use cosine for text |
| Connection timeout | Large payload | Reduce batch size |
```python
def test_vector_upsert_query():
    # "store" is assumed: any wrapper exposing upsert/query over the index
    store.upsert([{"id": "1", "values": [0.1] * 384}])
    results = store.query([0.1] * 384, top_k=1)
    assert results[0]["id"] == "1"
```
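A companion test for metadata filtering can follow the same shape; a hedged sketch assuming the same `store` wrapper and a Pinecone-style `filter` argument:

```python
def test_metadata_filter():
    store.upsert([
        {"id": "a", "values": [0.1] * 384, "metadata": {"category": "technical"}},
        {"id": "b", "values": [0.1] * 384, "metadata": {"category": "general"}},
    ])
    results = store.query(
        [0.1] * 384, top_k=2, filter={"category": {"$eq": "technical"}}
    )
    assert all(r["metadata"]["category"] == "technical" for r in results)
```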