Provides instructions for using Pinecone vector database including index creation, upserting vectors, and integrated embeddings for semantic search and RAG systems.
How this skill is triggered — by the user, by Claude, or both
Slash command
/integrations:pinecone
The summary Claude sees in its skill listing — used to decide when to auto-load this skill
Expert skill for using Pinecone - the leading vector database for AI applications, semantic search, and RAG systems.
Expert skill for using Pinecone - the leading vector database for AI applications, semantic search, and RAG systems.
# API ключи: ~/.claude/.credentials.master.env
# Переменная: PINECONE_API_KEY
PINECONE_API_KEY=os.getenv('PINECONE_API_KEY')
| Индекс | Описание |
|---|---|
| support-bot | Векторы саппорт-бота |
| docs-search | Поиск по документации |
| agent-dev | Разработка агентов |
| test-index | Тестовый |
Параметры: Dimension 3072 (OpenAI text-embedding-3-large), Metric cosine, Serverless AWS us-east-1
Best for:
Advantages:
pip install pinecone
from pinecone import Pinecone
import os
# Shared Pinecone client; the API key comes from the PINECONE_API_KEY
# environment variable (None when the variable is unset).
pc = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))
def create_index(name: str, dimension: int = 1536, metric: str = "cosine"):
    """Create a serverless index on AWS us-east-1 and return a handle to it.

    Args:
        name: Name of the index to create.
        dimension: Vector dimension (1536 for OpenAI, 768 for many others).
        metric: Similarity metric: "cosine", "euclidean", or "dotproduct".

    Returns:
        The object returned by ``pc.Index(name)`` for the new index.
    """
    from pinecone import ServerlessSpec

    # Build the deployment spec first so the create call stays on one line.
    serverless = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(name=name, dimension=dimension, metric=metric, spec=serverless)
    return pc.Index(name)
# Usage: create a 1536-dimensional index and keep the returned handle.
index = create_index(name="my-knowledge-base", dimension=1536)
def create_index_with_embeddings(name: str, embed_model: str = "multilingual-e5-large"):
    """Create a serverless index with an integrated (Pinecone-hosted) embedding model.

    Integrated-embedding indexes are created with ``pc.create_index_for_model``;
    ``pc.create_index`` does not take an embedding configuration. Dimension and
    metric are intentionally not passed so that they follow the chosen model
    (a fixed ``dimension=1024`` / ``metric="cosine"`` does not fit the sparse
    model listed below).

    Models:
        - multilingual-e5-large (1024 dim, 100+ languages)
        - llama-text-embed-v2 (1024 dim)
        - pinecone-sparse-english-v0 (sparse)

    Args:
        name: Index name.
        embed_model: Name of the hosted embedding model to attach to the index.

    Returns:
        The object returned by ``pc.Index(name)`` for the new index.
    """
    pc.create_index_for_model(
        name=name,
        cloud="aws",
        region="us-east-1",
        embed={
            "model": embed_model,
            # The model's "text" input is read from the record field "text".
            "field_map": {"text": "text"},
        },
    )
    return pc.Index(name)
def upsert_vectors(index_name: str, vectors: list, namespace: str = "default",
                   batch_size: int = 100):
    """Upsert vectors with metadata, sending them in bounded batches.

    vectors format:
        [{"id": "doc1", "values": [...], "metadata": {...}}, ...]

    Args:
        index_name: Name of the target index.
        vectors: Vector records to upsert.
        namespace: Namespace to write to (defaults to "default").
        batch_size: Maximum number of vectors per upsert request; keeps a
            large list from being sent as one oversized request.

    Returns:
        Number of vectors submitted (``len(vectors)``).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    # Nothing to send: skip the client call entirely.
    if not vectors:
        return 0

    index = pc.Index(index_name)
    for start in range(0, len(vectors), batch_size):
        index.upsert(
            vectors=vectors[start:start + batch_size],
            namespace=namespace,
        )
    return len(vectors)
# Example with embeddings
import openai
def embed_and_upsert(index_name: str, texts: list, ids: list, metadata: list = None,
                     namespace: str = "default"):
    """Embed texts with OpenAI and upsert the resulting vectors to Pinecone.

    Vectors are written to ``namespace`` ("default" unless overridden), the
    same namespace ``query_index`` reads from, so that ``search_similar`` can
    find what this function stores.

    Args:
        index_name: Name of the target index.
        texts: Texts to embed with ``text-embedding-3-small``.
        ids: Vector IDs, one per text.
        metadata: Optional metadata dicts, one per text. When omitted or
            empty, each vector gets ``{"text": <its text>}``.
        namespace: Namespace to write to.

    Returns:
        Number of vectors upserted.

    Raises:
        ValueError: If there are fewer ids (or metadata entries) than texts.
    """
    # Validate before the embedding request so a bad call costs nothing.
    if len(ids) < len(texts):
        raise ValueError("each text needs an id: got fewer ids than texts")
    if metadata and len(metadata) < len(texts):
        raise ValueError("metadata must provide one entry per text")
    if not texts:
        return 0

    # Get embeddings from OpenAI
    client = openai.OpenAI()
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=texts,
    )

    vectors = [
        {
            "id": ids[i],
            "values": item.embedding,
            "metadata": metadata[i] if metadata else {"text": texts[i]},
        }
        for i, item in enumerate(response.data)
    ]

    index = pc.Index(index_name)
    index.upsert(vectors=vectors, namespace=namespace)
    return len(vectors)
def query_index(index_name: str, query_vector: list, top_k: int = 10,
                filter: dict = None, include_metadata: bool = True):
    """Return the matches closest to ``query_vector`` in the "default" namespace.

    Args:
        index_name: Name of the index to query.
        query_vector: Query embedding.
        top_k: Number of results to request.
        filter: Metadata filter passed through to the query.
        include_metadata: Whether results should carry metadata.

    Returns:
        The ``matches`` attribute of the query response.
    """
    query_kwargs = {
        "vector": query_vector,
        "top_k": top_k,
        "filter": filter,
        "include_metadata": include_metadata,
        "namespace": "default",
    }
    response = pc.Index(index_name).query(**query_kwargs)
    return response.matches
# Example with text query
def search_similar(index_name: str, query_text: str, top_k: int = 10):
    """Embed ``query_text`` with OpenAI and return the most similar matches.

    The text is embedded with ``text-embedding-3-small`` and the resulting
    vector is handed to ``query_index``.
    """
    embedding_response = openai.OpenAI().embeddings.create(
        model="text-embedding-3-small",
        input=[query_text],
    )
    first_item = embedding_response.data[0]
    return query_index(index_name, first_item.embedding, top_k)
def search_text(index_name: str, query: str, top_k: int = 10, namespace: str = "default"):
    """Search with integrated embeddings (no external embedding needed).

    Works only with indexes that have an integrated embedding model. The text
    is sent through ``index.search`` so Pinecone embeds it server-side;
    ``index.query`` takes a vector or an id, not raw text.

    Args:
        index_name: Name of the index to search.
        query: Query text.
        top_k: Number of hits to request.
        namespace: Namespace to search (defaults to "default", matching the
            other helpers in this file).

    Returns:
        The list of hits from the search response (``result.hits``). Note that
        hits are record-shaped, unlike the ``matches`` returned by vector
        queries.
    """
    index = pc.Index(index_name)
    results = index.search(
        namespace=namespace,
        query={
            "inputs": {"text": query},  # Text query, not vector
            "top_k": top_k,
        },
    )
    return results["result"]["hits"]
def search_with_filter(index_name: str, query_vector: list, filters: dict):
    """Return the top 10 matches for ``query_vector`` restricted by ``filters``.

    No namespace is passed to the query. Metadata is included in the results.

    Filter examples:
        {"category": "tech"}
        {"price": {"$lt": 100}}
        {"tags": {"$in": ["python", "ai"]}}
        {"$and": [{"category": "tech"}, {"status": "active"}]}
    """
    response = pc.Index(index_name).query(
        vector=query_vector,
        top_k=10,
        filter=filters,
        include_metadata=True,
    )
    return response.matches
# Filter operators:
# $eq - equal
# $ne - not equal
# $gt, $gte - greater than
# $lt, $lte - less than
# $in, $nin - in/not in array
# $and, $or - logical operators
def hybrid_search(index_name: str, query: str, top_k: int = 10, alpha: float = 0.5,
                  dense_model: str = "multilingual-e5-large",
                  sparse_model: str = "pinecone-sparse-english-v0"):
    """Hybrid search combining semantic (dense) and keyword (sparse) matching.

    The query text is embedded twice with Pinecone's hosted inference models,
    the dense values are scaled by ``alpha`` and the sparse values by
    ``1 - alpha``, and both are sent in a single query.

    NOTE(review): this assumes the index stores both dense and sparse values
    produced by the same models (hybrid indexes are expected to use the
    dotproduct metric) — confirm against how the index was populated.

    Args:
        index_name: Name of the index to query.
        query: Query text.
        top_k: Number of results to request.
        alpha: Weight for semantic (1.0 = all semantic, 0.0 = all keyword).
        dense_model: Hosted model used for the dense query embedding.
        sparse_model: Hosted model used for the sparse query embedding.

    Returns:
        The ``matches`` attribute of the query response.

    Raises:
        ValueError: If ``alpha`` is outside the range [0.0, 1.0].
    """
    # Reject a bad weight before spending any API calls.
    if not 0.0 <= alpha <= 1.0:
        raise ValueError("alpha must be between 0.0 and 1.0")

    dense = pc.inference.embed(
        model=dense_model,
        inputs=[query],
        parameters={"input_type": "query"},
    )[0]
    sparse = pc.inference.embed(
        model=sparse_model,
        inputs=[query],
        parameters={"input_type": "query"},
    )[0]

    # Convex combination: weight each side so alpha controls the balance.
    dense_values = [value * alpha for value in dense["values"]]
    sparse_vector = {
        "indices": sparse["sparse_indices"],
        "values": [value * (1.0 - alpha) for value in sparse["sparse_values"]],
    }

    index = pc.Index(index_name)
    results = index.query(
        vector=dense_values,
        sparse_vector=sparse_vector,
        top_k=top_k,
        include_metadata=True,
    )
    return results.matches
def fetch_by_ids(index_name: str, ids: list):
    """Fetch the vectors with the given IDs from the "default" namespace.

    Returns the ``vectors`` attribute of the fetch response.
    """
    response = pc.Index(index_name).fetch(ids=ids, namespace="default")
    return response.vectors
def update_metadata(index_name: str, id: str, metadata: dict):
    """Set metadata on one vector in the "default" namespace.

    Args:
        index_name: Name of the index holding the vector.
        id: ID of the vector to update.
        metadata: Metadata passed as ``set_metadata`` to the update call.
    """
    target = pc.Index(index_name)
    target.update(id=id, set_metadata=metadata, namespace="default")
def delete_vectors(index_name: str, ids: list = None, filter: dict = None,
                   delete_all: bool = False):
    """Delete vectors from the "default" namespace.

    Exactly one selector is used, chosen in this order of precedence:
        1. ``delete_all=True`` — everything in the namespace
        2. ``filter`` — vectors matching a metadata filter
        3. ``ids`` — specific vector IDs

    When none of them is given (or truthy), no delete call is made.
    """
    index = pc.Index(index_name)

    # Pick the single selector to send; earlier branches win over later ones.
    if delete_all:
        selector = {"delete_all": True}
    elif filter:
        selector = {"filter": filter}
    elif ids:
        selector = {"ids": ids}
    else:
        return

    index.delete(namespace="default", **selector)
def get_stats(index_name: str):
    """Return selected index statistics as a plain dict.

    The dict has the keys "total_vector_count", "dimension" and "namespaces",
    copied from the ``describe_index_stats()`` response.
    """
    stats = pc.Index(index_name).describe_index_stats()
    wanted = ("total_vector_count", "dimension", "namespaces")
    return {field: getattr(stats, field) for field in wanted}
def list_indexes():
    """Return the names of all indexes reported by ``pc.list_indexes()``."""
    names = []
    for description in pc.list_indexes():
        names.append(description.name)
    return names
Namespaces allow partitioning data within an index:
# Upsert to specific namespaces
index.upsert(vectors=vectors, namespace="documents")
index.upsert(vectors=vectors, namespace="images")
# Query a specific namespace (top_k is a required argument of index.query)
results = index.query(vector=query, top_k=10, namespace="documents")
# Delete from a namespace
index.delete(ids=ids, namespace="documents")
| Model | Dimensions | Languages |
|---|---|---|
| text-embedding-3-small (OpenAI) | 1536 | Multi |
| text-embedding-3-large (OpenAI) | 3072 | Multi |
| multilingual-e5-large (Pinecone) | 1024 | 100+ |
| llama-text-embed-v2 (Pinecone) | 1024 | English |
| Tier | Price | Storage |
|---|---|---|
| Starter | Free | 100K vectors |
| Standard | $0.33/GB/hour | Unlimited |
| Enterprise | Custom | Custom |
| Task | Code |
|---|---|
| Create index | pc.create_index(name, dimension, metric, spec) |
| Get index | pc.Index(name) |
| Upsert | index.upsert(vectors) |
| Query | index.query(vector, top_k) |
| Fetch | index.fetch(ids) |
| Delete | index.delete(ids) |
| Stats | index.describe_index_stats() |
npx claudepluginhub jhamidun/claude-code-config-pack --plugin integrations
Sets up isolated workspaces using native worktree tools or git worktree fallback. Use before starting feature work to protect the current branch.