Analyzes images with Azure AI Vision Python SDK: generates captions/tags, detects objects/people, performs OCR, dense captions, and smart crops. For image understanding tasks.
From antigravity-awesome-skills. Install with: `npx claudepluginhub sickn33/antigravity-awesome-skills --plugin antigravity-awesome-skills`. This skill uses the workspace's default tool permissions.
Designs and optimizes AI agent action spaces, tool definitions, observation formats, error recovery, and context for higher task completion rates.
Enables AI agents to execute x402 payments with per-task budgets, spending controls, and non-custodial wallets via MCP tools. Use when agents pay for APIs, services, or other agents.
Compares coding agents like Claude Code and Aider on custom YAML-defined codebase tasks using git worktrees, measuring pass rate, cost, time, and consistency.
Client library for Azure AI Vision 4.0 image analysis including captions, tags, objects, OCR, and more.
pip install azure-ai-vision-imageanalysis
VISION_ENDPOINT=https://<resource>.cognitiveservices.azure.com
VISION_KEY=<your-api-key> # If using API key
import os
from azure.ai.vision.imageanalysis import ImageAnalysisClient
from azure.core.credentials import AzureKeyCredential
# Both settings are required: indexing os.environ raises KeyError right here
# if either variable is missing.
endpoint = os.environ["VISION_ENDPOINT"]
key = os.environ["VISION_KEY"]

# API-key authentication: the key is wrapped in an AzureKeyCredential.
client = ImageAnalysisClient(endpoint=endpoint, credential=AzureKeyCredential(key))
from azure.ai.vision.imageanalysis import ImageAnalysisClient
from azure.identity import DefaultAzureCredential
# Token-based authentication through azure.identity's DefaultAzureCredential;
# no VISION_KEY is read on this path, only the endpoint.
client = ImageAnalysisClient(
    endpoint=os.environ["VISION_ENDPOINT"],
    credential=DefaultAzureCredential(),
)
from azure.ai.vision.imageanalysis.models import VisualFeatures
image_url = "https://example.com/image.jpg"

# A single request may ask for several features at once. Each one is read
# back from its own attribute on the result (caption, tags, objects, ...).
result = client.analyze_from_url(
    image_url=image_url,
    visual_features=[
        VisualFeatures.CAPTION,
        VisualFeatures.TAGS,
        VisualFeatures.OBJECTS,
        VisualFeatures.READ,
        VisualFeatures.PEOPLE,
        VisualFeatures.SMART_CROPS,
        VisualFeatures.DENSE_CAPTIONS,
    ],
    gender_neutral_caption=True,
    language="en",
)
# Read the image bytes up front; the file is closed as soon as the `with`
# block exits, before the request is sent.
with open("image.jpg", "rb") as f:
    image_data = f.read()

# analyze() takes raw image bytes, whereas analyze_from_url() takes a URL.
result = client.analyze(
    image_data=image_data,
    visual_features=[VisualFeatures.CAPTION, VisualFeatures.TAGS],
)
# Request only the whole-image caption.
result = client.analyze_from_url(
    image_url=image_url,
    visual_features=[VisualFeatures.CAPTION],
    gender_neutral_caption=True,
)

# Guard before dereferencing: the caption attribute may be empty.
if result.caption:
    print(f"Caption: {result.caption.text}")
    print(f"Confidence: {result.caption.confidence:.2f}")
# Request dense captions, which come back as a list with a bounding box each.
result = client.analyze_from_url(
    image_url=image_url,
    visual_features=[VisualFeatures.DENSE_CAPTIONS],
)

if result.dense_captions:
    for caption in result.dense_captions.list:
        print(f"Caption: {caption.text}")
        print(f" Confidence: {caption.confidence:.2f}")
        print(f" Bounding box: {caption.bounding_box}")
# Request content tags for the image.
result = client.analyze_from_url(
    image_url=image_url,
    visual_features=[VisualFeatures.TAGS],
)

if result.tags:
    for tag in result.tags.list:
        print(f"Tag: {tag.name} (confidence: {tag.confidence:.2f})")
# Request object detection; each detected object carries tags and a bounding box.
result = client.analyze_from_url(
    image_url=image_url,
    visual_features=[VisualFeatures.OBJECTS],
)

if result.objects:
    for obj in result.objects.list:
        # Only the first tag is reported. An object with an empty tag list
        # would raise IndexError on tags[0], so it is checked first and the
        # bounding box is still printed.
        if obj.tags:
            top_tag = obj.tags[0]
            print(f"Object: {top_tag.name}")
            print(f" Confidence: {top_tag.confidence:.2f}")
        box = obj.bounding_box
        print(f" Bounding box: x={box.x}, y={box.y}, w={box.width}, h={box.height}")
# Request text extraction (OCR).
result = client.analyze_from_url(
    image_url=image_url,
    visual_features=[VisualFeatures.READ],
)

# The read result is walked as blocks -> lines -> words.
if result.read:
    for block in result.read.blocks:
        for line in block.lines:
            print(f"Line: {line.text}")
            print(f" Bounding polygon: {line.bounding_polygon}")

            # Word-level details
            for word in line.words:
                print(f" Word: {word.text} (confidence: {word.confidence:.2f})")
# Request people detection.
result = client.analyze_from_url(
    image_url=image_url,
    visual_features=[VisualFeatures.PEOPLE],
)

if result.people:
    for person in result.people.list:
        # Plain string: there is nothing to interpolate on this line.
        print("Person detected:")
        print(f" Confidence: {person.confidence:.2f}")
        box = person.bounding_box
        print(f" Bounding box: x={box.x}, y={box.y}, w={box.width}, h={box.height}")
# Request suggested crop regions, one per requested aspect ratio.
result = client.analyze_from_url(
    image_url=image_url,
    visual_features=[VisualFeatures.SMART_CROPS],
    smart_crops_aspect_ratios=[0.9, 1.33, 1.78],  # Portrait, 4:3, 16:9
)

if result.smart_crops:
    for crop in result.smart_crops.list:
        print(f"Aspect ratio: {crop.aspect_ratio}")
        box = crop.bounding_box
        print(f" Crop region: x={box.x}, y={box.y}, w={box.width}, h={box.height}")
from azure.ai.vision.imageanalysis.aio import ImageAnalysisClient
from azure.identity.aio import DefaultAzureCredential
async def analyze_image():
    """Caption the image at ``image_url`` with the async client and print it.

    Uses the module-level ``endpoint`` and ``image_url`` values. Nothing is
    printed when the service returns no caption.
    """
    # The async credential is opened as a context manager as well, so it is
    # closed together with the client instead of being left open.
    async with DefaultAzureCredential() as credential:
        async with ImageAnalysisClient(
            endpoint=endpoint,
            credential=credential,
        ) as client:
            result = await client.analyze_from_url(
                image_url=image_url,
                visual_features=[VisualFeatures.CAPTION],
            )

    # Same guard as the synchronous caption example: caption may be empty.
    if result.caption:
        print(result.caption.text)
| Feature | Description |
|---|---|
| CAPTION | Single sentence describing the image |
| DENSE_CAPTIONS | Captions for multiple regions |
| TAGS | Content tags (objects, scenes, actions) |
| OBJECTS | Object detection with bounding boxes |
| READ | OCR text extraction |
| PEOPLE | People detection with bounding boxes |
| SMART_CROPS | Suggested crop regions for thumbnails |
from azure.core.exceptions import HttpResponseError
try:
    result = client.analyze_from_url(
        image_url=image_url,
        visual_features=[VisualFeatures.CAPTION],
    )
except HttpResponseError as e:
    print(f"Status code: {e.status_code}")
    print(f"Reason: {e.reason}")
    # e.error can be None when no structured error body was parsed; fall back
    # to the exception's own message so the handler itself does not raise.
    message = e.error.message if e.error else e.message
    print(f"Message: {message}")
Use this skill when you need to carry out the workflow or actions described in the overview.