Implement speech-to-text transcription workflow with Deepgram. Use when building pre-recorded audio transcription, batch processing, or implementing core transcription features. Trigger with phrases like "deepgram transcription", "speech to text", "transcribe audio", "audio transcription workflow", "batch transcription".
/plugin marketplace add jeremylongshore/claude-code-plugins-plus-skills
/plugin install deepgram-pack@claude-code-plugins-plus
This skill is limited to using the following tools:
Implement a complete pre-recorded audio transcription workflow using Deepgram's Nova-2 model.
Prerequisite: complete the deepgram-install-auth setup first.
Create a service class to handle transcription operations.
Add methods for both local files and remote URLs.
Configure punctuation, diarization, and formatting.
Extract and format transcription results.
| Error | Cause | Solution |
|---|---|---|
| Audio Too Long | Exceeds limits | Split into chunks or use async |
| Unsupported Format | Invalid audio type | Convert to WAV/MP3/FLAC |
| Empty Response | No speech detected | Check audio quality |
| Timeout | Large file processing | Use callback URL pattern |
// services/transcription.ts
import { createClient } from '@deepgram/sdk';
import { readFile } from 'fs/promises';
/**
 * Caller-facing knobs for a single transcription request.
 *
 * Every field is optional. Names are camelCase on this side; the service
 * maps them onto the request it sends (e.g. `smartFormat` -> `smart_format`).
 */
export interface TranscriptionOptions {
  /** Which speech model to request. */
  model?: 'nova-2' | 'nova' | 'enhanced' | 'base';
  /** Language code of the audio, such as `'en'`. */
  language?: string;
  /** Request punctuation in the transcript. */
  punctuate?: boolean;
  /** Request speaker diarization. */
  diarize?: boolean;
  /** Request smart formatting. */
  smartFormat?: boolean;
  /** Request utterance segments in the response. */
  utterances?: boolean;
  /** Request paragraph segmentation in the response. */
  paragraphs?: boolean;
}
/**
 * Flattened view of a transcription response as returned by the service.
 */
export interface TranscriptionResult {
  /** Full transcript text of the selected alternative. */
  transcript: string;
  /** Confidence score reported for the selected alternative. */
  confidence: number;
  /** Per-word entries with their start/end offsets and confidence. */
  words: {
    word: string;
    start: number;
    end: number;
    confidence: number;
  }[];
  /** Speaker-attributed segments; absent when the response carries none. */
  utterances?: {
    speaker: number;
    transcript: string;
    start: number;
    end: number;
  }[];
}
/**
 * Wrapper around the Deepgram pre-recorded transcription client.
 *
 * Exposes two entry points -- one for remote URLs, one for local files --
 * that share the same option defaults and return the same flattened
 * {@link TranscriptionResult} shape.
 */
export class TranscriptionService {
  /**
   * File extension -> MIME type table. A Map is used instead of a plain
   * object so that extensions such as `constructor` cannot resolve to
   * inherited `Object.prototype` members.
   */
  private static readonly MIME_TYPES: ReadonlyMap<string, string> = new Map([
    ['wav', 'audio/wav'],
    ['mp3', 'audio/mpeg'],
    ['flac', 'audio/flac'],
    ['ogg', 'audio/ogg'],
    ['m4a', 'audio/mp4'],
    ['webm', 'audio/webm'],
  ]);

  private client;

  /**
   * @param apiKey Deepgram API key handed to `createClient`.
   */
  constructor(apiKey: string) {
    this.client = createClient(apiKey);
  }

  /**
   * Transcribes audio hosted at a remote URL.
   *
   * @param url Location of the audio to transcribe.
   * @param options Optional overrides for the request defaults.
   * @returns The flattened transcription result.
   * @throws Error when the client reports an error or the response holds
   *   no transcription alternative.
   */
  async transcribeUrl(
    url: string,
    options: TranscriptionOptions = {}
  ): Promise<TranscriptionResult> {
    const { result, error } = await this.client.listen.prerecorded.transcribeUrl(
      { url },
      this.buildRequestOptions(options)
    );
    if (error) throw new Error(error.message);
    return this.formatResult(result);
  }

  /**
   * Reads a local audio file and transcribes its contents.
   *
   * Accepts the same options as {@link transcribeUrl}, including
   * `utterances` and `paragraphs`, which were previously not forwarded
   * for file uploads.
   *
   * @param filePath Path of the audio file on disk.
   * @param options Optional overrides for the request defaults.
   * @returns The flattened transcription result.
   * @throws Error when the file cannot be read, the client reports an
   *   error, or the response holds no transcription alternative.
   */
  async transcribeFile(
    filePath: string,
    options: TranscriptionOptions = {}
  ): Promise<TranscriptionResult> {
    const audio = await readFile(filePath);
    const { result, error } = await this.client.listen.prerecorded.transcribeFile(
      audio,
      {
        ...this.buildRequestOptions(options),
        mimetype: this.getMimeType(filePath),
      }
    );
    if (error) throw new Error(error.message);
    return this.formatResult(result);
  }

  /**
   * Builds the request options shared by both transcription entry points,
   * so URL and file requests can never drift apart again.
   */
  private buildRequestOptions(options: TranscriptionOptions) {
    return {
      // `||` is deliberate for the string fields: an empty string falls
      // back to the default rather than being sent as an invalid value.
      model: options.model || 'nova-2',
      language: options.language || 'en',
      // `??` keeps an explicit `false` from the caller.
      punctuate: options.punctuate ?? true,
      diarize: options.diarize ?? false,
      smart_format: options.smartFormat ?? true,
      utterances: options.utterances ?? false,
      paragraphs: options.paragraphs ?? false,
    };
  }

  /**
   * Extracts the first alternative of the first channel from a raw
   * response and reshapes it into a {@link TranscriptionResult}.
   *
   * @throws Error when the response has no channel or no alternative.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- raw SDK payload, narrowed below
  private formatResult(result: any): TranscriptionResult {
    const alternative = result?.results?.channels?.[0]?.alternatives?.[0];
    if (!alternative) {
      throw new Error('Transcription response contained no alternatives');
    }
    return {
      transcript: alternative.transcript,
      confidence: alternative.confidence,
      words: alternative.words || [],
      utterances: result.results.utterances,
    };
  }

  /**
   * Guesses a MIME type from the text after the last `.` in the path.
   * Unknown or missing extensions fall back to `audio/wav`.
   */
  private getMimeType(filePath: string): string {
    const ext = filePath.split('.').pop()?.toLowerCase();
    return TranscriptionService.MIME_TYPES.get(ext ?? '') ?? 'audio/wav';
  }
}
// services/batch-transcription.ts
import { TranscriptionService, TranscriptionResult } from './transcription';
/**
 * Transcribes a list of local audio files in fixed-size batches.
 *
 * Each batch runs in parallel and is awaited in full before the next one
 * starts. A failure for one file does not abort the others: its entry in
 * the returned map holds the Error instead of a result.
 *
 * @param files Paths of the audio files to transcribe.
 * @param options.concurrency Number of files per batch. Values that are
 *   missing, not a number, or below 1 fall back to 5; fractional values
 *   are rounded down.
 * @returns Map keyed by file path; duplicate paths overwrite each other.
 * @throws Error when the `DEEPGRAM_API_KEY` environment variable is unset.
 */
export async function batchTranscribe(
  files: string[],
  options: { concurrency?: number } = {}
): Promise<Map<string, TranscriptionResult | Error>> {
  const apiKey = process.env.DEEPGRAM_API_KEY;
  if (!apiKey) {
    throw new Error('DEEPGRAM_API_KEY environment variable is not set');
  }
  const service = new TranscriptionService(apiKey);
  const results = new Map<string, TranscriptionResult | Error>();

  // A negative step would move the loop index backwards forever, and a
  // fractional step produces uneven or empty batches, so normalise first.
  const requested = options.concurrency;
  const concurrency =
    typeof requested === 'number' && requested >= 1 ? Math.floor(requested) : 5;

  // Process in batches
  for (let i = 0; i < files.length; i += concurrency) {
    const batch = files.slice(i, i + concurrency);
    const batchResults = await Promise.allSettled(
      batch.map(file => service.transcribeFile(file))
    );
    batchResults.forEach((result, index) => {
      const file = batch[index];
      if (result.status === 'fulfilled') {
        results.set(file, result.value);
      } else {
        // Rejection reasons are untyped; keep the map's Error contract.
        const reason: unknown = result.reason;
        results.set(file, reason instanceof Error ? reason : new Error(String(reason)));
      }
    });
  }
  return results;
}
// Example: transcribe a meeting recording with speaker labels requested
const result = await service.transcribeFile('./meeting.wav', {
  diarize: true,
  utterances: true,
});
// Print the transcript as a conversation, one line per utterance
for (const { speaker, transcript } of result.utterances ?? []) {
  console.log(`Speaker ${speaker}: ${transcript}`);
}
# services/transcription.py
from deepgram import DeepgramClient, PrerecordedOptions, FileSource
from pathlib import Path
from typing import Optional
import mimetypes
class TranscriptionService:
    """Wrapper around the Deepgram pre-recorded (REST) transcription client.

    Both entry points request smart formatting and punctuation and return
    the same flattened dictionary (see ``_format_result``).
    """

    def __init__(self, api_key: str):
        """Create the underlying ``DeepgramClient`` from ``api_key``."""
        self.client = DeepgramClient(api_key)

    def transcribe_url(
        self,
        url: str,
        model: str = 'nova-2',
        language: str = 'en',
        diarize: bool = False
    ) -> dict:
        """Transcribe audio hosted at ``url``.

        Args:
            url: Location of the audio to transcribe.
            model: Model name to request.
            language: Language code of the audio.
            diarize: Whether to request speaker diarization.

        Returns:
            Dict with ``transcript``, ``confidence`` and ``words`` keys.
        """
        options = PrerecordedOptions(
            model=model,
            language=language,
            smart_format=True,
            punctuate=True,
            diarize=diarize,
        )
        response = self.client.listen.rest.v("1").transcribe_url(
            {"url": url},
            options
        )
        return self._format_result(response)

    def transcribe_file(
        self,
        file_path: str,
        model: str = 'nova-2',
        diarize: bool = False,
        language: str = 'en'
    ) -> dict:
        """Read a local audio file and transcribe its contents.

        Args:
            file_path: Path of the audio file on disk.
            model: Model name to request.
            diarize: Whether to request speaker diarization.
            language: Language code of the audio. Placed last so existing
                positional callers keep working.

        Returns:
            Dict with ``transcript``, ``confidence`` and ``words`` keys.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(file_path, 'rb') as f:
            audio = f.read()
        # FileSource is a typing alias, not a class: it cannot be called as
        # a constructor. The payload is a plain dict carrying the raw bytes.
        source: FileSource = {"buffer": audio}
        options = PrerecordedOptions(
            model=model,
            language=language,
            smart_format=True,
            punctuate=True,
            diarize=diarize,
        )
        response = self.client.listen.rest.v("1").transcribe_file(
            source,
            options
        )
        return self._format_result(response)

    def _format_result(self, response) -> dict:
        """Flatten the first alternative of the first channel into a dict."""
        channel = response.results.channels[0]
        alternative = channel.alternatives[0]
        return {
            'transcript': alternative.transcript,
            'confidence': alternative.confidence,
            'words': alternative.words,
        }
Proceed to deepgram-core-workflow-b for real-time streaming transcription.
This skill should be used when the user asks to "create a hookify rule", "write a hook rule", "configure hookify", "add a hookify rule", or needs guidance on hookify rule syntax and patterns.
Create distinctive, production-grade frontend interfaces with high design quality. Use this skill when the user asks to build web components, pages, or applications. Generates creative, polished code that avoids generic AI aesthetics.