From Office Documents
Reads, analyzes, and creates EPUB files. Extracts metadata, chapters, and text; supports search and summary. Useful for working with ebooks in Python.
How this skill is triggered — by the user, by Claude, or both
Slash command
/office-docs:epub-tools
The summary Claude sees in its skill listing — used to decide when to auto-load this skill
> Use when: "epub", "ebook", "электронная книга", "kindle", "конвертируй в epub", "прочитай epub"
Use when: "epub", "ebook", "электронная книга", "kindle", "конвертируй в epub", "прочитай epub".
Read, analyze, and create EPUB files.
pip install ebooklib beautifulsoup4 lxml
import ebooklib
from ebooklib import epub
from bs4 import BeautifulSoup
def read_epub(filepath):
    """Load an EPUB and return its Dublin Core metadata and per-document text.

    Args:
        filepath: Location of the EPUB file, passed to ``epub.read_epub``.

    Returns:
        A ``(metadata, chapters)`` tuple. ``metadata`` maps ``title``,
        ``author``, ``language`` and ``description`` to whatever
        ``book.get_metadata('DC', ...)`` returns for that field.
        ``chapters`` is a list of dicts with the keys ``id``, ``title``,
        ``text`` and ``word_count``, one per document item that contains
        visible text.
    """
    book = epub.read_epub(filepath)

    # (result key, Dublin Core field) pairs, looked up in this order
    dc_fields = (
        ('title', 'title'),
        ('author', 'creator'),
        ('language', 'language'),
        ('description', 'description'),
    )
    metadata = {key: book.get_metadata('DC', field) for key, field in dc_fields}

    chapters = []
    for doc in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
        soup = BeautifulSoup(doc.get_content(), 'html.parser')
        text = soup.get_text(separator='\n', strip=True)
        if not text.strip():
            # Documents without any visible text are not reported
            continue

        # The first h1/h2/h3 names the chapter; fall back to the item name
        heading = soup.find(['h1', 'h2', 'h3'])
        name = doc.get_name()
        entry = {
            'id': name,
            'title': heading.get_text() if heading else name,
            'text': text,
            'word_count': len(text.split()),
        }
        chapters.append(entry)

    return metadata, chapters
def _meta_text(values, default='Unknown'):
    """Flatten one metadata field into printable text, or return *default*."""
    if not values:
        return default
    if isinstance(values, str):
        return values
    parts = []
    for entry in values:
        # Entries are expected to be (value, attributes) pairs as produced by
        # ebooklib's get_metadata; anything else is printed as-is.
        value = entry[0] if isinstance(entry, (tuple, list)) and entry else entry
        if value:
            parts.append(str(value))
    return ', '.join(parts) if parts else default


def epub_summary(filepath):
    """Print EPUB structure and stats.

    Prints the title, author, chapter count, total word count, an estimated
    reading time (250 words per minute, rounded down) and one line per
    chapter.

    Args:
        filepath: Path of the EPUB file, forwarded to ``read_epub``.

    Returns:
        The unmodified ``(metadata, chapters)`` tuple from ``read_epub``.
    """
    metadata, chapters = read_epub(filepath)

    # The metadata values are raw lists, so printing them directly shows the
    # list repr and never falls back to 'Unknown' (the keys always exist).
    print(f"Title: {_meta_text(metadata.get('title'))}")
    print(f"Author: {_meta_text(metadata.get('author'))}")
    print(f"Chapters: {len(chapters)}")

    total_words = sum(ch['word_count'] for ch in chapters)
    print(f"Total words: {total_words:,}")
    print(f"Est. reading time: {total_words // 250} min")
    print()

    for i, ch in enumerate(chapters, 1):
        print(f" {i}. {ch['title']} ({ch['word_count']:,} words)")
    return metadata, chapters
# Usage:
# metadata, chapters = epub_summary('book.epub')
# full_text = '\n\n'.join(ch['text'] for ch in chapters)
def get_chapter(filepath, chapter_num):
    """Return the title and text of one chapter, addressed by 1-based number.

    Args:
        filepath: Path of the EPUB file, forwarded to ``read_epub``.
        chapter_num: 1-based position in the chapter list from ``read_epub``.

    Returns:
        ``(title, text)`` for an existing chapter; otherwise ``(None, message)``
        where ``message`` states the requested number and the chapter total.
    """
    chapters = read_epub(filepath)[1]
    total = len(chapters)

    # Guard clause: anything outside 1..total is reported, not raised
    if not 1 <= chapter_num <= total:
        return None, f"Chapter {chapter_num} not found (total: {total})"

    selected = chapters[chapter_num - 1]
    return selected['title'], selected['text']
def search_epub(filepath, query, case_sensitive=False):
    """Search for text across all chapters.

    Args:
        filepath: Path of the EPUB file, forwarded to ``read_epub``.
        query: Literal text to look for (not a regular expression).
        case_sensitive: When false (default), letter case is ignored.

    Returns:
        A list with one dict per chapter that contains the query. Each dict
        has ``chapter`` (the chapter title) and ``context`` (up to 100
        characters on either side of the first match, wrapped in ``...``).
        An empty query yields an empty list.
    """
    import re

    # An empty query is a substring of every text; treat it as "no matches"
    # instead of reporting every chapter.
    if not query:
        return []

    _, chapters = read_epub(filepath)

    # Match on the original text so the offsets are valid for slicing it;
    # lowercasing first can change the string length for some characters.
    flags = 0 if case_sensitive else re.IGNORECASE
    pattern = re.compile(re.escape(query), flags)

    results = []
    for ch in chapters:
        text = ch['text']
        match = pattern.search(text)
        if match is None:
            continue
        # Context window around the first match in this chapter
        start = max(0, match.start() - 100)
        end = min(len(text), match.end() + 100)
        results.append({
            'chapter': ch['title'],
            'context': f"...{text[start:end]}...",
        })
    return results
def markdown_to_epub(md_content, output_path, title='Untitled', author='Author', language='ru'):
    """Convert Markdown text to an EPUB file.

    The text is split on H1 lines (``# Heading``); every H1 section becomes
    one chapter with a table-of-contents entry. Text before the first H1, if
    any, becomes an "Introduction" chapter that is part of the reading order
    but has no table-of-contents entry.

    Args:
        md_content: Markdown source text.
        output_path: Destination path handed to ``epub.write_epub``.
        title: Book title; also the seed of the book identifier.
        author: Author name stored in the book metadata.
        language: Language code for the book and each chapter.

    Returns:
        ``output_path``, unchanged.
    """
    import hashlib
    import re
    from html import escape

    book = epub.EpubBook()
    # hash() of a str is salted per interpreter run, so it would give the same
    # title a different (possibly negative) identifier each time. A digest of
    # the title is stable.
    digest = hashlib.sha256(title.encode('utf-8')).hexdigest()[:16]
    book.set_identifier(f'id-{digest}')
    book.set_title(title)
    book.set_language(language)
    book.add_author(author)

    # Split by H1 headers: the result alternates [preamble, heading, body, ...]
    sections = re.split(r'^# (.+)$', md_content, flags=re.MULTILINE)
    chapters = []
    toc = []

    # Handle content before first H1
    if sections[0].strip():
        ch = epub.EpubHtml(title='Introduction', file_name='intro.xhtml', lang=language)
        ch.content = f'<h1>Introduction</h1>{_md_to_html(sections[0])}'
        book.add_item(ch)
        chapters.append(ch)

    # Process H1 sections (heading/body pairs)
    for number, (heading, body) in enumerate(zip(sections[1::2], sections[2::2]), 1):
        # Drop trailing whitespace such as the '\r' of CRLF line endings
        heading = heading.strip()
        ch_id = f'chapter_{number}'
        ch = epub.EpubHtml(title=heading, file_name=f'{ch_id}.xhtml', lang=language)
        # The heading is plain text; escape it so '&' or '<' cannot break the markup
        ch.content = f'<h1>{escape(heading, quote=False)}</h1>{_md_to_html(body)}'
        book.add_item(ch)
        chapters.append(ch)
        toc.append(epub.Link(f'{ch_id}.xhtml', heading, ch_id))

    # Add styling
    style = epub.EpubItem(
        uid='style',
        file_name='style/default.css',
        media_type='text/css',
        content=b'''
body { font-family: Georgia, serif; line-height: 1.6; margin: 1em; }
h1 { color: #1a1a2e; border-bottom: 2px solid #7c3aed; padding-bottom: 0.3em; }
h2 { color: #302b63; }
code { background: #f0f0f0; padding: 0.2em 0.4em; border-radius: 3px; font-size: 0.9em; }
pre { background: #1a1a2e; color: #e0e0e0; padding: 1em; border-radius: 5px; overflow-x: auto; }
blockquote { border-left: 3px solid #7c3aed; padding-left: 1em; color: #555; }
table { border-collapse: collapse; width: 100%; }
th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
th { background: #7c3aed; color: white; }
tr:nth-child(even) { background: #f9f9f9; }
'''
    )
    book.add_item(style)
    for ch in chapters:
        ch.add_item(style)

    book.toc = toc
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
    book.spine = ['nav'] + chapters
    epub.write_epub(output_path, book)
    return output_path
def _md_to_html(md_text):
"""Basic Markdown to HTML conversion."""
import re
html = md_text
# Headers
html = re.sub(r'^### (.+)$', r'<h3>\1</h3>', html, flags=re.MULTILINE)
html = re.sub(r'^## (.+)$', r'<h2>\1</h2>', html, flags=re.MULTILINE)
# Bold and italic
html = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', html)
html = re.sub(r'\*(.+?)\*', r'<em>\1</em>', html)
# Code blocks
html = re.sub(r'```(\w*)\n(.*?)```', r'<pre><code>\2</code></pre>', html, flags=re.DOTALL)
html = re.sub(r'`(.+?)`', r'<code>\1</code>', html)
# Lists
html = re.sub(r'^- (.+)$', r'<li>\1</li>', html, flags=re.MULTILINE)
html = re.sub(r'(<li>.*</li>\n?)+', r'<ul>\g<0></ul>', html)
# Blockquotes
html = re.sub(r'^> (.+)$', r'<blockquote>\1</blockquote>', html, flags=re.MULTILINE)
# Links
html = re.sub(r'\[(.+?)\]\((.+?)\)', r'<a href="\2">\1</a>', html)
# Paragraphs
html = re.sub(r'\n\n(.+?)(?=\n\n|$)', r'<p>\1</p>', html, flags=re.DOTALL)
return html
import os

# Analyze a book
# Python does not expand ${HOME} inside a string literal, so expand it explicitly
metadata, chapters = epub_summary(os.path.expandvars('${HOME}/books/ai-book.epub'))
# Read specific chapter
title, text = get_chapter('book.epub', 3)
# Search across book
results = search_epub('book.epub', 'machine learning')
# Create EPUB from markdown
# Read the source with an explicit encoding and close the file afterwards
with open('research.md', encoding='utf-8') as md_file:
    md_source = md_file.read()
markdown_to_epub(
    md_content=md_source,
    output_path='output.epub',
    title='AI Research Summary',
    author='Your Name',
    language='ru',
)
calibre or send EPUB to Kindle email
document-skills/pdf/
npx claudepluginhub jhamidun/claude-code-config-pack --plugin office-docs
Builds a throwaway prototype to answer a design question about UI appearance or state/logic behavior. Guides you through two branches: interactive terminal app for logic validation, or multiple UI variations for visual exploration.