Analyzes Outlook PST/OST files for email forensics, extracting content, headers, attachments, deleted emails, and metadata using libpff, pffexport, and pypff.
npx claudepluginhub killvxk/cybersecurity-skills-zh
This skill uses the workspace's default tool permissions.
Microsoft Outlook PST(个人存储表,Personal Storage Table)和 OST(离线存储表,Offline Storage Table)文件是数字取证调查中的关键证据来源。PST 文件以基于 MAPI(消息应用程序编程接口,Messaging Application Programming Interface)属性系统的专有二进制格式存储电子邮件、日历事件、联系人、任务和备注。对这些文件进行取证分析,可以恢复已删除的邮件(来自"可恢复邮件"文件夹)、提取邮件头以追踪邮件路由、分析附件中的恶意软件或外泄数据,以及重建通信模式。现代 PST 文件使用 Unicode 格式,页面大小 4KB,最大可达 50GB;而旧版 ANSI 格式限制为 2GB。
Analyzes Outlook PST/OST files for email forensics: extracts messages, headers, attachments, deleted items, metadata using libpff, pffexport, and pypff Python library.
Analyzes Outlook PST/OST files for email forensics, extracting messages, headers, attachments, deleted items, metadata using libpff, pypff, pst-utils. For incident response and investigations.
Analyzes raw email headers for phishing investigations: parses Received chains, key fields, verifies SPF/DKIM/DMARC to detect forgery and trace sources.
Share bugs, ideas, or general feedback.
Microsoft Outlook PST(个人存储表,Personal Storage Table)和 OST(离线存储表,Offline Storage Table)文件是数字取证调查中的关键证据来源。PST 文件以基于 MAPI(消息应用程序编程接口,Messaging Application Programming Interface)属性系统的专有二进制格式存储电子邮件、日历事件、联系人、任务和备注。对这些文件进行取证分析,可以恢复已删除的邮件(来自"可恢复邮件"文件夹)、提取邮件头以追踪邮件路由、分析附件中的恶意软件或外泄数据,以及重建通信模式。现代 PST 文件使用 Unicode 格式,页面大小 4KB,最大可达 50GB;而旧版 ANSI 格式限制为 2GB。
| 来源 | 路径 |
|---|---|
| Outlook 2016+ 默认位置 | %USERPROFILE%\Documents\Outlook Files\\*.pst |
| Outlook 旧版 | %LOCALAPPDATA%\Microsoft\Outlook\\*.pst |
| OST 缓存 | %LOCALAPPDATA%\Microsoft\Outlook\\*.ost |
| 归档文件 | %USERPROFILE%\Documents\Outlook Files\archive.pst |
# Options are placed before the source file, matching the documented synopsis
# (pffexport [options] source). getopt implementations that do not permute
# arguments stop parsing at the first operand, so a trailing -t would be ignored.

# Export all items from the PST file (mode "all")
pffexport -m all -t exported_pst evidence.pst
# Export the regular (allocated) items only (mode "items"); this covers every
# item type, not just e-mail messages
pffexport -m items -t exported_emails evidence.pst
# Export recovered/deleted items only (mode "recovered")
pffexport -m recovered -t recovered_items evidence.pst
# Print PST file information
pffinfo evidence.pst
import pypff
import os
import json
import hashlib
import email
import sys
from datetime import datetime
from collections import defaultdict
class PSTForensicAnalyzer:
    """Forensic analyzer for Outlook PST/OST files.

    Walks the folder tree of a PST/OST opened through ``pypff``, collects
    per-message metadata and attachment records, optionally exports attachment
    payloads to disk, and writes a JSON report.

    The analyzer can be used as a context manager so the underlying file is
    closed even when analysis fails::

        with PSTForensicAnalyzer(pst, out) as analyzer:
            analyzer.generate_report()

    Attributes:
        pst_path: Path of the analyzed PST/OST file.
        output_dir: Directory that receives the report and exported files.
        pst: The open ``pypff.file`` handle.
        messages: Metadata dicts, one per successfully parsed message.
        attachments: Metadata dicts, one per attachment seen while parsing.
        stats: Counters (``total_messages``, ``total_attachments``,
            ``parse_errors``, ``folder_errors``, ``attachment_errors``,
            ``attachment_export_errors``).
    """

    # Upper bounds on how many records are embedded in the JSON report; the
    # full totals are always available under "statistics".
    MAX_REPORT_MESSAGES = 500
    MAX_REPORT_ATTACHMENTS = 200

    # Counters produced by the folder walk; reset before each report run.
    _WALK_COUNTERS = (
        "total_messages",
        "total_attachments",
        "parse_errors",
        "folder_errors",
        "attachment_errors",
    )

    def __init__(self, pst_path: str, output_dir: str):
        """Create the output directory and open ``pst_path`` with pypff.

        Args:
            pst_path: Path to the PST/OST file to analyze.
            output_dir: Directory for the report and exported attachments;
                created if it does not exist.
        """
        self.pst_path = pst_path
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)
        self.pst = pypff.file()
        self.pst.open(pst_path)
        self.messages = []
        self.attachments = []
        self.stats = defaultdict(int)

    def __enter__(self):
        """Return the analyzer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the PST handle; never suppresses the active exception."""
        self.close()
        return False

    def _record_error(self, counter: str, context: str, exc: Exception) -> None:
        """Count a recoverable failure and report it on stderr.

        Analysis is best-effort (a damaged item must not abort the run), but a
        failure that leaves no trace is unacceptable for evidence handling.
        """
        self.stats[counter] += 1
        print(f"[!] {context}: {exc!r}", file=sys.stderr)

    @staticmethod
    def _write_new_file(directory: str, filename: str, data: bytes) -> str:
        """Write ``data`` under ``directory`` without ever overwriting a file.

        If ``filename`` is already taken, ``_1``, ``_2``, ... is inserted
        before the extension until an unused name is found. Exclusive-create
        mode makes the existence check and the creation a single step.

        Returns:
            The path that was actually written.
        """
        stem, ext = os.path.splitext(filename)
        candidate = filename
        suffix = 0
        while True:
            path = os.path.join(directory, candidate)
            try:
                with open(path, "xb") as handle:
                    handle.write(data)
            except FileExistsError:
                suffix += 1
                candidate = f"{stem}_{suffix}{ext}"
            else:
                return path

    def process_folder(self, folder, folder_path: str = ""):
        """Recursively walk ``folder`` and collect metadata for its messages.

        Args:
            folder: A pypff folder object.
            folder_path: Slash-separated path of the parent folder; empty for
                the root.

        Failures on a single message or sub-folder are counted in ``stats``
        (``parse_errors`` / ``folder_errors``), reported on stderr, and the
        walk continues.
        """
        folder_name = folder.name or "Root"
        current_path = f"{folder_path}/{folder_name}" if folder_path else folder_name

        for i in range(folder.number_of_sub_messages):
            try:
                message = folder.get_sub_message(i)
                msg_data = self.extract_message(message, current_path)
            except Exception as exc:
                self._record_error(
                    "parse_errors", f"邮件解析失败 ({current_path} #{i})", exc
                )
                continue
            if msg_data:
                self.messages.append(msg_data)
                self.stats["total_messages"] += 1

        for i in range(folder.number_of_sub_folders):
            try:
                subfolder = folder.get_sub_folder(i)
                self.process_folder(subfolder, current_path)
            except Exception as exc:
                self._record_error(
                    "folder_errors", f"子文件夹处理失败 ({current_path} #{i})", exc
                )

    def extract_message(self, message, folder_path: str) -> dict:
        """Extract forensic metadata from a single message.

        Args:
            message: A pypff message object.
            folder_path: Path of the folder containing the message.

        Returns:
            A dict with subject, sender, timestamps, body sizes and attachment
            counts. When transport headers are present it also carries the
            parsed routing fields, and ``sender_email`` is filled with the
            address part of the ``From`` header.

        Side effects:
            Appends one record per attachment to ``self.attachments`` and
            increments ``stats["total_attachments"]``.
        """
        # Local import: keeps this block self-contained without touching the
        # file-level import list.
        from email.utils import parseaddr

        msg_data = {
            "folder": folder_path,
            "subject": message.subject or "",
            "sender": message.sender_name or "",
            "sender_email": "",
            "creation_time": str(message.creation_time) if message.creation_time else None,
            "delivery_time": str(message.delivery_time) if message.delivery_time else None,
            "modification_time": str(message.modification_time) if message.modification_time else None,
            "has_attachments": message.number_of_attachments > 0,
            "attachment_count": message.number_of_attachments,
            "body_size": len(message.plain_text_body or b""),
            "html_size": len(message.html_body or b""),
        }

        # Transport headers drive routing analysis.
        headers = message.transport_headers
        if headers:
            msg_data["headers_present"] = True
            msg_data["headers_size"] = len(headers)
            # message_from_string() only accepts text; decode defensively in
            # case the binding hands back raw bytes.
            if isinstance(headers, bytes):
                headers = headers.decode("utf-8", errors="replace")
            parsed = email.message_from_string(headers)
            msg_data["from_header"] = parsed.get("From", "")
            msg_data["to_header"] = parsed.get("To", "")
            msg_data["date_header"] = parsed.get("Date", "")
            msg_data["message_id"] = parsed.get("Message-ID", "")
            msg_data["x_originating_ip"] = parsed.get("X-Originating-IP", "")
            msg_data["received_headers"] = parsed.get_all("Received", [])
            # parseaddr() returns (display_name, address); keep the address.
            msg_data["sender_email"] = parseaddr(str(msg_data["from_header"]))[1]

        # Attachment metadata only; payloads are exported by save_attachments().
        for j in range(message.number_of_attachments):
            try:
                attachment = message.get_attachment(j)
                att_data = {
                    "message_subject": msg_data["subject"],
                    "name": attachment.name or f"attachment_{j}",
                    "size": attachment.size,
                    "content_type": "",
                }
            except Exception as exc:
                self._record_error(
                    "attachment_errors", f"附件元数据读取失败 ({folder_path} #{j})", exc
                )
                continue
            self.attachments.append(att_data)
            self.stats["total_attachments"] += 1

        return msg_data

    def save_attachments(self, max_size_mb: int = 100):
        """Export attachment payloads to ``<output_dir>/attachments``.

        Args:
            max_size_mb: Attachments of this size (in MiB) or larger are
                skipped.

        Returns:
            The number of attachment files written.
        """
        att_dir = os.path.join(self.output_dir, "attachments")
        os.makedirs(att_dir, exist_ok=True)
        root = self.pst.get_root_folder()
        return self._save_attachments_recursive(root, att_dir, max_size_mb)

    def _save_attachments_recursive(self, folder, att_dir, max_size_mb):
        """Write the attachments of ``folder`` and all its sub-folders.

        Each attachment is handled independently, so one unreadable attachment
        no longer prevents the remaining ones from being exported. Existing
        files are never overwritten (see ``_write_new_file``); distinct
        attachments that share a name therefore all survive.

        Returns:
            The number of files written beneath ``folder``.
        """
        size_limit = max_size_mb * 1024 * 1024
        written = 0

        for i in range(folder.number_of_sub_messages):
            try:
                message = folder.get_sub_message(i)
                attachment_count = message.number_of_attachments
            except Exception as exc:
                self._record_error(
                    "attachment_export_errors", f"邮件读取失败 (#{i})", exc
                )
                continue

            for j in range(attachment_count):
                try:
                    att = message.get_attachment(j)
                    # Skip empty/unknown-size and oversize attachments.
                    if not att.size or att.size >= size_limit:
                        continue
                    fallback = f"unknown_{i}_{j}"
                    name = att.name or fallback
                    safe_name = "".join(
                        c if c.isalnum() or c in ".-_" else "_" for c in name
                    )
                    # "." and ".." survive sanitizing but name directories,
                    # not files.
                    if safe_name in (".", ".."):
                        safe_name = fallback
                    data = att.read_buffer(att.size)
                    self._write_new_file(att_dir, safe_name, data)
                    written += 1
                except Exception as exc:
                    self._record_error(
                        "attachment_export_errors", f"附件导出失败 (#{i}/{j})", exc
                    )

        for i in range(folder.number_of_sub_folders):
            try:
                written += self._save_attachments_recursive(
                    folder.get_sub_folder(i), att_dir, max_size_mb
                )
            except Exception as exc:
                self._record_error(
                    "attachment_export_errors", f"子文件夹处理失败 (#{i})", exc
                )

        return written

    def generate_report(self) -> str:
        """Walk the whole PST and write ``pst_forensic_report.json``.

        The collected messages, attachments and walk counters are reset first,
        so calling this method more than once does not double-count.

        Returns:
            Path of the written JSON report.
        """
        self.messages = []
        self.attachments = []
        for counter in self._WALK_COUNTERS:
            self.stats.pop(counter, None)

        root = self.pst.get_root_folder()
        self.process_folder(root)

        report = {
            # astimezone() attaches the local UTC offset so the timestamp is
            # unambiguous when the report is read in another time zone.
            "analysis_timestamp": datetime.now().astimezone().isoformat(),
            "pst_file": self.pst_path,
            "pst_size_bytes": os.path.getsize(self.pst_path),
            "statistics": dict(self.stats),
            "messages": self.messages[: self.MAX_REPORT_MESSAGES],
            "attachments": self.attachments[: self.MAX_REPORT_ATTACHMENTS],
        }

        report_path = os.path.join(self.output_dir, "pst_forensic_report.json")
        with open(report_path, "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2, default=str)

        print(f"[*] 邮件总数: {self.stats['total_messages']}")
        print(f"[*] 附件总数: {self.stats['total_attachments']}")
        print(f"[*] 解析错误: {self.stats['parse_errors']}")
        return report_path

    def close(self):
        """Close the underlying pypff file handle."""
        self.pst.close()
def main():
    """Command-line entry point: ``process.py <pst_file> <output_dir>``.

    Prints a usage message to stderr and exits with status 1 when fewer than
    two arguments are given. Otherwise opens the PST, generates the report,
    and closes the PST handle even if report generation raises.
    """
    if len(sys.argv) < 3:
        print("用法: python process.py <pst_file> <output_dir>", file=sys.stderr)
        sys.exit(1)

    analyzer = PSTForensicAnalyzer(sys.argv[1], sys.argv[2])
    try:
        analyzer.generate_report()
    finally:
        # Release the file handle on both the success and the failure path.
        analyzer.close()


if __name__ == "__main__":
    main()
取证调查的关键头字段:
| 头字段 | 取证价值 |
|---|---|
| Received | 邮件路由链(从下到上阅读) |
| X-Originating-IP | 发件客户端的 IP 地址(由发信服务商添加,可能缺失或被伪造,需结合 Received 链交叉验证) |
| Message-ID | 用于关联的唯一标识符 |
| Date | 发送时间戳 |
| Return-Path | 退信地址(可能与 From 不同) |
| DKIM-Signature | 域名认证签名 |
| Authentication-Results | SPF、DKIM、DMARC 验证结果 |
| X-Mailer | 使用的邮件客户端 |