Develops precise YARA rules for malware detection by analyzing executables for unique byte patterns, strings, PE imports, and behaviors to minimize false positives.
npx claudepluginhub killvxk/cybersecurity-skills-zh
This skill uses the workspace's default tool permissions.
YARA 是恶意软件研究人员的模式匹配多用工具,能够根据文本或二进制模式识别和分类恶意软件。有效的 YARA 规则结合唯一字符串模式、字节序列、PE 头特征、导入表分析和条件逻辑,在检测恶意软件家族的同时避免误报。现代 YARA-X(用 Rust 重写,稳定版于 2025 年 6 月发布)带来了性能提升和新模块。规则应针对脱壳后的恶意软件特征,如硬编码的栈字符串、C2 URL、互斥体名称、加密常量和唯一代码序列,而非加壳程序签名。
Develops precise YARA rules for malware detection by identifying unique byte patterns, strings, and behavioral indicators in executable files while minimizing false positives. Useful for security assessments, incident response, and threat hunting.
Develops YARA rules for malware detection by analyzing byte patterns, strings, PE headers, and indicators in executables while minimizing false positives.
Triages and classifies malware samples using YARA rules to match strings, byte sequences, file patterns, and structures. Guides rule creation, scanning, and workflow integration for signature-based detection.
Share bugs, ideas, or general feedback.
YARA 是恶意软件研究人员的模式匹配多用工具,能够根据文本或二进制模式识别和分类恶意软件。有效的 YARA 规则结合唯一字符串模式、字节序列、PE 头特征、导入表分析和条件逻辑,在检测恶意软件家族的同时避免误报。现代 YARA-X(用 Rust 重写,稳定版于 2025 年 6 月发布)带来了性能提升和新模块。规则应针对脱壳后的恶意软件特征,如硬编码的栈字符串、C2 URL、互斥体名称、加密常量和唯一代码序列,而非加壳程序签名。
yara-python 库、pefile、pestudio。
每条 YARA 规则由三个部分组成:meta(可选的描述性元数据)、strings(模式定义)和 condition(匹配逻辑)。字符串类型包括文本字符串(ASCII/wide/nocase)、带通配符和跳转的十六进制模式,以及正则表达式。条件使用布尔运算符将字符串匹配与文件属性组合。
有效的规则针对恶意软件家族独有的、能在重新编译后仍然存活的模式。硬编码的栈字符串是极好的选择,因为编译器会持续嵌入它们。C2 域名模式、自定义加密例程、唯一错误消息和特定 API 调用序列提供了稳定的检测锚点。避免使用编译器生成的样板代码和常见库字符串。
YARA 以短路方式评估条件。将最具区分性且计算代价最低的条件放在最前面。使用 filesize 限制快速跳过无关文件。尽量使用十六进制模式替代正则表达式。使用 private 规则作为复杂检测逻辑的构建块,而不生成独立匹配。
#!/usr/bin/env python3
"""提取用于 YARA 规则创建的候选字符串和字节模式。"""
import pefile
import re
import sys
from collections import Counter
def extract_strings(filepath, min_length=6):
    """Extract printable ASCII and wide (UTF-16LE) strings from a binary file.

    Args:
        filepath: Path of the file to read; it is opened in binary mode.
        min_length: Minimum number of characters a run must have to be kept.

    Returns:
        A dict with two keys: ``'ascii'`` holds the decoded runs of printable
        bytes (0x20-0x7e); ``'wide'`` holds the decoded runs of printable
        bytes that are each followed by a NUL byte.
    """
    with open(filepath, 'rb') as handle:
        blob = handle.read()

    count = str(min_length).encode()
    # Runs of printable single-byte characters.
    narrow_re = re.compile(rb'[\x20-\x7e]{' + count + rb',}')
    # Runs of printable characters each followed by a NUL (UTF-16LE layout).
    wide_re = re.compile(rb'(?:[\x20-\x7e]\x00){' + count + rb',}')

    found = {'ascii': [], 'wide': []}
    for raw in narrow_re.findall(blob):
        found['ascii'].append(raw.decode('ascii'))
    for raw in wide_re.findall(blob):
        found['wide'].append(raw.decode('utf-16-le'))
    return found
def analyze_pe_imports(filepath):
    """Collect the named imports of a PE file for API-based detection.

    Args:
        filepath: Path of the file to parse with ``pefile``.

    Returns:
        A list of ``"<dll>!<function>"`` strings, one per import entry that
        has a name. Entries without a name are skipped. An empty list is
        returned when ``pefile`` rejects the file with ``PEFormatError`` or
        when the parsed file has no import directory.
    """
    try:
        pe = pefile.PE(filepath)
    except pefile.PEFormatError:
        return []

    try:
        imports = []
        # The attribute only exists when the file has an import directory.
        for entry in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', []):
            dll_name = entry.dll.decode('utf-8', errors='replace')
            for imp in entry.imports:
                if imp.name:
                    func_name = imp.name.decode('utf-8', errors='replace')
                    imports.append(f"{dll_name}!{func_name}")
        return imports
    finally:
        # A PE object opened from a path keeps the file mapped until closed.
        pe.close()
def find_unique_byte_patterns(filepath, pattern_length=16, *, stride=4,
                              max_candidates=20):
    """Find byte sequences that occur once and could serve as YARA hex patterns.

    The file is sampled with a sliding window. When the file parses as a PE,
    only the first section flagged executable is sampled; otherwise (or when
    no section is executable) the whole file is used.

    Args:
        filepath: Path of the file to analyse.
        pattern_length: Size in bytes of each candidate window.
        stride: Distance in bytes between the starts of consecutive windows.
        max_candidates: Maximum number of patterns to return.

    Returns:
        Up to ``max_candidates`` strings of space-separated upper-case hex
        bytes, in order of first appearance. Windows that repeat among the
        sampled windows, or in which a third or more of the bytes are NUL,
        are excluded.
    """
    with open(filepath, 'rb') as f:
        data = f.read()

    code_data = data
    try:
        # Parse the bytes already in memory: no second read of the file and
        # no open file mapping left behind. Section headers are still parsed
        # under fast_load; only the data directories are skipped.
        pe = pefile.PE(data=data, fast_load=True)
        for section in pe.sections:
            if section.Characteristics & 0x20000000:  # IMAGE_SCN_MEM_EXECUTE
                code_start = section.PointerToRawData
                code_data = data[code_start:code_start + section.SizeOfRawData]
                break
    except Exception:
        # Deliberately best-effort: anything that prevents PE parsing just
        # means the whole file is sampled instead.
        code_data = data

    max_nulls = pattern_length // 3
    # "+ 1" so the final full-length window is sampled as well.
    window_starts = range(0, len(code_data) - pattern_length + 1, stride)
    windows = (code_data[i:i + pattern_length] for i in window_starts)
    # Count raw windows and hex-format only the few that are returned.
    freq = Counter(w for w in windows if w.count(b'\x00') < max_nulls)

    unique = [w for w, count in freq.items() if count == 1]
    return [
        ' '.join(f'{b:02X}' for b in w)
        for w in unique[:max_candidates]
    ]
def _escape_yara_text(text):
    """Escape backslashes and double quotes for use inside a YARA text string."""
    # Backslashes first, so the ones added for quotes are not doubled again.
    return text.replace('\\', '\\\\').replace('"', '\\"')


def suggest_rule_strings(filepath):
    """Print candidate strings, imports and hex patterns for a YARA rule.

    Output goes to stdout in four sections: suspicious ASCII strings,
    suspicious wide strings, suspicious imports, and up to five candidate
    hex patterns. Strings are printed as YARA string declarations with
    backslashes and double quotes escaped. Each distinct string is printed
    once, and hex patterns get numbered identifiers so the output can be
    pasted into a rule without identifier clashes.

    Args:
        filepath: Path of the sample to analyse.
    """
    print(f"[+] 正在分析:{filepath}")

    # Extract strings
    strings = extract_strings(filepath)

    # Keywords that mark a string as worth reviewing (matched case-insensitively)
    suspicious_keywords = [
        'http', 'https', 'cmd', 'powershell', 'mutex', 'pipe',
        'password', 'credential', 'inject', 'hook', 'debug',
        'sandbox', 'virtual', 'vmware', 'vbox',
    ]

    def is_suspicious(text):
        lowered = text.lower()
        return any(kw in lowered for kw in suspicious_keywords)

    print("\n[+] 可疑 ASCII 字符串:")
    # dict.fromkeys drops repeated strings while keeping first-seen order.
    for s in dict.fromkeys(strings['ascii']):
        if is_suspicious(s):
            print(f" $ = \"{_escape_yara_text(s)}\" ascii")

    print("\n[+] 可疑宽字符字符串:")
    for s in dict.fromkeys(strings['wide']):
        if is_suspicious(s):
            print(f" $ = \"{_escape_yara_text(s)}\" wide")

    # Import analysis
    imports = analyze_pe_imports(filepath)
    suspicious_apis = frozenset((
        'VirtualAlloc', 'VirtualProtect', 'WriteProcessMemory',
        'CreateRemoteThread', 'NtUnmapViewOfSection', 'RtlMoveMemory',
        'OpenProcess', 'CreateToolhelp32Snapshot',
        'InternetOpenA', 'HttpSendRequestA',
        'CryptEncrypt', 'CryptDecrypt',
    ))
    print("\n[+] 可疑导入:")
    for imp in imports:
        # Entries look like "dll!function"; compare the function part only.
        func = imp.split('!')[-1]
        if func in suspicious_apis:
            print(f" {imp}")

    # Byte patterns
    print("\n[+] 候选十六进制模式:")
    patterns = find_unique_byte_patterns(filepath)
    for idx, p in enumerate(patterns[:5], start=1):
        print(f" $hex{idx} = {{ {p} }}")
if __name__ == "__main__":
    # Script entry point: the first command-line argument is the sample path.
    if len(sys.argv) >= 2:
        suggest_rule_strings(sys.argv[1])
    else:
        # No sample given: show usage and exit with a failure status.
        print(f"用法:{sys.argv[0]} <sample_path>")
        sys.exit(1)
import yara
import os
def _format_meta_value(value):
    """Render one metadata value as a YARA literal."""
    # bool is checked before int because bool is a subclass of int.
    if isinstance(value, bool):
        return 'true' if value else 'false'
    if isinstance(value, int):
        return str(value)
    # Everything else becomes a quoted string. Backslashes are escaped first
    # so the escapes added afterwards are not doubled.
    escaped = (
        str(value)
        .replace('\\', '\\\\')
        .replace('"', '\\"')
        .replace('\n', '\\n')
        .replace('\r', '\\r')
        .replace('\t', '\\t')
    )
    return f'"{escaped}"'


def create_yara_rule(rule_name, meta, strings, condition):
    """Assemble the source text of a YARA rule from its components.

    Args:
        rule_name: Identifier placed after the ``rule`` keyword.
        meta: Mapping of metadata names to values. Booleans and integers are
            written as YARA literals; any other value is written as a quoted
            string with backslashes, quotes and newlines/tabs escaped.
        strings: Iterable of complete string declarations; they are inserted
            verbatim, so the caller is responsible for their escaping.
        condition: Condition expression, inserted verbatim.

    Returns:
        The rule source as a single string. The ``meta:`` and ``strings:``
        sections are left out when empty, because an empty section is a YARA
        syntax error.
    """
    lines = [f"rule {rule_name} {{"]
    if meta:
        lines.append("meta:")
        lines.extend(f" {k} = {_format_meta_value(v)}" for k, v in meta.items())
    if strings:
        lines.append("strings:")
        lines.extend(f" {s}" for s in strings)
    lines.append("condition:")
    lines.append(f"{condition}")
    lines.append("}")
    return "\n".join(lines)
def test_yara_rule(rule_text, test_dir):
    """Compile a YARA rule and run it against every file in a directory.

    Only regular files directly inside ``test_dir`` are scanned;
    subdirectories are not descended into. Files are visited in sorted name
    order so repeated runs report in the same order.

    Args:
        rule_text: YARA rule source to compile.
        test_dir: Directory holding the sample files.

    Returns:
        ``None`` when the rule has a syntax error (the error is printed).
        Otherwise a dict with three lists: ``"matches"`` (dicts with the
        ``"file"`` name and the matching ``"rules"``), ``"no_match"`` (file
        names) and ``"errors"`` (dicts with the ``"file"`` name and the
        ``"error"`` text for files that could not be scanned).
    """
    try:
        rules = yara.compile(source=rule_text)
    except yara.SyntaxError as e:
        print(f"[-] YARA 语法错误:{e}")
        return None

    results = {"matches": [], "no_match": [], "errors": []}
    for filename in sorted(os.listdir(test_dir)):
        filepath = os.path.join(test_dir, filename)
        if not os.path.isfile(filepath):
            continue
        try:
            matches = rules.match(filepath)
        except yara.Error as e:
            # One unreadable sample must not abort the whole run.
            results["errors"].append({"file": filename, "error": str(e)})
            continue
        if matches:
            results["matches"].append({
                "file": filename,
                "rules": [m.rule for m in matches],
            })
        else:
            results["no_match"].append(filename)

    print(f"[+] 匹配:{len(results['matches'])} 个")
    print(f"[-] 未匹配:{len(results['no_match'])} 个")
    if results["errors"]:
        print(f"[!] 扫描出错:{len(results['errors'])} 个")
    return results
# Example: build a rule for a hypothetical malware family.
# Every string declared here is referenced by the condition, because YARA
# refuses to compile a rule that contains an unreferenced string. The former
# "$pdb" string (a bare "C:\Users\" path prefix) was never referenced and is
# too generic to be a useful indicator, so it has been removed.
example_rule = create_yara_rule(
    rule_name="MalwareFamily_Variant_A",
    meta={
        "description": "Detects MalwareFamily Variant A",
        "author": "Malware Analysis Team",
        "date": "2025-01-01",
        "hash": "abc123...",
        "tlp": "WHITE",
    },
    strings=[
        '$mutex = "Global\\\\UniqueM4lwareMutex" ascii wide',
        '$c2_pattern = /https?:\\/\\/[a-z]{5,10}\\.(xyz|top|buzz)\\/gate\\.php/',
        '$api1 = "VirtualAllocEx" ascii',
        '$api2 = "WriteProcessMemory" ascii',
        '$api3 = "CreateRemoteThread" ascii',
        '$hex_decrypt = { 8B 45 ?? 33 C1 89 45 ?? 83 C1 04 }',
    ],
    # The header and size checks come first in the condition.
    condition=(
        'uint16(0) == 0x5A4D and filesize < 2MB and '
        '($mutex or $c2_pattern) and '
        '2 of ($api*) and '
        '$hex_decrypt'
    ),
)
print(example_rule)
import time
def benchmark_rule(rule_text, scan_directory, iterations=3):
    """Benchmark how fast a YARA rule scans a directory tree.

    Every file under ``scan_directory`` (recursively) is scanned once per
    iteration. Per-iteration timings and the average are printed.

    Args:
        rule_text: YARA rule source to compile.
        scan_directory: Root directory whose files are scanned.
        iterations: Number of full passes over the file list; at least 1.

    Returns:
        The average wall-clock time of one pass, in seconds.

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        # Without at least one pass there is nothing to average.
        raise ValueError("iterations must be at least 1")

    rules = yara.compile(source=rule_text)

    files = []
    for root, _, filenames in os.walk(scan_directory):
        files.extend(os.path.join(root, name) for name in filenames)

    print(f"[+] 对 {len(files)} 个文件进行基准测试"
          f"({iterations} 次迭代)")

    times = []
    for i in range(iterations):
        start = time.perf_counter()
        matches = 0
        errors = 0
        for filepath in files:
            try:
                if rules.match(filepath):
                    matches += 1
            except Exception:
                # Best-effort: a file that cannot be scanned is counted and
                # skipped so the benchmark still completes.
                errors += 1
        elapsed = time.perf_counter() - start
        times.append(elapsed)
        print(f" 第 {i+1} 次迭代:{elapsed:.3f} 秒({matches} 次匹配)")
        if errors:
            print(f" 第 {i+1} 次迭代:{errors} 个文件扫描出错")

    avg_time = sum(times) / len(times)
    # Guard against a zero average, e.g. an empty directory on a coarse clock.
    files_per_sec = len(files) / avg_time if avg_time > 0 else 0.0
    print(f"\n[+] 平均:{avg_time:.3f} 秒({files_per_sec:.0f} 文件/秒)")
    return avg_time