Init. commit

This commit is contained in:
Caplag
2026-03-02 21:44:22 +03:00
committed by Ivan Z
commit 9511b38280
38 changed files with 4397 additions and 0 deletions

View File

@@ -0,0 +1,188 @@
import argparse
import math
import re
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, Iterator, List, Tuple
# Bytes-regex templates; the two %d placeholders are filled with the minimum
# and maximum repetition counts before compiling.
# Matches a run of printable ASCII bytes (space through tilde).
ASCII_RE_TEMPLATE = rb"[ -~]{%d,%d}"
# Matches a run of printable ASCII bytes that are each followed by a NUL byte,
# i.e. ASCII-range text stored as UTF-16LE.
UTF16LE_ASCII_RE_TEMPLATE = rb"(?:[ -~]\x00){%d,%d}"
@dataclass
class Hit:
    """A candidate string plus the location and bookkeeping recorded for it."""

    # The extracted candidate text.
    s: str
    # Heuristic score assigned to the text (higher sorts first in the report).
    score: float
    # File in which the recorded occurrence was found.
    file: Path
    # Byte offset of the recorded occurrence inside that file.
    offset: int
    # Encoding label of the recorded occurrence ("ascii" or "utf16le").
    kind: str
    # How many times this exact text has been seen.
    count: int = 1
def iter_files(paths: Iterable[str]) -> Iterator[Path]:
    """Yield every regular file named by, or located beneath, the given paths.

    Args:
        paths: File or directory paths as strings.

    Yields:
        For a directory, each regular file found recursively below it, in
        sorted path order. For a regular file, the file itself. Paths that
        are neither a directory nor a regular file produce nothing.
    """
    for raw in paths:
        root = Path(raw)
        if root.is_dir():
            # Sort the whole tree first so the traversal order is deterministic.
            yield from (entry for entry in sorted(root.rglob("*")) if entry.is_file())
        elif root.is_file():
            yield root
def shannon_entropy(s: str) -> float:
    """Return the Shannon entropy of *s* in bits per character.

    The empty string has entropy 0.0.
    """
    total = len(s)
    if total == 0:
        return 0.0

    # Tally how often each distinct character occurs.
    occurrences: Dict[str, int] = {}
    for symbol in s:
        if symbol in occurrences:
            occurrences[symbol] += 1
        else:
            occurrences[symbol] = 1

    # H = -sum(p * log2(p)) over the observed character frequencies.
    entropy = 0.0
    for tally in occurrences.values():
        probability = tally / total
        entropy -= probability * math.log2(probability)
    return entropy
# Substrings whose presence marks a string as an OS/application artifact
# (paths, registry keys, file names, ...) rather than a password candidate.
# Entries containing '\\', '/' or ':' can never be the deciding factor inside
# looks_passwordish(), because its character check rejects those strings
# first; they are kept because this tuple is a module-level name.
# NOTE(review): matching is case-sensitive (e.g. ".DLL" is not caught) -
# confirm that this is intended.
BAD_SUBSTRINGS = (
    "\\\\",
    "\\Registry\\",
    "\\Registry",
    "\\BaseNamedObjects\\",
    "\\BaseNamedObjects",
    ":\\",
    "/",
    "System32",
    "Windows",
    "Microsoft",
    "CLSID",
    "AppX",
    "shell:::",
    "atom(",
    ".dll",
    ".exe",
    ".sys",
    ".ini",
    ".mui",
    ".nls",
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".ttf",
    ".otf",
    ".wav",
    ".mp3",
    ".mp4",
    ".sqlite",
)

# Any one of these characters disqualifies a candidate outright.
_REJECTED_CHARS = frozenset(
    ("\\", "/", ":", "<", ">", '"', "'", "=", "\t", "\r", "\n")
)

# Canonical 8-4-4-4-12 hexadecimal GUID/UUID text.
_GUID_RE = re.compile(
    r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
)


def looks_passwordish(s: str) -> bool:
    """Return True when *s* survives every "obviously not a password" filter.

    A string is rejected when it:
      * contains any path/markup/assignment character or tab/CR/LF,
      * contains any entry of ``BAD_SUBSTRINGS``,
      * contains four or more spaces,
      * is made of three or fewer distinct characters, or
      * is a GUID in 8-4-4-4-12 hexadecimal form.

    URLs need no dedicated check: anything starting with ``http://`` or
    ``https://`` contains ':' and '/', so the character filter already
    rejects it.

    Args:
        s: Candidate text.

    Returns:
        True if no filter rejected the text, False otherwise.
    """
    if not _REJECTED_CHARS.isdisjoint(s):
        return False
    if any(bad in s for bad in BAD_SUBSTRINGS):
        return False
    if s.count(" ") >= 4:
        return False
    if len(set(s)) <= 3:
        return False
    if _GUID_RE.fullmatch(s):
        return False
    return True
def score_string(s: str) -> float:
    """Compute a heuristic score for *s*; larger values rank higher.

    The base is ``shannon_entropy(s) * len(s)``. Each character class present
    (ASCII lowercase, ASCII uppercase, ASCII digit, non-alphanumeric) adds
    5.0. Penalties: 2.0 if the text contains a space, 1.0 if it is entirely
    lower-case or entirely upper-case, 3.0 if it consists solely of
    hexadecimal digits.
    """

    def contains_between(first: str, last: str) -> bool:
        # True when at least one character lies in the inclusive range.
        return any(first <= ch <= last for ch in s)

    class_flags = (
        contains_between("a", "z"),
        contains_between("A", "Z"),
        contains_between("0", "9"),
        not all(ch.isalnum() for ch in s),
    )

    total = shannon_entropy(s) * len(s)
    # Add the bonuses one at a time, in this order, so the floating-point
    # result matches a straightforward sequence of additions.
    for present in class_flags:
        total += 5.0 * present

    if " " in s:
        total -= 2.0
    if s.islower() or s.isupper():
        total -= 1.0
    hex_digits = frozenset("0123456789abcdefABCDEF")
    if hex_digits.issuperset(s):
        total -= 3.0
    return total
def extract_hits(data: bytes, *, min_len: int, max_len: int) -> Iterator[Tuple[str, int, str]]:
    """Find printable-ASCII strings stored as single-byte or UTF-16LE text.

    Args:
        data: Raw bytes to search.
        min_len: Minimum number of characters per match.
        max_len: Maximum number of characters per match; a longer printable
            run is reported as several consecutive matches.

    Yields:
        ``(text, offset, kind)`` tuples, where *offset* is the byte position
        of the match in *data* and *kind* is ``"ascii"`` or ``"utf16le"``.
        All ``"ascii"`` matches are produced before any ``"utf16le"`` match.
    """
    bounds = (min_len, max_len)
    # Each pass: (compiled pattern, byte stride keeping only character bytes, label).
    passes = (
        (re.compile(ASCII_RE_TEMPLATE % bounds), 1, "ascii"),
        (re.compile(UTF16LE_ASCII_RE_TEMPLATE % bounds), 2, "utf16le"),
    )
    for pattern, stride, label in passes:
        for match in pattern.finditer(data):
            # A stride of 2 drops the NUL high bytes of UTF-16LE code units.
            text = match.group(0)[::stride].decode("ascii", errors="ignore")
            yield text, match.start(), label
def main() -> int:
    """Scan the given files/directories and print the top-scoring candidates.

    Command-line arguments:
        paths: One or more files or directories to scan.
        --min-len / --max-len: Character-count bounds for extracted strings.
        --top: Maximum number of candidates to print.
        --grep: If non-empty, only strings containing this substring count.

    Only files whose suffix (case-insensitively) is ``.dmp``, ``.mem``,
    ``.raw``, ``.bin`` or empty are read; this also applies to files named
    explicitly on the command line. Each distinct surviving string is
    reported once, with the location of its first occurrence and the number
    of times it was seen.

    Returns:
        0 if at least one candidate was found, 2 if none were.

    Raises:
        SystemExit: Via argparse, on invalid command-line arguments.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("paths", nargs="+", help="Files/dirs to scan")
    ap.add_argument("--min-len", type=int, default=8)
    ap.add_argument("--max-len", type=int, default=64)
    ap.add_argument("--top", type=int, default=80)
    ap.add_argument("--grep", type=str, default="", help="Only show hits containing this substring")
    args = ap.parse_args()

    # The bounds are interpolated into a regex quantifier: inverted bounds
    # would raise a raw re.error and a negative minimum would silently build a
    # nonsensical pattern, so reject them with a normal usage error instead.
    if args.min_len < 1 or args.max_len < args.min_len:
        ap.error("--min-len must be >= 1 and --max-len must be >= --min-len")
    # A negative slice bound would drop the lowest-scoring hits instead of
    # limiting the output.
    if args.top < 0:
        ap.error("--top must be >= 0")

    scanned_suffixes = (".dmp", ".mem", ".raw", ".bin", "")
    best: Dict[str, Hit] = {}
    grep = args.grep
    for fp in iter_files(args.paths):
        if fp.suffix.lower() not in scanned_suffixes:
            continue
        try:
            # The whole file is loaded at once, so very large dumps can
            # exhaust memory as well as fail with an ordinary I/O error.
            data = fp.read_bytes()
        except (OSError, MemoryError) as exc:
            # Best effort: report the unreadable file and keep scanning.
            print(f"[!] Skipping {fp}: {type(exc).__name__}: {exc}", file=sys.stderr)
            continue
        for s, off, kind in extract_hits(data, min_len=args.min_len, max_len=args.max_len):
            existing = best.get(s)
            if existing is not None:
                # Already accepted once. The filters and the score depend only
                # on the text itself, so nothing needs to be recomputed.
                existing.count += 1
                continue
            if grep and grep not in s:
                continue
            if not looks_passwordish(s):
                continue
            best[s] = Hit(s=s, score=score_string(s), file=fp, offset=off, kind=kind)

    hits: List[Hit] = sorted(best.values(), key=lambda h: h.score, reverse=True)
    if not hits:
        print("[!] No candidates found")
        return 2
    for h in hits[: args.top]:
        print(f"{h.score:8.2f}\t{h.count:4d}\t{h.kind}\t{h.file}\t0x{h.offset:X}\t{h.s}")
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)