189 lines
4.6 KiB
Python
189 lines
4.6 KiB
Python
import argparse
import math
import re
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, Iterator, List, Tuple
|
|
|
|
|
|
# Bytes regex templates; the two %d slots take the minimum and maximum
# repetition counts (filled in by extract_hits).

# A run of printable ASCII bytes (space through tilde).
ASCII_RE_TEMPLATE = rb"[ -~]{%d,%d}"

# A run of printable ASCII bytes that are each followed by a NUL byte, i.e.
# ASCII-range text stored as 16-bit little-endian code units.
UTF16LE_ASCII_RE_TEMPLATE = rb"(?:[ -~]\x00){%d,%d}"
|
|
|
|
|
|
@dataclass
class Hit:
    """One candidate string together with where it was found.

    ``main`` keeps a single ``Hit`` per distinct string and bumps ``count``
    each time the same string is seen again.
    """

    # The decoded candidate string.
    s: str
    # Ranking value produced by score_string(); higher sorts first.
    score: float
    # File the string was extracted from.
    file: Path
    # Byte offset of the match within that file's contents.
    offset: int
    # Encoding the string was found in: "ascii" or "utf16le".
    kind: str
    # Number of occurrences seen so far.
    count: int = 1
|
|
|
|
|
|
def iter_files(paths: Iterable[str]) -> Iterator[Path]:
    """Yield every regular file named by, or found beneath, *paths*.

    A path that is a file is yielded as-is.  A path that is a directory is
    walked recursively and its files are yielded in sorted order.  Paths
    that are neither (for example, ones that do not exist) are skipped
    without any error.
    """
    for raw_path in paths:
        candidate = Path(raw_path)
        if candidate.is_file():
            yield candidate
        elif candidate.is_dir():
            # Sorting makes the traversal order deterministic across runs.
            yield from (
                entry for entry in sorted(candidate.rglob("*")) if entry.is_file()
            )
|
|
|
|
|
|
def shannon_entropy(s: str) -> float:
    """Return the Shannon entropy of *s*, in bits per character.

    The character distribution is taken from *s* itself.  An empty string
    has entropy ``0.0``.
    """
    length = len(s)
    if length == 0:
        return 0.0

    # Tally how often each distinct character occurs.
    tally: Dict[str, int] = {}
    for symbol in s:
        if symbol in tally:
            tally[symbol] += 1
        else:
            tally[symbol] = 1

    # H = -sum(p * log2(p)) over the observed character frequencies.
    entropy = 0.0
    for occurrences in tally.values():
        probability = occurrences / length
        entropy -= probability * math.log2(probability)
    return entropy
|
|
|
|
|
|
# Substrings that disqualify a candidate in looks_passwordish(): a string
# containing any of these is not treated as a possible password.
BAD_SUBSTRINGS = (
    # Backslash- and path-style fragments.
    "\\\\",
    "\\Registry\\",
    "\\Registry",
    "\\BaseNamedObjects\\",
    "\\BaseNamedObjects",
    ":\\",
    "/",
    # System / vendor identifiers.
    "System32",
    "Windows",
    "Microsoft",
    "CLSID",
    "AppX",
    "shell:::",
    "atom(",
    # File extensions.
    ".dll",
    ".exe",
    ".sys",
    ".ini",
    ".mui",
    ".nls",
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".ttf",
    ".otf",
    ".wav",
    ".mp3",
    ".mp4",
    ".sqlite",
)
|
|
|
|
|
|
# Characters whose presence immediately disqualifies a candidate.
_REJECTED_CHARS = frozenset('\\/:<>"\'=\t\r\n')

# Canonical 8-4-4-4-12 hexadecimal GUID/UUID layout; compiled once because
# looks_passwordish() runs for every extracted string.
_GUID_RE = re.compile(
    r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
)


def looks_passwordish(s: str) -> bool:
    """Return True if *s* survives the heuristics for "could be a password".

    A string is rejected when any of the following holds:

    * it contains a path, markup, quoting, assignment or control character
      (backslash, ``/``, ``:``, ``<``, ``>``, quotes, ``=``, tab, CR, LF);
    * it contains an entry of ``BAD_SUBSTRINGS``, compared
      case-insensitively so that e.g. ``KERNEL32.DLL`` is filtered just
      like ``kernel32.dll``;
    * it contains four or more spaces (more like a sentence than a secret);
    * it uses three or fewer distinct characters;
    * it is exactly a GUID in 8-4-4-4-12 hexadecimal form.

    No separate URL test is needed: ``http://`` and ``https://`` both
    contain ``:`` and ``/``, which the character check already rejects.
    """
    if not _REJECTED_CHARS.isdisjoint(s):
        return False

    # Compare in lower case on both sides so the deny-list is not defeated
    # by upper- or mixed-case spellings.
    lowered = s.lower()
    if any(bad.lower() in lowered for bad in BAD_SUBSTRINGS):
        return False

    if s.count(" ") >= 4:
        return False

    if len(set(s)) <= 3:
        return False

    if _GUID_RE.fullmatch(s):
        return False

    return True
|
|
|
|
|
|
def score_string(s: str) -> float:
    """Return a ranking score for *s*; higher means more password-like.

    The base score is ``shannon_entropy(s) * len(s)``.  Five points are
    added for each character class present (ASCII lowercase, ASCII
    uppercase, ASCII digit, non-alphanumeric).  Small penalties apply when
    the string contains a space (-2), is entirely one letter case (-1), or
    consists solely of hexadecimal digits (-3).
    """
    lowercase = "abcdefghijklmnopqrstuvwxyz"
    uppercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    digits = "0123456789"
    hex_digits = "0123456789abcdefABCDEF"

    class_flags = (
        any(ch in lowercase for ch in s),
        any(ch in uppercase for ch in s),
        any(ch in digits for ch in s),
        any(not ch.isalnum() for ch in s),
    )

    total = shannon_entropy(s) * len(s)

    # Bonuses are added one at a time, in a fixed order, so the floating
    # point result does not depend on how they are grouped.
    for present in class_flags:
        total += 5.0 * present

    if " " in s:
        total -= 2.0
    if s.islower() or s.isupper():
        total -= 1.0
    if set(s).issubset(hex_digits):
        total -= 3.0
    return total
|
|
|
|
|
|
def extract_hits(data: bytes, *, min_len: int, max_len: int) -> Iterator[Tuple[str, int, str]]:
    """Yield ``(string, offset, kind)`` for printable runs found in *data*.

    All ASCII matches are yielded first (``kind == "ascii"``), followed by
    all matches of ASCII characters interleaved with NUL bytes
    (``kind == "utf16le"``).  ``offset`` is the byte position in *data*
    where the match starts.  Lengths are bounded by *min_len* and *max_len*
    characters.

    NOTE: because the pattern caps a match at *max_len* characters, a longer
    printable run is reported as consecutive chunks of at most *max_len*
    characters; a final remainder shorter than *min_len* is not reported.
    """
    bounds = (min_len, max_len)
    ascii_pattern = re.compile(ASCII_RE_TEMPLATE % bounds)
    wide_pattern = re.compile(UTF16LE_ASCII_RE_TEMPLATE % bounds)

    for match in ascii_pattern.finditer(data):
        yield match.group(0).decode("ascii", errors="ignore"), match.start(), "ascii"

    for match in wide_pattern.finditer(data):
        # Keep every other byte: this drops the NUL that follows each character.
        narrow = match.group(0)[::2]
        yield narrow.decode("ascii", errors="ignore"), match.start(), "utf16le"
|
|
|
|
|
|
def main() -> int:
    """Scan files for password-like strings and print the best candidates.

    Command-line arguments:
        paths      files and/or directories to scan (directories recurse)
        --min-len  minimum candidate length in characters (default 8)
        --max-len  maximum candidate length in characters (default 64)
        --top      number of candidates to print (default 80)
        --grep     only consider strings containing this substring

    Only files whose suffix is ``.dmp``, ``.mem``, ``.raw``, ``.bin`` or
    empty are read.  Each distinct string is reported once, with the number
    of times it occurred and the location of its first occurrence.  Output
    is one tab-separated line per candidate: score, count, kind, file,
    hexadecimal offset, string; highest score first.

    Returns:
        0 if at least one candidate was printed, 2 if none were found.
        Invalid length arguments exit through ``ArgumentParser.error``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("paths", nargs="+", help="Files/dirs to scan")
    ap.add_argument("--min-len", type=int, default=8)
    ap.add_argument("--max-len", type=int, default=64)
    ap.add_argument("--top", type=int, default=80)
    ap.add_argument("--grep", type=str, default="", help="Only show hits containing this substring")
    args = ap.parse_args()

    # The lengths are substituted into a regex repetition "{min,max}"; reject
    # values that would raise re.error or build a meaningless pattern.
    if args.min_len < 1 or args.max_len < args.min_len:
        ap.error("--min-len must be >= 1 and --max-len must be >= --min-len")

    scan_suffixes = (".dmp", ".mem", ".raw", ".bin", "")
    best: Dict[str, Hit] = {}
    grep = args.grep

    for fp in iter_files(args.paths):
        if fp.suffix.lower() not in scan_suffixes:
            continue

        try:
            data = fp.read_bytes()
        except (OSError, MemoryError) as exc:
            # Best effort: keep scanning the other files, but say what was
            # skipped instead of hiding the failure.
            print(f"[!] Skipping {fp}: {type(exc).__name__}: {exc}", file=sys.stderr)
            continue

        for s, off, kind in extract_hits(data, min_len=args.min_len, max_len=args.max_len):
            if grep and grep not in s:
                continue

            existing = best.get(s)
            if existing is not None:
                # The filter result and the score depend only on the string
                # itself, so a repeat just bumps the counter; the recorded
                # location stays that of the first occurrence.
                existing.count += 1
                continue

            if not looks_passwordish(s):
                continue
            best[s] = Hit(s=s, score=score_string(s), file=fp, offset=off, kind=kind)

    hits: List[Hit] = sorted(best.values(), key=lambda h: h.score, reverse=True)
    if not hits:
        print("[!] No candidates found")
        return 2

    for h in hits[: args.top]:
        print(f"{h.score:8.2f}\t{h.count:4d}\t{h.kind}\t{h.file}\t0x{h.offset:X}\t{h.s}")

    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the scanner and use main()'s return value as the exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|