Init. commit
This commit is contained in:
188
HumanAI-Forensic-Hard/scripts/extract_password_candidates.py
Normal file
188
HumanAI-Forensic-Hard/scripts/extract_password_candidates.py
Normal file
@@ -0,0 +1,188 @@
|
||||
import argparse
|
||||
import math
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, Iterator, List, Tuple
|
||||
|
||||
|
||||
# Bytes-regex templates; the two %d slots are filled with the minimum and
# maximum repetition counts before compilation.

# A run of printable ASCII bytes (0x20 space through 0x7E tilde).
ASCII_RE_TEMPLATE = rb"[ -~]{%d,%d}"

# A run of printable ASCII characters each followed by a NUL byte, i.e. the
# UTF-16LE encoding of ASCII-only text.
UTF16LE_ASCII_RE_TEMPLATE = rb"(?:[ -~]\x00){%d,%d}"
|
||||
|
||||
|
||||
@dataclass
class Hit:
    """One distinct candidate string together with where it was recorded."""

    # The extracted, decoded candidate text.
    s: str
    # Ranking score assigned to the text (higher sorts first in the report).
    score: float
    # File in which the recorded occurrence was found.
    file: Path
    # Byte offset of the recorded occurrence inside that file.
    offset: int
    # Extraction flavour that produced the text: "ascii" or "utf16le".
    kind: str
    # Number of times this exact text has been seen so far.
    count: int = 1
|
||||
|
||||
|
||||
def iter_files(paths: Iterable[str]) -> Iterator[Path]:
    """Yield the regular files designated by *paths*.

    A directory is walked recursively and its regular files are yielded in
    sorted path order; a regular file is yielded as-is.  Entries that are
    neither (for example paths that do not exist) are skipped silently.

    Args:
        paths: File or directory names to expand.

    Yields:
        One ``Path`` per regular file, in the order the inputs were given.
    """
    for raw in paths:
        root = Path(raw)
        if root.is_dir():
            # sorted() materialises the walk so the output order is stable.
            yield from (entry for entry in sorted(root.rglob("*")) if entry.is_file())
        elif root.is_file():
            yield root
|
||||
|
||||
|
||||
def shannon_entropy(s: str) -> float:
    """Return the Shannon entropy of *s* in bits per character.

    The empty string has an entropy of 0.0 by definition here.
    """
    total = len(s)
    if total == 0:
        return 0.0

    # Seed every distinct character with zero (in first-seen order), then tally.
    occurrences: Dict[str, int] = dict.fromkeys(s, 0)
    for symbol in s:
        occurrences[symbol] += 1

    entropy = 0.0
    for seen in occurrences.values():
        probability = seen / total
        entropy -= probability * math.log2(probability)
    return entropy
|
||||
|
||||
|
||||
# Substrings that mark a string as operating-system noise (paths, registry
# keys, well-known component names, file extensions) rather than a secret.
# looks_passwordish() matches these case-insensitively.
BAD_SUBSTRINGS = (
    "\\\\",
    "\\Registry\\",
    "\\Registry",
    "\\BaseNamedObjects\\",
    "\\BaseNamedObjects",
    ":\\",
    "/",
    "System32",
    "Windows",
    "Microsoft",
    "CLSID",
    "AppX",
    "shell:::",
    "atom(",
    ".dll",
    ".exe",
    ".sys",
    ".ini",
    ".mui",
    ".nls",
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".ttf",
    ".otf",
    ".wav",
    ".mp3",
    ".mp4",
    ".sqlite",
)

# Lower-cased copy built once, so each candidate needs only one lower() call.
_BAD_SUBSTRINGS_LOWER = tuple(bad.lower() for bad in BAD_SUBSTRINGS)

# Any of these characters disqualifies a candidate outright.
_FORBIDDEN_CHARS = frozenset("\\/:<>\"'=\t\r\n")

# Canonical 8-4-4-4-12 hexadecimal GUID/UUID, compiled once for the hot path.
_GUID_RE = re.compile(
    r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
)


def looks_passwordish(s: str) -> bool:
    """Return True when *s* survives the noise filters for password candidates.

    A string is rejected when any of the following holds:

    * it contains a path/markup/assignment character or a tab/CR/LF
      (``\\ / : < > " ' =``) -- this also rules out every URL, so no separate
      ``http://`` check is needed;
    * it contains one of ``BAD_SUBSTRINGS``, compared case-insensitively so
      that e.g. ``KERNEL32.DLL`` is filtered just like ``kernel32.dll``;
    * it contains four or more spaces;
    * it is built from three or fewer distinct characters;
    * it is exactly a GUID in 8-4-4-4-12 hexadecimal form.

    Args:
        s: Candidate text.

    Returns:
        True if none of the rejection rules apply, otherwise False.
    """
    if not _FORBIDDEN_CHARS.isdisjoint(s):
        return False

    lowered = s.lower()
    if any(bad in lowered for bad in _BAD_SUBSTRINGS_LOWER):
        return False

    if s.count(" ") >= 4:
        return False

    if len(set(s)) <= 3:
        return False

    if _GUID_RE.fullmatch(s):
        return False
    return True
|
||||
|
||||
|
||||
def score_string(s: str) -> float:
    """Rank *s* by how much it resembles a strong password (higher is better).

    The base score is the string's total entropy (bits per character times
    length).  Each character class present -- ASCII lowercase, ASCII
    uppercase, ASCII digit, non-alphanumeric -- adds 5 points.  Small
    penalties apply for containing a space (-2), being entirely one letter
    case (-1), and consisting solely of hexadecimal digits (-3).
    """
    class_present = (
        any("a" <= ch <= "z" for ch in s),
        any("A" <= ch <= "Z" for ch in s),
        any("0" <= ch <= "9" for ch in s),
        any(not ch.isalnum() for ch in s),
    )

    total = shannon_entropy(s) * len(s)
    for present in class_present:
        # bool multiplies as 0 or 1, so absent classes contribute nothing.
        total += 5.0 * present

    if " " in s:
        total -= 2.0
    if s.islower() or s.isupper():
        total -= 1.0
    if set(s).issubset("0123456789abcdefABCDEF"):
        total -= 3.0
    return total
|
||||
|
||||
|
||||
def extract_hits(data: bytes, *, min_len: int, max_len: int) -> Iterator[Tuple[str, int, str]]:
    """Yield printable strings found in *data*.

    Two passes are made over the buffer: first plain ASCII runs, then
    UTF-16LE runs (printable ASCII characters each followed by a NUL byte).
    Matching is greedy and non-overlapping, so a run longer than *max_len*
    characters is reported as consecutive pieces of at most *max_len*
    characters; a trailing piece shorter than *min_len* is not reported.

    Args:
        data: Raw bytes to scan.
        min_len: Minimum number of characters in a reported string.
        max_len: Maximum number of characters in a reported string.

    Yields:
        ``(text, offset, kind)`` where *offset* is the byte position of the
        match in *data* and *kind* is ``"ascii"`` or ``"utf16le"``.
    """
    bounds = (min_len, max_len)
    # (kind label, compiled pattern, byte stride that selects the text bytes)
    scanners = (
        ("ascii", re.compile(ASCII_RE_TEMPLATE % bounds), 1),
        ("utf16le", re.compile(UTF16LE_ASCII_RE_TEMPLATE % bounds), 2),
    )

    for kind, pattern, stride in scanners:
        for match in pattern.finditer(data):
            # A stride of 2 drops the NUL high bytes of UTF-16LE code units.
            text = match.group(0)[::stride].decode("ascii", errors="ignore")
            yield text, match.start(), kind
|
||||
|
||||
|
||||
def main() -> int:
    """Scan files for password-like strings and print the best candidates.

    Command-line arguments select the files/directories to scan, the accepted
    string length range, how many results to print and an optional substring
    filter.  Only files whose suffix is ``.dmp``, ``.mem``, ``.raw``, ``.bin``
    or empty are read.  Each distinct string is reported once, with the number
    of times it was seen and the location of its first occurrence.

    Output is one tab-separated line per candidate on stdout, best score
    first: score, count, kind, file, hexadecimal offset, text.  Files that
    cannot be read are reported on stderr and skipped.

    Returns:
        0 if at least one candidate was found, 2 otherwise.  Invalid
        arguments terminate the process through ``ArgumentParser.error``.
    """
    import sys  # local import: only needed for the stderr diagnostics below

    ap = argparse.ArgumentParser()
    ap.add_argument("paths", nargs="+", help="Files/dirs to scan")
    ap.add_argument("--min-len", type=int, default=8)
    ap.add_argument("--max-len", type=int, default=64)
    ap.add_argument("--top", type=int, default=80)
    ap.add_argument("--grep", type=str, default="", help="Only show hits containing this substring")
    args = ap.parse_args()

    # These values are interpolated into a regex repetition "{min,max}";
    # reject combinations that would make the pattern invalid or meaningless.
    if args.min_len < 0 or args.max_len < args.min_len:
        ap.error("--min-len must be >= 0 and --max-len must be >= --min-len")
    if args.top < 0:
        ap.error("--top must be >= 0")

    scanned_suffixes = (".dmp", ".mem", ".raw", ".bin", "")
    best: Dict[str, Hit] = {}
    grep = args.grep

    for fp in iter_files(args.paths):
        if fp.suffix.lower() not in scanned_suffixes:
            continue

        try:
            data = fp.read_bytes()
        except (OSError, MemoryError) as exc:
            # Best effort: keep scanning the remaining files, but say why
            # this one contributed nothing.
            print(f"[!] Skipping {fp}: {type(exc).__name__}: {exc}", file=sys.stderr)
            continue

        for s, off, kind in extract_hits(data, min_len=args.min_len, max_len=args.max_len):
            existing = best.get(s)
            if existing is not None:
                # Filtering and scoring depend only on the text, so a string
                # already recorded needs nothing more than a bumped counter.
                existing.count += 1
                continue
            if grep and grep not in s:
                continue
            if not looks_passwordish(s):
                continue
            best[s] = Hit(s=s, score=score_string(s), file=fp, offset=off, kind=kind)

    hits: List[Hit] = sorted(best.values(), key=lambda h: h.score, reverse=True)
    if not hits:
        print("[!] No candidates found")
        return 2

    for h in hits[: args.top]:
        print(f"{h.score:8.2f}\t{h.count:4d}\t{h.kind}\t{h.file}\t0x{h.offset:X}\t{h.s}")

    return 0
|
||||
|
||||
|
||||
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|
||||
Reference in New Issue
Block a user