Files
obdash/obdcore/formula.py
T
justin 0f029b724a Fix #6: bound formula evaluation to stop untrusted-profile DoS
The AST sandbox whitelisted ** and << with no magnitude bound, so a hostile
profile formula (9**9**9, 1<<10**9) computed a multi-hundred-MB integer on the
scheduler thread -> CPU pin + OOM. The scheduler except clause never catches a
runaway/OOM (not a raised exception), and a derived PID with empty deps fires
every tick on connect.

- _apply() guards each BinOp: shift amount <= 256, exponent <= 64, and any int
  result bit_length > 512 raises FormulaError (caught by the scheduler -> sample
  dropped, thread survives).
- compile-time caps: expr length <= 500, AST depth <= 60; parse also catches
  RecursionError.
- test_formula_dos_bounded: giant-int payloads rejected in <0.5s; legit bit ops
  and scaling still work.

Closes #6

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_016yT89n4zR4qbrySoSiEyZs
2026-07-01 19:26:05 -04:00

139 lines
5.6 KiB
Python

"""Safe formula evaluator for vehicle-profile PID scaling.
Profiles are community-contributed data, so decode formulas must NOT be able to
execute arbitrary code -- OR exhaust CPU/memory. Formulas are arithmetic
expressions over named variables -- the de-facto OBD convention used by Torque /
FORScan / ScanGauge:
    raw-mode PIDs:  variables A, B, C, ... = response data bytes 0, 1, 2, ...
                    e.g.  "(A*256+B)*0.57"   "A-40"   "(A>>1)&1"   "A//2"
    derived PIDs:   variables are other PID keys
                    e.g.  "MAP - BARO"
Only numeric literals, the named variables, arithmetic/bitwise operators, and a
small whitelist of functions are allowed. Anything else raises FormulaError at
compile time. To stop a hostile profile from freezing the acquisition thread
with a giant-integer expression (e.g. `9**9**9`, `1<<10**9`), evaluation also
BOUNDS magnitude: shift/exponent amounts and integer result bit-lengths are
capped, and expression length + nesting depth are limited at compile.
"""
import ast
import operator
# Whitelisted binary operators: AST operator node type -> implementation.
_BIN = {
    ast.Add: operator.add, ast.Sub: operator.sub, ast.Mult: operator.mul,
    ast.Div: operator.truediv, ast.FloorDiv: operator.floordiv,
    ast.Mod: operator.mod, ast.Pow: operator.pow,
    ast.BitAnd: operator.and_, ast.BitOr: operator.or_, ast.BitXor: operator.xor,
    ast.LShift: operator.lshift, ast.RShift: operator.rshift,
}

# Whitelisted unary operators: AST operator node type -> implementation.
_UNARY = {ast.USub: operator.neg, ast.UAdd: operator.pos, ast.Invert: operator.invert}

# magnitude / complexity limits (far above any real OBD byte arithmetic)
MAX_RESULT_BITS = 512   # ~155 decimal digits; real decode stays < 32 bits
MAX_SHIFT = 256         # bit-field decode never shifts more than a few bytes
MAX_POW_EXP = 64
MAX_ROUND_DIGITS = 64   # cap on |ndigits| accepted by round(); see _bounded_round
MAX_EXPR_LEN = 500
MAX_DEPTH = 60


class FormulaError(ValueError):
    """Raised when a formula is rejected or exceeds an evaluation limit."""


def _bounded_round(number, ndigits=None):
    """Drop-in for ``round()`` that refuses huge digit counts.

    CPython rounds an int to a negative ``ndigits`` by first building
    ``10 ** -ndigits``, so an unbounded ``round(A, -999999999)`` allocates a
    giant integer just like an unbounded ``**`` would. The digit count is
    therefore capped at ``MAX_ROUND_DIGITS`` in either direction.

    Args:
        number: Value to round.
        ndigits: Optional number of decimal digits; must be integer-like.

    Returns:
        Whatever the builtin ``round()`` returns for the same arguments.

    Raises:
        FormulaError: if ``ndigits`` is not an integer or its magnitude
            exceeds ``MAX_ROUND_DIGITS``.
    """
    if ndigits is None:
        return round(number)
    try:
        places = operator.index(ndigits)
    except TypeError:
        raise FormulaError("round() digits must be an integer") from None
    if abs(places) > MAX_ROUND_DIGITS:
        raise FormulaError("round() digits out of range")
    return round(number, places)


# Whitelisted callables: name usable in a formula -> implementation.
_FUNCS = {"min": min, "max": max, "abs": abs, "round": _bounded_round,
          "int": int, "float": float}
def _validate(node, allowed, depth=0):
    """Reject any AST node that falls outside the formula whitelist.

    Walks ``node`` recursively and returns ``None`` when the whole subtree is
    acceptable.

    Args:
        node: AST node to check (the ``ast.Expression`` root or a descendant).
        allowed: Container of variable names the formula may reference.
        depth: Nesting level of ``node``; the top-level call leaves it at 0.

    Raises:
        FormulaError: if nesting exceeds ``MAX_DEPTH``, or an operator,
            constant, variable name, call, or node type is not whitelisted.
    """
    if depth > MAX_DEPTH:
        raise FormulaError("formula too deeply nested")

    below = depth + 1

    # Leaves first: literals and variable references.
    if isinstance(node, ast.Constant):
        literal = node.value
        # bool is an int subclass, so it has to be excluded explicitly.
        if isinstance(literal, bool) or not isinstance(literal, (int, float)):
            raise FormulaError("only numeric constants allowed")
        return None

    if isinstance(node, ast.Name):
        if node.id in allowed:
            return None
        raise FormulaError(f"unknown variable {node.id!r} (allowed: {sorted(allowed)})")

    if isinstance(node, ast.Expression):
        return _validate(node.body, allowed, below)

    if isinstance(node, ast.UnaryOp):
        op_kind = type(node.op)
        if op_kind not in _UNARY:
            raise FormulaError(f"unary op not allowed: {op_kind.__name__}")
        return _validate(node.operand, allowed, below)

    if isinstance(node, ast.BinOp):
        op_kind = type(node.op)
        if op_kind not in _BIN:
            raise FormulaError(f"operator not allowed: {op_kind.__name__}")
        for side in (node.left, node.right):
            _validate(side, allowed, below)
        return None

    if isinstance(node, ast.Call):
        callee = node.func
        # Only a bare whitelisted name may be called (no attributes, no lambdas).
        if not (isinstance(callee, ast.Name) and callee.id in _FUNCS):
            raise FormulaError("only min/max/abs/round/int/float calls allowed")
        if node.keywords:
            raise FormulaError("keyword args not allowed")
        for argument in node.args:
            _validate(argument, allowed, below)
        return None

    raise FormulaError(f"expression not allowed: {type(node).__name__}")
def _apply(op_type, left, right):
    """Apply a binary op with magnitude guards so an untrusted formula can't
    allocate a giant integer (Pow / shift amplification).

    Args:
        op_type: AST operator class; must be a key of ``_BIN``.
        left: Already-evaluated left operand.
        right: Already-evaluated right operand.

    Returns:
        The result of the operator applied to ``left`` and ``right``.

    Raises:
        FormulaError: if a shift amount is not an integer or lies outside
            ``0..MAX_SHIFT``, an integer exponent exceeds ``MAX_POW_EXP``, an
            integer result would exceed (or does exceed) ``MAX_RESULT_BITS``
            bits, or the result is a complex number.
    """
    if op_type in (ast.LShift, ast.RShift):
        try:
            r = operator.index(right)
        except TypeError:
            raise FormulaError("shift amount must be an integer") from None
        if not 0 <= r <= MAX_SHIFT:
            raise FormulaError("shift amount out of range")
        # Predict the size of a left shift before performing it.
        if (op_type is ast.LShift and isinstance(left, int)
                and left.bit_length() + r > MAX_RESULT_BITS):
            raise FormulaError("shift result too large")
    elif op_type is ast.Pow:
        if isinstance(right, int):
            if right > MAX_POW_EXP:
                raise FormulaError("exponent too large")
            # bit_length * exponent is an upper bound on the result's size.
            if (isinstance(left, int) and right > 0
                    and left.bit_length() * right > MAX_RESULT_BITS):
                raise FormulaError("power result too large")
    res = _BIN[op_type](left, right)
    # A negative base raised to a fractional power yields a complex number in
    # Python 3; a decode formula must produce a real value, so reject it here
    # instead of letting it leak to the caller.
    if isinstance(res, complex):
        raise FormulaError("result is not a real number")
    # Backstop for operators without a pre-check (e.g. multiplication).
    if isinstance(res, int) and res.bit_length() > MAX_RESULT_BITS:
        raise FormulaError("result magnitude too large")
    return res
def _eval(node, names):
    """Evaluate a formula AST node against the variable values in ``names``.

    Args:
        node: AST node to evaluate (the ``ast.Expression`` root or a descendant).
        names: Mapping from variable name to its current value; lookup errors
            propagate to the caller.

    Returns:
        The value of the (sub)expression.

    Raises:
        FormulaError: for a node type this evaluator does not handle, or when
            ``_apply`` rejects a binary operation.
    """
    # Leaves.
    if isinstance(node, ast.Constant):
        return node.value
    if isinstance(node, ast.Name):
        return names[node.id]

    # Root wrapper produced by ast.parse(..., mode="eval").
    if isinstance(node, ast.Expression):
        return _eval(node.body, names)

    if isinstance(node, ast.UnaryOp):
        unary_fn = _UNARY[type(node.op)]
        return unary_fn(_eval(node.operand, names))

    if isinstance(node, ast.BinOp):
        # Left operand is evaluated before the right one.
        lhs = _eval(node.left, names)
        rhs = _eval(node.right, names)
        return _apply(type(node.op), lhs, rhs)

    if isinstance(node, ast.Call):
        target = _FUNCS[node.func.id]
        argv = [_eval(arg, names) for arg in node.args]
        return target(*argv)

    raise FormulaError(f"expression not allowed: {type(node).__name__}")
def compile_formula(expr, allowed_names):
    """Return fn(names_dict) -> number. Raises FormulaError on disallowed input.

    Args:
        expr: Formula source text.
        allowed_names: Iterable of variable names the formula may reference.

    Returns:
        A callable taking a mapping of variable name -> value and returning the
        evaluated result. The source text is attached as its ``expr`` attribute.

    Raises:
        FormulaError: if ``expr`` is not text, is longer than ``MAX_EXPR_LEN``,
            cannot be parsed, or fails validation.
    """
    # Profile data is untrusted: a numeric or null "formula" must fail with the
    # documented FormulaError, not with a TypeError from len().
    if not isinstance(expr, (str, bytes)):
        raise FormulaError(
            f"formula must be a string, got {type(expr).__name__}")
    if len(expr) > MAX_EXPR_LEN:
        raise FormulaError("formula too long")
    try:
        tree = ast.parse(expr, mode="eval")
    except (SyntaxError, ValueError, RecursionError, MemoryError) as e:
        # MemoryError: some interpreter versions report parser stack overflow
        # on deeply nested input this way.
        raise FormulaError(f"bad formula {expr!r}: {e}") from e
    allowed = set(allowed_names)
    _validate(tree, allowed)

    def fn(names):
        return _eval(tree, names)

    fn.expr = expr
    return fn