Fix #10 + #11: transport hardening + controller resource leaks

#10 transport (obdcore/transport.py):
- TcpTransport.read raises IOError on a real socket error or peer-close instead
  of swallowing it as a timeout, so a dead WiFi link surfaces (via the #8 poll
  handler) as 'connection lost' rather than a frozen dashboard.
- TcpTransport.reset_input_buffer drains at most 64 chunks — never spins forever.
- BleTransport closes the client + stops the event-loop thread on connect
  timeout (no leak), caps the notification buffer at 64 KiB, and close() is
  robust when only partially initialised.

#11 controller (gui/controller.py, obdcore/store.py):
- connect() closes the transport and nulls the link if init()/connect() raises,
  so a failed/retried connect doesn't orphan sockets/threads.
- stop_record() unhooks store.recorder BEFORE closing it, and CsvRecorder now
  has a 'closed' guard so a poll-thread write racing close() is a no-op instead
  of an I/O-on-closed-file crash.

Closes #10
Closes #11

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_016yT89n4zR4qbrySoSiEyZs
This commit is contained in:
2026-07-01 19:39:47 -04:00
parent fa7225d6dc
commit 39fcf3fb55
4 changed files with 86 additions and 22 deletions
+13 -3
View File
@@ -76,12 +76,21 @@ class Controller:
self.link = ElmLink.ble(c["address"]) self.link = ElmLink.ble(c["address"])
else: else:
self.link = ElmLink.serial(c.get("port", port), c.get("baud", baud)) self.link = ElmLink.serial(c.get("port", port), c.get("baud", baud))
try: # don't leak the transport if handshake fails
if not mock:
self.link.init() self.link.init()
ok = self.link.connect() ok = self.link.connect()
try: try:
self.link.fast_timing(True) self.link.fast_timing(True)
except Exception: except Exception:
pass pass
except Exception:
try:
self.link.close()
except Exception:
pass
self.link = None
raise
self.poll_error = None self.poll_error = None
self.sched = PollScheduler(self.link, self.reg, self.store, clock=time.time, self.sched = PollScheduler(self.link, self.reg, self.store, clock=time.time,
on_error=self._on_poll_error) on_error=self._on_poll_error)
@@ -119,9 +128,10 @@ class Controller:
self.store.recorder = CsvRecorder(path) self.store.recorder = CsvRecorder(path)
def stop_record(self): def stop_record(self):
if self.store.recorder: rec = self.store.recorder
self.store.recorder.close() if rec:
self.store.recorder = None self.store.recorder = None # unhook first so the poll thread stops writing
rec.close()
def now(self): def now(self):
return (time.time() - self.t0) if self.t0 else 0.0 return (time.time() - self.t0) if self.t0 else 0.0
+6
View File
@@ -128,13 +128,19 @@ class CsvRecorder:
self._f = open(path, "w") self._f = open(path, "w")
self._f.write("t,key,value\n") self._f.write("t,key,value\n")
self._lock = threading.Lock() self._lock = threading.Lock()
self._closed = False
def write(self, key, t, v): def write(self, key, t, v):
with self._lock: with self._lock:
if self._closed: # a poll-thread write racing close() is a no-op
return
self._f.write(f"{t:.3f},{key},{'' if v is None else v}\n") self._f.write(f"{t:.3f},{key},{'' if v is None else v}\n")
def close(self): def close(self):
with self._lock: with self._lock:
if self._closed:
return
self._closed = True
self._f.close() self._f.close()
+32 -11
View File
@@ -51,21 +51,31 @@ class TcpTransport:
def read(self, n): def read(self, n):
try: try:
return self.sock.recv(n) data = self.sock.recv(n)
except socket.timeout: except socket.timeout:
return b"" return b"" # no data yet -- normal
except OSError: except OSError as e:
return b"" raise IOError(f"WiFi connection lost: {e}") from e
if data == b"": # peer closed the connection
raise IOError("WiFi connection closed by adapter")
return data
def reset_input_buffer(self): def reset_input_buffer(self):
self.sock.settimeout(0.05) # drain pending bytes, but never spin forever if data keeps arriving
self.sock.settimeout(0.02)
try: try:
while self.sock.recv(4096): for _ in range(64):
if not self.sock.recv(4096):
break
except socket.timeout:
pass pass
except Exception: except OSError:
pass pass
finally: finally:
try:
self.sock.settimeout(self._rt) self.sock.settimeout(self._rt)
except OSError:
pass
def close(self): def close(self):
try: try:
@@ -105,7 +115,9 @@ class BleTransport:
self._thread = threading.Thread(target=self._run, daemon=True) self._thread = threading.Thread(target=self._run, daemon=True)
self._thread.start() self._thread.start()
if not self._ready.wait(connect_timeout) or self._err: if not self._ready.wait(connect_timeout) or self._err:
raise RuntimeError(f"BLE connect failed: {self._err or 'timeout'}") err = self._err or "timeout"
self.close() # don't leak the client + event-loop thread
raise RuntimeError(f"BLE connect failed: {err}")
def _run(self): def _run(self):
import asyncio import asyncio
@@ -136,6 +148,8 @@ class BleTransport:
def on_notify(_h, data): def on_notify(_h, data):
with self._lock: with self._lock:
self._buf += bytes(data) self._buf += bytes(data)
if len(self._buf) > 65536: # cap: never grow unbounded
del self._buf[:-65536]
await self._client.start_notify(notify_char, on_notify) await self._client.start_notify(notify_char, on_notify)
self._ready.set() self._ready.set()
@@ -168,10 +182,17 @@ class BleTransport:
self._buf.clear() self._buf.clear()
def close(self): def close(self):
try:
import asyncio import asyncio
asyncio.run_coroutine_threadsafe(self._client.disconnect(), self._loop).result(timeout=3.0) loop, client = self._loop, self._client
self._loop.call_soon_threadsafe(self._loop.stop) if loop is None:
return
if client is not None:
try:
asyncio.run_coroutine_threadsafe(client.disconnect(), loop).result(timeout=3.0)
except Exception:
pass
try: # stop the loop even if disconnect failed,
loop.call_soon_threadsafe(loop.stop) # so the background thread exits
except Exception: except Exception:
pass pass
+27
View File
@@ -5,6 +5,7 @@ import os
import socket import socket
import sys import sys
import threading import threading
import time
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -89,6 +90,31 @@ def test_wifi_transport():
srv.stop() srv.stop()
def test_tcp_read_raises_on_closed_peer():
# A dead WiFi connection must surface as an error, not silently look like a
# perpetual timeout (which left the app frozen on "Connected").
srv = socket.socket()
srv.bind(("127.0.0.1", 0)); srv.listen(1)
port = srv.getsockname()[1]
def serve():
c, _ = srv.accept()
c.close() # drop the client immediately
threading.Thread(target=serve, daemon=True).start()
t = TcpTransport("127.0.0.1", port)
time.sleep(0.1)
raised = False
try:
for _ in range(5):
t.read(64)
except IOError:
raised = True
assert raised, "read should raise IOError when the peer closed the socket"
t.close(); srv.close()
print(" TCP dead-connection detected (read raises, not silent): OK")
def test_factory_helpers(): def test_factory_helpers():
# the factory methods build the right transport type # the factory methods build the right transport type
assert hasattr(ElmLink, "serial") and hasattr(ElmLink, "tcp") and hasattr(ElmLink, "ble") assert hasattr(ElmLink, "serial") and hasattr(ElmLink, "tcp") and hasattr(ElmLink, "ble")
@@ -97,5 +123,6 @@ def test_factory_helpers():
if __name__ == "__main__": if __name__ == "__main__":
test_wifi_transport() test_wifi_transport()
test_tcp_read_raises_on_closed_peer()
test_factory_helpers() test_factory_helpers()
print("\nALL TRANSPORT TESTS PASS") print("\nALL TRANSPORT TESTS PASS")