From 39fcf3fb552611da61d26b3f5e461f05a097327e Mon Sep 17 00:00:00 2001 From: Justin Paul Date: Wed, 1 Jul 2026 19:39:47 -0400 Subject: [PATCH] Fix #10 + #11: transport hardening + controller resource leaks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #10 transport (obdcore/transport.py): - TcpTransport.read raises IOError on a real socket error or peer-close instead of swallowing it as a timeout, so a dead WiFi link surfaces (via the #8 poll handler) as 'connection lost' rather than a frozen dashboard. - TcpTransport.reset_input_buffer drains at most 64 chunks — never spins forever. - BleTransport closes the client + stops the event-loop thread on connect timeout (no leak), caps the notification buffer at 64 KiB, and close() is robust when only partially initialised. #11 controller (gui/controller.py, obdcore/store.py): - connect() closes the transport and nulls the link if init()/connect() raises, so a failed/retried connect doesn't orphan sockets/threads. - stop_record() unhooks store.recorder BEFORE closing it, and CsvRecorder now has a 'closed' guard so a poll-thread write racing close() is a no-op instead of an I/O-on-closed-file crash. Closes #10 Closes #11 Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_016yT89n4zR4qbrySoSiEyZs --- gui/controller.py | 26 +++++++++++++++------- obdcore/store.py | 6 +++++ obdcore/transport.py | 49 +++++++++++++++++++++++++++++------------ tests/test_transport.py | 27 +++++++++++++++++++++++ 4 files changed, 86 insertions(+), 22 deletions(-) diff --git a/gui/controller.py b/gui/controller.py index c4d1100..2589e49 100644 --- a/gui/controller.py +++ b/gui/controller.py @@ -76,12 +76,21 @@ class Controller: self.link = ElmLink.ble(c["address"]) else: self.link = ElmLink.serial(c.get("port", port), c.get("baud", baud)) - self.link.init() - ok = self.link.connect() - try: - self.link.fast_timing(True) + try: # don't leak the transport if handshake fails + if not mock: + self.link.init() + ok = self.link.connect() + try: + self.link.fast_timing(True) + except Exception: + pass except Exception: - pass + try: + self.link.close() + except Exception: + pass + self.link = None + raise self.poll_error = None self.sched = PollScheduler(self.link, self.reg, self.store, clock=time.time, on_error=self._on_poll_error) @@ -119,9 +128,10 @@ class Controller: self.store.recorder = CsvRecorder(path) def stop_record(self): - if self.store.recorder: - self.store.recorder.close() - self.store.recorder = None + rec = self.store.recorder + if rec: + self.store.recorder = None # unhook first so the poll thread stops writing + rec.close() def now(self): return (time.time() - self.t0) if self.t0 else 0.0 diff --git a/obdcore/store.py b/obdcore/store.py index 6cd371a..9dfac48 100644 --- a/obdcore/store.py +++ b/obdcore/store.py @@ -128,13 +128,19 @@ class CsvRecorder: self._f = open(path, "w") self._f.write("t,key,value\n") self._lock = threading.Lock() + self._closed = False def write(self, key, t, v): with self._lock: + if self._closed: # a poll-thread write racing close() is a no-op + return self._f.write(f"{t:.3f},{key},{'' if v is None else v}\n") def close(self): with self._lock: + if self._closed: + return + self._closed = True self._f.close() diff --git a/obdcore/transport.py b/obdcore/transport.py index 3ce6d4c..c08b2c7 100644 --- a/obdcore/transport.py +++ b/obdcore/transport.py @@ -51,21 +51,31 @@ class TcpTransport: def read(self, n): try: - return self.sock.recv(n) + data = self.sock.recv(n) except socket.timeout: - return b"" - except OSError: - return b"" + return b"" # no data yet -- normal + except OSError as e: + raise IOError(f"WiFi connection lost: {e}") from e + if data == b"": # peer closed the connection + raise IOError("WiFi connection closed by adapter") + return data def reset_input_buffer(self): - self.sock.settimeout(0.05) + # drain pending bytes, but never spin forever if data keeps arriving + self.sock.settimeout(0.02) try: - while self.sock.recv(4096): - pass - except Exception: + for _ in range(64): + if not self.sock.recv(4096): + break + except socket.timeout: + pass + except OSError: pass finally: - self.sock.settimeout(self._rt) + try: + self.sock.settimeout(self._rt) + except OSError: + pass def close(self): try: @@ -105,7 +115,9 @@ class BleTransport: self._thread = threading.Thread(target=self._run, daemon=True) self._thread.start() if not self._ready.wait(connect_timeout) or self._err: - raise RuntimeError(f"BLE connect failed: {self._err or 'timeout'}") + err = self._err or "timeout" + self.close() # don't leak the client + event-loop thread + raise RuntimeError(f"BLE connect failed: {err}") def _run(self): import asyncio @@ -136,6 +148,8 @@ class BleTransport: def on_notify(_h, data): with self._lock: self._buf += bytes(data) + if len(self._buf) > 65536: # cap: never grow unbounded + del self._buf[:-65536] await self._client.start_notify(notify_char, on_notify) self._ready.set() @@ -168,10 +182,17 @@ class BleTransport: self._buf.clear() def close(self): - try: - import asyncio - asyncio.run_coroutine_threadsafe(self._client.disconnect(), self._loop).result(timeout=3.0) - self._loop.call_soon_threadsafe(self._loop.stop) + import asyncio + loop, client = self._loop, self._client + if loop is None: + return + if client is not None: + try: + asyncio.run_coroutine_threadsafe(client.disconnect(), loop).result(timeout=3.0) + except Exception: + pass + try: # stop the loop even if disconnect failed, + loop.call_soon_threadsafe(loop.stop) # so the background thread exits except Exception: pass diff --git a/tests/test_transport.py b/tests/test_transport.py index deba90f..e618ad7 100644 --- a/tests/test_transport.py +++ b/tests/test_transport.py @@ -5,6 +5,7 @@ import os import socket import sys import threading +import time sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -89,6 +90,31 @@ def test_wifi_transport(): srv.stop() +def test_tcp_read_raises_on_closed_peer(): + # A dead WiFi connection must surface as an error, not silently look like a + # perpetual timeout (which left the app frozen on "Connected"). + srv = socket.socket() + srv.bind(("127.0.0.1", 0)); srv.listen(1) + port = srv.getsockname()[1] + + def serve(): + c, _ = srv.accept() + c.close() # drop the client immediately + + threading.Thread(target=serve, daemon=True).start() + t = TcpTransport("127.0.0.1", port) + time.sleep(0.1) + raised = False + try: + for _ in range(5): + t.read(64) + except IOError: + raised = True + assert raised, "read should raise IOError when the peer closed the socket" + t.close(); srv.close() + print(" TCP dead-connection detected (read raises, not silent): OK") + + def test_factory_helpers(): # the factory methods build the right transport type assert hasattr(ElmLink, "serial") and hasattr(ElmLink, "tcp") and hasattr(ElmLink, "ble") @@ -97,5 +123,6 @@ def test_factory_helpers(): if __name__ == "__main__": test_wifi_transport() + test_tcp_read_raises_on_closed_peer() test_factory_helpers() print("\nALL TRANSPORT TESTS PASS")