Add university-extension trials: Illinois VT + Iowa ICPT + Ohio OCPT (+123 cross-vendor trial docs) (#19)
Image rebuild (skip scrape) / build (push) Successful in 5m54s

Co-authored-by: claude <claude@jpaul.io>
Co-committed-by: claude <claude@jpaul.io>
This commit was merged in pull request #19.
This commit is contained in:
2026-06-10 08:36:19 -04:00
committed by Claude (agent)
parent 0bac06b7b6
commit a54fac240f
255 changed files with 105410 additions and 13 deletions
+20 -3
View File
@@ -330,7 +330,7 @@ def chunks_from_variety(
# signal for queries like "best corn for sandy soil Iowa 2024".
def _render_gh_plot_chunk(sidecar: dict) -> str:
def _render_gh_plot_chunk(sidecar: dict, *, include_region: bool = False) -> str:
"""Render a cross-vendor plot report (per-site head-to-head).
Originally GH-specific; now also handles ``lg_plot_reports`` and
@@ -340,6 +340,12 @@ def _render_gh_plot_chunk(sidecar: dict) -> str:
queries should still find DEKALB results inside a GH or AgriGold
plot — search filters target ``brand_in_results``, not the
publisher's brand).
``include_region`` (set for university-trial sources) folds the
region/district into the title and facts so that it appears in the
embedded text. These sources publish many tables for the same state
and year that are distinguished only by region (e.g. Iowa "District
South"); without this, the region lived only in the metadata and the
.md body.
"""
lines: list[str] = []
crop = (sidecar.get("crop") or "").lower()
@@ -350,12 +356,18 @@ def _render_gh_plot_chunk(sidecar: dict) -> str:
state = sidecar.get("state") or sidecar.get("state_abbrev") or ""
year = sidecar.get("year") or ""
cooperator = sidecar.get("cooperator") or ""
region = (sidecar.get("region") or "").strip() if include_region else ""
lines.append(f"# {crop_label} yield trial — {state}, {year}")
title = f"# {crop_label} yield trial — {state}, {year}"
if region:
title += f" ({region})"
lines.append(title)
lines.append("")
# Publisher label — emphasizes the source brand for retrieval.
publisher_brand = sidecar.get("brand") or "Golden Harvest"
facts = [f"{publisher_brand} plot report (cross-vendor)"]
facts = [f"{publisher_brand} {'variety trial (cross-vendor, independent third-party)' if include_region else 'plot report (cross-vendor)'}"]
if region:
facts.append(f"region {region}")
if cooperator:
facts.append(f"cooperator {cooperator}")
if sidecar.get("planted_date"):
@@ -509,6 +521,11 @@ def _render_trial_chunk(sidecar: dict, md_text: str | None = None) -> str:
# for each (Golden Harvest / LG Seeds / AgriGold).
if source in ("gh_plot_reports", "lg_plot_reports", "agrigold_plot_reports"):
return _render_gh_plot_chunk(sidecar)
if source in ("illinois_vt_trials", "iowa_icpt_trials", "ohio_ocpt_trials"):
# University-extension variety trials — same results[] shape, but
# fold region/district into the embedded text (these sources publish
# many tables for the same state/year) and label the chunk as an
# independent third-party variety trial.
return _render_gh_plot_chunk(sidecar, include_region=True)
if source == "proharvest_plots":
# Structured rows → shared cross-vendor renderer (publisher brand
# read from the sidecar). Foreign-format third-party PDFs that