Add university-extension trials: Illinois VT + Iowa ICPT + Ohio OCPT (+123 cross-vendor trial docs) (#19)
Image rebuild (skip scrape) / build (push) Successful in 5m54s
Image rebuild (skip scrape) / build (push) Successful in 5m54s
Co-authored-by: claude <claude@jpaul.io>
Co-committed-by: claude <claude@jpaul.io>
This commit was merged in pull request #19.
This commit is contained in:
+20
-3
@@ -330,7 +330,7 @@ def chunks_from_variety(
|
||||
# signal for queries like "best corn for sandy soil Iowa 2024".
|
||||
|
||||
|
||||
def _render_gh_plot_chunk(sidecar: dict) -> str:
|
||||
def _render_gh_plot_chunk(sidecar: dict, *, include_region: bool = False) -> str:
|
||||
"""Render a cross-vendor plot report (per-site head-to-head).
|
||||
|
||||
Originally GH-specific; now also handles ``lg_plot_reports`` and
|
||||
@@ -340,6 +340,12 @@ def _render_gh_plot_chunk(sidecar: dict) -> str:
|
||||
queries should still find DEKALB results inside a GH or AgriGold
|
||||
plot — search filters target ``brand_in_results``, not the
|
||||
publisher's brand).
|
||||
|
||||
``include_region`` (university-trial sources) folds the
|
||||
region/district into the title + facts so it's in the embedded
|
||||
text — these sources publish many same-state/year tables that are
|
||||
only distinguished by region (e.g. Iowa "District South"), and
|
||||
without this the region lived only in metadata/the .md body.
|
||||
"""
|
||||
lines: list[str] = []
|
||||
crop = (sidecar.get("crop") or "").lower()
|
||||
@@ -350,12 +356,18 @@ def _render_gh_plot_chunk(sidecar: dict) -> str:
|
||||
state = sidecar.get("state") or sidecar.get("state_abbrev") or ""
|
||||
year = sidecar.get("year") or ""
|
||||
cooperator = sidecar.get("cooperator") or ""
|
||||
region = (sidecar.get("region") or "").strip() if include_region else ""
|
||||
|
||||
lines.append(f"# {crop_label} yield trial — {state}, {year}")
|
||||
title = f"# {crop_label} yield trial — {state}, {year}"
|
||||
if region:
|
||||
title += f" ({region})"
|
||||
lines.append(title)
|
||||
lines.append("")
|
||||
# Publisher label — emphasizes the source brand for retrieval.
|
||||
publisher_brand = sidecar.get("brand") or "Golden Harvest"
|
||||
facts = [f"{publisher_brand} plot report (cross-vendor)"]
|
||||
facts = [f"{publisher_brand} {'variety trial (cross-vendor, independent third-party)' if include_region else 'plot report (cross-vendor)'}"]
|
||||
if region:
|
||||
facts.append(f"region {region}")
|
||||
if cooperator:
|
||||
facts.append(f"cooperator {cooperator}")
|
||||
if sidecar.get("planted_date"):
|
||||
@@ -509,6 +521,11 @@ def _render_trial_chunk(sidecar: dict, md_text: str | None = None) -> str:
|
||||
# for each (Golden Harvest / LG Seeds / AgriGold).
|
||||
if source in ("gh_plot_reports", "lg_plot_reports", "agrigold_plot_reports"):
|
||||
return _render_gh_plot_chunk(sidecar)
|
||||
if source in ("illinois_vt_trials", "iowa_icpt_trials", "ohio_ocpt_trials"):
|
||||
# University-extension variety trials — same results[] shape, but
|
||||
# fold region/district into the embedded text (many same-state/year
|
||||
# tables) + label as an independent third-party variety trial.
|
||||
return _render_gh_plot_chunk(sidecar, include_region=True)
|
||||
if source == "proharvest_plots":
|
||||
# Structured rows → shared cross-vendor renderer (publisher brand
|
||||
# read from the sidecar). Foreign-format third-party PDFs that
|
||||
|
||||
Reference in New Issue
Block a user