{ "_description": "seed-mcp source catalog. Each scraper module under scrape/sources/ corresponds to one entry. Run via `python -m scrape.runner --source <name>`. The MCP container bakes this file in so corpus_status / list_versions can reflect provenance without re-scraping.", "_pioneer_excluded": "Pioneer (Corteva) is intentionally absent. Per their ToS: 'you shall not use any manual or automated software, devices or other processes (including but not limited to spiders, robots, scrapers, crawlers, avatars, data mining tools or the like) to scrape or download data from the Services'. The MCP returns a curated fallback lesson directing the user to pioneer.com / a local dealer.", "sources": [ { "name": "bayer_seeds", "vendor": "Bayer", "brands": [ "DEKALB", "Asgrow", "WestBred" ], "crops": [ "corn", "soybeans", "wheat" ], "verdict": "green", "expected_count": 475, "base_url": "https://cropscience.bayer.us", "scope_filter": "All listed varieties; no regional filter applied at scrape time (regional recommendations parsed into sidecar so the MCP can filter at search time).", "tos_check_date": "2026-05-24", "tos_note": "robots.txt explicitly whitelists RAG/LLM use cases. Same legal stance as crop-chem-docs scraper." }, { "name": "golden_harvest", "vendor": "Syngenta", "brands": [ "Golden Harvest" ], "crops": [ "corn", "soybeans" ], "verdict": "green", "expected_count": 175, "base_url": "https://www.goldenharvestseeds.com", "scope_filter": "All sitemap-listed corn + soybean varieties.", "tos_check_date": "2026-05-25", "schema_notes": "Disease ratings published on 9-to-1 scale (9 = best). Normalize to 1-9 (9 = best) at chunk time to match Bayer/NK/AgriPro convention. Note original direction in chunk_0 preamble. Tech-sheet PDF URLs in the sitemap are stale (250331) \u2014 resolve live URL from product HTML, not sitemap entry." 
}, { "name": "nk", "vendor": "Syngenta", "brands": [ "NK" ], "crops": [ "corn", "soybeans" ], "verdict": "green", "expected_count": 29, "base_url": "https://www.syngenta-us.com", "pdf_cdn": "https://assets.syngentaebiz.com/pdf/techsheets/", "scope_filter": "All NK corn + soy varieties. No wheat (NK doesn't sell wheat in US).", "tos_check_date": "2026-05-24", "schema_notes": "Disease + agronomic ratings live in tech-sheet PDFs only \u2014 need pdfplumber. PDF URLs share format `_YYMMDD.pdf` with Golden Harvest, so the same fetcher works for both." }, { "name": "agripro", "vendor": "Syngenta", "brands": [ "AgriPro" ], "crops": [ "wheat", "barley" ], "verdict": "green", "expected_count": 24, "base_url": "https://www.agriprowheat.com", "scope_filter": "All wheat classes (HRW/HRS/HWS/SWW/SWS) + barley. NO SRW \u2014 Syngenta's SRW lives at GrowProGenetics.com under a separate brand.", "tos_check_date": "2026-05-24", "schema_notes": "Drupal Views form; server-rendered HTML. CoAXium trait flag is implicit in product family; Clearfield/CL2 trait IS in this catalog." }, { "name": "becks_pfr", "vendor": "Beck's Hybrids", "brands": [ "Beck's PFR" ], "crops": [ "corn", "soybeans", "wheat" ], "verdict": "yellow", "expected_count": 2089, "base_url": "https://www.beckshybrids.com", "api_base": "https://mc8v24rf.api.sanity.io", "scope_filter": "All Practical Farm Research publications since 2015. PFR is head-to-head agronomy trials \u2014 fungicide timing, planting-date studies, hybrid-by-population, etc.", "tos_check_date": "2026-05-24", "schema_notes": "Public Sanity GROQ API, no auth required. Records have title/year/crop/key-findings/full-text. Treat PFR docs as a research corpus, not variety records \u2014 the chunk_0 includes the study's tl;dr finding." 
}, { "name": "becks_products", "vendor": "Beck's Hybrids", "brands": [ "Beck's" ], "crops": [ "corn", "soybeans", "wheat" ], "verdict": "yellow", "expected_count": 860, "base_url": "https://www.beckshybrids.com", "api_base": "https://mc8v24rf.api.sanity.io", "scope_filter": "All Beck's product records \u2014 corn + soy + wheat. Identity + RM/MG only.", "tos_check_date": "2026-05-24", "schema_notes": "Sanity GROQ exposes identity (name, RM/MG, basic traits) but agronomic + disease ratings are SeedIQ-gated (requires browser cookie). Deferred until the SeedIQ XHR endpoint is captured from a logged-in browser session. Without ratings, products are reference-only; the MCP can confirm 'Beck's has hybrid X at RM 112 with Enlist trait' but not 'rate it against drought'." }, { "name": "gh_plot_reports", "vendor": "Syngenta", "brand_aggregator": "Golden Harvest publishes", "crops": [ "corn", "soybeans", "silage" ], "verdict": "green", "expected_count": 4618, "base_url": "https://www.goldenharvestseeds.com", "scope_filter": "sitemap-listed plot reports 2024 and 2025 (4,618 reports). 2023 (3,619 reports) deferred to a future pass \u2014 most recent data is most relevant for current decisions.", "tos_check_date": "2026-05-25", "schema_notes": "Cross-vendor head-to-head yield trials at specific state/year/site. Each report lists products from multiple brands (NK, DEKALB, GH, etc.) with rank, yield, %MST, test weight, gross revenue. URL: //plot-report///. Same site/auth as golden_harvest variety scraper.", "data_type": "trial" }, { "name": "agripro_trials", "vendor": "Syngenta", "brand_aggregator": "AgriPro publishes", "crops": [ "wheat" ], "verdict": "green", "expected_count": 38, "base_url": "https://agriprowheat.com", "scope_filter": "PDF trial summaries linked from /trials-data. Regional wheat performance (PNW, Western Plains, NE Colorado, etc.).", "tos_check_date": "2026-05-25", "schema_notes": "PDF tables of varieties tested per region per year. 
pdfplumber for table extraction.", "data_type": "trial" } ], "_excluded_sources": [ { "name": "pioneer", "vendor": "Corteva", "verdict": "red", "reason": "Explicit ToS prohibits automated scraping. Dealer locator at /us/sales-representatives/my-local-team.html is login-gated; no public API for dealer contact info. The MCP returns a curated fallback lesson instead of erroring." } ] }