[ { "id": "bayer", "title": "Bayer Crop Science US — Product Labels", "type": "manufacturer", "homepage": "https://www.cropscience.bayer.us", "scraper": "scrape.sources.bayer", "scraper_version": "0.1.0", "license_note": "robots.txt explicitly permits scraping for AI retrieval-augmented generation (verified 2026-05)" }, { "id": "epa_ppls", "title": "EPA Pesticide Product Label System", "type": "regulator", "homepage": "https://ordspub.epa.gov/ords/pesticides/f?p=PPLS:1", "scraper": "scrape.sources.epa_ppls", "scraper_version": "0.1.0", "license_note": "US federal government — public domain (no ToS restriction)", "scope_filter": "corn / soybean / wheat only — products with at least one site matching CORN, MAIZE, POPCORN, SOYBEAN(S), or WHEAT (word-boundary match). Hits ~16% of the PPLS universe in sampling. Pass --no-row-crop-filter to scrape the full PPLS universe.", "registrant_filter": "Pre-API filter at PPIS enumeration: only products from registrants on scrape/sources/epa_registrant_allowlist.json (34 major US ag-chem companies — Syngenta, Bayer, BASF, Corteva, FMC, Nufarm, ADAMA, UPL, Albaugh, Loveland, AMVAC, Helena, Drexel, Atticus, etc.) hit the API. Cuts the 102K-row PPIS universe to ~11.5K — full backfill drops from ~28h to ~5-6h. Pass --no-registrant-filter to skip this filter." } ]