# seed-mcp/scrape/sources/nk.py

"""NK scraper (Syngenta brand).

Source: ``https://www.syngenta-us.com`` — static HTML product pages
plus tech-sheet PDFs on the Syngenta CDN at
``assets.syngentaebiz.com/pdf/techsheets/<CODE>_YYMMDD.pdf``.

Expected count: 29 varieties (12 corn + 17 soy). No wheat.

The PDF fetcher is shared with ``golden_harvest`` — same CDN, same
``<CODE>_YYMMDD.pdf`` filename convention. Factor that into a
helper module under ``scrape.sources._syngenta_pdf`` once both
scrapers are written.

Disease + agronomic ratings live INSIDE the PDFs (the HTML pages
have marketing copy only). Use pdfplumber for table extraction.

Bonus: regional "Seed Guide" PDFs (~14 MB each) for IA, IL, MN,
etc. — additional supplemental context worth ingesting once the
per-variety scrape is solid.

TODO: implement.
"""
from __future__ import annotations

import sys


def main(argv: list[str] | None = None) -> int:
    """Placeholder entry point for the not-yet-implemented NK scraper.

    Writes a one-line "deferred" notice to stderr and reports success.

    Args:
        argv: Command-line arguments. Accepted so the signature matches a
            normal CLI entry point, but currently ignored.

    Returns:
        Always ``0``. The stub deliberately reports success so the monthly
        CI workflow does not fail while this source is listed but not yet
        implemented; the real implementation is expected to return 0 on
        success and 1 on failure.
    """
    # Adjacent literals are concatenated at compile time into one message.
    notice = (
        "nk: deferred — disease/agronomic ratings come from CDN "
        "tech-sheet PDFs only, use pdfplumber. "
        "See reference_seed_vendor_recon.md."
    )
    print(notice, file=sys.stderr)

    exit_status = 0
    return exit_status


if __name__ == "__main__":
    # Script entry: forward the CLI arguments (program name dropped) to
    # main() and use its return value as the process exit status.
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)