"""pytest_park.pytest_plugin

Opt-in pytest plugin for inline pytest-benchmark comparisons.

The plugin collects the benchmark stats produced during the current test
session, loads a previously saved benchmark payload from the pytest-benchmark
storage (when one is available) and writes comparison tables plus a few
diagnostic lines into the terminal summary.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from datetime import UTC, datetime
from pathlib import Path
from typing import Any

import pytest

from pytest_park.core import build_postfix_comparison, build_regression_improvements
from pytest_park.core.reporting import (
    build_benchmark_header_label,
    build_postfix_comparison_table,
    build_regression_table,
)
from pytest_park.data import build_benchmark_run, load_benchmark_payload
from pytest_park.pytest_benchmark import (
    _read_effective_postfixes,
    _read_postfix,
    _read_postfixes,
)


@dataclass(slots=True)
class _PluginState:
    """Mutable per-session state shared by the plugin hooks."""

    # Run loaded from saved benchmark storage, or None when nothing was found.
    reference_run: Any | None = None
    # One payload dict per benchmarked test that produced stats in this session.
    candidate_payloads: list[dict[str, Any]] = field(default_factory=list)
    # Number of executed tests that requested the ``benchmark`` fixture.
    benchmark_test_count: int = 0


_BENCHMARK_DISABLED_WARNING = (
    "Warning: Benchmarking appears disabled for this run. "
    "Detected single-shot benchmark execution, which commonly happens in VS Code Test Explorer. "
    "Enable pytest-benchmark to collect real benchmark measurements."
)


class PytestParkBenchmarkPlugin:
    """Opt-in pytest plugin for inline pytest-benchmark comparisons."""

    def __init__(self, config: pytest.Config) -> None:
        """Store the pytest config and start with an empty plugin state."""
        self.config = config
        self.state = _PluginState()

    def pytest_sessionstart(self, session: pytest.Session) -> None:
        """Register conftest postfixes and load the reference run."""
        # Probe the group-stats hook with an empty benchmarks list so any
        # conftest.py implementation of pytest_benchmark_group_stats registers
        # its postfixes into config.stash before the plugin reads them.  This
        # is needed in debug / single-shot mode where pytest-benchmark itself
        # never calls the hook.
        _probe_benchmark_group_stats_hook(self.config)
        self.state.reference_run = self._load_reference_run()

    @pytest.hookimpl(hookwrapper=True)
    def pytest_runtest_makereport(self, item: pytest.Item, call: pytest.CallInfo[Any]) -> Any:
        """Collect the benchmark payload of a test after its ``call`` phase."""
        outcome = yield
        # Let any exception raised by the wrapped hook implementations surface.
        outcome.get_result()

        if call.when != "call":
            return

        benchmark = getattr(item, "funcargs", {}).get("benchmark")
        if benchmark is not None:
            # Counted even when no stats were produced, so that the summary can
            # warn about benchmarks that ran without being measured.
            self.state.benchmark_test_count += 1

        stats = getattr(benchmark, "stats", None)
        if stats is None:
            return

        payload = self._build_current_benchmark_payload(stats)
        if not payload:
            return
        self.state.candidate_payloads.append(payload)

    @pytest.hookimpl(trylast=True)
    def pytest_terminal_summary(self, terminalreporter: Any) -> None:
        """Write the pytest-park section to the terminal summary.

        Falls back to writing directly to stdout when no terminal reporter is
        supplied.
        """
        output_lines = self._build_summary_output_lines()
        if not output_lines:
            return

        if terminalreporter is not None:
            terminalreporter.section("pytest-park")
            for line in output_lines:
                terminalreporter.write_line(line)
            return

        self._write_fallback_summary("\n".join(output_lines))

    def _build_summary_output_lines(self) -> list[str]:
        """Return warning, debug and table lines, separated by blank lines."""
        output: list[str] = []

        warning_text = self._build_benchmark_warning_text()
        if warning_text:
            output.extend(warning_text.splitlines())

        debug_lines = self._build_debug_lines()
        if debug_lines:
            if output:
                output.append("")
            output.extend(debug_lines)

        table_text = self._build_summary_table_text()
        if table_text:
            if output:
                output.append("")
            output.extend(table_text.splitlines())

        return output

    def _build_debug_lines(self) -> list[str]:
        """Return ``debug:`` lines describing what the plugin found this session."""
        lines: list[str] = []

        benchmarksession = getattr(self.config, "_benchmarksession", None)
        if benchmarksession is None:
            lines.append("debug: pytest-benchmark session: not found (plugin may not be active)")
        else:
            storage = getattr(benchmarksession, "storage", None)
            storage_path = str(getattr(storage, "path", "<unknown>")) if storage else "<no storage>"
            lines.append(f"debug: benchmark storage: {storage_path}")

        ref = self.state.reference_run
        if ref is None:
            lines.append("debug: reference file: none (no saved benchmark file found)")
        else:
            lines.append(f"debug: reference file: {ref.source_file}")
            lines.append(f"debug: reference run_id: {ref.run_id}  cases: {len(ref.cases)}")

        n_payloads = len(self.state.candidate_payloads)
        lines.append(f"debug: candidate payloads collected: {n_payloads}")

        candidate_run = self._build_candidate_run()
        if candidate_run is None:
            lines.append("debug: candidate run: none")
        else:
            lines.append(f"debug: candidate run_id: {candidate_run.run_id}  cases: {len(candidate_run.cases)}")

        orig_postfixes = _read_effective_postfixes(self.config, "benchmark_original_postfix")
        ref_postfixes = _read_effective_postfixes(self.config, "benchmark_reference_postfix")
        # A non-empty _read_postfixes result is reported as "CLI/ini"; effective
        # postfixes that exist without it are reported as coming from conftest.py.
        orig_src = (
            "CLI/ini"
            if _read_postfixes(self.config, "benchmark_original_postfix")
            else ("conftest.py" if orig_postfixes else "not configured")
        )
        ref_src = (
            "CLI/ini"
            if _read_postfixes(self.config, "benchmark_reference_postfix")
            else ("conftest.py" if ref_postfixes else "not configured")
        )
        lines.append(f"debug: original_postfixes: {orig_postfixes} ({orig_src})")
        lines.append(f"debug: reference_postfixes: {ref_postfixes} ({ref_src})")
        lines.append("debug: group_by: ['custom', 'group']")
        lines.append(f"debug: benchmark_compare: {self.config.getoption('benchmark_compare', default=None)}")
        lines.append(f"debug: benchmark_save: {self.config.getoption('benchmark_save', default=None)}")
        return lines

    def _build_summary_table_text(self) -> str | None:
        """Return the comparison sections joined by blank lines.

        Returns ``None`` when no candidate run could be built or when no
        section was produced.
        """
        reference_run = self.state.reference_run
        candidate_run = self._build_candidate_run()
        if candidate_run is None:
            return None

        orig_postfixes = _read_effective_postfixes(self.config, "benchmark_original_postfix")
        ref_postfixes = _read_effective_postfixes(self.config, "benchmark_reference_postfix")

        sections: list[str] = []

        # 1. Regression table: flat per-method comparison vs previous run
        if reference_run is not None:
            regression = build_regression_improvements(candidate_run, reference_run)
            if regression:
                candidate_label = build_benchmark_header_label(candidate_run.source_file, candidate_run.run_id)
                reference_label = build_benchmark_header_label(reference_run.source_file, reference_run.run_id)
                sections.append(
                    build_regression_table(
                        regression,
                        candidate_label=candidate_label,
                        reference_label=reference_label,
                    )
                )
        else:
            sections.append(
                "Warning: No reference benchmark file found. "
                "Run with --benchmark-save or --benchmark-autosave first to enable regression comparison."
            )

        # 2. Postfix comparison: compare original-postfix vs reference-postfix methods
        #    This works within the candidate run itself, so no reference run is needed.
        if orig_postfixes and ref_postfixes:
            postfix_improvements = build_postfix_comparison(
                candidate_run,
                original_postfixes=orig_postfixes,
                reference_postfixes=ref_postfixes,
            )
            if postfix_improvements:
                # NOTE(review): extend() assumes build_postfix_comparison_table
                # returns an iterable of section strings - confirm against its
                # definition.
                sections.extend(
                    build_postfix_comparison_table(
                        postfix_improvements,
                        original_postfixes=orig_postfixes,
                        reference_postfixes=ref_postfixes,
                    )
                )
        else:
            missing = []
            if not orig_postfixes:
                missing.append("--benchmark-original-postfix")
            if not ref_postfixes:
                missing.append("--benchmark-reference-postfix")
            sections.append(
                f"Warning: Postfix comparison table skipped. Provide {' and '.join(missing)} to enable it."
            )

        return "\n\n".join(sections) if sections else None

    def _build_benchmark_warning_text(self) -> str | None:
        """Return the disabled-benchmark warning, or ``None`` when not needed."""
        if not self._should_warn_about_disabled_benchmarking():
            return None
        return _BENCHMARK_DISABLED_WARNING

    def _should_warn_about_disabled_benchmarking(self) -> bool:
        """Return True when this run looks like it did not really benchmark.

        That is the case when ``benchmark_disable`` is set, when benchmark
        tests ran but produced no payloads, or when every collected payload is
        a single-shot measurement.
        """
        if self.config.getoption("benchmark_disable", default=False):
            return True
        if self.state.benchmark_test_count > 0 and not self.state.candidate_payloads:
            return True
        return bool(self.state.candidate_payloads) and all(
            _is_single_shot_benchmark_payload(payload) for payload in self.state.candidate_payloads
        )

    def _write_fallback_summary(self, table_text: str) -> None:
        """Write the summary straight to stdout, preferring ``sys.__stdout__``."""
        import sys

        stream = getattr(sys, "__stdout__", None) or sys.stdout
        stream.write("\npytest-park\n")
        stream.write(f"{table_text}\n")
        stream.flush()

    def _build_candidate_run(self) -> Any | None:
        """Build a run from the payloads collected so far, or ``None`` if empty."""
        if not self.state.candidate_payloads:
            return None
        return build_benchmark_run(
            self.state.candidate_payloads,
            run_id=self._current_run_id(),
            source_file="<live>",
            created_at=datetime.now(tz=UTC),
            original_postfix=_read_effective_postfixes(self.config, "benchmark_original_postfix"),
            reference_postfix=_read_effective_postfixes(self.config, "benchmark_reference_postfix"),
        )

    def _build_current_benchmark_payload(self, metadata: Any) -> dict[str, Any]:
        """Return ``metadata.as_dict(include_data=False)`` when it is a dict.

        Returns an empty dict when ``metadata`` has no callable ``as_dict`` or
        when the call does not return a dict.
        """
        as_dict = getattr(metadata, "as_dict", None)
        if not callable(as_dict):
            return {}
        payload = as_dict(include_data=False)
        return payload if isinstance(payload, dict) else {}

    def _load_reference_run(self) -> Any | None:
        """Load the last selected saved payload as the reference run.

        Returns ``None`` when there is no benchmark session, no payload was
        selected, or the selected payload is not a dict.
        """
        benchmark_session = getattr(self.config, "_benchmarksession", None)
        if benchmark_session is None:
            return None

        selected_payloads = _select_reference_payloads(self.config, benchmark_session)
        if not selected_payloads:
            return None

        path, payload = selected_payloads[-1]
        if not isinstance(payload, dict):
            return None

        return load_benchmark_payload(
            payload,
            source_file=str(path),
            original_postfix=_read_effective_postfixes(self.config, "benchmark_original_postfix"),
            reference_postfix=_read_effective_postfixes(self.config, "benchmark_reference_postfix"),
        )

    def _current_run_id(self) -> str:
        """Return the save name, else the autosave name, else ``"current"``."""
        saved_name = self.config.getoption("benchmark_save", default=None)
        if isinstance(saved_name, str) and saved_name:
            return saved_name

        autosave_name = self.config.getoption("benchmark_autosave", default=None)
        if isinstance(autosave_name, str) and autosave_name:
            return autosave_name

        return "current"


def _probe_benchmark_group_stats_hook(config: pytest.Config) -> None:
    """Call pytest_benchmark_group_stats with empty benchmarks at session start.

    This triggers conftest.py implementations of the hook so that
    default_pytest_benchmark_group_stats registers their postfixes in
    config.stash before the plugin needs them — even when pytest-benchmark
    itself never calls the hook (debug / benchmark-disabled mode).

    Multiple conftest.py registrations are handled safely: _register_postfixes_in_config
    merges repeated calls, and firstresult semantics mean only the innermost
    conftest.py's hook runs per session.
    """
    hook = getattr(getattr(config, "pluginmanager", None), "hook", None)
    if hook is None:
        return

    group_stats = getattr(hook, "pytest_benchmark_group_stats", None)
    if group_stats is None:
        return

    try:
        group_stats(config=config, benchmarks=[], group_by="name")
    except Exception:
        # Deliberately best-effort: the probe exists only for its side effect,
        # and a failing hook implementation must not abort the session.
        pass


def _select_reference_payloads(config: pytest.Config, benchmark_session: Any) -> list[tuple[Path | str, Any]]:
    """Return the saved ``(path, payload)`` pairs to use as reference.

    When ``benchmark_compare`` is set, everything the storage loads for it is
    returned.  Otherwise only the last entry of the default load is returned,
    retrying with the ``"*"`` pattern when the default load yields nothing.
    """
    compare_value = config.getoption("benchmark_compare", default=[])
    if compare_value not in (None, [], False):
        loaded = (
            benchmark_session.storage.load()
            if compare_value is True
            else benchmark_session.storage.load(compare_value)
        )
        return list(loaded)

    loaded = list(benchmark_session.storage.load())
    if not loaded:
        loaded = list(benchmark_session.storage.load("*"))
    return loaded[-1:]


def _is_single_shot_benchmark_payload(payload: dict[str, Any]) -> bool:
    """Return True when the payload's stats report one round and one iteration.

    Returns False when ``stats`` is not a dict or when either value cannot be
    converted to an int.
    """
    stats = payload.get("stats")
    if not isinstance(stats, dict):
        return False

    try:
        rounds = int(stats.get("rounds", 0))
        iterations = int(stats.get("iterations", 0))
    except (TypeError, ValueError):
        return False

    return rounds == 1 and iterations == 1


def pytest_addoption(parser: pytest.Parser) -> None:
    """Register the postfix command-line options and their ini counterparts."""
    group = parser.getgroup("pytest-park", "pytest-park benchmark comparison options")
    group.addoption(
        "--benchmark-original-postfix",
        action="store",
        default="",
        dest="benchmark_original_postfix",
        help="Comma-separated postfixes identifying original/baseline implementations (e.g. '_np,_numpy').",
    )
    group.addoption(
        "--benchmark-reference-postfix",
        action="store",
        default="",
        dest="benchmark_reference_postfix",
        help="Comma-separated postfixes identifying reference/new implementations (e.g. '_pt,_torch').",
    )
    parser.addini(
        "benchmark_original_postfix",
        default="",
        help="Comma-separated postfixes identifying original/baseline implementations (e.g. '_np,_numpy').",
    )
    parser.addini(
        "benchmark_reference_postfix",
        default="",
        help="Comma-separated postfixes identifying reference/new implementations (e.g. '_pt,_torch').",
    )


def pytest_configure(config: pytest.Config) -> None:
    """Create and register the plugin once per config object."""
    # The attribute doubles as a guard against registering the plugin twice.
    if getattr(config, "_pytest_park_benchmark_plugin", None) is not None:
        return

    plugin = PytestParkBenchmarkPlugin(config)
    config._pytest_park_benchmark_plugin = plugin
    config.pluginmanager.register(plugin, "pytest-park-benchmark-plugin")


def pytest_unconfigure(config: pytest.Config) -> None:
    """Unregister the plugin and drop the reference stored on the config."""
    plugin = getattr(config, "_pytest_park_benchmark_plugin", None)
    if plugin is None:
        return

    config.pluginmanager.unregister(plugin)
    delattr(config, "_pytest_park_benchmark_plugin")