From bdffc884b6ea439c84d309b145a0c33307921f8e Mon Sep 17 00:00:00 2001 From: Heath Hunnicutt Date: Fri, 30 Jan 2026 08:09:17 -0500 Subject: [PATCH] feat: extract parameter descriptions from function docstrings Parse Google, NumPy, and Sphinx-style docstrings to automatically populate parameter descriptions in tool/prompt/resource JSON schemas. Previously, parameter descriptions required explicit Field(description=...) annotations. Now they can come from standard docstrings, with Field() annotations taking precedence when both are present. Closes #226 Co-Authored-By: Claude Opus 4.5 --- pyproject.toml | 1 + src/mcp/server/mcpserver/prompts/base.py | 7 +- .../server/mcpserver/resources/templates.py | 7 +- src/mcp/server/mcpserver/tools/base.py | 7 +- .../mcpserver/utilities/docstring_utils.py | 137 ++++++++++++++ .../mcpserver/utilities/func_metadata.py | 31 ++++ .../server/mcpserver/test_docstring_utils.py | 125 +++++++++++++ .../mcpserver/test_parameter_descriptions.py | 170 ++++++++++++++++++ 8 files changed, 482 insertions(+), 3 deletions(-) create mode 100644 src/mcp/server/mcpserver/utilities/docstring_utils.py create mode 100644 tests/server/mcpserver/test_docstring_utils.py diff --git a/pyproject.toml b/pyproject.toml index 96801c0b8..cdd4ef5e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ classifiers = [ ] dependencies = [ "anyio>=4.5", + "griffe>=1.0", "httpx>=0.27.1", "httpx-sse>=0.4", "pydantic>=2.12.0", diff --git a/src/mcp/server/mcpserver/prompts/base.py b/src/mcp/server/mcpserver/prompts/base.py index 751733f9c..e92fc4589 100644 --- a/src/mcp/server/mcpserver/prompts/base.py +++ b/src/mcp/server/mcpserver/prompts/base.py @@ -10,6 +10,7 @@ from pydantic import BaseModel, Field, TypeAdapter, validate_call from mcp.server.mcpserver.utilities.context_injection import find_context_parameter, inject_context +from mcp.server.mcpserver.utilities.docstring_utils import parse_docstring from mcp.server.mcpserver.utilities.func_metadata import func_metadata from mcp.types import ContentBlock, Icon, TextContent @@ -101,10 +102,14 @@ def from_function( if context_kwarg is None: # pragma: no branch context_kwarg = find_context_parameter(fn) + # Parse docstring to extract summary and parameter descriptions + doc_summary, param_descriptions = parse_docstring(fn) + # Get schema from func_metadata, excluding context parameter func_arg_metadata = func_metadata( fn, skip_names=[context_kwarg] if context_kwarg is not None else [], + param_descriptions=param_descriptions, ) parameters = func_arg_metadata.arg_model.model_json_schema() @@ -127,7 +132,7 @@ def from_function( return cls( name=func_name, title=title, - description=description or fn.__doc__ or "", + description=description or doc_summary or fn.__doc__ or "", arguments=arguments, fn=fn, icons=icons, diff --git a/src/mcp/server/mcpserver/resources/templates.py b/src/mcp/server/mcpserver/resources/templates.py index 698ac3682..0f8b67a30 100644 --- a/src/mcp/server/mcpserver/resources/templates.py +++ b/src/mcp/server/mcpserver/resources/templates.py @@ -12,6 +12,7 @@ from mcp.server.mcpserver.resources.types import FunctionResource, Resource from mcp.server.mcpserver.utilities.context_injection import find_context_parameter, inject_context +from mcp.server.mcpserver.utilities.docstring_utils import parse_docstring from mcp.server.mcpserver.utilities.func_metadata import func_metadata from mcp.types import Annotations, Icon @@ -59,10 +60,14 @@ def from_function( if context_kwarg is None: # pragma: no branch context_kwarg = find_context_parameter(fn) + # Parse docstring to extract summary and parameter descriptions + doc_summary, param_descriptions = parse_docstring(fn) + # Get schema from func_metadata, excluding context parameter func_arg_metadata = func_metadata( fn, skip_names=[context_kwarg] if context_kwarg is not None else [], + param_descriptions=param_descriptions, ) parameters = func_arg_metadata.arg_model.model_json_schema() @@ -73,7 +78,7 @@ def from_function( uri_template=uri_template, name=func_name, title=title, - description=description or fn.__doc__ or "", + description=description or doc_summary or fn.__doc__ or "", mime_type=mime_type or "text/plain", icons=icons, annotations=annotations, diff --git a/src/mcp/server/mcpserver/tools/base.py b/src/mcp/server/mcpserver/tools/base.py index 9798fd96e..e8e94f1d6 100644 --- a/src/mcp/server/mcpserver/tools/base.py +++ b/src/mcp/server/mcpserver/tools/base.py @@ -10,6 +10,7 @@ from mcp.server.mcpserver.exceptions import ToolError from mcp.server.mcpserver.utilities.context_injection import find_context_parameter +from mcp.server.mcpserver.utilities.docstring_utils import parse_docstring from mcp.server.mcpserver.utilities.func_metadata import FuncMetadata, func_metadata from mcp.shared.exceptions import UrlElicitationRequiredError from mcp.shared.tool_name_validation import validate_and_warn_tool_name @@ -63,16 +64,20 @@ def from_function( if func_name == "": raise ValueError("You must provide a name for lambda functions") - func_doc = description or fn.__doc__ or "" is_async = _is_async_callable(fn) if context_kwarg is None: # pragma: no branch context_kwarg = find_context_parameter(fn) + # Parse docstring to extract summary and parameter descriptions + doc_summary, param_descriptions = parse_docstring(fn) + func_doc = description or doc_summary or fn.__doc__ or "" + func_arg_metadata = func_metadata( fn, skip_names=[context_kwarg] if context_kwarg is not None else [], structured_output=structured_output, + param_descriptions=param_descriptions, ) parameters = func_arg_metadata.arg_model.model_json_schema(by_alias=True) diff --git a/src/mcp/server/mcpserver/utilities/docstring_utils.py b/src/mcp/server/mcpserver/utilities/docstring_utils.py new file mode 100644 index 000000000..3e27826e6 --- /dev/null +++ b/src/mcp/server/mcpserver/utilities/docstring_utils.py @@ -0,0 +1,137 @@ +"""Utilities for parsing function docstrings to extract descriptions and parameter info. + +Supports Google, NumPy, and Sphinx docstring formats with automatic detection. +Adapted from pydantic-ai's _griffe.py implementation. +""" + +from __future__ import annotations + +import logging +import re +from collections.abc import Callable +from contextlib import contextmanager +from typing import Any, Iterator, Literal + +from griffe import Docstring, DocstringSectionKind + +try: + from griffe import GoogleOptions + + _GOOGLE_PARSER_OPTIONS = GoogleOptions(returns_named_value=False, returns_multiple_items=False) +except ImportError: + _GOOGLE_PARSER_OPTIONS = None + +DocstringStyle = Literal["google", "numpy", "sphinx"] + + +def parse_docstring( + func: Callable[..., Any], +) -> tuple[str | None, dict[str, str]]: + """Extract the function summary and parameter descriptions from a docstring. + + Automatically infers the docstring format (Google, NumPy, or Sphinx). + + Returns: + A tuple of (summary, param_descriptions) where: + - summary: The main description text (first section), or None if no docstring + - param_descriptions: Dict mapping parameter names to their descriptions + """ + doc = func.__doc__ + if doc is None: + return None, {} + + docstring_style = _infer_docstring_style(doc) + parser_options = _GOOGLE_PARSER_OPTIONS if docstring_style == "google" else None + docstring = Docstring( + doc, + lineno=1, + parser=docstring_style, + parser_options=parser_options, + ) + with _disable_griffe_logging(): + sections = docstring.parse() + + params: dict[str, str] = {} + if parameters := next( + (s for s in sections if s.kind == DocstringSectionKind.parameters), None + ): + params = {p.name: p.description for p in parameters.value if p.description} + + summary: str | None = None + if main := next( + (s for s in sections if s.kind == DocstringSectionKind.text), None + ): + summary = main.value.strip() if main.value else None + + return summary, params + + +def _infer_docstring_style(doc: str) -> DocstringStyle: + """Infer the docstring style from its content.""" + for pattern, replacements, style in _DOCSTRING_STYLE_PATTERNS: + matches = ( + re.search(pattern.format(replacement), doc, re.IGNORECASE | re.MULTILINE) + for replacement in replacements + ) + if any(matches): + return style + return "google" + + +# Pattern matching for docstring style detection. +# See https://github.com/mkdocstrings/griffe/issues/329#issuecomment-2425017804 +_DOCSTRING_STYLE_PATTERNS: list[tuple[str, list[str], DocstringStyle]] = [ + ( + r"\n[ \t]*:{0}([ \t]+\w+)*:([ \t]+.+)?\n", + [ + "param", + "parameter", + "arg", + "argument", + "type", + "returns", + "return", + "rtype", + "raises", + "raise", + ], + "sphinx", + ), + ( + r"\n[ \t]*{0}:([ \t]+.+)?\n[ \t]+.+", + [ + "args", + "arguments", + "params", + "parameters", + "raises", + "returns", + "yields", + "examples", + "attributes", + ], + "google", + ), + ( + r"\n[ \t]*{0}\n[ \t]*---+\n", + [ + "parameters", + "returns", + "yields", + "raises", + "attributes", + ], + "numpy", + ), +] + + +@contextmanager +def _disable_griffe_logging() -> Iterator[None]: + """Temporarily suppress griffe logging to avoid noisy warnings.""" + old_level = logging.root.getEffectiveLevel() + logging.root.setLevel(logging.ERROR) + try: + yield + finally: + logging.root.setLevel(old_level) diff --git a/src/mcp/server/mcpserver/utilities/func_metadata.py b/src/mcp/server/mcpserver/utilities/func_metadata.py index 4b539ce1f..ce76d54fd 100644 --- a/src/mcp/server/mcpserver/utilities/func_metadata.py +++ b/src/mcp/server/mcpserver/utilities/func_metadata.py @@ -172,6 +172,7 @@ def func_metadata( func: Callable[..., Any], skip_names: Sequence[str] = (), structured_output: bool | None = None, + param_descriptions: dict[str, str] | None = None, ) -> FuncMetadata: """Given a function, return metadata including a pydantic model representing its signature. @@ -203,6 +204,10 @@ def func_metadata( - TypedDict - converted to a Pydantic model with same fields - Dataclasses and other annotated classes - converted to Pydantic models - Generic types (list, dict, Union, etc.) - wrapped in a model with a 'result' field + param_descriptions: Optional dict mapping parameter names to descriptions + extracted from the function's docstring. These are used as fallback + descriptions when a parameter does not already have a description + from a Field() annotation. Returns: A FuncMetadata object containing: @@ -231,6 +236,13 @@ def func_metadata( if param.annotation is inspect.Parameter.empty: field_metadata.append(WithJsonSchema({"title": param.name, "type": "string"})) + + # Inject docstring parameter description as fallback, but only if the + # parameter doesn't already have a description from a Field() annotation. + if param_descriptions and param.name in param_descriptions: + if not _has_field_description(annotation, param.default): + field_kwargs["description"] = param_descriptions[param.name] + # Check if the parameter name conflicts with BaseModel attributes # This is necessary because Pydantic warns about shadowing parent attributes if hasattr(BaseModel, field_name) and callable(getattr(BaseModel, field_name)): @@ -418,6 +430,25 @@ def _try_create_model_and_schema( return None, None, False +def _has_field_description(annotation: Any, default: Any) -> bool: + """Check if a parameter already has a description from a Field() annotation. + + Checks both Annotated metadata (e.g., Annotated[int, Field(description="...")]) + and default values (e.g., param: str = Field(description="...")). + """ + # Check if the default value is a FieldInfo with a description + if isinstance(default, FieldInfo) and default.description is not None: + return True + + # Check if the annotation is Annotated with a FieldInfo that has a description + if get_origin(annotation) is Annotated: + for arg in get_args(annotation)[1:]: + if isinstance(arg, FieldInfo) and arg.description is not None: + return True + + return False + + _no_default = object() diff --git a/tests/server/mcpserver/test_docstring_utils.py b/tests/server/mcpserver/test_docstring_utils.py new file mode 100644 index 000000000..6b2c87b5d --- /dev/null +++ b/tests/server/mcpserver/test_docstring_utils.py @@ -0,0 +1,125 @@ +"""Tests for docstring parsing utilities.""" + +from mcp.server.mcpserver.utilities.docstring_utils import parse_docstring + + +def test_google_style_docstring(): + def add_numbers(a: float, b: float) -> float: + """Adds two numbers and returns the result. + + Args: + a: The first number. + b: The second number. + + Returns: + The sum of a and b. + """ + return a + b + + summary, params = parse_docstring(add_numbers) + assert summary == "Adds two numbers and returns the result." + assert params == {"a": "The first number.", "b": "The second number."} + + +def test_numpy_style_docstring(): + def multiply(x: float, y: float) -> float: + """Multiply two numbers. + + Parameters + ---------- + x + The first factor. + y + The second factor. + + Returns + ------- + float + The product of x and y. + """ + return x * y + + summary, params = parse_docstring(multiply) + assert summary == "Multiply two numbers." + assert params == {"x": "The first factor.", "y": "The second factor."} + + +def test_sphinx_style_docstring(): + def divide(numerator: float, denominator: float) -> float: + """Divide two numbers. + + :param numerator: The number to divide. + :param denominator: The number to divide by. + :returns: The quotient. + """ + return numerator / denominator + + summary, params = parse_docstring(divide) + assert summary == "Divide two numbers." + assert params == { + "numerator": "The number to divide.", + "denominator": "The number to divide by.", + } + + +def test_no_docstring(): + def no_doc(a: int) -> int: + return a + + summary, params = parse_docstring(no_doc) + assert summary is None + assert params == {} + + +def test_summary_only_docstring(): + def simple(a: int) -> int: + """A simple function.""" + return a + + summary, params = parse_docstring(simple) + assert summary == "A simple function." + assert params == {} + + +def test_multiline_summary(): + def multi(a: int) -> int: + """This is a longer description + that spans multiple lines. + + Args: + a: An integer value. + """ + return a + + summary, params = parse_docstring(multi) + assert "longer description" in summary + assert params == {"a": "An integer value."} + + +def test_empty_docstring(): + def empty_doc(a: int) -> int: + """""" + return a + + summary, params = parse_docstring(empty_doc) + # Empty docstring should return None summary + assert summary is None + assert params == {} + + +def test_params_with_types_in_docstring(): + """Google-style docstrings sometimes include types in the param descriptions.""" + + def typed_params(a: float, b: float) -> float: + """Add numbers. + + Args: + a (float): The first number. + b (float): The second number. + """ + return a + b + + summary, params = parse_docstring(typed_params) + assert summary == "Add numbers." + assert "first number" in params["a"] + assert "second number" in params["b"] diff --git a/tests/server/mcpserver/test_parameter_descriptions.py b/tests/server/mcpserver/test_parameter_descriptions.py index ec9f22c25..b805b821f 100644 --- a/tests/server/mcpserver/test_parameter_descriptions.py +++ b/tests/server/mcpserver/test_parameter_descriptions.py @@ -1,5 +1,7 @@ """Test that parameter descriptions are properly exposed through list_tools""" +from typing import Annotated + import pytest from pydantic import Field @@ -28,3 +30,171 @@ def greet( assert properties["name"]["description"] == "The name to greet" assert "title" in properties assert properties["title"]["description"] == "Optional title" + + +@pytest.mark.anyio +async def test_docstring_parameter_descriptions_google(): + """Parameter descriptions from Google-style docstrings appear in the schema.""" + mcp = MCPServer("Test Server") + + @mcp.tool() + def add_numbers(a: float, b: float) -> float: # pragma: no cover + """Add two numbers together. + + Args: + a: The first number to add. + b: The second number to add. + + Returns: + The sum of a and b. + """ + return a + b + + tools = await mcp.list_tools() + tool = tools[0] + properties = tool.input_schema["properties"] + assert properties["a"]["description"] == "The first number to add." + assert properties["b"]["description"] == "The second number to add." + # Tool description should be the summary, not the full docstring + assert tool.description == "Add two numbers together." + + +@pytest.mark.anyio +async def test_docstring_parameter_descriptions_numpy(): + """Parameter descriptions from NumPy-style docstrings appear in the schema.""" + mcp = MCPServer("Test Server") + + @mcp.tool() + def multiply(x: float, y: float) -> float: # pragma: no cover + """Multiply two numbers. + + Parameters + ---------- + x + The first factor. + y + The second factor. + """ + return x * y + + tools = await mcp.list_tools() + tool = tools[0] + properties = tool.input_schema["properties"] + assert properties["x"]["description"] == "The first factor." + assert properties["y"]["description"] == "The second factor." + assert tool.description == "Multiply two numbers." + + +@pytest.mark.anyio +async def test_docstring_parameter_descriptions_sphinx(): + """Parameter descriptions from Sphinx-style docstrings appear in the schema.""" + mcp = MCPServer("Test Server") + + @mcp.tool() + def divide(numerator: float, denominator: float) -> float: # pragma: no cover + """Divide two numbers. + + :param numerator: The number to divide. + :param denominator: The number to divide by. + :returns: The quotient. + """ + return numerator / denominator + + tools = await mcp.list_tools() + tool = tools[0] + properties = tool.input_schema["properties"] + assert properties["numerator"]["description"] == "The number to divide." + assert properties["denominator"]["description"] == "The number to divide by." + assert tool.description == "Divide two numbers." + + +@pytest.mark.anyio +async def test_field_description_takes_precedence_over_docstring(): + """Field(description=...) should take precedence over docstring descriptions.""" + mcp = MCPServer("Test Server") + + @mcp.tool() + def process( + name: str = Field(description="From Field annotation"), + value: int = 0, + ) -> str: # pragma: no cover + """Process data. + + Args: + name: From docstring. + value: The value to process. + """ + return f"{name}: {value}" + + tools = await mcp.list_tools() + tool = tools[0] + properties = tool.input_schema["properties"] + # Field annotation takes precedence + assert properties["name"]["description"] == "From Field annotation" + # Docstring description used as fallback + assert properties["value"]["description"] == "The value to process." + + +@pytest.mark.anyio +async def test_annotated_field_description_takes_precedence(): + """Annotated[type, Field(description=...)] should take precedence over docstring.""" + mcp = MCPServer("Test Server") + + @mcp.tool() + def process( + name: Annotated[str, Field(description="From Annotated Field")], + value: int = 0, + ) -> str: # pragma: no cover + """Process data. + + Args: + name: From docstring. + value: The value to process. + """ + return f"{name}: {value}" + + tools = await mcp.list_tools() + tool = tools[0] + properties = tool.input_schema["properties"] + # Annotated Field takes precedence + assert properties["name"]["description"] == "From Annotated Field" + # Docstring description used as fallback + assert properties["value"]["description"] == "The value to process." + + +@pytest.mark.anyio +async def test_explicit_description_kwarg_takes_precedence(): + """Explicit description= kwarg to @mcp.tool() takes precedence over docstring summary.""" + mcp = MCPServer("Test Server") + + @mcp.tool(description="Explicit tool description") + def my_tool(a: int) -> int: # pragma: no cover + """Docstring summary that should not be used. + + Args: + a: The value. + """ + return a + + tools = await mcp.list_tools() + tool = tools[0] + assert tool.description == "Explicit tool description" + # But parameter descriptions from docstring should still work + properties = tool.input_schema["properties"] + assert properties["a"]["description"] == "The value." + + +@pytest.mark.anyio +async def test_no_docstring_no_descriptions(): + """Functions without docstrings should work as before.""" + mcp = MCPServer("Test Server") + + @mcp.tool() + def no_doc(a: int) -> int: # pragma: no cover + return a + + tools = await mcp.list_tools() + tool = tools[0] + assert tool.description == "" + properties = tool.input_schema["properties"] + assert "description" not in properties["a"]