Fighting python type annotations
I have a very simple class that inherits from requests.Session
. The code currently looks like:
import requests
import urllib.parse
from typing import Any, Optional, Union, cast
default_gutendex_baseurl = "https://gutendex.com/"
class Gutendex(requests.Session):
    """A ``requests.Session`` that resolves request URLs against a base URL.

    URLs that do not start with "http" are joined onto ``self.baseurl``
    before being handed to the parent ``request`` implementation.
    """

    def __init__(self, baseurl: Optional[str] = None):
        """Initialise the session and remember the base URL.

        Args:
            baseurl: Base URL used for relative request URLs. Any falsy value
                (``None`` or an empty string) selects
                ``default_gutendex_baseurl`` instead.
        """
        super().__init__()
        self.baseurl = baseurl or default_gutendex_baseurl

    def search(self, keywords: str) -> Any:
        """GET ``/books`` with ``search=keywords`` and return the decoded JSON.

        ``raise_for_status`` is called on the response before the body is
        decoded, so an error status surfaces as an exception.
        """
        # NOTE(review): the relative "/books" path presumably reaches the
        # request() override below via Session.get -- confirm against requests.
        res = self.get("/books", params={"search": keywords})
        res.raise_for_status()
        return res.json()

    def request(
        self, method: str, url: Union[str, bytes], *args, **kwargs
    ) -> requests.Response:
        """Send a request, joining ``url`` onto ``self.baseurl`` when needed.

        Args:
            method: HTTP method name, passed through unchanged.
            url: Request URL, annotated as ``str`` or ``bytes``.
            *args: Forwarded unchanged to the parent ``request``.
            **kwargs: Forwarded unchanged to the parent ``request``.

        Returns:
            The response returned by the parent ``request``.
        """
        # url is not narrowed here: if it is bytes, startswith() needs a bytes
        # prefix, which is why a type checker rejects the str literal "http".
        if self.baseurl and not url.startswith("http"):
            # self.baseurl is a str while url may still be bytes, so the two
            # urljoin arguments are not guaranteed to share one string type.
            url = urllib.parse.urljoin(self.baseurl, url)
        return super().request(method, url, *args, **kwargs)
I'm having a hard time making mypy
happy with the request
method.
The first challenge was getting the parameters to validate; setting
url: Union[str, bytes]
was necessary to match the type annotation in
types-requests
. I've just thrown up my hands on getting *args
and
**kwargs
correct, because the only solution appears to be
reproducing the individual parameter annotations, but I'm happy to
leave that as is.
With the function signature dealt with, mypy
is now complaining
about the call to startswith
:
example.py:23: error: Argument 1 to "startswith" of "bytes" has incompatible type "str"; expected "Union[bytes, Tuple[bytes, ...]]"
I can resolve that with an explicit cast
:
if not cast(str, url).startswith("http"):
url = urllib.parse.urljoin(self.baseurl, url)
...but that seems like it's just introducing complexity.
And then it's unhappy with the call to urllib.parse.urljoin
:
example.py:24: error: Value of type variable "AnyStr" of "urljoin" cannot be "Sequence[object]"
example.py:24: error: Incompatible types in assignment (expression has type "Sequence[object]", variable has type "Union[str, bytes]")
I'm not really sure what to make of these errors.
I've fixed things for now by replacing the cast with an explicit bytes-to-str conversion at the top of the method:
def request(
    self, method: str, url: Union[str, bytes], *args, **kwargs
) -> requests.Response:
    """Send a request after normalising ``url`` to text.

    A ``bytes`` URL is decoded with ``bytes.decode()`` defaults first. The
    resulting text is joined onto ``self.baseurl`` unless it already starts
    with "http", and the parent ``request`` is then called with that text.
    """
    # Normalise to str up front so every later operation sees one type.
    if isinstance(url, bytes):
        target = url.decode()
    else:
        target = url

    already_absolute = target.startswith("http")
    if not already_absolute:
        target = urllib.parse.urljoin(self.baseurl, target)

    return super().request(method, target, *args, **kwargs)
But that feels like a hacky workaround.
So:
- I think I have the function signature as correct as I care to get it, but are the type annotations on
url
correct, or are they incorrect and causing these problems?
- What is going on with the errors around
urljoin
?
From the comments, this:
if self.baseurl and not url.startswith(
"http" if isinstance(url, str) else b"http"
):
Fails with:
example.py:25: error: Argument 1 to "startswith" of "str" has incompatible type "Union[str, bytes]"; expected "Union[str, Tuple[str, ...]]"
example.py:25: error: Argument 1 to "startswith" of "bytes" has incompatible type "Union[str, bytes]"; expected "Union[bytes, Tuple[bytes, ...]]"
Solution 1:
This resolves the entire issue:
import urllib.parse
from typing import Any, Optional, Union, cast

import requests
default_gutendex_baseurl = "https://gutendex.com/"
class Gutendex(requests.Session):
    """A ``requests.Session`` that resolves relative URLs against a base URL.

    A ``str`` URL that does not start with "http" is joined onto
    ``self.baseurl`` before the request is sent. ``bytes`` URLs are rejected
    with ``TypeError``.
    """

    def __init__(self, baseurl: Optional[str] = None) -> None:
        """Initialise the session and remember the base URL.

        Args:
            baseurl: Base URL used for relative request URLs. Any falsy value
                (``None`` or an empty string) selects
                ``default_gutendex_baseurl`` instead.
        """
        # Optional[...] is spelled out: a bare ``str = None`` default relies
        # on implicit Optional, which current mypy rejects by default.
        super().__init__()
        self.baseurl = baseurl or default_gutendex_baseurl

    def search(self, keywords: str) -> dict[str, Any]:
        """GET ``/books`` with ``search=keywords`` and return the decoded JSON.

        The values of the returned mapping are not all strings, hence
        ``dict[str, Any]``.

        Raises:
            requests.HTTPError: If the response carries an error status.
        """
        res = self.get("/books", params={"search": keywords})
        res.raise_for_status()
        return res.json()

    def request(
        self, method: str, url: Union[str, bytes], *args, **kwargs
    ) -> requests.Response:
        """Send a request, joining ``url`` onto ``self.baseurl`` when needed.

        Args:
            method: HTTP method name, passed through unchanged.
            url: Request URL. Annotated ``Union[str, bytes]`` to match the
                parent signature, but only ``str`` is accepted at runtime.
            *args: Forwarded unchanged to the parent ``request``.
            **kwargs: Forwarded unchanged to the parent ``request``.

        Returns:
            The response returned by the parent ``request``.

        Raises:
            TypeError: If ``url`` is not a ``str``.
        """
        # Guard clause: after this check the type checker narrows url to str,
        # so startswith() and urljoin() below both see a single string type.
        if not isinstance(url, str):
            raise TypeError('Gutendex does not support bytes type url arguments')
        if not url.startswith("http"):
            url = urllib.parse.urljoin(self.baseurl, url)
        return super().request(method, url, *args, **kwargs)
You can't simply ignore bytes
if your signature says you accept them. Either raise an exception or handle them more gracefully when bytes
get passed. Or even just pass
if you like living dangerously.
This code validates just fine in mypy
.
What's a bit disappointing is that something like this doesn't validate:
if not url.startswith("http"):
url = urllib.parse.urljoin(self.baseurl, url if isinstance(url, str) else url.decode())
return super().request(method, url, *args, **kwargs)
Even though there is no way url.startswith
gets a bytes
when it's a str
or vice versa, it still won't validate. mypy
can't validate through the runtime logic, so instead you're stuck doing something like:
def request(
    self, method: str, url: Union[str, bytes], *args, **kwargs
) -> requests.Response:
    """Send a request, resolving ``url`` against ``self.baseurl`` if needed.

    ``str`` and non-``str`` URLs are handled in separate branches so that each
    prefix test uses a literal of the matching type. A URL that already starts
    with "http" (or ``b"http"``) is forwarded untouched; otherwise it is
    joined onto ``self.baseurl`` as text, decoding ``bytes`` first.
    """
    if isinstance(url, str):
        # Text URL: test the prefix with a str literal.
        if url.startswith("http"):
            resolved = url
        else:
            resolved = urllib.parse.urljoin(self.baseurl, url)
        return super().request(method, resolved, *args, **kwargs)

    # Remaining case is bytes: an absolute URL is forwarded as bytes,
    # a relative one is decoded and joined, producing a str.
    if url.startswith(b"http"):
        return super().request(method, url, *args, **kwargs)
    joined = urllib.parse.urljoin(self.baseurl, url.decode())
    return super().request(method, joined, *args, **kwargs)
Which supports both, but repeats the logic in an ugly fashion.