microblog.pub/utils/lookup.py

55 lines
1.8 KiB
Python
Raw Permalink Normal View History

2018-07-20 23:05:51 +00:00
import little_boxes.activitypub as ap
2018-07-20 22:15:47 +00:00
import mf2py
2018-07-20 23:05:51 +00:00
import requests
from little_boxes.errors import NotAnActivityError
2019-08-07 19:38:50 +00:00
from little_boxes.errors import RemoteServerUnavailableError
2018-07-22 10:04:18 +00:00
from little_boxes.webfinger import get_actor_url
2018-07-20 22:15:47 +00:00
def lookup(url: str) -> ap.BaseActivity:
    """Try to find an AP object related to the given URL.

    The strategies below are attempted in order; the first that succeeds wins:

    1. If ``url`` starts with ``@``, resolve it to an actor URL with
       ``get_actor_url`` and fetch that actor.
    2. GET ``url`` and look for an alternate link of type
       ``application/activity+json`` in the returned page.
    3. Try to decode the response body as JSON and parse it as an activity.
    4. Fall back to ``ap.fetch_remote_activity(url)``.

    Raises:
        RemoteServerUnavailableError: when resolving the handle or fetching
            ``url`` fails with a ``requests.RequestException`` (HTTP error
            statuses during handle resolution are ignored, see below).
    """
    try:
        if url.startswith("@"):
            actor_url = get_actor_url(url)
            if actor_url:
                return ap.fetch_remote_activity(actor_url)
    except NotAnActivityError:
        pass
    except requests.HTTPError:
        # Some websites may return 404, 503 or others when they don't support
        # webfinger, and we're just taking a guess when performing the lookup.
        pass
    except requests.RequestException as err:
        raise RemoteServerUnavailableError(
            f"failed to fetch {url}: {err!r}"
        ) from err

    backend = ap.get_backend()
    try:
        # A GET (not a HEAD) is required here: the body of the response is
        # inspected below, and a HEAD response never carries one.
        resp = requests.get(
            url,
            timeout=10,
            allow_redirects=True,
            headers={"User-Agent": backend.user_agent()},
        )
    except requests.RequestException as err:
        raise RemoteServerUnavailableError(
            f"failed to GET {url}: {err!r}"
        ) from err

    try:
        resp.raise_for_status()
    except requests.HTTPError:
        # The plain request was rejected; let the AP fetcher try on its own.
        return ap.fetch_remote_activity(url)

    # If the page is HTML, maybe it contains an alternate link pointing to an
    # AP object
    for alternate in mf2py.parse(resp.text).get("alternates", []):
        if alternate.get("type") == "application/activity+json":
            return ap.fetch_remote_activity(alternate["url"])

    try:
        # Maybe the page was JSON-LD?
        data = resp.json()
        return ap.parse_activity(data)
    except Exception:
        # Deliberate best-effort: the body may not be JSON at all, or may be
        # JSON that is not a valid activity. Either way, fall through.
        pass

    # Try content negotiation (retry with the AP Accept header)
    return ap.fetch_remote_activity(url)