From a165e363032bdfe7b6704330628dc4f7e7f9c0b8 Mon Sep 17 00:00:00 2001 From: Thomas Sileo Date: Sun, 22 Jul 2018 11:44:42 +0200 Subject: [PATCH] Tweak the lookup and the OG metadata tasks Now we don't fetch OG metadata for AP profile --- activitypub.py | 14 +++++++------- tasks.py | 1 + utils/lookup.py | 7 +++++-- utils/opengraph.py | 13 ++++++++++++- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/activitypub.py b/activitypub.py index ddd260a..02690ac 100644 --- a/activitypub.py +++ b/activitypub.py @@ -16,6 +16,7 @@ from little_boxes import strtobool from little_boxes.activitypub import _to_list from little_boxes.backend import Backend from little_boxes.errors import ActivityGoneError +from little_boxes.errors import NotAnActivityError from little_boxes.errors import Error from config import BASE_URL @@ -319,17 +320,16 @@ class MicroblogPubBackend(Backend): @ensure_it_is_me def inbox_announce(self, as_actor: ap.Person, announce: ap.Announce) -> None: - if isinstance(announce._data["object"], str) and not announce._data[ - "object" - ].startswith("http"): - # TODO(tsileo): actually drop it without storing it and better logging, also move the check somewhere else - # or remote it? - logger.warn( + # TODO(tsileo): actually drop it without storing it and better logging, also move the check somewhere else + # or remove it? 
+ try: + obj = announce.get_object() + except NotAnActivityError: + logger.exception( f'received an Annouce referencing an OStatus notice ({announce._data["object"]}), dropping the message' ) return - obj = announce.get_object() DB.activities.update_one( {"remote_id": announce.id}, { diff --git a/tasks.py b/tasks.py index 1ca0134..8710422 100644 --- a/tasks.py +++ b/tasks.py @@ -6,6 +6,7 @@ import random import requests from celery import Celery from little_boxes import activitypub as ap +from little_boxes.errors import NotAnActivityError from little_boxes.errors import ActivityGoneError from little_boxes.errors import ActivityNotFoundError from little_boxes.httpsig import HTTPSigAuth diff --git a/utils/lookup.py b/utils/lookup.py index 50267fb..8e2d760 100644 --- a/utils/lookup.py +++ b/utils/lookup.py @@ -4,13 +4,16 @@ import little_boxes.activitypub as ap import mf2py import requests from little_boxes.webfinger import get_actor_url +from little_boxes.errors import NotAnActivityError def lookup(url: str) -> ap.BaseActivity: """Try to find an AP object related to the given URL.""" try: - return ap.fetch_remote_activity(get_actor_url(url)) - except Exception: + actor_url = get_actor_url(url) + if actor_url: + return ap.fetch_remote_activity(actor_url) + except NotAnActivityError: pass backend = ap.get_backend() diff --git a/utils/opengraph.py b/utils/opengraph.py index 762e5ef..b4bd704 100644 --- a/utils/opengraph.py +++ b/utils/opengraph.py @@ -1,8 +1,11 @@ import opengraph import requests from bs4 import BeautifulSoup +from little_boxes import activitypub as ap +from little_boxes.errors import NotAnActivityError from little_boxes.urlutils import check_url from little_boxes.urlutils import is_url_valid +from .lookup import lookup def links_from_note(note): @@ -10,7 +13,6 @@ def links_from_note(note): for t in note.get("tag", []): h = t.get("href") if h: - # TODO(tsileo): fetch the URL for Actor profile, type=mention tags_href.add(h) links = set() @@ -27,6 +29,15 
@@ def fetch_og_metadata(user_agent, links): htmls = [] for l in links: check_url(l) + + # Remove any AP actor from the list + try: + p = lookup(l) + if p.has_type(ap.ACTOR_TYPES): + continue + except NotAnActivityError: + pass + r = requests.get(l, headers={"User-Agent": user_agent}, timeout=15) r.raise_for_status() htmls.append(r.text)