Tweak the lookup and the OG metadata tasks

We no longer fetch OG metadata for AP actor profiles
This commit is contained in:
Thomas Sileo 2018-07-22 11:44:42 +02:00
parent a841efc9e8
commit a165e36303
4 changed files with 25 additions and 10 deletions

View file

@ -16,6 +16,7 @@ from little_boxes import strtobool
from little_boxes.activitypub import _to_list from little_boxes.activitypub import _to_list
from little_boxes.backend import Backend from little_boxes.backend import Backend
from little_boxes.errors import ActivityGoneError from little_boxes.errors import ActivityGoneError
from little_boxes.errors import NotAnActivityError
from little_boxes.errors import Error from little_boxes.errors import Error
from config import BASE_URL from config import BASE_URL
@ -319,17 +320,16 @@ class MicroblogPubBackend(Backend):
@ensure_it_is_me @ensure_it_is_me
def inbox_announce(self, as_actor: ap.Person, announce: ap.Announce) -> None: def inbox_announce(self, as_actor: ap.Person, announce: ap.Announce) -> None:
if isinstance(announce._data["object"], str) and not announce._data[ # TODO(tsileo): actually drop it without storing it and better logging, also move the check somewhere else
"object" # or remove it?
].startswith("http"): try:
# TODO(tsileo): actually drop it without storing it and better logging, also move the check somewhere else obj = announce.get_object()
# or remote it? except NotAnActivityError:
logger.warn( logger.exception(
f'received an Annouce referencing an OStatus notice ({announce._data["object"]}), dropping the message' f'received an Annouce referencing an OStatus notice ({announce._data["object"]}), dropping the message'
) )
return return
obj = announce.get_object()
DB.activities.update_one( DB.activities.update_one(
{"remote_id": announce.id}, {"remote_id": announce.id},
{ {

View file

@ -6,6 +6,7 @@ import random
import requests import requests
from celery import Celery from celery import Celery
from little_boxes import activitypub as ap from little_boxes import activitypub as ap
from little_boxes.errors import NotAnActivityError
from little_boxes.errors import ActivityGoneError from little_boxes.errors import ActivityGoneError
from little_boxes.errors import ActivityNotFoundError from little_boxes.errors import ActivityNotFoundError
from little_boxes.httpsig import HTTPSigAuth from little_boxes.httpsig import HTTPSigAuth

View file

@ -4,13 +4,16 @@ import little_boxes.activitypub as ap
import mf2py import mf2py
import requests import requests
from little_boxes.webfinger import get_actor_url from little_boxes.webfinger import get_actor_url
from little_boxes.errors import NotAnActivityError
def lookup(url: str) -> ap.BaseActivity: def lookup(url: str) -> ap.BaseActivity:
"""Try to find an AP object related to the given URL.""" """Try to find an AP object related to the given URL."""
try: try:
return ap.fetch_remote_activity(get_actor_url(url)) actor_url = get_actor_url(url)
except Exception: if actor_url:
return ap.fetch_remote_activity(actor_url)
except NotAnActivityError:
pass pass
backend = ap.get_backend() backend = ap.get_backend()

View file

@ -1,8 +1,11 @@
import opengraph import opengraph
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from little_boxes import activitypub as ap
from little_boxes.errors import NotAnActivityError
from little_boxes.urlutils import check_url from little_boxes.urlutils import check_url
from little_boxes.urlutils import is_url_valid from little_boxes.urlutils import is_url_valid
from .lookup import lookup
def links_from_note(note): def links_from_note(note):
@ -10,7 +13,6 @@ def links_from_note(note):
for t in note.get("tag", []): for t in note.get("tag", []):
h = t.get("href") h = t.get("href")
if h: if h:
# TODO(tsileo): fetch the URL for Actor profile, type=mention
tags_href.add(h) tags_href.add(h)
links = set() links = set()
@ -27,6 +29,15 @@ def fetch_og_metadata(user_agent, links):
htmls = [] htmls = []
for l in links: for l in links:
check_url(l) check_url(l)
# Remove any AP actor from the list
try:
p = lookup(l)
if p.has_type(ap.ACTOR_TYPES):
continue
except NotAnActivityError:
pass
r = requests.get(l, headers={"User-Agent": user_agent}, timeout=15) r = requests.get(l, headers={"User-Agent": user_agent}, timeout=15)
r.raise_for_status() r.raise_for_status()
htmls.append(r.text) htmls.append(r.text)