mirror of
https://git.sr.ht/~tsileo/microblog.pub
synced 2024-11-15 11:14:28 +00:00
Tweak the lookup and the OG metadata tasks
Now we don't fetch OG metadata for AP profiles.
This commit is contained in:
parent
a841efc9e8
commit
a165e36303
4 changed files with 25 additions and 10 deletions
|
@ -16,6 +16,7 @@ from little_boxes import strtobool
|
||||||
from little_boxes.activitypub import _to_list
|
from little_boxes.activitypub import _to_list
|
||||||
from little_boxes.backend import Backend
|
from little_boxes.backend import Backend
|
||||||
from little_boxes.errors import ActivityGoneError
|
from little_boxes.errors import ActivityGoneError
|
||||||
|
from little_boxes.errors import NotAnActivityError
|
||||||
from little_boxes.errors import Error
|
from little_boxes.errors import Error
|
||||||
|
|
||||||
from config import BASE_URL
|
from config import BASE_URL
|
||||||
|
@ -319,17 +320,16 @@ class MicroblogPubBackend(Backend):
|
||||||
|
|
||||||
@ensure_it_is_me
|
@ensure_it_is_me
|
||||||
def inbox_announce(self, as_actor: ap.Person, announce: ap.Announce) -> None:
|
def inbox_announce(self, as_actor: ap.Person, announce: ap.Announce) -> None:
|
||||||
if isinstance(announce._data["object"], str) and not announce._data[
|
|
||||||
"object"
|
|
||||||
].startswith("http"):
|
|
||||||
# TODO(tsileo): actually drop it without storing it and better logging, also move the check somewhere else
|
# TODO(tsileo): actually drop it without storing it and better logging, also move the check somewhere else
|
||||||
# or remote it?
|
# or remove it?
|
||||||
logger.warn(
|
try:
|
||||||
|
obj = announce.get_object()
|
||||||
|
except NotAnActivityError:
|
||||||
|
logger.exception(
|
||||||
f'received an Annouce referencing an OStatus notice ({announce._data["object"]}), dropping the message'
|
f'received an Annouce referencing an OStatus notice ({announce._data["object"]}), dropping the message'
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
obj = announce.get_object()
|
|
||||||
DB.activities.update_one(
|
DB.activities.update_one(
|
||||||
{"remote_id": announce.id},
|
{"remote_id": announce.id},
|
||||||
{
|
{
|
||||||
|
|
1
tasks.py
1
tasks.py
|
@ -6,6 +6,7 @@ import random
|
||||||
import requests
|
import requests
|
||||||
from celery import Celery
|
from celery import Celery
|
||||||
from little_boxes import activitypub as ap
|
from little_boxes import activitypub as ap
|
||||||
|
from little_boxes.errors import NotAnActivityError
|
||||||
from little_boxes.errors import ActivityGoneError
|
from little_boxes.errors import ActivityGoneError
|
||||||
from little_boxes.errors import ActivityNotFoundError
|
from little_boxes.errors import ActivityNotFoundError
|
||||||
from little_boxes.httpsig import HTTPSigAuth
|
from little_boxes.httpsig import HTTPSigAuth
|
||||||
|
|
|
@ -4,13 +4,16 @@ import little_boxes.activitypub as ap
|
||||||
import mf2py
|
import mf2py
|
||||||
import requests
|
import requests
|
||||||
from little_boxes.webfinger import get_actor_url
|
from little_boxes.webfinger import get_actor_url
|
||||||
|
from little_boxes.errors import NotAnActivityError
|
||||||
|
|
||||||
|
|
||||||
def lookup(url: str) -> ap.BaseActivity:
|
def lookup(url: str) -> ap.BaseActivity:
|
||||||
"""Try to find an AP object related to the given URL."""
|
"""Try to find an AP object related to the given URL."""
|
||||||
try:
|
try:
|
||||||
return ap.fetch_remote_activity(get_actor_url(url))
|
actor_url = get_actor_url(url)
|
||||||
except Exception:
|
if actor_url:
|
||||||
|
return ap.fetch_remote_activity(actor_url)
|
||||||
|
except NotAnActivityError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
backend = ap.get_backend()
|
backend = ap.get_backend()
|
||||||
|
|
|
@ -1,8 +1,11 @@
|
||||||
import opengraph
|
import opengraph
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from little_boxes import activitypub as ap
|
||||||
|
from little_boxes.errors import NotAnActivityError
|
||||||
from little_boxes.urlutils import check_url
|
from little_boxes.urlutils import check_url
|
||||||
from little_boxes.urlutils import is_url_valid
|
from little_boxes.urlutils import is_url_valid
|
||||||
|
from .lookup import lookup
|
||||||
|
|
||||||
|
|
||||||
def links_from_note(note):
|
def links_from_note(note):
|
||||||
|
@ -10,7 +13,6 @@ def links_from_note(note):
|
||||||
for t in note.get("tag", []):
|
for t in note.get("tag", []):
|
||||||
h = t.get("href")
|
h = t.get("href")
|
||||||
if h:
|
if h:
|
||||||
# TODO(tsileo): fetch the URL for Actor profile, type=mention
|
|
||||||
tags_href.add(h)
|
tags_href.add(h)
|
||||||
|
|
||||||
links = set()
|
links = set()
|
||||||
|
@ -27,6 +29,15 @@ def fetch_og_metadata(user_agent, links):
|
||||||
htmls = []
|
htmls = []
|
||||||
for l in links:
|
for l in links:
|
||||||
check_url(l)
|
check_url(l)
|
||||||
|
|
||||||
|
# Remove any AP actor from the list
|
||||||
|
try:
|
||||||
|
p = lookup(l)
|
||||||
|
if p.has_type(ap.ACTOR_TYPES):
|
||||||
|
continue
|
||||||
|
except NotAnActivityError:
|
||||||
|
pass
|
||||||
|
|
||||||
r = requests.get(l, headers={"User-Agent": user_agent}, timeout=15)
|
r = requests.get(l, headers={"User-Agent": user_agent}, timeout=15)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
htmls.append(r.text)
|
htmls.append(r.text)
|
||||||
|
|
Loading…
Reference in a new issue