forked from forks/microblog.pub
Add Open Graph metadata support
This commit is contained in:
parent
23faef985b
commit
648e385c49
6 changed files with 98 additions and 20 deletions
5
app.py
5
app.py
|
@ -238,6 +238,11 @@ def get_attachment_url(url, size):
|
||||||
return _get_file_url(url, size, Kind.ATTACHMENT)
|
return _get_file_url(url, size, Kind.ATTACHMENT)
|
||||||
|
|
||||||
|
|
||||||
|
@app.template_filter()
def get_og_image_url(url, size=100):
    """Template filter: resolve the file URL for an Open Graph image.

    Delegates to ``_get_file_url`` with ``Kind.OG_IMAGE``.

    :param url: the Open Graph image URL (templates pass ``og.image``).
    :param size: requested size, forwarded unchanged; defaults to 100.
    :return: whatever ``_get_file_url`` returns for this URL/size/kind.
    """
    og_kind = Kind.OG_IMAGE
    return _get_file_url(url, size, og_kind)
|
||||||
|
|
||||||
|
|
||||||
@app.template_filter()
def permalink_id(val):
    """Template filter: turn ``val`` into a string usable in an element id.

    The result is the decimal string of the built-in ``hash()`` of ``val``.

    NOTE(review): for ``str`` inputs Python salts ``hash()`` per process
    unless ``PYTHONHASHSEED`` is fixed, so the same value may produce a
    different id in another process -- confirm this is acceptable.
    """
    hashed = hash(val)
    return str(hashed)
|
||||||
|
|
|
@ -189,9 +189,11 @@ a:hover {
|
||||||
h3 { margin: 0; }
|
h3 { margin: 0; }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
.note-box {
|
||||||
|
margin-bottom: 70px;
|
||||||
|
}
|
||||||
.note {
|
.note {
|
||||||
display: flex;
|
display: flex;
|
||||||
margin-bottom: 70px;
|
|
||||||
.l {
|
.l {
|
||||||
color: $color-note-link;
|
color: $color-note-link;
|
||||||
}
|
}
|
||||||
|
@ -229,7 +231,11 @@ a:hover {
|
||||||
padding:10px 0;
|
padding:10px 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
.color-menu-background {
|
||||||
|
background: $color-menu-background;
|
||||||
|
}
|
||||||
|
.og-link { text-decoration: none; }
|
||||||
|
.og-link:hover { text-decoration: none; }
|
||||||
.bar-item-no-hover {
|
.bar-item-no-hover {
|
||||||
background: $color-menu-background;
|
background: $color-menu-background;
|
||||||
padding: 5px;
|
padding: 5px;
|
||||||
|
|
38
tasks.py
38
tasks.py
|
@ -19,6 +19,7 @@ from config import ID
|
||||||
from config import KEY
|
from config import KEY
|
||||||
from config import MEDIA_CACHE
|
from config import MEDIA_CACHE
|
||||||
from config import USER_AGENT
|
from config import USER_AGENT
|
||||||
|
from utils import opengraph
|
||||||
from utils.media import Kind
|
from utils.media import Kind
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
@ -103,12 +104,49 @@ def process_new_activity(self, iri: str) -> None:
|
||||||
self.retry(exc=err, countdown=int(random.uniform(2, 4) ** self.request.retries))
|
self.retry(exc=err, countdown=int(random.uniform(2, 4) ** self.request.retries))
|
||||||
|
|
||||||
|
|
||||||
|
@app.task(bind=True, max_retries=12)  # noqa: C901
def fetch_og_metadata(self, iri: str) -> None:
    """Fetch Open Graph metadata for the links of a remote activity.

    The activity at ``iri`` is fetched; when it is a ``Create`` activity,
    the links of its object are extracted, their Open Graph metadata is
    retrieved, every advertised ``image`` is cached through
    ``MEDIA_CACHE.cache_og_image`` and the metadata list is stored under
    ``meta.og_metadata`` of the matching document in ``DB.activities``.

    Error handling:
    - a gone/not-found activity is logged and dropped (no retry);
    - an HTTP 4xx response is logged and dropped (no retry);
    - any other HTTP error or unexpected exception schedules a retry with
      a randomised exponential countdown.

    :param iri: remote id of the activity to process.
    """
    try:
        activity = ap.fetch_remote_activity(iri)
        log.info(f"activity={activity!r}")
        if activity.has_type(ap.ActivityType.CREATE):
            note = activity.get_object()
            links = opengraph.links_from_note(note.to_dict())
            og_metadata = opengraph.fetch_og_metadata(USER_AGENT, links)
            for og in og_metadata:
                # Only entries that advertise an image have something to cache.
                if not og.get("image"):
                    continue
                MEDIA_CACHE.cache_og_image(og["image"])

            log.debug(f"OG metadata {og_metadata!r}")
            DB.activities.update_one(
                {"remote_id": iri}, {"$set": {"meta.og_metadata": og_metadata}}
            )

        log.info(f"OG metadata fetched for {iri}")
    except (ActivityGoneError, ActivityNotFoundError):
        log.exception(f"dropping activity {iri}, skip OG metedata")
    except requests.exceptions.HTTPError as http_err:
        if 400 <= http_err.response.status_code < 500:
            # Client errors will not fix themselves: give up instead of
            # falling through to the retry below.
            log.exception("bad request, no retry")
            return
        log.exception("failed to fetch OG metadata")
        self.retry(
            exc=http_err, countdown=int(random.uniform(2, 4) ** self.request.retries)
        )
    except Exception as err:
        log.exception(f"failed to fetch OG metadata for {iri}")
        self.retry(exc=err, countdown=int(random.uniform(2, 4) ** self.request.retries))
|
||||||
|
|
||||||
|
|
||||||
@app.task(bind=True, max_retries=12)
|
@app.task(bind=True, max_retries=12)
|
||||||
def cache_actor(self, iri: str, also_cache_attachments: bool = True) -> None:
|
def cache_actor(self, iri: str, also_cache_attachments: bool = True) -> None:
|
||||||
try:
|
try:
|
||||||
activity = ap.fetch_remote_activity(iri)
|
activity = ap.fetch_remote_activity(iri)
|
||||||
log.info(f"activity={activity!r}")
|
log.info(f"activity={activity!r}")
|
||||||
|
|
||||||
|
if activity.has_type(ap.ActivityType.CREATE):
|
||||||
|
fetch_og_metadata.delay(iri)
|
||||||
|
|
||||||
actor = activity.get_actor()
|
actor = activity.get_actor()
|
||||||
|
|
||||||
cache_actor_with_inbox = False
|
cache_actor_with_inbox = False
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
{% else %}
|
{% else %}
|
||||||
{% set actor = obj.attributedTo | get_actor %}
|
{% set actor = obj.attributedTo | get_actor %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
<div class="note-box">
|
||||||
<div class="note h-entry" id="activity-{{ obj.id | permalink_id }}">
|
<div class="note h-entry" id="activity-{{ obj.id | permalink_id }}">
|
||||||
|
|
||||||
<div class="h-card p-author">
|
<div class="h-card p-author">
|
||||||
|
@ -63,6 +64,26 @@
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{% if meta and meta.og_metadata %}
|
||||||
|
{% for og in meta.og_metadata %}
|
||||||
|
<a href="{{ og.url }}" class="og-link" style="margin:30px 0;clear:both;display: flex;">
|
||||||
|
<div>
|
||||||
|
<img style="width:100px;border-radius:3px;" src="{{ og.image | get_og_image_url }}">
|
||||||
|
</div>
|
||||||
|
<div style="padding:0 20px;">
|
||||||
|
<strong>{{ og.title }}</strong>
|
||||||
|
<p>{{ og.description | truncate(80) }}</p>
|
||||||
|
<small>{{ og.site_name }}</small>
|
||||||
|
</div>
|
||||||
|
</a>
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="bottom-bar">
|
<div class="bottom-bar">
|
||||||
{% if perma %}<span class="perma-item">{{ obj.published | format_time }}</span>
|
{% if perma %}<span class="perma-item">{{ obj.published | format_time }}</span>
|
||||||
{% if not (obj.id | is_from_outbox) %}
|
{% if not (obj.id | is_from_outbox) %}
|
||||||
|
@ -163,10 +184,10 @@
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{%- endmacro %}
|
{%- endmacro %}
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,7 @@ class Kind(Enum):
|
||||||
ATTACHMENT = "attachment"
|
ATTACHMENT = "attachment"
|
||||||
ACTOR_ICON = "actor_icon"
|
ACTOR_ICON = "actor_icon"
|
||||||
UPLOAD = "upload"
|
UPLOAD = "upload"
|
||||||
|
OG_IMAGE = "og"
|
||||||
|
|
||||||
|
|
||||||
class MediaCache(object):
|
class MediaCache(object):
|
||||||
|
@ -38,6 +39,24 @@ class MediaCache(object):
|
||||||
self.fs = gridfs.GridFS(gridfs_db)
|
self.fs = gridfs.GridFS(gridfs_db)
|
||||||
self.user_agent = user_agent
|
self.user_agent = user_agent
|
||||||
|
|
||||||
|
def cache_og_image(self, url: str) -> None:
    """Store a gzipped thumbnail of the Open Graph image at ``url``.

    Does nothing when a file with this ``url`` and kind ``OG_IMAGE`` is
    already present in ``self.fs``. Otherwise the image is loaded with
    the configured user agent, reduced with ``thumbnail((100, 100))`` and
    written gzip-compressed to ``self.fs`` with ``size=100``. The
    full-size original is not stored.

    :param url: URL of the Open Graph image to cache.
    """
    already_cached = self.fs.find_one({"url": url, "kind": Kind.OG_IMAGE.value})
    if already_cached:
        return

    # presumably a PIL image (``load`` is defined elsewhere) -- verify
    image = load(url, self.user_agent)
    image.thumbnail((100, 100))

    with BytesIO() as raw:
        # The gzip stream must be closed before the buffer is rewound so
        # that all compressed data has been flushed into it.
        with GzipFile(mode="wb", fileobj=raw) as gz:
            image.save(gz, format=image.format)
        raw.seek(0)
        self.fs.put(
            raw,
            url=url,
            size=100,
            content_type=image.get_format_mimetype(),
            kind=Kind.OG_IMAGE.value,
        )
|
||||||
|
|
||||||
def cache_attachment(self, url: str) -> None:
|
def cache_attachment(self, url: str) -> None:
|
||||||
if self.fs.find_one({"url": url, "kind": Kind.ATTACHMENT.value}):
|
if self.fs.find_one({"url": url, "kind": Kind.ATTACHMENT.value}):
|
||||||
return
|
return
|
||||||
|
@ -141,6 +160,8 @@ class MediaCache(object):
|
||||||
def cache(self, url: str, kind: Kind) -> None:
    """Cache ``url`` using the routine that matches ``kind``.

    ``Kind.ACTOR_ICON`` goes to ``cache_actor_icon``, ``Kind.OG_IMAGE``
    goes to ``cache_og_image``, and every other kind is handled by
    ``cache_attachment``.

    :param url: URL of the media to cache.
    :param kind: media kind selecting the caching routine.
    """
    if kind == Kind.ACTOR_ICON:
        self.cache_actor_icon(url)
        return
    if kind == Kind.OG_IMAGE:
        self.cache_og_image(url)
        return
    # Fallback for all remaining kinds.
    self.cache_attachment(url)
|
||||||
|
|
||||||
|
|
|
@ -23,24 +23,11 @@ def links_from_note(note):
|
||||||
return links
|
return links
|
||||||
|
|
||||||
|
|
||||||
def fetch_og_metadata(user_agent, links):
    """Download each link and return its parsed Open Graph metadata.

    Every link is first validated with ``check_url`` and fetched with the
    given user agent (15 second timeout); a non-success HTTP status
    raises via ``raise_for_status``. Parsing only starts once all pages
    have been downloaded.

    :param user_agent: value sent as the ``User-Agent`` request header.
    :param links: iterable of URLs to fetch.
    :return: list with one ``dict`` of Open Graph properties per link, in
        the same order as ``links``.
    """
    pages = []
    for link in links:
        check_url(link)
        resp = requests.get(link, headers={"User-Agent": user_agent}, timeout=15)
        resp.raise_for_status()
        pages.append(resp.text)

    og_entries = []
    for page in pages:
        og_entries.append(dict(opengraph.OpenGraph(html=page)))
    return og_entries
|
|
||||||
|
|
Loading…
Reference in a new issue