mirror of
https://git.sr.ht/~tsileo/microblog.pub
synced 2024-12-21 20:54:27 +00:00
Tweak URL parsing (use the parsed URL's `hostname` instead of `netloc`)
This commit is contained in:
parent 1e6a290fb3
commit 4f98ff6bbf
6 changed files with 12 additions and 10 deletions
|
@ -116,7 +116,7 @@ class Actor:
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def server(self) -> str:
|
def server(self) -> str:
|
||||||
return urlparse(self.ap_id).netloc
|
return urlparse(self.ap_id).hostname # type: ignore
|
||||||
|
|
||||||
|
|
||||||
class RemoteActor(Actor):
|
class RemoteActor(Actor):
|
||||||
|
|
|
@ -1255,7 +1255,7 @@ async def _process_note_object(
|
||||||
is_mention = True
|
is_mention = True
|
||||||
|
|
||||||
inbox_object = models.InboxObject(
|
inbox_object = models.InboxObject(
|
||||||
server=urlparse(ro.ap_id).netloc,
|
server=urlparse(ro.ap_id).hostname,
|
||||||
actor_id=from_actor.id,
|
actor_id=from_actor.id,
|
||||||
ap_actor_id=from_actor.ap_id,
|
ap_actor_id=from_actor.ap_id,
|
||||||
ap_type=ro.ap_type,
|
ap_type=ro.ap_type,
|
||||||
|
@ -1521,7 +1521,7 @@ async def save_to_inbox(
|
||||||
)
|
)
|
||||||
|
|
||||||
inbox_object = models.InboxObject(
|
inbox_object = models.InboxObject(
|
||||||
server=urlparse(activity_ro.ap_id).netloc,
|
server=urlparse(activity_ro.ap_id).hostname,
|
||||||
actor_id=actor.id,
|
actor_id=actor.id,
|
||||||
ap_actor_id=actor.ap_id,
|
ap_actor_id=actor.ap_id,
|
||||||
ap_type=activity_ro.ap_type,
|
ap_type=activity_ro.ap_type,
|
||||||
|
@ -1668,7 +1668,7 @@ async def save_to_inbox(
|
||||||
)
|
)
|
||||||
announced_object = RemoteObject(announced_raw_object, announced_actor)
|
announced_object = RemoteObject(announced_raw_object, announced_actor)
|
||||||
announced_inbox_object = models.InboxObject(
|
announced_inbox_object = models.InboxObject(
|
||||||
server=urlparse(announced_object.ap_id).netloc,
|
server=urlparse(announced_object.ap_id).hostname,
|
||||||
actor_id=announced_actor.id,
|
actor_id=announced_actor.id,
|
||||||
ap_actor_id=announced_actor.ap_id,
|
ap_actor_id=announced_actor.ap_id,
|
||||||
ap_type=announced_object.ap_type,
|
ap_type=announced_object.ap_type,
|
||||||
|
|
|
@ -24,7 +24,7 @@ def _get_prop(props: dict[str, Any], name: str, default=None) -> Any:
|
||||||
|
|
||||||
async def get_client_id_data(url: str) -> IndieAuthClient | None:
|
async def get_client_id_data(url: str) -> IndieAuthClient | None:
|
||||||
# Don't fetch localhost URL
|
# Don't fetch localhost URL
|
||||||
if urlparse(url).netloc == "localhost":
|
if urlparse(url).hostname == "localhost":
|
||||||
return IndieAuthClient(
|
return IndieAuthClient(
|
||||||
logo=None,
|
logo=None,
|
||||||
name=url,
|
name=url,
|
||||||
|
|
|
@ -39,7 +39,7 @@ def _scrap_og_meta(url: str, html: str) -> OpenGraphMeta | None:
|
||||||
"title": soup.find("title").text,
|
"title": soup.find("title").text,
|
||||||
"image": None,
|
"image": None,
|
||||||
"description": None,
|
"description": None,
|
||||||
"site_name": urlparse(url).netloc,
|
"site_name": urlparse(url).hostname,
|
||||||
}
|
}
|
||||||
for field in OpenGraphMeta.__fields__.keys():
|
for field in OpenGraphMeta.__fields__.keys():
|
||||||
og_field = f"og:{field}"
|
og_field = f"og:{field}"
|
||||||
|
@ -60,7 +60,7 @@ async def external_urls(
|
||||||
db_session: AsyncSession,
|
db_session: AsyncSession,
|
||||||
ro: ap_object.RemoteObject | OutboxObject | InboxObject,
|
ro: ap_object.RemoteObject | OutboxObject | InboxObject,
|
||||||
) -> set[str]:
|
) -> set[str]:
|
||||||
note_host = urlparse(ro.ap_id).netloc
|
note_host = urlparse(ro.ap_id).hostname
|
||||||
|
|
||||||
tags_hrefs = set()
|
tags_hrefs = set()
|
||||||
for tag in ro.tags:
|
for tag in ro.tags:
|
||||||
|
@ -84,7 +84,7 @@ async def external_urls(
|
||||||
mimetype, _ = mimetypes.guess_type(h)
|
mimetype, _ = mimetypes.guess_type(h)
|
||||||
if (
|
if (
|
||||||
ph.scheme in {"http", "https"}
|
ph.scheme in {"http", "https"}
|
||||||
and ph.netloc != note_host
|
and ph.hostname != note_host
|
||||||
and is_url_valid(h)
|
and is_url_valid(h)
|
||||||
and (
|
and (
|
||||||
not mimetype
|
not mimetype
|
||||||
|
|
|
@ -27,11 +27,13 @@ def replace_url(u: str) -> str:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parsed_href = urlparse(u)
|
parsed_href = urlparse(u)
|
||||||
|
if not parsed_href.hostname:
|
||||||
|
raise ValueError("Missing hostname")
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.warning(f"Failed to parse url={u}")
|
logger.warning(f"Failed to parse url={u}")
|
||||||
return u
|
return u
|
||||||
|
|
||||||
if new_netloc := PRIVACY_REPLACE.get(parsed_href.netloc.removeprefix("www.")):
|
if new_netloc := PRIVACY_REPLACE.get(parsed_href.hostname.removeprefix("www.")):
|
||||||
return parsed_href._replace(netloc=new_netloc).geturl()
|
return parsed_href._replace(netloc=new_netloc).geturl()
|
||||||
|
|
||||||
return u
|
return u
|
||||||
|
|
|
@ -220,7 +220,7 @@ class InboxObjectFactory(factory.alchemy.SQLAlchemyModelFactory):
|
||||||
if "published" in ro.ap_object:
|
if "published" in ro.ap_object:
|
||||||
ap_published_at = isoparse(ro.ap_object["published"])
|
ap_published_at = isoparse(ro.ap_object["published"])
|
||||||
return cls(
|
return cls(
|
||||||
server=urlparse(ro.ap_id).netloc,
|
server=urlparse(ro.ap_id).hostname,
|
||||||
actor_id=actor.id,
|
actor_id=actor.id,
|
||||||
ap_actor_id=actor.ap_id,
|
ap_actor_id=actor.ap_id,
|
||||||
ap_type=ro.ap_type,
|
ap_type=ro.ap_type,
|
||||||
|
|
Loading…
Reference in a new issue