mastodon-ld/masto_ld/interfaces/smw.py

128 lines
4.7 KiB
Python

"""
Adapted from https://github.com/auto-pi-lot/autopilot/blob/main/autopilot/utils/wiki.py
So sort of shit.
"""
import requests
from typing import List, Union
from urllib.parse import urljoin
class SMW:
    """
    Minimal client for the ``ask`` API of a Semantic MediaWiki instance.
    """

    def __init__(self, url:str, api_suffix="api.php"):
        """
        Args:
            url (str): Base URL of the wiki. ``api_suffix`` is resolved against it
                with :func:`urllib.parse.urljoin`, so a URL without a trailing
                slash has its last path segment replaced by ``api_suffix``.
            api_suffix (str): Location of the API entry point relative to ``url``.
        """
        self.url = url
        self.api_url = urljoin(self.url, api_suffix)

    def tags(self, page:str):
        """
        Get the tags declared on a wiki page.

        Asks for the ``Tag`` property of ``page`` and pulls the first
        ``ShortName`` item and the ``fulltext`` of the first ``LongName`` item
        out of every returned tag record.

        HACK: this depends on the exact nesting of the ``Tag`` records as they
        come back from :meth:`ask`.

        Args:
            page (str): Name of the page to query.

        Returns:
            list: ``(shortname, longurl)`` tuples, one per tag. Empty if the
            query matched no page or the page has no tags.
        """
        res = self.ask(filters="[["+page+"]]", properties='Tag')
        if not res:
            # the query matched no page at all
            return []

        tags = res[0].get('Tag', [])
        if isinstance(tags, dict):
            # the cleaned result collapses a single-element list into the bare
            # record, so wrap it again to iterate over records rather than keys
            tags = [tags]

        clean_tags = []
        for tag in tags:
            shortname = tag['ShortName']['item'][0]
            longurl = tag['LongName']['item'][0]['fulltext']
            clean_tags.append((shortname, longurl))
        return clean_tags

    def ask(self,
            filters:Union[List[str],str],
            properties:Union[None,List[str],str]=None,
            clean:bool=True
            ) -> Union[List[dict], dict]:
        """
        Run an ``ask`` query against the wiki and return the results.

        Args:
            filters (list, str): A list of strings or a single string of semantic
                mediawiki formatted property filters, eg ``"[[Category:Hardware]]"``
                or ``"[[Has Contributor::sneakers-the-rat]]"``. Refer to the
                `semantic mediawiki documentation <https://www.semantic-mediawiki.org/wiki/Help:Selecting_pages>`_
                for more information on syntax
            properties (None, list, str): Properties to return from filtered pages,
                see the ``Special:Properties`` page of the wiki and the
                `semantic mediawiki documentation <https://www.semantic-mediawiki.org/wiki/Help:Selecting_pages>`_
                for more information on syntax. If ``None`` (default), just return
                the names of the pages
            clean (bool): If ``True`` (default), unnest every result with
                ``_clean_smw_result`` and return them as a list. If ``False``,
                return the decoded JSON response unchanged.

        Returns:
            list, dict: A list with one cleaned dict per matching page when
            ``clean`` is ``True``, otherwise the raw decoded JSON response.
        """
        query_str = self._make_ask_string(filters, properties, full_url=True)
        result = requests.get(query_str)
        payload = result.json()
        if not clean:
            return payload

        unnested = []
        for entry in payload['query']['results']:
            # each result is a single-key mapping of page name -> page data
            entry_name = list(entry.keys())[0]
            unnested.append(_clean_smw_result(entry[entry_name]))
        return unnested

    def _make_ask_string(self,
                         filters: Union[List[str], str],
                         properties: Union[None, List[str], str] = None,
                         full_url: bool = True) -> str:
        """
        Create a query string to request semantic information from a semantic wiki

        Args:
            filters (list, str): One or more semantic mediawiki property filters.
            properties (None, list, str): Properties to request for the matching pages.
            full_url (bool): If ``True`` (default), prepend
                ``f'{self.api_url}?action=ask&query='`` so the returned string is
                ready for an API call. If ``False``, return only the encoded
                query and its trailing parameters.

        Returns:
            str: the formatted query string

        Raises:
            ValueError: if ``filters`` is an empty list
        """
        # combine the components, separated by pipes or pipe question marks as the case may be
        if isinstance(filters, str):
            filters = [filters]
        if len(filters) == 0:
            raise ValueError('You need to provide at least one filter! Cant get the whole wiki!')
        query_str = "|".join(filters)

        if isinstance(properties, str):
            properties = [properties]
        elif properties is None:
            properties = []

        if len(properties) > 0:
            # double join with |? so it goes between all the properties
            # *and* between the filters and the first property
            query_str = "|?".join((
                query_str,
                "|?".join(properties)
            ))

        # add api call boilerplate and URI-encode
        query_str = requests.utils.quote(query_str) + "&format=json&api_version=3"

        if full_url:
            return f"{self.api_url}?action=ask&query=" + query_str
        else:
            return query_str
def _clean_smw_result(nested_entry:dict) -> dict:
# unnest entries that are [[Has type::page]] and thus have extra metadata
unnest_entry = {}
printouts = nested_entry.get('printouts', {})
if len(printouts)>0:
for k, v in printouts.items():
if isinstance(v, list) and len(v) > 1:
unnest_entry[k] = []
for subv in v:
if isinstance(subv, dict) and 'fulltext' in subv.keys():
subv = subv['fulltext']
unnest_entry[k].append(subv)
elif isinstance(v, list) and len(v) == 1:
unnest_entry[k] = v[0]
if isinstance(unnest_entry[k], dict) and 'fulltext' in unnest_entry[k].keys():
unnest_entry[k] = unnest_entry[k]['fulltext']
else:
unnest_entry[k] = v
unnest_entry['name'] = nested_entry['fulltext']
unnest_entry['url'] = nested_entry['fullurl']
return unnest_entry