128 lines
4.7 KiB
Python
128 lines
4.7 KiB
Python
"""
|
|
Adapted from https://github.com/auto-pi-lot/autopilot/blob/main/autopilot/utils/wiki.py

So it is fairly rough around the edges.
|
|
|
|
"""
|
|
|
|
from typing import List, Union
from urllib.parse import quote, urljoin

import requests
|
|
|
|
class SMW:
    """
    Minimal client for the ``ask`` API of a Semantic MediaWiki instance.

    Args:
        url (str): Base URL of the wiki. The API endpoint is resolved with
            :func:`urllib.parse.urljoin`, so a base URL that points below the
            server root needs a trailing slash (``"https://example.com/wiki/"``),
            otherwise its last path segment is replaced by ``api_suffix``.
        api_suffix (str): Path of the API entry point relative to ``url``
            (default ``"api.php"``).

    Attributes:
        url (str): The base URL as given.
        api_url (str): ``url`` joined with ``api_suffix``.
    """

    def __init__(self, url:str, api_suffix="api.php"):
        self.url = url
        self.api_url = urljoin(self.url, api_suffix)

    def tags(self, page:str):
        """
        Get the tags attached to a single wiki page.

        Asks for the ``Tag`` property of ``page`` and reads the ``ShortName``
        and ``LongName`` fields of every returned tag.

        NOTE: this is a hack that depends on the exact JSON shape the wiki
        returns for ``Tag`` (each tag carrying ``ShortName``/``LongName``
        entries with an ``item`` list) -- verify against the wiki's schema
        before relying on it elsewhere.

        Args:
            page (str): Name of the page to look up.

        Returns:
            list: One ``(shortname, longurl)`` tuple per tag. Empty if the
            query matched no page or the page has no tags.
        """
        res = self.ask(filters="[["+page+"]]", properties='Tag')
        if not res:
            return []

        tags = res[0]['Tag']
        # ask() unwraps single-element printout lists, so a page with exactly
        # one tag yields a bare dict instead of a list of dicts
        if isinstance(tags, dict):
            tags = [tags]

        return [
            (tag['ShortName']['item'][0], tag['LongName']['item'][0]['fulltext'])
            for tag in tags
        ]

    def ask(self,
            filters:Union[List[str],str],
            properties:Union[None,List[str],str]=None,
            clean:bool=True
            ) -> Union[List[dict], dict]:
        """
        Run a semantic ``ask`` query against the wiki and return the results.

        Args:
            filters (list, str): A list of strings or a single string of semantic
                mediawiki formatted property filters, eg ``"[[Category:Hardware]]"``
                or ``"[[Has Contributor::sneakers-the-rat]]"``. Refer to the
                `semantic mediawiki documentation <https://www.semantic-mediawiki.org/wiki/Help:Selecting_pages>`_
                for more information on syntax
            properties (None, list, str): Properties to return from filtered pages,
                see the `available properties <https://wiki.auto-pi-lot.com/index.php/Special:Properties>`_
                on the wiki and the `semantic mediawiki documentation <https://www.semantic-mediawiki.org/wiki/Help:Selecting_pages>`_
                for more information on syntax. If ``None`` (default), just return
                the names of the pages
            clean (bool): If ``True`` (default), pass every result through
                ``_clean_smw_result`` and return the flattened entries.
                If ``False``, return the decoded JSON response untouched.

        Returns:
            list: one dict per matching page when ``clean`` is ``True``
            dict: the full decoded JSON response when ``clean`` is ``False``
        """
        query_str = self._make_ask_string(filters, properties, full_url=True)
        result = requests.get(query_str)
        # decode the body once and reuse it for both return paths
        payload = result.json()

        if not clean:
            return payload

        unnested = []
        for entry in payload['query']['results']:
            # each result is a dict keyed by the page name; take its
            # (first) value, which holds the page data
            nested_entry = list(entry.values())[0]
            unnested.append(_clean_smw_result(nested_entry))
        return unnested

    def _make_ask_string(self,
                         filters: Union[List[str], str],
                         properties: Union[None, List[str], str] = None,
                         full_url: bool = True) -> str:
        """
        Create a query string to request semantic information from a semantic wiki

        Args:
            filters (list, str): One or more semantic mediawiki property
                filters, eg ``"[[Category:Hardware]]"``. Multiple filters are
                joined with ``|``.
            properties (None, list, str): Properties to request for the
                matching pages; each one is appended as ``|?<property>``.
                ``None`` (default) requests no properties.
            full_url (bool): If ``True`` (default), prepend
                ``f"{self.api_url}?action=ask&query="`` to the returned string to
                make it `ready for an API call <https://www.semantic-mediawiki.org/wiki/Help:API:ask>`_

        Returns:
            str: the formatted query string

        Raises:
            ValueError: if ``filters`` is empty
        """
        if isinstance(filters, str):
            filters = [filters]

        if not filters:
            raise ValueError('You need to provide at least one filter! Cant get the whole wiki!')

        if isinstance(properties, str):
            properties = [properties]
        elif properties is None:
            properties = []

        # filters are separated by pipes; every property is introduced by a
        # pipe + question mark, including the first one after the filters
        query_str = "|?".join(["|".join(filters), *properties])

        # URI-encode, then add the api call boilerplate
        query_str = quote(query_str) + "&format=json&api_version=3"

        if full_url:
            return f"{self.api_url}?action=ask&query=" + query_str
        return query_str
|
|
|
|
def _clean_smw_result(nested_entry:dict) -> dict:
|
|
# unnest entries that are [[Has type::page]] and thus have extra metadata
|
|
unnest_entry = {}
|
|
printouts = nested_entry.get('printouts', {})
|
|
if len(printouts)>0:
|
|
for k, v in printouts.items():
|
|
if isinstance(v, list) and len(v) > 1:
|
|
unnest_entry[k] = []
|
|
for subv in v:
|
|
if isinstance(subv, dict) and 'fulltext' in subv.keys():
|
|
subv = subv['fulltext']
|
|
unnest_entry[k].append(subv)
|
|
elif isinstance(v, list) and len(v) == 1:
|
|
unnest_entry[k] = v[0]
|
|
if isinstance(unnest_entry[k], dict) and 'fulltext' in unnest_entry[k].keys():
|
|
unnest_entry[k] = unnest_entry[k]['fulltext']
|
|
else:
|
|
unnest_entry[k] = v
|
|
|
|
unnest_entry['name'] = nested_entry['fulltext']
|
|
unnest_entry['url'] = nested_entry['fullurl']
|
|
return unnest_entry
|