"""
Minimal client for the ``ask`` API of a Semantic MediaWiki instance.

Adapted from https://github.com/auto-pi-lot/autopilot/blob/main/autopilot/utils/wiki.py

So sort of shit.
"""

from typing import List, Tuple, Union
from urllib.parse import quote, urljoin

import requests


class SMW:
    """
    Thin wrapper around the ``action=ask`` endpoint of a Semantic MediaWiki.

    Args:
        url (str): Base URL of the wiki. It is combined with ``api_suffix``
            using :func:`urllib.parse.urljoin`, so it should end with a
            trailing slash; otherwise its last path segment is replaced.
        api_suffix (str): Path of the API entry point relative to ``url``.
    """

    def __init__(self, url: str, api_suffix="api.php"):
        self.url = url
        self.api_url = urljoin(self.url, api_suffix)

    def tags(self, page: str) -> List[Tuple[str, str]]:
        """
        Return the ``Tag`` property values of a single page.

        The original author's note on this method was:
        SUPER MEGA HACK DO NOT READ

        Args:
            page (str): Name of the page, it is wrapped in ``[[ ]]`` and used
                as the only filter of the query.

        Returns:
            list: One ``(shortname, longurl)`` tuple per tag, taken from the
            ``ShortName`` and ``LongName`` fields of each tag record. Empty
            if the page has no ``Tag`` values.

        Raises:
            IndexError: if the query returns no page at all.
        """
        res = self.ask(filters="[[" + page + "]]", properties='Tag')
        tags = res[0]['Tag']

        # _clean_smw_result unwraps single-element lists, so a page with
        # exactly one tag yields the bare record instead of a list of records
        if isinstance(tags, dict):
            tags = [tags]

        # collect every tag (previously the list was overwritten by a tuple
        # on each iteration, so only the last tag survived)
        return [
            (tag['ShortName']['item'][0], tag['LongName']['item'][0]['fulltext'])
            for tag in tags
        ]

    def ask(self,
            filters: Union[List[str], str],
            properties: Union[None, List[str], str] = None,
            clean: bool = True
            ) -> Union[List[dict], dict]:
        """
        Run an ``ask`` query against the wiki and return the results.

        Args:
            filters (list, str): A list of strings or a single string of
                semantic mediawiki formatted property filters, eg
                ``"[[Category:Hardware]]"`` or
                ``"[[Has Contributor::sneakers-the-rat]]"``. Refer to the
                semantic mediawiki documentation for more information on
                syntax
            properties (None, list, str): Properties to return from filtered
                pages, see the available properties on the wiki and the
                semantic mediawiki documentation for more information on
                syntax. If ``None`` (default), just return the names of the
                pages
            clean (bool): If ``True`` (default), flatten each result with
                :func:`_clean_smw_result` and return them as a list. If
                ``False``, return the decoded JSON response unchanged.

        Returns:
            list, dict: A list with one flattened dict per matching page when
            ``clean`` is ``True``, otherwise the raw decoded JSON response.
        """
        query_str = self._make_ask_string(filters, properties, full_url=True)
        # NOTE(review): no timeout is passed, so this call can block
        # for as long as the server keeps the connection open
        result = requests.get(query_str)
        payload = result.json()

        if not clean:
            return payload

        unnested = []
        for entry in payload['query']['results']:
            # each entry is a mapping whose first value holds the page data
            nested_entry = list(entry.values())[0]
            unnested.append(_clean_smw_result(nested_entry))
        return unnested

    def _make_ask_string(self,
                         filters: Union[List[str], str],
                         properties: Union[None, List[str], str] = None,
                         full_url: bool = True) -> str:
        """
        Create a query string to request semantic information from a
        semantic wiki

        Args:
            filters (list, str): One or more filter strings, joined with
                ``|``.
            properties (None, list, str): Zero or more property names, each
                appended to the query prefixed with ``|?``.
            full_url (bool): If ``True`` (default), prepend
                ``f'{self.api_url}?action=ask&query='`` to the returned
                string to make it ready for an API call

        Returns:
            str: the formatted query string

        Raises:
            ValueError: if ``filters`` is empty.
        """
        # combine the components, separated by pipes or pipe question marks
        # as the case may be
        if isinstance(filters, str):
            filters = [filters]

        if len(filters) == 0:
            raise ValueError('You need to provide at least one filter! Cant get the whole wiki!')

        query_str = "|".join(filters)

        if isinstance(properties, str):
            properties = [properties]
        elif properties is None:
            properties = []

        if len(properties) > 0:
            # double join with |? so it goes between all the properties
            # *and* between the filters and the first property
            query_str = "|?".join((
                query_str,
                "|?".join(properties)
            ))

        # add api call boilerplate and URI-encode
        # (requests.utils.quote is a re-export of urllib.parse.quote)
        query_str = quote(query_str) + "&format=json&api_version=3"

        if full_url:
            return f"{self.api_url}?action=ask&query=" + query_str
        return query_str


def _fulltext_or_value(value):
    """Return ``value['fulltext']`` for dicts that have that key, else ``value``."""
    if isinstance(value, dict) and 'fulltext' in value:
        return value['fulltext']
    return value


def _clean_smw_result(nested_entry: dict) -> dict:
    """
    Flatten one result entry of an ``ask`` query.

    Each item of ``nested_entry['printouts']`` is copied to the output with
    the following simplifications:

    * lists with more than one element keep their list form, but elements
      that are dicts with a ``'fulltext'`` key are replaced by that value
    * lists with exactly one element are unwrapped to that element (again
      reduced to its ``'fulltext'`` value when present)
    * anything else (including empty lists) is copied unchanged

    Args:
        nested_entry (dict): A single page entry; must contain ``'fulltext'``
            and ``'fullurl'``, may contain ``'printouts'``.

    Returns:
        dict: The flattened printouts plus ``'name'`` (from ``'fulltext'``)
        and ``'url'`` (from ``'fullurl'``). These two keys are written last
        and therefore override printouts with the same names.

    Raises:
        KeyError: if ``'fulltext'`` or ``'fullurl'`` is missing.
    """
    # unnest entries that are [[Has type::page]] and thus have extra metadata
    unnest_entry = {}
    printouts = nested_entry.get('printouts', {})

    # only iterate when there is something to read; this also avoids calling
    # .items() on an empty value that is not a dict
    if printouts:
        for k, v in printouts.items():
            if isinstance(v, list) and len(v) > 1:
                unnest_entry[k] = [_fulltext_or_value(subv) for subv in v]
            elif isinstance(v, list) and len(v) == 1:
                unnest_entry[k] = _fulltext_or_value(v[0])
            else:
                unnest_entry[k] = v

    unnest_entry['name'] = nested_entry['fulltext']
    unnest_entry['url'] = nested_entry['fullurl']
    return unnest_entry