diff --git a/poetry.lock b/poetry.lock index a382db6..b70f3a9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -58,6 +58,17 @@ python-versions = "*" [package.extras] test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"] +[[package]] +name = "faker" +version = "15.1.0" +description = "Faker is a Python package that generates fake data for you." +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +python-dateutil = ">=2.4" + [[package]] name = "idna" version = "3.3" @@ -138,7 +149,7 @@ python-versions = ">=3.6" name = "pyparsing" version = "3.0.9" description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "dev" +category = "main" optional = false python-versions = ">=3.6.8" @@ -166,6 +177,17 @@ tomli = ">=1.0.0" [package.extras] testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" + +[package.dependencies] +six = ">=1.5" + [[package]] name = "pywikibot" version = "7.7.0" @@ -246,6 +268,14 @@ pygments = ">=2.6.0,<3.0.0" [package.extras] jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"] +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" + [[package]] name = "tomli" version = "2.0.1" @@ -289,7 +319,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "bb1b2a68b8228c090421801d8c46f07bec2d60bece2f23f77f5920faa6b2de87" +content-hash = "c5c99285ff355e2be2ef7a9133813b15c6b445b52f191f5720911d059b937750" [metadata.files] atomicwrites = [ @@ -316,6 +346,7 @@ commonmark = [ {file = "commonmark-0.9.1-py2.py3-none-any.whl", hash = 
"sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"}, {file = "commonmark-0.9.1.tar.gz", hash = "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60"}, ] +faker = [] idna = [ {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, @@ -355,6 +386,10 @@ pytest = [ {file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"}, {file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"}, ] +python-dateutil = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] pywikibot = [] requests = [ {file = "requests-2.28.0-py3-none-any.whl", hash = "sha256:bc7861137fbce630f17b03d3ad02ad0bf978c844f3536d0edda6499dafce2b6f"}, @@ -368,6 +403,10 @@ rich = [ {file = "rich-12.4.4-py3-none-any.whl", hash = "sha256:d2bbd99c320a2532ac71ff6a3164867884357da3e3301f0240090c5d2fdac7ec"}, {file = "rich-12.4.4.tar.gz", hash = "sha256:4c586de507202505346f3e32d1363eb9ed6932f0c2f63184dea88983ff4971e2"}, ] +six = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] tomli = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, diff --git a/pyproject.toml b/pyproject.toml index 100542e..49bebd0 
100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,9 +17,11 @@ tweepy = "^4.10.0" rich = "^12.4.4" parse = "^1.19.0" pywikibot = "^7.7.0" +pyparsing = "^3.0.9" [tool.poetry.dev-dependencies] pytest = "^7.1.2" +Faker = "^15.1.0" [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/tests/test_patterns/test_wikilinks.py b/tests/test_patterns/test_wikilinks.py index f48933c..ddbc52e 100644 --- a/tests/test_patterns/test_wikilinks.py +++ b/tests/test_patterns/test_wikilinks.py @@ -1,28 +1,133 @@ +from wiki_postbot.patterns.wikilink import Wikilink, NBack +import pytest +from faker import Faker +import typing +import pdb + +class TestStr: + base = ( + "[[My Basic Wikilink]]", + Wikilink("My Basic Wikilink") + ) + multi = ( + "[[Link1]] other text. [[Link2]]", + [Wikilink("Link1"), Wikilink("Link2")] + ) + nback_one = ( + "[[^My Wikilink]]", + Wikilink("My Wikilink", nback=NBack(1,1)) + ) + nback_wildcard = ( + "[[^*My Wikilink]]", + Wikilink("My Wikilink", nback=NBack(wildcard=True)) + ) + nback_range = ( + "[[^{1,3}Link]]", + Wikilink("Link", nback=NBack(1,3)) + ) + nback_start = ( + "[[^{2,}Link]]", + Wikilink("Link", nback=NBack(start=2)) + ) + nback_end = ( + "[[^{,3}Link]]", + Wikilink("Link", nback=NBack(end=3)) + ) + nback_end_shorthand = ( + "[[^{3}Link]]", + Wikilink("Link", nback=NBack(end=3)) + ) -def test_wikilink(): - pass +def pad_garbage(string:str) -> str: + """Pad a string with garbage text""" + fake = Faker() + return fake.paragraph() + " " + string + " " + fake.paragraph() -def test_nback_one(): - pass -def test_nback_all(): - pass +@pytest.mark.parametrize( + "test_string,expected", + [TestStr.base]) +def test_wikilink(test_string, expected): + """ + Parse a string with a basic wikilink in it + """ + test_string = pad_garbage(test_string) + wl = Wikilink.parse(test_string) + assert len(wl) == 1 + assert wl[0] == expected -def test_nback_range_full(): - pass +@pytest.mark.parametrize( + "test_string,expected", + [TestStr.multi]) +def 
test_wikilinks(test_string, expected): + """ + Parse a string that has multiple wikilinks + """ + test_string = pad_garbage(test_string) + wl = Wikilink.parse(test_string) + assert len(wl) == 2 + assert wl[0] == expected[0] + assert wl[1] == expected[1] -def test_nback_range_start(): - pass -def test_nback_range_end(): - pass +@pytest.mark.parametrize( + "test_string,expected", + [TestStr.nback_one]) +def test_nback_one(test_string, expected): + test_string = pad_garbage(test_string) + wl = Wikilink.parse(test_string) + # pdb.set_trace() + assert len(wl) == 1 + assert wl[0] == expected + +@pytest.mark.parametrize( + "test_string,expected", + [TestStr.nback_wildcard]) +def test_nback_all(test_string, expected): + test_string = pad_garbage(test_string) + wl = Wikilink.parse(test_string) + # pdb.set_trace() + assert len(wl) == 1 + assert wl[0] == expected + +@pytest.mark.parametrize( + "test_string,expected", + [TestStr.nback_range]) +def test_nback_range_full(test_string, expected): + test_string = pad_garbage(test_string) + wl = Wikilink.parse(test_string) + # pdb.set_trace() + assert len(wl) == 1 + assert wl[0] == expected + +@pytest.mark.parametrize( + "test_string,expected", + [TestStr.nback_start]) +def test_nback_range_start(test_string, expected): + test_string = pad_garbage(test_string) + wl = Wikilink.parse(test_string) + # pdb.set_trace() + assert len(wl) == 1 + assert wl[0] == expected + +@pytest.mark.parametrize( + "test_string,expected", + [TestStr.nback_end, TestStr.nback_end_shorthand]) +def test_nback_range_end(test_string, expected): + test_string = pad_garbage(test_string) + wl = Wikilink.parse(test_string) + # pdb.set_trace() + assert len(wl) == 1 + assert wl[0] == expected def test_triplet_full(): + pass def test_triplet_implicit_single(): """Test an implicit triplet in a single message""" + pass def test_triplet_implicit_thread(): """Test an implicit triplet where the subject is higher up in the thread""" - + pass diff --git 
a/wiki_postbot/actions/checks.py b/wiki_postbot/actions/checks.py index 595e32d..5ae5cd6 100644 --- a/wiki_postbot/actions/checks.py +++ b/wiki_postbot/actions/checks.py @@ -1,5 +1,5 @@ from wiki_postbot.actions.action import Action, Result -from wiki_postbot.patterns import WIKILINK +from wiki_postbot.patterns.wikilink import WIKILINK from tweepy import Response import re diff --git a/wiki_postbot/actions/inline.py b/wiki_postbot/actions/inline.py index efceb4a..305743b 100644 --- a/wiki_postbot/actions/inline.py +++ b/wiki_postbot/actions/inline.py @@ -18,44 +18,6 @@ class WikiLink(Inline): """ Detect a wikilink and add it to the wiki! - This action uses an extended wikilink syntax that includes - - * **n-back links** - allows the user to specify messages in threads that are not the initiating message, and - * **Semantic wikilinks** - specify a triplet subject-predicate-object link - - In each of the following examples, `LINK` is a placeholder for the text of the wikilink to be made. - - # N-Back Links - - For all of these, whitespace in-between the n-back specifier and the link text will be ignored. So - `[[^LINK]]` and `[[^ LINK]]` are both valid. - - * **Preceding Message** - `[[^LINK]]` - * **Entire Preceding Thread** - `[[^*LINK]]` - * **Ranges** - ** **Fully specified** - `[[^{n,m}LINK]]` where `n` and `m` are the start and end of the range to be included, inclusive. - eg. `[[^{2,5}LINK]]` would specify four messages: the 2nd one above the initiating message through the 5th, and - `n == 0` indicates the initiating message. - ** **End specified** - `[[^{,m}LINK]]` OR `[[^{m}LINK]]` - include the initiating message and the `m` messages above it. - ** **Start specified** - `[[^{n,}LINK]]` - include all preceding messages in the thread before the `nth` message - - # Semantic Wikilinks - - Semantic wikilinks create a subject, predicate, object triplet. 
The subject will be the page that the - - Semantic wikilinks use `::` as a delimiter between terms, and a `::` indicates that a wikilink is semantic. - - `SUB`, `PRED`, and `OBJ` are placeholders for the parts of - a triplet in the following examples. - - * **Complete Triplet** - `[[SUB::PRED::OBJ]]` - create a semantic wikilink on the `SUB`ject page that links to the - `OBJ`ect page with the indicated predicate. - - eg. `[[Paper::Has DOI::https://doi.org/10.xxx/yyyy]]` - - * **Implicit Triplet** - `[[PRED::OBJ]]` after a `[[SUB]]` wikilink has been previously used in the message or thread. - A subject can also be declared with a complete triplet. - .. note: These commands will not include the full text of messages from users that have not opted in to the bot, diff --git a/wiki_postbot/patterns/__init__.py b/wiki_postbot/patterns/__init__.py index e69de29..6289eeb 100644 --- a/wiki_postbot/patterns/__init__.py +++ b/wiki_postbot/patterns/__init__.py @@ -0,0 +1 @@ +from wiki_postbot.patterns.patterns import Pattern \ No newline at end of file diff --git a/wiki_postbot/patterns/patterns.py b/wiki_postbot/patterns/patterns.py index ad89bae..3849a4a 100644 --- a/wiki_postbot/patterns/patterns.py +++ b/wiki_postbot/patterns/patterns.py @@ -2,3 +2,10 @@ Basic regex patterns that are simply `re.compile`d """ +class Pattern: + """ + Base class for detecting patterns + + Not sure what should go here but making it just for the sake of structure + """ + diff --git a/wiki_postbot/patterns/wikilink.py b/wiki_postbot/patterns/wikilink.py index 56b7aa3..fee1583 100644 --- a/wiki_postbot/patterns/wikilink.py +++ b/wiki_postbot/patterns/wikilink.py @@ -1,8 +1,175 @@ import re +from wiki_postbot.patterns import Pattern +from dataclasses import dataclass +from typing import Optional, Union, List +import pyparsing as pp + WIKILINK = re.compile(r'\[\[(.*?)\]\]', re.IGNORECASE) """ Basic structure of wikilink, used to detect presence """ -class +class NBack: + FIELDS = ('wildcard', 
'start', 'end') + + def __init__(self, start:Optional[int]=None, end:Optional[int]=None, + wildcard:Optional[Union[str,bool]]=None, + one:Optional[str]=None): + + if wildcard: + self.wildcard = True + self.start = None + self.end = None + return + else: + self.wildcard = False + + if one: + self.start = 1 + self.end = 1 + else: + if start is not None: + start = int(start) + if end is not None: + end = int(end) + self.start = start + self.end = end + + if self.start is not None and self.end is not None: + if self.start > self.end: + raise ValueError(f"Start value must be less than end value, got start:{self.start}, end:{self.end}") + + @classmethod + def make_parser(cls) -> pp.ParserElement: + # -------------------------------------------------- + # n-back links immediately follow the [[ and can be one of + # ^ + # ^* + # ^{n,m} + # ^{n,} + # ^{,m} + # ^{m} + + # make elements + caret = pp.Literal("^") + lcurly = pp.Literal('{').suppress() + rcurly = pp.Literal('}').suppress() + integer = pp.Word(pp.nums) + comma = pp.Literal(',').suppress() + nb_range = caret + lcurly + + # combine into matches + nb_wildcard = caret.suppress() + "*" + # start or end can be omitted if comma is present + nb_full = nb_range + pp.Optional(integer("start")) + comma + pp.Optional(integer("end")) + rcurly + # if no comma present, it's just an end + nb_end = nb_range + integer("end") + rcurly + + # combine into full nback parser + nback = pp.Group(nb_wildcard('wildcard') | nb_full | nb_end | caret("one")).set_results_name("nback") + return nback + + def __eq__(self, other:'NBack'): + return all([getattr(self, f) == getattr(other, f) for f in self.FIELDS]) + +class Wikilink(Pattern): + """ + Pattern for detecting wikilinks! 
 + + This pattern implements an extended wikilink syntax that includes + + * **n-back links** - allows the user to specify messages in threads that are not the initiating message, and + * **Semantic wikilinks** - specify a triplet subject-predicate-object link + + In each of the following examples, `LINK` is a placeholder for the text of the wikilink to be made. + + # N-Back Links + + For all of these, whitespace in-between the n-back specifier and the link text will be ignored. So + `[[^LINK]]` and `[[^ LINK]]` are both valid. + + * **Preceding Message** - `[[^LINK]]` + * **Entire Preceding Thread** - `[[^*LINK]]` + * **Ranges** + ** **Fully specified** - `[[^{n,m}LINK]]` where `n` and `m` are the start and end of the range to be included, inclusive. + eg. `[[^{2,5}LINK]]` would specify four messages: the 2nd one above the initiating message through the 5th, and + `n == 0` indicates the initiating message. + ** **End specified** - `[[^{,m}LINK]]` OR `[[^{m}LINK]]` - include the initiating message and the `m` messages above it. + ** **Start specified** - `[[^{n,}LINK]]` - include all preceding messages in the thread before the `nth` message + + # Semantic Wikilinks + + Semantic wikilinks create a subject, predicate, object triplet. The subject will be the page that the semantic link is created on. + + Semantic wikilinks use `::` as a delimiter between terms, and a `::` indicates that a wikilink is semantic. + + `SUB`, `PRED`, and `OBJ` are placeholders for the parts of + a triplet in the following examples. + + * **Complete Triplet** - `[[SUB::PRED::OBJ]]` - create a semantic wikilink on the `SUB`ject page that links to the + `OBJ`ect page with the indicated predicate. + + eg. `[[Paper::Has DOI::https://doi.org/10.xxx/yyyy]]` + + * **Implicit Triplet** - `[[PRED::OBJ]]` after a `[[SUB]]` wikilink has been previously used in the message or thread. + A subject can also be declared with a complete triplet. 
+ """ + FIELDS = ('link', 'nback', 'predicate', 'object', 'section') + + def __init__( + self, + link: str, + nback: Optional[Union[NBack, tuple, dict]] = None, + predicate: Optional[str] = None, + object: Optional[str] = None, + section: Optional[str] = None, + **kwargs): + super(Wikilink, self).__init__(**kwargs) + + self.link = link + if isinstance(nback, (tuple, list)): + nback = NBack(*nback) + elif isinstance(nback, dict): + nback = NBack(**nback) + elif isinstance(nback, pp.ParseResults): + nback = NBack(**dict(nback)) + + self.nback = nback + self.predicate = predicate + self.object = object + self.section = section + + @classmethod + def make_parser(cls) -> pp.ParserElement: + """ + Make the parser to detect wikilinks! + """ + # All wikilinks start with [[ and end with ]] + lbracket = pp.Literal('[[').suppress() + rbracket = pp.Literal(']]').suppress() + + #nback parser + nback = NBack.make_parser() + + # main wikilink subject text + link = pp.Word(pp.printables+ " ", excludeChars="#[]{}|") + + # Combine all + parser = lbracket + pp.Optional(nback) + link("link") + rbracket + return parser + + @classmethod + def parse(cls, string:str, return_parsed:bool=False) -> List['Wikilink']: + parser = cls.make_parser() + results = parser.search_string(string) + if return_parsed: + return results + else: + return [Wikilink(**dict(res.items())) for res in results] + + def __eq__(self, other:'Wikilink'): + return all(getattr(self, f) == getattr(other, f) for f in self.FIELDS) + + +