nback links completed

sneakers-the-rat 2022-10-11 17:52:35 -07:00
parent cfefa71471
commit 8d2b92e319
8 changed files with 338 additions and 55 deletions

poetry.lock generated
View file

@@ -58,6 +58,17 @@ python-versions = "*"
[package.extras]
test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"]
[[package]]
name = "faker"
version = "15.1.0"
description = "Faker is a Python package that generates fake data for you."
category = "dev"
optional = false
python-versions = ">=3.7"
[package.dependencies]
python-dateutil = ">=2.4"
[[package]]
name = "idna"
version = "3.3"
@@ -138,7 +149,7 @@ python-versions = ">=3.6"
name = "pyparsing"
version = "3.0.9"
description = "pyparsing module - Classes and methods to define and execute parsing grammars"
category = "dev" → category = "main"
optional = false
python-versions = ">=3.6.8"
@@ -166,6 +177,17 @@ tomli = ">=1.0.0"
[package.extras]
testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
[[package]]
name = "python-dateutil"
version = "2.8.2"
description = "Extensions to the standard Python datetime module"
category = "dev"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
[package.dependencies]
six = ">=1.5"
[[package]]
name = "pywikibot"
version = "7.7.0"
@@ -246,6 +268,14 @@ pygments = ">=2.6.0,<3.0.0"
[package.extras]
jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"]
[[package]]
name = "six"
version = "1.16.0"
description = "Python 2 and 3 compatibility utilities"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
[[package]]
name = "tomli"
version = "2.0.1"
@@ -289,7 +319,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[metadata]
lock-version = "1.1"
python-versions = "^3.9"
content-hash = "bb1b2a68b8228c090421801d8c46f07bec2d60bece2f23f77f5920faa6b2de87" → content-hash = "c5c99285ff355e2be2ef7a9133813b15c6b445b52f191f5720911d059b937750"
[metadata.files]
atomicwrites = [
@@ -316,6 +346,7 @@ commonmark = [
{file = "commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"},
{file = "commonmark-0.9.1.tar.gz", hash = "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60"},
]
faker = []
idna = [
{file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"},
{file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"},
@@ -355,6 +386,10 @@ pytest = [
{file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"},
{file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"},
]
python-dateutil = [
{file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
{file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
]
pywikibot = []
requests = [
{file = "requests-2.28.0-py3-none-any.whl", hash = "sha256:bc7861137fbce630f17b03d3ad02ad0bf978c844f3536d0edda6499dafce2b6f"},
@@ -368,6 +403,10 @@ rich = [
{file = "rich-12.4.4-py3-none-any.whl", hash = "sha256:d2bbd99c320a2532ac71ff6a3164867884357da3e3301f0240090c5d2fdac7ec"},
{file = "rich-12.4.4.tar.gz", hash = "sha256:4c586de507202505346f3e32d1363eb9ed6932f0c2f63184dea88983ff4971e2"},
]
six = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
]
tomli = [
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},

View file

@@ -17,9 +17,11 @@ tweepy = "^4.10.0"
rich = "^12.4.4"
parse = "^1.19.0"
pywikibot = "^7.7.0"
pyparsing = "^3.0.9"
[tool.poetry.dev-dependencies]
pytest = "^7.1.2"
Faker = "^15.1.0"
[build-system]
requires = ["poetry-core>=1.0.0"]

View file

@@ -1,28 +1,133 @@
from wiki_postbot.patterns.wikilink import Wikilink, NBack

import pytest
from faker import Faker


class TestStr:
    base = (
        "[[My Basic Wikilink]]",
        Wikilink("My Basic Wikilink")
    )
    multi = (
        "[[Link1]] other text. [[Link2]]",
        [Wikilink("Link1"), Wikilink("Link2")]
    )
    nback_one = (
        "[[^My Wikilink]]",
        Wikilink("My Wikilink", nback=NBack(1, 1))
    )
    nback_wildcard = (
        "[[^*My Wikilink]]",
        Wikilink("My Wikilink", nback=NBack(wildcard=True))
    )
    nback_range = (
        "[[^{1,3}Link]]",
        Wikilink("Link", nback=NBack(1, 3))
    )
    nback_start = (
        "[[^{2,}Link]]",
        Wikilink("Link", nback=NBack(start=2))
    )
    nback_end = (
        "[[^{,3}Link]]",
        Wikilink("Link", nback=NBack(end=3))
    )
    nback_end_shorthand = (
        "[[^{3}Link]]",
        Wikilink("Link", nback=NBack(end=3))
    )


def pad_garbage(string: str) -> str:
    """Pad a string with garbage text"""
    fake = Faker()
    return fake.paragraph() + " " + string + " " + fake.paragraph()
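
# e.g. (illustrative; Faker output is random per run):
#   pad_garbage("[[Link]]") -> "<random sentences> [[Link]] <random sentences>"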

@pytest.mark.parametrize(
    "test_string,expected",
    [TestStr.base])
def test_wikilink(test_string, expected):
    """
    Parse a string with a basic wikilink in it
    """
    test_string = pad_garbage(test_string)
    wl = Wikilink.parse(test_string)
    assert len(wl) == 1
    assert wl[0] == expected


@pytest.mark.parametrize(
    "test_string,expected",
    [TestStr.multi])
def test_wikilinks(test_string, expected):
    """
    Parse a string that has multiple wikilinks
    """
    test_string = pad_garbage(test_string)
    wl = Wikilink.parse(test_string)
    assert len(wl) == 2
    assert wl[0] == expected[0]
    assert wl[1] == expected[1]

@pytest.mark.parametrize(
    "test_string,expected",
    [TestStr.nback_one])
def test_nback_one(test_string, expected):
    test_string = pad_garbage(test_string)
    wl = Wikilink.parse(test_string)
    assert len(wl) == 1
    assert wl[0] == expected


@pytest.mark.parametrize(
    "test_string,expected",
    [TestStr.nback_wildcard])
def test_nback_all(test_string, expected):
    test_string = pad_garbage(test_string)
    wl = Wikilink.parse(test_string)
    assert len(wl) == 1
    assert wl[0] == expected


@pytest.mark.parametrize(
    "test_string,expected",
    [TestStr.nback_range])
def test_nback_range_full(test_string, expected):
    test_string = pad_garbage(test_string)
    wl = Wikilink.parse(test_string)
    assert len(wl) == 1
    assert wl[0] == expected


@pytest.mark.parametrize(
    "test_string,expected",
    [TestStr.nback_start])
def test_nback_range_start(test_string, expected):
    test_string = pad_garbage(test_string)
    wl = Wikilink.parse(test_string)
    assert len(wl) == 1
    assert wl[0] == expected


@pytest.mark.parametrize(
    "test_string,expected",
    [TestStr.nback_end, TestStr.nback_end_shorthand])
def test_nback_range_end(test_string, expected):
    test_string = pad_garbage(test_string)
    wl = Wikilink.parse(test_string)
    assert len(wl) == 1
    assert wl[0] == expected

def test_triplet_full():
    pass


def test_triplet_implicit_single():
    """Test an implicit triplet in a single message"""
    pass


def test_triplet_implicit_thread():
    """Test an implicit triplet where the subject is higher up in the thread"""
    pass

View file

@@ -1,5 +1,5 @@
from wiki_postbot.actions.action import Action, Result
from wiki_postbot.patterns import WIKILINK → from wiki_postbot.patterns.wikilink import WIKILINK
from tweepy import Response
import re

View file

@@ -18,44 +18,6 @@ class WikiLink(Inline):
    """
    Detect a wikilink and add it to the wiki!
    This action uses an extended wikilink syntax that includes

    * **n-back links** - allows the user to specify messages in threads that are not the initiating message, and
    * **Semantic wikilinks** - specify a triplet subject-predicate-object link

    In each of the following examples, `LINK` is a placeholder for the text of the wikilink to be made.

    # N-Back Links

    For all of these, whitespace in-between the n-back specifier and the link text will be ignored. So
    `[[^LINK]]` and `[[^ LINK]]` are both valid.

    * **Preceding Message** - `[[^LINK]]`
    * **Entire Preceding Thread** - `[[^*LINK]]`
    * **Ranges**
    ** **Fully specified** - `[[^{n,m}LINK]]` where `n` and `m` are the start and end of the range to be included, inclusive.
       e.g. `[[^{2,5}LINK]]` would specify four messages: the 2nd one above the initiating message through the 5th, and
       `n == 0` indicates the initiating message.
    ** **End specified** - `[[^{,m}LINK]]` OR `[[^{m}LINK]]` - include the initiating message and the `m` messages above it.
    ** **Start specified** - `[[^{n,}LINK]]` - include the `n`th message above the initiating message and all messages in the thread before it.

    # Semantic Wikilinks

    Semantic wikilinks create a subject, predicate, object triplet. The subject is the page that the link is created on.
    Semantic wikilinks use `::` as a delimiter between terms, and the presence of `::` indicates that a wikilink is semantic.
    `SUB`, `PRED`, and `OBJ` are placeholders for the parts of a triplet in the following examples.

    * **Complete Triplet** - `[[SUB::PRED::OBJ]]` - create a semantic wikilink on the `SUB`ject page that links to the
      `OBJ`ect page with the indicated predicate,
      e.g. `[[Paper::Has DOI::https://doi.org/10.xxx/yyyy]]`
    * **Implicit Triplet** - `[[PRED::OBJ]]` after a `[[SUB]]` wikilink has been previously used in the message or thread.
      A subject can also be declared with a complete triplet.
    .. note:
    These commands will not include the full text of messages from users that have not opted in to the bot,

View file

@@ -0,0 +1 @@
from wiki_postbot.patterns.patterns import Pattern

View file

@@ -2,3 +2,10 @@
Basic regex patterns that are simply `re.compile`d
"""

class Pattern:
    """
    Base class for detecting patterns.

    Not sure what should go here yet, but defining it for the sake of structure.
    """

View file

@@ -1,8 +1,175 @@
import re
from typing import Optional, Union, List

import pyparsing as pp

from wiki_postbot.patterns import Pattern

WIKILINK = re.compile(r'\[\[(.*?)\]\]', re.IGNORECASE)
"""
Basic structure of a wikilink, used to detect presence
"""

class NBack:
    """
    Specification of which preceding message(s) in a thread an n-back wikilink refers to.
    """

    FIELDS = ('wildcard', 'start', 'end')

    def __init__(self, start: Optional[int] = None, end: Optional[int] = None,
                 wildcard: Optional[Union[str, bool]] = None,
                 one: Optional[str] = None):
        if wildcard:
            self.wildcard = True
            self.start = None
            self.end = None
            return
        else:
            self.wildcard = False

        if one:
            self.start = 1
            self.end = 1
        else:
            if start is not None:
                start = int(start)
            if end is not None:
                end = int(end)
            self.start = start
            self.end = end

        if self.start is not None and self.end is not None:
            if self.start > self.end:
                raise ValueError(f"Start value must be less than or equal to end value, got start:{self.start}, end:{self.end}")

    @classmethod
    def make_parser(cls) -> pp.ParserElement:
        # --------------------------------------------------
        # n-back links immediately follow the [[ and can be one of
        # ^
        # ^*
        # ^{n,m}
        # ^{n,}
        # ^{,m}
        # ^{m}

        # make elements
        caret = pp.Literal("^")
        lcurly = pp.Literal('{').suppress()
        rcurly = pp.Literal('}').suppress()
        integer = pp.Word(pp.nums)
        comma = pp.Literal(',').suppress()
        nb_range = caret + lcurly

        # combine into matches
        nb_wildcard = caret.suppress() + "*"
        # start or end can be omitted if comma is present
        nb_full = nb_range + pp.Optional(integer("start")) + comma + pp.Optional(integer("end")) + rcurly
        # if no comma present, it's just an end
        nb_end = nb_range + integer("end") + rcurly

        # combine into full nback parser
        nback = pp.Group(nb_wildcard('wildcard') | nb_full | nb_end | caret("one")).set_results_name("nback")
        return nback

    def __eq__(self, other: 'NBack'):
        return all([getattr(self, f) == getattr(other, f) for f in self.FIELDS])
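
# Illustrative sketch of NBack semantics (grounded in __init__ above): the parser's
# named fields feed the constructor, so "^{2,5}" yields NBack(start="2", end="5"),
# which is coerced to ints.
#
#   assert NBack(one="^") == NBack(1, 1)     # [[^LINK]]: just the preceding message
#   assert NBack(wildcard="*").wildcard      # [[^*LINK]]: the whole preceding thread
#   assert NBack(start=2).end is None        # [[^{2,}LINK]]: open-ended range
#   # NBack(5, 2) raises ValueError, since start must be <= end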

class Wikilink(Pattern):
    """
    Pattern for detecting wikilinks!

    This pattern implements an extended wikilink syntax that includes

    * **n-back links** - allows the user to specify messages in threads that are not the initiating message, and
    * **Semantic wikilinks** - specify a triplet subject-predicate-object link

    In each of the following examples, `LINK` is a placeholder for the text of the wikilink to be made.

    # N-Back Links

    For all of these, whitespace in-between the n-back specifier and the link text will be ignored. So
    `[[^LINK]]` and `[[^ LINK]]` are both valid.

    * **Preceding Message** - `[[^LINK]]`
    * **Entire Preceding Thread** - `[[^*LINK]]`
    * **Ranges**
    ** **Fully specified** - `[[^{n,m}LINK]]` where `n` and `m` are the start and end of the range to be included, inclusive.
       e.g. `[[^{2,5}LINK]]` would specify four messages: the 2nd one above the initiating message through the 5th, and
       `n == 0` indicates the initiating message.
    ** **End specified** - `[[^{,m}LINK]]` OR `[[^{m}LINK]]` - include the initiating message and the `m` messages above it.
    ** **Start specified** - `[[^{n,}LINK]]` - include the `n`th message above the initiating message and all messages in the thread before it.

    # Semantic Wikilinks

    Semantic wikilinks create a subject, predicate, object triplet. The subject is the page that the link is created on.
    Semantic wikilinks use `::` as a delimiter between terms, and the presence of `::` indicates that a wikilink is semantic.
    `SUB`, `PRED`, and `OBJ` are placeholders for the parts of a triplet in the following examples.

    * **Complete Triplet** - `[[SUB::PRED::OBJ]]` - create a semantic wikilink on the `SUB`ject page that links to the
      `OBJ`ect page with the indicated predicate,
      e.g. `[[Paper::Has DOI::https://doi.org/10.xxx/yyyy]]`
    * **Implicit Triplet** - `[[PRED::OBJ]]` after a `[[SUB]]` wikilink has been previously used in the message or thread.
      A subject can also be declared with a complete triplet.
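
    Examples of the n-back forms as they parse (mirroring this commit's tests; triplet
    parsing is still stubbed):

    * `[[My Basic Wikilink]]` -> `Wikilink("My Basic Wikilink")`
    * `[[^My Wikilink]]` -> `Wikilink("My Wikilink", nback=NBack(1, 1))`
    * `[[^*My Wikilink]]` -> `Wikilink("My Wikilink", nback=NBack(wildcard=True))`
    * `[[^{2,}Link]]` -> `Wikilink("Link", nback=NBack(start=2))`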
"""
FIELDS = ('link', 'nback', 'predicate', 'object', 'section')
def __init__(
self,
link: str,
nback: Optional[Union[NBack, tuple, dict]] = None,
predicate: Optional[str] = None,
object: Optional[str] = None,
section: Optional[str] = None,
**kwargs):
super(Wikilink, self).__init__(**kwargs)
self.link = link
if isinstance(nback, (tuple, list)):
nback = NBack(*nback)
elif isinstance(nback, dict):
nback = NBack(**nback)
elif isinstance(nback, pp.ParseResults):
nback = NBack(**dict(nback))
self.nback = nback
self.predicate = predicate
self.object = object
self.section = section

    @classmethod
    def make_parser(cls) -> pp.ParserElement:
        """
        Make the parser to detect wikilinks!
        """
        # All wikilinks start with [[ and end with ]]
        lbracket = pp.Literal('[[').suppress()
        rbracket = pp.Literal(']]').suppress()

        # nback parser
        nback = NBack.make_parser()

        # main wikilink subject text
        link = pp.Word(pp.printables + " ", excludeChars="#[]{}|")

        # Combine all
        parser = lbracket + pp.Optional(nback) + link("link") + rbracket
        return parser

    @classmethod
    def parse(cls, string: str, return_parsed: bool = False) -> List['Wikilink']:
        parser = cls.make_parser()
        results = parser.search_string(string)
        if return_parsed:
            return results
        else:
            return [Wikilink(**dict(res.items())) for res in results]

    def __eq__(self, other: 'Wikilink'):
        return all(getattr(self, f) == getattr(other, f) for f in self.FIELDS)
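
# A quick usage sketch of the parser above (illustrative; mirrors the tests added in
# this commit):
#
#   links = Wikilink.parse("some text [[^{2,5}My Page]] more text")
#   assert len(links) == 1
#   assert links[0].link == "My Page"
#   assert links[0].nback == NBack(2, 5)
#
#   # return_parsed=True skips Wikilink construction and returns the raw
#   # pyparsing ParseResults instead
#   raw = Wikilink.parse("[[Plain Link]]", return_parsed=True)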