nback links completed

This commit is contained in:
sneakers-the-rat 2022-10-11 17:52:35 -07:00
parent cfefa71471
commit 8d2b92e319
8 changed files with 338 additions and 55 deletions

43
poetry.lock generated
View File

@ -58,6 +58,17 @@ python-versions = "*"
[package.extras]
test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"]
[[package]]
name = "faker"
version = "15.1.0"
description = "Faker is a Python package that generates fake data for you."
category = "dev"
optional = false
python-versions = ">=3.7"
[package.dependencies]
python-dateutil = ">=2.4"
[[package]]
name = "idna"
version = "3.3"
@ -138,7 +149,7 @@ python-versions = ">=3.6"
name = "pyparsing"
version = "3.0.9"
description = "pyparsing module - Classes and methods to define and execute parsing grammars"
category = "dev"
category = "main"
optional = false
python-versions = ">=3.6.8"
@ -166,6 +177,17 @@ tomli = ">=1.0.0"
[package.extras]
testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
[[package]]
name = "python-dateutil"
version = "2.8.2"
description = "Extensions to the standard Python datetime module"
category = "dev"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
[package.dependencies]
six = ">=1.5"
[[package]]
name = "pywikibot"
version = "7.7.0"
@ -246,6 +268,14 @@ pygments = ">=2.6.0,<3.0.0"
[package.extras]
jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"]
[[package]]
name = "six"
version = "1.16.0"
description = "Python 2 and 3 compatibility utilities"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
[[package]]
name = "tomli"
version = "2.0.1"
@ -289,7 +319,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[metadata]
lock-version = "1.1"
python-versions = "^3.9"
content-hash = "bb1b2a68b8228c090421801d8c46f07bec2d60bece2f23f77f5920faa6b2de87"
content-hash = "c5c99285ff355e2be2ef7a9133813b15c6b445b52f191f5720911d059b937750"
[metadata.files]
atomicwrites = [
@ -316,6 +346,7 @@ commonmark = [
{file = "commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"},
{file = "commonmark-0.9.1.tar.gz", hash = "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60"},
]
faker = []
idna = [
{file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"},
{file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"},
@ -355,6 +386,10 @@ pytest = [
{file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"},
{file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"},
]
python-dateutil = [
{file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
{file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
]
pywikibot = []
requests = [
{file = "requests-2.28.0-py3-none-any.whl", hash = "sha256:bc7861137fbce630f17b03d3ad02ad0bf978c844f3536d0edda6499dafce2b6f"},
@ -368,6 +403,10 @@ rich = [
{file = "rich-12.4.4-py3-none-any.whl", hash = "sha256:d2bbd99c320a2532ac71ff6a3164867884357da3e3301f0240090c5d2fdac7ec"},
{file = "rich-12.4.4.tar.gz", hash = "sha256:4c586de507202505346f3e32d1363eb9ed6932f0c2f63184dea88983ff4971e2"},
]
six = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
]
tomli = [
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},

View File

@ -17,9 +17,11 @@ tweepy = "^4.10.0"
rich = "^12.4.4"
parse = "^1.19.0"
pywikibot = "^7.7.0"
pyparsing = "^3.0.9"
[tool.poetry.dev-dependencies]
pytest = "^7.1.2"
Faker = "^15.1.0"
[build-system]
requires = ["poetry-core>=1.0.0"]

View File

@ -1,28 +1,133 @@
from wiki_postbot.patterns.wikilink import Wikilink, NBack
import pytest
from faker import Faker
import typing
import pdb
class TestStr:
    """
    Fixture data for the wikilink parsing tests.

    Every attribute is a ``(text, expected)`` pair: ``text`` is the raw markup
    handed to the parser and ``expected`` is the ``Wikilink`` (for ``multi``, the
    list of ``Wikilink`` objects) the parsed result is compared against.
    """
    # plain link without any modifier
    base = ("[[My Basic Wikilink]]", Wikilink("My Basic Wikilink"))

    # two links in one string, separated by ordinary text
    multi = (
        "[[Link1]] other text. [[Link2]]",
        [Wikilink("Link1"), Wikilink("Link2")],
    )

    # bare caret
    nback_one = ("[[^My Wikilink]]", Wikilink("My Wikilink", nback=NBack(1,1)))

    # caret + asterisk
    nback_wildcard = ("[[^*My Wikilink]]", Wikilink("My Wikilink", nback=NBack(wildcard=True)))

    # range forms: {n,m}, {n,}, {,m} and the {m} shorthand
    nback_range = ("[[^{1,3}Link]]", Wikilink("Link", nback=NBack(1,3)))
    nback_start = ("[[^{2,}Link]]", Wikilink("Link", nback=NBack(start=2)))
    nback_end = ("[[^{,3}Link]]", Wikilink("Link", nback=NBack(end=3)))
    nback_end_shorthand = ("[[^{3}Link]]", Wikilink("Link", nback=NBack(end=3)))
def test_wikilink():
pass
def pad_garbage(string:str, seed:typing.Optional[int]=None) -> str:
    """
    Surround a string with generated filler text.

    One fake paragraph is placed before ``string`` and one after it, each
    separated from ``string`` by a single space.

    Args:
        string: The text to embed in filler.
        seed: Optional seed for the fake-text generator. Passing a seed makes
            the padding repeatable (for a given Faker version), so a failing
            test can be reproduced. With ``None`` (the default) the generator
            is left unseeded, exactly as before.

    Returns:
        ``"<paragraph> <string> <paragraph>"``
    """
    fake = Faker()
    if seed is not None:
        # seed only this generator instance, not Faker's shared random state
        fake.seed_instance(seed)
    return fake.paragraph() + " " + string + " " + fake.paragraph()
def test_nback_one():
pass
def test_nback_all():
pass
@pytest.mark.parametrize("test_string,expected", [TestStr.base])
def test_wikilink(test_string, expected):
    """
    A single basic wikilink embedded in filler text is found and parsed.
    """
    padded = pad_garbage(test_string)
    found = Wikilink.parse(padded)
    assert len(found) == 1
    assert found[0] == expected
def test_nback_range_full():
pass
@pytest.mark.parametrize("test_string,expected", [TestStr.multi])
def test_wikilinks(test_string, expected):
    """
    Two wikilinks in the same string are both found, in order of appearance.
    """
    padded = pad_garbage(test_string)
    found = Wikilink.parse(padded)
    assert len(found) == 2
    first, second = found[0], found[1]
    assert first == expected[0]
    assert second == expected[1]
def test_nback_range_start():
pass
def test_nback_range_end():
pass
@pytest.mark.parametrize("test_string,expected", [TestStr.nback_one])
def test_nback_one(test_string, expected):
    """
    A bare-caret link (``[[^LINK]]``) parses to the expected n-back wikilink.
    """
    padded = pad_garbage(test_string)
    found = Wikilink.parse(padded)
    assert len(found) == 1
    assert found[0] == expected
@pytest.mark.parametrize("test_string,expected", [TestStr.nback_wildcard])
def test_nback_all(test_string, expected):
    """
    A wildcard link (``[[^*LINK]]``) parses to the expected n-back wikilink.
    """
    padded = pad_garbage(test_string)
    found = Wikilink.parse(padded)
    assert len(found) == 1
    assert found[0] == expected
@pytest.mark.parametrize("test_string,expected", [TestStr.nback_range])
def test_nback_range_full(test_string, expected):
    """
    A fully specified range (``[[^{n,m}LINK]]``) parses to the expected wikilink.
    """
    padded = pad_garbage(test_string)
    found = Wikilink.parse(padded)
    assert len(found) == 1
    assert found[0] == expected
@pytest.mark.parametrize("test_string,expected", [TestStr.nback_start])
def test_nback_range_start(test_string, expected):
    """
    A start-only range (``[[^{n,}LINK]]``) parses to the expected wikilink.
    """
    padded = pad_garbage(test_string)
    found = Wikilink.parse(padded)
    assert len(found) == 1
    assert found[0] == expected
@pytest.mark.parametrize(
    "test_string,expected",
    [TestStr.nback_end, TestStr.nback_end_shorthand],
)
def test_nback_range_end(test_string, expected):
    """
    End-only ranges, written as ``[[^{,m}LINK]]`` or the ``[[^{m}LINK]]``
    shorthand, parse to the expected wikilink.
    """
    padded = pad_garbage(test_string)
    found = Wikilink.parse(padded)
    assert len(found) == 1
    assert found[0] == expected
def test_triplet_full():
    """Placeholder with no assertions yet (presumably for fully specified triplets - TODO confirm)."""
    return None
def test_triplet_implicit_single():
    """Placeholder: implicit triplet contained in a single message (no assertions yet)."""
    return None
def test_triplet_implicit_thread():
    """Placeholder: implicit triplet whose subject sits higher up in the thread (no assertions yet)."""
    return None

View File

@ -1,5 +1,5 @@
from wiki_postbot.actions.action import Action, Result
from wiki_postbot.patterns import WIKILINK
from wiki_postbot.patterns.wikilink import WIKILINK
from tweepy import Response
import re

View File

@ -18,44 +18,6 @@ class WikiLink(Inline):
"""
Detect a wikilink and add it to the wiki!
This action uses an extended wikilink syntax that includes
* **n-back links** - allows the user to specify messages in threads that are not the initiating message, and
* **Semantic wikilinks** - specify a triplet subject-predicate-object link
In each of the following examples, `LINK` is a placeholder for the text of the wikilink to be made.
# N-Back Links
For all of these, whitespace in-between the n-back specifier and the link text will be ignored. So
`[[^LINK]]` and `[[^ LINK]]` are both valid.
* **Preceding Message** - `[[^LINK]]`
* **Entire Preceding Thread** - `[[^*LINK]]`
* **Ranges**
** **Fully specified** - `[[^{n,m}LINK]]` where `n` and `m` are the start and end of the range to be included, inclusive.
eg. `[[^{2,5}LINK]]` would specify four messages: the 2nd one above the initiating message through the 5th, and
`n == 0` indicates the initiating message.
** **End specified** - `[[^{,m}LINK]]` OR `[[^{m}LINK]]` - include the initiating message and the `m` messages above it.
** **Start specified** - `[[^{n,}LINK]]` - include all preceding messages in the thread before the `nth` message
# Semantic Wikilinks
Semantic wikilinks create a subject, predicate, object triplet. The subject is the page on which the semantic link is created.
Semantic wikilinks use `::` as a delimiter between terms, and a `::` indicates that a wikilink is semantic.
`SUB`, `PRED`, and `OBJ` are placeholders for the parts of
a triplet in the following examples.
* **Complete Triplet** - `[[SUB::PRED::OBJ]]` - create a semantic wikilink on the `SUB`ject page that links to the
`OBJ`ect page with the indicated predicate.
eg. `[[Paper::Has DOI::https://doi.org/10.xxx/yyyy]]`
* **Implicit Triplet** - `[[PRED::OBJ]]` after a `[[SUB]]` wikilink has been previously used in the message or thread.
A subject can also be declared with a complete triplet.
.. note::
These commands will not include the full text of messages from users that have not opted in to the bot,

View File

@ -0,0 +1 @@
from wiki_postbot.patterns.patterns import Pattern

View File

@ -2,3 +2,10 @@
Basic regex patterns that are simply `re.compile`d
"""
class Pattern:
    """
    Base class for pattern detectors.

    Currently an empty placeholder that exists only to give pattern classes a
    common parent; it defines no attributes or methods of its own.
    """

View File

@ -1,8 +1,175 @@
import re
from wiki_postbot.patterns import Pattern
from dataclasses import dataclass
from typing import Optional, Union, List
import pyparsing as pp
WIKILINK = re.compile(r'\[\[(.*?)\]\]', re.IGNORECASE)
"""
Basic structure of wikilink, used to detect presence
"""
class
class NBack:
    """
    The n-back part of a wikilink: which messages of a thread the link refers to.

    An n-back specifier immediately follows the opening ``[[`` and has one of
    these forms:

    * ``^``      - ``start`` and ``end`` are both 1
    * ``^*``     - wildcard; ``start`` and ``end`` are ``None``
    * ``^{n,m}`` - ``start`` is n, ``end`` is m
    * ``^{n,}``  - only ``start`` is set
    * ``^{,m}``  - only ``end`` is set
    * ``^{m}``   - shorthand for ``^{,m}``

    Two instances are equal when their ``wildcard``, ``start`` and ``end``
    values are equal.
    """
    FIELDS = ('wildcard', 'start', 'end')

    def __init__(self, start:Optional[Union[int,str]]=None, end:Optional[Union[int,str]]=None,
                 wildcard:Optional[Union[str,bool]]=None,
                 one:Optional[str]=None):
        """
        Args:
            start: First message of the range. Anything ``int()`` accepts, so
                the digit strings produced by the parser work as well.
            end: Last message of the range, same conversion as ``start``.
            wildcard: Any truthy value marks a wildcard; ``start`` and ``end``
                are then ignored and stored as ``None``.
            one: Any truthy value (the parser passes the matched ``^``) sets
                ``start`` and ``end`` to 1, ignoring the values given for them.

        Raises:
            ValueError: If both ``start`` and ``end`` are given and
                ``start`` is greater than ``end``.
        """
        if wildcard:
            self.wildcard = True
            self.start = None
            self.end = None
            return

        self.wildcard = False
        if one:
            self.start = 1
            self.end = 1
            return

        # the parser hands over digit strings, so normalise to int
        self.start = int(start) if start is not None else None
        self.end = int(end) if end is not None else None

        if self.start is not None and self.end is not None and self.start > self.end:
            raise ValueError(
                f"Start value must be less than or equal to end value, got start:{self.start}, end:{self.end}")

    @classmethod
    def make_parser(cls) -> 'pp.ParserElement':
        """
        Build the pyparsing grammar for an n-back specifier.

        The returned element is a group carrying the results name ``nback``.
        Inside it, the named results ``wildcard``, ``start``, ``end`` and
        ``one`` correspond to the keyword arguments of ``__init__``.
        """
        # --------------------------------------------------
        # n-back links immediately follow the [[ and can be one of
        # ^
        # ^*
        # ^{n,m}
        # ^{n,}
        # ^{,m}
        # ^{m}

        # make elements
        caret = pp.Literal("^")
        lcurly = pp.Literal('{').suppress()
        rcurly = pp.Literal('}').suppress()
        integer = pp.Word(pp.nums)
        comma = pp.Literal(',').suppress()
        nb_range = caret + lcurly

        # combine into matches
        nb_wildcard = caret.suppress() + "*"
        # start or end can be omitted if comma is present
        nb_full = nb_range + pp.Optional(integer("start")) + comma + pp.Optional(integer("end")) + rcurly
        # if no comma present, it's just an end
        nb_end = nb_range + integer("end") + rcurly

        # combine into full nback parser; the bare caret is tried last so the
        # longer forms that also begin with "^" get the first chance to match
        nback = pp.Group(nb_wildcard('wildcard') | nb_full | nb_end | caret("one")).set_results_name("nback")
        return nback

    def __eq__(self, other:'NBack'):
        # Defer to Python's fallback for foreign types (e.g. ``None``) instead of
        # raising AttributeError while looking up FIELDS on them.
        if not isinstance(other, NBack):
            return NotImplemented
        return all(getattr(self, f) == getattr(other, f) for f in self.FIELDS)

    def __repr__(self) -> str:
        return f"NBack(start={self.start!r}, end={self.end!r}, wildcard={self.wildcard!r})"
class Wikilink(Pattern):
    """
    Pattern for detecting wikilinks!

    This pattern implements an extended wikilink syntax that includes

    * **n-back links** - allows the user to specify messages in threads that are not the initiating message, and
    * **Semantic wikilinks** - specify a triplet subject-predicate-object link

    In each of the following examples, `LINK` is a placeholder for the text of the wikilink to be made.

    # N-Back Links

    For all of these, whitespace in-between the n-back specifier and the link text will be ignored. So
    `[[^LINK]]` and `[[^ LINK]]` are both valid.

    * **Preceding Message** - `[[^LINK]]`
    * **Entire Preceding Thread** - `[[^*LINK]]`
    * **Ranges**
    ** **Fully specified** - `[[^{n,m}LINK]]` where `n` and `m` are the start and end of the range to be included, inclusive.
       eg. `[[^{2,5}LINK]]` would specify four messages: the 2nd one above the initiating message through the 5th, and
       `n == 0` indicates the initiating message.
    ** **End specified** - `[[^{,m}LINK]]` OR `[[^{m}LINK]]` - include the initiating message and the `m` messages above it.
    ** **Start specified** - `[[^{n,}LINK]]` - include all preceding messages in the thread before the `nth` message

    # Semantic Wikilinks

    Semantic wikilinks create a subject, predicate, object triplet. The subject is the page on which the
    semantic link is created.

    Semantic wikilinks use `::` as a delimiter between terms, and a `::` indicates that a wikilink is semantic.

    `SUB`, `PRED`, and `OBJ` are placeholders for the parts of
    a triplet in the following examples.

    * **Complete Triplet** - `[[SUB::PRED::OBJ]]` - create a semantic wikilink on the `SUB`ject page that links to the
      `OBJ`ect page with the indicated predicate.
      eg. `[[Paper::Has DOI::https://doi.org/10.xxx/yyyy]]`
    * **Implicit Triplet** - `[[PRED::OBJ]]` after a `[[SUB]]` wikilink has been previously used in the message or thread.
      A subject can also be declared with a complete triplet.

    .. note::

        The parser built by :meth:`make_parser` currently produces only the ``link`` and ``nback``
        results; ``predicate``, ``object`` and ``section`` are set only when passed to the constructor.
    """
    FIELDS = ('link', 'nback', 'predicate', 'object', 'section')

    def __init__(
            self,
            link: str,
            nback: Optional[Union[NBack, tuple, list, dict, pp.ParseResults]] = None,
            predicate: Optional[str] = None,
            object: Optional[str] = None,
            section: Optional[str] = None,
            **kwargs):
        """
        Args:
            link: Text of the link.
            nback: N-back specifier. An ``NBack`` is stored as given; a tuple or
                list is expanded as positional arguments to ``NBack``; a dict or
                pyparsing ``ParseResults`` is expanded as keyword arguments.
            predicate: Stored as given.
            object: Stored as given.
            section: Stored as given.
            **kwargs: Forwarded to the parent class constructor.
        """
        super().__init__(**kwargs)
        self.link = link

        # normalise every accepted spelling of the n-back specifier to an NBack
        if isinstance(nback, (tuple, list)):
            nback = NBack(*nback)
        elif isinstance(nback, dict):
            nback = NBack(**nback)
        elif isinstance(nback, pp.ParseResults):
            nback = NBack(**dict(nback))

        self.nback = nback
        self.predicate = predicate
        self.object = object
        self.section = section

    @classmethod
    def make_parser(cls) -> pp.ParserElement:
        """
        Make the parser to detect wikilinks!

        The grammar is ``[[``, an optional n-back specifier, the link text and
        ``]]``. The brackets are suppressed from the results; the link text is
        available under the results name ``link``.
        """
        # All wikilinks start with [[ and end with ]]
        lbracket = pp.Literal('[[').suppress()
        rbracket = pp.Literal(']]').suppress()

        # nback parser
        nback = NBack.make_parser()

        # main wikilink subject text
        link = pp.Word(pp.printables + " ", excludeChars="#[]{}|")

        # Combine all
        parser = lbracket + pp.Optional(nback) + link("link") + rbracket
        return parser

    @classmethod
    def parse(cls, string:str, return_parsed:bool=False) -> Union[List['Wikilink'], pp.ParseResults]:
        """
        Find every wikilink in a string.

        Args:
            string: Text to search.
            return_parsed: If ``True``, return the raw pyparsing results of the
                search instead of building link objects.

        Returns:
            A list with one instance of this class per match, or the raw
            pyparsing results when ``return_parsed`` is ``True``.
        """
        parser = cls.make_parser()
        results = parser.search_string(string)
        if return_parsed:
            return results
        # build through ``cls`` so subclasses get instances of their own type
        return [cls(**dict(res.items())) for res in results]

    def __eq__(self, other:'Wikilink'):
        # Defer to Python's fallback for foreign types instead of raising
        # AttributeError while looking up FIELDS on them.
        if not isinstance(other, Wikilink):
            return NotImplemented
        return all(getattr(self, f) == getattr(other, f) for f in self.FIELDS)

    def __repr__(self) -> str:
        fields = ", ".join(f"{f}={getattr(self, f)!r}" for f in self.FIELDS)
        return f"{type(self).__name__}({fields})"