nback links completed
This commit is contained in:
parent
cfefa71471
commit
8d2b92e319
8 changed files with 338 additions and 55 deletions
43
poetry.lock
generated
43
poetry.lock
generated
|
@ -58,6 +58,17 @@ python-versions = "*"
|
|||
[package.extras]
|
||||
test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"]
|
||||
|
||||
[[package]]
|
||||
name = "faker"
|
||||
version = "15.1.0"
|
||||
description = "Faker is a Python package that generates fake data for you."
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
|
||||
[package.dependencies]
|
||||
python-dateutil = ">=2.4"
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "3.3"
|
||||
|
@ -138,7 +149,7 @@ python-versions = ">=3.6"
|
|||
name = "pyparsing"
|
||||
version = "3.0.9"
|
||||
description = "pyparsing module - Classes and methods to define and execute parsing grammars"
|
||||
category = "dev"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.6.8"
|
||||
|
||||
|
@ -166,6 +177,17 @@ tomli = ">=1.0.0"
|
|||
[package.extras]
|
||||
testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
|
||||
|
||||
[[package]]
|
||||
name = "python-dateutil"
|
||||
version = "2.8.2"
|
||||
description = "Extensions to the standard Python datetime module"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
|
||||
|
||||
[package.dependencies]
|
||||
six = ">=1.5"
|
||||
|
||||
[[package]]
|
||||
name = "pywikibot"
|
||||
version = "7.7.0"
|
||||
|
@ -246,6 +268,14 @@ pygments = ">=2.6.0,<3.0.0"
|
|||
[package.extras]
|
||||
jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "six"
|
||||
version = "1.16.0"
|
||||
description = "Python 2 and 3 compatibility utilities"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
|
||||
|
||||
[[package]]
|
||||
name = "tomli"
|
||||
version = "2.0.1"
|
||||
|
@ -289,7 +319,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
|
|||
[metadata]
|
||||
lock-version = "1.1"
|
||||
python-versions = "^3.9"
|
||||
content-hash = "bb1b2a68b8228c090421801d8c46f07bec2d60bece2f23f77f5920faa6b2de87"
|
||||
content-hash = "c5c99285ff355e2be2ef7a9133813b15c6b445b52f191f5720911d059b937750"
|
||||
|
||||
[metadata.files]
|
||||
atomicwrites = [
|
||||
|
@ -316,6 +346,7 @@ commonmark = [
|
|||
{file = "commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"},
|
||||
{file = "commonmark-0.9.1.tar.gz", hash = "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60"},
|
||||
]
|
||||
faker = []
|
||||
idna = [
|
||||
{file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"},
|
||||
{file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"},
|
||||
|
@ -355,6 +386,10 @@ pytest = [
|
|||
{file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"},
|
||||
{file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"},
|
||||
]
|
||||
python-dateutil = [
|
||||
{file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
|
||||
{file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
|
||||
]
|
||||
pywikibot = []
|
||||
requests = [
|
||||
{file = "requests-2.28.0-py3-none-any.whl", hash = "sha256:bc7861137fbce630f17b03d3ad02ad0bf978c844f3536d0edda6499dafce2b6f"},
|
||||
|
@ -368,6 +403,10 @@ rich = [
|
|||
{file = "rich-12.4.4-py3-none-any.whl", hash = "sha256:d2bbd99c320a2532ac71ff6a3164867884357da3e3301f0240090c5d2fdac7ec"},
|
||||
{file = "rich-12.4.4.tar.gz", hash = "sha256:4c586de507202505346f3e32d1363eb9ed6932f0c2f63184dea88983ff4971e2"},
|
||||
]
|
||||
six = [
|
||||
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
|
||||
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
|
||||
]
|
||||
tomli = [
|
||||
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
|
||||
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
|
||||
|
|
|
@ -17,9 +17,11 @@ tweepy = "^4.10.0"
|
|||
rich = "^12.4.4"
|
||||
parse = "^1.19.0"
|
||||
pywikibot = "^7.7.0"
|
||||
pyparsing = "^3.0.9"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
pytest = "^7.1.2"
|
||||
Faker = "^15.1.0"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
|
|
|
@ -1,28 +1,133 @@
|
|||
from wiki_postbot.patterns.wikilink import Wikilink, NBack
import pytest
from faker import Faker


class TestStr:
    """
    Fixture pairs of ``(input_string, expected_parse_result)`` for the
    wikilink parser, one attribute per syntax variant.
    """
    base = (
        "[[My Basic Wikilink]]",
        Wikilink("My Basic Wikilink")
    )
    multi = (
        "[[Link1]] other text. [[Link2]]",
        [Wikilink("Link1"), Wikilink("Link2")]
    )
    nback_one = (
        "[[^My Wikilink]]",
        Wikilink("My Wikilink", nback=NBack(1, 1))
    )
    nback_wildcard = (
        "[[^*My Wikilink]]",
        Wikilink("My Wikilink", nback=NBack(wildcard=True))
    )
    nback_range = (
        "[[^{1,3}Link]]",
        Wikilink("Link", nback=NBack(1, 3))
    )
    nback_start = (
        "[[^{2,}Link]]",
        Wikilink("Link", nback=NBack(start=2))
    )
    nback_end = (
        "[[^{,3}Link]]",
        Wikilink("Link", nback=NBack(end=3))
    )
    nback_end_shorthand = (
        "[[^{3}Link]]",
        Wikilink("Link", nback=NBack(end=3))
    )


def pad_garbage(string: str) -> str:
    """Pad a string with garbage text"""
    fake = Faker()
    return fake.paragraph() + " " + string + " " + fake.paragraph()


@pytest.mark.parametrize(
    "test_string,expected",
    [TestStr.base])
def test_wikilink(test_string, expected):
    """
    Parse a string with a basic wikilink in it
    """
    test_string = pad_garbage(test_string)
    wl = Wikilink.parse(test_string)
    assert len(wl) == 1
    assert wl[0] == expected


@pytest.mark.parametrize(
    "test_string,expected",
    [TestStr.multi])
def test_wikilinks(test_string, expected):
    """
    Parse a string that has multiple wikilinks
    """
    test_string = pad_garbage(test_string)
    wl = Wikilink.parse(test_string)
    assert len(wl) == 2
    assert wl[0] == expected[0]
    assert wl[1] == expected[1]


@pytest.mark.parametrize(
    "test_string,expected",
    [TestStr.nback_one])
def test_nback_one(test_string, expected):
    """Parse a preceding-message n-back link: ``[[^LINK]]``"""
    test_string = pad_garbage(test_string)
    wl = Wikilink.parse(test_string)
    assert len(wl) == 1
    assert wl[0] == expected


@pytest.mark.parametrize(
    "test_string,expected",
    [TestStr.nback_wildcard])
def test_nback_all(test_string, expected):
    """Parse a whole-thread n-back link: ``[[^*LINK]]``"""
    test_string = pad_garbage(test_string)
    wl = Wikilink.parse(test_string)
    assert len(wl) == 1
    assert wl[0] == expected


@pytest.mark.parametrize(
    "test_string,expected",
    [TestStr.nback_range])
def test_nback_range_full(test_string, expected):
    """Parse a fully-specified n-back range: ``[[^{n,m}LINK]]``"""
    test_string = pad_garbage(test_string)
    wl = Wikilink.parse(test_string)
    assert len(wl) == 1
    assert wl[0] == expected


@pytest.mark.parametrize(
    "test_string,expected",
    [TestStr.nback_start])
def test_nback_range_start(test_string, expected):
    """Parse a start-only n-back range: ``[[^{n,}LINK]]``"""
    test_string = pad_garbage(test_string)
    wl = Wikilink.parse(test_string)
    assert len(wl) == 1
    assert wl[0] == expected


@pytest.mark.parametrize(
    "test_string,expected",
    [TestStr.nback_end, TestStr.nback_end_shorthand])
def test_nback_range_end(test_string, expected):
    """Parse end-only n-back ranges: ``[[^{,m}LINK]]`` and ``[[^{m}LINK]]``"""
    test_string = pad_garbage(test_string)
    wl = Wikilink.parse(test_string)
    assert len(wl) == 1
    assert wl[0] == expected


def test_triplet_full():
    pass


def test_triplet_implicit_single():
    """Test an implicit triplet in a single message"""
    pass


def test_triplet_implicit_thread():
    """Test an implicit triplet where the subject is higher up in the thread"""
    pass
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from wiki_postbot.actions.action import Action, Result
|
||||
from wiki_postbot.patterns import WIKILINK
|
||||
from wiki_postbot.patterns.wikilink import WIKILINK
|
||||
from tweepy import Response
|
||||
import re
|
||||
|
||||
|
|
|
@ -18,44 +18,6 @@ class WikiLink(Inline):
|
|||
"""
|
||||
Detect a wikilink and add it to the wiki!
|
||||
|
||||
This action uses an extended wikilink syntax that includes
|
||||
|
||||
* **n-back links** - allows the user to specify messages in threads that are not the initiating message, and
|
||||
* **Semantic wikilinks** - specify a triplet subject-predicate-object link
|
||||
|
||||
In each of the following examples, `LINK` is a placeholder for the text of the wikilink to be made.
|
||||
|
||||
# N-Back Links
|
||||
|
||||
For all of these, whitespace in-between the n-back specifier and the link text will be ignored. So
|
||||
`[[^LINK]]` and `[[^ LINK]]` are both valid.
|
||||
|
||||
* **Preceding Message** - `[[^LINK]]`
|
||||
* **Entire Preceding Thread** - `[[^*LINK]]`
|
||||
* **Ranges**
|
||||
** **Fully specified** - `[[^{n,m}LINK]]` where `n` and `m` are the start and end of the range to be included, inclusive.
|
||||
eg. `[[^{2,5}LINK]]` would specify four messages: the 2nd one above the initiating message through the 5th, and
|
||||
`n == 0` indicates the initiating message.
|
||||
** **End specified** - `[[^{,m}LINK]]` OR `[[^{m}LINK]]` - include the initiating message and the `m` messages above it.
|
||||
** **Start specified** - `[[^{n,}LINK]]` - include all preceding messages in the thread before the `nth` message
|
||||
|
||||
# Semantic Wikilinks
|
||||
|
||||
Semantic wikilinks create a subject, predicate, object triplet. The subject will be the page that the
|
||||
|
||||
Semantic wikilinks use `::` as a delimiter between terms, and a `::` indicates that a wikilink is semantic.
|
||||
|
||||
`SUB`, `PRED`, and `OBJ` are placeholders for the parts of
|
||||
a triplet in the following examples.
|
||||
|
||||
* **Complete Triplet** - `[[SUB::PRED::OBJ]]` - create a semantic wikilink on the `SUB`ject page that links to the
|
||||
`OBJ`ect page with the indicated predicate.
|
||||
|
||||
eg. `[[Paper::Has DOI::https://doi.org/10.xxx/yyyy]]`
|
||||
|
||||
* **Implicit Triplet** - `[[PRED::OBJ]]` after a `[[SUB]]` wikilink has been previously used in the message or thread.
|
||||
A subject can also be declared with a complete triplet.
|
||||
|
||||
.. note:
|
||||
|
||||
These commands will not include the full text of messages from users that have not opted in to the bot,
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
from wiki_postbot.patterns.patterns import Pattern
|
|
@ -2,3 +2,10 @@
|
|||
Basic regex patterns that are simply `re.compile`d
|
||||
"""
|
||||
|
||||
class Pattern:
    """
    Base class for detecting patterns.

    Carries no behavior of its own yet; exists to give pattern
    implementations (e.g. ``Wikilink``) a common root for the
    sake of structure.
    """
|
||||
|
||||
|
|
|
@ -1,8 +1,175 @@
|
|||
import re
|
||||
from wiki_postbot.patterns import Pattern
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, Union, List
|
||||
import pyparsing as pp
|
||||
|
||||
|
||||
WIKILINK = re.compile(r'\[\[(.*?)\]\]', re.IGNORECASE)
"""
Basic structure of wikilink, used to detect presence.

Non-greedy capture, so several ``[[...]]`` links on one line match
separately. NOTE(review): the pattern contains no letters, so
``re.IGNORECASE`` appears to be a no-op here — confirm before removing.
"""
|
||||
|
||||
class NBack:
    """
    The range of messages "above" an initiating message that a wikilink
    refers to, parsed from an n-back specifier:

    * ``^`` — just the preceding message (``start == end == 1``)
    * ``^*`` — the entire preceding thread (``wildcard``)
    * ``^{n,m}`` / ``^{n,}`` / ``^{,m}`` / ``^{m}`` — an inclusive range;
      omitted bounds are stored as ``None``

    Args:
        start: first message of the range, or ``None`` if unbounded.
            String digits (as produced by the parser) are coerced to int.
        end: last message of the range, or ``None`` if unbounded.
        wildcard: truthy to select the whole preceding thread;
            takes precedence over ``start``/``end``.
        one: truthy (the matched ``^`` literal) to select exactly the
            single preceding message.

    Raises:
        ValueError: if both bounds are given and ``start > end``.
    """
    # Attribute names compared by __eq__
    FIELDS = ('wildcard', 'start', 'end')

    def __init__(self, start:Optional[int]=None, end:Optional[int]=None,
                 wildcard:Optional[Union[str,bool]]=None,
                 one:Optional[str]=None):

        if wildcard:
            # Whole-thread selection: explicit bounds are meaningless
            self.wildcard = True
            self.start = None
            self.end = None
            return
        else:
            self.wildcard = False

        if one:
            # Bare "^": exactly the single preceding message
            self.start = 1
            self.end = 1
        else:
            # The parser hands back strings; normalize to ints
            if start is not None:
                start = int(start)
            if end is not None:
                end = int(end)
            self.start = start
            self.end = end

        if self.start is not None and self.end is not None:
            # start > end is rejected; start == end (single message) is valid
            if self.start > self.end:
                raise ValueError(f"Start value must be less than or equal to end value, got start:{self.start}, end:{self.end}")

    @classmethod
    def make_parser(cls) -> "pp.ParserElement":
        """Build the pyparsing element that matches an n-back specifier."""
        # --------------------------------------------------
        # n-back links immediately follow the [[ and can be one of
        # ^
        # ^*
        # ^{n,m}
        # ^{n,}
        # ^{,m}
        # ^{m}

        # make elements
        caret = pp.Literal("^")
        lcurly = pp.Literal('{').suppress()
        rcurly = pp.Literal('}').suppress()
        integer = pp.Word(pp.nums)
        comma = pp.Literal(',').suppress()
        nb_range = caret + lcurly

        # combine into matches
        nb_wildcard = caret.suppress() + "*"
        # start or end can be omitted if comma is present
        nb_full = nb_range + pp.Optional(integer("start")) + comma + pp.Optional(integer("end")) + rcurly
        # if no comma present, it's just an end
        nb_end = nb_range + integer("end") + rcurly

        # combine into full nback parser
        nback = pp.Group(nb_wildcard('wildcard') | nb_full | nb_end | caret("one")).set_results_name("nback")
        return nback

    def __eq__(self, other:'NBack'):
        # Equal when wildcard, start, and end all match
        return all([getattr(self, f) == getattr(other, f) for f in self.FIELDS])
|
||||
|
||||
class Wikilink(Pattern):
    """
    Pattern for detecting wikilinks!

    This pattern implements an extended wikilink syntax that includes

    * **n-back links** - allows the user to specify messages in threads that are not the initiating message, and
    * **Semantic wikilinks** - specify a triplet subject-predicate-object link

    In each of the following examples, `LINK` is a placeholder for the text of the wikilink to be made.

    # N-Back Links

    For all of these, whitespace in-between the n-back specifier and the link text will be ignored. So
    `[[^LINK]]` and `[[^ LINK]]` are both valid.

    * **Preceding Message** - `[[^LINK]]`
    * **Entire Preceding Thread** - `[[^*LINK]]`
    * **Ranges**
    ** **Fully specified** - `[[^{n,m}LINK]]` where `n` and `m` are the start and end of the range to be included, inclusive.
       eg. `[[^{2,5}LINK]]` would specify four messages: the 2nd one above the initiating message through the 5th, and
       `n == 0` indicates the initiating message.
    ** **End specified** - `[[^{,m}LINK]]` OR `[[^{m}LINK]]` - include the initiating message and the `m` messages above it.
    ** **Start specified** - `[[^{n,}LINK]]` - include all preceding messages in the thread before the `nth` message

    # Semantic Wikilinks

    Semantic wikilinks create a subject, predicate, object triplet. The subject will be the page that the

    Semantic wikilinks use `::` as a delimiter between terms, and a `::` indicates that a wikilink is semantic.

    `SUB`, `PRED`, and `OBJ` are placeholders for the parts of
    a triplet in the following examples.

    * **Complete Triplet** - `[[SUB::PRED::OBJ]]` - create a semantic wikilink on the `SUB`ject page that links to the
      `OBJ`ect page with the indicated predicate.

      eg. `[[Paper::Has DOI::https://doi.org/10.xxx/yyyy]]`

    * **Implicit Triplet** - `[[PRED::OBJ]]` after a `[[SUB]]` wikilink has been previously used in the message or thread.
      A subject can also be declared with a complete triplet.
    """
    # Attribute names compared by __eq__
    FIELDS = ('link', 'nback', 'predicate', 'object', 'section')

    def __init__(
            self,
            link: str,
            nback: Optional[Union[NBack, tuple, dict]] = None,
            predicate: Optional[str] = None,
            object: Optional[str] = None,
            section: Optional[str] = None,
            **kwargs):
        """
        Args:
            link: the wikilink's page text.
            nback: n-back specifier; an :class:`NBack`, or a tuple/dict/
                pyparsing result that is normalized into one.
            predicate: predicate of a semantic triplet, if any.
            object: object of a semantic triplet, if any.
            section: page section the link targets, if any.
            **kwargs: passed through to :class:`Pattern`.
        """
        super(Wikilink, self).__init__(**kwargs)

        self.link = link
        # Accept several nback shapes and normalize to an NBack instance
        if isinstance(nback, (tuple, list)):
            nback = NBack(*nback)
        elif isinstance(nback, dict):
            nback = NBack(**nback)
        elif isinstance(nback, pp.ParseResults):
            nback = NBack(**dict(nback))

        self.nback = nback
        self.predicate = predicate
        self.object = object
        self.section = section

    @classmethod
    def make_parser(cls) -> "pp.ParserElement":
        """
        Make the parser to detect wikilinks!
        """
        # All wikilinks start with [[ and end with ]]
        lbracket = pp.Literal('[[').suppress()
        rbracket = pp.Literal(']]').suppress()

        # nback parser
        nback = NBack.make_parser()

        # main wikilink subject text
        link = pp.Word(pp.printables+ " ", excludeChars="#[]{}|")

        # Combine all
        parser = lbracket + pp.Optional(nback) + link("link") + rbracket
        return parser

    @classmethod
    def parse(cls, string:str, return_parsed:bool=False) -> List['Wikilink']:
        """
        Find all wikilinks in ``string``.

        Args:
            string: text to scan for ``[[...]]`` links.
            return_parsed: if True, return the raw pyparsing results
                instead of constructing :class:`Wikilink` objects.

        Returns:
            List of :class:`Wikilink` (or the pyparsing ``ParseResults``
            when ``return_parsed`` is True).
        """
        parser = cls.make_parser()
        results = parser.search_string(string)
        if return_parsed:
            return results
        else:
            return [Wikilink(**dict(res.items())) for res in results]

    def __eq__(self, other:'Wikilink'):
        # Comparing against a non-Wikilink is False rather than an
        # AttributeError from the getattr calls below
        if not isinstance(other, Wikilink):
            return NotImplemented
        return all(getattr(self, f) == getattr(other, f) for f in self.FIELDS)
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue