simplifying link parsing

This commit is contained in:
Wouter Groeneveld 2017-01-13 13:17:54 +01:00
parent 3a7a38c215
commit 6810e0168f
2 changed files with 13 additions and 15 deletions

View File

@@ -1,6 +1,8 @@
from collections import OrderedDict
from re import compile
class MarkdownHeader:
pattern = compile('(=+)(.*?)(=+)')
head = "="
config = {
'======': 1,
@@ -11,13 +13,9 @@ class MarkdownHeader:
}
def convert(self, text):
config = OrderedDict(sorted(MarkdownHeader.config.items(), key = lambda t : t[1]))
for key, val in config.items():
if text.startswith(key):
return ('#' * val) + self.strip(text)
return text
def strip(self, text):
return text.replace(MarkdownHeader.head, "")
result = text
for regex_head in MarkdownHeader.pattern.findall(text):
orig_header = regex_head[0] + regex_head[1] + regex_head[2]
new_header = ('#' * MarkdownHeader.config[regex_head[0]]) + regex_head[1]
result = result.replace(orig_header, new_header)
return result

View File

@@ -3,17 +3,17 @@ from os import walk
import re
class MarkdownLinks:
known_shortcodes = ('wp')
pattern = re.compile('\[\[(.*?)\]\]')
# see http://pythex.org/
pattern = re.compile('(\[\[)(.*?)(\]\])')
def convert(self, text):
result = text
for regex_link in MarkdownLinks.pattern.findall(text):
origlink = "[[" + regex_link + "]]"
origlink = ''.join(regex_link)
convertedlink = ""
if "http" in regex_link or "www" in regex_link:
if "http" in origlink or "www" in origlink:
convertedlink = self.convert_as_external_link(origlink)
elif ">" in regex_link:
elif ">" in origlink:
convertedlink = self.convert_as_interwiki_link(origlink)
else:
convertedlink = self.convert_as_internal_link(origlink)