simplifying link parsing

This commit is contained in:
Wouter Groeneveld 2017-01-13 13:17:54 +01:00
parent 3a7a38c215
commit 6810e0168f
2 changed files with 13 additions and 15 deletions

View File

@@ -1,6 +1,8 @@
from collections import OrderedDict from collections import OrderedDict
from re import compile
class MarkdownHeader: class MarkdownHeader:
pattern = compile('(=+)(.*?)(=+)')
head = "=" head = "="
config = { config = {
'======': 1, '======': 1,
@@ -11,13 +13,9 @@ class MarkdownHeader:
} }
def convert(self, text): def convert(self, text):
config = OrderedDict(sorted(MarkdownHeader.config.items(), key = lambda t : t[1])) result = text
for regex_head in MarkdownHeader.pattern.findall(text):
for key, val in config.items(): orig_header = regex_head[0] + regex_head[1] + regex_head[2]
if text.startswith(key): new_header = ('#' * MarkdownHeader.config[regex_head[0]]) + regex_head[1]
return ('#' * val) + self.strip(text) result = result.replace(orig_header, new_header)
return text return result
def strip(self, text):
return text.replace(MarkdownHeader.head, "")

View File

@@ -3,17 +3,17 @@ from os import walk
import re import re
class MarkdownLinks: class MarkdownLinks:
known_shortcodes = ('wp') # see http://pythex.org/
pattern = re.compile('\[\[(.*?)\]\]') pattern = re.compile('(\[\[)(.*?)(\]\])')
def convert(self, text): def convert(self, text):
result = text result = text
for regex_link in MarkdownLinks.pattern.findall(text): for regex_link in MarkdownLinks.pattern.findall(text):
origlink = "[[" + regex_link + "]]" origlink = ''.join(regex_link)
convertedlink = "" convertedlink = ""
if "http" in regex_link or "www" in regex_link: if "http" in origlink or "www" in origlink:
convertedlink = self.convert_as_external_link(origlink) convertedlink = self.convert_as_external_link(origlink)
elif ">" in regex_link: elif ">" in origlink:
convertedlink = self.convert_as_interwiki_link(origlink) convertedlink = self.convert_as_interwiki_link(origlink)
else: else:
convertedlink = self.convert_as_internal_link(origlink) convertedlink = self.convert_as_internal_link(origlink)