From 6810e0168ff47fb2dfa502df58b49ca12822fd13 Mon Sep 17 00:00:00 2001 From: Wouter Groeneveld Date: Fri, 13 Jan 2017 13:17:54 +0100 Subject: [PATCH] simplifying link parsing --- src/markdown_headers.py | 18 ++++++++---------- src/markdown_links.py | 10 +++++----- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/markdown_headers.py b/src/markdown_headers.py index 1c6b9ff..576f235 100644 --- a/src/markdown_headers.py +++ b/src/markdown_headers.py @@ -1,6 +1,8 @@ from collections import OrderedDict +from re import compile class MarkdownHeader: + pattern = compile('(=+)(.*?)(=+)') head = "=" config = { '======': 1, @@ -11,13 +13,9 @@ class MarkdownHeader: } def convert(self, text): - config = OrderedDict(sorted(MarkdownHeader.config.items(), key = lambda t : t[1])) - - for key, val in config.items(): - if text.startswith(key): - return ('#' * val) + self.strip(text) - return text - - - def strip(self, text): - return text.replace(MarkdownHeader.head, "") + result = text + for regex_head in MarkdownHeader.pattern.findall(text): + orig_header = regex_head[0] + regex_head[1] + regex_head[2] + new_header = ('#' * MarkdownHeader.config[regex_head[0]]) + regex_head[1] + result = result.replace(orig_header, new_header) + return result diff --git a/src/markdown_links.py b/src/markdown_links.py index 16c3821..e2dfb06 100644 --- a/src/markdown_links.py +++ b/src/markdown_links.py @@ -3,17 +3,17 @@ from os import walk import re class MarkdownLinks: - known_shortcodes = ('wp') - pattern = re.compile('\[\[(.*?)\]\]') + # see http://pythex.org/ + pattern = re.compile('(\[\[)(.*?)(\]\])') def convert(self, text): result = text for regex_link in MarkdownLinks.pattern.findall(text): - origlink = "[[" + regex_link + "]]" + origlink = ''.join(regex_link) convertedlink = "" - if "http" in regex_link or "www" in regex_link: + if "http" in origlink or "www" in origlink: convertedlink = self.convert_as_external_link(origlink) - elif ">" in regex_link: + elif ">" in origlink: convertedlink = self.convert_as_interwiki_link(origlink) else: convertedlink = self.convert_as_internal_link(origlink)