code conversion

This commit is contained in:
wgroeneveld 2017-01-14 20:14:18 +01:00
parent 600098bac3
commit 190441453b
6 changed files with 182 additions and 4 deletions

View File

@ -4,17 +4,46 @@ A DokuWiki to Hugo file exporter to quickly migrate your existing PHP wiki to Hu
See https://www.dokuwiki.org/wiki:syntax
## The following DokuWiki syntax is converted:
### general
* code, file, inlinecode with single quotes
* bold, italic, sub/sup, strikethrough
* headings with equal sign
* linebreaks (double backslash) are at this moment replaced with HTML BRs.
### embedding HTML
Since Hugo still supports HTML tags, we don't need to do anything except remove the wrapping `<html>` and `</html>` tags.
See also the `MarkdownInlineHtml` class in simplestyle module.
### links
Simple internal links such as `[[mylink]]` are converted to relrefs. Colons in local links are replaced by forward slashes.
`[[sub:link]]` would become a link to the sub/link article.
If it's an HTTP(S) link, it stays that way.
#### interwiki
You'll have to come up with your own shortcodes for those.
See wp.html in the layouts directory. You could customize interwiki links from dokuwiki: `[[custom>somelink]]` would refer to some custom wiki.
Simply add custom.html and link to the website of your choice. Use Hugo's `{{ index .Params 0 }}` to get the link content.
## TODO
* Figure out image links à la http://php.net|{{wiki:dokuwiki-128.png}}
* Tables, should complex ones be supported or can I do a manual convert?
* build file structure - wire everything together
* build header TOML with timestamps, draft false etc
* lists
* ordered lists using dash to markdown?
* emoticons
* no formatting (nowiki) - should this just be a pre?
* code highlighting (inline and multiline and downloadable)
* embedding html - remove html tag and done?
* preventing of wiki markup %% - what to do with it?
## Not supported and probably will never be

34
src/markdown/code.py Normal file
View File

@ -0,0 +1,34 @@
from abc import ABC
from re import compile
class BaseMarkdownCode(ABC):
    """Convert DokuWiki ``<tag [language [filename]]>`` blocks to fenced Markdown.

    The opening tag becomes the fence followed by the language (if any) and
    the closing tag becomes a bare fence. Subclasses only supply the tag name.
    """

    # Markdown fence that replaces both the opening and the closing tag.
    markdown = "```"

    def __init__(self, tag):
        """Store ``tag`` and compile the pattern that finds its opening tags.

        :param tag: tag name without angle brackets, e.g. ``'code'``.
        """
        self.tag = tag
        # Group 1 is the whole opening tag, group 2 is everything between the
        # tag name and '>'. The word boundary keeps longer tag names that
        # merely start with ``tag`` (e.g. <files> for 'file') from matching.
        self.pattern = compile('(<' + tag + r'\b(.*?)>)')

    def strip_lang(self, language):
        """Return the language named in the attribute text of an opening tag.

        :param language: text found after the tag name, e.g. ``' php myfile.php'``.
        :return: the first whitespace-separated word, or ``''`` when there is
            none or when it is DokuWiki's ``-`` "no highlighting" marker.
        """
        words = language.split()
        # '-' means "no language" in DokuWiki (used to name a download file
        # without highlighting); it must not end up in the fence info string.
        if not words or words[0] == '-':
            return ''
        return words[0]

    def convert(self, text):
        """Return ``text`` with this converter's tags replaced by Markdown fences.

        :param text: DokuWiki source text.
        :return: the text with opening tags replaced by the fence plus the
            language, and closing tags replaced by the bare fence.
        """
        def opening_fence(match):
            return self.markdown + self.strip_lang(match.group(2))

        # Single pass over the text instead of one str.replace per match.
        result = self.pattern.sub(opening_fence, text)
        return result.replace('</' + self.tag + '>', self.markdown)
class MarkdownFile(BaseMarkdownCode):
    """Code-block converter bound to the ``file`` tag."""

    def __init__(self):
        """Initialise the base converter with the ``file`` tag name."""
        tag_name = 'file'
        super().__init__(tag_name)
class MarkdownCode(BaseMarkdownCode):
    """Code-block converter bound to the ``code`` tag."""

    def __init__(self):
        """Initialise the base converter with the ``code`` tag name."""
        tag_name = 'code'
        super().__init__(tag_name)

View File

@ -33,6 +33,11 @@ class MarkdownLineBreak(SimpleReplacementStyle):
def __init__(self):
    # Both values are forwarded unchanged to the parent initialiser.
    # Presumably the first is the replacement and the second the text being
    # replaced - TODO confirm against SimpleReplacementStyle.
    html_break = '<br/>'
    backslash = '\\'
    super().__init__(html_break, backslash)
# inline html is supported with Hugo, don't need the tags.
class MarkdownInlineHtml:
    """Strip the ``<html>`` wrapper tags and leave the embedded HTML untouched."""

    def convert(self, text):
        """Return ``text`` with every ``<html>`` and ``</html>`` removed."""
        stripped = text
        # Opening tags are removed first, then closing tags.
        for wrapper in ('<html>', '</html>'):
            stripped = stripped.replace(wrapper, '')
        return stripped
# bold in Doku is bold in MD
class MarkdownBold(NopStyle):
    """Bold style; adds nothing of its own and relies entirely on NopStyle."""
@ -44,3 +49,7 @@ class MarkdownItalic(SimpleStyleBetweenTags):
class MarkdownStrikeThrough(SimpleStyleBetweenTags):
    """Strikethrough style built on ``~~`` and the ``<del>``/``</del>`` tag pair."""

    def __init__(self):
        # Presumably: Markdown marker first, then the opening and closing
        # source tags - TODO confirm against SimpleStyleBetweenTags.
        markers = ('~~', '<del>', '</del>')
        super().__init__(*markers)
class MarkdownInlineCode(SimpleStyleBetweenTags):
    """Inline-code style built on a backtick and the double single-quote pair."""

    def __init__(self):
        # Presumably: Markdown marker first, then the opening and closing
        # source markers - TODO confirm against SimpleStyleBetweenTags.
        markers = ('`', "''", "''")
        super().__init__(*markers)

View File

@ -11,6 +11,7 @@ class MarkdownConverter:
def __init__(self, file):
self.file = file
self.converters = (
# TODO auto-discover these. How do I do that, without interfaces?
MarkdownHeader(),
MarkdownLinks(),
MarkdownItalic(),

View File

@ -0,0 +1,93 @@
from unittest import TestCase
from src.markdown.code import MarkdownCode, MarkdownFile
class TestMarkdownCode(TestCase):
    """Tests for converting ``<file>`` and ``<code>`` blocks with MarkdownFile and MarkdownCode."""

    def setUp(self):
        # A fresh pair of converters is built before every test.
        self.code_converter = MarkdownCode()
        self.file_converter = MarkdownFile()

    def test_convert_file_without_language(self):
        # A bare <file> block: both tags become plain fences.
        src = """
blabla
<file>
this
that
</file>
blehbleh
"""
        expected = """
blabla
```
this
that
```
blehbleh
"""
        self.assertEqual(expected, self.file_converter.convert(src))

    def test_convert_file_with_some_language(self):
        # Language plus file name: only the language is kept after the fence.
        src = """
blabla
<file php myfile.php>
$cool = "yoo";
echo $cool;
</file>
blehbleh
"""
        expected = """
blabla
```php
$cool = "yoo";
echo $cool;
```
blehbleh
"""
        self.assertEqual(expected, self.file_converter.convert(src))

    def test_convert_code_with_specific_language(self):
        # <code php> becomes a fence tagged with the language.
        src = """
blabla
<code php>
$_REQUEST = 'sup';
echo "yoo";
</code>
blehbleh
"""
        expected = """
blabla
```php
$_REQUEST = 'sup';
echo "yoo";
```
blehbleh
"""
        self.assertEqual(expected, self.code_converter.convert(src))

    def test_convert_code_without_language(self):
        # A bare <code> block: both tags become plain fences.
        src = """
blabla
<code>
this
that
</code>
blehbleh
"""
        expected = """
blabla
```
this
that
```
blehbleh
"""
        self.assertEqual(expected, self.code_converter.convert(src))

View File

@ -1,10 +1,22 @@
from unittest import TestCase
from src.markdown.simplestyle import MarkdownBold, MarkdownItalic, MarkdownStrikeThrough, MarkdownLineBreak
from src.markdown.simplestyle import MarkdownBold, MarkdownItalic, MarkdownStrikeThrough, MarkdownLineBreak, \
MarkdownInlineCode, MarkdownInlineHtml
class TestMarkdownSimpleStyles(TestCase):
def test_inline_html_simply_removes_tags(self):
src = "<html><strong>sup</strong></html>"
expected = "<strong>sup</strong>"
self.assertEqual(expected, MarkdownInlineHtml().convert(src))
def test_convert_inline_code(self):
inline_converter = MarkdownInlineCode()
src = "hi this is ''some code''"
expected = "hi this is `some code`"
self.assertEqual(expected, inline_converter.convert(src))
def test_some_linebreaks(self):
src = '''
hello \\