User:John Vandenberg/wiki xml
Appearance
Processing MediaWiki syntax using XML.
flexbisonparse
Python support
In the flexbisonparse directory, add the following files:
- "pymod.c"
#include "Python.h"
/* Parser entry point, defined outside this file: takes the input text as a
 * C string and returns the result as a C string. */
const char *wikiparse_do_parse(const char *input);

/* Docstring attached to the Python-level toxml() function. */
char wikiparse_toxml__doc__[] = "toxml(wikitext) -- Convert Wikitext to XML\n";
/*
 * Python-callable wrapper behind toxml(wikitext).
 *
 * Extracts a single string argument from `args`, passes it to
 * wikiparse_do_parse(), and returns the resulting C string converted to a
 * Python object via Py_BuildValue("s", ...).
 *
 * Returns NULL if the arguments cannot be parsed as one string.
 * `self` is unused.
 */
PyObject *wikiparse_toxml(PyObject *self, PyObject *args)
{
    const char *source;

    if (!PyArg_ParseTuple(args, "s", &source)) {
        return NULL;
    }

    /* NOTE(review): ownership of the buffer returned by wikiparse_do_parse()
     * is not visible from here -- confirm whether it must be released once
     * the Python string has been built. */
    return Py_BuildValue("s", wikiparse_do_parse(source));
}
/* Method table for the module: one entry per Python-visible function,
 * terminated by an all-NULL sentinel entry. */
static PyMethodDef wikiparse_functions[] = {
    {
        "toxml",                      /* name as seen from Python */
        (PyCFunction)wikiparse_toxml, /* C implementation */
        METH_VARARGS,                 /* arguments arrive as a tuple */
        wikiparse_toxml__doc__        /* docstring */
    },
    {NULL, NULL, 0, NULL}             /* sentinel */
};
/*
 * Module entry-point (module-initialization) function.
 *
 * Creates the "wikiparse" module, registers the functions listed in
 * wikiparse_functions, and sets the module docstring.
 *
 * NOTE(review): the Python 2 extension guide declares init functions with
 * PyMODINIT_FUNC rather than plain `void` so the symbol is exported
 * correctly on every platform -- consider switching.
 */
void initwikiparse(void)
{
    /* The returned module object is deliberately discarded: nothing further
     * is added to the module after creation. */
    (void)Py_InitModule3("wikiparse", wikiparse_functions,
                         "MediaWiki syntax parser");
}
- setup.py
from distutils.core import setup, Extension
# The "wikiparse" extension is compiled from the Python glue (pymod.c)
# together with the other C sources listed here.
wikiparse_ext = Extension(
    "wikiparse",
    ["pymod.c", "lex.yy.c", "wikiparse.tab.c", "parsetree.c"],
)

setup(name="wikiparse", version="0.1", ext_modules=[wikiparse_ext])
- wikiparse.py
import wikiparse
import sys
def parse(page):
    """Return the result of wikiparse.toxml() for the wikitext in *page*."""
    xml = wikiparse.toxml(page)
    return xml
def main():
    """Print the parse of a fixed sample, then of everything on standard input.

    Each result is written to standard output on its own line.
    """
    print("%s" % parse("hello"))
    # Read all of stdin as one string; equivalent to joining readlines()
    # with an empty separator.
    print("%s" % parse(sys.stdin.read()))
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Execute:
$ make
$ python setup.py install
$ cat test.txt | python ./wikiparse.py | xmllint --format -