DocExport — малоизвестное расширение MediaWiki, автором которого является Стас Фомин.
Позволяет скачать любую страницу вики как документ Microsoft Word (фактически это HTML-файл, который Word открывает как документ).
Полезно для тех личных и особенно корпоративных вики, материалы в которых служат предметом дальнейшего бумажного документооборота.
2 июля 2009 года (по просьбе, которую Mithgol the Webmaster высказал комментарием на Хабрахабре) Стас Фомин выложил исходный код этого расширения.
Однако так как тогдашний его адрес в настоящее время выдаёт ошибку, уместно выложить код расширения и здесь, чтобы он не пропал.
Код DocExport, по-видимому, используется также проектом Mediawiki4Intranet в доработанном виде, но там его посмотреть не так-то просто, если нет под рукою Mercurial.
DocExport.php
* MediaWiki DocExport extension
* Version 1.4 compatible with MediaWiki 1.16 and Vector skin
* Copyright © 2008-2011 Stas Fomin, Vitaliy Filippov
* 1) Adds a content-action tab "purge"
* 2) Adds "clean HTML", "->m$word", "->openoffice" links to toolbox (in the left sidebar)
* "clean HTML" leads to &useskin=cleanmonobook by default,
* you can change it with $egDocexportCleanHtmlParams
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
// Entry-point guard: the condition is true when the MEDIAWIKI constant is not
// defined; the two <p> lines after it are the usage hint shown in that case.
// NOTE(review): the braces, the PHP close/open tags around the HTML, the
// require_once example and the statement that ends the request are not present
// in this copy - confirm against the original file.
if (!defined('MEDIAWIKI'))
<p>This is the DocExport extension. To enable it, put </p>
<p>at the bottom of your LocalSettings.php.</p>
// Register DocExport's static methods as handlers, one per MediaWiki hook name.
$wgHooks['SkinTemplateContentActions'][] = 'DocExport::onSkinTemplateContentActions';
$wgHooks['UnknownAction'][] = 'DocExport::onUnknownAction';
$wgHooks['SkinTemplateNavigation'][] = 'DocExport::onSkinTemplateNavigation';
$wgHooks['SkinTemplateToolboxEnd'][] = 'DocExport::SkinTemplateToolboxEnd';
$wgHooks['MagicWordwgVariableIDs'][] = 'DocExport::MagicWordwgVariableIDs';
$wgHooks['ParserGetVariableValueSwitch'][] = 'DocExport::ParserGetVariableValueSwitch';
$wgHooks['ParserFirstCallInit'][] = 'DocExport::ParserFirstCallInit';
// Message file (interface texts and magic words) lives next to this file.
$wgExtensionMessagesFiles['DocExport'] = dirname(__FILE__).'/DocExport.i18n.php';
// DocExport::Setup is queued as an extension setup function.
$wgExtensionFunctions[] = 'DocExport::Setup';
// Credits entry appended under the 'other' group; the version comes from DocExport::$version.
// NOTE(review): the closing of this array literal is not present in this copy.
$wgExtensionCredits['other'][] = array(
'name' => 'DocExport',
'author' => 'Stas Fomin',
'version' => DocExport::$version,
'description' => 'Adds 3 new actions for pages: render as HTML for M$WORD / OpenOffice, purge article',
'url' => '',
// Default query string for the "clean HTML" toolbox link, used only when the
// site configuration has not set $egDocexportCleanHtmlParams already.
if (!isset($egDocexportCleanHtmlParams))
$egDocexportCleanHtmlParams = "useskin=cleanmonobook";
// DocExport: holder for the extension's hook handlers and export helpers.
// Every member visible in this copy is static.
class DocExport
// Version string; the $wgExtensionCredits entry reads it.
static $version = '1.4 (2011-09-29)';
// Minimum MediaWiki version string.
// NOTE(review): no visible code reads this; presumably Setup() checked it - confirm.
static $required_mw = '1.11';
// Link descriptors (text/tooltip/href/class) keyed by action name.
// NULL means fillActions() has not run yet.
static $actions = NULL;
// Page-supplied CSS collected by docexportcss(); sendTo() appends it to the export stylesheet.
static $css = '';
// Setup callback; 'DocExport::Setup' is queued in $wgExtensionFunctions.
// NOTE(review): the body is not present in this copy. The remaining comment
// suggests it performed a MediaWiki version check (cf. self::$required_mw) -
// confirm against the original file.
static function Setup()
// A current MW-Version is required so check for it...
//// hooks ////
// Hook handler that creates the {{DOCEXPORT}} magic word: appends the variable
// ID 'docexport' to the array passed in by reference.
// The visible code returns true.
static function MagicWordwgVariableIDs(&$mVariablesIDs)
$mVariablesIDs[] = 'docexport';
return true;
// Hook handler that evaluates the {{DOCEXPORT}} magic word.
// When $index is 'docexport', $ret is set to '1' if the parser object carries a
// non-empty extIsDocExport property (getPureHTML() sets it while rendering for
// export) and to '' otherwise. Other indexes leave $ret untouched.
// The visible code returns true.
static function ParserGetVariableValueSwitch(&$parser, &$varCache, &$index, &$ret)
if ($index == 'docexport')
$ret = !empty($parser->extIsDocExport) ? '1' : '';
return true;
// Parser function used to add custom CSS for export.
// Appends the trimmed argument plus a newline to self::$css and returns an
// empty string, so the call leaves no text in the page output.
// NOTE(review): self::$css is a static that is only ever appended to in the
// visible code, so CSS from every parse in the same request accumulates -
// confirm this is intended.
static function docexportcss($parser, $args)
self::$css .= trim($args)."\n";
return '';
// Hook handler that registers the 'docexportcss' parser function on $parser,
// implemented by DocExport::docexportcss.
// The visible code returns true.
static function ParserFirstCallInit($parser)
$parser->setFunctionHook('docexportcss', 'DocExport::docexportcss');
return true;
// Hook used to display a tab in standard skins.
// Copies the cached 'purge' descriptor from self::$actions into
// $content_actions (passed by reference) when that descriptor is non-empty.
// NOTE(review): nothing visible here fills self::$actions; a call to
// fillActions() may be missing from this copy - confirm.
static function onSkinTemplateContentActions(&$content_actions)
if (!empty(self::$actions['purge']))
$content_actions['purge'] = self::$actions['purge'];
return true;
// Hook used to display a tab in Vector (MediaWiki 1.16+) skin
// Also standard skins in MediaWiki 1.18
// The 'purge' descriptor goes into $links['views'] when the skin object's class
// is exactly 'SkinVector', and into $links['actions'] for any other skin class.
// NOTE(review): nothing visible here fills self::$actions; a call to
// fillActions() may be missing from this copy - confirm.
static function onSkinTemplateNavigation(&$skin, &$links)
if (!empty(self::$actions['purge']))
$links[get_class($skin) == 'SkinVector' ? 'views' : 'actions']['purge'] = self::$actions['purge'];
return true;
// Hook for handling DocExport actions
// The action name is lower-cased first. For 'export2word' and 'export2oo' the
// article is passed to sendTo() together with the part of the name after the
// 7-character prefix 'export2' ('word' or 'oo'), and false is returned.
// Any other action returns true.
// NOTE(review): false presumably tells MediaWiki the action was handled - confirm
// against the hook documentation.
static function onUnknownAction($action, $article)
$action = strtolower($action);
if ($action == 'export2word' || $action == 'export2oo')
self::sendTo($article, substr($action, 7));
return false;
return true;
// Output our TOOLBOX links
// For each of 'cleanmonobook', 'export2word' and 'export2oo' that has a
// non-empty descriptor in self::$actions, prints an <li id="t-NAME"> item
// containing a link to the descriptor's href.
// NOTE(review): the print expression below ends on a dangling '.', so the
// operands that supplied the title attribute, the link text and the closing tags
// are missing from this copy. The href is concatenated without any visible HTML
// escaping - confirm against the original.
// NOTE(review): nothing visible here fills self::$actions; a call to
// fillActions() may be missing as well.
static function SkinTemplateToolboxEnd($tpl)
foreach (array('cleanmonobook', 'export2word', 'export2oo') as $link)
if (!empty(self::$actions[$link]))
print '<li id="t-'.$link.'" title="'.
'"><a href="'.self::$actions[$link]['href'].'">'.
return true;
//// non-hooks ////
// fills self::$actions for current title
// Builds the link descriptors (text, tooltip, href, class) for 'export2word',
// 'export2oo', 'purge' and 'cleanmonobook' from the global title and request.
// Returns true when descriptors are available or were already computed; returns
// false for titles in a negative namespace and for the 'edit'/'submit' actions.
// NOTE(review): the closing of each array literal is not present in this copy.
static function fillActions()
// Actions already filled?
if (self::$actions !== NULL)
return true;
// From here on the cache is an (initially empty) array, so a second call returns
// true even when this call goes on to return false below.
self::$actions = array();
global $wgTitle, $wgRequest, $egDocexportCleanHtmlParams;
$disallow_actions = array('edit', 'submit'); // disallowed actions
$action = $wgRequest->getVal('action');
$current_ns = $wgTitle->getNamespace();
// Disable for special pages
if ($current_ns < 0)
return false;
// Disable for edit/preview
if (in_array($action, $disallow_actions))
return false;
// NOTE(review): this href is built with $wgRequest->appendQuery(), whereas the
// next two descriptors use $wgTitle->getFullURL() - confirm the difference is intended.
self::$actions['export2word'] = array(
'text' => wfMsg('docexport-msword-export-link'),
'tooltip' => wfMsg('tooltip-ca-export2word'),
'href' => $wgRequest->appendQuery('action=export2word'),
'class' => '',
self::$actions['export2oo'] = array(
'text' => wfMsg('docexport-oo-export-link'),
'tooltip' => wfMsg('tooltip-ca-export2oo'),
'href' => $wgTitle->getFullURL('action=export2oo'),
'class' => '',
self::$actions['purge'] = array(
'text' => wfMsg('docexport-purge-tab'),
'tooltip' => wfMsg('tooltip-ca-purge'),
'href' => $wgTitle->getFullURL('action=purge'),
'class' => '',
// The "clean HTML" link is a local URL whose query string is the configurable
// $egDocexportCleanHtmlParams.
self::$actions['cleanmonobook'] = array(
'text' => wfMsg('link-cleanmonobook'),
'tooltip' => wfMsg('tooltip-link-cleanmonobook'),
'href' => $wgTitle->getLocalURL($egDocexportCleanHtmlParams),
'class' => '',
return true;
// Output HTML code with correct content-type for M$WORD / OO
// $article is the article to export; $to is compared against 'word', and any
// other value takes the OpenOffice branches (onUnknownAction() passes 'word' or 'oo').
// Visible steps: get print-ready HTML from getPureHTML(), load the stylesheet
// message docexport-$to.css and run it through the parser's preprocess(), add
// list numbering and the SectionNumbered wrapper for Word, append page-supplied
// CSS from self::$css, wrap everything in an HTML document, then send the
// download headers and echo the document.
static function sendTo($article, $to)
global $wgServer, $wgParser;
$html = self::getPureHTML($article);
$title = $article->getTitle();
// Fetch styles from MediaWiki:docexport-$to.css, expand templates
$st = wfMsgNoTrans("docexport-$to.css");
$st = $wgParser->preprocess($st, Title::makeTitleSafe(NS_MEDIAWIKI, "docexport-$to.css"), new ParserOptions());
if ($to == 'word')
// Add styles for HTML list numbering
$html = self::multinumLists($html, $st);
// Enable page numbering
$html = "<div class=\"SectionNumbered\">$html</div>";
if (!empty(self::$css))
if (preg_match('/mso-(even|first|)-?(header|footer)/is', self::$css))
// Remove headers/footers when page is using custom ones
$st = preg_replace('/mso-(even|first|)-?(header|footer)\s*:[^;]*;\s*/is', '', $st);
$st = trim($st)."\n".self::$css;
$html =
'<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN"><html><head>' .
'<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' .
($to == 'word' ? '<meta name=ProgId content=Word.Document>' : '') .
'<style type="text/css"><!--' . "\n" .
$st .
'/*-->*/</style></head><body>' .
$html .
// NOTE(review): the final operand that closes the document literal above is not
// present in this copy.
// NOTE(review): the non-Word media type is sent as 'vnd.oasis.opendocument.text',
// i.e. without the 'application/' type prefix - looks like a bug; confirm and fix.
header('Content-type: '.($to == 'word' ? 'application/msword' : 'vnd.oasis.opendocument.text'));
// strlen() counts bytes, which is the unit Content-Length expects.
header('Content-Length: '.strlen($html));
// NOTE(review): '.odp' is the OpenDocument presentation extension; a text
// document would normally use '.odt' - confirm. The title is also placed inside
// the quoted filename below without any visible escaping.
$filename = $title.($to == 'word' ? '.doc' : '.odp');
header('Content-Disposition: attachment; filename="'.$filename.'"');
echo $html;
/* Load HTML content into a DOMDocument.
   $html is treated as UTF-8: it is converted to HTML entities and prefixed with
   an XML declaration naming UTF-8 before being handed to loadHTML().
   Returns the DOMDocument. */
static function loadDOM($html)
$dom = new DOMDocument();
// E_WARNING is masked out while parsing, with the previous level saved in $oe.
// NOTE(review): no statement restoring error_reporting($oe) is visible in this
// copy - confirm it exists in the original.
$oe = error_reporting();
error_reporting($oe & ~E_WARNING);
// NOTE(review): the 'HTML-ENTITIES' target of mb_convert_encoding() is
// deprecated in recent PHP releases - check before upgrading.
$dom->loadHTML("<?xml version='1.0' encoding='UTF-8'?>".mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
return $dom;
/* Export children of $element to an HTML string.
   The element is serialised with saveXML() using LIBXML_NOEMPTYTAG, then its own
   outermost opening and closing tags are removed so only the inner markup remains.
   NOTE(review): $trim is not used anywhere in the visible code - confirm. */
static function saveChildren($element, $trim = false)
$xml = $element->ownerDocument->saveXML($element, LIBXML_NOEMPTYTAG);
// Drop the first tag and the last closing tag, keeping what lies between them.
$xml = preg_replace('/^\s*<[^>]*>(.*?)<\/[^\>]*>\s*$/uis', '\1', $xml);
// LIBXML_NOEMPTYTAG writes <br></br> and <input ...></input>; turn those back
// into self-closing tags.
$xml = preg_replace('#(<(br|input)(\s+[^<>]*[^/])?)></\2>#', '\1 />', $xml);
return $xml;
/* Make HTML ordered lists with class=multinum or inside an element with class=multinum
numbered hierarchically.
$html is the fragment to process; $css is taken by reference and receives extra
"@list ... { mso-level-text: ... }" rules. Returns the processed markup, or $html
unchanged when the quick regex finds no class attribute mentioning multinum.
NOTE(review): several statements of the tree walk (popping the stack, advancing
the child index, raising the nesting level, braces and else branches) appear to
be missing from this copy - compare with the original before relying on the
control flow shown here. */
static function multinumLists($html, &$css)
// Cheap pre-check so pages without multinum lists skip DOM parsing entirely.
if (!preg_match('/<([a-z0-9-:]+)[^<>]*class="[^<>\"\'\s]*multinum[^<>]*>/is', $html))
return $html;
$maxlevel = array();
$dom = self::loadDOM($html);
// Each stack entry is (parent node, child index, multinum level or false, list index).
$stack = array(array($dom->documentElement, 0, false, 0));
$maxlist = 0;
while ($stack)
list($p, $i, $multi, $listindex) = $stack[0];
if ($i >= $p->childNodes->length)
$e = $p->childNodes->item($i);
if ($e->nodeType == XML_ELEMENT_NODE)
if (!$multi && preg_match('/\bmultinum\b/s', $e->getAttribute('class')))
// Begin multinumbered list
$stack[0][2] = $multi = 1;
if ($multi && $e->nodeName == 'li')
// Add M$Word pseudo-style
$level = $multi-1;
$style = "mso-list: l$listindex level$level lfo$level";
// NOTE(review): getAttribute() returns a string, yet two lines below $a->value is
// read and written as if $a were an attribute node (getAttributeNode()?) - confirm.
if (!($a = $e->getAttribute('style')))
$e->setAttribute('style', $style);
$a->value = rtrim($a->value, "; \t\r\n") . '; ' . $style;
elseif ($multi && $e->nodeName == 'ol')
if ($multi < 2)
$listindex = ++$maxlist;
// Record that list $listindex uses nesting level $multi.
$maxlevel[$multi][$listindex] = true;
if ($e->childNodes->length)
array_unshift($stack, array($e, 0, $multi, $listindex));
// Append CSS classes to $st
$st = '';
// The loop stops at the first level with no recorded list.
// NOTE(review): reading $maxlevel[$i] for a level that was never set raises an
// undefined-index notice.
for ($i = 1; $maxlevel[$i]; $i++)
// $st grows by one "%N\." per level, so deeper levels get longer number formats.
$st .= '%'.$i.'\.';
$k = array_keys($maxlevel[$i]);
// NOTE(review): $list is bound by reference and no unset($list) is visible afterwards.
foreach ($k as &$list)
$list = "@list l$list:level$i";
$css .= implode(", ", $k) . " { mso-level-text:\"$st\"; }\n";
// Serialise the inner markup of the document element's first child.
return self::saveChildren($dom->documentElement->childNodes->item(0));
// Render $article to print-ready HTML for export.
// The article content is passed through preSaveTransform(), parsed with the
// global parser using the current user's options, then cleaned by html2print().
// The table of contents is switched off, and the parser carries
// extIsDocExport = true for the duration of the parse so {{DOCEXPORT}} yields '1'.
static function getPureHTML($article)
global $wgOut, $wgUser, $wgParser;
$title = $article->getTitle();
if (method_exists($title, 'userCanReadEx') && !$title->userCanReadEx())
// Support HaloACL rights
// NOTE(review): no statement stopping execution after this message is visible in
// this copy; without one the page would still be rendered - confirm against the original.
print '<html><body>DocExport: Permission Denied</body></html>';
$parserOptions = ParserOptions::newFromUser($wgUser);
$wgParser->mShowToc = false;
$wgParser->extIsDocExport = true;
$parserOutput = $wgParser->parse($article->preSaveTransform($article->getContent())."\n", $title, $parserOptions);
$wgParser->extIsDocExport = false;
$html = self::html2print($parserOutput->getText(), $title);
return $html;
// Clean rendered page HTML so it suits a standalone exported document.
// $html is the parser output; $title, when given, is used to turn links to
// anchors on the same page into bare "#anchor" links. Returns the cleaned HTML.
static function html2print($html, $title = NULL)
global $wgScriptPath, $wgServer;
// Cut out blocks marked screenonly / printfooter.
$html = self::clearScreenOnly($html);
// Remove [svg] graphviz links
$html = str_replace('[svg]</a>', '</a>', $html);
// Remove hyperlinks to images on the server
$html = self::clearHrefs($html);
// Remove enclosing <object type="image/svg+xml"> for SVG+PNG images
$html = preg_replace('#<object[^<>]*type=[\"\']?image/svg\+xml[^<>]*>(.*?)</object\s*>#is', '\1', $html);
// Make image urls absolute
// NOTE(review): this is a plain prefix replacement on 'src="'.$wgScriptPath; if
// $wgScriptPath is an empty string, every src=" attribute would get $wgServer
// prepended, already-absolute URLs included - confirm.
$html = str_replace('src="'.$wgScriptPath, 'src="'.$wgServer.$wgScriptPath, $html);
// Replace links to anchors within self to just anchors
if ($title)
$html = str_replace('href="'.$title->getLocalUrl().'#', 'href="#', $html);
return $html;
// Remove <div class="screenonly"> and <div class="printfooter"> blocks from $text.
// The start pattern only matches when class is the first attribute of the div,
// written with double quotes. The cut ends at the first closing </div> that
// follows, so a div nested inside such a block would end the cut early.
static function clearScreenOnly($text)
return self::cutBlock($text, "/<\\s*div\\s*class=\"(screenonly|printfooter)\"/i","/<\\/\\s*div\\s*>/i");
// Remove hyperlinks that point below $wgScriptPath/images: the opening <a ...>
// tag and the next closing </a> are dropped by stripTags(), which keeps the
// markup between them.
static function clearHrefs($text)
global $wgScriptPath;
// NOTE(review): only '/' is escaped in $wgScriptPath; other regex
// metacharacters would reach the pattern unescaped (preg_quote() is not used).
$regexp = "/<a[^<>]*href=[\"\']?" . str_replace("/", "\/", $wgScriptPath) . "\/images[^<>]*>/i";
return self::stripTags($text, $regexp, '#</\s*a\s*>#i');
// Remove each text span matching $startRegexp and the next span matching
// $endRegexp after it, keeping everything between and around them.
// Returns the accumulated result.
// NOTE(review): braces and, judging by the flow, an else before the second
// preg_split() are missing from this copy - confirm against the original.
static function stripTags($text, $startRegexp, $endRegexp)
$stripped = '';
while ('' != $text)
// Limit 2: $p[0] is the text before the first start match, $p[1] the rest.
$p = preg_split($startRegexp, $text, 2);
$stripped .= $p[0];
if ((count($p) < 2) || ('' == $p[1]))
$text = '';
$q = preg_split($endRegexp, $p[1], 2);
// Keep the content enclosed by the removed tags.
$stripped .= $q[0];
// NOTE(review): when $endRegexp does not match, $q has a single element and
// $q[1] is undefined (raises a notice, yields null) - confirm whether a guard is wanted.
$text = $q[1];
return $stripped;
// Remove every block that begins with a $startRegexp match and ends with the
// next $endRegexp match, dropping the enclosed content too (stripTags(), by
// contrast, keeps that content). Returns the remaining text.
// NOTE(review): braces and, judging by the flow, an else before the second
// preg_split() are missing from this copy - confirm against the original.
static function cutBlock($text, $startRegexp, $endRegexp)
$stripped = '';
while ('' != $text)
// Limit 2: $p[0] is the text before the first start match, $p[1] the rest.
$p = preg_split($startRegexp, $text, 2);
$stripped .= $p[0];
if ((count($p) < 2) || ('' == $p[1]))
$text = '';
$q = preg_split($endRegexp, $p[1], 2);
// $q[0] (the block content) is deliberately not appended.
// NOTE(review): when $endRegexp does not match, $q has a single element and
// $q[1] is undefined (raises a notice, yields null) - confirm whether a guard is wanted.
$text = $q[1];
return $stripped;
DocExport.i18n.php
/* Internationalization file for the DocExport Extension */
// $messages maps a language code to that language's message-key => text array.
$messages = array();
// English texts: link labels and tooltips read through wfMsg() in fillActions(),
// plus the stylesheet messages read through wfMsgNoTrans() in sendTo().
// NOTE(review): the closing quotes of the two multi-line CSS literals and the
// closing of this array are not present in this copy.
$messages['en'] = array(
'docexport-msword-export-link' => '→M$WORD',
'docexport-oo-export-link' => '→OOffice',
'docexport-purge-tab' => 'purge',
'tooltip-ca-export2word' => 'Export to MS Word',
'tooltip-ca-export2oo' => 'Export to Open Office',
'tooltip-ca-purge' => 'Purge/refresh article, clear cache…',
'link-cleanmonobook' => 'Clean page',
'tooltip-link-cleanmonobook' => 'Show clean page version, without any toolboxes/navigation, but with screen styles - useful for saving in HTM/MHT formats.',
// CSS styles for OpenOffice export
// 'docexport-oo.css' is the editable message; by default it only transcludes the
// '-orig' message, which holds the stock styles.
'docexport-oo.css' => '{{MediaWiki:docexport-oo-orig.css}}',
'docexport-oo-orig.css' =>
'<!-- Do not edit this page. Edit MediaWiki:docexport-oo.css instead.
These are the original styles for wiki article export to OpenOffice. -->
td, th { vertical-align: top; }
p, li { text-align: justify; }
body { font-size: 12pt; }
.maximg img { width: 17cm; height: auto !important; }
// CSS styles for M$Word export
// Same editable / '-orig' split as for OpenOffice. The stock Word styles also
// define the SectionNumbered and SectionLandscape page rules, whose headers and
// footers are loaded from the extension's header.htm.
'docexport-word.css' => '{{MediaWiki:docexport-word-orig.css}}',
'docexport-word-orig.css' =>
'<!-- Do not edit this page. Edit MediaWiki:docexport-word.css instead.
These are the original styles for wiki article export to M$ Word. -->
p, table, li, dt, dl, h1, h2, h3, h4, h5, h6 { font-family: Arial; }
td, th { vertical-align: top; }
dt { font-weight: bold; }
p, li { text-align: justify; }
body { font-size: 12pt; }
ul li { list-style-type: square; }
img { max-width: 17cm; height: auto !important; }
.maximg img { width: 642px; height: auto !important; }
@page SectionNumbered {
mso-even-header:url("{{SERVER}}{{SCRIPTPATH}}/extensions/DocExport/header.htm") eh1;
mso-header:url("{{SERVER}}{{SCRIPTPATH}}/extensions/DocExport/header.htm") h1;
mso-even-footer:url("{{SERVER}}{{SCRIPTPATH}}/extensions/DocExport/header.htm") ef1;
mso-footer:url("{{SERVER}}{{SCRIPTPATH}}/extensions/DocExport/header.htm") f1;
mso-first-header:url("{{SERVER}}{{SCRIPTPATH}}/extensions/DocExport/header.htm") fh1;
mso-first-footer:url("{{SERVER}}{{SCRIPTPATH}}/extensions/DocExport/header.htm") ff1;
div.SectionNumbered { page: SectionNumbered; }
@page SectionLandscape {
mso-page-orientation: landscape;
size: 297mm 210mm;
mso-even-header:url("{{SERVER}}{{SCRIPTPATH}}/extensions/DocExport/header.htm") eh1;
mso-header:url("{{SERVER}}{{SCRIPTPATH}}/extensions/DocExport/header.htm") h1;
mso-even-footer:url("{{SERVER}}{{SCRIPTPATH}}/extensions/DocExport/header.htm") ef1;
mso-footer:url("{{SERVER}}{{SCRIPTPATH}}/extensions/DocExport/header.htm") f1;
mso-first-header:url("{{SERVER}}{{SCRIPTPATH}}/extensions/DocExport/header.htm") fh1;
mso-first-footer:url("{{SERVER}}{{SCRIPTPATH}}/extensions/DocExport/header.htm") ff1;
div.SectionLandscape { page: SectionLandscape; }
div.SectionLandscape .maximg img { width: 25cm; }
// Russian texts for the link labels and tooltips.
// The visible array defines no docexport-*.css keys; presumably those fall back
// to the English entries - confirm.
// NOTE(review): the closing of this array is not present in this copy.
$messages['ru'] = array(
'docexport-msword-export-link' => '→M$WORD',
'docexport-oo-export-link' => '→OOffice',
'docexport-purge-tab' => 'Обновить',
'tooltip-ca-export2word' => 'Экспорт в MS Word',
'tooltip-ca-export2oo' => 'Экспорт в Open Office',
'tooltip-ca-purge' => 'Обновить статью, сбросить кеш…',
'link-cleanmonobook' => 'Чистый HTML',
'tooltip-link-cleanmonobook' => 'Показать версию страницы без навигации, но с экранными стилями - удобно для сохранения в HTM/MHT-форматы.',
// Magic word definitions per language: the ID 'docexport' is spelled DOCEXPORT
// in wikitext, and the parser function ID 'docexportcss' is spelled docexportcss.
// NOTE(review): the leading '1' in each definition is presumably MediaWiki's
// case-sensitivity flag - confirm. The closing of the array is not present in this copy.
$magicWords = array();
$magicWords['en'] = array(
'docexport' => array('1', 'DOCEXPORT'),
'docexportcss' => array('1', 'docexportcss'),
header.htm
<html xmlns:v="urn:schemas-microsoft-com:vml"
<!-- Headers/footers for M$Word. Now just contains page numbers. -->
<meta http-equiv=Content-Type content="text/html; charset=windows-1251">
<meta name=ProgId content=Word.Document>
<meta name=Generator content="Microsoft Word 14">
<meta name=Originator content="Microsoft Word 14">
<link id=Main-File rel=Main-File href="../toc.htm">
<body lang=RU>
<div style='mso-element:footnote-separator' id=fs>
<p class=MsoNormal><span style='mso-special-character:footnote-separator'><![if !supportFootnotes]>
<hr align=left size=1 width="33%">
<div style='mso-element:footnote-continuation-separator' id=fcs>
<p class=MsoNormal><span style='mso-special-character:footnote-continuation-separator'><![if !supportFootnotes]>
<hr align=left size=1>
<div style='mso-element:endnote-separator' id=es>
<p class=MsoNormal><span style='mso-special-character:footnote-separator'><![if !supportFootnotes]>
<hr align=left size=1 width="33%">
<div style='mso-element:endnote-continuation-separator' id=ecs>
<p class=MsoNormal><span style='mso-special-character:footnote-continuation-separator'><![if !supportFootnotes]>
<hr align=left size=1>
<div style='mso-element:header' id=eh1>
<p class=MsoHeader><o:p> </o:p></p>
<div style='mso-element:header' id=h1>
<p class=MsoHeader><o:p> </o:p></p>
<div style='mso-element:footer' id=ef1>
<p class=MsoFooter><o:p> </o:p></p>
<div style='mso-element:footer' id=f1><w:Sdt SdtDocPart="t"
DocPartType="Page Numbers (Bottom of Page)" DocPartUnique="t" ID="1707683786">
<p class=MsoFooter align=right style='text-align:right'><!--[if supportFields]><span
style='mso-element:field-begin'></span>PAGE<span style='mso-spacerun:yes'>
</span>\* MERGEFORMAT<span style='mso-element:field-separator'></span><![endif]--><span
style='mso-no-proof:yes'>2</span><!--[if supportFields]><span
<p class=MsoFooter><o:p> </o:p></p>
<div style='mso-element:header' id=fh1>
<p class=MsoHeader><o:p> </o:p></p>
<div style='mso-element:footer' id=ff1>
<p class=MsoFooter><o:p> </o:p></p>