Skip to content

Commit

Permalink
preprocess scripts: init
Browse files Browse the repository at this point in the history
  • Loading branch information
myfreeer committed Jun 13, 2018
1 parent abd146f commit 123ee6a
Show file tree
Hide file tree
Showing 3 changed files with 305 additions and 0 deletions.
15 changes: 15 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
*.chm
*.hhp
*.hhk
*.hhc
*.cab
*.zip
*.rar
*.7z
reference/*
zh/*
en/*
chmhelp/*
common/*
chm_temp/*
cppreference-doc/*
107 changes: 107 additions & 0 deletions preprocess-zh.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#!/bin/bash

# Fetch the upstream cppreference-doc tooling, patch it with ../zh.diff and
# run its `source` make target. Any failure in this phase aborts the script.
set -e
git clone https://github.com/PeterFeicht/cppreference-doc.git --depth=1
cd cppreference-doc
git apply -3 ../zh.diff
make source
# Everything after this point is best-effort: stop aborting on errors.
set +e

#######################################
# Print the first path below the current directory that matches a pattern.
# Arguments: $1 - case-insensitive PCRE matched against each path
# Outputs:   the first matching path on stdout, nothing if none matches
#######################################
find_first_match() {
  find . | grep -iP -- "$1" | head -1
}

# init files and vars
# For every entry NAME in LIST there is a pair of variables:
#   NAME_path    - the downloaded load.php resource found in the tree
#   NAME_replace - the plain file name that resource is renamed to
startup_scripts_replace="startup_scripts.js"
startup_scripts_path="$(find_first_match 'load\.php.*?modules=startup&only=scripts.*?')"

site_scripts_replace="site_scripts.js"
site_scripts_path="$(find_first_match 'load\.php.*?modules=site&only=scripts.*?')"

site_modules_replace="site_modules.css"
site_modules_path="$(find_first_match 'load\.php.*?modules=site&only=styles.*?')"

skin_scripts_replace="skin_scripts.js"
skin_scripts_path="$(find_first_match 'load\.php.*?modules=skins.*&only=scripts.*?')"

ext_replace="ext.css"
ext_path="$(find_first_match 'load\.php.*?modules=.*ext.*&only=styles.*?')"

LIST="startup_scripts site_scripts site_modules skin_scripts ext"
extra_fonts="DejaVuSans.ttf DejaVuSans-Bold.ttf DejaVuSansMono.ttf DejaVuSansMono-Bold.ttf"

# Number of parallel jobs handed to `xargs -P`. Prefer nproc, fall back to
# counting /proc/cpuinfo entries, and never end up with 0 or an empty value
# (xargs treats -P 0 as "as many processes as possible").
CPUS="$(nproc 2>/dev/null || grep -c '^processor' /proc/cpuinfo 2>/dev/null)"
[[ "${CPUS}" =~ ^[1-9][0-9]*$ ]] || CPUS=1

# https://gist.github.com/cdown/1163649/8a35c36fdd24b373788a7057ed483a5bcd8cd43e
#######################################
# Percent-encode a string.
# The characters A-Z a-z 0-9 . ~ _ - are passed through unchanged, a space
# becomes "+", and every other byte becomes %XX with two upper-case hex
# digits.
# Arguments: $1 - string to encode
# Outputs:   the encoded string on stdout, without a trailing newline
#######################################
url_encode() {
  # Work byte by byte: in the C locale ${#var} and ${var:i:1} operate on
  # bytes and the bracket ranges below only match ASCII characters.
  local LC_ALL=C
  local input="${1-}"
  local idx ch code
  for (( idx = 0; idx < ${#input}; idx++ )); do
    ch="${input:idx:1}"
    case "${ch}" in
      # Never use the data itself as the printf format string.
      [a-zA-Z0-9.~_-]) printf '%s' "${ch}" ;;
      ' ') printf '+' ;;
      *)
        printf -v code '%d' "'${ch}"
        # Mask to one byte so the output is always %XX, even if printf
        # reports a wider value for a non-ASCII byte.
        printf '%%%02X' "$(( code & 0xFF ))"
        ;;
    esac
  done
}

#######################################
# Copy the file named by <prefix>_path to a sibling file whose name is
# taken from <prefix>_replace (same directory, existing target overwritten).
# Globals:   ${1}_path (read), ${1}_replace (read)
# Arguments: $1 - variable-name prefix, e.g. "site_scripts"
# Outputs:   a diagnostic on stderr when either variable is empty or unset
# Returns:   the status of cp, or 1 when there is nothing to copy
#######################################
copy_file() {
  local var=$1
  local path_ref="${var}_path"
  local replace_ref="${var}_replace"
  # Indirect expansion instead of `eval echo`: the stored path contains
  # "?" and "*", which an unquoted expansion would glob and word-split.
  local path="${!path_ref:-}"
  local replace="${!replace_ref:-}"
  if [[ -z "${path}" || -z "${replace}" ]]; then
    printf 'copy_file: no source path or target name for "%s"\n' "${var}" >&2
    return 1
  fi
  local dir
  dir="$(dirname -- "${path}")" || return
  cp -f -T -- "${path}" "${dir}/${replace}"
}

#######################################
# Delete every entry below the current directory whose name matches the
# base name of <prefix>_path.
# The base name is handed to `find -iname`, so it is matched
# case-insensitively and any "?" or "*" in it act as wildcards.
# Globals:   ${1}_path (read)
# Arguments: $1 - variable-name prefix, e.g. "site_scripts"
# Outputs:   a diagnostic on stderr when the path variable is empty or unset
# Returns:   the status of find, or 1 when there is nothing to remove
#######################################
remove_file() {
  local var=$1
  local path_ref="${var}_path"
  # Indirect expansion instead of `eval echo`, which would glob and
  # word-split the stored path.
  local path="${!path_ref:-}"
  if [[ -z "${path}" ]]; then
    printf 'remove_file: no path recorded for "%s"\n' "${var}" >&2
    return 1
  fi
  local name
  name="$(basename -- "${path}")" || return
  # -exec ... {} + passes names straight to rm, so whitespace or quotes in
  # a file name cannot split an argument the way `find | xargs` would.
  find . -iname "${name}" -exec rm -f -- {} +
}

#######################################
# Rewrite references in all *.html files below the current directory:
# every occurrence of the base name of <prefix>_path, either verbatim or
# in its url_encode'd form, is replaced by <prefix>_replace
# (case-insensitive, all occurrences on each line).
# Globals:   ${1}_path (read), ${1}_replace (read), CPUS (read, default 1)
# Arguments: $1 - variable-name prefix, e.g. "site_scripts"
# Outputs:   a diagnostic on stderr when the path variable is empty or unset
# Returns:   the status of the find/xargs pipeline, or 1 when there is
#            nothing to replace
#######################################
replace_in_html() {
  local var=$1
  local path_ref="${var}_path"
  local replace_ref="${var}_replace"
  # Indirect expansion instead of `eval echo`, which would glob and
  # word-split the stored path.
  local path="${!path_ref:-}"
  local replace="${!replace_ref:-}"
  if [[ -z "${path}" ]]; then
    printf 'replace_in_html: no path recorded for "%s"\n' "${var}" >&2
    return 1
  fi

  local name encoded_name
  name="$(basename -- "${path}")" || return
  encoded_name="$(url_encode "${name}")"

  # The names are literal text, not regular expressions: escape the BRE
  # metacharacters and the "/" delimiter in the patterns, and "\", "/" and
  # "&" in the replacement, before splicing them into the sed program.
  local name_re encoded_re replace_esc
  name_re="$(printf '%s' "${name}" | sed -e 's|[][\\/.*^$]|\\&|g')"
  encoded_re="$(printf '%s' "${encoded_name}" | sed -e 's|[][\\/.*^$]|\\&|g')"
  replace_esc="$(printf '%s' "${replace}" | sed -e 's|[\\/&]|\\&|g')"

  # Both substitutions run in one sed invocation per batch, so the tree is
  # rewritten once. NUL-delimited names keep unusual file names intact;
  # -r skips sed entirely when no html file exists.
  find ./ -iname '*.html' -type f -print0 \
    | xargs -0 -r -P "${CPUS:-1}" sed -i \
        -e "s/${name_re}/${replace_esc}/gi" \
        -e "s/${encoded_re}/${replace_esc}/gi"
}

echo pre-processing...
# LIST is a space-separated list of names; word splitting is intentional.
for i in $LIST; do
  copy_file "$i"
done

# backup extra fonts
mkdir -p font_temp
for i in $extra_fonts; do
  # Skip font_temp itself so an already backed-up copy is not found again
  # and copied onto itself.
  find . -path ./font_temp -prune -o -iname "$i" -exec cp {} "font_temp/$i" \;
done

# original preprocess
make doc_html

# restore extra fonts
font_path=''
if [[ -d 'reference/common' ]]; then
  font_path='reference/common'
elif [[ -d 'output/common' ]]; then
  font_path='output/common'
fi
if [[ -n "$font_path" && -d "$font_path" ]]; then
  for i in $extra_fonts; do
    # Only restore fonts that were actually found during the backup step.
    if [[ -f "font_temp/$i" ]]; then
      cp -f "font_temp/$i" "$font_path/$i"
    fi
  done
fi
rm -rf font_temp

echo post-processing...
for i in $LIST; do
  echo processing "$i"
  remove_file "$i"
  replace_in_html "$i"
done

# Patch the renamed resources in place. `-exec ... {} +` hands the file
# names straight to sed and does not run sed at all when nothing matches.
find . -iname "${startup_scripts_replace}" -exec sed -i 's/document\.write/void /ig' {} +
# Insert a guard as the first line of the site and skin scripts.
find . -iname "${site_scripts_replace}" -exec sed -i '1 i if(window.mw)' {} +
find . -iname "${skin_scripts_replace}" -exec sed -i '1 i if(window.mw)' {} +
# Strip the leading "../" from .ttf references in stylesheets.
find . -iname '*.css' -exec sed -i -r 's/\.\.\/([^.]+?)\.ttf/\1.ttf/ig' {} +
echo Done.

# Move the generated output up into the directory the script was started
# from, then return there.
mv reference/* ../
cd ..
183 changes: 183 additions & 0 deletions zh.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
diff --git a/Makefile b/Makefile
index deadfe2..46abab2 100644
--- a/Makefile
+++ b/Makefile
@@ -230,7 +230,7 @@ indexes:
./index2autolinker.py index-functions-c.xml output/indexes/autolink-c
./index2autolinker.py index-functions-cpp.xml output/indexes/autolink-cpp

-#redownloads the source documentation directly from en.cppreference.com
+#redownloads the source documentation directly from zh.cppreference.com
source:
rm -rf "reference"
mkdir "reference"
@@ -239,15 +239,15 @@ source:
regex=".*index\\.php.*|.*/Special:.*|.*/Talk:.*" \
regex+="|.*/Help:.*|.*/File:.*|.*/Cppreference:.*" \
regex+="|.*/WhatLinksHere:.*|.*/Template:.*|.*/Category:.*" \
- regex+="|.*action=.*|.*printable=.*|.*en.cppreference.com/book.*" ; \
+ regex+="|.*action=.*|.*printable=.*|.*zh.cppreference.com/book.*" ; \
echo $$regex ; \
wget --adjust-extension --page-requisites --convert-links \
- --force-directories --recursive --level=15 \
- --span-hosts --domains=en.cppreference.com,upload.cppreference.com \
+ --force-directories --recursive --level=17 \
+ --span-hosts --domains=zh.cppreference.com,upload.cppreference.com \
--reject-regex $$regex \
- --timeout=5 --tries=50 --no-verbose \
+ --timeout=5 --tries=150 --no-verbose \
--retry-connrefused --waitretry=10 --read-timeout=20 \
- http://en.cppreference.com/w/ ; \
+ http://zh.cppreference.com/w/ ; \
popd > /dev/null

- ./export.py --url=http://en.cppreference.com/mwiki reference/cppreference-export-ns0,4,8,10.xml 0 4 8 10
+ ./export.py --url=http://zh.cppreference.com/mwiki reference/cppreference-export-ns0,4,8,10.xml 0 4 8 10
diff --git a/commands/preprocess.py b/commands/preprocess.py
index ed67174..ada56fe 100644
--- a/commands/preprocess.py
+++ b/commands/preprocess.py
@@ -40,15 +40,15 @@ def rearrange_archive(root):
# rearrange the archive. {root} here is output/reference

# before
- # {root}/en.cppreference.com/w/ : html
- # {root}/en.cppreference.com/mwiki/ : data
- # {root}/en.cppreference.com/ : data
+ # {root}/zh.cppreference.com/w/ : html
+ # {root}/zh.cppreference.com/mwiki/ : data
+ # {root}/zh.cppreference.com/ : data
# ... (other languages)
# {root}/upload.cppreference.com/mwiki/ : data

# after
# {root}/common/ : all common data
- # {root}/en/ : html for en
+ # {root}/zh/ : html for zh
# ... (other languages)

data_path = os.path.join(root, 'common')
@@ -56,7 +56,7 @@ def rearrange_archive(root):
shutil.move(os.path.join(root, 'upload.cppreference.com/mwiki'), data_path)
shutil.rmtree(os.path.join(root, 'upload.cppreference.com'))

- for lang in ["en"]:
+ for lang in ["zh"]:
path = os.path.join(root, lang + ".cppreference.com/")
src_html_path = path + "w/"
src_data_path = path + "mwiki/"
@@ -214,7 +214,7 @@ def has_class(el, classes_to_check):
return False

def preprocess_html_file(root, fn, rename_map):
- parser = etree.HTMLParser()
+ parser = etree.HTMLParser(encoding="utf-8")
html = etree.parse(fn, parser)

# remove non-printable elements
diff --git a/gadgets/standard_revisions-tests/base.py b/gadgets/standard_revisions-tests/base.py
index 848d431..67d00f4 100644
--- a/gadgets/standard_revisions-tests/base.py
+++ b/gadgets/standard_revisions-tests/base.py
@@ -27,7 +27,7 @@ import unittest, time, re

class Driver:
def __init__(self):
- base_url = "http://en.cppreference.com/"
+ base_url = "http://zh.cppreference.com/"
driver = webdriver.Firefox()
driver.implicitly_wait(30)
try:
diff --git a/gadgets/sync_tests_mwiki.py b/gadgets/sync_tests_mwiki.py
index 9aa3fc7..13294ec 100755
--- a/gadgets/sync_tests_mwiki.py
+++ b/gadgets/sync_tests_mwiki.py
@@ -114,7 +114,7 @@ def perform_sync(url, direction, dest_root, title_filter, user, password,
# Supply information to config that would otherwise be defined in
# user-config.py
pywikibot.config2.family = 'cppreference'
- pywikibot.config2.mylang = 'en'
+ pywikibot.config2.mylang = 'zh'
pywikibot.config2.family_files['cppreference'] = url
pywikibot.config2.step = 100
pywikibot.config2.put_throttle = 0
diff --git a/index2ddg.py b/index2ddg.py
index 9789e56..7a3a3dc 100755
--- a/index2ddg.py
+++ b/index2ddg.py
@@ -447,7 +447,7 @@ def process_identifier(out, redirects, root, link, item_ident, item_type,
abstract = abstract.replace('\n','\\n')
line += abstract + '\t'
# source url
- line += 'http://en.cppreference.com/w/' + link + '\n'
+ line += 'http://zh.cppreference.com/w/' + link + '\n'
out.write(line)

build_redirects(redirects, item_ident, item_type)
diff --git a/index_transform/browser.py b/index_transform/browser.py
index d2e625c..82cb11f 100644
--- a/index_transform/browser.py
+++ b/index_transform/browser.py
@@ -42,7 +42,7 @@ class Index2Browser(IndexTransform):

res = u''
res += '<tt><b>' + xml_escape(full_name) + '</b></tt> [<span class="link">'
- res += '<a href="http://en.cppreference.com/w/' + xml_escape(full_link) + '">'
+ res += '<a href="http://zh.cppreference.com/w/' + xml_escape(full_link) + '">'
res += full_link + '</a></span>] <span class="mark">' + mark + '</span>\n'
return res

diff --git a/preprocess.py b/preprocess.py
index cb6e8cc..a8ac18f 100755
--- a/preprocess.py
+++ b/preprocess.py
@@ -28,12 +28,12 @@ def main():
parser.add_argument('--dst', type=str, help='Destination folder to put preprocessed archive to')
args = parser.parse_args()

- root = args.dst
- src = args.src
+ root = args.src
+ # src = args.src

# copy the source tree
- rmtree_if_exists(root)
- shutil.copytree(src, root)
+ # rmtree_if_exists(root)
+ # shutil.copytree(src, root)

rearrange_archive(root)

diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py
index e4aa687..d6f143f 100644
--- a/tests/test_preprocess.py
+++ b/tests/test_preprocess.py
@@ -6,23 +6,23 @@ from lxml import etree

class TestConvertLoaderName(unittest.TestCase):
def test_convert_loader_name(self):
- url = 'http://en.cppreference.com/mwiki/load.php?debug=false&lang=en&\
+ url = 'http://zh.cppreference.com/mwiki/load.php?debug=false&lang=*&\
modules=site&only=scripts&skin=cppreference2&*'
self.assertEqual('site_scripts.js', convert_loader_name(url))

- url = 'http://en.cppreference.com/mwiki/load.php?debug=false&lang=en&\
+ url = 'http://zh.cppreference.com/mwiki/load.php?debug=false&lang=*&\
modules=site&only=styles&skin=cppreference2&*'
self.assertEqual('site_modules.css', convert_loader_name(url))

- url = 'http://en.cppreference.com/mwiki/load.php?debug=false&lang=en&\
+ url = 'http://zh.cppreference.com/mwiki/load.php?debug=false&lang=*&\
modules=skins.cppreference2&only=scripts&skin=cppreference2&*'
self.assertEqual('skin_scripts.js', convert_loader_name(url))

- url = 'http://en.cppreference.com/mwiki/load.php?debug=false&lang=en&\
+ url = 'http://zh.cppreference.com/mwiki/load.php?debug=false&lang=*&\
modules=startup&only=scripts&skin=cppreference2&*'
self.assertEqual('startup_scripts.js', convert_loader_name(url))

- url = 'http://en.cppreference.com/mwiki/load.php?debug=false&lang=en&\
+ url = 'http://zh.cppreference.com/mwiki/load.php?debug=false&lang=*&\
modules=ext.gadget.ColiruCompiler%2CMathJax%7Cext.rtlcite%7Cmediawiki.\
legacy.commonPrint%2Cshared%7Cskins.cppreference2&only=styles&skin=\
cppreference2&*'

0 comments on commit 123ee6a

Please sign in to comment.