hxp
2020-09-14 ef3c73201add25c57e77b8a502fe7f337cbce9c7
1111 简繁体转换工具
4个文件已添加
8617 ■■■■■ 已修改文件
Tool/简繁体转换工具/chscht.py 56 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
Tool/简繁体转换工具/langconv.py 263 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
Tool/简繁体转换工具/zh_wiki.py 8288 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
Tool/简繁体转换工具/拖动文件导出繁体.bat 10 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
Tool/简繁体转换工具/chscht.py
New file
@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
from langconv import *
import sys
import os
# Print the running interpreter's version string and version tuple as soon as
# the module is loaded.
print(sys.version)
print(sys.version_info)
# Convert Traditional Chinese to Simplified Chinese
def cht_to_chs(line):
    """Convert Traditional Chinese text to Simplified Chinese.

    Args:
        line: Text to convert; handed as-is to the ``'zh-hans'`` converter.

    Returns:
        The text returned by ``Converter('zh-hans').convert(line)``.
    """
    # No encode step: the converted text is returned exactly as the converter
    # produced it, so any byte encoding is left to the caller.
    return Converter('zh-hans').convert(line)
# Convert Simplified Chinese to Traditional Chinese
def chs_to_cht(line):
    """Convert Simplified Chinese text to Traditional Chinese.

    Args:
        line: Text to convert; handed as-is to the ``'zh-hant'`` converter.

    Returns:
        The text returned by ``Converter('zh-hant').convert(line)``.
    """
    # No encode step: the converted text is returned exactly as the converter
    # produced it, so any byte encoding is left to the caller.
    return Converter('zh-hant').convert(line)
def printlog(msg):
    """Print a UTF-8 message re-encoded as GBK.

    Args:
        msg: Byte string holding UTF-8 text (Python 2 ``str``).

    Bytes that are not valid UTF-8 and characters that GBK cannot represent
    are substituted rather than raising, so writing a log line can never
    abort a run.
    """
    # A single parenthesised argument prints the same under the Python 2
    # print statement as the bare form does.
    print(msg.decode('utf-8', 'replace').encode('gbk', 'replace'))
def main():
    """Convert every file named on the command line to Traditional Chinese.

    Each path in ``sys.argv[1:]`` is read line by line, passed through
    ``chs_to_cht`` and written to a file of the same base name inside the
    ``ret`` directory (relative to the current working directory). Progress
    and a closing summary are reported through ``printlog``.

    This function targets Python 2: it relies on ``reload(sys)`` and
    ``sys.setdefaultencoding``.
    """
    print('Number of arguments: %d arguments.' % len(sys.argv))
    # Python 2 only: make UTF-8 the implicit codec for str/unicode coercion.
    reload(sys)
    sys.setdefaultencoding("utf-8")
    print("----------------------------")
    outputDir = "ret"
    # Create the output directory on demand instead of failing on first open.
    if not os.path.isdir(outputDir):
        os.makedirs(outputDir)
    exportCount = 0
    # sys.argv[0] is the script itself, so real inputs start at index 1.
    for filePath in sys.argv[1:]:
        fileBaseName = os.path.basename(filePath)  # file name with extension
        # The input is opened first so an unreadable path does not leave an
        # empty output file behind; ``with`` closes both files even on error.
        with open(filePath, "r") as fr:
            with open(os.path.join(outputDir, fileBaseName), "w") as fw:
                for line in fr:
                    fw.write(chs_to_cht(line))
        exportCount += 1
        printlog("转换成功: %s" % filePath)
    print("----------------------------")
    printlog("转换文件数: %s" % exportCount)
    printlog("转换结果文件夹 'ret' 文件夹下!")
    print("----------------------------")
# Run the conversion only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()
Tool/简繁体转换工具/langconv.py
New file
@@ -0,0 +1,263 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
from copy import deepcopy
from zh_wiki import zh2Hant, zh2Hans
# Empty string: initial value of accumulated text and the default target for
# keys that have no mapping of their own.
UEMPTY = ''
# States a StatesMachine can be in.
(START, END, FAIL, WAIT_TAIL) = list(range(4))
# Conditions StatesMachine.feed derives from the node it looked up.
(TAIL, ERROR, MATCHED_SWITCH, UNMATCHED_SWITCH, CONNECTOR) = list(range(5))
# Registry of ConvertMap objects keyed by name; filled by registery().
MAPS = {}
class Node(object):
    """Lookup result for one source string.

    Attributes:
        from_word: the source string that was looked up.
        to_word: replacement text; equals ``from_word`` when no replacement
            was supplied or the supplied one is empty.
        is_tail: flag passed in by the creator (defaults to True).
        have_child: flag passed in by the creator (defaults to False).
        is_original: True only when ``to_word`` was not supplied (None).
        data: ``(is_tail, have_child, word)`` where ``word`` is ``from_word``
            for an original node and the raw ``to_word`` argument otherwise.
    """

    def __init__(self, from_word, to_word=None, is_tail=True,
                 have_child=False):
        self.from_word = from_word
        self.is_tail = is_tail
        self.have_child = have_child
        # A node without an explicit target maps the word onto itself.
        self.is_original = to_word is None
        if self.is_original:
            self.to_word = from_word
            self.data = (is_tail, have_child, from_word)
        else:
            # An empty target falls back to the source word, but ``data``
            # still records the target exactly as given.
            self.to_word = to_word or from_word
            self.data = (is_tail, have_child, to_word)

    def is_original_long_word(self):
        """Tell whether this is an original node longer than one character."""
        if not self.is_original:
            return self.is_original
        return len(self.from_word) > 1

    def is_follow(self, chars):
        """Return True when ``chars`` differs from ``from_word`` minus its last item."""
        prefix = self.from_word[:-1]
        return chars != prefix

    def __str__(self):
        return '<Node, %r, %r, %s, %s>' % (
            self.from_word, self.to_word, self.is_tail, self.have_child)

    __repr__ = __str__
class ConvertMap(object):
    """Prefix table built from a ``{source: target}`` mapping.

    ``_map`` holds an entry for every mapping key and for every non-empty
    proper prefix of a key. Each entry is ``(is_tail, have_child, to_word)``:
    ``is_tail`` is True when the string itself is a mapping key,
    ``have_child`` is True when a longer key starts with it, and ``to_word``
    is the mapped target or ``UEMPTY`` for pure prefixes.

    Attributes:
        name: label given by the creator.
        max_key_length: length of the longest mapping key, 0 when no mapping
            has been set.
    """

    def __init__(self, name, mapping=None):
        self.name = name
        self._map = {}
        # Defined up front so the attribute exists even without a mapping.
        self.max_key_length = 0
        if mapping:
            self.set_convert_map(mapping)

    def set_convert_map(self, mapping):
        """Rebuild the prefix table from ``mapping`` (a dict of str to str)."""
        convert_map = {}
        have_child = {}
        max_key_length = 0
        # Sorted order is required: a key sorts before all of its extensions,
        # so the False stored for a key is written before any longer key
        # marks that same string as a prefix with True.
        for key in sorted(mapping.keys()):
            for i in range(1, len(key)):
                have_child[key[:i]] = True
            have_child[key] = False
            max_key_length = max(max_key_length, len(key))
        for key in sorted(have_child.keys()):
            convert_map[key] = (key in mapping, have_child[key],
                    mapping.get(key, UEMPTY))
        self._map = convert_map
        self.max_key_length = max_key_length

    def __getitem__(self, k):
        """Return a Node for ``k``; unknown strings yield a default ``Node(k)``."""
        try:
            is_tail, have_child, to_word = self._map[k]
        except KeyError:
            # Only a missing key means "not in the table"; anything else is a
            # real error and must not be masked.
            return Node(k)
        return Node(k, to_word, is_tail, have_child)

    def __contains__(self, k):
        return k in self._map

    def __len__(self):
        return len(self._map)
class StatesMachineException(Exception):
    """Error raised by StatesMachine.feed for a machine already in FAIL state."""
class StatesMachine(object):
    """One candidate conversion path through the input text.

    Attributes (all initialised in ``__init__``):
        state: current state, one of START, END, FAIL, WAIT_TAIL.
        final: converted text accumulated by this machine.
        len: number of times a target word has been appended to ``final``.
        pool: source characters collected so far for a word that is not yet
            complete.
    """
    def __init__(self):
        self.state = START
        self.final = UEMPTY
        self.len = 0
        self.pool = UEMPTY
    def clone(self, pool):
        """Return a deep copy of this machine in WAIT_TAIL state holding ``pool``."""
        new = deepcopy(self)
        new.state = WAIT_TAIL
        new.pool = pool
        return new
    def feed(self, char, map):
        """Advance the machine by one character.

        Looks up ``self.pool + char`` in ``map``, classifies the returned
        node into one of the condition constants and updates ``state``,
        ``final``, ``len`` and ``pool`` accordingly.

        Args:
            char: next input character.
            map: object indexable by string that returns a node exposing
                ``have_child``, ``is_tail``, ``is_original``, ``from_word``,
                ``to_word``, ``is_original_long_word()`` and ``is_follow()``.

        Returns:
            The branch machine created while handling ``char`` (via
            ``clone``), or None when no branch was created.

        Raises:
            StatesMachineException: when the machine is in FAIL state and
                the condition is neither ERROR nor TAIL.
        """
        node = map[self.pool+char]
        # Classify the node by its two flags: is_tail and have_child.
        if node.have_child:
            if node.is_tail:
                if node.is_original:
                    cond = UNMATCHED_SWITCH
                else:
                    cond = MATCHED_SWITCH
            else:
                cond = CONNECTOR
        else:
            if node.is_tail:
                cond = TAIL
            else:
                cond = ERROR
        new = None
        # ERROR and TAIL are handled first, whatever the current state is.
        if cond == ERROR:
            self.state = FAIL
        elif cond == TAIL:
            # A branch waiting for the rest of a word fails if what it got is
            # an original (unmapped) string longer than one character.
            if self.state == WAIT_TAIL and node.is_original_long_word():
                self.state = FAIL
            else:
                self.final += node.to_word
                self.len += 1
                self.pool = UEMPTY
                self.state = END
        elif self.state == START or self.state == WAIT_TAIL:
            if cond == MATCHED_SWITCH:
                # Fork before mutating: the clone keeps the current ``final``
                # and waits with the matched word as its pool, while this
                # machine emits the target word immediately.
                new = self.clone(node.from_word)
                self.final += node.to_word
                self.len += 1
                self.state = END
                self.pool = UEMPTY
            elif cond == UNMATCHED_SWITCH or cond == CONNECTOR:
                if self.state == START:
                    # Same fork as above; note that ``pool`` of this machine
                    # is left untouched in this branch.
                    new = self.clone(node.from_word)
                    self.final += node.to_word
                    self.len += 1
                    self.state = END
                else:
                    # WAIT_TAIL: is_follow() is True when ``self.pool`` is
                    # not ``from_word`` minus its last character.
                    if node.is_follow(self.pool):
                        self.state = FAIL
                    else:
                        self.pool = node.from_word
        elif self.state == END:
            # END is a new START: reset the state and process the same
            # character again.
            self.state = START
            new = self.feed(char, map)
        elif self.state == FAIL:
            raise StatesMachineException('Translate States Machine '
                    'have error with input data %s' % node)
        return new
    def __len__(self):
        # One more than the number of appended words, so the result is never
        # zero; Converter._clean sorts machines by this value.
        return self.len + 1
    def __str__(self):
        return '<StatesMachine %s, pool: "%s", state: %s, final: %s>' % (
                id(self), self.pool, self.state, self.final)
    __repr__ = __str__
class Converter(object):
    """Convert text with the ConvertMap stored in ``MAPS`` under a given name.

    The converter keeps a list of StatesMachine candidates. Each character is
    fed to all of them; whenever every surviving candidate is in END state,
    the output of the candidate with the smallest ``len()`` is appended to
    the result and the candidate list is reset to one fresh machine.
    """

    def __init__(self, to_encoding):
        self.to_encoding = to_encoding
        self.map = MAPS[to_encoding]
        self.start()

    def feed(self, char):
        """Feed one character to every candidate and return the text so far."""
        outcomes = (machine.feed(char, self.map) for machine in self.machines)
        forked = [branch for branch in outcomes if branch]
        self.machines.extend(forked)
        # Failed candidates are dropped before checking for completion.
        self.machines = [m for m in self.machines if m.state != FAIL]
        if all(m.state == END for m in self.machines):
            self._clean()
        return self.get_result()

    def _clean(self):
        """Commit the shortest candidate's text and restart with one machine."""
        if self.machines:
            # The first candidate with the smallest len() wins.
            winner = min(self.machines, key=len)
            self.final += winner.final
        self.machines = [StatesMachine()]

    def start(self):
        """Reset the converter to an empty result and one fresh machine."""
        self.final = UEMPTY
        self.machines = [StatesMachine()]

    def end(self):
        """Finish the input: keep FAIL/END candidates and commit the result."""
        finished = (FAIL, END)
        self.machines = [m for m in self.machines if m.state in finished]
        self._clean()

    def convert(self, string):
        """Convert ``string`` character by character and return the result."""
        self.start()
        for ch in string:
            self.feed(ch)
        self.end()
        return self.get_result()

    def get_result(self):
        """Return the text committed so far."""
        return self.final
def registery(name, mapping):
    """Build a ConvertMap from ``mapping`` and store it in MAPS under ``name``."""
    convert_map = ConvertMap(name, mapping)
    MAPS[name] = convert_map
# Register both conversion tables when the module is loaded.
registery('zh-hant', zh2Hant)
registery('zh-hans', zh2Hans)
# Remove the module-level names of the raw tables; MAPS holds the ConvertMap
# objects that were built from them.
del zh2Hant, zh2Hans
def run():
    """Command-line entry point: convert UTF-8 text line by line.

    Options:
        -e  name of a registered conversion (required), passed to Converter.
        -f  input file; ``-`` or omitted means stdin.
        -t  output file; ``-`` or omitted means stdout.

    Each input line is decoded from UTF-8, converted and written back as
    UTF-8 with its original line ending. This function targets Python 2,
    where lines read from a file are byte strings.
    """
    import sys
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option('-e', type='string', dest='encoding',
            help='encoding')
    parser.add_option('-f', type='string', dest='file_in',
            help='input file (- for stdin)')
    parser.add_option('-t', type='string', dest='file_out',
            help='output file')
    options, _ = parser.parse_args()
    if not options.encoding:
        parser.error('encoding must be set')
    # Build the converter before touching any file so that an unknown
    # conversion name cannot truncate the output file.
    c = Converter(options.encoding)
    # Only files opened here are closed here; stdin/stdout are left alone.
    opened = []
    try:
        if options.file_in and options.file_in != '-':
            file_in = open(options.file_in)
            opened.append(file_in)
        else:
            file_in = sys.stdin
        if options.file_out and options.file_out != '-':
            file_out = open(options.file_out, 'wb')
            opened.append(file_out)
        else:
            file_out = sys.stdout
        for line in file_in:
            text = line.rstrip('\n')
            # Re-attach exactly what was stripped so line breaks survive in
            # the output instead of all lines running together.
            newline = line[len(text):]
            file_out.write(
                c.convert(text.decode('utf8')).encode('utf8') + newline)
    finally:
        for handle in opened:
            handle.close()
# Start the command-line interface only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    run()
Tool/简繁体转换工具/zh_wiki.py
New file
Diff too large
Tool/简繁体转换工具/拖动文件导出繁体.bat
New file
@@ -0,0 +1,10 @@
@echo off
rem Work from the folder that contains this script so chscht.py is found.
rem /d also switches the drive, and the quotes keep paths with spaces intact.
cd /d "%~dp0"
setlocal enabledelayedexpansion
rem Collect every path passed to the script into one argument string.
set str=
for %%i in (%*) do (set str=!str! %%i)
::echo !str!
python chscht.py !str!
pause