# -*- coding: utf-8 -*-
|
from langconv import *
|
import sys
|
import os
|
import ConfigParser
|
import time
|
import traceback
|
|
# Report which interpreter is running the script: first the full version
# string, then the structured version tuple.
print(sys.version)
print(sys.version_info)
|
|
def cht_to_chs(line):
    """Convert Traditional Chinese text to Simplified Chinese.

    Args:
        line: Text handed to ``Converter('zh-hans').convert``.

    Returns:
        The value produced by ``Converter.convert``, unchanged.
    """
    simplified = Converter('zh-hans').convert(line)
    # The encoded copy is discarded; only the converter output is returned.
    # NOTE(review): presumably UTF-8 bytes were meant to be returned here --
    # confirm with callers before changing the return value.
    simplified.encode('utf-8')
    return simplified
|
|
def chs_to_cht(line):
    """Convert Simplified Chinese text to Traditional Chinese.

    Args:
        line: Text handed to ``Converter('zh-hant').convert``.

    Returns:
        The value produced by ``Converter.convert``, unchanged.
    """
    traditional = Converter('zh-hant').convert(line)
    # The encoded copy is discarded; only the converter output is returned.
    # NOTE(review): presumably UTF-8 bytes were meant to be returned here --
    # confirm with callers before changing the return value.
    traditional.encode('utf-8')
    return traditional
|
|
def convertByCode(line, code):
    """Convert text using the conversion table selected by ``code``.

    Args:
        line: Text handed to ``Converter(code).convert``.
        code: Name of the conversion to apply; passed straight to
            ``Converter``.

    Returns:
        The value produced by ``Converter.convert``, unchanged.
    """
    converter = Converter(code)
    converted = converter.convert(line)
    # The encoded copy is discarded; only the converter output is returned.
    # NOTE(review): presumably UTF-8 bytes were meant to be returned here --
    # confirm with callers before changing the return value.
    converted.encode('utf-8')
    return converted
|
|
def printlog(msg):
    """Print a UTF-8 encoded message re-encoded as GBK.

    The message is decoded from UTF-8 and encoded to GBK before printing
    (presumably so it displays correctly on a GBK console -- confirm against
    the deployment environment).

    Both steps use the ``replace`` error handler: a log helper should never
    abort the run just because a message contains invalid UTF-8 or a
    character GBK cannot represent. Such characters are printed as
    replacement markers instead of raising UnicodeDecodeError /
    UnicodeEncodeError.

    Args:
        msg: UTF-8 encoded byte string to print.
    """
    print(msg.decode('utf-8', 'replace').encode('gbk', 'replace'))
|
|
|
def _load_mapping(map_path):
    """Read a tab-separated conversion table into a dict.

    Each usable line has the form ``source<TAB>target``; carriage returns and
    newlines are stripped from the target. Lines without a tab (for example a
    blank line at the end of the file) are skipped rather than raising
    IndexError.
    """
    mapping = {}
    with open(map_path, "r") as map_file:
        for line in map_file:
            fields = line.split("\t")
            if len(fields) < 2:
                continue
            mapping[fields[0]] = fields[1].replace("\r", "").replace("\n", "")
    return mapping


def _prepare_output_dir(write_path):
    """Create ``write_path``, or delete every file in it if it exists."""
    if not os.path.exists(write_path):
        os.makedirs(write_path)
        return
    # The directory already exists: remove all files so that leftovers from
    # an earlier export do not linger. Sub-directories themselves are kept.
    for root, _, files in os.walk(write_path):
        for name in files:
            os.remove(os.path.join(root, name))


def _is_ignored(filename, ignore_list):
    """Return True if ``filename`` is in, or starts with an entry of, ``ignore_list``."""
    if not ignore_list:
        return False
    if filename in ignore_list:
        return True
    return any(filename.startswith(prefix) for prefix in ignore_list)


def main():
    """Convert the selected ``.txt`` files under the configured path.

    Usage: ``chscht.py <version-key>``. The first command-line argument is
    used both as the option name looked up in the ``[config]`` section of
    ``config/config.ini`` (its value names the mapping file inside
    ``config/``) and as the conversion code registered through ``registery``
    and passed to ``convertByCode``.

    Options read from ``[config]``:
        path: Directory tree that is walked for ``.txt`` files.
        resultByTime: Non-zero adds a timestamp sub-directory to the output.
        parseFileList: If non-empty, only these file names are converted.
        ignoreFileList: File names, or file-name prefixes, to skip.

    Output is written to ``result/<version-key>[/<timestamp>]`` using only
    the base file name of each input; files already present in that
    directory are removed first. Processing stops at the first line that
    fails to convert, after logging the file name and the traceback.
    """
    print('Number of arguments: %d arguments.' % len(sys.argv))
    # Python 2 only: make UTF-8 the default codec for implicit conversions
    # between byte strings and unicode objects.
    reload(sys)
    sys.setdefaultencoding("utf-8")
    # Element 0 is the script name itself; real arguments start at index 1.
    argv = sys.argv[1:]
    if not argv:
        printlog("没有配置运行版本库文件配置参数! chscht.py 版本库文件名配置")
        return
    zh_version = argv[0]

    cfg = ConfigParser.ConfigParser()
    cfg.read("config/config.ini")
    path = cfg.get("config", "path")
    resultByTime = int(cfg.get("config", "resultByTime"))
    # NOTE(security): eval() executes whatever expression the config file
    # contains. Acceptable only while config.ini is trusted; consider
    # ast.literal_eval if the lists are always plain literals.
    parseFileList = eval(cfg.get("config", "parseFileList"))
    ignoreFileList = eval(cfg.get("config", "ignoreFileList"))
    zh_hantFileName = cfg.get("config", zh_version)
    print("----------------------------")
    printlog("转化的路径: %s" % path)
    printlog("转化对应表: %s" % zh_hantFileName)

    if resultByTime:
        curTimeStr = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
        writePath = r"result/%s/%s" % (zh_version, curTimeStr)
    else:
        writePath = r"result/%s" % (zh_version)
    _prepare_output_dir(writePath)
    print("----------------------------")

    # Load the conversion word table and register it under the version key.
    registery(zh_version, _load_mapping(r"config/%s" % zh_hantFileName))

    # Start converting.
    ignoreFiles = []
    totalCount = 0
    exportCount = 0
    for parent, _, filenames in os.walk(path):
        for filename in filenames:
            fullPath = os.path.join(parent, filename)
            if not fullPath.lower().endswith(".txt"):
                continue

            # Every .txt file counts toward the total, converted or not.
            totalCount += 1
            if parseFileList and filename not in parseFileList:
                continue
            if _is_ignored(filename, ignoreFileList):
                ignoreFiles.append(filename)
                continue

            with open(fullPath, "r") as fr:
                lines = fr.readlines()
            # os.path.join instead of a hard-coded backslash so the output
            # lands inside writePath on every platform.
            with open(os.path.join(writePath, filename), "w") as fw:
                for line in lines:
                    try:
                        fw.write(convertByCode(line, zh_version))
                    except Exception:
                        printlog("错误文件: %s" % filename)
                        print(traceback.format_exc())
                        # Abort the whole run; the with-block closes fw.
                        return
            exportCount += 1
            printlog("转换成功: %s" % fullPath)

    print("----------------------------")
    printlog("忽略的文件: %s %s" % (len(ignoreFiles), ignoreFiles))
    printlog("转换文件数: %s / %s" % (exportCount, totalCount))
    printlog("导出到目录: %s" % writePath)
    print("----------------------------")
|
|
# Entry point: run the conversion only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|