使用Python进行Google翻译

start

有个需求:用 Google 翻译 8 集俄剧的字幕文件。纯俄语字幕看不太懂,而手动复制到 Google 翻译里,字幕格式又会出问题,所以直接写个脚本来处理。

代码

# -*- coding: UTF-8 -*-


import re,time
import html
from urllib import parse
import requests

# Google Translate URL template (the `/m` endpoint). The three %s placeholders
# are filled, in order, with the URL-quoted query text (q), the target
# language code (tl) and the source language code (sl).
GOOGLE_TRANSLATE_URL = 'http://translate.google.co.jp/m?q=%s&tl=%s&sl=%s'

def translate(text, to_language="auto", text_language="auto"):
    """Translate ``text`` by scraping the Google Translate result page.

    Args:
        text: Text to translate; it is URL-quoted before being sent.
        to_language: Target language code, sent as the ``tl`` query parameter.
        text_language: Source language code, sent as the ``sl`` query
            parameter.

    Returns:
        The HTML-unescaped content of the first element in the response whose
        class is ``t0`` or ``result-container``, or ``""`` when the response
        contains no such element.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails,
            including ``requests.exceptions.Timeout`` when the server does
            not answer within 10 seconds.
    """
    url = GOOGLE_TRANSLATE_URL % (parse.quote(text), to_language, text_language)
    # Without a timeout requests waits indefinitely, so one stalled connection
    # would hang an entire subtitle run.
    response = requests.get(url, timeout=10)
    # (?s) lets "." span newlines in case the translated text is multi-line.
    matches = re.findall(
        r'(?s)class="(?:t0|result-container)">(.*?)<', response.text
    )
    if not matches:
        return ""

    return html.unescape(matches[0])
    
    
def vtt_translate(vtt_new, vtt):
    """Write a bilingual copy of subtitle lines to ``vtt_new``.

    Lines containing ``-->`` (cue timings) and empty lines are copied through
    unchanged. Every other line is passed to ``translate`` (Russian ``ru`` to
    ``zh-CN``) and written as ``<original>  <translation>``. Each written line
    is also echoed to stdout, followed by a ``current / total`` progress line.

    NOTE: any other non-empty line (e.g. a header or cue identifier) is also
    sent for translation, since only ``-->`` and empty lines are filtered.

    Args:
        vtt_new: Writable text file-like object that receives the output.
        vtt: Sequence of subtitle lines without trailing newlines.
    """
    total = len(vtt)
    for num_vtt, text_line in enumerate(vtt, start=1):
        # Use truthiness rather than `is ''`: identity comparison against a
        # string literal is implementation-dependent and warns on Python 3.8+.
        if '-->' in text_line or not text_line:
            vtt_new.write(text_line + '\n')
            print(text_line)
        else:
            vtt_cn = translate(text_line, "zh-CN", "ru")
            bilingual = text_line + '  ' + vtt_cn
            vtt_new.write(bilingual + '\n')
            print(bilingual)
            # Pause between requests (presumably to avoid being rate-limited
            # by the translation endpoint).
            time.sleep(1)

        print('%d / %d' % (num_vtt, total))
        
if __name__ == '__main__':
    # Set the encoding explicitly: the platform default (e.g. a legacy code
    # page on Windows) may be unable to decode the Cyrillic input or encode
    # the Chinese output. WebVTT files are UTF-8 by specification.
    # The input is read first so that a missing source file does not leave an
    # empty output file behind, and `with` guarantees both handles are closed.
    with open('Налет_S2_E2_1080.vtt', 'r', encoding='utf-8') as vtt_file:
        vtt = vtt_file.read().split('\n')
    with open('Налет_S2_E2_1080_zh-CN.vtt', 'w', encoding='utf-8') as vtt_new:
        vtt_translate(vtt_new, vtt)

Q.E.D.


一名北漂的网安工程师,希望这次能好好工作,不被毕业吧…